summary refs log tree commit diff stats
path: root/generator/generator.h
diff options
context:
space:
mode:
Diffstat (limited to 'generator/generator.h')
-rw-r--r--generator/generator.h114
1 files changed, 114 insertions, 0 deletions
diff --git a/generator/generator.h b/generator/generator.h new file mode 100644 index 0000000..a97b0b0 --- /dev/null +++ b/generator/generator.h
@@ -0,0 +1,114 @@
1#ifndef GENERATOR_H_D5C6A724
2#define GENERATOR_H_D5C6A724
3
#include <cstddef>
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
9
// Category tag for a puzzle. Within this header it is used as the key type of
// Form::puzzles (PuzzleType -> set of ids).
//
// Deliberately left as an unscoped enum so existing unqualified uses of the
// enumerators keep compiling.
// NOTE(review): the numeric values are spelled out explicitly; presumably they
// are stored or emitted somewhere outside this header - confirm before
// renumbering or reordering.
enum PuzzleType {
  kWhiteTop = 0,
  kWhiteBottom = 1,
  kYellowTop = 2,
  kYellowMiddle = 3,
  kBlackTop = 4,
  kBlackMiddle = 5,
};
18
19class generator {
20 public:
21 // Constructor
22
23 generator(std::string agidPath, std::string wordNetPath,
24 std::string cmudictPath, std::string wordfreqPath,
25 std::string outputPath);
26
27 // Action
28
29 void run();
30
31 private:
32 // Helpers
33
34 size_t LookupOrCreatePronunciation(const std::string& phonemes);
35
36 size_t LookupOrCreateForm(const std::string& form);
37
38 size_t LookupOrCreateWord(const std::string& word);
39
40 void AddPronunciationToForm(size_t pronunciation_id, size_t form_id);
41
42 void AddFormToWord(size_t form_id, size_t word_id);
43
44 void AddWordToSynset(size_t word_id, int wnid);
45
46 void AddFormToAnagramSet(size_t form_id, const std::string& sorted_letters);
47
48 void AddPronunciationToAnaphoneSet(size_t pronunciation_id,
49 const std::string& sorted_phonemes);
50
51 // Input
52
53 std::string agidPath_;
54 std::string wordNetPath_;
55 std::string cmudictPath_;
56 std::string wordfreqPath_;
57
58 // Output
59
60 std::string outputPath_;
61
62 // Indexes
63
64 struct Pronunciation {
65 size_t id;
66 std::string phonemes;
67 std::string prerhyme;
68 std::string rhyme;
69 std::vector<size_t> form_ids;
70 std::optional<size_t> anaphone_set_id;
71 std::string stressless_phonemes;
72 };
73
74 struct Form {
75 size_t id;
76 std::string text;
77 std::vector<size_t> word_ids;
78 std::vector<size_t> pronunciation_ids;
79 std::optional<size_t> anagram_set_id;
80 std::optional<size_t> reverse_form_id;
81
82 std::unordered_map<PuzzleType, std::set<size_t>> puzzles;
83 };
84
85 struct Word {
86 size_t id;
87 size_t base_form_id;
88 std::vector<size_t> form_ids;
89 std::vector<size_t> synsets;
90 };
91
92 std::vector<Pronunciation> pronunciations_;
93 std::unordered_map<std::string, size_t> pronunciation_by_phonemes_;
94 std::unordered_map<std::string, std::vector<size_t>> pronunciations_by_rhyme_;
95 std::unordered_map<std::string, std::vector<size_t>>
96 pronunciations_by_blank_phonemes_;
97
98 std::vector<std::vector<size_t>> anaphone_sets_;
99 std::unordered_map<std::string, size_t> anaphone_set_by_sorted_phonemes_;
100
101 std::vector<Form> forms_;
102 std::unordered_map<std::string, size_t> form_by_text_;
103
104 std::vector<std::vector<size_t>> anagram_sets_;
105 std::unordered_map<std::string, size_t> anagram_set_by_sorted_letters_;
106
107 std::vector<Word> words_;
108 std::unordered_map<std::string, size_t> word_by_base_;
109
110 std::vector<std::vector<size_t>> synsets_;
111 std::unordered_map<int, size_t> synset_by_wnid_;
112};
113
114#endif /* end of include guard: GENERATOR_H_D5C6A724 */ \ No newline at end of file