#ifndef GENERATOR_H_D5C6A724
#define GENERATOR_H_D5C6A724
#include <filesystem>
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
// Categories of puzzle; used as the key type of Form::puzzles.
// NOTE(review): the enumerators look like a colour (white/yellow/black)
// paired with a position (top/middle/bottom) -- confirm the exact meaning
// against the implementation.
// The numeric values are spelled out explicitly; presumably they are relied
// on outside this header (e.g. written to the output), so verify before
// renumbering or reordering.
enum PuzzleType {
  kWhiteTop = 0,
  kWhiteBottom = 1,
  kYellowTop = 2,
  kYellowMiddle = 3,
  kBlackTop = 4,
  kBlackMiddle = 5,
};
// Holds the input/output locations and the in-memory indexes
// (pronunciations, forms, words, synsets, anagram sets, anaphone sets) used
// by run().
// NOTE(review): member functions are only declared here; the behavioural
// notes below are inferred from the declarations and should be confirmed
// against the implementation file.
class generator {
 public:
  // Constructor
  // Receives the locations of the input resources and of the output.
  // NOTE(review): presumably agidPath, wordNetPath, cmudictPath and
  // wordfreqPath locate the AGID, WordNet, CMUdict and word-frequency data
  // respectively, and each argument is stored in the same-named member
  // below -- confirm.
  generator(std::string agidPath, std::string wordNetPath,
            std::string cmudictPath, std::string wordfreqPath,
            std::string datadirPath, std::string outputPath);

  // Action
  // Entry point of the generator; takes no arguments and returns nothing.
  void run();

 private:
  // Helpers
  // The LookupOrCreate* functions take a textual key and return a size_t.
  // NOTE(review): presumably the returned value is the id of the existing or
  // newly created entry in pronunciations_ / forms_ / words_ -- confirm.
  size_t LookupOrCreatePronunciation(const std::string& phonemes);
  size_t LookupOrCreateForm(const std::string& form);
  size_t LookupOrCreateWord(const std::string& word);

  // The Add* functions link two entities identified by id (or, for sets, by
  // a key string / WordNet id).
  // NOTE(review): whether duplicate links are rejected is not visible here.
  void AddPronunciationToForm(size_t pronunciation_id, size_t form_id);
  void AddFormToWord(size_t form_id, size_t word_id);
  void AddWordToSynset(size_t word_id, int wnid);
  void AddFormToAnagramSet(size_t form_id, const std::string& sorted_letters);
  void AddPronunciationToAnaphoneSet(size_t pronunciation_id,
                                     const std::string& sorted_phonemes);

  // Input
  std::string agidPath_;
  std::string wordNetPath_;
  std::string cmudictPath_;
  std::string wordfreqPath_;
  std::filesystem::path datadirPath_;

  // Output
  std::string outputPath_;

  // Indexes
  //
  // Scalar ids in the records below carry default member initializers so a
  // default-constructed record never holds an indeterminate value. The
  // records remain aggregates (C++14 and later), so brace-initialization
  // keeps working. The std::optional members start out empty.

  // One pronunciation and its cross-references.
  struct Pronunciation {
    size_t id = 0;
    std::string phonemes;
    std::string prerhyme;
    std::string rhyme;
    std::vector<size_t> form_ids;
    std::optional<size_t> anaphone_set_id;  // empty until assigned
    std::string stressless_phonemes;
  };

  // One written form and its cross-references.
  struct Form {
    size_t id = 0;
    std::string text;
    std::vector<size_t> word_ids;
    std::vector<size_t> pronunciation_ids;
    std::optional<size_t> anagram_set_id;   // empty until assigned
    std::optional<size_t> reverse_form_id;  // empty until assigned
    // Per puzzle type, an ordered set of size_t values.
    // NOTE(review): what the values identify is not visible here.
    std::unordered_map<PuzzleType, std::set<size_t>> puzzles;
  };

  // One word, its base form and its other forms.
  struct Word {
    size_t id = 0;
    size_t base_form_id = 0;
    std::vector<size_t> form_ids;
    // NOTE(review): presumably indexes into synsets_ -- confirm.
    std::vector<size_t> synsets;
  };

  // Record storage plus string-keyed lookup tables.
  // NOTE(review): presumably each *_by_* map resolves its key to an index
  // into the corresponding vector -- confirm.
  std::vector<Pronunciation> pronunciations_;
  std::unordered_map<std::string, size_t> pronunciation_by_phonemes_;
  std::unordered_map<std::string, std::vector<size_t>> pronunciations_by_rhyme_;
  std::unordered_map<std::string, std::vector<size_t>>
      pronunciations_by_blank_phonemes_;

  std::vector<std::vector<size_t>> anaphone_sets_;
  std::unordered_map<std::string, size_t> anaphone_set_by_sorted_phonemes_;

  std::vector<Form> forms_;
  std::unordered_map<std::string, size_t> form_by_text_;

  std::vector<std::vector<size_t>> anagram_sets_;
  std::unordered_map<std::string, size_t> anagram_set_by_sorted_letters_;

  std::vector<Word> words_;
  std::unordered_map<std::string, size_t> word_by_base_;

  std::vector<std::vector<size_t>> synsets_;
  std::unordered_map<int, size_t> synset_by_wnid_;
};
#endif /* end of include guard: GENERATOR_H_D5C6A724 */