// path: root/generator/generator.h
// blob: a97b0b048a16619182fd2b9a8582b0d0f72773aa
#ifndef GENERATOR_H_D5C6A724
#define GENERATOR_H_D5C6A724

#include <optional>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

// Identifies a kind of puzzle. In this header it is used as the key of
// generator::Form::puzzles, which maps each puzzle kind to a set of ids.
//
// Every enumerator is named <colour><position> (white/yellow/black combined
// with top/middle/bottom). What each combination means for puzzle generation
// is decided by the code that fills Form::puzzles, which is not visible here.
//
// This is a plain (unscoped) enum: the enumerators live in the enclosing
// scope and convert implicitly to int. The numeric values are spelled out
// explicitly rather than left to implicit numbering.
// NOTE(review): presumably these numbers are consumed outside this program
// (e.g. written to the output) -- confirm before renumbering or reordering.
enum PuzzleType {
  kWhiteTop = 0,
  kWhiteBottom = 1,
  kYellowTop = 2,
  kYellowMiddle = 3,
  kBlackTop = 4,
  kBlackMiddle = 5,
};

// Holds the configuration (input/output paths) and the in-memory indexes of
// pronunciations, forms, words, anagram/anaphone sets and synsets declared
// below. run() is the single public action.
//
// NOTE(review): all member functions are only declared here; their
// definitions live in another translation unit. Descriptions of behaviour
// below are inferred from names and signatures and are marked as such --
// confirm against the implementation.
class generator {
 public:
  // Constructor
  //
  // Takes the locations of four inputs and one output. Going by the
  // parameter names the inputs are AGID, WordNet, CMUdict and word-frequency
  // data. The arguments are taken by value; presumably they are stored in
  // the correspondingly named *Path_ members below.

  generator(std::string agidPath, std::string wordNetPath,
            std::string cmudictPath, std::string wordfreqPath,
            std::string outputPath);

  // Action
  //
  // Entry point of the generator. Presumably reads the inputs, fills the
  // indexes and writes the output -- TODO confirm in the implementation.

  void run();

 private:
  // Helpers
  //
  // The LookupOrCreate* functions take a string key and return a size_t;
  // presumably that is the id of the existing or newly created record.
  // The Add* functions take ids and return nothing; presumably they link
  // the two records in the indexes below.

  // Keyed by a phoneme string; see pronunciation_by_phonemes_.
  size_t LookupOrCreatePronunciation(const std::string& phonemes);

  // Keyed by the form's text; see form_by_text_.
  size_t LookupOrCreateForm(const std::string& form);

  // Keyed by a word string; see word_by_base_.
  size_t LookupOrCreateWord(const std::string& word);

  void AddPronunciationToForm(size_t pronunciation_id, size_t form_id);

  void AddFormToWord(size_t form_id, size_t word_id);

  // `wnid` is an int key, matching the key type of synset_by_wnid_.
  void AddWordToSynset(size_t word_id, int wnid);

  // `sorted_letters` matches the key of anagram_set_by_sorted_letters_.
  void AddFormToAnagramSet(size_t form_id, const std::string& sorted_letters);

  // `sorted_phonemes` matches the key of anaphone_set_by_sorted_phonemes_.
  void AddPronunciationToAnaphoneSet(size_t pronunciation_id,
                                     const std::string& sorted_phonemes);

  // Input

  std::string agidPath_;
  std::string wordNetPath_;
  std::string cmudictPath_;
  std::string wordfreqPath_;

  // Output

  std::string outputPath_;

  // Indexes
  //
  // Records refer to each other through size_t ids rather than pointers.
  // NOTE(review): ids look like positions in the matching vector
  // (pronunciations_, forms_, words_, ...) -- confirm in the implementation.
  //
  // The scalar id fields carry `= 0` default member initializers so that a
  // default-initialized record never holds an indeterminate value. The
  // structs remain aggregates (C++14 and later), so brace initialization of
  // them is unaffected.

  // One pronunciation, identified by its phoneme string.
  struct Pronunciation {
    size_t id = 0;
    std::string phonemes;
    std::string prerhyme;  // meaning not visible in this header
    std::string rhyme;     // also the key type of pronunciations_by_rhyme_
    std::vector<size_t> form_ids;
    std::optional<size_t> anaphone_set_id;  // empty until assigned to a set
    std::string stressless_phonemes;  // presumably phonemes minus stress marks
  };

  // One written form (spelling).
  struct Form {
    size_t id = 0;
    std::string text;
    std::vector<size_t> word_ids;
    std::vector<size_t> pronunciation_ids;
    std::optional<size_t> anagram_set_id;   // empty until assigned to a set
    std::optional<size_t> reverse_form_id;  // empty when there is none

    // Per puzzle kind, an ordered set of ids.
    // NOTE(review): what the ids refer to is not visible here -- confirm.
    std::unordered_map<PuzzleType, std::set<size_t>> puzzles;
  };

  // One word: a base form plus the list of its forms and synsets.
  struct Word {
    size_t id = 0;
    size_t base_form_id = 0;
    std::vector<size_t> form_ids;
    std::vector<size_t> synsets;
  };

  // Pronunciation records and string-keyed lookups into them.
  std::vector<Pronunciation> pronunciations_;
  std::unordered_map<std::string, size_t> pronunciation_by_phonemes_;
  std::unordered_map<std::string, std::vector<size_t>> pronunciations_by_rhyme_;
  // NOTE(review): "blank phonemes" is not defined in this header -- confirm
  // what key the implementation uses.
  std::unordered_map<std::string, std::vector<size_t>>
      pronunciations_by_blank_phonemes_;

  // Each anaphone set is a list of ids; sets are looked up by a
  // sorted-phonemes string.
  std::vector<std::vector<size_t>> anaphone_sets_;
  std::unordered_map<std::string, size_t> anaphone_set_by_sorted_phonemes_;

  // Form records and lookup by text.
  std::vector<Form> forms_;
  std::unordered_map<std::string, size_t> form_by_text_;

  // Each anagram set is a list of ids; sets are looked up by a
  // sorted-letters string.
  std::vector<std::vector<size_t>> anagram_sets_;
  std::unordered_map<std::string, size_t> anagram_set_by_sorted_letters_;

  // Word records and lookup by base string.
  std::vector<Word> words_;
  std::unordered_map<std::string, size_t> word_by_base_;

  // Each synset is a list of ids; sets are looked up by an int `wnid`.
  std::vector<std::vector<size_t>> synsets_;
  std::unordered_map<int, size_t> synset_by_wnid_;
};

#endif /* end of include guard: GENERATOR_H_D5C6A724 */