summary refs log tree commit diff stats
path: root/generator/generator.h
blob: 21774d7bb29049ebe550b1e1337d1a3750da439d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#ifndef GENERATOR_H_D5C6A724
#define GENERATOR_H_D5C6A724

#include <cstddef>
#include <filesystem>
#include <map>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

// Identifies a kind of puzzle. Every enumerator except kColorIs is named
// <Color><Position>, with positions drawn from Top / Middle / Bottom.
//
// NOTE(review): the numeric values are spelled out explicitly, presumably
// because they are persisted or shared with other tools -- confirm before
// renumbering, reordering, or inserting entries.
enum PuzzleType {
  // White
  kWhiteTop = 0,
  kWhiteBottom = 1,

  // Yellow
  kYellowTop = 2,
  kYellowMiddle = 3,

  // Black (including the "double black" variant)
  kBlackTop = 4,
  kBlackMiddle = 5,
  kBlackBottom = 6,
  kDoubleBlackBottom = 7,

  // Red
  kRedTop = 8,
  kRedMiddle = 9,
  kRedBottom = 10,

  // Blue
  kBlueTop = 11,
  kBlueMiddle = 12,
  kBlueBottom = 13,

  // Purple
  kPurpleTop = 14,
  kPurpleMiddle = 15,

  // Not tied to a position.
  kColorIs = 16,
};

// Holds the input/output locations and the in-memory indexes (pronunciations,
// forms, words, synsets, anagram/anaphone sets, combo puzzles) declared below.
//
// NOTE(review): only declarations are visible in this header. Member
// semantics described here are read off names and types and should be
// confirmed against the implementation file.
class generator {
 public:
  // Constructor

  // Takes one path per input source plus the output location. Each argument
  // has a correspondingly named member below (agidPath -> agidPath_, ...).
  // NOTE(review): whether construction touches the filesystem is not visible
  // from this header.
  generator(std::string agidPath, std::string wordNetPath,
            std::string cmudictPath, std::string wordfreqPath,
            std::string datadirPath, std::string outputPath);

  // Action

  // Entry point for the generator's work.
  // NOTE(review): presumably reads the inputs and writes to outputPath_;
  // confirm in the implementation.
  void run();

 private:
  // Helpers

  // NOTE(review): going by their names, the LookupOrCreate* helpers return
  // the id of the record matching the given key, creating it when absent, and
  // the Add*To* helpers link two records by id. Confirm in the implementation.

  size_t LookupOrCreatePronunciation(const std::string& phonemes);

  size_t LookupOrCreateForm(const std::string& form);

  size_t LookupOrCreateWord(const std::string& word);

  void AddPronunciationToForm(size_t pronunciation_id, size_t form_id);

  void AddFormToWord(size_t form_id, size_t word_id);

  // Note: wnid is an int here, unlike the size_t record ids used elsewhere.
  void AddWordToSynset(size_t word_id, int wnid);

  void AddFormToAnagramSet(size_t form_id, const std::string& sorted_letters);

  void AddPronunciationToAnaphoneSet(size_t pronunciation_id,
                                     const std::string& sorted_phonemes);

  // `text` is taken by value. NOTE(review): presumably results are recorded
  // in combos_ under (left_type, right_type); confirm in the implementation.
  void FindComboPuzzles(std::string text, PuzzleType left_type,
                        PuzzleType right_type);

  // Input

  std::string agidPath_;
  std::string wordNetPath_;
  std::string cmudictPath_;
  std::string wordfreqPath_;
  // Unlike the other inputs, this one is held as a filesystem path.
  std::filesystem::path datadirPath_;

  // Output

  std::string outputPath_;

  // Indexes
  //
  // NOTE(review): the size_t ids stored in these records and maps appear to
  // be positions in the corresponding vectors below (e.g. a form id indexing
  // forms_) -- confirm before relying on it.

  struct Pronunciation {
    // Zero-initialized so a default-constructed record never carries an
    // indeterminate id.
    size_t id = 0;
    std::string phonemes;
    std::string prerhyme;
    std::string rhyme;
    std::vector<size_t> form_ids;
    // Empty until the pronunciation is placed in an anaphone set.
    std::optional<size_t> anaphone_set_id;
    std::string stressless_phonemes;
  };

  struct Form {
    // Zero-initialized so a default-constructed record never carries an
    // indeterminate id.
    size_t id = 0;
    std::string text;
    bool is_base_form = false;
    std::vector<size_t> word_ids;
    std::vector<size_t> pronunciation_ids;
    // The optional links below are empty when no such relation is recorded.
    std::optional<size_t> anagram_set_id;
    std::optional<size_t> reverse_form_id;
    std::optional<int> ciphered;

    // Per puzzle type, an ordered set of size_t ids.
    // NOTE(review): what the ids refer to is not visible in this header.
    std::unordered_map<PuzzleType, std::set<size_t>> puzzles;
  };

  struct Word {
    // Both ids are zero-initialized so a default-constructed record never
    // carries indeterminate values.
    size_t id = 0;
    size_t base_form_id = 0;
    std::vector<size_t> form_ids;
    std::vector<size_t> synsets;
  };

  // Pronunciation records and their string-keyed lookup tables.
  std::vector<Pronunciation> pronunciations_;
  std::unordered_map<std::string, size_t> pronunciation_by_phonemes_;
  std::unordered_map<std::string, std::vector<size_t>> pronunciations_by_rhyme_;
  std::unordered_map<std::string, std::vector<size_t>>
      pronunciations_by_blank_phonemes_;

  // Anaphone sets: each set is a list of ids, looked up by sorted phonemes.
  std::vector<std::vector<size_t>> anaphone_sets_;
  std::unordered_map<std::string, size_t> anaphone_set_by_sorted_phonemes_;

  // Form records and their lookup by text.
  std::vector<Form> forms_;
  std::unordered_map<std::string, size_t> form_by_text_;

  // Anagram sets: each set is a list of ids, looked up by sorted letters.
  std::vector<std::vector<size_t>> anagram_sets_;
  std::unordered_map<std::string, size_t> anagram_set_by_sorted_letters_;

  // Word records and their lookup tables.
  std::vector<Word> words_;
  std::unordered_map<std::string, std::vector<size_t>> words_by_base_;
  // Keyed by an (int, int) pair; per the name, (wnid, wnum).
  std::map<std::pair<int, int>, size_t> word_by_wnid_and_wnum_;

  // Synsets: each synset is a list of ids, looked up by wnid.
  std::vector<std::vector<size_t>> synsets_;
  std::unordered_map<int, size_t> synset_by_wnid_;

  // NOTE(review): meaning of the key and value is not visible in this header.
  std::map<int, size_t> wanderlust_;

  // Nested map keyed by two puzzle types, holding int triples.
  // NOTE(review): presumably (left_type, right_type) as passed to
  // FindComboPuzzles; the meaning of the tuple fields is not visible here.
  std::map<PuzzleType,
           std::map<PuzzleType, std::vector<std::tuple<int, int, int>>>>
      combos_;
};

#endif /* end of include guard: GENERATOR_H_D5C6A724 */