diff options
Diffstat (limited to 'generator')
-rw-r--r-- | generator/generator.cpp | 123 | ||||
-rw-r--r-- | generator/generator.h | 7 |
2 files changed, 105 insertions, 25 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp index 0309482..1c3b5c4 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
@@ -117,7 +117,6 @@ void generator::run() { | |||
117 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); | 117 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); |
118 | hatkirby::progress ppgs("Reading synsets from WordNet...", lines.size()); | 118 | hatkirby::progress ppgs("Reading synsets from WordNet...", lines.size()); |
119 | 119 | ||
120 | std::set<std::pair<int, int>> wnid_and_wnum; | ||
121 | for (std::string line : lines) { | 120 | for (std::string line : lines) { |
122 | ppgs.update(); | 121 | ppgs.update(); |
123 | 122 | ||
@@ -161,13 +160,12 @@ void generator::run() { | |||
161 | // The WordNet data does contain duplicates, so we need to check that we | 160 | // The WordNet data does contain duplicates, so we need to check that we |
162 | // haven't already created this word. | 161 | // haven't already created this word. |
163 | std::pair<int, int> lookup(synset_id, wnum); | 162 | std::pair<int, int> lookup(synset_id, wnum); |
164 | if (wnid_and_wnum.count(lookup)) { | 163 | if (word_by_wnid_and_wnum_.count(lookup)) { |
165 | continue; | 164 | continue; |
166 | } | 165 | } |
167 | 166 | ||
168 | wnid_and_wnum.insert(lookup); | ||
169 | |||
170 | size_t word_id = LookupOrCreateWord(text); | 167 | size_t word_id = LookupOrCreateWord(text); |
168 | word_by_wnid_and_wnum_[lookup] = word_id; | ||
171 | AddWordToSynset(word_id, synset_id); | 169 | AddWordToSynset(word_id, synset_id); |
172 | } | 170 | } |
173 | } | 171 | } |
@@ -190,14 +188,10 @@ void generator::run() { | |||
190 | line.erase(0, 3); | 188 | line.erase(0, 3); |
191 | } | 189 | } |
192 | 190 | ||
193 | if (!word_by_base_.count(infinitive) && | 191 | if (!words_by_base_.count(infinitive)) { |
194 | !(type == 'V' && word_frequencies[infinitive] >= MIN_FREQUENCY && | ||
195 | !profane.count(infinitive))) { | ||
196 | continue; | 192 | continue; |
197 | } | 193 | } |
198 | 194 | ||
199 | size_t word_id = LookupOrCreateWord(infinitive); | ||
200 | |||
201 | auto inflWordList = hatkirby::split<std::list<std::string>>(line, " | "); | 195 | auto inflWordList = hatkirby::split<std::list<std::string>>(line, " | "); |
202 | 196 | ||
203 | std::vector<std::list<std::string>> agidForms; | 197 | std::vector<std::list<std::string>> agidForms; |
@@ -277,11 +271,13 @@ void generator::run() { | |||
277 | } | 271 | } |
278 | 272 | ||
279 | // Compile the forms we have mapped. | 273 | // Compile the forms we have mapped. |
280 | for (const std::list<std::string>& infl_list : inflections) { | 274 | for (size_t word_id : words_by_base_.at(infinitive)) { |
281 | for (const std::string& infl : infl_list) { | 275 | for (const std::list<std::string>& infl_list : inflections) { |
282 | if (!profane.count(infl)) { | 276 | for (const std::string& infl : infl_list) { |
283 | size_t form_id = LookupOrCreateForm(infl); | 277 | if (!profane.count(infl)) { |
284 | AddFormToWord(form_id, word_id); | 278 | size_t form_id = LookupOrCreateForm(infl); |
279 | AddFormToWord(form_id, word_id); | ||
280 | } | ||
285 | } | 281 | } |
286 | } | 282 | } |
287 | } | 283 | } |
@@ -454,6 +450,86 @@ void generator::run() { | |||
454 | } | 450 | } |
455 | } | 451 | } |
456 | 452 | ||
453 | // Black Bottom | ||
454 | std::unordered_map<size_t, std::set<size_t>> antonyms; | ||
455 | { | ||
456 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl", true)); | ||
457 | |||
458 | hatkirby::progress ppgs("Generating black bottom puzzles...", lines.size()); | ||
459 | for (const std::string& line : lines) { | ||
460 | ppgs.update(); | ||
461 | |||
462 | std::regex relation( | ||
463 | "^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | ||
464 | |||
465 | std::smatch relation_data; | ||
466 | if (!std::regex_search(line, relation_data, relation)) { | ||
467 | continue; | ||
468 | } | ||
469 | |||
470 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), | ||
471 | std::stoi(relation_data[2])); | ||
472 | |||
473 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), | ||
474 | std::stoi(relation_data[4])); | ||
475 | |||
476 | if (word_by_wnid_and_wnum_.count(lookup1) && | ||
477 | word_by_wnid_and_wnum_.count(lookup2)) { | ||
478 | const Word& word1 = words_.at(word_by_wnid_and_wnum_.at(lookup1)); | ||
479 | const Word& word2 = words_.at(word_by_wnid_and_wnum_.at(lookup2)); | ||
480 | |||
481 | Form& form1 = forms_.at(word1.base_form_id); | ||
482 | form1.puzzles[kBlackBottom].insert(word2.base_form_id); | ||
483 | |||
484 | antonyms[word1.id].insert(word2.id); | ||
485 | } | ||
486 | } | ||
487 | } | ||
488 | |||
489 | // Black Double Bottom | ||
490 | { | ||
491 | hatkirby::progress ppgs("Generating black double bottom puzzles...", | ||
492 | antonyms.size()); | ||
493 | for (const auto& [word1, ant_words] : antonyms) { | ||
494 | ppgs.update(); | ||
495 | |||
496 | for (size_t word2 : ant_words) { | ||
497 | const Word& word2_obj = words_.at(word2); | ||
498 | const Form& form2 = forms_.at(word2_obj.base_form_id); | ||
499 | |||
500 | for (size_t word25 : form2.word_ids) { | ||
501 | if (word25 == word2) { | ||
502 | continue; | ||
503 | } | ||
504 | |||
505 | const auto& double_ant_words = antonyms[word25]; | ||
506 | |||
507 | for (size_t word3 : double_ant_words) { | ||
508 | const Word& word1_obj = words_.at(word1); | ||
509 | const Word& word3_obj = words_.at(word3); | ||
510 | |||
511 | bool synset_overlap = false; | ||
512 | for (size_t synset1 : word1_obj.synsets) { | ||
513 | for (size_t synset3 : word3_obj.synsets) { | ||
514 | if (synset1 == synset3) { | ||
515 | synset_overlap = true; | ||
516 | break; | ||
517 | } | ||
518 | } | ||
519 | if (synset_overlap) { | ||
520 | break; | ||
521 | } | ||
522 | } | ||
523 | if (!synset_overlap) { | ||
524 | Form& form1 = forms_.at(word1_obj.base_form_id); | ||
525 | form1.puzzles[kDoubleBlackBottom].insert(word3_obj.base_form_id); | ||
526 | } | ||
527 | } | ||
528 | } | ||
529 | } | ||
530 | } | ||
531 | } | ||
532 | |||
457 | // Count up all of the generated puzzles. | 533 | // Count up all of the generated puzzles. |
458 | int total_puzzles = 0; | 534 | int total_puzzles = 0; |
459 | int reusable_words = 0; | 535 | int reusable_words = 0; |
@@ -476,6 +552,9 @@ void generator::run() { | |||
476 | << std::endl; | 552 | << std::endl; |
477 | std::cout << "Black tops: " << per_puzzle_type[kBlackTop] << std::endl; | 553 | std::cout << "Black tops: " << per_puzzle_type[kBlackTop] << std::endl; |
478 | std::cout << "Black middles: " << per_puzzle_type[kBlackMiddle] << std::endl; | 554 | std::cout << "Black middles: " << per_puzzle_type[kBlackMiddle] << std::endl; |
555 | std::cout << "Black bottoms: " << per_puzzle_type[kBlackBottom] << std::endl; | ||
556 | std::cout << "Black double bottoms: " << per_puzzle_type[kDoubleBlackBottom] | ||
557 | << std::endl; | ||
479 | } | 558 | } |
480 | 559 | ||
481 | size_t generator::LookupOrCreatePronunciation(const std::string& phonemes) { | 560 | size_t generator::LookupOrCreatePronunciation(const std::string& phonemes) { |
@@ -567,16 +646,12 @@ size_t generator::LookupOrCreateForm(const std::string& word) { | |||
567 | } | 646 | } |
568 | 647 | ||
569 | size_t generator::LookupOrCreateWord(const std::string& word) { | 648 | size_t generator::LookupOrCreateWord(const std::string& word) { |
570 | if (word_by_base_.count(word)) { | 649 | size_t word_id = words_.size(); |
571 | return word_by_base_[word]; | 650 | words_by_base_[word].push_back(word_id); |
572 | } else { | 651 | size_t form_id = LookupOrCreateForm(word); |
573 | size_t word_id = words_.size(); | 652 | words_.push_back({.id = word_id, .base_form_id = form_id}); |
574 | word_by_base_[word] = words_.size(); | 653 | AddFormToWord(form_id, word_id); |
575 | size_t form_id = LookupOrCreateForm(word); | 654 | return word_id; |
576 | words_.push_back({.id = word_id, .base_form_id = form_id}); | ||
577 | AddFormToWord(form_id, word_id); | ||
578 | return word_id; | ||
579 | } | ||
580 | } | 655 | } |
581 | 656 | ||
582 | void generator::AddPronunciationToForm(size_t pronunciation_id, | 657 | void generator::AddPronunciationToForm(size_t pronunciation_id, |
diff --git a/generator/generator.h b/generator/generator.h index 923fc17..fc66789 100644 --- a/generator/generator.h +++ b/generator/generator.h | |||
@@ -2,10 +2,12 @@ | |||
2 | #define GENERATOR_H_D5C6A724 | 2 | #define GENERATOR_H_D5C6A724 |
3 | 3 | ||
4 | #include <filesystem> | 4 | #include <filesystem> |
5 | #include <map> | ||
5 | #include <optional> | 6 | #include <optional> |
6 | #include <set> | 7 | #include <set> |
7 | #include <string> | 8 | #include <string> |
8 | #include <unordered_map> | 9 | #include <unordered_map> |
10 | #include <utility> | ||
9 | #include <vector> | 11 | #include <vector> |
10 | 12 | ||
11 | enum PuzzleType { | 13 | enum PuzzleType { |
@@ -15,6 +17,8 @@ enum PuzzleType { | |||
15 | kYellowMiddle = 3, | 17 | kYellowMiddle = 3, |
16 | kBlackTop = 4, | 18 | kBlackTop = 4, |
17 | kBlackMiddle = 5, | 19 | kBlackMiddle = 5, |
20 | kBlackBottom = 6, | ||
21 | kDoubleBlackBottom = 7, | ||
18 | }; | 22 | }; |
19 | 23 | ||
20 | class generator { | 24 | class generator { |
@@ -107,7 +111,8 @@ class generator { | |||
107 | std::unordered_map<std::string, size_t> anagram_set_by_sorted_letters_; | 111 | std::unordered_map<std::string, size_t> anagram_set_by_sorted_letters_; |
108 | 112 | ||
109 | std::vector<Word> words_; | 113 | std::vector<Word> words_; |
110 | std::unordered_map<std::string, size_t> word_by_base_; | 114 | std::unordered_map<std::string, std::vector<size_t>> words_by_base_; |
115 | std::map<std::pair<int, int>, size_t> word_by_wnid_and_wnum_; | ||
111 | 116 | ||
112 | std::vector<std::vector<size_t>> synsets_; | 117 | std::vector<std::vector<size_t>> synsets_; |
113 | std::unordered_map<int, size_t> synset_by_wnid_; | 118 | std::unordered_map<int, size_t> synset_by_wnid_; |