diff options
| author | Star Rauchenberger <fefferburbia@gmail.com> | 2023-12-02 18:04:59 -0500 |
|---|---|---|
| committer | Star Rauchenberger <fefferburbia@gmail.com> | 2023-12-02 18:04:59 -0500 |
| commit | 123887d73dbab74f784e6ca41a6b33ce8c5c5e52 (patch) | |
| tree | 86ab1976611aaa7d669bfe2f0c8fae2f1f7cddeb /generator | |
| parent | 17778ac3ab8598eb3d43f562a092b9aa7c0a1a42 (diff) | |
| download | lingo-randomizer-123887d73dbab74f784e6ca41a6b33ce8c5c5e52.tar.gz lingo-randomizer-123887d73dbab74f784e6ca41a6b33ce8c5c5e52.tar.bz2 lingo-randomizer-123887d73dbab74f784e6ca41a6b33ce8c5c5e52.zip | |
Add black bottom and double black bottom
Diffstat (limited to 'generator')
| -rw-r--r-- | generator/generator.cpp | 123 | ||||
| -rw-r--r-- | generator/generator.h | 7 |
2 files changed, 105 insertions, 25 deletions
| diff --git a/generator/generator.cpp b/generator/generator.cpp index 0309482..1c3b5c4 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
| @@ -117,7 +117,6 @@ void generator::run() { | |||
| 117 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); | 117 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); |
| 118 | hatkirby::progress ppgs("Reading synsets from WordNet...", lines.size()); | 118 | hatkirby::progress ppgs("Reading synsets from WordNet...", lines.size()); |
| 119 | 119 | ||
| 120 | std::set<std::pair<int, int>> wnid_and_wnum; | ||
| 121 | for (std::string line : lines) { | 120 | for (std::string line : lines) { |
| 122 | ppgs.update(); | 121 | ppgs.update(); |
| 123 | 122 | ||
| @@ -161,13 +160,12 @@ void generator::run() { | |||
| 161 | // The WordNet data does contain duplicates, so we need to check that we | 160 | // The WordNet data does contain duplicates, so we need to check that we |
| 162 | // haven't already created this word. | 161 | // haven't already created this word. |
| 163 | std::pair<int, int> lookup(synset_id, wnum); | 162 | std::pair<int, int> lookup(synset_id, wnum); |
| 164 | if (wnid_and_wnum.count(lookup)) { | 163 | if (word_by_wnid_and_wnum_.count(lookup)) { |
| 165 | continue; | 164 | continue; |
| 166 | } | 165 | } |
| 167 | 166 | ||
| 168 | wnid_and_wnum.insert(lookup); | ||
| 169 | |||
| 170 | size_t word_id = LookupOrCreateWord(text); | 167 | size_t word_id = LookupOrCreateWord(text); |
| 168 | word_by_wnid_and_wnum_[lookup] = word_id; | ||
| 171 | AddWordToSynset(word_id, synset_id); | 169 | AddWordToSynset(word_id, synset_id); |
| 172 | } | 170 | } |
| 173 | } | 171 | } |
| @@ -190,14 +188,10 @@ void generator::run() { | |||
| 190 | line.erase(0, 3); | 188 | line.erase(0, 3); |
| 191 | } | 189 | } |
| 192 | 190 | ||
| 193 | if (!word_by_base_.count(infinitive) && | 191 | if (!words_by_base_.count(infinitive)) { |
| 194 | !(type == 'V' && word_frequencies[infinitive] >= MIN_FREQUENCY && | ||
| 195 | !profane.count(infinitive))) { | ||
| 196 | continue; | 192 | continue; |
| 197 | } | 193 | } |
| 198 | 194 | ||
| 199 | size_t word_id = LookupOrCreateWord(infinitive); | ||
| 200 | |||
| 201 | auto inflWordList = hatkirby::split<std::list<std::string>>(line, " | "); | 195 | auto inflWordList = hatkirby::split<std::list<std::string>>(line, " | "); |
| 202 | 196 | ||
| 203 | std::vector<std::list<std::string>> agidForms; | 197 | std::vector<std::list<std::string>> agidForms; |
| @@ -277,11 +271,13 @@ void generator::run() { | |||
| 277 | } | 271 | } |
| 278 | 272 | ||
| 279 | // Compile the forms we have mapped. | 273 | // Compile the forms we have mapped. |
| 280 | for (const std::list<std::string>& infl_list : inflections) { | 274 | for (size_t word_id : words_by_base_.at(infinitive)) { |
| 281 | for (const std::string& infl : infl_list) { | 275 | for (const std::list<std::string>& infl_list : inflections) { |
| 282 | if (!profane.count(infl)) { | 276 | for (const std::string& infl : infl_list) { |
| 283 | size_t form_id = LookupOrCreateForm(infl); | 277 | if (!profane.count(infl)) { |
| 284 | AddFormToWord(form_id, word_id); | 278 | size_t form_id = LookupOrCreateForm(infl); |
| 279 | AddFormToWord(form_id, word_id); | ||
| 280 | } | ||
| 285 | } | 281 | } |
| 286 | } | 282 | } |
| 287 | } | 283 | } |
| @@ -454,6 +450,86 @@ void generator::run() { | |||
| 454 | } | 450 | } |
| 455 | } | 451 | } |
| 456 | 452 | ||
| 453 | // Black Bottom | ||
| 454 | std::unordered_map<size_t, std::set<size_t>> antonyms; | ||
| 455 | { | ||
| 456 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl", true)); | ||
| 457 | |||
| 458 | hatkirby::progress ppgs("Generating black bottom puzzles...", lines.size()); | ||
| 459 | for (const std::string& line : lines) { | ||
| 460 | ppgs.update(); | ||
| 461 | |||
| 462 | std::regex relation( | ||
| 463 | "^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | ||
| 464 | |||
| 465 | std::smatch relation_data; | ||
| 466 | if (!std::regex_search(line, relation_data, relation)) { | ||
| 467 | continue; | ||
| 468 | } | ||
| 469 | |||
| 470 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), | ||
| 471 | std::stoi(relation_data[2])); | ||
| 472 | |||
| 473 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), | ||
| 474 | std::stoi(relation_data[4])); | ||
| 475 | |||
| 476 | if (word_by_wnid_and_wnum_.count(lookup1) && | ||
| 477 | word_by_wnid_and_wnum_.count(lookup2)) { | ||
| 478 | const Word& word1 = words_.at(word_by_wnid_and_wnum_.at(lookup1)); | ||
| 479 | const Word& word2 = words_.at(word_by_wnid_and_wnum_.at(lookup2)); | ||
| 480 | |||
| 481 | Form& form1 = forms_.at(word1.base_form_id); | ||
| 482 | form1.puzzles[kBlackBottom].insert(word2.base_form_id); | ||
| 483 | |||
| 484 | antonyms[word1.id].insert(word2.id); | ||
| 485 | } | ||
| 486 | } | ||
| 487 | } | ||
| 488 | |||
| 489 | // Black Double Bottom | ||
| 490 | { | ||
| 491 | hatkirby::progress ppgs("Generating black double bottom puzzles...", | ||
| 492 | antonyms.size()); | ||
| 493 | for (const auto& [word1, ant_words] : antonyms) { | ||
| 494 | ppgs.update(); | ||
| 495 | |||
| 496 | for (size_t word2 : ant_words) { | ||
| 497 | const Word& word2_obj = words_.at(word2); | ||
| 498 | const Form& form2 = forms_.at(word2_obj.base_form_id); | ||
| 499 | |||
| 500 | for (size_t word25 : form2.word_ids) { | ||
| 501 | if (word25 == word2) { | ||
| 502 | continue; | ||
| 503 | } | ||
| 504 | |||
| 505 | const auto& double_ant_words = antonyms[word25]; | ||
| 506 | |||
| 507 | for (size_t word3 : double_ant_words) { | ||
| 508 | const Word& word1_obj = words_.at(word1); | ||
| 509 | const Word& word3_obj = words_.at(word3); | ||
| 510 | |||
| 511 | bool synset_overlap = false; | ||
| 512 | for (size_t synset1 : word1_obj.synsets) { | ||
| 513 | for (size_t synset3 : word3_obj.synsets) { | ||
| 514 | if (synset1 == synset3) { | ||
| 515 | synset_overlap = true; | ||
| 516 | break; | ||
| 517 | } | ||
| 518 | } | ||
| 519 | if (synset_overlap) { | ||
| 520 | break; | ||
| 521 | } | ||
| 522 | } | ||
| 523 | if (!synset_overlap) { | ||
| 524 | Form& form1 = forms_.at(word1_obj.base_form_id); | ||
| 525 | form1.puzzles[kDoubleBlackBottom].insert(word3_obj.base_form_id); | ||
| 526 | } | ||
| 527 | } | ||
| 528 | } | ||
| 529 | } | ||
| 530 | } | ||
| 531 | } | ||
| 532 | |||
| 457 | // Count up all of the generated puzzles. | 533 | // Count up all of the generated puzzles. |
| 458 | int total_puzzles = 0; | 534 | int total_puzzles = 0; |
| 459 | int reusable_words = 0; | 535 | int reusable_words = 0; |
| @@ -476,6 +552,9 @@ void generator::run() { | |||
| 476 | << std::endl; | 552 | << std::endl; |
| 477 | std::cout << "Black tops: " << per_puzzle_type[kBlackTop] << std::endl; | 553 | std::cout << "Black tops: " << per_puzzle_type[kBlackTop] << std::endl; |
| 478 | std::cout << "Black middles: " << per_puzzle_type[kBlackMiddle] << std::endl; | 554 | std::cout << "Black middles: " << per_puzzle_type[kBlackMiddle] << std::endl; |
| 555 | std::cout << "Black bottoms: " << per_puzzle_type[kBlackBottom] << std::endl; | ||
| 556 | std::cout << "Black double bottoms: " << per_puzzle_type[kDoubleBlackBottom] | ||
| 557 | << std::endl; | ||
| 479 | } | 558 | } |
| 480 | 559 | ||
| 481 | size_t generator::LookupOrCreatePronunciation(const std::string& phonemes) { | 560 | size_t generator::LookupOrCreatePronunciation(const std::string& phonemes) { |
| @@ -567,16 +646,12 @@ size_t generator::LookupOrCreateForm(const std::string& word) { | |||
| 567 | } | 646 | } |
| 568 | 647 | ||
| 569 | size_t generator::LookupOrCreateWord(const std::string& word) { | 648 | size_t generator::LookupOrCreateWord(const std::string& word) { |
| 570 | if (word_by_base_.count(word)) { | 649 | size_t word_id = words_.size(); |
| 571 | return word_by_base_[word]; | 650 | words_by_base_[word].push_back(word_id); |
| 572 | } else { | 651 | size_t form_id = LookupOrCreateForm(word); |
| 573 | size_t word_id = words_.size(); | 652 | words_.push_back({.id = word_id, .base_form_id = form_id}); |
| 574 | word_by_base_[word] = words_.size(); | 653 | AddFormToWord(form_id, word_id); |
| 575 | size_t form_id = LookupOrCreateForm(word); | 654 | return word_id; |
| 576 | words_.push_back({.id = word_id, .base_form_id = form_id}); | ||
| 577 | AddFormToWord(form_id, word_id); | ||
| 578 | return word_id; | ||
| 579 | } | ||
| 580 | } | 655 | } |
| 581 | 656 | ||
| 582 | void generator::AddPronunciationToForm(size_t pronunciation_id, | 657 | void generator::AddPronunciationToForm(size_t pronunciation_id, |
| diff --git a/generator/generator.h b/generator/generator.h index 923fc17..fc66789 100644 --- a/generator/generator.h +++ b/generator/generator.h | |||
| @@ -2,10 +2,12 @@ | |||
| 2 | #define GENERATOR_H_D5C6A724 | 2 | #define GENERATOR_H_D5C6A724 |
| 3 | 3 | ||
| 4 | #include <filesystem> | 4 | #include <filesystem> |
| 5 | #include <map> | ||
| 5 | #include <optional> | 6 | #include <optional> |
| 6 | #include <set> | 7 | #include <set> |
| 7 | #include <string> | 8 | #include <string> |
| 8 | #include <unordered_map> | 9 | #include <unordered_map> |
| 10 | #include <utility> | ||
| 9 | #include <vector> | 11 | #include <vector> |
| 10 | 12 | ||
| 11 | enum PuzzleType { | 13 | enum PuzzleType { |
| @@ -15,6 +17,8 @@ enum PuzzleType { | |||
| 15 | kYellowMiddle = 3, | 17 | kYellowMiddle = 3, |
| 16 | kBlackTop = 4, | 18 | kBlackTop = 4, |
| 17 | kBlackMiddle = 5, | 19 | kBlackMiddle = 5, |
| 20 | kBlackBottom = 6, | ||
| 21 | kDoubleBlackBottom = 7, | ||
| 18 | }; | 22 | }; |
| 19 | 23 | ||
| 20 | class generator { | 24 | class generator { |
| @@ -107,7 +111,8 @@ class generator { | |||
| 107 | std::unordered_map<std::string, size_t> anagram_set_by_sorted_letters_; | 111 | std::unordered_map<std::string, size_t> anagram_set_by_sorted_letters_; |
| 108 | 112 | ||
| 109 | std::vector<Word> words_; | 113 | std::vector<Word> words_; |
| 110 | std::unordered_map<std::string, size_t> word_by_base_; | 114 | std::unordered_map<std::string, std::vector<size_t>> words_by_base_; |
| 115 | std::map<std::pair<int, int>, size_t> word_by_wnid_and_wnum_; | ||
| 111 | 116 | ||
| 112 | std::vector<std::vector<size_t>> synsets_; | 117 | std::vector<std::vector<size_t>> synsets_; |
| 113 | std::unordered_map<int, size_t> synset_by_wnid_; | 118 | std::unordered_map<int, size_t> synset_by_wnid_; |
