From 123887d73dbab74f784e6ca41a6b33ce8c5c5e52 Mon Sep 17 00:00:00 2001 From: Star Rauchenberger Date: Sat, 2 Dec 2023 18:04:59 -0500 Subject: Add black bottom and double black bottom --- generator/generator.cpp | 123 ++++++++++++++++++++++++++++++++++++++---------- generator/generator.h | 7 ++- 2 files changed, 105 insertions(+), 25 deletions(-) (limited to 'generator') diff --git a/generator/generator.cpp b/generator/generator.cpp index 0309482..1c3b5c4 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -117,7 +117,6 @@ void generator::run() { std::list lines(readFile(wordNetPath_ + "wn_s.pl")); hatkirby::progress ppgs("Reading synsets from WordNet...", lines.size()); - std::set> wnid_and_wnum; for (std::string line : lines) { ppgs.update(); @@ -161,13 +160,12 @@ void generator::run() { // The WordNet data does contain duplicates, so we need to check that we // haven't already created this word. std::pair lookup(synset_id, wnum); - if (wnid_and_wnum.count(lookup)) { + if (word_by_wnid_and_wnum_.count(lookup)) { continue; } - wnid_and_wnum.insert(lookup); - size_t word_id = LookupOrCreateWord(text); + word_by_wnid_and_wnum_[lookup] = word_id; AddWordToSynset(word_id, synset_id); } } @@ -190,14 +188,10 @@ void generator::run() { line.erase(0, 3); } - if (!word_by_base_.count(infinitive) && - !(type == 'V' && word_frequencies[infinitive] >= MIN_FREQUENCY && - !profane.count(infinitive))) { + if (!words_by_base_.count(infinitive)) { continue; } - size_t word_id = LookupOrCreateWord(infinitive); - auto inflWordList = hatkirby::split>(line, " | "); std::vector> agidForms; @@ -277,11 +271,13 @@ void generator::run() { } // Compile the forms we have mapped. - for (const std::list& infl_list : inflections) { - for (const std::string& infl : infl_list) { - if (!profane.count(infl)) { - size_t form_id = LookupOrCreateForm(infl); - AddFormToWord(form_id, word_id); + for (size_t word_id : words_by_base_.at(infinitive)) { + for (const std::list& infl_list : inflections) { + for (const std::string& infl : infl_list) { + if (!profane.count(infl)) { + size_t form_id = LookupOrCreateForm(infl); + AddFormToWord(form_id, word_id); + } } } } @@ -454,6 +450,86 @@ void generator::run() { } } + // Black Bottom + std::unordered_map> antonyms; + { + std::list lines(readFile(wordNetPath_ + "wn_ant.pl", true)); + + hatkirby::progress ppgs("Generating black bottom puzzles...", lines.size()); + for (const std::string& line : lines) { + ppgs.update(); + + std::regex relation( + "^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); + + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) { + continue; + } + + std::pair lookup1(std::stoi(relation_data[1]), + std::stoi(relation_data[2])); + + std::pair lookup2(std::stoi(relation_data[3]), + std::stoi(relation_data[4])); + + if (word_by_wnid_and_wnum_.count(lookup1) && + word_by_wnid_and_wnum_.count(lookup2)) { + const Word& word1 = words_.at(word_by_wnid_and_wnum_.at(lookup1)); + const Word& word2 = words_.at(word_by_wnid_and_wnum_.at(lookup2)); + + Form& form1 = forms_.at(word1.base_form_id); + form1.puzzles[kBlackBottom].insert(word2.base_form_id); + + antonyms[word1.id].insert(word2.id); + } + } + } + + // Black Double Bottom + { + hatkirby::progress ppgs("Generating black double bottom puzzles...", + antonyms.size()); + for (const auto& [word1, ant_words] : antonyms) { + ppgs.update(); + + for (size_t word2 : ant_words) { + const Word& word2_obj = words_.at(word2); + const Form& form2 = forms_.at(word2_obj.base_form_id); + + for (size_t word25 : form2.word_ids) { + if (word25 == word2) { + continue; + } + + const auto& double_ant_words = antonyms[word25]; + + for (size_t word3 : double_ant_words) { + const Word& word1_obj = words_.at(word1); + const Word& word3_obj = words_.at(word3); + + bool synset_overlap = false; + for (size_t synset1 : word1_obj.synsets) { + for (size_t synset3 : word3_obj.synsets) { + if (synset1 == synset3) { + synset_overlap = true; + break; + } + } + if (synset_overlap) { + break; + } + } + if (!synset_overlap) { + Form& form1 = forms_.at(word1_obj.base_form_id); + form1.puzzles[kDoubleBlackBottom].insert(word3_obj.base_form_id); + } + } + } + } + } + } + // Count up all of the generated puzzles. int total_puzzles = 0; int reusable_words = 0; @@ -476,6 +552,9 @@ void generator::run() { << std::endl; std::cout << "Black tops: " << per_puzzle_type[kBlackTop] << std::endl; std::cout << "Black middles: " << per_puzzle_type[kBlackMiddle] << std::endl; + std::cout << "Black bottoms: " << per_puzzle_type[kBlackBottom] << std::endl; + std::cout << "Black double bottoms: " << per_puzzle_type[kDoubleBlackBottom] + << std::endl; } size_t generator::LookupOrCreatePronunciation(const std::string& phonemes) { @@ -567,16 +646,12 @@ size_t generator::LookupOrCreateForm(const std::string& word) { } size_t generator::LookupOrCreateWord(const std::string& word) { - if (word_by_base_.count(word)) { - return word_by_base_[word]; - } else { - size_t word_id = words_.size(); - word_by_base_[word] = words_.size(); - size_t form_id = LookupOrCreateForm(word); - words_.push_back({.id = word_id, .base_form_id = form_id}); - AddFormToWord(form_id, word_id); - return word_id; - } + size_t word_id = words_.size(); + words_by_base_[word].push_back(word_id); + size_t form_id = LookupOrCreateForm(word); + words_.push_back({.id = word_id, .base_form_id = form_id}); + AddFormToWord(form_id, word_id); + return word_id; } void generator::AddPronunciationToForm(size_t pronunciation_id, diff --git a/generator/generator.h b/generator/generator.h index 923fc17..fc66789 100644 --- a/generator/generator.h +++ b/generator/generator.h @@ -2,10 +2,12 @@ #define GENERATOR_H_D5C6A724 #include +#include #include #include #include #include +#include #include enum PuzzleType { @@ -15,6 +17,8 @@ enum PuzzleType { kYellowMiddle = 3, kBlackTop = 4, kBlackMiddle = 5, + kBlackBottom = 6, + kDoubleBlackBottom = 7, }; class generator { @@ -107,7 +111,8 @@ class generator { std::unordered_map anagram_set_by_sorted_letters_; std::vector words_; - std::unordered_map word_by_base_; + std::unordered_map> words_by_base_; + std::map, size_t> word_by_wnid_and_wnum_; std::vector> synsets_; std::unordered_map synset_by_wnid_; -- cgit 1.4.1