From 7ea3569e3894f19fbae6cfdb3406f2240570e3c1 Mon Sep 17 00:00:00 2001 From: Star Rauchenberger Date: Thu, 8 Dec 2022 15:03:51 -0500 Subject: Added a bunch of stuff for making LINGO puzzles --- generator/CMakeLists.txt | 2 +- generator/form.cpp | 9 +- generator/form.h | 19 +++- generator/generator.cpp | 270 +++++++++++++++++++++++++++++++++++++++++++- generator/generator.h | 11 +- generator/pronunciation.cpp | 7 +- generator/pronunciation.h | 19 +++- generator/schema.sql | 24 +++- 8 files changed, 347 insertions(+), 14 deletions(-) (limited to 'generator') diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 8c070d2..5d2f977 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt @@ -11,6 +11,6 @@ include_directories( ../vendor/hkutil) add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp generator.cpp main.cpp) -set_property(TARGET generator PROPERTY CXX_STANDARD 11) +set_property(TARGET generator PROPERTY CXX_STANDARD 17) set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) diff --git a/generator/form.cpp b/generator/form.cpp index c66820c..a88363b 100644 --- a/generator/form.cpp +++ b/generator/form.cpp @@ -9,12 +9,13 @@ namespace verbly { int form::nextId_ = 0; - form::form(std::string text) : + form::form(std::string text, int anagram_set_id) : id_(nextId_++), text_(text), complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), proper_(std::any_of(std::begin(text), std::end(text), ::isupper)), - length_(text.length()) + length_(text.length()), + anagram_set_id_(anagram_set_id) { } @@ -34,7 +35,9 @@ namespace verbly { { "form", arg.getText() }, { "complexity", arg.getComplexity() }, { "proper", arg.isProper() }, - { "length", arg.getLength() } + { "length", arg.getLength() }, + { "anagram_set_id", arg.getAnagramSetId() }, + { "reverse_form_id", arg.getReverseId() } }); } diff --git a/generator/form.h b/generator/form.h index f3dd779..c83bbdc 100644 --- a/generator/form.h +++ b/generator/form.h @@ -15,7 +15,7 @@ namespace verbly { // Constructor - explicit form(std::string text); + form(std::string text, int anagram_set_id); // Mutators @@ -48,6 +48,21 @@ namespace verbly { return length_; } + int getAnagramSetId() const + { + return anagram_set_id_; + } + + void setReverseId(int id) + { + reverse_id_ = id; + } + + int getReverseId() const + { + return reverse_id_; + } + std::set getPronunciations() const { return pronunciations_; @@ -62,6 +77,8 @@ namespace verbly { const int complexity_; const bool proper_; const int length_; + const int anagram_set_id_; + int reverse_id_ = -1; std::set pronunciations_; diff --git a/generator/generator.cpp b/generator/generator.cpp index ad665a2..fdea10f 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include #include "role.h" #include "part.h" #include "../lib/enums.h" @@ -83,7 +86,7 @@ namespace verbly { readAdjectivePositioning(); // Counts the number of URLs ImageNet has per notion - readImageNetUrls(); + //readImageNetUrls(); // Creates a word by WordNet sense key lookup table readWordNetSenseKeys(); @@ -115,9 +118,17 @@ namespace verbly { // Writes the database version writeVersion(); + // Calculates and writes form merography + writeMerography(); + + // Calculates and writes pronunciation merophony + writeMerophony(); + // Dumps data to the database dumpObjects(); + + // Populates the antonymy relationship from WordNet readWordNetAntonymy(); @@ -577,9 +588,29 @@ namespace verbly { pronunciation& p = *pronunciationByPhonemes_[phonemes]; formByText_.at(canonical)->addPronunciation(p); } else { - pronunciations_.emplace_back(phonemes); + std::string stressless; + for (int i=0; i>(stressless, " "); + std::string stresslessPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); + std::sort(stresslessList.begin(), stresslessList.end()); + std::string sortedPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); + + int anaphoneSetId; + if (anaphoneSets_.count(sortedPhonemes)) { + anaphoneSetId = anaphoneSets_[sortedPhonemes]; + } else { + anaphoneSetId = anaphoneSets_.size(); + anaphoneSets_[sortedPhonemes] = anaphoneSetId; + } + + pronunciations_.emplace_back(phonemes, anaphoneSetId); pronunciation& p = pronunciations_.back(); pronunciationByPhonemes_[phonemes] = &p; + pronunciationByBlankPhonemes_[stresslessPhonemes] = &p; formByText_.at(canonical)->addPronunciation(p); } } @@ -671,6 +702,12 @@ namespace verbly { for (form& f : forms_) { + std::string reverseText = f.getText(); + std::reverse(reverseText.begin(), reverseText.end()); + if (formByText_.count(reverseText)) { + f.setReverseId(formByText_[reverseText]->getId()); + } + db_ << f; ppgs.update(); @@ -682,6 +719,19 @@ namespace verbly { for (pronunciation& p : pronunciations_) { + std::string stressless; + for (int i=0; i>(stressless, " "); + std::reverse(stresslessList.begin(), stresslessList.end()); + std::string reversedPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); + if (pronunciationByBlankPhonemes_.count(reversedPhonemes)) { + p.setReverseId(pronunciationByBlankPhonemes_[reversedPhonemes]->getId()); + } + db_ << p; ppgs.update(); @@ -698,6 +748,208 @@ namespace verbly { ppgs.update(); } } + + /*{ + hatkirby::progress ppgs("Writing merography...", formByText_.size()); + + for (const auto& [merotext, meroform] : formByText_) + { + for (const auto& [holotext, holoform] : formByText_) + { + if (isMero(merotext, holotext)) + { + db_.insertIntoTable( + "merography", + { + { "merograph_id", meroform->getId() }, + { "holograph_id", holoform->getId() } + }); + } + } + + ppgs.update(); + } + } + + { + hatkirby::progress ppgs("Writing merophony...", pronunciationByBlankPhonemes_.size()); + + for (const auto& [merotext, merop] : pronunciationByBlankPhonemes_) + { + auto merophonemes = hatkirby::split>(merotext, " "); + + for (const auto& [holotext, holop] : pronunciationByBlankPhonemes_) + { + auto holophonemes = hatkirby::split>(holotext, " "); + + if (isMero(merophonemes, holophonemes)) + { + db_.insertIntoTable( + "merophony", + { + { "merophone_id", merop->getId() }, + { "holophone_id", holop->getId() } + }); + } + } + + ppgs.update(); + } + }*/ + } + + void generator::writeMerography() + { + hatkirby::progress ppgs("Writing merography...", formByText_.size()); + for (const auto& [text, form] : formByText_) + { + ppgs.update(); + + std::unordered_set visited; + for (int i=0; igetId() }, + { "holograph_id", form->getId() } + }); + } + } + } + + + /* + std::string front = text; + while (front.size() > 2) + { + front.erase(0, 1); + + if (formByText_.count(front)) + { + visited.insert(front); + db_.insertIntoTable( + "merography", + { + { "merograph_id", formByText_[front]->getId() }, + { "holograph_id", form->getId() } + }); + } + } + + if (text.size() > 2) + { + std::string back = text; + + while (back.size() > 2) + { + back.pop_back(); + + if (formByText_.count(back) && !visited.count(back)) + { + db_.insertIntoTable( + "merography", + { + { "merograph_id", formByText_[back]->getId() }, + { "holograph_id", form->getId() } + }); + } + } + }*/ + } + } + + void generator::writeMerophony() + { + std::map, pronunciation*> tokenized; + for (const auto& [phonemes, pronunciation] : pronunciationByBlankPhonemes_) + { + tokenized[hatkirby::split>(phonemes, " ")] = pronunciation; + } + + hatkirby::progress ppgs("Writing merophony...", tokenized.size()); + for (const auto& [phonemes, pronunciation] : tokenized) + { + ppgs.update(); + + std::set> visited; + for (int i=0; i sublist; + for (auto j=std::next(phonemes.begin(),i); j!=std::next(phonemes.begin(),i+l); j++) + { + sublist.push_back(*j); + } + + if (tokenized.count(sublist) && !visited.count(sublist)) + { + visited.insert(sublist); + db_.insertIntoTable( + "merophony", + { + { "merophone_id", tokenized[sublist]->getId() }, + { "holophone_id", pronunciation->getId() } + }); + } + } + } + /*std::list front = phonemes; + while (front.size() > 1) + { + front.pop_front(); + + if (tokenized.count(front)) + { + visited.insert(front); + db_.insertIntoTable( + "merophony", + { + { "merophone_id", tokenized[front]->getId() }, + { "holophone_id", pronunciation->getId() } + }); + break; + } + } + + if (phonemes.size() > 1) + { + std::list back = phonemes; + + while (back.size() > 1) + { + back.pop_back(); + + if (tokenized.count(back) && !visited.count(back)) + { + db_.insertIntoTable( + "merophony", + { + { "merophone_id", tokenized[back]->getId() }, + { "holophone_id", pronunciation->getId() } + }); + break; + } + } + }*/ + } } void generator::readWordNetAntonymy() @@ -1316,7 +1568,19 @@ namespace verbly { { if (!formByText_.count(text)) { - forms_.emplace_back(text); + std::string sortedText = text; + std::sort(sortedText.begin(), sortedText.end()); + + int anagramSetId; + if (anagramSets_.count(sortedText)) + { + anagramSetId = anagramSets_[sortedText]; + } else { + anagramSetId = anagramSets_.size(); + anagramSets_[sortedText] = anagramSetId; + } + + forms_.emplace_back(text, anagramSetId); formByText_[text] = &forms_.back(); } diff --git a/generator/generator.h b/generator/generator.h index 2cd2ba9..70f0657 100644 --- a/generator/generator.h +++ b/generator/generator.h @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include #include @@ -64,6 +66,10 @@ namespace verbly { void dumpObjects(); + void writeMerography(); + + void writeMerophony(); + void readWordNetAntonymy(); void readWordNetVariation(); @@ -138,8 +144,11 @@ namespace verbly { std::map, word*> wordByWnidAndWnum_; std::map> wordsByBaseForm_; std::map lemmaByBaseForm_; - std::map formByText_; + std::unordered_map formByText_; std::map pronunciationByPhonemes_; + std::unordered_map pronunciationByBlankPhonemes_; + std::map anagramSets_; + std::map anaphoneSets_; // Caches diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp index 3075d42..5c4d8e2 100644 --- a/generator/pronunciation.cpp +++ b/generator/pronunciation.cpp @@ -10,9 +10,10 @@ namespace verbly { int pronunciation::nextId_ = 0; - pronunciation::pronunciation(std::string phonemes) : + pronunciation::pronunciation(std::string phonemes, int anaphone_set_id) : id_(nextId_++), - phonemes_(phonemes) + phonemes_(phonemes), + anaphone_set_id_(anaphone_set_id) { auto phonemeList = hatkirby::split>(phonemes, " "); @@ -88,6 +89,8 @@ namespace verbly { fields.emplace_back("phonemes", arg.getPhonemes()); fields.emplace_back("syllables", arg.getSyllables()); fields.emplace_back("stress", arg.getStress()); + fields.emplace_back("anaphone_set_id", arg.getAnaphoneSetId()); + fields.emplace_back("reverse_pronunciation_id", arg.getReverseId()); if (arg.hasRhyme()) { diff --git a/generator/pronunciation.h b/generator/pronunciation.h index 3190e6d..e6dc4b4 100644 --- a/generator/pronunciation.h +++ b/generator/pronunciation.h @@ -13,7 +13,7 @@ namespace verbly { // Constructor - explicit pronunciation(std::string phonemes); + pronunciation(std::string phonemes, int anaphone_set_id); // Accessors @@ -62,6 +62,21 @@ namespace verbly { return stress_; } + int getAnaphoneSetId() const + { + return anaphone_set_id_; + } + + void setReverseId(int id) + { + reverse_id_ = id; + } + + int getReverseId() const + { + return reverse_id_; + } + private: static int nextId_; @@ -72,6 +87,8 @@ namespace verbly { std::string prerhyme_; int syllables_ = 0; std::string stress_; + int anaphone_set_id_; + int reverse_id_ = -1; }; diff --git a/generator/schema.sql b/generator/schema.sql index 6a7d223..8c910f4 100644 --- a/generator/schema.sql +++ b/generator/schema.sql @@ -160,7 +160,9 @@ CREATE TABLE `forms` ( `form` VARCHAR(32) NOT NULL, `complexity` SMALLINT NOT NULL, `proper` SMALLINT NOT NULL, - `length` SMALLINT NOT NULL + `length` SMALLINT NOT NULL, + `anagram_set_id` INTEGER NOT NULL, + `reverse_form_id` INTEGER NOT NULL ); CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); @@ -174,13 +176,23 @@ CREATE TABLE `lemmas_forms` ( CREATE INDEX `forms_lemmas` ON `lemmas_forms`(`form_id`,`category`,`lemma_id`); +CREATE TABLE `merography` ( + `merograph_id` INTEGER NOT NULL, + `holograph_id` INTEGER NOT NULL, + PRIMARY KEY(`merograph_id`,`holograph_id`) +) WITHOUT ROWID; + +CREATE INDEX `reverse_merography` ON `merography`(`holograph_id`,`merograph_id`); + CREATE TABLE `pronunciations` ( `pronunciation_id` INTEGER PRIMARY KEY, `phonemes` VARCHAR(64) NOT NULL, `prerhyme` VARCHAR(8), `rhyme` VARCHAR(64), `syllables` INTEGER NOT NULL, - `stress` VARCHAR(64) NOT NULL + `stress` VARCHAR(64) NOT NULL, + `anaphone_set_id` INTEGER NOT NULL, + `reverse_pronunciation_id` INTEGER NOT NULL ); CREATE INDEX `rhymes_with` ON `pronunciations`(`rhyme`,`prerhyme`); @@ -193,6 +205,14 @@ CREATE TABLE `forms_pronunciations` ( CREATE INDEX `pronunciations_forms` ON `forms_pronunciations`(`pronunciation_id`,`form_id`); +CREATE TABLE `merophony` ( + `merophone_id` INTEGER NOT NULL, + `holophone_id` INTEGER NOT NULL, + PRIMARY KEY(`merophone_id`,`holophone_id`) +) WITHOUT ROWID; + +CREATE INDEX `reverse_merophony` ON `merophony`(`holophone_id`,`merophone_id`); + CREATE TABLE `frames` ( `frame_id` INTEGER NOT NULL, `group_id` INTEGER NOT NULL, -- cgit 1.4.1