From 7ea3569e3894f19fbae6cfdb3406f2240570e3c1 Mon Sep 17 00:00:00 2001 From: Star Rauchenberger Date: Thu, 8 Dec 2022 15:03:51 -0500 Subject: Added a bunch of stuff for making LINGO puzzles --- generator/CMakeLists.txt | 2 +- generator/form.cpp | 9 +- generator/form.h | 19 +++- generator/generator.cpp | 270 +++++++++++++++++++++++++++++++++++++++++++- generator/generator.h | 11 +- generator/pronunciation.cpp | 7 +- generator/pronunciation.h | 19 +++- generator/schema.sql | 24 +++- lib/field.cpp | 5 +- lib/form.cpp | 5 + lib/form.h | 5 + lib/pronunciation.cpp | 5 + lib/pronunciation.h | 5 + lib/query.h | 2 +- lib/version.h | 2 +- 15 files changed, 373 insertions(+), 17 deletions(-) diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 8c070d2..5d2f977 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt @@ -11,6 +11,6 @@ include_directories( ../vendor/hkutil) add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp generator.cpp main.cpp) -set_property(TARGET generator PROPERTY CXX_STANDARD 11) +set_property(TARGET generator PROPERTY CXX_STANDARD 17) set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) diff --git a/generator/form.cpp b/generator/form.cpp index c66820c..a88363b 100644 --- a/generator/form.cpp +++ b/generator/form.cpp @@ -9,12 +9,13 @@ namespace verbly { int form::nextId_ = 0; - form::form(std::string text) : + form::form(std::string text, int anagram_set_id) : id_(nextId_++), text_(text), complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), proper_(std::any_of(std::begin(text), std::end(text), ::isupper)), - length_(text.length()) + length_(text.length()), + anagram_set_id_(anagram_set_id) { } @@ -34,7 +35,9 @@ namespace verbly { { "form", arg.getText() }, { "complexity", arg.getComplexity() }, { "proper", arg.isProper() }, - { "length", arg.getLength() } + { "length", arg.getLength() }, + { "anagram_set_id", arg.getAnagramSetId() }, + { "reverse_form_id", arg.getReverseId() } }); } diff --git a/generator/form.h b/generator/form.h index f3dd779..c83bbdc 100644 --- a/generator/form.h +++ b/generator/form.h @@ -15,7 +15,7 @@ namespace verbly { // Constructor - explicit form(std::string text); + form(std::string text, int anagram_set_id); // Mutators @@ -48,6 +48,21 @@ namespace verbly { return length_; } + int getAnagramSetId() const + { + return anagram_set_id_; + } + + void setReverseId(int id) + { + reverse_id_ = id; + } + + int getReverseId() const + { + return reverse_id_; + } + std::set getPronunciations() const { return pronunciations_; @@ -62,6 +77,8 @@ namespace verbly { const int complexity_; const bool proper_; const int length_; + const int anagram_set_id_; + int reverse_id_ = -1; std::set pronunciations_; diff --git a/generator/generator.cpp b/generator/generator.cpp index ad665a2..fdea10f 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include #include "role.h" #include "part.h" #include "../lib/enums.h" @@ -83,7 +86,7 @@ namespace verbly { readAdjectivePositioning(); // Counts the number of URLs ImageNet has per notion - readImageNetUrls(); + //readImageNetUrls(); // Creates a word by WordNet sense key lookup table readWordNetSenseKeys(); @@ -115,9 +118,17 @@ namespace verbly { // Writes the database version writeVersion(); + // Calculates and writes form merography + writeMerography(); + + // Calculates and writes pronunciation merophony + writeMerophony(); + // Dumps data to the database dumpObjects(); + + // Populates the antonymy relationship from WordNet readWordNetAntonymy(); @@ -577,9 +588,29 @@ namespace verbly { pronunciation& p = *pronunciationByPhonemes_[phonemes]; formByText_.at(canonical)->addPronunciation(p); } else { - pronunciations_.emplace_back(phonemes); + std::string stressless; + for (int i=0; i>(stressless, " "); + std::string stresslessPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); + std::sort(stresslessList.begin(), stresslessList.end()); + std::string sortedPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); + + int anaphoneSetId; + if (anaphoneSets_.count(sortedPhonemes)) { + anaphoneSetId = anaphoneSets_[sortedPhonemes]; + } else { + anaphoneSetId = anaphoneSets_.size(); + anaphoneSets_[sortedPhonemes] = anaphoneSetId; + } + + pronunciations_.emplace_back(phonemes, anaphoneSetId); pronunciation& p = pronunciations_.back(); pronunciationByPhonemes_[phonemes] = &p; + pronunciationByBlankPhonemes_[stresslessPhonemes] = &p; formByText_.at(canonical)->addPronunciation(p); } } @@ -671,6 +702,12 @@ namespace verbly { for (form& f : forms_) { + std::string reverseText = f.getText(); + std::reverse(reverseText.begin(), reverseText.end()); + if (formByText_.count(reverseText)) { + f.setReverseId(formByText_[reverseText]->getId()); + } + db_ << f; ppgs.update(); @@ -682,6 +719,19 @@ namespace verbly { for (pronunciation& p : pronunciations_) { + std::string stressless; + for (int i=0; i>(stressless, " "); + std::reverse(stresslessList.begin(), stresslessList.end()); + std::string reversedPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); + if (pronunciationByBlankPhonemes_.count(reversedPhonemes)) { + p.setReverseId(pronunciationByBlankPhonemes_[reversedPhonemes]->getId()); + } + db_ << p; ppgs.update(); @@ -698,6 +748,208 @@ namespace verbly { ppgs.update(); } } + + /*{ + hatkirby::progress ppgs("Writing merography...", formByText_.size()); + + for (const auto& [merotext, meroform] : formByText_) + { + for (const auto& [holotext, holoform] : formByText_) + { + if (isMero(merotext, holotext)) + { + db_.insertIntoTable( + "merography", + { + { "merograph_id", meroform->getId() }, + { "holograph_id", holoform->getId() } + }); + } + } + + ppgs.update(); + } + } + + { + hatkirby::progress ppgs("Writing merophony...", pronunciationByBlankPhonemes_.size()); + + for (const auto& [merotext, merop] : pronunciationByBlankPhonemes_) + { + auto merophonemes = hatkirby::split>(merotext, " "); + + for (const auto& [holotext, holop] : pronunciationByBlankPhonemes_) + { + auto holophonemes = hatkirby::split>(holotext, " "); + + if (isMero(merophonemes, holophonemes)) + { + db_.insertIntoTable( + "merophony", + { + { "merophone_id", merop->getId() }, + { "holophone_id", holop->getId() } + }); + } + } + + ppgs.update(); + } + }*/ + } + + void generator::writeMerography() + { + hatkirby::progress ppgs("Writing merography...", formByText_.size()); + for (const auto& [text, form] : formByText_) + { + ppgs.update(); + + std::unordered_set visited; + for (int i=0; igetId() }, + { "holograph_id", form->getId() } + }); + } + } + } + + + /* + std::string front = text; + while (front.size() > 2) + { + front.erase(0, 1); + + if (formByText_.count(front)) + { + visited.insert(front); + db_.insertIntoTable( + "merography", + { + { "merograph_id", formByText_[front]->getId() }, + { "holograph_id", form->getId() } + }); + } + } + + if (text.size() > 2) + { + std::string back = text; + + while (back.size() > 2) + { + back.pop_back(); + + if (formByText_.count(back) && !visited.count(back)) + { + db_.insertIntoTable( + "merography", + { + { "merograph_id", formByText_[back]->getId() }, + { "holograph_id", form->getId() } + }); + } + } + }*/ + } + } + + void generator::writeMerophony() + { + std::map, pronunciation*> tokenized; + for (const auto& [phonemes, pronunciation] : pronunciationByBlankPhonemes_) + { + tokenized[hatkirby::split>(phonemes, " ")] = pronunciation; + } + + hatkirby::progress ppgs("Writing merophony...", tokenized.size()); + for (const auto& [phonemes, pronunciation] : tokenized) + { + ppgs.update(); + + std::set> visited; + for (int i=0; i sublist; + for (auto j=std::next(phonemes.begin(),i); j!=std::next(phonemes.begin(),i+l); j++) + { + sublist.push_back(*j); + } + + if (tokenized.count(sublist) && !visited.count(sublist)) + { + visited.insert(sublist); + db_.insertIntoTable( + "merophony", + { + { "merophone_id", tokenized[sublist]->getId() }, + { "holophone_id", pronunciation->getId() } + }); + } + } + } + /*std::list front = phonemes; + while (front.size() > 1) + { + front.pop_front(); + + if (tokenized.count(front)) + { + visited.insert(front); + db_.insertIntoTable( + "merophony", + { + { "merophone_id", tokenized[front]->getId() }, + { "holophone_id", pronunciation->getId() } + }); + break; + } + } + + if (phonemes.size() > 1) + { + std::list back = phonemes; + + while (back.size() > 1) + { + back.pop_back(); + + if (tokenized.count(back) && !visited.count(back)) + { + db_.insertIntoTable( + "merophony", + { + { "merophone_id", tokenized[back]->getId() }, + { "holophone_id", pronunciation->getId() } + }); + break; + } + } + }*/ + } } void generator::readWordNetAntonymy() @@ -1316,7 +1568,19 @@ namespace verbly { { if (!formByText_.count(text)) { - forms_.emplace_back(text); + std::string sortedText = text; + std::sort(sortedText.begin(), sortedText.end()); + + int anagramSetId; + if (anagramSets_.count(sortedText)) + { + anagramSetId = anagramSets_[sortedText]; + } else { + anagramSetId = anagramSets_.size(); + anagramSets_[sortedText] = anagramSetId; + } + + forms_.emplace_back(text, anagramSetId); formByText_[text] = &forms_.back(); } diff --git a/generator/generator.h b/generator/generator.h index 2cd2ba9..70f0657 100644 --- a/generator/generator.h +++ b/generator/generator.h @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include #include @@ -64,6 +66,10 @@ namespace verbly { void dumpObjects(); + void writeMerography(); + + void writeMerophony(); + void readWordNetAntonymy(); void readWordNetVariation(); @@ -138,8 +144,11 @@ namespace verbly { std::map, word*> wordByWnidAndWnum_; std::map> wordsByBaseForm_; std::map lemmaByBaseForm_; - std::map formByText_; + std::unordered_map formByText_; std::map pronunciationByPhonemes_; + std::unordered_map pronunciationByBlankPhonemes_; + std::map anagramSets_; + std::map anaphoneSets_; // Caches diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp index 3075d42..5c4d8e2 100644 --- a/generator/pronunciation.cpp +++ b/generator/pronunciation.cpp @@ -10,9 +10,10 @@ namespace verbly { int pronunciation::nextId_ = 0; - pronunciation::pronunciation(std::string phonemes) : + pronunciation::pronunciation(std::string phonemes, int anaphone_set_id) : id_(nextId_++), - phonemes_(phonemes) + phonemes_(phonemes), + anaphone_set_id_(anaphone_set_id) { auto phonemeList = hatkirby::split>(phonemes, " "); @@ -88,6 +89,8 @@ namespace verbly { fields.emplace_back("phonemes", arg.getPhonemes()); fields.emplace_back("syllables", arg.getSyllables()); fields.emplace_back("stress", arg.getStress()); + fields.emplace_back("anaphone_set_id", arg.getAnaphoneSetId()); + fields.emplace_back("reverse_pronunciation_id", arg.getReverseId()); if (arg.hasRhyme()) { diff --git a/generator/pronunciation.h b/generator/pronunciation.h index 3190e6d..e6dc4b4 100644 --- a/generator/pronunciation.h +++ b/generator/pronunciation.h @@ -13,7 +13,7 @@ namespace verbly { // Constructor - explicit pronunciation(std::string phonemes); + pronunciation(std::string phonemes, int anaphone_set_id); // Accessors @@ -62,6 +62,21 @@ namespace verbly { return stress_; } + int getAnaphoneSetId() const + { + return anaphone_set_id_; + } + + void setReverseId(int id) + { + reverse_id_ = id; + } + + int getReverseId() const + { + return reverse_id_; + } + private: static int nextId_; @@ -72,6 +87,8 @@ namespace verbly { std::string prerhyme_; int syllables_ = 0; std::string stress_; + int anaphone_set_id_; + int reverse_id_ = -1; }; diff --git a/generator/schema.sql b/generator/schema.sql index 6a7d223..8c910f4 100644 --- a/generator/schema.sql +++ b/generator/schema.sql @@ -160,7 +160,9 @@ CREATE TABLE `forms` ( `form` VARCHAR(32) NOT NULL, `complexity` SMALLINT NOT NULL, `proper` SMALLINT NOT NULL, - `length` SMALLINT NOT NULL + `length` SMALLINT NOT NULL, + `anagram_set_id` INTEGER NOT NULL, + `reverse_form_id` INTEGER NOT NULL ); CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); @@ -174,13 +176,23 @@ CREATE TABLE `lemmas_forms` ( CREATE INDEX `forms_lemmas` ON `lemmas_forms`(`form_id`,`category`,`lemma_id`); +CREATE TABLE `merography` ( + `merograph_id` INTEGER NOT NULL, + `holograph_id` INTEGER NOT NULL, + PRIMARY KEY(`merograph_id`,`holograph_id`) +) WITHOUT ROWID; + +CREATE INDEX `reverse_merography` ON `merography`(`holograph_id`,`merograph_id`); + CREATE TABLE `pronunciations` ( `pronunciation_id` INTEGER PRIMARY KEY, `phonemes` VARCHAR(64) NOT NULL, `prerhyme` VARCHAR(8), `rhyme` VARCHAR(64), `syllables` INTEGER NOT NULL, - `stress` VARCHAR(64) NOT NULL + `stress` VARCHAR(64) NOT NULL, + `anaphone_set_id` INTEGER NOT NULL, + `reverse_pronunciation_id` INTEGER NOT NULL ); CREATE INDEX `rhymes_with` ON `pronunciations`(`rhyme`,`prerhyme`); @@ -193,6 +205,14 @@ CREATE TABLE `forms_pronunciations` ( CREATE INDEX `pronunciations_forms` ON `forms_pronunciations`(`pronunciation_id`,`form_id`); +CREATE TABLE `merophony` ( + `merophone_id` INTEGER NOT NULL, + `holophone_id` INTEGER NOT NULL, + PRIMARY KEY(`merophone_id`,`holophone_id`) +) WITHOUT ROWID; + +CREATE INDEX `reverse_merophony` ON `merophony`(`holophone_id`,`merophone_id`); + CREATE TABLE `frames` ( `frame_id` INTEGER NOT NULL, `group_id` INTEGER NOT NULL, diff --git a/lib/field.cpp b/lib/field.cpp index ad6d958..4b7fff6 100644 --- a/lib/field.cpp +++ b/lib/field.cpp @@ -110,7 +110,10 @@ namespace verbly { field::operator filter() const { - if (isJoin()) + if (type_ == type::hierarchal_join) + { + return filter(*this, filter::comparison::hierarchally_matches, filter()); + } else if (isJoin()) { return filter(*this, filter::comparison::matches, filter()); } else { diff --git a/lib/form.cpp b/lib/form.cpp index 4983274..b5348cb 100644 --- a/lib/form.cpp +++ b/lib/form.cpp @@ -18,6 +18,11 @@ namespace verbly { const field form::pronunciations = field::joinThrough(object::form, "form_id", object::pronunciation, "forms_pronunciations", "pronunciation_id"); + const field form::anagrams = field::joinField(object::form, "anagram_set_id", object::form); + + const field form::merographs = field::selfJoin(object::form, "form_id", "merography", "holograph_id", "merograph_id"); + const field form::holographs = field::selfJoin(object::form, "form_id", "merography", "merograph_id", "holograph_id"); + field form::words(inflection category) { return field::joinThroughWhere(object::form, "form_id", object::word, "lemmas_forms", "lemma_id", "category", static_cast(category)); diff --git a/lib/form.h b/lib/form.h index b365943..39f53aa 100644 --- a/lib/form.h +++ b/lib/form.h @@ -136,6 +136,11 @@ namespace verbly { static const field pronunciations; + static const field anagrams; + + static const field merographs; + static const field holographs; + private: bool valid_ = false; diff --git a/lib/pronunciation.cpp b/lib/pronunciation.cpp index 3aef815..093e11a 100644 --- a/lib/pronunciation.cpp +++ b/lib/pronunciation.cpp @@ -18,6 +18,11 @@ namespace verbly { const field pronunciation::prerhyme = field::stringField(object::pronunciation, "prerhyme", true); const field pronunciation::rhyme = field::stringField(object::pronunciation, "rhyme", true); + const field pronunciation::anaphones = field::joinField(object::pronunciation, "anaphone_set_id", object::pronunciation); + + const field pronunciation::merophones = field::selfJoin(object::pronunciation, "pronunciation_id", "merophony", "holophone_id", "merophone_id"); + const field pronunciation::holophones = field::selfJoin(object::pronunciation, "pronunciation_id", "merophony", "merophone_id", "holophone_id"); + const field pronunciation::rhymes_field::rhymeJoin = field::joinField(object::pronunciation, "rhyme", object::pronunciation); const pronunciation::rhymes_field pronunciation::rhymes = {}; diff --git a/lib/pronunciation.h b/lib/pronunciation.h index cd241bd..210d61d 100644 --- a/lib/pronunciation.h +++ b/lib/pronunciation.h @@ -148,6 +148,11 @@ namespace verbly { static const field forms; + static const field anaphones; + + static const field merophones; + static const field holophones; + // Rhyming relationship class rhymes_field { diff --git a/lib/query.h b/lib/query.h index 65b4e9d..09d7f1f 100644 --- a/lib/query.h +++ b/lib/query.h @@ -43,9 +43,9 @@ namespace verbly { } statement stmt(Object::objectType, std::move(queryFilter)); - queryString_ = stmt.getQueryString(Object::select, std::move(sortOrder), limit); + bindings_ = stmt.getBindings(); } diff --git a/lib/version.h b/lib/version.h index 0404f5f..4dc5e52 100644 --- a/lib/version.h +++ b/lib/version.h @@ -4,7 +4,7 @@ namespace verbly { const int DATABASE_MAJOR_VERSION = 1; - const int DATABASE_MINOR_VERSION = 1; + const int DATABASE_MINOR_VERSION = 3; }; -- cgit 1.4.1