From 6816abc1e89fd955524d7c772477d6483d12cbf9 Mon Sep 17 00:00:00 2001 From: Star Rauchenberger Date: Wed, 30 Nov 2022 17:58:44 -0500 Subject: De-duped pronunciations in generated database Identical pronunciations will now share an ID and be re-used by multiple forms. This has a negligible effect on database size, but it's useful for writing queries looking for words with the exact same pronunciations. This constitutes a minor database update, which we will call d1.2. --- generator/generator.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'generator/generator.cpp') diff --git a/generator/generator.cpp b/generator/generator.cpp index 0d073be..ad665a2 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -573,9 +573,15 @@ namespace verbly { } std::string phonemes = phoneme_data[2]; - pronunciations_.emplace_back(phonemes); - pronunciation& p = pronunciations_.back(); - formByText_.at(canonical)->addPronunciation(p); + if (pronunciationByPhonemes_.count(phonemes)) { + pronunciation& p = *pronunciationByPhonemes_[phonemes]; + formByText_.at(canonical)->addPronunciation(p); + } else { + pronunciations_.emplace_back(phonemes); + pronunciation& p = pronunciations_.back(); + pronunciationByPhonemes_[phonemes] = &p; + formByText_.at(canonical)->addPronunciation(p); + } } } } -- cgit 1.4.1