diff options
author | Star Rauchenberger <fefferburbia@gmail.com> | 2022-11-30 17:58:44 -0500 |
---|---|---|
committer | Star Rauchenberger <fefferburbia@gmail.com> | 2022-11-30 17:58:44 -0500 |
commit | 6816abc1e89fd955524d7c772477d6483d12cbf9 (patch) | |
tree | b8707bdb5e180ae7be9d2ddf0ccfbeb539f36361 /generator/generator.cpp | |
parent | 38c17f093615a16a4b4ec6dc2b5d3edb5c1d3895 (diff) | |
download | verbly-hkutil.tar.gz verbly-hkutil.tar.bz2 verbly-hkutil.zip |
De-duped pronunciations in generated database (ref: hkutil)
Identical pronunciations will now share an ID and be re-used by multiple forms. This has a negligible effect on database size, but it's useful for writing queries looking for words with the exact same pronunciations. This constitutes a minor database update, which we will call d1.2.
Diffstat (limited to 'generator/generator.cpp')
-rw-r--r-- | generator/generator.cpp | 12 |
1 file changed, 9 insertions, 3 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp index 0d073be..ad665a2 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
@@ -573,9 +573,15 @@ namespace verbly { | |||
573 | } | 573 | } |
574 | 574 | ||
575 | std::string phonemes = phoneme_data[2]; | 575 | std::string phonemes = phoneme_data[2]; |
576 | pronunciations_.emplace_back(phonemes); | 576 | if (pronunciationByPhonemes_.count(phonemes)) { |
577 | pronunciation& p = pronunciations_.back(); | 577 | pronunciation& p = *pronunciationByPhonemes_[phonemes]; |
578 | formByText_.at(canonical)->addPronunciation(p); | 578 | formByText_.at(canonical)->addPronunciation(p); |
579 | } else { | ||
580 | pronunciations_.emplace_back(phonemes); | ||
581 | pronunciation& p = pronunciations_.back(); | ||
582 | pronunciationByPhonemes_[phonemes] = &p; | ||
583 | formByText_.at(canonical)->addPronunciation(p); | ||
584 | } | ||
579 | } | 585 | } |
580 | } | 586 | } |
581 | } | 587 | } |