summary refs log tree commit diff stats
path: root/generator/generator.cpp
diff options
context:
space:
mode:
author: Star Rauchenberger <fefferburbia@gmail.com> 2022-11-30 17:58:44 -0500
committer: Star Rauchenberger <fefferburbia@gmail.com> 2022-11-30 17:58:44 -0500
commit: 6816abc1e89fd955524d7c772477d6483d12cbf9 (patch)
tree: b8707bdb5e180ae7be9d2ddf0ccfbeb539f36361 /generator/generator.cpp
parent: 38c17f093615a16a4b4ec6dc2b5d3edb5c1d3895 (diff)
download: verbly-6816abc1e89fd955524d7c772477d6483d12cbf9.tar.gz
verbly-6816abc1e89fd955524d7c772477d6483d12cbf9.tar.bz2
verbly-6816abc1e89fd955524d7c772477d6483d12cbf9.zip
De-duped pronunciations in generated database hkutil
Identical pronunciations will now share an ID and be re-used by multiple forms. This has a negligible effect on database size, but it's useful for writing queries looking for words with the exact same pronunciations.

This constitutes a minor database update, which we will call d1.2.
Diffstat (limited to 'generator/generator.cpp')
-rw-r--r-- generator/generator.cpp 12
1 files changed, 9 insertions, 3 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp
index 0d073be..ad665a2 100644
--- a/generator/generator.cpp
+++ b/generator/generator.cpp
@@ -573,9 +573,15 @@ namespace verbly {
573 } 573 }
574 574
575 std::string phonemes = phoneme_data[2]; 575 std::string phonemes = phoneme_data[2];
576 pronunciations_.emplace_back(phonemes); 576 if (pronunciationByPhonemes_.count(phonemes)) {
577 pronunciation& p = pronunciations_.back(); 577 pronunciation& p = *pronunciationByPhonemes_[phonemes];
578 formByText_.at(canonical)->addPronunciation(p); 578 formByText_.at(canonical)->addPronunciation(p);
579 } else {
580 pronunciations_.emplace_back(phonemes);
581 pronunciation& p = pronunciations_.back();
582 pronunciationByPhonemes_[phonemes] = &p;
583 formByText_.at(canonical)->addPronunciation(p);
584 }
579 } 585 }
580 } 586 }
581 } 587 }