From 6816abc1e89fd955524d7c772477d6483d12cbf9 Mon Sep 17 00:00:00 2001
From: Star Rauchenberger <fefferburbia@gmail.com>
Date: Wed, 30 Nov 2022 17:58:44 -0500
Subject: De-duped pronunciations in generated database

Identical pronunciations will now share an ID and be re-used by multiple forms. This has a negligible effect on database size, but it's useful for writing queries that look for words with exactly the same pronunciation.

This constitutes a minor database update, which we will call d1.2.
---
 generator/generator.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'generator/generator.cpp')

diff --git a/generator/generator.cpp b/generator/generator.cpp
index 0d073be..ad665a2 100644
--- a/generator/generator.cpp
+++ b/generator/generator.cpp
@@ -573,9 +573,15 @@ namespace verbly {
           }
 
           std::string phonemes = phoneme_data[2];
-          pronunciations_.emplace_back(phonemes);
-          pronunciation& p = pronunciations_.back();
-          formByText_.at(canonical)->addPronunciation(p);
+          if (pronunciationByPhonemes_.count(phonemes)) {
+            pronunciation& p = *pronunciationByPhonemes_[phonemes];
+            formByText_.at(canonical)->addPronunciation(p);
+          } else {
+            pronunciations_.emplace_back(phonemes);
+            pronunciation& p = pronunciations_.back();
+            pronunciationByPhonemes_[phonemes] = &p;
+            formByText_.at(canonical)->addPronunciation(p);
+          }
         }
       }
     }
-- 
cgit 1.4.1