1 files changed, 6 insertions, 8 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp
index f788cb1..a5402a3 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp

@@ -61,12 +61,6 @@ kgramstats::kgramstats(std::string corpus, int maxK)
  freevar fv_emoticons {emoticons, "emoticons.txt"};
  std::map<std::string, std::string> canonical_form;
  
-  // Ensure the old-style freevars exist
-  canonical_form["$name$"] = "$name$";
-  words.emplace("$name$", std::string("$name$"));
-  canonical_form["$noun$"] = "$noun$";
-  words.emplace("$noun$", std::string("$noun$"));
-  
  AspellConfig* spell_config = new_aspell_config();
  AspellCanHaveError* possible_err = new_aspell_speller(spell_config);
  if (aspell_error_number(possible_err) != 0)
@@ -166,9 +160,13 @@ kgramstats::kgramstats(std::string corpus, int maxK)
        // Basically any other word
        if (canonical_form.count(canonical) == 0)
        {
-          if (canonical.find_first_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") == std::string::npos)
+          if (
-          {
+            // Legacy freevars: tokens containing the literal "$name$" or "$noun$" are stored verbatim as their own canonical form
+            (canonical.find("$name$") != std::string::npos) || (canonical.find("$noun$") != std::string::npos)
            // Words with no letters will be mangled by the spell checker
+            || (canonical.find_first_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") == std::string::npos)
+            )
+          {
            canonical_form[canonical] = canonical;
            words.emplace(canonical, canonical);
          } else {