From 58cdbee6f08bd5d4f371d8eb9739d74332213703 Mon Sep 17 00:00:00 2001
From: Kelly Rauchenberger <fefferburbia@gmail.com>
Date: Mon, 16 Oct 2017 14:00:34 -0400
Subject: Added in made-up words

---
 data.txt      | 247 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 patterner.cpp | 161 ++++++++++++++++++++++----------------
 2 files changed, 342 insertions(+), 66 deletions(-)
diff --git a/data.txt b/data.txt
index f25657f..dd476ef 100644
--- a/data.txt
+++ b/data.txt
@@ -28,6 +28,251 @@ you're a piece of {WORD}
 what the {WORD}
 what the {WORD}ing {WORD}
 kindly catch the 9am train to {Word}sville
+If you look up "{WORD}" in the dictionary, there's a picture of you underneath!
+I never want to see your {WORD}ing {WORD} again
 
 INSULT,END
-you piece of {WORD}
\ No newline at end of file
+you piece of {WORD}
+
+WORD
+{STARTSONANT}{VOWEL}{ENDSONANT}
+{WORD2}
+
+VOWEL
+a
+e
+i
+o
+u
+a
+e
+i
+o
+u
+a
+e
+i
+o
+u
+a
+e
+i
+o
+u
+ae
+ai
+au
+ea
+ee
+ei
+ie
+io
+oi
+ou
+ui
+uu
+
+STARTSONANT
+b
+c
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+r
+s
+t
+b
+c
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+q
+r
+s
+t
+v
+w
+x
+z
+b
+c
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+q
+r
+s
+t
+v
+w
+x
+z
+bh
+bl
+br
+ch
+cl
+cr
+dr
+dw
+fl
+fr
+gl
+gr
+kl
+kn
+kr
+ph
+pl
+pr
+pt
+rh
+sc
+sh
+sk
+sl
+sm
+sn
+sp
+sq
+sr
+st
+sw
+th
+tr
+tw
+wh
+wr
+zh
+
+ENDSONANT
+b
+d
+f
+g
+h
+k
+l
+m
+n
+p
+r
+t
+b
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+r
+t
+v
+w
+x
+z
+b
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+r
+t
+v
+w
+x
+z
+bf
+bh
+bk
+ch
+ck
+dk
+dp
+dt
+ff
+fh
+fk
+fp
+ft
+gf
+gh
+gk
+hk
+lb
+ld
+lf
+lg
+lh
+lk
+lm
+ln
+lp
+lt
+mf
+mk
+mn
+mp
+nd
+nf
+ng
+nk
+np
+nt
+pf
+ph
+pk
+pt
+rb
+rd
+rf
+rg
+rk
+rm
+rn
+rp
+rt
+sk
+sp
+st
+wd
+wf
+wg
+wk
+wl
+wm
+wn
+wp
+wt
+zk
\ No newline at end of file
diff --git a/patterner.cpp b/patterner.cpp
index af844cf..1deffb8 100644
--- a/patterner.cpp
+++ b/patterner.cpp
@@ -47,92 +47,123 @@ patterner::patterner(
 std::string patterner::generate()
 {
   std::string action = "{MAIN}";
-  int tknloc;
-  while ((tknloc = action.find("{")) != std::string::npos)
-  {
-    std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1);
-    std::string modifier;
-    int modloc;
-    if ((modloc = token.find(":")) != std::string::npos)
-    {
-      modifier = token.substr(modloc+1);
-      token = token.substr(0, modloc);
-    }
 
-    std::string canontkn;
-    std::transform(std::begin(token), std::end(token),
-        std::back_inserter(canontkn), [] (char ch) {
-      return std::toupper(ch);
-    });
+  verbly::filter slurBlacklist =
+    (verbly::word::usageDomains %= (
+      (verbly::notion::wnid == 106718862) // ethnic slur
+      || (verbly::notion::wnid == 106717170) // disparagement (other slurs)
+      || (verbly::notion::wnid == 107124340))); // obscenity (other profanity)
 
-    std::string result;
-    if (canontkn == "WORD")
-    {
-      result = data_.words(
-        (verbly::word::forms(verbly::inflection::base) %=
-          (verbly::form::complexity == 1)
-            && (verbly::form::length == 4)
-            && (verbly::form::proper == false)
-            && (verbly::pronunciation::numOfSyllables == 1))
-        && !(verbly::word::usageDomains %=
-          (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs
-        .first().getBaseForm().getText();
-    } else if (canontkn == "\\N")
+  while (action == "{MAIN}")
+  {
+    int tknloc;
+    while ((tknloc = action.find("{")) != std::string::npos)
     {
-      result = "\n";
-    } else {
-      auto group = groups_[canontkn];
-      std::uniform_int_distribution<int> groupdist(0, group.size()-1);
-      int groupind = groupdist(rng_);
-      result = group[groupind];
-    }
+      std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1);
+      std::string modifier;
+      int modloc;
+      if ((modloc = token.find(":")) != std::string::npos)
+      {
+        modifier = token.substr(modloc+1);
+        token = token.substr(0, modloc);
+      }
 
-    if (modifier == "indefinite")
-    {
-      if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1])))
+      std::string canontkn;
+      std::transform(std::begin(token), std::end(token),
+          std::back_inserter(canontkn), [] (char ch) {
+        return std::toupper(ch);
+      });
+
+      std::string result;
+      if (canontkn == "WORD2")
       {
-        result = "an " + result;
-      } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u'))
+        result = data_.words(
+          (verbly::notion::partOfSpeech == verbly::part_of_speech::noun)
+          && (verbly::word::forms(verbly::inflection::base) %=
+            (verbly::form::complexity == 1)
+              && (verbly::form::length == 4)
+              && (verbly::form::proper == false)
+              && (verbly::pronunciation::numOfSyllables == 1))
+          && !slurBlacklist)
+          .first().getBaseForm().getText();
+      } else if (canontkn == "\\N")
       {
-        result = "an " + result;
+        result = "\n";
       } else {
-        result = "a " + result;
+        auto group = groups_[canontkn];
+        std::uniform_int_distribution<int> groupdist(0, group.size()-1);
+        int groupind = groupdist(rng_);
+        result = group[groupind];
       }
-    }
 
-    std::string finalresult;
-    if (islower(token[0]))
-    {
-      std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) {
-        return std::tolower(ch);
-      });
-    } else if (isupper(token[0]) && !isupper(token[1]))
-    {
-      auto words = verbly::split<std::list<std::string>>(result, " ");
-      for (auto& word : words)
+      if (modifier == "indefinite")
       {
-        if (word[0] == '{')
+        if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1])))
         {
-          word[1] = std::toupper(word[1]);
+          result = "an " + result;
+        } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u'))
+        {
+          result = "an " + result;
+        } else {
+          result = "a " + result;
+        }
+      }
 
-          for (int k=2; k<word.length(); k++)
+      std::string finalresult;
+      if (islower(token[0]))
+      {
+        std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) {
+          return std::tolower(ch);
+        });
+      } else if (isupper(token[0]) && !isupper(token[1]))
+      {
+        auto words = verbly::split<std::list<std::string>>(result, " ");
+        for (auto& word : words)
+        {
+          if (word[0] == '{')
           {
-            if (std::isalpha(word[k]))
+            word[1] = std::toupper(word[1]);
+
+            for (int k=2; k<word.length(); k++)
             {
-              word[k] = std::tolower(word[k]);
+              if (std::isalpha(word[k]))
+              {
+                word[k] = std::tolower(word[k]);
+              }
             }
+          } else {
+            word[0] = std::toupper(word[0]);
           }
-        } else {
-          word[0] = std::toupper(word[0]);
         }
+
+        finalresult = verbly::implode(std::begin(words), std::end(words), " ");
+      } else {
+        finalresult = result;
       }
 
-      finalresult = verbly::implode(std::begin(words), std::end(words), " ");
-    } else {
-      finalresult = result;
+      action.replace(tknloc, action.find("}")-tknloc+1, finalresult);
     }
 
-    action.replace(tknloc, action.find("}")-tknloc+1, finalresult);
+    std::string canonical;
+    std::transform(std::begin(action), std::end(action),
+      std::back_inserter(canonical), [] (char ch)
+    {
+      return std::tolower(ch);
+    });
+
+    std::list<std::string> words =
+      verbly::split<std::list<std::string>>(canonical, " ");
+
+    for (std::string word : words)
+    {
+      if (!data_.forms(
+        (verbly::form::text == word)
+        && slurBlacklist).all().empty())
+      {
+        action = "{MAIN}";
+        break;
+      }
+    }
   }
 
   return action;
-- 
cgit 1.4.1