From ba73e189ec5dcd41f6c85e9d52eacae4da0c949e Mon Sep 17 00:00:00 2001
From: Kelly Rauchenberger <fefferburbia@gmail.com>
Date: Sat, 6 Feb 2016 18:58:02 -0500
Subject: Changed how kgram cutting works

Previously, cutting occurred at random. Now, a token is cut from the front of the search kgram whenever the previously generated token was guaranteed by its search kgram (that is, it was the only token that could follow that specific query).
---
 kgramstats.cpp | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

(limited to 'kgramstats.cpp')
diff --git a/kgramstats.cpp b/kgramstats.cpp
index e6048d9..da8c326 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp
@@ -423,7 +423,7 @@ std::string kgramstats::randomSentence(int n)
 {
   std::string result;
   kgram cur(1, wildcardQuery);
-  int cuts = 0;
+  bool cut = false;
   std::stack<parentype> open_delimiters;
 	
   for (int i=0; i<n; i++)
@@ -433,23 +433,10 @@ std::string kgramstats::randomSentence(int n)
       cur.pop_front();
     }
     
-    if (cur.size() > 0)
+    if ((cur.size() > 0) && cut)
     {
-      if (rand() % (maxK - cur.size() + 1) == 0)
-      {
-        while (cur.size() > 2)
-        {
-          if ((rand() % (n)) < cuts)
-          {
-            cur.pop_front();
-            cuts--;
-          } else {
-            break;
-          }
-        }
-      }
-      
-      cuts++;
+      cur.pop_front();
+      cut = false;
     }
     
     // Gotta circumvent the last line of the input corpus
@@ -569,6 +556,11 @@ std::string kgramstats::randomSentence(int n)
     {
       break;
     }
+    
+    if (next.all == max)
+    {
+      cut = true;
+    }
   }
   
   // Remove the trailing space
-- 
cgit 1.4.1