From ba25493b55b4e4e35de3fca69afd15ddcbaa545c Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Sun, 27 Nov 2016 22:51:11 -0500 Subject: Tweaked the cutting algorithm and disabled newlines --- kgramstats.cpp | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'kgramstats.cpp') diff --git a/kgramstats.cpp b/kgramstats.cpp index 17c3c2d..4a7eb9d 100644 --- a/kgramstats.cpp +++ b/kgramstats.cpp @@ -327,7 +327,12 @@ void rawr::compile(int maxK) } else { tk.suffix = suffixtype::terminating; - w.terms.add({suffix, newline}); + if (!newline) + { + w.terms.add({suffix, false}); + } else { + w.terms.add({".", false}); + } } } } @@ -573,21 +578,10 @@ std::string rawr::randomSentence(int maxL) const cur.pop_front(); } - if (cur.size() > 0) + if ((cur.size() > 2) && (cuts > 0) && ((rand() % cuts) > 0)) { - if (rand() % (_maxK - cur.size() + 1) == 0) - { - while ((cur.size() > 2) && (cuts > 0)) - { - if ((rand() % cuts) > 2) - { - cur.pop_front(); - cuts--; - } else { - break; - } - } - } + cur.pop_front(); + cuts /= 2; } // Gotta circumvent the last line of the input corpus @@ -711,10 +705,13 @@ std::string rawr::randomSentence(int maxL) const nextToken.append(" "); } - // If this pick was guaranteed, increase cut chance if (next.all == max) { + // If this pick was guaranteed, increase cut chance cuts++; + } else if (cuts > 0) { + // Otherwise, decrease cut chance + cuts--; } if (next.corpora.size() == 1) @@ -728,7 +725,7 @@ std::string rawr::randomSentence(int maxL) const { std::cout << " " << cor; } - std::cout << std::endl; + std::cout << "; l=" << cur.size() << ",cuts=" << cuts << std::endl; cur.push_back(next.tok); result.append(nextToken); -- cgit 1.4.1