From 1a1d503dbb9530a9d3518deef1ad40727edc1736 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Fri, 24 Jul 2015 16:17:42 -0400 Subject: Added some newline recognition --- kgramstats.cpp | 86 +++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 31 deletions(-) (limited to 'kgramstats.cpp') diff --git a/kgramstats.cpp b/kgramstats.cpp index b0c3940..16bf598 100644 --- a/kgramstats.cpp +++ b/kgramstats.cpp @@ -66,7 +66,7 @@ kgramstats::kgramstats(string corpus, int maxK) newSentence = false; } - if (newClause) + if (newClause || newSentence) { kgram commaKgram(1, ","); if (tstats[commaKgram] == NULL) @@ -78,12 +78,36 @@ kgramstats::kgramstats(string corpus, int maxK) newClause = false; } - - if ((f.length() > 0) && ((f[f.length()-1] == '.') || (f[f.length()-1] == '!') || (f[f.length()-1] == '?'))) - { - td->period++; + + if ((f.length() > 0) && (f[f.length()-1] == '\n')) + { + td->period++; newSentence = true; - } + f.resize(f.length()-1); + } + + if (f.length() > 0) + { + if ((f[f.length()-1] == '.') || (f[f.length()-1] == '!') || (f[f.length()-1] == '?')) + { + if (!newSentence) + { + td->period++; + newSentence = true; + } + + f.resize(f.length()-1); + } else if (f[f.length()-1] == ',') + { + if (!newSentence) + { + td->comma++; + newClause = true; + } + + f.resize(f.length()-1); + } + } if (f.length() > 0) { @@ -92,42 +116,42 @@ kgramstats::kgramstats(string corpus, int maxK) td->startquote++; } - if (f[f.length()-1] == '"') + if (f[0] == '(') { - td->endquote++; - - if ((f.length() > 1) && (f[f.length()-2] == ',')) - { - td->comma++; - newClause = true; - } + td->startparen++; } - if (f[f.length()-1] == ',') + if ((f[f.length()-1] == '"') || (f[f.length()-1] == ')')) { - td->comma++; - newClause = true; - - if ((f.length() > 1) && (f[f.length()-2] == '"')) + if (f[f.length()-1] == '"') { td->endquote++; + } else if (f[f.length()-1] == ')') + { + td->endparen++; } - if ((f.length() > 1) && (f[f.length()-2] == ')')) + f.resize(f.length()-1); + + if (f.length() > 0) { - td->endparen++; + if ((f[f.length()-1] == '.') || (f[f.length()-1] == '!') || (f[f.length()-1] == '?')) + { + if (!newSentence) + { + td->period++; + newSentence = true; + } + } else if (f[f.length()-1] == ',') + { + if (!newSentence && !newClause) + { + td->comma++; + newClause = true; + } + } } } - - if (f[0] == '(') - { - td->startparen++; - } - - if (f[f.length()-1] == ')') - { - td->endparen++; - } } if (std::find_if(f.begin(), f.end(), ::islower) == f.end()) -- cgit 1.4.1