about summary refs log tree commit diff stats
path: root/kgramstats.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2016-02-13 23:43:13 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2016-02-13 23:43:13 -0500
commit d62c340f1841c6fc46968643fab63841083aec6f (patch)
tree b4f9cf6a3464bd4ac3882e7a99eb398b65580193 /kgramstats.cpp
parent cc8167a392eb435f657e1411d6bf6d7cbc6f3e3f (diff)
download rawr-ebooks-d62c340f1841c6fc46968643fab63841083aec6f.tar.gz
rawr-ebooks-d62c340f1841c6fc46968643fab63841083aec6f.tar.bz2
rawr-ebooks-d62c340f1841c6fc46968643fab63841083aec6f.zip
Fixed issue where queries with both the wildcard token and a terminating token would reset the prefix
Diffstat (limited to 'kgramstats.cpp')
-rw-r--r-- kgramstats.cpp 19
1 file changed, 5 insertions, 14 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp
index ac694f3..f788cb1 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp
@@ -344,21 +344,12 @@ kgramstats::kgramstats(std::string corpus, int maxK)
344 td.titlecase++; 344 td.titlecase++;
345 } 345 }
346 346
347 kgram term_prefix; 347 if (std::begin(prefix)->tok.suffix == suffixtype::terminating)
348 bool changed = false;
349 std::transform(prefix.begin(), prefix.end(), std::back_inserter(term_prefix), [&] (query& q) {
350 if (q.tok.suffix == suffixtype::terminating)
351 {
352 changed = true;
353
354 return wildcardQuery;
355 } else {
356 return q;
357 }
358 });
359
360 if (changed)
361 { 348 {
349 kgram term_prefix(prefix);
350 term_prefix.pop_front();
351 term_prefix.push_front(wildcardQuery);
352
362 if (tstats[term_prefix].count(f) == 0) 353 if (tstats[term_prefix].count(f) == 0)
363 { 354 {
364 tstats[term_prefix].emplace(f, f); 355 tstats[term_prefix].emplace(f, f);