diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-02-15 09:32:02 -0500 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-02-15 09:32:02 -0500 |
commit | c8e834c362ea80a781fa870338182a4c81ad3d78 (patch) | |
tree | d47f3614efc34f8a8e5ae6619955ddc4af760e9a | |
parent | fa6efc2e6fb0d11f90c06635766531ec52f2733e (diff) | |
download | rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.tar.gz rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.tar.bz2 rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.zip |
Fixed an issue that occurred when names.txt was not present
Also removed any code mentioning $noun$ because it turns out the current version of the canonical corpus doesn't even use it anymore.
-rw-r--r-- | kgramstats.cpp | 37 |
1 file changed, 13 insertions, 24 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp index f91ef52..f3fbcb2 100644 --- a/kgramstats.cpp +++ b/kgramstats.cpp | |||
@@ -162,7 +162,7 @@ kgramstats::kgramstats(std::string corpus, int maxK) | |||
162 | { | 162 | { |
163 | if ( | 163 | if ( |
164 | // Legacy freevars should be distinct from tokens containing similar words | 164 | // Legacy freevars should be distinct from tokens containing similar words |
165 | (canonical.find("$name$") != std::string::npos) || (canonical.find("$noun$") != std::string::npos) | 165 | (canonical.find("$name$") != std::string::npos) |
166 | // Words with no letters will be mangled by the spell checker | 166 | // Words with no letters will be mangled by the spell checker |
167 | || (canonical.find_first_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") == std::string::npos) | 167 | || (canonical.find_first_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") == std::string::npos) |
168 | ) | 168 | ) |
@@ -588,31 +588,20 @@ std::string kgramstats::randomSentence(int n) | |||
588 | // Replace old-style freevars while I can't be bothered to remake the corpus yet | 588 | // Replace old-style freevars while I can't be bothered to remake the corpus yet |
589 | std::vector<std::string> fv_names; | 589 | std::vector<std::string> fv_names; |
590 | std::ifstream namefile("names.txt"); | 590 | std::ifstream namefile("names.txt"); |
591 | while (!namefile.eof()) | 591 | if (namefile.is_open()) |
592 | { | 592 | { |
593 | std::string l; | 593 | while (!namefile.eof()) |
594 | getline(namefile, l); | 594 | { |
595 | fv_names.push_back(l); | 595 | std::string l; |
596 | } | 596 | getline(namefile, l); |
597 | 597 | fv_names.push_back(l); | |
598 | int cpos; | 598 | } |
599 | while ((cpos = result.find("$name$")) != std::string::npos) | ||
600 | { | ||
601 | result.replace(cpos, 6, fv_names[rand() % fv_names.size()]); | ||
602 | } | ||
603 | |||
604 | std::vector<std::string> fv_nouns; | ||
605 | std::ifstream nounfile("nouns.txt"); | ||
606 | while (!nounfile.eof()) | ||
607 | { | ||
608 | std::string l; | ||
609 | getline(nounfile, l); | ||
610 | fv_nouns.push_back(l); | ||
611 | } | ||
612 | 599 | ||
613 | while ((cpos = result.find("$noun$")) != std::string::npos) | 600 | int cpos; |
614 | { | 601 | while ((cpos = result.find("$name$")) != std::string::npos) |
615 | result.replace(cpos, 6, fv_nouns[rand() % fv_nouns.size()]); | 602 | { |
603 | result.replace(cpos, 6, fv_names[rand() % fv_names.size()]); | ||
604 | } | ||
616 | } | 605 | } |
617 | 606 | ||
618 | return result; | 607 | return result; |