about summary refs log tree commit diff stats
path: root/kgramstats.cpp
diff options
context:
space:
mode:
author: Kelly Rauchenberger <fefferburbia@gmail.com>, 2016-02-15 09:32:02 -0500
committer: Kelly Rauchenberger <fefferburbia@gmail.com>, 2016-02-15 09:32:02 -0500
commit: c8e834c362ea80a781fa870338182a4c81ad3d78 (patch)
tree: d47f3614efc34f8a8e5ae6619955ddc4af760e9a /kgramstats.cpp
parent: fa6efc2e6fb0d11f90c06635766531ec52f2733e (diff)
download: rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.tar.gz
rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.tar.bz2
rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.zip
Fixed issue when names.txt was not present
Also removed any code mentioning $noun$ because it turns out the current version of the canonical corpus doesn't even use it anymore.
Diffstat (limited to 'kgramstats.cpp')
-rw-r--r-- kgramstats.cpp 37
1 files changed, 13 insertions, 24 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp
index f91ef52..f3fbcb2 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp
@@ -162,7 +162,7 @@ kgramstats::kgramstats(std::string corpus, int maxK)
162 { 162 {
163 if ( 163 if (
164 // Legacy freevars should be distinct from tokens containing similar words 164 // Legacy freevars should be distinct from tokens containing similar words
165 (canonical.find("$name$") != std::string::npos) || (canonical.find("$noun$") != std::string::npos) 165 (canonical.find("$name$") != std::string::npos)
166 // Words with no letters will be mangled by the spell checker 166 // Words with no letters will be mangled by the spell checker
167 || (canonical.find_first_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") == std::string::npos) 167 || (canonical.find_first_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") == std::string::npos)
168 ) 168 )
@@ -588,31 +588,20 @@ std::string kgramstats::randomSentence(int n)
588 // Replace old-style freevars while I can't be bothered to remake the corpus yet 588 // Replace old-style freevars while I can't be bothered to remake the corpus yet
589 std::vector<std::string> fv_names; 589 std::vector<std::string> fv_names;
590 std::ifstream namefile("names.txt"); 590 std::ifstream namefile("names.txt");
591 while (!namefile.eof()) 591 if (namefile.is_open())
592 { 592 {
593 std::string l; 593 while (!namefile.eof())
594 getline(namefile, l); 594 {
595 fv_names.push_back(l); 595 std::string l;
596 } 596 getline(namefile, l);
597 597 fv_names.push_back(l);
598 int cpos; 598 }
599 while ((cpos = result.find("$name$")) != std::string::npos)
600 {
601 result.replace(cpos, 6, fv_names[rand() % fv_names.size()]);
602 }
603
604 std::vector<std::string> fv_nouns;
605 std::ifstream nounfile("nouns.txt");
606 while (!nounfile.eof())
607 {
608 std::string l;
609 getline(nounfile, l);
610 fv_nouns.push_back(l);
611 }
612 599
613 while ((cpos = result.find("$noun$")) != std::string::npos) 600 int cpos;
614 { 601 while ((cpos = result.find("$name$")) != std::string::npos)
615 result.replace(cpos, 6, fv_nouns[rand() % fv_nouns.size()]); 602 {
603 result.replace(cpos, 6, fv_names[rand() % fv_names.size()]);
604 }
616 } 605 }
617 606
618 return result; 607 return result;