Fixed issue when names.txt was not present

Also removed any code mentioning $noun$ because it turns out the current version of the canonical corpus doesn't even use it anymore.
author: Kelly Rauchenberger <fefferburbia@gmail.com> 2016-02-15 09:32:02 -0500
committer: Kelly Rauchenberger <fefferburbia@gmail.com> 2016-02-15 09:32:02 -0500
commit: c8e834c362ea80a781fa870338182a4c81ad3d78 (patch)
tree: d47f3614efc34f8a8e5ae6619955ddc4af760e9a
parent: fa6efc2e6fb0d11f90c06635766531ec52f2733e (diff)
download: rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.tar.gz
rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.tar.bz2
rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.zip
1 file changed, 13 insertions, 24 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp
index f91ef52..f3fbcb2 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp

@@ -162,7 +162,7 @@ kgramstats::kgramstats(std::string corpus, int maxK)
        {
          if (
            // Legacy freevars should be distinct from tokens containing similar words
-            (canonical.find("$name$") != std::string::npos) || (canonical.find("$noun$") != std::string::npos)
+            (canonical.find("$name$") != std::string::npos)
            // Words with no letters will be mangled by the spell checker
            || (canonical.find_first_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") == std::string::npos)
            )
@@ -588,31 +588,20 @@ std::string kgramstats::randomSentence(int n)
  // Replace old-style freevars while I can't be bothered to remake the corpus yet
  std::vector<std::string> fv_names;
  std::ifstream namefile("names.txt");
-  while (!namefile.eof())
+  if (namefile.is_open())
  {
-    std::string l;
+    while (!namefile.eof())
-    getline(namefile, l);
+    {
-    fv_names.push_back(l);
+      std::string l;
-  }
+      getline(namefile, l);
-  
+      fv_names.push_back(l);
-  int cpos;
+    }
-  while ((cpos = result.find("$name$")) != std::string::npos)
-  {
-    result.replace(cpos, 6, fv_names[rand() % fv_names.size()]);
-  }
-  
-  std::vector<std::string> fv_nouns;
-  std::ifstream nounfile("nouns.txt");
-  while (!nounfile.eof())
-  {
-    std::string l;
-    getline(nounfile, l);
-    fv_nouns.push_back(l);
-  }
  
-  while ((cpos = result.find("$noun$")) != std::string::npos)
+    int cpos;
-  {
+    while ((cpos = result.find("$name$")) != std::string::npos)
-    result.replace(cpos, 6, fv_nouns[rand() % fv_nouns.size()]);
+    {
+      result.replace(cpos, 6, fv_names[rand() % fv_names.size()]);
+    }
  }
        
  return result;
author	Kelly Rauchenberger <fefferburbia@gmail.com>	2016-02-15 09:32:02 -0500
committer	Kelly Rauchenberger <fefferburbia@gmail.com>	2016-02-15 09:32:02 -0500
commit	c8e834c362ea80a781fa870338182a4c81ad3d78 (patch)
tree	d47f3614efc34f8a8e5ae6619955ddc4af760e9a
parent	fa6efc2e6fb0d11f90c06635766531ec52f2733e (diff)
download	rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.tar.gz rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.tar.bz2 rawr-ebooks-c8e834c362ea80a781fa870338182a4c81ad3d78.zip

diff --git a/kgramstats.cpp b/kgramstats.cpp
index f91ef52..f3fbcb2 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp
@@ -162,7 +162,7 @@ kgramstats::kgramstats(std::string corpus, int maxK)
162	{	162	{
163	if (	163	if (
164	// Legacy freevars should be distinct from tokens containing similar words	164	// Legacy freevars should be distinct from tokens containing similar words
165	(canonical.find("$name$") != std::string::npos) || (canonical.find("$noun$") != std::string::npos)	165	(canonical.find("$name$") != std::string::npos)
166	// Words with no letters will be mangled by the spell checker	166	// Words with no letters will be mangled by the spell checker
167	|| (canonical.find_first_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") == std::string::npos)	167	|| (canonical.find_first_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") == std::string::npos)
168	)	168	)
@@ -588,31 +588,20 @@ std::string kgramstats::randomSentence(int n)
588	// Replace old-style freevars while I can't be bothered to remake the corpus yet	588	// Replace old-style freevars while I can't be bothered to remake the corpus yet
589	std::vector<std::string> fv_names;	589	std::vector<std::string> fv_names;
590	std::ifstream namefile("names.txt");	590	std::ifstream namefile("names.txt");
591	while (!namefile.eof())	591	if (namefile.is_open())
592	{	592	{
593	std::string l;	593	while (!namefile.eof())
594	getline(namefile, l);	594	{
595	fv_names.push_back(l);	595	std::string l;
596	}	596	getline(namefile, l);
597		597	fv_names.push_back(l);
598	int cpos;	598	}
599	while ((cpos = result.find("$name$")) != std::string::npos)
600	{
601	result.replace(cpos, 6, fv_names[rand() % fv_names.size()]);
602	}
603
604	std::vector<std::string> fv_nouns;
605	std::ifstream nounfile("nouns.txt");
606	while (!nounfile.eof())
607	{
608	std::string l;
609	getline(nounfile, l);
610	fv_nouns.push_back(l);
611	}
612		599
613	while ((cpos = result.find("$noun$")) != std::string::npos)	600	int cpos;
614	{	601	while ((cpos = result.find("$name$")) != std::string::npos)
615	result.replace(cpos, 6, fv_nouns[rand() % fv_nouns.size()]);	602	{
		603	result.replace(cpos, 6, fv_names[rand() % fv_names.size()]);
		604	}
616	}	605	}
617		606
618	return result;	607	return result;