Merge branch 'master' of http://github.com/hatkirby/rawr-ebooks

author: Feffernoose <fefferburbia@gmail.com> 2013-10-06 19:52:29 -0400
committer: Feffernoose <fefferburbia@gmail.com> 2013-10-06 19:52:29 -0400
commit: 60561c9b95de1043979fbe59c93342175f9febd8 (patch)
tree: 2af811602d041ef0ac481c3a6637fad37ba21d4d
parent: 8d28a8e13dbe602783a505adb1df375b0d65efe0 (diff)
parent: 3d995835426312809aa7a5a4403ef3984339be0f (diff)
download: rawr-ebooks-60561c9b95de1043979fbe59c93342175f9febd8.tar.gz
rawr-ebooks-60561c9b95de1043979fbe59c93342175f9febd8.tar.bz2
rawr-ebooks-60561c9b95de1043979fbe59c93342175f9febd8.zip
1 file changed, 8 insertions, 2 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp
index 6c0e4ce..327752b 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp

@@ -4,6 +4,8 @@
 #include <cstdlib>
 #include <algorithm>
+// runs in O(t^2) time where t is the number of tokens in the input corpus
+// We consider maxK to be fairly constant
 kgramstats::kgramstats(string corpus, int maxK)
 {
        this->maxK = maxK;
@@ -16,7 +18,11 @@ kgramstats::kgramstats(string corpus, int maxK)
        {
           end = corpus.find(" ", start);
-           tokens.push_back(corpus.substr(start, (end == string::npos) ? string::npos : end - start));
+       string token = corpus.substr(start, (end == string::npos) ? string::npos : end - start);
+       if (token.compare(""))
+       {
+           tokens.push_back(token);
+       }
           start = ((end > (string::npos - 1) ) ? string::npos : end + 1);
        }
@@ -85,9 +91,9 @@ void printKgram(kgram k)
        {
                cout << *it << " ";
        }
-        cout << endl;
 }
+// runs in O(n log t) time where n is the input number of sentences and t is the number of tokens in the input corpus
 vector<string> kgramstats::randomSentence(int n)
 {
        vector<string> result;
author	Feffernoose <fefferburbia@gmail.com>	2013-10-06 19:52:29 -0400
committer	Feffernoose <fefferburbia@gmail.com>	2013-10-06 19:52:29 -0400
commit	60561c9b95de1043979fbe59c93342175f9febd8 (patch)
tree	2af811602d041ef0ac481c3a6637fad37ba21d4d
parent	8d28a8e13dbe602783a505adb1df375b0d65efe0 (diff)
parent	3d995835426312809aa7a5a4403ef3984339be0f (diff)
download	rawr-ebooks-60561c9b95de1043979fbe59c93342175f9febd8.tar.gz rawr-ebooks-60561c9b95de1043979fbe59c93342175f9febd8.tar.bz2 rawr-ebooks-60561c9b95de1043979fbe59c93342175f9febd8.zip