about summary refs log tree commit diff stats
path: root/kgramstats.h
diff options
context:
space:
mode:
author: Kelly Rauchenberger <fefferburbia@gmail.com> 2016-05-29 21:01:07 -0400
committer: Kelly Rauchenberger <fefferburbia@gmail.com> 2016-05-29 21:01:07 -0400
commit: accbd7647de118cca7503a1bf0992529a0a76df8 (patch)
tree: 0ba1cb8105a21472d0b3beacd0aca699ff9c7ad1 /kgramstats.h
parent: 4d217ac6122120d9e86248432594864e114e3a46 (diff)
download: rawr-ebooks-accbd7647de118cca7503a1bf0992529a0a76df8.tar.gz
rawr-ebooks-accbd7647de118cca7503a1bf0992529a0a76df8.tar.bz2
rawr-ebooks-accbd7647de118cca7503a1bf0992529a0a76df8.zip
Newlines, colons, and semicolons are now valid terminators
Diffstat (limited to 'kgramstats.h')
-rw-r--r-- kgramstats.h 16
1 files changed, 15 insertions, 1 deletions
diff --git a/kgramstats.h b/kgramstats.h
index ee75ada..fc01101 100644
--- a/kgramstats.h
+++ b/kgramstats.h
@@ -19,10 +19,22 @@ class rawr {
19 std::string randomSentence(int maxL); 19 std::string randomSentence(int maxL);
20 20
21 private: 21 private:
22 struct terminator {
23 std::string form;
24 bool newline = false;
25
26 terminator(std::string form, bool newline) : form(form), newline(newline) {}
27
28 bool operator<(const terminator& other) const
29 {
30 return std::tie(form, newline) < std::tie(other.form, other.newline);
31 }
32 };
33
22 struct word { 34 struct word {
23 std::string canon; 35 std::string canon;
24 histogram<std::string> forms; 36 histogram<std::string> forms;
25 histogram<std::string> terms; 37 histogram<terminator> terms;
26 38
27 word(std::string canon) : canon(canon) {} 39 word(std::string canon) : canon(canon) {}
28 40
@@ -68,6 +80,7 @@ class rawr {
68 std::map<delimiter, int> delimiters; 80 std::map<delimiter, int> delimiters;
69 suffixtype suffix; 81 suffixtype suffix;
70 std::string raw; 82 std::string raw;
83 bool newline = false;
71 84
72 token(const word& w) : w(w), suffix(suffixtype::none) {} 85 token(const word& w) : w(w), suffix(suffixtype::none) {}
73 86
@@ -119,6 +132,7 @@ class rawr {
119 friend std::ostream& operator<<(std::ostream& os, kgram k); 132 friend std::ostream& operator<<(std::ostream& os, kgram k);
120 friend std::ostream& operator<<(std::ostream& os, query q); 133 friend std::ostream& operator<<(std::ostream& os, query q);
121 friend std::ostream& operator<<(std::ostream& os, token t); 134 friend std::ostream& operator<<(std::ostream& os, token t);
135 friend std::ostream& operator<<(std::ostream& os, terminator t);
122 136
123 int _maxK; 137 int _maxK;
124 bool _compiled = false; 138 bool _compiled = false;