diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-01-04 23:29:12 -0500 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-01-04 23:29:12 -0500 |
commit | 53102431c2dc7266a322223f84e286a9aa7c0729 (patch) | |
tree | 0c9347a6c2b7b2c7d55a5f5fb681e474867046fd /kgramstats.h | |
parent | a28dc579f3a0cd53850d5eb10565c27a92d27c55 (diff) | |
parent | 9e89002477d1358de9be9cabdc1edba26bd32836 (diff) | |
download | rawr-ebooks-53102431c2dc7266a322223f84e286a9aa7c0729.tar.gz rawr-ebooks-53102431c2dc7266a322223f84e286a9aa7c0729.tar.bz2 rawr-ebooks-53102431c2dc7266a322223f84e286a9aa7c0729.zip |
Merge branch 'master' of https://github.com/hatkirby/rawr-ebooks
Conflicts: malaprop.cpp
Diffstat (limited to 'kgramstats.h')
-rw-r--r-- | kgramstats.h | 84 |
1 files changed, 73 insertions, 11 deletions
diff --git a/kgramstats.h b/kgramstats.h index b01dece..ca61df7 100644 --- a/kgramstats.h +++ b/kgramstats.h | |||
@@ -7,7 +7,71 @@ | |||
7 | #ifndef KGRAMSTATS_H | 7 | #ifndef KGRAMSTATS_H |
8 | #define KGRAMSTATS_H | 8 | #define KGRAMSTATS_H |
9 | 9 | ||
10 | typedef std::list<std::string> kgram; | 10 | struct token { |
11 | std::string canon; | ||
12 | bool terminating; | ||
13 | |||
14 | token(std::string canon) : canon(canon), terminating(false) {} | ||
15 | |||
16 | bool operator<(const token& other) const | ||
17 | { | ||
18 | if (canon == other.canon) | ||
19 | { | ||
20 | return !terminating && other.terminating; | ||
21 | } else { | ||
22 | return canon < other.canon; | ||
23 | } | ||
24 | } | ||
25 | }; | ||
26 | |||
27 | enum querytype { | ||
28 | querytype_literal, | ||
29 | querytype_sentence | ||
30 | }; | ||
31 | |||
32 | struct query { | ||
33 | querytype type; | ||
34 | token word; | ||
35 | |||
36 | query(token word) : word(word), type(querytype_literal) {} | ||
37 | |||
38 | query(querytype type) : word(""), type(type) {} | ||
39 | |||
40 | bool operator<(const query& other) const | ||
41 | { | ||
42 | if (type == other.type) | ||
43 | { | ||
44 | return word < other.word; | ||
45 | } else { | ||
46 | return type < other.type; | ||
47 | } | ||
48 | } | ||
49 | }; | ||
50 | |||
51 | typedef std::list<query> kgram; | ||
52 | |||
53 | struct termstats { | ||
54 | char terminator; | ||
55 | int occurrences; | ||
56 | |||
57 | termstats() : terminator('.'), occurrences(1) {} | ||
58 | |||
59 | termstats(char terminator, int occurrences) | ||
60 | { | ||
61 | this->terminator = terminator; | ||
62 | this->occurrences = occurrences; | ||
63 | } | ||
64 | |||
65 | bool operator<(const termstats& other) const | ||
66 | { | ||
67 | if (terminator == other.terminator) | ||
68 | { | ||
69 | return occurrences < other.occurrences; | ||
70 | } else { | ||
71 | return terminator < other.terminator; | ||
72 | } | ||
73 | } | ||
74 | }; | ||
11 | 75 | ||
12 | class kgramstats | 76 | class kgramstats |
13 | { | 77 | { |
@@ -16,22 +80,20 @@ public: | |||
16 | std::vector<std::string> randomSentence(int n); | 80 | std::vector<std::string> randomSentence(int n); |
17 | 81 | ||
18 | private: | 82 | private: |
19 | typedef struct | 83 | struct token_data |
20 | { | 84 | { |
21 | int all; | 85 | int all; |
22 | int titlecase; | 86 | int titlecase; |
23 | int uppercase; | 87 | int uppercase; |
24 | int period; | 88 | token word; |
25 | int startquote; | 89 | |
26 | int endquote; | 90 | token_data() : word(""), all(0), titlecase(0), uppercase(0) {} |
27 | int startparen; | 91 | }; |
28 | int endparen; | 92 | |
29 | int comma; | ||
30 | std::string* token; | ||
31 | } token_data; | ||
32 | int maxK; | 93 | int maxK; |
33 | std::map<kgram, std::map<int, token_data*>* >* stats; | 94 | std::map<kgram, std::map<int, token_data> > stats; |
34 | malaprop mstats; | 95 | malaprop mstats; |
96 | std::map<token, std::map<int, termstats> > endings; | ||
35 | }; | 97 | }; |
36 | 98 | ||
37 | void printKgram(kgram k); | 99 | void printKgram(kgram k); |