1 files changed, 73 insertions, 11 deletions
diff --git a/kgramstats.h b/kgramstats.h
index b01dece..ca61df7 100644
--- a/kgramstats.h
+++ b/kgramstats.h

@@ -7,7 +7,71 @@
 #ifndef KGRAMSTATS_H
 #define KGRAMSTATS_H
-typedef std::list<std::string> kgram;
+// Key type pairing a `canon` string with a `terminating` flag.
+// Provides operator< so it can serve as a std::map key.
+struct token {
+  std::string canon;
+  bool terminating;
+  
+  // Builds a token from the given string; `terminating` starts out false.
+  token(std::string canon)
+    : canon(canon), terminating(false)
+  {
+  }
+  
+  // Orders by `canon` first. Among tokens with equal `canon`, one whose
+  // `terminating` flag is false sorts before one whose flag is true.
+  bool operator<(const token& other) const
+  {
+    if (canon != other.canon)
+    {
+      return canon < other.canon;
+    }
+    
+    return other.terminating && !terminating;
+  }
+};
+// Tag stored in a query to say what kind of query it is.
+// querytype_literal is the tag a query receives when built from a token.
+// NOTE(review): querytype_sentence presumably marks a sentence boundary;
+// confirm against the implementation file.
+enum querytype {
+  querytype_literal = 0,
+  querytype_sentence = 1
+};
+// One element of a kgram: a querytype tag together with a token.
+// Provides operator< so that sequences of queries can be compared.
+struct query {
+  querytype type;
+  token word;
+  
+  // Builds a literal query carrying the given token.
+  // Initializers are written in declaration order (type, then word): that
+  // is the order in which members are really initialized, and any other
+  // order in the list is misleading and triggers -Wreorder.
+  query(token word) : type(querytype_literal), word(word) {}
+  
+  // Builds a query of the given type; its word is a token made from "".
+  query(querytype type) : type(type), word("") {}
+  
+  // Orders by type first, then by word among queries of the same type.
+  bool operator<(const query& other) const
+  {
+    if (type != other.type)
+    {
+      return type < other.type;
+    }
+    
+    return word < other.word;
+  }
+};
+// A kgram is held as a std::list of query elements.
+typedef std::list<query> kgram;
+// Pairs a terminator character with an occurrence count.
+struct termstats {
+  char terminator;
+  int occurrences;
+  
+  // Default state: terminator '.' with an occurrence count of 1.
+  termstats() : terminator('.'), occurrences(1) {}
+  
+  // Stores the given terminator and occurrence count unchanged.
+  termstats(char terminator, int occurrences)
+    : terminator(terminator), occurrences(occurrences)
+  {
+  }
+  
+  // Orders by terminator first, then by occurrences when terminators match.
+  bool operator<(const termstats& other) const
+  {
+    if (terminator != other.terminator)
+    {
+      return terminator < other.terminator;
+    }
+    
+    return occurrences < other.occurrences;
+  }
+};
 class kgramstats
 {
@@ -16,22 +80,20 @@ public:
        std::vector<std::string> randomSentence(int n);
        
 private:
-        typedef struct
+        // Three counters (all, titlecase, uppercase) stored alongside a token.
+        struct token_data
         {
                 int all;
                 int titlecase;
                 int uppercase;
-                int period;
+    token word;
-    int startquote;
+    
-    int endquote;
+    // Zeroes every counter and builds word from "". Initializers are
+    // written in declaration order (word is declared last), which is the
+    // order members are really initialized in and avoids -Wreorder.
+    token_data() : all(0), titlecase(0), uppercase(0), word("") {}
-    int startparen;
+        };
-    int endparen;
+  
-    int comma;
-                std::string* token;
-        } token_data;
        int maxK;
-        std::map<kgram, std::map<int, token_data*>* >* stats;
+        std::map<kgram, std::map<int, token_data> > stats;
  malaprop mstats;
+  std::map<token, std::map<int, termstats> > endings;
 };
 void printKgram(kgram k);

diff --git a/kgramstats.h b/kgramstats.h index b01dece..ca61df7 100644 --- a/kgramstats.h +++ b/kgramstats.h
@@ -7,7 +7,71 @@
7	#ifndef KGRAMSTATS_H	7	#ifndef KGRAMSTATS_H
8	#define KGRAMSTATS_H	8	#define KGRAMSTATS_H
9		9
10	typedef std::list<std::string> kgram;	10	struct token {
		11	std::string canon;
		12	bool terminating;
		13
		14	token(std::string canon) : canon(canon), terminating(false) {}
		15
		16	bool operator<(const token& other) const
		17	{
		18	if (canon == other.canon)
		19	{
		20	return !terminating && other.terminating;
		21	} else {
		22	return canon < other.canon;
		23	}
		24	}
		25	};
		26
		27	enum querytype {
		28	querytype_literal,
		29	querytype_sentence
		30	};
		31
		32	struct query {
		33	querytype type;
		34	token word;
		35
		36	query(token word) : word(word), type(querytype_literal) {}
		37
		38	query(querytype type) : word(""), type(type) {}
		39
		40	bool operator<(const query& other) const
		41	{
		42	if (type == other.type)
		43	{
		44	return word < other.word;
		45	} else {
		46	return type < other.type;
		47	}
		48	}
		49	};
		50
		51	typedef std::list<query> kgram;
		52
		53	struct termstats {
		54	char terminator;
		55	int occurrences;
		56
		57	termstats() : terminator('.'), occurrences(1) {}
		58
		59	termstats(char terminator, int occurrences)
		60	{
		61	this->terminator = terminator;
		62	this->occurrences = occurrences;
		63	}
		64
		65	bool operator<(const termstats& other) const
		66	{
		67	if (terminator == other.terminator)
		68	{
		69	return occurrences < other.occurrences;
		70	} else {
		71	return terminator < other.terminator;
		72	}
		73	}
		74	};
11		75
12	class kgramstats	76	class kgramstats
13	{	77	{
@@ -16,22 +80,20 @@ public:
16	std::vector<std::string> randomSentence(int n);	80	std::vector<std::string> randomSentence(int n);
17		81
18	private:	82	private:
19	typedef struct	83	struct token_data
20	{	84	{
21	int all;	85	int all;
22	int titlecase;	86	int titlecase;
23	int uppercase;	87	int uppercase;
24	int period;	88	token word;
25	int startquote;	89
26	int endquote;	90	token_data() : word(""), all(0), titlecase(0), uppercase(0) {}
27	int startparen;	91	};
28	int endparen;	92
29	int comma;
30	std::string* token;
31	} token_data;
32	int maxK;	93	int maxK;
33	std::map<kgram, std::map<int, token_data*>* >* stats;	94	std::map<kgram, std::map<int, token_data> > stats;
34	malaprop mstats;	95	malaprop mstats;
		96	std::map<token, std::map<int, termstats> > endings;
35	};	97	};
36		98
37	void printKgram(kgram k);	99	void printKgram(kgram k);