diff options
Diffstat (limited to 'malaprop.cpp')
-rw-r--r-- | malaprop.cpp | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/malaprop.cpp b/malaprop.cpp new file mode 100644 index 0000000..bfea579 --- /dev/null +++ b/malaprop.cpp | |||
@@ -0,0 +1,127 @@ | |||
1 | #include "malaprop.h" | ||
2 | #include <cstdlib> | ||
3 | #include <iostream> | ||
4 | |||
// Predicate for std::remove_copy_if: returns true when `c` is NOT an
// alphabetic character and should therefore be dropped.
bool removeIfM(char c)
{
  // The <cctype> classifiers require a value representable as unsigned char
  // (or EOF). A plain char may be negative for non-ASCII bytes, so convert
  // before classifying.
  return !isalpha(static_cast<unsigned char>(c));
}
9 | |||
// Maps a lowercase consonant to its sound-group digit ('1'..'6').
// Any character that belongs to no group (vowels, h, w, y, uppercase
// letters, anything else) is returned unchanged.
char soundID(char l)
{
  // groups[k] lists the letters that map to the digit '1' + k.
  static const char* const groups[] = {
    "bfpv",
    "cgjkqsxz",
    "dt",
    "l",
    "mn",
    "r"
  };
  static const int groupCount = sizeof(groups) / sizeof(groups[0]);

  for (int g = 0; g < groupCount; ++g)
  {
    for (const char* member = groups[g]; *member != '\0'; ++member)
    {
      if (*member == l)
      {
        return static_cast<char>('1' + g);
      }
    }
  }

  return l;
}
47 | |||
// Returns the canonical form of `f`: every character is lowercased and
// anything that is not alphabetic afterwards is dropped.
// Example: "Hello, World!" -> "helloworld".
std::string canonizetwo(std::string f)
{
  std::string result;
  result.reserve(f.size());

  for (std::string::size_type i = 0; i < f.size(); ++i)
  {
    // tolower/isalpha require a value representable as unsigned char;
    // a plain (possibly negative) char is undefined behaviour for
    // non-ASCII bytes, so convert first.
    const int lowered = tolower(static_cast<unsigned char>(f[i]));
    if (isalpha(lowered))
    {
      result += static_cast<char>(lowered);
    }
  }

  return result;
}
58 | |||
59 | malaprop::soundex malaprop::soundify(std::string f) | ||
60 | { | ||
61 | std::string result(canonizetwo(f)); | ||
62 | |||
63 | soundex ex; | ||
64 | ex.prefix = result[0]; | ||
65 | |||
66 | std::string output; | ||
67 | |||
68 | for (int i = 1; i<result.length(); i++) | ||
69 | { | ||
70 | int c = soundID(result[i]); | ||
71 | if ( | ||
72 | (isdigit(c)) // Not a vowel | ||
73 | && (c != soundID(result[i-1])) // Not the same as the previous character | ||
74 | && ((i < 2) || ((result[i-1] = 'h' || result[i-1] == 'w') && (c != soundID(result[i-2])))) // Not same as before h/w | ||
75 | ) | ||
76 | { | ||
77 | output += c; | ||
78 | } | ||
79 | } | ||
80 | |||
81 | output.resize(3, '0'); | ||
82 | ex.code = atoi(output.c_str()); | ||
83 | |||
84 | return ex; | ||
85 | } | ||
86 | |||
87 | void malaprop::addWord(std::string word) | ||
88 | { | ||
89 | soundex ex = soundify(word); | ||
90 | |||
91 | dict[ex].insert(canonizetwo(word)); | ||
92 | } | ||
93 | |||
94 | void malaprop::stats() | ||
95 | { | ||
96 | for (std::map<soundex, std::set<std::string> >::iterator it = dict.begin(); it != dict.end(); it++) | ||
97 | { | ||
98 | printf("%c%03d (%d): ", it->first.prefix, it->first.code, it->second.size()); | ||
99 | |||
100 | for (std::set<std::string>::iterator jt = it->second.begin(); jt != it->second.end(); jt++) | ||
101 | { | ||
102 | std::cout << *jt << ", "; | ||
103 | } | ||
104 | |||
105 | std::cout << std::endl; | ||
106 | } | ||
107 | |||
108 | exit(0); | ||
109 | } | ||
110 | |||
111 | std::string malaprop::alternate(std::string word) | ||
112 | { | ||
113 | soundex ex = soundify(word); | ||
114 | std::set<std::string>& opts = dict[ex]; | ||
115 | int opt = rand() % opts.size(); | ||
116 | for (std::set<std::string>::iterator it = opts.begin(); it != opts.end(); it++) | ||
117 | { | ||
118 | if (opt == 0) | ||
119 | { | ||
120 | return *it; | ||
121 | } | ||
122 | |||
123 | opt--; | ||
124 | } | ||
125 | |||
126 | return word; | ||
127 | } | ||