about summary refs log tree commit diff stats
path: root/malaprop.cpp
blob: bfea57946a76ac5a7c18b73527d307e712d425c6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include "malaprop.h"
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>

/**
 * Tells whether a character should be stripped from a word.
 *
 * @param c character to classify
 * @return true when c is NOT an alphabetic character, false otherwise
 */
bool removeIfM(char c)
{
  // The <cctype> classifiers require a value representable as unsigned char;
  // plain char may be negative for non-ASCII bytes, so convert first.
  return !isalpha(static_cast<unsigned char>(c));
}

/**
 * Maps a lowercase consonant to its Soundex digit class.
 *
 *   b f p v           -> '1'
 *   c g j k q s x z   -> '2'
 *   d t               -> '3'
 *   l                 -> '4'
 *   m n               -> '5'
 *   r                 -> '6'
 *
 * @param l character to classify
 * @return the digit character for the class containing l; any character
 *         that belongs to no class is returned unchanged
 */
char soundID(char l)
{
  // One entry per digit class; the entry at index g is encoded as '1' + g.
  static const char* const classes[] = { "bfpv", "cgjkqsxz", "dt", "l", "mn", "r" };
  static const int classCount = 6;

  for (int g = 0; g < classCount; ++g)
  {
    for (const char* member = classes[g]; *member != '\0'; ++member)
    {
      if (*member == l)
      {
        return static_cast<char>('1' + g);
      }
    }
  }

  // Not in any class: hand the character back untouched.
  return l;
}

/**
 * Produces the canonical form of a word: alphabetic characters only, all
 * converted to lowercase. Every other character (digits, punctuation,
 * whitespace, ...) is dropped.
 *
 * @param f the raw word
 * @return the lowercase, letters-only version of f (possibly empty)
 */
std::string canonizetwo(std::string f)
{
  std::string result;
  result.reserve(f.size());

  for (std::string::size_type i = 0; i < f.size(); ++i)
  {
    // <cctype> functions need a value representable as unsigned char; plain
    // char may be negative for non-ASCII bytes.
    const unsigned char ch = static_cast<unsigned char>(f[i]);
    if (isalpha(ch))
    {
      result += static_cast<char>(tolower(ch));
    }
  }

  return result;
}

/**
 * Computes the Soundex-style key of a word.
 *
 * The word is first canonicalized with canonizetwo(). The key's prefix is the
 * first canonical letter; its code is built from the soundID() digits of the
 * following letters, where
 *   - letters without a digit class (vowels etc.) contribute nothing,
 *   - a letter whose digit equals that of the directly preceding letter is
 *     skipped,
 *   - 'h' and 'w' are transparent: a letter is also skipped when its digit
 *     equals that of the letter before the intervening h/w.
 * The digit string is truncated or zero-padded to exactly three digits and
 * stored as an integer.
 *
 * @param f the word to encode
 * @return the key for f; a word containing no letters yields prefix '\0' and
 *         code 0
 */
malaprop::soundex malaprop::soundify(std::string f)
{
  const std::string letters(canonizetwo(f));

  soundex ex;

  if (letters.empty())
  {
    // Nothing to encode; do not index into an empty string.
    ex.prefix = '\0';
    ex.code = 0;
    return ex;
  }

  ex.prefix = letters[0];

  std::string digits;

  // Class of the most recent letter that is not h/w. Starting from the first
  // letter makes a second letter of the same class get skipped.
  char previous = soundID(letters[0]);

  for (std::string::size_type i = 1; i < letters.length(); ++i)
  {
    const char letter = letters[i];

    if (letter == 'h' || letter == 'w')
    {
      // h/w neither emit a digit nor separate equal neighbours.
      continue;
    }

    const char id = soundID(letter);
    if (isdigit(static_cast<unsigned char>(id)) && id != previous)
    {
      digits += id;
    }

    // Vowels update `previous` too, so equal digits separated by a vowel are
    // both emitted.
    previous = id;
  }

  digits.resize(3, '0');
  ex.code = atoi(digits.c_str());

  return ex;
}

void malaprop::addWord(std::string word)
{
  soundex ex = soundify(word);
  
  dict[ex].insert(canonizetwo(word));
}

/**
 * Dumps the whole dictionary to standard output, one key per line, in the
 * form
 *
 *   <prefix><3-digit code> (<number of words>): word1, word2, ...
 *
 * and then terminates the process with exit status 0. This function does not
 * return to its caller.
 */
void malaprop::stats()
{
  for (std::map<soundex, std::set<std::string> >::iterator it = dict.begin(); it != dict.end(); ++it)
  {
    // size() yields an unsigned size type, which "%d" does not match; convert
    // explicitly and print with "%lu" so the argument and format agree.
    printf("%c%03d (%lu): ", it->first.prefix, it->first.code, static_cast<unsigned long>(it->second.size()));

    for (std::set<std::string>::iterator jt = it->second.begin(); jt != it->second.end(); ++jt)
    {
      std::cout << *jt << ", ";
    }

    std::cout << std::endl;
  }

  exit(0);
}

std::string malaprop::alternate(std::string word)
{
  soundex ex = soundify(word);
  std::set<std::string>& opts = dict[ex];
  int opt = rand() % opts.size();
  for (std::set<std::string>::iterator it = opts.begin(); it != opts.end(); it++)
  {
    if (opt == 0)
    {
      return *it;
    }
    
    opt--;
  }
  
  return word;
}