diff options
-rw-r--r-- | kgramstats.cpp | 85 |
1 files changed, 41 insertions, 44 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp index 17598de..41517ca 100644 --- a/kgramstats.cpp +++ b/kgramstats.cpp | |||
@@ -206,7 +206,7 @@ std::vector<std::string> kgramstats::randomSentence(int n) | |||
206 | 206 | ||
207 | for (int i=0; i<n; i++) | 207 | for (int i=0; i<n; i++) |
208 | { | 208 | { |
209 | if ((cur.size() > 0) && (cur != newKgram)) | 209 | /*if ((cur.size() > 0) && (cur != newKgram)) |
210 | { | 210 | { |
211 | if (rand() % (maxK - cur.size() + 1) == 0) | 211 | if (rand() % (maxK - cur.size() + 1) == 0) |
212 | { | 212 | { |
@@ -223,7 +223,7 @@ std::vector<std::string> kgramstats::randomSentence(int n) | |||
223 | } | 223 | } |
224 | 224 | ||
225 | cuts++; | 225 | cuts++; |
226 | } | 226 | }*/ |
227 | 227 | ||
228 | std::map<int, token_data*> distribution = *(*stats)[cur]; | 228 | std::map<int, token_data*> distribution = *(*stats)[cur]; |
229 | int max = distribution.rbegin()->first; | 229 | int max = distribution.rbegin()->first; |
@@ -241,12 +241,9 @@ std::vector<std::string> kgramstats::randomSentence(int n) | |||
241 | if (casing < next->uppercase) | 241 | if (casing < next->uppercase) |
242 | { | 242 | { |
243 | std::transform(nextToken.begin(), nextToken.end(), nextToken.begin(), ::toupper); | 243 | std::transform(nextToken.begin(), nextToken.end(), nextToken.begin(), ::toupper); |
244 | } else if ((casing - next->uppercase) < next->titlecase) | ||
245 | { | ||
246 | nextToken[0] = toupper(nextToken[0]); | ||
247 | } | 244 | } |
248 | 245 | ||
249 | if ((cur == newKgram) && (rand() % 3 < 2)) | 246 | if ((cur == newKgram) && (rand() % 15 > 0)) |
250 | { | 247 | { |
251 | nextToken[0] = toupper(nextToken[0]); | 248 | nextToken[0] = toupper(nextToken[0]); |
252 | } | 249 | } |
@@ -255,48 +252,48 @@ std::vector<std::string> kgramstats::randomSentence(int n) | |||
255 | if (mess) | 252 | if (mess) |
256 | { | 253 | { |
257 | nextToken = mstats.alternate(nextToken); | 254 | nextToken = mstats.alternate(nextToken); |
255 | } | ||
258 | 256 | ||
259 | if (startquote < next->startquote) | 257 | if (startquote < next->startquote) |
258 | { | ||
259 | nextToken = "\"" + nextToken; | ||
260 | } else if (startparen < next->startparen) | ||
261 | { | ||
262 | nextToken = "(" + nextToken; | ||
263 | } | ||
264 | |||
265 | if (period < next->period) | ||
266 | { | ||
267 | if (endquote < next->endquote) | ||
260 | { | 268 | { |
261 | nextToken = "\"" + nextToken; | 269 | nextToken += "\""; |
262 | } else if (startparen < next->startparen) | 270 | } else if (endparen < next->endparen) |
263 | { | 271 | { |
264 | nextToken = "(" + nextToken; | 272 | nextToken += ")"; |
265 | } | 273 | } |
266 | 274 | ||
267 | if (period < next->period) | 275 | int type = rand() % 6; |
268 | { | 276 | |
269 | if (endquote < next->endquote) | 277 | if (type < 3) |
270 | { | ||
271 | nextToken += "\""; | ||
272 | } else if (endparen < next->endparen) | ||
273 | { | ||
274 | nextToken += ")"; | ||
275 | } | ||
276 | |||
277 | int type = rand() % 6; | ||
278 | |||
279 | if (type < 3) | ||
280 | { | ||
281 | nextToken += "."; | ||
282 | } else if (type < 5) | ||
283 | { | ||
284 | nextToken += "!"; | ||
285 | } else { | ||
286 | nextToken += "?"; | ||
287 | } | ||
288 | } else if (comma < next->comma) | ||
289 | { | 278 | { |
290 | if (endquote < next->endquote) | 279 | nextToken += "."; |
291 | { | 280 | } else if (type < 5) |
292 | nextToken += "\""; | 281 | { |
293 | } else if (endparen < next->endparen) | 282 | nextToken += "!"; |
294 | { | 283 | } else { |
295 | nextToken += ")"; | 284 | nextToken += "?"; |
296 | } | ||
297 | |||
298 | nextToken += ","; | ||
299 | } | 285 | } |
286 | } else if (comma < next->comma) | ||
287 | { | ||
288 | if (endquote < next->endquote) | ||
289 | { | ||
290 | nextToken += "\""; | ||
291 | } else if (endparen < next->endparen) | ||
292 | { | ||
293 | nextToken += ")"; | ||
294 | } | ||
295 | |||
296 | nextToken += ","; | ||
300 | } | 297 | } |
301 | 298 | ||
302 | if (cur.size() == maxK) | 299 | if (cur.size() == maxK) |
@@ -324,7 +321,7 @@ std::vector<std::string> kgramstats::randomSentence(int n) | |||
324 | cur.pop_front(); | 321 | cur.pop_front(); |
325 | } | 322 | } |
326 | 323 | ||
327 | if ((period < next->period) && ((rand() % 2) == 0)) | 324 | if ((period < next->period) && ((rand() % 3) == 0)) |
328 | { | 325 | { |
329 | cur = newKgram; | 326 | cur = newKgram; |
330 | } else if ((comma < next->comma) && ((rand() % 3) == 0)) | 327 | } else if ((comma < next->comma) && ((rand() % 3) == 0)) |
@@ -361,5 +358,5 @@ std::string canonize(std::string f) | |||
361 | std::string result; | 358 | std::string result; |
362 | std::remove_copy_if(canonical.begin(), canonical.end(), std::back_inserter(result), removeIf); | 359 | std::remove_copy_if(canonical.begin(), canonical.end(), std::back_inserter(result), removeIf); |
363 | 360 | ||
364 | return canonical; | 361 | return result; |
365 | } | 362 | } |