From b316e309559d7176af6cf0bb7dcd6dbaa83c01cd Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Fri, 29 Jan 2016 12:43:00 -0500 Subject: Rewrote how tokens are handled A 'word' is now an object that contains a distribution of forms that word can take. For now, most word just contain one form, the canonical one. The only special use is currently hashtags. Malapropisms have been disabled because of compatibility issues and because an upcoming feature is planned to replace it. --- histogram.cpp | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 histogram.cpp (limited to 'histogram.cpp') diff --git a/histogram.cpp b/histogram.cpp new file mode 100644 index 0000000..6896146 --- /dev/null +++ b/histogram.cpp @@ -0,0 +1,34 @@ +#include "histogram.h" +#include + +template +void histogram::add(const T& inst) +{ + freqtable[inst]++; +} + +template +void histogram::compile() +{ + distribution.clear(); + + int max = 0; + for (auto& it : freqtable) + { + max += it.second; + distribution.emplace(max, it.first); + } + + freqtable.clear(); +} + +template +const T& histogram::next() const +{ + int max = distribution.rbegin()->first; + int r = rand() % max; + + return distribution.upper_bound(r)->second; +} + +template class histogram ; -- cgit 1.4.1