From 8d28a8e13dbe602783a505adb1df375b0d65efe0 Mon Sep 17 00:00:00 2001 From: Feffernoose Date: Sun, 6 Oct 2013 19:51:45 -0400 Subject: Split rawr-ebooks and rawr-gen Also wrote README --- gen.cpp | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 gen.cpp (limited to 'gen.cpp') diff --git a/gen.cpp b/gen.cpp new file mode 100644 index 0000000..dc73e0f --- /dev/null +++ b/gen.cpp @@ -0,0 +1,48 @@ +#include +#include +#include +#include "kgramstats.h" +#include +#include +#include +#include +#include +#include +#include + +using namespace::std; + +int main(int argc, char** args) +{ + srand(time(NULL)); + + YAML::Node config = YAML::LoadFile("config.yml"); + + ifstream infile(config["corpus"].as().c_str()); + string corpus; + string line; + while (getline(infile, line)) + { + corpus += " " + line; + } + + cout << "Preprocessing corpus..." << endl; + kgramstats* stats = new kgramstats(corpus, 5); + + cout << "Generating..." << endl; + for (;;) + { + vector doc = stats->randomSentence(rand() % 35 + 15); + string hi; + for (vector::iterator it = doc.begin(); it != doc.end(); ++it) + { + hi += *it + " "; + } + + cout << hi << endl; + + getc(stdin); + } + + return 0; +} \ No newline at end of file -- cgit 1.4.1