about summary refs log tree commit diff stats
path: root/gen.cpp
blob: 952e3b5f111d6e86bc5d76dc031893872a23334f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include <cstdio>
#include <list>
#include <map>
#include "kgramstats.h"
#include <vector>
#include <fstream>
#include <iostream>
#include <random>

namespace
{
  // Upper bound on the length of each printed document. The same value is
  // passed to rawr::randomSentence (presumably as its length hint - confirm
  // against kgramstats.h) and used to truncate the result before printing.
  const int kMaxDocLength = 140;

  // Prints the program banner and the usage text to standard output.
  void printUsage()
  {
    std::cout << "rawr-gen, version 1.0" << std::endl;
    std::cout << "Usage: rawr-gen corpus-file" << std::endl;
    std::cout << "  where 'corpus-file' is the path to your input" << std::endl;
  }

  // Removes one trailing carriage return, so files with CRLF line endings
  // read the same as files with LF endings. Safe to call on an empty string.
  void stripTrailingCarriageReturn(std::string& text)
  {
    if (!text.empty() && text.back() == '\r')
    {
      text.pop_back();
    }
  }
}

// Entry point of rawr-gen.
//
// Reads the corpus file named by the first command-line argument, feeds it
// to a rawr k-gram model, and then prints one generated document each time
// a character is read from standard input, until standard input is
// exhausted.
//
// Any "$name$" placeholder in a generated form is replaced with a random
// line from "names.txt" in the working directory, if that file exists and
// contains at least one non-empty line.
//
// Returns 0 when only the usage text was requested or when the generation
// loop ends normally, and 1 when the corpus file cannot be opened.
int main(int argc, char** args)
{
  std::random_device randomDevice;
  std::mt19937 rng(randomDevice());

  // "argc < 2" rather than "argc == 1": this also covers an empty argument
  // vector, where reading args[1] would be out of range.
  if (argc < 2)
  {
    printUsage();

    return 0;
  }

  std::ifstream infile(args[1]);
  if (!infile)
  {
    printUsage();
    std::cout << std::endl;
    std::cout << "The file you specified does not exist." << std::endl;

    // Report the failure to the caller instead of claiming success.
    return 1;
  }

  // Load the whole corpus, normalising every line ending to a single '\n'.
  std::string corpus;
  std::string line;
  while (std::getline(infile, line))
  {
    // Blank lines are common in a corpus, so the helper must (and does)
    // tolerate an empty string.
    stripTrailingCarriageReturn(line);

    corpus += line;
    corpus += '\n';
  }

  // Old-style corpora still contain "$name$" free variables; the candidate
  // replacements are read from names.txt, one per line. The file is
  // optional: without it the placeholders are left untouched.
  std::vector<std::string> fv_names;
  {
    std::ifstream namefile("names.txt");
    std::string name;

    // Looping on getline itself (instead of on eof()) avoids recording a
    // phantom empty entry after the final newline of the file.
    while (std::getline(namefile, name))
    {
      stripTrailingCarriageReturn(name);

      // A blank line is not a usable name; substituting it would silently
      // delete the placeholder.
      if (!name.empty())
      {
        fv_names.push_back(name);
      }
    }
  }

  std::cout << "Preprocessing corpus..." << std::endl;
  rawr kgramstats;
  kgramstats.addCorpus(corpus);
  kgramstats.compile(5);

  // The callback receives two strings; only the second one ("form") is
  // inspected here, and the possibly rewritten form is returned. It captures
  // fv_names and rng by reference, both of which outlive kgramstats.
  kgramstats.setTransformCallback([&] (std::string, std::string form) {
    const std::string placeholder = "$name$";
    const size_t pos = form.find(placeholder);

    // With no names available there is nothing to pick from: an index range
    // of [0, size() - 1] would wrap around and index out of bounds.
    if (pos != std::string::npos && !fv_names.empty())
    {
      std::uniform_int_distribution<size_t> pick(0, fv_names.size() - 1);
      form.replace(pos, placeholder.size(), fv_names[pick(rng)]);
    }

    return form;
  });

  std::cout << "Generating..." << std::endl;
  for (;;)
  {
    std::string doc = kgramstats.randomSentence(kMaxDocLength, rng);

    // Only ever shorten the document. An unconditional resize() would pad a
    // short document with NUL characters that then get written to stdout.
    if (doc.size() > static_cast<std::string::size_type>(kMaxDocLength))
    {
      doc.resize(kMaxDocLength);
    }

    // std::endl flushes, so the document is visible before we block on input.
    std::cout << doc << std::endl;

    // Wait for a character before generating the next document; stop once
    // standard input is closed instead of looping forever.
    if (std::getc(stdin) == EOF)
    {
      break;
    }
  }

  return 0;
}