diff options
-rw-r--r-- | Makefile.am | 4 | ||||
-rw-r--r-- | config-example.yml | 6 | ||||
-rw-r--r-- | configure.ac | 28 | ||||
-rw-r--r-- | kgramstats.cpp | 110 | ||||
-rw-r--r-- | kgramstats.h | 28 | ||||
-rw-r--r-- | main.cpp | 161 |
6 files changed, 336 insertions, 1 deletions
diff --git a/Makefile.am b/Makefile.am index 127042a..c5b52ce 100644 --- a/Makefile.am +++ b/Makefile.am | |||
@@ -2,4 +2,6 @@ AUTOMAKE_OPTIONS = subdir-objects | |||
2 | ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} | 2 | ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} |
3 | 3 | ||
4 | bin_PROGRAMS = rawr-ebooks | 4 | bin_PROGRAMS = rawr-ebooks |
5 | rawr_ebooks_SOURCES = main.cpp \ No newline at end of file | 5 | rawr_ebooks_SOURCES = main.cpp kgramstats.cpp |
6 | AM_CPPFLAGS = $(LIBTWITCURL_CFLAGS) $(YAML_CFLAGS) | ||
7 | rawr_ebooks_LDADD = $(LIBTWITCURL_LIBS) $(YAML_LIBS) \ No newline at end of file | ||
diff --git a/config-example.yml b/config-example.yml new file mode 100644 index 0000000..a280e4c --- /dev/null +++ b/config-example.yml | |||
@@ -0,0 +1,6 @@ | |||
1 | --- | ||
2 | corpus: "corpus.txt" | ||
3 | username: "" | ||
4 | password: "" | ||
5 | consumer_key: "" | ||
6 | consumer_secret: "" \ No newline at end of file | ||
diff --git a/configure.ac b/configure.ac index c1c50bf..31cf8b9 100644 --- a/configure.ac +++ b/configure.ac | |||
@@ -4,4 +4,32 @@ AM_INIT_AUTOMAKE([1.10 no-define foreign]) | |||
4 | AC_PROG_CXX | 4 | AC_PROG_CXX |
5 | AC_CONFIG_FILES([Makefile]) | 5 | AC_CONFIG_FILES([Makefile]) |
6 | 6 | ||
7 | # Get libtwitcurl library and include locations | ||
8 | AC_ARG_WITH([libtwitcurl-include-path], | ||
9 | [AS_HELP_STRING([--with-libtwitcurl-include-path], | ||
10 | [location of the libtwitcurl headers, defaults to /usr/include])], | ||
11 | [LIBTWITCURL_CFLAGS="-I$withval"], | ||
12 | [LIBTWITCURL_CFLAGS='']) | ||
13 | AC_SUBST([LIBTWITCURL_CFLAGS]) | ||
14 | |||
# Library search path for libtwitcurl. The -l flag takes the library name
# without its "lib" prefix, so both branches must link with -ltwitcurl;
# "-llibtwitcurl" would make the linker look for liblibtwitcurl.
AC_ARG_WITH([libtwitcurl-lib-path],
  [AS_HELP_STRING([--with-libtwitcurl-lib-path], [location of the libtwitcurl libraries])],
  [LIBTWITCURL_LIBS="-L$withval -ltwitcurl"],
  [LIBTWITCURL_LIBS='-ltwitcurl'])
AC_SUBST([LIBTWITCURL_LIBS])
20 | |||
21 | # Get yaml-cpp library and include locations | ||
22 | AC_ARG_WITH([yamlcpp-include-path], | ||
23 | [AS_HELP_STRING([--with-yamlcpp-include-path], | ||
24 | [location of the yamlcpp headers, defaults to /usr/include])], | ||
25 | [YAML_CFLAGS="-I$withval"], | ||
26 | [YAML_CFLAGS='']) | ||
27 | AC_SUBST([YAML_CFLAGS]) | ||
28 | |||
29 | AC_ARG_WITH([yamlcpp-lib-path], | ||
30 | [AS_HELP_STRING([--with-yamlcpp-lib-path], [location of the yamlcpp libraries])], | ||
31 | [YAML_LIBS="-L$withval -lyaml-cpp"], | ||
32 | [YAML_LIBS='-lyaml-cpp']) | ||
33 | AC_SUBST([YAML_LIBS]) | ||
34 | |||
7 | AC_OUTPUT | 35 | AC_OUTPUT |
diff --git a/kgramstats.cpp b/kgramstats.cpp new file mode 100644 index 0000000..142b5aa --- /dev/null +++ b/kgramstats.cpp | |||
@@ -0,0 +1,110 @@ | |||
1 | #include "kgramstats.h" | ||
2 | #include <vector> | ||
3 | #include <iostream> | ||
4 | #include <cstdlib> | ||
5 | |||
6 | kgramstats::kgramstats(string corpus, int maxK) | ||
7 | { | ||
8 | this->maxK = maxK; | ||
9 | |||
10 | vector<string> tokens; | ||
11 | int start = 0; | ||
12 | int end = 0; | ||
13 | |||
14 | while (end != string::npos) | ||
15 | { | ||
16 | end = corpus.find(" ", start); | ||
17 | |||
18 | tokens.push_back(corpus.substr(start, (end == string::npos) ? string::npos : end - start)); | ||
19 | |||
20 | start = ((end > (string::npos - 1) ) ? string::npos : end + 1); | ||
21 | } | ||
22 | |||
23 | stats = new map<kgram, map<string, int>* >(); | ||
24 | for (int k=0; k<=maxK; k++) | ||
25 | { | ||
26 | for (int i=0; i<(tokens.size() - k); i++) | ||
27 | { | ||
28 | kgram seq(tokens.begin()+i, tokens.begin()+i+k); | ||
29 | string f = tokens[i+k]; | ||
30 | |||
31 | if ((*stats)[seq] == NULL) | ||
32 | { | ||
33 | (*stats)[seq] = new map<string, int>(); | ||
34 | } | ||
35 | |||
36 | (*((*stats)[seq]))[f]++; | ||
37 | } | ||
38 | } | ||
39 | } | ||
40 | |||
41 | map<string, int>* kgramstats::lookupExts(kgram tk) | ||
42 | { | ||
43 | return (*stats)[tk]; | ||
44 | } | ||
45 | |||
46 | int kgramstats::getMaxK() | ||
47 | { | ||
48 | return maxK; | ||
49 | } | ||
50 | |||
51 | void printKgram(kgram k) | ||
52 | { | ||
53 | for (kgram::iterator it = k.begin(); it != k.end(); it++) | ||
54 | { | ||
55 | cout << *it << " "; | ||
56 | } | ||
57 | cout << endl; | ||
58 | } | ||
59 | |||
60 | vector<string> kgramstats::randomSentence(int n) | ||
61 | { | ||
62 | vector<string> result; | ||
63 | list<string> cur; | ||
64 | |||
65 | for (int i=0; i<n; i++) | ||
66 | { | ||
67 | if ((rand() % 4) != 0) | ||
68 | { | ||
69 | for (int i=0; i<cur.size(); i++) | ||
70 | { | ||
71 | if ((rand() % 3) != 0) | ||
72 | { | ||
73 | cur.pop_front(); | ||
74 | } else { | ||
75 | break; | ||
76 | } | ||
77 | } | ||
78 | } | ||
79 | |||
80 | map<string, int>* probtable = lookupExts(cur); | ||
81 | int max = 0; | ||
82 | for (map<string, int>::iterator it = probtable->begin(); it != probtable->end(); ++it) | ||
83 | { | ||
84 | max += it->second; | ||
85 | } | ||
86 | |||
87 | int r = rand() % (max+1); | ||
88 | string next = probtable->begin()->first; | ||
89 | for (map<string, int>::iterator it = probtable->begin(); it != probtable->end(); ++it) | ||
90 | { | ||
91 | if (it->second > r) | ||
92 | { | ||
93 | break; | ||
94 | } else { | ||
95 | next = it->first; | ||
96 | r -= it->second; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | if (cur.size() == maxK) | ||
101 | { | ||
102 | cur.pop_front(); | ||
103 | } | ||
104 | |||
105 | cur.push_back(next); | ||
106 | result.push_back(next); | ||
107 | } | ||
108 | |||
109 | return result; | ||
110 | } \ No newline at end of file | ||
diff --git a/kgramstats.h b/kgramstats.h new file mode 100644 index 0000000..069bb90 --- /dev/null +++ b/kgramstats.h | |||
@@ -0,0 +1,28 @@ | |||
1 | #include <string> | ||
2 | #include <map> | ||
3 | #include <list> | ||
4 | #include <vector> | ||
5 | |||
6 | using namespace::std; | ||
7 | |||
8 | #ifndef KGRAMSTATS_H | ||
9 | #define KGRAMSTATS_H | ||
10 | |||
// A k-gram: an ordered sequence of tokens used as a lookup context.
typedef std::list<std::string> kgram;

// k-gram successor statistics over a text corpus, plus a random text
// generator driven by those statistics.
class kgramstats
{
public:
  // Tokenises `corpus` and records, for every context of 0..maxK tokens,
  // how often each token follows it.
  kgramstats(std::string corpus, int maxK);

  // Releases every per-context successor table and the outer table itself.
  ~kgramstats()
  {
    if (!stats)
    {
      return;
    }

    for (std::map<kgram, std::map<std::string, int>* >::iterator it = stats->begin();
         it != stats->end(); ++it)
    {
      delete it->second; // deleting a null entry is a no-op
    }

    delete stats;
  }

  // Successor counts recorded for context `tk`; may be a null pointer when
  // the context has no recorded successors.
  std::map<std::string, int>* lookupExts(kgram tk);

  // Maximum context length the model was built with.
  int getMaxK();

  // Generates `n` tokens by weighted random walk over the statistics.
  std::vector<std::string> randomSentence(int n);

private:
  // The object owns raw heap allocations, so copying is disabled (declared,
  // never defined) to rule out a double delete.
  kgramstats(const kgramstats&);
  kgramstats& operator=(const kgramstats&);

  int maxK;
  std::map<kgram, std::map<std::string, int>* >* stats;
};
25 | |||
26 | void printKgram(kgram k); | ||
27 | |||
28 | #endif \ No newline at end of file | ||
diff --git a/main.cpp b/main.cpp index 8185573..8310bf4 100644 --- a/main.cpp +++ b/main.cpp | |||
@@ -1,6 +1,167 @@ | |||
1 | #include <cstdio> | 1 | #include <cstdio> |
2 | #include <list> | ||
3 | #include <map> | ||
4 | #include "kgramstats.h" | ||
5 | #include <ctime> | ||
6 | #include <vector> | ||
7 | #include <cstdlib> | ||
8 | #include <fstream> | ||
9 | #include <iostream> | ||
10 | #include <twitcurl.h> | ||
11 | #include <unistd.h> | ||
12 | #include <yaml-cpp/yaml.h> | ||
13 | |||
14 | using namespace::std; | ||
2 | 15 | ||
3 | int main(int argc, char** args) | 16 | int main(int argc, char** args) |
4 | { | 17 | { |
18 | srand(time(NULL)); | ||
19 | |||
20 | YAML::Node config = YAML::LoadFile("config.yml"); | ||
21 | |||
22 | for (;;) | ||
23 | { | ||
24 | ifstream infile(config["corpus"].as<std::string>().c_str()); | ||
25 | string corpus; | ||
26 | string line; | ||
27 | while (getline(infile, line)) | ||
28 | { | ||
29 | corpus += " " + line; | ||
30 | } | ||
31 | |||
32 | kgramstats* stats = new kgramstats(corpus, 5); | ||
33 | vector<string> doc = stats->randomSentence(rand() % 25 + 5); | ||
34 | string hi; | ||
35 | for (vector<string>::iterator it = doc.begin(); it != doc.end(); ++it) | ||
36 | { | ||
37 | hi += *it + " "; | ||
38 | } | ||
39 | |||
40 | hi = hi.substr(0,140); | ||
41 | |||
42 | twitCurl twitterObj; | ||
43 | std::string tmpStr, tmpStr2; | ||
44 | std::string replyMsg; | ||
45 | char tmpBuf[1024]; | ||
46 | std::string username(config["username"].as<std::string>()); | ||
47 | std::string password(config["password"].as<std::string>()); | ||
48 | |||
49 | /* Set twitter username and password */ | ||
50 | twitterObj.setTwitterUsername(username); | ||
51 | twitterObj.setTwitterPassword(password); | ||
52 | |||
53 | /* OAuth flow begins */ | ||
54 | /* Step 0: Set OAuth related params. These are got by registering your app at twitter.com */ | ||
55 | twitterObj.getOAuth().setConsumerKey( config["consumer_key"].as<std::string>() ); | ||
56 | twitterObj.getOAuth().setConsumerSecret( config["consumer_secret"].as<std::string>() ); | ||
57 | |||
58 | /* Step 1: Check if we alredy have OAuth access token from a previous run */ | ||
59 | std::string myOAuthAccessTokenKey(""); | ||
60 | std::string myOAuthAccessTokenSecret(""); | ||
61 | std::ifstream oAuthTokenKeyIn; | ||
62 | std::ifstream oAuthTokenSecretIn; | ||
63 | |||
64 | oAuthTokenKeyIn.open( "twitterClient_token_key.txt" ); | ||
65 | oAuthTokenSecretIn.open( "twitterClient_token_secret.txt" ); | ||
66 | |||
67 | memset( tmpBuf, 0, 1024 ); | ||
68 | oAuthTokenKeyIn >> tmpBuf; | ||
69 | myOAuthAccessTokenKey = tmpBuf; | ||
70 | |||
71 | memset( tmpBuf, 0, 1024 ); | ||
72 | oAuthTokenSecretIn >> tmpBuf; | ||
73 | myOAuthAccessTokenSecret = tmpBuf; | ||
74 | |||
75 | oAuthTokenKeyIn.close(); | ||
76 | oAuthTokenSecretIn.close(); | ||
77 | |||
78 | if( myOAuthAccessTokenKey.size() && myOAuthAccessTokenSecret.size() ) | ||
79 | { | ||
80 | /* If we already have these keys, then no need to go through auth again */ | ||
81 | printf( "\nUsing:\nKey: %s\nSecret: %s\n\n", myOAuthAccessTokenKey.c_str(), myOAuthAccessTokenSecret.c_str() ); | ||
82 | |||
83 | twitterObj.getOAuth().setOAuthTokenKey( myOAuthAccessTokenKey ); | ||
84 | twitterObj.getOAuth().setOAuthTokenSecret( myOAuthAccessTokenSecret ); | ||
85 | } | ||
86 | else | ||
87 | { | ||
88 | /* Step 2: Get request token key and secret */ | ||
89 | std::string authUrl; | ||
90 | twitterObj.oAuthRequestToken( authUrl ); | ||
91 | |||
92 | /* Step 3: Get PIN */ | ||
93 | memset( tmpBuf, 0, 1024 ); | ||
94 | printf( "\nDo you want to visit twitter.com for PIN (0 for no; 1 for yes): " ); | ||
95 | gets( tmpBuf ); | ||
96 | tmpStr = tmpBuf; | ||
97 | if( std::string::npos != tmpStr.find( "1" ) ) | ||
98 | { | ||
99 | /* Ask user to visit twitter.com auth page and get PIN */ | ||
100 | memset( tmpBuf, 0, 1024 ); | ||
101 | printf( "\nPlease visit this link in web browser and authorize this application:\n%s", authUrl.c_str() ); | ||
102 | printf( "\nEnter the PIN provided by twitter: " ); | ||
103 | gets( tmpBuf ); | ||
104 | tmpStr = tmpBuf; | ||
105 | twitterObj.getOAuth().setOAuthPin( tmpStr ); | ||
106 | } | ||
107 | else | ||
108 | { | ||
109 | /* Else, pass auth url to twitCurl and get it via twitCurl PIN handling */ | ||
110 | twitterObj.oAuthHandlePIN( authUrl ); | ||
111 | } | ||
112 | |||
113 | /* Step 4: Exchange request token with access token */ | ||
114 | twitterObj.oAuthAccessToken(); | ||
115 | |||
116 | /* Step 5: Now, save this access token key and secret for future use without PIN */ | ||
117 | twitterObj.getOAuth().getOAuthTokenKey( myOAuthAccessTokenKey ); | ||
118 | twitterObj.getOAuth().getOAuthTokenSecret( myOAuthAccessTokenSecret ); | ||
119 | |||
120 | /* Step 6: Save these keys in a file or wherever */ | ||
121 | std::ofstream oAuthTokenKeyOut; | ||
122 | std::ofstream oAuthTokenSecretOut; | ||
123 | |||
124 | oAuthTokenKeyOut.open( "twitterClient_token_key.txt" ); | ||
125 | oAuthTokenSecretOut.open( "twitterClient_token_secret.txt" ); | ||
126 | |||
127 | oAuthTokenKeyOut.clear(); | ||
128 | oAuthTokenSecretOut.clear(); | ||
129 | |||
130 | oAuthTokenKeyOut << myOAuthAccessTokenKey.c_str(); | ||
131 | oAuthTokenSecretOut << myOAuthAccessTokenSecret.c_str(); | ||
132 | |||
133 | oAuthTokenKeyOut.close(); | ||
134 | oAuthTokenSecretOut.close(); | ||
135 | } | ||
136 | /* OAuth flow ends */ | ||
137 | |||
138 | /* Account credentials verification */ | ||
139 | if( twitterObj.accountVerifyCredGet() ) | ||
140 | { | ||
141 | twitterObj.getLastWebResponse( replyMsg ); | ||
142 | printf( "\ntwitterClient:: twitCurl::accountVerifyCredGet web response:\n%s\n", replyMsg.c_str() ); | ||
143 | } | ||
144 | else | ||
145 | { | ||
146 | twitterObj.getLastCurlError( replyMsg ); | ||
147 | printf( "\ntwitterClient:: twitCurl::accountVerifyCredGet error:\n%s\n", replyMsg.c_str() ); | ||
148 | } | ||
149 | |||
150 | /* Post a new status message */ | ||
151 | replyMsg = ""; | ||
152 | if( twitterObj.statusUpdate( hi ) ) | ||
153 | { | ||
154 | twitterObj.getLastWebResponse( replyMsg ); | ||
155 | printf( "\ntwitterClient:: twitCurl::statusUpdate web response:\n%s\n", replyMsg.c_str() ); | ||
156 | } | ||
157 | else | ||
158 | { | ||
159 | twitterObj.getLastCurlError( replyMsg ); | ||
160 | printf( "\ntwitterClient:: twitCurl::statusUpdate error:\n%s\n", replyMsg.c_str() ); | ||
161 | } | ||
162 | |||
163 | sleep(900); | ||
164 | } | ||
165 | |||
5 | return 0; | 166 | return 0; |
6 | } \ No newline at end of file | 167 | } \ No newline at end of file |