summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2019-02-27 20:45:17 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2019-02-27 20:45:17 -0500
commit 2b152d09881559a0330b3ff923e03e715777c6c3 (patch)
tree 385a725709198f68bb24c9bc352ee70c804a038d
download aspartame-2b152d09881559a0330b3ff923e03e715777c6c3.tar.gz
aspartame-2b152d09881559a0330b3ff923e03e715777c6c3.tar.bz2
aspartame-2b152d09881559a0330b3ff923e03e715777c6c3.zip
Initial commit (by Pink!)
-rw-r--r--.gitignore5
-rw-r--r--.gitmodules3
-rw-r--r--CMakeLists.txt11
-rw-r--r--dialogue.cpp122
-rw-r--r--get.rb28
-rw-r--r--histogram.cpp44
-rw-r--r--histogram.h20
-rw-r--r--identifier.h59
-rw-r--r--vendor/csv.h1268
m---------vendor/rawr-ebooks0
10 files changed, 1560 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..934d9ac --- /dev/null +++ b/.gitignore
@@ -0,0 +1,5 @@
1CMakeFiles
2CMakeCache.txt
3build
4cmake_install.cmake
5Makefile
diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..57f0e20 --- /dev/null +++ b/.gitmodules
@@ -0,0 +1,3 @@
1[submodule "vendor/rawr-ebooks"]
2 path = vendor/rawr-ebooks
3 url = git@github.com:hatkirby/rawr-ebooks
diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..9fe3ba2 --- /dev/null +++ b/CMakeLists.txt
@@ -0,0 +1,11 @@
cmake_minimum_required(VERSION 3.1)
project(rawr-ebooks)

# Build the vendored rawr-ebooks sources and make their headers visible to
# the targets declared in this directory (dialogue.cpp includes <rawr.h>).
add_subdirectory(vendor/rawr-ebooks)
include_directories(vendor/rawr-ebooks)

# The dialogue generator itself; it requires C++17 with no fallback.
add_executable(garnet dialogue.cpp histogram.cpp)
set_target_properties(garnet PROPERTIES
  CXX_STANDARD 17
  CXX_STANDARD_REQUIRED ON)
target_link_libraries(garnet rawr)
diff --git a/dialogue.cpp b/dialogue.cpp new file mode 100644 index 0000000..dd34ee5 --- /dev/null +++ b/dialogue.cpp
@@ -0,0 +1,122 @@
1#include "vendor/csv.h"
2#include "identifier.h"
3#include "histogram.h"
4#include <rawr.h>
5#include <cstdlib>
6#include <ctime>
7#include <map>
8#include <string>
9#include <iostream>
10#include <sstream>
11
12
13
14using speakerstore = identifier<std::string>;
15using speaker_id = speakerstore::key_type;
16
17
18struct speaker_data {
19
20 std::string name;
21 histogram<speaker_id> nextSpeaker;
22 rawr chain;
23
24};
25
26
27
28
29int main(int, char**)
30{
31 srand(time(NULL));
32 rand(); rand(); rand(); rand();
33
34 speakerstore speakers;
35 std::map<speaker_id, speaker_data> speakerData;
36 histogram<speaker_id> allSpeakers;
37
38
39
40 io::CSVReader<2,io::trim_chars<' ', '\t'>,io::double_quote_escape<',', '"'>> in("../dialogue.csv");
41 std::string speaker;
42 std::string line;
43
44 bool hadPrev = false;
45 speaker_id prevSpeaker;
46
47 while (in.read_row(speaker, line))
48 {
49 speaker_id spId = speakers.add(speaker);
50 speaker_data& myData = speakerData[spId];
51 myData.name = speaker;
52
53 allSpeakers.add(spId);
54
55 if (hadPrev && prevSpeaker != spId)
56 {
57 speaker_data& psd = speakerData[prevSpeaker];
58 psd.nextSpeaker.add(spId);
59 }
60
61 myData.chain.addCorpus(line);
62
63 hadPrev = true;
64 prevSpeaker = spId;
65 }
66
67 for (auto& sp : speakerData)
68 {
69 sp.second.chain.compile(4);
70 sp.second.nextSpeaker.compile();
71 }
72
73 std::cout << "Speakers:" << std::endl;
74 for (auto& sp : speakerData)
75 {
76 std::cout << " " << sp.second.name << std::endl;
77 }
78 std::cout << std::endl;
79
80 allSpeakers.compile();
81
82 for (;;)
83 {
84 speaker_id curSpeaker = allSpeakers.next();
85
86 std::ostringstream theEnd;
87
88 for (int i = 0; i < 5; i++)
89 {
90 speaker_data& curSd = speakerData.at(curSpeaker);
91
92 //std::ostringstream thisLine;
93
94 if (curSd.name != "")
95 {
96 theEnd << curSd.name << ": ";
97 }
98
99 theEnd << curSd.chain.randomSentence(1);
100
101 /*if (i > 0 && theEnd.str().length() + thisLine.str().length() > 280)
102 {
103 break;
104 }*/
105
106 theEnd << std::endl;
107 //theEnd << thisLine.str();
108
109 curSpeaker = curSd.nextSpeaker.next();
110 }
111
112 std::string output = theEnd.str();
113 output.resize(280);
114 output = output.substr(0, output.find_last_of('\n'));
115 std::cout << output;
116
117 std::cout << std::endl;
118 std::cout << std::endl;
119
120 getc(stdin);
121 }
122}
diff --git a/get.rb b/get.rb new file mode 100644 index 0000000..a2a213e --- /dev/null +++ b/get.rb
@@ -0,0 +1,28 @@
require 'open-uri'
require 'nokogiri'
require 'csv'

# Scrapes every transcript linked from the wiki's transcript category page
# and writes one [speaker, line] row per table row to dialogue.csv.
#
# URI.open is used instead of Kernel#open: opening URLs through Kernel#open
# is deprecated in Ruby 2.7 and no longer works in Ruby 3.0.

result = []
transcripts = URI.open('https://steven-universe.fandom.com/wiki/Category:Transcripts').read
docTrans = Nokogiri::HTML transcripts
docTrans.css(".category-page__member-link").each do |node|
  puts node['href']
  subpage = URI.open("https://steven-universe.fandom.com" + node['href']).read
  subpagedoc = Nokogiri::HTML subpage
  rows = subpagedoc.css(".bgrevo tr")
  # The first and last rows of the table are discarded.
  rows.shift
  rows.pop
  rows.each do |row|
    if row.children.length == 2
      # Two child nodes: recorded as a line with an empty speaker.
      result << ["", row.children[1].content.strip.gsub(/\n/," ")]
    elsif row.children.length == 3
      # Three child nodes: recorded as speaker followed by the spoken line.
      result << [row.children[1].content.strip, row.children[2].content.strip.gsub(/\n/," ")]
    end
  end
end

CSV.open("dialogue.csv", "w") do |csv|
  result.each do |line|
    csv << line
  end
end
diff --git a/histogram.cpp b/histogram.cpp new file mode 100644 index 0000000..38fca45 --- /dev/null +++ b/histogram.cpp
@@ -0,0 +1,44 @@
#include "histogram.h"
#include <cstdlib>
#include <iostream>
#include <stdexcept>
4
5template <class T>
6void histogram<T>::add(const T& inst)
7{
8 freqtable[inst]++;
9}
10
11template <class T>
12void histogram<T>::compile()
13{
14 distribution.clear();
15
16 int max = 0;
17 for (auto& it : freqtable)
18 {
19 max += it.second;
20 distribution.emplace(max, it.first);
21 }
22
23 freqtable.clear();
24}
25
26template <class T>
27const T& histogram<T>::next() const
28{
29 int max = distribution.rbegin()->first;
30 int r = rand() % max;
31
32 return distribution.upper_bound(r)->second;
33}
34
35template <class T>
36void histogram<T>::print() const
37{
38 for (auto& freqpair : freqtable)
39 {
40 std::cout << freqpair.first << ": " << freqpair.second << std::endl;
41 }
42}
43
44template class histogram <unsigned long>;
diff --git a/histogram.h b/histogram.h new file mode 100644 index 0000000..76d8f1b --- /dev/null +++ b/histogram.h
@@ -0,0 +1,20 @@
1#ifndef HISTOGRAM_H_24094D97
2#define HISTOGRAM_H_24094D97
3
4#include <map>
5#include <string>
6
// Weighted random sampler over values of type T.
//
// Usage: add() every observation, call compile() once, then call next() to
// draw values. The member functions are defined out of line in
// histogram.cpp, which also explicitly instantiates the supported types.
template <typename T>
class histogram {
public:
  // Records one occurrence of `inst`.
  void add(const T& inst);

  // Turns the recorded counts into a cumulative table and discards them.
  void compile();

  // Draws a value at random, weighted by the compiled counts.
  const T& next() const;

  // Prints the counts recorded since the last compile().
  void print() const;

private:
  // value -> number of occurrences recorded since the last compile()
  std::map<T, int> freqtable;

  // cumulative count -> value, built by compile()
  std::map<int, T> distribution;
};
19
20#endif /* end of include guard: HISTOGRAM_H_24094D97 */
diff --git a/identifier.h b/identifier.h new file mode 100644 index 0000000..74d83ce --- /dev/null +++ b/identifier.h
@@ -0,0 +1,59 @@
1#ifndef IDENTIFIER_H_D7EE5679
2#define IDENTIFIER_H_D7EE5679
3
4#include <map>
5#include <vector>
6
// Assigns small consecutive integer keys to distinct values of type T.
//
// Keys start at 0 and are handed out in order of first appearance; get()
// maps a key back to its value.
template <typename T>
class identifier {
public:

  using value_type = T;

private:

  using vector_type = std::vector<value_type>;

public:

  using key_type = typename vector_type::size_type;

  // Returns the key for `val`, registering it first if it is not known.
  //
  // New keys are taken from the number of stored values rather than from the
  // size of the lookup table, so a key handed out after compile() can never
  // collide with an existing one. After compile() the lookup table is empty,
  // so a previously seen value is treated as new and receives a fresh key.
  key_type add(const value_type& val)
  {
    const auto inserted = ids_.emplace(val, uniq_.size());

    if (inserted.second)
    {
      uniq_.push_back(val);
    }

    return inserted.first->second;
  }

  // Drops the value -> key lookup table; existing keys stay valid for get().
  void compile()
  {
    ids_.clear();
  }

  // Returns the value registered under key `i`; uses vector::at, which
  // throws std::out_of_range for an unknown key.
  const value_type& get(key_type i) const
  {
    return uniq_.at(i);
  }

  // Number of values registered so far.
  key_type size() const
  {
    return uniq_.size();
  }

private:

  std::map<value_type, key_type> ids_;
  vector_type uniq_;
};
58
59#endif /* end of include guard: IDENTIFIER_H_D7EE5679 */
diff --git a/vendor/csv.h b/vendor/csv.h new file mode 100644 index 0000000..93e9034 --- /dev/null +++ b/vendor/csv.h
@@ -0,0 +1,1268 @@
1// Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
2// License: BSD-3
3//
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are met:
8//
9// 1. Redistributions of source code must retain the above copyright notice,
10// this list of conditions and the following disclaimer.
11//
12//2. Redistributions in binary form must reproduce the above copyright notice,
13// this list of conditions and the following disclaimer in the documentation
14// and/or other materials provided with the distribution.
15//
16//3. Neither the name of the copyright holder nor the names of its contributors
17// may be used to endorse or promote products derived from this software
18// without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30// POSSIBILITY OF SUCH DAMAGE.
31
32#ifndef CSV_H
33#define CSV_H
34
35#include <vector>
36#include <string>
37#include <cstring>
38#include <algorithm>
39#include <utility>
40#include <cstdio>
41#include <exception>
42#ifndef CSV_IO_NO_THREAD
43#include <mutex>
44#include <thread>
45#include <condition_variable>
46#endif
47#include <memory>
48#include <cassert>
49#include <cerrno>
50#include <istream>
51
52namespace io{
53 ////////////////////////////////////////////////////////////////////////////
54 // LineReader //
55 ////////////////////////////////////////////////////////////////////////////
56
namespace error{
    // Base of all reader exceptions. The message is formatted lazily into
    // a fixed buffer the first time (and every time) what() is called.
    struct base : std::exception{
        // Writes the human-readable message into error_message_buffer.
        virtual void format_error_message()const = 0;

        // `noexcept override` replaces the dynamic exception specification
        // `throw()`, which is deprecated and removed in C++20.
        const char*what()const noexcept override{
            format_error_message();
            return error_message_buffer;
        }

        mutable char error_message_buffer[512];
    };

    const int max_file_name_length = 255;

    // Mix-in that stores a (possibly truncated) file name.
    struct with_file_name{
        with_file_name(){
            std::memset(file_name, 0, sizeof(file_name));
        }

        // Copies at most max_file_name_length characters; a null pointer
        // is stored as an empty name.
        void set_file_name(const char*file_name){
            if(file_name != nullptr){
                std::strncpy(this->file_name, file_name, sizeof(this->file_name));
                // strncpy does not terminate the buffer when the source is too long.
                this->file_name[sizeof(this->file_name)-1] = '\0';
            }else{
                this->file_name[0] = '\0';
            }
        }

        char file_name[max_file_name_length+1];
    };

    // Mix-in that stores a line number; -1 until one is set.
    struct with_file_line{
        with_file_line(){
            file_line = -1;
        }

        void set_file_line(int file_line){
            this->file_line = file_line;
        }

        int file_line;
    };

    // Mix-in that stores an errno value; 0 until one is set.
    struct with_errno{
        with_errno(){
            errno_value = 0;
        }

        void set_errno(int errno_value){
            this->errno_value = errno_value;
        }

        int errno_value;
    };

    // Thrown when a file cannot be opened.
    struct can_not_open_file :
        base,
        with_file_name,
        with_errno{
        void format_error_message()const override{
            if(errno_value != 0)
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Can not open file \"%s\" because \"%s\"."
                    , file_name, std::strerror(errno_value));
            else
                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                    "Can not open file \"%s\"."
                    , file_name);
        }
    };

    // Thrown when a single line does not fit into one read block.
    struct line_length_limit_exceeded :
        base,
        with_file_name,
        with_file_line{
        void format_error_message()const override{
            std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1."
                , file_line, file_name);
        }
    };
}
139
// Abstract source of raw bytes.
class ByteSourceBase{
public:
    virtual ~ByteSourceBase() = default;

    // Reads up to `size` bytes into `buffer` and returns how many were read.
    virtual int read(char*buffer, int size) = 0;
};
145
146 namespace detail{
147
148 class OwningStdIOByteSourceBase : public ByteSourceBase{
149 public:
150 explicit OwningStdIOByteSourceBase(FILE*file):file(file){
151 // Tell the std library that we want to do the buffering ourself.
152 std::setvbuf(file, 0, _IONBF, 0);
153 }
154
155 int read(char*buffer, int size){
156 return std::fread(buffer, 1, size, file);
157 }
158
159 ~OwningStdIOByteSourceBase(){
160 std::fclose(file);
161 }
162
163 private:
164 FILE*file;
165 };
166
167 class NonOwningIStreamByteSource : public ByteSourceBase{
168 public:
169 explicit NonOwningIStreamByteSource(std::istream&in):in(in){}
170
171 int read(char*buffer, int size){
172 in.read(buffer, size);
173 return in.gcount();
174 }
175
176 ~NonOwningIStreamByteSource(){}
177
178 private:
179 std::istream&in;
180 };
181
182 class NonOwningStringByteSource : public ByteSourceBase{
183 public:
184 NonOwningStringByteSource(const char*str, long long size):str(str), remaining_byte_count(size){}
185
186 int read(char*buffer, int desired_byte_count){
187 int to_copy_byte_count = desired_byte_count;
188 if(remaining_byte_count < to_copy_byte_count)
189 to_copy_byte_count = remaining_byte_count;
190 std::memcpy(buffer, str, to_copy_byte_count);
191 remaining_byte_count -= to_copy_byte_count;
192 str += to_copy_byte_count;
193 return to_copy_byte_count;
194 }
195
196 ~NonOwningStringByteSource(){}
197
198 private:
199 const char*str;
200 long long remaining_byte_count;
201 };
202
203 #ifndef CSV_IO_NO_THREAD
204 class AsynchronousReader{
205 public:
206 void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
207 std::unique_lock<std::mutex>guard(lock);
208 byte_source = std::move(arg_byte_source);
209 desired_byte_count = -1;
210 termination_requested = false;
211 worker = std::thread(
212 [&]{
213 std::unique_lock<std::mutex>guard(lock);
214 try{
215 for(;;){
216 read_requested_condition.wait(
217 guard,
218 [&]{
219 return desired_byte_count != -1 || termination_requested;
220 }
221 );
222 if(termination_requested)
223 return;
224
225 read_byte_count = byte_source->read(buffer, desired_byte_count);
226 desired_byte_count = -1;
227 if(read_byte_count == 0)
228 break;
229 read_finished_condition.notify_one();
230 }
231 }catch(...){
232 read_error = std::current_exception();
233 }
234 read_finished_condition.notify_one();
235 }
236 );
237 }
238
239 bool is_valid()const{
240 return byte_source != nullptr;
241 }
242
243 void start_read(char*arg_buffer, int arg_desired_byte_count){
244 std::unique_lock<std::mutex>guard(lock);
245 buffer = arg_buffer;
246 desired_byte_count = arg_desired_byte_count;
247 read_byte_count = -1;
248 read_requested_condition.notify_one();
249 }
250
251 int finish_read(){
252 std::unique_lock<std::mutex>guard(lock);
253 read_finished_condition.wait(
254 guard,
255 [&]{
256 return read_byte_count != -1 || read_error;
257 }
258 );
259 if(read_error)
260 std::rethrow_exception(read_error);
261 else
262 return read_byte_count;
263 }
264
265 ~AsynchronousReader(){
266 if(byte_source != nullptr){
267 {
268 std::unique_lock<std::mutex>guard(lock);
269 termination_requested = true;
270 }
271 read_requested_condition.notify_one();
272 worker.join();
273 }
274 }
275
276 private:
277 std::unique_ptr<ByteSourceBase>byte_source;
278
279 std::thread worker;
280
281 bool termination_requested;
282 std::exception_ptr read_error;
283 char*buffer;
284 int desired_byte_count;
285 int read_byte_count;
286
287 std::mutex lock;
288 std::condition_variable read_finished_condition;
289 std::condition_variable read_requested_condition;
290 };
291 #endif
292
293 class SynchronousReader{
294 public:
295 void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
296 byte_source = std::move(arg_byte_source);
297 }
298
299 bool is_valid()const{
300 return byte_source != nullptr;
301 }
302
303 void start_read(char*arg_buffer, int arg_desired_byte_count){
304 buffer = arg_buffer;
305 desired_byte_count = arg_desired_byte_count;
306 }
307
308 int finish_read(){
309 return byte_source->read(buffer, desired_byte_count);
310 }
311 private:
312 std::unique_ptr<ByteSourceBase>byte_source;
313 char*buffer;
314 int desired_byte_count;
315 };
316 }
317
318 class LineReader{
319 private:
320 static const int block_len = 1<<24;
321 std::unique_ptr<char[]>buffer; // must be constructed before (and thus destructed after) the reader!
322 #ifdef CSV_IO_NO_THREAD
323 detail::SynchronousReader reader;
324 #else
325 detail::AsynchronousReader reader;
326 #endif
327 int data_begin;
328 int data_end;
329
330 char file_name[error::max_file_name_length+1];
331 unsigned file_line;
332
333 static std::unique_ptr<ByteSourceBase> open_file(const char*file_name){
334 // We open the file in binary mode as it makes no difference under *nix
335 // and under Windows we handle \r\n newlines ourself.
336 FILE*file = std::fopen(file_name, "rb");
337 if(file == 0){
338 int x = errno; // store errno as soon as possible, doing it after constructor call can fail.
339 error::can_not_open_file err;
340 err.set_errno(x);
341 err.set_file_name(file_name);
342 throw err;
343 }
344 return std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file));
345 }
346
347 void init(std::unique_ptr<ByteSourceBase>byte_source){
348 file_line = 0;
349
350 buffer = std::unique_ptr<char[]>(new char[3*block_len]);
351 data_begin = 0;
352 data_end = byte_source->read(buffer.get(), 2*block_len);
353
354 // Ignore UTF-8 BOM
355 if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
356 data_begin = 3;
357
358 if(data_end == 2*block_len){
359 reader.init(std::move(byte_source));
360 reader.start_read(buffer.get() + 2*block_len, block_len);
361 }
362 }
363
364 public:
365 LineReader() = delete;
366 LineReader(const LineReader&) = delete;
367 LineReader&operator=(const LineReader&) = delete;
368
369 explicit LineReader(const char*file_name){
370 set_file_name(file_name);
371 init(open_file(file_name));
372 }
373
374 explicit LineReader(const std::string&file_name){
375 set_file_name(file_name.c_str());
376 init(open_file(file_name.c_str()));
377 }
378
379 LineReader(const char*file_name, std::unique_ptr<ByteSourceBase>byte_source){
380 set_file_name(file_name);
381 init(std::move(byte_source));
382 }
383
384 LineReader(const std::string&file_name, std::unique_ptr<ByteSourceBase>byte_source){
385 set_file_name(file_name.c_str());
386 init(std::move(byte_source));
387 }
388
389 LineReader(const char*file_name, const char*data_begin, const char*data_end){
390 set_file_name(file_name);
391 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
392 }
393
394 LineReader(const std::string&file_name, const char*data_begin, const char*data_end){
395 set_file_name(file_name.c_str());
396 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
397 }
398
399 LineReader(const char*file_name, FILE*file){
400 set_file_name(file_name);
401 init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
402 }
403
404 LineReader(const std::string&file_name, FILE*file){
405 set_file_name(file_name.c_str());
406 init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
407 }
408
409 LineReader(const char*file_name, std::istream&in){
410 set_file_name(file_name);
411 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
412 }
413
414 LineReader(const std::string&file_name, std::istream&in){
415 set_file_name(file_name.c_str());
416 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
417 }
418
419 void set_file_name(const std::string&file_name){
420 set_file_name(file_name.c_str());
421 }
422
423 void set_file_name(const char*file_name){
424 if(file_name != nullptr){
425 strncpy(this->file_name, file_name, sizeof(this->file_name));
426 this->file_name[sizeof(this->file_name)-1] = '\0';
427 }else{
428 this->file_name[0] = '\0';
429 }
430 }
431
432 const char*get_truncated_file_name()const{
433 return file_name;
434 }
435
436 void set_file_line(unsigned file_line){
437 this->file_line = file_line;
438 }
439
440 unsigned get_file_line()const{
441 return file_line;
442 }
443
444 char*next_line(){
445 if(data_begin == data_end)
446 return 0;
447
448 ++file_line;
449
450 assert(data_begin < data_end);
451 assert(data_end <= block_len*2);
452
453 if(data_begin >= block_len){
454 std::memcpy(buffer.get(), buffer.get()+block_len, block_len);
455 data_begin -= block_len;
456 data_end -= block_len;
457 if(reader.is_valid())
458 {
459 data_end += reader.finish_read();
460 std::memcpy(buffer.get()+block_len, buffer.get()+2*block_len, block_len);
461 reader.start_read(buffer.get() + 2*block_len, block_len);
462 }
463 }
464
465 int line_end = data_begin;
466 while(buffer[line_end] != '\n' && line_end != data_end){
467 ++line_end;
468 }
469
470 if(line_end - data_begin + 1 > block_len){
471 error::line_length_limit_exceeded err;
472 err.set_file_name(file_name);
473 err.set_file_line(file_line);
474 throw err;
475 }
476
477 if(buffer[line_end] == '\n' && line_end != data_end){
478 buffer[line_end] = '\0';
479 }else{
480 // some files are missing the newline at the end of the
481 // last line
482 ++data_end;
483 buffer[line_end] = '\0';
484 }
485
486 // handle windows \r\n-line breaks
487 if(line_end != data_begin && buffer[line_end-1] == '\r')
488 buffer[line_end-1] = '\0';
489
490 char*ret = buffer.get() + data_begin;
491 data_begin = line_end+1;
492 return ret;
493 }
494 };
495
496
497 ////////////////////////////////////////////////////////////////////////////
498 // CSV //
499 ////////////////////////////////////////////////////////////////////////////
500
501 namespace error{
502 const int max_column_name_length = 63;
503 struct with_column_name{
504 with_column_name(){
505 std::memset(column_name, 0, max_column_name_length+1);
506 }
507
508 void set_column_name(const char*column_name){
509 if(column_name != nullptr){
510 std::strncpy(this->column_name, column_name, max_column_name_length);
511 this->column_name[max_column_name_length] = '\0';
512 }else{
513 this->column_name[0] = '\0';
514 }
515 }
516
517 char column_name[max_column_name_length+1];
518 };
519
520
521 const int max_column_content_length = 63;
522
523 struct with_column_content{
524 with_column_content(){
525 std::memset(column_content, 0, max_column_content_length+1);
526 }
527
528 void set_column_content(const char*column_content){
529 if(column_content != nullptr){
530 std::strncpy(this->column_content, column_content, max_column_content_length);
531 this->column_content[max_column_content_length] = '\0';
532 }else{
533 this->column_content[0] = '\0';
534 }
535 }
536
537 char column_content[max_column_content_length+1];
538 };
539
540
541 struct extra_column_in_header :
542 base,
543 with_file_name,
544 with_column_name{
545 void format_error_message()const{
546 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
547 "Extra column \"%s\" in header of file \"%s\"."
548 , column_name, file_name);
549 }
550 };
551
552 struct missing_column_in_header :
553 base,
554 with_file_name,
555 with_column_name{
556 void format_error_message()const{
557 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
558 "Missing column \"%s\" in header of file \"%s\"."
559 , column_name, file_name);
560 }
561 };
562
563 struct duplicated_column_in_header :
564 base,
565 with_file_name,
566 with_column_name{
567 void format_error_message()const{
568 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
569 "Duplicated column \"%s\" in header of file \"%s\"."
570 , column_name, file_name);
571 }
572 };
573
574 struct header_missing :
575 base,
576 with_file_name{
577 void format_error_message()const{
578 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
579 "Header missing in file \"%s\"."
580 , file_name);
581 }
582 };
583
584 struct too_few_columns :
585 base,
586 with_file_name,
587 with_file_line{
588 void format_error_message()const{
589 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
590 "Too few columns in line %d in file \"%s\"."
591 , file_line, file_name);
592 }
593 };
594
595 struct too_many_columns :
596 base,
597 with_file_name,
598 with_file_line{
599 void format_error_message()const{
600 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
601 "Too many columns in line %d in file \"%s\"."
602 , file_line, file_name);
603 }
604 };
605
606 struct escaped_string_not_closed :
607 base,
608 with_file_name,
609 with_file_line{
610 void format_error_message()const{
611 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
612 "Escaped string was not closed in line %d in file \"%s\"."
613 , file_line, file_name);
614 }
615 };
616
617 struct integer_must_be_positive :
618 base,
619 with_file_name,
620 with_file_line,
621 with_column_name,
622 with_column_content{
623 void format_error_message()const{
624 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
625 "The integer \"%s\" must be positive or 0 in column \"%s\" in file \"%s\" in line \"%d\"."
626 , column_content, column_name, file_name, file_line);
627 }
628 };
629
630 struct no_digit :
631 base,
632 with_file_name,
633 with_file_line,
634 with_column_name,
635 with_column_content{
636 void format_error_message()const{
637 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
638 "The integer \"%s\" contains an invalid digit in column \"%s\" in file \"%s\" in line \"%d\"."
639 , column_content, column_name, file_name, file_line);
640 }
641 };
642
643 struct integer_overflow :
644 base,
645 with_file_name,
646 with_file_line,
647 with_column_name,
648 with_column_content{
649 void format_error_message()const{
650 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
651 "The integer \"%s\" overflows in column \"%s\" in file \"%s\" in line \"%d\"."
652 , column_content, column_name, file_name, file_line);
653 }
654 };
655
656 struct integer_underflow :
657 base,
658 with_file_name,
659 with_file_line,
660 with_column_name,
661 with_column_content{
662 void format_error_message()const{
663 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
664 "The integer \"%s\" underflows in column \"%s\" in file \"%s\" in line \"%d\"."
665 , column_content, column_name, file_name, file_line);
666 }
667 };
668
669 struct invalid_single_character :
670 base,
671 with_file_name,
672 with_file_line,
673 with_column_name,
674 with_column_content{
675 void format_error_message()const{
676 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
677 "The content \"%s\" of column \"%s\" in file \"%s\" in line \"%d\" is not a single character."
678 , column_content, column_name, file_name, file_line);
679 }
680 };
681 }
682
// Bit flags selecting which header mismatches are tolerated; the two
// non-zero flags can be combined with |.
using ignore_column = unsigned;
static const ignore_column ignore_no_column      = 0;
static const ignore_column ignore_extra_column   = 1;
static const ignore_column ignore_missing_column = 2;
687
// Trim policy: strips any of the listed characters from both ends of a cell.
template<char ... trim_char_list>
struct trim_chars{
private:
    // End of the recursion: no candidates left, so `c` is not a trim character.
    constexpr static bool is_trim_char(char){
        return false;
    }

    // True when `c` equals the first candidate or any of the remaining ones.
    template<class ...OtherTrimChars>
    constexpr static bool is_trim_char(char c, char trim_char, OtherTrimChars...other_trim_chars){
        return (c == trim_char) ? true : is_trim_char(c, other_trim_chars...);
    }

public:
    // Narrows [str_begin, str_end) past leading and trailing trim characters
    // and writes a terminating NUL at the new end.
    static void trim(char*&str_begin, char*&str_end){
        for(; str_begin != str_end; ++str_begin){
            if(!is_trim_char(*str_begin, trim_char_list...))
                break;
        }
        for(; str_begin != str_end; --str_end){
            if(!is_trim_char(*(str_end-1), trim_char_list...))
                break;
        }
        *str_end = '\0';
    }
};
709
710
// Comment policy that treats no line as a comment.
struct no_comment{
    static bool is_comment(const char* /*line*/){
        return false;
    }
};
716
// Comment policy: a line is a comment when its first character is one of
// the listed characters.
template<char ... comment_start_char_list>
struct single_line_comment{
private:
    // End of the recursion: no candidates left.
    constexpr static bool is_comment_start_char(char){
        return false;
    }

    // True when `c` equals the first candidate or any of the remaining ones.
    template<class ...OtherCommentStartChars>
    constexpr static bool is_comment_start_char(char c, char comment_start_char, OtherCommentStartChars...other_comment_start_chars){
        return (c == comment_start_char)
            ? true
            : is_comment_start_char(c, other_comment_start_chars...);
    }

public:

    static bool is_comment(const char*line){
        const char first = *line;
        return is_comment_start_char(first, comment_start_char_list...);
    }
};
735
// Comment policy: a line is a comment when it is empty or consists only of
// spaces and tabs.
struct empty_line_comment{
    static bool is_comment(const char*line){
        // Skip the leading blanks; the line is "empty" exactly when
        // nothing but the terminator follows them.
        const char*cursor = line;
        while(*cursor == ' ' || *cursor == '\t')
            ++cursor;
        return *cursor == '\0';
    }
};
748
749 template<char ... comment_start_char_list>
750 struct single_and_empty_line_comment{
751 static bool is_comment(const char*line){
752 return single_line_comment<comment_start_char_list...>::is_comment(line) || empty_line_comment::is_comment(line);
753 }
754 };
755
// Quote policy without quoting: a column ends at the next separator or at
// the end of the line, and cells are left untouched.
template<char sep>
struct no_quote_escape{
    // Returns a pointer to the first `sep` or to the terminating NUL.
    static const char*find_next_column_end(const char*col_begin){
        const char*cursor = col_begin;
        for(; *cursor != '\0'; ++cursor){
            if(*cursor == sep)
                break;
        }
        return cursor;
    }

    // Nothing to unescape under this policy.
    static void unescape(char*& /*col_begin*/, char*& /*col_end*/){
    }
};
768
769 template<char sep, char quote>
770 struct double_quote_escape{
771 static const char*find_next_column_end(const char*col_begin){
772 while(*col_begin != sep && *col_begin != '\0')
773 if(*col_begin != quote)
774 ++col_begin;
775 else{
776 do{
777 ++col_begin;
778 while(*col_begin != quote){
779 if(*col_begin == '\0')
780 throw error::escaped_string_not_closed();
781 ++col_begin;
782 }
783 ++col_begin;
784 }while(*col_begin == quote);
785 }
786 return col_begin;
787 }
788
789 static void unescape(char*&col_begin, char*&col_end){
790 if(col_end - col_begin >= 2){
791 if(*col_begin == quote && *(col_end-1) == quote){
792 ++col_begin;
793 --col_end;
794 char*out = col_begin;
795 for(char*in = col_begin; in!=col_end; ++in){
796 if(*in == quote && (in+1) != col_end && *(in+1) == quote){
797 ++in;
798 }
799 *out = *in;
800 ++out;
801 }
802 col_end = out;
803 *col_end = '\0';
804 }
805 }
806
807 }
808 };
809
810 struct throw_on_overflow{
811 template<class T>
812 static void on_overflow(T&){
813 throw error::integer_overflow();
814 }
815
816 template<class T>
817 static void on_underflow(T&){
818 throw error::integer_underflow();
819 }
820 };
821
// Overflow policy that does nothing: both hooks leave the value unchanged.
struct ignore_overflow{
    template<class T>
    static void on_overflow(T& /*x*/){
    }

    template<class T>
    static void on_underflow(T& /*x*/){
    }
};
829
// Overflow policy that saturates the value at the limits of its type.
// NOTE(review): this relies on std::numeric_limits, but <limits> is not in
// this header's own include list — confirm it is included before use.
struct set_to_max_on_overflow{
    // Replaces x with the largest value T can hold.
    template<class T>
    static void on_overflow(T&x){
        typedef std::numeric_limits<T> limits;
        x = limits::max();
    }

    // Replaces x with std::numeric_limits<T>::min().
    template<class T>
    static void on_underflow(T&x){
        typedef std::numeric_limits<T> limits;
        x = limits::min();
    }
};
841
842
843 namespace detail{
// Cuts the first column off `line`.
//
// On return [col_begin, col_end) is the column, terminated in place. `line`
// points just past the separator, or is null when this was the last column.
template<class quote_policy>
void chop_next_column(
    char*&line, char*&col_begin, char*&col_end
){
    assert(line != nullptr);

    col_begin = line;
    // the col_begin + (... - col_begin) removes the constness
    const char*found_end = quote_policy::find_next_column_end(col_begin);
    col_end = col_begin + (found_end - col_begin);

    const bool last_column = (*col_end == '\0');
    if(last_column){
        line = nullptr;
        return;
    }
    *col_end = '\0';
    line = col_end + 1;
}
861
862 template<class trim_policy, class quote_policy>
863 void parse_line(
864 char*line,
865 char**sorted_col,
866 const std::vector<int>&col_order
867 ){
868 for(std::size_t i=0; i<col_order.size(); ++i){
869 if(line == nullptr)
870 throw ::io::error::too_few_columns();
871 char*col_begin, *col_end;
872 chop_next_column<quote_policy>(line, col_begin, col_end);
873
874 if(col_order[i] != -1){
875 trim_policy::trim(col_begin, col_end);
876 quote_policy::unescape(col_begin, col_end);
877
878 sorted_col[col_order[i]] = col_begin;
879 }
880 }
881 if(line != nullptr)
882 throw ::io::error::too_many_columns();
883 }
884
885 template<unsigned column_count, class trim_policy, class quote_policy>
886 void parse_header_line(
887 char*line,
888 std::vector<int>&col_order,
889 const std::string*col_name,
890 ignore_column ignore_policy
891 ){
892 col_order.clear();
893
894 bool found[column_count];
895 std::fill(found, found + column_count, false);
896 while(line){
897 char*col_begin,*col_end;
898 chop_next_column<quote_policy>(line, col_begin, col_end);
899
900 trim_policy::trim(col_begin, col_end);
901 quote_policy::unescape(col_begin, col_end);
902
903 for(unsigned i=0; i<column_count; ++i)
904 if(col_begin == col_name[i]){
905 if(found[i]){
906 error::duplicated_column_in_header err;
907 err.set_column_name(col_begin);
908 throw err;
909 }
910 found[i] = true;
911 col_order.push_back(i);
912 col_begin = 0;
913 break;
914 }
915 if(col_begin){
916 if(ignore_policy & ::io::ignore_extra_column)
917 col_order.push_back(-1);
918 else{
919 error::extra_column_in_header err;
920 err.set_column_name(col_begin);
921 throw err;
922 }
923 }
924 }
925 if(!(ignore_policy & ::io::ignore_missing_column)){
926 for(unsigned i=0; i<column_count; ++i){
927 if(!found[i]){
928 error::missing_column_in_header err;
929 err.set_column_name(col_name[i].c_str());
930 throw err;
931 }
932 }
933 }
934 }
935
936 template<class overflow_policy>
937 void parse(char*col, char &x){
938 if(!*col)
939 throw error::invalid_single_character();
940 x = *col;
941 ++col;
942 if(*col)
943 throw error::invalid_single_character();
944 }
945
// Text columns need no conversion; overflow_policy is unused by all three
// overloads below.

// Copies the column text into a std::string.
template<class overflow_policy>
void parse(char*col, std::string&x)
{
    x.assign(col);
}

// Hands out a read-only pointer to the column text (no copy is made, so
// the pointer refers to the buffer that `col` points into).
template<class overflow_policy>
void parse(char*col, const char*&x)
{
    x = col;
}

// Hands out a writable pointer to the column text (no copy is made, so
// the pointer refers to the buffer that `col` points into).
template<class overflow_policy>
void parse(char*col, char*&x)
{
    x = col;
}
960
961 template<class overflow_policy, class T>
962 void parse_unsigned_integer(const char*col, T&x){
963 x = 0;
964 while(*col != '\0'){
965 if('0' <= *col && *col <= '9'){
966 T y = *col - '0';
967 if(x > (std::numeric_limits<T>::max()-y)/10){
968 overflow_policy::on_overflow(x);
969 return;
970 }
971 x = 10*x+y;
972 }else
973 throw error::no_digit();
974 ++col;
975 }
976 }
977
978 template<class overflow_policy>void parse(char*col, unsigned char &x)
979 {parse_unsigned_integer<overflow_policy>(col, x);}
980 template<class overflow_policy>void parse(char*col, unsigned short &x)
981 {parse_unsigned_integer<overflow_policy>(col, x);}
982 template<class overflow_policy>void parse(char*col, unsigned int &x)
983 {parse_unsigned_integer<overflow_policy>(col, x);}
984 template<class overflow_policy>void parse(char*col, unsigned long &x)
985 {parse_unsigned_integer<overflow_policy>(col, x);}
986 template<class overflow_policy>void parse(char*col, unsigned long long &x)
987 {parse_unsigned_integer<overflow_policy>(col, x);}
988
989 template<class overflow_policy, class T>
990 void parse_signed_integer(const char*col, T&x){
991 if(*col == '-'){
992 ++col;
993
994 x = 0;
995 while(*col != '\0'){
996 if('0' <= *col && *col <= '9'){
997 T y = *col - '0';
998 if(x < (std::numeric_limits<T>::min()+y)/10){
999 overflow_policy::on_underflow(x);
1000 return;
1001 }
1002 x = 10*x-y;
1003 }else
1004 throw error::no_digit();
1005 ++col;
1006 }
1007 return;
1008 }else if(*col == '+')
1009 ++col;
1010 parse_unsigned_integer<overflow_policy>(col, x);
1011 }
1012
1013 template<class overflow_policy>void parse(char*col, signed char &x)
1014 {parse_signed_integer<overflow_policy>(col, x);}
1015 template<class overflow_policy>void parse(char*col, signed short &x)
1016 {parse_signed_integer<overflow_policy>(col, x);}
1017 template<class overflow_policy>void parse(char*col, signed int &x)
1018 {parse_signed_integer<overflow_policy>(col, x);}
1019 template<class overflow_policy>void parse(char*col, signed long &x)
1020 {parse_signed_integer<overflow_policy>(col, x);}
1021 template<class overflow_policy>void parse(char*col, signed long long &x)
1022 {parse_signed_integer<overflow_policy>(col, x);}
1023
1024 template<class T>
1025 void parse_float(const char*col, T&x){
1026 bool is_neg = false;
1027 if(*col == '-'){
1028 is_neg = true;
1029 ++col;
1030 }else if(*col == '+')
1031 ++col;
1032
1033 x = 0;
1034 while('0' <= *col && *col <= '9'){
1035 int y = *col - '0';
1036 x *= 10;
1037 x += y;
1038 ++col;
1039 }
1040
1041 if(*col == '.'|| *col == ','){
1042 ++col;
1043 T pos = 1;
1044 while('0' <= *col && *col <= '9'){
1045 pos /= 10;
1046 int y = *col - '0';
1047 ++col;
1048 x += y*pos;
1049 }
1050 }
1051
1052 if(*col == 'e' || *col == 'E'){
1053 ++col;
1054 int e;
1055
1056 parse_signed_integer<set_to_max_on_overflow>(col, e);
1057
1058 if(e != 0){
1059 T base;
1060 if(e < 0){
1061 base = 0.1;
1062 e = -e;
1063 }else{
1064 base = 10;
1065 }
1066
1067 while(e != 1){
1068 if((e & 1) == 0){
1069 base = base*base;
1070 e >>= 1;
1071 }else{
1072 x *= base;
1073 --e;
1074 }
1075 }
1076 x *= base;
1077 }
1078 }else{
1079 if(*col != '\0')
1080 throw error::no_digit();
1081 }
1082
1083 if(is_neg)
1084 x = -x;
1085 }
1086
1087 template<class overflow_policy> void parse(char*col, float&x) { parse_float(col, x); }
1088 template<class overflow_policy> void parse(char*col, double&x) { parse_float(col, x); }
1089 template<class overflow_policy> void parse(char*col, long double&x) { parse_float(col, x); }
1090
// Catch-all overload selected for any column type that has no dedicated
// parse() overload; instantiating it is always a compile-time error.
template<class overflow_policy, class T>
void parse(char*col, T&x){
    // Reference both parameters so the compiler does not warn about
    // unused variables.
    static_cast<void>(x);
    static_cast<void>(col);
    // A plain "false" would be evaluated by GCC as soon as the template is
    // read. The condition below depends on T, so it is only evaluated when
    // the template is instantiated, which is why this odd-looking
    // expression is used.
    static_assert(
        sizeof(T)!=sizeof(T),
        "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported"
    );
}
1102
1103 }
1104
1105 template<unsigned column_count,
1106 class trim_policy = trim_chars<' ', '\t'>,
1107 class quote_policy = no_quote_escape<','>,
1108 class overflow_policy = throw_on_overflow,
1109 class comment_policy = no_comment
1110 >
1111 class CSVReader{
1112 private:
1113 LineReader in;
1114
1115 char*row[column_count];
1116 std::string column_names[column_count];
1117
1118 std::vector<int>col_order;
1119
1120 template<class ...ColNames>
1121 void set_column_names(std::string s, ColNames...cols){
1122 column_names[column_count-sizeof...(ColNames)-1] = std::move(s);
1123 set_column_names(std::forward<ColNames>(cols)...);
1124 }
1125
1126 void set_column_names(){}
1127
1128
1129 public:
1130 CSVReader() = delete;
1131 CSVReader(const CSVReader&) = delete;
1132 CSVReader&operator=(const CSVReader&);
1133
1134 template<class ...Args>
1135 explicit CSVReader(Args&&...args):in(std::forward<Args>(args)...){
1136 std::fill(row, row+column_count, nullptr);
1137 col_order.resize(column_count);
1138 for(unsigned i=0; i<column_count; ++i)
1139 col_order[i] = i;
1140 for(unsigned i=1; i<=column_count; ++i)
1141 column_names[i-1] = "col"+std::to_string(i);
1142 }
1143
1144 char*next_line(){
1145 return in.next_line();
1146 }
1147
1148 template<class ...ColNames>
1149 void read_header(ignore_column ignore_policy, ColNames...cols){
1150 static_assert(sizeof...(ColNames)>=column_count, "not enough column names specified");
1151 static_assert(sizeof...(ColNames)<=column_count, "too many column names specified");
1152 try{
1153 set_column_names(std::forward<ColNames>(cols)...);
1154
1155 char*line;
1156 do{
1157 line = in.next_line();
1158 if(!line)
1159 throw error::header_missing();
1160 }while(comment_policy::is_comment(line));
1161
1162 detail::parse_header_line
1163 <column_count, trim_policy, quote_policy>
1164 (line, col_order, column_names, ignore_policy);
1165 }catch(error::with_file_name&err){
1166 err.set_file_name(in.get_truncated_file_name());
1167 throw;
1168 }
1169 }
1170
1171 template<class ...ColNames>
1172 void set_header(ColNames...cols){
1173 static_assert(sizeof...(ColNames)>=column_count,
1174 "not enough column names specified");
1175 static_assert(sizeof...(ColNames)<=column_count,
1176 "too many column names specified");
1177 set_column_names(std::forward<ColNames>(cols)...);
1178 std::fill(row, row+column_count, nullptr);
1179 col_order.resize(column_count);
1180 for(unsigned i=0; i<column_count; ++i)
1181 col_order[i] = i;
1182 }
1183
1184 bool has_column(const std::string&name) const {
1185 return col_order.end() != std::find(
1186 col_order.begin(), col_order.end(),
1187 std::find(std::begin(column_names), std::end(column_names), name)
1188 - std::begin(column_names));
1189 }
1190
1191 void set_file_name(const std::string&file_name){
1192 in.set_file_name(file_name);
1193 }
1194
1195 void set_file_name(const char*file_name){
1196 in.set_file_name(file_name);
1197 }
1198
1199 const char*get_truncated_file_name()const{
1200 return in.get_truncated_file_name();
1201 }
1202
1203 void set_file_line(unsigned file_line){
1204 in.set_file_line(file_line);
1205 }
1206
1207 unsigned get_file_line()const{
1208 return in.get_file_line();
1209 }
1210
1211 private:
1212 void parse_helper(std::size_t){}
1213
1214 template<class T, class ...ColType>
1215 void parse_helper(std::size_t r, T&t, ColType&...cols){
1216 if(row[r]){
1217 try{
1218 try{
1219 ::io::detail::parse<overflow_policy>(row[r], t);
1220 }catch(error::with_column_content&err){
1221 err.set_column_content(row[r]);
1222 throw;
1223 }
1224 }catch(error::with_column_name&err){
1225 err.set_column_name(column_names[r].c_str());
1226 throw;
1227 }
1228 }
1229 parse_helper(r+1, cols...);
1230 }
1231
1232
1233 public:
1234 template<class ...ColType>
1235 bool read_row(ColType& ...cols){
1236 static_assert(sizeof...(ColType)>=column_count,
1237 "not enough columns specified");
1238 static_assert(sizeof...(ColType)<=column_count,
1239 "too many columns specified");
1240 try{
1241 try{
1242
1243 char*line;
1244 do{
1245 line = in.next_line();
1246 if(!line)
1247 return false;
1248 }while(comment_policy::is_comment(line));
1249
1250 detail::parse_line<trim_policy, quote_policy>
1251 (line, row, col_order);
1252
1253 parse_helper(0, cols...);
1254 }catch(error::with_file_name&err){
1255 err.set_file_name(in.get_truncated_file_name());
1256 throw;
1257 }
1258 }catch(error::with_file_line&err){
1259 err.set_file_line(in.get_file_line());
1260 throw;
1261 }
1262
1263 return true;
1264 }
1265 };
1266}
1267#endif
1268
diff --git a/vendor/rawr-ebooks b/vendor/rawr-ebooks new file mode 160000
Subproject d75685e69f9a5d3cfc255aa921005fc40ae6e58