summary refs log tree commit diff stats
path: root/vendor/csv.h
diff options
context:
space:
mode:
authorKelly Rauchenberger <fefferburbia@gmail.com>2019-02-27 20:45:17 -0500
committerKelly Rauchenberger <fefferburbia@gmail.com>2019-02-27 20:45:17 -0500
commit2b152d09881559a0330b3ff923e03e715777c6c3 (patch)
tree385a725709198f68bb24c9bc352ee70c804a038d /vendor/csv.h
downloadaspartame-2b152d09881559a0330b3ff923e03e715777c6c3.tar.gz
aspartame-2b152d09881559a0330b3ff923e03e715777c6c3.tar.bz2
aspartame-2b152d09881559a0330b3ff923e03e715777c6c3.zip
Initial commit (by Pink!)
Diffstat (limited to 'vendor/csv.h')
-rw-r--r--vendor/csv.h1268
1 files changed, 1268 insertions, 0 deletions
diff --git a/vendor/csv.h b/vendor/csv.h new file mode 100644 index 0000000..93e9034 --- /dev/null +++ b/vendor/csv.h
@@ -0,0 +1,1268 @@
1// Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
2// License: BSD-3
3//
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are met:
8//
9// 1. Redistributions of source code must retain the above copyright notice,
10// this list of conditions and the following disclaimer.
11//
12//2. Redistributions in binary form must reproduce the above copyright notice,
13// this list of conditions and the following disclaimer in the documentation
14// and/or other materials provided with the distribution.
15//
16//3. Neither the name of the copyright holder nor the names of its contributors
17// may be used to endorse or promote products derived from this software
18// without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30// POSSIBILITY OF SUCH DAMAGE.
31
32#ifndef CSV_H
33#define CSV_H
34
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <exception>
#include <istream>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#ifndef CSV_IO_NO_THREAD
#include <condition_variable>
#include <mutex>
#include <thread>
#endif
51
52namespace io{
53 ////////////////////////////////////////////////////////////////////////////
54 // LineReader //
55 ////////////////////////////////////////////////////////////////////////////
56
57 namespace error{
// Common base class of the exceptions defined in this header.
//
// The message text is produced on demand: what() asks the concrete error
// type to render itself into error_message_buffer and returns that buffer.
struct base : std::exception{
    // Implemented by every concrete error: writes the human readable
    // description into error_message_buffer.
    virtual void format_error_message()const = 0;

    // Formats the message and returns a pointer to it. The pointer refers to
    // storage inside this exception object.
    // Declared noexcept (instead of the dynamic specification throw(), which
    // was removed from the language in C++20) and marked override so that a
    // signature mismatch with std::exception::what is a compile error.
    const char*what()const noexcept override{
        format_error_message();
        return error_message_buffer;
    }

    // mutable because the const member what() fills it in.
    mutable char error_message_buffer[512];
};
68
// Longest file name (excluding the terminating '\0') that an error object
// stores; longer names are truncated.
const int max_file_name_length = 255;

// Mixin that gives an error type a fixed-size, always '\0'-terminated copy of
// a file name.
struct with_file_name{
    // Starts with an empty (all-zero) name.
    with_file_name(){
        std::memset(file_name, 0, sizeof(file_name));
    }

    // Stores a copy of file_name, truncated to max_file_name_length
    // characters. A null pointer resets the stored name to the empty string.
    void set_file_name(const char*file_name){
        if(file_name == nullptr){
            this->file_name[0] = '\0';
            return;
        }
        // Qualified with std:: because <cstring> only guarantees the name in
        // namespace std; this also matches with_column_name.
        std::strncpy(this->file_name, file_name, sizeof(this->file_name));
        // strncpy does not terminate the destination when the source fills it.
        this->file_name[sizeof(this->file_name)-1] = '\0';
    }

    char file_name[max_file_name_length+1];
};
87
// Mixin that attaches a line number to an error type.
struct with_file_line{
    // -1 marks "no line number recorded yet".
    with_file_line() : file_line(-1){}

    // Records the line number the error refers to.
    void set_file_line(int file_line){
        this->file_line = file_line;
    }

    int file_line;
};
99
// Mixin that attaches an errno value to an error type.
struct with_errno{
    // 0 marks "no errno recorded".
    with_errno() : errno_value(0){}

    // Records the errno value describing why the operation failed.
    void set_errno(int errno_value){
        this->errno_value = errno_value;
    }

    int errno_value;
};
111
112 struct can_not_open_file :
113 base,
114 with_file_name,
115 with_errno{
116 void format_error_message()const{
117 if(errno_value != 0)
118 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
119 "Can not open file \"%s\" because \"%s\"."
120 , file_name, std::strerror(errno_value));
121 else
122 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
123 "Can not open file \"%s\"."
124 , file_name);
125 }
126 };
127
128 struct line_length_limit_exceeded :
129 base,
130 with_file_name,
131 with_file_line{
132 void format_error_message()const{
133 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
134 "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1."
135 , file_line, file_name);
136 }
137 };
138 }
139
// Abstract source of raw bytes.
class ByteSourceBase{
public:
    virtual ~ByteSourceBase(){}

    // Stores up to `size` bytes into `buffer` and returns how many bytes were
    // actually stored.
    virtual int read(char*buffer, int size) = 0;
};
145
146 namespace detail{
147
148 class OwningStdIOByteSourceBase : public ByteSourceBase{
149 public:
150 explicit OwningStdIOByteSourceBase(FILE*file):file(file){
151 // Tell the std library that we want to do the buffering ourself.
152 std::setvbuf(file, 0, _IONBF, 0);
153 }
154
155 int read(char*buffer, int size){
156 return std::fread(buffer, 1, size, file);
157 }
158
159 ~OwningStdIOByteSourceBase(){
160 std::fclose(file);
161 }
162
163 private:
164 FILE*file;
165 };
166
167 class NonOwningIStreamByteSource : public ByteSourceBase{
168 public:
169 explicit NonOwningIStreamByteSource(std::istream&in):in(in){}
170
171 int read(char*buffer, int size){
172 in.read(buffer, size);
173 return in.gcount();
174 }
175
176 ~NonOwningIStreamByteSource(){}
177
178 private:
179 std::istream&in;
180 };
181
182 class NonOwningStringByteSource : public ByteSourceBase{
183 public:
184 NonOwningStringByteSource(const char*str, long long size):str(str), remaining_byte_count(size){}
185
186 int read(char*buffer, int desired_byte_count){
187 int to_copy_byte_count = desired_byte_count;
188 if(remaining_byte_count < to_copy_byte_count)
189 to_copy_byte_count = remaining_byte_count;
190 std::memcpy(buffer, str, to_copy_byte_count);
191 remaining_byte_count -= to_copy_byte_count;
192 str += to_copy_byte_count;
193 return to_copy_byte_count;
194 }
195
196 ~NonOwningStringByteSource(){}
197
198 private:
199 const char*str;
200 long long remaining_byte_count;
201 };
202
203 #ifndef CSV_IO_NO_THREAD
204 class AsynchronousReader{
205 public:
206 void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
207 std::unique_lock<std::mutex>guard(lock);
208 byte_source = std::move(arg_byte_source);
209 desired_byte_count = -1;
210 termination_requested = false;
211 worker = std::thread(
212 [&]{
213 std::unique_lock<std::mutex>guard(lock);
214 try{
215 for(;;){
216 read_requested_condition.wait(
217 guard,
218 [&]{
219 return desired_byte_count != -1 || termination_requested;
220 }
221 );
222 if(termination_requested)
223 return;
224
225 read_byte_count = byte_source->read(buffer, desired_byte_count);
226 desired_byte_count = -1;
227 if(read_byte_count == 0)
228 break;
229 read_finished_condition.notify_one();
230 }
231 }catch(...){
232 read_error = std::current_exception();
233 }
234 read_finished_condition.notify_one();
235 }
236 );
237 }
238
239 bool is_valid()const{
240 return byte_source != nullptr;
241 }
242
243 void start_read(char*arg_buffer, int arg_desired_byte_count){
244 std::unique_lock<std::mutex>guard(lock);
245 buffer = arg_buffer;
246 desired_byte_count = arg_desired_byte_count;
247 read_byte_count = -1;
248 read_requested_condition.notify_one();
249 }
250
251 int finish_read(){
252 std::unique_lock<std::mutex>guard(lock);
253 read_finished_condition.wait(
254 guard,
255 [&]{
256 return read_byte_count != -1 || read_error;
257 }
258 );
259 if(read_error)
260 std::rethrow_exception(read_error);
261 else
262 return read_byte_count;
263 }
264
265 ~AsynchronousReader(){
266 if(byte_source != nullptr){
267 {
268 std::unique_lock<std::mutex>guard(lock);
269 termination_requested = true;
270 }
271 read_requested_condition.notify_one();
272 worker.join();
273 }
274 }
275
276 private:
277 std::unique_ptr<ByteSourceBase>byte_source;
278
279 std::thread worker;
280
281 bool termination_requested;
282 std::exception_ptr read_error;
283 char*buffer;
284 int desired_byte_count;
285 int read_byte_count;
286
287 std::mutex lock;
288 std::condition_variable read_finished_condition;
289 std::condition_variable read_requested_condition;
290 };
291 #endif
292
293 class SynchronousReader{
294 public:
295 void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
296 byte_source = std::move(arg_byte_source);
297 }
298
299 bool is_valid()const{
300 return byte_source != nullptr;
301 }
302
303 void start_read(char*arg_buffer, int arg_desired_byte_count){
304 buffer = arg_buffer;
305 desired_byte_count = arg_desired_byte_count;
306 }
307
308 int finish_read(){
309 return byte_source->read(buffer, desired_byte_count);
310 }
311 private:
312 std::unique_ptr<ByteSourceBase>byte_source;
313 char*buffer;
314 int desired_byte_count;
315 };
316 }
317
318 class LineReader{
319 private:
320 static const int block_len = 1<<24;
321 std::unique_ptr<char[]>buffer; // must be constructed before (and thus destructed after) the reader!
322 #ifdef CSV_IO_NO_THREAD
323 detail::SynchronousReader reader;
324 #else
325 detail::AsynchronousReader reader;
326 #endif
327 int data_begin;
328 int data_end;
329
330 char file_name[error::max_file_name_length+1];
331 unsigned file_line;
332
333 static std::unique_ptr<ByteSourceBase> open_file(const char*file_name){
334 // We open the file in binary mode as it makes no difference under *nix
335 // and under Windows we handle \r\n newlines ourself.
336 FILE*file = std::fopen(file_name, "rb");
337 if(file == 0){
338 int x = errno; // store errno as soon as possible, doing it after constructor call can fail.
339 error::can_not_open_file err;
340 err.set_errno(x);
341 err.set_file_name(file_name);
342 throw err;
343 }
344 return std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file));
345 }
346
347 void init(std::unique_ptr<ByteSourceBase>byte_source){
348 file_line = 0;
349
350 buffer = std::unique_ptr<char[]>(new char[3*block_len]);
351 data_begin = 0;
352 data_end = byte_source->read(buffer.get(), 2*block_len);
353
354 // Ignore UTF-8 BOM
355 if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
356 data_begin = 3;
357
358 if(data_end == 2*block_len){
359 reader.init(std::move(byte_source));
360 reader.start_read(buffer.get() + 2*block_len, block_len);
361 }
362 }
363
364 public:
365 LineReader() = delete;
366 LineReader(const LineReader&) = delete;
367 LineReader&operator=(const LineReader&) = delete;
368
369 explicit LineReader(const char*file_name){
370 set_file_name(file_name);
371 init(open_file(file_name));
372 }
373
374 explicit LineReader(const std::string&file_name){
375 set_file_name(file_name.c_str());
376 init(open_file(file_name.c_str()));
377 }
378
379 LineReader(const char*file_name, std::unique_ptr<ByteSourceBase>byte_source){
380 set_file_name(file_name);
381 init(std::move(byte_source));
382 }
383
384 LineReader(const std::string&file_name, std::unique_ptr<ByteSourceBase>byte_source){
385 set_file_name(file_name.c_str());
386 init(std::move(byte_source));
387 }
388
389 LineReader(const char*file_name, const char*data_begin, const char*data_end){
390 set_file_name(file_name);
391 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
392 }
393
394 LineReader(const std::string&file_name, const char*data_begin, const char*data_end){
395 set_file_name(file_name.c_str());
396 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
397 }
398
399 LineReader(const char*file_name, FILE*file){
400 set_file_name(file_name);
401 init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
402 }
403
404 LineReader(const std::string&file_name, FILE*file){
405 set_file_name(file_name.c_str());
406 init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
407 }
408
409 LineReader(const char*file_name, std::istream&in){
410 set_file_name(file_name);
411 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
412 }
413
414 LineReader(const std::string&file_name, std::istream&in){
415 set_file_name(file_name.c_str());
416 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
417 }
418
419 void set_file_name(const std::string&file_name){
420 set_file_name(file_name.c_str());
421 }
422
423 void set_file_name(const char*file_name){
424 if(file_name != nullptr){
425 strncpy(this->file_name, file_name, sizeof(this->file_name));
426 this->file_name[sizeof(this->file_name)-1] = '\0';
427 }else{
428 this->file_name[0] = '\0';
429 }
430 }
431
432 const char*get_truncated_file_name()const{
433 return file_name;
434 }
435
436 void set_file_line(unsigned file_line){
437 this->file_line = file_line;
438 }
439
440 unsigned get_file_line()const{
441 return file_line;
442 }
443
444 char*next_line(){
445 if(data_begin == data_end)
446 return 0;
447
448 ++file_line;
449
450 assert(data_begin < data_end);
451 assert(data_end <= block_len*2);
452
453 if(data_begin >= block_len){
454 std::memcpy(buffer.get(), buffer.get()+block_len, block_len);
455 data_begin -= block_len;
456 data_end -= block_len;
457 if(reader.is_valid())
458 {
459 data_end += reader.finish_read();
460 std::memcpy(buffer.get()+block_len, buffer.get()+2*block_len, block_len);
461 reader.start_read(buffer.get() + 2*block_len, block_len);
462 }
463 }
464
465 int line_end = data_begin;
466 while(buffer[line_end] != '\n' && line_end != data_end){
467 ++line_end;
468 }
469
470 if(line_end - data_begin + 1 > block_len){
471 error::line_length_limit_exceeded err;
472 err.set_file_name(file_name);
473 err.set_file_line(file_line);
474 throw err;
475 }
476
477 if(buffer[line_end] == '\n' && line_end != data_end){
478 buffer[line_end] = '\0';
479 }else{
480 // some files are missing the newline at the end of the
481 // last line
482 ++data_end;
483 buffer[line_end] = '\0';
484 }
485
486 // handle windows \r\n-line breaks
487 if(line_end != data_begin && buffer[line_end-1] == '\r')
488 buffer[line_end-1] = '\0';
489
490 char*ret = buffer.get() + data_begin;
491 data_begin = line_end+1;
492 return ret;
493 }
494 };
495
496
497 ////////////////////////////////////////////////////////////////////////////
498 // CSV //
499 ////////////////////////////////////////////////////////////////////////////
500
501 namespace error{
// Longest column name (excluding the terminating '\0') that an error object
// stores; longer names are truncated.
const int max_column_name_length = 63;

// Mixin that gives an error type a fixed-size, always '\0'-terminated copy of
// a column name.
struct with_column_name{
    // Starts with an empty (all-zero) name.
    with_column_name(){
        std::memset(column_name, 0, sizeof(column_name));
    }

    // Stores a truncated copy of column_name; a null pointer stores the empty
    // string.
    void set_column_name(const char*column_name){
        if(column_name == nullptr){
            this->column_name[0] = '\0';
            return;
        }
        std::strncpy(this->column_name, column_name, sizeof(this->column_name)-1);
        this->column_name[sizeof(this->column_name)-1] = '\0';
    }

    char column_name[max_column_name_length+1];
};
519
520
// Longest cell content (excluding the terminating '\0') that an error object
// stores; longer content is truncated.
const int max_column_content_length = 63;

// Mixin that gives an error type a fixed-size, always '\0'-terminated copy of
// the offending cell content.
struct with_column_content{
    // Starts with empty (all-zero) content.
    with_column_content(){
        std::memset(column_content, 0, sizeof(column_content));
    }

    // Stores a truncated copy of column_content; a null pointer stores the
    // empty string.
    void set_column_content(const char*column_content){
        if(column_content == nullptr){
            this->column_content[0] = '\0';
            return;
        }
        std::strncpy(this->column_content, column_content, sizeof(this->column_content)-1);
        this->column_content[sizeof(this->column_content)-1] = '\0';
    }

    char column_content[max_column_content_length+1];
};
539
540
541 struct extra_column_in_header :
542 base,
543 with_file_name,
544 with_column_name{
545 void format_error_message()const{
546 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
547 "Extra column \"%s\" in header of file \"%s\"."
548 , column_name, file_name);
549 }
550 };
551
552 struct missing_column_in_header :
553 base,
554 with_file_name,
555 with_column_name{
556 void format_error_message()const{
557 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
558 "Missing column \"%s\" in header of file \"%s\"."
559 , column_name, file_name);
560 }
561 };
562
563 struct duplicated_column_in_header :
564 base,
565 with_file_name,
566 with_column_name{
567 void format_error_message()const{
568 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
569 "Duplicated column \"%s\" in header of file \"%s\"."
570 , column_name, file_name);
571 }
572 };
573
574 struct header_missing :
575 base,
576 with_file_name{
577 void format_error_message()const{
578 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
579 "Header missing in file \"%s\"."
580 , file_name);
581 }
582 };
583
584 struct too_few_columns :
585 base,
586 with_file_name,
587 with_file_line{
588 void format_error_message()const{
589 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
590 "Too few columns in line %d in file \"%s\"."
591 , file_line, file_name);
592 }
593 };
594
595 struct too_many_columns :
596 base,
597 with_file_name,
598 with_file_line{
599 void format_error_message()const{
600 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
601 "Too many columns in line %d in file \"%s\"."
602 , file_line, file_name);
603 }
604 };
605
606 struct escaped_string_not_closed :
607 base,
608 with_file_name,
609 with_file_line{
610 void format_error_message()const{
611 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
612 "Escaped string was not closed in line %d in file \"%s\"."
613 , file_line, file_name);
614 }
615 };
616
617 struct integer_must_be_positive :
618 base,
619 with_file_name,
620 with_file_line,
621 with_column_name,
622 with_column_content{
623 void format_error_message()const{
624 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
625 "The integer \"%s\" must be positive or 0 in column \"%s\" in file \"%s\" in line \"%d\"."
626 , column_content, column_name, file_name, file_line);
627 }
628 };
629
630 struct no_digit :
631 base,
632 with_file_name,
633 with_file_line,
634 with_column_name,
635 with_column_content{
636 void format_error_message()const{
637 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
638 "The integer \"%s\" contains an invalid digit in column \"%s\" in file \"%s\" in line \"%d\"."
639 , column_content, column_name, file_name, file_line);
640 }
641 };
642
643 struct integer_overflow :
644 base,
645 with_file_name,
646 with_file_line,
647 with_column_name,
648 with_column_content{
649 void format_error_message()const{
650 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
651 "The integer \"%s\" overflows in column \"%s\" in file \"%s\" in line \"%d\"."
652 , column_content, column_name, file_name, file_line);
653 }
654 };
655
656 struct integer_underflow :
657 base,
658 with_file_name,
659 with_file_line,
660 with_column_name,
661 with_column_content{
662 void format_error_message()const{
663 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
664 "The integer \"%s\" underflows in column \"%s\" in file \"%s\" in line \"%d\"."
665 , column_content, column_name, file_name, file_line);
666 }
667 };
668
669 struct invalid_single_character :
670 base,
671 with_file_name,
672 with_file_line,
673 with_column_name,
674 with_column_content{
675 void format_error_message()const{
676 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
677 "The content \"%s\" of column \"%s\" in file \"%s\" in line \"%d\" is not a single character."
678 , column_content, column_name, file_name, file_line);
679 }
680 };
681 }
682
// Bit flags that control how a header that does not match the expected
// columns is treated. The flags are tested with bitwise AND, so they may be
// combined with bitwise OR.
using ignore_column = unsigned;
static const ignore_column ignore_no_column = 0;      // every mismatch is an error
static const ignore_column ignore_extra_column = 1;   // tolerate unknown header columns
static const ignore_column ignore_missing_column = 2; // tolerate absent expected columns
687
// Trim policy: strips any of the characters in trim_char_list from both ends
// of a field.
template<char ... trim_char_list>
struct trim_chars{
private:
    // End of the recursion: an empty candidate list matches nothing.
    constexpr static bool is_trim_char(char){
        return false;
    }

    // True when c equals `first` or any of the remaining candidates.
    template<class ...Rest>
    constexpr static bool is_trim_char(char c, char first, Rest...rest){
        return c == first || is_trim_char(c, rest...);
    }

public:
    // Moves str_begin forward and str_end backward past trim characters and
    // writes a '\0' at the new end. Both pointers are updated in place.
    static void trim(char*&str_begin, char*&str_end){
        for(; str_begin != str_end; ++str_begin){
            if(!is_trim_char(*str_begin, trim_char_list...))
                break;
        }
        for(; str_begin != str_end; --str_end){
            if(!is_trim_char(str_end[-1], trim_char_list...))
                break;
        }
        *str_end = '\0';
    }
};
709
710
// Comment policy that treats no line as a comment.
struct no_comment{
    // Always false, whatever the line contains.
    static bool is_comment(const char* /*line*/){
        return false;
    }
};
716
// Comment policy: a line is a comment when its first character is one of
// comment_start_char_list.
template<char ... comment_start_char_list>
struct single_line_comment{
private:
    // End of the recursion: an empty candidate list matches nothing.
    constexpr static bool is_comment_start_char(char){
        return false;
    }

    // True when c equals `first` or any of the remaining candidates.
    template<class ...Rest>
    constexpr static bool is_comment_start_char(char c, char first, Rest...rest){
        return c == first || is_comment_start_char(c, rest...);
    }

public:
    // Only the first character of the line is inspected.
    static bool is_comment(const char*line){
        return is_comment_start_char(line[0], comment_start_char_list...);
    }
};
735
// Comment policy: a line is a comment when it is empty or consists only of
// spaces and tabs.
struct empty_line_comment{
    static bool is_comment(const char*line){
        // Skip the leading blanks; the line is "empty" exactly when that
        // brings us to the terminator.
        const char*cursor = line;
        while(*cursor == ' ' || *cursor == '\t')
            ++cursor;
        return *cursor == '\0';
    }
};
748
749 template<char ... comment_start_char_list>
750 struct single_and_empty_line_comment{
751 static bool is_comment(const char*line){
752 return single_line_comment<comment_start_char_list...>::is_comment(line) || empty_line_comment::is_comment(line);
753 }
754 };
755
// Quote policy without any quoting: fields are separated by `sep` and taken
// literally.
template<char sep>
struct no_quote_escape{
    // Returns a pointer to the first `sep` or to the terminating '\0',
    // whichever comes first.
    static const char*find_next_column_end(const char*col_begin){
        const char*cursor = col_begin;
        for(; *cursor != '\0'; ++cursor){
            if(*cursor == sep)
                break;
        }
        return cursor;
    }

    // Nothing to unescape; the field is left untouched.
    static void unescape(char*&, char*&){}
};
768
769 template<char sep, char quote>
770 struct double_quote_escape{
771 static const char*find_next_column_end(const char*col_begin){
772 while(*col_begin != sep && *col_begin != '\0')
773 if(*col_begin != quote)
774 ++col_begin;
775 else{
776 do{
777 ++col_begin;
778 while(*col_begin != quote){
779 if(*col_begin == '\0')
780 throw error::escaped_string_not_closed();
781 ++col_begin;
782 }
783 ++col_begin;
784 }while(*col_begin == quote);
785 }
786 return col_begin;
787 }
788
789 static void unescape(char*&col_begin, char*&col_end){
790 if(col_end - col_begin >= 2){
791 if(*col_begin == quote && *(col_end-1) == quote){
792 ++col_begin;
793 --col_end;
794 char*out = col_begin;
795 for(char*in = col_begin; in!=col_end; ++in){
796 if(*in == quote && (in+1) != col_end && *(in+1) == quote){
797 ++in;
798 }
799 *out = *in;
800 ++out;
801 }
802 col_end = out;
803 *col_end = '\0';
804 }
805 }
806
807 }
808 };
809
810 struct throw_on_overflow{
811 template<class T>
812 static void on_overflow(T&){
813 throw error::integer_overflow();
814 }
815
816 template<class T>
817 static void on_underflow(T&){
818 throw error::integer_underflow();
819 }
820 };
821
// Overflow policy that does nothing: both hooks return without touching the
// destination value.
struct ignore_overflow{
    template<class T>
    static void on_overflow(T& /*x*/){
    }

    template<class T>
    static void on_underflow(T& /*x*/){
    }
};
829
// Overflow policy that saturates: an out-of-range value is replaced by the
// nearest representable value of the destination type.
// Requires <limits> for std::numeric_limits.
struct set_to_max_on_overflow{
    // Sets x to the largest value T can represent.
    template<class T>
    static void on_overflow(T&x){
        x = std::numeric_limits<T>::max();
    }

    // Sets x to the most negative value T can represent. lowest() is used
    // rather than min(): both agree for integer types, but for floating point
    // types min() is the smallest positive value.
    template<class T>
    static void on_underflow(T&x){
        x = std::numeric_limits<T>::lowest();
    }
};
841
842
843 namespace detail{
// Cuts the first field off `line`.
//
// On return [col_begin, col_end) is the field and *col_end is '\0'. `line`
// is advanced to the character after the separator, or set to nullptr when
// the field was the last one on the line. `line` must not be null on entry.
template<class quote_policy>
void chop_next_column(
    char*&line, char*&col_begin, char*&col_end
){
    assert(line != nullptr);

    col_begin = line;
    // The policy works on const char*; the data is ours and writable, so the
    // constness is dropped again.
    col_end = const_cast<char*>(quote_policy::find_next_column_end(col_begin));

    const bool is_last_column = (*col_end == '\0');
    if(!is_last_column)
        *col_end = '\0'; // overwrite the separator to terminate the field
    line = is_last_column ? nullptr : col_end + 1;
}
861
862 template<class trim_policy, class quote_policy>
863 void parse_line(
864 char*line,
865 char**sorted_col,
866 const std::vector<int>&col_order
867 ){
868 for(std::size_t i=0; i<col_order.size(); ++i){
869 if(line == nullptr)
870 throw ::io::error::too_few_columns();
871 char*col_begin, *col_end;
872 chop_next_column<quote_policy>(line, col_begin, col_end);
873
874 if(col_order[i] != -1){
875 trim_policy::trim(col_begin, col_end);
876 quote_policy::unescape(col_begin, col_end);
877
878 sorted_col[col_order[i]] = col_begin;
879 }
880 }
881 if(line != nullptr)
882 throw ::io::error::too_many_columns();
883 }
884
885 template<unsigned column_count, class trim_policy, class quote_policy>
886 void parse_header_line(
887 char*line,
888 std::vector<int>&col_order,
889 const std::string*col_name,
890 ignore_column ignore_policy
891 ){
892 col_order.clear();
893
894 bool found[column_count];
895 std::fill(found, found + column_count, false);
896 while(line){
897 char*col_begin,*col_end;
898 chop_next_column<quote_policy>(line, col_begin, col_end);
899
900 trim_policy::trim(col_begin, col_end);
901 quote_policy::unescape(col_begin, col_end);
902
903 for(unsigned i=0; i<column_count; ++i)
904 if(col_begin == col_name[i]){
905 if(found[i]){
906 error::duplicated_column_in_header err;
907 err.set_column_name(col_begin);
908 throw err;
909 }
910 found[i] = true;
911 col_order.push_back(i);
912 col_begin = 0;
913 break;
914 }
915 if(col_begin){
916 if(ignore_policy & ::io::ignore_extra_column)
917 col_order.push_back(-1);
918 else{
919 error::extra_column_in_header err;
920 err.set_column_name(col_begin);
921 throw err;
922 }
923 }
924 }
925 if(!(ignore_policy & ::io::ignore_missing_column)){
926 for(unsigned i=0; i<column_count; ++i){
927 if(!found[i]){
928 error::missing_column_in_header err;
929 err.set_column_name(col_name[i].c_str());
930 throw err;
931 }
932 }
933 }
934 }
935
936 template<class overflow_policy>
937 void parse(char*col, char &x){
938 if(!*col)
939 throw error::invalid_single_character();
940 x = *col;
941 ++col;
942 if(*col)
943 throw error::invalid_single_character();
944 }
945
// Copies the cell text into a std::string.
template<class overflow_policy>
void parse(char*col, std::string&x){
    x.assign(col);
}

// Hands out a pointer to the cell text. No copy is made: x points at the
// memory `col` points at.
template<class overflow_policy>
void parse(char*col, const char*&x){
    x = col;
}

// Same as the const char* overload, but hands out a pointer through which the
// text may be modified.
template<class overflow_policy>
void parse(char*col, char*&x){
    x = col;
}
960
961 template<class overflow_policy, class T>
962 void parse_unsigned_integer(const char*col, T&x){
963 x = 0;
964 while(*col != '\0'){
965 if('0' <= *col && *col <= '9'){
966 T y = *col - '0';
967 if(x > (std::numeric_limits<T>::max()-y)/10){
968 overflow_policy::on_overflow(x);
969 return;
970 }
971 x = 10*x+y;
972 }else
973 throw error::no_digit();
974 ++col;
975 }
976 }
977
978 template<class overflow_policy>void parse(char*col, unsigned char &x)
979 {parse_unsigned_integer<overflow_policy>(col, x);}
980 template<class overflow_policy>void parse(char*col, unsigned short &x)
981 {parse_unsigned_integer<overflow_policy>(col, x);}
982 template<class overflow_policy>void parse(char*col, unsigned int &x)
983 {parse_unsigned_integer<overflow_policy>(col, x);}
984 template<class overflow_policy>void parse(char*col, unsigned long &x)
985 {parse_unsigned_integer<overflow_policy>(col, x);}
986 template<class overflow_policy>void parse(char*col, unsigned long long &x)
987 {parse_unsigned_integer<overflow_policy>(col, x);}
988
989 template<class overflow_policy, class T>
990 void parse_signed_integer(const char*col, T&x){
991 if(*col == '-'){
992 ++col;
993
994 x = 0;
995 while(*col != '\0'){
996 if('0' <= *col && *col <= '9'){
997 T y = *col - '0';
998 if(x < (std::numeric_limits<T>::min()+y)/10){
999 overflow_policy::on_underflow(x);
1000 return;
1001 }
1002 x = 10*x-y;
1003 }else
1004 throw error::no_digit();
1005 ++col;
1006 }
1007 return;
1008 }else if(*col == '+')
1009 ++col;
1010 parse_unsigned_integer<overflow_policy>(col, x);
1011 }
1012
1013 template<class overflow_policy>void parse(char*col, signed char &x)
1014 {parse_signed_integer<overflow_policy>(col, x);}
1015 template<class overflow_policy>void parse(char*col, signed short &x)
1016 {parse_signed_integer<overflow_policy>(col, x);}
1017 template<class overflow_policy>void parse(char*col, signed int &x)
1018 {parse_signed_integer<overflow_policy>(col, x);}
1019 template<class overflow_policy>void parse(char*col, signed long &x)
1020 {parse_signed_integer<overflow_policy>(col, x);}
1021 template<class overflow_policy>void parse(char*col, signed long long &x)
1022 {parse_signed_integer<overflow_policy>(col, x);}
1023
1024 template<class T>
1025 void parse_float(const char*col, T&x){
1026 bool is_neg = false;
1027 if(*col == '-'){
1028 is_neg = true;
1029 ++col;
1030 }else if(*col == '+')
1031 ++col;
1032
1033 x = 0;
1034 while('0' <= *col && *col <= '9'){
1035 int y = *col - '0';
1036 x *= 10;
1037 x += y;
1038 ++col;
1039 }
1040
1041 if(*col == '.'|| *col == ','){
1042 ++col;
1043 T pos = 1;
1044 while('0' <= *col && *col <= '9'){
1045 pos /= 10;
1046 int y = *col - '0';
1047 ++col;
1048 x += y*pos;
1049 }
1050 }
1051
1052 if(*col == 'e' || *col == 'E'){
1053 ++col;
1054 int e;
1055
1056 parse_signed_integer<set_to_max_on_overflow>(col, e);
1057
1058 if(e != 0){
1059 T base;
1060 if(e < 0){
1061 base = 0.1;
1062 e = -e;
1063 }else{
1064 base = 10;
1065 }
1066
1067 while(e != 1){
1068 if((e & 1) == 0){
1069 base = base*base;
1070 e >>= 1;
1071 }else{
1072 x *= base;
1073 --e;
1074 }
1075 }
1076 x *= base;
1077 }
1078 }else{
1079 if(*col != '\0')
1080 throw error::no_digit();
1081 }
1082
1083 if(is_neg)
1084 x = -x;
1085 }
1086
1087 template<class overflow_policy> void parse(char*col, float&x) { parse_float(col, x); }
1088 template<class overflow_policy> void parse(char*col, double&x) { parse_float(col, x); }
1089 template<class overflow_policy> void parse(char*col, long double&x) { parse_float(col, x); }
1090
// Fallback overload chosen for every type that has no dedicated parse
// overload. Instantiating it always fails to compile with the message below.
template<class overflow_policy, class T>
void parse(char*col, T&x){
    // The parameters are intentionally unused; silence the warnings.
    static_cast<void>(col);
    static_cast<void>(x);
    // A plain "false" would be rejected when the template is read. The
    // condition has to depend on T so that it is only evaluated when the
    // template is instantiated.
    static_assert(sizeof(T)!=sizeof(T),
        "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported");
}
1102
1103 }
1104
1105 template<unsigned column_count,
1106 class trim_policy = trim_chars<' ', '\t'>,
1107 class quote_policy = no_quote_escape<','>,
1108 class overflow_policy = throw_on_overflow,
1109 class comment_policy = no_comment
1110 >
1111 class CSVReader{
1112 private:
1113 LineReader in;
1114
1115 char*row[column_count];
1116 std::string column_names[column_count];
1117
1118 std::vector<int>col_order;
1119
1120 template<class ...ColNames>
1121 void set_column_names(std::string s, ColNames...cols){
1122 column_names[column_count-sizeof...(ColNames)-1] = std::move(s);
1123 set_column_names(std::forward<ColNames>(cols)...);
1124 }
1125
1126 void set_column_names(){}
1127
1128
1129 public:
1130 CSVReader() = delete;
1131 CSVReader(const CSVReader&) = delete;
1132 CSVReader&operator=(const CSVReader&);
1133
1134 template<class ...Args>
1135 explicit CSVReader(Args&&...args):in(std::forward<Args>(args)...){
1136 std::fill(row, row+column_count, nullptr);
1137 col_order.resize(column_count);
1138 for(unsigned i=0; i<column_count; ++i)
1139 col_order[i] = i;
1140 for(unsigned i=1; i<=column_count; ++i)
1141 column_names[i-1] = "col"+std::to_string(i);
1142 }
1143
1144 char*next_line(){
1145 return in.next_line();
1146 }
1147
1148 template<class ...ColNames>
1149 void read_header(ignore_column ignore_policy, ColNames...cols){
1150 static_assert(sizeof...(ColNames)>=column_count, "not enough column names specified");
1151 static_assert(sizeof...(ColNames)<=column_count, "too many column names specified");
1152 try{
1153 set_column_names(std::forward<ColNames>(cols)...);
1154
1155 char*line;
1156 do{
1157 line = in.next_line();
1158 if(!line)
1159 throw error::header_missing();
1160 }while(comment_policy::is_comment(line));
1161
1162 detail::parse_header_line
1163 <column_count, trim_policy, quote_policy>
1164 (line, col_order, column_names, ignore_policy);
1165 }catch(error::with_file_name&err){
1166 err.set_file_name(in.get_truncated_file_name());
1167 throw;
1168 }
1169 }
1170
1171 template<class ...ColNames>
1172 void set_header(ColNames...cols){
1173 static_assert(sizeof...(ColNames)>=column_count,
1174 "not enough column names specified");
1175 static_assert(sizeof...(ColNames)<=column_count,
1176 "too many column names specified");
1177 set_column_names(std::forward<ColNames>(cols)...);
1178 std::fill(row, row+column_count, nullptr);
1179 col_order.resize(column_count);
1180 for(unsigned i=0; i<column_count; ++i)
1181 col_order[i] = i;
1182 }
1183
1184 bool has_column(const std::string&name) const {
1185 return col_order.end() != std::find(
1186 col_order.begin(), col_order.end(),
1187 std::find(std::begin(column_names), std::end(column_names), name)
1188 - std::begin(column_names));
1189 }
1190
1191 void set_file_name(const std::string&file_name){
1192 in.set_file_name(file_name);
1193 }
1194
1195 void set_file_name(const char*file_name){
1196 in.set_file_name(file_name);
1197 }
1198
1199 const char*get_truncated_file_name()const{
1200 return in.get_truncated_file_name();
1201 }
1202
1203 void set_file_line(unsigned file_line){
1204 in.set_file_line(file_line);
1205 }
1206
1207 unsigned get_file_line()const{
1208 return in.get_file_line();
1209 }
1210
1211 private:
1212 void parse_helper(std::size_t){}
1213
1214 template<class T, class ...ColType>
1215 void parse_helper(std::size_t r, T&t, ColType&...cols){
1216 if(row[r]){
1217 try{
1218 try{
1219 ::io::detail::parse<overflow_policy>(row[r], t);
1220 }catch(error::with_column_content&err){
1221 err.set_column_content(row[r]);
1222 throw;
1223 }
1224 }catch(error::with_column_name&err){
1225 err.set_column_name(column_names[r].c_str());
1226 throw;
1227 }
1228 }
1229 parse_helper(r+1, cols...);
1230 }
1231
1232
1233 public:
1234 template<class ...ColType>
1235 bool read_row(ColType& ...cols){
1236 static_assert(sizeof...(ColType)>=column_count,
1237 "not enough columns specified");
1238 static_assert(sizeof...(ColType)<=column_count,
1239 "too many columns specified");
1240 try{
1241 try{
1242
1243 char*line;
1244 do{
1245 line = in.next_line();
1246 if(!line)
1247 return false;
1248 }while(comment_policy::is_comment(line));
1249
1250 detail::parse_line<trim_policy, quote_policy>
1251 (line, row, col_order);
1252
1253 parse_helper(0, cols...);
1254 }catch(error::with_file_name&err){
1255 err.set_file_name(in.get_truncated_file_name());
1256 throw;
1257 }
1258 }catch(error::with_file_line&err){
1259 err.set_file_line(in.get_file_line());
1260 throw;
1261 }
1262
1263 return true;
1264 }
1265 };
1266}
1267#endif
1268