From 2b152d09881559a0330b3ff923e03e715777c6c3 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Wed, 27 Feb 2019 20:45:17 -0500 Subject: Initial commit (by Pink!) --- vendor/csv.h | 1268 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1268 insertions(+) create mode 100644 vendor/csv.h (limited to 'vendor/csv.h') diff --git a/vendor/csv.h b/vendor/csv.h new file mode 100644 index 0000000..93e9034 --- /dev/null +++ b/vendor/csv.h @@ -0,0 +1,1268 @@ +// Copyright: (2012-2015) Ben Strasser +// License: BSD-3 +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +//2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +//3. Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#ifndef CSV_H +#define CSV_H + +#include +#include +#include +#include +#include +#include +#include +#ifndef CSV_IO_NO_THREAD +#include +#include +#include +#endif +#include +#include +#include +#include + +namespace io{ + //////////////////////////////////////////////////////////////////////////// + // LineReader // + //////////////////////////////////////////////////////////////////////////// + + namespace error{ + struct base : std::exception{ + virtual void format_error_message()const = 0; + + const char*what()const throw(){ + format_error_message(); + return error_message_buffer; + } + + mutable char error_message_buffer[512]; + }; + + const int max_file_name_length = 255; + + struct with_file_name{ + with_file_name(){ + std::memset(file_name, 0, sizeof(file_name)); + } + + void set_file_name(const char*file_name){ + if(file_name != nullptr){ + strncpy(this->file_name, file_name, sizeof(this->file_name)); + this->file_name[sizeof(this->file_name)-1] = '\0'; + }else{ + this->file_name[0] = '\0'; + } + } + + char file_name[max_file_name_length+1]; + }; + + struct with_file_line{ + with_file_line(){ + file_line = -1; + } + + void set_file_line(int file_line){ + this->file_line = file_line; + } + + int file_line; + }; + + struct with_errno{ + with_errno(){ + errno_value = 0; + } + + void set_errno(int errno_value){ + this->errno_value = errno_value; + } + + int errno_value; + }; + + struct can_not_open_file : + base, + with_file_name, + with_errno{ + void format_error_message()const{ + if(errno_value != 0) + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "Can not open file \"%s\" because \"%s\"." + , file_name, std::strerror(errno_value)); + else + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "Can not open file \"%s\"." + , file_name); + } + }; + + struct line_length_limit_exceeded : + base, + with_file_name, + with_file_line{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1." + , file_line, file_name); + } + }; + } + + class ByteSourceBase{ + public: + virtual int read(char*buffer, int size)=0; + virtual ~ByteSourceBase(){} + }; + + namespace detail{ + + class OwningStdIOByteSourceBase : public ByteSourceBase{ + public: + explicit OwningStdIOByteSourceBase(FILE*file):file(file){ + // Tell the std library that we want to do the buffering ourself. + std::setvbuf(file, 0, _IONBF, 0); + } + + int read(char*buffer, int size){ + return std::fread(buffer, 1, size, file); + } + + ~OwningStdIOByteSourceBase(){ + std::fclose(file); + } + + private: + FILE*file; + }; + + class NonOwningIStreamByteSource : public ByteSourceBase{ + public: + explicit NonOwningIStreamByteSource(std::istream&in):in(in){} + + int read(char*buffer, int size){ + in.read(buffer, size); + return in.gcount(); + } + + ~NonOwningIStreamByteSource(){} + + private: + std::istream∈ + }; + + class NonOwningStringByteSource : public ByteSourceBase{ + public: + NonOwningStringByteSource(const char*str, long long size):str(str), remaining_byte_count(size){} + + int read(char*buffer, int desired_byte_count){ + int to_copy_byte_count = desired_byte_count; + if(remaining_byte_count < to_copy_byte_count) + to_copy_byte_count = remaining_byte_count; + std::memcpy(buffer, str, to_copy_byte_count); + remaining_byte_count -= to_copy_byte_count; + str += to_copy_byte_count; + return to_copy_byte_count; + } + + ~NonOwningStringByteSource(){} + + private: + const char*str; + long long remaining_byte_count; + }; + + #ifndef CSV_IO_NO_THREAD + class AsynchronousReader{ + public: + void init(std::unique_ptrarg_byte_source){ + std::unique_lockguard(lock); + byte_source = std::move(arg_byte_source); + desired_byte_count = -1; + termination_requested = false; + worker = std::thread( + [&]{ + std::unique_lockguard(lock); + try{ + for(;;){ + read_requested_condition.wait( + guard, + [&]{ + return desired_byte_count != -1 || termination_requested; + } + ); + if(termination_requested) + return; + + read_byte_count = byte_source->read(buffer, desired_byte_count); + desired_byte_count = -1; + if(read_byte_count == 0) + break; + read_finished_condition.notify_one(); + } + }catch(...){ + read_error = std::current_exception(); + } + read_finished_condition.notify_one(); + } + ); + } + + bool is_valid()const{ + return byte_source != nullptr; + } + + void start_read(char*arg_buffer, int arg_desired_byte_count){ + std::unique_lockguard(lock); + buffer = arg_buffer; + desired_byte_count = arg_desired_byte_count; + read_byte_count = -1; + read_requested_condition.notify_one(); + } + + int finish_read(){ + std::unique_lockguard(lock); + read_finished_condition.wait( + guard, + [&]{ + return read_byte_count != -1 || read_error; + } + ); + if(read_error) + std::rethrow_exception(read_error); + else + return read_byte_count; + } + + ~AsynchronousReader(){ + if(byte_source != nullptr){ + { + std::unique_lockguard(lock); + termination_requested = true; + } + read_requested_condition.notify_one(); + worker.join(); + } + } + + private: + std::unique_ptrbyte_source; + + std::thread worker; + + bool termination_requested; + std::exception_ptr read_error; + char*buffer; + int desired_byte_count; + int read_byte_count; + + std::mutex lock; + std::condition_variable read_finished_condition; + std::condition_variable read_requested_condition; + }; + #endif + + class SynchronousReader{ + public: + void init(std::unique_ptrarg_byte_source){ + byte_source = std::move(arg_byte_source); + } + + bool is_valid()const{ + return byte_source != nullptr; + } + + void start_read(char*arg_buffer, int arg_desired_byte_count){ + buffer = arg_buffer; + desired_byte_count = arg_desired_byte_count; + } + + int finish_read(){ + return byte_source->read(buffer, desired_byte_count); + } + private: + std::unique_ptrbyte_source; + char*buffer; + int desired_byte_count; + }; + } + + class LineReader{ + private: + static const int block_len = 1<<24; + std::unique_ptrbuffer; // must be constructed before (and thus destructed after) the reader! + #ifdef CSV_IO_NO_THREAD + detail::SynchronousReader reader; + #else + detail::AsynchronousReader reader; + #endif + int data_begin; + int data_end; + + char file_name[error::max_file_name_length+1]; + unsigned file_line; + + static std::unique_ptr open_file(const char*file_name){ + // We open the file in binary mode as it makes no difference under *nix + // and under Windows we handle \r\n newlines ourself. + FILE*file = std::fopen(file_name, "rb"); + if(file == 0){ + int x = errno; // store errno as soon as possible, doing it after constructor call can fail. + error::can_not_open_file err; + err.set_errno(x); + err.set_file_name(file_name); + throw err; + } + return std::unique_ptr(new detail::OwningStdIOByteSourceBase(file)); + } + + void init(std::unique_ptrbyte_source){ + file_line = 0; + + buffer = std::unique_ptr(new char[3*block_len]); + data_begin = 0; + data_end = byte_source->read(buffer.get(), 2*block_len); + + // Ignore UTF-8 BOM + if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF') + data_begin = 3; + + if(data_end == 2*block_len){ + reader.init(std::move(byte_source)); + reader.start_read(buffer.get() + 2*block_len, block_len); + } + } + + public: + LineReader() = delete; + LineReader(const LineReader&) = delete; + LineReader&operator=(const LineReader&) = delete; + + explicit LineReader(const char*file_name){ + set_file_name(file_name); + init(open_file(file_name)); + } + + explicit LineReader(const std::string&file_name){ + set_file_name(file_name.c_str()); + init(open_file(file_name.c_str())); + } + + LineReader(const char*file_name, std::unique_ptrbyte_source){ + set_file_name(file_name); + init(std::move(byte_source)); + } + + LineReader(const std::string&file_name, std::unique_ptrbyte_source){ + set_file_name(file_name.c_str()); + init(std::move(byte_source)); + } + + LineReader(const char*file_name, const char*data_begin, const char*data_end){ + set_file_name(file_name); + init(std::unique_ptr(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin))); + } + + LineReader(const std::string&file_name, const char*data_begin, const char*data_end){ + set_file_name(file_name.c_str()); + init(std::unique_ptr(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin))); + } + + LineReader(const char*file_name, FILE*file){ + set_file_name(file_name); + init(std::unique_ptr(new detail::OwningStdIOByteSourceBase(file))); + } + + LineReader(const std::string&file_name, FILE*file){ + set_file_name(file_name.c_str()); + init(std::unique_ptr(new detail::OwningStdIOByteSourceBase(file))); + } + + LineReader(const char*file_name, std::istream&in){ + set_file_name(file_name); + init(std::unique_ptr(new detail::NonOwningIStreamByteSource(in))); + } + + LineReader(const std::string&file_name, std::istream&in){ + set_file_name(file_name.c_str()); + init(std::unique_ptr(new detail::NonOwningIStreamByteSource(in))); + } + + void set_file_name(const std::string&file_name){ + set_file_name(file_name.c_str()); + } + + void set_file_name(const char*file_name){ + if(file_name != nullptr){ + strncpy(this->file_name, file_name, sizeof(this->file_name)); + this->file_name[sizeof(this->file_name)-1] = '\0'; + }else{ + this->file_name[0] = '\0'; + } + } + + const char*get_truncated_file_name()const{ + return file_name; + } + + void set_file_line(unsigned file_line){ + this->file_line = file_line; + } + + unsigned get_file_line()const{ + return file_line; + } + + char*next_line(){ + if(data_begin == data_end) + return 0; + + ++file_line; + + assert(data_begin < data_end); + assert(data_end <= block_len*2); + + if(data_begin >= block_len){ + std::memcpy(buffer.get(), buffer.get()+block_len, block_len); + data_begin -= block_len; + data_end -= block_len; + if(reader.is_valid()) + { + data_end += reader.finish_read(); + std::memcpy(buffer.get()+block_len, buffer.get()+2*block_len, block_len); + reader.start_read(buffer.get() + 2*block_len, block_len); + } + } + + int line_end = data_begin; + while(buffer[line_end] != '\n' && line_end != data_end){ + ++line_end; + } + + if(line_end - data_begin + 1 > block_len){ + error::line_length_limit_exceeded err; + err.set_file_name(file_name); + err.set_file_line(file_line); + throw err; + } + + if(buffer[line_end] == '\n' && line_end != data_end){ + buffer[line_end] = '\0'; + }else{ + // some files are missing the newline at the end of the + // last line + ++data_end; + buffer[line_end] = '\0'; + } + + // handle windows \r\n-line breaks + if(line_end != data_begin && buffer[line_end-1] == '\r') + buffer[line_end-1] = '\0'; + + char*ret = buffer.get() + data_begin; + data_begin = line_end+1; + return ret; + } + }; + + + //////////////////////////////////////////////////////////////////////////// + // CSV // + //////////////////////////////////////////////////////////////////////////// + + namespace error{ + const int max_column_name_length = 63; + struct with_column_name{ + with_column_name(){ + std::memset(column_name, 0, max_column_name_length+1); + } + + void set_column_name(const char*column_name){ + if(column_name != nullptr){ + std::strncpy(this->column_name, column_name, max_column_name_length); + this->column_name[max_column_name_length] = '\0'; + }else{ + this->column_name[0] = '\0'; + } + } + + char column_name[max_column_name_length+1]; + }; + + + const int max_column_content_length = 63; + + struct with_column_content{ + with_column_content(){ + std::memset(column_content, 0, max_column_content_length+1); + } + + void set_column_content(const char*column_content){ + if(column_content != nullptr){ + std::strncpy(this->column_content, column_content, max_column_content_length); + this->column_content[max_column_content_length] = '\0'; + }else{ + this->column_content[0] = '\0'; + } + } + + char column_content[max_column_content_length+1]; + }; + + + struct extra_column_in_header : + base, + with_file_name, + with_column_name{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "Extra column \"%s\" in header of file \"%s\"." + , column_name, file_name); + } + }; + + struct missing_column_in_header : + base, + with_file_name, + with_column_name{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "Missing column \"%s\" in header of file \"%s\"." + , column_name, file_name); + } + }; + + struct duplicated_column_in_header : + base, + with_file_name, + with_column_name{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "Duplicated column \"%s\" in header of file \"%s\"." + , column_name, file_name); + } + }; + + struct header_missing : + base, + with_file_name{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "Header missing in file \"%s\"." + , file_name); + } + }; + + struct too_few_columns : + base, + with_file_name, + with_file_line{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "Too few columns in line %d in file \"%s\"." + , file_line, file_name); + } + }; + + struct too_many_columns : + base, + with_file_name, + with_file_line{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "Too many columns in line %d in file \"%s\"." + , file_line, file_name); + } + }; + + struct escaped_string_not_closed : + base, + with_file_name, + with_file_line{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "Escaped string was not closed in line %d in file \"%s\"." + , file_line, file_name); + } + }; + + struct integer_must_be_positive : + base, + with_file_name, + with_file_line, + with_column_name, + with_column_content{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "The integer \"%s\" must be positive or 0 in column \"%s\" in file \"%s\" in line \"%d\"." + , column_content, column_name, file_name, file_line); + } + }; + + struct no_digit : + base, + with_file_name, + with_file_line, + with_column_name, + with_column_content{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "The integer \"%s\" contains an invalid digit in column \"%s\" in file \"%s\" in line \"%d\"." + , column_content, column_name, file_name, file_line); + } + }; + + struct integer_overflow : + base, + with_file_name, + with_file_line, + with_column_name, + with_column_content{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "The integer \"%s\" overflows in column \"%s\" in file \"%s\" in line \"%d\"." + , column_content, column_name, file_name, file_line); + } + }; + + struct integer_underflow : + base, + with_file_name, + with_file_line, + with_column_name, + with_column_content{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "The integer \"%s\" underflows in column \"%s\" in file \"%s\" in line \"%d\"." + , column_content, column_name, file_name, file_line); + } + }; + + struct invalid_single_character : + base, + with_file_name, + with_file_line, + with_column_name, + with_column_content{ + void format_error_message()const{ + std::snprintf(error_message_buffer, sizeof(error_message_buffer), + "The content \"%s\" of column \"%s\" in file \"%s\" in line \"%d\" is not a single character." + , column_content, column_name, file_name, file_line); + } + }; + } + + typedef unsigned ignore_column; + static const ignore_column ignore_no_column = 0; + static const ignore_column ignore_extra_column = 1; + static const ignore_column ignore_missing_column = 2; + + template + struct trim_chars{ + private: + constexpr static bool is_trim_char(char){ + return false; + } + + template + constexpr static bool is_trim_char(char c, char trim_char, OtherTrimChars...other_trim_chars){ + return c == trim_char || is_trim_char(c, other_trim_chars...); + } + + public: + static void trim(char*&str_begin, char*&str_end){ + while(str_begin != str_end && is_trim_char(*str_begin, trim_char_list...)) + ++str_begin; + while(str_begin != str_end && is_trim_char(*(str_end-1), trim_char_list...)) + --str_end; + *str_end = '\0'; + } + }; + + + struct no_comment{ + static bool is_comment(const char*){ + return false; + } + }; + + template + struct single_line_comment{ + private: + constexpr static bool is_comment_start_char(char){ + return false; + } + + template + constexpr static bool is_comment_start_char(char c, char comment_start_char, OtherCommentStartChars...other_comment_start_chars){ + return c == comment_start_char || is_comment_start_char(c, other_comment_start_chars...); + } + + public: + + static bool is_comment(const char*line){ + return is_comment_start_char(*line, comment_start_char_list...); + } + }; + + struct empty_line_comment{ + static bool is_comment(const char*line){ + if(*line == '\0') + return true; + while(*line == ' ' || *line == '\t'){ + ++line; + if(*line == 0) + return true; + } + return false; + } + }; + + template + struct single_and_empty_line_comment{ + static bool is_comment(const char*line){ + return single_line_comment::is_comment(line) || empty_line_comment::is_comment(line); + } + }; + + template + struct no_quote_escape{ + static const char*find_next_column_end(const char*col_begin){ + while(*col_begin != sep && *col_begin != '\0') + ++col_begin; + return col_begin; + } + + static void unescape(char*&, char*&){ + + } + }; + + template + struct double_quote_escape{ + static const char*find_next_column_end(const char*col_begin){ + while(*col_begin != sep && *col_begin != '\0') + if(*col_begin != quote) + ++col_begin; + else{ + do{ + ++col_begin; + while(*col_begin != quote){ + if(*col_begin == '\0') + throw error::escaped_string_not_closed(); + ++col_begin; + } + ++col_begin; + }while(*col_begin == quote); + } + return col_begin; + } + + static void unescape(char*&col_begin, char*&col_end){ + if(col_end - col_begin >= 2){ + if(*col_begin == quote && *(col_end-1) == quote){ + ++col_begin; + --col_end; + char*out = col_begin; + for(char*in = col_begin; in!=col_end; ++in){ + if(*in == quote && (in+1) != col_end && *(in+1) == quote){ + ++in; + } + *out = *in; + ++out; + } + col_end = out; + *col_end = '\0'; + } + } + + } + }; + + struct throw_on_overflow{ + template + static void on_overflow(T&){ + throw error::integer_overflow(); + } + + template + static void on_underflow(T&){ + throw error::integer_underflow(); + } + }; + + struct ignore_overflow{ + template + static void on_overflow(T&){} + + template + static void on_underflow(T&){} + }; + + struct set_to_max_on_overflow{ + template + static void on_overflow(T&x){ + x = std::numeric_limits::max(); + } + + template + static void on_underflow(T&x){ + x = std::numeric_limits::min(); + } + }; + + + namespace detail{ + template + void chop_next_column( + char*&line, char*&col_begin, char*&col_end + ){ + assert(line != nullptr); + + col_begin = line; + // the col_begin + (... - col_begin) removes the constness + col_end = col_begin + (quote_policy::find_next_column_end(col_begin) - col_begin); + + if(*col_end == '\0'){ + line = nullptr; + }else{ + *col_end = '\0'; + line = col_end + 1; + } + } + + template + void parse_line( + char*line, + char**sorted_col, + const std::vector&col_order + ){ + for(std::size_t i=0; i(line, col_begin, col_end); + + if(col_order[i] != -1){ + trim_policy::trim(col_begin, col_end); + quote_policy::unescape(col_begin, col_end); + + sorted_col[col_order[i]] = col_begin; + } + } + if(line != nullptr) + throw ::io::error::too_many_columns(); + } + + template + void parse_header_line( + char*line, + std::vector&col_order, + const std::string*col_name, + ignore_column ignore_policy + ){ + col_order.clear(); + + bool found[column_count]; + std::fill(found, found + column_count, false); + while(line){ + char*col_begin,*col_end; + chop_next_column(line, col_begin, col_end); + + trim_policy::trim(col_begin, col_end); + quote_policy::unescape(col_begin, col_end); + + for(unsigned i=0; i + void parse(char*col, char &x){ + if(!*col) + throw error::invalid_single_character(); + x = *col; + ++col; + if(*col) + throw error::invalid_single_character(); + } + + template + void parse(char*col, std::string&x){ + x = col; + } + + template + void parse(char*col, const char*&x){ + x = col; + } + + template + void parse(char*col, char*&x){ + x = col; + } + + template + void parse_unsigned_integer(const char*col, T&x){ + x = 0; + while(*col != '\0'){ + if('0' <= *col && *col <= '9'){ + T y = *col - '0'; + if(x > (std::numeric_limits::max()-y)/10){ + overflow_policy::on_overflow(x); + return; + } + x = 10*x+y; + }else + throw error::no_digit(); + ++col; + } + } + + templatevoid parse(char*col, unsigned char &x) + {parse_unsigned_integer(col, x);} + templatevoid parse(char*col, unsigned short &x) + {parse_unsigned_integer(col, x);} + templatevoid parse(char*col, unsigned int &x) + {parse_unsigned_integer(col, x);} + templatevoid parse(char*col, unsigned long &x) + {parse_unsigned_integer(col, x);} + templatevoid parse(char*col, unsigned long long &x) + {parse_unsigned_integer(col, x);} + + template + void parse_signed_integer(const char*col, T&x){ + if(*col == '-'){ + ++col; + + x = 0; + while(*col != '\0'){ + if('0' <= *col && *col <= '9'){ + T y = *col - '0'; + if(x < (std::numeric_limits::min()+y)/10){ + overflow_policy::on_underflow(x); + return; + } + x = 10*x-y; + }else + throw error::no_digit(); + ++col; + } + return; + }else if(*col == '+') + ++col; + parse_unsigned_integer(col, x); + } + + templatevoid parse(char*col, signed char &x) + {parse_signed_integer(col, x);} + templatevoid parse(char*col, signed short &x) + {parse_signed_integer(col, x);} + templatevoid parse(char*col, signed int &x) + {parse_signed_integer(col, x);} + templatevoid parse(char*col, signed long &x) + {parse_signed_integer(col, x);} + templatevoid parse(char*col, signed long long &x) + {parse_signed_integer(col, x);} + + template + void parse_float(const char*col, T&x){ + bool is_neg = false; + if(*col == '-'){ + is_neg = true; + ++col; + }else if(*col == '+') + ++col; + + x = 0; + while('0' <= *col && *col <= '9'){ + int y = *col - '0'; + x *= 10; + x += y; + ++col; + } + + if(*col == '.'|| *col == ','){ + ++col; + T pos = 1; + while('0' <= *col && *col <= '9'){ + pos /= 10; + int y = *col - '0'; + ++col; + x += y*pos; + } + } + + if(*col == 'e' || *col == 'E'){ + ++col; + int e; + + parse_signed_integer(col, e); + + if(e != 0){ + T base; + if(e < 0){ + base = 0.1; + e = -e; + }else{ + base = 10; + } + + while(e != 1){ + if((e & 1) == 0){ + base = base*base; + e >>= 1; + }else{ + x *= base; + --e; + } + } + x *= base; + } + }else{ + if(*col != '\0') + throw error::no_digit(); + } + + if(is_neg) + x = -x; + } + + template void parse(char*col, float&x) { parse_float(col, x); } + template void parse(char*col, double&x) { parse_float(col, x); } + template void parse(char*col, long double&x) { parse_float(col, x); } + + template + void parse(char*col, T&x){ + // Mute unused variable compiler warning + (void)col; + (void)x; + // GCC evalutes "false" when reading the template and + // "sizeof(T)!=sizeof(T)" only when instantiating it. This is why + // this strange construct is used. + static_assert(sizeof(T)!=sizeof(T), + "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported"); + } + + } + + template, + class quote_policy = no_quote_escape<','>, + class overflow_policy = throw_on_overflow, + class comment_policy = no_comment + > + class CSVReader{ + private: + LineReader in; + + char*row[column_count]; + std::string column_names[column_count]; + + std::vectorcol_order; + + template + void set_column_names(std::string s, ColNames...cols){ + column_names[column_count-sizeof...(ColNames)-1] = std::move(s); + set_column_names(std::forward(cols)...); + } + + void set_column_names(){} + + + public: + CSVReader() = delete; + CSVReader(const CSVReader&) = delete; + CSVReader&operator=(const CSVReader&); + + template + explicit CSVReader(Args&&...args):in(std::forward(args)...){ + std::fill(row, row+column_count, nullptr); + col_order.resize(column_count); + for(unsigned i=0; i + void read_header(ignore_column ignore_policy, ColNames...cols){ + static_assert(sizeof...(ColNames)>=column_count, "not enough column names specified"); + static_assert(sizeof...(ColNames)<=column_count, "too many column names specified"); + try{ + set_column_names(std::forward(cols)...); + + char*line; + do{ + line = in.next_line(); + if(!line) + throw error::header_missing(); + }while(comment_policy::is_comment(line)); + + detail::parse_header_line + + (line, col_order, column_names, ignore_policy); + }catch(error::with_file_name&err){ + err.set_file_name(in.get_truncated_file_name()); + throw; + } + } + + template + void set_header(ColNames...cols){ + static_assert(sizeof...(ColNames)>=column_count, + "not enough column names specified"); + static_assert(sizeof...(ColNames)<=column_count, + "too many column names specified"); + set_column_names(std::forward(cols)...); + std::fill(row, row+column_count, nullptr); + col_order.resize(column_count); + for(unsigned i=0; i + void parse_helper(std::size_t r, T&t, ColType&...cols){ + if(row[r]){ + try{ + try{ + ::io::detail::parse(row[r], t); + }catch(error::with_column_content&err){ + err.set_column_content(row[r]); + throw; + } + }catch(error::with_column_name&err){ + err.set_column_name(column_names[r].c_str()); + throw; + } + } + parse_helper(r+1, cols...); + } + + + public: + template + bool read_row(ColType& ...cols){ + static_assert(sizeof...(ColType)>=column_count, + "not enough columns specified"); + static_assert(sizeof...(ColType)<=column_count, + "too many columns specified"); + try{ + try{ + + char*line; + do{ + line = in.next_line(); + if(!line) + return false; + }while(comment_policy::is_comment(line)); + + detail::parse_line + (line, row, col_order); + + parse_helper(0, cols...); + }catch(error::with_file_name&err){ + err.set_file_name(in.get_truncated_file_name()); + throw; + } + }catch(error::with_file_line&err){ + err.set_file_line(in.get_file_line()); + throw; + } + + return true; + } + }; +} +#endif + -- cgit 1.4.1