From f9dc73ee518fef82ab8d122d2ca81748a3fdaf4b Mon Sep 17 00:00:00 2001 From: vlaskine Date: Thu, 14 Mar 2019 16:36:22 +1100 Subject: [PATCH 0001/1056] csv::options: full_xpath set to true by default --- csv/applications/csv-blocks.cpp | 1 - csv/applications/csv-calc.new.cpp | 1 - csv/applications/csv-enumerate.cpp | 2 -- csv/applications/csv-select.cpp | 1 - csv/applications/csv-sort.cpp | 1 - csv/applications/csv-time-delay.cpp | 2 +- csv/applications/csv-time.cpp | 1 - csv/applications/csv-units.cpp | 1 - csv/applications/csv-update.cpp | 4 ---- csv/impl/program_options.h | 2 -- csv/options.cpp | 28 +++++++--------------------- csv/options.h | 4 ++-- csv/test/stream_test.cpp | 1 - csv/traits.h | 2 -- etc/bash_completion.d/comma | 1 - 15 files changed, 10 insertions(+), 42 deletions(-) diff --git a/csv/applications/csv-blocks.cpp b/csv/applications/csv-blocks.cpp index 5e3c9b24a..94998dd44 100644 --- a/csv/applications/csv-blocks.cpp +++ b/csv/applications/csv-blocks.cpp @@ -413,7 +413,6 @@ int main( int ac, char** av ) verbose = options.exists( "--verbose,-v" ); strict = options.exists( "--strict" ); csv = comma::csv::options( options ); - csv.full_xpath = true; csv.quote.reset(); comma::csv::options csv_out; if( csv.binary() ) { csv_out.format( comma::csv::format("ui") ); } diff --git a/csv/applications/csv-calc.new.cpp b/csv/applications/csv-calc.new.cpp index e9bf661d9..c064cc641 100644 --- a/csv/applications/csv-calc.new.cpp +++ b/csv/applications/csv-calc.new.cpp @@ -182,7 +182,6 @@ static std::pair< entry_t, comma::csv::options > make_input_( const comma::csv:: else { vf.push_back( "" ); } } p.second.fields = comma::join( vf, ',' ); - p.second.full_xpath = true; return p; } diff --git a/csv/applications/csv-enumerate.cpp b/csv/applications/csv-enumerate.cpp index a58cfc4f7..ff5d4955a 100644 --- a/csv/applications/csv-enumerate.cpp +++ b/csv/applications/csv-enumerate.cpp @@ -74,7 +74,6 @@ int main( int ac, char** av ) bool has_non_empty_field = false; for( 
const auto& f: comma::split( csv.fields, ',' ) ) { if( !f.empty() ) { has_non_empty_field = true; break; } } if( !has_non_empty_field ) { std::cerr << "csv-enumerate: please specify at least one key in fields" << std::endl; return 1; } - csv.full_xpath = true; std::string first_line; comma::csv::format f; if( csv.binary() ) { f = csv.format(); } @@ -131,7 +130,6 @@ int main( int ac, char** av ) if( !output_map ) { return 0; } comma::csv::options output_csv; output_csv.delimiter = csv.delimiter; - output_csv.full_xpath = true; if( csv.binary() ) { output_csv.format( comma::csv::format::value< input_t >( default_input ) + ",2ui" ); } comma::csv::output_stream< map_t::value_type > ostream( std::cout, output_csv, std::make_pair( default_input, std::make_pair( 0, 0 ) ) ); for( map_t::const_iterator it = map.begin(); it != map.end(); ++it ) { ostream.write( *it ); } diff --git a/csv/applications/csv-select.cpp b/csv/applications/csv-select.cpp index 0ccadd3a9..e0fb9c1ff 100644 --- a/csv/applications/csv-select.cpp +++ b/csv/applications/csv-select.cpp @@ -338,7 +338,6 @@ static void init_input( const comma::csv::format& format, const comma::command_l } } csv.fields = comma::join( fields, ',' ); - csv.full_xpath = true; } int main( int ac, char** av ) diff --git a/csv/applications/csv-sort.cpp b/csv/applications/csv-sort.cpp index 18cfffbec..48d216abf 100644 --- a/csv/applications/csv-sort.cpp +++ b/csv/applications/csv-sort.cpp @@ -678,7 +678,6 @@ int main( int ac, char** av ) options.assert_mutually_exclusive( "--discard-out-of-order,--discard-unsorted,--first,--max,--sliding-window,--window,--unique" ); verbose = options.exists( "--verbose,-v" ); csv = comma::csv::options( options ); - csv.full_xpath = true; return options.exists( "--first,--min,--max" ) ? 
handle_operations_with_ids( options ) : sort( options ); } catch( std::exception& ex ) { std::cerr << "csv-sort: " << ex.what() << std::endl; } diff --git a/csv/applications/csv-time-delay.cpp b/csv/applications/csv-time-delay.cpp index 187838166..10e9b7910 100644 --- a/csv/applications/csv-time-delay.cpp +++ b/csv/applications/csv-time-delay.cpp @@ -78,7 +78,7 @@ int main( int ac, char** av ) { comma::command_line_options options( ac, av ); if( options.exists( "--help" ) || options.exists( "-h" ) || ac == 1 ) { usage(); } - const std::vector< std::string >& v = options.unnamed( "--flush", "--binary,-b,--delimiter,-d,--fields,-f,--full-xpath,--precision,--quote" ); + const std::vector< std::string >& v = options.unnamed( "--flush", "--binary,-b,--delimiter,-d,--fields,-f,--precision,--quote" ); if( v.empty() ) { std::cerr << "csv-time-delay: expected time delay, got none" << std::endl; return 1; } double d = boost::lexical_cast< double >( v[0] ); int sign = d < 0 ? -1 : 1; diff --git a/csv/applications/csv-time.cpp b/csv/applications/csv-time.cpp index 78308a2ba..7e5922825 100644 --- a/csv/applications/csv-time.cpp +++ b/csv/applications/csv-time.cpp @@ -451,7 +451,6 @@ static void init_input() } csv.fields = fields; - csv.full_xpath = true; input.values.resize( size ); } diff --git a/csv/applications/csv-units.cpp b/csv/applications/csv-units.cpp index 8a74d546a..c2bf85f77 100644 --- a/csv/applications/csv-units.cpp +++ b/csv/applications/csv-units.cpp @@ -461,7 +461,6 @@ static void init_input() fields += init_input_field( v[i] ); } csv.fields = fields; - csv.full_xpath = true; input.values.resize( input_fields.size() ); //input.values.resize( size ); } diff --git a/csv/applications/csv-update.cpp b/csv/applications/csv-update.cpp index 98f0ff463..4778f02f2 100644 --- a/csv/applications/csv-update.cpp +++ b/csv/applications/csv-update.cpp @@ -256,7 +256,6 @@ static input_t::input_stream_t* make_filter_stream() if( filter_transport ) { return new 
input_t::input_stream_t( **filter_transport, csv, default_input ); } if( filter_line.empty() ) { return NULL; } comma::csv::options c; - c.full_xpath = true; c.fields = csv.fields; static std::istringstream iss( filter_line ); return new input_t::input_stream_t( iss, c, default_input ); @@ -390,7 +389,6 @@ int main( int ac, char** av ) comma::command_line_options options( ac, av, usage ); verbose = options.exists( "--verbose,-v" ); csv = comma::csv::options( options ); - csv.full_xpath = true; csv.quote.reset(); last_only = options.exists( "--last-only,--last" ); last_block = options.exists( "--last-block" ); @@ -448,7 +446,6 @@ int main( int ac, char** av ) std::string s = options.value< std::string >( "--empty" ) + std::string( f.count(), ',' ); std::istringstream iss( s ); comma::csv::options c; - c.full_xpath = true; c.fields = csv.fields; comma::csv::input_stream< input_t > isstream( iss, c, default_input ); empty = ( isstream.read() )->value; @@ -460,7 +457,6 @@ int main( int ac, char** av ) std::string s = options.value< std::string >( "--remove,--reset,--unset,--erase" ) + std::string( f.count(), ',' ); std::istringstream iss( s ); comma::csv::options c; - c.full_xpath = true; c.fields = csv.fields; comma::csv::input_stream< input_t > isstream( iss, c, default_input ); erase = ( isstream.read() )->value; diff --git a/csv/impl/program_options.h b/csv/impl/program_options.h index 7dd400ae4..9aa7bc424 100644 --- a/csv/impl/program_options.h +++ b/csv/impl/program_options.h @@ -55,7 +55,6 @@ inline boost::program_options::options_description program_options::description( ( "fields", boost::program_options::value< std::string >()->default_value( default_fields ), "csv fields" ) ( "binary,b", boost::program_options::value< std::string >(), "csv binary format" ) ( "delimiter,d", boost::program_options::value< char >()->default_value( ',' ), "csv delimiter" ) - ( "full-xpath", "expect full xpaths as field names" ) ( "precision", boost::program_options::value< 
unsigned int >()->default_value( 12 ), "floating point precision" ) ( "quote", boost::program_options::value< std::string >()->default_value( "\"" ), "quote sign to quote strings (ascii only)" ) ( "flush", "flush output stream after each record" ); @@ -69,7 +68,6 @@ inline csv::options program_options::get( const boost::program_options::variable if( vm.count( "delimiter ") ) { csv.delimiter = vm[ "delimiter" ].as< char >(); } if( vm.count( "precision" ) ) { csv.precision = vm[ "precision" ].as< unsigned int >(); } if( vm.count( "binary" ) ) { csv.format( vm[ "binary" ].as< std::string >() ); } - csv.full_xpath = vm.count( "full-xpath" ) > 0; csv.flush = vm.count( "flush" ) > 0; if( vm.count( "quote" ) ) { diff --git a/csv/options.cpp b/csv/options.cpp index ce369090e..f6f494e8c 100644 --- a/csv/options.cpp +++ b/csv/options.cpp @@ -27,7 +27,6 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - /// @author vsevolod vlaskine #include @@ -50,17 +49,14 @@ bool options::binary() const { return static_cast< bool >( format_ ); } namespace impl { -inline static void init( comma::csv::options& csv_options, const comma::command_line_options& options, const std::string& defaultFields ) +inline static void init( comma::csv::options& csv_options, const comma::command_line_options& options, const std::string& defaultFields, bool full_xpath ) { - csv_options.full_xpath = options.exists( "--full-xpath" ); + csv_options.full_xpath = full_xpath; csv_options.fields = options.value( "--fields,-f", defaultFields ); if( options.exists( "--binary,-b" ) ) { boost::optional< std::string > format = options.optional< std::string >( "--binary,-b" ); - if( format ) - { - csv_options.format( options.value< std::string >( "--binary,-b" ) ); - } + if( format ) { csv_options.format( options.value< std::string >( "--binary,-b" ) ); } } csv_options.precision = options.value< unsigned int >( "--precision", 12 ); 
csv_options.delimiter = options.exists( "--delimiter" ) ? options.value( "--delimiter", ',' ) : options.value( "-d", ',' ); @@ -79,18 +75,11 @@ inline static void init( comma::csv::options& csv_options, const comma::command_ } // namespace impl { -options::options() : full_xpath( false ), delimiter( ',' ), precision( 12 ), quote( '"' ), flush( false ) {} +options::options() : full_xpath( true ), delimiter( ',' ), precision( 12 ), quote( '"' ), flush( false ) {} -options::options( int argc, char** argv, const std::string& defaultFields ) -{ - impl::init( *this, comma::command_line_options( argc, argv ), defaultFields ); -} +options::options( int argc, char** argv, const std::string& defaultFields, bool full_xpath ) { impl::init( *this, comma::command_line_options( argc, argv ), defaultFields, full_xpath ); } -options::options( const comma::command_line_options& options, const std::string& defaultFields, bool set_full_xpath ) -{ - impl::init( *this, options, defaultFields ); - if(set_full_xpath) { full_xpath = true; } -} +options::options( const comma::command_line_options& options, const std::string& defaultFields, bool full_xpath ) { impl::init( *this, options, defaultFields, full_xpath ); } std::string options::usage( const std::string& default_fields, bool verbose ) { @@ -101,9 +90,6 @@ std::string options::usage( const std::string& default_fields, bool verbose ) oss << " --fields,-f : comma-separated field names"; if( !default_fields.empty() ) { oss << "; default: " << default_fields; } oss << std::endl; - oss << " --full-xpath: expect full xpaths as field names; default: false" << std::endl; - oss << " default false was a wrong choice, but changing it" << std::endl; - oss << " to true now may break too many things" << std::endl; oss << " --precision : floating point precision; default: 12" << std::endl; oss << " --quote=[]: quote sign to quote strings (ascii only); default: '\"'" << std::endl; oss << " --flush: if present, flush output stream after each record" 
<< std::endl; @@ -174,6 +160,6 @@ bool options::has_some_of_paths( const std::string& paths ) const return false; } -std::string options::valueless_options() { return "--full-xpath,--flush"; } +std::string options::valueless_options() { return "--flush"; } } } // namespace comma { namespace csv { diff --git a/csv/options.h b/csv/options.h index 01c561a2a..12bc7c863 100644 --- a/csv/options.h +++ b/csv/options.h @@ -45,10 +45,10 @@ class options options(); /// constructor - options( int argc, char** argv, const std::string& defaultFields = "" ); + options( int argc, char** argv, const std::string& defaultFields = "", bool full_xpath = true ); /// constructor - options( const comma::command_line_options& options, const std::string& defaultFields = "", bool set_full_xpath = false ); + options( const comma::command_line_options& options, const std::string& defaultFields = "", bool full_xpath = true ); /// return usage to incorporate into application usage static std::string usage( const std::string& default_fields = "", bool verbose = true ); diff --git a/csv/test/stream_test.cpp b/csv/test/stream_test.cpp index 2322d3cde..bef1731db 100644 --- a/csv/test/stream_test.cpp +++ b/csv/test/stream_test.cpp @@ -94,7 +94,6 @@ namespace comma { namespace csv { namespace stream_test { TEST( csv, container ) { comma::csv::options csv; - csv.full_xpath = true; { std::string s( "2,3,,,6" ); std::istringstream iss( s ); diff --git a/csv/traits.h b/csv/traits.h index 0aeba617f..d8ed02bf2 100644 --- a/csv/traits.h +++ b/csv/traits.h @@ -45,7 +45,6 @@ template <> struct traits< comma::csv::options > v.apply( "filename", p.filename ); v.apply( "delimiter", p.delimiter ); v.apply( "fields", p.fields ); - v.apply( "full-xpath", p.full_xpath ); v.apply( "precision", p.precision ); v.apply( "quote", p.quote ? 
std::string( 1, *p.quote ) : std::string() ); v.apply( "flush", p.flush ); @@ -59,7 +58,6 @@ template <> struct traits< comma::csv::options > v.apply( "filename", p.filename ); v.apply( "delimiter", p.delimiter ); v.apply( "fields", p.fields ); - v.apply( "full-xpath", p.full_xpath ); v.apply( "precision", p.precision ); std::string quote = p.quote ? std::string( 1, *p.quote ) : std::string(); v.apply( "quote", p.quote ); diff --git a/etc/bash_completion.d/comma b/etc/bash_completion.d/comma index 6dd9eec80..0911896ae 100644 --- a/etc/bash_completion.d/comma +++ b/etc/bash_completion.d/comma @@ -36,7 +36,6 @@ _comma_opts() --delimiter --fields --flush ---full-xpath --help --output-fields --precision From 962cdad8b9319bebcca8a7adb9d561072e928637 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 15 Mar 2019 18:28:49 +1100 Subject: [PATCH 0002/1056] comma-options-validate: a bug fixed --- application/command_line_options.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/application/command_line_options.cpp b/application/command_line_options.cpp index 831ccef3e..f7b3e601d 100644 --- a/application/command_line_options.cpp +++ b/application/command_line_options.cpp @@ -30,19 +30,18 @@ /// @author vsevolod vlaskine -#include "../string/split.h" -#include "../application/command_line_options.h" -#include "../base/exception.h" +#include #include #include +#include #include #include #include #include #include -#include - -#include +#include "../string/split.h" +#include "../application/command_line_options.h" +#include "../base/exception.h" namespace comma { @@ -192,9 +191,14 @@ void command_line_options::assert_valid( const std::vector< description >& d, bo { for( unsigned int i = 0; i < d.size(); ++i ) { d[i].assert_valid( *this ); } if( !unknown_options_invalid ) { return; } - boost::unordered_set< std::string > s; // real quick and dirty, just to make it work - for( unsigned int i = 0; i < d.size(); ++i ) { for( unsigned int j 
= 0; j < d[i].names.size(); s.insert( d[i].names[j] ), ++j ); } - for( unsigned int i = 0; i < names_.size(); ++i ) { if( s.find( names_[i] ) == s.end() ) { COMMA_THROW( comma::exception, "unknown option " << names_[i] ); } } + std::unordered_map< std::string, bool > m; // real quick and dirty, just to make it work + for( unsigned int i = 0; i < d.size(); ++i ) { for( unsigned int j = 0; j < d[i].names.size(); ++j ) { m[ d[i].names[j] ] = d[i].has_value; } } + for( unsigned int i = 1; i < argv_.size(); ++i ) + { + auto it = m.find( argv_[i] ); + if( it == m.end() ) { COMMA_THROW( comma::exception, "unknown option " << argv_[i] ); } + if( it->second ) { ++i; } + } } namespace impl { From b806621eed2e9521517ec8092c0542486f3b9880 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 18 Mar 2019 18:28:39 +1100 Subject: [PATCH 0003/1056] csv/stream: full_xpath: default set to true --- csv/stream.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/csv/stream.h b/csv/stream.h index 815878ad1..c862c011b 100644 --- a/csv/stream.h +++ b/csv/stream.h @@ -64,7 +64,7 @@ class ascii_input_stream : public boost::noncopyable { public: /// constructor - ascii_input_stream( std::istream& is, const std::string& column_names = "", char delimiter = ',', bool full_path_as_name = false, const S& sample = S() ); + ascii_input_stream( std::istream& is, const std::string& column_names = "", char delimiter = ',', bool full_path_as_name = true, const S& sample = S() ); /// constructor from csv options ascii_input_stream( std::istream& is, const options& o, const S& sample = S() ); @@ -115,7 +115,7 @@ class ascii_output_stream : public boost::noncopyable { public: /// constructor - ascii_output_stream( std::ostream& os, const std::string& column_names = "", char delimiter = ',', bool full_path_as_name = false, const S& sample = S() ); + ascii_output_stream( std::ostream& os, const std::string& column_names = "", char delimiter = ',', bool full_path_as_name = true, const 
S& sample = S() ); /// constructor from csv options ascii_output_stream( std::ostream& os, const options& o, const S& sample = S() ); @@ -166,7 +166,7 @@ class binary_input_stream : public boost::noncopyable { public: /// constructor - binary_input_stream( std::istream& is, const std::string& format = "", const std::string& column_names = "", bool full_path_as_name = false, const S& sample = S() ); + binary_input_stream( std::istream& is, const std::string& format = "", const std::string& column_names = "", bool full_path_as_name = true, const S& sample = S() ); /// constructor from options binary_input_stream( std::istream& is, const options& o, const S& sample = S() ); @@ -215,7 +215,7 @@ class binary_output_stream : public boost::noncopyable { public: /// constructor - binary_output_stream( std::ostream& os, const std::string& format = "", const std::string& column_names = "", bool full_path_as_name = false, bool flush = false, const S& sample = S() ); + binary_output_stream( std::ostream& os, const std::string& format = "", const std::string& column_names = "", bool full_path_as_name = true, bool flush = false, const S& sample = S() ); /// constructor from options binary_output_stream( std::ostream& os, const options& o, const S& sample = S() ); @@ -320,7 +320,7 @@ class output_stream : public boost::noncopyable /// construct from csv options output_stream( std::ostream& os, const csv::options& o, const S& sample = S() ); - output_stream( std::ostream& os, bool binary, bool full_xpath = false, bool flush = false, const S& sample = S() ); + output_stream( std::ostream& os, bool binary, bool full_xpath = true, bool flush = false, const S& sample = S() ); /// write void write( const S& s ) { if( ascii_ ) { ascii_->write( s ); } else { binary_->write( s ); } } From 4290987de768582469938df16d4d917223c74d3e Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 19 Mar 2019 18:46:45 +1100 Subject: [PATCH 0004/1056] csv-play: full xpath flag set to false --- 
csv/applications/csv-play.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/csv/applications/csv-play.cpp b/csv/applications/csv-play.cpp index 3d9eb2a69..62d084c11 100644 --- a/csv/applications/csv-play.cpp +++ b/csv/applications/csv-play.cpp @@ -254,10 +254,11 @@ int main( int argc, char** argv ) bool flush = !options.exists( "--no-flush" ); std::vector< std::string > configstrings = options.unnamed("--interactive,-i,--paused,--paused-at-start,--quiet,--flush,--no-flush","--slow,--slowdown,--speed,--resolution,--binary,--fields,--clients,--from,--to"); if( configstrings.empty() ) { configstrings.push_back( "-;-" ); } - comma::csv::options csvoptions( argc, argv ); + comma::csv::options csv( argc, argv ); + csv.full_xpath = false; comma::name_value::parser name_value("filename,output", ';', '=', false ); std::vector< comma::Multiplay::SourceConfig > sourceConfigs( configstrings.size() ); - comma::Multiplay::SourceConfig defaultConfig( "-", options.value( "--clients", 0 ), csvoptions ); + comma::Multiplay::SourceConfig defaultConfig( "-", options.value( "--clients", 0 ), csv ); for( unsigned int i = 0U; i < configstrings.size(); ++i ) { sourceConfigs[i] = name_value.get< comma::Multiplay::SourceConfig >( configstrings[i], defaultConfig ); } boost::posix_time::ptime fromtime; if( !from.empty() ) { fromtime = boost::posix_time::from_iso_string( from ); } From 1ede632bbcff9f56d5468d58af7912912e619747 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Wed, 27 Mar 2019 19:00:16 +1100 Subject: [PATCH 0005/1056] csv::options::full_xpath: dave jennings' stage 1: set full_xpath to false explicitly wherever appropriate --- csv/applications/csv-bin-cut.cpp | 1 + csv/applications/csv-blocks.cpp | 1 + csv/applications/csv-calc.cpp | 1 + csv/applications/csv-interval.cpp | 1 + csv/applications/csv-join.cpp | 2 ++ csv/applications/csv-repeat.cpp | 1 + csv/applications/csv-shape.cpp | 1 + csv/applications/csv-shuffle.cpp | 1 + csv/applications/csv-thin.cpp | 1 
+ csv/applications/csv-time-delay.cpp | 1 + csv/applications/csv-time.cpp | 1 + csv/applications/csv-to-sql.cpp | 1 + csv/applications/play/multiplay.h | 2 +- csv/impl/fieldwise.cpp | 1 + csv/test/format_test.cpp | 1 + csv/test/options_test.cpp | 2 ++ 16 files changed, 18 insertions(+), 1 deletion(-) diff --git a/csv/applications/csv-bin-cut.cpp b/csv/applications/csv-bin-cut.cpp index ce87a1067..ae1309d96 100644 --- a/csv/applications/csv-bin-cut.cpp +++ b/csv/applications/csv-bin-cut.cpp @@ -361,6 +361,7 @@ int main( int ac, char** av ) { command_line_options options( ac, av, usage ); comma::csv::options csv( options ); + csv.full_xpath = false; std::vector< std::string > files = options.unnamed( "--help,-h,--verbose,-v,--flush,--read-all,--force-read", "--fields,-f,--output-fields,--output,-o,--binary,-b,--skip,--count" ); if( !csv.binary() ) { diff --git a/csv/applications/csv-blocks.cpp b/csv/applications/csv-blocks.cpp index 94998dd44..fd89c8b58 100644 --- a/csv/applications/csv-blocks.cpp +++ b/csv/applications/csv-blocks.cpp @@ -415,6 +415,7 @@ int main( int ac, char** av ) csv = comma::csv::options( options ); csv.quote.reset(); comma::csv::options csv_out; + csv_out.full_xpath = false; if( csv.binary() ) { csv_out.format( comma::csv::format("ui") ); } std::vector< std::string > unnamed = options.unnamed( "--help,-h,--reverse,--verbose,-v", "-.*" ); if( unnamed.empty() ) { std::cerr << name() << "please specify operation" << std::endl; return 1; } diff --git a/csv/applications/csv-calc.cpp b/csv/applications/csv-calc.cpp index 1670f4ac2..5a5b8af5d 100644 --- a/csv/applications/csv-calc.cpp +++ b/csv/applications/csv-calc.cpp @@ -1231,6 +1231,7 @@ int main( int ac, char** av ) if( options.exists( "--bash-completion" ) ) bash_completion( ac, av ); std::vector< std::string > unnamed = options.unnamed( "", "--binary,-b,--delimiter,-d,--format,--fields,-f,--output-fields" ); comma::csv::options csv( options ); + csv.full_xpath = false; #ifdef WIN32 if( 
csv.binary() ) { _setmode( _fileno( stdin ), _O_BINARY ); _setmode( _fileno( stdout ), _O_BINARY ); } #endif diff --git a/csv/applications/csv-interval.cpp b/csv/applications/csv-interval.cpp index 5fa81d6a2..338eaee1b 100644 --- a/csv/applications/csv-interval.cpp +++ b/csv/applications/csv-interval.cpp @@ -476,6 +476,7 @@ int main( int ac, char** av ) if( options.exists( "--input-fields" ) ) { std::cout << comma::join( comma::csv::names< interval_t< double > >(), ',' ) << std::endl; return 0; } if( options.exists( "--output-fields" ) ) { std::cout << comma::join( comma::csv::names< interval_t< double > >(), ',' ) << std::endl; return 0; } comma::csv::options csv( options ); + csv.full_xpath = false; if( csv.fields.empty() ) { csv.fields = comma::join( comma::csv::names< interval_t< double > >(), ',' ); } if( !csv.has_field( "from,to" ) ) { COMMA_THROW( comma::exception, "expected from and to fields" ); } options.assert_mutually_exclusive( "--binary,--format" ); diff --git a/csv/applications/csv-join.cpp b/csv/applications/csv-join.cpp index 161a7df33..f3b6456b5 100644 --- a/csv/applications/csv-join.cpp +++ b/csv/applications/csv-join.cpp @@ -295,6 +295,7 @@ template < typename T > static std::string keys_as_string( const input< T >& i ) { std::ostringstream oss; comma::csv::options csv; + csv.full_xpath = false; csv.fields = "keys"; comma::csv::ascii_output_stream< input< T > > os( oss, csv, i ); os.write( i ); @@ -430,6 +431,7 @@ template < typename K, bool Strict = true > struct join_impl_ // quick and dirty if( !strict ) { ++discarded; continue; } std::string s; comma::csv::options c; + c.full_xpath = false; c.fields = "keys"; std::cerr << "csv-join: match not found for key(s): " << comma::csv::ascii< input< K > >( c, default_input ).put( *p, s ) << ", block: " << block << std::endl; return 1; diff --git a/csv/applications/csv-repeat.cpp b/csv/applications/csv-repeat.cpp index a6fe24e6d..460133a7e 100644 --- a/csv/applications/csv-repeat.cpp +++ 
b/csv/applications/csv-repeat.cpp @@ -154,6 +154,7 @@ int main( int ac, char** av ) comma::io::istream is( "-", comma::io::mode::binary ); boost::scoped_ptr< comma::csv::output_stream< output_t > > ostream; comma::csv::options output_csv; + output_csv.full_xpath = false; if( options.exists( "--append-fields,--append,-a" ) ) { output_csv.fields = options.value< std::string >( "--append-fields,--append,-a", "" ); diff --git a/csv/applications/csv-shape.cpp b/csv/applications/csv-shape.cpp index 024b6456e..5218d03af 100644 --- a/csv/applications/csv-shape.cpp +++ b/csv/applications/csv-shape.cpp @@ -249,6 +249,7 @@ int main( int ac, char** av ) comma::command_line_options options( ac, av, usage ); std::vector< std::string > unnamed = options.unnamed( "--size,-n,--sliding-window,-w,--step,--verbose,-v", "-.*" ); comma::csv::options csv( options ); + csv.full_xpath = false; if (csv.fields.empty()) { csv.fields="a"; } is_binary = csv.binary(); if( unnamed.empty() ) { std::cerr << comma::verbose.app_name() << ": please specify operations" << std::endl; exit( 1 ); } diff --git a/csv/applications/csv-shuffle.cpp b/csv/applications/csv-shuffle.cpp index 556514099..aecceed64 100644 --- a/csv/applications/csv-shuffle.cpp +++ b/csv/applications/csv-shuffle.cpp @@ -93,6 +93,7 @@ int main( int ac, char** av ) bool verbose = options.exists( "--verbose,-v" ); if( options.exists( "--help,-h" ) ) { usage( verbose ); } comma::csv::options csv( options ); + csv.full_xpath = false; std::string f = options.value< std::string >( "--input-fields", "" ); if( !f.empty() ) { csv.fields = f; } std::vector< std::string > input_fields = comma::split( csv.fields, ',' ); diff --git a/csv/applications/csv-thin.cpp b/csv/applications/csv-thin.cpp index 3ee53466f..ac6dfa137 100644 --- a/csv/applications/csv-thin.cpp +++ b/csv/applications/csv-thin.cpp @@ -189,6 +189,7 @@ int main( int ac, char** av ) { if( !period ) { COMMA_THROW( comma::exception, "--fields requires --period option" ); } 
comma::csv::options csv( options ); + csv.full_xpath = false; comma::csv::input_stream< timestamped > istream( std::cin, csv ); while( std::cin.good() && !std::cin.eof() ) { diff --git a/csv/applications/csv-time-delay.cpp b/csv/applications/csv-time-delay.cpp index 10e9b7910..6f80d0f30 100644 --- a/csv/applications/csv-time-delay.cpp +++ b/csv/applications/csv-time-delay.cpp @@ -90,6 +90,7 @@ int main( int ac, char** av ) microseconds *= sign; boost::posix_time::time_duration delay = boost::posix_time::minutes( minutes ) + boost::posix_time::seconds( seconds ) + boost::posix_time::microseconds( microseconds ); comma::csv::options csv( options ); + csv.full_xpath = false; comma::csv::input_stream< Point > istream( std::cin, csv ); comma::csv::output_stream< Point > ostream( std::cout, csv ); while( std::cin.good() && !std::cin.eof() ) diff --git a/csv/applications/csv-time.cpp b/csv/applications/csv-time.cpp index 7e5922825..887db2cfb 100644 --- a/csv/applications/csv-time.cpp +++ b/csv/applications/csv-time.cpp @@ -476,6 +476,7 @@ int main( int ac, char** av ) comma::command_line_options options( ac, av, usage ); accept_empty = options.exists( "--empty-as-not-a-date-time,--accept-empty,-e" ); csv = comma::csv::options( options ); + csv.full_xpath = false; csv.precision = 16; if( csv.fields.empty() ) { csv.fields="a"; } init_input(); diff --git a/csv/applications/csv-to-sql.cpp b/csv/applications/csv-to-sql.cpp index d7e8f8b59..4dc5df12b 100644 --- a/csv/applications/csv-to-sql.cpp +++ b/csv/applications/csv-to-sql.cpp @@ -148,6 +148,7 @@ int main( int ac, char** av ) if( options.exists( "--help,-h" ) ) { usage( true ); } comma::csv::options csv( options ); + csv.full_xpath = false; if ( csv.binary() ) { std::cerr << app_name << ": binary not handled" << std::endl; return 1; } std::string null_value = default_null_value; diff --git a/csv/applications/play/multiplay.h b/csv/applications/play/multiplay.h index 6486854f1..c10c35d0f 100644 --- 
a/csv/applications/play/multiplay.h +++ b/csv/applications/play/multiplay.h @@ -63,7 +63,7 @@ class Multiplay outputFileName( output ), minNumberOfClients( 0 ), options( csv ) {} SourceConfig( const std::string& output, std::size_t n, const csv::options& csv ) : outputFileName( output ), minNumberOfClients( n ), options( csv ) {} - SourceConfig() {}; + SourceConfig() { options.full_xpath = false; }; }; Multiplay( const std::vector< SourceConfig >& configs diff --git a/csv/impl/fieldwise.cpp b/csv/impl/fieldwise.cpp index 53f16dde9..724efa32a 100644 --- a/csv/impl/fieldwise.cpp +++ b/csv/impl/fieldwise.cpp @@ -42,6 +42,7 @@ fieldwise::fieldwise( const std::string& fields, char delimiter ) , binary_( this ) { csv::options o; + o.full_xpath = false; o.fields = fields; o.delimiter = delimiter; init_( o, split( fields, ',' ) ); diff --git a/csv/test/format_test.cpp b/csv/test/format_test.cpp index 97e3929ea..28ea7f599 100644 --- a/csv/test/format_test.cpp +++ b/csv/test/format_test.cpp @@ -349,6 +349,7 @@ TEST( csv, unstructured ) EXPECT_EQ( "l,l,l,l", comma::csv::impl::unstructured::guess_format( "1,2,3,4" ).string() ); EXPECT_EQ( "l,d,t,s[1024]", comma::csv::impl::unstructured::guess_format( "1,2.1,20121212T000000,blah" ).string() ); comma::csv::options csv; + csv.full_xpath = false; csv.fields = "a,,,b,,,c"; csv.delimiter = ','; EXPECT_EQ( "l,s[1024],s[1024],s[1024],s[1024],s[1024],t", comma::csv::impl::unstructured::guess_format( "1,,,blah,,,20121212T000000" ).string() ); diff --git a/csv/test/options_test.cpp b/csv/test/options_test.cpp index faf3f53ae..8e13dd614 100644 --- a/csv/test/options_test.cpp +++ b/csv/test/options_test.cpp @@ -36,6 +36,7 @@ TEST( options, has_paths ) { { comma::csv::options csv; + csv.full_xpath = false; csv.fields = "a,b/c,d/e/f,p[0],q[0]/x,s/t/y[0],s/t/y[1]/z"; EXPECT_TRUE( csv.has_paths( "a" ) ); EXPECT_TRUE( csv.has_paths( "b" ) ); @@ -66,6 +67,7 @@ TEST( options, has_paths ) } { comma::csv::options csv; + csv.full_xpath = false; 
csv.fields = "a,b/c,d/e/f,p[0],q[0]/x,s/t/y[0],s/t/y[1]/z"; EXPECT_TRUE( csv.has_some_of_paths( "a" ) ); EXPECT_TRUE( csv.has_some_of_paths( "a,blah" ) ); From 65b8a05c1d884c2e6049fd14eed185fd097b0fd1 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 29 Mar 2019 17:13:33 +1100 Subject: [PATCH 0006/1056] csv-thin: trivial brush-up --- csv/applications/csv-thin.cpp | 52 ++++++++++------------------------- 1 file changed, 15 insertions(+), 37 deletions(-) diff --git a/csv/applications/csv-thin.cpp b/csv/applications/csv-thin.cpp index ac6dfa137..61139202e 100644 --- a/csv/applications/csv-thin.cpp +++ b/csv/applications/csv-thin.cpp @@ -53,7 +53,7 @@ using namespace comma; -static void usage(bool detail=false) +static void usage( bool verbose = false ) { std::cerr << std::endl; std::cerr << "Read input data and thin them down by the given percentage;" << std::endl; @@ -68,22 +68,13 @@ static void usage(bool detail=false) std::cerr << " That is, if is 0.33, output every third packet." << std::endl; std::cerr << " Default is to output each packet with a probability of ." << std::endl; std::cerr << " --fields=: use timestamp in fields to determine time for --period" << std::endl; - std::cerr << " --fps,--frames-per-second=: deprecated and removed" << std::endl; std::cerr << " --period=: output once every seconds, ignores " << std::endl; std::cerr << " --size,-s=: data is packets of fixed size, otherwise data is expected" << std::endl; std::cerr << " line-wise. 
Alternatively use --binary" << std::endl; std::cerr << std::endl; - if( detail ) - { - std::cerr << "csv options:" << std::endl; - std::cerr<< comma::csv::options::usage() << std::endl; - std::cerr << std::endl; - } - else - { - std::cerr << "use -v or --verbose to see more detail" << std::endl; - std::cerr << std::endl; - } + std::cerr << "csv options:" << std::endl; + std::cerr << comma::csv::options::usage( verbose ) << std::endl; + std::cerr << std::endl; std::cerr << "examples:" << std::endl; std::cerr << " output 70% of data: cat full.csv | csv-thin 0.7" << std::endl; std::cerr << " output once every 2 seconds: cat full.csv | csv-thin --period 2" << std::endl; @@ -122,12 +113,9 @@ static bool ignore() { static boost::posix_time::ptime next_time = boost::posix_time::microsec_clock::universal_time(); boost::posix_time::ptime now = boost::posix_time::microsec_clock::universal_time(); - if( now > next_time ) - { - next_time += *period; - return false; - } - return true; + if( now <= next_time ) { return true; } + next_time += *period; + return false; } if( deterministic ) { @@ -161,15 +149,12 @@ static bool ignore() return do_ignore && random() > rate; } -static bool ignore_with_timestamp( boost::posix_time::ptime timestamp ) +static bool ignore_by_timestamp( boost::posix_time::ptime timestamp ) { static boost::posix_time::ptime next_time = timestamp; - if( timestamp > next_time ) - { - next_time += *period; - return false; - } - return true; + if( timestamp <= next_time ) { return true; } + next_time += *period; + return false; } int main( int ac, char** av ) @@ -180,7 +165,6 @@ int main( int ac, char** av ) bool binary = options.exists( "--size,-s,--binary,-b" ); deterministic = options.exists( "--deterministic,-d" ); if( options.exists( "--period" )) { period = boost::posix_time::microseconds( static_cast (options.value< double >( "--period" ) * 1000000 )); } - if(options.exists("--fps,--frames-per-second")) { COMMA_THROW( comma::exception, "ERROR: --fps 
option is deprecated and removed! Please talk to software team if you are using it"); } #ifdef WIN32 if( binary ) { _setmode( _fileno( stdin ), _O_BINARY ); _setmode( _fileno( stdout ), _O_BINARY ); } #endif @@ -188,24 +172,18 @@ int main( int ac, char** av ) if( options.exists( "--fields" )) { if( !period ) { COMMA_THROW( comma::exception, "--fields requires --period option" ); } - comma::csv::options csv( options ); - csv.full_xpath = false; - comma::csv::input_stream< timestamped > istream( std::cin, csv ); + comma::csv::input_stream< timestamped > istream( std::cin, comma::csv::options( options ) ); while( std::cin.good() && !std::cin.eof() ) { const timestamped* p = istream.read(); if( !p ) { break; } - if( !ignore_with_timestamp( p->timestamp )) - { - if( istream.is_binary()) { std::cout.write( istream.binary().last(), istream.binary().size() ); } - else { std::cout << comma::join( istream.ascii().last(), istream.ascii().ascii().delimiter() )<< std::endl; } - } + if( ignore_by_timestamp( p->timestamp ) ) { continue; } + if( istream.is_binary()) { std::cout.write( istream.binary().last(), istream.binary().size() ); } + else { std::cout << comma::join( istream.ascii().last(), istream.ascii().ascii().delimiter() )<< std::endl; } } return 0; } - std::vector< std::string > v; - if( !period ) { v = options.unnamed( "--deterministic,-d", "-.*" ); From 5d20df9fb190f2f8425758f6ceeed87201626e87 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sun, 31 Mar 2019 13:17:13 +1100 Subject: [PATCH 0007/1056] updated --- name_value/applications/name-value-to-csv.cpp | 1 - python/comma/csv/applications/csv_eval.py | 1 - 2 files changed, 2 deletions(-) diff --git a/name_value/applications/name-value-to-csv.cpp b/name_value/applications/name-value-to-csv.cpp index b9d678e65..92c38ed19 100644 --- a/name_value/applications/name-value-to-csv.cpp +++ b/name_value/applications/name-value-to-csv.cpp @@ -1,6 +1,5 @@ // This file is part of comma, a generic and flexible library // Copyright 
(c) 2011 The University of Sydney -// Copyright (c) 2019 Vsevolod Vlaskine // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/python/comma/csv/applications/csv_eval.py b/python/comma/csv/applications/csv_eval.py index efd3ceb88..c634e2cb2 100644 --- a/python/comma/csv/applications/csv_eval.py +++ b/python/comma/csv/applications/csv_eval.py @@ -1,6 +1,5 @@ # This file is part of comma, a generic and flexible library # Copyright (c) 2011 The University of Sydney -# Copyright (c) 2018 Vsevolod Vlaskine # All rights reserved. # # Redistribution and use in source and binary forms, with or without From b1d1104ed6789369543313f4c61f9953f1cecd1d Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 1 Apr 2019 12:36:18 +1100 Subject: [PATCH 0008/1056] string::split(): empty_if_empty_input parameter added; csv-split numbers: --fields implemented --- csv/applications/csv-fields.cpp | 7 ++++++- csv/test/csv-fields/expected | 10 ++++++++++ csv/test/csv-fields/input | 5 +++++ string/split.cpp | 12 +++++------- string/split.h | 15 ++++++++------- 5 files changed, 34 insertions(+), 15 deletions(-) diff --git a/csv/applications/csv-fields.cpp b/csv/applications/csv-fields.cpp index 6cea12a13..2e4442673 100644 --- a/csv/applications/csv-fields.cpp +++ b/csv/applications/csv-fields.cpp @@ -53,6 +53,7 @@ static void usage( bool ) std::cerr << " numbers (default): convert comma-separated field names to field numbers" << std::endl; std::cerr << " e.g. for combining with cut or csv-bin-cut" << std::endl; std::cerr << " --count,--size: output the total number of fields" << std::endl; + std::cerr << " --fields=[]: number only fields with given names, same as csv-fields clear --except ... | csv-fields numbers" << std::endl; std::cerr << " --fill: number even empty fields, e.g. 
try: echo ,, | csv-fields numbers --fill" << std::endl; std::cerr << " --from=: start field numbering from ; default=1" << std::endl; std::cerr << " to keep it consistent with linux cut utility" << std::endl; @@ -177,7 +178,10 @@ int main( int ac, char** av ) { int from = options.value( "--from", 1 ); bool fill = options.exists( "--fill" ); - std::string prefix = options.value< std::string >( "--prefix", "" ); + options.assert_mutually_exclusive( "--fill,--fields" ); + const auto& v = comma::split( options.value< std::string >( "--fields", "" ), ',', true ); + std::set< std::string > fields( v.begin(), v.end() ); + std::string prefix = options.value< std::string >( "--prefix", "" ); while( std::cin.good() ) { std::string line; @@ -189,6 +193,7 @@ int main( int ac, char** av ) for( unsigned int i = 0; i < v.size(); ++i ) { if( v[i].empty() && !fill ) { continue; } + if( !fields.empty() && fields.find( v[i] ) == fields.end() ) { continue; } std::cout << comma << prefix << ( i + from ); comma = ','; } diff --git a/csv/test/csv-fields/expected b/csv/test/csv-fields/expected index 43a1a5ac8..f00b0889c 100644 --- a/csv/test/csv-fields/expected +++ b/csv/test/csv-fields/expected @@ -18,6 +18,16 @@ numbers/fill[2]/output="f1,f2,f3,f4" numbers/fill[2]/status=0 numbers/fill[3]/output="f1,f2,f3,f4" numbers/fill[3]/status=0 +numbers/fields[0]/output="" +numbers/fields[0]/status=0 +numbers/fields[1]/output="1" +numbers/fields[1]/status=0 +numbers/fields[2]/output="2" +numbers/fields[2]/status=0 +numbers/fields[3]/output="2" +numbers/fields[3]/status=0 +numbers/fields[4]/output="2,3" +numbers/fields[4]/status=0 clear/keep[0]/output=",,," clear/keep[0]/status=0 diff --git a/csv/test/csv-fields/input b/csv/test/csv-fields/input index c4a92cbd8..f7b0a309f 100644 --- a/csv/test/csv-fields/input +++ b/csv/test/csv-fields/input @@ -8,6 +8,11 @@ numbers/fill[0]="echo ,,, | csv-fields numbers --fill" numbers/fill[1]="echo a,,,b | csv-fields numbers --fill" numbers/fill[2]="echo ,,, | 
csv-fields numbers --fill --prefix f" numbers/fill[3]="echo a,,,b | csv-fields numbers --fill --prefix f" +numbers/fields[0]="echo ,,, | csv-fields numbers --fields=a" +numbers/fields[1]="echo a,,, | csv-fields numbers --fields=a" +numbers/fields[2]="echo ,a,, | csv-fields numbers --fields=a" +numbers/fields[3]="echo ,a,b, | csv-fields numbers --fields=a" +numbers/fields[4]="echo ,a,b, | csv-fields numbers --fields=a,b" clear/keep[0]="echo a,b,c,d | csv-fields clear --keep ,,," clear/keep[1]="echo a,b,c,d | csv-fields clear --keep a" diff --git a/string/split.cpp b/string/split.cpp index ed00ae1ee..fd09fe931 100644 --- a/string/split.cpp +++ b/string/split.cpp @@ -45,26 +45,24 @@ bool string::is_one_of( char c, const char * characters ) return false; } -std::vector< std::string > split( const std::string & s, const char * separators ) +std::vector< std::string > split( const std::string & s, const char * separators, bool empty_if_empty_input ) { std::vector< std::string > v; + if( empty_if_empty_input && s.empty() ) { return v; } const char* begin( &s[0] ); const char* end( begin + s.length() ); v.push_back( std::string() ); for( const char* p = begin; p < end; ++p ) { - if( string::is_one_of( *p, separators ) ) - v.push_back( std::string() ); - else - v.back() += *p; + if( string::is_one_of( *p, separators ) ) { v.push_back( std::string() ); } else { v.back() += *p; } } return v; } -std::vector< std::string > split( const std::string & s, char separator ) +std::vector< std::string > split( const std::string & s, char separator, bool empty_if_empty_input ) { const char separators[] = { separator, 0 }; - return split( s, separators ); + return split( s, separators, empty_if_empty_input ); } std::vector< std::string > split_escaped( const std::string & s, const char * separators, const char * quotes, char escape ) diff --git a/string/split.h b/string/split.h index bdfe94722..ec8f176e3 100644 --- a/string/split.h +++ b/string/split.h @@ -39,15 +39,16 @@ namespace 
comma { namespace string { - // Used for escaping / unescaping to maybe find a character in a list of specials - bool is_one_of( char c, const char * characters ); -} + +bool is_one_of( char c, const char* characters ); + +} // namespace string { /// split string into tokens (a quick implementation); always contains at least one element -std::vector< std::string > split( const std::string& s, const char* separators = " " ); +std::vector< std::string > split( const std::string& s, const char* separators = " ", bool empty_if_empty_input = false ); /// split string into tokens (a quick implementation); always contains at least one element -std::vector< std::string > split( const std::string& s, char separator ); +std::vector< std::string > split( const std::string& s, char separator, bool empty_if_empty_input = false ); /// Split string into tokens; always contains at least one element; /// skips backslash escaped separator, handle non-nested quotes; @@ -64,10 +65,10 @@ std::vector< std::string > split( const std::string& s, char separator ); /// quote must be paired with an end quote, or an exception is thrown. /// Quotes don't nest and can not be mixed; e.g. a ' will not close a " quoted string. 
/// However "'" and '"' are perfectly legal strings of ' and " -std::vector< std::string > split_escaped( const std::string & s, const char * separators = " ", const char * quotes = "\"\'", char escape = '\\' ); +std::vector< std::string > split_escaped( const std::string& s, const char * separators = " ", const char * quotes = "\"\'", char escape = '\\' ); /// split string into tokens; always contains at least one element; /// skips backslash escaped seperator, handle boolean quotes -std::vector< std::string > split_escaped( const std::string & s, char separator, const char * quotes = "\"\'", char escape = '\\' ); +std::vector< std::string > split_escaped( const std::string& s, char separator, const char * quotes = "\"\'", char escape = '\\' ); } // namespace comma { From 85fd860827f39466e889bea3523587d1586ddfda Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 1 Apr 2019 12:37:51 +1100 Subject: [PATCH 0009/1056] csv-fields: test case added --- csv/test/csv-fields/expected | 2 ++ csv/test/csv-fields/input | 1 + 2 files changed, 3 insertions(+) diff --git a/csv/test/csv-fields/expected b/csv/test/csv-fields/expected index f00b0889c..2dddba1ee 100644 --- a/csv/test/csv-fields/expected +++ b/csv/test/csv-fields/expected @@ -28,6 +28,8 @@ numbers/fields[3]/output="2" numbers/fields[3]/status=0 numbers/fields[4]/output="2,3" numbers/fields[4]/status=0 +numbers/fields[5]/output="2,3,4" +numbers/fields[5]/status=0 clear/keep[0]/output=",,," clear/keep[0]/status=0 diff --git a/csv/test/csv-fields/input b/csv/test/csv-fields/input index f7b0a309f..9bb3ea7ca 100644 --- a/csv/test/csv-fields/input +++ b/csv/test/csv-fields/input @@ -13,6 +13,7 @@ numbers/fields[1]="echo a,,, | csv-fields numbers --fields=a" numbers/fields[2]="echo ,a,, | csv-fields numbers --fields=a" numbers/fields[3]="echo ,a,b, | csv-fields numbers --fields=a" numbers/fields[4]="echo ,a,b, | csv-fields numbers --fields=a,b" +numbers/fields[5]="echo ,a,a,a | csv-fields numbers --fields=a" 
clear/keep[0]="echo a,b,c,d | csv-fields clear --keep ,,," clear/keep[1]="echo a,b,c,d | csv-fields clear --keep a" From fd96e42bb547299f11d773559e49dc35cc04f459 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 5 Apr 2019 20:26:11 +1100 Subject: [PATCH 0010/1056] packed: implementing generic big/little endian values to introduce 3-byte big-endian --- packed/big_endian.h | 65 +------------- packed/detail/endian.h | 188 +++++++++++++++++++++++++++++++++++++++++ packed/little_endian.h | 85 +++---------------- 3 files changed, 202 insertions(+), 136 deletions(-) create mode 100644 packed/detail/endian.h diff --git a/packed/big_endian.h b/packed/big_endian.h index 017a6f215..a8b662250 100644 --- a/packed/big_endian.h +++ b/packed/big_endian.h @@ -39,75 +39,12 @@ #include "../base/exception.h" #include "../base/types.h" #include "../packed/field.h" +#include "detail/endian.h" namespace comma { namespace packed { namespace detail { -template < typename T > struct net_traits {}; - -template <> struct net_traits< comma::uint16 > -{ - static comma::uint16 hton( comma::uint16 v ) { return htons( v ); } - static comma::uint16 ntoh( comma::uint16 v ) { return ntohs( v ); } -}; - -template <> struct net_traits< comma::int16 > -{ - static comma::int16 hton( comma::int16 v ) { return htons( v ); } - static comma::int16 ntoh( comma::int16 v ) { return ntohs( v ); } -}; - -template <> struct net_traits< comma::uint32 > -{ - static comma::uint32 hton( comma::uint32 v ) { return htonl( v ); } - static comma::uint32 ntoh( comma::uint32 v ) { return ntohl( v ); } -}; - -template <> struct net_traits< comma::int32 > -{ - static comma::int32 hton( comma::int32 v ) { return htonl( v ); } - static comma::int32 ntoh( comma::int32 v ) { return ntohl( v ); } -}; - -BOOST_STATIC_ASSERT( sizeof( float ) == 4 ); -BOOST_STATIC_ASSERT( sizeof( double ) == 8 ); - -template < typename type, typename uint_of_same_size > -inline type pack_float( type value ) -{ - char storage[sizeof(type)]; - 
uint_of_same_size* p = reinterpret_cast< uint_of_same_size* >( &value ); - for( unsigned int i = 0; i < sizeof( type ); ++i, *p >>= 8 ) { storage[sizeof(type)-i-1] = *p & 0xff; } - const type* result = reinterpret_cast< const type* >( &storage ); - return *result; -} - -template< typename type, typename uint_of_same_size > -inline type unpack_float( type value ) -{ - const char* storage = reinterpret_cast< const char* >( &value ); - uint_of_same_size v = 0; - unsigned int shift = 0; - for( unsigned int i = 0; i < sizeof( type ); ++i, shift += 8 ) { v += static_cast< uint_of_same_size >( ( unsigned char )( storage[sizeof(type)-i-1] ) ) << shift; } - const type* result = reinterpret_cast< const type* >( &v ); - return *result; -} - -template <> struct net_traits< float > -{ - typedef comma::uint32 uint_of_same_size; - static float hton( float value ) { return pack_float< float, uint_of_same_size >( value ); } - static float ntoh( float value ) { return unpack_float< float, uint_of_same_size >( value ); } -}; - -template <> struct net_traits< double > -{ - typedef comma::uint64 uint_of_same_size; - static double hton( double value ) { return pack_float< double, uint_of_same_size >( value ); } - static double ntoh( double value ) { return unpack_float< double, uint_of_same_size >( value ); } -}; - template < typename T > class big_endian : public packed::field< big_endian< T >, T, sizeof( T ) > { diff --git a/packed/detail/endian.h b/packed/detail/endian.h new file mode 100644 index 000000000..c54a3be7f --- /dev/null +++ b/packed/detail/endian.h @@ -0,0 +1,188 @@ +// This file is provided in addition to snark and is not an integral +// part of snark library. +// Copyright (c) 2018 Vsevolod Vlaskine +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE +// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT +// HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// snark is a generic and flexible library for robotics research +// Copyright (c) 2011 The University of Sydney +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. 
Neither the name of the University of Sydney nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE +// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT +// HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +/// @author vsevolod vlaskine + +#pragma once + +#include +#include +#include "../../base/exception.h" +#include "../../base/types.h" +#include "../field.h" + +namespace comma { namespace packed { namespace detail { + +BOOST_STATIC_ASSERT( sizeof( float ) == 4 ); +BOOST_STATIC_ASSERT( sizeof( double ) == 8 ); + +//inline static bool host_is_little_endian_() { comma::uint32 i = 1; return reinterpret_cast< char* >( &i )[0] == 1; } + +//static bool host_is_little_endian = detail::host_is_little_endian_(); // super quick and dirty + +template < typename T > struct net_traits {}; + +template <> struct net_traits< comma::uint16 > +{ + static comma::uint16 hton( comma::uint16 v ) { return htons( v ); } + static comma::uint16 ntoh( comma::uint16 v ) { return ntohs( v ); } +}; + +template <> struct net_traits< comma::int16 > +{ + static comma::int16 hton( comma::int16 v ) { return htons( v ); } + static comma::int16 ntoh( comma::int16 v ) { return ntohs( v ); } +}; + +template <> struct net_traits< comma::uint32 > +{ + static comma::uint32 hton( comma::uint32 v ) { return htonl( v ); } + static comma::uint32 ntoh( comma::uint32 v ) { return ntohl( v ); } +}; + +template <> struct net_traits< comma::int32 > +{ + static comma::int32 hton( comma::int32 v ) { return htonl( v ); } + static comma::int32 ntoh( comma::int32 v ) { return ntohl( v ); } +}; + +template < typename type, typename uint_of_same_size > +inline type pack_float( type value ) +{ + char storage[sizeof(type)]; + uint_of_same_size* p = reinterpret_cast< uint_of_same_size* >( &value ); + for( unsigned int i = 0; i < sizeof( type ); ++i, *p >>= 8 ) { storage[sizeof(type)-i-1] = *p & 0xff; } + const type* result = reinterpret_cast< const type* >( &storage ); + return *result; +} + +template< typename type, typename uint_of_same_size > +inline type unpack_float( type value ) +{ + const char* storage = reinterpret_cast< const char* >( &value ); + uint_of_same_size v = 0; + unsigned int shift = 0; + for( unsigned 
int i = 0; i < sizeof( type ); ++i, shift += 8 ) { v += static_cast< uint_of_same_size >( ( unsigned char )( storage[sizeof(type)-i-1] ) ) << shift; } + const type* result = reinterpret_cast< const type* >( &v ); + return *result; +} + +template <> struct net_traits< float > +{ + typedef comma::uint32 uint_of_same_size; + static float hton( float value ) { return pack_float< float, uint_of_same_size >( value ); } + static float ntoh( float value ) { return unpack_float< float, uint_of_same_size >( value ); } +}; + +template <> struct net_traits< double > +{ + typedef comma::uint64 uint_of_same_size; + static double hton( double value ) { return pack_float< double, uint_of_same_size >( value ); } + static double ntoh( double value ) { return unpack_float< double, uint_of_same_size >( value ); } +}; + +template < bool Little, unsigned int Size, bool Signed, bool Floating = false > struct endian_traits { typedef typename comma::integer< Size, Signed >::type type; typedef typename comma::integer< Size, false >::type uint_of_same_size; }; +template < bool Little > struct endian_traits< Little, 3, true > { typedef comma::int32 type; typedef comma::uint32 uint_of_same_size; }; +template < bool Little > struct endian_traits< Little, 3, false > { typedef comma::uint32 type; typedef comma::uint32 uint_of_same_size; }; +template < bool Little > struct endian_traits< Little, 4, true, true > { typedef float type; typedef comma::uint32 uint_of_same_size; }; +template < bool Little > struct endian_traits< Little, 8, true, true > { typedef double type; typedef comma::uint64 uint_of_same_size; }; + +enum { little = 0, big = 1 }; + +template < bool Little, unsigned int Size, bool Signed, bool Floating = false > +struct endian : public packed::field< endian< Little, Size, Signed, Floating >, typename endian_traits< Little, Size, Signed, Floating >::type, Size > +{ + static const unsigned int size = Size; + + typedef typename endian_traits< Little, Size, Signed, Floating >::type type; 
+ + BOOST_STATIC_ASSERT( size <= sizeof( type ) ); + + typedef packed::field< endian< Little, Size, Signed, Floating >, typename endian_traits< Little, Size, Signed, Floating >::type, Size > base_type; + + static type default_value() { return 0; } + + typedef typename endian_traits< Little, size, Signed, Floating >::uint_of_same_size uint_of_same_size; + + static void pack( char* storage, type value ) + { + uint_of_same_size* p = reinterpret_cast< uint_of_same_size* >( &value ); + for( unsigned int i = 0; i < size; ++i, *p >>= 8 ) { storage[i] = *p & 0xff; } + } + + static type unpack( const char* storage ) // for floats it is a real hack, since there is no standard + { + uint_of_same_size v = 0; + unsigned int shift = 0; + unsigned int i = 0; + for( ; i < size; ++i, shift += 8 ) + { + v += static_cast< uint_of_same_size >( ( unsigned char )( storage[i] ) ) << shift; + } + if( !Floating && Signed && ( storage[ size - 1 ] & 0x80 ) ) + { + for( ; i < sizeof( type ); ++i, shift += 8 ) { v += static_cast< uint_of_same_size >( 0xff ) << shift; } + } + const type* result = reinterpret_cast< const type* >( &v ); + return *result; + } + + const endian& operator=( const endian& rhs ) { return base_type::operator=( rhs ); } + + const endian& operator=( const type& rhs ) { return base_type::operator=( rhs ); } +}; + +} } } // namespace comma { namespace packed { namespace detail { diff --git a/packed/little_endian.h b/packed/little_endian.h index 5d419f095..28694a5b5 100644 --- a/packed/little_endian.h +++ b/packed/little_endian.h @@ -27,102 +27,43 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- /// @author Matthew Herrmann 2007 -/// @author Vsevolod Vlaskine 2010-2011 +/// @author vsevolod vlaskine -#ifndef COMMA_PACKED_LITTLEENDIAN_H_ -#define COMMA_PACKED_LITTLEENDIAN_H_ +#pragma once #include #include "../base/exception.h" #include "../base/types.h" #include "field.h" +#include "detail/endian.h" namespace comma { namespace packed { -namespace detail { - -BOOST_STATIC_ASSERT( sizeof( float ) == 4 ); -BOOST_STATIC_ASSERT( sizeof( double ) == 8 ); - -template < unsigned int Size, bool Signed, bool Floating = false > struct little_endian_traits { typedef typename comma::integer< Size, Signed >::type type; typedef typename comma::integer< Size, false >::type uint_of_same_size; }; -template <> struct little_endian_traits< 3, true > { typedef comma::int32 type; typedef comma::uint32 uint_of_same_size; }; -template <> struct little_endian_traits< 3, false > { typedef comma::uint32 type; typedef comma::uint32 uint_of_same_size; }; -template <> struct little_endian_traits< 4, true, true > { typedef float type; typedef comma::uint32 uint_of_same_size; }; -template <> struct little_endian_traits< 8, true, true > { typedef double type; typedef comma::uint64 uint_of_same_size; }; - -template < unsigned int Size, bool Signed, bool Floating = false > -struct little_endian : public packed::field< little_endian< Size, Signed, Floating >, typename little_endian_traits< Size, Signed, Floating >::type, Size > -{ - static const unsigned int size = Size; - - typedef typename little_endian_traits< Size, Signed, Floating >::type type; - - BOOST_STATIC_ASSERT( size <= sizeof( type ) ); - - typedef packed::field< little_endian< Size, Signed, Floating >, typename little_endian_traits< Size, Signed, Floating >::type, Size > base_type; - - static type default_value() { return 0; } - - typedef typename little_endian_traits< size, Signed, Floating >::uint_of_same_size uint_of_same_size; - - static void pack( char* storage, type value ) - { - uint_of_same_size* p = reinterpret_cast< 
uint_of_same_size* >( &value ); - for( unsigned int i = 0; i < size; ++i, *p >>= 8 ) { storage[i] = *p & 0xff; } - } - - static type unpack( const char* storage ) // for floats it is a real hack, since there is no standard - { - uint_of_same_size v = 0; - unsigned int shift = 0; - unsigned int i = 0; - for( ; i < size; ++i, shift += 8 ) - { - v += static_cast< uint_of_same_size >( ( unsigned char )( storage[i] ) ) << shift; - } - if( !Floating && Signed && ( storage[ size - 1 ] & 0x80 ) ) - { - for( ; i < sizeof( type ); ++i, shift += 8 ) { v += static_cast< uint_of_same_size >( 0xff ) << shift; } - } - const type* result = reinterpret_cast< const type* >( &v ); - return *result; - } - - const little_endian& operator=( const little_endian& rhs ) { return base_type::operator=( rhs ); } - - const little_endian& operator=( const type& rhs ) { return base_type::operator=( rhs ); } -}; - -} // namespace detail { - /// packed little endian 16-bit integers -typedef detail::little_endian< 2, true > little_endian16; -typedef detail::little_endian< 2, false > little_endian_uint16; +typedef detail::endian< detail::little, 2, true > little_endian16; +typedef detail::endian< detail::little, 2, false > little_endian_uint16; typedef little_endian16 int16; typedef little_endian_uint16 uint16; /// packed little endian 24-bit integers (strangely, there are protocols using it) -typedef detail::little_endian< 3, true > little_endian24; -typedef detail::little_endian< 3, false > little_endian_uint24; +typedef detail::endian< detail::little, 3, true > little_endian24; +typedef detail::endian< detail::little, 3, false > little_endian_uint24; typedef little_endian24 int24; typedef little_endian_uint24 uint24; /// packed little endian 32-bit integers -typedef detail::little_endian< 4, true > little_endian32; -typedef detail::little_endian< 4, false > little_endian_uint32; +typedef detail::endian< detail::little, 4, true > little_endian32; +typedef detail::endian< detail::little, 4, false > 
little_endian_uint32; typedef little_endian32 int32; typedef little_endian_uint32 uint32; /// packed little endian 32-bit integers -typedef detail::little_endian< 8, true > little_endian64; -typedef detail::little_endian< 8, false > little_endian_uint64; +typedef detail::endian< detail::little, 8, true > little_endian64; +typedef detail::endian< detail::little, 8, false > little_endian_uint64; typedef little_endian64 int64; typedef little_endian_uint64 uint64; /// packed floating point number (does it even make sense?) -typedef detail::little_endian< 4, true, true > little_endian_float32; -typedef detail::little_endian< 8, true, true > little_endian_float64; +typedef detail::endian< detail::little, 4, true, true > little_endian_float32; +typedef detail::endian< detail::little, 8, true, true > little_endian_float64; typedef little_endian_float32 float32; typedef little_endian_float64 float64; } } // namespace comma { namespace packed { - -#endif // #ifndef COMMA_PACKED_LITTLEENDIAN_H_ From 31dea043b906e4fb0834953e9a541115266a5f89 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 5 Apr 2019 21:27:13 +1100 Subject: [PATCH 0011/1056] packed: implementing generic big/little endian values to introduce 3-byte big-endian... 
--- packed/big_endian.h | 57 ++----------- packed/detail/endian.h | 177 +++++++++++++++++++++++++++-------------- packed/little_endian.h | 4 - 3 files changed, 125 insertions(+), 113 deletions(-) diff --git a/packed/big_endian.h b/packed/big_endian.h index a8b662250..bc20eef82 100644 --- a/packed/big_endian.h +++ b/packed/big_endian.h @@ -31,74 +31,31 @@ /// @author Matthew Herrmann 2007 /// @author Vsevolod Vlaskine 2010-2011 -#ifndef COMMA_PACKED_BIG_ENDIAN_H_ -#define COMMA_PACKED_BIG_ENDIAN_H_ +#pragma once -#include -#include -#include "../base/exception.h" -#include "../base/types.h" -#include "../packed/field.h" #include "detail/endian.h" namespace comma { namespace packed { -namespace detail { - -template < typename T > -class big_endian : public packed::field< big_endian< T >, T, sizeof( T ) > -{ - public: - enum { size = sizeof( T ) }; - - typedef T type; - - typedef packed::field< big_endian< T >, T, size > base_type; - - static type default_value() { return 0; } - - static void pack( char* storage, type value ) - { - type v( net_traits< type >::hton( value ) ); - ::memcpy( storage, ( void* )&v, size ); - } - - static type unpack( const char* storage ) - { - type value; - ::memcpy( ( void* )&value, storage, size ); - return net_traits< type >::ntoh( value ); - } - - const big_endian& operator=( const big_endian& rhs ) { return base_type::operator=( rhs ); } - - const big_endian& operator=( type rhs ) { return base_type::operator=( rhs ); } -}; - -} // namespace detail { - /// big endian 16-bit integers -typedef detail::big_endian< comma::uint16 > big_endian_uint16; -typedef detail::big_endian< comma::int16 > big_endian_int16; +typedef detail::endian< detail::big, 2, false > big_endian_uint16; +typedef detail::endian< detail::big, 2, true > big_endian_int16; /// aliases for big endian 16-bit integers typedef big_endian_uint16 net_uint16; typedef big_endian_int16 net_int16; /// big endian 32-bit integers -typedef detail::big_endian< comma::uint32 > 
big_endian_uint32; -typedef detail::big_endian< comma::int32 > big_endian_int32; +typedef detail::endian< detail::big, 4, false > big_endian_uint32; +typedef detail::endian< detail::big, 4, true > big_endian_int32; /// aliases for big endian 32-bit integers typedef big_endian_uint32 net_uint32; typedef big_endian_int32 net_int32; /// big endian float and double -typedef detail::big_endian< float > big_endian_float32; -typedef detail::big_endian< double > big_endian_float64; +typedef detail::endian< detail::big, 4, true, true > big_endian_float32; +typedef detail::endian< detail::big, 8, true, true > big_endian_float64; typedef big_endian_float64 big_endian_double; /// aliases for big endian float and double typedef big_endian_float32 net_float32; typedef big_endian_float64 net_float64; typedef net_float64 net_double; - } } // namespace comma { namespace packed { - -#endif // #ifndef COMMA_PACKED_BIG_ENDIAN_H_ diff --git a/packed/detail/endian.h b/packed/detail/endian.h index c54a3be7f..4120e92b6 100644 --- a/packed/detail/endian.h +++ b/packed/detail/endian.h @@ -58,6 +58,7 @@ #pragma once +#include #include #include #include "../../base/exception.h" @@ -69,115 +70,173 @@ namespace comma { namespace packed { namespace detail { BOOST_STATIC_ASSERT( sizeof( float ) == 4 ); BOOST_STATIC_ASSERT( sizeof( double ) == 8 ); -//inline static bool host_is_little_endian_() { comma::uint32 i = 1; return reinterpret_cast< char* >( &i )[0] == 1; } - -//static bool host_is_little_endian = detail::host_is_little_endian_(); // super quick and dirty - template < typename T > struct net_traits {}; template <> struct net_traits< comma::uint16 > { - static comma::uint16 hton( comma::uint16 v ) { return htons( v ); } - static comma::uint16 ntoh( comma::uint16 v ) { return ntohs( v ); } + typedef comma::uint16 uint_of_same_size; + static comma::uint16 htobe( comma::uint16 v ) { return htobe16( v ); } + static comma::uint16 betoh( comma::uint16 v ) { return be16toh( v ); } + static 
comma::uint16 htole( comma::uint16 v ) { return htole16( v ); } + static comma::uint16 letoh( comma::uint16 v ) { return le16toh( v ); } }; template <> struct net_traits< comma::int16 > { - static comma::int16 hton( comma::int16 v ) { return htons( v ); } - static comma::int16 ntoh( comma::int16 v ) { return ntohs( v ); } + typedef comma::uint16 uint_of_same_size; + static comma::int16 htobe( comma::int16 v ) { return htobe16( v ); } + static comma::int16 betoh( comma::int16 v ) { return be16toh( v ); } + static comma::int16 htole( comma::int16 v ) { return htole16( v ); } + static comma::int16 letoh( comma::int16 v ) { return le16toh( v ); } }; template <> struct net_traits< comma::uint32 > { - static comma::uint32 hton( comma::uint32 v ) { return htonl( v ); } - static comma::uint32 ntoh( comma::uint32 v ) { return ntohl( v ); } + typedef comma::uint32 uint_of_same_size; + static comma::uint32 htobe( comma::uint32 v ) { return htobe32( v ); } + static comma::uint32 betoh( comma::uint32 v ) { return be32toh( v ); } + static comma::uint32 htole( comma::uint32 v ) { return htole32( v ); } + static comma::uint32 letoh( comma::uint32 v ) { return le32toh( v ); } }; template <> struct net_traits< comma::int32 > { - static comma::int32 hton( comma::int32 v ) { return htonl( v ); } - static comma::int32 ntoh( comma::int32 v ) { return ntohl( v ); } + typedef comma::uint32 uint_of_same_size; + static comma::int32 htobe( comma::int32 v ) { return htobe32( v ); } + static comma::int32 betoh( comma::int32 v ) { return be32toh( v ); } + static comma::int32 htole( comma::int32 v ) { return htole32( v ); } + static comma::int32 letoh( comma::int32 v ) { return le32toh( v ); } }; -template < typename type, typename uint_of_same_size > -inline type pack_float( type value ) +template <> struct net_traits< comma::uint64 > { - char storage[sizeof(type)]; - uint_of_same_size* p = reinterpret_cast< uint_of_same_size* >( &value ); - for( unsigned int i = 0; i < sizeof( type ); ++i, *p 
>>= 8 ) { storage[sizeof(type)-i-1] = *p & 0xff; } - const type* result = reinterpret_cast< const type* >( &storage ); - return *result; -} - -template< typename type, typename uint_of_same_size > -inline type unpack_float( type value ) + typedef comma::uint64 uint_of_same_size; + static comma::uint64 htobe( comma::uint64 v ) { return htobe64( v ); } + static comma::uint64 betoh( comma::uint64 v ) { return be64toh( v ); } + static comma::uint64 htole( comma::uint64 v ) { return htole64( v ); } + static comma::uint64 letoh( comma::uint64 v ) { return le64toh( v ); } +}; + +template <> struct net_traits< comma::int64 > { - const char* storage = reinterpret_cast< const char* >( &value ); - uint_of_same_size v = 0; - unsigned int shift = 0; - for( unsigned int i = 0; i < sizeof( type ); ++i, shift += 8 ) { v += static_cast< uint_of_same_size >( ( unsigned char )( storage[sizeof(type)-i-1] ) ) << shift; } - const type* result = reinterpret_cast< const type* >( &v ); - return *result; -} + typedef comma::uint64 uint_of_same_size; + static comma::int64 htobe( comma::int64 v ) { return htobe64( v ); } + static comma::int64 betoh( comma::int64 v ) { return be64toh( v ); } + static comma::int64 htole( comma::int64 v ) { return htole64( v ); } + static comma::int64 letoh( comma::int64 v ) { return le64toh( v ); } +}; template <> struct net_traits< float > { typedef comma::uint32 uint_of_same_size; - static float hton( float value ) { return pack_float< float, uint_of_same_size >( value ); } - static float ntoh( float value ) { return unpack_float< float, uint_of_same_size >( value ); } + static float htobe( float value ) + { + uint_of_same_size v = net_traits< uint_of_same_size >::htobe( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); + return *( reinterpret_cast< float* >( &v ) ); + } + static float betoh( float value ) + { + uint_of_same_size v = net_traits< uint_of_same_size >::betoh( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); + return *( 
reinterpret_cast< float* >( &v ) ); + } + static float htole( float value ) + { + uint_of_same_size v = net_traits< uint_of_same_size >::htole( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); + return *( reinterpret_cast< float* >( &v ) ); + } + static float letoh( float value ) + { + uint_of_same_size v = net_traits< uint_of_same_size >::letoh( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); + return *( reinterpret_cast< float* >( &v ) ); + } }; template <> struct net_traits< double > { typedef comma::uint64 uint_of_same_size; - static double hton( double value ) { return pack_float< double, uint_of_same_size >( value ); } - static double ntoh( double value ) { return unpack_float< double, uint_of_same_size >( value ); } + static double htobe( double value ) + { + uint_of_same_size v = net_traits< uint_of_same_size >::htobe( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); + return *( reinterpret_cast< double* >( &v ) ); + } + static double betoh( double value ) + { + uint_of_same_size v = net_traits< uint_of_same_size >::betoh( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); + return *( reinterpret_cast< double* >( &v ) ); + } + static double htole( double value ) + { + uint_of_same_size v = net_traits< uint_of_same_size >::htole( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); + return *( reinterpret_cast< double* >( &v ) ); + } + static double letoh( double value ) + { + uint_of_same_size v = net_traits< uint_of_same_size >::letoh( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); + return *( reinterpret_cast< double* >( &v ) ); + } }; - -template < bool Little, unsigned int Size, bool Signed, bool Floating = false > struct endian_traits { typedef typename comma::integer< Size, Signed >::type type; typedef typename comma::integer< Size, false >::type uint_of_same_size; }; -template < bool Little > struct endian_traits< Little, 3, true > { typedef comma::int32 type; typedef comma::uint32 uint_of_same_size; }; 
-template < bool Little > struct endian_traits< Little, 3, false > { typedef comma::uint32 type; typedef comma::uint32 uint_of_same_size; }; -template < bool Little > struct endian_traits< Little, 4, true, true > { typedef float type; typedef comma::uint32 uint_of_same_size; }; -template < bool Little > struct endian_traits< Little, 8, true, true > { typedef double type; typedef comma::uint64 uint_of_same_size; }; - -enum { little = 0, big = 1 }; -template < bool Little, unsigned int Size, bool Signed, bool Floating = false > -struct endian : public packed::field< endian< Little, Size, Signed, Floating >, typename endian_traits< Little, Size, Signed, Floating >::type, Size > +enum endiannes { little = 0, big = 1 }; + +template < endiannes Endianness, unsigned int Size, bool Signed, bool Floating = false > struct endian_traits { typedef typename comma::integer< Size, Signed >::type type; typedef typename comma::integer< Size, false >::type uint_of_same_size; }; +template < endiannes Endianness > struct endian_traits< Endianness, 3, true > { typedef comma::int32 type; typedef comma::uint32 uint_of_same_size; }; +template < endiannes Endianness > struct endian_traits< Endianness, 3, false > { typedef comma::uint32 type; typedef comma::uint32 uint_of_same_size; }; +template < endiannes Endianness > struct endian_traits< Endianness, 4, true, true > { typedef float type; typedef comma::uint32 uint_of_same_size; }; +template < endiannes Endianness > struct endian_traits< Endianness, 8, true, true > { typedef double type; typedef comma::uint64 uint_of_same_size; }; + +template < endiannes Endianness, unsigned int Size, bool Signed, bool Floating = false > +struct endian : public packed::field< endian< Endianness, Size, Signed, Floating >, typename endian_traits< Endianness, Size, Signed, Floating >::type, Size > { static const unsigned int size = Size; - typedef typename endian_traits< Little, Size, Signed, Floating >::type type; + typedef typename endian_traits< 
Endianness, Size, Signed, Floating >::type type; BOOST_STATIC_ASSERT( size <= sizeof( type ) ); - typedef packed::field< endian< Little, Size, Signed, Floating >, typename endian_traits< Little, Size, Signed, Floating >::type, Size > base_type; + typedef packed::field< endian< Endianness, Size, Signed, Floating >, typename endian_traits< Endianness, Size, Signed, Floating >::type, Size > base_type; static type default_value() { return 0; } - typedef typename endian_traits< Little, size, Signed, Floating >::uint_of_same_size uint_of_same_size; + typedef typename endian_traits< Endianness, size, Signed, Floating >::uint_of_same_size uint_of_same_size; static void pack( char* storage, type value ) { - uint_of_same_size* p = reinterpret_cast< uint_of_same_size* >( &value ); - for( unsigned int i = 0; i < size; ++i, *p >>= 8 ) { storage[i] = *p & 0xff; } + if( Endianness == packed::detail::little ) // no point for further generics; should be optimized by compiler anyway + { + uint_of_same_size* p = reinterpret_cast< uint_of_same_size* >( &value ); + for( unsigned int i = 0; i < size; ++i, *p >>= 8 ) { storage[i] = *p & 0xff; } + } + else + { + type v( net_traits< type >::htobe( value ) ); + ::memcpy( storage, ( void* )&v, size ); + } } static type unpack( const char* storage ) // for floats it is a real hack, since there is no standard { - uint_of_same_size v = 0; - unsigned int shift = 0; - unsigned int i = 0; - for( ; i < size; ++i, shift += 8 ) + if( Endianness == packed::detail::little ) // no point for further generics; should be optimized by compiler anyway { - v += static_cast< uint_of_same_size >( ( unsigned char )( storage[i] ) ) << shift; + uint_of_same_size v = 0; + unsigned int shift = 0; + unsigned int i = 0; + for( ; i < size; ++i, shift += 8 ) + { + v += static_cast< uint_of_same_size >( ( unsigned char )( storage[i] ) ) << shift; + } + if( !Floating && Signed && ( storage[ size - 1 ] & 0x80 ) ) + { + for( ; i < sizeof( type ); ++i, shift += 8 ) { v += 
static_cast< uint_of_same_size >( 0xff ) << shift; } + } + const type* result = reinterpret_cast< const type* >( &v ); + return *result; } - if( !Floating && Signed && ( storage[ size - 1 ] & 0x80 ) ) - { - for( ; i < sizeof( type ); ++i, shift += 8 ) { v += static_cast< uint_of_same_size >( 0xff ) << shift; } + else + { + type value; + ::memcpy( ( void* )&value, storage, size ); + return net_traits< type >::betoh( value ); } - const type* result = reinterpret_cast< const type* >( &v ); - return *result; } const endian& operator=( const endian& rhs ) { return base_type::operator=( rhs ); } diff --git a/packed/little_endian.h b/packed/little_endian.h index 28694a5b5..b2b9a823d 100644 --- a/packed/little_endian.h +++ b/packed/little_endian.h @@ -32,10 +32,6 @@ #pragma once -#include -#include "../base/exception.h" -#include "../base/types.h" -#include "field.h" #include "detail/endian.h" namespace comma { namespace packed { From 5b03746b6f180304ee27a8b689c914d497f23dfe Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 5 Apr 2019 21:29:23 +1100 Subject: [PATCH 0012/1056] packed: cmake fixed --- packed/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packed/CMakeLists.txt b/packed/CMakeLists.txt index 4475ab364..648c65102 100644 --- a/packed/CMakeLists.txt +++ b/packed/CMakeLists.txt @@ -3,11 +3,13 @@ SET( TARGET_NAME comma_${PROJECT} ) FILE( GLOB source ${SOURCE_CODE_BASE_DIR}/${PROJECT}/*.cpp ) FILE( GLOB includes ${SOURCE_CODE_BASE_DIR}/${PROJECT}/*.h ) -SOURCE_GROUP( ${PROJECT} FILES ${source} ${includes} ) -ADD_CUSTOM_TARGET( ${TARGET_NAME} ${source} ${includes} ) +FILE( GLOB detail_includes ${SOURCE_CODE_BASE_DIR}/${PROJECT}/detail/*.h ) +SOURCE_GROUP( ${PROJECT} FILES ${source} ${includes} ${detail_includes} ) +ADD_CUSTOM_TARGET( ${TARGET_NAME} ${source} ${includes} ${detail_includes} ) # target_link_libraries( ${TARGET_NAME} comma_string ) INSTALL( FILES ${includes} DESTINATION ${comma_INSTALL_INCLUDE_DIR}/${PROJECT} ) 
+INSTALL( FILES ${detail_includes} DESTINATION ${comma_INSTALL_INCLUDE_DIR}/${PROJECT}/detail ) IF( comma_BUILD_TESTS ) ADD_SUBDIRECTORY( test ) From a26109369fa48b6a59442b09d2a2c2f25790ad3b Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 5 Apr 2019 21:51:11 +1100 Subject: [PATCH 0013/1056] packed: implementing generic big/little endian values to introduce 3-byte big-endian... --- packed/detail/endian.h | 109 ++++++----------------------------------- 1 file changed, 15 insertions(+), 94 deletions(-) diff --git a/packed/detail/endian.h b/packed/detail/endian.h index 4120e92b6..53250411e 100644 --- a/packed/detail/endian.h +++ b/packed/detail/endian.h @@ -70,120 +70,40 @@ namespace comma { namespace packed { namespace detail { BOOST_STATIC_ASSERT( sizeof( float ) == 4 ); BOOST_STATIC_ASSERT( sizeof( double ) == 8 ); +enum endiannes { little = 0, big = 1 }; + +template < endiannes Endianness, unsigned int Size, bool Signed, bool Floating = false > struct endian_traits { typedef typename comma::integer< Size, Signed >::type type; typedef typename comma::integer< Size, false >::type uint_of_same_size; }; +template < endiannes Endianness > struct endian_traits< Endianness, 3, true > { typedef comma::int32 type; typedef comma::uint32 uint_of_same_size; }; +template < endiannes Endianness > struct endian_traits< Endianness, 3, false > { typedef comma::uint32 type; typedef comma::uint32 uint_of_same_size; }; +template < endiannes Endianness > struct endian_traits< Endianness, 4, true, true > { typedef float type; typedef comma::uint32 uint_of_same_size; }; +template < endiannes Endianness > struct endian_traits< Endianness, 8, true, true > { typedef double type; typedef comma::uint64 uint_of_same_size; }; + template < typename T > struct net_traits {}; template <> struct net_traits< comma::uint16 > { - typedef comma::uint16 uint_of_same_size; static comma::uint16 htobe( comma::uint16 v ) { return htobe16( v ); } static comma::uint16 betoh( comma::uint16 v ) { return 
be16toh( v ); } static comma::uint16 htole( comma::uint16 v ) { return htole16( v ); } static comma::uint16 letoh( comma::uint16 v ) { return le16toh( v ); } }; -template <> struct net_traits< comma::int16 > -{ - typedef comma::uint16 uint_of_same_size; - static comma::int16 htobe( comma::int16 v ) { return htobe16( v ); } - static comma::int16 betoh( comma::int16 v ) { return be16toh( v ); } - static comma::int16 htole( comma::int16 v ) { return htole16( v ); } - static comma::int16 letoh( comma::int16 v ) { return le16toh( v ); } -}; - template <> struct net_traits< comma::uint32 > { - typedef comma::uint32 uint_of_same_size; static comma::uint32 htobe( comma::uint32 v ) { return htobe32( v ); } static comma::uint32 betoh( comma::uint32 v ) { return be32toh( v ); } static comma::uint32 htole( comma::uint32 v ) { return htole32( v ); } static comma::uint32 letoh( comma::uint32 v ) { return le32toh( v ); } }; -template <> struct net_traits< comma::int32 > -{ - typedef comma::uint32 uint_of_same_size; - static comma::int32 htobe( comma::int32 v ) { return htobe32( v ); } - static comma::int32 betoh( comma::int32 v ) { return be32toh( v ); } - static comma::int32 htole( comma::int32 v ) { return htole32( v ); } - static comma::int32 letoh( comma::int32 v ) { return le32toh( v ); } -}; - template <> struct net_traits< comma::uint64 > { - typedef comma::uint64 uint_of_same_size; static comma::uint64 htobe( comma::uint64 v ) { return htobe64( v ); } static comma::uint64 betoh( comma::uint64 v ) { return be64toh( v ); } static comma::uint64 htole( comma::uint64 v ) { return htole64( v ); } static comma::uint64 letoh( comma::uint64 v ) { return le64toh( v ); } }; -template <> struct net_traits< comma::int64 > -{ - typedef comma::uint64 uint_of_same_size; - static comma::int64 htobe( comma::int64 v ) { return htobe64( v ); } - static comma::int64 betoh( comma::int64 v ) { return be64toh( v ); } - static comma::int64 htole( comma::int64 v ) { return htole64( v ); } - static 
comma::int64 letoh( comma::int64 v ) { return le64toh( v ); } -}; - -template <> struct net_traits< float > -{ - typedef comma::uint32 uint_of_same_size; - static float htobe( float value ) - { - uint_of_same_size v = net_traits< uint_of_same_size >::htobe( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); - return *( reinterpret_cast< float* >( &v ) ); - } - static float betoh( float value ) - { - uint_of_same_size v = net_traits< uint_of_same_size >::betoh( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); - return *( reinterpret_cast< float* >( &v ) ); - } - static float htole( float value ) - { - uint_of_same_size v = net_traits< uint_of_same_size >::htole( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); - return *( reinterpret_cast< float* >( &v ) ); - } - static float letoh( float value ) - { - uint_of_same_size v = net_traits< uint_of_same_size >::letoh( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); - return *( reinterpret_cast< float* >( &v ) ); - } -}; - -template <> struct net_traits< double > -{ - typedef comma::uint64 uint_of_same_size; - static double htobe( double value ) - { - uint_of_same_size v = net_traits< uint_of_same_size >::htobe( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); - return *( reinterpret_cast< double* >( &v ) ); - } - static double betoh( double value ) - { - uint_of_same_size v = net_traits< uint_of_same_size >::betoh( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); - return *( reinterpret_cast< double* >( &v ) ); - } - static double htole( double value ) - { - uint_of_same_size v = net_traits< uint_of_same_size >::htole( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); - return *( reinterpret_cast< double* >( &v ) ); - } - static double letoh( double value ) - { - uint_of_same_size v = net_traits< uint_of_same_size >::letoh( *( reinterpret_cast< uint_of_same_size* >( &value ) ) ); - return *( reinterpret_cast< double* >( &v ) ); - } -}; - -enum endiannes { little = 0, 
big = 1 }; - -template < endiannes Endianness, unsigned int Size, bool Signed, bool Floating = false > struct endian_traits { typedef typename comma::integer< Size, Signed >::type type; typedef typename comma::integer< Size, false >::type uint_of_same_size; }; -template < endiannes Endianness > struct endian_traits< Endianness, 3, true > { typedef comma::int32 type; typedef comma::uint32 uint_of_same_size; }; -template < endiannes Endianness > struct endian_traits< Endianness, 3, false > { typedef comma::uint32 type; typedef comma::uint32 uint_of_same_size; }; -template < endiannes Endianness > struct endian_traits< Endianness, 4, true, true > { typedef float type; typedef comma::uint32 uint_of_same_size; }; -template < endiannes Endianness > struct endian_traits< Endianness, 8, true, true > { typedef double type; typedef comma::uint64 uint_of_same_size; }; - template < endiannes Endianness, unsigned int Size, bool Signed, bool Floating = false > struct endian : public packed::field< endian< Endianness, Size, Signed, Floating >, typename endian_traits< Endianness, Size, Signed, Floating >::type, Size > { @@ -201,6 +121,7 @@ struct endian : public packed::field< endian< Endianness, Size, Signed, Floating static void pack( char* storage, type value ) { + uint_of_same_size* p = reinterpret_cast< uint_of_same_size* >( &value ); if( Endianness == packed::detail::little ) // no point for further generics; should be optimized by compiler anyway { uint_of_same_size* p = reinterpret_cast< uint_of_same_size* >( &value ); @@ -208,16 +129,16 @@ struct endian : public packed::field< endian< Endianness, Size, Signed, Floating } else { - type v( net_traits< type >::htobe( value ) ); - ::memcpy( storage, ( void* )&v, size ); + uint_of_same_size i = net_traits< uint_of_same_size >::htobe( *p ); + ::memcpy( storage, ( void* )&i, size ); } } static type unpack( const char* storage ) // for floats it is a real hack, since there is no standard { + uint_of_same_size v = 0; if( 
Endianness == packed::detail::little ) // no point for further generics; should be optimized by compiler anyway { - uint_of_same_size v = 0; unsigned int shift = 0; unsigned int i = 0; for( ; i < size; ++i, shift += 8 ) @@ -233,9 +154,9 @@ struct endian : public packed::field< endian< Endianness, Size, Signed, Floating } else { - type value; - ::memcpy( ( void* )&value, storage, size ); - return net_traits< type >::betoh( value ); + ::memcpy( ( void* )&v, storage, size ); + v = net_traits< uint_of_same_size >::betoh( v ); + return *( reinterpret_cast< type* >( &v ) ); } } From e77037cc82ddce34a56c75f86d9d215e48297f1e Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 5 Apr 2019 23:38:38 +1100 Subject: [PATCH 0014/1056] packed/traits.h removed --- packed/traits.h | 49 ------------------------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 packed/traits.h diff --git a/packed/traits.h b/packed/traits.h deleted file mode 100644 index e16a5e22e..000000000 --- a/packed/traits.h +++ /dev/null @@ -1,49 +0,0 @@ -// This file is part of comma, a generic and flexible library -// Copyright (c) 2011 The University of Sydney -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// 3. Neither the name of the University of Sydney nor the -// names of its contributors may be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE -// GRANTED BY THIS LICENSE. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT -// HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED -// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN -// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -#include "big_endian.h" -#include -#include "../visiting/traits.h" - -namespace comma { namespace visiting { - -template < typename T, std::size_t N > struct traits< boost::array, N> > -{ - template< typename K, typename V > static void visit( const K& k, const boost::array, N>& t, V& v ) - { - for( std::size_t i = 0; i < t.size(); i++ ) { v.apply( i, t[i]() ); } - } -}; - - -} } //namespace comma { namespace visiting { - - From a0c4b66b333d4ca060c1d1b66cd8a2ce9462030c Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sat, 6 Apr 2019 13:34:06 +1100 Subject: [PATCH 0015/1056] packed: endian stuff further refactored in preparation to handle signed 24-bit big endian numbers --- packed/detail/endian.h | 61 ++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/packed/detail/endian.h b/packed/detail/endian.h index 53250411e..6fb850558 100644 --- a/packed/detail/endian.h +++ b/packed/detail/endian.h @@ -78,7 +78,7 @@ template < endiannes Endianness > struct endian_traits< Endianness, 3, false > { template < endiannes Endianness > struct endian_traits< Endianness, 4, true, true > { typedef float type; typedef 
comma::uint32 uint_of_same_size; }; template < endiannes Endianness > struct endian_traits< Endianness, 8, true, true > { typedef double type; typedef comma::uint64 uint_of_same_size; }; -template < typename T > struct net_traits {}; +template < typename T > struct net_traits; template <> struct net_traits< comma::uint16 > { @@ -104,6 +104,23 @@ template <> struct net_traits< comma::uint64 > static comma::uint64 letoh( comma::uint64 v ) { return le64toh( v ); } }; +template < endiannes Endianness > struct convert; + +template <> struct convert< packed::detail::little > +{ + template < typename T > static T from_host( T t ) { return net_traits< T >::htole( t ); } + template < typename T > static T to_host( T t ) { return net_traits< T >::letoh( t ); } +}; + +template <> struct convert< packed::detail::big > +{ + template < typename T > static T from_host( T t ) { return net_traits< T >::htobe( t ); } + template < typename T > static T to_host( T t ) { return net_traits< T >::betoh( t ); } +}; + +template < unsigned int Size > struct ff { enum { value = ff< Size - 1 >::value << 8 + 0xff }; }; +template <> struct ff< 1 > { enum { value = 0xff }; }; + template < endiannes Endianness, unsigned int Size, bool Signed, bool Floating = false > struct endian : public packed::field< endian< Endianness, Size, Signed, Floating >, typename endian_traits< Endianness, Size, Signed, Floating >::type, Size > { @@ -112,6 +129,8 @@ struct endian : public packed::field< endian< Endianness, Size, Signed, Floating typedef typename endian_traits< Endianness, Size, Signed, Floating >::type type; BOOST_STATIC_ASSERT( size <= sizeof( type ) ); + + BOOST_STATIC_ASSERT( Signed || !Floating ); // unsigned floats don't make sense typedef packed::field< endian< Endianness, Size, Signed, Floating >, typename endian_traits< Endianness, Size, Signed, Floating >::type, Size > base_type; @@ -122,42 +141,26 @@ struct endian : public packed::field< endian< Endianness, Size, Signed, Floating static void 
pack( char* storage, type value ) { uint_of_same_size* p = reinterpret_cast< uint_of_same_size* >( &value ); - if( Endianness == packed::detail::little ) // no point for further generics; should be optimized by compiler anyway + *p = convert< Endianness >::from_host( *p ); + if( sizeof( uint_of_same_size ) == size ) // no point for further generics; should be optimized by compiler anyway { - uint_of_same_size* p = reinterpret_cast< uint_of_same_size* >( &value ); - for( unsigned int i = 0; i < size; ++i, *p >>= 8 ) { storage[i] = *p & 0xff; } + ::memcpy( storage, ( void* )p, size ); } else { - uint_of_same_size i = net_traits< uint_of_same_size >::htobe( *p ); - ::memcpy( storage, ( void* )&i, size ); + // todo! 24-bit big endian!!! + // todo: test signed big endian + + for( unsigned int i = 0; i < size; ++i, *p >>= 8 ) { storage[i] = *p & 0xff; } } } - + static type unpack( const char* storage ) // for floats it is a real hack, since there is no standard { - uint_of_same_size v = 0; - if( Endianness == packed::detail::little ) // no point for further generics; should be optimized by compiler anyway - { - unsigned int shift = 0; - unsigned int i = 0; - for( ; i < size; ++i, shift += 8 ) - { - v += static_cast< uint_of_same_size >( ( unsigned char )( storage[i] ) ) << shift; - } - if( !Floating && Signed && ( storage[ size - 1 ] & 0x80 ) ) - { - for( ; i < sizeof( type ); ++i, shift += 8 ) { v += static_cast< uint_of_same_size >( 0xff ) << shift; } - } - const type* result = reinterpret_cast< const type* >( &v ); - return *result; - } - else - { - ::memcpy( ( void* )&v, storage, size ); - v = net_traits< uint_of_same_size >::betoh( v ); - return *( reinterpret_cast< type* >( &v ) ); - } + uint_of_same_size i = ( !Floating && Signed && ( storage[ Endianness == little ? size - 1 : 0 ] & 0x80 ) ) ? -1 : 0; + ::memcpy( ( void* )&i + ( Endianness == little ? 
0 : sizeof( uint_of_same_size ) - size ), storage, size ); + i = convert< Endianness >::to_host( i ); + return *( reinterpret_cast< type* >( &i ) ); } const endian& operator=( const endian& rhs ) { return base_type::operator=( rhs ); } From 155e0f1f1431e62d4b05e2d87108aff9ce2acef5 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sat, 6 Apr 2019 19:44:57 +1100 Subject: [PATCH 0016/1056] packed: 24-bit big endian implemented; basic unit test added --- packed/big_endian.h | 28 ++++++-- packed/detail/endian.h | 12 +--- packed/little_endian.h | 21 ++++-- packed/test/packed_test.cpp | 127 ++++++++++++++++++------------------ 4 files changed, 103 insertions(+), 85 deletions(-) diff --git a/packed/big_endian.h b/packed/big_endian.h index bc20eef82..230eafb04 100644 --- a/packed/big_endian.h +++ b/packed/big_endian.h @@ -37,25 +37,41 @@ namespace comma { namespace packed { -/// big endian 16-bit integers typedef detail::endian< detail::big, 2, false > big_endian_uint16; typedef detail::endian< detail::big, 2, true > big_endian_int16; -/// aliases for big endian 16-bit integers typedef big_endian_uint16 net_uint16; typedef big_endian_int16 net_int16; -/// big endian 32-bit integers +typedef detail::endian< detail::big, 3, false > big_endian_uint24; +typedef detail::endian< detail::big, 3, true > big_endian_int24; +typedef big_endian_uint24 net_uint24; +typedef big_endian_int24 net_int24; typedef detail::endian< detail::big, 4, false > big_endian_uint32; typedef detail::endian< detail::big, 4, true > big_endian_int32; -/// aliases for big endian 32-bit integers typedef big_endian_uint32 net_uint32; typedef big_endian_int32 net_int32; -/// big endian float and double typedef detail::endian< detail::big, 4, true, true > big_endian_float32; typedef detail::endian< detail::big, 8, true, true > big_endian_float64; +typedef big_endian_float32 big_endian_float; typedef big_endian_float64 big_endian_double; -/// aliases for big endian float and double typedef big_endian_float32 
net_float32; typedef big_endian_float64 net_float64; +typedef net_float32 net_float; typedef net_float64 net_double; +// all types above deprecated; use namespacing below +namespace big_endian { // i love namespacing + +typedef detail::endian< detail::big, 2, false > uint16; +typedef detail::endian< detail::big, 2, true > int16; +typedef detail::endian< detail::big, 3, false > uint24; +typedef detail::endian< detail::big, 3, true > int24; +typedef detail::endian< detail::big, 4, false > uint32; +typedef detail::endian< detail::big, 4, true > int32; +typedef detail::endian< detail::big, 8, false > uint64; +typedef detail::endian< detail::big, 8, true > int64; +typedef detail::endian< detail::big, 4, true, true > float32; +typedef detail::endian< detail::big, 8, true, true > float64; + +} // namespace big_endian { + } } // namespace comma { namespace packed { diff --git a/packed/detail/endian.h b/packed/detail/endian.h index 6fb850558..80b1b9b75 100644 --- a/packed/detail/endian.h +++ b/packed/detail/endian.h @@ -142,17 +142,7 @@ struct endian : public packed::field< endian< Endianness, Size, Signed, Floating { uint_of_same_size* p = reinterpret_cast< uint_of_same_size* >( &value ); *p = convert< Endianness >::from_host( *p ); - if( sizeof( uint_of_same_size ) == size ) // no point for further generics; should be optimized by compiler anyway - { - ::memcpy( storage, ( void* )p, size ); - } - else - { - // todo! 24-bit big endian!!! - // todo: test signed big endian - - for( unsigned int i = 0; i < size; ++i, *p >>= 8 ) { storage[i] = *p & 0xff; } - } + ::memcpy( storage, ( void* )p + ( Endianness == little ? 
0 : sizeof( uint_of_same_size ) - size ), size ); } static type unpack( const char* storage ) // for floats it is a real hack, since there is no standard diff --git a/packed/little_endian.h b/packed/little_endian.h index b2b9a823d..7f8b16d70 100644 --- a/packed/little_endian.h +++ b/packed/little_endian.h @@ -36,30 +36,41 @@ namespace comma { namespace packed { -/// packed little endian 16-bit integers typedef detail::endian< detail::little, 2, true > little_endian16; typedef detail::endian< detail::little, 2, false > little_endian_uint16; typedef little_endian16 int16; typedef little_endian_uint16 uint16; -/// packed little endian 24-bit integers (strangely, there are protocols using it) typedef detail::endian< detail::little, 3, true > little_endian24; typedef detail::endian< detail::little, 3, false > little_endian_uint24; typedef little_endian24 int24; typedef little_endian_uint24 uint24; -/// packed little endian 32-bit integers typedef detail::endian< detail::little, 4, true > little_endian32; typedef detail::endian< detail::little, 4, false > little_endian_uint32; typedef little_endian32 int32; typedef little_endian_uint32 uint32; -/// packed little endian 32-bit integers typedef detail::endian< detail::little, 8, true > little_endian64; typedef detail::endian< detail::little, 8, false > little_endian_uint64; typedef little_endian64 int64; typedef little_endian_uint64 uint64; -/// packed floating point number (does it even make sense?) 
typedef detail::endian< detail::little, 4, true, true > little_endian_float32; typedef detail::endian< detail::little, 8, true, true > little_endian_float64; typedef little_endian_float32 float32; typedef little_endian_float64 float64; +// all types above deprecated; use namespacing below +namespace little_endian { // i love namespacing + +typedef detail::endian< detail::little, 2, false > uint16; +typedef detail::endian< detail::little, 2, true > int16; +typedef detail::endian< detail::little, 3, false > uint24; +typedef detail::endian< detail::little, 3, true > int24; +typedef detail::endian< detail::little, 4, false > uint32; +typedef detail::endian< detail::little, 4, true > int32; +typedef detail::endian< detail::little, 8, false > uint64; +typedef detail::endian< detail::little, 8, true > int64; +typedef detail::endian< detail::little, 4, true, true > float32; +typedef detail::endian< detail::little, 8, true, true > float64; + +} // namespace little_endian { + } } // namespace comma { namespace packed { diff --git a/packed/test/packed_test.cpp b/packed/test/packed_test.cpp index e7b891b9d..5ee680ff6 100644 --- a/packed/test/packed_test.cpp +++ b/packed/test/packed_test.cpp @@ -47,8 +47,8 @@ struct test_packed_struct_t : public comma::packed::packed_struct< test_packed_s { comma::packed::string< 4 > hello; comma::packed::string< 5 > world; - comma::packed::net_uint16 int16; - comma::packed::net_uint32 int32; + comma::packed::big_endian::uint16 int16; + comma::packed::big_endian::uint32 int32; comma::packed::byte byte; }; @@ -75,8 +75,7 @@ TEST( packed_struct, test_packed_struct ) EXPECT_EQ( s.byte(), 3 ); } -template < typename T > -void test_packed_int( comma::int64 value ) +template < typename T > static void test_packed_int( comma::int64 value ) { T t; EXPECT_EQ( true, t == 0 ); @@ -85,8 +84,7 @@ void test_packed_int( comma::int64 value ) EXPECT_EQ( value, t() ); } -template < typename T > -void test_packed_uint( comma::uint64 value ) +template < typename T 
> static void test_packed_uint( comma::uint64 value ) { T t; EXPECT_EQ( true, t == 0 ); @@ -95,42 +93,45 @@ void test_packed_uint( comma::uint64 value ) EXPECT_EQ( value, t() ); } - TEST( test_packed_struct_test, test_little_endian ) { - test_packed_uint< comma::packed::uint16 >( 1231 ); - test_packed_uint< comma::packed::uint16 >( 65535 ); - test_packed_uint< comma::packed::uint24 >( 1232 ); - test_packed_uint< comma::packed::uint24 >( 16777215 ); - test_packed_uint< comma::packed::uint32 >( 1233 ); - test_packed_uint< comma::packed::uint32 >( 4294967295 ); - test_packed_uint< comma::packed::uint64 >( 4321 ); - test_packed_uint< comma::packed::uint64 >( comma::uint64( std::numeric_limits< comma::uint64 >::max() ) ); - test_packed_uint< comma::packed::uint64 >( comma::uint64( 0x1BCDEF1213141500ULL ) ); - - test_packed_int< comma::packed::int16 >( 1234 ); - test_packed_int< comma::packed::int24 >( 1235 ); - test_packed_int< comma::packed::int24 >( 8388607 ); - test_packed_int< comma::packed::int32 >( 8388607 ); - test_packed_int< comma::packed::int32 >( 1236 ); - test_packed_int< comma::packed::int16 >( -1231 ); - test_packed_int< comma::packed::int24 >( -1232 ); - test_packed_int< comma::packed::int24 >( -8388608 ); - test_packed_int< comma::packed::int32 >( -1233 ); - test_packed_int< comma::packed::int64 >( -4321 ); - test_packed_int< comma::packed::int64 >( comma::int64( std::numeric_limits< comma::int64 >::min() ) ); + test_packed_uint< comma::packed::little_endian::uint16 >( 1231 ); + test_packed_uint< comma::packed::little_endian::uint16 >( 65535 ); + test_packed_uint< comma::packed::little_endian::uint24 >( 1232 ); + test_packed_uint< comma::packed::little_endian::uint24 >( 16777215 ); + test_packed_uint< comma::packed::little_endian::uint32 >( 1233 ); + test_packed_uint< comma::packed::little_endian::uint32 >( 4294967295 ); + test_packed_uint< comma::packed::little_endian::uint64 >( 4321 ); + test_packed_uint< comma::packed::little_endian::uint64 >( 
comma::uint64( std::numeric_limits< comma::uint64 >::max() ) ); + test_packed_uint< comma::packed::little_endian::uint64 >( comma::uint64( 0x1BCDEF1213141500ULL ) ); + + test_packed_int< comma::packed::little_endian::int16 >( 1234 ); + test_packed_int< comma::packed::little_endian::int24 >( 1235 ); + test_packed_int< comma::packed::little_endian::int24 >( 8388607 ); + test_packed_int< comma::packed::little_endian::int32 >( 8388607 ); + test_packed_int< comma::packed::little_endian::int32 >( 1236 ); + test_packed_int< comma::packed::little_endian::int16 >( -1231 ); + test_packed_int< comma::packed::little_endian::int24 >( -1232 ); + test_packed_int< comma::packed::little_endian::int24 >( -8388608 ); + test_packed_int< comma::packed::little_endian::int32 >( -1233 ); + test_packed_int< comma::packed::little_endian::int64 >( -4321 ); + test_packed_int< comma::packed::little_endian::int64 >( comma::int64( std::numeric_limits< comma::int64 >::min() ) ); } TEST( test_packed_struct_test, test_big_endian ) { - test_packed_int< comma::packed::net_uint16 >( 1234 ); - test_packed_int< comma::packed::net_uint16 >( 65535 ); - test_packed_int< comma::packed::net_uint32 >( 1234 ); - test_packed_int< comma::packed::net_uint32 >( 4294967295 ); - test_packed_int< comma::packed::net_int16 >( 1234 ); - test_packed_int< comma::packed::net_int32 >( 1234 ); - test_packed_int< comma::packed::net_int16 >( -1234 ); - test_packed_int< comma::packed::net_int32 >( -1234 ); + test_packed_int< comma::packed::big_endian::uint16 >( 1234 ); + test_packed_int< comma::packed::big_endian::uint16 >( 65535 ); + test_packed_uint< comma::packed::big_endian::uint24 >( 1232 ); + test_packed_uint< comma::packed::big_endian::uint24 >( 16777215 ); + test_packed_int< comma::packed::big_endian::uint32 >( 1234 ); + test_packed_int< comma::packed::big_endian::uint32 >( 4294967295 ); + test_packed_int< comma::packed::big_endian::int16 >( 1234 ); + test_packed_int< comma::packed::big_endian::int32 >( 1234 ); + 
test_packed_int< comma::packed::big_endian::int16 >( -1234 ); + test_packed_int< comma::packed::big_endian::int32 >( -1234 ); + test_packed_int< comma::packed::big_endian::int24 >( -1232 ); + test_packed_int< comma::packed::big_endian::int24 >( -8388608 ); } template< typename T > @@ -151,7 +152,7 @@ static void test_int64_byte_order( comma::int64 value, char byte0, char byte1, c TEST( test_packed_struct_test, test_int64_byte_order ) { comma::int64 i = 0xFBCDEF1213141500LL; - test_int64_byte_order< comma::packed::uint64 >( i, 0x00, 0x15, 0x14, 0x13, 0x12, 0xEF, 0xCD, 0xFB ); + test_int64_byte_order< comma::packed::little_endian::uint64 >( i, 0x00, 0x15, 0x14, 0x13, 0x12, 0xEF, 0xCD, 0xFB ); } template< typename T > @@ -172,12 +173,12 @@ static void test_uint64_byte_order( comma::uint64 value, char byte0, char byte1, TEST( test_packed_struct_test, test_uint64_byte_order ) { comma::uint64 i = 0xABCDEF1213141500ULL; - test_uint64_byte_order< comma::packed::uint64 >( i, 0x00, 0x15, 0x14, 0x13, 0x12, 0xEF, 0xCD, 0xAB ); + test_uint64_byte_order< comma::packed::little_endian::uint64 >( i, 0x00, 0x15, 0x14, 0x13, 0x12, 0xEF, 0xCD, 0xAB ); } static void test_int24_byte_order( int value, char byte0, char byte1, char byte2 ) { - comma::packed::int24 a; + comma::packed::little_endian::int24 a; a = value; EXPECT_EQ( ( 0xff & a.data()[0] ), ( 0xff & byte0 ) ); EXPECT_EQ( ( 0xff & a.data()[1] ), ( 0xff & byte1 ) ); @@ -201,10 +202,10 @@ TEST( test_packed_struct_test, test_int24_byte_order ) struct test_packed_struct_floats_t : public comma::packed::packed_struct< test_packed_struct_floats_t, 24 > { - comma::packed::float32 f32; - comma::packed::float64 f64; - comma::packed::net_float32 nf32; - comma::packed::net_float64 nf64; + comma::packed::little_endian::float32 f32; + comma::packed::little_endian::float64 f64; + comma::packed::big_endian::float32 nf32; + comma::packed::big_endian::float64 nf64; }; TEST( packed_struct, test_packed_struct_floats ) @@ -218,7 +219,7 @@ TEST( 
packed_struct, test_packed_struct_floats ) EXPECT_DOUBLE_EQ( 1.23456789, s.f64() ); } -TEST( packed_struct, test_packed_struct_net_floats ) +TEST( packed_struct, test_packed_struct_big_endian_floats ) { test_packed_struct_floats_t s; EXPECT_FLOAT_EQ( true, s.nf32 == 0 ); @@ -231,25 +232,25 @@ TEST( packed_struct, test_packed_struct_net_floats ) TEST( test_packed_struct_test, test_little_endian_floats ) { - comma::packed::float32 a; + comma::packed::little_endian::float32 a; EXPECT_FLOAT_EQ( 0, a() ); a = 1.2345; EXPECT_FLOAT_EQ( 1.2345, a() ); - comma::packed::float64 b; + comma::packed::little_endian::float64 b; EXPECT_DOUBLE_EQ( 0, b() ); b = 1.23456789; EXPECT_DOUBLE_EQ( 1.23456789, b() ); } -TEST( test_packed_struct_test, test_net_floats ) +TEST( test_packed_struct_test, test_big_endian_floats ) { - comma::packed::net_float32 a; + comma::packed::big_endian::float32 a; EXPECT_FLOAT_EQ( 0, a() ); a = 1.2345; EXPECT_FLOAT_EQ( 1.2345, a() ); - comma::packed::net_float64 b; + comma::packed::big_endian::float64 b; EXPECT_DOUBLE_EQ( 0, b() ); b = 1.23456789; EXPECT_DOUBLE_EQ( 1.23456789, b() ); @@ -268,14 +269,14 @@ static void test_float32_byte_order( float value, char byte0, char byte1, char b TEST( test_packed_struct_test, test_float32_byte_order ) { - test_float32_byte_order< comma::packed::float32 >( 5.2, 0x66, 0x66, 0xA6, 0x40 ); - test_float32_byte_order< comma::packed::float32 >( -5.2, 0x66, 0x66, 0xA6, 0xC0 ); + test_float32_byte_order< comma::packed::little_endian::float32 >( 5.2, 0x66, 0x66, 0xA6, 0x40 ); + test_float32_byte_order< comma::packed::little_endian::float32 >( -5.2, 0x66, 0x66, 0xA6, 0xC0 ); } -TEST( test_packed_struct_test, test_net_float32_byte_order ) +TEST( test_packed_struct_test, test_big_endian_float32_byte_order ) { - test_float32_byte_order< comma::packed::net_float32 >( 5.2, 0x40, 0xA6, 0x66, 0x66 ); - test_float32_byte_order< comma::packed::net_float32 >( -5.2, 0xC0, 0xA6, 0x66, 0x66 ); + test_float32_byte_order< 
comma::packed::big_endian::float32 >( 5.2, 0x40, 0xA6, 0x66, 0x66 ); + test_float32_byte_order< comma::packed::big_endian::float32 >( -5.2, 0xC0, 0xA6, 0x66, 0x66 ); } template< typename T > @@ -295,18 +296,18 @@ static void test_float64_byte_order( double value, char byte0, char byte1, char TEST( test_packed_struct_test, test_float64_byte_order ) { - test_float64_byte_order< comma::packed::float64 >( 5.2, 0xCD, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0x14, 0x40 ); - test_float64_byte_order< comma::packed::float64 >( -5.2, 0xCD, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0x14, 0xC0 ); - test_float64_byte_order< comma::packed::float64 >( -1.2e-123, 0x4E, 0x57, 0x04, 0xD1, 0x71, 0x62, 0x69, 0xA6 ); - test_float64_byte_order< comma::packed::float64 >( -1.2e+123, 0x21, 0xBD, 0xC3, 0x60, 0x60, 0x0B, 0x7D, 0xD9 ); + test_float64_byte_order< comma::packed::little_endian::float64 >( 5.2, 0xCD, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0x14, 0x40 ); + test_float64_byte_order< comma::packed::little_endian::float64 >( -5.2, 0xCD, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0x14, 0xC0 ); + test_float64_byte_order< comma::packed::little_endian::float64 >( -1.2e-123, 0x4E, 0x57, 0x04, 0xD1, 0x71, 0x62, 0x69, 0xA6 ); + test_float64_byte_order< comma::packed::little_endian::float64 >( -1.2e+123, 0x21, 0xBD, 0xC3, 0x60, 0x60, 0x0B, 0x7D, 0xD9 ); } -TEST( test_packed_struct_test, test_net_float64_byte_order ) +TEST( test_packed_struct_test, test_big_endian_float64_byte_order ) { - test_float64_byte_order< comma::packed::net_float64 >( 5.2, 0x40, 0x14, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCD ); - test_float64_byte_order< comma::packed::net_float64 >( -5.2, 0xC0, 0x14, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCD ); - test_float64_byte_order< comma::packed::net_float64 >( -1.2e-123, 0xA6, 0x69, 0x62, 0x71, 0xD1, 0x04, 0x57, 0x4E ); - test_float64_byte_order< comma::packed::net_float64 >( -1.2e+123, 0xD9, 0x7D, 0x0B, 0x60, 0x60, 0xC3, 0xBD, 0x21 ); + test_float64_byte_order< comma::packed::big_endian::float64 >( 5.2, 0x40, 0x14, 0xCC, 0xCC, 0xCC, 
0xCC, 0xCC, 0xCD ); + test_float64_byte_order< comma::packed::big_endian::float64 >( -5.2, 0xC0, 0x14, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCD ); + test_float64_byte_order< comma::packed::big_endian::float64 >( -1.2e-123, 0xA6, 0x69, 0x62, 0x71, 0xD1, 0x04, 0x57, 0x4E ); + test_float64_byte_order< comma::packed::big_endian::float64 >( -1.2e+123, 0xD9, 0x7D, 0x0B, 0x60, 0x60, 0xC3, 0xBD, 0x21 ); } static boost::array< std::string, 16 > hex_digits_u = { { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F" } }; From 2271eb6999bba59646f561f69de50a81a2d2d8f0 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Thu, 11 Apr 2019 16:17:43 +1000 Subject: [PATCH 0017/1056] csv-fields: make-fields: --values convenience option added --- csv/applications/csv-fields.cpp | 9 ++++++++- csv/test/csv-fields/expected | 10 ++++++++++ csv/test/csv-fields/input | 7 +++++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/csv/applications/csv-fields.cpp b/csv/applications/csv-fields.cpp index 2e4442673..3886f591e 100644 --- a/csv/applications/csv-fields.cpp +++ b/csv/applications/csv-fields.cpp @@ -95,6 +95,7 @@ static void usage( bool ) std::cerr << std::endl; std::cerr << " make-fixed: normalise input to a fixed number of fields" << std::endl; std::cerr << " --count,--size=: number of output fields" << std::endl; + std::cerr << " --values=[]: if present, fill missing fields with given values" << std::endl; std::cerr << " --force: chop input to fields if larger" << std::endl; std::cerr << std::endl; std::cerr << "examples" << std::endl; @@ -156,6 +157,11 @@ static void usage( bool ) std::cerr << " a,b,c,d,," << std::endl; std::cerr << " x,y,z,,," << std::endl; std::cerr << std::endl; + std::cerr << " make-fixed" << std::endl; + std::cerr << " { echo a,b; echo x,y,z; } | csv-fields make-fixed --count=6 --fields=A,B,C,D,E,F" << std::endl; + std::cerr << " a,b,C,D,E,F" << std::endl; + std::cerr << " x,y,z,D,E,F" << std::endl; + std::cerr << std::endl; 
std::cerr << " { echo a,b,c,d; echo x,y,z; } | csv-fields make-fixed --count=3 --force" << std::endl; std::cerr << " a,b,c" << std::endl; std::cerr << " x,y,z" << std::endl; @@ -421,6 +427,7 @@ int main( int ac, char** av ) { const unsigned int count = options.value< unsigned int >( "--count,--size" ); bool force = options.exists( "--force" ); + const std::vector< std::string >& values = comma::split( options.value< std::string >( "--values", "" ), ',', true ); while( std::cin.good() ) { std::string line; @@ -430,7 +437,7 @@ int main( int ac, char** av ) if( v.size() <= count ) { std::cout << line; - for( unsigned int i = v.size(); i < count; i++ ) { std::cout << delimiter; } + for( unsigned int i = v.size(); i < count; i++ ) { std::cout << delimiter << ( i < values.size() ? values[i] : std::string() ); } } else { diff --git a/csv/test/csv-fields/expected b/csv/test/csv-fields/expected index 2dddba1ee..1aafcbd77 100644 --- a/csv/test/csv-fields/expected +++ b/csv/test/csv-fields/expected @@ -205,3 +205,13 @@ make_fixed[2]/output="a,b,c" make_fixed[2]/status=0 make_fixed[3]/output="a:b:c:d::" make_fixed[3]/status=0 +make_fixed[4]/output="a,b,c,4,5,6" +make_fixed[4]/status=0 +make_fixed[5]/output="a,b,c,4,5," +make_fixed[5]/status=0 +make_fixed[6]/output="a,b,c,,," +make_fixed[6]/status=0 +make_fixed[7]/output="a,b,c,,," +make_fixed[7]/status=0 +make_fixed[8]/output="a,b,,4,5,6" +make_fixed[8]/status=0 diff --git a/csv/test/csv-fields/input b/csv/test/csv-fields/input index 9bb3ea7ca..27131ca22 100644 --- a/csv/test/csv-fields/input +++ b/csv/test/csv-fields/input @@ -109,5 +109,8 @@ make_fixed[0]="echo a,b,c,d | csv-fields make-fixed --count=6" make_fixed[1]="echo a,b,c,d | csv-fields make-fixed --count=3" make_fixed[2]="echo a,b,c,d | csv-fields make-fixed --count=3 --force" make_fixed[3]="echo a:b:c:d | csv-fields make-fixed --count=6 --delimiter=:" - - +make_fixed[4]="echo a,b,c | csv-fields make-fixed --count=6 --values 1,2,3,4,5,6" +make_fixed[5]="echo a,b,c | 
csv-fields make-fixed --count=6 --values 1,2,3,4,5" +make_fixed[6]="echo a,b,c | csv-fields make-fixed --count=6 --values 1,2,3" +make_fixed[7]="echo a,b,c | csv-fields make-fixed --count=6 --values 1" +make_fixed[8]="echo a,b, | csv-fields make-fixed --count=6 --values 1,2,3,4,5,6" From b06079d9d1b483855ea11b4b6d4e42aeb36a4e96 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Thu, 11 Apr 2019 17:28:33 +1000 Subject: [PATCH 0018/1056] csv-fields: make-fixed: if --value given, --count is optional --- csv/applications/csv-fields.cpp | 8 +++++--- csv/test/csv-fields/expected | 2 ++ csv/test/csv-fields/input | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/csv/applications/csv-fields.cpp b/csv/applications/csv-fields.cpp index 3886f591e..e56d89efb 100644 --- a/csv/applications/csv-fields.cpp +++ b/csv/applications/csv-fields.cpp @@ -95,7 +95,8 @@ static void usage( bool ) std::cerr << std::endl; std::cerr << " make-fixed: normalise input to a fixed number of fields" << std::endl; std::cerr << " --count,--size=: number of output fields" << std::endl; - std::cerr << " --values=[]: if present, fill missing fields with given values" << std::endl; + std::cerr << " --values=[]: fill missing fields with given values" << std::endl; + std::cerr << " if --count not specified, use number of as desired number of fields" << std::endl; std::cerr << " --force: chop input to fields if larger" << std::endl; std::cerr << std::endl; std::cerr << "examples" << std::endl; @@ -425,9 +426,10 @@ int main( int ac, char** av ) } if( operation == "make-fixed" ) { - const unsigned int count = options.value< unsigned int >( "--count,--size" ); - bool force = options.exists( "--force" ); const std::vector< std::string >& values = comma::split( options.value< std::string >( "--values", "" ), ',', true ); + const unsigned int count = options.value< unsigned int >( "--count,--size", values.size() ); + if( count == 0 ) { std::cerr << "csv-fields: make-fixed: please specify either --count 
or --values" << std::endl; } + bool force = options.exists( "--force" ); while( std::cin.good() ) { std::string line; diff --git a/csv/test/csv-fields/expected b/csv/test/csv-fields/expected index 1aafcbd77..443885c0a 100644 --- a/csv/test/csv-fields/expected +++ b/csv/test/csv-fields/expected @@ -215,3 +215,5 @@ make_fixed[7]/output="a,b,c,,," make_fixed[7]/status=0 make_fixed[8]/output="a,b,,4,5,6" make_fixed[8]/status=0 +make_fixed[9]/output="a,b,c,4,5,6" +make_fixed[9]/status=0 diff --git a/csv/test/csv-fields/input b/csv/test/csv-fields/input index 27131ca22..7a8a3db2c 100644 --- a/csv/test/csv-fields/input +++ b/csv/test/csv-fields/input @@ -114,3 +114,4 @@ make_fixed[5]="echo a,b,c | csv-fields make-fixed --count=6 --values 1,2,3,4,5" make_fixed[6]="echo a,b,c | csv-fields make-fixed --count=6 --values 1,2,3" make_fixed[7]="echo a,b,c | csv-fields make-fixed --count=6 --values 1" make_fixed[8]="echo a,b, | csv-fields make-fixed --count=6 --values 1,2,3,4,5,6" +make_fixed[9]="echo a,b,c | csv-fields make-fixed --values 1,2,3,4,5,6" From 7cdfbbd28ff8abea41236007cf0f9e7bdef15be6 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 16 Apr 2019 19:35:34 +1000 Subject: [PATCH 0019/1056] csv-blocks: make-blocks: --min-gap-between-blocks: implementing... 
--- csv/applications/csv-blocks.cpp | 49 ++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/csv/applications/csv-blocks.cpp b/csv/applications/csv-blocks.cpp index fd89c8b58..e65989747 100644 --- a/csv/applications/csv-blocks.cpp +++ b/csv/applications/csv-blocks.cpp @@ -255,7 +255,16 @@ static bool empty_( const std::string& s ) // quick and dirty return true; } -template < typename T > static void set_fields( const comma::command_line_options& options, std::string& first_line, T& default_input ) +static double diff( const input_t& from, const input_t& to ) // quick and dirty +{ + if( from.key.longs.size() == 1 ) { return std::abs( double( from.key.longs[0] ) - to.key.longs[0] ); } + if( from.key.doubles.size() == 1 ) { return std::abs( from.key.doubles[0] - to.key.doubles[0] ); } + if( from.key.time.size() == 1 ) { return std::abs( double( ( from.key.time[0] - to.key.time[0] ).total_microseconds() ) / 1000000 ); } + if( from.key.strings.size() == 1 ) { COMMA_THROW( comma::exception, "difference for strings: not implemented" ); } + COMMA_THROW( comma::exception, "never here" ); +} + +template < typename T > static bool set_fields( const comma::command_line_options& options, std::string& first_line, T& default_input ) { std::vector< std::string > v = comma::split( csv.fields, ',' ); comma::csv::format f; @@ -270,8 +279,29 @@ template < typename T > static void set_fields( const comma::command_line_option } // This is to load the keys into input_t structure unsigned int size = f.count(); - for( std::size_t i = 0; i < size; ++i ) { if( i < v.size() ) { if( v[i] == "id" ) { v[i] = "key/" + default_input.key.append( f.offset( i ).type ); continue; } } } + bool has_id = false; + bool has_scalar = false; + for( std::size_t i = 0; i < size; ++i ) + { + if( i < v.size() ) + { + if( v[i] == "id" ) + { + has_id = true; + v[i] = "key/" + default_input.key.append( f.offset( i ).type ); + } + else if( v[i] == "scalar" ) + { + if( 
has_scalar ) { COMMA_THROW( comma::exception, "expected not more than one scalar in --fields; got: \"" << csv.fields << "\"" ); } + has_scalar = true; + v[i] = "key/" + default_input.key.append( f.offset( i ).type ); + } + } + } + if( !has_id && !has_scalar ) { COMMA_THROW( comma::exception, "please specify at least one id or scalar in --fields; got: \"" << csv.fields << "\"" ); } + if( has_id && has_scalar ) { COMMA_THROW( comma::exception, "expected either id or scalar in --fields; got both in: \"" << csv.fields << "\"" ); } csv.fields = comma::join( v, ',' ); + return has_id; } #ifndef WIN32 @@ -472,21 +502,20 @@ int main( int ac, char** av ) if( operation == "group" || operation == "make-blocks" ) { current_block = options.value< comma::uint32 >( "--starting-block,--from", 0 ); // default is 0 - std::string first_line; input_t default_input; - set_fields( options, first_line, default_input ); - if( verbose ) { std::cerr << name() << "csv fields: " << csv.fields << std::endl; } - if ( default_input.key.empty() ) { std::cerr << name() << "please specify at least one id field" << std::endl; return 1; } - + bool has_id = set_fields( options, first_line, default_input ); + if( !has_id ) { std::cerr << "csv-blocks: scalar field support: todo" << std::endl; } + if( verbose ) { std::cerr << name() << "csv fields: " << csv.fields << "; making blocks by " << ( has_id ? 
"id" : "scalar" ) << std::endl; } + double gap; + if( !has_id ) { gap = options.value< double >( "--min-gap-between-blocks,--min-gap,--gap" ); } comma::csv::input_stream< input_t > istream( std::cin, csv, default_input ); comma::csv::output_stream< appended_column > ostream( std::cout, csv_out ); comma::csv::tied< input_t, appended_column > tied( istream, ostream ); - if( !first_line.empty() ) { input_t p = comma::csv::ascii< input_t >( csv, default_input ).get( first_line ); - if( !(keys == p.key) ) { ++current_block; } + if( !( keys == p.key ) ) { ++current_block; } keys = p.key; // This is needed because the record wasnt read in by istream // Write it out @@ -499,7 +528,7 @@ int main( int ac, char** av ) { const input_t* p = istream.read(); if( !p ) { break; } - if( !(keys == p->key) ) { ++current_block; } + if( !( keys == p->key ) ) { ++current_block; } keys = p->key; tied.append( appended_column( current_block ) ); if( csv.flush ) { std::cout.flush(); } From ac92ca59977a186f13b1346dbe25c2ae16fff279 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 16 Apr 2019 23:19:44 +1000 Subject: [PATCH 0020/1056] csv-blocks: make-blocks: scalar field support implemented --- csv/applications/csv-blocks.cpp | 74 +++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/csv/applications/csv-blocks.cpp b/csv/applications/csv-blocks.cpp index e65989747..1b5773bf9 100644 --- a/csv/applications/csv-blocks.cpp +++ b/csv/applications/csv-blocks.cpp @@ -27,7 +27,7 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-/// @author dewey nguyen +/// @authors dewey nguyen, vsevolod vlaskine #include #include @@ -218,7 +218,6 @@ static comma::csv::options csv; static bool reverse_index = false; // All the data for this block static std::deque< std::string > block_records; -static comma::csv::impl::unstructured keys; static comma::uint32 current_block = 1; static comma::int32 increment_step = 1; @@ -264,7 +263,9 @@ static double diff( const input_t& from, const input_t& to ) // quick and dirty COMMA_THROW( comma::exception, "never here" ); } -template < typename T > static bool set_fields( const comma::command_line_options& options, std::string& first_line, T& default_input ) +struct how_t { enum values { none, by_id, by_scalar }; }; + +template < typename T > static how_t::values set_fields( const comma::command_line_options& options, std::string& first_line, T& default_input ) { std::vector< std::string > v = comma::split( csv.fields, ',' ); comma::csv::format f; @@ -279,29 +280,29 @@ template < typename T > static bool set_fields( const comma::command_line_option } // This is to load the keys into input_t structure unsigned int size = f.count(); - bool has_id = false; - bool has_scalar = false; + how_t::values how = how_t::none; for( std::size_t i = 0; i < size; ++i ) { if( i < v.size() ) { if( v[i] == "id" ) { - has_id = true; + if( how == how_t::by_scalar ) { COMMA_THROW( comma::exception, "expected either id or scalar in --fields; got both in: \"" << csv.fields << "\"" ); } + how = how_t::by_id; v[i] = "key/" + default_input.key.append( f.offset( i ).type ); } else if( v[i] == "scalar" ) { - if( has_scalar ) { COMMA_THROW( comma::exception, "expected not more than one scalar in --fields; got: \"" << csv.fields << "\"" ); } - has_scalar = true; + if( how == how_t::by_id ) { COMMA_THROW( comma::exception, "expected either id or scalar in --fields; got both in: \"" << csv.fields << "\"" ); } + if( how == how_t::by_scalar ) { COMMA_THROW( comma::exception, "expected not more than 
one scalar in --fields; got: \"" << csv.fields << "\"" ); } + how = how_t::by_scalar; v[i] = "key/" + default_input.key.append( f.offset( i ).type ); } } } - if( !has_id && !has_scalar ) { COMMA_THROW( comma::exception, "please specify at least one id or scalar in --fields; got: \"" << csv.fields << "\"" ); } - if( has_id && has_scalar ) { COMMA_THROW( comma::exception, "expected either id or scalar in --fields; got both in: \"" << csv.fields << "\"" ); } + if( how == how_t::none ) { COMMA_THROW( comma::exception, "please specify at least one id or scalar in --fields; got: \"" << csv.fields << "\"" ); } csv.fields = comma::join( v, ',' ); - return has_id; + return how; } #ifndef WIN32 @@ -501,24 +502,52 @@ int main( int ac, char** av ) } if( operation == "group" || operation == "make-blocks" ) { - current_block = options.value< comma::uint32 >( "--starting-block,--from", 0 ); // default is 0 + current_block = options.value< comma::uint32 >( "--starting-block,--from", 0 ); std::string first_line; input_t default_input; - bool has_id = set_fields( options, first_line, default_input ); - if( !has_id ) { std::cerr << "csv-blocks: scalar field support: todo" << std::endl; } - if( verbose ) { std::cerr << name() << "csv fields: " << csv.fields << "; making blocks by " << ( has_id ? "id" : "scalar" ) << std::endl; } - double gap; - if( !has_id ) { gap = options.value< double >( "--min-gap-between-blocks,--min-gap,--gap" ); } + auto how = set_fields( options, first_line, default_input ); + if( verbose ) { std::cerr << name() << "csv fields: " << csv.fields << "; making blocks by " << ( how == how_t::by_id ? 
"id" : "scalar" ) << std::endl; } + boost::optional< double > gap; + boost::optional< double > span; + if( how == how_t::by_scalar ) + { + options.assert_mutually_exclusive( "--min-gap-between-blocks,--min-gap,--gap", "--block-span,--span" ); + gap = options.optional< double >( "--min-gap-between-blocks,--min-gap,--gap" ); + span = options.optional< double >( "--block-span,--span" ); + } comma::csv::input_stream< input_t > istream( std::cin, csv, default_input ); comma::csv::output_stream< appended_column > ostream( std::cout, csv_out ); comma::csv::tied< input_t, appended_column > tied( istream, ostream ); + auto update_block = [&]( const input_t& p ) + { + static boost::optional< input_t > last; + switch( how ) + { + case how_t::by_id: + if( last && !( last->key == p.key ) ) { ++current_block; } + last = p; + break; + case how_t::by_scalar: + if( gap ) + { + if( last && diff( *last, p ) >= *gap ) { ++current_block; } + last = p; + } + else if( span ) + { + if( !last ) { last = p; } + else if( diff( *last, p ) >= *span ) { ++current_block; last = p; } + } + break; + case how_t::none: // never here + break; + } + + }; if( !first_line.empty() ) { input_t p = comma::csv::ascii< input_t >( csv, default_input ).get( first_line ); - if( !( keys == p.key ) ) { ++current_block; } - keys = p.key; - // This is needed because the record wasnt read in by istream - // Write it out + update_block( p ); if( istream.is_binary() ) { std::cout.write( (char*)&p, istream.binary().size() ); } else { std::cout << first_line << istream.ascii().ascii().delimiter(); } ostream.write( appended_column( current_block ) ); @@ -528,8 +557,7 @@ int main( int ac, char** av ) { const input_t* p = istream.read(); if( !p ) { break; } - if( !( keys == p->key ) ) { ++current_block; } - keys = p->key; + update_block( *p ); tied.append( appended_column( current_block ) ); if( csv.flush ) { std::cout.flush(); } } From 811a3c1dc7cb33fc3e84ea2b17fb3e1803a38154 Mon Sep 17 00:00:00 2001 From: vlaskine Date: 
Tue, 16 Apr 2019 23:32:51 +1000 Subject: [PATCH 0021/1056] csv-blocks: --help updated; group by scalar: unit test: todo... --- csv/applications/csv-blocks.cpp | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/csv/applications/csv-blocks.cpp b/csv/applications/csv-blocks.cpp index 1b5773bf9..feda4ce44 100644 --- a/csv/applications/csv-blocks.cpp +++ b/csv/applications/csv-blocks.cpp @@ -139,8 +139,14 @@ static void usage( bool more ) std::cerr << " attention: output does not preserve input order, since there is no reasonable tradeof there" << std::endl; std::cerr << " use csv-sort for post-processing, if required" << std::endl; std::cerr << " group|make-blocks" << std::endl; - std::cerr << " cat something.csv | csv-blocks group --fields=,id, " << std::endl; + std::cerr << " usage: cat something.csv | csv-blocks group --fields=,id, " << std::endl; std::cerr << " appends group's block field based on specified id key or keys" << std::endl; + std::cerr << " options" << std::endl; + std::cerr << " --fields=" << std::endl; + std::cerr << " id: any number of id fields to group by" << std::endl; + std::cerr << " scalar: group by scalar, which can be integer, floating point number, or time" << std::endl; + std::cerr << " --block-gap,--gap=; minimum gap in values between blocks, double (for time: seconds as double), see examples" << std::endl; + std::cerr << " --block-span,--span=; maximum block span, double (for time: seconds as double), see examples" << std::endl; std::cerr << " head" << std::endl; std::cerr << " reads records from first block to stdout, if --num-of-blocks= specified, read more than one blocks" << std::endl; std::cerr << " requires the index from 'index' mode in the inputs" << std::endl; @@ -181,10 +187,16 @@ static void usage( bool more ) std::cerr << " ( echo \"a,1,2,3\"; echo \"a,4,2,3\"; echo \"b,5,5,6\"; echo \"c,7,5,6\"; echo \"c,7,8,9\"; echo \"c,7,8,9\" ) >$block_csv" << std::endl; std::cerr << std::endl; 
std::cerr << " group|make-blocks" << std::endl; - std::cerr << " cat $block_csv | csv-blocks group --fields=id" << std::endl; - std::cerr << " unique ascending block number are assigned based on one id field" << std::endl; - std::cerr << " cat $block_csv | csv-blocks group --fields=id,,id" << std::endl; - std::cerr << " unique ascending block number are assigned based on two id fields" << std::endl; + std::cerr << " unique ascending block number are assigned based on one id field" << std::endl; + std::cerr << " cat $block_csv | csv-blocks group --fields=id" << std::endl; + std::cerr << " unique ascending block number are assigned based on two id fields" << std::endl; + std::cerr << " cat $block_csv | csv-blocks group --fields=id,,id" << std::endl; + std::cerr << " group by scalar span - try it" << std::endl; + std::cerr << " seq 20 | csv-blocks group --fields=scalar --span 5" << std::endl; + std::cerr << " seq 1 3 20 | csv-blocks group --fields scalar --span 4" << std::endl; + std::cerr << " group by scalar gap - try it" << std::endl; + std::cerr << " seq 20 | csv-blocks group --fields=scalar --gap 1" << std::endl; + std::cerr << " seq 20 | csv-blocks group --fields=scalar --gap 2" << std::endl; std::cerr << std::endl; std::cerr << " index" << std::endl; std::cerr << " cat $block_csv | csv-blocks group --fields=id | csv-blocks index --fields=,,,,block" << std::endl; @@ -511,8 +523,8 @@ int main( int ac, char** av ) boost::optional< double > span; if( how == how_t::by_scalar ) { - options.assert_mutually_exclusive( "--min-gap-between-blocks,--min-gap,--gap", "--block-span,--span" ); - gap = options.optional< double >( "--min-gap-between-blocks,--min-gap,--gap" ); + options.assert_mutually_exclusive( "--block-gap,--gap", "--block-span,--span" ); + gap = options.optional< double >( "--block-gap,--gap" ); span = options.optional< double >( "--block-span,--span" ); } comma::csv::input_stream< input_t > istream( std::cin, csv, default_input ); From 
4e3384532ac86dcfc7462b27c35fc6e769d5ee0d Mon Sep 17 00:00:00 2001 From: vlaskine Date: Wed, 17 Apr 2019 17:31:23 +1000 Subject: [PATCH 0022/1056] csv-block: group: --gap and --span can be used together now; basic unit test added --- csv/applications/csv-blocks.cpp | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/csv/applications/csv-blocks.cpp b/csv/applications/csv-blocks.cpp index feda4ce44..42d09a96d 100644 --- a/csv/applications/csv-blocks.cpp +++ b/csv/applications/csv-blocks.cpp @@ -523,7 +523,6 @@ int main( int ac, char** av ) boost::optional< double > span; if( how == how_t::by_scalar ) { - options.assert_mutually_exclusive( "--block-gap,--gap", "--block-span,--span" ); gap = options.optional< double >( "--block-gap,--gap" ); span = options.optional< double >( "--block-span,--span" ); } @@ -532,28 +531,20 @@ int main( int ac, char** av ) comma::csv::tied< input_t, appended_column > tied( istream, ostream ); auto update_block = [&]( const input_t& p ) { - static boost::optional< input_t > last; + static input_t first = p; + static input_t last = p; switch( how ) { case how_t::by_id: - if( last && !( last->key == p.key ) ) { ++current_block; } - last = p; + if( !( last.key == p.key ) ) { ++current_block; } break; case how_t::by_scalar: - if( gap ) - { - if( last && diff( *last, p ) >= *gap ) { ++current_block; } - last = p; - } - else if( span ) - { - if( !last ) { last = p; } - else if( diff( *last, p ) >= *span ) { ++current_block; last = p; } - } + if( ( gap && diff( last, p ) >= *gap ) || ( span && diff( first, p ) >= *span ) ) { ++current_block; first = p; } break; case how_t::none: // never here break; } + last = p; }; if( !first_line.empty() ) From 8a284f43c183c14f7932dc0686e39a1259fa769c Mon Sep 17 00:00:00 2001 From: vlaskine Date: Wed, 17 Apr 2019 17:31:39 +1000 Subject: [PATCH 0023/1056] csv-block: group: --gap and --span can be used together now; basic unit test added --- csv/test/csv-blocks/group/expected | 
70 ++++++++++++++++++++++++++++++ csv/test/csv-blocks/group/input | 15 +++++++ 2 files changed, 85 insertions(+) create mode 100644 csv/test/csv-blocks/group/expected create mode 100644 csv/test/csv-blocks/group/input diff --git a/csv/test/csv-blocks/group/expected b/csv/test/csv-blocks/group/expected new file mode 100644 index 000000000..5c144db16 --- /dev/null +++ b/csv/test/csv-blocks/group/expected @@ -0,0 +1,70 @@ +group/scalar/gap[0]/output/line[0]="1.1,0" +group/scalar/gap[0]/output/line[1]="2,1" +group/scalar/gap[0]/output/line[2]="5,2" +group/scalar/gap[0]/output/line[3]="5.9,3" +group/scalar/gap[0]/status=0 +group/scalar/gap[1]/output/line[0]="1.1,0" +group/scalar/gap[1]/output/line[1]="2,0" +group/scalar/gap[1]/output/line[2]="5,1" +group/scalar/gap[1]/output/line[3]="5.9,1" +group/scalar/gap[1]/status=0 +group/scalar/gap[2]/output/line[0]="1.1,0" +group/scalar/gap[2]/output/line[1]="2,0" +group/scalar/gap[2]/output/line[2]="5,0" +group/scalar/gap[2]/output/line[3]="5.9,0" +group/scalar/gap[2]/status=0 +group/scalar/gap[3]/output/line[0]="19700101T000001.100000,0" +group/scalar/gap[3]/output/line[1]="19700101T000002,1" +group/scalar/gap[3]/output/line[2]="19700101T000005,2" +group/scalar/gap[3]/output/line[3]="19700101T000005.900000,3" +group/scalar/gap[3]/status=0 +group/scalar/gap[4]/output/line[0]="19700101T000001.100000,0" +group/scalar/gap[4]/output/line[1]="19700101T000002,0" +group/scalar/gap[4]/output/line[2]="19700101T000005,1" +group/scalar/gap[4]/output/line[3]="19700101T000005.900000,1" +group/scalar/gap[4]/status=0 +group/scalar/gap[5]/output/line[0]="19700101T000001.100000,0" +group/scalar/gap[5]/output/line[1]="19700101T000002,0" +group/scalar/gap[5]/output/line[2]="19700101T000005,0" +group/scalar/gap[5]/output/line[3]="19700101T000005.900000,0" +group/scalar/gap[5]/status=0 + +group/scalar/span[0]/output/line[0]="1.1,0" +group/scalar/span[0]/output/line[1]="2,1" +group/scalar/span[0]/output/line[2]="5,2" 
+group/scalar/span[0]/output/line[3]="5.9,3" +group/scalar/span[0]/status=0 +group/scalar/span[1]/output/line[0]="1.1,0" +group/scalar/span[1]/output/line[1]="2,0" +group/scalar/span[1]/output/line[2]="5,1" +group/scalar/span[1]/output/line[3]="5.9,1" +group/scalar/span[1]/status=0 +group/scalar/span[2]/output/line[0]="1.1,0" +group/scalar/span[2]/output/line[1]="2,0" +group/scalar/span[2]/output/line[2]="5,0" +group/scalar/span[2]/output/line[3]="5.9,1" +group/scalar/span[2]/status=0 +group/scalar/span[3]/output/line[0]="19700101T000001.100000,0" +group/scalar/span[3]/output/line[1]="19700101T000002,1" +group/scalar/span[3]/output/line[2]="19700101T000005,2" +group/scalar/span[3]/output/line[3]="19700101T000005.900000,3" +group/scalar/span[3]/status=0 +group/scalar/span[4]/output/line[0]="19700101T000001.100000,0" +group/scalar/span[4]/output/line[1]="19700101T000002,0" +group/scalar/span[4]/output/line[2]="19700101T000005,1" +group/scalar/span[4]/output/line[3]="19700101T000005.900000,1" +group/scalar/span[4]/status=0 +group/scalar/span[5]/output/line[0]="19700101T000001.100000,0" +group/scalar/span[5]/output/line[1]="19700101T000002,0" +group/scalar/span[5]/output/line[2]="19700101T000005,0" +group/scalar/span[5]/output/line[3]="19700101T000005.900000,0" +group/scalar/span[5]/status=0 + +group/scalar/gap_and_span[0]/output/line[0]="1,0" +group/scalar/gap_and_span[0]/output/line[1]="2,0" +group/scalar/gap_and_span[0]/output/line[2]="5,1" +group/scalar/gap_and_span[0]/output/line[3]="6,1" +group/scalar/gap_and_span[0]/output/line[4]="7,1" +group/scalar/gap_and_span[0]/output/line[5]="8,1" +group/scalar/gap_and_span[0]/output/line[6]="9,2" +group/scalar/gap_and_span[0]/status=0 diff --git a/csv/test/csv-blocks/group/input b/csv/test/csv-blocks/group/input new file mode 100644 index 000000000..f4bf3e794 --- /dev/null +++ b/csv/test/csv-blocks/group/input @@ -0,0 +1,15 @@ +group/scalar/gap[0]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-blocks group --fields scalar 
--gap 0.5" +group/scalar/gap[1]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-blocks group --fields scalar --gap 1" +group/scalar/gap[2]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-blocks group --fields scalar --gap 5" +group/scalar/gap[3]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-time --from seconds | csv-blocks group --fields scalar --gap 0.5" +group/scalar/gap[4]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-time --from seconds | csv-blocks group --fields scalar --gap 1" +group/scalar/gap[5]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-time --from seconds | csv-blocks group --fields scalar --gap 5" + +group/scalar/span[0]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-blocks group --fields scalar --span 0.5" +group/scalar/span[1]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-blocks group --fields scalar --span 1" +group/scalar/span[2]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-blocks group --fields scalar --span 4" +group/scalar/span[3]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-time --from seconds | csv-blocks group --fields scalar --span 0.5" +group/scalar/span[4]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-time --from seconds | csv-blocks group --fields scalar --span 1" +group/scalar/span[5]="( echo 1.1; echo 2; echo 5; echo 5.9 ) | csv-time --from seconds | csv-blocks group --fields scalar --span 5" + +group/scalar/gap_and_span[0]="( echo 1; echo 2; echo 5; echo 6; echo 7; echo 8; echo 9 ) | csv-blocks group --fields scalar --gap 2 --span 4" From 2753b880b6e6c4a2ce0bec1ee3ae628824638d12 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 26 Apr 2019 12:41:01 +1000 Subject: [PATCH 0024/1056] csv-time-join: minor refactoring --- csv/applications/csv-time-join.cpp | 126 ++++++++--------------------- 1 file changed, 33 insertions(+), 93 deletions(-) diff --git a/csv/applications/csv-time-join.cpp b/csv/applications/csv-time-join.cpp index 80e3ef85b..d22c054eb 100644 --- a/csv/applications/csv-time-join.cpp +++ b/csv/applications/csv-time-join.cpp @@ 
-151,15 +151,8 @@ namespace comma { namespace visiting { template <> struct traits< Point > { - template < typename K, typename V > static void visit( const K&, const Point& p, V& v ) - { - v.apply( "t", p.timestamp ); - } - - template < typename K, typename V > static void visit( const K&, Point& p, V& v ) - { - v.apply( "t", p.timestamp ); - } + template < typename K, typename V > static void visit( const K&, const Point& p, V& v ) { v.apply( "t", p.timestamp ); } + template < typename K, typename V > static void visit( const K&, Point& p, V& v ) { v.apply( "t", p.timestamp ); } }; } } // namespace comma { namespace visiting { @@ -168,17 +161,12 @@ enum class how { by_lower, by_upper, nearest, realtime }; how method = how::by_lower; bool timestamp_only; bool select_only; - comma::csv::options stdin_csv; comma::csv::options bounding_csv; boost::optional< boost::posix_time::time_duration > bound; - typedef std::pair< boost::posix_time::ptime, std::string > timestring_t; -boost::posix_time::ptime get_time( const Point& p ) -{ - return p.timestamp ? *p.timestamp : boost::posix_time::microsec_clock::universal_time(); -} +boost::posix_time::ptime get_time( const Point& p ) { return p.timestamp ? 
*p.timestamp : boost::posix_time::microsec_clock::universal_time(); } static void output_bounding( std::ostream& os, const timestring_t& bounding, bool stdin_first ) { @@ -209,16 +197,13 @@ static void output_bounding( std::ostream& os, const timestring_t& bounding, boo static void output_input( std::ostream& os, const timestring_t& input ) { - if( stdin_csv.binary() ) { os.write( &input.second[0], stdin_csv.format().size() ); } - else { os << input.second; } + if( stdin_csv.binary() ) { os.write( &input.second[0], stdin_csv.format().size() ); } else { os << input.second; } } static void output( const timestring_t& input, const timestring_t& bounding, bool stdin_first ) { if( bounding.first.is_infinity() ) { return; } - if( bound && ( input.first - bounding.first > bound || bounding.first - input.first > bound )) { return; } - if( stdin_first ) { output_input( std::cout, input ); @@ -229,7 +214,6 @@ static void output( const timestring_t& input, const timestring_t& bounding, boo output_bounding( std::cout, bounding, stdin_first ); output_input( std::cout, input ); } - if( !stdin_csv.binary() ) { std::cout << '\n'; } std::cout.flush(); } @@ -240,7 +224,6 @@ int main( int ac, char** av ) { comma::signal_flag is_shutdown(comma::signal_flag::hard); comma::command_line_options options( ac, av, usage ); - if( options.exists( "--bash-completion" )) bash_completion( ac, av ); options.assert_mutually_exclusive( "--by-lower,--by-upper,--nearest,--realtime" ); if( options.exists( "--by-upper" )) { method = how::by_upper; } @@ -253,7 +236,6 @@ int main( int ac, char** av ) boost::optional< unsigned int > buffer_size = options.optional< unsigned int >( "--buffer" ); if( options.exists( "--bound" ) ) { bound = boost::posix_time::microseconds( static_cast(options.value< double >( "--bound" ) * 1000000 )); } stdin_csv = comma::csv::options( options, "t" ); - std::vector< std::string > unnamed = options.unnamed( 
"--by-lower,--by-upper,--nearest,--realtime,--select,--do-not-append,--timestamp-only,--time-only,--discard-bounding", "--binary,-b,--delimiter,-d,--fields,-f,--bound,--buffer,--verbose,-v" ); @@ -285,8 +267,7 @@ int main( int ac, char** av ) if( stdin_csv.binary() ) { _setmode( _fileno( stdout ), _O_BINARY ); } #endif // #ifdef WIN32 - comma::io::istream bounding_istream( comma::split( properties, ';' )[0] - , bounding_csv.binary() ? comma::io::mode::binary : comma::io::mode::ascii ); + comma::io::istream bounding_istream( comma::split( properties, ';' )[0], bounding_csv.binary() ? comma::io::mode::binary : comma::io::mode::ascii ); comma::csv::input_stream< Point > bounding_stream( *bounding_istream, bounding_csv ); #ifndef WIN32 @@ -298,23 +279,18 @@ int main( int ac, char** av ) #endif // #ifndef WIN32 const Point* p = NULL; - if( method == how::realtime ) { - #ifndef WIN32 + #ifdef WIN32 + COMMA_THROW( comma::exception, "--realtime mode not supported in WIN32" ); + #else bool end_of_input = false; bool end_of_bounds = false; - - boost::optional joined_line; - - while (!is_shutdown && !end_of_input) + boost::optional< timestring_t > joined_line; + while( !is_shutdown && !end_of_input ) { - if ( !bounding_stream.ready() && !stdin_stream.ready() ) - { - select.wait(boost::posix_time::milliseconds(1)); - } - - if ( !is_shutdown && !end_of_input && ( stdin_stream.ready() || ( select.check() && select.read().ready( comma::io::stdin_fd ) ) ) ) + if( !bounding_stream.ready() && !stdin_stream.ready() ) { select.wait(boost::posix_time::milliseconds(1)); } + if( !is_shutdown && !end_of_input && ( stdin_stream.ready() || ( select.check() && select.read().ready( comma::io::stdin_fd ) ) ) ) { p = stdin_stream.read(); if( p ) @@ -328,9 +304,7 @@ int main( int ac, char** av ) end_of_input = true; } } - - if ( !is_shutdown && !end_of_bounds && - ( bounding_stream.ready() || ( select.check() && select.read().ready( bounding_istream.fd() )))) + if( !is_shutdown && !end_of_bounds 
&& ( bounding_stream.ready() || ( select.check() && select.read().ready( bounding_istream.fd() ) ) ) ) { p = bounding_stream.read(); if( p ) @@ -344,22 +318,17 @@ int main( int ac, char** av ) } } } - if (is_shutdown) { comma::verbose << "got a signal" << std::endl; return 0; } - #else - COMMA_THROW(comma::exception, "--realtime mode not supported in WIN32"); - #endif + if( is_shutdown ) { comma::verbose << "got a signal" << std::endl; return 0; } + #endif // #ifdef WIN32 } else { - std::deque bounding_queue; + std::deque< timestring_t > bounding_queue; bool next = true; bool bounding_data_available; bool upper_bound_added = false; - - // add a fake entry for an lower bound to allow stdin before first bound to match - bounding_queue.push_back( std::make_pair( boost::posix_time::neg_infin, "" )); - - while( ( stdin_stream.ready() || ( std::cin.good() && !std::cin.eof() ) ) ) + bounding_queue.push_back( std::make_pair( boost::posix_time::neg_infin, "" ) ); // add a fake entry for an lower bound to allow stdin before first bound to match + while( stdin_stream.ready() || ( std::cin.good() && !std::cin.eof() ) ) { if( !std::cin.good() ) { select.read().remove( 0 ); } if( !bounding_istream->good() ) { select.read().remove( bounding_istream.fd() ); } @@ -371,52 +340,35 @@ int main( int ac, char** av ) //check so we do not block bool bounding_stream_ready = bounding_stream.ready(); bool stdin_stream_ready = stdin_stream.ready(); - if( next ) { if( !bounding_stream_ready || !stdin_stream_ready ) { - if( !bounding_stream_ready && !stdin_stream_ready ) - { - select.wait( boost::posix_time::milliseconds(10) ); - } - else - { - select.check(); - } + if( !bounding_stream_ready && !stdin_stream_ready ) { select.wait( boost::posix_time::milliseconds( 10 ) ); } + else { select.check(); } if( select.read().ready( bounding_istream.fd() )) { bounding_stream_ready = true; } - if( select.read().ready(0) ) { stdin_stream_ready=true; } + if( select.read().ready(0) ) { stdin_stream_ready = 
true; } } } else { if( !bounding_stream_ready ) { - bounding_stream_select.wait( boost::posix_time::milliseconds(10) ); + bounding_stream_select.wait( boost::posix_time::milliseconds( 10 ) ); if( bounding_stream_select.read().ready( bounding_istream.fd() )) { bounding_stream_ready=true; } } } #endif //#ifdef WIN32 - //keep storing available bounding data if( bounding_stream_ready ) { if( !buffer_size || bounding_queue.size() < *buffer_size || discard_bounding ) { const Point* q = bounding_stream.read(); - if( q ) - { - bounding_queue.push_back( std::make_pair( get_time( *q ), bounding_stream.last() )); - } - else - { - bounding_data_available=false; - } - } - if( buffer_size && bounding_queue.size() > *buffer_size && discard_bounding ) - { - bounding_queue.pop_front(); + if( q ) { bounding_queue.push_back( std::make_pair( get_time( *q ), bounding_stream.last() )); } + else { bounding_data_available = false; } } + if( buffer_size && bounding_queue.size() > *buffer_size && discard_bounding ) { bounding_queue.pop_front(); } } if( !upper_bound_added && bounding_istream->eof() ) { @@ -424,53 +376,41 @@ int main( int ac, char** av ) bounding_queue.push_back( std::make_pair( boost::posix_time::pos_infin, "" )); upper_bound_added = true; } - //if we are done with the last bounded point get next if( next ) { - if(!stdin_stream_ready) { continue; } + if( !stdin_stream_ready ) { continue; } p = stdin_stream.read(); if( !p ) { break; } } - - boost::posix_time::ptime t = get_time(*p); - + boost::posix_time::ptime t = get_time( *p ); //get bound - while(bounding_queue.size()>=2) - { - if( t < bounding_queue[1].first ) { break; } - bounding_queue.pop_front(); - } - - if(bounding_queue.size()<2) + for( ; bounding_queue.size() >= 2 && t >= bounding_queue[1].first; bounding_queue.pop_front() ); + if( bounding_queue.size() < 2 ) { //bound not found //do we have more data? 
- if(!bounding_data_available) { break; } - next=false; + if( !bounding_data_available ) { break; } + next = false; continue; } - //bound available - if( method == how::by_lower && t < bounding_queue.front().first ) { next = true; continue; } - bool is_first = ( method == how::by_lower ) || ( method == how::nearest && ( t - bounding_queue[0].first ) < ( bounding_queue[1].first - t )); - const timestring_t& chosen_bound = is_first ? bounding_queue[0] : bounding_queue[1];; timestring_t input_line = std::make_pair( t, stdin_stream.last() ); - output( input_line, chosen_bound, stdin_first ); - next=true; + next = true; } } return 0; } catch( std::exception& ex ) { std::cerr << "csv-time-join: " << ex.what() << std::endl; } catch( ... ) { std::cerr << "csv-time-join: unknown exception" << std::endl; } + return 1; } From ef5cad882173fa8bc45dd72964843bd690c88d2c Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 29 Apr 2019 14:20:59 +1000 Subject: [PATCH 0025/1056] csv-thin: --invert, --seed implemented; unit test added --- csv/applications/csv-thin.cpp | 242 ++++------------------------------ csv/test/csv-thin/expected | 4 + csv/test/csv-thin/input | 2 + 3 files changed, 34 insertions(+), 214 deletions(-) create mode 100644 csv/test/csv-thin/expected create mode 100644 csv/test/csv-thin/input diff --git a/csv/applications/csv-thin.cpp b/csv/applications/csv-thin.cpp index 61139202e..310da562a 100644 --- a/csv/applications/csv-thin.cpp +++ b/csv/applications/csv-thin.cpp @@ -56,26 +56,28 @@ using namespace comma; static void usage( bool verbose = false ) { std::cerr << std::endl; - std::cerr << "Read input data and thin them down by the given percentage;" << std::endl; + std::cerr << "read input data and thin them down by the given percentage;" << std::endl; std::cerr << "buffer handling optimized for a high-output producer" << std::endl; std::cerr << std::endl; - std::cerr << "Usage: cat full.csv | csv-thin [] [] > thinned.csv" << std::endl; + std::cerr << "usage: cat 
full.csv | csv-thin [] [] > thinned.csv" << std::endl; std::cerr << std::endl; - std::cerr << "options:" << std::endl; - std::cerr << " --binary,-b=: data is packets of fixed size given by ." << std::endl; - std::cerr << " Alternatively use --size" << std::endl; - std::cerr << " --deterministic,-d: input is downsampled by a factor of int( 1 / )." << std::endl; - std::cerr << " That is, if is 0.33, output every third packet." << std::endl; - std::cerr << " Default is to output each packet with a probability of ." << std::endl; + std::cerr << "options" << std::endl; + std::cerr << " --binary,-b=: data is packets of fixed size given by " << std::endl; + std::cerr << " alternatively use --size" << std::endl; + std::cerr << " --deterministic,-d: input is downsampled by a factor of int( 1 / )" << std::endl; + std::cerr << " that is, if is 0.33, output every third packet" << std::endl; + std::cerr << " default is to output each packet with a probability of " << std::endl; std::cerr << " --fields=: use timestamp in fields to determine time for --period" << std::endl; + std::cerr << " --invert,-i; invert selection logic; e.g. to split data" << std::endl; std::cerr << " --period=: output once every seconds, ignores " << std::endl; std::cerr << " --size,-s=: data is packets of fixed size, otherwise data is expected" << std::endl; std::cerr << " line-wise. 
Alternatively use --binary" << std::endl; + std::cerr << " --seed=[]; random seed" << std::endl; std::cerr << std::endl; - std::cerr << "csv options:" << std::endl; + std::cerr << "csv options" << std::endl; std::cerr << comma::csv::options::usage( verbose ) << std::endl; std::cerr << std::endl; - std::cerr << "examples:" << std::endl; + std::cerr << "examples" << std::endl; std::cerr << " output 70% of data: cat full.csv | csv-thin 0.7" << std::endl; std::cerr << " output once every 2 seconds: cat full.csv | csv-thin --period 2" << std::endl; std::cerr << " using timestamp from input: cat full.csv | csv-thin --period 2 --fields t" << std::endl; @@ -88,6 +90,8 @@ static void usage( bool verbose = false ) static double rate; static bool deterministic; +static bool invert; +static boost::optional< comma::uint32 > seed; static boost::optional< boost::posix_time::microseconds > period; struct timestamped @@ -107,7 +111,7 @@ template <> struct traits< timestamped > } } // namespace comma { namespace visiting { -static bool ignore() +static bool skip() { if( period ) { @@ -135,21 +139,19 @@ static bool ignore() ++count; if( count < ( step + 1 ) / rate ) { return true; } ++step; - if( step == size ) - { - count = 0; - step = 0; - } + if( step == size ) { count = step = 0; } return false; } - static boost::mt19937 rng; + static boost::mt19937 rng = seed ? 
boost::mt19937( *seed ) : boost::mt19937(); static boost::uniform_real<> dist( 0, 1 ); static boost::variate_generator< boost::mt19937&, boost::uniform_real<> > random( rng, dist ); static bool do_ignore = comma::math::less( rate, 1.0 ); return do_ignore && random() > rate; } -static bool ignore_by_timestamp( boost::posix_time::ptime timestamp ) +static bool keep() { return skip() == invert; } + +static bool skip_by_timestamp( boost::posix_time::ptime timestamp ) { static boost::posix_time::ptime next_time = timestamp; if( timestamp <= next_time ) { return true; } @@ -164,6 +166,8 @@ int main( int ac, char** av ) comma::command_line_options options( ac, av, usage ); bool binary = options.exists( "--size,-s,--binary,-b" ); deterministic = options.exists( "--deterministic,-d" ); + invert = options.exists( "--invert,-i" ); + seed = options.optional< comma::uint32 >( "--seed" ); if( options.exists( "--period" )) { period = boost::posix_time::microseconds( static_cast (options.value< double >( "--period" ) * 1000000 )); } #ifdef WIN32 if( binary ) { _setmode( _fileno( stdin ), _O_BINARY ); _setmode( _fileno( stdout ), _O_BINARY ); } @@ -177,7 +181,7 @@ int main( int ac, char** av ) { const timestamped* p = istream.read(); if( !p ) { break; } - if( ignore_by_timestamp( p->timestamp ) ) { continue; } + if( skip_by_timestamp( p->timestamp ) != invert ) { continue; } if( istream.is_binary()) { std::cout.write( istream.binary().last(), istream.binary().size() ); } else { std::cout << comma::join( istream.ascii().last(), istream.ascii().ascii().delimiter() )<< std::endl; } } @@ -191,7 +195,6 @@ int main( int ac, char** av ) rate = boost::lexical_cast< double >( v[0] ); if( comma::math::less( rate, 0 ) || comma::math::less( 1, rate ) ) { std::cerr << "csv-thin: expected rate between 0 and 1, got " << rate << std::endl; usage(); } } - if( binary ) // quick and dirty, improve performance by reading larger buffer { std::size_t size = options.value( "--size,-s", 0u ); @@ -213,8 
+216,8 @@ int main( int ac, char** av ) //std::size_t e = available < int( size ) ? size : available - available % size; std::cin.read( &buf[0], size ); // quick and dirty if( std::cin.gcount() <= 0 ) { break; } - if( std::cin.gcount() < int( size ) ) { std::cerr << "csv-thin: expected " << size << " bytes; got only " << std::cin.gcount() << std::endl; exit( 1 ); } - { if( !ignore() ) { std::cout.write( &buf[0], size ); std::cout.flush(); } } + if( std::cin.gcount() < int( size ) ) { std::cerr << "csv-thin: expected " << size << " bytes; got only " << std::cin.gcount() << std::endl; return 1; } + if( keep() ) { std::cout.write( &buf[0], size ); std::cout.flush(); } } #else char* cur = &buf[0]; @@ -232,7 +235,7 @@ int main( int ac, char** av ) capacity -= count; for( ; offset >= size; cur += size, offset -= size ) { - if( !ignore() ) { std::cout.write( cur, size ); } + if( keep() ) { std::cout.write( cur, size ); } } if( capacity == 0 ) { cur = &buf[0]; offset = 0; capacity = buf.size(); } std::cout.flush(); @@ -245,201 +248,12 @@ int main( int ac, char** av ) while( std::cin.good() && !std::cin.eof() ) { std::getline( std::cin, line ); - if( !line.empty() && !ignore() ) { std::cout << line << std::endl; } + if( !line.empty() && keep() ) { std::cout << line << std::endl; } } } return 0; } catch( std::exception& ex ) { std::cerr << "csv-size: " << ex.what() << std::endl; } catch( ... ) { std::cerr << "csv-size: unknown exception" << std::endl; } - usage(); + return 1; } - -// // This file is part of comma, a generic and flexible library -// // Copyright (c) 2011 The University of Sydney -// // All rights reserved. -// // -// // Redistribution and use in source and binary forms, with or without -// // modification, are permitted provided that the following conditions are met: -// // 1. Redistributions of source code must retain the above copyright -// // notice, this list of conditions and the following disclaimer. -// // 2. 
Redistributions in binary form must reproduce the above copyright -// // notice, this list of conditions and the following disclaimer in the -// // documentation and/or other materials provided with the distribution. -// // 3. All advertising materials mentioning features or use of this software -// // must display the following acknowledgement: -// // This product includes software developed by the University of Sydney. -// // 4. Neither the name of the University of Sydney nor the -// // names of its contributors may be used to endorse or promote products -// // derived from this software without specific prior written permission. -// // -// // NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE -// // GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT -// // HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED -// // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -// // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -// // BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -// // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -// // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN -// // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// -// /// @author vsevolod vlaskine -// -// #ifdef WIN32 -// #include -// #include -// #include -// #endif -// -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// -// using namespace comma; -// -// static void usage() -// { -// std::cerr << std::endl; -// std::cerr << "Read input data and thin them down by the given percentage;" << std::endl; -// std::cerr << "buffer handling optimized for a high-output producer" << std::endl; -// std::cerr << std::endl; -// std::cerr << "Usage: cat full.csv | csv-thin [] > thinned.csv" << std::endl; -// std::cerr << std::endl; -// std::cerr << "e.g. output 70% of data: cat full.csv | csv-thin 0.7 > thinned.csv" << std::endl; -// std::cerr << std::endl; -// std::cerr << "" << std::endl; -// std::cerr << std::endl; -// std::cerr << " --size,-s : if given, data is packets of fixed size" << std::endl; -// std::cerr << " otherwise data is line-based" << std::endl; -// std::cerr << " --deterministic,-d: if given, input is downsampled by a factor of int(1 / )." << std::endl; -// std::cerr << " That is, if is 0.33, output every third packet." << std::endl; -// std::cerr << " Default is to output each packet with a probability of ." 
<< std::endl; -// std::cerr << std::endl; -// std::cerr << comma::contact_info << std::endl; -// std::cerr << std::endl; -// exit( 1 ); -// } -// -// static double rate; -// static bool deterministic; -// static unsigned long long count_size; -// -// static bool ignore() -// { -// if( deterministic ) -// { -// static unsigned long long count = count_size - 1; -// if( ++count == count_size ) { count = 0; } -// if(rate<0.5) -// { -// return count != 0; -// } -// else -// { -// return count == 0; -// } -// } -// static boost::mt19937 rng; -// static boost::uniform_real<> dist( 0, 1 ); -// static boost::variate_generator< boost::mt19937&, boost::uniform_real<> > random( rng, dist ); -// static bool do_ignore = comma::math::less( rate, 1.0 ); -// return do_ignore && random() > rate; -// -// } -// -// int main( int ac, char** av ) -// { -// try -// { -// comma::command_line_options options( ac, av ); -// if( options.exists( "--help,-h" ) || ac == 1 ) { usage(); } -// bool binary = options.exists( "--size,-s" ); -// deterministic = options.exists( "--deterministic,-d" ); -// std::size_t size = options.value( "--size,-s", 0u ); -// #ifdef WIN32 -// if( binary ) { _setmode( _fileno( stdin ), _O_BINARY ); _setmode( _fileno( stdout ), _O_BINARY ); } -// #endif -// std::vector< std::string > v = options.unnamed( "--deterministic,-d", "-.*" ); -// if( v.empty() ) { std::cerr << "csv-thin: please specify rate" << std::endl; usage(); } -// rate = boost::lexical_cast< double >( v[0] ); -// if( comma::math::less( rate, 0 ) || comma::math::less( 1, rate ) ) { std::cerr << "csv-thin: expected rate between 0 and 1, got " << rate << std::endl; usage(); } -// -// if( deterministic ) -// { -// if(rate<0.5) -// { -// count_size = static_cast< unsigned long long >( 1.0 / rate ); -// } -// else -// { -// count_size = static_cast< unsigned long long >( 1.0 / (1-rate) ); -// } -// } -// -// if( binary ) // quick and dirty, improve performance by reading larger buffer -// { -// unsigned int 
factor = 65536 / size; // arbitrary -// if( factor == 0 ) { factor = 1; } -// std::vector< char > buf( size * factor ); -// #ifdef WIN32 -// while( std::cin.good() && !std::cin.eof() ) -// { -// // it all does not seem to work: in_avail() always returns 0 -// //std::streamsize available = std::cin.rdbuf()->in_avail(); -// //if( available < 0 ) { continue; } -// //if( available > 0 ) { std::cerr << "available = " << available << std::endl; } -// //std::size_t e = available < int( size ) ? size : available - available % size; -// std::cin.read( &buf[0], size ); // quick and dirty -// if( std::cin.gcount() <= 0 ) { break; } -// if( std::cin.gcount() < int( size ) ) { std::cerr << "csv-thin: expected " << size << " bytes; got only " << std::cin.gcount() << std::endl; exit( 1 ); } -// { if( !ignore() ) { std::cout.write( &buf[0], size ); std::cout.flush(); } } -// } -// #else -// char* cur = &buf[0]; -// unsigned int offset = 0; -// unsigned int capacity = buf.size(); -// while( std::cin.good() && !std::cin.eof() ) -// { -// int count = ::read( comma::io::stdin_fd, cur + offset, capacity ); -// if( count <= 0 ) -// { -// if( offset != 0 ) { std::cerr << "csv-thin: expected at least " << size << " bytes, got only " << offset << std::endl; return 1; } -// break; -// } -// offset += count; -// capacity -= count; -// for( ; offset >= size; cur += size, offset -= size ) -// { -// if( !ignore() ) { std::cout.write( cur, size ); } -// } -// if( capacity == 0 ) { cur = &buf[0]; offset = 0; capacity = buf.size(); } -// std::cout.flush(); -// } -// #endif -// } -// else -// { -// std::string line; -// while( std::cin.good() && !std::cin.eof() ) -// { -// std::getline( std::cin, line ); -// if( !line.empty() && !ignore() ) { std::cout << line << std::endl; } -// } -// } -// return 0; -// } -// catch( std::exception& ex ) { std::cerr << "csv-size: " << ex.what() << std::endl; } -// catch( ... 
) { std::cerr << "csv-size: unknown exception" << std::endl; } -// usage(); -// } diff --git a/csv/test/csv-thin/expected b/csv/test/csv-thin/expected new file mode 100644 index 000000000..176eff035 --- /dev/null +++ b/csv/test/csv-thin/expected @@ -0,0 +1,4 @@ +random/deterministic[0]/output="1,3,5,7,9," +random/deterministic[0]/status=0 +random/inverted[0]/output="1,2,3,4,5,6,7,8,9,10," +random/inverted[0]/status=0 diff --git a/csv/test/csv-thin/input b/csv/test/csv-thin/input new file mode 100644 index 000000000..b3d4aa61c --- /dev/null +++ b/csv/test/csv-thin/input @@ -0,0 +1,2 @@ +random/deterministic[0]="seq 10 | csv-thin 0.5 --invert --deterministic | tr '\\\n' ','" +random/inverted[0]="( seq 10 | csv-thin 0.5; seq 10 | csv-thin 0.5 --invert ) | csv-sort --fields i | tr '\\\n' ','" From c806f3885178f5e504fa1e638b8a8bedfcc33c14 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 10 May 2019 23:51:09 +1000 Subject: [PATCH 0026/1056] packed: more signed int tests added --- packed/test/packed_test.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/packed/test/packed_test.cpp b/packed/test/packed_test.cpp index 5ee680ff6..4509a38d2 100644 --- a/packed/test/packed_test.cpp +++ b/packed/test/packed_test.cpp @@ -81,7 +81,10 @@ template < typename T > static void test_packed_int( comma::int64 value ) EXPECT_EQ( true, t == 0 ); t = value; EXPECT_EQ( true, t == value ); + std::cerr << "-------------------------------------------------" << std::endl; + std::cerr << "--> a: value: " << value << " t: " << t() << std::endl; EXPECT_EQ( value, t() ); + std::cerr << "-------------------------------------------------" << std::endl; } template < typename T > static void test_packed_uint( comma::uint64 value ) @@ -90,7 +93,10 @@ template < typename T > static void test_packed_uint( comma::uint64 value ) EXPECT_EQ( true, t == 0 ); t = value; EXPECT_EQ( true, t == value ); + std::cerr << "-------------------------------------------------" << std::endl; + 
std::cerr << "--> b: value: " << value << " t: " << t() << std::endl; EXPECT_EQ( value, t() ); + std::cerr << "-------------------------------------------------" << std::endl; } TEST( test_packed_struct_test, test_little_endian ) @@ -106,12 +112,24 @@ TEST( test_packed_struct_test, test_little_endian ) test_packed_uint< comma::packed::little_endian::uint64 >( comma::uint64( 0x1BCDEF1213141500ULL ) ); test_packed_int< comma::packed::little_endian::int16 >( 1234 ); + test_packed_int< comma::packed::little_endian::int16 >( 256 * 128 - 1 ); + test_packed_int< comma::packed::little_endian::int16 >( 0 ); + test_packed_int< comma::packed::little_endian::int16 >( -1 ); + test_packed_int< comma::packed::little_endian::int16 >( -2 ); + test_packed_int< comma::packed::little_endian::int16 >( -256 * 128 + 1 ); + //for( comma::int16 i = 256 * 128 - 1; i > 0; --i ) { test_packed_uint< comma::packed::little_endian::int16 >( i ); } + //for( comma::int16 i = 256 * 128 - 1; i > 0; --i ) { test_packed_int< comma::packed::little_endian::int16 >( -i ); } test_packed_int< comma::packed::little_endian::int24 >( 1235 ); test_packed_int< comma::packed::little_endian::int24 >( 8388607 ); test_packed_int< comma::packed::little_endian::int32 >( 8388607 ); test_packed_int< comma::packed::little_endian::int32 >( 1236 ); test_packed_int< comma::packed::little_endian::int16 >( -1231 ); + test_packed_int< comma::packed::little_endian::int24 >( -1 ); + test_packed_int< comma::packed::little_endian::int24 >( -2 ); + test_packed_int< comma::packed::little_endian::int24 >( -256 ); test_packed_int< comma::packed::little_endian::int24 >( -1232 ); + //for( unsigned int i = 0; i < 8388608; ++i ) { test_packed_int< comma::packed::little_endian::int24 >( -i ); } + test_packed_int< comma::packed::little_endian::int24 >( -1000000 ); test_packed_int< comma::packed::little_endian::int24 >( -8388608 ); test_packed_int< comma::packed::little_endian::int32 >( -1233 ); test_packed_int< 
comma::packed::little_endian::int64 >( -4321 ); @@ -130,7 +148,10 @@ TEST( test_packed_struct_test, test_big_endian ) test_packed_int< comma::packed::big_endian::int32 >( 1234 ); test_packed_int< comma::packed::big_endian::int16 >( -1234 ); test_packed_int< comma::packed::big_endian::int32 >( -1234 ); + test_packed_int< comma::packed::big_endian::int24 >( -1 ); + test_packed_int< comma::packed::big_endian::int24 >( -2 ); test_packed_int< comma::packed::big_endian::int24 >( -1232 ); + test_packed_int< comma::packed::big_endian::int24 >( -8388607 ); test_packed_int< comma::packed::big_endian::int24 >( -8388608 ); } From c698148e2fb165172fb114af7579a09412669031 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 20 May 2019 12:01:37 +1000 Subject: [PATCH 0027/1056] csv-shape: repeat operation implemented --- csv/applications/csv-shape.cpp | 52 ++++++++++++++++++++++-------- csv/test/csv-shape/repeat/expected | 17 ++++++++++ csv/test/csv-shape/repeat/input | 2 ++ 3 files changed, 57 insertions(+), 14 deletions(-) create mode 100644 csv/test/csv-shape/repeat/expected create mode 100644 csv/test/csv-shape/repeat/input diff --git a/csv/applications/csv-shape.cpp b/csv/applications/csv-shape.cpp index 5218d03af..bae53b53d 100644 --- a/csv/applications/csv-shape.cpp +++ b/csv/applications/csv-shape.cpp @@ -50,6 +50,7 @@ static void usage( bool verbose=false ) std::cerr << " loop: same as concatenate, but with an additional last record:" << std::endl; std::cerr << " last input record concatenated with the first record (hence, 'loop')" << std::endl; std::cerr << " this mode always uses the sliding window for overlapping groups" << std::endl; + std::cerr << " repeat: repeat input given number of times, e.g. 
csv-shape repeat --size 5" << std::endl; std::cerr << std::endl; std::cerr << "Usage: cat data.csv | csv-shape []" << std::endl; std::cerr << std::endl; @@ -109,11 +110,11 @@ static void simple_binary_pass_through(const comma::csv::format& f, bool flush=f bool is_binary; -class concatenate_impl_ +class concatenate_ { public: - concatenate_impl_() + concatenate_() : use_sliding_window_(false) , bidirectional_(false) , reverse_(false) @@ -139,7 +140,7 @@ class concatenate_impl_ if( size_ < 2 ) { std::cerr << comma::verbose.app_name() << ": expected --size,-n= value to be greater than 1" << std::endl; return 1; } expected_records_ = step_ * ( size_ - 1 ) + 1; if( options.exists("--expected-records") ) { std::cout << expected_records_ << std::endl; return 0; }; - comma::csv::input_stream< input_t > istream(std::cin, csv); + comma::csv::input_stream< input_t > istream( std::cin, csv ); std::deque< std::string > deque; std::deque< std::string > first; bool has_block_ = csv.has_field( "block" ); @@ -169,12 +170,9 @@ class concatenate_impl_ return 0; } - struct input_t { - comma::uint32 block = 0; - }; + struct input_t { comma::uint32 block = 0; }; private: - bool use_sliding_window_; bool bidirectional_; bool reverse_; @@ -234,14 +232,42 @@ class concatenate_impl_ namespace comma { namespace visiting { -template <> struct traits< concatenate_impl_::input_t > +template <> struct traits< concatenate_::input_t > { - template < typename K, typename V > static void visit( const K&, const concatenate_impl_::input_t& p, V& v ) { v.apply("block", p.block); } - template < typename K, typename V > static void visit( const K&, concatenate_impl_::input_t& p, V& v ) { v.apply("block", p.block); } + template < typename K, typename V > static void visit( const K&, const concatenate_::input_t& p, V& v ) { v.apply("block", p.block); } + template < typename K, typename V > static void visit( const K&, concatenate_::input_t& p, V& v ) { v.apply("block", p.block); } }; } } // namespace comma 
{ namespace visiting { +static int repeat_( const comma::command_line_options& options, const comma::csv::options& csv ) +{ + unsigned int size = options.value< unsigned int >( "--size,-n" ); + if( csv.binary() ) + { + typedef concatenate_::input_t input_t; // quick and dirty + comma::csv::input_stream< input_t > is( std::cin, csv ); // quick and dirty, will be slow on ascii + while( is.ready() || ( std::cin.good() && !std::cin.eof() ) ) + { + const input_t* p = is.read(); + if( !p ) { break; } + for( unsigned int i = 0; i < size; ++i ) { std::cout.write( is.binary().last(), csv.format().size() ); } + if( csv.flush ) { std::cout.flush(); } + } + } + else + { + while( std::cin.good() && !std::cin.eof() ) + { + std::string line; + std::getline( std::cin, line ); + if( comma::strip( line ).empty() ) { continue; } + for( unsigned int i = 0; i < size; ++i ) { std::cout << line << std::endl; } + } + } + return 0; +} + int main( int ac, char** av ) { try @@ -254,10 +280,8 @@ int main( int ac, char** av ) is_binary = csv.binary(); if( unnamed.empty() ) { std::cerr << comma::verbose.app_name() << ": please specify operations" << std::endl; exit( 1 ); } std::string operation = unnamed[0]; - if( operation == "concatenate" || operation == "loop" ) - { - return concatenate_impl_().run(options, csv); - } + if( operation == "concatenate" || operation == "loop" ) { return concatenate_().run(options, csv); } + if( operation == "repeat" ) { return repeat_( options, csv ); } std::cerr << comma::verbose.app_name() << ": operation not supported or unknown: '" << operation << '\'' << std::endl; return 1; } diff --git a/csv/test/csv-shape/repeat/expected b/csv/test/csv-shape/repeat/expected new file mode 100644 index 000000000..972463d6e --- /dev/null +++ b/csv/test/csv-shape/repeat/expected @@ -0,0 +1,17 @@ +ascii/output/line[0]="hello" +ascii/output/line[1]="hello" +ascii/output/line[2]="hello" +ascii/output/line[3]="world" +ascii/output/line[4]="world" +ascii/output/line[5]="world" 
+ascii/status=0 +binary/output/line[0]="1" +binary/output/line[1]="1" +binary/output/line[2]="1" +binary/output/line[3]="2" +binary/output/line[4]="2" +binary/output/line[5]="2" +binary/output/line[6]="3" +binary/output/line[7]="3" +binary/output/line[8]="3" +binary/status=0 diff --git a/csv/test/csv-shape/repeat/input b/csv/test/csv-shape/repeat/input new file mode 100644 index 000000000..9d2e8c57c --- /dev/null +++ b/csv/test/csv-shape/repeat/input @@ -0,0 +1,2 @@ +ascii="( echo hello; echo; echo world ) | csv-shape repeat --size 3" +binary="( echo 1; echo 2; echo 3 ) | csv-to-bin ui | csv-shape repeat --size 3 --binary ui | csv-from-bin ui" From 3fb92084f3b7414eabdcab1ca0e807623f9eecbd Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 27 May 2019 18:58:12 +1000 Subject: [PATCH 0028/1056] csv-interval: minor refactoring --- csv/applications/csv-interval.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/csv/applications/csv-interval.cpp b/csv/applications/csv-interval.cpp index 338eaee1b..6c538c608 100644 --- a/csv/applications/csv-interval.cpp +++ b/csv/applications/csv-interval.cpp @@ -394,9 +394,9 @@ struct intervals } } - void run() + void read( std::istream& is = std::cin ) { - comma::csv::input_stream< interval_t< From, To > > istream( std::cin, csv ); + comma::csv::input_stream< interval_t< From, To > > istream( is, csv ); comma::csv::ascii< interval_t< std::string > > ascii( csv.fields ); if( !first_line.empty() ) { @@ -413,7 +413,7 @@ struct intervals if( verbose ) { std::cerr << app_name << ": from: " << from << " to: " << to << " payload: " << payload << std::endl; } add( from, to, payload ); } - while( istream.ready() || std::cin.good() ) + while( istream.ready() || is.good() ) { const interval_t< From, To >* interval = istream.read(); if( !interval ) { break; } @@ -441,7 +441,12 @@ struct intervals if( verbose ) { std::cerr << app_name << ": from: " << from << " to: " << to << " payload: " << ( csv.binary() ? 
"" : payload ) << std::endl; } add( from, to, payload ); } - write(); + } + + void run() + { + this->read(); + this->write(); } }; From d900b93289372ee48503bac3562cc67e2268cadb Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 28 May 2019 12:56:07 +1000 Subject: [PATCH 0029/1056] csv-interval: more refactoring --- csv/applications/csv-interval.cpp | 164 ++++++++++++++---------------- 1 file changed, 76 insertions(+), 88 deletions(-) diff --git a/csv/applications/csv-interval.cpp b/csv/applications/csv-interval.cpp index 6c538c608..41a54a662 100644 --- a/csv/applications/csv-interval.cpp +++ b/csv/applications/csv-interval.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include "../../application/command_line_options.h" @@ -46,7 +47,6 @@ static const std::string app_name = "csv-interval"; static bool verbose; static bool debug; -static std::string first_line; static bool append; template < typename T > struct limits @@ -102,9 +102,9 @@ static void usage( bool verbose = false ) // std::cerr << " --input-format: print input format and exit" << std::endl; std::cerr << " --output-fields: print output fields and exit" << std::endl; // std::cerr << " --output-format: print output format and exit" << std::endl; - std::cerr << " --empty: empty value used to signify unbounded intervals" << std::endl; + std::cerr << " --empty=[]: empty value used to signify unbounded intervals" << std::endl; std::cerr << " default for time is \"not-a-date-time\"" << std::endl; - std::cerr << " --format: input format (ascii only), also affects the --limits option; if not given the format is guessed" << std::endl; + std::cerr << " --format=[]: input format (ascii only), also affects the --limits option; if not given the format is guessed" << std::endl; std::cerr << " --intervals-only: only output the intervals, ignore payload if any" << std::endl; std::cerr << " --limits,-l: replace empty bounds with type limits" << std::endl; std::cerr << " b : " << (int)limits< 
char >::lowest() << " " << (int)limits< char >::max() << std::endl; @@ -295,6 +295,7 @@ struct intervals , intervals_only( options.exists( "--intervals-only" ) ) , use_limits( options.exists( "--limits,-l" ) ) { + csv.full_xpath = false; if( csv.fields.empty() ) { csv.fields = comma::join( comma::csv::names< interval_t< From, To > >(), ',' ); } if( ocsv.fields.empty() || intervals_only ) { @@ -394,7 +395,7 @@ struct intervals } } - void read( std::istream& is = std::cin ) + void read( std::istream& is, const std::string& first_line ) // preparing for adding operations { comma::csv::input_stream< interval_t< From, To > > istream( is, csv ); comma::csv::ascii< interval_t< std::string > > ascii( csv.fields ); @@ -443,107 +444,94 @@ struct intervals } } - void run() + void make( const std::string& first_line ) { - this->read(); + this->read( std::cin, first_line ); this->write(); } }; -// template < typename From > static void run( const comma::command_line_options& options, const comma::csv::format::types_enum to_type ) -// { -// switch( to_type ) -// { -// case comma::csv::format::int8: intervals< From, char >( options ).run(); break; -// case comma::csv::format::uint8: intervals< From, unsigned char >( options ).run(); break; -// case comma::csv::format::int16: intervals< From, comma::int16 >( options ).run(); break; -// case comma::csv::format::uint16: intervals< From, comma::uint16 >( options ).run(); break; -// case comma::csv::format::int32: intervals< From, comma::int32 >( options ).run(); break; -// case comma::csv::format::uint32: intervals< From, comma::uint32 >( options ).run(); break; -// case comma::csv::format::int64: intervals< From, comma::int64 >( options ).run(); break; -// case comma::csv::format::uint64: intervals< From, comma::uint64 >( options ).run(); break; -// case comma::csv::format::char_t: intervals< From, char >( options ).run(); break; -// case comma::csv::format::float_t: intervals< From, float >( options ).run(); break; -// case 
comma::csv::format::double_t: intervals< From, double >( options ).run(); break; -// default: COMMA_THROW( comma::exception, "from/to type mismatch" ); break; -// } -// } +static std::tuple< comma::csv::format::types_enum, std::string > interval_type( std::istream& is, comma::csv::options csv, const std::string& format ) +{ + if( csv.fields.empty() ) { csv.fields = comma::join( comma::csv::names< interval_t< double > >(), ',' ); } + if( !csv.has_field( "from,to" ) ) { COMMA_THROW( comma::exception, "expected from and to fields" ); } + std::string first_line; + csv.full_xpath = false; + if( !csv.binary() ) + { + if( format.empty() ) + { + while( std::cin.good() && first_line.empty() ) { std::getline( is, first_line ); } + if( first_line.empty() ) { exit( 0 ); } // quick and dirty + csv.format( comma::csv::impl::unstructured::guess_format( first_line, csv.delimiter ) ); + if( verbose ) { std::cerr << app_name << ": guessed format: " << csv.format().string() << std::endl;; } + } + else + { + csv.format( format ); + } + } + const std::vector< std::string >& fields = comma::split( csv.fields, ',' ); + unsigned int from_index = 0; + unsigned int to_index = 1; + for( unsigned int i = 0; i < fields.size(); ++i ) { if( fields[i] == "from" ) { from_index = i; break; } } + for( unsigned int i = 0; i < fields.size(); ++i ) { if( fields[i] == "to" ) { to_index = i; break; } } + const comma::csv::format::types_enum from_type = csv.format().offset( from_index ).type; + const comma::csv::format::types_enum to_type = csv.format().offset( to_index ).type; + if( ( ( from_type == comma::csv::format::time || from_type == comma::csv::format::long_time ) && ( to_type != comma::csv::format::time && to_type != comma::csv::format::long_time ) ) || + ( ( ( from_type != comma::csv::format::time && from_type != comma::csv::format::long_time ) && ( to_type == comma::csv::format::time || to_type == comma::csv::format::long_time ) ) ) ) + { COMMA_THROW( comma::exception, "from/to type mismatch; 
time" ); } + if( ( from_type == comma::csv::format::fixed_string || to_type == comma::csv::format::fixed_string ) && from_type != to_type ) + { COMMA_THROW( comma::exception, "from/to type mismatch; string" ); } + if( from_type != to_type ) { std::cerr << app_name << ": support only from and to of the same type, got from: " << comma::csv::format::to_format( from_type ) << ", to: " << comma::csv::format::to_format( to_type ) << std::endl; exit( 1 ); } + return std::tie( to_type, first_line ); +} int main( int ac, char** av ) { try { - comma::command_line_options options( ac, av ); + comma::command_line_options options( ac, av, usage ); verbose = options.exists( "--verbose,-v" ); debug = options.exists( "--debug" ); - append = options.exists( "--append,-a" ); - if( options.exists( "--help,-h" ) ) { usage( verbose ); } - if( options.exists( "--input-fields" ) ) { std::cout << comma::join( comma::csv::names< interval_t< double > >(), ',' ) << std::endl; return 0; } - if( options.exists( "--output-fields" ) ) { std::cout << comma::join( comma::csv::names< interval_t< double > >(), ',' ) << std::endl; return 0; } - comma::csv::options csv( options ); - csv.full_xpath = false; - if( csv.fields.empty() ) { csv.fields = comma::join( comma::csv::names< interval_t< double > >(), ',' ); } - if( !csv.has_field( "from,to" ) ) { COMMA_THROW( comma::exception, "expected from and to fields" ); } options.assert_mutually_exclusive( "--binary,--format" ); - if( options.exists( "--binary,-b" ) ) {} - else if( options.exists( "--format" ) ) { csv.format( options.value< std::string >( "--format" ) ); } - else + const auto& unnamed = options.unnamed( "--append,-a,--debug,--flush,--input-fields,--output-fields,--intervals-only,--limits,-l", "-.*" ); + std::string operation = unnamed.empty() ? 
"make" : unnamed[0]; + if( operation == "make" ) { - while( std::cin.good() && first_line.empty() ) { std::getline( std::cin, first_line ); } - if( first_line.empty() ) { return 0; } - csv.format( comma::csv::impl::unstructured::guess_format( first_line, csv.delimiter ) ); - if( verbose ) { std::cerr << app_name << ": guessed format: " << csv.format().string() << std::endl;; } + append = options.exists( "--append,-a" ); + if( options.exists( "--input-fields" ) ) { std::cout << comma::join( comma::csv::names< interval_t< double > >(), ',' ) << std::endl; return 0; } + if( options.exists( "--output-fields" ) ) { std::cout << comma::join( comma::csv::names< interval_t< double > >(), ',' ) << std::endl; return 0; } + comma::csv::options csv( options ); + auto t = interval_type( std::cin, comma::csv::options( options ), options.value< std::string >( "--format,-f", "" ) ); + const comma::csv::format::types_enum to_type = std::get< 0 >( t ); + std::string first_line = std::get< 1 >( t ); + switch( to_type ) + { + case comma::csv::format::int8: intervals< char >( options ).make( first_line ); break; + case comma::csv::format::uint8: intervals< unsigned char >( options ).make( first_line ); break; + case comma::csv::format::int16: intervals< comma::int16 >( options ).make( first_line ); break; + case comma::csv::format::uint16: intervals< comma::uint16 >( options ).make( first_line ); break; + case comma::csv::format::int32: intervals< comma::int32 >( options ).make( first_line ); break; + case comma::csv::format::uint32: intervals< comma::uint32 >( options ).make( first_line ); break; + case comma::csv::format::int64: intervals< comma::int64 >( options ).make( first_line ); break; + case comma::csv::format::uint64: intervals< comma::uint64 >( options ).make( first_line ); break; + case comma::csv::format::char_t: intervals< char >( options ).make( first_line ); break; + case comma::csv::format::float_t: intervals< float >( options ).make( first_line ); break; + case 
comma::csv::format::double_t: intervals< double >( options ).make( first_line ); break; + case comma::csv::format::time: + case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options ).make( first_line ); break; + case comma::csv::format::fixed_string: intervals< std::string >( options ).make( first_line ); break; + default: COMMA_THROW( comma::exception, "invalid type" ); break; // never here + } + return 0; } - const std::vector< std::string >& fields = comma::split( csv.fields, ',' ); - unsigned int from_index = 0; - unsigned int to_index = 1; - for( unsigned int i = 0; i < fields.size(); ++i ) { if( fields[i] == "from" ) { from_index = i; break; } } - for( unsigned int i = 0; i < fields.size(); ++i ) { if( fields[i] == "to" ) { to_index = i; break; } } - const comma::csv::format::types_enum from_type = csv.format().offset( from_index ).type; - const comma::csv::format::types_enum to_type = csv.format().offset( to_index ).type; - if( ( ( from_type == comma::csv::format::time || from_type == comma::csv::format::long_time ) && ( to_type != comma::csv::format::time && to_type != comma::csv::format::long_time ) ) || - ( ( ( from_type != comma::csv::format::time && from_type != comma::csv::format::long_time ) && ( to_type == comma::csv::format::time || to_type == comma::csv::format::long_time ) ) ) ) - { COMMA_THROW( comma::exception, "from/to type mismatch; time" ); } - if( ( from_type == comma::csv::format::fixed_string || to_type == comma::csv::format::fixed_string ) && from_type != to_type ) - { COMMA_THROW( comma::exception, "from/to type mismatch; string" ); } -// switch( from_type ) -// { -// case comma::csv::format::int8: run< char >( options, to_type ); break; -// case comma::csv::format::uint8: run< unsigned char >( options, to_type ); break; -// case comma::csv::format::int16: run< comma::int16 >( options, to_type ); break; -// case comma::csv::format::uint16: run< comma::uint16 >( options, to_type ); break; -// case 
comma::csv::format::int32: run< comma::int32 >( options, to_type ); break; -// case comma::csv::format::uint32: run< comma::uint32 >( options, to_type ); break; -// case comma::csv::format::int64: run< comma::int64 >( options, to_type ); break; -// case comma::csv::format::uint64: run< comma::uint64 >( options, to_type ); break; -// case comma::csv::format::char_t: run< char >( options, to_type ); break; -// case comma::csv::format::float_t: run< float >( options, to_type ); break; -// case comma::csv::format::double_t: run< double >( options, to_type ); break; -// case comma::csv::format::time: -// case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options ).run(); break; -// case comma::csv::format::fixed_string: intervals< std::string >( options ).run(); break; -// default: COMMA_THROW( comma::exception, "unknown type" ); break; -// } - if( from_type != to_type ) { std::cerr << app_name << ": support only from and to of the same type, got from: " << comma::csv::format::to_format( from_type ) << ", to: " << comma::csv::format::to_format( to_type ) << std::endl; return 1; } - switch( to_type ) + if( operation == "select" ) { - case comma::csv::format::int8: intervals< char >( options ).run(); break; - case comma::csv::format::uint8: intervals< unsigned char >( options ).run(); break; - case comma::csv::format::int16: intervals< comma::int16 >( options ).run(); break; - case comma::csv::format::uint16: intervals< comma::uint16 >( options ).run(); break; - case comma::csv::format::int32: intervals< comma::int32 >( options ).run(); break; - case comma::csv::format::uint32: intervals< comma::uint32 >( options ).run(); break; - case comma::csv::format::int64: intervals< comma::int64 >( options ).run(); break; - case comma::csv::format::uint64: intervals< comma::uint64 >( options ).run(); break; - case comma::csv::format::char_t: intervals< char >( options ).run(); break; - case comma::csv::format::float_t: intervals< float >( options ).run(); 
break; - case comma::csv::format::double_t: intervals< double >( options ).run(); break; - case comma::csv::format::time: - case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options ).run(); break; - case comma::csv::format::fixed_string: intervals< std::string >( options ).run(); break; - default: COMMA_THROW( comma::exception, "from/to type mismatch" ); break; + std::cerr << "csv-interval: select: todo" << std::endl; + return 1; } - return 0; + std::cerr << "csv-interval: expected operation, got: '" << operation << "'" << std::endl; } catch( std::exception& ex ) { std::cerr << app_name << ": " << ex.what() << std::endl; } catch( ... ) { std::cerr << app_name << ": unknown exception" << std::endl; } From 02be391168c9eff33a2af4ffcf25512aac123820 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 28 May 2019 13:53:02 +1000 Subject: [PATCH 0030/1056] csv-interval: select operation: implementing... --- csv/applications/csv-interval.cpp | 71 ++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/csv/applications/csv-interval.cpp b/csv/applications/csv-interval.cpp index 41a54a662..50c71bb81 100644 --- a/csv/applications/csv-interval.cpp +++ b/csv/applications/csv-interval.cpp @@ -40,8 +40,12 @@ #include "../../application/command_line_options.h" #include "../../base/exception.h" #include "../../csv/stream.h" -#include "../../visiting/traits.h" +#include "../../csv/traits.h" #include "../../csv/impl/unstructured.h" +#include "../../io/stream.h" +#include "../../name_value/map.h" +#include "../../name_value/parser.h" +#include "../../visiting/traits.h" static const std::string app_name = "csv-interval"; @@ -444,10 +448,17 @@ struct intervals } } - void make( const std::string& first_line ) + int make( const std::string& first_line ) { this->read( std::cin, first_line ); this->write(); + return 0; + } + + int select( const std::string& first_line ) + { + this->read( std::cin, first_line ); + std::cerr << 
"csv-interval select: todo" << std::endl; exit( 1 ); } }; @@ -508,26 +519,54 @@ int main( int ac, char** av ) std::string first_line = std::get< 1 >( t ); switch( to_type ) { - case comma::csv::format::int8: intervals< char >( options ).make( first_line ); break; - case comma::csv::format::uint8: intervals< unsigned char >( options ).make( first_line ); break; - case comma::csv::format::int16: intervals< comma::int16 >( options ).make( first_line ); break; - case comma::csv::format::uint16: intervals< comma::uint16 >( options ).make( first_line ); break; - case comma::csv::format::int32: intervals< comma::int32 >( options ).make( first_line ); break; - case comma::csv::format::uint32: intervals< comma::uint32 >( options ).make( first_line ); break; - case comma::csv::format::int64: intervals< comma::int64 >( options ).make( first_line ); break; - case comma::csv::format::uint64: intervals< comma::uint64 >( options ).make( first_line ); break; - case comma::csv::format::char_t: intervals< char >( options ).make( first_line ); break; - case comma::csv::format::float_t: intervals< float >( options ).make( first_line ); break; - case comma::csv::format::double_t: intervals< double >( options ).make( first_line ); break; + case comma::csv::format::int8: intervals< char >( options ).make( first_line ); return 0; + case comma::csv::format::uint8: intervals< unsigned char >( options ).make( first_line ); return 0; + case comma::csv::format::int16: intervals< comma::int16 >( options ).make( first_line ); return 0; + case comma::csv::format::uint16: intervals< comma::uint16 >( options ).make( first_line ); return 0; + case comma::csv::format::int32: intervals< comma::int32 >( options ).make( first_line ); return 0; + case comma::csv::format::uint32: intervals< comma::uint32 >( options ).make( first_line ); return 0; + case comma::csv::format::int64: intervals< comma::int64 >( options ).make( first_line ); return 0; + case comma::csv::format::uint64: intervals< comma::uint64 
>( options ).make( first_line ); return 0; + case comma::csv::format::char_t: intervals< char >( options ).make( first_line ); return 0; + case comma::csv::format::float_t: intervals< float >( options ).make( first_line ); return 0; + case comma::csv::format::double_t: intervals< double >( options ).make( first_line ); return 0; case comma::csv::format::time: - case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options ).make( first_line ); break; - case comma::csv::format::fixed_string: intervals< std::string >( options ).make( first_line ); break; - default: COMMA_THROW( comma::exception, "invalid type" ); break; // never here + case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options ).make( first_line ); return 0; + case comma::csv::format::fixed_string: intervals< std::string >( options ).make( first_line ); return 0; + default: COMMA_THROW( comma::exception, "invalid type" ); return 0; // never here } return 0; } if( operation == "select" ) { + // todo + //if( options.exists( "--input-fields" ) ) { std::cout << comma::join( comma::csv::names< interval_t< double > >(), ',' ) << std::endl; return 0; } + //if( options.exists( "--output-fields" ) ) { std::cout << comma::join( comma::csv::names< interval_t< double > >(), ',' ) << std::endl; return 0; } + auto i = options.value< std::string >( "--intervals" ); + comma::csv::options csv = comma::name_value::parser().get< comma::csv::options >( i ); + std::string format = comma::name_value::map( i ).value< std::string >( "format", "" ); + comma::io::istream is( csv.filename ); + auto t = interval_type( *is, csv, format ); + const comma::csv::format::types_enum to_type = std::get< 0 >( t ); + std::string first_line = std::get< 1 >( t ); + switch( to_type ) + { + case comma::csv::format::int8: intervals< char >( options ).select( first_line ); return 0; + case comma::csv::format::uint8: intervals< unsigned char >( options ).select( first_line ); return 0; + case 
comma::csv::format::int16: intervals< comma::int16 >( options ).select( first_line ); return 0; + case comma::csv::format::uint16: intervals< comma::uint16 >( options ).select( first_line ); return 0; + case comma::csv::format::int32: intervals< comma::int32 >( options ).select( first_line ); return 0; + case comma::csv::format::uint32: intervals< comma::uint32 >( options ).select( first_line ); return 0; + case comma::csv::format::int64: intervals< comma::int64 >( options ).select( first_line ); return 0; + case comma::csv::format::uint64: intervals< comma::uint64 >( options ).select( first_line ); return 0; + case comma::csv::format::char_t: intervals< char >( options ).select( first_line ); return 0; + case comma::csv::format::float_t: intervals< float >( options ).select( first_line ); return 0; + case comma::csv::format::double_t: intervals< double >( options ).select( first_line ); return 0; + case comma::csv::format::time: + case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options ).select( first_line ); return 0; + case comma::csv::format::fixed_string: intervals< std::string >( options ).select( first_line ); return 0; + default: COMMA_THROW( comma::exception, "invalid type" ); return 0; // never here + } std::cerr << "csv-interval: select: todo" << std::endl; return 1; } From 0bc6759344c0cf84f8e8348f4e3739b3e8635f7d Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 28 May 2019 17:47:23 +1000 Subject: [PATCH 0031/1056] csv-interval: renamed to csv-intervals; operations introduced; contain operation implemented --- csv/applications/CMakeLists.txt | 6 +- .../{csv-interval.cpp => csv-intervals.cpp} | 118 ++++++++++++------ csv/test/csv-interval/ascii/test | 3 - csv/test/csv-interval/overlap-count/input | 16 --- .../append/expected | 0 .../append/input | 2 +- csv/test/csv-intervals/ascii/test | 3 + .../ascii/test.01/expected | 0 .../ascii/test.01/input | 0 .../ascii/test.01/options | 0 .../ascii/test.02/expected | 0 
.../ascii/test.02/input | 0 .../ascii/test.02/options | 0 .../ascii/test.03/expected | 0 .../ascii/test.03/input | 0 .../ascii/test.03/options | 0 .../ascii/test.04/disabled | 0 .../ascii/test.04/expected | 0 .../ascii/test.04/input | 0 .../ascii/test.04/options | 0 .../ascii/test.05/expected | 0 .../ascii/test.05/input | 0 .../ascii/test.05/options | 0 .../ascii/test.06/expected | 0 .../ascii/test.06/input | 0 .../ascii/test.06/options | 0 .../ascii/test.07/expected | 0 .../ascii/test.07/input | 0 .../ascii/test.07/options | 0 .../ascii/test.08/expected | 0 .../ascii/test.08/input | 0 .../ascii/test.08/options | 0 .../ascii/test.09/expected | 0 .../ascii/test.09/input | 0 .../ascii/test.09/options | 0 .../ascii/test.10/expected | 0 .../ascii/test.10/input | 0 .../ascii/test.10/options | 0 .../ascii/test.11/expected | 0 .../ascii/test.11/input | 0 .../ascii/test.11/options | 0 .../binary/test | 2 +- .../binary/test.01/expected | 0 .../binary/test.01/input | 0 .../binary/test.01/options | 0 .../binary/test.02/expected | 0 .../binary/test.02/input | 0 .../binary/test.02/options | 0 .../binary/test.03/disabled | 0 .../binary/test.03/expected | 0 .../binary/test.03/input | 0 .../binary/test.03/options | 0 .../binary/test.04/disabled | 0 .../binary/test.04/expected | 0 .../binary/test.04/input | 0 .../binary/test.04/options | 0 .../binary/test.05/expected | 0 .../binary/test.05/input | 0 .../binary/test.05/options | 0 .../binary/test.06/expected | 0 .../binary/test.06/input | 0 .../binary/test.06/options | 0 .../binary/test.07/expected | 0 .../binary/test.07/input | 0 .../binary/test.07/options | 0 .../binary/test.08/expected | 0 .../binary/test.08/input | 0 .../binary/test.08/options | 0 .../binary/test.09/expected | 0 .../binary/test.09/input | 0 .../binary/test.09/options | 0 .../overlap-count/expected | 0 csv/test/csv-intervals/overlap-count/input | 16 +++ 73 files changed, 105 insertions(+), 61 deletions(-) rename csv/applications/{csv-interval.cpp => 
csv-intervals.cpp} (85%) delete mode 100755 csv/test/csv-interval/ascii/test delete mode 100644 csv/test/csv-interval/overlap-count/input rename csv/test/{csv-interval => csv-intervals}/append/expected (100%) rename csv/test/{csv-interval => csv-intervals}/append/input (79%) create mode 100755 csv/test/csv-intervals/ascii/test rename csv/test/{csv-interval => csv-intervals}/ascii/test.01/expected (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.01/input (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.01/options (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.02/expected (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.02/input (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.02/options (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.03/expected (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.03/input (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.03/options (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.04/disabled (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.04/expected (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.04/input (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.04/options (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.05/expected (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.05/input (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.05/options (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.06/expected (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.06/input (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.06/options (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.07/expected (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.07/input (100%) rename csv/test/{csv-interval => 
csv-intervals}/ascii/test.07/options (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.08/expected (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.08/input (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.08/options (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.09/expected (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.09/input (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.09/options (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.10/expected (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.10/input (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.10/options (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.11/expected (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.11/input (100%) rename csv/test/{csv-interval => csv-intervals}/ascii/test.11/options (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test (65%) rename csv/test/{csv-interval => csv-intervals}/binary/test.01/expected (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.01/input (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.01/options (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.02/expected (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.02/input (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.02/options (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.03/disabled (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.03/expected (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.03/input (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.03/options (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.04/disabled (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.04/expected (100%) rename csv/test/{csv-interval => 
csv-intervals}/binary/test.04/input (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.04/options (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.05/expected (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.05/input (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.05/options (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.06/expected (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.06/input (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.06/options (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.07/expected (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.07/input (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.07/options (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.08/expected (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.08/input (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.08/options (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.09/expected (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.09/input (100%) rename csv/test/{csv-interval => csv-intervals}/binary/test.09/options (100%) rename csv/test/{csv-interval => csv-intervals}/overlap-count/expected (100%) create mode 100644 csv/test/csv-intervals/overlap-count/input diff --git a/csv/applications/CMakeLists.txt b/csv/applications/CMakeLists.txt index 7cd1fb6c4..5a76792c2 100644 --- a/csv/applications/CMakeLists.txt +++ b/csv/applications/CMakeLists.txt @@ -102,9 +102,9 @@ add_executable( csv-enumerate ${dir}/csv-enumerate.cpp ) target_link_libraries ( csv-enumerate ${comma_ALL_EXTERNAL_LIBRARIES} comma_application comma_io comma_string comma_xpath comma_csv ) install( TARGETS csv-enumerate RUNTIME DESTINATION ${comma_INSTALL_BIN_DIR} COMPONENT Runtime ) -add_executable( csv-interval ${dir}/csv-interval.cpp ) 
-target_link_libraries ( csv-interval ${comma_ALL_EXTERNAL_LIBRARIES} comma_application comma_csv comma_io comma_xpath ) -install( TARGETS csv-interval RUNTIME DESTINATION ${comma_INSTALL_BIN_DIR} COMPONENT Runtime ) +add_executable( csv-intervals ${dir}/csv-intervals.cpp ) +target_link_libraries ( csv-intervals ${comma_ALL_EXTERNAL_LIBRARIES} comma_application comma_csv comma_io comma_xpath ) +install( TARGETS csv-intervals RUNTIME DESTINATION ${comma_INSTALL_BIN_DIR} COMPONENT Runtime ) add_executable( csv-units ${dir}/csv-units.cpp ) target_link_libraries ( csv-units ${comma_ALL_EXTERNAL_LIBRARIES} comma_application comma_csv comma_io comma_xpath comma_string ) diff --git a/csv/applications/csv-interval.cpp b/csv/applications/csv-intervals.cpp similarity index 85% rename from csv/applications/csv-interval.cpp rename to csv/applications/csv-intervals.cpp index 50c71bb81..dc710ed6a 100644 --- a/csv/applications/csv-interval.cpp +++ b/csv/applications/csv-intervals.cpp @@ -30,6 +30,7 @@ /// @author Vinny Do +#include #include #include #include @@ -47,7 +48,7 @@ #include "../../name_value/parser.h" #include "../../visiting/traits.h" -static const std::string app_name = "csv-interval"; +static const std::string app_name = "csv-intervals"; static bool verbose; static bool debug; @@ -95,21 +96,22 @@ static void usage( bool verbose = false ) { std::cerr << "takes csv intervals and separates them at points of overlap if any" << std::endl; std::cerr << std::endl; - std::cerr << "usage: cat intervals.csv | " << app_name << " [OPTIONS...]" << std::endl; + std::cerr << "usage: cat intervals.csv | csv-intervals []" << std::endl; + std::cerr << std::endl; + std::cerr << "operations" << std::endl; + std::cerr << " contain: given a set of intervals, take scalars on stdin, append 1 if contained in the intervals, 0 if not" << std::endl; + std::cerr << " make: make intervals" << std::endl; std::cerr << std::endl; std::cerr << "options" << std::endl; std::cerr << " --help,-h: show 
help; --help --verbose for more help" << std::endl; std::cerr << " --verbose,-v: more info" << std::endl; - std::cerr << " --append,-a: append output intervals instead of outputting them in place" << std::endl; - std::cerr << " --debug: print debug" << std::endl; + std::cerr << " --debug: more debug output" << std::endl; std::cerr << " --input-fields: print input fields and exit" << std::endl; // std::cerr << " --input-format: print input format and exit" << std::endl; std::cerr << " --output-fields: print output fields and exit" << std::endl; // std::cerr << " --output-format: print output format and exit" << std::endl; std::cerr << " --empty=[]: empty value used to signify unbounded intervals" << std::endl; std::cerr << " default for time is \"not-a-date-time\"" << std::endl; - std::cerr << " --format=[]: input format (ascii only), also affects the --limits option; if not given the format is guessed" << std::endl; - std::cerr << " --intervals-only: only output the intervals, ignore payload if any" << std::endl; std::cerr << " --limits,-l: replace empty bounds with type limits" << std::endl; std::cerr << " b : " << (int)limits< char >::lowest() << " " << (int)limits< char >::max() << std::endl; std::cerr << " ub : " << (int)limits< unsigned char >::lowest() << " " << (int)limits< unsigned char >::max() << std::endl; @@ -125,9 +127,21 @@ static void usage( bool verbose = false ) std::cerr << " s : \"" << limits< std::string >::lowest() << "\" \"" << limits< std::string >::max() << "\"" << std::endl; std::cerr << " t : " << limits< boost::posix_time::ptime >::lowest() << " " << limits< boost::posix_time::ptime >::max() << std::endl; std::cerr << " lt : " << limits< boost::posix_time::ptime >::lowest() << " " << limits< boost::posix_time::ptime >::max() << std::endl; - std::cerr << " --overlap-count=[]; output only intervals with overlaps" << std::endl; - std::cerr << " --overlap-count-min,--min-overlap-count=[]; output only intervals with at least overlaps" << 
std::endl; - std::cerr << " --overlap-count-max,--max-overlap-count=[]; output only intervals with not more than overlaps" << std::endl; + std::cerr << std::endl; + std::cerr << "operation details" << std::endl; + std::cerr << std::endl; + std::cerr << " contain" << std::endl; + std::cerr << " options" << std::endl; + std::cerr << " --intervals=: file or stream name" << std::endl; + std::cerr << std::endl; + std::cerr << " make" << std::endl; + std::cerr << " options" << std::endl; + std::cerr << " --append,-a: append output intervals instead of outputting them in place" << std::endl; + std::cerr << " --format=[]: input format (ascii only), also affects the --limits option; if not given the format is guessed" << std::endl; + std::cerr << " --intervals-only: only output the intervals, ignore payload if any" << std::endl; + std::cerr << " --overlap-count=[]; output only intervals with overlaps" << std::endl; + std::cerr << " --overlap-count-min,--min-overlap-count=[]; output only intervals with at least overlaps" << std::endl; + std::cerr << " --overlap-count-max,--max-overlap-count=[]; output only intervals with not more than overlaps" << std::endl; std::cerr << std::endl; std::cerr << "ascii notes" << std::endl; std::cerr << " unbounded intervals may be indicated by no value (e.g. 
,3 \u2261 -\u221e,3), both sides unbounded is also supported" << std::endl; @@ -252,6 +266,13 @@ struct interval_t to_t< To > to; }; +template < typename T > struct scalar_t +{ + T scalar; + scalar_t() {} + scalar_t( const T& t ): scalar( t ) {} +}; + namespace comma { namespace visiting { template < typename T > struct traits< from_t< T > > @@ -272,6 +293,12 @@ template < typename From, typename To > struct traits< interval_t< From, To > > template < typename K, typename V > static void visit( const K&, const interval_t< From, To >& p, V& v ) { v.apply( "", p.from ); v.apply( "", p.to ); } }; +template < typename T > struct traits< scalar_t< T > > +{ + template < typename K, typename V > static void visit( const K&, scalar_t< T >& p, V& v ) { v.apply( "scalar", p.scalar ); } + template < typename K, typename V > static void visit( const K&, const scalar_t< T >& p, V& v ) { v.apply( "scalar", p.scalar ); } +}; + } } // namespace comma { namespace visiting { template < typename From, typename To = From > @@ -455,10 +482,29 @@ struct intervals return 0; } - int select( const std::string& first_line ) + int contain( std::istream& is, const std::string& first_line ) { - this->read( std::cin, first_line ); - std::cerr << "csv-interval select: todo" << std::endl; exit( 1 ); + comma::csv::options icsv( options, "", false ); + comma::csv::input_stream< scalar_t< From > > istream( std::cin, icsv ); + comma::csv::output_stream< scalar_t< bool > > ostream( std::cout, icsv.binary() ); + auto tied = comma::csv::make_tied( istream, ostream ); + this->read( is, first_line ); // todo: support block + while( istream.ready() || std::cin.good() ) + { + auto p = istream.read(); + if( !p ) { break; } + bool contained = false; // todo? 
use boost::...::query + for( typename map_t::iterator it = map.begin(); it != map.end() && !contained; ++it ) + { + const bound_t< bound_type >& from = it->first.lower(); + const bound_t< bound_type >& to = it->first.upper(); + contained = ( !from.value || p->scalar >= *from.value ) && ( !to.value || p->scalar < *to.value ); + } + tied.append( scalar_t< bool >( contained ) ); + if( icsv.flush ) { std::cout.flush(); } + } + std::cout.flush(); + return 0; } }; @@ -472,7 +518,7 @@ static std::tuple< comma::csv::format::types_enum, std::string > interval_type( { if( format.empty() ) { - while( std::cin.good() && first_line.empty() ) { std::getline( is, first_line ); } + while( is.good() && first_line.empty() ) { std::getline( is, first_line ); } if( first_line.empty() ) { exit( 0 ); } // quick and dirty csv.format( comma::csv::impl::unstructured::guess_format( first_line, csv.delimiter ) ); if( verbose ) { std::cerr << app_name << ": guessed format: " << csv.format().string() << std::endl;; } @@ -537,13 +583,12 @@ int main( int ac, char** av ) } return 0; } - if( operation == "select" ) + if( operation == "contain" ) { - // todo - //if( options.exists( "--input-fields" ) ) { std::cout << comma::join( comma::csv::names< interval_t< double > >(), ',' ) << std::endl; return 0; } - //if( options.exists( "--output-fields" ) ) { std::cout << comma::join( comma::csv::names< interval_t< double > >(), ',' ) << std::endl; return 0; } + if( options.exists( "--input-fields" ) ) { std::cout << comma::join( comma::csv::names< scalar_t< double > >(), ',' ) << std::endl; return 0; } + if( options.exists( "--output-fields" ) ) { std::cout << comma::join( comma::csv::names< scalar_t< double > >(), ',' ) << std::endl; return 0; } auto i = options.value< std::string >( "--intervals" ); - comma::csv::options csv = comma::name_value::parser().get< comma::csv::options >( i ); + comma::csv::options csv = comma::name_value::parser( "filename" ).get< comma::csv::options >( i ); std::string 
format = comma::name_value::map( i ).value< std::string >( "format", "" ); comma::io::istream is( csv.filename ); auto t = interval_type( *is, csv, format ); @@ -551,28 +596,27 @@ int main( int ac, char** av ) std::string first_line = std::get< 1 >( t ); switch( to_type ) { - case comma::csv::format::int8: intervals< char >( options ).select( first_line ); return 0; - case comma::csv::format::uint8: intervals< unsigned char >( options ).select( first_line ); return 0; - case comma::csv::format::int16: intervals< comma::int16 >( options ).select( first_line ); return 0; - case comma::csv::format::uint16: intervals< comma::uint16 >( options ).select( first_line ); return 0; - case comma::csv::format::int32: intervals< comma::int32 >( options ).select( first_line ); return 0; - case comma::csv::format::uint32: intervals< comma::uint32 >( options ).select( first_line ); return 0; - case comma::csv::format::int64: intervals< comma::int64 >( options ).select( first_line ); return 0; - case comma::csv::format::uint64: intervals< comma::uint64 >( options ).select( first_line ); return 0; - case comma::csv::format::char_t: intervals< char >( options ).select( first_line ); return 0; - case comma::csv::format::float_t: intervals< float >( options ).select( first_line ); return 0; - case comma::csv::format::double_t: intervals< double >( options ).select( first_line ); return 0; + case comma::csv::format::int8: intervals< char >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::uint8: intervals< unsigned char >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::int16: intervals< comma::int16 >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::uint16: intervals< comma::uint16 >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::int32: intervals< comma::int32 >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::uint32: intervals< comma::uint32 >( 
options ).contain( *is, first_line ); return 0; + case comma::csv::format::int64: intervals< comma::int64 >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::uint64: intervals< comma::uint64 >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::char_t: intervals< char >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::float_t: intervals< float >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::double_t: intervals< double >( options ).contain( *is, first_line ); return 0; case comma::csv::format::time: - case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options ).select( first_line ); return 0; - case comma::csv::format::fixed_string: intervals< std::string >( options ).select( first_line ); return 0; - default: COMMA_THROW( comma::exception, "invalid type" ); return 0; // never here + case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::fixed_string: intervals< std::string >( options ).contain( *is, first_line ); return 0; + default: COMMA_THROW( comma::exception, "invalid type" ); // never here } - std::cerr << "csv-interval: select: todo" << std::endl; - return 1; + return 0; } - std::cerr << "csv-interval: expected operation, got: '" << operation << "'" << std::endl; + std::cerr << "csv-intervals: expected operation, got: '" << operation << "'" << std::endl; } - catch( std::exception& ex ) { std::cerr << app_name << ": " << ex.what() << std::endl; } - catch( ... ) { std::cerr << app_name << ": unknown exception" << std::endl; } + catch( std::exception& ex ) { std::cerr << "csv-invervals: " << ex.what() << std::endl; } + catch( ... 
) { std::cerr << "csv-invervals: unknown exception" << std::endl; } return 1; } diff --git a/csv/test/csv-interval/ascii/test b/csv/test/csv-interval/ascii/test deleted file mode 100755 index 074ec4ca8..000000000 --- a/csv/test/csv-interval/ascii/test +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -csv-interval $( cat options ) | gawk '{ printf( "interval[%d]=%s\n", NR, $0 ); }' diff --git a/csv/test/csv-interval/overlap-count/input b/csv/test/csv-interval/overlap-count/input deleted file mode 100644 index f4e21571d..000000000 --- a/csv/test/csv-interval/overlap-count/input +++ /dev/null @@ -1,16 +0,0 @@ -min[0]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count-min 0" -min[1]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count-min 1" -min[2]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count-min 2" -min[3]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count-min 3" - -max[0]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count-max 0" -max[1]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count-max 1" -max[2]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count-max 2" -max[3]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count-max 3" - -min_max[0]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count-min 1 --overlap-count-max 2" - -equal[0]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count 0" -equal[1]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count 1" -equal[2]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count 2" -equal[3]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --overlap-count 3" diff --git a/csv/test/csv-interval/append/expected b/csv/test/csv-intervals/append/expected similarity index 100% rename from csv/test/csv-interval/append/expected rename to csv/test/csv-intervals/append/expected diff --git 
a/csv/test/csv-interval/append/input b/csv/test/csv-intervals/append/input similarity index 79% rename from csv/test/csv-interval/append/input rename to csv/test/csv-intervals/append/input index 2f584bfa3..bf97c221d 100644 --- a/csv/test/csv-interval/append/input +++ b/csv/test/csv-intervals/append/input @@ -1,3 +1,3 @@ -append[0]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-interval --append" +append[0]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --append" diff --git a/csv/test/csv-intervals/ascii/test b/csv/test/csv-intervals/ascii/test new file mode 100755 index 000000000..940042294 --- /dev/null +++ b/csv/test/csv-intervals/ascii/test @@ -0,0 +1,3 @@ +#!/bin/bash + +csv-intervals make $( cat options ) | gawk '{ printf( "interval[%d]=%s\n", NR, $0 ); }' diff --git a/csv/test/csv-interval/ascii/test.01/expected b/csv/test/csv-intervals/ascii/test.01/expected similarity index 100% rename from csv/test/csv-interval/ascii/test.01/expected rename to csv/test/csv-intervals/ascii/test.01/expected diff --git a/csv/test/csv-interval/ascii/test.01/input b/csv/test/csv-intervals/ascii/test.01/input similarity index 100% rename from csv/test/csv-interval/ascii/test.01/input rename to csv/test/csv-intervals/ascii/test.01/input diff --git a/csv/test/csv-interval/ascii/test.01/options b/csv/test/csv-intervals/ascii/test.01/options similarity index 100% rename from csv/test/csv-interval/ascii/test.01/options rename to csv/test/csv-intervals/ascii/test.01/options diff --git a/csv/test/csv-interval/ascii/test.02/expected b/csv/test/csv-intervals/ascii/test.02/expected similarity index 100% rename from csv/test/csv-interval/ascii/test.02/expected rename to csv/test/csv-intervals/ascii/test.02/expected diff --git a/csv/test/csv-interval/ascii/test.02/input b/csv/test/csv-intervals/ascii/test.02/input similarity index 100% rename from csv/test/csv-interval/ascii/test.02/input rename to csv/test/csv-intervals/ascii/test.02/input diff --git 
a/csv/test/csv-interval/ascii/test.02/options b/csv/test/csv-intervals/ascii/test.02/options similarity index 100% rename from csv/test/csv-interval/ascii/test.02/options rename to csv/test/csv-intervals/ascii/test.02/options diff --git a/csv/test/csv-interval/ascii/test.03/expected b/csv/test/csv-intervals/ascii/test.03/expected similarity index 100% rename from csv/test/csv-interval/ascii/test.03/expected rename to csv/test/csv-intervals/ascii/test.03/expected diff --git a/csv/test/csv-interval/ascii/test.03/input b/csv/test/csv-intervals/ascii/test.03/input similarity index 100% rename from csv/test/csv-interval/ascii/test.03/input rename to csv/test/csv-intervals/ascii/test.03/input diff --git a/csv/test/csv-interval/ascii/test.03/options b/csv/test/csv-intervals/ascii/test.03/options similarity index 100% rename from csv/test/csv-interval/ascii/test.03/options rename to csv/test/csv-intervals/ascii/test.03/options diff --git a/csv/test/csv-interval/ascii/test.04/disabled b/csv/test/csv-intervals/ascii/test.04/disabled similarity index 100% rename from csv/test/csv-interval/ascii/test.04/disabled rename to csv/test/csv-intervals/ascii/test.04/disabled diff --git a/csv/test/csv-interval/ascii/test.04/expected b/csv/test/csv-intervals/ascii/test.04/expected similarity index 100% rename from csv/test/csv-interval/ascii/test.04/expected rename to csv/test/csv-intervals/ascii/test.04/expected diff --git a/csv/test/csv-interval/ascii/test.04/input b/csv/test/csv-intervals/ascii/test.04/input similarity index 100% rename from csv/test/csv-interval/ascii/test.04/input rename to csv/test/csv-intervals/ascii/test.04/input diff --git a/csv/test/csv-interval/ascii/test.04/options b/csv/test/csv-intervals/ascii/test.04/options similarity index 100% rename from csv/test/csv-interval/ascii/test.04/options rename to csv/test/csv-intervals/ascii/test.04/options diff --git a/csv/test/csv-interval/ascii/test.05/expected b/csv/test/csv-intervals/ascii/test.05/expected similarity 
index 100% rename from csv/test/csv-interval/ascii/test.05/expected rename to csv/test/csv-intervals/ascii/test.05/expected diff --git a/csv/test/csv-interval/ascii/test.05/input b/csv/test/csv-intervals/ascii/test.05/input similarity index 100% rename from csv/test/csv-interval/ascii/test.05/input rename to csv/test/csv-intervals/ascii/test.05/input diff --git a/csv/test/csv-interval/ascii/test.05/options b/csv/test/csv-intervals/ascii/test.05/options similarity index 100% rename from csv/test/csv-interval/ascii/test.05/options rename to csv/test/csv-intervals/ascii/test.05/options diff --git a/csv/test/csv-interval/ascii/test.06/expected b/csv/test/csv-intervals/ascii/test.06/expected similarity index 100% rename from csv/test/csv-interval/ascii/test.06/expected rename to csv/test/csv-intervals/ascii/test.06/expected diff --git a/csv/test/csv-interval/ascii/test.06/input b/csv/test/csv-intervals/ascii/test.06/input similarity index 100% rename from csv/test/csv-interval/ascii/test.06/input rename to csv/test/csv-intervals/ascii/test.06/input diff --git a/csv/test/csv-interval/ascii/test.06/options b/csv/test/csv-intervals/ascii/test.06/options similarity index 100% rename from csv/test/csv-interval/ascii/test.06/options rename to csv/test/csv-intervals/ascii/test.06/options diff --git a/csv/test/csv-interval/ascii/test.07/expected b/csv/test/csv-intervals/ascii/test.07/expected similarity index 100% rename from csv/test/csv-interval/ascii/test.07/expected rename to csv/test/csv-intervals/ascii/test.07/expected diff --git a/csv/test/csv-interval/ascii/test.07/input b/csv/test/csv-intervals/ascii/test.07/input similarity index 100% rename from csv/test/csv-interval/ascii/test.07/input rename to csv/test/csv-intervals/ascii/test.07/input diff --git a/csv/test/csv-interval/ascii/test.07/options b/csv/test/csv-intervals/ascii/test.07/options similarity index 100% rename from csv/test/csv-interval/ascii/test.07/options rename to 
csv/test/csv-intervals/ascii/test.07/options diff --git a/csv/test/csv-interval/ascii/test.08/expected b/csv/test/csv-intervals/ascii/test.08/expected similarity index 100% rename from csv/test/csv-interval/ascii/test.08/expected rename to csv/test/csv-intervals/ascii/test.08/expected diff --git a/csv/test/csv-interval/ascii/test.08/input b/csv/test/csv-intervals/ascii/test.08/input similarity index 100% rename from csv/test/csv-interval/ascii/test.08/input rename to csv/test/csv-intervals/ascii/test.08/input diff --git a/csv/test/csv-interval/ascii/test.08/options b/csv/test/csv-intervals/ascii/test.08/options similarity index 100% rename from csv/test/csv-interval/ascii/test.08/options rename to csv/test/csv-intervals/ascii/test.08/options diff --git a/csv/test/csv-interval/ascii/test.09/expected b/csv/test/csv-intervals/ascii/test.09/expected similarity index 100% rename from csv/test/csv-interval/ascii/test.09/expected rename to csv/test/csv-intervals/ascii/test.09/expected diff --git a/csv/test/csv-interval/ascii/test.09/input b/csv/test/csv-intervals/ascii/test.09/input similarity index 100% rename from csv/test/csv-interval/ascii/test.09/input rename to csv/test/csv-intervals/ascii/test.09/input diff --git a/csv/test/csv-interval/ascii/test.09/options b/csv/test/csv-intervals/ascii/test.09/options similarity index 100% rename from csv/test/csv-interval/ascii/test.09/options rename to csv/test/csv-intervals/ascii/test.09/options diff --git a/csv/test/csv-interval/ascii/test.10/expected b/csv/test/csv-intervals/ascii/test.10/expected similarity index 100% rename from csv/test/csv-interval/ascii/test.10/expected rename to csv/test/csv-intervals/ascii/test.10/expected diff --git a/csv/test/csv-interval/ascii/test.10/input b/csv/test/csv-intervals/ascii/test.10/input similarity index 100% rename from csv/test/csv-interval/ascii/test.10/input rename to csv/test/csv-intervals/ascii/test.10/input diff --git a/csv/test/csv-interval/ascii/test.10/options 
b/csv/test/csv-intervals/ascii/test.10/options similarity index 100% rename from csv/test/csv-interval/ascii/test.10/options rename to csv/test/csv-intervals/ascii/test.10/options diff --git a/csv/test/csv-interval/ascii/test.11/expected b/csv/test/csv-intervals/ascii/test.11/expected similarity index 100% rename from csv/test/csv-interval/ascii/test.11/expected rename to csv/test/csv-intervals/ascii/test.11/expected diff --git a/csv/test/csv-interval/ascii/test.11/input b/csv/test/csv-intervals/ascii/test.11/input similarity index 100% rename from csv/test/csv-interval/ascii/test.11/input rename to csv/test/csv-intervals/ascii/test.11/input diff --git a/csv/test/csv-interval/ascii/test.11/options b/csv/test/csv-intervals/ascii/test.11/options similarity index 100% rename from csv/test/csv-interval/ascii/test.11/options rename to csv/test/csv-intervals/ascii/test.11/options diff --git a/csv/test/csv-interval/binary/test b/csv/test/csv-intervals/binary/test similarity index 65% rename from csv/test/csv-interval/binary/test rename to csv/test/csv-intervals/binary/test index 4f41eb06b..93e137d9e 100755 --- a/csv/test/csv-interval/binary/test +++ b/csv/test/csv-intervals/binary/test @@ -7,4 +7,4 @@ format_index=$( echo $options | fmt -1 | grep -n -- '--binary' | cut -d: -f1 ) input_format="${args[$format_index]}" [[ -z "$output_format" ]] && output_format=$input_format -csv-to-bin $input_format | csv-interval $options | csv-from-bin $output_format | gawk '{ printf( "interval[%d]=%s\n", NR, $0 ); }' +csv-to-bin $input_format | csv-intervals make $options | csv-from-bin $output_format | gawk '{ printf( "interval[%d]=%s\n", NR, $0 ); }' diff --git a/csv/test/csv-interval/binary/test.01/expected b/csv/test/csv-intervals/binary/test.01/expected similarity index 100% rename from csv/test/csv-interval/binary/test.01/expected rename to csv/test/csv-intervals/binary/test.01/expected diff --git a/csv/test/csv-interval/binary/test.01/input 
b/csv/test/csv-intervals/binary/test.01/input similarity index 100% rename from csv/test/csv-interval/binary/test.01/input rename to csv/test/csv-intervals/binary/test.01/input diff --git a/csv/test/csv-interval/binary/test.01/options b/csv/test/csv-intervals/binary/test.01/options similarity index 100% rename from csv/test/csv-interval/binary/test.01/options rename to csv/test/csv-intervals/binary/test.01/options diff --git a/csv/test/csv-interval/binary/test.02/expected b/csv/test/csv-intervals/binary/test.02/expected similarity index 100% rename from csv/test/csv-interval/binary/test.02/expected rename to csv/test/csv-intervals/binary/test.02/expected diff --git a/csv/test/csv-interval/binary/test.02/input b/csv/test/csv-intervals/binary/test.02/input similarity index 100% rename from csv/test/csv-interval/binary/test.02/input rename to csv/test/csv-intervals/binary/test.02/input diff --git a/csv/test/csv-interval/binary/test.02/options b/csv/test/csv-intervals/binary/test.02/options similarity index 100% rename from csv/test/csv-interval/binary/test.02/options rename to csv/test/csv-intervals/binary/test.02/options diff --git a/csv/test/csv-interval/binary/test.03/disabled b/csv/test/csv-intervals/binary/test.03/disabled similarity index 100% rename from csv/test/csv-interval/binary/test.03/disabled rename to csv/test/csv-intervals/binary/test.03/disabled diff --git a/csv/test/csv-interval/binary/test.03/expected b/csv/test/csv-intervals/binary/test.03/expected similarity index 100% rename from csv/test/csv-interval/binary/test.03/expected rename to csv/test/csv-intervals/binary/test.03/expected diff --git a/csv/test/csv-interval/binary/test.03/input b/csv/test/csv-intervals/binary/test.03/input similarity index 100% rename from csv/test/csv-interval/binary/test.03/input rename to csv/test/csv-intervals/binary/test.03/input diff --git a/csv/test/csv-interval/binary/test.03/options b/csv/test/csv-intervals/binary/test.03/options similarity index 100% rename from 
csv/test/csv-interval/binary/test.03/options rename to csv/test/csv-intervals/binary/test.03/options diff --git a/csv/test/csv-interval/binary/test.04/disabled b/csv/test/csv-intervals/binary/test.04/disabled similarity index 100% rename from csv/test/csv-interval/binary/test.04/disabled rename to csv/test/csv-intervals/binary/test.04/disabled diff --git a/csv/test/csv-interval/binary/test.04/expected b/csv/test/csv-intervals/binary/test.04/expected similarity index 100% rename from csv/test/csv-interval/binary/test.04/expected rename to csv/test/csv-intervals/binary/test.04/expected diff --git a/csv/test/csv-interval/binary/test.04/input b/csv/test/csv-intervals/binary/test.04/input similarity index 100% rename from csv/test/csv-interval/binary/test.04/input rename to csv/test/csv-intervals/binary/test.04/input diff --git a/csv/test/csv-interval/binary/test.04/options b/csv/test/csv-intervals/binary/test.04/options similarity index 100% rename from csv/test/csv-interval/binary/test.04/options rename to csv/test/csv-intervals/binary/test.04/options diff --git a/csv/test/csv-interval/binary/test.05/expected b/csv/test/csv-intervals/binary/test.05/expected similarity index 100% rename from csv/test/csv-interval/binary/test.05/expected rename to csv/test/csv-intervals/binary/test.05/expected diff --git a/csv/test/csv-interval/binary/test.05/input b/csv/test/csv-intervals/binary/test.05/input similarity index 100% rename from csv/test/csv-interval/binary/test.05/input rename to csv/test/csv-intervals/binary/test.05/input diff --git a/csv/test/csv-interval/binary/test.05/options b/csv/test/csv-intervals/binary/test.05/options similarity index 100% rename from csv/test/csv-interval/binary/test.05/options rename to csv/test/csv-intervals/binary/test.05/options diff --git a/csv/test/csv-interval/binary/test.06/expected b/csv/test/csv-intervals/binary/test.06/expected similarity index 100% rename from csv/test/csv-interval/binary/test.06/expected rename to 
csv/test/csv-intervals/binary/test.06/expected diff --git a/csv/test/csv-interval/binary/test.06/input b/csv/test/csv-intervals/binary/test.06/input similarity index 100% rename from csv/test/csv-interval/binary/test.06/input rename to csv/test/csv-intervals/binary/test.06/input diff --git a/csv/test/csv-interval/binary/test.06/options b/csv/test/csv-intervals/binary/test.06/options similarity index 100% rename from csv/test/csv-interval/binary/test.06/options rename to csv/test/csv-intervals/binary/test.06/options diff --git a/csv/test/csv-interval/binary/test.07/expected b/csv/test/csv-intervals/binary/test.07/expected similarity index 100% rename from csv/test/csv-interval/binary/test.07/expected rename to csv/test/csv-intervals/binary/test.07/expected diff --git a/csv/test/csv-interval/binary/test.07/input b/csv/test/csv-intervals/binary/test.07/input similarity index 100% rename from csv/test/csv-interval/binary/test.07/input rename to csv/test/csv-intervals/binary/test.07/input diff --git a/csv/test/csv-interval/binary/test.07/options b/csv/test/csv-intervals/binary/test.07/options similarity index 100% rename from csv/test/csv-interval/binary/test.07/options rename to csv/test/csv-intervals/binary/test.07/options diff --git a/csv/test/csv-interval/binary/test.08/expected b/csv/test/csv-intervals/binary/test.08/expected similarity index 100% rename from csv/test/csv-interval/binary/test.08/expected rename to csv/test/csv-intervals/binary/test.08/expected diff --git a/csv/test/csv-interval/binary/test.08/input b/csv/test/csv-intervals/binary/test.08/input similarity index 100% rename from csv/test/csv-interval/binary/test.08/input rename to csv/test/csv-intervals/binary/test.08/input diff --git a/csv/test/csv-interval/binary/test.08/options b/csv/test/csv-intervals/binary/test.08/options similarity index 100% rename from csv/test/csv-interval/binary/test.08/options rename to csv/test/csv-intervals/binary/test.08/options diff --git 
a/csv/test/csv-interval/binary/test.09/expected b/csv/test/csv-intervals/binary/test.09/expected similarity index 100% rename from csv/test/csv-interval/binary/test.09/expected rename to csv/test/csv-intervals/binary/test.09/expected diff --git a/csv/test/csv-interval/binary/test.09/input b/csv/test/csv-intervals/binary/test.09/input similarity index 100% rename from csv/test/csv-interval/binary/test.09/input rename to csv/test/csv-intervals/binary/test.09/input diff --git a/csv/test/csv-interval/binary/test.09/options b/csv/test/csv-intervals/binary/test.09/options similarity index 100% rename from csv/test/csv-interval/binary/test.09/options rename to csv/test/csv-intervals/binary/test.09/options diff --git a/csv/test/csv-interval/overlap-count/expected b/csv/test/csv-intervals/overlap-count/expected similarity index 100% rename from csv/test/csv-interval/overlap-count/expected rename to csv/test/csv-intervals/overlap-count/expected diff --git a/csv/test/csv-intervals/overlap-count/input b/csv/test/csv-intervals/overlap-count/input new file mode 100644 index 000000000..5fb97b7f1 --- /dev/null +++ b/csv/test/csv-intervals/overlap-count/input @@ -0,0 +1,16 @@ +min[0]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count-min 0" +min[1]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count-min 1" +min[2]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count-min 2" +min[3]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count-min 3" + +max[0]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count-max 0" +max[1]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count-max 1" +max[2]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count-max 2" +max[3]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count-max 3" + +min_max[0]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make 
--overlap-count-min 1 --overlap-count-max 2" + +equal[0]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count 0" +equal[1]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count 1" +equal[2]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count 2" +equal[3]="( echo 0,5,a; echo 4,7,b; echo 2,8,c ) | csv-intervals make --overlap-count 3" From 3a9d4e021de34d0e403b8f75fb0b0ac6b72f6855 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 28 May 2019 18:28:30 +1000 Subject: [PATCH 0032/1056] csv-intervals: contain: basic unit test added --- csv/applications/csv-intervals.cpp | 73 +++++++++++++------------ csv/test/csv-intervals/contain/expected | 23 ++++++++ csv/test/csv-intervals/contain/input | 7 +++ 3 files changed, 68 insertions(+), 35 deletions(-) create mode 100644 csv/test/csv-intervals/contain/expected create mode 100644 csv/test/csv-intervals/contain/input diff --git a/csv/applications/csv-intervals.cpp b/csv/applications/csv-intervals.cpp index dc710ed6a..416e23337 100644 --- a/csv/applications/csv-intervals.cpp +++ b/csv/applications/csv-intervals.cpp @@ -318,13 +318,14 @@ struct intervals unsigned int min_overlap_count; unsigned int max_overlap_count; - intervals( const comma::command_line_options& options ) : options( options ) - , csv( options ) - , ocsv( options ) - , ascii_csv( options ) - , empty( traits< bound_type >::cast( options.optional< std::string >( "--empty" ) ) ) - , intervals_only( options.exists( "--intervals-only" ) ) - , use_limits( options.exists( "--limits,-l" ) ) + intervals( const comma::command_line_options& options, const comma::csv::options& csv_ ) + : options( options ) + , csv( csv_ ) + , ocsv( csv ) + , ascii_csv( csv ) + , empty( traits< bound_type >::cast( options.optional< std::string >( "--empty" ) ) ) + , intervals_only( options.exists( "--intervals-only" ) ) + , use_limits( options.exists( "--limits,-l" ) ) { csv.full_xpath = false; if( csv.fields.empty() ) { 
csv.fields = comma::join( comma::csv::names< interval_t< From, To > >(), ',' ); } @@ -484,7 +485,8 @@ struct intervals int contain( std::istream& is, const std::string& first_line ) { - comma::csv::options icsv( options, "", false ); + comma::csv::options icsv( options ); + icsv.full_xpath = false; comma::csv::input_stream< scalar_t< From > > istream( std::cin, icsv ); comma::csv::output_stream< scalar_t< bool > > ostream( std::cout, icsv.binary() ); auto tied = comma::csv::make_tied( istream, ostream ); @@ -553,7 +555,8 @@ int main( int ac, char** av ) debug = options.exists( "--debug" ); options.assert_mutually_exclusive( "--binary,--format" ); const auto& unnamed = options.unnamed( "--append,-a,--debug,--flush,--input-fields,--output-fields,--intervals-only,--limits,-l", "-.*" ); - std::string operation = unnamed.empty() ? "make" : unnamed[0]; + if( unnamed.empty() ) { std::cerr << "csv-intervals: please specify operation" << std::endl; return 1; } + std::string operation = unnamed[0]; if( operation == "make" ) { append = options.exists( "--append,-a" ); @@ -565,20 +568,20 @@ int main( int ac, char** av ) std::string first_line = std::get< 1 >( t ); switch( to_type ) { - case comma::csv::format::int8: intervals< char >( options ).make( first_line ); return 0; - case comma::csv::format::uint8: intervals< unsigned char >( options ).make( first_line ); return 0; - case comma::csv::format::int16: intervals< comma::int16 >( options ).make( first_line ); return 0; - case comma::csv::format::uint16: intervals< comma::uint16 >( options ).make( first_line ); return 0; - case comma::csv::format::int32: intervals< comma::int32 >( options ).make( first_line ); return 0; - case comma::csv::format::uint32: intervals< comma::uint32 >( options ).make( first_line ); return 0; - case comma::csv::format::int64: intervals< comma::int64 >( options ).make( first_line ); return 0; - case comma::csv::format::uint64: intervals< comma::uint64 >( options ).make( first_line ); return 0; - 
case comma::csv::format::char_t: intervals< char >( options ).make( first_line ); return 0; - case comma::csv::format::float_t: intervals< float >( options ).make( first_line ); return 0; - case comma::csv::format::double_t: intervals< double >( options ).make( first_line ); return 0; + case comma::csv::format::int8: intervals< char >( options, csv ).make( first_line ); return 0; + case comma::csv::format::uint8: intervals< unsigned char >( options, csv ).make( first_line ); return 0; + case comma::csv::format::int16: intervals< comma::int16 >( options, csv ).make( first_line ); return 0; + case comma::csv::format::uint16: intervals< comma::uint16 >( options, csv ).make( first_line ); return 0; + case comma::csv::format::int32: intervals< comma::int32 >( options, csv ).make( first_line ); return 0; + case comma::csv::format::uint32: intervals< comma::uint32 >( options, csv ).make( first_line ); return 0; + case comma::csv::format::int64: intervals< comma::int64 >( options, csv ).make( first_line ); return 0; + case comma::csv::format::uint64: intervals< comma::uint64 >( options, csv ).make( first_line ); return 0; + case comma::csv::format::char_t: intervals< char >( options, csv ).make( first_line ); return 0; + case comma::csv::format::float_t: intervals< float >( options, csv ).make( first_line ); return 0; + case comma::csv::format::double_t: intervals< double >( options, csv ).make( first_line ); return 0; case comma::csv::format::time: - case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options ).make( first_line ); return 0; - case comma::csv::format::fixed_string: intervals< std::string >( options ).make( first_line ); return 0; + case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options, csv ).make( first_line ); return 0; + case comma::csv::format::fixed_string: intervals< std::string >( options, csv ).make( first_line ); return 0; default: COMMA_THROW( comma::exception, "invalid type" ); return 0; // 
never here } return 0; @@ -596,20 +599,20 @@ int main( int ac, char** av ) std::string first_line = std::get< 1 >( t ); switch( to_type ) { - case comma::csv::format::int8: intervals< char >( options ).contain( *is, first_line ); return 0; - case comma::csv::format::uint8: intervals< unsigned char >( options ).contain( *is, first_line ); return 0; - case comma::csv::format::int16: intervals< comma::int16 >( options ).contain( *is, first_line ); return 0; - case comma::csv::format::uint16: intervals< comma::uint16 >( options ).contain( *is, first_line ); return 0; - case comma::csv::format::int32: intervals< comma::int32 >( options ).contain( *is, first_line ); return 0; - case comma::csv::format::uint32: intervals< comma::uint32 >( options ).contain( *is, first_line ); return 0; - case comma::csv::format::int64: intervals< comma::int64 >( options ).contain( *is, first_line ); return 0; - case comma::csv::format::uint64: intervals< comma::uint64 >( options ).contain( *is, first_line ); return 0; - case comma::csv::format::char_t: intervals< char >( options ).contain( *is, first_line ); return 0; - case comma::csv::format::float_t: intervals< float >( options ).contain( *is, first_line ); return 0; - case comma::csv::format::double_t: intervals< double >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::int8: intervals< char >( options, csv ).contain( *is, first_line ); return 0; + case comma::csv::format::uint8: intervals< unsigned char >( options, csv ).contain( *is, first_line ); return 0; + case comma::csv::format::int16: intervals< comma::int16 >( options, csv ).contain( *is, first_line ); return 0; + case comma::csv::format::uint16: intervals< comma::uint16 >( options, csv ).contain( *is, first_line ); return 0; + case comma::csv::format::int32: intervals< comma::int32 >( options, csv ).contain( *is, first_line ); return 0; + case comma::csv::format::uint32: intervals< comma::uint32 >( options, csv ).contain( *is, first_line ); return 
0; + case comma::csv::format::int64: intervals< comma::int64 >( options, csv ).contain( *is, first_line ); return 0; + case comma::csv::format::uint64: intervals< comma::uint64 >( options, csv ).contain( *is, first_line ); return 0; + case comma::csv::format::char_t: intervals< char >( options, csv ).contain( *is, first_line ); return 0; + case comma::csv::format::float_t: intervals< float >( options, csv ).contain( *is, first_line ); return 0; + case comma::csv::format::double_t: intervals< double >( options, csv ).contain( *is, first_line ); return 0; case comma::csv::format::time: - case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options ).contain( *is, first_line ); return 0; - case comma::csv::format::fixed_string: intervals< std::string >( options ).contain( *is, first_line ); return 0; + case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options, csv ).contain( *is, first_line ); return 0; + case comma::csv::format::fixed_string: intervals< std::string >( options, csv ).contain( *is, first_line ); return 0; default: COMMA_THROW( comma::exception, "invalid type" ); // never here } return 0; diff --git a/csv/test/csv-intervals/contain/expected b/csv/test/csv-intervals/contain/expected new file mode 100644 index 000000000..b307e5a5c --- /dev/null +++ b/csv/test/csv-intervals/contain/expected @@ -0,0 +1,23 @@ +contain/ascii[0]/output/line[0]="1,1" +contain/ascii[0]/output/line[1]="5,0" +contain/ascii[0]/output/line[2]="9,1" +contain/ascii[0]/output/line[3]="11,0" +contain/ascii[0]/status=0 +contain/ascii[1]/output/line[0]="1,1" +contain/ascii[1]/output/line[1]="5,0" +contain/ascii[1]/status=0 +contain/fields[0]/output/line[0]=",1,1" +contain/fields[0]/output/line[1]=",5,0" +contain/fields[0]/status=0 +contain/fields[1]/output/line[0]=",1,1" +contain/fields[1]/output/line[1]=",5,0" +contain/fields[1]/status=0 +contain/binary[0]/output/line[0]="1,1" +contain/binary[0]/output/line[1]="5,0" 
+contain/binary[0]/status=0 +contain/binary[1]/output/line[0]="1,1" +contain/binary[1]/output/line[1]="5,0" +contain/binary[1]/status=0 +contain/binary[2]/output/line[0]="1,1" +contain/binary[2]/output/line[1]="5,0" +contain/binary[2]/status=0 diff --git a/csv/test/csv-intervals/contain/input b/csv/test/csv-intervals/contain/input new file mode 100644 index 000000000..eac7bd818 --- /dev/null +++ b/csv/test/csv-intervals/contain/input @@ -0,0 +1,7 @@ +contain/ascii[0]="( echo 1; echo 5; echo 9; echo 11 ) | csv-intervals contain --intervals <( echo 0,2; echo 9,11 )" +contain/ascii[1]="( echo 1; echo 5 ) | csv-intervals --fields scalar contain --intervals <( echo 0,2 )" +contain/fields[0]="( echo ,1; echo ,5 ) | csv-intervals --fields ,scalar contain --intervals <( echo 0,2 )" +contain/fields[1]="( echo ,1; echo ,5 ) | csv-intervals --fields ,scalar contain --intervals <( echo ,0,2 )';fields=,from,to'" +contain/binary[0]="( echo 1; echo 5 ) | csv-to-bin ui | csv-intervals contain --binary ui --intervals <( echo 0,2; echo 9,11 ) | csv-from-bin ui,b" +contain/binary[1]="( echo 1; echo 5 ) | csv-to-bin ui | csv-intervals contain --binary ui --intervals <( ( echo 0,2; echo 9,11 ) | csv-to-bin 2ui )';binary=2ui' | csv-from-bin ui,b" +contain/binary[2]="( echo 1; echo 5 ) | csv-intervals contain --intervals <( ( echo 0,2; echo 9,11 ) | csv-to-bin 2ui )';binary=2ui'" From 1be73a880e09a927e6e9f4b29aa23935e63f54fb Mon Sep 17 00:00:00 2001 From: vlaskine Date: Wed, 29 May 2019 19:04:59 +1000 Subject: [PATCH 0033/1056] csv-intervals: join operation: implemented (with somewhat suboptimal complexity); basic unit test done --- csv/applications/csv-intervals.cpp | 124 +++++++++++++++++++++------ csv/test/csv-intervals/join/expected | 15 ++++ csv/test/csv-intervals/join/input | 5 ++ 3 files changed, 118 insertions(+), 26 deletions(-) create mode 100644 csv/test/csv-intervals/join/expected create mode 100644 csv/test/csv-intervals/join/input diff --git 
a/csv/applications/csv-intervals.cpp b/csv/applications/csv-intervals.cpp index 416e23337..f370dd819 100644 --- a/csv/applications/csv-intervals.cpp +++ b/csv/applications/csv-intervals.cpp @@ -46,10 +46,9 @@ #include "../../io/stream.h" #include "../../name_value/map.h" #include "../../name_value/parser.h" +#include "../../string/string.h" #include "../../visiting/traits.h" -static const std::string app_name = "csv-intervals"; - static bool verbose; static bool debug; static bool append; @@ -100,6 +99,7 @@ static void usage( bool verbose = false ) std::cerr << std::endl; std::cerr << "operations" << std::endl; std::cerr << " contain: given a set of intervals, take scalars on stdin, append 1 if contained in the intervals, 0 if not" << std::endl; + std::cerr << " join: given a set of intervals, take scalars on stdin, append payloads of the intervals the scalars are contained in" << std::endl; std::cerr << " make: make intervals" << std::endl; std::cerr << std::endl; std::cerr << "options" << std::endl; @@ -133,6 +133,9 @@ static void usage( bool verbose = false ) std::cerr << " contain" << std::endl; std::cerr << " options" << std::endl; std::cerr << " --intervals=: file or stream name" << std::endl; + std::cerr << " join" << std::endl; + std::cerr << " options" << std::endl; + std::cerr << " --intervals=: file or stream name" << std::endl; std::cerr << std::endl; std::cerr << " make" << std::endl; std::cerr << " options" << std::endl; @@ -146,7 +149,7 @@ static void usage( bool verbose = false ) std::cerr << "ascii notes" << std::endl; std::cerr << " unbounded intervals may be indicated by no value (e.g. 
,3 \u2261 -\u221e,3), both sides unbounded is also supported" << std::endl; std::cerr << std::endl; - std::cerr << "for examples see verbose help: " << app_name << " --help --verbose" << std::endl; + std::cerr << "for examples see verbose help: csv-intervals --help --verbose" << std::endl; std::cerr << std::endl; if( verbose ) { @@ -161,7 +164,7 @@ static void usage( bool verbose = false ) std::cerr << " B: [2 4]" << std::endl; std::cerr << " C: [3 6]" << std::endl; std::cerr << std::endl; - std::cerr << " echo -e '1,5,A\\n2,4,B\\n3,6,C' | " << app_name << std::endl; + std::cerr << " echo -e '1,5,A\\n2,4,B\\n3,6,C' | csv-intervals make" << std::endl; std::cerr << std::endl; std::cerr << " A: [1 2][2 3][3 4][4 5]" << std::endl; std::cerr << " B: [2 3][3 4]" << std::endl; @@ -175,7 +178,7 @@ static void usage( bool verbose = false ) std::cerr << " D: [3 8]" << std::endl; std::cerr << " Z: [-\u221e +\u221e]" << std::endl; std::cerr << std::endl; - std::cerr << " echo -e ',4,A\\n2,4,B\\n3,6,C\\n3,8,D\\n,,Z' | " << app_name << " --format 2i" << std::endl; + std::cerr << " echo -e ',4,A\\n2,4,B\\n3,6,C\\n3,8,D\\n,,Z' | csv-intervals make" << " --format 2i" << std::endl; std::cerr << std::endl; std::cerr << " A: [-\u221e 2][2 3][3 4]" << std::endl; std::cerr << " B: [2 3][3 4]" << std::endl; @@ -190,7 +193,7 @@ static void usage( bool verbose = false ) std::cerr << " C: [20140916T190000 +\u221e]" << std::endl; std::cerr << " Z: [-\u221e +\u221e]" << std::endl; std::cerr << std::endl; - std::cerr << " echo -e ',20140916T030000.000000,A\\n20140916T010000.000000,20140916T190000.000000,B\\n20140916T190000.000000,,C\\n,,Z' | " << app_name << " --format 2t" << std::endl; + std::cerr << " echo -e ',20140916T030000.000000,A\\n20140916T010000.000000,20140916T190000.000000,B\\n20140916T190000.000000,,C\\n,,Z' | csv-intervals make" << " --format 2t" << std::endl; std::cerr << std::endl; std::cerr << " A: [-\u221e 20140916T010000][20140916T010000 20140916T030000]" << std::endl; 
std::cerr << " B: [20140916T010000 20140916T030000][20140916T030000 20140916T190000]" << std::endl; @@ -336,7 +339,7 @@ struct intervals } ascii_csv.fields = ocsv.fields; ascii_csv.quote = boost::none; - if( verbose ) { std::cerr << app_name << ": empty: "; empty ? std::cerr << *empty : std::cerr << ""; std::cerr << std::endl; } + if( verbose ) { std::cerr << "csv-intervals: empty: "; empty ? std::cerr << *empty : std::cerr << ""; std::cerr << std::endl; } options.assert_mutually_exclusive( "overlap-count-min,overlap-count-max", "overlap-count" ); if( options.exists( "--overlap-count" ) ) { @@ -443,7 +446,7 @@ struct intervals if( !first.to.value.empty() && ( !empty || interval.to.value != *empty ) ) { to.value = interval.to.value; } payload = first_line; if( !intervals_only && !append ) { ascii.put( interval_t< std::string >(), payload ); } // blank out interval from payload - if( verbose ) { std::cerr << app_name << ": from: " << from << " to: " << to << " payload: " << payload << std::endl; } + if( verbose ) { std::cerr << "csv-intervals: from: " << from << " to: " << to << " payload: " << payload << std::endl; } add( from, to, payload ); } while( istream.ready() || is.good() ) @@ -471,18 +474,11 @@ struct intervals if( !intervals_only && !append ) { ascii.put( interval_t< std::string >(), buf ); } // blank out interval from payload payload = comma::join( buf, csv.delimiter ); } - if( verbose ) { std::cerr << app_name << ": from: " << from << " to: " << to << " payload: " << ( csv.binary() ? "" : payload ) << std::endl; } + if( verbose ) { std::cerr << "csv-intervals: from: " << from << " to: " << to << " payload: " << ( csv.binary() ? 
"" : payload ) << std::endl; } add( from, to, payload ); } } - int make( const std::string& first_line ) - { - this->read( std::cin, first_line ); - this->write(); - return 0; - } - int contain( std::istream& is, const std::string& first_line ) { comma::csv::options icsv( options ); @@ -495,8 +491,8 @@ struct intervals { auto p = istream.read(); if( !p ) { break; } - bool contained = false; // todo? use boost::...::query - for( typename map_t::iterator it = map.begin(); it != map.end() && !contained; ++it ) + bool contained = false; + for( typename map_t::iterator it = map.begin(); it != map.end() && !contained; ++it ) // todo! quadratic complexity; how the heck to query icl map? use boost::...::query? { const bound_t< bound_type >& from = it->first.lower(); const bound_t< bound_type >& to = it->first.upper(); @@ -505,7 +501,52 @@ struct intervals tied.append( scalar_t< bool >( contained ) ); if( icsv.flush ) { std::cout.flush(); } } - std::cout.flush(); + return 0; + } + + int join( std::istream& is, const std::string& first_line ) + { + comma::csv::options icsv( options ); + if( csv.binary() != icsv.binary() ) { COMMA_THROW( comma::exception, "expected both inputs ascii or both binary; got stdin " << ( icsv.binary() ? "binary" : "ascii" ) << " while --intervals " << ( csv.binary() ? "binary" : "ascii" ) ); } + icsv.full_xpath = false; + comma::csv::input_stream< scalar_t< From > > istream( std::cin, icsv ); + append = true; + this->read( is, first_line ); // todo: support block + while( istream.ready() || std::cin.good() ) + { + auto p = istream.read(); + if( !p ) { break; } + for( typename map_t::iterator it = map.begin(); it != map.end(); ++it ) // todo! quadratic complexity; how the heck to query icl map? use boost::...::query? 
+ { + const bound_t< bound_type >& from = it->first.lower(); + const bound_t< bound_type >& to = it->first.upper(); + if( ( !from.value || p->scalar >= *from.value ) && ( !to.value || p->scalar < *to.value ) ) + { + std::string joined = csv.binary() ? "" : comma::join( istream.ascii().last(), icsv.delimiter ); + for( const auto& s: it->second ) + { + if( csv.binary() ) + { + std::cout.write( istream.binary().last(), icsv.format().size() ); + std::cout.write( &s[0], s.size() ); + } + else + { + std::cout << joined << icsv.delimiter << s << std::endl; + } + } + break; + } + } + if( icsv.flush ) { std::cout.flush(); } + } + return 0; + } + + int make( const std::string& first_line ) + { + this->read( std::cin, first_line ); + this->write(); return 0; } }; @@ -523,7 +564,7 @@ static std::tuple< comma::csv::format::types_enum, std::string > interval_type( while( is.good() && first_line.empty() ) { std::getline( is, first_line ); } if( first_line.empty() ) { exit( 0 ); } // quick and dirty csv.format( comma::csv::impl::unstructured::guess_format( first_line, csv.delimiter ) ); - if( verbose ) { std::cerr << app_name << ": guessed format: " << csv.format().string() << std::endl;; } + if( verbose ) { std::cerr << "csv-intervals: guessed format: " << csv.format().string() << std::endl;; } } else { @@ -538,11 +579,11 @@ static std::tuple< comma::csv::format::types_enum, std::string > interval_type( const comma::csv::format::types_enum from_type = csv.format().offset( from_index ).type; const comma::csv::format::types_enum to_type = csv.format().offset( to_index ).type; if( ( ( from_type == comma::csv::format::time || from_type == comma::csv::format::long_time ) && ( to_type != comma::csv::format::time && to_type != comma::csv::format::long_time ) ) || - ( ( ( from_type != comma::csv::format::time && from_type != comma::csv::format::long_time ) && ( to_type == comma::csv::format::time || to_type == comma::csv::format::long_time ) ) ) ) - { COMMA_THROW( comma::exception, 
"from/to type mismatch; time" ); } + ( ( ( from_type != comma::csv::format::time && from_type != comma::csv::format::long_time ) && ( to_type == comma::csv::format::time || to_type == comma::csv::format::long_time ) ) ) ) + { COMMA_THROW( comma::exception, "from/to type mismatch; time" ); } if( ( from_type == comma::csv::format::fixed_string || to_type == comma::csv::format::fixed_string ) && from_type != to_type ) - { COMMA_THROW( comma::exception, "from/to type mismatch; string" ); } - if( from_type != to_type ) { std::cerr << app_name << ": support only from and to of the same type, got from: " << comma::csv::format::to_format( from_type ) << ", to: " << comma::csv::format::to_format( to_type ) << std::endl; exit( 1 ); } + { COMMA_THROW( comma::exception, "from/to type mismatch; string" ); } + if( from_type != to_type ) { std::cerr << "csv-intervals: support only from and to of the same type, got from: " << comma::csv::format::to_format( from_type ) << ", to: " << comma::csv::format::to_format( to_type ) << std::endl; exit( 1 ); } return std::tie( to_type, first_line ); } @@ -617,9 +658,40 @@ int main( int ac, char** av ) } return 0; } + if( operation == "join" ) + { + if( options.exists( "--input-fields" ) ) { std::cout << comma::join( comma::csv::names< scalar_t< double > >(), ',' ) << std::endl; return 0; } + if( options.exists( "--output-fields" ) ) { std::cerr << "csv-intervals join: does not have --output-fields" << std::endl; return 1; } + auto i = options.value< std::string >( "--intervals" ); + comma::csv::options csv = comma::name_value::parser( "filename" ).get< comma::csv::options >( i ); + std::string format = comma::name_value::map( i ).value< std::string >( "format", "" ); + comma::io::istream is( csv.filename ); + auto t = interval_type( *is, csv, format ); + const comma::csv::format::types_enum to_type = std::get< 0 >( t ); + std::string first_line = std::get< 1 >( t ); + switch( to_type ) + { + case comma::csv::format::int8: intervals< char >( 
options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::uint8: intervals< unsigned char >( options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::int16: intervals< comma::int16 >( options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::uint16: intervals< comma::uint16 >( options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::int32: intervals< comma::int32 >( options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::uint32: intervals< comma::uint32 >( options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::int64: intervals< comma::int64 >( options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::uint64: intervals< comma::uint64 >( options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::char_t: intervals< char >( options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::float_t: intervals< float >( options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::double_t: intervals< double >( options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::time: + case comma::csv::format::long_time: intervals< boost::posix_time::ptime >( options, csv ).join( *is, first_line ); return 0; + case comma::csv::format::fixed_string: intervals< std::string >( options, csv ).join( *is, first_line ); return 0; + default: COMMA_THROW( comma::exception, "invalid type" ); // never here + } + return 0; + } std::cerr << "csv-intervals: expected operation, got: '" << operation << "'" << std::endl; } - catch( std::exception& ex ) { std::cerr << "csv-invervals: " << ex.what() << std::endl; } - catch( ... ) { std::cerr << "csv-invervals: unknown exception" << std::endl; } + catch( std::exception& ex ) { std::cerr << "csv-intervals: " << ex.what() << std::endl; } + catch( ... 
) { std::cerr << "csv-intervals: unknown exception" << std::endl; } return 1; } diff --git a/csv/test/csv-intervals/join/expected b/csv/test/csv-intervals/join/expected new file mode 100644 index 000000000..d78db3ead --- /dev/null +++ b/csv/test/csv-intervals/join/expected @@ -0,0 +1,15 @@ +join/ascii[0]/output/line[0]="1,0,2,a" +join/ascii[0]/output/line[1]="9,9,11,b" +join/ascii[0]/status=0 +join/ascii[1]/output/line[0]="9,0,10,b" +join/ascii[1]/output/line[1]="9,0,10,c" +join/ascii[1]/output/line[2]="9,0,20,d" +join/ascii[1]/status=0 +join/fields[0]/output=",1,0,2,a" +join/fields[0]/status=0 +join/fields[1]/output=",1,a,0,2,b" +join/fields[1]/status=0 +join/binary[0]/output/line[0]="9,0,10,0" +join/binary[0]/output/line[1]="9,0,10,1" +join/binary[0]/output/line[2]="9,5,20,2" +join/binary[0]/status=0 diff --git a/csv/test/csv-intervals/join/input b/csv/test/csv-intervals/join/input new file mode 100644 index 000000000..9cd53fe1a --- /dev/null +++ b/csv/test/csv-intervals/join/input @@ -0,0 +1,5 @@ +join/ascii[0]="( echo 1; echo 5; echo 9; echo 11 ) | csv-intervals join --intervals <( echo 0,2,a; echo 9,11,b )" +join/ascii[1]="( echo 9 ) | csv-intervals join --intervals <( echo 0,2,a; echo 0,10,b; echo 0,10,c; echo 0,20,d )" +join/fields[0]="( echo ,1; echo ,5 ) | csv-intervals --fields ,scalar join --intervals <( echo 0,2,a )" +join/fields[1]="( echo ,1; echo ,5 ) | csv-intervals --fields ,scalar join --intervals <( echo a,0,2,b )';fields=,from,to'" +join/binary[0]="( echo 9 ) | csv-to-bin ui | csv-intervals join --binary ui --intervals <( ( echo 0,10,0; echo 0,10,1; echo 5,20,2 ) | csv-to-bin 3ui )';binary=3ui' | csv-from-bin 4ui" From 162d39b2d0c241d632bd1d2a7b0704162d9c61db Mon Sep 17 00:00:00 2001 From: vlaskine Date: Thu, 30 May 2019 19:16:57 +1000 Subject: [PATCH 0034/1056] cmake: boost::iostreams added --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 167b087db..980e1a74e 100644 
--- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,9 +171,9 @@ IF( BUILD_PYTHON_PACKAGES AND BUILD_CPP_PYTHON_BINDINGS ) SET( Python_ADDITIONAL_VERSIONS 2.7 ) FIND_PACKAGE( PythonLibs REQUIRED ) INCLUDE_DIRECTORIES( ${PYTHON_INCLUDE_DIRS} ) - FIND_PACKAGE( Boost COMPONENTS thread filesystem system date_time program_options regex python ) + FIND_PACKAGE( Boost COMPONENTS thread filesystem system date_time iostreams program_options regex python ) ELSE( BUILD_PYTHON_PACKAGES AND BUILD_CPP_PYTHON_BINDINGS ) - FIND_PACKAGE( Boost COMPONENTS thread filesystem system date_time program_options regex ) + FIND_PACKAGE( Boost COMPONENTS thread filesystem system date_time iostreams program_options regex ) ENDIF( BUILD_PYTHON_PACKAGES AND BUILD_CPP_PYTHON_BINDINGS ) INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIRS} ) LINK_DIRECTORIES( ${Boost_LIBRARY_DIRS} ) From d4c8b1304d3e3d7b2d0f556cd6bcf5ee0b71df24 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 31 May 2019 13:48:05 +1000 Subject: [PATCH 0035/1056] io-tee: --dry-run made exiting before checking whether output file is writable; output filename quoted in command to support filenames with special characters --- io/applications/io-tee.cpp | 70 ++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/io/applications/io-tee.cpp b/io/applications/io-tee.cpp index c15efb20d..a0188585a 100644 --- a/io/applications/io-tee.cpp +++ b/io/applications/io-tee.cpp @@ -41,11 +41,9 @@ #include "../../io/select.h" #include "../../io/stream.h" -static const char *app_name = "io-tee"; - static void show_usage() { - std::cerr << "Usage: " << app_name << " [options ... --] \n"; + std::cerr << "Usage: io-tee [options ... 
--] \n"; } static void show_help( bool verbose = false ) @@ -63,14 +61,14 @@ static void show_help( bool verbose = false ) << " --verbose,-v: more output" << std::endl << std::endl << "Note that only single commands are supported; to run multiple commands (or a pipeline), put them inside a bash function:" << std::endl - << "*** IMPORTANT *** use \"export -f function_name\" to make the function visible to " << app_name << "." << std::endl - << "Remember that " << app_name << " will not have access to the unexported variables, so pass any required values as function arguments." << std::endl + << "*** IMPORTANT *** use \"export -f function_name\" to make the function visible to io-tee." << std::endl + << "Remember that io-tee will not have access to the unexported variables, so pass any required values as function arguments." << std::endl << "On Ubuntu 16.04, io-tee might fail to discover a bash function even if it is exported." << std::endl << "In this case, comma_tee_function defined in comma-application-util should be used." << std::endl << std::endl << "If any options are used (such as --unbuffered), \"--\" must precede the command." << std::endl << std::endl - << "A note about using \"grep\": be aware grep returns 1 if the pattern is not found, which will make " << app_name << " think the command failed." << std::endl + << "A note about using \"grep\": be aware grep returns 1 if the pattern is not found, which will make io-tee think the command failed." << std::endl << "To avoid this, call grep inside a function like this: grep (pattern) || true." << std::endl << std::endl << "Example 1:" << std::endl @@ -127,19 +125,19 @@ int main( int ac, char **av ) else if ( av[n] == std::string( "--help" ) || av[n] == std::string( "-h" ) ) { show_help(); exit( 0 ); } } int command_offset = ( dashdash_pos == -1 ? 
2 : dashdash_pos + 1 ); - if ( command_offset >= ac ) { std::cerr << app_name << ": missing command; "; show_usage(); exit( 1 ); } + if ( command_offset >= ac ) { std::cerr << "io-tee: missing command; "; show_usage(); exit( 1 ); } // if there is no "--", there can be no command line options, just the output filename int options_ac = ( dashdash_pos == -1 ? 2 : dashdash_pos ); if ( debug ) { - std::cerr << app_name << ": options_ac=" << options_ac << "; command line: " << app_name; + std::cerr << "io-tee: options_ac=" << options_ac << "; command line: io-tee"; for ( int m = 1; m < ac; ++m ) { std::cerr << ' ' << av[m]; } std::cerr << std::endl; } comma::command_line_options options( options_ac, av ); const std::vector< std::string >& unnamed = options.unnamed( "--unbuffered,-u,--verbose,-v,--debug,--dry-run,--dry,--append,-a", "-.*" ); - if( unnamed.empty() ) { std::cerr << app_name << ": please specify output file name" << std::endl; return 1; } - if( unnamed.size() > 1 ) { std::cerr << app_name << ": expected one output filename, got: " << comma::join( unnamed, ' ' ) << std::endl; return 1; } + if( unnamed.empty() ) { std::cerr << "io-tee: please specify output file name" << std::endl; return 1; } + if( unnamed.size() > 1 ) { std::cerr << "io-tee: expected one output filename, got: " << comma::join( unnamed, ' ' ) << std::endl; return 1; } std::string outfile = unnamed[0]; // bash -c only takes a single argument, so put the whole command in single quotes, then double quote each individual argument std::string command = "bash -c '" + escape_quotes( av[command_offset] ); @@ -147,24 +145,24 @@ int main( int ac, char **av ) bool append_to_outfile = options.exists( "--append,-a" ); if( append_to_outfile ) { command += " >> "; } else { command += " > "; } - command += outfile; + command += '"' + outfile + '"'; command += "'"; bool unbuffered = options.exists( "--unbuffered,-u" ); bool verbose = options.exists( "--verbose,-v" ); if ( debug ) { verbose = true; } - if( 
!file_is_writable( outfile, append_to_outfile ) ) { std::cerr << app_name << ": cannot write to " << outfile << std::endl; exit( 1 ); } if( options.exists( "--dry-run,--dry" ) ) { std::cout << command << std::endl; return 0; } - if( verbose ) { std::cerr << app_name << ": will run command: " << command << std::endl; } + if( verbose ) { std::cerr << "io-tee: will run command: " << command << std::endl; } + if( !file_is_writable( outfile, append_to_outfile ) ) { std::cerr << "io-tee: cannot write to " << outfile << std::endl; exit( 1 ); } std::cout.flush(); pipe = ::popen( &command[0], "w" ); - if( pipe == NULL ) { std::cerr << app_name << ": failed to open pipe; command: " << command << std::endl; return 1; } + if( pipe == NULL ) { std::cerr << "io-tee: failed to open pipe; command: " << command << std::endl; return 1; } boost::array< char, 0xffff > buffer; - if ( debug ) { std::cerr << app_name << ": created buffer" << std::endl; } + if ( debug ) { std::cerr << "io-tee: created buffer" << std::endl; } comma::io::select stdin_select; - if ( debug ) { std::cerr << app_name << ": constructed comma::io::select" << std::endl; } - if( unbuffered ) { stdin_select.read().add( 0 ); if ( debug ) { std::cerr << app_name << ": did initial unbuffered read" << std::endl; } } + if ( debug ) { std::cerr << "io-tee: constructed comma::io::select" << std::endl; } + if( unbuffered ) { stdin_select.read().add( 0 ); if ( debug ) { std::cerr << "io-tee: did initial unbuffered read" << std::endl; } } comma::io::istream is( "-", comma::io::mode::binary ); - if ( debug ) { std::cerr << app_name << ": opened input stream" << std::endl; } + if ( debug ) { std::cerr << "io-tee: opened input stream" << std::endl; } if( unbuffered ) { std::ios_base::sync_with_stdio( false ); // unsync to make rdbuf()->in_avail() working @@ -172,51 +170,51 @@ int main( int ac, char **av ) } while( std::cin.good() ) { - if ( debug ) { std::cerr << app_name << ": loop" << std::endl; } + if ( debug ) { std::cerr << 
"io-tee: loop" << std::endl; } std::size_t bytes_to_read = buffer.size(); if( unbuffered ) { - if ( debug ) { std::cerr << app_name << ": calling stdin_select.wait(1)" << std::endl; } + if ( debug ) { std::cerr << "io-tee: calling stdin_select.wait(1)" << std::endl; } if( stdin_select.wait( boost::posix_time::seconds( 1 ) ) == 0 ) { continue; } - if ( debug ) { std::cerr << app_name << ": after stdin_select.wait" << std::endl; } + if ( debug ) { std::cerr << "io-tee: after stdin_select.wait" << std::endl; } std::size_t available = is.available_on_file_descriptor(); - if ( debug ) { std::cerr << app_name << ": " << available << " bytes available" << std::endl; } + if ( debug ) { std::cerr << "io-tee: " << available << " bytes available" << std::endl; } bytes_to_read = std::min( available, buffer.size() ); } - if ( debug ) { std::cerr << app_name << ": bytes_to_read = " << bytes_to_read << std::endl; } + if ( debug ) { std::cerr << "io-tee: bytes_to_read = " << bytes_to_read << std::endl; } std::cin.read( &buffer[0], bytes_to_read ); - if ( debug ) { std::cerr << app_name << ": cin.gcount is " << std::cin.gcount() << std::endl; } + if ( debug ) { std::cerr << "io-tee: cin.gcount is " << std::cin.gcount() << std::endl; } if( std::cin.gcount() <= 0 ) { break; } std::size_t gcount = std::cin.gcount(); - if ( debug ) { std::cerr << app_name << ": writing " << gcount << " bytes to stdout" << std::endl; } + if ( debug ) { std::cerr << "io-tee: writing " << gcount << " bytes to stdout" << std::endl; } std::cout.write( &buffer[0], gcount ); - if ( debug ) { std::cerr << app_name << ": writing " << gcount << " bytes to pipe" << std::endl; } + if ( debug ) { std::cerr << "io-tee: writing " << gcount << " bytes to pipe" << std::endl; } int r = ::fwrite( &buffer[0], sizeof( char ), gcount, pipe ); - if ( debug ) { std::cerr << app_name << ": fwrite to pipe returned " << r << std::endl; } + if ( debug ) { std::cerr << "io-tee: fwrite to pipe returned " << r << std::endl; } if( r 
!= (int) gcount ) { - std::cerr << app_name << ": error on pipe: " << std::strerror( errno ) << std::endl; + std::cerr << "io-tee: error on pipe: " << std::strerror( errno ) << std::endl; ::pclose( pipe ); return 1; } if( unbuffered ) { - if ( debug ) { std::cerr << app_name << ": flushing stdout" << std::endl; } + if ( debug ) { std::cerr << "io-tee: flushing stdout" << std::endl; } std::cout.flush(); - if ( debug ) { std::cerr << app_name << ": flushing pipe" << std::endl; } - if ( ::fflush( pipe ) != 0 ) { std::cerr << app_name << ": flushing pipe failed: " << std::strerror( errno ) << "; command was: " << command << std::endl; ::pclose( pipe ); exit( 1 ); } - if ( debug ) { std::cerr << app_name << ": flushed stdout and pipe " << std::endl; } + if ( debug ) { std::cerr << "io-tee: flushing pipe" << std::endl; } + if ( ::fflush( pipe ) != 0 ) { std::cerr << "io-tee: flushing pipe failed: " << std::strerror( errno ) << "; command was: " << command << std::endl; ::pclose( pipe ); exit( 1 ); } + if ( debug ) { std::cerr << "io-tee: flushed stdout and pipe " << std::endl; } } } std::cout.flush(); ::fflush( pipe ); int result = ::pclose( pipe ); - if ( result == -1 ) { std::cerr << app_name << ": pipe error: " << std::strerror( errno ) << "; command was: " << command << std::endl; exit( 1 ); } - else if ( result != 0 ) { std::cerr << app_name << ": command failed: " << command << std::endl; return 1; } + if ( result == -1 ) { std::cerr << "io-tee: pipe error: " << std::strerror( errno ) << "; command was: " << command << std::endl; exit( 1 ); } + else if ( result != 0 ) { std::cerr << "io-tee: command failed: " << command << std::endl; return 1; } return 0; } - catch( std::exception& ex ) { std::cerr << app_name << ": " << ex.what() << std::endl; } - catch( ... ) { std::cerr << app_name << ": unknown exception" << std::endl; } + catch( std::exception& ex ) { std::cerr << "io-tee: " << ex.what() << std::endl; } + catch( ... 
) { std::cerr << "io-tee: unknown exception" << std::endl; } if( pipe ) { ::pclose( pipe ); } return 1; } From 9c66ea9d92ee67137b1d6dadfd9b382c5fb50f58 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 31 May 2019 13:52:21 +1000 Subject: [PATCH 0036/1056] io-tee: unit test fixed --- io/test/io-tee/comma_tee_function/expected | 10 +++++----- io/test/io-tee/simple/expected | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/io/test/io-tee/comma_tee_function/expected b/io/test/io-tee/comma_tee_function/expected index 9dfe99951..79d21019b 100644 --- a/io/test/io-tee/comma_tee_function/expected +++ b/io/test/io-tee/comma_tee_function/expected @@ -20,9 +20,9 @@ pipeline/stdout/matches="true" # commands (with quotes transformed to strings) -basic_test/command="bash -c (SQUOTE)grep (DQUOTE)one two(DQUOTE) > out(SQUOTE)" -function_call_test/command="bash -c (SQUOTE)example_function (DQUOTE)two (DQUOTE) (DQUOTE)one(DQUOTE) > out(SQUOTE)" -unbuffered_test/command="bash -c (SQUOTE)grep (DQUOTE)one two(DQUOTE) > out(SQUOTE)" -unbuffered_fn_test/command="bash -c (SQUOTE)example_function (DQUOTE)two (DQUOTE) (DQUOTE)one(DQUOTE) > out(SQUOTE)" -pipeline/command="bash -c (SQUOTE)grep (SQUOTE)\(SQUOTE)(SQUOTE)^t(SQUOTE)\(SQUOTE)(SQUOTE) | grep (DQUOTE)o$(DQUOTE) > out(SQUOTE)" +basic_test/command="bash -c (SQUOTE)grep (DQUOTE)one two(DQUOTE) > (DQUOTE)out(DQUOTE)(SQUOTE)" +function_call_test/command="bash -c (SQUOTE)example_function (DQUOTE)two (DQUOTE) (DQUOTE)one(DQUOTE) > (DQUOTE)out(DQUOTE)(SQUOTE)" +unbuffered_test/command="bash -c (SQUOTE)grep (DQUOTE)one two(DQUOTE) > (DQUOTE)out(DQUOTE)(SQUOTE)" +unbuffered_fn_test/command="bash -c (SQUOTE)example_function (DQUOTE)two (DQUOTE) (DQUOTE)one(DQUOTE) > (DQUOTE)out(DQUOTE)(SQUOTE)" +pipeline/command="bash -c (SQUOTE)grep (SQUOTE)\(SQUOTE)(SQUOTE)^t(SQUOTE)\(SQUOTE)(SQUOTE) | grep (DQUOTE)o$(DQUOTE) > (DQUOTE)out(DQUOTE)(SQUOTE)" diff --git a/io/test/io-tee/simple/expected b/io/test/io-tee/simple/expected 
index 6b47dcb31..36ad9793b 100644 --- a/io/test/io-tee/simple/expected +++ b/io/test/io-tee/simple/expected @@ -12,7 +12,7 @@ pipeline/stdout/matches="true" # commands (with quotes transformed to strings) -basic_test/command="bash -c (SQUOTE)grep (DQUOTE)one two(DQUOTE) > out(SQUOTE)" -unbuffered_test/command="bash -c (SQUOTE)grep (DQUOTE)one two(DQUOTE) > out(SQUOTE)" -pipeline/command="bash -c (SQUOTE)grep (SQUOTE)\(SQUOTE)(SQUOTE)^t(SQUOTE)\(SQUOTE)(SQUOTE) | grep (DQUOTE)o$(DQUOTE) > out(SQUOTE)" +basic_test/command="bash -c (SQUOTE)grep (DQUOTE)one two(DQUOTE) > (DQUOTE)out(DQUOTE)(SQUOTE)" +unbuffered_test/command="bash -c (SQUOTE)grep (DQUOTE)one two(DQUOTE) > (DQUOTE)out(DQUOTE)(SQUOTE)" +pipeline/command="bash -c (SQUOTE)grep (SQUOTE)\(SQUOTE)(SQUOTE)^t(SQUOTE)\(SQUOTE)(SQUOTE) | grep (DQUOTE)o$(DQUOTE) > (DQUOTE)out(DQUOTE)(SQUOTE)" From 8b55df1425f400cc65f5c72b28c2972eb6cb6c29 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Thu, 6 Jun 2019 15:50:49 +1000 Subject: [PATCH 0037/1056] csv-paste: signature: 'line-number;binary=ui' support added --- csv/applications/csv-paste.cpp | 15 +++++++++------ csv/test/csv-paste/expected | 6 ++++++ csv/test/csv-paste/input | 2 ++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/csv/applications/csv-paste.cpp b/csv/applications/csv-paste.cpp index ef711b9da..56d26dbb7 100644 --- a/csv/applications/csv-paste.cpp +++ b/csv/applications/csv-paste.cpp @@ -114,7 +114,7 @@ class source virtual const std::string* read() = 0; virtual const char* read( char* buf ) = 0; bool binary() const { return binary_; } - virtual const bool is_stream() const { return false; } + virtual bool is_stream() const { return false; } const std::string& properties() const { return properties_; } std::size_t size() const { return value_.size(); } @@ -151,7 +151,7 @@ class stream : public source return stream_->gcount() == int( value_.size() ) ? 
buf : NULL; } - const bool is_stream() { return true; } + bool is_stream() const { return true; } private: comma::io::istream stream_; @@ -180,6 +180,7 @@ class line_number : public source bool index; bool reverse; comma::uint32 begin; + std::string format; options( boost::optional< comma::uint32 > b = boost::optional< comma::uint32 >(), comma::uint32 size = 1, bool index = false, bool reverse = false ) : size( size ) @@ -199,6 +200,8 @@ class line_number : public source auto b = map.optional< comma::uint32 >( "begin" ); if( !b ) { b = o.optional< comma::uint32 >( "--begin" ); } begin = begin_( b ); + format = map.value< std::string >( "binary", "" ); + if( !format.empty() && format != "ui" ) { std::cerr << "csv-paste: currently only ui supported for line-number; got: '" << format << "'" << std::endl; exit( 1 ); } // quick and dirty for now } private: @@ -210,7 +213,7 @@ class line_number : public source }; line_number( bool is_binary, const options& options ) - : source( is_binary ? "binary=ui" : "" ) + : source( options.format.empty() ? ( is_binary ? 
"binary=ui" : "" ) : "binary=" + options.format ) // quick and dirty , options_( options ) , count_( 0 ) , value_( options_.begin ) @@ -261,11 +264,11 @@ int main( int ac, char** av ) std::vector< std::string > unnamed = options.unnamed( "--flush,--index,--reverse", "--delimiter,-d,--begin,--size,--block-size" ); boost::ptr_vector< source > sources; bool is_binary = false; - for( unsigned int i = 0; i < unnamed.size(); ++i ) // quick and dirty + for( unsigned int i = 0; i < unnamed.size(); ++i ) // quick and dirty; really lousy code duplication { if( unnamed[i].substr( 0, 6 ) == "value=" ) { if( value( unnamed[i] ).binary() ) { is_binary = true; } } - else if( unnamed[i] == "line-number" || unnamed[i].substr( 0, 12 ) == "line-number;" ) { continue; } // quick and dirty - if( stream( unnamed[i] ).binary() ) { is_binary = true; } + else if( unnamed[i] == "line-number" || unnamed[i].substr( 0, 12 ) == "line-number;" ) { if( line_number( is_binary, line_number::options( unnamed[i], options ) ).binary() ) { is_binary = true; } } // quick and dirty + else if( stream( unnamed[i] ).binary() ) { is_binary = true; } } for( unsigned int i = 0; i < unnamed.size(); ++i ) { diff --git a/csv/test/csv-paste/expected b/csv/test/csv-paste/expected index ad48b2ffa..95f7b8c23 100644 --- a/csv/test/csv-paste/expected +++ b/csv/test/csv-paste/expected @@ -47,3 +47,9 @@ line_number/multiple[1]/output="0,0;0,0;0,0;0,0;0,0;0,1;1,1;1,1;1,1;1,1;" line_number/multiple[1]/status=0 line_number/multiple[2]/output="0,0;0,1;0,2;0,3;0,4;1,0;1,1;1,2;1,3;1,4;" line_number/multiple[2]/status=0 + +line_number/binary[0]/output/line[0]="0" +line_number/binary[0]/output/line[1]="1" +line_number/binary[0]/output/line[2]="2" +line_number/binary[0]/output/line[3]="3" +line_number/binary[0]/status=0 diff --git a/csv/test/csv-paste/input b/csv/test/csv-paste/input index fb5037a14..f6ef59556 100644 --- a/csv/test/csv-paste/input +++ b/csv/test/csv-paste/input @@ -24,3 +24,5 @@ 
line_number/parametrized[8]="csv-paste 'line-number;size=5;index;reverse;begin=3 line_number/multiple[0]="csv-paste line-number 'line-number;begin=4' --begin=5 | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" line_number/multiple[1]="csv-paste line-number 'line-number;size=5' --size=6 | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" line_number/multiple[2]="csv-paste line-number 'line-number;index' --size=5 | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" + +line_number/binary[0]="csv-paste 'line-number;binary=ui' | csv-from-bin ui | head -n4; comma_status_ok && exit 0 || exit 1" From d97d04166a70a1f5cf80822629585c3100a4942a Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 7 Jun 2019 18:59:33 +1000 Subject: [PATCH 0038/1056] csv-intervals: join: --matching, --not-matching implemented --- csv/applications/csv-intervals.cpp | 28 ++++++++++++++++++++++------ csv/test/csv-intervals/join/expected | 12 ++++++++++++ csv/test/csv-intervals/join/input | 4 ++++ 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/csv/applications/csv-intervals.cpp b/csv/applications/csv-intervals.cpp index f370dd819..01e02c6de 100644 --- a/csv/applications/csv-intervals.cpp +++ b/csv/applications/csv-intervals.cpp @@ -27,7 +27,6 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- /// @author Vinny Do #include @@ -136,6 +135,8 @@ static void usage( bool verbose = false ) std::cerr << " join" << std::endl; std::cerr << " options" << std::endl; std::cerr << " --intervals=: file or stream name" << std::endl; + std::cerr << " --matching: output matching input records, do not append the intervals" << std::endl; + std::cerr << " --not-matching: output not matching input records" << std::endl; std::cerr << std::endl; std::cerr << " make" << std::endl; std::cerr << " options" << std::endl; @@ -506,8 +507,12 @@ struct intervals int join( std::istream& is, const std::string& first_line ) { + options.assert_mutually_exclusive( "--matching,--not-matching" ); + bool matching = options.exists( "--matching" ); + bool not_matching = options.exists( "--not-matching" ); + bool output_joined = !matching && !not_matching; comma::csv::options icsv( options ); - if( csv.binary() != icsv.binary() ) { COMMA_THROW( comma::exception, "expected both inputs ascii or both binary; got stdin " << ( icsv.binary() ? "binary" : "ascii" ) << " while --intervals " << ( csv.binary() ? "binary" : "ascii" ) ); } + if( output_joined && csv.binary() != icsv.binary() ) { std::cerr << "csv-intervals: join: expected both inputs ascii or both binary; got stdin " << ( icsv.binary() ? "binary" : "ascii" ) << " while --intervals " << ( csv.binary() ? "binary" : "ascii" ) << std::endl; return 1; } icsv.full_xpath = false; comma::csv::input_stream< scalar_t< From > > istream( std::cin, icsv ); append = true; @@ -516,11 +521,18 @@ struct intervals { auto p = istream.read(); if( !p ) { break; } - for( typename map_t::iterator it = map.begin(); it != map.end(); ++it ) // todo! quadratic complexity; how the heck to query icl map? use boost::...::query? + bool found = false; + typename map_t::iterator it; + for( it = map.begin(); it != map.end(); ++it ) // todo! quadratic complexity; how the heck to query icl map? use boost::...::query? 
{ const bound_t< bound_type >& from = it->first.lower(); const bound_t< bound_type >& to = it->first.upper(); - if( ( !from.value || p->scalar >= *from.value ) && ( !to.value || p->scalar < *to.value ) ) + found = ( !from.value || p->scalar >= *from.value ) && ( !to.value || p->scalar < *to.value ); + if( found ) { break; } + } + if( output_joined ) + { + if( found ) { std::string joined = csv.binary() ? "" : comma::join( istream.ascii().last(), icsv.delimiter ); for( const auto& s: it->second ) @@ -535,9 +547,13 @@ struct intervals std::cout << joined << icsv.delimiter << s << std::endl; } } - break; } } + else if( matching == found ) + { + if( icsv.binary() ) { std::cout.write( istream.binary().last(), icsv.format().size() ); } + else { std::cout << comma::join( istream.ascii().last(), icsv.delimiter ) << std::endl; } + } if( icsv.flush ) { std::cout.flush(); } } return 0; @@ -595,7 +611,7 @@ int main( int ac, char** av ) verbose = options.exists( "--verbose,-v" ); debug = options.exists( "--debug" ); options.assert_mutually_exclusive( "--binary,--format" ); - const auto& unnamed = options.unnamed( "--append,-a,--debug,--flush,--input-fields,--output-fields,--intervals-only,--limits,-l", "-.*" ); + const auto& unnamed = options.unnamed( "--append,-a,--debug,--flush,--input-fields,--matching,--not-matching,--output-fields,--intervals-only,--limits,-l", "-.*" ); if( unnamed.empty() ) { std::cerr << "csv-intervals: please specify operation" << std::endl; return 1; } std::string operation = unnamed[0]; if( operation == "make" ) diff --git a/csv/test/csv-intervals/join/expected b/csv/test/csv-intervals/join/expected index d78db3ead..2d0a5a80e 100644 --- a/csv/test/csv-intervals/join/expected +++ b/csv/test/csv-intervals/join/expected @@ -13,3 +13,15 @@ join/binary[0]/output/line[0]="9,0,10,0" join/binary[0]/output/line[1]="9,0,10,1" join/binary[0]/output/line[2]="9,5,20,2" join/binary[0]/status=0 +join/matching[0]/output/line[0]="1" 
+join/matching[0]/output/line[1]="9" +join/matching[0]/status=0 +join/matching[1]/output/line[0]="1" +join/matching[1]/output/line[1]="9" +join/matching[1]/status=0 +join/not_matching[0]/output/line[0]="5" +join/not_matching[0]/output/line[1]="11" +join/not_matching[0]/status=0 +join/not_matching[1]/output/line[0]="5" +join/not_matching[1]/output/line[1]="11" +join/not_matching[1]/status=0 diff --git a/csv/test/csv-intervals/join/input b/csv/test/csv-intervals/join/input index 9cd53fe1a..4ffbfd8aa 100644 --- a/csv/test/csv-intervals/join/input +++ b/csv/test/csv-intervals/join/input @@ -3,3 +3,7 @@ join/ascii[1]="( echo 9 ) | csv-intervals join --intervals <( echo 0,2,a; echo 0 join/fields[0]="( echo ,1; echo ,5 ) | csv-intervals --fields ,scalar join --intervals <( echo 0,2,a )" join/fields[1]="( echo ,1; echo ,5 ) | csv-intervals --fields ,scalar join --intervals <( echo a,0,2,b )';fields=,from,to'" join/binary[0]="( echo 9 ) | csv-to-bin ui | csv-intervals join --binary ui --intervals <( ( echo 0,10,0; echo 0,10,1; echo 5,20,2 ) | csv-to-bin 3ui )';binary=3ui' | csv-from-bin 4ui" +join/matching[0]="( echo 1; echo 5; echo 9; echo 11 ) | csv-intervals join --intervals <( echo 0,2,a; echo 9,11,b ) --matching" +join/matching[1]="( echo 1; echo 5; echo 9; echo 11 ) | csv-to-bin ui | csv-intervals join --intervals <( echo 0,2,a; echo 9,11,b ) --matching --binary ui | csv-from-bin ui" +join/not_matching[0]="( echo 1; echo 5; echo 9; echo 11 ) | csv-intervals join --intervals <( echo 0,2,a; echo 9,11,b ) --not-matching" +join/not_matching[1]="( echo 1; echo 5; echo 9; echo 11 ) | csv-to-bin ui | csv-intervals join --intervals <( echo 0,2,a; echo 9,11,b ) --not-matching --binary ui | csv-from-bin ui" From 7e8f4ee3867a0fb38315c5ea3ab74d3a3f061df5 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 25 Jun 2019 12:34:38 +1000 Subject: [PATCH 0039/1056] contact_info updated --- application/contact_info.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git 
a/application/contact_info.h b/application/contact_info.h index 23aae6929..d4154064c 100644 --- a/application/contact_info.h +++ b/application/contact_info.h @@ -27,16 +27,12 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - /// @author vsevolod vlaskine -#ifndef COMMA_APPLICATION_CONTACT_INFO_H_ -#define COMMA_APPLICATION_CONTACT_INFO_H_ +#pragma once namespace comma { -static const char* contact_info = "more info: https://github.com/acfr/comma#readme"; +static const char* contact_info = "more info: https://gitlab.com/orthographic/comma#readme"; } // namespace comma { - -#endif // #ifndef COMMA_APPLICATION_CONTACT_INFO_H_ From 8bd8db3c5676e447a13896bc9efa5528b193ae88 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 25 Jun 2019 12:34:51 +1000 Subject: [PATCH 0040/1056] csv-analyse: minor brush-up --- csv/applications/csv-analyse.cpp | 39 +++++++++----------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/csv/applications/csv-analyse.cpp b/csv/applications/csv-analyse.cpp index d07a38c4b..a551673ca 100644 --- a/csv/applications/csv-analyse.cpp +++ b/csv/applications/csv-analyse.cpp @@ -63,18 +63,15 @@ class histogram //sort, ugly std::multimap< std::size_t, std::size_t > sorted; std::size_t sum=0; - for(std::map< std::size_t, std::size_t >::const_iterator it=histogram_.begin(), end=histogram_.end(); it!=end; ++it ) { sorted.insert( std::make_pair(it->second,it->first) ); sum += it->second; - } - + } for(std::multimap< std::size_t, std::size_t >::const_reverse_iterator it=sorted.rbegin(), end=sorted.rend(); it!=end; ++it ) { os << it->second << "," << it->first << "," << (double)((double)(it->first)/(double)sum) << std::endl; } - return os; } @@ -84,12 +81,9 @@ class histogram std::map< std::size_t, std::size_t > histogram_; //length, count }; -std::ostream& operator<<(std::ostream& os, const histogram & h) -{ - return h.print_sorted(os); -} +std::ostream& 
operator<<(std::ostream& os, const histogram & h) { return h.print_sorted(os); } -static void usage() +static void usage( bool ) { std::cerr << std::endl; std::cerr << "Analyse binary data to guess message lengths in unknown binary stream: output candidate lengths, repeat counts and normalised probabilities" << std::endl; @@ -133,33 +127,22 @@ int main( int ac, char** av ) #ifdef WIN32 _setmode( _fileno( stdin ), _O_BINARY ); #endif - - command_line_options options( ac, av ); - if( ac > 1 || options.exists( "--help" ) || options.exists( "-h" ) ) { usage(); } //could just say ac > 1... but leave for future args - - histogram h; - - const std::size_t read_size=65535; //todo: better way? + command_line_options options( ac, av, usage ); + const std::size_t read_size = 65535; // todo? better way? std::vector< unsigned char > data( read_size ); - std::size_t offset=0; - - //read as many bytes as available on stdin - while( std::cin.good() && !std::cin.eof() ) + std::size_t offset = 0; + histogram h; + while( std::cin.good() && !std::cin.eof() ) //read as many bytes as available on stdin { int bytes_read = ::read( 0, &data[0], read_size ); if( bytes_read <= 0 ) { break; } - - for( int i=0; i Date: Fri, 28 Jun 2019 10:57:21 +1000 Subject: [PATCH 0041/1056] comma-build: quick and dirty handling python installation added --- util/applications/comma-build | 42 ++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/util/applications/comma-build b/util/applications/comma-build index 0f1ea188b..487624c58 100755 --- a/util/applications/comma-build +++ b/util/applications/comma-build @@ -33,6 +33,9 @@ source $( type -p comma-application-util ) || { echo "$basename: cannot source 'comma-application-util'" >&2; exit 1; } +function say() { echo "comma-build: $@" >&2; } +function die() { say $@; exit 1; } + function description() { cat <&2 ; exit 1 ; } - echo "comma-build: $command: running in $build_dir: ${@:2}" >&2 - ( cd "$build_dir" && 
${@:2} ) + if [[ -f "$src_dir/CMakeLists.txt" ]]; then + local build_dir="../../build/$1" + mkdir "$build_dir" -p || { echo "comma-build: $command: failed to create '$build_dir'" >&2 ; exit 1 ; } + echo "comma-build: $command: running in $build_dir: ${@:2}" >&2 + ( cd "$build_dir" && ${@:2} ) + elif [[ -f "$src_dir/setup.py" ]]; then # quick and dirty + case "$command" in + make_install) echo "comma-build: $command: running in $src_dir on python3 setup: ${@:2}" >&2 + ( cd "$src_dir" && python3 setup.py install ) + ;; + make_only) echo "comma-build: $command: no CMakeLists.txt in $src_dir, but found setup.py; skipped" >&2 + ;; + sudo_make_install) echo "comma-build: $command: running in $src_dir on python3 setup: ${@:2}" >&2 + ( cd "$src_dir" && sudo python3 setup.py install ) + ;; + *) die "$command: on $src_dir: do not know how to handle command for python3 installations" + ;; + esac + else + die "$command: on $src_dir: CMakeLists.txt or setup.py not found; don't know how to handle" + fi } function run_cmake() @@ -312,13 +332,13 @@ function run_pack() # quick and dirty [[ ! -f $src_dir/dependencies.cmake-cache ]] || cmake_options+=( -C$src_dir/dependencies.cmake-cache ) comma-build install ${cmake_options[@]} if [[ -f $src_dir/dependencies.cpack-options ]]; then - cpack_make_opts=$(cat $src_dir/dependencies.cpack-options ) - # The user specified different options for packing. Re-run the make so that the Cmake files are regenerated. - # This is primarily done so that software can be built as a standard user, and installations that would otherwise go into system directories (eg /etc/systemd) can be redirected to /usr/local/... - # When the software is packed however, you'd want it to go to install location (/etc). 
- comma-build make ${cpack_make_opts} - fi - comma-build cpack -G RPM ${@:3} || exit 1 #-D CPACK_RPM_PACKAGE_RELOCATABLE=ON || exit 1 #-D CPACK_PACKAGING_INSTALL_PREFIX=$pack_dir/rpm + cpack_make_opts=$(cat $src_dir/dependencies.cpack-options ) + # The user specified different options for packing. Re-run the make so that the Cmake files are regenerated. + # This is primarily done so that software can be built as a standard user, and installations that would otherwise go into system directories (eg /etc/systemd) can be redirected to /usr/local/... + # When the software is packed however, you'd want it to go to install location (/etc). + comma-build make ${cpack_make_opts} + fi + comma-build cpack -G RPM ${@:3} || exit 1 #-D CPACK_RPM_PACKAGE_RELOCATABLE=ON || exit 1 #-D CPACK_PACKAGING_INSTALL_PREFIX=$pack_dir/rpm ) || { echo "comma-build: pack failed" >&2 ; exit 1 ; } cat dependencies.commits | while IFS=, read what commit ; do cp $pack_dir/build/$what/*.rpm $pack_dir/rpm ; done cp $pack_dir/build/$top_repository/*.rpm $pack_dir/rpm From dcb7d4465fcf5dd731cffc0c1a80ead2eef9c3a0 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 9 Jul 2019 11:00:14 +1000 Subject: [PATCH 0042/1056] name-value-apply: --unquote-numbers implemented --- name_value/applications/name-value-apply | 38 +++++++++++------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/name_value/applications/name-value-apply b/name_value/applications/name-value-apply index 2b064142b..d6d8a5ca3 100755 --- a/name_value/applications/name-value-apply +++ b/name_value/applications/name-value-apply @@ -3,7 +3,7 @@ source $( type -p comma-application-util ) -function say() { echo "$scriptname: $@" >&2; } +function say() { echo "name-value-apply: $@" >&2; } function die() { say "$@"; exit 1; } function bye() { say "$@"; exit 0; } function mangle_options() { comma-options-to-name-value "$@" | { grep '='; :; } | comma_path_value_mangle | sed 's/^/local options_/g'; comma_status_ok; } @@ -11,8 
+11,9 @@ function mangle_options() { comma-options-to-name-value "$@" | { grep '='; :; } function description() { cat <&2 <] +usage + name-value-apply [] -Options: +options $( description | sed 's/^/ /' ) -Example: +example > ( echo a=5; echo b=7 ) > cfg1.pv; ( echo a=6; echo c=8 ) > cfg2.pv - > $scriptname cfg1.pv cfg2.pv + > name-value-apply cfg1.pv cfg2.pv a="6" b="7" c="8" - > $scriptname --source cfg* + > name-value-apply --source cfg* a="cfg2.pv" b="cfg1.pv" c="cfg2.pv" @@ -51,23 +52,18 @@ function combine_files() { for ff in "${files[@]}"; do cat "$ff" | name-value-co function execute() { - local -r scriptname=$( basename "$0" ) - (( $( comma_options_has --bash-completion $@ ) )) && { bash_completion; exit 0; } (( $( comma_options_has --help $@ ) || $( comma_options_has -h $@ ) )) && usage - local mangle_text ff local -a files mangle_text=$( description | mangle_options "$@"; comma_status_ok ) || die "Invalid arguments."; eval "$mangle_text" mangle_text="files=( $( description | comma-options-to-name-value "$@" | grep '^"' ) )"; eval "$mangle_text"; unset mangle_text - - for ff in "${files[@]}" - do - { name-value-convert < "$ff" | - { [[ -z $options_source ]] && cat - || cut -d = -f1 | sed "s~$~=\"$ff\"~g";} - } - done | name-value-convert --take-last + local unquote_numbers_option + (( !options_unquote_numbers )) || unquote_numbers_option="--unquote-numbers" + + for ff in "${files[@]}"; do + name-value-convert < "$ff" | { [[ -z $options_source ]] && cat - || cut -d = -f1 | sed "s~$~=\"$ff\"~g"; } + done | name-value-convert --take-last $unquote_numbers_option } [[ $( basename $0 ) != "name-value-apply" ]] || execute "$@" - From 2963d5dee81bc6ebf672595b368a8faa87be9a08 Mon Sep 17 00:00:00 2001 From: James McColl Date: Wed, 10 Jul 2019 13:03:56 +1000 Subject: [PATCH 0043/1056] csv-calc: --precision implemented --- csv/applications/csv-calc.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/csv/applications/csv-calc.cpp 
b/csv/applications/csv-calc.cpp index 5a5b8af5d..eea012c5b 100644 --- a/csv/applications/csv-calc.cpp +++ b/csv/applications/csv-calc.cpp @@ -1079,7 +1079,7 @@ struct Operation : public Operationbase for( std::size_t i = 0; i < input_elements_.size(); ++i ) { comma::csv::format::types_enum output_type = input_elements_[i].type; - switch( E ) // quick and dirty, implement in operations::traits, just no time + switch( E ) // quick and dirty, operations::traits would be better, but likely to be optimized by compiler anyway { case Operations::Enum::radius: case Operations::Enum::diameter: @@ -1215,7 +1215,7 @@ static void calculate( const comma::csv::options& csv, OperationsMap& operations { ( *it->second )[i].calculate(); if( csv.binary() ) { r.append( ( *it->second )[i].buffer(), ( *it->second )[i].output_format().size() ); } - else { if( i > 0 ) { r += csv.delimiter; } r.append(( *it->second )[i].output_format().bin_to_csv( ( *it->second )[i].buffer(), csv.delimiter, 12 )); } + else { if( i > 0 ) { r += csv.delimiter; } r.append(( *it->second )[i].output_format().bin_to_csv( ( *it->second )[i].buffer(), csv.delimiter, csv.precision )); } } results[it->first] = r; } @@ -1232,6 +1232,7 @@ int main( int ac, char** av ) std::vector< std::string > unnamed = options.unnamed( "", "--binary,-b,--delimiter,-d,--format,--fields,-f,--output-fields" ); comma::csv::options csv( options ); csv.full_xpath = false; + std::cout.precision( csv.precision ); #ifdef WIN32 if( csv.binary() ) { _setmode( _fileno( stdin ), _O_BINARY ); _setmode( _fileno( stdout ), _O_BINARY ); } #endif From 6e20b573cc91fc7072b93509ebc2561cfc08a79a Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 29 Jul 2019 13:03:19 +1000 Subject: [PATCH 0044/1056] csv-paste: block-size implemented for input streams --- csv/applications/csv-paste.cpp | 177 ++++++++------------------------- csv/test/csv-paste/expected | 20 ++++ csv/test/csv-paste/input | 23 +++-- 3 files changed, 72 insertions(+), 148 deletions(-) diff 
--git a/csv/applications/csv-paste.cpp b/csv/applications/csv-paste.cpp index 56d26dbb7..3258ec73e 100644 --- a/csv/applications/csv-paste.cpp +++ b/csv/applications/csv-paste.cpp @@ -75,14 +75,19 @@ static void usage( bool verbose ) std::cerr << " --verbose,-v; more debug output" << std::endl; std::cerr << std::endl; std::cerr << "inputs" << std::endl; - std::cerr << " : [;size=|binary=]: file name or \"-\" for stdin; specify size or format, if binary" << std::endl; + std::cerr << " : [;]: file name or \"-\" for stdin; specify size or format, if binary" << std::endl; + std::cerr << " properties" << std::endl; + std::cerr << " binary=: if input is binary, record binary format; or use 'size'" << std::endl; + std::cerr << " block-size=; repeat each record times" << std::endl; + std::cerr << " size=; if input is binary, record size in bytes; or use 'binary'" << std::endl; std::cerr << " value : value=[;binary=]; specify size or format, if binary" << std::endl; std::cerr << " line-number[;] : add the line number; as ui, if binary (quick and dirty, will override the file named \"line-number\")" << std::endl; std::cerr << " options" << std::endl; std::cerr << " --begin : start line number count at ; default: 0" << std::endl; + std::cerr << " --block-size,--size=: number of records with the same line number; default: 1" << std::endl; + std::cerr << " WARNING: --size: deprecated, since it is confusing for files" << std::endl; std::cerr << " --index; instead of block number output record index in the block" << std::endl; - std::cerr << " --reverse; if --index, output index in descending order" << std::endl; - std::cerr << " --size,--block-size : number of records with the same line number; default: 1" << std::endl; + std::cerr << " --reverse; if --index, output index in descending order" << std::endl; std::cerr << " examples (try them)" << std::endl; std::cerr << " line number" << std::endl; std::cerr << " seq 0 20 | csv-paste - line-number --begin 5 --size 3" << std::endl; 
@@ -102,13 +107,14 @@ static void usage( bool verbose ) class source { public: - source( const std::string& properties = "" ) : properties_( properties ) + source( const std::string& properties = "" ) : properties_( properties ), block_count_( 0 ), buf_( nullptr ) { comma::name_value::map map( properties, ';', '=' ); format_ = comma::csv::format( map.value< std::string >( "binary", "" ) ); unsigned int size = map.value< unsigned int >( "size", format_.size() ); binary_ = size > 0; value_ = std::string( size, 0 ); + block_size_ = map.value< unsigned int >( "block-size", 1 ); } virtual ~source() {} virtual const std::string* read() = 0; @@ -123,6 +129,9 @@ class source bool binary_; comma::csv::format format_; std::string properties_; + unsigned int block_size_; + unsigned int block_count_; + const char* buf_; }; class stream : public source @@ -136,19 +145,32 @@ class stream : public source const std::string* read() { - while( stream_->good() && !stream_->eof() ) + if( block_count_ == block_size_ || value_.empty() ) { - std::getline( *stream_, value_ ); - if( !value_.empty() && *value_.rbegin() == '\r' ) { value_ = value_.substr( 0, value_.length() - 1 ); } // windows... sigh... - if( !value_.empty() ) { return &value_; } + block_count_ = 1; + while( stream_->good() && !stream_->eof() ) + { + std::getline( *stream_, value_ ); + if( !value_.empty() && *value_.rbegin() == '\r' ) { value_ = value_.substr( 0, value_.length() - 1 ); } // windows... sigh... + if( !value_.empty() ) { return &value_; } + } + return nullptr; } - return NULL; + ++block_count_; + return &value_; } const char* read( char* buf ) { - stream_->read( buf, value_.size() ); - return stream_->gcount() == int( value_.size() ) ? buf : NULL; + if( block_count_ == block_size_ || buf_ == nullptr ) + { + block_count_ = 1; + buf_ = buf; // quick and dirty + stream_->read( buf, value_.size() ); + return stream_->gcount() == int( value_.size() ) ? 
buf : nullptr; + } + ++block_count_; + return buf_; } bool is_stream() const { return true; } @@ -192,9 +214,9 @@ class line_number : public source options( const std::string& properties, const comma::command_line_options& o ) // quick and dirty: use visiting instead { - options defaults( boost::optional< comma::uint32 >(), o.value< comma::uint32 >( "--size,--block-size", 1 ), o.exists( "--index" ), o.exists( "--reverse" ) ); + options defaults( boost::optional< comma::uint32 >(), o.value< comma::uint32 >( "--block-size,--size", 1 ), o.exists( "--index" ), o.exists( "--reverse" ) ); comma::name_value::map map( properties, ';', '=' ); - size = map.value< comma::uint32 >( "size", defaults.size ); + size = map.value< comma::uint32 >( map.get().find( "block-size" ) != map.get().end() ? "block-size" : "size", defaults.size ); // quick and dirty index = map.value< bool >( "index", defaults.index ); reverse = map.value< bool >( "reverse", defaults.reverse ); auto b = map.optional< comma::uint32 >( "begin" ); @@ -261,7 +283,7 @@ int main( int ac, char** av ) { comma::command_line_options options( ac, av, usage ); char delimiter = options.value( "--delimiter,-d", ',' ); - std::vector< std::string > unnamed = options.unnamed( "--flush,--index,--reverse", "--delimiter,-d,--begin,--size,--block-size" ); + std::vector< std::string > unnamed = options.unnamed( "--flush,--index,--reverse", "-.*" ); boost::ptr_vector< source > sources; bool is_binary = false; for( unsigned int i = 0; i < unnamed.size(); ++i ) // quick and dirty; really lousy code duplication @@ -304,7 +326,7 @@ int main( int ac, char** av ) char* p = &buffer[0]; for( unsigned int i = 0; i < sources.size(); p += sources[i].size(), ++i ) { - if( sources[i].read( p ) == NULL ) + if( sources[i].read( p ) == nullptr ) { if( streams == 0 ) { return 0; } std::cerr << "csv-paste: unexpected end of file in " << unnamed[i] << std::endl; @@ -325,12 +347,12 @@ int main( int ac, char** av ) for( unsigned int i = 0; i < 
sources.size(); ++i ) { const std::string* s = sources[i].read(); - if( s == NULL ) + if( s == nullptr ) { if( streams == 0 ) { return 0; } std::cerr << "csv-paste: unexpected end of file in " << unnamed[i] << std::endl; return 1; } - if (sources[i].is_stream()) ++streams; + if( sources[i].is_stream() ) { ++streams; } if( i > 0 ) { oss << delimiter; } oss << *s; } @@ -343,124 +365,3 @@ int main( int ac, char** av ) catch( ... ) { std::cerr << "csv-paste: unknown exception" << std::endl; } return 1; } - - -// int main( int ac, char** av ) -// { -// bool show_usage = true; -// try -// { -// comma::command_line_options options( ac, av ); -// if( options.exists( "--help,-h" ) ) { usage(); } -// char delimiter = options.value( "--delimiter,-d", ',' ); -// std::vector< std::string > unnamed = options.unnamed( "", "--delimiter,-d" ); -// boost::ptr_vector< std::istream > files; -// std::vector< std::pair< std::istream*, std::size_t > > sources; -// bool binary = false; -// for( unsigned int i = 0; i < unnamed.size(); ++i ) -// { -// std::string filename = unnamed[i]; -// std::size_t size = 0; -// std::vector< std::string > v = comma::split( unnamed[i], ';' ); -// filename = v[0]; -// for( std::size_t j = 1; j < v.size(); ++j ) -// { -// std::vector< std::string > w = comma::split( v[j], '=' ); -// if( w.size() != 2 ) { COMMA_THROW( comma::exception, "expected filename and options, got \"" << unnamed[i] << "\"" ); } -// if( w[0] == "binary" ) -// { -// if( i == 0 ) { binary = true; } -// else if( !binary ) { COMMA_THROW( comma::exception, unnamed[0] << " is ascii, but " << filename << " is binary" ); } -// size = comma::csv::format( w[1] ).size(); -// } -// else if( w[0] == "size" ) -// { -// if( i == 0 ) { binary = true; } -// else if( !binary ) { COMMA_THROW( comma::exception, unnamed[0] << " is ascii, but " << filename << " is binary" ); } -// size = boost::lexical_cast< std::size_t >( w[1] ); -// } -// } -// if( binary && size == 0 ) { COMMA_THROW( comma::exception, 
"in binary mode, please specify size or format for \"" << filename << "\"" ); } -// if( filename == "-" ) -// { -// sources.push_back( std::make_pair( &std::cin, size ) ); -// } -// else -// { -// files.push_back( new std::ifstream( filename.c_str() ) ); -// if( !files.back().good() || files.back().eof() ) { COMMA_THROW( comma::exception, "failed to open " << unnamed[i] ); } -// sources.push_back( std::make_pair( &files.back(), size ) ); -// } -// } -// if( sources.empty() ) { usage(); } -// #ifdef WIN32 -// if( binary ) { _setmode( _fileno( stdin ), _O_BINARY ); } -// #endif -// show_usage = false; -// if( binary ) -// { -// std::size_t size = 0; -// for( unsigned int i = 0; i < sources.size(); ++i ) { size += sources[i].second; } -// while( true ) -// { -// for( unsigned int i = 0; i < sources.size(); ++i ) -// { -// std::string s( sources[i].second, 0 ); -// char* buf = &s[0]; -// sources[i].first->read( buf, sources[i].second ); -// int count = sources[i].first->gcount(); -// if( count != 0 && (unsigned int)count != sources[i].second ) { COMMA_THROW( comma::exception, unnamed[i] << ": expected " << sources[i].second << " bytes, got " << count ); } -// if( !sources[i].first->good() || sources[i].first->eof() ) -// { -// bool ok = true; -// for( unsigned int j = 0; j < sources.size() && ok; ++j ) -// { -// if( j > i ) { sources[j].first->peek(); } -// ok = !sources[j].first->good() || sources[j].first->eof(); -// } -// if( ok ) { return 0; } -// else { COMMA_THROW( comma::exception, unnamed[i] << ": unexpected end of file" ); } -// } -// std::cout << s; -// } -// } -// } -// else -// { -// while( true ) -// { -// bool first = true; -// for( unsigned int i = 0; i < sources.size(); ++i ) -// { -// std::string s; -// std::getline( *sources[i].first, s ); -// if( !sources[i].first->good() || sources[i].first->eof() ) -// { -// bool ok = true; -// for( unsigned int j = 0; j < sources.size() && ok; ++j ) -// { -// if( j > i ) { sources[j].first->peek(); } -// ok = 
!sources[j].first->good() || sources[j].first->eof(); -// } -// if( ok ) { return 0; } -// else { COMMA_THROW( comma::exception, unnamed[i] << ": unexpected end of file" ); } -// } -// if( !s.empty() && *s.rbegin() == '\r' ) { s = s.substr( 0, s.length() - 1 ); } // windows... sigh... -// if( s.empty() ) { continue; } -// if( !first ) { std::cout << delimiter; } else { first = false; } -// std::cout << s; -// } -// std::cout << std::endl; -// } -// } -// } -// catch( std::exception& ex ) -// { -// std::cerr << "csv-paste: " << ex.what() << std::endl; -// } -// catch( ... ) -// { -// std::cerr << "csv-paste: unknown exception" << std::endl; -// } -// if( show_usage ) { usage(); } -// } diff --git a/csv/test/csv-paste/expected b/csv/test/csv-paste/expected index 95f7b8c23..46ff006b9 100644 --- a/csv/test/csv-paste/expected +++ b/csv/test/csv-paste/expected @@ -53,3 +53,23 @@ line_number/binary[0]/output/line[1]="1" line_number/binary[0]/output/line[2]="2" line_number/binary[0]/output/line[3]="3" line_number/binary[0]/status=0 + +block_size/ascii[0]/output/line[0]="a,0" +block_size/ascii[0]/output/line[1]="a,0" +block_size/ascii[0]/output/line[2]="a,1" +block_size/ascii[0]/output/line[3]="a,1" +block_size/ascii[0]/output/line[4]="b,2" +block_size/ascii[0]/output/line[5]="b,2" +block_size/ascii[0]/output/line[6]="b,3" +block_size/ascii[0]/output/line[7]="b,3" +block_size/ascii[0]/status=0 + +block_size/binary[0]/output/line[0]="0,0" +block_size/binary[0]/output/line[1]="0,0" +block_size/binary[0]/output/line[2]="0,1" +block_size/binary[0]/output/line[3]="0,1" +block_size/binary[0]/output/line[4]="1,2" +block_size/binary[0]/output/line[5]="1,2" +block_size/binary[0]/output/line[6]="1,3" +block_size/binary[0]/output/line[7]="1,3" +block_size/binary[0]/status=0 diff --git a/csv/test/csv-paste/input b/csv/test/csv-paste/input index f6ef59556..d00c48ad2 100644 --- a/csv/test/csv-paste/input +++ b/csv/test/csv-paste/input @@ -12,17 +12,20 @@ 
line_number/command_line_options[6]="csv-paste line-number --size 5 --index --re line_number/command_line_options[7]="csv-paste line-number --size 5 --index --reverse --begin 3 | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" line_number/parametrized[0]="csv-paste 'line-number;begin=4' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" -line_number/parametrized[1]="csv-paste 'line-number;size=5' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" -line_number/parametrized[2]="csv-paste 'line-number;size=5;begin=4' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" -line_number/parametrized[3]="csv-paste 'line-number;size=5;index' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" -line_number/parametrized[4]="csv-paste 'line-number;size=5;index;reverse' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" -line_number/parametrized[5]="csv-paste 'line-number;size=5;index;begin=4' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" -line_number/parametrized[6]="csv-paste 'line-number;size=5;index;reverse;begin=4' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" -line_number/parametrized[7]="csv-paste 'line-number;size=5;index;reverse;begin=10' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" -line_number/parametrized[8]="csv-paste 'line-number;size=5;index;reverse;begin=3' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" +line_number/parametrized[1]="csv-paste 'line-number;block-size=5' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" +line_number/parametrized[2]="csv-paste 'line-number;block-size=5;begin=4' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" +line_number/parametrized[3]="csv-paste 'line-number;block-size=5;index' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" +line_number/parametrized[4]="csv-paste 'line-number;block-size=5;index;reverse' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" 
+line_number/parametrized[5]="csv-paste 'line-number;block-size=5;index;begin=4' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" +line_number/parametrized[6]="csv-paste 'line-number;block-size=5;index;reverse;begin=4' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" +line_number/parametrized[7]="csv-paste 'line-number;block-size=5;index;reverse;begin=10' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" +line_number/parametrized[8]="csv-paste 'line-number;block-size=5;index;reverse;begin=3' | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" line_number/multiple[0]="csv-paste line-number 'line-number;begin=4' --begin=5 | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" -line_number/multiple[1]="csv-paste line-number 'line-number;size=5' --size=6 | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" -line_number/multiple[2]="csv-paste line-number 'line-number;index' --size=5 | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" +line_number/multiple[1]="csv-paste line-number 'line-number;block-size=5' --block-size=6 | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" +line_number/multiple[2]="csv-paste line-number 'line-number;index' --block-size=5 | head | tr '\\n' ';'; comma_status_ok && exit 0 || exit 1" line_number/binary[0]="csv-paste 'line-number;binary=ui' | csv-from-bin ui | head -n4; comma_status_ok && exit 0 || exit 1" + +block_size/ascii[0]="csv-paste <( echo a; echo b )';block-size=4' <( echo 0; echo 1; echo 2; echo 3 )';block-size=2'" +block_size/binary[0]="csv-paste <( { echo 0; echo 1; } | csv-to-bin ui )';size=4;block-size=4' <( { echo 0; echo 1; echo 2; echo 3; } | csv-to-bin ui )';size=4;block-size=2' | csv-from-bin 2ui" From a48edf4bc3317655161641eee28fbcc91e8a3358 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 29 Jul 2019 17:34:09 +1000 Subject: [PATCH 0045/1056] csv-paste: bug fixed --- csv/applications/csv-paste.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/csv/applications/csv-paste.cpp b/csv/applications/csv-paste.cpp index 3258ec73e..f9043e4e8 100644 --- a/csv/applications/csv-paste.cpp +++ b/csv/applications/csv-paste.cpp @@ -283,7 +283,7 @@ int main( int ac, char** av ) { comma::command_line_options options( ac, av, usage ); char delimiter = options.value( "--delimiter,-d", ',' ); - std::vector< std::string > unnamed = options.unnamed( "--flush,--index,--reverse", "-.*" ); + std::vector< std::string > unnamed = options.unnamed( "--flush,--index,--reverse", "--delimiter,-d,--begin,--size,--block-size" ); boost::ptr_vector< source > sources; bool is_binary = false; for( unsigned int i = 0; i < unnamed.size(); ++i ) // quick and dirty; really lousy code duplication From aa202a3ddaac0bb1198949a4717b9d2f2b017468 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 6 Aug 2019 13:44:17 +1000 Subject: [PATCH 0046/1056] csv-split: --files implemented for block field --- csv/applications/csv-split.cpp | 37 +++++------ csv/applications/split/split.cpp | 106 +++++++++++++++++-------------- csv/applications/split/split.h | 21 +++--- 3 files changed, 84 insertions(+), 80 deletions(-) diff --git a/csv/applications/csv-split.cpp b/csv/applications/csv-split.cpp index 6defbc18e..ab3a68c15 100644 --- a/csv/applications/csv-split.cpp +++ b/csv/applications/csv-split.cpp @@ -27,7 +27,6 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- /// @author vsevolod vlaskine #ifdef WIN32 @@ -43,17 +42,17 @@ #include "../../csv/traits.h" #include "split/split.h" -comma::csv::options csv; -std::vector< std::string > streams; -boost::optional< boost::posix_time::time_duration > duration; -std::string suffix; -unsigned int size = 0; -bool passthrough; +static comma::csv::options csv; +static std::vector< std::string > streams; +static boost::optional< boost::posix_time::time_duration > duration; +static std::string suffix; +static unsigned int size = 0; +static bool passthrough; +static std::string files; -template < typename T > -void run() +template < typename T > static void run() { - comma::csv::applications::split< T > split( duration, suffix, csv, streams, passthrough ); + comma::csv::applications::split< T > split( duration, suffix, csv, streams, passthrough, files ); if( size == 0 ) { std::string line; @@ -87,16 +86,17 @@ int main( int argc, char** argv ) boost::program_options::options_description description( "options" ); description.add_options() ( "help,h", "display help message" ) - ( "size,c", boost::program_options::value< unsigned int >( &size ), "packet size, only full packets will be written" ) + ( "files", boost::program_options::value< std::string >( &files ), "if 'block' field present, list of files to save blocks; todo: --files for id field" ) + ( "passthrough,pass", "pass data through to stdout" ) ( "period,t", boost::program_options::value< double >( &period ), "period in seconds after which a new file is created" ) - ( "suffix,s", boost::program_options::value< std::string >( &extension ), "filename extension; default will be csv or bin, depending whether it is ascii or binary" ) + ( "size,c", boost::program_options::value< unsigned int >( &size ), "packet size, only full packets will be written" ) ( "string", "id is string; default: 32-bit integer" ) - ( "time", "id is time; default: 32-bit integer" ) - ( "passthrough,pass", "pass data through to stdout" ); + ( "suffix,s", 
boost::program_options::value< std::string >( &extension ), "filename extension; default will be csv or bin, depending whether it is ascii or binary" ) + ( "time", "id is time; default: 32-bit integer" ); description.add( comma::csv::program_options::description() ); boost::program_options::variables_map vm; boost::program_options::store( boost::program_options::parse_command_line( argc, argv, description), vm ); - boost::program_options::parsed_options parsed = boost::program_options::command_line_parser(argc, argv).options( description ).allow_unregistered().run(); + boost::program_options::parsed_options parsed = boost::program_options::command_line_parser( argc, argv ).options( description ).allow_unregistered().run(); boost::program_options::notify( vm ); if ( vm.count( "help" ) || vm.count( "long-help" ) ) { @@ -154,16 +154,13 @@ int main( int argc, char** argv ) if( csv.binary() ) { size = csv.format().size(); } bool id_is_string = vm.count( "string" ); bool id_is_time = vm.count( "time" ); - passthrough = vm.count("passthrough"); - + passthrough = vm.count("passthrough"); if( id_is_string && id_is_time ) { std::cerr << "csv-split: either --string or --time" << std::endl; } - if( period > 0 ) { duration = boost::posix_time::microseconds( static_cast (period * 1e6 )); } + if( period > 0 ) { duration = boost::posix_time::microseconds( static_cast< unsigned int >( period * 1e6 )); } if( extension.empty() ) { suffix = csv.binary() || size > 0 ? ".bin" : ".csv"; } else { suffix += "."; suffix += extension; } - streams = boost::program_options::collect_unrecognized( parsed.options, boost::program_options::include_positional ); if( !streams.empty() && ( csv.has_field( "block" ) || id_is_time ) ) { std::cerr << "publisher streams are not compatible with splitting by block or timestamp." 
<< std::endl; return 1; } - if( id_is_string ) { run< std::string >(); } else if( id_is_time ) { run< boost::posix_time::ptime >(); } else { run< comma::uint32 >(); } diff --git a/csv/applications/split/split.cpp b/csv/applications/split/split.cpp index 71f7f8a62..1132e9912 100644 --- a/csv/applications/split/split.cpp +++ b/csv/applications/split/split.cpp @@ -27,7 +27,6 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - /// @author vsevolod vlaskine #ifdef WIN32 @@ -39,9 +38,10 @@ #include #endif +#include #include -#include "../../../io/file_descriptor.h" #include "../../../base/exception.h" +#include "../../../io/file_descriptor.h" #include "split.h" namespace comma { namespace csv { namespace applications { @@ -50,7 +50,8 @@ template < typename T > split< T >::split( boost::optional< boost::posix_time::time_duration > period , const std::string& suffix , const comma::csv::options& csv - , bool pass ) + , bool pass + , const std::string& filenames ) : ofstream_( std::bind( &split< T >::ofstream_by_time_, this ) ) , period_( period ) , suffix_( suffix ) @@ -62,8 +63,20 @@ split< T >::split( boost::optional< boost::posix_time::time_duration > period if( csv.fields.empty() ) { return; } if( csv.binary() ) { binary_.reset( new comma::csv::binary< input >( csv ) ); } else { ascii_.reset( new comma::csv::ascii< input >( csv ) ); } - if( csv.has_field( "block" ) ) { ofstream_ = std::bind( &split< T >::ofstream_by_block_, this ); } - else if( csv.has_field( "id" ) ) { ofstream_ = std::bind( &split< T >::ofstream_by_id_, this ); } + if( csv.has_field( "block" ) ) + { + ofstream_ = std::bind( &split< T >::ofstream_by_block_, this ); + if( !filenames.empty() ) + { + filenames_.reset( new std::ifstream( filenames ) ); + if( !filenames_->is_open() ) { COMMA_THROW( comma::exception, "failed to open '" << filenames << "'" ); } + } + } + else + { + if( !filenames.empty() ) { COMMA_THROW( 
comma::exception, "--files given, but no block field specified in --fields" ); } + if( csv.has_field( "id" ) ) { ofstream_ = std::bind( &split< T >::ofstream_by_id_, this ); } + } } //to-do @@ -72,46 +85,39 @@ split< T >::split( boost::optional< boost::posix_time::time_duration > period , const std::string& suffix , const comma::csv::options& csv , const std::vector< std::string >& streams //to-do - , bool pass ) - : split( period, suffix, csv, pass ) + , bool pass + , const std::string& filenames ) + : split( period, suffix, csv, pass, filenames ) { - if( 0 < streams.size() ) + if( streams.empty() ) { return; } + auto const io_mode = csv.binary() ? comma::io::mode::binary : comma::io::mode::ascii; + for( auto const& si : streams ) { - auto const io_mode = csv.binary() ? comma::io::mode::binary : comma::io::mode::ascii; - - for( auto const& si : streams ) + auto const stream_values = comma::split( si, ';' ); + if( 2 > stream_values.size() || stream_values[ 0 ].empty() || stream_values[ 1 ].empty() ) { COMMA_THROW( comma::exception, "please specify and output in format ;, got: " << si ); } + transaction t( publishers_ ); + std::unique_ptr< comma::io::publisher > publisher( new comma::io::publisher( stream_values[1], io_mode, false, csv.flush ) ); + if( "..." == stream_values[0] ) { - auto const stream_values = comma::split( si, ';' ); - if( 2 > stream_values.size() || stream_values[ 0 ].empty() || stream_values[ 1 ].empty() ) - { - COMMA_THROW( comma::exception, "please specify and output in format ;, got: " << si ); - } - - transaction t( publishers_ ); - std::unique_ptr< comma::io::publisher > publisher( new comma::io::publisher( stream_values[1], io_mode, false, csv.flush ) ); + if( default_publisher_ ) { COMMA_THROW( comma::exception, "multiple output streams have the id: ..." 
); } + default_publisher_ = std::move( publisher ); + } + else + { + auto publisher_pos = t->insert( std::move( publisher ) ); + auto const keys = comma::split( stream_values[0], ',' ); - if( "..." == stream_values[0] ) - { - if( default_publisher_ ) { COMMA_THROW( comma::exception, "multiple output streams have the id: ..." ); } - default_publisher_ = std::move( publisher ); - } - else + for( auto const& ki : keys ) { - auto publisher_pos = t->insert( std::move( publisher ) ); - auto const keys = comma::split( stream_values[0], ',' ); + auto const kii = boost::lexical_cast< T >( ki ); + if( seen_ids_.end() != seen_ids_.find( kii ) ) { COMMA_THROW( comma::exception, "multiple output streams have the id: " << ki ); } + seen_ids_.insert( kii ); - for( auto const& ki : keys ) - { - auto const kii = boost::lexical_cast< T >( ki ); - if( seen_ids_.end() != seen_ids_.find( kii ) ) { COMMA_THROW( comma::exception, "multiple output streams have the id: " << ki ); } - seen_ids_.insert( kii ); - - mapped_publishers_.insert( std::make_pair( kii, publisher_pos.first->get() ) ); - } + mapped_publishers_.insert( std::make_pair( kii, publisher_pos.first->get() ) ); } } - acceptor_thread_ = std::thread( std::bind( &split< T >::accept_, std::ref( *this ))); } + acceptor_thread_ = std::thread( std::bind( &split< T >::accept_, std::ref( *this ))); } template < typename T > @@ -208,23 +214,29 @@ std::ofstream& split< T >::ofstream_by_block_() if( !last_ || last_->block != current_.block ) { file_.close(); - std::string name = boost::lexical_cast< std::string >( current_.block ) + suffix_; - file_.open( name.c_str(), mode_ ); + std::string filename; + if( filenames_ ) + { + while( std::cin.good() && !is_shutdown_ ) + { + std::getline( *filenames_, filename ); + if( filename.empty() ) { continue; } + const auto& dirname = boost::filesystem::path( filename ).parent_path(); + if( dirname.empty() || boost::filesystem::is_directory( dirname ) || boost::filesystem::create_directories( 
dirname ) ) { break; } + COMMA_THROW( comma::exception, "failed to create directory '" << dirname << "' for file: '" << filename << "'" ); + } + } + if( filename.empty() ) { filename = boost::lexical_cast< std::string >( current_.block ) + suffix_; } + file_.open( &filename[0], mode_ ); + if( !file_.is_open() ) { COMMA_THROW( comma::exception, "failed to open '" << filename << "'" ); } last_ = current_; } return file_; } -template < typename T > -static std::string make_filename_from_id(const T& id, std::string suffix ) -{ - return boost::lexical_cast< std::string >( id ) + suffix; -} +template < typename T > static std::string make_filename_from_id( const T& id, const std::string& suffix ) { return boost::lexical_cast< std::string >( id ) + suffix; } -static std::string make_filename_from_id(const boost::posix_time::ptime& id, std::string suffix ) -{ - return boost::posix_time::to_iso_string( id ) + suffix; -} +static std::string make_filename_from_id( const boost::posix_time::ptime& id, const std::string& suffix ) { return boost::posix_time::to_iso_string( id ) + suffix; } template < typename T > std::ofstream& split< T >::ofstream_by_id_() diff --git a/csv/applications/split/split.h b/csv/applications/split/split.h index 053097a31..53c4a2580 100644 --- a/csv/applications/split/split.h +++ b/csv/applications/split/split.h @@ -30,8 +30,7 @@ /// @author vsevolod vlaskine /// @author cedric wohlleber -#ifndef COMMA_CSV_SPLIT_H -#define COMMA_CSV_SPLIT_H +#pragma once #include #include @@ -119,23 +118,20 @@ class split { public: typedef applications::input< T > input; - split( boost::optional< boost::posix_time::time_duration > period , const std::string& suffix , const comma::csv::options& csv - , bool passthrough ); - - void write( const char* data, unsigned int size ); - void write( std::string line ); - - //to-do + , bool passthrough + , const std::string& filenames ); split( boost::optional< boost::posix_time::time_duration > period , const std::string& suffix , 
const comma::csv::options& csv , const std::vector< std::string >& streams - , bool passthrough ); + , bool passthrough + , const std::string& filenames ); ~split(); - + void write( const char* data, unsigned int size ); + void write( std::string line ); private: std::ofstream& ofstream_by_time_(); std::ofstream& ofstream_by_block_(); @@ -162,6 +158,7 @@ class split ids_type_ seen_ids_; bool pass_; bool flush_; + std::unique_ptr< std::ifstream > filenames_; //to-do bool published_on_stream( const char* data, unsigned int size ); @@ -174,5 +171,3 @@ class split }; } } } // namespace comma { namespace csv { namespace applications { - -#endif // COMMA_CSV_SPLIT_H From a9c3d51f0b4feeaf4dae385037f81f879a315b33 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 6 Aug 2019 17:42:55 +1000 Subject: [PATCH 0047/1056] csv-paste: line-number: --step implemented --- csv/applications/csv-paste.cpp | 22 +++++++----- csv/test/csv-paste/expected | 61 ++++++++++++++++++++++++++++++++++ csv/test/csv-paste/input | 9 +++++ 3 files changed, 83 insertions(+), 9 deletions(-) diff --git a/csv/applications/csv-paste.cpp b/csv/applications/csv-paste.cpp index f9043e4e8..75c736079 100644 --- a/csv/applications/csv-paste.cpp +++ b/csv/applications/csv-paste.cpp @@ -87,7 +87,8 @@ static void usage( bool verbose ) std::cerr << " --block-size,--size=: number of records with the same line number; default: 1" << std::endl; std::cerr << " WARNING: --size: deprecated, since it is confusing for files" << std::endl; std::cerr << " --index; instead of block number output record index in the block" << std::endl; - std::cerr << " --reverse; if --index, output index in descending order" << std::endl; + std::cerr << " --reverse; if --index, output index in descending order" << std::endl; + std::cerr << " --step=; default=1; line number increment/decrement step" << std::endl; std::cerr << " examples (try them)" << std::endl; std::cerr << " line number" << std::endl; std::cerr << " seq 0 20 | csv-paste - 
line-number --begin 5 --size 3" << std::endl; @@ -201,24 +202,27 @@ class line_number : public source comma::uint32 size; bool index; bool reverse; + comma::uint32 step; comma::uint32 begin; std::string format; - options( boost::optional< comma::uint32 > b = boost::optional< comma::uint32 >(), comma::uint32 size = 1, bool index = false, bool reverse = false ) + options( boost::optional< comma::uint32 > b = boost::optional< comma::uint32 >(), comma::uint32 size = 1, bool index = false, bool reverse = false, unsigned int s = 1 ) : size( size ) , index( index ) , reverse( reverse ) + , step( s ) , begin( begin_( b ) ) { } options( const std::string& properties, const comma::command_line_options& o ) // quick and dirty: use visiting instead { - options defaults( boost::optional< comma::uint32 >(), o.value< comma::uint32 >( "--block-size,--size", 1 ), o.exists( "--index" ), o.exists( "--reverse" ) ); + options defaults( boost::optional< comma::uint32 >(), o.value< comma::uint32 >( "--block-size,--size", 1 ), o.exists( "--index" ), o.exists( "--reverse" ), o.value< comma::uint32 >( "--step", 1 ) ); comma::name_value::map map( properties, ';', '=' ); size = map.value< comma::uint32 >( map.get().find( "block-size" ) != map.get().end() ? "block-size" : "size", defaults.size ); // quick and dirty index = map.value< bool >( "index", defaults.index ); reverse = map.value< bool >( "reverse", defaults.reverse ); + step = map.value< comma::uint32 >( "step", defaults.step ); auto b = map.optional< comma::uint32 >( "begin" ); if( !b ) { b = o.optional< comma::uint32 >( "--begin" ); } begin = begin_( b ); @@ -229,8 +233,8 @@ class line_number : public source private: comma::uint32 begin_( const boost::optional< comma::uint32 >& b ) { - if( index && reverse && b && ( *b + 1 ) < size ) { COMMA_THROW( comma::exception, "for --reverse --index, for --size " << size << " expected --begin not less than " << ( size - 1 ) << "; got: " << *b ); } - return b ? *b : reverse ? 
size - 1 : 0; + if( index && reverse && b && ( *b + step ) < size * step ) { COMMA_THROW( comma::exception, "for --reverse --index, for --size " << size << " expected --begin not less than " << ( size - 1 ) << "; got: " << *b ); } + return b ? *b : reverse ? ( size - 1 ) * step : 0; } }; @@ -264,14 +268,14 @@ class line_number : public source void update_() { - ++count_; + ++count_; //count_ += options_.step; if( count_ < options_.size ) { - if( options_.index ) { value_ += options_.reverse ? -1 : 1; } + if( options_.index ) { value_ += options_.reverse ? -options_.step : options_.step; } } else { - value_ = options_.index ? options_.begin : ( value_ + 1 ); + value_ = options_.index ? options_.begin : ( value_ + options_.step ); count_ = 0; } } @@ -283,7 +287,7 @@ int main( int ac, char** av ) { comma::command_line_options options( ac, av, usage ); char delimiter = options.value( "--delimiter,-d", ',' ); - std::vector< std::string > unnamed = options.unnamed( "--flush,--index,--reverse", "--delimiter,-d,--begin,--size,--block-size" ); + std::vector< std::string > unnamed = options.unnamed( "--flush,--index,--reverse", "--delimiter,-d,--begin,--size,--step,--block-size" ); boost::ptr_vector< source > sources; bool is_binary = false; for( unsigned int i = 0; i < unnamed.size(); ++i ) // quick and dirty; really lousy code duplication diff --git a/csv/test/csv-paste/expected b/csv/test/csv-paste/expected index 46ff006b9..ae81fcb6c 100644 --- a/csv/test/csv-paste/expected +++ b/csv/test/csv-paste/expected @@ -73,3 +73,64 @@ block_size/binary[0]/output/line[5]="1,2" block_size/binary[0]/output/line[6]="1,3" block_size/binary[0]/output/line[7]="1,3" block_size/binary[0]/status=0 + +line_number/step[0]/output/line[0]="0" +line_number/step[0]/output/line[1]="2" +line_number/step[0]/output/line[2]="4" +line_number/step[0]/output/line[3]="6" +line_number/step[0]/status=0 +line_number/step[1]/output/line[0]="0" +line_number/step[1]/output/line[1]="2" 
+line_number/step[1]/output/line[2]="4" +line_number/step[1]/output/line[3]="6" +line_number/step[1]/status=0 +line_number/step[2]/output/line[0]="0" +line_number/step[2]/output/line[1]="2" +line_number/step[2]/output/line[2]="4" +line_number/step[2]/output/line[3]="6" +line_number/step[2]/status=0 +line_number/step[3]/output/line[0]="0" +line_number/step[3]/output/line[1]="0" +line_number/step[3]/output/line[2]="0" +line_number/step[3]/output/line[3]="0" +line_number/step[3]/output/line[4]="0" +line_number/step[3]/output/line[5]="2" +line_number/step[3]/output/line[6]="2" +line_number/step[3]/output/line[7]="2" +line_number/step[3]/output/line[8]="2" +line_number/step[3]/output/line[9]="2" +line_number/step[3]/status=0 +line_number/step[4]/output/line[0]="0" +line_number/step[4]/output/line[1]="2" +line_number/step[4]/output/line[2]="4" +line_number/step[4]/output/line[3]="6" +line_number/step[4]/output/line[4]="8" +line_number/step[4]/output/line[5]="0" +line_number/step[4]/output/line[6]="2" +line_number/step[4]/output/line[7]="4" +line_number/step[4]/output/line[8]="6" +line_number/step[4]/output/line[9]="8" +line_number/step[4]/status=0 +line_number/step[5]/output/line[0]="8" +line_number/step[5]/output/line[1]="6" +line_number/step[5]/output/line[2]="4" +line_number/step[5]/output/line[3]="2" +line_number/step[5]/output/line[4]="0" +line_number/step[5]/output/line[5]="8" +line_number/step[5]/output/line[6]="6" +line_number/step[5]/output/line[7]="4" +line_number/step[5]/output/line[8]="2" +line_number/step[5]/output/line[9]="0" +line_number/step[5]/status=0 +line_number/step[6]/output/line[0]="20" +line_number/step[6]/output/line[1]="18" +line_number/step[6]/output/line[2]="16" +line_number/step[6]/output/line[3]="14" +line_number/step[6]/output/line[4]="12" +line_number/step[6]/output/line[5]="20" +line_number/step[6]/output/line[6]="18" +line_number/step[6]/output/line[7]="16" +line_number/step[6]/output/line[8]="14" +line_number/step[6]/output/line[9]="12" 
+line_number/step[6]/status=0 +line_number/step[7]/status=1 diff --git a/csv/test/csv-paste/input b/csv/test/csv-paste/input index d00c48ad2..331ba378d 100644 --- a/csv/test/csv-paste/input +++ b/csv/test/csv-paste/input @@ -29,3 +29,12 @@ line_number/binary[0]="csv-paste 'line-number;binary=ui' | csv-from-bin ui | hea block_size/ascii[0]="csv-paste <( echo a; echo b )';block-size=4' <( echo 0; echo 1; echo 2; echo 3 )';block-size=2'" block_size/binary[0]="csv-paste <( { echo 0; echo 1; } | csv-to-bin ui )';size=4;block-size=4' <( { echo 0; echo 1; echo 2; echo 3; } | csv-to-bin ui )';size=4;block-size=2' | csv-from-bin 2ui" + +line_number/step[0]="csv-paste line-number --step 2 | head -n4; comma_status_ok && exit 0 || exit 1" +line_number/step[1]="csv-paste "line-number;step=2" | head -n4; comma_status_ok && exit 0 || exit 1" +line_number/step[2]="csv-paste "line-number;step=2" --step 3 | head -n4; comma_status_ok && exit 0 || exit 1" +line_number/step[3]="csv-paste "line-number;block-size=5;step=2" | head; comma_status_ok && exit 0 || exit 1" +line_number/step[4]="csv-paste "line-number;block-size=5;index;step=2" | head; comma_status_ok && exit 0 || exit 1" +line_number/step[5]="csv-paste "line-number;block-size=5;index;reverse;step=2" | head; comma_status_ok && exit 0 || exit 1" +line_number/step[6]="csv-paste "line-number;block-size=5;index;reverse;begin=20;step=2" | head; comma_status_ok && exit 0 || exit 1" +line_number/step[7]="csv-paste "line-number;block-size=5;index;reverse;begin=4;step=2" | head; comma_status_ok && exit 0 || exit 1" From 96d13e377160bafe8bba03bd1ab4e64c288982fb Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 27 Aug 2019 18:49:19 +1000 Subject: [PATCH 0048/1056] csv-sort: --random, --random-seed implemented --- csv/applications/csv-sort.cpp | 86 ++++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/csv/applications/csv-sort.cpp b/csv/applications/csv-sort.cpp index 48d216abf..c2665e071 100644 --- 
a/csv/applications/csv-sort.cpp +++ b/csv/applications/csv-sort.cpp @@ -29,10 +29,13 @@ /// @authors matthew imhoff, dewey nguyen, vsevolod vlaskine +#include #include #include #include #include +#include +#include #include #include #include @@ -73,7 +76,9 @@ static void usage( bool more ) std::cerr << " --max: output record(s) with maximum value, same semantics as --min" << std::endl; std::cerr << " --min and --max may be used together." << std::endl; std::cerr << " --numeric-keys-are-floats,--floats; in ascii, if --format not present, assume that numeric fields are floating point numbers" << std::endl; - std::cerr << " --order : order in which to sort fields; default is input field order" << std::endl; + std::cerr << " --order=: order in which to sort fields; default is input field order" << std::endl; + std::cerr << " --random: output input records in pseudo-random order" << std::endl; + std::cerr << " --random-seed,--seed=[]; random seed for --random" << std::endl; std::cerr << " --reverse,--descending,-r: sort in reverse order" << std::endl; std::cerr << " --sliding-window,--window=: sort last entries" << std::endl; std::cerr << " --string,-s: keys are strings; a quick and dirty option to support strings" << std::endl; @@ -577,9 +582,74 @@ int handle_operations_with_ids( const comma::command_line_options& options ) } } - output_current_block( min_map, max_map ); - + return 0; +} + +static int random( const comma::command_line_options& options ) +{ + auto seed = options.optional< int >( "--random-seed,--seed" ); + std::default_random_engine generator = seed ? 
std::default_random_engine( *seed ) : std::default_random_engine(); + std::deque< std::string > records; + if( csv.has_field( "block" ) ) + { + comma::csv::input_stream< input_with_block > is( std::cin, csv ); + comma::uint32 block = 0; + while( is.ready() || std::cin.good() ) + { + const input_with_block* p = is.read(); + if( !p || p->block != block ) + { + std::uniform_int_distribution< int > distribution( 0, records.size() - 1 ); // quick and dirty + std::random_shuffle( records.begin(), records.end(), [&]( int ) -> int { return distribution( generator ); } ); // quick and dirty, watch performance + for( const auto& r: records ) { std::cout.write( &r[0], r.size() ); } + if( csv.flush ) { std::cout.flush(); } + records.clear(); + if( p ) { block = p->block; } + } + if( !p ) { break; } + if( csv.binary() ) + { + records.push_back( std::string() ); + records.back().resize( csv.format().size() ); + std::memcpy( &records.back()[0], is.binary().last(), csv.format().size() ); + } + else + { + records.push_back( comma::join( is.ascii().last(), csv.delimiter ) + "\n" ); + } + } + } + else + { + // todo: quick and dirty, code duplication + // todo: implement --sliding-window + if( csv.binary() ) + { + std::string s( csv.format().size(), 0 ); + while( std::cin.good() ) + { + std::cin.read( &s[0], s.size() ); + if( std::cin.gcount() == 0 ) { break; } + if( std::cin.gcount() != int( s.size() ) ) { std::cerr << "csv-sort: --random: expected " << s.size() << " bytes; got " << std::cin.gcount() << std::endl; return 1; } + records.push_back( std::string() ); + records.back().resize( csv.format().size() ); + std::memcpy( &records.back()[0], &s[0], csv.format().size() ); + } + } + else + { + while( std::cin.good() ) + { + std::string s; + std::getline( std::cin, s ); + if( !s.empty() ) { records.push_back( s + "\n" ); } + } + } + std::uniform_int_distribution< int > distribution( 0, records.size() - 1 ); // quick and dirty + std::random_shuffle( records.begin(), records.end(), 
[&]( int ) -> int { return distribution( generator ); } ); // quick and dirty, watch performance + for( const auto& r: records ) { std::cout.write( &r[0], r.size() ); } + } return 0; } @@ -674,11 +744,15 @@ int main( int ac, char** av ) try { comma::command_line_options options( ac, av, usage ); - options.assert_mutually_exclusive( "--discard-out-of-order,--discard-unsorted,--first,--min,--sliding-window,--window,--unique" ); - options.assert_mutually_exclusive( "--discard-out-of-order,--discard-unsorted,--first,--max,--sliding-window,--window,--unique" ); + options.assert_mutually_exclusive( "--discard-out-of-order,--discard-unsorted,--first,--min,--sliding-window,--window,--unique,--random" ); + options.assert_mutually_exclusive( "--discard-out-of-order,--discard-unsorted,--first,--max,--sliding-window,--window,--unique,--random" ); verbose = options.exists( "--verbose,-v" ); csv = comma::csv::options( options ); - return options.exists( "--first,--min,--max" ) ? handle_operations_with_ids( options ) : sort( options ); + return options.exists( "--first,--min,--max" ) + ? handle_operations_with_ids( options ) + : options.exists( "--random" ) + ? random( options ) + : sort( options ); } catch( std::exception& ex ) { std::cerr << "csv-sort: " << ex.what() << std::endl; } catch( ... ) { std::cerr << "csv-sort: unknown exception" << std::endl; } From 2dde8cd4a78051a5dda4fc67102ff5a6bdbf2e54 Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 28 Aug 2019 10:57:02 +1000 Subject: [PATCH 0049/1056] packed/detail/endian: license fixed --- packed/detail/endian.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packed/detail/endian.h b/packed/detail/endian.h index 80b1b9b75..b27ac37e0 100644 --- a/packed/detail/endian.h +++ b/packed/detail/endian.h @@ -1,5 +1,5 @@ -// This file is provided in addition to snark and is not an integral -// part of snark library. +// This file is provided in addition to comma and is not an integral +// part of comma library. 
// Copyright (c) 2018 Vsevolod Vlaskine // All rights reserved. // @@ -25,7 +25,7 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// snark is a generic and flexible library for robotics research +// comma is a generic and flexible library // Copyright (c) 2011 The University of Sydney // All rights reserved. // From 1c9bdf192198ecf417f2737b74c3fea8bc9f9fe7 Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 28 Aug 2019 11:29:59 +1000 Subject: [PATCH 0050/1056] csv-random: first cut of shuffle operation implemented --- csv/applications/CMakeLists.txt | 4 + csv/applications/csv-random.cpp | 253 ++++++++++++++++++++++++++++++++ 2 files changed, 257 insertions(+) create mode 100644 csv/applications/csv-random.cpp diff --git a/csv/applications/CMakeLists.txt b/csv/applications/CMakeLists.txt index 5a76792c2..a165fa65c 100644 --- a/csv/applications/CMakeLists.txt +++ b/csv/applications/CMakeLists.txt @@ -110,6 +110,10 @@ add_executable( csv-units ${dir}/csv-units.cpp ) target_link_libraries ( csv-units ${comma_ALL_EXTERNAL_LIBRARIES} comma_application comma_csv comma_io comma_xpath comma_string ) install( TARGETS csv-units RUNTIME DESTINATION ${comma_INSTALL_BIN_DIR} COMPONENT Runtime ) +add_executable( csv-random ${dir}/csv-random.cpp ) +target_link_libraries ( csv-random ${comma_ALL_EXTERNAL_LIBRARIES} comma_application comma_io comma_string comma_xpath comma_csv ) +install( TARGETS csv-random RUNTIME DESTINATION ${comma_INSTALL_BIN_DIR} COMPONENT Runtime ) + add_executable( csv-update ${dir}/csv-update.cpp ) target_link_libraries ( csv-update ${comma_ALL_EXTERNAL_LIBRARIES} comma_application comma_io comma_string comma_xpath comma_csv ) install( TARGETS csv-update RUNTIME DESTINATION ${comma_INSTALL_BIN_DIR} COMPONENT Runtime ) diff --git a/csv/applications/csv-random.cpp b/csv/applications/csv-random.cpp new file mode 100644 index 000000000..b01b7e827 --- /dev/null +++ 
b/csv/applications/csv-random.cpp @@ -0,0 +1,253 @@ +// This file is provided in addition to comma and is not an integral +// part of comma library. +// Copyright (c) 2018 Vsevolod Vlaskine +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE +// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT +// HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// comma is a generic and flexible library +// Copyright (c) 2011 The University of Sydney +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. Neither the name of the University of Sydney nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE +// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT +// HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +/// @author vsevolod vlaskine + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../application/command_line_options.h" +#include "../../base/exception.h" +#include "../../base/types.h" +#include "../../csv/stream.h" +#include "../../csv/traits.h" +#include "../../string/string.h" +#include "../../visiting/traits.h" + +static void usage( bool more ) +{ + std::cerr << std::endl; + std::cerr << "Sort a csv file using one or several keys" << std::endl; + std::cerr << std::endl; + std::cerr << "Usage: cat something.csv | csv-random []" << std::endl; + std::cerr << std::endl; + std::cerr << "Options:" << std::endl; + std::cerr << " --help,-h: help; --help --verbose: more help" << std::endl; + std::cerr << " --discard-out-of-order,--discard-unsorted: instead of sorting, discard records out of order" << std::endl; + std::cerr << " --first: first line matching given keys; first line in the block, if block field present; no sorting will be done; if sorting required, use unique instead" << std::endl; + std::cerr << " fields" << std::endl; + std::cerr << " id: if present, multiple id fields accepted; output first record for each set of ids in a given block; e.g. --fields=id,a,,id" << std::endl; + std::cerr << " block: if present; output minimum for each contiguous block" << std::endl; + std::cerr << " --min: output only record(s) with minimum value for a given field." << std::endl; + std::cerr << " fields" << std::endl; + std::cerr << " id: if present, multiple id fields accepted; output minimum for each set of ids in a given block; e.g. --fields=id,a,,id" << std::endl; + std::cerr << " block: if present; output minimum for each contiguous block" << std::endl; + std::cerr << " --max: output record(s) with maximum value, same semantics as --min" << std::endl; + std::cerr << " --min and --max may be used together." 
<< std::endl; + std::cerr << " --numeric-keys-are-floats,--floats; in ascii, if --format not present, assume that numeric fields are floating point numbers" << std::endl; + std::cerr << " --order=: order in which to sort fields; default is input field order" << std::endl; + std::cerr << " --random: output input records in pseudo-random order" << std::endl; + std::cerr << " --random-seed,--seed=[]; random seed for --random" << std::endl; + std::cerr << " --reverse,--descending,-r: sort in reverse order" << std::endl; + std::cerr << " --sliding-window,--window=: sort last entries" << std::endl; + std::cerr << " --string,-s: keys are strings; a quick and dirty option to support strings" << std::endl; + std::cerr << " default: double" << std::endl; + std::cerr << " --unique,-u: sort input, output only the first line matching given keys; if no sorting required, use --first for better performance" << std::endl; + std::cerr << " --verbose,-v: more output to stderr" << std::endl; + std::cerr << std::endl; + std::cerr << "examples" << std::endl; + std::cerr << " sort by first field:" << std::endl; + std::cerr << " echo -e \"2\\n1\\n3\" | csv-random --fields=a" << std::endl; + std::cerr << " sort by second field:" << std::endl; + std::cerr << " echo -e \"2,3\\n1,1\\n3,2\" | csv-random --fields=,b" << std::endl; + std::cerr << " sort by second field then first field:" << std::endl; + std::cerr << " echo -e \"2,3\\n3,1\\n1,1\\n2,2\\n1,3\" | csv-random --fields=a,b --order=b,a" << std::endl; + std::cerr << " minimum (using maximum would be the same):" << std::endl; + std::cerr << " basic use" << std::endl; + std::cerr << " ( echo 1,a,2; echo 2,a,2; echo 3,a,3; ) | csv-random --min --fields=,,a" << std::endl; + std::cerr << " using single id" << std::endl; + std::cerr << " ( echo 1,a,2; echo 2,a,2; echo 3,b,3; ) | csv-random --min --fields=a,id" << std::endl; + std::cerr << " using multiple id fields" << std::endl; + std::cerr << " ( echo 1,a,1; echo 1,b,1; echo 3,b,5; echo 
3,b,5; ) | csv-random --min --fields=id,a,id" << std::endl; + std::cerr << " using block" << std::endl; + std::cerr << " ( echo 0,a,2; echo 0,a,2; echo 0,b,3; echo 0,b,1; echo 1,c,3; echo 1,c,2; ) | csv-random --min --fields=block,,a" << std::endl; + std::cerr << " using block and id" << std::endl; + std::cerr << " ( echo 0,a,2; echo 0,a,2; echo 0,b,3; echo 0,b,1; echo 1,c,3; echo 1,c,2; ) | csv-random --min --fields=block,id,a" << std::endl; + std::cerr << " minimum and maximum:" << std::endl; + std::cerr << " basic use" << std::endl; + std::cerr << " ( echo 1,a,2; echo 2,a,2; echo 3,b,3; echo 5,b,7; echo 3,b,9 ) | csv-random --max --min --fields=,,a" << std::endl; + std::cerr << " using id" << std::endl; + std::cerr << " ( echo 1,a,2; echo 2,a,2; echo 3,b,3; echo 5,b,7; echo 3,b,9 ) | csv-random --max --min --fields=,id,a" << std::endl; + std::cerr << std::endl; + if( more ) + { + std::cerr << std::endl; + std::cerr << "csv options:" << std::endl; + std::cerr << comma::csv::options::usage() << std::endl; + } + exit( 0 ); +} + +static bool verbose; +static comma::csv::options csv; +static boost::optional< int > seed; + +namespace comma { namespace applications { namespace random { namespace shuffle { + +struct input +{ + comma::uint32 block; + input(): block( 0 ) {} +}; + +} } } } // namespace comma { namespace applications { namespace random { namespace shuffle { + +namespace comma { namespace visiting { + +template <> struct traits< comma::applications::random::shuffle::input > +{ + template < typename K, typename V > static void visit( const K&, const comma::applications::random::shuffle::input& p, V& v ) { v.apply( "block", p.block ); } + template < typename K, typename V > static void visit( const K&, comma::applications::random::shuffle::input& p, V& v ) { v.apply( "block", p.block ); } +}; + +} } // namespace comma { namespace visiting { + +namespace comma { namespace applications { namespace random { namespace shuffle { + +static int run( const 
comma::command_line_options& options ) +{ + std::default_random_engine generator = seed ? std::default_random_engine( *seed ) : std::default_random_engine(); + std::deque< std::string > records; + if( ::csv.has_field( "block" ) ) + { + comma::csv::input_stream< input > is( std::cin, ::csv ); + comma::uint32 block = 0; + while( is.ready() || std::cin.good() ) + { + const input* p = is.read(); + if( !p || p->block != block ) + { + std::uniform_int_distribution< int > distribution( 0, records.size() - 1 ); // quick and dirty + std::random_shuffle( records.begin(), records.end(), [&]( int ) -> int { return distribution( generator ); } ); // quick and dirty, watch performance + for( const auto& r: records ) { std::cout.write( &r[0], r.size() ); } + if( ::csv.flush ) { std::cout.flush(); } + records.clear(); + if( p ) { block = p->block; } + } + if( !p ) { break; } + if( ::csv.binary() ) + { + records.push_back( std::string() ); + records.back().resize( ::csv.format().size() ); + std::memcpy( &records.back()[0], is.binary().last(), ::csv.format().size() ); + } + else + { + records.push_back( comma::join( is.ascii().last(), ::csv.delimiter ) + "\n" ); + } + } + } + else + { + // todo: quick and dirty, code duplication + // todo: implement --sliding-window + if( ::csv.binary() ) + { + std::string s( ::csv.format().size(), 0 ); + while( std::cin.good() ) + { + std::cin.read( &s[0], s.size() ); + if( std::cin.gcount() == 0 ) { break; } + if( std::cin.gcount() != int( s.size() ) ) { std::cerr << "csv-random: random: expected " << s.size() << " bytes; got " << std::cin.gcount() << std::endl; return 1; } + records.push_back( std::string() ); + records.back().resize( ::csv.format().size() ); + std::memcpy( &records.back()[0], &s[0], ::csv.format().size() ); + } + } + else + { + while( std::cin.good() ) + { + std::string s; + std::getline( std::cin, s ); + if( !s.empty() ) { records.push_back( s + "\n" ); } + } + } + std::uniform_int_distribution< int > distribution( 0, 
records.size() - 1 ); // quick and dirty + std::random_shuffle( records.begin(), records.end(), [&]( int ) -> int { return distribution( generator ); } ); // quick and dirty, watch performance + for( const auto& r: records ) { std::cout.write( &r[0], r.size() ); } + } + return 0; +} + +} } } } // namespace comma { namespace applications { namespace random { namespace shuffle { + +int main( int ac, char** av ) +{ + try + { + comma::command_line_options options( ac, av, usage ); + const auto& unnamed = options.unnamed( "--flush,--verbose,-v", "-.*" ); + if( unnamed.empty() ) { std::cerr << "csv-random: please specify operation" << std::endl; return 1; } + csv = comma::csv::options( options ); + seed = options.optional< int >( "--seed" ); + verbose = options.exists( "--verbose,-v" ); + std::string operation = unnamed[0]; + if( operation == "shuffle" ) { return comma::applications::random::shuffle::run( options ); } + std::cerr << "csv-random: expection operation; got: '" << operation << "'" << std::endl; + return 1; + } + catch( std::exception& ex ) { std::cerr << "csv-random: " << ex.what() << std::endl; } + catch( ... 
) { std::cerr << "csv-random: unknown exception" << std::endl; } + return 1; +} From 9018f4531685e69929e52be86ea33fd8631c8d1b Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 28 Aug 2019 18:09:26 +1000 Subject: [PATCH 0051/1056] csv-random: shuffle: help added --- csv/applications/csv-random.cpp | 85 +++++++++------------------------ 1 file changed, 23 insertions(+), 62 deletions(-) diff --git a/csv/applications/csv-random.cpp b/csv/applications/csv-random.cpp index b01b7e827..2ca8b3bd5 100644 --- a/csv/applications/csv-random.cpp +++ b/csv/applications/csv-random.cpp @@ -74,67 +74,23 @@ #include "../../string/string.h" #include "../../visiting/traits.h" -static void usage( bool more ) +static void usage( bool verbose ) { std::cerr << std::endl; - std::cerr << "Sort a csv file using one or several keys" << std::endl; + std::cerr << "random operations on input stream" << std::endl; std::cerr << std::endl; - std::cerr << "Usage: cat something.csv | csv-random []" << std::endl; + std::cerr << "options" << std::endl; + std::cerr << " --seed=[]; random seed" << std::endl; std::cerr << std::endl; - std::cerr << "Options:" << std::endl; - std::cerr << " --help,-h: help; --help --verbose: more help" << std::endl; - std::cerr << " --discard-out-of-order,--discard-unsorted: instead of sorting, discard records out of order" << std::endl; - std::cerr << " --first: first line matching given keys; first line in the block, if block field present; no sorting will be done; if sorting required, use unique instead" << std::endl; - std::cerr << " fields" << std::endl; - std::cerr << " id: if present, multiple id fields accepted; output first record for each set of ids in a given block; e.g. --fields=id,a,,id" << std::endl; - std::cerr << " block: if present; output minimum for each contiguous block" << std::endl; - std::cerr << " --min: output only record(s) with minimum value for a given field." 
<< std::endl; - std::cerr << " fields" << std::endl; - std::cerr << " id: if present, multiple id fields accepted; output minimum for each set of ids in a given block; e.g. --fields=id,a,,id" << std::endl; - std::cerr << " block: if present; output minimum for each contiguous block" << std::endl; - std::cerr << " --max: output record(s) with maximum value, same semantics as --min" << std::endl; - std::cerr << " --min and --max may be used together." << std::endl; - std::cerr << " --numeric-keys-are-floats,--floats; in ascii, if --format not present, assume that numeric fields are floating point numbers" << std::endl; - std::cerr << " --order=: order in which to sort fields; default is input field order" << std::endl; - std::cerr << " --random: output input records in pseudo-random order" << std::endl; - std::cerr << " --random-seed,--seed=[]; random seed for --random" << std::endl; - std::cerr << " --reverse,--descending,-r: sort in reverse order" << std::endl; - std::cerr << " --sliding-window,--window=: sort last entries" << std::endl; - std::cerr << " --string,-s: keys are strings; a quick and dirty option to support strings" << std::endl; - std::cerr << " default: double" << std::endl; - std::cerr << " --unique,-u: sort input, output only the first line matching given keys; if no sorting required, use --first for better performance" << std::endl; - std::cerr << " --verbose,-v: more output to stderr" << std::endl; + std::cerr << "operations" << std::endl; + std::cerr << " shuffle: output input records in pseudo-random order" << std::endl; + std::cerr << " options" << std::endl; + std::cerr << " --fields=[]; if 'block' field present shuffle each block, otherwise read whole input and then shuffle" << std::endl; + std::cerr << " --sliding-window,--window=[]; shuffle on sliding window of records" << std::endl; std::cerr << std::endl; - std::cerr << "examples" << std::endl; - std::cerr << " sort by first field:" << std::endl; - std::cerr << " echo -e \"2\\n1\\n3\" | 
csv-random --fields=a" << std::endl; - std::cerr << " sort by second field:" << std::endl; - std::cerr << " echo -e \"2,3\\n1,1\\n3,2\" | csv-random --fields=,b" << std::endl; - std::cerr << " sort by second field then first field:" << std::endl; - std::cerr << " echo -e \"2,3\\n3,1\\n1,1\\n2,2\\n1,3\" | csv-random --fields=a,b --order=b,a" << std::endl; - std::cerr << " minimum (using maximum would be the same):" << std::endl; - std::cerr << " basic use" << std::endl; - std::cerr << " ( echo 1,a,2; echo 2,a,2; echo 3,a,3; ) | csv-random --min --fields=,,a" << std::endl; - std::cerr << " using single id" << std::endl; - std::cerr << " ( echo 1,a,2; echo 2,a,2; echo 3,b,3; ) | csv-random --min --fields=a,id" << std::endl; - std::cerr << " using multiple id fields" << std::endl; - std::cerr << " ( echo 1,a,1; echo 1,b,1; echo 3,b,5; echo 3,b,5; ) | csv-random --min --fields=id,a,id" << std::endl; - std::cerr << " using block" << std::endl; - std::cerr << " ( echo 0,a,2; echo 0,a,2; echo 0,b,3; echo 0,b,1; echo 1,c,3; echo 1,c,2; ) | csv-random --min --fields=block,,a" << std::endl; - std::cerr << " using block and id" << std::endl; - std::cerr << " ( echo 0,a,2; echo 0,a,2; echo 0,b,3; echo 0,b,1; echo 1,c,3; echo 1,c,2; ) | csv-random --min --fields=block,id,a" << std::endl; - std::cerr << " minimum and maximum:" << std::endl; - std::cerr << " basic use" << std::endl; - std::cerr << " ( echo 1,a,2; echo 2,a,2; echo 3,b,3; echo 5,b,7; echo 3,b,9 ) | csv-random --max --min --fields=,,a" << std::endl; - std::cerr << " using id" << std::endl; - std::cerr << " ( echo 1,a,2; echo 2,a,2; echo 3,b,3; echo 5,b,7; echo 3,b,9 ) | csv-random --max --min --fields=,id,a" << std::endl; + std::cerr << "csv options:" << std::endl; + std::cerr << comma::csv::options::usage( "", verbose ) << std::endl; std::cerr << std::endl; - if( more ) - { - std::cerr << std::endl; - std::cerr << "csv options:" << std::endl; - std::cerr << comma::csv::options::usage() << std::endl; - } exit( 0 ); } 
@@ -168,8 +124,16 @@ static int run( const comma::command_line_options& options ) { std::default_random_engine generator = seed ? std::default_random_engine( *seed ) : std::default_random_engine(); std::deque< std::string > records; + auto output = []( std::deque< std::string >& records ) + { + for( const auto& r: records ) { std::cout.write( &r[0], r.size() ); } + records.clear(); + if( ::csv.flush ) { std::cout.flush(); } + }; + auto sliding_window = options.optional< unsigned int >( "--sliding-window,--window" ); if( ::csv.has_field( "block" ) ) { + if( sliding_window ) { std::cerr << "csv-random: shuffle: expected either block field or --sliding-window; got both" << std::endl; return 1; } comma::csv::input_stream< input > is( std::cin, ::csv ); comma::uint32 block = 0; while( is.ready() || std::cin.good() ) @@ -179,9 +143,7 @@ static int run( const comma::command_line_options& options ) { std::uniform_int_distribution< int > distribution( 0, records.size() - 1 ); // quick and dirty std::random_shuffle( records.begin(), records.end(), [&]( int ) -> int { return distribution( generator ); } ); // quick and dirty, watch performance - for( const auto& r: records ) { std::cout.write( &r[0], r.size() ); } - if( ::csv.flush ) { std::cout.flush(); } - records.clear(); + output( records ); if( p ) { block = p->block; } } if( !p ) { break; } @@ -197,10 +159,9 @@ static int run( const comma::command_line_options& options ) } } } - else + else // quick and dirty { - // todo: quick and dirty, code duplication - // todo: implement --sliding-window + if( sliding_window ) { std::cerr << "csv-random: shuffle: --sliding-window: todo" << std::endl; return 1; } if( ::csv.binary() ) { std::string s( ::csv.format().size(), 0 ); @@ -225,7 +186,7 @@ static int run( const comma::command_line_options& options ) } std::uniform_int_distribution< int > distribution( 0, records.size() - 1 ); // quick and dirty std::random_shuffle( records.begin(), records.end(), [&]( int ) -> int { return 
distribution( generator ); } ); // quick and dirty, watch performance - for( const auto& r: records ) { std::cout.write( &r[0], r.size() ); } + output( records ); } return 0; } From aeaeb8d43b7e248d6899497dd0515900272ddacc Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 28 Aug 2019 18:17:23 +1000 Subject: [PATCH 0052/1056] csv-random: shuffle: usage added --- csv/applications/csv-random.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/csv/applications/csv-random.cpp b/csv/applications/csv-random.cpp index 2ca8b3bd5..aa5e5b038 100644 --- a/csv/applications/csv-random.cpp +++ b/csv/applications/csv-random.cpp @@ -84,9 +84,12 @@ static void usage( bool verbose ) std::cerr << std::endl; std::cerr << "operations" << std::endl; std::cerr << " shuffle: output input records in pseudo-random order" << std::endl; + std::cerr << std::endl; + std::cerr << " usage: cat records.csv | csv-random shuffle [] > shuffled.csv" << std::endl; + std::cerr << std::endl; std::cerr << " options" << std::endl; std::cerr << " --fields=[]; if 'block' field present shuffle each block, otherwise read whole input and then shuffle" << std::endl; - std::cerr << " --sliding-window,--window=[]; shuffle on sliding window of records" << std::endl; + std::cerr << " --sliding-window,--window=[]; todo: shuffle on sliding window of records" << std::endl; std::cerr << std::endl; std::cerr << "csv options:" << std::endl; std::cerr << comma::csv::options::usage( "", verbose ) << std::endl; From 7afae910910f9e97e7f3824df21d27ce8eda9fc1 Mon Sep 17 00:00:00 2001 From: Toby Dunne Date: Tue, 22 Oct 2019 13:53:57 +1100 Subject: [PATCH 0053/1056] zero-cat: compilation error with new boost fixed: milliseconds cast to long exlicitly --- io/applications/zero-cat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io/applications/zero-cat.cpp b/io/applications/zero-cat.cpp index fef48ef75..52ac3750a 100644 --- a/io/applications/zero-cat.cpp +++ b/io/applications/zero-cat.cpp 
@@ -272,7 +272,7 @@ int main(int argc, char* argv[]) else { socket.bind( &endpoints[i][0] ); } } // we convert to milliseconds as converting to second floors the number so 0.99 becomes 0 - if( wait_after_connect > 0 ) { boost::this_thread::sleep(boost::posix_time::milliseconds(wait_after_connect * 1000.0)); } + if( wait_after_connect > 0 ) { boost::this_thread::sleep(boost::posix_time::milliseconds( static_cast< long >( wait_after_connect * 1000.0 ) ) ); } std::string buffer; if( binary ) { buffer.resize( size ); } @@ -311,7 +311,7 @@ int main(int argc, char* argv[]) else { socket.connect( endpoints[i].c_str() ); } } socket.setsockopt( ZMQ_SUBSCRIBE, "", 0 ); - if( wait_after_connect > 0 ) { boost::this_thread::sleep( boost::posix_time::milliseconds( wait_after_connect * 1000.0 ) ); } + if( wait_after_connect > 0 ) { boost::this_thread::sleep( boost::posix_time::milliseconds( static_cast< long >( wait_after_connect * 1000.0 ) ) ); } if( vm.count( "server" ) ) { comma::io::publisher publisher( server, comma::io::mode::binary, true, false ); From d44da1e42d3969309f5c8fafb2ad743f9158eb9c Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 22 Oct 2019 14:39:05 +1100 Subject: [PATCH 0054/1056] io/test/stream_test: missing include added (compilation failed with boost 1.67 using more consistent includes --- io/test/stream_test.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/io/test/stream_test.cpp b/io/test/stream_test.cpp index 388702229..27133a78b 100644 --- a/io/test/stream_test.cpp +++ b/io/test/stream_test.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -146,4 +147,5 @@ int main( int argc, char* argv[] ) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); -} \ No newline at end of file +} + From cc89ca7d091685c534de5e3272edde6f3f67710d Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 14 Nov 2019 14:18:12 +1100 Subject: [PATCH 0055/1056] python/setup.py: fixed url --- python/setup.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/setup.py b/python/setup.py index eaf1d4e55..9f31132c5 100644 --- a/python/setup.py +++ b/python/setup.py @@ -6,7 +6,7 @@ name = 'comma', version = open('comma/version.py').readlines()[-1].strip().split()[-1].strip('\"'), description = 'comma python utilties', - url = 'https://github.com/acfr/comma', + url = 'https://gitlab.com/orthographic/comma', license = 'BSD 3-Clause', packages = [ 'comma', 'comma.csv', 'comma.csv.applications', 'comma.io', 'comma.numpy', 'comma.signal', 'comma.util', 'comma.cpp_bindings', 'comma.application' ], package_dir = { 'comma.cpp_bindings': 'comma/cpp_bindings' }, From c951bfd8c1325fed4bf55c846b88d11849e6eea7 Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 14 Nov 2019 14:29:38 +1100 Subject: [PATCH 0056/1056] python: small steps to port to python3 --- python/comma/numpy/functions.py | 20 ++++++------ python/comma/numpy/test/test_functions.py | 37 ++++++++++++----------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/python/comma/numpy/functions.py b/python/comma/numpy/functions.py index 277e9c8a3..3ea7fbabd 100644 --- a/python/comma/numpy/functions.py +++ b/python/comma/numpy/functions.py @@ -28,6 +28,7 @@ # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from __future__ import absolute_import +from __future__ import print_function import numpy as np import operator import re @@ -114,14 +115,14 @@ def types_of_dtype(dtype, unroll=False): return tuple(types) except ValueError: import sys - print >>sys.stderr - print >>sys.stderr, "ATTENTION: types_of_dtype failed due to the version of numpy on this computer" - print >>sys.stderr, " your applications using comma.csv will mostly work; sometimes they will fail" - print >>sys.stderr, " early (meaning you will know straight away) until types_of_dtype is rewritten" - print >>sys.stderr, " See todo comment in python/comma/numpy/functions.py" - print >>sys.stderr - for s in sys.exc_info(): print >>sys.stderr, " " + str( s ) - print >>sys.stderr + print( file = sys.stderr ) + print( "ATTENTION: types_of_dtype failed due to the version of numpy on this computer", file = sys.stderr ) + print( " your applications using comma.csv will mostly work; sometimes they will fail", file = sys.stderr ) + print( " early (meaning you will know straight away) until types_of_dtype is rewritten", file = sys.stderr ) + print( " See todo comment in python/comma/numpy/functions.py", file = sys.stderr ) + print( file = sys.stderr ) + for s in sys.exc_info(): print( " " + str( s ), file = sys.stderr ) + print( file = sys.stderr ) raise def structured_dtype(format_or_type): @@ -140,8 +141,7 @@ def structured_dtype(format_or_type): >>> np.dtype('f8').names """ dtype = np.dtype(format_or_type) - if len(dtype) != 0: - return dtype + if len(dtype) != 0: return dtype return np.dtype([('', format_or_type)]) diff --git a/python/comma/numpy/test/test_functions.py b/python/comma/numpy/test/test_functions.py index 6a7f184fd..685f3d6bd 100644 --- a/python/comma/numpy/test/test_functions.py +++ b/python/comma/numpy/test/test_functions.py @@ -1,3 +1,4 @@ +from __future__ import print_function import unittest import numpy as np from comma.numpy import * @@ -177,34 +178,34 @@ def test_structure_out_of_order(self): 
self.assertEqual( len( functions.types_of_dtype( ndtype2 ) ), 5 ) # shall be 4 except ValueError: import sys - print >>sys.stderr - print >>sys.stderr, "ATTENTION: test_structure_out_of_order failed due to the version of numpy on this computer" - print >>sys.stderr, " your applications using comma.csv will mostly work; sometimes they will fail" - print >>sys.stderr, " early (meaning you will know straight away) until types_of_dtype is rewritten" - print >>sys.stderr, " See todo comment in python/comma/numpy/functions.py" - print >>sys.stderr - for s in sys.exc_info(): print >>sys.stderr, " " + str( s ) - print >>sys.stderr + print( file = sys.stderr ) + print( "ATTENTION: test_structure_out_of_order failed due to the version of numpy on this computer", file = sys.stderr ) + print( " your applications using comma.csv will mostly work; sometimes they will fail", file = sys.stderr ) + print( " early (meaning you will know straight away) until types_of_dtype is rewritten", file = sys.stderr ) + print( " See todo comment in python/comma/numpy/functions.py", file = sys.stderr ) + print( file = sys.stderr ) + for s in sys.exc_info(): print( " " + str( s ), file = sys.stderr ) + print( file = sys.stderr ) if False: import sys self.assertEqual( sorted( ndtype1.descr ), sorted( ndtype2.descr ) ) self.assertEqual( sorted( functions.types_of_dtype( ndtype1 ) ), sorted( functions.types_of_dtype( ndtype2 ) ) ) - print >>sys.stderr, "observe the differences:" + print( "observe the differences:", file = sys.stderr ) - print >>sys.stderr, "ndtype1: ", ndtype1 - print >>sys.stderr, "ndtype2: ", ndtype2 + print( "ndtype1: " + str( ndtype1 ), file = sys.stderr ) + print( "ndtype2: " + str( ndtype2 ), file = sys.stderr ) - print >>sys.stderr, "ndtype1.fields: ", ndtype1.fields - print >>sys.stderr, "ndtype2.fields: ", ndtype2.fields - print >>sys.stderr, "fields identical: ", sorted_fields1 == sorted_fields2 + print( "ndtype1.fields: " + str( ndtype1.fields ), file = sys.stderr ) + 
print( "ndtype2.fields: " + str( ndtype2.fields ), file = sys.stderr ) + print( "fields identical: " + str( sorted_fields1 == sorted_fields2 ), file = sys.stderr ) - print >>sys.stderr, "ndtype1.descr: ", ndtype1.descr - print >>sys.stderr, "ndtype2.descr: ", ndtype2.descr + print( "ndtype1.descr: " + str( ndtype1.descr ), file = sys.stderr ) + print( "ndtype2.descr: " + str( ndtype2.descr ), file = sys.stderr ) - print >>sys.stderr, "types_of_dtype( ndtype1 ): ", comma.numpy.functions.types_of_dtype( ndtype1 ) - print >>sys.stderr, "types_of_dtype( ndtype2 ): ", comma.numpy.functions.types_of_dtype( ndtype2 ) + print( "types_of_dtype( ndtype1 ): " + str( comma.numpy.functions.types_of_dtype( ndtype1 ) ), file = sys.stderr ) + print( "types_of_dtype( ndtype2 ): " + str( comma.numpy.functions.types_of_dtype( ndtype2 ) ), file = sys.stderr ) if __name__ == '__main__': From dcfe6f954a77e750f3f3a10d9d47d5cbfcac6baa Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 14 Nov 2019 14:55:10 +1100 Subject: [PATCH 0057/1056] python: more small steps to port to python3 --- python/comma/csv/test/numpy_format/test | 3 +- python/comma/csv/test/numpy_guess_format/test | 3 +- python/comma/csv/test/numpy_time/test | 3 +- python/comma/csv/test/stream/buffer_size/test | 7 ++-- .../stream/stdin/corrupt_input/ascii/test | 3 +- .../stream/stdin/corrupt_input/binary/test | 3 +- python/comma/csv/test/struct/test | 40 ++++++++++--------- 7 files changed, 36 insertions(+), 26 deletions(-) diff --git a/python/comma/csv/test/numpy_format/test b/python/comma/csv/test/numpy_format/test index 54ff41831..876acea0e 100755 --- a/python/comma/csv/test/numpy_format/test +++ b/python/comma/csv/test/numpy_format/test @@ -5,12 +5,13 @@ function comma_format_to_numpy local compress=$2 compress=${compress:+", $compress"} python -c "$( cat <>sys.stderr, "csv-eval: --init currently reads one record at a time, which may be slow" + if args.init_values == '' and args.verbose: print( "csv-eval: --init currently 
reads one record at a time, which may be slow", file = sys.stderr ) return args def ingest_deprecated_options(args): @@ -419,7 +420,7 @@ def prepare_options(args): args.format = comma.csv.format.guess_format(args.first_line) args.binary = False if args.verbose: - print >> sys.stderr, "{}: guessed format: {}".format(__name__, args.format) + print( "{}: guessed format: {}".format(__name__, args.format), file = sys.stderr ) if args.select or args.exit_if: return var_names = assignment_variable_names(args.expressions) @@ -496,20 +497,20 @@ def initialize_update_and_output(self): def print_info(self, file=sys.stderr): fields = ','.join(self.input_t.nondefault_fields) format = self.input_t.format - print >> file, "expressions: '{}'".format(self.args.expressions) - print >> file, "select: '{}'".format(self.args.select) - print >> file, "exit_if: '{}'".format(self.args.exit_if) - print >> file, "default values: '{}'".format(self.args.default_values) - print >> file, "input fields: '{}'".format(fields) - print >> file, "input format: '{}'".format(format) + print( "expressions: '{}'".format(self.args.expressions), file = file ) + print( "select: '{}'".format(self.args.select), file = file ) + print( "exit_if: '{}'".format(self.args.exit_if), file = file ) + print( "default values: '{}'".format(self.args.default_values), file = file ) + print( "input fields: '{}'".format(fields), file = file ) + print( "input format: '{}'".format(format), file = file ) if self.args.select or self.args.exit_if: return update_fields = ','.join(self.update_t.fields) if self.args.update_fields else '' output_fields = ','.join(self.output_t.fields) if self.args.output_fields else '' output_format = self.output_t.format if self.args.output_fields else '' - print >> file, "update fields: '{}'".format(update_fields) - print >> file, "output fields: '{}'".format(output_fields) - print >> file, "output format: '{}'".format(output_format) + print( "update fields: '{}'".format(update_fields), file = file 
) + print( "output fields: '{}'".format(output_fields), file = file ) + print( "output format: '{}'".format(output_format), file = file ) def check_fields(fields, allow_numpy_names=True): @@ -598,7 +599,7 @@ def exit_if(stream): if mask: if not stream.args.with_error: sys.exit() name = os.path.basename(sys.argv[0]) - print >> sys.stderr, "{} error: {}".format(name, stream.args.with_error) + print( "{} error: {}".format(name, stream.args.with_error), file = sys.stderr ) sys.exit(1) stream.input.dump() input = stream.input.read() @@ -618,7 +619,7 @@ def main(): evaluate(stream(args)) except csv_eval_error as e: name = os.path.basename(sys.argv[0]) - print >> sys.stderr, "{} error: {}".format(name, e) + print( "{} error: {}".format(name, e), file = sys.stderr ) sys.exit(1) except StandardError as e: import traceback diff --git a/python/comma/csv/applications/test/csv-eval/permissive/ascii/test b/python/comma/csv/applications/test/csv-eval/permissive/ascii/test index 0f4f67398..8418fab95 100755 --- a/python/comma/csv/applications/test/csv-eval/permissive/ascii/test +++ b/python/comma/csv/applications/test/csv-eval/permissive/ascii/test @@ -5,13 +5,13 @@ source $( which comma-application-util ) || { echo "$scriptname: failed to sourc function csv_eval() { - csv-eval "$@" --fields=x,y 'import sys; print >> sys.stderr, "Python builtins are present!"; a=x+y' + csv-eval "$@" --fields=x,y 'import sys; print( "python builtins are present!" 
); a=x+y' } function reformat_output() { prefix=$1 - name-value-from-csv x,y,a --line-number --prefix $prefix/output | sed 's/"//g' + grep -v "python" | name-value-from-csv x,y,a --line-number --prefix $prefix/output | sed 's/"//g' } input=$( cat ) diff --git a/python/comma/csv/applications/test/csv-eval/permissive/binary/test b/python/comma/csv/applications/test/csv-eval/permissive/binary/test index b068816d6..51bd6937d 100755 --- a/python/comma/csv/applications/test/csv-eval/permissive/binary/test +++ b/python/comma/csv/applications/test/csv-eval/permissive/binary/test @@ -5,7 +5,7 @@ source $( which comma-application-util ) || { echo "$scriptname: failed to sourc function csv_eval() { - csv-eval "$@" --fields=x,y --binary=2d 'import sys; print >> sys.stderr, "Python builtins are present!"; a=x+y' + csv-eval "$@" --fields=x,y --binary=2d 'import sys; sys.stderr.write( "python builtins are present!\n" ); a=x+y' } function reformat_output() diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index a950b86e1..f4509acf2 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -27,6 +27,7 @@ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+from __future__ import print_function import numpy as np import sys import itertools @@ -95,8 +96,8 @@ def __init__(self, self.data_extraction_fields) #self.write_dtype = self._write_dtype() #self.unrolled_write_dtype = structured_dtype( ','.join( types_of_dtype( self.write_dtype, unroll=True ) ) ) - #print >>sys.stderr, "self.write_dtype.descr = %s" % str(self.write_dtype.descr) - #print >>sys.stderr, "self.unrolled_write_dtype = %s" % str(self.unrolled_write_dtype) + #print( "self.write_dtype.descr = %s" % str(self.write_dtype.descr), file = sys.stderr ) + #print( "self.unrolled_write_dtype = %s" % str(self.unrolled_write_dtype), file = sys.stderr ) self._input_array = None self._ascii_buffer = None self._strings = functools.partial(map, self.numpy_scalar_to_string) @@ -217,7 +218,7 @@ def write(self, s): #unrolled_array = s.view( self.unrolled_write_dtype ) if self.tied: lines = self._tie_ascii(self.tied._ascii_buffer, unrolled_array) else: lines = (self._toline(scalars) for scalars in unrolled_array) - for line in lines: print >> self.target, line + for line in lines: print( line, file = self.target ) self.target.flush() def _tie_binary(self, tied_array, array): return merge_arrays(tied_array, array) @@ -238,7 +239,7 @@ def _dump(self): if self.binary: self._input_array.tofile(self.target) else: - for line in self._ascii_buffer: print >> self.target, line + for line in self._ascii_buffer: print( line, file = self.target ) self.target.flush() def _dump_with_mask(self, mask): @@ -257,7 +258,7 @@ def _dump_with_mask(self, mask): self._input_array[mask].tofile(self.target) else: for line, allowed in itertools.izip(self._ascii_buffer, mask): - if allowed: print >> self.target, line + if allowed: print( line, file = self.target ) self.target.flush() def _warn(self, msg, verbose=True): diff --git a/python/comma/signal/signal.py b/python/comma/signal/signal.py index 209ba511c..efbdb0358 100644 --- a/python/comma/signal/signal.py +++ b/python/comma/signal/signal.py @@ 
-28,6 +28,7 @@ # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from __future__ import absolute_import +from __future__ import print_function import signal import sys import os @@ -42,7 +43,7 @@ def __init__( self, verbose = False ): def switch_on( self, signum, frame ): self.state = True - if self.verbose: print >> sys.stderr, os.path.basename(sys.argv[0]), "caught signal:", signum + if self.verbose: print( os.path.basename(sys.argv[0]), "caught signal:", signum, file = sys.stderr ) def __nonzero__( self ): return self.state From 1b784d63227f553382e480ac1019c3c32e84e35c Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 14 Nov 2019 17:16:58 +1100 Subject: [PATCH 0059/1056] python: and yet more small steps to port to python3 --- python/comma/csv/applications/csv-eval | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/comma/csv/applications/csv-eval b/python/comma/csv/applications/csv-eval index 506d1e7d4..f170b3a22 100644 --- a/python/comma/csv/applications/csv-eval +++ b/python/comma/csv/applications/csv-eval @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # This file is part of comma, a generic and flexible library # Copyright (c) 2011 The University of Sydney From 8bf46eb624bedbbdfe96c8118733584b63178c83 Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 14 Nov 2019 19:25:49 +1100 Subject: [PATCH 0060/1056] name-value-eval*: steps to make it working with python3... 
--- name_value/applications/name-value-eval | 3 +- .../applications/name-value-eval-preparse.cpp | 46 ++++++------------- 2 files changed, 16 insertions(+), 33 deletions(-) diff --git a/name_value/applications/name-value-eval b/name_value/applications/name-value-eval index d1eb5709d..0cbaa47b0 100755 --- a/name_value/applications/name-value-eval +++ b/name_value/applications/name-value-eval @@ -191,7 +191,8 @@ else name-value-eval-preparse --assign > $tmp_vars; fi preparse_options=$output_vars_option if (( test_option )); then preparse_options+=" --test"; fi -( cat "$tmp_vars"; +( echo "from __future__ import print_function" # uber quick and dirty + cat "$tmp_vars"; if [[ -n "$input" ]]; then name-value-eval-preparse $preparse_options "$input" else name-value-eval-preparse $preparse_options; fi ) > $python_input diff --git a/name_value/applications/name-value-eval-preparse.cpp b/name_value/applications/name-value-eval-preparse.cpp index f1f72db67..49c968946 100644 --- a/name_value/applications/name-value-eval-preparse.cpp +++ b/name_value/applications/name-value-eval-preparse.cpp @@ -43,7 +43,7 @@ static const char *exec_name = ""; static std::string kwd_expect = "expect"; -void usage() +void usage(bool) { std::cerr << "Usage: " << exec_name << " [-h|--help] [-a|--assign] [-t|--test] [-o|--output-variables=] [-d|--demangle] []\n" "\n" @@ -133,6 +133,7 @@ void usage() " rules. 
Normally all variables that are assigned any value in the rules are output, but this can be\n" " restricted to the variables listed in a file (one per line) using --output-variables.\n" "\n"; +exit( 0 ); } // command line options @@ -957,22 +958,22 @@ void process_test(std::vector &tokens, const std::string &original_line, std::cout << spaces(leading_spaces) - << " print '" << i->first << "/expected=" << quote(expr_str, '"') << "'\n" + << " print( '" << i->first << "/expected=" << quote(expr_str, '"') << "' )\n" << spaces(leading_spaces) << " sys.stdout.write('" << i->first << "/actual=\"')\n" << spaces(leading_spaces) - << " if __builtin__.type(" << i->second << ") == __builtin__.type({}): print dict_str(" << i->second << ")+'\"'\n" + << " if __builtin__.type(" << i->second << ") == __builtin__.type({}): print( dict_str(" << i->second << ")+'\"' )\n" << spaces(leading_spaces) // use a Python trick to force repr() to use double quotes instead of single // (for an explanation, see: http://www.gossamer-threads.com/lists/python/python/157285 // -- search that page for "Python delimits a string it by single quotes preferably") - << " else: print repr(\"'\\0\"+str(" << i->second << "))[6:]\n"; + << " else: print( repr(\"'\\0\"+str(" << i->second << "))[6:] )\n"; } } else { std::cout << spaces(leading_spaces) - << " print 'false=" << quote(input_line, '\"') << "'\n"; + << " print( 'false=" << quote(input_line, '\"') << "' )\n"; } } @@ -1023,8 +1024,8 @@ void print_header() << " res_val = math.sin(lat_delta / 2.0) * math.sin(lat_delta / 2.0) + math.cos(phi1) * math.cos(phi2) * math.sin(lon_delta / 2.0) * math.sin(lon_delta / 2.0)\n" << " return 6366.70702 * 2.0 * math.atan2(math.sqrt(res_val), math.sqrt(1.0 - res_val))\n" << "def sphere_distance_nm(lat1, lon1, lat2, lon2): return km_to_nm(sphere_distance_km(lat1, lon1, lat2, lon2))\n" - << "def err_expr_not_bool(): print >> sys.stderr, 'File \"?\", line ' + str(inspect.currentframe().f_back.f_lineno) + '\\nTypeError: 
expected a true or false expression'\n" - << "def err_var_is_obj(v_name): print >> sys.stderr, 'TypeError: variable \"' + v_name + '\" is used in an expression but is an object (example: \"a/b = 3; a < 0\")'\n" + << "def err_expr_not_bool(): print( 'File \"?\", line ' + str(inspect.currentframe().f_back.f_lineno) + '\\nTypeError: expected a true or false expression', file = sys.stderr )\n" + << "def err_var_is_obj(v_name): print( 'TypeError: variable \"' + v_name + '\" is used in an expression but is an object (example: \"a/b = 3; a < 0\")', file = sys.stderr )\n" << "def dict_str(d): return \"\"\n"; // note: err_expr_not_bool() imitates standard Python error printing: // 'File "name", line n' on one line, followed by the error message @@ -1039,7 +1040,7 @@ void print_assigned_variables(const Varmap &assigned_vars) { // i->first is the demangled (original) name, i->second is the mangled name // (repr() puts single quotes around strings; replace with double quotes) - std::cout << "print '" << i->first << "='+repr(" << i->second << ").replace(\"'\", '\"')\n"; + std::cout << "print( '" << i->first << "='+repr(" << i->second << ").replace(\"'\", '\"') )\n"; } } @@ -1123,49 +1124,32 @@ void process(const std::string &filename, const Options &opt, const std::set &restrict_vars) { std::ifstream file(filename.c_str()); - - if (!file) - { - std::cerr << exec_name << ": cannot open " << filename << '\n'; - exit(1); - } - + if (!file.is_open()) { std::cerr << exec_name << ": cannot open " << filename << '\n'; exit(1); } std::string line; while (std::getline(file, line)) { std::string var_name = trim_spaces(line); if (!var_name.empty()) { restrict_vars.insert(mangle_id(var_name)); } } - - if (restrict_vars.size() == 0) - { - std::cerr << exec_name << ": empty --output-variables file: " << filename << '\n'; - exit(1); - } + if(restrict_vars.size() == 0) { std::cerr << exec_name << ": empty --output-variables file: " << filename << '\n'; exit(1); } } int main(int argc, char* 
argv[]) { exec_name = argv[0]; - comma::command_line_options options(argc, argv); - if (options.exists("-h,--help")) { usage(); return 0; } - - // get flags + comma::command_line_options options(argc, argv, usage); Options opt; opt.assign = options.exists("-a,--assign"); opt.test = options.exists("-t,--test"); opt.restrict_vars = options.exists("-o,--output-variables"); opt.command = !(opt.assign || opt.test); opt.demangle = options.exists("-d,--demangle"); - if (opt.test) { if (opt.assign) { std::cerr << exec_name << ": cannot have --assign and --test\n"; exit(1); } if (opt.restrict_vars) { std::cerr << exec_name << ": cannot have --output-variables and --test\n"; exit(1); } } - - if (opt.demangle && (opt.assign || opt.test)) - { std::cerr << exec_name << ": cannot use --demangle with --assign or --test\n"; exit(1); } + if (opt.demangle && (opt.assign || opt.test)) { std::cerr << exec_name << ": cannot use --demangle with --assign or --test\n"; exit(1); } // get unnamed options const char *valueless_options = "-a,--assign,-t,--test,-d,--demangle"; @@ -1173,21 +1157,19 @@ int main(int argc, char* argv[]) std::vector unnamed = options.unnamed(valueless_options, options_with_values); std::set restrict_vars; std::string filename; - for (size_t i = 0;i < unnamed.size();++i) { if (unnamed[i][0] == '-') { std::cerr << exec_name << ": unknown option \"" << unnamed[i] << "\"\n"; exit(1); } else if (filename.empty()) { filename = unnamed[i]; } else { std::cerr << exec_name << ": unexpected argument \"" << unnamed[i] << "\"\n"; exit(1); } } - + if (opt.restrict_vars) { std::string restrict_filename = options.value ("-o,--output-variables"); if (restrict_filename.empty()) { std::cerr << exec_name << ": expected filename for --output-variables\n"; exit(1); } read_restrict_vars(restrict_filename, restrict_vars); } - if (!opt.assign && !opt.demangle) { print_header(); } process(filename, opt, restrict_vars); return 0; From 97416b65f496496525a4051aed267ceeefe9edc7 Mon Sep 17 
00:00:00 2001 From: vlaskine Date: Fri, 15 Nov 2019 12:11:25 +1100 Subject: [PATCH 0061/1056] csv-eval: porting to python3... --- python/comma/csv/applications/csv_eval.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/comma/csv/applications/csv_eval.py b/python/comma/csv/applications/csv_eval.py index 4ea78b482..2afc18c24 100644 --- a/python/comma/csv/applications/csv_eval.py +++ b/python/comma/csv/applications/csv_eval.py @@ -561,8 +561,8 @@ def collect(var, fields): return '\n'.join("{v}['{f}'] = {f}".format(v=var, f=f) size = input.size if stream.args.update_fields: update = stream.update_t(size) if stream.args.output_fields: output = stream.output_t(size) - exec init_code in env, {'_input': input, '_update': update, '_output': output} - exec code in env, {'_input': input, '_update': update, '_output': output} + exec( init_code, env, {'_input': input, '_update': update, '_output': output} ) + exec( code, env, {'_input': input, '_update': update, '_output': output} ) if stream.args.update_fields: update_buffer(stream.input, update) if stream.args.output_fields: stream.output.write(output) else: stream.input.dump() @@ -572,7 +572,7 @@ def collect(var, fields): return '\n'.join("{v}['{f}'] = {f}".format(v=var, f=f) def select(stream): input = None env = restricted_numpy_env() - exec stream.args.default_values in env + exec( stream.args.default_values, env ) fields = stream.input.fields code = compile(stream.args.select, '', 'eval') is_shutdown = comma.signal.is_shutdown() @@ -588,7 +588,7 @@ def select(stream): def exit_if(stream): input = None env = restricted_numpy_env() - exec stream.args.default_values in env + exec( stream.args.default_values, env ) fields = stream.input.fields code = compile(stream.args.exit_if, '', 'eval') is_shutdown = comma.signal.is_shutdown() From 71c6c2c9d70cb0847d8b567c587fd95a2cdb8764 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 15 Nov 2019 13:10:40 +1100 Subject: [PATCH 0062/1056] csv-eval: a 
lot of porting to python3... --- name_value/applications/name-value-calc | 57 ++++++++++--------- name_value/applications/name-value-eval | 3 + .../applications/name-value-eval-preparse.cpp | 8 ++- .../test_run/black_subdirs/test.2/expected | 4 +- 4 files changed, 41 insertions(+), 31 deletions(-) diff --git a/name_value/applications/name-value-calc b/name_value/applications/name-value-calc index 638c11719..0dc525d72 100755 --- a/name_value/applications/name-value-calc +++ b/name_value/applications/name-value-calc @@ -156,7 +156,10 @@ function logical_op() function python_converter() { # todo? python2 -c "import operator... - python -c "import operator + python -c " +from __future__ import division +from __future__ import print_function +import operator from sys import stdin filesep = '$file_separator' what = '$operation' @@ -174,11 +177,12 @@ operands = { '==' : operator.eq, '<>' : operator.ne, '!=' : operator.ne, - '/' : operator.div, + '/' : operator.truediv, + '//' : operator.floordiv, '*' : operator.mul, } -arithmetic = [ '+', '-', '/', '*' ] +arithmetic = [ '+', '-', '/', '//', '*' ] names = {} ordered_names = [] @@ -216,7 +220,7 @@ while True: names[n] = ( False, v ) ordered_names.append( n ) else: - if names.has_key( n ): + if n in names: try: names[n] = ( True, op(names[n][1], v) ) except TypeError: @@ -225,7 +229,7 @@ while True: for n in ordered_names: v = names[n] - print '%s=\"%s\"' % ( n, str(output(v[0], v[1], logical)) ) + print( '%s=\"%s\"' % ( n, str(output(v[0], v[1], logical)) ) ) " } @@ -249,31 +253,32 @@ debug=1 while [[ $# -gt 0 ]]; do case "$1" in - -h|--help) usage 0;; - --add) operation="+";; - --sub|--subtract) operation="-";; - --mul|--mult|--multiply) operation="*";; - --div|--divide) operation="/";; - --less) operation="<";; - --less-or-equal) operation="<=";; - --more) operation=">";; - --more-or-equal) operation=">=";; - --equal) operation="==";; - --non-equal) operation="!=";; - --not-equal) operation="!=";; - --json) 
json_format=1;; - --strict) strict=1;; - --detailed) detailed=1;; - --debug) debug=1;; - --no-debug) debug=0;; - --exclude) shift; if [[ $# == 0 ]] ; then echo "$name: --exclude lack argument" >&2; exit 1; fi; excluded+=( "$1" );; - -*) echo "$name: unrecognized option '$1'" >&2; exit 1;; - *) input_files+=( "$1" );; + -h|--help) usage 0;; + --add) operation="+";; + --sub|--subtract) operation="-";; + --mul|--mult|--multiply) operation="*";; + --div|--divide|--truediv) operation="/";; + --floordiv) operation="//";; + --less) operation="<";; + --less-or-equal) operation="<=";; + --more) operation=">";; + --more-or-equal) operation=">=";; + --equal) operation="==";; + --non-equal) operation="!=";; + --not-equal) operation="!=";; + --json) json_format=1;; + --strict) strict=1;; + --detailed) detailed=1;; + --debug) debug=1;; + --no-debug) debug=0;; + --exclude) shift; if [[ $# == 0 ]] ; then echo "$name: --exclude lack argument" >&2; exit 1; fi; excluded+=( "$1" );; + -*) echo "$name: unrecognized option '$1'" >&2; exit 1;; + *) input_files+=( "$1" );; esac shift done -binary_operands=( "-" "/" "<" "<=" ">" ">=" "==" "!=" ) +binary_operands=( "-" "/" "//" "<" "<=" ">" ">=" "==" "!=" ) logical_operands=( "<" "<=" ">" ">=" "==" "!=" ) if [[ ${#input_files[@]} == 0 ]]; then simple_usage 1; fi diff --git a/name_value/applications/name-value-eval b/name_value/applications/name-value-eval index 0cbaa47b0..4fadac7e3 100755 --- a/name_value/applications/name-value-eval +++ b/name_value/applications/name-value-eval @@ -197,6 +197,9 @@ if (( test_option )); then preparse_options+=" --test"; fi else name-value-eval-preparse $preparse_options; fi ) > $python_input if (( debug )); then cat $python_input | sed 's/^/debug: /g' > /dev/tty; fi + +#cat "$python_input" > ./python_input + $python_cmd $python_input 2> $err > $out exit_code=0 diff --git a/name_value/applications/name-value-eval-preparse.cpp b/name_value/applications/name-value-eval-preparse.cpp index 49c968946..068824190 
100644 --- a/name_value/applications/name-value-eval-preparse.cpp +++ b/name_value/applications/name-value-eval-preparse.cpp @@ -936,7 +936,7 @@ void process_test(std::vector &tokens, const std::string &original_line, std::cout << "# SRCLINE " << line_num << " " << input_line_prefix << input_line << '\n' << spaces(leading_spaces) << "_result_ = (" << tokens << ")\n" - << spaces(leading_spaces) << "if __builtin__.type(_result_) != bool: err_expr_not_bool()\n" + << spaces(leading_spaces) << "if builtins_module.type(_result_) != bool: err_expr_not_bool()\n" << spaces(leading_spaces) << "elif not _result_:\n"; if (vars.size() != 0) @@ -962,7 +962,7 @@ void process_test(std::vector &tokens, const std::string &original_line, << spaces(leading_spaces) << " sys.stdout.write('" << i->first << "/actual=\"')\n" << spaces(leading_spaces) - << " if __builtin__.type(" << i->second << ") == __builtin__.type({}): print( dict_str(" << i->second << ")+'\"' )\n" + << " if builtins_module.type(" << i->second << ") == builtins_module.type({}): print( dict_str(" << i->second << ")+'\"' )\n" << spaces(leading_spaces) // use a Python trick to force repr() to use double quotes instead of single // (for an explanation, see: http://www.gossamer-threads.com/lists/python/python/157285 @@ -1000,7 +1000,9 @@ void process_command(const std::vector &tokens, Varmap &assigned_vars, co void print_header() { std::cout - << "import sys, re, inspect, math, __builtin__\n" + << "import sys, re, inspect, math\n" + << "if sys.version_info.major == 2: import __builtin__; builtins_module=__builtin__\n" + << "else: import builtins; builtins_module=builtins\n" << "def near(x, y, eps): return abs(x - y) <= eps\n" << "def near_percent(x, y, percent): return abs(x - y) <= abs(x) * percent * 0.01\n" << "def max_index(dict) : return max(dict.keys())\n" diff --git a/util/test/test_run/black_subdirs/test.2/expected b/util/test/test_run/black_subdirs/test.2/expected index 6c0055604..936856dee 100644 --- 
a/util/test/test_run/black_subdirs/test.2/expected +++ b/util/test/test_run/black_subdirs/test.2/expected @@ -1,5 +1,5 @@ #python expect len(expected) == 9 -expect locals().has_key('output') == False -expect locals().has_key('stdout_log') == False +expect ( 'output' in locals() ) == False +expect ( 'stdout_log' in locals() ) == False From a157ca8f3260d4bcdd3461e7649d659e15ab037a Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 15 Nov 2019 21:01:04 +1100 Subject: [PATCH 0063/1056] tests: porting to python3... --- bash/test/comma_background/signature/basic/test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bash/test/comma_background/signature/basic/test b/bash/test/comma_background/signature/basic/test index ad66984bf..7e2f3b335 100755 --- a/bash/test/comma_background/signature/basic/test +++ b/bash/test/comma_background/signature/basic/test @@ -19,7 +19,7 @@ echo "clock_ticks_per_second=$ticks" comma_process_exec_and_validate "$fifo" sleep 100 || { echo "$scriptname: fatal system error, wrong background PID" >&2; exit 1; } background_pid=$! -now=$( python -c "import sys; from numpy import int64; a = sys.stdin.readline().split()[0]; print int64(float(a) * $ticks)" < /proc/uptime ) +now=$( python -c "from __future__ import print_function; import sys; from numpy import int64; a = sys.stdin.readline().split()[0]; print( int64(float(a) * $ticks) )" < /proc/uptime ) echo "time/now=$now" signature=$( comma_process_signature "$background_pid" ) From a4486a63d37a5a9424390572671354501078842b Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 15 Nov 2019 21:37:09 +1100 Subject: [PATCH 0064/1056] tests: porting to python3... 
--- bash/test/comma_background/signature/args/expected | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bash/test/comma_background/signature/args/expected b/bash/test/comma_background/signature/args/expected index 4e62e0068..5acb607de 100644 --- a/bash/test/comma_background/signature/args/expected +++ b/bash/test/comma_background/signature/args/expected @@ -1,5 +1,7 @@ #python +from functools import reduce + # iterate through a path of attributes: "obj/data/member/value" def deepgetattr(obj, attr): """Recurses through an attribute chain to get the ultimate value.""" From fdbdab08ffcc908c79263bfba6ed451179f3c804 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 15 Nov 2019 21:45:29 +1100 Subject: [PATCH 0065/1056] tests: porting to python3... --- python/comma/numpy/functions.py | 1 + python/comma/numpy/test/test_functions.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/comma/numpy/functions.py b/python/comma/numpy/functions.py index 3ea7fbabd..b0214c298 100644 --- a/python/comma/numpy/functions.py +++ b/python/comma/numpy/functions.py @@ -32,6 +32,7 @@ import numpy as np import operator import re +from functools import reduce def merge_arrays(first, second): diff --git a/python/comma/numpy/test/test_functions.py b/python/comma/numpy/test/test_functions.py index 685f3d6bd..d049183fd 100644 --- a/python/comma/numpy/test/test_functions.py +++ b/python/comma/numpy/test/test_functions.py @@ -162,14 +162,14 @@ def test_structure_out_of_order(self): itemsize = 43 ndtype1 = np.dtype( dict( names=names1, formats=formats1, offsets=offsets1, itemsize=itemsize ) ) - sorted_fields1 = sorted( list( ndtype1.fields.iteritems() ), key = lambda t: t[1] ) + sorted_fields1 = sorted( list( ndtype1.fields.items() ), key = lambda t: t[1] ) names2 = ['a3', 'word', 'a2', 'byte' ] formats2 = [np.dtype((' Date: Fri, 15 Nov 2019 21:59:37 +1100 Subject: [PATCH 0066/1056] tests: porting to python3... 
--- python/comma/application/test/dict/test | 2 +- python/comma/csv/stream.py | 2 +- python/comma/csv/struct.py | 6 +++--- python/comma/csv/test/struct/test | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/comma/application/test/dict/test b/python/comma/application/test/dict/test index d3740e88d..b5ec5c40c 100755 --- a/python/comma/application/test/dict/test +++ b/python/comma/application/test/dict/test @@ -18,7 +18,7 @@ import argparse parser = argparse.ArgumentParser( ) parser.add_argument( '--dict', metavar='DICT', help='populate a dictionary of given type', type=str, action=comma.application.set_dictionary_action, $value_type delimiter=';', default={} ) args = parser.parse_args() -for k, v in args.dict.iteritems(): +for k, v in args.dict.items(): def quote_if_string( v ): if type( v ) == str: return '\"%s\"' % v diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index f4509acf2..c13f3a3db 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -184,7 +184,7 @@ def _missing_values(self): missing = np.zeros(1, dtype=self.missing_dtype) if self.default_values: dtype_name_of = dict(zip(self.missing_fields, self.missing_dtype.names)) - for field, value in self.default_values.iteritems(): + for field, value in self.default_values.items(): name = dtype_name_of[field] if self.missing_dtype[name] == csv_time.DTYPE: try: missing[name] = csv_time.to_numpy(value) diff --git a/python/comma/csv/struct.py b/python/comma/csv/struct.py index cb8eae907..514438741 100644 --- a/python/comma/csv/struct.py +++ b/python/comma/csv/struct.py @@ -107,7 +107,7 @@ def _make_fields_map( m, fields ): def _assign( self, data, fields_map, convert ): functors = {} - for k, v in fields_map.iteritems(): + for k, v in fields_map.items(): if len( v ) > 0: functors[k] = self._assign( getattr( data, k ), v, convert ) else: @@ -115,7 +115,7 @@ def functor( value, key = k ): setattr( data, key, value if convert is None else convert( 
value ) ) functors[k] = functor def apply_functors( record ): - for k, f in functors.iteritems(): f( record[k] ) + for k, f in functors.items(): f( record[k] ) return apply_functors def _nondefault_fields(self): @@ -172,7 +172,7 @@ def _shorthand(self): continue fields_of_type = [name + '/' + field for field in type.fields] shorthand[name] = tuple(fields_of_type) - for subname, subfields in type.shorthand.iteritems(): + for subname, subfields in type.shorthand.items(): xpath = name + '/' + subname shorthand[xpath] = tuple(name + '/' + field for field in subfields) return shorthand diff --git a/python/comma/csv/test/struct/test b/python/comma/csv/test/struct/test index fba303d46..961e7525d 100755 --- a/python/comma/csv/test/struct/test +++ b/python/comma/csv/test/struct/test @@ -75,7 +75,7 @@ point_t = comma.csv.struct( 'x,y,z', 'float64', 'float64', 'float64' ) timestamped_point_t = comma.csv.struct( 't,coordinates', 'datetime64[us]', point_t ) observer_t = comma.csv.struct( 'name,id', 'S3', 'uint32' ) record_t = comma.csv.struct( 'observer,event,value', observer_t, timestamped_point_t, 'float64' ) -for leaf,xpath in record_t.xpath_of_leaf.iteritems(): +for leaf,xpath in record_t.xpath_of_leaf.items(): print( "leaves/{}={}".format( leaf, record_t.xpath_of_leaf.get( leaf ) ) ) END )" From 812dd67783fd3e5a1bdcb6abc80b04de21b0305a Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sat, 16 Nov 2019 02:14:12 +1100 Subject: [PATCH 0067/1056] python/comma/csv/struct: porting to python3... 
--- python/comma/csv/struct.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/comma/csv/struct.py b/python/comma/csv/struct.py index 514438741..b7cfce4f6 100644 --- a/python/comma/csv/struct.py +++ b/python/comma/csv/struct.py @@ -87,7 +87,7 @@ def expand_shorthand(self, compressed_fields): >>> outer.expand_shorthand('in') ('in/i', 'in/j') """ - if isinstance(compressed_fields, basestring): + if isinstance(compressed_fields, str): #if isinstance(compressed_fields, basestring): compressed_fields = compressed_fields.split(',') expand = self.shorthand.get field_tuples = map(lambda name: expand(name) or (name,), compressed_fields) @@ -123,7 +123,7 @@ def _nondefault_fields(self): return tuple(map(lambda f: '' if f.startswith(default_name) else f, self.fields)) def _fill_blanks(self, fields): - if isinstance(fields, basestring): + if isinstance(fields, str): # if isinstance(fields, basestring): fields = fields.split(',') ntypes = len(self.concise_types) if len(fields) > ntypes: From e3cd52df105935d973dc538180c591612ef3efad Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sat, 16 Nov 2019 12:36:49 +1100 Subject: [PATCH 0068/1056] python/comma: porting to python3 --- python/comma/csv/applications/csv_eval.py | 4 ++-- python/comma/csv/stream.py | 2 +- python/comma/csv/struct.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/comma/csv/applications/csv_eval.py b/python/comma/csv/applications/csv_eval.py index 2afc18c24..6537f4721 100644 --- a/python/comma/csv/applications/csv_eval.py +++ b/python/comma/csv/applications/csv_eval.py @@ -334,7 +334,7 @@ def comma_type(maybe_type, field, default_type='d', type_of_unnamed_field='s[0]' if len(maybe_types) > len(fields): msg = "format '{}' is longer than fields '{}'".format(format, ','.join(fields)) raise ValueError(msg) - maybe_typed_fields = itertools.izip_longest(maybe_types, fields) + maybe_typed_fields = itertools.zip_longest(maybe_types, fields) if 
sys.version_info.major > 2 else itertools.izip_longest(maybe_types, fields) # uber quick and dirty types = [comma_type(maybe_type, field) for maybe_type, field in maybe_typed_fields] return ','.join(types) @@ -621,7 +621,7 @@ def main(): name = os.path.basename(sys.argv[0]) print( "{} error: {}".format(name, e), file = sys.stderr ) sys.exit(1) - except StandardError as e: + except Exception as e: #except StandardError as e: import traceback traceback.print_exc(file=sys.stderr) sys.exit(1) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index c13f3a3db..78505d855 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -289,7 +289,7 @@ def _fields(self, fields): return tuple(xpath(name) or name for name in fields.split(',')) def _format(self, binary, format): - if isinstance(binary, basestring): + if isinstance(binary, str): # if isinstance(binary, basestring): if self.verbose and binary and format and binary != format: msg = "ignoring '{}' and using '{}' since binary keyword has priority" \ .format(format, binary) diff --git a/python/comma/csv/struct.py b/python/comma/csv/struct.py index b7cfce4f6..f4fe66887 100644 --- a/python/comma/csv/struct.py +++ b/python/comma/csv/struct.py @@ -40,13 +40,13 @@ def __init__(self, concise_fields, *concise_types): self.concise_types = concise_types self.concise_fields = self._fill_blanks(concise_fields) self._check_fields_conciseness() - self.dtype = np.dtype(zip(self.concise_fields, self.concise_types)) + self.dtype = np.dtype(list(zip(self.concise_fields, self.concise_types))) self.fields = self._full_xpath_fields() self.nondefault_fields = self._nondefault_fields() self.types = self._basic_types() self.shorthand = self._shorthand() self.format = ','.join(self.types) - self.flat_dtype = np.dtype(zip(self.fields, self.types)) + self.flat_dtype = np.dtype(list(zip(self.fields, self.types))) unrolled_types = types_of_dtype(self.flat_dtype, unroll=True) self.unrolled_flat_dtype = 
structured_dtype(','.join(unrolled_types)) self.type_of_field = dict(zip(self.fields, self.types)) From b7c7df88b50a5a115f8e8a0e386b86904eb6ef26 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sat, 16 Nov 2019 13:47:54 +1100 Subject: [PATCH 0069/1056] python/comma: porting to python3; stream and time --- python/comma/csv/stream.py | 8 ++++---- python/comma/csv/test/unit/test_stream.py | 4 +++- python/comma/csv/time.py | 8 ++++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index 78505d855..eb51072ef 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -361,10 +361,10 @@ def _missing_fields(self): def _missing_dtype(self): if not self.missing_fields: return n = len(self.input_dtype.names) - missing_names = ['f{}'.format(n + i) for i in xrange(len(self.missing_fields))] + missing_names = ['f{}'.format(n + i) for i in range(len(self.missing_fields))] # missing_names = ['f{}'.format(n + i) for i in xrange(len(self.missing_fields))] type_of = self.struct.type_of_field.get missing_types = [type_of(name) for name in self.missing_fields] - return np.dtype(zip(missing_names, missing_types)) + return np.dtype(list(zip(missing_names, missing_types))) def _complete_dtype(self): if self.missing_dtype: return np.dtype(self.input_dtype.descr + self.missing_dtype.descr) @@ -438,12 +438,12 @@ def numpy_scalar_to_string(scalar, precision=DEFAULT_PRECISION): '20150102T123456.123456' >>> numpy_scalar_to_string(np.timedelta64(-123, 's')) '-123' - """ - + """ if scalar.dtype.char in np.typecodes['AllInteger']: return str(scalar) elif scalar.dtype.char in np.typecodes['Float']: return "{scalar:.{precision}g}".format(scalar=scalar, precision=precision) elif scalar.dtype.char in np.typecodes['Datetime']: return csv_time.from_numpy(scalar) elif scalar.dtype.char in 'S': return scalar + elif scalar.dtype.char in 'U': return scalar elif scalar.dtype.char in '?': return str( int( scalar ) ) #elif 
scalar.dtype.char in '?': return str( map( int, scalar ) ) msg = "converting {} to string is not implemented".format(repr(scalar.dtype)) raise NotImplementedError(msg) diff --git a/python/comma/csv/test/unit/test_stream.py b/python/comma/csv/test/unit/test_stream.py index 191a1c58b..528b72982 100644 --- a/python/comma/csv/test/unit/test_stream.py +++ b/python/comma/csv/test/unit/test_stream.py @@ -1,3 +1,4 @@ +from __future__ import print_function import unittest import numpy as np import sys @@ -16,7 +17,8 @@ def test_use_defaults(self): self.assertTrue(s.full_xpath) def test_override_defaults(self): - from cStringIO import StringIO + if sys.version_info.major < 3: from cStringIO import StringIO # quick and dirty, sigh... + else: from io import StringIO source = StringIO("") target = StringIO("") t = comma.csv.stream(comma.csv.struct('id', 'S4'), delimiter=';') diff --git a/python/comma/csv/time.py b/python/comma/csv/time.py index 611cf6183..39c33ad3b 100644 --- a/python/comma/csv/time.py +++ b/python/comma/csv/time.py @@ -31,6 +31,7 @@ import numpy as np import re import os +import sys import time UNIT = 'us' @@ -40,6 +41,7 @@ NOT_A_DATE_TIME = np.datetime64('NaT') POSITIVE_INFINITY = np.datetime64('294247-01-09T04:00:54.775807') NEGATIVE_INFINITY = np.datetime64('-290308-12-22T19:59:05.224191') +BASESTRING = basestring if sys.version_info.major < 3 else str # sigh... 
def is_undefined(numpy_time): return str(numpy_time) == str(NOT_A_DATE_TIME) @@ -65,7 +67,7 @@ def to_numpy(t): if t in ['', 'not-a-date-time']: return NOT_A_DATE_TIME if t in ['+infinity', '+inf', 'infinity', 'inf']: return POSITIVE_INFINITY if t in ['-infinity', '-inf']: return NEGATIVE_INFINITY - if not (isinstance(t, basestring) and re.match(r'^(\d{8}T\d{6}(\.\d{0,6})?)$', t)): + if not (isinstance(t, BASESTRING) and re.match(r'^(\d{8}T\d{6}(\.\d{0,6})?)$', t)): msg = "expected comma time, got '{}'".format(repr(t)) raise TypeError(msg) v = list(t) @@ -100,7 +102,9 @@ def from_numpy(t): if is_undefined(t): return 'not-a-date-time' if is_negative_infinity(t): return '-infinity' if is_positive_infinity(t): return '+infinity' - return re.sub(r'(\.0{6})?([-+]\d{4}|Z)?$', '', str(t)).translate(None, ':-') + s = re.sub(r'(\.0{6})?([-+]\d{4}|Z)?$', '', str(t)) + #return re.sub(r'(\.0{6})?([-+]\d{4}|Z)?$', '', str(t)).translate(None, ':-') + return s.translate(str.maketrans('', '', ':-')) if sys.version_info.major > 2 else s.translate(None, ':-') # sigh... cannot believe i am going this... def ascii_converters(types): converters = {} From f6c81a4be74af1df269a105d4a91e56aa0091e0f Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sat, 16 Nov 2019 14:37:04 +1100 Subject: [PATCH 0070/1056] python/comma: porting to python3; stream: floor division, explicit conversion of string types to str --- python/comma/csv/stream.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index eb51072ef..57d1fabdb 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -348,7 +348,7 @@ def _input_dtype(self): def _default_buffer_size(self): if self.tied: return self.tied.size elif self.flush: return 1 - return max( 1, stream.buffer_size_in_bytes / self.input_dtype.itemsize ) # todo? too arbitrary for ascii? + return max( 1, stream.buffer_size_in_bytes // self.input_dtype.itemsize ) # todo? 
too arbitrary for ascii? def _missing_fields(self): missing_fields = [field for field in self.struct.fields if field not in self.fields] @@ -442,8 +442,8 @@ def numpy_scalar_to_string(scalar, precision=DEFAULT_PRECISION): if scalar.dtype.char in np.typecodes['AllInteger']: return str(scalar) elif scalar.dtype.char in np.typecodes['Float']: return "{scalar:.{precision}g}".format(scalar=scalar, precision=precision) elif scalar.dtype.char in np.typecodes['Datetime']: return csv_time.from_numpy(scalar) - elif scalar.dtype.char in 'S': return scalar - elif scalar.dtype.char in 'U': return scalar + elif scalar.dtype.char in 'S': return str(scalar) # quick and dirty, python3, sigh... + elif scalar.dtype.char in 'U': return str(scalar) # quick and dirty, python3, sigh... elif scalar.dtype.char in '?': return str( int( scalar ) ) #elif scalar.dtype.char in '?': return str( map( int, scalar ) ) msg = "converting {} to string is not implemented".format(repr(scalar.dtype)) raise NotImplementedError(msg) From d168150267d1ff8ee0993e80c00e5ab12d79f7ad Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sun, 17 Nov 2019 12:29:45 +1100 Subject: [PATCH 0071/1056] python/comma: porting to python3; stream: reading and writing binary data, it still outputs strings with binary decorator; in progress... --- python/comma/csv/stream.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index 57d1fabdb..eb82e631f 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -28,10 +28,11 @@ # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from __future__ import print_function +import functools +import itertools import numpy as np +import os import sys -import itertools -import functools import warnings from ..util import warning from ..io import readlines_unbuffered @@ -69,6 +70,7 @@ def __init__(self, self.flush = flush self.source = source self.target = target + if target == sys.stdout: self.stdout = os.fdopen( sys.stdout.fileno(), "wb" ) self.tied = tied self.full_xpath = full_xpath self.verbose = verbose @@ -156,11 +158,24 @@ def read_from_line(self, line): def _read(self, size): if self.binary: - if size < 0 and self.source == sys.stdin: - return np.fromstring(self.source.read(), dtype=self.input_dtype) + if np.__version__ >= '1.16.0': # sigh... + if self.source == sys.stdin: + if size < 0: + return np.fromstring( self.source.read(), dtype = self.input_dtype ) + else: + b = sys.stdin.buffer.read( self.input_dtype.itemsize * size ) + # print( "--> a: len(b):", len(b), "size:", size, "self.input_dtype.itemsize:", self.input_dtype.itemsize, file = sys.stderr ) + # todo! test on streams where bytes come with irregular delays! 
+ if len(b) % self.input_dtype.itemsize != 0: raise TypeError( "expected records of size {}, got {} bytes, which is not divisible by record size".format( self.input_dtype.itemsize, len( b ) ) ) + return np.frombuffer( b, dtype = self.input_dtype, count = len( b ) // self.input_dtype.itemsize ) + else: + return np.fromfile( self.source, dtype = self.input_dtype, count = -1 if size < 0 else size ) # this line may not be covered by regression test else: - count = -1 if size < 0 else size - return np.fromfile(self.source, dtype=self.input_dtype, count=count) + if size < 0 and self.source == sys.stdin: + return np.fromstring(self.source.read(), dtype=self.input_dtype) + else: + count = -1 if size < 0 else size + return np.fromfile(self.source, dtype=self.input_dtype, count=count) else: with warnings.catch_warnings(): warnings.simplefilter('ignore') @@ -211,8 +226,11 @@ def write(self, s): msg = "size {} not equal to tied size {}".format(s.size, tied_size) raise ValueError(msg) if self.binary: - if self.tied: self._tie_binary(self.tied._input_array, s).tofile(self.target) - else: s.tofile(self.target) + if np.__version__ >= '1.16.0' and self.target == sys.stdout: # sigh... + self.stdout.write( self._tie_binary(self.tied._input_array, s).tobytes() if self.tied else s.tobytes() ) + else: + if self.tied: self._tie_binary(self.tied._input_array, s).tofile(self.target) + else: s.tofile(self.target) else: unrolled_array = s.view(self.struct.unrolled_flat_dtype) #unrolled_array = s.view( self.unrolled_write_dtype ) From 741507dec7de440e5ce3db5821cbeb9ceb7f6b2c Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sun, 17 Nov 2019 12:52:05 +1100 Subject: [PATCH 0072/1056] python/comma: porting to python3; struct: test: S4 replaced with U4 string definition; in progress... 
--- python/comma/csv/test/struct/test | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/comma/csv/test/struct/test b/python/comma/csv/test/struct/test index 961e7525d..2ca45ac28 100755 --- a/python/comma/csv/test/struct/test +++ b/python/comma/csv/test/struct/test @@ -122,7 +122,7 @@ class test_class: nested_struct = comma.csv.struct( 'a', 'uint32' ) #test_struct = comma.csv.struct( 'a,b,c,d,e,f,g', 'uint32', 'float32', 'datetime64[us]', 'S4', nested_struct, 'uint32', ( nested_struct, ( 2, ) ) ) -test_struct = comma.csv.struct( 'a,b,c,d,e', 'uint32', 'float32', 'datetime64[us]', 'S4', nested_struct ) +test_struct = comma.csv.struct( 'a,b,c,d,e', 'uint32', 'float32', 'datetime64[us]', 'U4', nested_struct ) t = test_struct() t['a'] = 10 t['b'] = 20 @@ -135,6 +135,7 @@ t['e']['a'] = 40 c = test_class() assign = test_struct.assign( c ) assign( t[0] ) + print( 'assign/basic/output/a=' + str( c.a ) ) print( 'assign/basic/output/b=' + str( c.b ) ) print( 'assign/basic/output/c=\"' + str( c.c ).split( '+' )[0] + '\"' ) From 7d4571a82fbaea3212879f6c58bea6997de1f64a Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sun, 17 Nov 2019 21:06:55 +1100 Subject: [PATCH 0073/1056] python/comma: porting to python3; stream: decode( 'utf-8' ) added to ascii output for strings; in progress... 
--- python/comma/csv/stream.py | 3 +-- python/comma/csv/test/stream/basic/test | 6 ++++-- python/comma/csv/test/stream/buffer_size/expected | 4 ++-- python/comma/csv/test/stream/buffer_size/test | 2 ++ 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index eb82e631f..c9f84d5df 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -164,7 +164,6 @@ def _read(self, size): return np.fromstring( self.source.read(), dtype = self.input_dtype ) else: b = sys.stdin.buffer.read( self.input_dtype.itemsize * size ) - # print( "--> a: len(b):", len(b), "size:", size, "self.input_dtype.itemsize:", self.input_dtype.itemsize, file = sys.stderr ) # todo! test on streams where bytes come with irregular delays! if len(b) % self.input_dtype.itemsize != 0: raise TypeError( "expected records of size {}, got {} bytes, which is not divisible by record size".format( self.input_dtype.itemsize, len( b ) ) ) return np.frombuffer( b, dtype = self.input_dtype, count = len( b ) // self.input_dtype.itemsize ) @@ -460,7 +459,7 @@ def numpy_scalar_to_string(scalar, precision=DEFAULT_PRECISION): if scalar.dtype.char in np.typecodes['AllInteger']: return str(scalar) elif scalar.dtype.char in np.typecodes['Float']: return "{scalar:.{precision}g}".format(scalar=scalar, precision=precision) elif scalar.dtype.char in np.typecodes['Datetime']: return csv_time.from_numpy(scalar) - elif scalar.dtype.char in 'S': return str(scalar) # quick and dirty, python3, sigh... + elif scalar.dtype.char in 'Sa': return scalar.decode('UTF-8') # quick and dirty, python3, sigh... elif scalar.dtype.char in 'U': return str(scalar) # quick and dirty, python3, sigh... 
elif scalar.dtype.char in '?': return str( int( scalar ) ) #elif scalar.dtype.char in '?': return str( map( int, scalar ) ) msg = "converting {} to string is not implemented".format(repr(scalar.dtype)) diff --git a/python/comma/csv/test/stream/basic/test b/python/comma/csv/test/stream/basic/test index 6391c34d9..8307292ec 100755 --- a/python/comma/csv/test/stream/basic/test +++ b/python/comma/csv/test/stream/basic/test @@ -14,7 +14,9 @@ import numpy point_t = comma.csv.struct( 'x,y,z', 'float64', 'float64', 'float64' ) timestamped_point_t = comma.csv.struct( 'time,coordinates', 'datetime64[us]', point_t ) -record_t = comma.csv.struct( 'observer,event', 'S3', timestamped_point_t ) +#record_t = comma.csv.struct( 'observer,event', 'U3', timestamped_point_t +#record_t = comma.csv.struct( 'observer,event', 'S3', timestamped_point_t ) +record_t = comma.csv.struct( 'observer,event', 'S8', timestamped_point_t ) record_stream = comma.csv.stream( record_t, binary=$binary ) for i,r in enumerate( record_stream.iter(), start=1 ): @@ -31,6 +33,6 @@ echo "$input" | process_records | output ascii comma_status_ok echo "ascii/status=$?" -echo "$input" | csv-to-bin s[3],t,3d | process_records binary | csv-from-bin s[3],t,3d | output binary +echo "$input" | csv-to-bin s[8],t,3d | process_records binary | csv-from-bin s[8],t,3d | output binary comma_status_ok echo "binary/status=$?" 
diff --git a/python/comma/csv/test/stream/buffer_size/expected b/python/comma/csv/test/stream/buffer_size/expected index 5f647e531..c0a29afba 100644 --- a/python/comma/csv/test/stream/buffer_size/expected +++ b/python/comma/csv/test/stream/buffer_size/expected @@ -1,5 +1,5 @@ #python expect buffer_size_in_bytes = 65536 -expect size/uint8 = buffer_size_in_bytes / number_of_fields -expect size/float64 = buffer_size_in_bytes / ( 8 * number_of_fields ) +expect size/uint8 = buffer_size_in_bytes // number_of_fields +expect size/float64 = buffer_size_in_bytes // ( 8 * number_of_fields ) diff --git a/python/comma/csv/test/stream/buffer_size/test b/python/comma/csv/test/stream/buffer_size/test index c2da902de..5598de759 100755 --- a/python/comma/csv/test/stream/buffer_size/test +++ b/python/comma/csv/test/stream/buffer_size/test @@ -8,10 +8,12 @@ function get_default_size local type=$2 python -c "$( cat < Date: Sun, 17 Nov 2019 21:15:51 +1100 Subject: [PATCH 0074/1056] python/comma: porting to python3; stream: call fdopen on stdout only on numpy version >= 1.16.0; in progress... 
--- python/comma/csv/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index c9f84d5df..745655cab 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -70,7 +70,7 @@ def __init__(self, self.flush = flush self.source = source self.target = target - if target == sys.stdout: self.stdout = os.fdopen( sys.stdout.fileno(), "wb" ) + if np.__version__ >= '1.16.0' and target == sys.stdout: self.stdout = os.fdopen( sys.stdout.fileno(), "wb" ) self.tied = tied self.full_xpath = full_xpath self.verbose = verbose From fc91e995d494ff3b26e03ff691136b88b965e2b6 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sun, 17 Nov 2019 21:24:34 +1100 Subject: [PATCH 0075/1056] python/comma: porting to python3; stream: removed calling fdopen on stdout on numpy version >= 1.16.0, write bytes directly to sys.stdout instead; in progress... --- python/comma/csv/stream.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index 745655cab..c2879513f 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -70,7 +70,7 @@ def __init__(self, self.flush = flush self.source = source self.target = target - if np.__version__ >= '1.16.0' and target == sys.stdout: self.stdout = os.fdopen( sys.stdout.fileno(), "wb" ) + #if np.__version__ >= '1.16.0' and target == sys.stdout: self.stdout = os.fdopen( sys.stdout.fileno(), "wb" ) self.tied = tied self.full_xpath = full_xpath self.verbose = verbose @@ -226,7 +226,8 @@ def write(self, s): raise ValueError(msg) if self.binary: if np.__version__ >= '1.16.0' and self.target == sys.stdout: # sigh... 
- self.stdout.write( self._tie_binary(self.tied._input_array, s).tobytes() if self.tied else s.tobytes() ) + #self.stdout.write( self._tie_binary(self.tied._input_array, s).tobytes() if self.tied else s.tobytes() ) + sys.stdout.write( self._tie_binary(self.tied._input_array, s).tobytes() if self.tied else s.tobytes() ) else: if self.tied: self._tie_binary(self.tied._input_array, s).tofile(self.target) else: s.tofile(self.target) From 374388d968c1ef98ff3ebbed4778bfdda09e5774 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sun, 17 Nov 2019 21:25:10 +1100 Subject: [PATCH 0076/1056] python/comma: porting to python3; test_stream: use // instead of / for division; in progress... --- python/comma/csv/test/unit/test_stream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/comma/csv/test/unit/test_stream.py b/python/comma/csv/test/unit/test_stream.py index 528b72982..d17ef8b3c 100644 --- a/python/comma/csv/test/unit/test_stream.py +++ b/python/comma/csv/test/unit/test_stream.py @@ -135,11 +135,11 @@ def test_size(self): s = comma.csv.struct('x,id', 'f8', 'u4') tied = comma.csv.stream(comma.csv.struct('i', 'u2')) t1 = comma.csv.stream(s) - self.assertEqual(t1.size, comma.csv.stream.buffer_size_in_bytes / 12) + self.assertEqual(t1.size, comma.csv.stream.buffer_size_in_bytes // 12) t2 = comma.csv.stream(s, flush=True) self.assertEqual(t2.size, 1) t3 = comma.csv.stream(s, tied=tied) - self.assertEqual(t3.size, comma.csv.stream.buffer_size_in_bytes / 2) + self.assertEqual(t3.size, comma.csv.stream.buffer_size_in_bytes // 2) def test_ascii_simple_single_field(self): s = comma.csv.struct('x', 'f8') From c816c8eb4a60f52e636988a6effe9f07122efc99 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sun, 17 Nov 2019 21:48:54 +1100 Subject: [PATCH 0077/1056] python/comma: porting to python3; stream: using sys.stdout.buffer.write() instead of sys.stdout.write(); in progress... 
--- python/comma/csv/stream.py | 2 +- python/comma/io/readlines_unbuffered.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index c2879513f..89ed2b663 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -227,7 +227,7 @@ def write(self, s): if self.binary: if np.__version__ >= '1.16.0' and self.target == sys.stdout: # sigh... #self.stdout.write( self._tie_binary(self.tied._input_array, s).tobytes() if self.tied else s.tobytes() ) - sys.stdout.write( self._tie_binary(self.tied._input_array, s).tobytes() if self.tied else s.tobytes() ) + sys.stdout.buffer.write( self._tie_binary(self.tied._input_array, s).tobytes() if self.tied else s.tobytes() ) else: if self.tied: self._tie_binary(self.tied._input_array, s).tofile(self.target) else: s.tofile(self.target) diff --git a/python/comma/io/readlines_unbuffered.py b/python/comma/io/readlines_unbuffered.py index 5d34e1561..6e6d5d286 100644 --- a/python/comma/io/readlines_unbuffered.py +++ b/python/comma/io/readlines_unbuffered.py @@ -28,7 +28,8 @@ # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import sys -import itertools +if sys.version_info.major < 3: from itertools import ifilter +else: ifilter = filter # quick and dirty, tired of googling... def readlines_unbuffered(size, source=sys.stdin, skip_blank_lines=True): @@ -54,7 +55,7 @@ def readlines_unbuffered(size, source=sys.stdin, skip_blank_lines=True): number_of_lines += 1 return lines if skip_blank_lines: - source_ = itertools.ifilter(lambda line: line.strip(), source) + source_ = ifilter(lambda line: line.strip(), source) else: source_ = source return [line.rstrip('\n') for line in source_] From a6f29c56edf05f96dbaa2fbee3cfc25a4d59d41a Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sun, 17 Nov 2019 22:06:36 +1100 Subject: [PATCH 0078/1056] python/comma: porting to python3; stream: handling size -1 in _read()... not necessarily correctly...; in progress... 
--- python/comma/csv/stream.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index 89ed2b663..6674934c3 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -160,13 +160,10 @@ def _read(self, size): if self.binary: if np.__version__ >= '1.16.0': # sigh... if self.source == sys.stdin: - if size < 0: - return np.fromstring( self.source.read(), dtype = self.input_dtype ) - else: - b = sys.stdin.buffer.read( self.input_dtype.itemsize * size ) - # todo! test on streams where bytes come with irregular delays! - if len(b) % self.input_dtype.itemsize != 0: raise TypeError( "expected records of size {}, got {} bytes, which is not divisible by record size".format( self.input_dtype.itemsize, len( b ) ) ) - return np.frombuffer( b, dtype = self.input_dtype, count = len( b ) // self.input_dtype.itemsize ) + b = sys.stdin.buffer.read( self.input_dtype.itemsize * ( size if size > 0 else self.size ) ) + # todo! test on streams where bytes come with irregular delays! + if len(b) % self.input_dtype.itemsize != 0: raise TypeError( "expected records of size {}, got {} bytes, which is not divisible by record size".format( self.input_dtype.itemsize, len( b ) ) ) + return np.frombuffer( b, dtype = self.input_dtype, count = len( b ) // self.input_dtype.itemsize ) else: return np.fromfile( self.source, dtype = self.input_dtype, count = -1 if size < 0 else size ) # this line may not be covered by regression test else: From f50a03f572f66845017ec82c18f06d5971e76368 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Sun, 17 Nov 2019 22:11:18 +1100 Subject: [PATCH 0079/1056] python/comma: porting to python3; stream: throwing valueerror instead of typeerror; in progress... 
--- python/comma/csv/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index 6674934c3..0ca19e6f0 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -162,7 +162,7 @@ def _read(self, size): if self.source == sys.stdin: b = sys.stdin.buffer.read( self.input_dtype.itemsize * ( size if size > 0 else self.size ) ) # todo! test on streams where bytes come with irregular delays! - if len(b) % self.input_dtype.itemsize != 0: raise TypeError( "expected records of size {}, got {} bytes, which is not divisible by record size".format( self.input_dtype.itemsize, len( b ) ) ) + if len(b) % self.input_dtype.itemsize != 0: raise ValueError( "expected records of size {}, got {} bytes, which is not divisible by record size".format( self.input_dtype.itemsize, len( b ) ) ) return np.frombuffer( b, dtype = self.input_dtype, count = len( b ) // self.input_dtype.itemsize ) else: return np.fromfile( self.source, dtype = self.input_dtype, count = -1 if size < 0 else size ) # this line may not be covered by regression test From 687c884aca569b4486890348f2a7a8311c2622d0 Mon Sep 17 00:00:00 2001 From: seva Date: Mon, 18 Nov 2019 10:50:18 +1100 Subject: [PATCH 0080/1056] csv-eval: !/usr/bin/env python replaced with /usr/bin/python until fully ported to python3 --- python/comma/csv/applications/csv-eval | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/comma/csv/applications/csv-eval b/python/comma/csv/applications/csv-eval index f170b3a22..340655a02 100644 --- a/python/comma/csv/applications/csv-eval +++ b/python/comma/csv/applications/csv-eval @@ -1,4 +1,5 @@ -#!/usr/bin/env python +#!/usr/bin/python +# todo: once fully ported to python3: !/usr/bin/env python # This file is part of comma, a generic and flexible library # Copyright (c) 2011 The University of Sydney From 0abbe1fd5a0d396044f0f316ba2072cedf9ce241 Mon Sep 17 00:00:00 2001 From: seva Date: Mon, 18 Nov 
2019 13:54:11 +1100 Subject: [PATCH 0081/1056] python/comma: porting to python3; stream: reading stdin conditioned on python version instead of numpy version; in progress... --- python/comma/csv/stream.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index 0ca19e6f0..af273094d 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -158,7 +158,8 @@ def read_from_line(self, line): def _read(self, size): if self.binary: - if np.__version__ >= '1.16.0': # sigh... + #if np.__version__ >= '1.16.0': # sigh... + if sys.version_info.major > 2: if self.source == sys.stdin: b = sys.stdin.buffer.read( self.input_dtype.itemsize * ( size if size > 0 else self.size ) ) # todo! test on streams where bytes come with irregular delays! From 87574731c73423cf1cc454ce1ff71b6f9aedfad1 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 18 Nov 2019 16:26:39 +1100 Subject: [PATCH 0082/1056] python/comma: porting to python3; stream: write(): checking numpy version replaced with checking python version; in progress... --- python/comma/csv/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index af273094d..1b3e5f37b 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -223,7 +223,7 @@ def write(self, s): msg = "size {} not equal to tied size {}".format(s.size, tied_size) raise ValueError(msg) if self.binary: - if np.__version__ >= '1.16.0' and self.target == sys.stdout: # sigh... + if sys.version_info > 2 and self.target == sys.stdout: # sigh... 
#self.stdout.write( self._tie_binary(self.tied._input_array, s).tobytes() if self.tied else s.tobytes() ) sys.stdout.buffer.write( self._tie_binary(self.tied._input_array, s).tobytes() if self.tied else s.tobytes() ) else: From be459087195bca376d5961673de9275163e43612 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 18 Nov 2019 19:52:25 +1100 Subject: [PATCH 0083/1056] python/comma: porting to python3; signal: ported to python3; in progress... --- python/comma/csv/applications/csv_eval.py | 6 +++++- python/comma/signal/signal.py | 6 ++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/python/comma/csv/applications/csv_eval.py b/python/comma/csv/applications/csv_eval.py index 6537f4721..df54118ec 100644 --- a/python/comma/csv/applications/csv_eval.py +++ b/python/comma/csv/applications/csv_eval.py @@ -553,9 +553,12 @@ def collect(var, fields): return '\n'.join("{v}['{f}'] = {f}".format(v=var, f=f) update = None output = None input = None - is_shutdown = comma.signal.is_shutdown() + is_shutdown = comma.signal.is_shutdown( verbose = stream.args.verbose ) + if is_shutdown: print( '--> a: is shutdown', file = sys.stderr ) if stream.args.first_line: input = stream.input.read_from_line(stream.args.first_line) + print( '--> b', file = sys.stderr ) while not is_shutdown: + print( '--> c', file = sys.stderr ) if input is not None: if size != input.size: size = input.size @@ -568,6 +571,7 @@ def collect(var, fields): return '\n'.join("{v}['{f}'] = {f}".format(v=var, f=f) else: stream.input.dump() input = stream.input.read( read_size ) if input is None: break + print( '--> d', file = sys.stderr ) def select(stream): input = None diff --git a/python/comma/signal/signal.py b/python/comma/signal/signal.py index efbdb0358..34ed994bb 100644 --- a/python/comma/signal/signal.py +++ b/python/comma/signal/signal.py @@ -43,8 +43,10 @@ def __init__( self, verbose = False ): def switch_on( self, signum, frame ): self.state = True - if self.verbose: print( 
os.path.basename(sys.argv[0]), "caught signal:", signum, file = sys.stderr ) + if self.verbose: print( os.path.basename( sys.argv[0] ), ": caught signal:", signum, file = sys.stderr ) - def __nonzero__( self ): return self.state + def __bool__( self ): return self.state + + __nonzero__ = __bool__ signal.signal( signal.SIGPIPE, signal.SIG_DFL ) From a877fef6bc1b94a2f6adff676f60afbc68b34365 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 18 Nov 2019 19:53:01 +1100 Subject: [PATCH 0084/1056] python/comma: porting to python3; debug output commented; in progress... --- python/comma/csv/applications/csv_eval.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/comma/csv/applications/csv_eval.py b/python/comma/csv/applications/csv_eval.py index df54118ec..003cfd4ef 100644 --- a/python/comma/csv/applications/csv_eval.py +++ b/python/comma/csv/applications/csv_eval.py @@ -554,11 +554,11 @@ def collect(var, fields): return '\n'.join("{v}['{f}'] = {f}".format(v=var, f=f) output = None input = None is_shutdown = comma.signal.is_shutdown( verbose = stream.args.verbose ) - if is_shutdown: print( '--> a: is shutdown', file = sys.stderr ) + #if is_shutdown: print( '--> a: is shutdown', file = sys.stderr ) if stream.args.first_line: input = stream.input.read_from_line(stream.args.first_line) - print( '--> b', file = sys.stderr ) + #print( '--> b', file = sys.stderr ) while not is_shutdown: - print( '--> c', file = sys.stderr ) + #print( '--> c', file = sys.stderr ) if input is not None: if size != input.size: size = input.size @@ -571,7 +571,7 @@ def collect(var, fields): return '\n'.join("{v}['{f}'] = {f}".format(v=var, f=f) else: stream.input.dump() input = stream.input.read( read_size ) if input is None: break - print( '--> d', file = sys.stderr ) + #print( '--> d', file = sys.stderr ) def select(stream): input = None From c761e82bd9bd3336f0f6122dbb9d8399ae329249 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 18 Nov 2019 20:19:38 +1100 
Subject: [PATCH 0085/1056] python/comma: porting to python3; csv_eval: fixed non-backward compatible python3 change in filter semantics; in progress... --- python/comma/csv/applications/csv_eval.py | 44 +++++++++++------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/python/comma/csv/applications/csv_eval.py b/python/comma/csv/applications/csv_eval.py index 003cfd4ef..1387a56c4 100644 --- a/python/comma/csv/applications/csv_eval.py +++ b/python/comma/csv/applications/csv_eval.py @@ -472,7 +472,7 @@ def __init__(self, args): if self.args.verbose: self.print_info() def initialize_input(self): - self.nonblank_input_fields = filter(None, self.args.fields) + self.nonblank_input_fields = list( filter( None, self.args.fields ) ) if not self.nonblank_input_fields: raise csv_eval_error("please specify input stream fields, e.g. --fields=x,y") check_fields(self.nonblank_input_fields) types = comma.csv.format.to_numpy(self.args.format) @@ -528,50 +528,46 @@ def check_output_fields(fields, input_fields): raise csv_eval_error(msg) def evaluate(stream): - def disperse(var, fields): return '\n'.join("{f} = {v}['{f}']".format(v=var, f=f) for f in fields) - def collect(var, fields): return '\n'.join("{v}['{f}'] = {f}".format(v=var, f=f) for f in fields) + def disperse( var, fields ): return '\n'.join("{f} = {v}['{f}']".format( v = var, f = f ) for f in fields ) + def collect( var, fields ): return '\n'.join("{v}['{f}'] = {f}".format( v = var, f = f ) for f in fields ) if stream.args.init_values == '': read_size = None init_code_string = '' else: read_size = 1 - init_code_string = '\n'.join([stream.args.default_values, - stream.args.init_values, - disperse('_input', stream.nonblank_input_fields), - collect('_update', stream.args.update_fields), - collect('_output', stream.args.output_fields)]) - code_string = '\n'.join([stream.args.default_values, - disperse('_input', stream.nonblank_input_fields), - disperse('_output', stream.args.output_fields), - 
stream.args.expressions, - collect('_update', stream.args.update_fields), - collect('_output', stream.args.output_fields)]) - init_code = compile(init_code_string, '', 'exec') - code = compile(code_string, '', 'exec') - env = np.__dict__ if stream.args.permissive else restricted_numpy_env() + init_code_string = '\n'.join( [ stream.args.default_values, + stream.args.init_values, + disperse( '_input', stream.nonblank_input_fields ), + collect( '_update', stream.args.update_fields ), + collect( '_output', stream.args.output_fields ) ] ) + code_string = '\n'.join( [ stream.args.default_values, + disperse( '_input', stream.nonblank_input_fields ), + disperse( '_output', stream.args.output_fields ), + stream.args.expressions, + collect( '_update', stream.args.update_fields ), + collect( '_output', stream.args.output_fields ) ] ) + init_code = compile( init_code_string, '', 'exec' ) + code = compile( code_string, '', 'exec' ) + env = np.__dict__ if stream.args.permissive else restricted_numpy_env() size = None update = None output = None input = None is_shutdown = comma.signal.is_shutdown( verbose = stream.args.verbose ) - #if is_shutdown: print( '--> a: is shutdown', file = sys.stderr ) - if stream.args.first_line: input = stream.input.read_from_line(stream.args.first_line) - #print( '--> b', file = sys.stderr ) + if stream.args.first_line: input = stream.input.read_from_line( stream.args.first_line ) while not is_shutdown: - #print( '--> c', file = sys.stderr ) if input is not None: if size != input.size: size = input.size if stream.args.update_fields: update = stream.update_t(size) if stream.args.output_fields: output = stream.output_t(size) - exec( init_code, env, {'_input': input, '_update': update, '_output': output} ) - exec( code, env, {'_input': input, '_update': update, '_output': output} ) + exec( init_code, env, { '_input': input, '_update': update, '_output': output } ) + exec( code, env, { '_input': input, '_update': update, '_output': output } ) if 
stream.args.update_fields: update_buffer(stream.input, update) if stream.args.output_fields: stream.output.write(output) else: stream.input.dump() input = stream.input.read( read_size ) if input is None: break - #print( '--> d', file = sys.stderr ) def select(stream): input = None From 41b7c3dfbc5ab13ce0a363d8d0946fc394626e18 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 18 Nov 2019 20:30:50 +1100 Subject: [PATCH 0086/1056] python/comma: porting to python3; csv_eval, stream: using zip instead itertools.izip, with potential performance deterioration; in progress... --- python/comma/csv/applications/csv_eval.py | 12 +++++++----- python/comma/csv/stream.py | 6 ++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/python/comma/csv/applications/csv_eval.py b/python/comma/csv/applications/csv_eval.py index 1387a56c4..8582b1e05 100644 --- a/python/comma/csv/applications/csv_eval.py +++ b/python/comma/csv/applications/csv_eval.py @@ -28,13 +28,15 @@ # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from __future__ import print_function -import sys -import os import argparse +import ast +import itertools import numpy as np +import os import re -import itertools -import ast +import sys +if sys.version_info.major < 3: from itertools import izip +else: izip = zip # todo! watch performance! 
it's reported python3 zip is some 30% slower than izip import comma description = """ @@ -449,7 +451,7 @@ def update_buffer(stream, update_array): stream._input_array[fields[index(f)]] = update_array[f] else: def updated_lines(): - for line, scalars in itertools.izip(stream._ascii_buffer, update_array): + for line, scalars in izip(stream._ascii_buffer, update_array): values = line.split(stream.delimiter) for f, s in zip(update_array.dtype.names, stream._strings(scalars)): values[index(f)] = s diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index 1b3e5f37b..cbfc59438 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -34,6 +34,8 @@ import os import sys import warnings +if sys.version_info.major < 3: from itertools import izip +else: izip = zip # todo! watch performance! it's reported python3 zip is some 30% slower than izip from ..util import warning from ..io import readlines_unbuffered from ..numpy import merge_arrays, types_of_dtype, structured_dtype @@ -240,7 +242,7 @@ def write(self, s): def _tie_binary(self, tied_array, array): return merge_arrays(tied_array, array) def _tie_ascii(self, tied_buffer, unrolled_array): - for tied_line, scalars in itertools.izip(tied_buffer, unrolled_array): yield self.delimiter.join([tied_line] + self._strings(scalars)) + for tied_line, scalars in izip(tied_buffer, unrolled_array): yield self.delimiter.join([tied_line] + self._strings(scalars)) def _toline(self, scalars): return self.delimiter.join(self._strings(scalars)) @@ -273,7 +275,7 @@ def _dump_with_mask(self, mask): if self.binary: self._input_array[mask].tofile(self.target) else: - for line, allowed in itertools.izip(self._ascii_buffer, mask): + for line, allowed in izip(self._ascii_buffer, mask): if allowed: print( line, file = self.target ) self.target.flush() From dd5882ed9ff4838041955d62f7c71f229b995176 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 18 Nov 2019 20:57:24 +1100 Subject: [PATCH 0087/1056] python/comma: 
porting to python3; stream: convert mapped stuff to the list, with potential performance deterioration; in progress... --- python/comma/csv/stream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index cbfc59438..a363f31ff 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -242,9 +242,9 @@ def write(self, s): def _tie_binary(self, tied_array, array): return merge_arrays(tied_array, array) def _tie_ascii(self, tied_buffer, unrolled_array): - for tied_line, scalars in izip(tied_buffer, unrolled_array): yield self.delimiter.join([tied_line] + self._strings(scalars)) + for tied_line, scalars in izip(tied_buffer, unrolled_array): yield self.delimiter.join([tied_line] + list(self._strings(scalars))) - def _toline(self, scalars): return self.delimiter.join(self._strings(scalars)) + def _toline(self, scalars): return list(self.delimiter.join(self._strings(scalars))) def dump(self, mask=None): """ From d22dfa6d344f5bd03b6c85aaba0d35fee1c461e7 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 18 Nov 2019 21:13:21 +1100 Subject: [PATCH 0088/1056] python/comma: porting to python3; stream: typo fixed; in progress... --- python/comma/csv/stream.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index a363f31ff..cb000045f 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -160,8 +160,7 @@ def read_from_line(self, line): def _read(self, size): if self.binary: - #if np.__version__ >= '1.16.0': # sigh... - if sys.version_info.major > 2: + if sys.version_info.major > 2: #if np.__version__ >= '1.16.0': # sigh... if self.source == sys.stdin: b = sys.stdin.buffer.read( self.input_dtype.itemsize * ( size if size > 0 else self.size ) ) # todo! test on streams where bytes come with irregular delays! 
@@ -225,7 +224,7 @@ def write(self, s): msg = "size {} not equal to tied size {}".format(s.size, tied_size) raise ValueError(msg) if self.binary: - if sys.version_info > 2 and self.target == sys.stdout: # sigh... + if sys.version_info.major > 2 and self.target == sys.stdout: # sigh... #self.stdout.write( self._tie_binary(self.tied._input_array, s).tobytes() if self.tied else s.tobytes() ) sys.stdout.buffer.write( self._tie_binary(self.tied._input_array, s).tobytes() if self.tied else s.tobytes() ) else: From 95a9240fbe8eeed2d5a113741aa024cc9180c254 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Mon, 18 Nov 2019 22:09:46 +1100 Subject: [PATCH 0089/1056] python/comma: porting to python3; stream: _toline() fixed, which seems to have fixed most of the ascii tests; in progress... --- python/comma/csv/stream.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index cb000045f..a17e5443e 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -240,10 +240,14 @@ def write(self, s): def _tie_binary(self, tied_array, array): return merge_arrays(tied_array, array) - def _tie_ascii(self, tied_buffer, unrolled_array): - for tied_line, scalars in izip(tied_buffer, unrolled_array): yield self.delimiter.join([tied_line] + list(self._strings(scalars))) - - def _toline(self, scalars): return list(self.delimiter.join(self._strings(scalars))) + if sys.version_info.major < 3: # python3, sigh... 
don't ask + def _tie_ascii(self, tied_buffer, unrolled_array): + for tied_line, scalars in izip(tied_buffer, unrolled_array): yield self.delimiter.join([tied_line] + self._strings(scalars)) + else: + def _tie_ascii(self, tied_buffer, unrolled_array): + for tied_line, scalars in izip(tied_buffer, unrolled_array): yield self.delimiter.join([tied_line] + list(self._strings(scalars))) + + def _toline(self, scalars): return self.delimiter.join(self._strings(scalars)) def dump(self, mask=None): """ From 5dd2ab16962850ee336bd6c73781f46ee32bbe38 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 19 Nov 2019 17:54:44 +1100 Subject: [PATCH 0090/1056] python/comma: porting to python3; stream: dump() and update_fields fixed (tobytes() and deepcopy() used respectively); in progress... --- python/comma/csv/stream.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index a17e5443e..5e685c55e 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -28,6 +28,7 @@ # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from __future__ import print_function +import copy import functools import itertools import numpy as np @@ -129,7 +130,7 @@ def read(self, size=None): if no records have been read, return None """ if size is None: size = self.size - self._input_array = self._read(size) + self._input_array = copy.deepcopy( self._read( size ) ) if sys.version_info.major > 2 else self._read( size ) # todo! watch performance in python3! if self._input_array.size == 0: return return self._struct_array(self._input_array, self.missing_values) @@ -258,7 +259,10 @@ def dump(self, mask=None): def _dump(self): if self.binary: - self._input_array.tofile(self.target) + if sys.version_info.major > 2 and self.target == sys.stdout: # sigh... 
+ sys.stdout.buffer.write( self._input_array.tobytes() ) + else: + self._input_array.tofile( self.target ) else: for line in self._ascii_buffer: print( line, file = self.target ) self.target.flush() From 4d3c4dfb670dec5d1a65daf1b263822da650b4c1 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 19 Nov 2019 21:48:34 +1100 Subject: [PATCH 0091/1056] python/comma: porting to python3; stream: zip... replaced with list(zip...), since in python3 it returns iterator, not container; dump: tobytes used instead of tofile for stdout output; in progress... --- python/comma/csv/stream.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index 5e685c55e..2e4ac9931 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -97,8 +97,7 @@ def __init__(self, self.default_values = self._default_values(default_values) self.missing_values = self._missing_values() self.data_extraction_fields = self._data_extraction_fields() - self.struct_and_extraction_fields = zip(self.struct.flat_dtype.names, - self.data_extraction_fields) + self.struct_and_extraction_fields = list( zip( self.struct.flat_dtype.names, self.data_extraction_fields ) ) #self.write_dtype = self._write_dtype() #self.unrolled_write_dtype = structured_dtype( ','.join( types_of_dtype( self.write_dtype, unroll=True ) ) ) #print( "self.write_dtype.descr = %s" % str(self.write_dtype.descr), file = sys.stderr ) @@ -280,7 +279,10 @@ def _dump_with_mask(self, mask): msg = "mask size {} not equal to data size {}".format(mask.size, data_size) raise ValueError(msg) if self.binary: - self._input_array[mask].tofile(self.target) + if sys.version_info.major > 2 and self.target == sys.stdout: # sigh... 
+ sys.stdout.buffer.write( self._input_array[mask].tobytes() ) + else: + self._input_array[mask].tofile(self.target) else: for line, allowed in izip(self._ascii_buffer, mask): if allowed: print( line, file = self.target ) From 804bf7038b0af0087339cb801b28bb2e20a33d31 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Tue, 19 Nov 2019 22:44:28 +1100 Subject: [PATCH 0092/1056] python/comma: porting to python3; stream._read(): bug fixed: if size is 0, keep it 0; in progress... --- python/comma/csv/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index 2e4ac9931..aa60094dd 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -162,7 +162,7 @@ def _read(self, size): if self.binary: if sys.version_info.major > 2: #if np.__version__ >= '1.16.0': # sigh... if self.source == sys.stdin: - b = sys.stdin.buffer.read( self.input_dtype.itemsize * ( size if size > 0 else self.size ) ) + b = sys.stdin.buffer.read( self.input_dtype.itemsize * ( size if size >= 0 else self.size ) ) # todo! test on streams where bytes come with irregular delays! if len(b) % self.input_dtype.itemsize != 0: raise ValueError( "expected records of size {}, got {} bytes, which is not divisible by record size".format( self.input_dtype.itemsize, len( b ) ) ) return np.frombuffer( b, dtype = self.input_dtype, count = len( b ) // self.input_dtype.itemsize ) From 1517f75f4c5255f13db55e9fd281559872346f10 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Wed, 20 Nov 2019 21:43:12 +1100 Subject: [PATCH 0093/1056] python/comma: porting to python3; io.readlines_unbuffered: bug fixed: filter replaced with list( filter ); in progress... 
--- python/comma/io/readlines_unbuffered.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/comma/io/readlines_unbuffered.py b/python/comma/io/readlines_unbuffered.py index 6e6d5d286..5e94259b7 100644 --- a/python/comma/io/readlines_unbuffered.py +++ b/python/comma/io/readlines_unbuffered.py @@ -42,6 +42,7 @@ def readlines_unbuffered(size, source=sys.stdin, skip_blank_lines=True): - a blank line is a line that has only whitespace characters or no characters - blank lines are not counted towards size """ + print( '--> readlines_unbuffered(): size:', size, file=sys.stderr ) if size >= 0: lines = [] number_of_lines = 0 @@ -55,7 +56,7 @@ def readlines_unbuffered(size, source=sys.stdin, skip_blank_lines=True): number_of_lines += 1 return lines if skip_blank_lines: - source_ = ifilter(lambda line: line.strip(), source) + source_ = list( ifilter(lambda line: line.strip(), source) ) else: source_ = source return [line.rstrip('\n') for line in source_] From cdaf88466a9191092c8a2be95ea1c9d8ca3a0803 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Wed, 20 Nov 2019 22:14:52 +1100 Subject: [PATCH 0094/1056] python/comma: porting to python3; io.readlines_unbuffered: debug print removed; in progress... 
--- python/comma/io/readlines_unbuffered.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/comma/io/readlines_unbuffered.py b/python/comma/io/readlines_unbuffered.py index 5e94259b7..94ec2a389 100644 --- a/python/comma/io/readlines_unbuffered.py +++ b/python/comma/io/readlines_unbuffered.py @@ -42,7 +42,6 @@ def readlines_unbuffered(size, source=sys.stdin, skip_blank_lines=True): - a blank line is a line that has only whitespace characters or no characters - blank lines are not counted towards size """ - print( '--> readlines_unbuffered(): size:', size, file=sys.stderr ) if size >= 0: lines = [] number_of_lines = 0 From c1635349618e4b41fc87253e8d20ccdceb7ca5ff Mon Sep 17 00:00:00 2001 From: vlaskine Date: Wed, 20 Nov 2019 22:38:36 +1100 Subject: [PATCH 0095/1056] python/comma: porting to python3; unicode string vs bytes: limitation resolved and documented in --help; in progress... --- python/comma/csv/applications/csv_eval.py | 7 ++++++- .../csv/applications/test/csv-eval/select/basic/ascii/test | 2 +- .../applications/test/csv-eval/select/basic/binary/test | 2 +- .../applications/test/csv-eval/select/fields/ascii/test | 2 +- .../applications/test/csv-eval/select/fields/binary/test | 2 +- .../test/csv-eval/select/select_all/ascii/test | 2 +- .../test/csv-eval/select/select_all/binary/test | 2 +- 7 files changed, 12 insertions(+), 7 deletions(-) diff --git a/python/comma/csv/applications/csv_eval.py b/python/comma/csv/applications/csv_eval.py index 8582b1e05..6acdd6a5b 100644 --- a/python/comma/csv/applications/csv_eval.py +++ b/python/comma/csv/applications/csv_eval.py @@ -142,9 +142,14 @@ string functions: http://docs.scipy.org/doc/numpy/reference/routines.char.html - + ( echo 'a'; echo 'a/b' ) | %(prog)s --fields=path --format=s[36] 'n=char.count(path,"/")' --output-format=ui ( echo 'a'; echo 'a/b' ) | %(prog)s --fields=path --format=s[36] 'r=char.replace(path,"/","_")' --output-format=s[36] + + LIMITATION: in python3, csv-eval represents strings as 
np.bytes_ (for consistent binary support) + python2: you could write: ( echo 'a'; echo 'a/b' ) | %(prog)s --fields=path --format=s[36] 'n=char.count(path,"/")' --output-format=ui + python3: you should write: ( echo 'a'; echo 'a/b' ) | %(prog)s --fields=path --format=s[36] 'n=char.count(char.decode(path),"/")' --output-format=ui + for backward compatibility, use the latter variant time arithmetic: http://docs.scipy.org/doc/numpy/reference/arrays.datetime.html#datetime-and-timedelta-arithmetic diff --git a/python/comma/csv/applications/test/csv-eval/select/basic/ascii/test b/python/comma/csv/applications/test/csv-eval/select/basic/ascii/test index 714e7a66b..595d55549 100755 --- a/python/comma/csv/applications/test/csv-eval/select/basic/ascii/test +++ b/python/comma/csv/applications/test/csv-eval/select/basic/ascii/test @@ -1,4 +1,4 @@ #!/bin/bash -csv-eval -v --fields=a,b,name --format=2i,s[1] --select "(a < b - 1) & (name == 'y')" \ +csv-eval -v --fields=a,b,name --format=2i,s[1] --select "logical_and( a < b - 1, char.decode( name ) == 'y' )" \ | name-value-from-csv a,b,allow --line-number --prefix output | sed 's/"//g' diff --git a/python/comma/csv/applications/test/csv-eval/select/basic/binary/test b/python/comma/csv/applications/test/csv-eval/select/basic/binary/test index 12baee557..cd3dafd9d 100755 --- a/python/comma/csv/applications/test/csv-eval/select/basic/binary/test +++ b/python/comma/csv/applications/test/csv-eval/select/basic/binary/test @@ -1,4 +1,4 @@ #!/bin/bash -csv-to-bin 2i,s[1] | csv-eval --fields=a,b,name --binary=2i,s[1] --select "(a < b - 1) & (name == 'y')" | csv-from-bin 2i,s[1] \ +csv-to-bin 2i,s[1] | csv-eval --fields=a,b,name --binary=2i,s[1] --select "logical_and( a < b - 1, char.decode( name ) == 'y' )" | csv-from-bin 2i,s[1] \ | name-value-from-csv a,b,allow --line-number --prefix output | sed 's/"//g' diff --git a/python/comma/csv/applications/test/csv-eval/select/fields/ascii/test 
b/python/comma/csv/applications/test/csv-eval/select/fields/ascii/test index 151033cd8..c6cc4ce90 100755 --- a/python/comma/csv/applications/test/csv-eval/select/fields/ascii/test +++ b/python/comma/csv/applications/test/csv-eval/select/fields/ascii/test @@ -1,4 +1,4 @@ #!/bin/bash -csv-eval -v --fields=a,b,,name --format=2i,,s[1] --select "(a < b - 1) & (name == 'y')" \ +csv-eval -v --fields=a,b,,name --format=2i,,s[1] --select "logical_and(a < b - 1, char.decode(name) == 'y')" \ | name-value-from-csv a,b,dummy,allow,payload --line-number --prefix output | sed 's/"//g' diff --git a/python/comma/csv/applications/test/csv-eval/select/fields/binary/test b/python/comma/csv/applications/test/csv-eval/select/fields/binary/test index 53c017294..2bd0f5714 100755 --- a/python/comma/csv/applications/test/csv-eval/select/fields/binary/test +++ b/python/comma/csv/applications/test/csv-eval/select/fields/binary/test @@ -1,4 +1,4 @@ #!/bin/bash -csv-to-bin 2i,2s[1],ui | csv-eval -v --fields=a,b,,name --binary=2i,2s[1],ui --select "(a < b - 1) & (name == 'y')" | csv-from-bin 2i,2s[1],ui \ +csv-to-bin 2i,2s[1],ui | csv-eval -v --fields=a,b,,name --binary=2i,2s[1],ui --select "logical_and(a < b - 1, char.decode(name) == 'y')" | csv-from-bin 2i,2s[1],ui \ | name-value-from-csv a,b,dummy,allow,payload --line-number --prefix output | sed 's/"//g' diff --git a/python/comma/csv/applications/test/csv-eval/select/select_all/ascii/test b/python/comma/csv/applications/test/csv-eval/select/select_all/ascii/test index a7ba750aa..581b09f4f 100755 --- a/python/comma/csv/applications/test/csv-eval/select/select_all/ascii/test +++ b/python/comma/csv/applications/test/csv-eval/select/select_all/ascii/test @@ -1,4 +1,4 @@ #!/bin/bash -csv-eval --fields=a,b,name --format=2i,s[1] --select "(a < b) & ( name == 'y')" \ +csv-eval --fields=a,b,name --format=2i,s[1] --select "(a < b) & ( char.decode(name) == 'y')" \ | name-value-from-csv a,b,allow --line-number --prefix output | sed 's/"//g' diff --git 
a/python/comma/csv/applications/test/csv-eval/select/select_all/binary/test b/python/comma/csv/applications/test/csv-eval/select/select_all/binary/test index 6f22710aa..5d339db17 100755 --- a/python/comma/csv/applications/test/csv-eval/select/select_all/binary/test +++ b/python/comma/csv/applications/test/csv-eval/select/select_all/binary/test @@ -1,4 +1,4 @@ #!/bin/bash -csv-to-bin 2i,s[1] | csv-eval --fields=a,b,name --binary=2i,s[1] --select "(a < b) & ( name == 'y')" | csv-from-bin 2i,s[1] \ +csv-to-bin 2i,s[1] | csv-eval --fields=a,b,name --binary=2i,s[1] --select "(a < b) & ( char.decode(name) == 'y')" | csv-from-bin 2i,s[1] \ | name-value-from-csv a,b,allow --line-number --prefix output | sed 's/"//g' From eec858c3e687de0b052b9e42d7c3bf4871ce7f79 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Wed, 20 Nov 2019 22:59:57 +1100 Subject: [PATCH 0096/1056] python/comma: porting to python3; unicode string vs bytes: more tests fixed and examples added to --help; in progress... --- python/comma/csv/applications/csv_eval.py | 4 ++++ .../csv/applications/test/csv-eval/default_format/ascii/test | 2 +- .../csv/applications/test/csv-eval/strings/count/ascii/test | 2 +- .../csv/applications/test/csv-eval/strings/count/binary/test | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/comma/csv/applications/csv_eval.py b/python/comma/csv/applications/csv_eval.py index 6acdd6a5b..7e2ad7a07 100644 --- a/python/comma/csv/applications/csv_eval.py +++ b/python/comma/csv/applications/csv_eval.py @@ -150,6 +150,10 @@ python2: you could write: ( echo 'a'; echo 'a/b' ) | %(prog)s --fields=path --format=s[36] 'n=char.count(path,"/")' --output-format=ui python3: you should write: ( echo 'a'; echo 'a/b' ) | %(prog)s --fields=path --format=s[36] 'n=char.count(char.decode(path),"/")' --output-format=ui for backward compatibility, use the latter variant + it may lead to ugly constructs for python3: + python2: csv-eval --fields=s --format s[36] 'u=char.upper(name)' 
--output-format=s[36] + python3: csv-eval --fields=s --format s[36] 'u=char.encode(char.upper(char.decode(name)))' --output-format=s[36] + but unfortunately, this limitation is unlikely to go away time arithmetic: http://docs.scipy.org/doc/numpy/reference/arrays.datetime.html#datetime-and-timedelta-arithmetic diff --git a/python/comma/csv/applications/test/csv-eval/default_format/ascii/test b/python/comma/csv/applications/test/csv-eval/default_format/ascii/test index 4c5bc4761..33e23bfaa 100755 --- a/python/comma/csv/applications/test/csv-eval/default_format/ascii/test +++ b/python/comma/csv/applications/test/csv-eval/default_format/ascii/test @@ -1,3 +1,3 @@ #!/bin/bash -csv-eval --fields=,path,x,name --format ,s[36],,s[4] 'n=char.count(path,"/");y=x+1;uname=char.upper(name)' --output-format=s[36],,s[4] | name-value-from-csv blank,path,x,name,n,y,uname --line-number --prefix output | tr -d '"' +csv-eval --fields=,path,x,name --format ,s[36],,s[4] 'n=char.count(char.decode(path),"/");y=x+1;uname=char.encode(char.upper(char.decode(name)))' --output-format=s[36],,s[4] | name-value-from-csv blank,path,x,name,n,y,uname --line-number --prefix output | tr -d '"' diff --git a/python/comma/csv/applications/test/csv-eval/strings/count/ascii/test b/python/comma/csv/applications/test/csv-eval/strings/count/ascii/test index 95fb5eaca..8b3efe3b6 100755 --- a/python/comma/csv/applications/test/csv-eval/strings/count/ascii/test +++ b/python/comma/csv/applications/test/csv-eval/strings/count/ascii/test @@ -1,3 +1,3 @@ #!/bin/bash -csv-eval --fields=,path --format i,s[36],ui,s[4] 'n=char.count(path,"/")' --output-format=s[36] | name-value-from-csv x,path,num,string,n --line-number --prefix output | tr -d '"' +csv-eval --fields=,path --format i,s[36],ui,s[4] 'n=char.count(char.decode(path),"/")' --output-format=s[36] | name-value-from-csv x,path,num,string,n --line-number --prefix output | tr -d '"' diff --git a/python/comma/csv/applications/test/csv-eval/strings/count/binary/test 
b/python/comma/csv/applications/test/csv-eval/strings/count/binary/test index 0cf2f3691..e44699078 100755 --- a/python/comma/csv/applications/test/csv-eval/strings/count/binary/test +++ b/python/comma/csv/applications/test/csv-eval/strings/count/binary/test @@ -1,3 +1,3 @@ #!/bin/bash -csv-to-bin i,s[36],ui,s[4] | csv-eval --fields=,path --binary i,s[36],ui,s[4] 'n=char.count(path,"/")' --output-format=s[36] | csv-from-bin i,s[36],ui,s[4],s[36] | name-value-from-csv x,path,num,string,n --line-number --prefix output | tr -d '"' +csv-to-bin i,s[36],ui,s[4] | csv-eval --fields=,path --binary i,s[36],ui,s[4] 'n=char.count(char.decode(path),"/")' --output-format=s[36] | csv-from-bin i,s[36],ui,s[4],s[36] | name-value-from-csv x,path,num,string,n --line-number --prefix output | tr -d '"' From a7f8742e269fa30e561ba426afae201bb8532fab Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 21 Nov 2019 09:16:53 +1100 Subject: [PATCH 0097/1056] python/comma: porting to python3; name_value/test/eval: test case fixed: integer division: // used instead of /; in progress... --- name_value/test/eval/data/input_3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/name_value/test/eval/data/input_3 b/name_value/test/eval/data/input_3 index 936c66d69..e3bd4042d 100644 --- a/name_value/test/eval/data/input_3 +++ b/name_value/test/eval/data/input_3 @@ -7,4 +7,4 @@ x != 3 x != "some string" # integer division -x = 11 / 5 +x = 11 // 5 From 66d328dc4717b4f5bf896ebffd71689960c7c4fa Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 21 Nov 2019 09:35:32 +1100 Subject: [PATCH 0098/1056] python/comma: porting to python3; stream.py: warnings seem to be buggy in python3.7; use print for now; in progress... 
--- python/comma/csv/stream.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/comma/csv/stream.py b/python/comma/csv/stream.py index aa60094dd..afa5c7e64 100644 --- a/python/comma/csv/stream.py +++ b/python/comma/csv/stream.py @@ -290,8 +290,10 @@ def _dump_with_mask(self, mask): def _warn(self, msg, verbose=True): if verbose: - with warning(custom_formatwarning) as warn: - warn(msg) + if sys.version_info.major < 3: # sigh, something is broken at least in python3.7; dumb it down for now + with warning(custom_formatwarning) as warn: warn(msg) + else: + print( 'stream.py: warning:', msg, file=sys.stderr ) def _struct(self, s): if not isinstance(s, struct): From 15e703a21c5e5f607a71a8b9e9f9d0c6d836b46c Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 22 Nov 2019 12:10:48 +1100 Subject: [PATCH 0099/1056] python/comma: porting to python3; name-value-eval: test fixed in a quick and dirty manner; by right, the whole test is too rigid and needs to be fully rewritten; porting more or less done --- name_value/test/eval/data/stderr_13 | 3 ++- name_value/test/eval/data/stderr_14 | 3 ++- name_value/test/eval/data/stderr_27 | 3 ++- name_value/test/eval/data/stderr_8 | 3 ++- name_value/test/eval/test | 16 ++++++++++++---- 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/name_value/test/eval/data/stderr_13 b/name_value/test/eval/data/stderr_13 index 49a434b97..ec96e98a6 100644 --- a/name_value/test/eval/data/stderr_13 +++ b/name_value/test/eval/data/stderr_13 @@ -1 +1,2 @@ -name-value-eval: line 1: AttributeError: OBJ instance has no attribute 'whatever' +python2:name-value-eval: line 1: AttributeError: OBJ instance has no attribute 'whatever' +python3:name-value-eval: line 1: AttributeError: 'OBJ' object has no attribute 'whatever' \ No newline at end of file diff --git a/name_value/test/eval/data/stderr_14 b/name_value/test/eval/data/stderr_14 index 224f578e7..cfa22eede 100644 --- a/name_value/test/eval/data/stderr_14 +++ 
b/name_value/test/eval/data/stderr_14 @@ -1 +1,2 @@ -name-value-eval: line 2: AttributeError: OBJ instance has no attribute '__getitem__' +python2:name-value-eval: line 2: AttributeError: OBJ instance has no attribute '__getitem__' +python3:name-value-eval: line 2: TypeError: 'OBJ' object does not support indexing \ No newline at end of file diff --git a/name_value/test/eval/data/stderr_27 b/name_value/test/eval/data/stderr_27 index 811dc83b2..e2105bdbe 100644 --- a/name_value/test/eval/data/stderr_27 +++ b/name_value/test/eval/data/stderr_27 @@ -1 +1,2 @@ -name-value-eval: TypeError: variable "route" is used in an expression but is an object (example: "a/b = 3; a < 0") +python2:name-value-eval: TypeError: variable "route" is used in an expression but is an object (example: "a/b = 3; a < 0") +python3:name-value-eval: line 1: TypeError: unorderable types: OBJ() > int() \ No newline at end of file diff --git a/name_value/test/eval/data/stderr_8 b/name_value/test/eval/data/stderr_8 index 790deb67b..0570fd918 100644 --- a/name_value/test/eval/data/stderr_8 +++ b/name_value/test/eval/data/stderr_8 @@ -1 +1,2 @@ -name-value-eval: line 1: TypeError: cannot concatenate 'str' and 'int' objects +python2:name-value-eval: line 1: TypeError: cannot concatenate 'str' and 'int' objects +python3:name-value-eval: line 1: TypeError: Can't convert 'int' object to str implicitly \ No newline at end of file diff --git a/name_value/test/eval/test b/name_value/test/eval/test index b6f7de6e9..580bdc23a 100755 --- a/name_value/test/eval/test +++ b/name_value/test/eval/test @@ -11,7 +11,8 @@ if [[ ! -d "$data_dir" ]]; then fi tmpdir="output/tmp" -mkdir $tmpdir +rm -rf $tmpdir +mkdir $tmpdir -p verbose=0 while [[ $# -gt 0 ]]; do @@ -27,6 +28,8 @@ done variables="$data_dir/variable_vals" pass_count=0 total_count=0 +python_version="$( python -c "from __future__ import print_function; import sys; print( sys.version_info.major )" )" # sigh... 
+python_prefix="python${python_version}:" for input in $data_dir/input*; do (( ++total_count )) @@ -34,13 +37,18 @@ for input in $data_dir/input*; do if (( verbose )); then echo "$name: running test $suffix" >&2; fi stdout="$tmpdir/stdout.$suffix" stderr="$tmpdir/stderr.$suffix" + if grep -q "$python_prefix" < "$data_dir/stderr_$suffix"; then + expected_stderr="$( grep "^$python_prefix" < "$data_dir/stderr_$suffix" | sed "s#^$python_prefix##" )" # quick and dirty; sigh... + else + expected_stderr="$( cat "$data_dir/stderr_$suffix" )" + fi cat $input | name-value-eval --variables=$variables --test > $stdout 2> $stderr + actual_stderr=$( cat $stderr ) if ! cmp --quiet $stdout $data_dir/stdout_$suffix; then echo "$name: test failed for input_$suffix (stdout is different):" >&2 diff $stdout $data_dir/stdout_$suffix >&2 - elif ! cmp --quiet $stderr $data_dir/stderr_$suffix; then - echo "$name: test failed for input_$suffix (stderr is different):" >&2 - diff $stderr $data_dir/stderr_$suffix >&2 + elif ! 
cmp --quiet <( echo "$actual_stderr" ) <( echo "$expected_stderr" ); then + diff <( echo "$actual_stderr" ) <( echo "$expected_stderr" ) >&2 else (( ++pass_count )) if (( verbose )); then echo "$name: test $suffix passed" >&2; fi From ad42b42793f9c928106bc833996e743cba96bc97 Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 10 Dec 2019 13:04:03 +1100 Subject: [PATCH 0100/1056] application/command_line_options: bug fixed: now allows unnamed values; unit test added --- .../test/comma-options-validate/expected | 18 ++++++++++++++++++ .../test/comma-options-validate/input | 19 +++++++++++++++++++ application/command_line_options.cpp | 1 + 3 files changed, 38 insertions(+) create mode 100644 application/applications/test/comma-options-validate/expected create mode 100644 application/applications/test/comma-options-validate/input diff --git a/application/applications/test/comma-options-validate/expected b/application/applications/test/comma-options-validate/expected new file mode 100644 index 000000000..5840f904e --- /dev/null +++ b/application/applications/test/comma-options-validate/expected @@ -0,0 +1,18 @@ +valid_options/valueless[0]/status=0 +valid_options/valueless[1]/status=0 +valid_options/valueless[2]/status=0 +valid_options/valueless[3]/status=0 +valid_options/valueless[4]/status=0 +valid_options/valueless[5]/status=0 +valid_options/valued[0]/status=0 +valid_options/valued[1]/status=0 +valid_options/valued[2]/status=0 +valid_options/valued[3]/status=0 +valid_options/valued[4]/status=0 +valid_options/valued[5]/status=0 +invalid_options/valueless[0]/status=1 +invalid_options/valueless[1]/status=1 +invalid_options/valueless[2]/status=1 +invalid_options/valueless[3]/status=1 +invalid_options/valued[0]/status=1 +invalid_options/valued[1]/status=1 diff --git a/application/applications/test/comma-options-validate/input b/application/applications/test/comma-options-validate/input new file mode 100644 index 000000000..4782830e9 --- /dev/null +++ 
b/application/applications/test/comma-options-validate/input @@ -0,0 +1,19 @@ +valid_options/valueless[0]="echo '--verbose,-v' | comma-options-validate -v" +valid_options/valueless[1]="echo '--verbose,-v' | comma-options-validate hello -v" +valid_options/valueless[2]="echo '--verbose,-v' | comma-options-validate -v world" +valid_options/valueless[3]="echo '--verbose,-v' | comma-options-validate -v hello world" +valid_options/valueless[4]="echo '--verbose,-v' | comma-options-validate -v -" +valid_options/valueless[5]="echo '--verbose,-v' | comma-options-validate - -v" +valid_options/valued[0]="echo '--file,-f=' | comma-options-validate -f -5" +valid_options/valued[1]="echo '--file,-f=' | comma-options-validate -f -5 6" +valid_options/valued[2]="echo '--file,-f=' | comma-options-validate 4 -f -5" +valid_options/valued[3]="echo '--file,-f=' | comma-options-validate 4 -f -5 6" +valid_options/valued[4]="echo '--file,-f=' | comma-options-validate -f -5 -" +valid_options/valued[5]="echo '--file,-f=' | comma-options-validate - -f -5" + +invalid_options/valueless[0]="echo '--verbose,-v' | comma-options-validate -v -g" +invalid_options/valueless[1]="echo '--verbose,-v' | comma-options-validate hello -g -v" +invalid_options/valueless[2]="echo '--verbose,-v' | comma-options-validate -v -g world" +invalid_options/valueless[3]="echo '--verbose,-v' | comma-options-validate -v world -g" +invalid_options/valued[0]="echo '--file,-f=' | comma-options-validate -g -f 5" +invalid_options/valued[1]="echo '--file,-f=' | comma-options-validate -f 5 -g" diff --git a/application/command_line_options.cpp b/application/command_line_options.cpp index f7b3e601d..ecbff9b45 100644 --- a/application/command_line_options.cpp +++ b/application/command_line_options.cpp @@ -195,6 +195,7 @@ void command_line_options::assert_valid( const std::vector< description >& d, bo for( unsigned int i = 0; i < d.size(); ++i ) { for( unsigned int j = 0; j < d[i].names.size(); ++j ) { m[ d[i].names[j] ] = 
d[i].has_value; } } for( unsigned int i = 1; i < argv_.size(); ++i ) { + if( !boost::regex_match( argv_[i], boost::regex( "-.+" ) ) ) { continue; } auto it = m.find( argv_[i] ); if( it == m.end() ) { COMMA_THROW( comma::exception, "unknown option " << argv_[i] ); } if( it->second ) { ++i; } From 7638273ebb70d980986747d1330e4e70167455ac Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 10 Dec 2019 17:28:59 +1100 Subject: [PATCH 0101/1056] application/applications/test/test added --- application/applications/test/test | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100755 application/applications/test/test diff --git a/application/applications/test/test b/application/applications/test/test new file mode 100755 index 000000000..90d7cb1fc --- /dev/null +++ b/application/applications/test/test @@ -0,0 +1,6 @@ +#!/bin/bash + +source $( type -p comma-application-util ) || { echo "$0: failed to source comma-application-util" >&2 ; exit 1 ; } +source $( type -p comma-test-util ) || { echo "$0: failed to source comma-test-util" >&2 ; exit 1 ; } + +comma_test_commands From cc5acabe3391f03749a3a1abeb05a94ab8226db4 Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 19 Dec 2019 17:38:08 +1100 Subject: [PATCH 0102/1056] csv/stream: passed::write( v ) implemented --- csv/stream.h | 91 +++++++++++++++++++++++++++++----------- csv/test/stream_test.cpp | 26 +++++++++++- 2 files changed, 91 insertions(+), 26 deletions(-) diff --git a/csv/stream.h b/csv/stream.h index c862c011b..2f7ba849e 100644 --- a/csv/stream.h +++ b/csv/stream.h @@ -436,11 +436,26 @@ template < typename S > class passed { public: - passed( const input_stream< S >& is, std::ostream& os, bool flush=false ) : is_( is ), os_( os ), flush(flush) - { - #ifdef WIN32 - if( is_.is_binary() && os == std::cout ) { _setmode( _fileno( stdout ), _O_BINARY ); } - #endif // #ifdef WIN32 + passed( const input_stream< S >& is, std::ostream& os, bool flush = false ); + + void write(); + + void write( const S& s ); + + private: + 
const input_stream< S >& is_; + std::ostream& os_; + std::string buffer_; + bool flush; + bool is_stdout_; +}; + +template < typename S > +inline passed< S >::passed( const input_stream< S >& is, std::ostream& os, bool flush ) : is_( is ), os_( os ), flush( flush ) +{ + #ifdef WIN32 + if( is_.is_binary() && os == std::cout ) { _setmode( _fileno( stdout ), _O_BINARY ); } + #endif // #ifdef WIN32 // In using view-points in 'pass' mode there were issues with the write method. // How to reproduce: take some nav data in t,6d format. Store as in.bin and other.bin. // @@ -485,29 +500,57 @@ class passed // - according to git grep, only view-points was using this class template at the moment; therefore, // the change is very localized and we preserve it in this class // - however, all the other similar modifications have been commented out using /// symbol - is_stdout_ = os.rdbuf() == std::cout.rdbuf(); - } + is_stdout_ = os.rdbuf() == std::cout.rdbuf(); +} - void write() +template < typename S > +inline void passed< S >::write() +{ + if( is_.is_binary() ) + { + if( is_stdout_ ) { - if( is_.is_binary() ) { - if ( is_stdout_ ) { - ::write( 1, is_.binary().last(), is_.binary().size() ); - if(flush) { ::fflush( stdout ); } - } else { - os_.write( is_.binary().last(), is_.binary().size() ); - if(flush) { os_.flush(); } - } - } - else os_ << comma::join( is_.ascii().last(), is_.ascii().ascii().delimiter() ) << std::endl; + ::write( 1, is_.binary().last(), is_.binary().size() ); + if( flush ) { ::fflush( stdout ); } } + else + { + os_.write( is_.binary().last(), is_.binary().size() ); + if( flush ) { os_.flush(); } + } + } + else + { + os_ << comma::join( is_.ascii().last(), is_.ascii().ascii().delimiter() ) << std::endl; + } +} - private: - const input_stream< S >& is_; - std::ostream& os_; - bool flush; - bool is_stdout_; -}; +template < typename S > +inline void passed< S >::write( const S& s ) +{ + if( is_.is_binary() ) + { + buffer_.resize( is_.binary().size() ); + ::memcpy( 
&buffer_[0], is_.binary().last(), is_.binary().size() ); // quick and dirty + is_.binary().binary().put( s, &buffer_[0] ); + if( is_stdout_ ) + { + ::write( 1, &buffer_[0], is_.binary().size() ); + if( flush ) { ::fflush( stdout ); } + } + else + { + os_.write( &buffer_[0], is_.binary().size() ); + if( flush ) { os_.flush(); } + } + } + else + { + std::vector< std::string > v = is_.ascii().last(); + is_.ascii().ascii().put( s, v ); + os_ << comma::join( v, is_.ascii().ascii().delimiter() ) << std::endl; + } +} template < typename S > inline ascii_input_stream< S >::ascii_input_stream( std::istream& is, const std::string& column_names, char delimiter, bool full_path_as_name, const S& sample ) diff --git a/csv/test/stream_test.cpp b/csv/test/stream_test.cpp index bef1731db..04a788baf 100644 --- a/csv/test/stream_test.cpp +++ b/csv/test/stream_test.cpp @@ -100,7 +100,7 @@ TEST( csv, container ) test_container sample; sample.vector = std::vector< int >( 5, 1 ); comma::csv::input_stream< test_container > istream( iss, csv, sample ); const test_container *c = istream.read(); - EXPECT_EQ( c->vector.size(), 5 ); + EXPECT_EQ( int( c->vector.size() ), 5 ); std::string so = comma::join( c->vector, ',' ); EXPECT_EQ( so, "2,3,1,1,6" ); } @@ -111,11 +111,33 @@ TEST( csv, container ) c.vector[1] = 5; c.vector[2] = 3; ostream.write( c ); - EXPECT_EQ( c.vector.size(), 5 ); + EXPECT_EQ( int( c.vector.size() ), 5 ); EXPECT_EQ( oss.str(), "1,5,3,1,1\n" ); } } +TEST( csv, passed_ascii ) +{ + { + std::istringstream iss( "1,2\n3,4" ); + comma::csv::input_stream< test_struct > is( iss ); + std::ostringstream oss; + comma::csv::passed< test_struct > p( is, oss ); + is.read(); + p.write(); + EXPECT_EQ( "1,2\n", oss.str() ); + is.read(); + p.write(); + EXPECT_EQ( "1,2\n3,4\n", oss.str() ); + is.read(); + p.write( test_struct( 10, 20 ) ); + EXPECT_EQ( "1,2\n3,4\n10,20\n", oss.str() ); + } + { + // todo! 
binary test + } +} + } } } // namespace comma { namespace csv { namespace stream_test { namespace comma { namespace csv { namespace stream_test { From c180f35d35062cb38c43008def19c895785912a5 Mon Sep 17 00:00:00 2001 From: seva Date: Mon, 30 Dec 2019 12:10:59 +1100 Subject: [PATCH 0103/1056] application: unit test added --- application/test/application_test.cpp | 36 +++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/application/test/application_test.cpp b/application/test/application_test.cpp index 037ae84e0..f3cfd2481 100644 --- a/application/test/application_test.cpp +++ b/application/test/application_test.cpp @@ -118,6 +118,32 @@ TEST( application, unnamed ) EXPECT_EQ( free[5], "free5" ); } } + { + std::vector< std::string > argv; + argv.push_back( "application" ); + argv.push_back( "--no-value" ); + argv.push_back( "--value" ); + argv.push_back( "some-value" ); + comma::command_line_options options( argv ); + { + std::vector< std::string > unnamed = options.unnamed( "--no-value", "-.*" ); + EXPECT_EQ( 0u, unnamed.size() ); + } + } + { + std::vector< std::string > argv; + argv.push_back( "application" ); + argv.push_back( "unnamed" ); + argv.push_back( "--no-value" ); + argv.push_back( "--value" ); + argv.push_back( "x,y,z" ); + comma::command_line_options options( argv ); + { + std::vector< std::string > unnamed = options.unnamed( "--no-value", "-.*" ); + EXPECT_EQ( 1u, unnamed.size() ); + EXPECT_EQ( "unnamed", unnamed[0] ); + } + } // TODO: definitely more tests! 
} @@ -180,7 +206,7 @@ TEST( application, command_line_options_description_parsing ) { { comma::command_line_options::description d = comma::command_line_options::description::from_string( "--verbose" ); - EXPECT_EQ( 1, d.names.size() ); + EXPECT_EQ( 1u, d.names.size() ); EXPECT_EQ( "--verbose", d.names[0] ); EXPECT_FALSE( d.has_value ); EXPECT_TRUE( d.is_optional ); @@ -188,7 +214,7 @@ TEST( application, command_line_options_description_parsing ) } { comma::command_line_options::description d = comma::command_line_options::description::from_string( "--verbose,-v" ); - EXPECT_EQ( 2, d.names.size() ); + EXPECT_EQ( 2u, d.names.size() ); EXPECT_EQ( "--verbose", d.names[0] ); EXPECT_EQ( "-v", d.names[1] ); EXPECT_FALSE( d.has_value ); @@ -197,7 +223,7 @@ TEST( application, command_line_options_description_parsing ) } { comma::command_line_options::description d = comma::command_line_options::description::from_string( "--filename,-f=; some filename" ); - EXPECT_EQ( 2, d.names.size() ); + EXPECT_EQ( 2u, d.names.size() ); EXPECT_EQ( "--filename", d.names[0] ); EXPECT_EQ( "-f", d.names[1] ); EXPECT_TRUE( d.has_value ); @@ -207,7 +233,7 @@ TEST( application, command_line_options_description_parsing ) } { comma::command_line_options::description d = comma::command_line_options::description::from_string( "--filename,-f=[]; some filename" ); - EXPECT_EQ( 2, d.names.size() ); + EXPECT_EQ( 2u, d.names.size() ); EXPECT_EQ( "--filename", d.names[0] ); EXPECT_EQ( "-f", d.names[1] ); EXPECT_TRUE( d.has_value ); @@ -226,7 +252,7 @@ void check_default_value( const std::string& line, const std::string& default_va { typedef comma::command_line_options::description description; description d = description::from_string( line ); - EXPECT_EQ( 2, d.names.size() ); + EXPECT_EQ( 2u, d.names.size() ); EXPECT_EQ( "--filename", d.names[0] ); EXPECT_EQ( "-f", d.names[1] ); EXPECT_TRUE( d.has_value ); From 889b42ac28c44a0280304f6556c936d2db88a6ef Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 7 Jan 
2020 12:18:19 +1100 Subject: [PATCH 0104/1056] csv-enumerate: refactored to make adding more output features easier --- csv/applications/csv-enumerate.cpp | 53 +++++++++++++++++------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/csv/applications/csv-enumerate.cpp b/csv/applications/csv-enumerate.cpp index ff5d4955a..3fd3ee910 100644 --- a/csv/applications/csv-enumerate.cpp +++ b/csv/applications/csv-enumerate.cpp @@ -61,6 +61,22 @@ static void usage( bool verbose ) exit( 0 ); } +struct output +{ + comma::uint32 id; + output( comma::uint32 id = 0 ): id( id ) {} +}; + +namespace comma { namespace visiting { + +template <> struct traits< output > +{ + template < typename K, typename V > static void visit( const K&, const output& p, V& v ) { v.apply( "id", p.id ); } + template < typename K, typename V > static void visit( const K&, output& p, V& v ) { v.apply( "id", p.id ); } +}; + +} } // namespace comma { namespace visiting { + int main( int ac, char** av ) { typedef comma::csv::impl::unstructured input_t; @@ -94,10 +110,6 @@ int main( int ac, char** av ) } if( verbose ) { std::cerr << "csv-enumerate: fields " << csv.fields << " interpreted as: " << comma::join( v, ',' ) << std::endl; } csv.fields = comma::join( v, ',' ); - comma::csv::input_stream< input_t > istream( std::cin, csv, default_input ); - #ifdef WIN32 - if( istream.is_binary() ) { _setmode( _fileno( stdout ), _O_BINARY ); } - #endif static map_t map; comma::uint32 id = 0; if( !first_line.empty() ) @@ -106,6 +118,15 @@ int main( int ac, char** av ) map[ comma::csv::ascii< input_t >( csv, default_input ).get( first_line ) ] = std::make_pair( id++, 1 ); if( !output_map ) { std::cout << first_line << csv.delimiter << 0 << std::endl; } } + comma::csv::options output_csv; + output_csv.delimiter = csv.delimiter; + if( csv.binary() ) { output_csv.format( comma::csv::format::value< output >() ); } + comma::csv::input_stream< input_t > istream( std::cin, csv, default_input ); + 
comma::csv::output_stream< output > ostream( std::cout, output_csv ); + comma::csv::tied< input_t, output > tied( istream, ostream ); + #ifdef WIN32 + if( istream.is_binary() ) { _setmode( _fileno( stdout ), _O_BINARY ); } + #endif while( istream.ready() || std::cin.good() ) { const input_t* p = istream.read(); @@ -113,26 +134,14 @@ int main( int ac, char** av ) map_t::iterator it = map.find( *p ); comma::uint32 cur = id; if( it == map.end() ) { map[ *p ] = std::make_pair( id++, 1 ); } else { cur = it->second.first; ++( it->second.second ); } - if( !output_map ) - { - if( csv.binary() ) - { - std::cout.write( istream.binary().last(), csv.format().size() ); - std::cout.write( reinterpret_cast< const char* >( &cur ), sizeof( comma::uint32 ) ); - if( csv.flush ) { std::cout.flush(); } - } - else - { - std::cout << comma::join( istream.ascii().last(), csv.delimiter ) << csv.delimiter << cur << std::endl; - } - } + if( !output_map ) { tied.append( output( cur ) ); } } if( !output_map ) { return 0; } - comma::csv::options output_csv; - output_csv.delimiter = csv.delimiter; - if( csv.binary() ) { output_csv.format( comma::csv::format::value< input_t >( default_input ) + ",2ui" ); } - comma::csv::output_stream< map_t::value_type > ostream( std::cout, output_csv, std::make_pair( default_input, std::make_pair( 0, 0 ) ) ); - for( map_t::const_iterator it = map.begin(); it != map.end(); ++it ) { ostream.write( *it ); } + comma::csv::options output_map_csv; + output_map_csv.delimiter = csv.delimiter; + if( csv.binary() ) { output_map_csv.format( comma::csv::format::value< input_t >( default_input ) + ",2ui" ); } + comma::csv::output_stream< map_t::value_type > omstream( std::cout, output_map_csv, std::make_pair( default_input, std::make_pair( 0, 0 ) ) ); + for( map_t::const_iterator it = map.begin(); it != map.end(); ++it ) { omstream.write( *it ); } return 0; } catch( std::exception& ex ) { std::cerr << "csv-enumerate: " << ex.what() << std::endl; } From 
777bfd1adea19d91d712958eeaf006469b3a431e Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 7 Jan 2020 12:22:04 +1100 Subject: [PATCH 0105/1056] csv-enumerate: --verbose documented --- csv/applications/csv-enumerate.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/csv/applications/csv-enumerate.cpp b/csv/applications/csv-enumerate.cpp index 3fd3ee910..82a3e89c9 100644 --- a/csv/applications/csv-enumerate.cpp +++ b/csv/applications/csv-enumerate.cpp @@ -54,9 +54,10 @@ static void usage( bool verbose ) std::cerr << " - list of input key values; in same binary as input" << std::endl; std::cerr << " - corresponding enumeration index as ui" << std::endl; std::cerr << " - number of values for this enumeration index as ui" << std::endl; + std::cerr << " --verbose,-v: more output to stderr" << std::endl; std::cerr << std::endl; std::cerr << "csv options" << std::endl; - if( verbose ) { std::cerr << comma::csv::options::usage() << std::endl; } else { std::cerr << " run csv-enumerate --help --verbose for more..." 
<< std::endl; } + std::cerr << comma::csv::options::usage( verbose ) << std::endl; std::cerr << std::endl; exit( 0 ); } From 7cfc1c7ee7c8bc7c5e2edecd132cf198dd55bae8 Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 8 Jan 2020 14:33:24 +1100 Subject: [PATCH 0106/1056] csv-split: --files: explicit mapping of ids to filenames implemented --- csv/applications/csv-split.cpp | 26 +++++----- csv/applications/split/split.cpp | 83 ++++++++++++++++++++++++++------ csv/applications/split/split.h | 3 +- 3 files changed, 84 insertions(+), 28 deletions(-) diff --git a/csv/applications/csv-split.cpp b/csv/applications/csv-split.cpp index ab3a68c15..c9cfc9477 100644 --- a/csv/applications/csv-split.cpp +++ b/csv/applications/csv-split.cpp @@ -37,7 +37,6 @@ #include #include -#include "../../application/contact_info.h" #include "../../csv/impl/program_options.h" #include "../../csv/traits.h" #include "split/split.h" @@ -115,7 +114,12 @@ int main( int argc, char** argv ) std::cerr << " split by block field, output to files" << std::endl; std::cerr << " if block field present in --fields:" << std::endl; std::cerr << " output records with this block to a separate file, on change of block, open a new file, e.g. 
0.csv, 1.csv, etc" << std::endl; - std::cerr << " example: ( echo 0,a; echo 1,b; echo 1,c; echo 2,d ) | csv-split --fields block" << std::endl; + std::cerr << " by block with default filenames, e.g:" << std::endl; + std::cerr << " ( echo 0,a; echo 1,b; echo 1,c; echo 2,d ) | csv-split --fields block" << std::endl; + std::cerr << " by block with specified filenames" << std::endl; + std::cerr << " ( echo 0; echo 1; echo 2 ) | csv-split --fields block --files <( echo a; echo b; echo c )" << std::endl; + std::cerr << " by block with filenames mapped to block ids" << std::endl; + std::cerr << " ( echo 0; echo 1; echo 2 ) | csv-split --fields block --files <( echo 0,a; echo 1,b; echo 2,c )';fields=id,filename'" << std::endl; std::cerr << std::endl; std::cerr << " split by t field, output to files" << std::endl; std::cerr << " if t (timestamp) field present in --fields:" << std::endl; @@ -126,17 +130,17 @@ int main( int argc, char** argv ) std::cerr << " if output streams (see example below) are present on the command line and id field present in --fields:" << std::endl; std::cerr << " output records with the given ids to the corresponding streams, while outputing the rest into files" << std::endl; std::cerr << " records with ids for which output stream is not specified will be discarded, unless ... stream is specified:" << std::endl; - std::cerr << std::endl; - std::cerr << " outputs: ;; to send records with a given set of ids to this stream" << std::endl; + std::cerr << std::endl; + std::cerr << " outputs: ;; to send records with a given set of ids to this stream" << std::endl; std::cerr << " keys:" << std::endl; std::cerr << " [,]*: comma-separated list of ids, e.g: '5' or '2,5,7', etc" << std::endl; std::cerr << " ...: three dots mean: send to this stream all the records with ids for which no other stream is specified (see example below)" << std::endl; std::cerr << " stream:" << std::endl; - std::cerr << " tcp:: e.g. tcp:1234" << std::endl; - std::cerr << " udp:: e.g. 
udp:1234 (todo)" << std::endl; - std::cerr << " local:: linux/unix local server socket e.g. local:./tmp/my_socket" << std::endl; - std::cerr << " : named pipe, which will be re-opened, if client reconnects" << std::endl; - std::cerr << " : a regular file" << std::endl; + std::cerr << " tcp:: e.g. tcp:1234" << std::endl; + std::cerr << " udp:: e.g. udp:1234 (todo)" << std::endl; + std::cerr << " local:: linux/unix local server socket e.g. local:./tmp/my_socket" << std::endl; + std::cerr << " : named pipe, which will be re-opened, if client reconnects" << std::endl; + std::cerr << " : a regular file" << std::endl; std::cerr << " example: ( echo 0,a; echo 1,b; echo 0,c; echo 2,d ) | csv-split --fields id \"0,1;tcp:5999\" \"...;local:/tmp/named_fifo\"" << std::endl; std::cerr << std::endl; std::cerr << description << std::endl; @@ -145,10 +149,8 @@ int main( int argc, char** argv ) std::cerr << " block: split on the block number change" << std::endl; std::cerr << " id: split by id (same as block, except does not have to be contiguous by the price of worse performance)" << std::endl; std::cerr << " t: if present, use timestamp from the packet; if absent, use system time" << std::endl; - std::cerr << std::endl; - std::cerr << comma::contact_info << std::endl; std::cerr << std::endl; - return 1; + return 0; } csv = comma::csv::program_options::get( vm ); if( csv.binary() ) { size = csv.format().size(); } diff --git a/csv/applications/split/split.cpp b/csv/applications/split/split.cpp index 1132e9912..2714a6665 100644 --- a/csv/applications/split/split.cpp +++ b/csv/applications/split/split.cpp @@ -38,20 +38,75 @@ #include #endif +#include #include #include #include "../../../base/exception.h" +#include "../../../csv/stream.h" +#include "../../../csv/traits.h" #include "../../../io/file_descriptor.h" +#include "../../../name_value/parser.h" +#include "../../../visiting/traits.h" #include "split.h" namespace comma { namespace csv { namespace applications { +struct 
filename_record +{ + comma::uint32 id; + std::string filename; + filename_record( comma::uint32 id = 0, const std::string& filename = "" ): id( id ), filename( filename ) {} +}; + +} } } // namespace comma { namespace csv { namespace applications { + +namespace comma { namespace visiting { + +template <> struct traits< comma::csv::applications::filename_record > +{ + template< typename K, typename V > static void visit( const K& k, comma::csv::applications::filename_record& t, V& v ) + { + v.apply( "id", t.id ); + v.apply( "filename", t.filename ); + } + + template< typename K, typename V > static void visit( const K& k, const comma::csv::applications::filename_record& t, V& v ) + { + v.apply( "id", t.id ); + v.apply( "filename", t.filename ); + } +}; + +} } // namespace comma { namespace visiting { + +namespace comma { namespace csv { namespace applications { + +std::pair< std::unordered_map< comma::uint32, std::string >, bool > static filenames( const std::string& filename ) +{ + auto csv = comma::name_value::parser( "filename" ).get< comma::csv::options >( filename ); + if( csv.fields.empty() ) { csv.fields = "filename"; } + std::ifstream ifs( csv.filename ); + if( !ifs.is_open() ) { COMMA_THROW( comma::exception, "could not open --files='" << csv.filename << "'" ); } + comma::csv::input_stream< filename_record > is( ifs, csv ); + comma::uint32 id = 0; + std::pair< std::unordered_map< comma::uint32, std::string >, bool > r; + r.second = csv.has_field( "id" ); + while( is.ready() || ifs.good() ) + { + auto p = is.read(); + if( p == nullptr ) { break; } + r.first[ r.second ? 
p->id : id++ ] = p->filename; // quick and dirty + } + if( r.first.empty() ) { COMMA_THROW( comma::exception, "got no filenames from '" << csv.filename << "'" ); } + return r; +} + template < typename T > split< T >::split( boost::optional< boost::posix_time::time_duration > period - , const std::string& suffix - , const comma::csv::options& csv - , bool pass - , const std::string& filenames ) + , const std::string& suffix + , const comma::csv::options& csv + , bool pass + , const std::string& filenames ) : ofstream_( std::bind( &split< T >::ofstream_by_time_, this ) ) , period_( period ) , suffix_( suffix ) @@ -66,11 +121,7 @@ split< T >::split( boost::optional< boost::posix_time::time_duration > period if( csv.has_field( "block" ) ) { ofstream_ = std::bind( &split< T >::ofstream_by_block_, this ); - if( !filenames.empty() ) - { - filenames_.reset( new std::ifstream( filenames ) ); - if( !filenames_->is_open() ) { COMMA_THROW( comma::exception, "failed to open '" << filenames << "'" ); } - } + if( !filenames.empty() ) { boost::tie( filenames_, filenames_with_id_ ) = applications::filenames( filenames ); } } else { @@ -211,18 +262,19 @@ std::ofstream& split< T >::ofstream_by_time_() template < typename T > std::ofstream& split< T >::ofstream_by_block_() { + static comma::uint32 id = 0; if( !last_ || last_->block != current_.block ) { file_.close(); std::string filename; - if( filenames_ ) + if( !filenames_.empty() ) { - while( std::cin.good() && !is_shutdown_ ) + auto it = filenames_.find( filenames_with_id_ ? 
current_.block : id ); + if( it == filenames_.end() ) { COMMA_THROW( comma::exception, "filename not found for block " << current_.block << "; todo: skipping blocks with no matching filenames" ); } + filename = it->second; + const auto& dirname = boost::filesystem::path( filename ).parent_path(); + if( !( dirname.empty() || boost::filesystem::is_directory( dirname ) || boost::filesystem::create_directories( dirname ) ) ) { - std::getline( *filenames_, filename ); - if( filename.empty() ) { continue; } - const auto& dirname = boost::filesystem::path( filename ).parent_path(); - if( dirname.empty() || boost::filesystem::is_directory( dirname ) || boost::filesystem::create_directories( dirname ) ) { break; } COMMA_THROW( comma::exception, "failed to create directory '" << dirname << "' for file: '" << filename << "'" ); } } @@ -230,6 +282,7 @@ std::ofstream& split< T >::ofstream_by_block_() file_.open( &filename[0], mode_ ); if( !file_.is_open() ) { COMMA_THROW( comma::exception, "failed to open '" << filename << "'" ); } last_ = current_; + ++id; } return file_; } diff --git a/csv/applications/split/split.h b/csv/applications/split/split.h index 53c4a2580..3322e8b72 100644 --- a/csv/applications/split/split.h +++ b/csv/applications/split/split.h @@ -158,7 +158,8 @@ class split ids_type_ seen_ids_; bool pass_; bool flush_; - std::unique_ptr< std::ifstream > filenames_; + std::unordered_map< comma::uint32, std::string > filenames_; + bool filenames_with_id_; //to-do bool published_on_stream( const char* data, unsigned int size ); From 7b3d52d656a64280ea9ae2696ba2c01376944fc0 Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 8 Jan 2020 14:52:41 +1100 Subject: [PATCH 0107/1056] csv-split: minor refactoring to prepare for more generic use of --files --- csv/applications/split/split.cpp | 14 ++++++++------ csv/applications/split/split.h | 1 + 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/csv/applications/split/split.cpp b/csv/applications/split/split.cpp index 
2714a6665..541eabf99 100644 --- a/csv/applications/split/split.cpp +++ b/csv/applications/split/split.cpp @@ -130,7 +130,6 @@ split< T >::split( boost::optional< boost::posix_time::time_duration > period } } -//to-do template < typename T > split< T >::split( boost::optional< boost::posix_time::time_duration > period , const std::string& suffix @@ -157,13 +156,11 @@ split< T >::split( boost::optional< boost::posix_time::time_duration > period { auto publisher_pos = t->insert( std::move( publisher ) ); auto const keys = comma::split( stream_values[0], ',' ); - for( auto const& ki : keys ) { auto const kii = boost::lexical_cast< T >( ki ); if( seen_ids_.end() != seen_ids_.find( kii ) ) { COMMA_THROW( comma::exception, "multiple output streams have the id: " << ki ); } seen_ids_.insert( kii ); - mapped_publishers_.insert( std::make_pair( kii, publisher_pos.first->get() ) ); } } @@ -287,9 +284,14 @@ std::ofstream& split< T >::ofstream_by_block_() return file_; } -template < typename T > static std::string make_filename_from_id( const T& id, const std::string& suffix ) { return boost::lexical_cast< std::string >( id ) + suffix; } +template < typename T > std::string to_string( const T& v ) { return boost::lexical_cast< std::string >( v ); } +template <> std::string to_string< boost::posix_time::ptime >( const boost::posix_time::ptime& v ) { return boost::posix_time::to_iso_string( v ); } -static std::string make_filename_from_id( const boost::posix_time::ptime& id, const std::string& suffix ) { return boost::posix_time::to_iso_string( id ) + suffix; } +template < typename T > +std::string split< T >::filename_from_id_( const T& id ) +{ + return to_string( id ) + suffix_; +} template < typename T > std::ofstream& split< T >::ofstream_by_id_() @@ -309,7 +311,7 @@ std::ofstream& split< T >::ofstream_by_id_() std::ios_base::openmode mode = mode_; if( seen_ids_.find( current_.id ) == seen_ids_.end() ) { seen_ids_.insert( current_.id ); } else { mode |= std::ofstream::app; } 
- std::string name = make_filename_from_id( current_.id, suffix_); + std::string name = filename_from_id_( current_.id ); std::shared_ptr< std::ofstream > stmp( new std::ofstream( name.c_str(), mode ) ); it = files_.insert( std::make_pair( current_.id, stmp ) ).first; } diff --git a/csv/applications/split/split.h b/csv/applications/split/split.h index 3322e8b72..7799a1c47 100644 --- a/csv/applications/split/split.h +++ b/csv/applications/split/split.h @@ -136,6 +136,7 @@ class split std::ofstream& ofstream_by_time_(); std::ofstream& ofstream_by_block_(); std::ofstream& ofstream_by_id_(); + std::string filename_from_id_( const T& id ); void update_( const char* data, unsigned int size ); void update_( const std::string& line ); void accept_(); From 4e3b7fd676e6d72216660403ed53fef33bcef6ee Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 8 Jan 2020 18:24:20 +1100 Subject: [PATCH 0108/1056] csv-split: id field: --files implemented --- csv/applications/split/split.cpp | 51 +++++++++++++++++++++----------- csv/applications/split/split.h | 2 +- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/csv/applications/split/split.cpp b/csv/applications/split/split.cpp index 541eabf99..d039e3cc8 100644 --- a/csv/applications/split/split.cpp +++ b/csv/applications/split/split.cpp @@ -51,26 +51,26 @@ namespace comma { namespace csv { namespace applications { -struct filename_record +template < typename T > struct filename_record { - comma::uint32 id; + T id; std::string filename; - filename_record( comma::uint32 id = 0, const std::string& filename = "" ): id( id ), filename( filename ) {} + filename_record( const T& id = 0, const std::string& filename = "" ): id( id ), filename( filename ) {} }; } } } // namespace comma { namespace csv { namespace applications { namespace comma { namespace visiting { -template <> struct traits< comma::csv::applications::filename_record > +template < typename T > struct traits< comma::csv::applications::filename_record< T > > { - 
template< typename K, typename V > static void visit( const K& k, comma::csv::applications::filename_record& t, V& v ) + template< typename K, typename V > static void visit( const K& k, comma::csv::applications::filename_record< T >& t, V& v ) { v.apply( "id", t.id ); v.apply( "filename", t.filename ); } - template< typename K, typename V > static void visit( const K& k, const comma::csv::applications::filename_record& t, V& v ) + template< typename K, typename V > static void visit( const K& k, const comma::csv::applications::filename_record< T >& t, V& v ) { v.apply( "id", t.id ); v.apply( "filename", t.filename ); @@ -83,13 +83,15 @@ namespace comma { namespace csv { namespace applications { std::pair< std::unordered_map< comma::uint32, std::string >, bool > static filenames( const std::string& filename ) { + std::pair< std::unordered_map< comma::uint32, std::string >, bool > r; + r.second = false; + if( filename.empty() ) { return r; } auto csv = comma::name_value::parser( "filename" ).get< comma::csv::options >( filename ); if( csv.fields.empty() ) { csv.fields = "filename"; } std::ifstream ifs( csv.filename ); if( !ifs.is_open() ) { COMMA_THROW( comma::exception, "could not open --files='" << csv.filename << "'" ); } - comma::csv::input_stream< filename_record > is( ifs, csv ); + comma::csv::input_stream< filename_record< comma::uint32 > > is( ifs, csv ); // quick and dirty; todo: support templated map comma::uint32 id = 0; - std::pair< std::unordered_map< comma::uint32, std::string >, bool > r; r.second = csv.has_field( "id" ); while( is.ready() || ifs.good() ) { @@ -118,15 +120,21 @@ split< T >::split( boost::optional< boost::posix_time::time_duration > period if( csv.fields.empty() ) { return; } if( csv.binary() ) { binary_.reset( new comma::csv::binary< input >( csv ) ); } else { ascii_.reset( new comma::csv::ascii< input >( csv ) ); } + boost::tie( filenames_, filenames_have_id_ ) = applications::filenames( filenames ); if( csv.has_field( "block" ) ) { 
ofstream_ = std::bind( &split< T >::ofstream_by_block_, this ); - if( !filenames.empty() ) { boost::tie( filenames_, filenames_with_id_ ) = applications::filenames( filenames ); } } else { - if( !filenames.empty() ) { COMMA_THROW( comma::exception, "--files given, but no block field specified in --fields" ); } - if( csv.has_field( "id" ) ) { ofstream_ = std::bind( &split< T >::ofstream_by_id_, this ); } + if( csv.has_field( "id" ) ) + { + ofstream_ = std::bind( &split< T >::ofstream_by_id_, this ); + } + else + { + if( !filenames_.empty() ) { COMMA_THROW( comma::exception, "--files given, but no block field specified in --fields" ); } + } } } @@ -266,7 +274,7 @@ std::ofstream& split< T >::ofstream_by_block_() std::string filename; if( !filenames_.empty() ) { - auto it = filenames_.find( filenames_with_id_ ? current_.block : id ); + auto it = filenames_.find( filenames_have_id_ ? current_.block : id ); if( it == filenames_.end() ) { COMMA_THROW( comma::exception, "filename not found for block " << current_.block << "; todo: skipping blocks with no matching filenames" ); } filename = it->second; const auto& dirname = boost::filesystem::path( filename ).parent_path(); @@ -284,15 +292,22 @@ std::ofstream& split< T >::ofstream_by_block_() return file_; } -template < typename T > std::string to_string( const T& v ) { return boost::lexical_cast< std::string >( v ); } +template < typename T > static std::string to_string( const T& v ) { return boost::lexical_cast< std::string >( v ); } + template <> std::string to_string< boost::posix_time::ptime >( const boost::posix_time::ptime& v ) { return boost::posix_time::to_iso_string( v ); } -template < typename T > -std::string split< T >::filename_from_id_( const T& id ) -{ - return to_string( id ) + suffix_; +template < typename T, typename M > static std::string find_( const M& m, const T& id ) { COMMA_THROW( comma::exception, "id-to-filename map not implemented for this type" ); } + +template <> std::string find_< 
comma::uint32, std::unordered_map< comma::uint32, std::string > >( const std::unordered_map< comma::uint32, std::string >& m, const comma::uint32& id ) +{ + auto it = m.find( id ); + if( it == m.end() ) { COMMA_THROW( comma::exception, "filename not found for id " << id << "; todo: skipping id with no matching filenames" ); } + return it->second; } +template < typename T > +std::string split< T >::filename_from_id_( const T& id ) { return filenames_.empty() ? to_string( id ) + suffix_ : find_( filenames_, id ); } + template < typename T > std::ofstream& split< T >::ofstream_by_id_() { @@ -312,7 +327,7 @@ std::ofstream& split< T >::ofstream_by_id_() if( seen_ids_.find( current_.id ) == seen_ids_.end() ) { seen_ids_.insert( current_.id ); } else { mode |= std::ofstream::app; } std::string name = filename_from_id_( current_.id ); - std::shared_ptr< std::ofstream > stmp( new std::ofstream( name.c_str(), mode ) ); + std::shared_ptr< std::ofstream > stmp( new std::ofstream( &name[0], mode ) ); it = files_.insert( std::make_pair( current_.id, stmp ) ).first; } return *it->second; diff --git a/csv/applications/split/split.h b/csv/applications/split/split.h index 7799a1c47..2f0e9b57f 100644 --- a/csv/applications/split/split.h +++ b/csv/applications/split/split.h @@ -160,7 +160,7 @@ class split bool pass_; bool flush_; std::unordered_map< comma::uint32, std::string > filenames_; - bool filenames_with_id_; + bool filenames_have_id_; //to-do bool published_on_stream( const char* data, unsigned int size ); From de04cf74caff4c6d259965742dd2b78d9817490f Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 8 Jan 2020 20:10:11 +1100 Subject: [PATCH 0109/1056] csv-split: --files: discarding unmatched ids added; --default-file: placeholder added, left for later --- csv/applications/csv-split.cpp | 24 ++++++++++------ csv/applications/split/split.cpp | 49 +++++++++++++++++++------------- csv/applications/split/split.h | 19 +++++++------ 3 files changed, 56 insertions(+), 36 deletions(-) 
diff --git a/csv/applications/csv-split.cpp b/csv/applications/csv-split.cpp index c9cfc9477..c9df78092 100644 --- a/csv/applications/csv-split.cpp +++ b/csv/applications/csv-split.cpp @@ -48,6 +48,7 @@ static std::string suffix; static unsigned int size = 0; static bool passthrough; static std::string files; +static std::string default_filename; template < typename T > static void run() { @@ -85,7 +86,8 @@ int main( int argc, char** argv ) boost::program_options::options_description description( "options" ); description.add_options() ( "help,h", "display help message" ) - ( "files", boost::program_options::value< std::string >( &files ), "if 'block' field present, list of files to save blocks; todo: --files for id field" ) + ( "default-file", boost::program_options::value< std::string >( &default_filename ), "todo: if --files present, unmatched ids will be put in the file with a given name; otherwise, unmatched values will be ignored" ) + ( "files", boost::program_options::value< std::string >( &files ), "if 'block' or 'id' field present, list of output files (see examples below)" ) ( "passthrough,pass", "pass data through to stdout" ) ( "period,t", boost::program_options::value< double >( &period ), "period in seconds after which a new file is created" ) ( "size,c", boost::program_options::value< unsigned int >( &size ), "packet size, only full packets will be written" ) @@ -109,17 +111,22 @@ int main( int argc, char** argv ) std::cerr << " split by id field, output to files" << std::endl; std::cerr << " if id field present in --fields:" << std::endl; std::cerr << " for each id value, output records with this id to a separate file, e.g. 
0.csv, 1.csv, etc" << std::endl; - std::cerr << " example: ( echo 0,a; echo 1,b; echo 0,c; echo 2,d ) | csv-split --fields id" << std::endl; + std::cerr << " - by id with default filenames, e.g:" << std::endl; + std::cerr << " ( echo 0,a; echo 1,b; echo 1,c; echo 2,d ) | csv-split --fields id" << std::endl; + std::cerr << " - by id with specified filenames" << std::endl; + std::cerr << " ( echo 0; echo 1; echo 2 ) | csv-split --fields id --files <( echo a; echo b; echo c )" << std::endl; + std::cerr << " - by id with filenames mapped to block ids" << std::endl; + std::cerr << " ( echo 0; echo 1; echo 2 ) | csv-split --fields id --files <( echo 0,a; echo 1,b; echo 2,c )';fields=id,filename'" << std::endl; std::cerr << std::endl; std::cerr << " split by block field, output to files" << std::endl; std::cerr << " if block field present in --fields:" << std::endl; std::cerr << " output records with this block to a separate file, on change of block, open a new file, e.g. 0.csv, 1.csv, etc" << std::endl; - std::cerr << " by block with default filenames, e.g:" << std::endl; - std::cerr << " ( echo 0,a; echo 1,b; echo 1,c; echo 2,d ) | csv-split --fields block" << std::endl; - std::cerr << " by block with specified filenames" << std::endl; - std::cerr << " ( echo 0; echo 1; echo 2 ) | csv-split --fields block --files <( echo a; echo b; echo c )" << std::endl; - std::cerr << " by block with filenames mapped to block ids" << std::endl; - std::cerr << " ( echo 0; echo 1; echo 2 ) | csv-split --fields block --files <( echo 0,a; echo 1,b; echo 2,c )';fields=id,filename'" << std::endl; + std::cerr << " - by block with default filenames, e.g:" << std::endl; + std::cerr << " ( echo 0,a; echo 1,b; echo 1,c; echo 2,d ) | csv-split --fields block" << std::endl; + std::cerr << " - by block with specified filenames" << std::endl; + std::cerr << " ( echo 0; echo 1; echo 2 ) | csv-split --fields block --files <( echo a; echo b; echo c )" << std::endl; + std::cerr << " - by block with 
filenames mapped to block ids" << std::endl; + std::cerr << " ( echo 0; echo 1; echo 2 ) | csv-split --fields block --files <( echo 0,a; echo 1,b; echo 2,c )';fields=id,filename'" << std::endl; std::cerr << std::endl; std::cerr << " split by t field, output to files" << std::endl; std::cerr << " if t (timestamp) field present in --fields:" << std::endl; @@ -153,6 +160,7 @@ int main( int argc, char** argv ) return 0; } csv = comma::csv::program_options::get( vm ); + if( !default_filename.empty() ) { std::cerr << "csv-split: --default-filename: todo, just ask" << std::endl; } if( csv.binary() ) { size = csv.format().size(); } bool id_is_string = vm.count( "string" ); bool id_is_time = vm.count( "time" ); diff --git a/csv/applications/split/split.cpp b/csv/applications/split/split.cpp index d039e3cc8..31e93695e 100644 --- a/csv/applications/split/split.cpp +++ b/csv/applications/split/split.cpp @@ -104,11 +104,12 @@ std::pair< std::unordered_map< comma::uint32, std::string >, bool > static filen } template < typename T > -split< T >::split( boost::optional< boost::posix_time::time_duration > period +split< T >::split( const boost::optional< boost::posix_time::time_duration >& period , const std::string& suffix , const comma::csv::options& csv , bool pass - , const std::string& filenames ) + , const std::string& filenames + , const std::string& default_filename ) : ofstream_( std::bind( &split< T >::ofstream_by_time_, this ) ) , period_( period ) , suffix_( suffix ) @@ -139,13 +140,14 @@ split< T >::split( boost::optional< boost::posix_time::time_duration > period } template < typename T > -split< T >::split( boost::optional< boost::posix_time::time_duration > period +split< T >::split( const boost::optional< boost::posix_time::time_duration >& period , const std::string& suffix , const comma::csv::options& csv , const std::vector< std::string >& streams //to-do , bool pass - , const std::string& filenames ) - : split( period, suffix, csv, pass, filenames ) + , const 
std::string& filenames + , const std::string& default_filename ) + : split( period, suffix, csv, pass, filenames, default_filename ) { if( streams.empty() ) { return; } auto const io_mode = csv.binary() ? comma::io::mode::binary : comma::io::mode::ascii; @@ -227,8 +229,12 @@ void split< T >::write( const char* data, unsigned int size ) else { current_.timestamp = boost::get_system_time(); } if( !published_on_stream( data, size ) ) // todo? or bind write function on initialisation and call it here? { - ofstream_().write( data, size ); - if( flush_ ) { ofstream_().flush(); } + auto ofs = ofstream_(); + if( ofs ) + { + ofs->write( data, size ); + if( flush_ ) { ofs->flush(); } + } } if ( pass_ ) { std::cout.write( data, size ); std::cout.flush(); } } @@ -242,16 +248,19 @@ void split< T >::write ( std::string line ) line += '\n'; if( !published_on_stream( &line[0], line.size()) ) // todo? or bind write function on initialisation and call it here? { - std::ofstream& ofs = ofstream_(); - ofs.write( &line[0], line.size() ); - //ofs.put( '\n' ); - if( flush_ ) { ofs.flush(); } + auto ofs = ofstream_(); + if( ofs ) + { + ofs->write( &line[0], line.size() ); + //ofs.put( '\n' ); + if( flush_ ) { ofs->flush(); } + } } if ( pass_ ) { std::cout.write( &line[0], line.size() ); /*std::cout.put('\n');*/ std::cout.flush(); } } template < typename T > -std::ofstream& split< T >::ofstream_by_time_() +std::ofstream* split< T >::ofstream_by_time_() { if( !last_ || current_.timestamp > ( last_->timestamp + *period_ ) ) { @@ -261,11 +270,11 @@ std::ofstream& split< T >::ofstream_by_time_() file_.open( ( time + suffix_ ).c_str(), mode_ ); last_ = current_; } - return file_; + return &file_; } template < typename T > -std::ofstream& split< T >::ofstream_by_block_() +std::ofstream* split< T >::ofstream_by_block_() { static comma::uint32 id = 0; if( !last_ || last_->block != current_.block ) @@ -275,7 +284,7 @@ std::ofstream& split< T >::ofstream_by_block_() if( !filenames_.empty() ) { auto 
it = filenames_.find( filenames_have_id_ ? current_.block : id ); - if( it == filenames_.end() ) { COMMA_THROW( comma::exception, "filename not found for block " << current_.block << "; todo: skipping blocks with no matching filenames" ); } + if( it == filenames_.end() ) { return nullptr; } filename = it->second; const auto& dirname = boost::filesystem::path( filename ).parent_path(); if( !( dirname.empty() || boost::filesystem::is_directory( dirname ) || boost::filesystem::create_directories( dirname ) ) ) @@ -289,7 +298,7 @@ std::ofstream& split< T >::ofstream_by_block_() last_ = current_; ++id; } - return file_; + return &file_; } template < typename T > static std::string to_string( const T& v ) { return boost::lexical_cast< std::string >( v ); } @@ -301,15 +310,14 @@ template < typename T, typename M > static std::string find_( const M& m, const template <> std::string find_< comma::uint32, std::unordered_map< comma::uint32, std::string > >( const std::unordered_map< comma::uint32, std::string >& m, const comma::uint32& id ) { auto it = m.find( id ); - if( it == m.end() ) { COMMA_THROW( comma::exception, "filename not found for id " << id << "; todo: skipping id with no matching filenames" ); } - return it->second; + return it == m.end() ? std::string() : it->second; } template < typename T > std::string split< T >::filename_from_id_( const T& id ) { return filenames_.empty() ? 
to_string( id ) + suffix_ : find_( filenames_, id ); } template < typename T > -std::ofstream& split< T >::ofstream_by_id_() +std::ofstream* split< T >::ofstream_by_id_() { typename Files::iterator it = files_.find( current_.id ); if( it == files_.end() ) @@ -327,10 +335,11 @@ std::ofstream& split< T >::ofstream_by_id_() if( seen_ids_.find( current_.id ) == seen_ids_.end() ) { seen_ids_.insert( current_.id ); } else { mode |= std::ofstream::app; } std::string name = filename_from_id_( current_.id ); + if( name.empty() ) { return nullptr; } std::shared_ptr< std::ofstream > stmp( new std::ofstream( &name[0], mode ) ); it = files_.insert( std::make_pair( current_.id, stmp ) ).first; } - return *it->second; + return it->second.get(); } template class split< comma::uint32 >; diff --git a/csv/applications/split/split.h b/csv/applications/split/split.h index 2f0e9b57f..7fb36b57d 100644 --- a/csv/applications/split/split.h +++ b/csv/applications/split/split.h @@ -118,30 +118,32 @@ class split { public: typedef applications::input< T > input; - split( boost::optional< boost::posix_time::time_duration > period + split( const boost::optional< boost::posix_time::time_duration >& period , const std::string& suffix , const comma::csv::options& csv , bool passthrough - , const std::string& filenames ); - split( boost::optional< boost::posix_time::time_duration > period + , const std::string& filenames + , const std::string& default_filename = "" ); + split( const boost::optional< boost::posix_time::time_duration >& period , const std::string& suffix , const comma::csv::options& csv , const std::vector< std::string >& streams , bool passthrough - , const std::string& filenames ); + , const std::string& filenames + , const std::string& default_filename = "" ); ~split(); void write( const char* data, unsigned int size ); void write( std::string line ); private: - std::ofstream& ofstream_by_time_(); - std::ofstream& ofstream_by_block_(); - std::ofstream& ofstream_by_id_(); + 
std::ofstream* ofstream_by_time_(); + std::ofstream* ofstream_by_block_(); + std::ofstream* ofstream_by_id_(); std::string filename_from_id_( const T& id ); void update_( const char* data, unsigned int size ); void update_( const std::string& line ); void accept_(); - std::function< std::ofstream&() > ofstream_; + std::function< std::ofstream*() > ofstream_; std::unique_ptr< comma::csv::ascii< input > > ascii_; std::unique_ptr< comma::csv::binary< input > > binary_; boost::optional< boost::posix_time::time_duration > period_; @@ -160,6 +162,7 @@ class split bool pass_; bool flush_; std::unordered_map< comma::uint32, std::string > filenames_; + // todo? std::unique_ptr< comma::io::publisher > default_file_; bool filenames_have_id_; //to-do From df6c53ca531c300076b21d1ea9b51dc61dfd482e Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 9 Jan 2020 12:49:57 +1100 Subject: [PATCH 0110/1056] csv::ascii::sample() added --- csv/ascii.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/csv/ascii.h b/csv/ascii.h index 0e1fdf3ed..cb653b627 100644 --- a/csv/ascii.h +++ b/csv/ascii.h @@ -87,6 +87,9 @@ class ascii /// return quote sign boost::optional< char > quote() const { return quote_; } + + /// return default value + const S& sample() const { return sample_; } private: char delimiter_; From 405fb8f363da1ca0558da95afc21c8547786ad19 Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 23 Jan 2020 19:22:45 +1100 Subject: [PATCH 0111/1056] csv-strings: in progress... 
--- csv/applications/CMakeLists.txt | 4 + csv/applications/csv-strings.cpp | 199 +++++++++++++++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 csv/applications/csv-strings.cpp diff --git a/csv/applications/CMakeLists.txt b/csv/applications/CMakeLists.txt index a165fa65c..f615f4c5c 100644 --- a/csv/applications/CMakeLists.txt +++ b/csv/applications/CMakeLists.txt @@ -114,6 +114,10 @@ add_executable( csv-random ${dir}/csv-random.cpp ) target_link_libraries ( csv-random ${comma_ALL_EXTERNAL_LIBRARIES} comma_application comma_io comma_string comma_xpath comma_csv ) install( TARGETS csv-random RUNTIME DESTINATION ${comma_INSTALL_BIN_DIR} COMPONENT Runtime ) +add_executable( csv-strings ${dir}/csv-strings.cpp ) +target_link_libraries ( csv-strings ${comma_ALL_EXTERNAL_LIBRARIES} comma_application comma_io comma_string comma_xpath comma_csv ) +install( TARGETS csv-strings RUNTIME DESTINATION ${comma_INSTALL_BIN_DIR} COMPONENT Runtime ) + add_executable( csv-update ${dir}/csv-update.cpp ) target_link_libraries ( csv-update ${comma_ALL_EXTERNAL_LIBRARIES} comma_application comma_io comma_string comma_xpath comma_csv ) install( TARGETS csv-update RUNTIME DESTINATION ${comma_INSTALL_BIN_DIR} COMPONENT Runtime ) diff --git a/csv/applications/csv-strings.cpp b/csv/applications/csv-strings.cpp new file mode 100644 index 000000000..b42e5a86d --- /dev/null +++ b/csv/applications/csv-strings.cpp @@ -0,0 +1,199 @@ +// This file is provided in addition to comma and is not an integral +// part of comma library. +// Copyright (c) 2018 Vsevolod Vlaskine +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE +// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT +// HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// comma is a generic and flexible library +// Copyright (c) 2011 The University of Sydney +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. Neither the name of the University of Sydney nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE +// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT +// HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/// @author vsevolod vlaskine + +#include +#include +#include +#include "../../application/command_line_options.h" +#include "../../csv/stream.h" +#include "../../csv/traits.h" +#include "../../string/string.h" + +static void usage( bool verbose ) +{ + std::cerr << std::endl; + std::cerr << "operations on strings" << std::endl; + std::cerr << std::endl; + std::cerr << " usage: cat input.csv | csv-strings [] > output.csv" << std::endl; + std::cerr << std::endl; + std::cerr << "operations" << std::endl; + std::cerr << " path-basename" << std::endl; + std::cerr << " path-dirname" << std::endl; + std::cerr << std::endl; + std::cerr << " options" << std::endl; + std::cerr << " --fields=[]; will perform operation on any non-empty fields" << std::endl; + std::cerr << " unless different semantics specified for operation" << std::endl; + std::cerr << std::endl; + std::cerr << "path-basename" << std::endl; + std::cerr << " options" << std::endl; + std::cerr << " --depth=; default=1; if path length less than depth, output empty string" << std::endl; + std::cerr << " --emplace; perform 
operation emplace" << std::endl; + std::cerr << " --path-delimiter,-p=; default=/" << std::endl; + std::cerr << std::endl; + std::cerr << "path-dirname" << std::endl; + std::cerr << " options" << std::endl; + std::cerr << " --depth=; default=1; if path length less than depth, output empty string" << std::endl; + std::cerr << " --emplace; perform operation emplace" << std::endl; + std::cerr << " --fixed-depth=[]; output paths of fixed depth starting from root" << std::endl; + std::cerr << " --path-delimiter,-p=; default=/" << std::endl; + std::cerr << std::endl; + std::cerr << "csv options:" << std::endl; + std::cerr << comma::csv::options::usage( "", verbose ) << std::endl; + std::cerr << std::endl; + exit( 0 ); +} + +static comma::csv::options csv; + +namespace comma { namespace applications { namespace strings { namespace path { + +struct input +{ + std::vector< std::string > strings; + input( unsigned int n = 0 ): strings( n ) {} +}; + +} } } } // namespace comma { namespace applications { namespace strings { namespace path { + +namespace comma { namespace visiting { + +template <> struct traits< comma::applications::strings::path::input > +{ + template < typename K, typename V > static void visit( const K&, const comma::applications::strings::path::input& p, V& v ) { v.apply( "strings", p.strings ); } + template < typename K, typename V > static void visit( const K&, comma::applications::strings::path::input& p, V& v ) { v.apply( "strings", p.strings ); } +}; + +} } // namespace comma { namespace visiting { + +namespace comma { namespace applications { namespace strings { namespace path { + +template < typename T > +static int run( const comma::command_line_options& options ) +{ + auto v = comma::split( ::csv.fields, ',' ); + unsigned int n = 0; + for( unsigned int i = 0; i < v.size(); ++i ) + { + if( v.empty() ) { continue; } + v[i] = "strings[" + boost::lexical_cast< std::string >( i ) + "]"; + ++n; + } + if( n == 0 ) { std::cerr << "csv-strings: path-" << 
T::name() << ": please specify at least one non-empty field" << std::endl; exit( 1 ); } + ::csv.fields = comma::join( v, ',' ); + comma::csv::input_stream< input > istream( std::cin, ::csv, input( n ) ); + std::function< void( const input& p ) > write; + auto run_ = [&]()->int + { + while( istream.ready() || std::cin.good() ) + { + const input* p = istream.read(); + if( !p ) { break; } + input r( n ); + for( unsigned int i = 0; i < p->strings.size(); ++i ) { r.strings[i] = T::convert( p->strings[i] ); } + write( r ); + if( ::csv.flush ) { std::cout.flush(); } + } + return 0; + }; + if( options.exists( "--emplace" ) ) + { + comma::csv::passed< input > passed( istream, std::cout, ::csv.flush ); + write = [&]( const input& p ) { passed.write( p ); }; + return run_(); + } + comma::csv::options output_csv = ::csv; + output_csv.fields = "strings"; + if( ::csv.binary() ) { std::cerr << "csv-strings: path-" << T::name() << ": binary mode supported only for --emplace; todo, just ask" << std::endl; exit( 1 ); } + comma::csv::output_stream< input > ostream( std::cout, output_csv, input( n ) ); + comma::csv::tied< input, input > tied( istream, ostream ); + write = [&]( const input& p ) { tied.append( p ); }; + return run_(); +} + +struct basename +{ + static const char* name() { return "basename"; } + static std::string convert( const std::string& s ) { return "basename: todo"; } +}; + +struct dirname +{ + static const char* name() { return "dirname"; } + static std::string convert( const std::string& s ) { return "dirname: todo"; } +}; + +} } } } // namespace comma { namespace applications { namespace strings { namespace path { + +int main( int ac, char** av ) +{ + try + { + comma::command_line_options options( ac, av, usage ); + const auto& unnamed = options.unnamed( "--flush,--verbose,-v,--emplace", "-.*" ); + if( unnamed.empty() ) { std::cerr << "csv-strings: please specify operation" << std::endl; return 1; } + std::string operation = unnamed[0]; + csv = 
comma::csv::options( options ); + if( operation == "path-basename" ) { return comma::applications::strings::path::run< comma::applications::strings::path::basename >( options ); } + if( operation == "path-dirname" ) { return comma::applications::strings::path::run< comma::applications::strings::path::dirname >( options ); } + std::cerr << "csv-strings: expection operation; got: '" << operation << "'" << std::endl; + return 1; + } + catch( std::exception& ex ) { std::cerr << "csv-strings: " << ex.what() << std::endl; } + catch( ... ) { std::cerr << "csv-strings: unknown exception" << std::endl; } + return 1; +} From cf0fcc4afcb26ef068239781c13bcef14d9937c7 Mon Sep 17 00:00:00 2001 From: seva Date: Fri, 24 Jan 2020 13:01:05 +1100 Subject: [PATCH 0112/1056] csv-string: basename, dirname: implemented, tested --- csv/applications/csv-strings.cpp | 83 +++++++++++++++++---- csv/test/csv-strings/expected | 122 +++++++++++++++++++++++++++++++ csv/test/csv-strings/input | 48 ++++++++++++ 3 files changed, 239 insertions(+), 14 deletions(-) create mode 100644 csv/test/csv-strings/expected create mode 100644 csv/test/csv-strings/input diff --git a/csv/applications/csv-strings.cpp b/csv/applications/csv-strings.cpp index b42e5a86d..cc1318f1c 100644 --- a/csv/applications/csv-strings.cpp +++ b/csv/applications/csv-strings.cpp @@ -60,6 +60,7 @@ #include #include #include "../../application/command_line_options.h" +#include "../../base/exception.h" #include "../../csv/stream.h" #include "../../csv/traits.h" #include "../../string/string.h" @@ -72,12 +73,14 @@ static void usage( bool verbose ) std::cerr << " usage: cat input.csv | csv-strings [] > output.csv" << std::endl; std::cerr << std::endl; std::cerr << "operations" << std::endl; - std::cerr << " path-basename" << std::endl; - std::cerr << " path-dirname" << std::endl; + std::cerr << " path-basename,basename" << std::endl; + std::cerr << " path-dirname,dirname" << std::endl; std::cerr << std::endl; - std::cerr << " options" << 
std::endl; - std::cerr << " --fields=[]; will perform operation on any non-empty fields" << std::endl; - std::cerr << " unless different semantics specified for operation" << std::endl; + std::cerr << "options" << std::endl; + std::cerr << " --fields=[]; will perform operation on any non-empty fields" << std::endl; + std::cerr << " unless different semantics specified for operation" << std::endl; + std::cerr << " default: perform operation on the first field" << std::endl; + std::cerr << " --strict; exit on strings on which operation does not make sense" << std::endl; std::cerr << std::endl; std::cerr << "path-basename" << std::endl; std::cerr << " options" << std::endl; @@ -98,6 +101,7 @@ static void usage( bool verbose ) exit( 0 ); } +static bool strict; static comma::csv::options csv; namespace comma { namespace applications { namespace strings { namespace path { @@ -129,22 +133,24 @@ static int run( const comma::command_line_options& options ) unsigned int n = 0; for( unsigned int i = 0; i < v.size(); ++i ) { - if( v.empty() ) { continue; } - v[i] = "strings[" + boost::lexical_cast< std::string >( i ) + "]"; + if( v[i].empty() ) { continue; } + v[i] = "strings[" + boost::lexical_cast< std::string >( n ) + "]"; ++n; } - if( n == 0 ) { std::cerr << "csv-strings: path-" << T::name() << ": please specify at least one non-empty field" << std::endl; exit( 1 ); } - ::csv.fields = comma::join( v, ',' ); + ::csv.fields = n == 0 ? 
std::string( "strings[0]" ) : comma::join( v, ',' ); + if( n == 0 ) { ++n; } + char delimiter = options.value( "--path-delimiter,-p", '/' ); comma::csv::input_stream< input > istream( std::cin, ::csv, input( n ) ); std::function< void( const input& p ) > write; auto run_ = [&]()->int { + T t( options ); while( istream.ready() || std::cin.good() ) { const input* p = istream.read(); if( !p ) { break; } input r( n ); - for( unsigned int i = 0; i < p->strings.size(); ++i ) { r.strings[i] = T::convert( p->strings[i] ); } + for( unsigned int i = 0; i < p->strings.size(); ++i ) { r.strings[i] = t.convert( comma::split( p->strings[i], delimiter ) ); } write( r ); if( ::csv.flush ) { std::cout.flush(); } } @@ -167,14 +173,62 @@ static int run( const comma::command_line_options& options ) struct basename { + unsigned int depth; + char delimiter; + static const char* name() { return "basename"; } - static std::string convert( const std::string& s ) { return "basename: todo"; } + + basename( const comma::command_line_options& options ) + : depth( options.value( "--depth", 1 ) ) + , delimiter( options.value( "--path-delimiter,-p", '/' ) ) + { + } + + std::string convert( const std::vector< std::string >& s ) + { + if( s.size() < depth ) + { + if( strict ) { COMMA_THROW( comma::exception, "expected path depth at least " << depth << "; got: '" << comma::join( s, delimiter ) << "'" ); } + return ""; + } + return comma::join( s.end() - depth, s.end(), delimiter ); + } }; struct dirname { + unsigned int depth; + unsigned int fixed_depth; + char delimiter; + static const char* name() { return "dirname"; } - static std::string convert( const std::string& s ) { return "dirname: todo"; } + + dirname( const comma::command_line_options& options ) + : depth( options.value( "--depth", 1 ) ) + , fixed_depth( options.value( "--fixed-depth", 0 ) ) + , delimiter( options.value( "--path-delimiter,-p", '/' ) ) + { + options.assert_mutually_exclusive( "--depth,--fixed-depth" ); + } + + std::string 
convert( const std::vector< std::string >& s ) + { + if( fixed_depth > 0 ) + { + if( s.size() < fixed_depth ) + { + if( strict ) { COMMA_THROW( comma::exception, "expected path depth at least " << fixed_depth << "; got: '" << comma::join( s, delimiter ) << "'" ); } + return ""; + } + return comma::join( s, fixed_depth, delimiter ); + } + if( s.size() < depth ) + { + if( strict ) { COMMA_THROW( comma::exception, "expected path depth at least " << depth << "; got: '" << comma::join( s, '/' ) << "'" ); } + return ""; + } + return comma::join( s.begin(), s.end() - depth, delimiter ); + } }; } } } } // namespace comma { namespace applications { namespace strings { namespace path { @@ -187,9 +241,10 @@ int main( int ac, char** av ) const auto& unnamed = options.unnamed( "--flush,--verbose,-v,--emplace", "-.*" ); if( unnamed.empty() ) { std::cerr << "csv-strings: please specify operation" << std::endl; return 1; } std::string operation = unnamed[0]; + strict = options.exists( "--strict" ); csv = comma::csv::options( options ); - if( operation == "path-basename" ) { return comma::applications::strings::path::run< comma::applications::strings::path::basename >( options ); } - if( operation == "path-dirname" ) { return comma::applications::strings::path::run< comma::applications::strings::path::dirname >( options ); } + if( operation == "path-basename" || operation == "basename" ) { return comma::applications::strings::path::run< comma::applications::strings::path::basename >( options ); } + if( operation == "path-dirname" || operation == "dirname" ) { return comma::applications::strings::path::run< comma::applications::strings::path::dirname >( options ); } std::cerr << "csv-strings: expection operation; got: '" << operation << "'" << std::endl; return 1; } diff --git a/csv/test/csv-strings/expected b/csv/test/csv-strings/expected new file mode 100644 index 000000000..47409cc92 --- /dev/null +++ b/csv/test/csv-strings/expected @@ -0,0 +1,122 @@ 
+basename/append[0]/output/line[0]="a,a" +basename/append[0]/output/line[1]="a/b,b" +basename/append[0]/output/line[2]="a/b/c,c" +basename/append[1]/output/line[0]="a," +basename/append[1]/output/line[1]="a/b,a/b" +basename/append[1]/output/line[2]="a/b/c,b/c" +basename/append[2]/output/line[0]="a," +basename/append[2]/output/line[1]="a/b," +basename/append[2]/output/line[2]="a/b/c,a/b/c" +basename/append[3]/output/line[0]="a," +basename/append[3]/output/line[1]="a/b," +basename/append[3]/output/line[2]="a/b/c," + +basename/emplace[0]/output/line[0]="a" +basename/emplace[0]/output/line[1]="b" +basename/emplace[0]/output/line[2]="c" +basename/emplace[1]/output/line[0]="" +basename/emplace[1]/output/line[1]="a/b" +basename/emplace[1]/output/line[2]="b/c" +basename/emplace[2]/output/line[0]="" +basename/emplace[2]/output/line[1]="" +basename/emplace[2]/output/line[2]="a/b/c" +basename/emplace[3]/output/line[0]="," +basename/emplace[3]/output/line[1]="," +basename/emplace[3]/output/line[2]="," + +dirname/depth/append[0]/output/line[0]="a," +dirname/depth/append[0]/output/line[1]="a/b,a" +dirname/depth/append[0]/output/line[2]="a/b/c,a/b" +dirname/depth/append[1]/output/line[0]="a," +dirname/depth/append[1]/output/line[1]="a/b," +dirname/depth/append[1]/output/line[2]="a/b/c,a" +dirname/depth/append[2]/output/line[0]="a," +dirname/depth/append[2]/output/line[1]="a/b," +dirname/depth/append[2]/output/line[2]="a/b/c," +dirname/depth/append[3]/output/line[0]="a," +dirname/depth/append[3]/output/line[1]="a/b," +dirname/depth/append[3]/output/line[2]="a/b/c," + +dirname/depth/emplace[0]/output/line[0]="" +dirname/depth/emplace[0]/output/line[1]="a" +dirname/depth/emplace[0]/output/line[2]="a/b" +dirname/depth/emplace[1]/output/line[0]="" +dirname/depth/emplace[1]/output/line[1]="" +dirname/depth/emplace[1]/output/line[2]="a" +dirname/depth/emplace[2]/output/line[0]="," +dirname/depth/emplace[2]/output/line[1]="," +dirname/depth/emplace[2]/output/line[2]="," 
+dirname/depth/emplace[3]/output/line[0]="," +dirname/depth/emplace[3]/output/line[1]="," +dirname/depth/emplace[3]/output/line[2]="," + +dirname/fixed_depth/append[0]/output/line[0]="a," +dirname/fixed_depth/append[0]/output/line[1]="a/b,a" +dirname/fixed_depth/append[0]/output/line[2]="a/b/c,a/b" +dirname/fixed_depth/append[1]/output/line[0]="a," +dirname/fixed_depth/append[1]/output/line[1]="a/b,a/b" +dirname/fixed_depth/append[1]/output/line[2]="a/b/c,a/b" +dirname/fixed_depth/append[2]/output/line[0]="a," +dirname/fixed_depth/append[2]/output/line[1]="a/b," +dirname/fixed_depth/append[2]/output/line[2]="a/b/c,a/b/c" +dirname/fixed_depth/append[3]/output/line[0]="a," +dirname/fixed_depth/append[3]/output/line[1]="a/b," +dirname/fixed_depth/append[3]/output/line[2]="a/b/c," + +dirname/fixed_depth/emplace[0]/output/line[0]="" +dirname/fixed_depth/emplace[0]/output/line[1]="a" +dirname/fixed_depth/emplace[0]/output/line[2]="a/b" +dirname/fixed_depth/emplace[1]/output/line[0]="" +dirname/fixed_depth/emplace[1]/output/line[1]="a/b" +dirname/fixed_depth/emplace[1]/output/line[2]="a/b" +dirname/fixed_depth/emplace[2]/output/line[0]="" +dirname/fixed_depth/emplace[2]/output/line[1]="" +dirname/fixed_depth/emplace[2]/output/line[2]="a/b/c" +dirname/fixed_depth/emplace[3]/output/line[0]="," +dirname/fixed_depth/emplace[3]/output/line[1]="," +dirname/fixed_depth/emplace[3]/output/line[2]="," + +delimiter[0]/output/line[0]="a.a" +delimiter[0]/output/line[1]="a.b.a" +delimiter[0]/output/line[2]="a.b/c.d/e.a" +delimiter[1]/output/line[0]="a" +delimiter[1]/output/line[1]="a.b" +delimiter[1]/output/line[2]="a.b/c.d/e" +delimiter[2]/output/line[0]="a." +delimiter[2]/output/line[1]="a.b." +delimiter[2]/output/line[2]="a.b/c.d/e." 
+delimiter[3]/output/line[0]="" +delimiter[3]/output/line[1]=".b" +delimiter[3]/output/line[2]=".b/c.d/e" + +strict[0]/output="a/b/c,a/b/c" +strict[0]/status=1 +strict[1]/output="a/b/c" +strict[1]/status=1 +strict[2]/output="a/b/c," +strict[2]/status=1 +strict[3]/output="" +strict[3]/status=1 +strict[4]/output="a/b/c,a/b/c" +strict[4]/status=1 +strict[5]/output="a/b/c" +strict[5]/status=1 + +fields[0]/output/line[0]="k,,a,,x,a,x" +fields[0]/output/line[1]="l,,a/b,,x/y,b,y" +fields[0]/output/line[2]="m,,a/b/c,,x/y/z,c,z" +fields[1]/output/line[0]="k,,a,,x" +fields[1]/output/line[1]="l,,b,,y" +fields[1]/output/line[2]="m,,c,,z" +fields[2]/output/line[0]="k,,,," +fields[2]/output/line[1]="l,,a/b,,x/y" +fields[2]/output/line[2]="m,,b/c,,y/z" +fields[3]/output/line[0]="k,,a,,x,," +fields[3]/output/line[1]="l,,a/b,,x/y,a,x" +fields[3]/output/line[2]="m,,a/b/c,,x/y/z,a/b,x/y" +fields[4]/output/line[0]="k,,,," +fields[4]/output/line[1]="l,,a,,x" +fields[4]/output/line[2]="m,,a/b,,x/y" +fields[5]/output/line[0]="k,,,," +fields[5]/output/line[1]="l,,,," +fields[5]/output/line[2]="m,,a,,x" diff --git a/csv/test/csv-strings/input b/csv/test/csv-strings/input new file mode 100644 index 000000000..4c176c25f --- /dev/null +++ b/csv/test/csv-strings/input @@ -0,0 +1,48 @@ +basename/append[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename" +basename/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --depth 2" +basename/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --depth 3" +basename/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --depth 4" + +basename/emplace[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --emplace" +basename/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --depth 2 --emplace" +basename/emplace[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --depth 3 --emplace" +basename/emplace[3]="( echo a,; echo a/b,; echo a/b/c, 
) | csv-strings path-basename --depth 4 --emplace" + +dirname/depth/append[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname" +dirname/depth/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --depth 2" +dirname/depth/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --depth 3" +dirname/depth/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --depth 4" + +dirname/depth/emplace[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --emplace" +dirname/depth/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --depth 2 --emplace" +dirname/depth/emplace[2]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --depth 3 --emplace" +dirname/depth/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --depth 4 --emplace" + +dirname/fixed_depth/append[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname" +dirname/fixed_depth/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --fixed-depth 2" +dirname/fixed_depth/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --fixed-depth 3" +dirname/fixed_depth/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --fixed-depth 4" + +dirname/fixed_depth/emplace[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --emplace" +dirname/fixed_depth/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --fixed-depth 2 --emplace" +dirname/fixed_depth/emplace[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --fixed-depth 3 --emplace" +dirname/fixed_depth/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --fixed-depth 4 --emplace" + +delimiter[0]="( echo a; echo a.b; echo a.b/c.d/e ) | csv-strings path-basename --delimiter=." +delimiter[1]="( echo a; echo a.b; echo a.b/c.d/e ) | csv-strings path-basename --delimiter=. 
--emplace" +delimiter[2]="( echo a; echo a.b; echo a.b/c.d/e ) | csv-strings path-dirname --delimiter=." +delimiter[3]="( echo a; echo a.b; echo a.b/c.d/e ) | csv-strings path-dirname --delimiter=. --emplace" + +strict[0]="( echo a/b/c; echo a/b ) | csv-strings path-basename --depth 3 --strict" +strict[1]="( echo a/b/c; echo a/b ) | csv-strings path-basename --depth 3 --emplace --strict" +strict[2]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --depth 3 --strict" +strict[3]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --depth 3 --emplace --strict" +strict[4]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --fixed-depth 3 --strict" +strict[5]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --fixed-depth 3 --emplace --strict" + +fields[0]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-basename --fields ,,m,,n" +fields[1]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-basename --fields ,,m,,n --emplace" +fields[2]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-basename --fields ,,m,,n --emplace --depth 2" +fields[3]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-dirname --fields ,,m,,n" +fields[4]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-dirname --fields ,,m,,n --emplace" +fields[5]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-dirname --fields ,,m,,n --emplace --depth 2" From 698259fddcdba67cd163de9c5f755cae54854aad Mon Sep 17 00:00:00 2001 From: seva Date: Fri, 24 Jan 2020 14:54:39 +1100 Subject: [PATCH 0113/1056] csv-strings: path-canonical operation implemented --- csv/applications/csv-strings.cpp | 44 ++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/csv/applications/csv-strings.cpp b/csv/applications/csv-strings.cpp index cc1318f1c..1baa1741c 100644 --- a/csv/applications/csv-strings.cpp +++ 
b/csv/applications/csv-strings.cpp @@ -58,6 +58,7 @@ #include #include +#include #include #include "../../application/command_line_options.h" #include "../../base/exception.h" @@ -75,26 +76,30 @@ static void usage( bool verbose ) std::cerr << "operations" << std::endl; std::cerr << " path-basename,basename" << std::endl; std::cerr << " path-dirname,dirname" << std::endl; + std::cerr << " path-realpath,path-canonical,canonical" << std::endl; std::cerr << std::endl; std::cerr << "options" << std::endl; + std::cerr << " --emplace; perform operation emplace" << std::endl; std::cerr << " --fields=[]; will perform operation on any non-empty fields" << std::endl; std::cerr << " unless different semantics specified for operation" << std::endl; std::cerr << " default: perform operation on the first field" << std::endl; std::cerr << " --strict; exit on strings on which operation does not make sense" << std::endl; std::cerr << std::endl; - std::cerr << "path-basename" << std::endl; + std::cerr << "path-basename,basename" << std::endl; std::cerr << " options" << std::endl; std::cerr << " --depth=; default=1; if path length less than depth, output empty string" << std::endl; - std::cerr << " --emplace; perform operation emplace" << std::endl; std::cerr << " --path-delimiter,-p=; default=/" << std::endl; std::cerr << std::endl; - std::cerr << "path-dirname" << std::endl; + std::cerr << "path-dirname,dirname" << std::endl; std::cerr << " options" << std::endl; std::cerr << " --depth=; default=1; if path length less than depth, output empty string" << std::endl; - std::cerr << " --emplace; perform operation emplace" << std::endl; std::cerr << " --fixed-depth=[]; output paths of fixed depth starting from root" << std::endl; std::cerr << " --path-delimiter,-p=; default=/" << std::endl; std::cerr << std::endl; + std::cerr << "path-realpath,path-canonical,canonical" << std::endl; + std::cerr << " options" << std::endl; + std::cerr << " --base=[]; base path, default: current directory" 
<< std::endl; + std::cerr << std::endl; std::cerr << "csv options:" << std::endl; std::cerr << comma::csv::options::usage( "", verbose ) << std::endl; std::cerr << std::endl; @@ -139,7 +144,6 @@ static int run( const comma::command_line_options& options ) } ::csv.fields = n == 0 ? std::string( "strings[0]" ) : comma::join( v, ',' ); if( n == 0 ) { ++n; } - char delimiter = options.value( "--path-delimiter,-p", '/' ); comma::csv::input_stream< input > istream( std::cin, ::csv, input( n ) ); std::function< void( const input& p ) > write; auto run_ = [&]()->int @@ -150,7 +154,7 @@ static int run( const comma::command_line_options& options ) const input* p = istream.read(); if( !p ) { break; } input r( n ); - for( unsigned int i = 0; i < p->strings.size(); ++i ) { r.strings[i] = t.convert( comma::split( p->strings[i], delimiter ) ); } + for( unsigned int i = 0; i < p->strings.size(); ++i ) { r.strings[i] = t.convert( p->strings[i] ); } write( r ); if( ::csv.flush ) { std::cout.flush(); } } @@ -184,8 +188,9 @@ struct basename { } - std::string convert( const std::vector< std::string >& s ) + std::string convert( const std::string& t ) { + const auto& s = comma::split( t, delimiter ); if( s.size() < depth ) { if( strict ) { COMMA_THROW( comma::exception, "expected path depth at least " << depth << "; got: '" << comma::join( s, delimiter ) << "'" ); } @@ -211,8 +216,9 @@ struct dirname options.assert_mutually_exclusive( "--depth,--fixed-depth" ); } - std::string convert( const std::vector< std::string >& s ) + std::string convert( const std::string& t ) { + const auto& s = comma::split( t, delimiter ); if( fixed_depth > 0 ) { if( s.size() < fixed_depth ) @@ -231,6 +237,27 @@ struct dirname } }; +struct canonical +{ + boost::filesystem::path base; + + static const char* name() { return "canonical"; } + + canonical( const comma::command_line_options& options ) + : base( options.exists( "--base" ) + ? 
boost::filesystem::path( options.value< std::string >( "--base" ) ) + : boost::filesystem::current_path() ) + { + if( ( options.value( "--path-delimiter,-p", '/' ) ) != '/' ) { COMMA_THROW( comma::exception, "path-canonical: expected path delimiter '/'; got: '" << options.value( "--path-delimiter,-p", '/' ) << "'" ); } + } + + std::string convert( const std::string& s ) + { + try { return boost::filesystem::canonical( boost::filesystem::path( s ), base ).string(); } catch( ... ) { if( strict ) { throw; } } + return s; + } +}; + } } } } // namespace comma { namespace applications { namespace strings { namespace path { int main( int ac, char** av ) @@ -245,6 +272,7 @@ int main( int ac, char** av ) csv = comma::csv::options( options ); if( operation == "path-basename" || operation == "basename" ) { return comma::applications::strings::path::run< comma::applications::strings::path::basename >( options ); } if( operation == "path-dirname" || operation == "dirname" ) { return comma::applications::strings::path::run< comma::applications::strings::path::dirname >( options ); } + if( operation == "path-realpath" || operation == "path-canonical" || operation == "canonical" ) { return comma::applications::strings::path::run< comma::applications::strings::path::canonical >( options ); } std::cerr << "csv-strings: expection operation; got: '" << operation << "'" << std::endl; return 1; } From 3f4288eeefae03ef050b61beef8d16da7183cd3c Mon Sep 17 00:00:00 2001 From: seva Date: Fri, 24 Jan 2020 14:57:44 +1100 Subject: [PATCH 0114/1056] csv-strings: path-realpath renamed to path-real --- csv/applications/csv-strings.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/csv/applications/csv-strings.cpp b/csv/applications/csv-strings.cpp index 1baa1741c..1f9d5cf36 100644 --- a/csv/applications/csv-strings.cpp +++ b/csv/applications/csv-strings.cpp @@ -76,7 +76,7 @@ static void usage( bool verbose ) std::cerr << "operations" << std::endl; std::cerr << " 
path-basename,basename" << std::endl; std::cerr << " path-dirname,dirname" << std::endl; - std::cerr << " path-realpath,path-canonical,canonical" << std::endl; + std::cerr << " path-real,path-canonical,canonical" << std::endl; std::cerr << std::endl; std::cerr << "options" << std::endl; std::cerr << " --emplace; perform operation emplace" << std::endl; @@ -96,7 +96,7 @@ static void usage( bool verbose ) std::cerr << " --fixed-depth=[]; output paths of fixed depth starting from root" << std::endl; std::cerr << " --path-delimiter,-p=; default=/" << std::endl; std::cerr << std::endl; - std::cerr << "path-realpath,path-canonical,canonical" << std::endl; + std::cerr << "path-real,path-canonical,canonical" << std::endl; std::cerr << " options" << std::endl; std::cerr << " --base=[]; base path, default: current directory" << std::endl; std::cerr << std::endl; @@ -272,7 +272,7 @@ int main( int ac, char** av ) csv = comma::csv::options( options ); if( operation == "path-basename" || operation == "basename" ) { return comma::applications::strings::path::run< comma::applications::strings::path::basename >( options ); } if( operation == "path-dirname" || operation == "dirname" ) { return comma::applications::strings::path::run< comma::applications::strings::path::dirname >( options ); } - if( operation == "path-realpath" || operation == "path-canonical" || operation == "canonical" ) { return comma::applications::strings::path::run< comma::applications::strings::path::canonical >( options ); } + if( operation == "path-real" || operation == "path-canonical" || operation == "canonical" ) { return comma::applications::strings::path::run< comma::applications::strings::path::canonical >( options ); } std::cerr << "csv-strings: expection operation; got: '" << operation << "'" << std::endl; return 1; } From 77d22ff530945bd1a07e11efcdf6b0b5abef4125 Mon Sep 17 00:00:00 2001 From: seva Date: Fri, 24 Jan 2020 19:18:18 +1100 Subject: [PATCH 0115/1056] string: split_bracketed(): first cut 
implemented --- string/split.cpp | 40 ++++++++++++++++++++++++++++++++++++- string/split.h | 11 +++++----- string/test/string_test.cpp | 33 ++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/string/split.cpp b/string/split.cpp index fd09fe931..25b4d80ae 100644 --- a/string/split.cpp +++ b/string/split.cpp @@ -70,7 +70,7 @@ std::vector< std::string > split_escaped( const std::string & s, const char * se std::vector< std::string > v; const char* begin( &s[0] ); const char* const end( begin + s.length() ); - boost::optional quoted; + boost::optional< char > quoted; v.push_back( std::string() ); for( const char* p = begin; p < end; ++p ) { @@ -108,4 +108,42 @@ std::vector< std::string > split_escaped( const std::string & s, char separator, return split_escaped( s, separators, quotes, escape ); } +std::vector< std::string > split_bracketed( const std::string& s, const char* separators, char lbracket, char rbracket ) +{ + std::vector< std::string > v; + const char* begin( &s[0] ); + const char* const end( begin + s.length() ); + unsigned int depth = 0; + v.push_back( std::string() ); + for( const char* p = begin; p < end; ++p ) + { + if( lbracket == *p ) + { + ++depth; + v.back() += *p; + } + else if( rbracket == *p ) + { + if( depth > 0 ) { --depth; } + v.back() += *p; + } + else if( depth == 0 && string::is_one_of( *p, separators ) ) + { + v.push_back( std::string() ); + } + else + { + v.back() += *p; + } + } + return v; +} + +std::vector< std::string > split_bracketed( const std::string& s, char separator, char lbracket, char rbracket ) +{ + const char separators[] = { separator, 0 }; + return split_bracketed( s, separators, lbracket, rbracket ); +} + + } // namespace comma { diff --git a/string/split.h b/string/split.h index ec8f176e3..8de556dbc 100644 --- a/string/split.h +++ b/string/split.h @@ -27,11 +27,9 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH 
DAMAGE. - /// @author vsevolod vlaskine -#ifndef COMMA_STRING_SPLIT_H_ -#define COMMA_STRING_SPLIT_H_ +#pragma once #include #include @@ -59,7 +57,7 @@ std::vector< std::string > split( const std::string& s, char separator, bool emp /// An escape character will only escape a delimiter, quote or escape character; /// escaping any other character will result in both being kept; /// e.g. c:\windows\ will be kept as c:\windows\ with the trailing backslash -/// e.g. fname;delimiter=\\;field=a,b will be kept as fname;delimiter=\;field=a,b +/// e.g. filename;delimiter=\\;fields=a,b will be kept as filename;delimiter=\;fields=a,b /// /// A quote may be anywhere in a string. Quotes must be closed; i.e Each start /// quote must be paired with an end quote, or an exception is thrown. @@ -69,7 +67,10 @@ std::vector< std::string > split_escaped( const std::string& s, const char * sep /// split string into tokens; always contains at least one element; /// skips backslash escaped seperator, handle boolean quotes std::vector< std::string > split_escaped( const std::string& s, char separator, const char * quotes = "\"\'", char escape = '\\' ); +/// skips bracketed separators +std::vector< std::string > split_bracketed( const std::string& s, const char * separators = " ", char lbracket = '(', char rbrackets = ')' ); +/// skips bracketed separators +std::vector< std::string > split_bracketed( const std::string& s, char separator, char lbracket = '(', char rbracket = ')' ); } // namespace comma { -#endif // COMMA_STRING_SPLIT_H_ diff --git a/string/test/string_test.cpp b/string/test/string_test.cpp index 4fb612134..00a6775b3 100644 --- a/string/test/string_test.cpp +++ b/string/test/string_test.cpp @@ -282,6 +282,39 @@ TEST( string, split_escaped_quoted ) } } +TEST( string, split_bracketed ) +{ + { + std::vector< std::string > v( split_bracketed( "" ) ); + EXPECT_EQ( 1u, v.size() ); + EXPECT_EQ( "", v[0] ); + } + { + std::vector< std::string > v( split_bracketed( ")()" ) ); + 
EXPECT_EQ( 1u, v.size() ); + EXPECT_EQ( ")()", v[0] ); + } + { + std::vector< std::string > v( split_bracketed( ")(,)" ) ); + EXPECT_EQ( 1u, v.size() ); + EXPECT_EQ( ")(,)", v[0] ); + } + { + std::vector< std::string > v( split_bracketed( "a[,b,c],d", ',', '[', ']' ) ); + EXPECT_EQ( 2u, v.size() ); + EXPECT_EQ( "a[,b,c]", v[0] ); + EXPECT_EQ( "d", v[1] ); + } + { + std::vector< std::string > v( split_bracketed( "a,( b, c, d ),e( f ( g, h ) ), i", ',' ) ); + EXPECT_EQ( 4u, v.size() ); + EXPECT_EQ( "a", v[0] ); + EXPECT_EQ( "( b, c, d )", v[1] ); + EXPECT_EQ( "e( f ( g, h ) )", v[2] ); + EXPECT_EQ( " i", v[3] ); + } +} + TEST( string, strip ) { EXPECT_EQ( strip( "", ";" ), "" ); From 4aff56b60f57d9aa43972b0985bc03d08c8f7f09 Mon Sep 17 00:00:00 2001 From: seva Date: Fri, 24 Jan 2020 19:50:22 +1100 Subject: [PATCH 0116/1056] string: split_bracketed(): strip_brackets parameter added, default set to true --- string/split.cpp | 17 ++++++++++----- string/split.h | 4 ++-- string/test/string_test.cpp | 43 ++++++++++++++++++++++++++++--------- 3 files changed, 47 insertions(+), 17 deletions(-) diff --git a/string/split.cpp b/string/split.cpp index 25b4d80ae..8a4eb6888 100644 --- a/string/split.cpp +++ b/string/split.cpp @@ -108,7 +108,7 @@ std::vector< std::string > split_escaped( const std::string & s, char separator, return split_escaped( s, separators, quotes, escape ); } -std::vector< std::string > split_bracketed( const std::string& s, const char* separators, char lbracket, char rbracket ) +std::vector< std::string > split_bracketed( const std::string& s, const char* separators, char lbracket, char rbracket, bool strip_brackets ) { std::vector< std::string > v; const char* begin( &s[0] ); @@ -119,13 +119,20 @@ std::vector< std::string > split_bracketed( const std::string& s, const char* se { if( lbracket == *p ) { + if( strip_brackets && depth == 0 ) + { + if( !v.back().empty() ) { COMMA_THROW( comma::exception, "asked to strip brackets; expected opening bracket 
immediately following separator, got'" << s << "'" ); } + } + else + { + v.back() += *p; + } ++depth; - v.back() += *p; } else if( rbracket == *p ) { + if( !strip_brackets || depth > 1 ) { v.back() += *p; } if( depth > 0 ) { --depth; } - v.back() += *p; } else if( depth == 0 && string::is_one_of( *p, separators ) ) { @@ -139,10 +146,10 @@ std::vector< std::string > split_bracketed( const std::string& s, const char* se return v; } -std::vector< std::string > split_bracketed( const std::string& s, char separator, char lbracket, char rbracket ) +std::vector< std::string > split_bracketed( const std::string& s, char separator, char lbracket, char rbracket, bool strip_brackets ) { const char separators[] = { separator, 0 }; - return split_bracketed( s, separators, lbracket, rbracket ); + return split_bracketed( s, separators, lbracket, rbracket, strip_brackets ); } diff --git a/string/split.h b/string/split.h index 8de556dbc..a12403aa1 100644 --- a/string/split.h +++ b/string/split.h @@ -68,9 +68,9 @@ std::vector< std::string > split_escaped( const std::string& s, const char * sep /// skips backslash escaped seperator, handle boolean quotes std::vector< std::string > split_escaped( const std::string& s, char separator, const char * quotes = "\"\'", char escape = '\\' ); /// skips bracketed separators -std::vector< std::string > split_bracketed( const std::string& s, const char * separators = " ", char lbracket = '(', char rbrackets = ')' ); +std::vector< std::string > split_bracketed( const std::string& s, const char * separators = " ", char lbracket = '(', char rbrackets = ')', bool strip_brackets = true ); /// skips bracketed separators -std::vector< std::string > split_bracketed( const std::string& s, char separator, char lbracket = '(', char rbracket = ')' ); +std::vector< std::string > split_bracketed( const std::string& s, char separator, char lbracket = '(', char rbracket = ')', bool strip_brackets = true ); } // namespace comma { diff --git 
a/string/test/string_test.cpp b/string/test/string_test.cpp index 00a6775b3..8b9e75059 100644 --- a/string/test/string_test.cpp +++ b/string/test/string_test.cpp @@ -290,27 +290,50 @@ TEST( string, split_bracketed ) EXPECT_EQ( "", v[0] ); } { - std::vector< std::string > v( split_bracketed( ")()" ) ); + std::vector< std::string > v( split_bracketed( "()", ',' ) ); EXPECT_EQ( 1u, v.size() ); - EXPECT_EQ( ")()", v[0] ); + EXPECT_EQ( "", v[0] ); + } + { + std::vector< std::string > v( split_bracketed( "(),(),()", ',' ) ); + EXPECT_EQ( 3u, v.size() ); + EXPECT_EQ( "", v[0] ); + EXPECT_EQ( "", v[1] ); + EXPECT_EQ( "", v[2] ); } { - std::vector< std::string > v( split_bracketed( ")(,)" ) ); + std::vector< std::string > v( split_bracketed( ")()", ',', '(', ')', false ) ); EXPECT_EQ( 1u, v.size() ); - EXPECT_EQ( ")(,)", v[0] ); + EXPECT_EQ( ")()", v[0] ); } { - std::vector< std::string > v( split_bracketed( "a[,b,c],d", ',', '[', ']' ) ); - EXPECT_EQ( 2u, v.size() ); - EXPECT_EQ( "a[,b,c]", v[0] ); - EXPECT_EQ( "d", v[1] ); + std::vector< std::string > v( split_bracketed( "(),(,),(,)", ',' ) ); + EXPECT_EQ( 3u, v.size() ); + EXPECT_EQ( "", v[0] ); + EXPECT_EQ( ",", v[1] ); + EXPECT_EQ( ",", v[2] ); + } + { + std::vector< std::string > v( split_bracketed( "a,[,b,[c]],d", ',', '[', ']' ) ); + EXPECT_EQ( 3u, v.size() ); + EXPECT_EQ( "a", v[0] ); + EXPECT_EQ( ",b,[c]", v[1] ); + EXPECT_EQ( "d", v[2] ); + } + { + std::vector< std::string > v( split_bracketed( "a,( b, c, d ),( f ( g, h ) ), i", ',' ) ); + EXPECT_EQ( 4u, v.size() ); + EXPECT_EQ( "a", v[0] ); + EXPECT_EQ( " b, c, d ", v[1] ); + EXPECT_EQ( " f ( g, h ) ", v[2] ); + EXPECT_EQ( " i", v[3] ); } { - std::vector< std::string > v( split_bracketed( "a,( b, c, d ),e( f ( g, h ) ), i", ',' ) ); + std::vector< std::string > v( split_bracketed( "a,( b, c, d ),( f ( g, h ) ), i", ',', '(', ')', false ) ); EXPECT_EQ( 4u, v.size() ); EXPECT_EQ( "a", v[0] ); EXPECT_EQ( "( b, c, d )", v[1] ); - EXPECT_EQ( "e( f ( g, h ) )", v[2] 
); + EXPECT_EQ( "( f ( g, h ) )", v[2] ); EXPECT_EQ( " i", v[3] ); } } From d7a2dcddfba0124a3add3fe7d57a67d5cd1077df Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 28 Jan 2020 17:56:57 +1100 Subject: [PATCH 0117/1056] csv-string: minor refactoring --- csv/applications/csv-strings.cpp | 54 +++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/csv/applications/csv-strings.cpp b/csv/applications/csv-strings.cpp index 1f9d5cf36..2f2ec04b8 100644 --- a/csv/applications/csv-strings.cpp +++ b/csv/applications/csv-strings.cpp @@ -111,20 +111,23 @@ static comma::csv::options csv; namespace comma { namespace applications { namespace strings { namespace path { -struct input +template < typename T > +struct record { - std::vector< std::string > strings; - input( unsigned int n = 0 ): strings( n ) {} + std::vector< T > values; + record( unsigned int n = 0 ): values( n ) {} }; +typedef record< std::string > input; + } } } } // namespace comma { namespace applications { namespace strings { namespace path { namespace comma { namespace visiting { -template <> struct traits< comma::applications::strings::path::input > +template < typename T > struct traits< comma::applications::strings::path::record< T > > { - template < typename K, typename V > static void visit( const K&, const comma::applications::strings::path::input& p, V& v ) { v.apply( "strings", p.strings ); } - template < typename K, typename V > static void visit( const K&, comma::applications::strings::path::input& p, V& v ) { v.apply( "strings", p.strings ); } + template < typename K, typename V > static void visit( const K&, const comma::applications::strings::path::record< T >& p, V& v ) { v.apply( "values", p.values ); } + template < typename K, typename V > static void visit( const K&, comma::applications::strings::path::record< T >& p, V& v ) { v.apply( "values", p.values ); } }; } } // namespace comma { namespace visiting { @@ -139,10 +142,10 @@ static int run( const 
comma::command_line_options& options ) for( unsigned int i = 0; i < v.size(); ++i ) { if( v[i].empty() ) { continue; } - v[i] = "strings[" + boost::lexical_cast< std::string >( n ) + "]"; + v[i] = "values[" + boost::lexical_cast< std::string >( n ) + "]"; ++n; } - ::csv.fields = n == 0 ? std::string( "strings[0]" ) : comma::join( v, ',' ); + ::csv.fields = n == 0 ? std::string( "values[0]" ) : comma::join( v, ',' ); if( n == 0 ) { ++n; } comma::csv::input_stream< input > istream( std::cin, ::csv, input( n ) ); std::function< void( const input& p ) > write; @@ -153,8 +156,8 @@ static int run( const comma::command_line_options& options ) { const input* p = istream.read(); if( !p ) { break; } - input r( n ); - for( unsigned int i = 0; i < p->strings.size(); ++i ) { r.strings[i] = t.convert( p->strings[i] ); } + typename T::output_t r( n ); + for( unsigned int i = 0; i < p->values.size(); ++i ) { r.values[i] = t.convert( p->values[i] ); } write( r ); if( ::csv.flush ) { std::cout.flush(); } } @@ -167,16 +170,18 @@ static int run( const comma::command_line_options& options ) return run_(); } comma::csv::options output_csv = ::csv; - output_csv.fields = "strings"; + output_csv.fields = "values"; if( ::csv.binary() ) { std::cerr << "csv-strings: path-" << T::name() << ": binary mode supported only for --emplace; todo, just ask" << std::endl; exit( 1 ); } - comma::csv::output_stream< input > ostream( std::cout, output_csv, input( n ) ); - comma::csv::tied< input, input > tied( istream, ostream ); - write = [&]( const input& p ) { tied.append( p ); }; + comma::csv::output_stream< typename T::output_t > ostream( std::cout, output_csv, input( n ) ); + comma::csv::tied< input, typename T::output_t > tied( istream, ostream ); + write = [&]( const typename T::output_t& p ) { tied.append( p ); }; return run_(); } struct basename { + typedef input output_t; + unsigned int depth; char delimiter; @@ -202,6 +207,8 @@ struct basename struct dirname { + typedef input output_t; + 
unsigned int depth; unsigned int fixed_depth; char delimiter; @@ -239,6 +246,8 @@ struct dirname struct canonical { + typedef input output_t; + boost::filesystem::path base; static const char* name() { return "canonical"; } @@ -258,6 +267,22 @@ struct canonical } }; +// struct is_leaf // todo: refactor, reuse generic run +// { +// typedef record< unsigned int > output_t; +// +// std::string last; +// +// static const char* name() { return "is-leaf"; } +// +// is_leaf( const comma::command_line_options& ) {} +// +// unsigned int convert( const std::string& s ) +// { +// bool r = s +// } +// }; + } } } } // namespace comma { namespace applications { namespace strings { namespace path { int main( int ac, char** av ) @@ -272,6 +297,7 @@ int main( int ac, char** av ) csv = comma::csv::options( options ); if( operation == "path-basename" || operation == "basename" ) { return comma::applications::strings::path::run< comma::applications::strings::path::basename >( options ); } if( operation == "path-dirname" || operation == "dirname" ) { return comma::applications::strings::path::run< comma::applications::strings::path::dirname >( options ); } + if( operation == "path-is-leaf" || operation == "is-leaf" ) {} //{ return comma::applications::strings::path::run< comma::applications::strings::path::dirname >( options ); } if( operation == "path-real" || operation == "path-canonical" || operation == "canonical" ) { return comma::applications::strings::path::run< comma::applications::strings::path::canonical >( options ); } std::cerr << "csv-strings: expection operation; got: '" << operation << "'" << std::endl; return 1; From 91fa88e4fb858f9f7d22c1b379474ea22b208bd5 Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 28 Jan 2020 18:02:58 +1100 Subject: [PATCH 0118/1056] csv-string: commented code removed --- csv/applications/csv-strings.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/csv/applications/csv-strings.cpp b/csv/applications/csv-strings.cpp index 
2f2ec04b8..9acc1b6e1 100644 --- a/csv/applications/csv-strings.cpp +++ b/csv/applications/csv-strings.cpp @@ -267,22 +267,6 @@ struct canonical } }; -// struct is_leaf // todo: refactor, reuse generic run -// { -// typedef record< unsigned int > output_t; -// -// std::string last; -// -// static const char* name() { return "is-leaf"; } -// -// is_leaf( const comma::command_line_options& ) {} -// -// unsigned int convert( const std::string& s ) -// { -// bool r = s -// } -// }; - } } } } // namespace comma { namespace applications { namespace strings { namespace path { int main( int ac, char** av ) @@ -297,7 +281,6 @@ int main( int ac, char** av ) csv = comma::csv::options( options ); if( operation == "path-basename" || operation == "basename" ) { return comma::applications::strings::path::run< comma::applications::strings::path::basename >( options ); } if( operation == "path-dirname" || operation == "dirname" ) { return comma::applications::strings::path::run< comma::applications::strings::path::dirname >( options ); } - if( operation == "path-is-leaf" || operation == "is-leaf" ) {} //{ return comma::applications::strings::path::run< comma::applications::strings::path::dirname >( options ); } if( operation == "path-real" || operation == "path-canonical" || operation == "canonical" ) { return comma::applications::strings::path::run< comma::applications::strings::path::canonical >( options ); } std::cerr << "csv-strings: expection operation; got: '" << operation << "'" << std::endl; return 1; From ce440fac972947173504d82f73f4313a6c7300c9 Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 29 Jan 2020 14:48:39 +1100 Subject: [PATCH 0119/1056] csv-strings: --fixed-depth, --depth: replaced with --head and --tail --- csv/applications/csv-strings.cpp | 53 ++++++++++++++++---------------- csv/test/csv-strings/input | 52 +++++++++++++++---------------- 2 files changed, 53 insertions(+), 52 deletions(-) diff --git a/csv/applications/csv-strings.cpp 
b/csv/applications/csv-strings.cpp index 9acc1b6e1..c1fa2feea 100644 --- a/csv/applications/csv-strings.cpp +++ b/csv/applications/csv-strings.cpp @@ -87,13 +87,14 @@ static void usage( bool verbose ) std::cerr << std::endl; std::cerr << "path-basename,basename" << std::endl; std::cerr << " options" << std::endl; - std::cerr << " --depth=; default=1; if path length less than depth, output empty string" << std::endl; + std::cerr << " --head=; default=0; number of path elements at the beginning of the path to remove" << std::endl; + std::cerr << " --tail=; default=1; number of path elements at the end of the path to keep" << std::endl; std::cerr << " --path-delimiter,-p=; default=/" << std::endl; std::cerr << std::endl; std::cerr << "path-dirname,dirname" << std::endl; std::cerr << " options" << std::endl; - std::cerr << " --depth=; default=1; if path length less than depth, output empty string" << std::endl; - std::cerr << " --fixed-depth=[]; output paths of fixed depth starting from root" << std::endl; + std::cerr << " --head=; default=0; number of path elements at the beginning of the path to keep" << std::endl; + std::cerr << " --tail=; default=1; number of path elements at the end of the path to remove" << std::endl; std::cerr << " --path-delimiter,-p=; default=/" << std::endl; std::cerr << std::endl; std::cerr << "path-real,path-canonical,canonical" << std::endl; @@ -182,26 +183,32 @@ struct basename { typedef input output_t; - unsigned int depth; + unsigned int head; + unsigned int tail; char delimiter; static const char* name() { return "basename"; } basename( const comma::command_line_options& options ) - : depth( options.value( "--depth", 1 ) ) + : head( options.value( "--head", 0 ) ) + , tail( options.value( "--tail", 1 ) ) , delimiter( options.value( "--path-delimiter,-p", '/' ) ) { + options.assert_mutually_exclusive( "--head,--tail" ); } std::string convert( const std::string& t ) { const auto& s = comma::split( t, delimiter ); - if( s.size() < depth ) 
+ if( head > 0 ) { - if( strict ) { COMMA_THROW( comma::exception, "expected path depth at least " << depth << "; got: '" << comma::join( s, delimiter ) << "'" ); } + if( s.size() >= head ) { return comma::join( s.begin() + head, s.end(), delimiter ); } + if( strict ) { COMMA_THROW( comma::exception, "expected path depth at least " << head << "; got: '" << comma::join( s, delimiter ) << "'" ); } return ""; } - return comma::join( s.end() - depth, s.end(), delimiter ); + if( s.size() >= tail ) { return comma::join( s.end() - tail, s.end(), delimiter ); } + if( strict ) { COMMA_THROW( comma::exception, "expected path depth at least " << tail << "; got: '" << comma::join( s, delimiter ) << "'" ); } + return ""; } }; @@ -209,38 +216,32 @@ struct dirname { typedef input output_t; - unsigned int depth; - unsigned int fixed_depth; + unsigned int head; + unsigned int tail; char delimiter; static const char* name() { return "dirname"; } dirname( const comma::command_line_options& options ) - : depth( options.value( "--depth", 1 ) ) - , fixed_depth( options.value( "--fixed-depth", 0 ) ) + : head( options.value( "--head", 0 ) ) + , tail( options.value( "--tail", 1 ) ) , delimiter( options.value( "--path-delimiter,-p", '/' ) ) { - options.assert_mutually_exclusive( "--depth,--fixed-depth" ); + options.assert_mutually_exclusive( "--head,--tail" ); } std::string convert( const std::string& t ) { const auto& s = comma::split( t, delimiter ); - if( fixed_depth > 0 ) + if( head > 0 ) { - if( s.size() < fixed_depth ) - { - if( strict ) { COMMA_THROW( comma::exception, "expected path depth at least " << fixed_depth << "; got: '" << comma::join( s, delimiter ) << "'" ); } - return ""; - } - return comma::join( s, fixed_depth, delimiter ); - } - if( s.size() < depth ) - { - if( strict ) { COMMA_THROW( comma::exception, "expected path depth at least " << depth << "; got: '" << comma::join( s, '/' ) << "'" ); } + if( s.size() >= head ) { return comma::join( s.begin(), s.begin() + head, 
delimiter ); } + if( strict ) { COMMA_THROW( comma::exception, "expected path depth at least " << head << "; got: '" << comma::join( s, delimiter ) << "'" ); } return ""; } - return comma::join( s.begin(), s.end() - depth, delimiter ); + if( s.size() >= tail ) { return comma::join( s.begin(), s.end() - tail, delimiter ); } + if( strict ) { COMMA_THROW( comma::exception, "expected path depth at least " << tail << "; got: '" << comma::join( s, delimiter ) << "'" ); } + return ""; } }; @@ -274,7 +275,7 @@ int main( int ac, char** av ) try { comma::command_line_options options( ac, av, usage ); - const auto& unnamed = options.unnamed( "--flush,--verbose,-v,--emplace", "-.*" ); + const auto& unnamed = options.unnamed( "--flush,--verbose,-v,--emplace,--strict", "-.*" ); if( unnamed.empty() ) { std::cerr << "csv-strings: please specify operation" << std::endl; return 1; } std::string operation = unnamed[0]; strict = options.exists( "--strict" ); diff --git a/csv/test/csv-strings/input b/csv/test/csv-strings/input index 4c176c25f..3966a6308 100644 --- a/csv/test/csv-strings/input +++ b/csv/test/csv-strings/input @@ -1,48 +1,48 @@ basename/append[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename" -basename/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --depth 2" -basename/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --depth 3" -basename/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --depth 4" +basename/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --tail 2" +basename/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --tail 3" +basename/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --tail 4" basename/emplace[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --emplace" -basename/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --depth 2 --emplace" -basename/emplace[2]="( 
echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --depth 3 --emplace" -basename/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-basename --depth 4 --emplace" +basename/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --tail 2 --emplace" +basename/emplace[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --tail 3 --emplace" +basename/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-basename --tail 4 --emplace" dirname/depth/append[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname" -dirname/depth/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --depth 2" -dirname/depth/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --depth 3" -dirname/depth/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --depth 4" +dirname/depth/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 2" +dirname/depth/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 3" +dirname/depth/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 4" dirname/depth/emplace[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --emplace" -dirname/depth/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --depth 2 --emplace" -dirname/depth/emplace[2]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --depth 3 --emplace" -dirname/depth/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --depth 4 --emplace" +dirname/depth/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 2 --emplace" +dirname/depth/emplace[2]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --tail 3 --emplace" +dirname/depth/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --tail 4 --emplace" dirname/fixed_depth/append[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings 
path-dirname" -dirname/fixed_depth/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --fixed-depth 2" -dirname/fixed_depth/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --fixed-depth 3" -dirname/fixed_depth/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --fixed-depth 4" +dirname/fixed_depth/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 2" +dirname/fixed_depth/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 3" +dirname/fixed_depth/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 4" dirname/fixed_depth/emplace[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --emplace" -dirname/fixed_depth/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --fixed-depth 2 --emplace" -dirname/fixed_depth/emplace[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --fixed-depth 3 --emplace" -dirname/fixed_depth/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --fixed-depth 4 --emplace" +dirname/fixed_depth/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 2 --emplace" +dirname/fixed_depth/emplace[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 3 --emplace" +dirname/fixed_depth/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --head 4 --emplace" delimiter[0]="( echo a; echo a.b; echo a.b/c.d/e ) | csv-strings path-basename --delimiter=." delimiter[1]="( echo a; echo a.b; echo a.b/c.d/e ) | csv-strings path-basename --delimiter=. --emplace" delimiter[2]="( echo a; echo a.b; echo a.b/c.d/e ) | csv-strings path-dirname --delimiter=." delimiter[3]="( echo a; echo a.b; echo a.b/c.d/e ) | csv-strings path-dirname --delimiter=. 
--emplace" -strict[0]="( echo a/b/c; echo a/b ) | csv-strings path-basename --depth 3 --strict" -strict[1]="( echo a/b/c; echo a/b ) | csv-strings path-basename --depth 3 --emplace --strict" -strict[2]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --depth 3 --strict" -strict[3]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --depth 3 --emplace --strict" -strict[4]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --fixed-depth 3 --strict" -strict[5]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --fixed-depth 3 --emplace --strict" +strict[0]="( echo a/b/c; echo a/b ) | csv-strings path-basename --tail 3 --strict" +strict[1]="( echo a/b/c; echo a/b ) | csv-strings path-basename --tail 3 --emplace --strict" +strict[2]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --tail 3 --strict" +strict[3]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --tail 3 --emplace --strict" +strict[4]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --head 3 --strict" +strict[5]="( echo a/b/c; echo a/b ) | csv-strings path-dirname --head 3 --emplace --strict" fields[0]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-basename --fields ,,m,,n" fields[1]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-basename --fields ,,m,,n --emplace" -fields[2]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-basename --fields ,,m,,n --emplace --depth 2" +fields[2]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-basename --fields ,,m,,n --emplace --tail 2" fields[3]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-dirname --fields ,,m,,n" fields[4]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-dirname --fields ,,m,,n --emplace" -fields[5]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-dirname --fields ,,m,,n --emplace --depth 2" +fields[5]="( echo k,,a,,x; echo 
l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-dirname --fields ,,m,,n --emplace --tail 2" From f9ca8f4e841142a912df3429499bd2f044536696 Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 29 Jan 2020 14:58:19 +1100 Subject: [PATCH 0120/1056] csv-strings: basename --head: unit tests added --- csv/test/csv-strings/expected | 110 +++++++++++++++++++--------------- csv/test/csv-strings/input | 44 ++++++++------ 2 files changed, 87 insertions(+), 67 deletions(-) diff --git a/csv/test/csv-strings/expected b/csv/test/csv-strings/expected index 47409cc92..4432d8d83 100644 --- a/csv/test/csv-strings/expected +++ b/csv/test/csv-strings/expected @@ -24,57 +24,71 @@ basename/emplace[3]/output/line[0]="," basename/emplace[3]/output/line[1]="," basename/emplace[3]/output/line[2]="," -dirname/depth/append[0]/output/line[0]="a," -dirname/depth/append[0]/output/line[1]="a/b,a" -dirname/depth/append[0]/output/line[2]="a/b/c,a/b" -dirname/depth/append[1]/output/line[0]="a," -dirname/depth/append[1]/output/line[1]="a/b," -dirname/depth/append[1]/output/line[2]="a/b/c,a" -dirname/depth/append[2]/output/line[0]="a," -dirname/depth/append[2]/output/line[1]="a/b," -dirname/depth/append[2]/output/line[2]="a/b/c," -dirname/depth/append[3]/output/line[0]="a," -dirname/depth/append[3]/output/line[1]="a/b," -dirname/depth/append[3]/output/line[2]="a/b/c," +basename/head/append[0]/output/line[0]="a," +basename/head/append[0]/output/line[1]="a/b,b" +basename/head/append[0]/output/line[2]="a/b/c,b/c" +basename/head/append[1]/output/line[0]="a," +basename/head/append[1]/output/line[1]="a/b," +basename/head/append[1]/output/line[2]="a/b/c,c" -dirname/depth/emplace[0]/output/line[0]="" -dirname/depth/emplace[0]/output/line[1]="a" -dirname/depth/emplace[0]/output/line[2]="a/b" -dirname/depth/emplace[1]/output/line[0]="" -dirname/depth/emplace[1]/output/line[1]="" -dirname/depth/emplace[1]/output/line[2]="a" -dirname/depth/emplace[2]/output/line[0]="," -dirname/depth/emplace[2]/output/line[1]="," 
-dirname/depth/emplace[2]/output/line[2]="," -dirname/depth/emplace[3]/output/line[0]="," -dirname/depth/emplace[3]/output/line[1]="," -dirname/depth/emplace[3]/output/line[2]="," +basename/head/emplace[0]/output/line[0]="" +basename/head/emplace[0]/output/line[1]="b" +basename/head/emplace[0]/output/line[2]="b/c" +basename/head/emplace[1]/output/line[0]="" +basename/head/emplace[1]/output/line[1]="" +basename/head/emplace[1]/output/line[2]="c" -dirname/fixed_depth/append[0]/output/line[0]="a," -dirname/fixed_depth/append[0]/output/line[1]="a/b,a" -dirname/fixed_depth/append[0]/output/line[2]="a/b/c,a/b" -dirname/fixed_depth/append[1]/output/line[0]="a," -dirname/fixed_depth/append[1]/output/line[1]="a/b,a/b" -dirname/fixed_depth/append[1]/output/line[2]="a/b/c,a/b" -dirname/fixed_depth/append[2]/output/line[0]="a," -dirname/fixed_depth/append[2]/output/line[1]="a/b," -dirname/fixed_depth/append[2]/output/line[2]="a/b/c,a/b/c" -dirname/fixed_depth/append[3]/output/line[0]="a," -dirname/fixed_depth/append[3]/output/line[1]="a/b," -dirname/fixed_depth/append[3]/output/line[2]="a/b/c," +dirname/tail/append[0]/output/line[0]="a," +dirname/tail/append[0]/output/line[1]="a/b,a" +dirname/tail/append[0]/output/line[2]="a/b/c,a/b" +dirname/tail/append[1]/output/line[0]="a," +dirname/tail/append[1]/output/line[1]="a/b," +dirname/tail/append[1]/output/line[2]="a/b/c,a" +dirname/tail/append[2]/output/line[0]="a," +dirname/tail/append[2]/output/line[1]="a/b," +dirname/tail/append[2]/output/line[2]="a/b/c," +dirname/tail/append[3]/output/line[0]="a," +dirname/tail/append[3]/output/line[1]="a/b," +dirname/tail/append[3]/output/line[2]="a/b/c," -dirname/fixed_depth/emplace[0]/output/line[0]="" -dirname/fixed_depth/emplace[0]/output/line[1]="a" -dirname/fixed_depth/emplace[0]/output/line[2]="a/b" -dirname/fixed_depth/emplace[1]/output/line[0]="" -dirname/fixed_depth/emplace[1]/output/line[1]="a/b" -dirname/fixed_depth/emplace[1]/output/line[2]="a/b" 
-dirname/fixed_depth/emplace[2]/output/line[0]="" -dirname/fixed_depth/emplace[2]/output/line[1]="" -dirname/fixed_depth/emplace[2]/output/line[2]="a/b/c" -dirname/fixed_depth/emplace[3]/output/line[0]="," -dirname/fixed_depth/emplace[3]/output/line[1]="," -dirname/fixed_depth/emplace[3]/output/line[2]="," +dirname/tail/emplace[0]/output/line[0]="" +dirname/tail/emplace[0]/output/line[1]="a" +dirname/tail/emplace[0]/output/line[2]="a/b" +dirname/tail/emplace[1]/output/line[0]="" +dirname/tail/emplace[1]/output/line[1]="" +dirname/tail/emplace[1]/output/line[2]="a" +dirname/tail/emplace[2]/output/line[0]="," +dirname/tail/emplace[2]/output/line[1]="," +dirname/tail/emplace[2]/output/line[2]="," +dirname/tail/emplace[3]/output/line[0]="," +dirname/tail/emplace[3]/output/line[1]="," +dirname/tail/emplace[3]/output/line[2]="," + +dirname/head/append[0]/output/line[0]="a," +dirname/head/append[0]/output/line[1]="a/b,a" +dirname/head/append[0]/output/line[2]="a/b/c,a/b" +dirname/head/append[1]/output/line[0]="a," +dirname/head/append[1]/output/line[1]="a/b,a/b" +dirname/head/append[1]/output/line[2]="a/b/c,a/b" +dirname/head/append[2]/output/line[0]="a," +dirname/head/append[2]/output/line[1]="a/b," +dirname/head/append[2]/output/line[2]="a/b/c,a/b/c" +dirname/head/append[3]/output/line[0]="a," +dirname/head/append[3]/output/line[1]="a/b," +dirname/head/append[3]/output/line[2]="a/b/c," + +dirname/head/emplace[0]/output/line[0]="" +dirname/head/emplace[0]/output/line[1]="a" +dirname/head/emplace[0]/output/line[2]="a/b" +dirname/head/emplace[1]/output/line[0]="" +dirname/head/emplace[1]/output/line[1]="a/b" +dirname/head/emplace[1]/output/line[2]="a/b" +dirname/head/emplace[2]/output/line[0]="" +dirname/head/emplace[2]/output/line[1]="" +dirname/head/emplace[2]/output/line[2]="a/b/c" +dirname/head/emplace[3]/output/line[0]="," +dirname/head/emplace[3]/output/line[1]="," +dirname/head/emplace[3]/output/line[2]="," delimiter[0]/output/line[0]="a.a" 
delimiter[0]/output/line[1]="a.b.a" diff --git a/csv/test/csv-strings/input b/csv/test/csv-strings/input index 3966a6308..b02076f42 100644 --- a/csv/test/csv-strings/input +++ b/csv/test/csv-strings/input @@ -8,25 +8,31 @@ basename/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basenam basename/emplace[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --tail 3 --emplace" basename/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-basename --tail 4 --emplace" -dirname/depth/append[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname" -dirname/depth/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 2" -dirname/depth/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 3" -dirname/depth/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 4" - -dirname/depth/emplace[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --emplace" -dirname/depth/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 2 --emplace" -dirname/depth/emplace[2]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --tail 3 --emplace" -dirname/depth/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --tail 4 --emplace" - -dirname/fixed_depth/append[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname" -dirname/fixed_depth/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 2" -dirname/fixed_depth/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 3" -dirname/fixed_depth/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 4" - -dirname/fixed_depth/emplace[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --emplace" -dirname/fixed_depth/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 2 --emplace" -dirname/fixed_depth/emplace[2]="( echo a; echo a/b; echo a/b/c ) 
| csv-strings path-dirname --head 3 --emplace" -dirname/fixed_depth/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --head 4 --emplace" +basename/head/append[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --head 1" +basename/head/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --head 2" + +basename/head/emplace[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --head 1 --emplace" +basename/head/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-basename --head 2 --emplace" + +dirname/tail/append[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname" +dirname/tail/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 2" +dirname/tail/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 3" +dirname/tail/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 4" + +dirname/tail/emplace[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --emplace" +dirname/tail/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --tail 2 --emplace" +dirname/tail/emplace[2]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --tail 3 --emplace" +dirname/tail/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --tail 4 --emplace" + +dirname/head/append[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname" +dirname/head/append[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 2" +dirname/head/append[2]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 3" +dirname/head/append[3]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 4" + +dirname/head/emplace[0]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --emplace" +dirname/head/emplace[1]="( echo a; echo a/b; echo a/b/c ) | csv-strings path-dirname --head 2 --emplace" +dirname/head/emplace[2]="( echo a; echo a/b; echo 
a/b/c ) | csv-strings path-dirname --head 3 --emplace" +dirname/head/emplace[3]="( echo a,; echo a/b,; echo a/b/c, ) | csv-strings path-dirname --head 4 --emplace" delimiter[0]="( echo a; echo a.b; echo a.b/c.d/e ) | csv-strings path-basename --delimiter=." delimiter[1]="( echo a; echo a.b; echo a.b/c.d/e ) | csv-strings path-basename --delimiter=. --emplace" From a36ed3bc3db3d22da85736b5068ba4a262a98b6c Mon Sep 17 00:00:00 2001 From: seva Date: Fri, 31 Jan 2020 17:56:27 +1100 Subject: [PATCH 0121/1056] csv-shuffle: removed flushing in binary on every record; from now on, if flush required, --flush has to be given on command line --- csv/applications/csv-shuffle.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/csv/applications/csv-shuffle.cpp b/csv/applications/csv-shuffle.cpp index aecceed64..dbd2b1331 100644 --- a/csv/applications/csv-shuffle.cpp +++ b/csv/applications/csv-shuffle.cpp @@ -156,7 +156,7 @@ int main( int ac, char** av ) { std::cout.write( &buf[ elements[k].offset ], elements[k].size ); } - std::cout.flush(); // todo: flushing too often? + if( csv.flush ) { std::cout.flush(); } } } else @@ -192,13 +192,7 @@ int main( int ac, char** av ) } return 0; } - catch( std::exception& ex ) - { - std::cerr << "csv-shuffle: " << ex.what() << std::endl; - } - catch( ... ) - { - std::cerr << "csv-shuffle: unknown exception" << std::endl; - } + catch( std::exception& ex ) { std::cerr << "csv-shuffle: " << ex.what() << std::endl; } + catch( ... 
) { std::cerr << "csv-shuffle: unknown exception" << std::endl; } return 1; } From 7d58f161ffeb06c6f0929fa23fc39e02284e2453 Mon Sep 17 00:00:00 2001 From: seva Date: Mon, 3 Feb 2020 10:11:49 +1100 Subject: [PATCH 0122/1056] csv-split: --files: example added --- csv/applications/csv-split.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/csv/applications/csv-split.cpp b/csv/applications/csv-split.cpp index c9df78092..0372668ff 100644 --- a/csv/applications/csv-split.cpp +++ b/csv/applications/csv-split.cpp @@ -148,7 +148,9 @@ int main( int argc, char** argv ) std::cerr << " local:: linux/unix local server socket e.g. local:./tmp/my_socket" << std::endl; std::cerr << " : named pipe, which will be re-opened, if client reconnects" << std::endl; std::cerr << " : a regular file" << std::endl; - std::cerr << " example: ( echo 0,a; echo 1,b; echo 0,c; echo 2,d ) | csv-split --fields id \"0,1;tcp:5999\" \"...;local:/tmp/named_fifo\"" << std::endl; + std::cerr << " examples" << std::endl; + std::cerr << " ( echo 0,a; echo 1,b; echo 0,c; echo 2,d ) | csv-split --fields id \"0,1;tcp:5999\" \"...;local:/tmp/named_fifo\"" << std::endl; + std::cerr << " ( echo 0,a; echo 1,b ) | csv-split --fields id --files <( echo '1,one.csv'; echo '0,zero.csv' )';fields=id,filename'" << std::endl; std::cerr << std::endl; std::cerr << description << std::endl; std::cerr << std::endl; From 2395f31ab9c1a18f752e1fa0e1907ffcd02b5ee4 Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 4 Feb 2020 18:29:52 +1100 Subject: [PATCH 0123/1056] csv-shuffle: tried to improve performance, but commented it out, since it did not work --- csv/applications/csv-shuffle.cpp | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/csv/applications/csv-shuffle.cpp b/csv/applications/csv-shuffle.cpp index dbd2b1331..5b159a262 100644 --- a/csv/applications/csv-shuffle.cpp +++ b/csv/applications/csv-shuffle.cpp @@ -125,6 +125,58 @@ int main( int ac, char** av ) if( 
!fields[i].input_index ) { std::cerr << "csv-shuffle: \"" << fields[i].name << "\" not found in input fields " << csv.fields << std::endl; return 1; } } if( csv.binary() ) +// { +// #ifdef WIN32 +// _setmode( _fileno( stdin ), _O_BINARY ); +// _setmode( _fileno( stdout ), _O_BINARY ); +// #endif +// std::vector< char > in( csv.format().size() ); +// std::vector< comma::csv::format::element > elements; +// elements.reserve( csv.format().count() ); // quick and dirty, can be really wasteful on large things like images +// for( unsigned int i = 0; i < elements.capacity(); ++i ) +// { +// elements.push_back( csv.format().offset( i ) ); +// } +// unsigned int previous_index = 0; +// unsigned int output_size = 0; +// for( unsigned int i = 0; i < fields.size(); ++i ) // quick and dirty +// { +// for( unsigned int k = previous_index; k < fields[i].index && k < elements.size(); ++k ) { output_size += elements[k].size; } +// output_size += fields[i].size; +// previous_index = fields[i].index + 1; +// } +// for( unsigned int k = previous_index; output_trailing_fields && k < elements.size(); ++k ) { output_size += elements[k].size; } +// std::vector< char > out( output_size ); +// while( std::cin.good() && !std::cin.eof() ) +// { +// // todo: quick and dirty; if performance is an issue, you could read more than +// // one record every time see comma::csv::binary_input_stream::read() for reference +// std::cin.read( &in[0], csv.format().size() ); +// if( std::cin.gcount() == 0 ) { continue; } +// if( std::cin.gcount() < int( csv.format().size() ) ) { std::cerr << "csv-shuffle: expected " << csv.format().size() << " bytes, got only " << std::cin.gcount() << std::endl; return 1; } +// unsigned int offset = 0; +// unsigned int previous_index = 0; +// for( unsigned int i = 0; i < fields.size(); ++i ) // quick and dirty +// { +// for( unsigned int k = previous_index; k < fields[i].index && k < elements.size(); ++k ) +// { +// std::memcpy( &out[offset], &in[ elements[k].offset ], 
elements[k].size ); // std::cout.write( &in[ elements[k].offset ], elements[k].size ); +// offset += elements[k].size; +// } +// std::memcpy( &out[offset], &in[ fields[i].input_offset ], fields[i].size ); // std::cout.write( &in[ fields[i].input_offset ], fields[i].size ); +// offset += fields[i].size; +// previous_index = fields[i].index + 1; +// } +// //std::cerr << "--> previous_index: " << previous_index << " elements.size(): " << elements.size() << std::endl; +// for( unsigned int k = previous_index; output_trailing_fields && k < elements.size(); ++k ) +// { +// std::memcpy( &out[offset], &in[ elements[k].offset ], elements[k].size ); // std::cout.write( &in[ elements[k].offset ], elements[k].size ); +// offset += elements[k].size; +// } +// std::cout.write( &out[0], out.size() ); +// if( csv.flush ) { std::cout.flush(); } +// } +// } { #ifdef WIN32 _setmode( _fileno( stdin ), _O_BINARY ); From 34836b00e239a99f6d7d585a1d335236fc964f21 Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 4 Feb 2020 18:47:50 +1100 Subject: [PATCH 0124/1056] csv-shuffle: output: flushing fixed, performance improved --- csv/applications/csv-shuffle.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/csv/applications/csv-shuffle.cpp b/csv/applications/csv-shuffle.cpp index 5b159a262..7e8f03368 100644 --- a/csv/applications/csv-shuffle.cpp +++ b/csv/applications/csv-shuffle.cpp @@ -147,6 +147,7 @@ int main( int ac, char** av ) // } // for( unsigned int k = previous_index; output_trailing_fields && k < elements.size(); ++k ) { output_size += elements[k].size; } // std::vector< char > out( output_size ); +// if( !csv.flush ) { std::cin.tie( NULL ); } // quick and dirty; std::cin is tied to std::cout by default, which is thread-unsafe now // while( std::cin.good() && !std::cin.eof() ) // { // // todo: quick and dirty; if performance is an issue, you could read more than @@ -186,6 +187,7 @@ int main( int ac, char** av ) std::vector< comma::csv::format::element > elements; 
elements.reserve( csv.format().count() ); // quick and dirty, can be really wasteful on large things like images for( unsigned int i = 0; i < elements.capacity(); ++i ) { elements.push_back( csv.format().offset( i ) ); } + if( !csv.flush ) { std::cin.tie( NULL ); } // quick and dirty; std::cin is tied to std::cout by default, which is thread-unsafe now while( std::cin.good() && !std::cin.eof() ) { // todo: quick and dirty; if performance is an issue, you could read more than From 6c591814f55e63ea0285dbce2a1f393ca4d3be9d Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 5 Feb 2020 18:29:28 +1100 Subject: [PATCH 0125/1056] csv-cast: --flush implemented; not flushing stdout by default now --- csv/applications/csv-cast.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/csv/applications/csv-cast.cpp b/csv/applications/csv-cast.cpp index a1b1a83ac..7370731bc 100644 --- a/csv/applications/csv-cast.cpp +++ b/csv/applications/csv-cast.cpp @@ -55,6 +55,7 @@ static void usage() std::cerr << std::endl; std::cerr << " --binary,-b,--from: input binary format" << std::endl; std::cerr << " --output-binary,--output,-o,--to: output binary format" << std::endl; + std::cerr << " --flush: flush stdout after each record" << std::endl; std::cerr << " --force: allow narrowing conversions" << std::endl; std::cerr << std::endl; std::cerr << comma::csv::format::usage() << std::endl; @@ -262,15 +263,18 @@ int main( int ac, char** av ) comma::csv::format iformat( options.value< std::string >( "--binary,-b,--from", av[1] ) ); comma::csv::format oformat( options.value< std::string >( "--output-binary,--output,-o,--to", av[2] ) ); check_conversions( iformat, oformat, options.exists( "--force" ) ); + bool flush = options.exists( "--flush" ); std::vector< char > in( iformat.size() ); std::vector< char > out( oformat.size() ); + if( !flush ) { std::cin.tie( NULL ); } while( std::cin.good() ) { std::cin.read( &in[0], iformat.size() ); if( std::cin.gcount() == 0 ) { break; } if( 
std::cin.gcount() < static_cast< int >( iformat.size() ) ) { COMMA_THROW( comma::exception, "expected " << iformat.size() << " bytes, got only " << std::cin.gcount() ); } cast( iformat, in, oformat, out ); - std::cout.write( &out[0], oformat.size() ).flush(); + std::cout.write( &out[0], oformat.size() ); + if( flush ) { std::cout.flush(); } } return 0; } From 1d16fd8c79f5de7e4f6e5d6af9060e693608dd69 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Wed, 5 Feb 2020 22:00:06 +1100 Subject: [PATCH 0126/1056] csv-shuffle, csv-calc, csv-to-bin: --flush: when absent, don't flush stdout on each input --- csv/applications/csv-calc.cpp | 6 ++++-- csv/applications/csv-shuffle.cpp | 4 ++-- csv/applications/csv-to-bin.cpp | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/csv/applications/csv-calc.cpp b/csv/applications/csv-calc.cpp index eea012c5b..488802095 100644 --- a/csv/applications/csv-calc.cpp +++ b/csv/applications/csv-calc.cpp @@ -1181,7 +1181,7 @@ static void output( const comma::csv::options& csv, ResultsMap& results, boost:: { if( has_id ) { std::cout.write( reinterpret_cast< const char* >( &it->first ), sizeof( comma::uint32 ) ); } // quick and dirty if( has_block ) { std::cout.write( reinterpret_cast< const char* >( &( *block ) ), sizeof( comma::uint32 ) ); } // quick and dirty - std::cout.flush(); + if( csv.flush ) { std::cout.flush(); } } else { @@ -1201,6 +1201,7 @@ static void append_and_output( const comma::csv::options& csv, Inputs& inputs, R if (!csv.binary()) { std::cout << csv.delimiter; } std::cout << results.find(inputs[i].first)->second; if (!csv.binary()) { std::cout << std::endl; } + if( csv.flush ) { std::cout.flush(); } } results.clear(); inputs.clear(); @@ -1294,7 +1295,8 @@ int main( int ac, char** av ) if (has_block && !append) { std::cout << csv.delimiter << "ui"; } std::cout << std::endl; return 0; - } + } + if( !csv.flush && csv.binary() ) { std::cin.tie( NULL ); } while( std::cin.good() && !std::cin.eof() ) { const Values* v 
= csv.binary() ? binary->read() : ascii->read(); diff --git a/csv/applications/csv-shuffle.cpp b/csv/applications/csv-shuffle.cpp index 7e8f03368..f6d460289 100644 --- a/csv/applications/csv-shuffle.cpp +++ b/csv/applications/csv-shuffle.cpp @@ -147,7 +147,7 @@ int main( int ac, char** av ) // } // for( unsigned int k = previous_index; output_trailing_fields && k < elements.size(); ++k ) { output_size += elements[k].size; } // std::vector< char > out( output_size ); -// if( !csv.flush ) { std::cin.tie( NULL ); } // quick and dirty; std::cin is tied to std::cout by default, which is thread-unsafe now +// if( !csv.flush && csv.binary() ) { std::cin.tie( NULL ); } // quick and dirty; std::cin is tied to std::cout by default, which is thread-unsafe now // while( std::cin.good() && !std::cin.eof() ) // { // // todo: quick and dirty; if performance is an issue, you could read more than @@ -187,7 +187,7 @@ int main( int ac, char** av ) std::vector< comma::csv::format::element > elements; elements.reserve( csv.format().count() ); // quick and dirty, can be really wasteful on large things like images for( unsigned int i = 0; i < elements.capacity(); ++i ) { elements.push_back( csv.format().offset( i ) ); } - if( !csv.flush ) { std::cin.tie( NULL ); } // quick and dirty; std::cin is tied to std::cout by default, which is thread-unsafe now + if( !csv.flush && csv.binary() ) { std::cin.tie( NULL ); } // quick and dirty; std::cin is tied to std::cout by default, which is thread-unsafe now while( std::cin.good() && !std::cin.eof() ) { // todo: quick and dirty; if performance is an issue, you could read more than diff --git a/csv/applications/csv-to-bin.cpp b/csv/applications/csv-to-bin.cpp index d3b029ca6..acf8ed667 100644 --- a/csv/applications/csv-to-bin.cpp +++ b/csv/applications/csv-to-bin.cpp @@ -76,7 +76,8 @@ int main( int ac, char** av ) char delimiter = options.value( "--delimiter", ',' ); bool flush = options.exists( "--flush" ); comma::csv::format format( av[1] ); - 
//{ ProfilerStart( "csv-to-bin.prof" ); + if( !flush ) { std::cin.tie( NULL ); } + //{ ProfilerStart( "csvg-to-bin.prof" ); while( std::cin.good() && !std::cin.eof() ) { std::getline( std::cin, line ); From 3058abd99dd0bf8d0b3639a8a9669dc9e6fff869 Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 6 Feb 2020 12:28:28 +1100 Subject: [PATCH 0127/1056] csv-calc: (unsuccessfully) tried to improve performance on large number of ids; minor refactoring in the process --- csv/applications/csv-calc.cpp | 124 ++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 52 deletions(-) diff --git a/csv/applications/csv-calc.cpp b/csv/applications/csv-calc.cpp index 488802095..044eb4136 100644 --- a/csv/applications/csv-calc.cpp +++ b/csv/applications/csv-calc.cpp @@ -309,7 +309,7 @@ class ascii_input return values_.get(); } - const std::string line() { return line_; } + const std::string& line() const { return line_; } private: comma::csv::options csv_; @@ -334,10 +334,15 @@ class binary_input { while( true ) { + //std::cin.read( &buffer_[0], csv_.format().size() ); + //if( std::cin.gcount() == 0 ) { return NULL; } + //if( std::cin.gcount() != int( csv_.format().size() ) ) { COMMA_THROW( comma::exception, "expected " << csv_.format().size() << " bytes; got " << std::cin.gcount() ); } + //values_.set( &buffer_[0] ); + //return &values_; if( offset_ >= csv_.format().size() ) { values_.set( cur_ ); - line_ = std::string(cur_, csv_.format().size()); + line_ = std::string( cur_, csv_.format().size() ); cur_ += csv_.format().size(); offset_ -= csv_.format().size(); if( cur_ == end_ ) { cur_ = &buffer_[0]; offset_ = 0; } @@ -348,7 +353,8 @@ class binary_input offset_ += count; } } - const std::string line() { return line_; } + + const std::string& line() const { return line_; } private: comma::csv::options csv_; @@ -1035,13 +1041,13 @@ namespace Operations template <> struct traits< Enum::kurtosis > { template < typename T, comma::csv::format::types_enum F > struct FromEnum { 
typedef Kurtosis< T, F > Type; }; }; } // namespace Operations -class Operationbase +class operation_base { public: - virtual ~Operationbase() {} + virtual ~operation_base() {} virtual void push( const char* buf ) = 0; virtual void calculate() = 0; - virtual Operationbase* clone() const = 0; + virtual operation_base* clone() const = 0; const comma::csv::format& output_format() const { return output_format_; } const char* buffer() const { return &buffer_[0]; } @@ -1053,7 +1059,7 @@ class Operationbase std::vector< comma::csv::format::element > output_elements_; std::vector< char > buffer_; - Operationbase* deep_copy_to_( Operationbase* lhs ) const + operation_base* deep_copy_to_( operation_base* lhs ) const { lhs->input_format_ = input_format_; lhs->input_elements_ = input_elements_; @@ -1066,7 +1072,7 @@ class Operationbase }; template < Operations::Enum::Values E > -struct Operation : public Operationbase +struct Operation : public operation_base { Operation() {} Operation( const comma::csv::format& format @@ -1132,18 +1138,18 @@ struct Operation : public Operationbase for( std::size_t i = 0; i < operations_.size(); ++i ) { operations_[i].calculate( &buffer_[0] + output_elements_[i].offset ); } } - Operationbase* clone() const { Operation< E >* op = new Operation< E >; return deep_copy_to_( op ); } + operation_base* clone() const { Operation< E >* op = new Operation< E >; return deep_copy_to_( op ); } }; -typedef boost::unordered_map< comma::uint32, boost::ptr_vector< Operationbase >* > OperationsMap; -typedef boost::unordered_map< comma::uint32, std::string > ResultsMap; +typedef boost::unordered_map< comma::uint32, boost::ptr_vector< operation_base >* > operations_map_t; +typedef boost::unordered_map< comma::uint32, std::string > results_map_t; typedef std::vector< std::pair < comma::uint32, std::string > > Inputs; -static void init_operations( boost::ptr_vector< Operationbase >& operations +static void init_operations( boost::ptr_vector< operation_base >& 
operations , const std::vector< Operations::operation_parameters >& operations_parameters , const comma::csv::format& format ) { - static boost::ptr_vector< Operationbase > sample; + static boost::ptr_vector< operation_base > sample; if( sample.empty() ) { sample.reserve( operations_parameters.size() ); @@ -1169,14 +1175,15 @@ static void init_operations( boost::ptr_vector< Operationbase >& operations } } operations.clear(); - for( std::size_t i = 0; i < sample.size(); ++i ) { operations.push_back( sample[i].clone() ); } + operations.reserve( sample.size() ); + for( auto& s: sample ) { operations.push_back( s.clone() ); } // todo! this is really slow, if there are many ids } -static void output( const comma::csv::options& csv, ResultsMap& results, boost::optional< comma::uint32 > block, bool has_block, bool has_id ) +static void output( const comma::csv::options& csv, results_map_t& results, boost::optional< comma::uint32 > block, bool has_block, bool has_id ) { - for( ResultsMap::iterator it = results.begin(); it != results.end(); ++it ) + for( results_map_t::iterator it = results.begin(); it != results.end(); ++it ) { - std::cout << it->second; + std::cout.write( &it->second[0], it->second.size() ); if( csv.binary() ) { if( has_id ) { std::cout.write( reinterpret_cast< const char* >( &it->first ), sizeof( comma::uint32 ) ); } // quick and dirty @@ -1193,34 +1200,48 @@ static void output( const comma::csv::options& csv, ResultsMap& results, boost:: results.clear(); } -static void append_and_output( const comma::csv::options& csv, Inputs& inputs, ResultsMap& results ) -{ +static void append_and_output( const comma::csv::options& csv, Inputs& inputs, results_map_t& results ) +{ for ( size_t i = 0; i < inputs.size(); ++i ) { std::cout << inputs[i].second; - if (!csv.binary()) { std::cout << csv.delimiter; } - std::cout << results.find(inputs[i].first)->second; - if (!csv.binary()) { std::cout << std::endl; } + if( !csv.binary() ) { std::cout << csv.delimiter; } + 
const auto& r = results.find( inputs[i].first )->second; + std::cout.write( &r[0], r.size() ); + if( !csv.binary() ) { std::cout << std::endl; } if( csv.flush ) { std::cout.flush(); } } results.clear(); inputs.clear(); } -static void calculate( const comma::csv::options& csv, OperationsMap& operations, ResultsMap& results ) +static void calculate( const comma::csv::options& csv, operations_map_t& operations, results_map_t& results ) { - for( OperationsMap::iterator it = operations.begin(); it != operations.end(); ++it ) + for( operations_map_t::iterator it = operations.begin(); it != operations.end(); ++it ) { std::string r; + if( csv.binary() ) + { + unsigned int size = 0; + for( std::size_t i = 0; i < it->second->size(); ++i ) { size += ( *it->second )[i].output_format().size(); } + r.reserve( size ); + } for( std::size_t i = 0; i < it->second->size(); ++i ) { ( *it->second )[i].calculate(); - if( csv.binary() ) { r.append( ( *it->second )[i].buffer(), ( *it->second )[i].output_format().size() ); } - else { if( i > 0 ) { r += csv.delimiter; } r.append(( *it->second )[i].output_format().bin_to_csv( ( *it->second )[i].buffer(), csv.delimiter, csv.precision )); } + if( csv.binary() ) + { + r.append( ( *it->second )[i].buffer(), ( *it->second )[i].output_format().size() ); + } + else + { + if( i > 0 ) { r += csv.delimiter; } + r.append( ( *it->second )[i].output_format().bin_to_csv( ( *it->second )[i].buffer(), csv.delimiter, csv.precision ) ); + } } - results[it->first] = r; + results[ it->first ] = r; } - for( OperationsMap::iterator it = operations.begin(); it != operations.end(); ++it ) { delete it->second; } // quick and dirty + for( operations_map_t::iterator it = operations.begin(); it != operations.end(); ++it ) { delete it->second; } // quick and dirty operations.clear(); } @@ -1230,13 +1251,14 @@ int main( int ac, char** av ) { comma::command_line_options options( ac, av, usage ); if( options.exists( "--bash-completion" ) ) bash_completion( ac, av ); - 
std::vector< std::string > unnamed = options.unnamed( "", "--binary,-b,--delimiter,-d,--format,--fields,-f,--output-fields" ); + std::vector< std::string > unnamed = options.unnamed( "--append,--flush,--output-fields,--output-format", "--binary,-b,--delimiter,-d,--format,--fields,-f,--output-fields" ); comma::csv::options csv( options ); csv.full_xpath = false; std::cout.precision( csv.precision ); #ifdef WIN32 if( csv.binary() ) { _setmode( _fileno( stdin ), _O_BINARY ); _setmode( _fileno( stdout ), _O_BINARY ); } #endif + if( !csv.flush && csv.binary() ) { std::cin.tie( NULL ); std::ios_base::sync_with_stdio( false ); } // todo? quick and dirty, redesign binary_input instead? if( unnamed.empty() ) { std::cerr << comma::verbose.app_name() << ": please specify operations" << std::endl; exit( 1 ); } std::vector< std::string > v = comma::split( unnamed[0], ',' ); std::vector< Operations::operation_parameters > operations_parameters( v.size() ); @@ -1253,14 +1275,13 @@ int main( int ac, char** av ) boost::scoped_ptr< binary_input > binary; if( csv.binary() ) { binary.reset( new binary_input( csv ) ); } else { ascii.reset( new ascii_input( csv, format ) ); } - OperationsMap operations; - ResultsMap results; + operations_map_t operations; + results_map_t results; Inputs inputs; boost::optional< comma::uint32 > block = boost::make_optional< comma::uint32 >( false, 0 ); bool has_block = csv.has_field( "block" ); bool has_id = csv.has_field( "id" ); - bool append = options.exists("--append"); - + bool append = options.exists( "--append" ); if( options.exists( "--output-fields" ) ) { std::vector < std::string > fields = comma::split(csv.fields, ','); @@ -1270,33 +1291,32 @@ int main( int ac, char** av ) std::replace(v[op].begin(), v[op].end(), '=', '_'); std::replace(v[op].begin(), v[op].end(), '.', '_'); std::replace(v[op].begin(), v[op].end(), ':', '_'); - for (std::size_t f = 0; f < fields.size(); f++ ) + for( std::size_t f = 0; f < fields.size(); f++ ) { - if (fields[f] 
== "" || fields[f] == "id" || fields[f] == "block") { continue; } - output_fields.push_back(fields[f] + "/" + v[op]); + if( fields[f] == "" || fields[f] == "id" || fields[f] == "block" ) { continue; } + output_fields.push_back( fields[f] + "/" + v[op] ); } } - if (has_id && !append) { output_fields.push_back("id"); } - if (has_block && !append ) { output_fields.push_back("block"); } - std::cout << comma::join(output_fields, ',') << std::endl; + if( has_id && !append ) { output_fields.push_back( "id" ); } + if( has_block && !append ) { output_fields.push_back( "block" ); } + std::cout << comma::join( output_fields, ',' ) << std::endl; return 0; } - if (options.exists("--output-format")) + if( options.exists( "--output-format" ) ) { if ( !format ) { std::cerr << comma::verbose.app_name() << ": option --output-format requires input format to be specified, please use --format or --binary" << std::endl; return 1; } - boost::ptr_vector< Operationbase > ops; - init_operations(ops, operations_parameters, Values(csv, *format).format()); + boost::ptr_vector< operation_base > ops; + init_operations( ops, operations_parameters, Values(csv, *format).format() ); for ( std::size_t i = 0; i < ops.size(); ++i ) { if ( i > 0 ) { std::cout << csv.delimiter; } std::cout << ops[i].output_format().string(); } - if (has_id && !append) { std::cout << csv.delimiter << "ui"; } - if (has_block && !append) { std::cout << csv.delimiter << "ui"; } + if( has_id && !append ) { std::cout << csv.delimiter << "ui"; } + if( has_block && !append ) { std::cout << csv.delimiter << "ui"; } std::cout << std::endl; return 0; } - if( !csv.flush && csv.binary() ) { std::cin.tie( NULL ); } while( std::cin.good() && !std::cin.eof() ) { const Values* v = csv.binary() ? 
binary->read() : ascii->read(); @@ -1305,23 +1325,23 @@ int main( int ac, char** av ) { if( block && *block != v->block() ) { - calculate(csv, operations, results); - if ( append ) { append_and_output(csv, inputs, results); inputs.clear(); } + calculate( csv, operations, results ); + if ( append ) { append_and_output( csv, inputs, results ); inputs.clear(); } else { output( csv, results, block, has_block, has_id ); } } block = v->block(); } - OperationsMap::iterator it = operations.find( v->id() ); + operations_map_t::iterator it = operations.find( v->id() ); if( it == operations.end() ) { - it = operations.insert( std::make_pair( v->id(), new boost::ptr_vector< Operationbase > ) ).first; + it = operations.insert( std::make_pair( v->id(), new boost::ptr_vector< operation_base > ) ).first; init_operations( *it->second, operations_parameters, v->format() ); } - if (append) { inputs.push_back( std::make_pair( v->id(), csv.binary() ? binary->line() : ascii->line() ) ); } + if( append ) { inputs.push_back( std::make_pair( v->id(), csv.binary() ? 
binary->line() : ascii->line() ) ); } for( std::size_t i = 0; i < it->second->size(); ++i ) { ( *it->second )[i].push( v->buffer() ); } } - calculate(csv, operations, results); - if ( append ) { append_and_output(csv, inputs, results); } + calculate( csv, operations, results ); + if ( append ) { append_and_output( csv, inputs, results ); } else { output( csv, results, block, has_block, has_id ); } return 0; } From c84e16aeebb73498fba4332b1574d7b43ecfd71c Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 7 Feb 2020 12:25:32 +1100 Subject: [PATCH 0128/1056] csv-calc: trying to speed up for high number of ids vs polymorhic operations; performance improved by some 60%, but still is very slow --- csv/applications/csv-calc.cpp | 165 ++++++++++++++++++++++------------ 1 file changed, 106 insertions(+), 59 deletions(-) diff --git a/csv/applications/csv-calc.cpp b/csv/applications/csv-calc.cpp index 044eb4136..a145ef85a 100644 --- a/csv/applications/csv-calc.cpp +++ b/csv/applications/csv-calc.cpp @@ -27,7 +27,6 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- /// @author vsevolod vlaskine #ifdef WIN32 @@ -437,6 +436,7 @@ namespace Operations struct base { virtual ~base() {} + virtual void reset() = 0; virtual void push( const char* ) = 0; virtual void calculate( char* ) = 0; virtual base* clone() const = 0; @@ -451,6 +451,7 @@ namespace Operations class Min : public base { public: + void reset() { min_ = boost::optional< T >(); } void push( const char* buf ) { const T& t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -469,6 +470,7 @@ namespace Operations class Max : public base { public: + void reset() { max_ = boost::optional< T >(); } void push( const char* buf ) { T t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -487,6 +489,7 @@ namespace Operations class Sum : public base { public: + void reset() { sum_ = boost::optional< T >(); } void push( const char* buf ) { T t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -501,6 +504,7 @@ namespace Operations template < comma::csv::format::types_enum F > class Sum< boost::posix_time::ptime, F > : public base { + void reset() { COMMA_THROW( comma::exception, "sum not defined for time" ); } void push( const char* ) { COMMA_THROW( comma::exception, "sum not defined for time" ); } void calculate( char* ) { COMMA_THROW( comma::exception, "sum not defined for time" ); } base* clone() const { COMMA_THROW( comma::exception, "sum not defined for time" ); } @@ -510,6 +514,7 @@ namespace Operations class Centre : public base { public: + void reset() { min_ = Min< T, F >(); max_ = Max< T, F >(); } void push( const char* buf ) { min_.push( buf ); max_.push( buf ); } void calculate( char* buf ) { if( min_.min_ ) { comma::csv::format::traits< T, F >::to_bin( *min_.min_ + ( *max_.max_ - *min_.min_ ) / 2, buf ); } } base* clone() const { return new Centre< T, F >( *this ); } @@ -525,6 +530,7 @@ namespace Operations class Mode : public base { public: + void reset() { value_count_ = impl::value_count< T >(); } void push( const char* buf ) { 
value_count_.update( comma::csv::format::traits< T, F >::from_bin( buf ) ); } void calculate( char* buf ) { if( !value_count_.map().empty() ) { comma::csv::format::traits< T, F >::to_bin( static_cast< T >( value_count_.mode().first ), buf ); } } base* clone() const { return new Mode< T, F >( *this ); } @@ -537,6 +543,7 @@ namespace Operations { public: Mean() : count_( 0 ) {} + void reset() { mean_ = boost::none; count_ = 0; } void push( const char* buf ) { T t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -641,6 +648,8 @@ namespace Operations } base* clone() const { return new Percentile< T, F >( *this ); } + + void reset() { values_.clear(); percentile_ = 0; } private: std::multiset< T > values_; @@ -651,6 +660,7 @@ namespace Operations template < comma::csv::format::types_enum F > class Percentile< boost::posix_time::ptime, F > : public base { + void reset() { COMMA_THROW( comma::exception, "percentile not implemented for time, todo" ); } void push( const char* ) { COMMA_THROW( comma::exception, "percentile not implemented for time, todo" ); } void calculate( char* ) { COMMA_THROW( comma::exception, "percentile not implemented for time, todo" ); } base* clone() const { COMMA_THROW( comma::exception, "percentile not implemented for time, todo" ); } @@ -728,6 +738,8 @@ namespace Operations typename result_traits< T >::type mean() const { return previous_.mean(); } + void reset() { previous_.reset(); value_ = 0; count_ = 0; } + private: Moment< T, M - 1 > previous_; typename result_traits< T >::type value_; @@ -739,6 +751,7 @@ namespace Operations { public: Moment() : value_( 0 ), count_( 0 ) {} + void update ( const T t ) { ++count_; @@ -747,6 +760,8 @@ namespace Operations typename result_traits< T >::type mean() const { return value_; } + void reset() { value_ = 0; count_ = 0; } + private: typename result_traits< T >::type value_; std::size_t count_; @@ -768,6 +783,7 @@ namespace Operations void update( const T t ) { moments_.update(t); } void 
calculate( char* buf ) { if( moments_.count() > 0 ) { comma::csv::format::traits< T, F >::to_bin( static_cast< T >( std::sqrt( static_cast< long double >( moments_.value() / ( sample_ ? moments_.count() - 1 : moments_.count() ) ) ) ), buf ); } } base* clone() const { return new Stddev< T, F >( *this ); } + void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 2 > moments_; boost::optional first_; @@ -788,6 +804,7 @@ namespace Operations } void calculate( char* buf ) { stddev_.calculate(buf); } base* clone() const { return new Stddev< boost::posix_time::ptime, F >( *this ); } + void reset() { stddev_.reset(); first_ = boost::none; } private: Stddev< double, F > stddev_; boost::optional first_; @@ -809,6 +826,7 @@ namespace Operations void update( const T t ) { moments_.update(t); } void calculate( char* buf ) { if( moments_.count() > 0 ) { comma::csv::format::traits< T, F >::to_bin( static_cast< T >( moments_.value() / ( sample_ ? moments_.count() - 1 : moments_.count() ) ), buf ); } } base* clone() const { return new Variance< T, F >( *this ); } + void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 2 > moments_; boost::optional first_; @@ -829,9 +847,10 @@ namespace Operations } void calculate( char* buf ) { variance_.calculate(buf); } base* clone() const { return new Variance< boost::posix_time::ptime, F >( *this ); } + void reset() { variance_.reset(); first_ = boost::none; } private: - Variance< double, F> variance_; - boost::optional first_; + Variance< double, F > variance_; + boost::optional< boost::posix_time::ptime > first_; }; template < typename T, comma::csv::format::types_enum F = comma::csv::format::type_to_enum< T >::value > @@ -852,7 +871,6 @@ namespace Operations if( moments_.count() > 0 ) { typename result_traits< T >::type n = moments_.count(); - // corrected sample skew requires at least 3 samples typename result_traits< T >::type correction = sample_ ? 
sqrt( n * ( n - 1 ) ) / ( n - 2 ) : 1 ; typename result_traits< T >::type m2 = moments_.previous().value(); @@ -861,9 +879,10 @@ namespace Operations } } base* clone() const { return new Skew< T, F >( *this ); } + void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 3 > moments_; - boost::optional first_; + boost::optional< T > first_; bool sample_; }; @@ -881,9 +900,10 @@ namespace Operations } void calculate( char* buf ) { skew_.calculate(buf); } base* clone() const { return new Skew< boost::posix_time::ptime, F >( *this ); } + void reset() { skew_.reset(); first_ = boost::none; } private: - Skew< double, F> skew_; - boost::optional first_; + Skew< double, F > skew_; + boost::optional< boost::posix_time::ptime > first_; }; template < typename T, comma::csv::format::types_enum F = comma::csv::format::type_to_enum< T >::value > @@ -895,8 +915,8 @@ namespace Operations { for (std::size_t i = 0; i < options.size(); i++) { - if ( options[i] == "sample" ) { sample_ = true; } - else if ( options[i] == "excess" ) { excess_ = true; } + if( options[i] == "sample" ) { sample_ = true; } + else if( options[i] == "excess" ) { excess_ = true; } } } void push( const char* buf ) @@ -923,9 +943,10 @@ namespace Operations } } base* clone() const { return new Kurtosis< T, F >( *this ); } + void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 4 > moments_; - boost::optional first_; + boost::optional< T > first_; bool sample_; bool excess_; }; @@ -944,9 +965,10 @@ namespace Operations } void calculate( char* buf ) { kurtosis_.calculate(buf); } base* clone() const { return new Kurtosis< boost::posix_time::ptime, F >( *this ); } + void reset() { kurtosis_.reset(); first_ = boost::none; } private: - Kurtosis< double, F> kurtosis_; - boost::optional first_; + Kurtosis< double, F > kurtosis_; + boost::optional< boost::posix_time::ptime > first_; }; template < typename T > struct Diff @@ -968,6 +990,7 @@ namespace Operations void push( const 
char* buf ) { min_.push( buf ); max_.push( buf ); } void calculate( char* buf ) { if( min_.min_ ) { comma::csv::format::traits< typename Diff< T >::Type >::to_bin( Diff< T >::subtract( *max_.max_, *min_.min_ ), buf ); } } base* clone() const { return new Diameter< T, F >( *this ); } + void reset() { min_ = Min< T, F >(); max_ = Max< T, F >(); } private: Min< T, F > min_; Max< T, F > max_; @@ -980,6 +1003,7 @@ namespace Operations void push( const char* buf ) { min_.push( buf ); max_.push( buf ); } void calculate( char* buf ) { if( min_.min_ ) { comma::csv::format::traits< typename Diff< T >::Type >::to_bin( Diff< T >::subtract( *max_.max_, *min_.min_ ) / 2, buf ); } } base* clone() const { return new Radius< T, F >( *this ); } + void reset() { min_ = Min< T, F >(); max_ = Max< T, F >(); } private: Min< T, F > min_; Max< T, F > max_; @@ -993,6 +1017,7 @@ namespace Operations void push( const char* ) { ++count_; } void calculate( char* buf ) { comma::csv::format::traits< comma::uint32 >::to_bin( count_, buf ); } base* clone() const { return new Size< T, F >( *this ); } + void reset() { count_ = 0; } private: std::size_t count_; }; @@ -1048,6 +1073,7 @@ class operation_base virtual void push( const char* buf ) = 0; virtual void calculate() = 0; virtual operation_base* clone() const = 0; + virtual void reset() = 0; const comma::csv::format& output_format() const { return output_format_; } const char* buffer() const { return &buffer_[0]; } @@ -1137,48 +1163,74 @@ struct Operation : public operation_base { for( std::size_t i = 0; i < operations_.size(); ++i ) { operations_[i].calculate( &buffer_[0] + output_elements_[i].offset ); } } + + void reset() { for( auto& o: operations_ ) { o.reset(); } } operation_base* clone() const { Operation< E >* op = new Operation< E >; return deep_copy_to_( op ); } }; -typedef boost::unordered_map< comma::uint32, boost::ptr_vector< operation_base >* > operations_map_t; +typedef boost::unordered_map< comma::uint32, std::vector< 
operation_base* >* > operations_map_t; typedef boost::unordered_map< comma::uint32, std::string > results_map_t; typedef std::vector< std::pair < comma::uint32, std::string > > Inputs; -static void init_operations( boost::ptr_vector< operation_base >& operations - , const std::vector< Operations::operation_parameters >& operations_parameters - , const comma::csv::format& format ) +class operations_battery_farm_t // all this pain is because operations polymorhism is too slow when there are a lot of ids { - static boost::ptr_vector< operation_base > sample; - if( sample.empty() ) - { - sample.reserve( operations_parameters.size() ); - for( std::size_t i = 0; i < operations_parameters.size(); ++i ) + public: + typedef std::vector< operation_base* > operations_t; + + operations_battery_farm_t(): end_( 0 ) {} + + ~operations_battery_farm_t() + { + for( auto& sample: operations_ ) { for( auto& s: sample ) { delete s; } } // quick and dirty; shame on me + } + + operations_t& make( const std::vector< Operations::operation_parameters >& operations_parameters, const comma::csv::format& format ) { - switch( operations_parameters[i].type ) + if( operations_.empty() ) { - case Operations::Enum::min: sample.push_back( new Operation< Operations::Enum::min >( format ) ); break; - case Operations::Enum::max: sample.push_back( new Operation< Operations::Enum::max >( format ) ); break; - case Operations::Enum::centre: sample.push_back( new Operation< Operations::Enum::centre >( format ) ); break; - case Operations::Enum::mean: sample.push_back( new Operation< Operations::Enum::mean >( format ) ); break; - case Operations::Enum::mode: sample.push_back( new Operation< Operations::Enum::mode >( format ) ); break; - case Operations::Enum::percentile: sample.push_back( new Operation< Operations::Enum::percentile >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::radius: sample.push_back( new Operation< Operations::Enum::radius >( format ) ); break; - case 
Operations::Enum::diameter: sample.push_back( new Operation< Operations::Enum::diameter >( format ) ); break; - case Operations::Enum::variance: sample.push_back( new Operation< Operations::Enum::variance >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::stddev: sample.push_back( new Operation< Operations::Enum::stddev >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::skew: sample.push_back( new Operation< Operations::Enum::skew >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::kurtosis: sample.push_back( new Operation< Operations::Enum::kurtosis >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::sum: sample.push_back( new Operation< Operations::Enum::sum >( format ) ); break; - case Operations::Enum::size: sample.push_back( new Operation< Operations::Enum::size >( format ) ); break; + operations_.push_back( operations_t() ); + operations_[0].reserve( operations_parameters.size() ); + for( std::size_t i = 0; i < operations_parameters.size(); ++i ) + { + switch( operations_parameters[i].type ) + { + case Operations::Enum::min: operations_[0].push_back( new Operation< Operations::Enum::min >( format ) ); break; + case Operations::Enum::max: operations_[0].push_back( new Operation< Operations::Enum::max >( format ) ); break; + case Operations::Enum::centre: operations_[0].push_back( new Operation< Operations::Enum::centre >( format ) ); break; + case Operations::Enum::mean: operations_[0].push_back( new Operation< Operations::Enum::mean >( format ) ); break; + case Operations::Enum::mode: operations_[0].push_back( new Operation< Operations::Enum::mode >( format ) ); break; + case Operations::Enum::percentile: operations_[0].push_back( new Operation< Operations::Enum::percentile >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::radius: operations_[0].push_back( new Operation< Operations::Enum::radius >( format ) ); 
break; + case Operations::Enum::diameter: operations_[0].push_back( new Operation< Operations::Enum::diameter >( format ) ); break; + case Operations::Enum::variance: operations_[0].push_back( new Operation< Operations::Enum::variance >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::stddev: operations_[0].push_back( new Operation< Operations::Enum::stddev >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::skew: operations_[0].push_back( new Operation< Operations::Enum::skew >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::kurtosis: operations_[0].push_back( new Operation< Operations::Enum::kurtosis >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::sum: operations_[0].push_back( new Operation< Operations::Enum::sum >( format ) ); break; + case Operations::Enum::size: operations_[0].push_back( new Operation< Operations::Enum::size >( format ) ); break; + } + } } + if( end_ == operations_.size() ) + { + operations_.push_back( operations_t() ); + for( auto& s: operations_[0] ) { operations_.back().push_back( s->clone() ); } + } + for( auto& s: operations_[end_] ) { s->reset(); } + return operations_[ end_++ ]; } - } - operations.clear(); - operations.reserve( sample.size() ); - for( auto& s: sample ) { operations.push_back( s.clone() ); } // todo! 
this is really slow, if there are many ids -} + + void reset() { end_ = 0; } + + private: + typedef std::deque< operations_t > operations_t_; + operations_t_ operations_; + unsigned int end_; +}; +static operations_battery_farm_t operations_battery_farm; + static void output( const comma::csv::options& csv, results_map_t& results, boost::optional< comma::uint32 > block, bool has_block, bool has_id ) { for( results_map_t::iterator it = results.begin(); it != results.end(); ++it ) @@ -1223,26 +1275,26 @@ static void calculate( const comma::csv::options& csv, operations_map_t& operati if( csv.binary() ) { unsigned int size = 0; - for( std::size_t i = 0; i < it->second->size(); ++i ) { size += ( *it->second )[i].output_format().size(); } + for( std::size_t i = 0; i < it->second->size(); ++i ) { size += ( *it->second )[i]->output_format().size(); } r.reserve( size ); } for( std::size_t i = 0; i < it->second->size(); ++i ) { - ( *it->second )[i].calculate(); + ( *it->second )[i]->calculate(); if( csv.binary() ) { - r.append( ( *it->second )[i].buffer(), ( *it->second )[i].output_format().size() ); + r.append( ( *it->second )[i]->buffer(), ( *it->second )[i]->output_format().size() ); } else { if( i > 0 ) { r += csv.delimiter; } - r.append( ( *it->second )[i].output_format().bin_to_csv( ( *it->second )[i].buffer(), csv.delimiter, csv.precision ) ); + r.append( ( *it->second )[i]->output_format().bin_to_csv( ( *it->second )[i]->buffer(), csv.delimiter, csv.precision ) ); } } results[ it->first ] = r; } - for( operations_map_t::iterator it = operations.begin(); it != operations.end(); ++it ) { delete it->second; } // quick and dirty operations.clear(); + operations_battery_farm.reset(); } int main( int ac, char** av ) @@ -1305,15 +1357,11 @@ int main( int ac, char** av ) if( options.exists( "--output-format" ) ) { if ( !format ) { std::cerr << comma::verbose.app_name() << ": option --output-format requires input format to be specified, please use --format or --binary" << 
std::endl; return 1; } - boost::ptr_vector< operation_base > ops; - init_operations( ops, operations_parameters, Values(csv, *format).format() ); - for ( std::size_t i = 0; i < ops.size(); ++i ) - { - if ( i > 0 ) { std::cout << csv.delimiter; } - std::cout << ops[i].output_format().string(); - } - if( has_id && !append ) { std::cout << csv.delimiter << "ui"; } - if( has_block && !append ) { std::cout << csv.delimiter << "ui"; } + auto ops = operations_battery_farm.make( operations_parameters, Values( csv, *format ).format() ); + std::cout << ops[0]->output_format().string(); + for( std::size_t i = 1; i < ops.size(); ++i ) { std::cout << ',' << ops[i]->output_format().string(); } + if( has_id && !append ) { std::cout << ",ui"; } + if( has_block && !append ) { std::cout << ",ui"; } std::cout << std::endl; return 0; } @@ -1324,7 +1372,7 @@ int main( int ac, char** av ) if( has_block ) { if( block && *block != v->block() ) - { + { calculate( csv, operations, results ); if ( append ) { append_and_output( csv, inputs, results ); inputs.clear(); } else { output( csv, results, block, has_block, has_id ); } @@ -1334,11 +1382,10 @@ int main( int ac, char** av ) operations_map_t::iterator it = operations.find( v->id() ); if( it == operations.end() ) { - it = operations.insert( std::make_pair( v->id(), new boost::ptr_vector< operation_base > ) ).first; - init_operations( *it->second, operations_parameters, v->format() ); + it = operations.insert( std::make_pair( v->id(), &operations_battery_farm.make( operations_parameters, v->format() ) ) ).first; } if( append ) { inputs.push_back( std::make_pair( v->id(), csv.binary() ? 
binary->line() : ascii->line() ) ); } - for( std::size_t i = 0; i < it->second->size(); ++i ) { ( *it->second )[i].push( v->buffer() ); } + for( std::size_t i = 0; i < it->second->size(); ++i ) { ( *it->second )[i]->push( v->buffer() ); } } calculate( csv, operations, results ); if ( append ) { append_and_output( csv, inputs, results ); } From facc5d549c2b96891035548ce378384144122580 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 7 Feb 2020 12:41:08 +1100 Subject: [PATCH 0129/1056] csv-calc: reverted to previous version due to the bugs --- csv/applications/csv-calc.cpp | 165 ++++++++++++---------------------- 1 file changed, 59 insertions(+), 106 deletions(-) diff --git a/csv/applications/csv-calc.cpp b/csv/applications/csv-calc.cpp index a145ef85a..044eb4136 100644 --- a/csv/applications/csv-calc.cpp +++ b/csv/applications/csv-calc.cpp @@ -27,6 +27,7 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + /// @author vsevolod vlaskine #ifdef WIN32 @@ -436,7 +437,6 @@ namespace Operations struct base { virtual ~base() {} - virtual void reset() = 0; virtual void push( const char* ) = 0; virtual void calculate( char* ) = 0; virtual base* clone() const = 0; @@ -451,7 +451,6 @@ namespace Operations class Min : public base { public: - void reset() { min_ = boost::optional< T >(); } void push( const char* buf ) { const T& t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -470,7 +469,6 @@ namespace Operations class Max : public base { public: - void reset() { max_ = boost::optional< T >(); } void push( const char* buf ) { T t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -489,7 +487,6 @@ namespace Operations class Sum : public base { public: - void reset() { sum_ = boost::optional< T >(); } void push( const char* buf ) { T t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -504,7 +501,6 @@ namespace Operations template < comma::csv::format::types_enum F > class Sum< 
boost::posix_time::ptime, F > : public base { - void reset() { COMMA_THROW( comma::exception, "sum not defined for time" ); } void push( const char* ) { COMMA_THROW( comma::exception, "sum not defined for time" ); } void calculate( char* ) { COMMA_THROW( comma::exception, "sum not defined for time" ); } base* clone() const { COMMA_THROW( comma::exception, "sum not defined for time" ); } @@ -514,7 +510,6 @@ namespace Operations class Centre : public base { public: - void reset() { min_ = Min< T, F >(); max_ = Max< T, F >(); } void push( const char* buf ) { min_.push( buf ); max_.push( buf ); } void calculate( char* buf ) { if( min_.min_ ) { comma::csv::format::traits< T, F >::to_bin( *min_.min_ + ( *max_.max_ - *min_.min_ ) / 2, buf ); } } base* clone() const { return new Centre< T, F >( *this ); } @@ -530,7 +525,6 @@ namespace Operations class Mode : public base { public: - void reset() { value_count_ = impl::value_count< T >(); } void push( const char* buf ) { value_count_.update( comma::csv::format::traits< T, F >::from_bin( buf ) ); } void calculate( char* buf ) { if( !value_count_.map().empty() ) { comma::csv::format::traits< T, F >::to_bin( static_cast< T >( value_count_.mode().first ), buf ); } } base* clone() const { return new Mode< T, F >( *this ); } @@ -543,7 +537,6 @@ namespace Operations { public: Mean() : count_( 0 ) {} - void reset() { mean_ = boost::none; count_ = 0; } void push( const char* buf ) { T t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -648,8 +641,6 @@ namespace Operations } base* clone() const { return new Percentile< T, F >( *this ); } - - void reset() { values_.clear(); percentile_ = 0; } private: std::multiset< T > values_; @@ -660,7 +651,6 @@ namespace Operations template < comma::csv::format::types_enum F > class Percentile< boost::posix_time::ptime, F > : public base { - void reset() { COMMA_THROW( comma::exception, "percentile not implemented for time, todo" ); } void push( const char* ) { COMMA_THROW( 
comma::exception, "percentile not implemented for time, todo" ); } void calculate( char* ) { COMMA_THROW( comma::exception, "percentile not implemented for time, todo" ); } base* clone() const { COMMA_THROW( comma::exception, "percentile not implemented for time, todo" ); } @@ -738,8 +728,6 @@ namespace Operations typename result_traits< T >::type mean() const { return previous_.mean(); } - void reset() { previous_.reset(); value_ = 0; count_ = 0; } - private: Moment< T, M - 1 > previous_; typename result_traits< T >::type value_; @@ -751,7 +739,6 @@ namespace Operations { public: Moment() : value_( 0 ), count_( 0 ) {} - void update ( const T t ) { ++count_; @@ -760,8 +747,6 @@ namespace Operations typename result_traits< T >::type mean() const { return value_; } - void reset() { value_ = 0; count_ = 0; } - private: typename result_traits< T >::type value_; std::size_t count_; @@ -783,7 +768,6 @@ namespace Operations void update( const T t ) { moments_.update(t); } void calculate( char* buf ) { if( moments_.count() > 0 ) { comma::csv::format::traits< T, F >::to_bin( static_cast< T >( std::sqrt( static_cast< long double >( moments_.value() / ( sample_ ? moments_.count() - 1 : moments_.count() ) ) ) ), buf ); } } base* clone() const { return new Stddev< T, F >( *this ); } - void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 2 > moments_; boost::optional first_; @@ -804,7 +788,6 @@ namespace Operations } void calculate( char* buf ) { stddev_.calculate(buf); } base* clone() const { return new Stddev< boost::posix_time::ptime, F >( *this ); } - void reset() { stddev_.reset(); first_ = boost::none; } private: Stddev< double, F > stddev_; boost::optional first_; @@ -826,7 +809,6 @@ namespace Operations void update( const T t ) { moments_.update(t); } void calculate( char* buf ) { if( moments_.count() > 0 ) { comma::csv::format::traits< T, F >::to_bin( static_cast< T >( moments_.value() / ( sample_ ? 
moments_.count() - 1 : moments_.count() ) ), buf ); } } base* clone() const { return new Variance< T, F >( *this ); } - void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 2 > moments_; boost::optional first_; @@ -847,10 +829,9 @@ namespace Operations } void calculate( char* buf ) { variance_.calculate(buf); } base* clone() const { return new Variance< boost::posix_time::ptime, F >( *this ); } - void reset() { variance_.reset(); first_ = boost::none; } private: - Variance< double, F > variance_; - boost::optional< boost::posix_time::ptime > first_; + Variance< double, F> variance_; + boost::optional first_; }; template < typename T, comma::csv::format::types_enum F = comma::csv::format::type_to_enum< T >::value > @@ -871,6 +852,7 @@ namespace Operations if( moments_.count() > 0 ) { typename result_traits< T >::type n = moments_.count(); + // corrected sample skew requires at least 3 samples typename result_traits< T >::type correction = sample_ ? sqrt( n * ( n - 1 ) ) / ( n - 2 ) : 1 ; typename result_traits< T >::type m2 = moments_.previous().value(); @@ -879,10 +861,9 @@ namespace Operations } } base* clone() const { return new Skew< T, F >( *this ); } - void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 3 > moments_; - boost::optional< T > first_; + boost::optional first_; bool sample_; }; @@ -900,10 +881,9 @@ namespace Operations } void calculate( char* buf ) { skew_.calculate(buf); } base* clone() const { return new Skew< boost::posix_time::ptime, F >( *this ); } - void reset() { skew_.reset(); first_ = boost::none; } private: - Skew< double, F > skew_; - boost::optional< boost::posix_time::ptime > first_; + Skew< double, F> skew_; + boost::optional first_; }; template < typename T, comma::csv::format::types_enum F = comma::csv::format::type_to_enum< T >::value > @@ -915,8 +895,8 @@ namespace Operations { for (std::size_t i = 0; i < options.size(); i++) { - if( options[i] == "sample" ) { sample_ = true; } - 
else if( options[i] == "excess" ) { excess_ = true; } + if ( options[i] == "sample" ) { sample_ = true; } + else if ( options[i] == "excess" ) { excess_ = true; } } } void push( const char* buf ) @@ -943,10 +923,9 @@ namespace Operations } } base* clone() const { return new Kurtosis< T, F >( *this ); } - void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 4 > moments_; - boost::optional< T > first_; + boost::optional first_; bool sample_; bool excess_; }; @@ -965,10 +944,9 @@ namespace Operations } void calculate( char* buf ) { kurtosis_.calculate(buf); } base* clone() const { return new Kurtosis< boost::posix_time::ptime, F >( *this ); } - void reset() { kurtosis_.reset(); first_ = boost::none; } private: - Kurtosis< double, F > kurtosis_; - boost::optional< boost::posix_time::ptime > first_; + Kurtosis< double, F> kurtosis_; + boost::optional first_; }; template < typename T > struct Diff @@ -990,7 +968,6 @@ namespace Operations void push( const char* buf ) { min_.push( buf ); max_.push( buf ); } void calculate( char* buf ) { if( min_.min_ ) { comma::csv::format::traits< typename Diff< T >::Type >::to_bin( Diff< T >::subtract( *max_.max_, *min_.min_ ), buf ); } } base* clone() const { return new Diameter< T, F >( *this ); } - void reset() { min_ = Min< T, F >(); max_ = Max< T, F >(); } private: Min< T, F > min_; Max< T, F > max_; @@ -1003,7 +980,6 @@ namespace Operations void push( const char* buf ) { min_.push( buf ); max_.push( buf ); } void calculate( char* buf ) { if( min_.min_ ) { comma::csv::format::traits< typename Diff< T >::Type >::to_bin( Diff< T >::subtract( *max_.max_, *min_.min_ ) / 2, buf ); } } base* clone() const { return new Radius< T, F >( *this ); } - void reset() { min_ = Min< T, F >(); max_ = Max< T, F >(); } private: Min< T, F > min_; Max< T, F > max_; @@ -1017,7 +993,6 @@ namespace Operations void push( const char* ) { ++count_; } void calculate( char* buf ) { comma::csv::format::traits< comma::uint32 >::to_bin( 
count_, buf ); } base* clone() const { return new Size< T, F >( *this ); } - void reset() { count_ = 0; } private: std::size_t count_; }; @@ -1073,7 +1048,6 @@ class operation_base virtual void push( const char* buf ) = 0; virtual void calculate() = 0; virtual operation_base* clone() const = 0; - virtual void reset() = 0; const comma::csv::format& output_format() const { return output_format_; } const char* buffer() const { return &buffer_[0]; } @@ -1163,74 +1137,48 @@ struct Operation : public operation_base { for( std::size_t i = 0; i < operations_.size(); ++i ) { operations_[i].calculate( &buffer_[0] + output_elements_[i].offset ); } } - - void reset() { for( auto& o: operations_ ) { o.reset(); } } operation_base* clone() const { Operation< E >* op = new Operation< E >; return deep_copy_to_( op ); } }; -typedef boost::unordered_map< comma::uint32, std::vector< operation_base* >* > operations_map_t; +typedef boost::unordered_map< comma::uint32, boost::ptr_vector< operation_base >* > operations_map_t; typedef boost::unordered_map< comma::uint32, std::string > results_map_t; typedef std::vector< std::pair < comma::uint32, std::string > > Inputs; -class operations_battery_farm_t // all this pain is because operations polymorhism is too slow when there are a lot of ids +static void init_operations( boost::ptr_vector< operation_base >& operations + , const std::vector< Operations::operation_parameters >& operations_parameters + , const comma::csv::format& format ) { - public: - typedef std::vector< operation_base* > operations_t; - - operations_battery_farm_t(): end_( 0 ) {} - - ~operations_battery_farm_t() - { - for( auto& sample: operations_ ) { for( auto& s: sample ) { delete s; } } // quick and dirty; shame on me - } - - operations_t& make( const std::vector< Operations::operation_parameters >& operations_parameters, const comma::csv::format& format ) + static boost::ptr_vector< operation_base > sample; + if( sample.empty() ) + { + sample.reserve( 
operations_parameters.size() ); + for( std::size_t i = 0; i < operations_parameters.size(); ++i ) { - if( operations_.empty() ) + switch( operations_parameters[i].type ) { - operations_.push_back( operations_t() ); - operations_[0].reserve( operations_parameters.size() ); - for( std::size_t i = 0; i < operations_parameters.size(); ++i ) - { - switch( operations_parameters[i].type ) - { - case Operations::Enum::min: operations_[0].push_back( new Operation< Operations::Enum::min >( format ) ); break; - case Operations::Enum::max: operations_[0].push_back( new Operation< Operations::Enum::max >( format ) ); break; - case Operations::Enum::centre: operations_[0].push_back( new Operation< Operations::Enum::centre >( format ) ); break; - case Operations::Enum::mean: operations_[0].push_back( new Operation< Operations::Enum::mean >( format ) ); break; - case Operations::Enum::mode: operations_[0].push_back( new Operation< Operations::Enum::mode >( format ) ); break; - case Operations::Enum::percentile: operations_[0].push_back( new Operation< Operations::Enum::percentile >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::radius: operations_[0].push_back( new Operation< Operations::Enum::radius >( format ) ); break; - case Operations::Enum::diameter: operations_[0].push_back( new Operation< Operations::Enum::diameter >( format ) ); break; - case Operations::Enum::variance: operations_[0].push_back( new Operation< Operations::Enum::variance >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::stddev: operations_[0].push_back( new Operation< Operations::Enum::stddev >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::skew: operations_[0].push_back( new Operation< Operations::Enum::skew >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::kurtosis: operations_[0].push_back( new Operation< Operations::Enum::kurtosis >( format, operations_parameters[i].options ) ); 
break; - case Operations::Enum::sum: operations_[0].push_back( new Operation< Operations::Enum::sum >( format ) ); break; - case Operations::Enum::size: operations_[0].push_back( new Operation< Operations::Enum::size >( format ) ); break; - } - } + case Operations::Enum::min: sample.push_back( new Operation< Operations::Enum::min >( format ) ); break; + case Operations::Enum::max: sample.push_back( new Operation< Operations::Enum::max >( format ) ); break; + case Operations::Enum::centre: sample.push_back( new Operation< Operations::Enum::centre >( format ) ); break; + case Operations::Enum::mean: sample.push_back( new Operation< Operations::Enum::mean >( format ) ); break; + case Operations::Enum::mode: sample.push_back( new Operation< Operations::Enum::mode >( format ) ); break; + case Operations::Enum::percentile: sample.push_back( new Operation< Operations::Enum::percentile >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::radius: sample.push_back( new Operation< Operations::Enum::radius >( format ) ); break; + case Operations::Enum::diameter: sample.push_back( new Operation< Operations::Enum::diameter >( format ) ); break; + case Operations::Enum::variance: sample.push_back( new Operation< Operations::Enum::variance >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::stddev: sample.push_back( new Operation< Operations::Enum::stddev >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::skew: sample.push_back( new Operation< Operations::Enum::skew >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::kurtosis: sample.push_back( new Operation< Operations::Enum::kurtosis >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::sum: sample.push_back( new Operation< Operations::Enum::sum >( format ) ); break; + case Operations::Enum::size: sample.push_back( new Operation< Operations::Enum::size >( format ) ); break; } - if( end_ 
== operations_.size() ) - { - operations_.push_back( operations_t() ); - for( auto& s: operations_[0] ) { operations_.back().push_back( s->clone() ); } - } - for( auto& s: operations_[end_] ) { s->reset(); } - return operations_[ end_++ ]; } - - void reset() { end_ = 0; } - - private: - typedef std::deque< operations_t > operations_t_; - operations_t_ operations_; - unsigned int end_; -}; + } + operations.clear(); + operations.reserve( sample.size() ); + for( auto& s: sample ) { operations.push_back( s.clone() ); } // todo! this is really slow, if there are many ids +} -static operations_battery_farm_t operations_battery_farm; - static void output( const comma::csv::options& csv, results_map_t& results, boost::optional< comma::uint32 > block, bool has_block, bool has_id ) { for( results_map_t::iterator it = results.begin(); it != results.end(); ++it ) @@ -1275,26 +1223,26 @@ static void calculate( const comma::csv::options& csv, operations_map_t& operati if( csv.binary() ) { unsigned int size = 0; - for( std::size_t i = 0; i < it->second->size(); ++i ) { size += ( *it->second )[i]->output_format().size(); } + for( std::size_t i = 0; i < it->second->size(); ++i ) { size += ( *it->second )[i].output_format().size(); } r.reserve( size ); } for( std::size_t i = 0; i < it->second->size(); ++i ) { - ( *it->second )[i]->calculate(); + ( *it->second )[i].calculate(); if( csv.binary() ) { - r.append( ( *it->second )[i]->buffer(), ( *it->second )[i]->output_format().size() ); + r.append( ( *it->second )[i].buffer(), ( *it->second )[i].output_format().size() ); } else { if( i > 0 ) { r += csv.delimiter; } - r.append( ( *it->second )[i]->output_format().bin_to_csv( ( *it->second )[i]->buffer(), csv.delimiter, csv.precision ) ); + r.append( ( *it->second )[i].output_format().bin_to_csv( ( *it->second )[i].buffer(), csv.delimiter, csv.precision ) ); } } results[ it->first ] = r; } + for( operations_map_t::iterator it = operations.begin(); it != operations.end(); ++it ) { delete 
it->second; } // quick and dirty operations.clear(); - operations_battery_farm.reset(); } int main( int ac, char** av ) @@ -1357,11 +1305,15 @@ int main( int ac, char** av ) if( options.exists( "--output-format" ) ) { if ( !format ) { std::cerr << comma::verbose.app_name() << ": option --output-format requires input format to be specified, please use --format or --binary" << std::endl; return 1; } - auto ops = operations_battery_farm.make( operations_parameters, Values( csv, *format ).format() ); - std::cout << ops[0]->output_format().string(); - for( std::size_t i = 1; i < ops.size(); ++i ) { std::cout << ',' << ops[i]->output_format().string(); } - if( has_id && !append ) { std::cout << ",ui"; } - if( has_block && !append ) { std::cout << ",ui"; } + boost::ptr_vector< operation_base > ops; + init_operations( ops, operations_parameters, Values(csv, *format).format() ); + for ( std::size_t i = 0; i < ops.size(); ++i ) + { + if ( i > 0 ) { std::cout << csv.delimiter; } + std::cout << ops[i].output_format().string(); + } + if( has_id && !append ) { std::cout << csv.delimiter << "ui"; } + if( has_block && !append ) { std::cout << csv.delimiter << "ui"; } std::cout << std::endl; return 0; } @@ -1372,7 +1324,7 @@ int main( int ac, char** av ) if( has_block ) { if( block && *block != v->block() ) - { + { calculate( csv, operations, results ); if ( append ) { append_and_output( csv, inputs, results ); inputs.clear(); } else { output( csv, results, block, has_block, has_id ); } @@ -1382,10 +1334,11 @@ int main( int ac, char** av ) operations_map_t::iterator it = operations.find( v->id() ); if( it == operations.end() ) { - it = operations.insert( std::make_pair( v->id(), &operations_battery_farm.make( operations_parameters, v->format() ) ) ).first; + it = operations.insert( std::make_pair( v->id(), new boost::ptr_vector< operation_base > ) ).first; + init_operations( *it->second, operations_parameters, v->format() ); } if( append ) { inputs.push_back( std::make_pair( 
v->id(), csv.binary() ? binary->line() : ascii->line() ) ); } - for( std::size_t i = 0; i < it->second->size(); ++i ) { ( *it->second )[i]->push( v->buffer() ); } + for( std::size_t i = 0; i < it->second->size(); ++i ) { ( *it->second )[i].push( v->buffer() ); } } calculate( csv, operations, results ); if ( append ) { append_and_output( csv, inputs, results ); } From 638e20b2f382ae3c3f5bd63d2fb37fab67fc766b Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 7 Feb 2020 13:11:38 +1100 Subject: [PATCH 0130/1056] csv-calc: performance on large number of ids somewhat improved... --- csv/applications/csv-calc.cpp | 171 +++++++++++++++++++++------------- 1 file changed, 108 insertions(+), 63 deletions(-) diff --git a/csv/applications/csv-calc.cpp b/csv/applications/csv-calc.cpp index 044eb4136..18f2e94ce 100644 --- a/csv/applications/csv-calc.cpp +++ b/csv/applications/csv-calc.cpp @@ -27,7 +27,6 @@ // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- /// @author vsevolod vlaskine #ifdef WIN32 @@ -437,6 +436,7 @@ namespace Operations struct base { virtual ~base() {} + virtual void reset() = 0; virtual void push( const char* ) = 0; virtual void calculate( char* ) = 0; virtual base* clone() const = 0; @@ -451,6 +451,7 @@ namespace Operations class Min : public base { public: + void reset() { min_ = boost::optional< T >(); } void push( const char* buf ) { const T& t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -469,6 +470,7 @@ namespace Operations class Max : public base { public: + void reset() { max_ = boost::optional< T >(); } void push( const char* buf ) { T t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -487,6 +489,7 @@ namespace Operations class Sum : public base { public: + void reset() { sum_ = boost::optional< T >(); } void push( const char* buf ) { T t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -501,6 +504,7 @@ namespace Operations template < comma::csv::format::types_enum F > class Sum< boost::posix_time::ptime, F > : public base { + void reset() { COMMA_THROW( comma::exception, "sum not defined for time" ); } void push( const char* ) { COMMA_THROW( comma::exception, "sum not defined for time" ); } void calculate( char* ) { COMMA_THROW( comma::exception, "sum not defined for time" ); } base* clone() const { COMMA_THROW( comma::exception, "sum not defined for time" ); } @@ -510,6 +514,7 @@ namespace Operations class Centre : public base { public: + void reset() { min_ = Min< T, F >(); max_ = Max< T, F >(); } void push( const char* buf ) { min_.push( buf ); max_.push( buf ); } void calculate( char* buf ) { if( min_.min_ ) { comma::csv::format::traits< T, F >::to_bin( *min_.min_ + ( *max_.max_ - *min_.min_ ) / 2, buf ); } } base* clone() const { return new Centre< T, F >( *this ); } @@ -525,6 +530,7 @@ namespace Operations class Mode : public base { public: + void reset() { value_count_ = impl::value_count< T >(); } void push( const char* buf ) { 
value_count_.update( comma::csv::format::traits< T, F >::from_bin( buf ) ); } void calculate( char* buf ) { if( !value_count_.map().empty() ) { comma::csv::format::traits< T, F >::to_bin( static_cast< T >( value_count_.mode().first ), buf ); } } base* clone() const { return new Mode< T, F >( *this ); } @@ -537,6 +543,7 @@ namespace Operations { public: Mean() : count_( 0 ) {} + void reset() { mean_ = boost::none; count_ = 0; } void push( const char* buf ) { T t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -615,8 +622,7 @@ namespace Operations comma::verbose << "NIST linear interpolation method" << std::endl; comma::verbose << "see http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm" << std::endl; double x = percentile_ * ( count + 1 ); - comma::verbose << "p = " << percentile_ << "; N = " << count - << "; p(N + 1) = " << x; + comma::verbose << "p = " << percentile_ << "; N = " << count << "; p(N + 1) = " << x; if( x <= 1.0 ) { comma::verbose << "; below 1 - choosing smallest value" << std::endl; value = *it; @@ -631,8 +637,7 @@ namespace Operations double v1 = *it; double v2 = *++it; value = v1 + ( v2 - v1 ) * remainder; - comma::verbose << "v1 = " << v1 << "; v2 = " << v2 - << "; result = " << value << std::endl; + comma::verbose << "v1 = " << v1 << "; v2 = " << v2 << "; result = " << value << std::endl; } break; } @@ -641,6 +646,8 @@ namespace Operations } base* clone() const { return new Percentile< T, F >( *this ); } + + void reset() { values_.clear(); } private: std::multiset< T > values_; @@ -651,6 +658,7 @@ namespace Operations template < comma::csv::format::types_enum F > class Percentile< boost::posix_time::ptime, F > : public base { + void reset() { COMMA_THROW( comma::exception, "percentile not implemented for time, todo" ); } void push( const char* ) { COMMA_THROW( comma::exception, "percentile not implemented for time, todo" ); } void calculate( char* ) { COMMA_THROW( comma::exception, "percentile not implemented for time, 
todo" ); } base* clone() const { COMMA_THROW( comma::exception, "percentile not implemented for time, todo" ); } @@ -728,6 +736,8 @@ namespace Operations typename result_traits< T >::type mean() const { return previous_.mean(); } + void reset() { previous_.reset(); value_ = 0; count_ = 0; } + private: Moment< T, M - 1 > previous_; typename result_traits< T >::type value_; @@ -739,6 +749,7 @@ namespace Operations { public: Moment() : value_( 0 ), count_( 0 ) {} + void update ( const T t ) { ++count_; @@ -747,6 +758,8 @@ namespace Operations typename result_traits< T >::type mean() const { return value_; } + void reset() { value_ = 0; count_ = 0; } + private: typename result_traits< T >::type value_; std::size_t count_; @@ -768,6 +781,7 @@ namespace Operations void update( const T t ) { moments_.update(t); } void calculate( char* buf ) { if( moments_.count() > 0 ) { comma::csv::format::traits< T, F >::to_bin( static_cast< T >( std::sqrt( static_cast< long double >( moments_.value() / ( sample_ ? moments_.count() - 1 : moments_.count() ) ) ) ), buf ); } } base* clone() const { return new Stddev< T, F >( *this ); } + void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 2 > moments_; boost::optional first_; @@ -788,6 +802,7 @@ namespace Operations } void calculate( char* buf ) { stddev_.calculate(buf); } base* clone() const { return new Stddev< boost::posix_time::ptime, F >( *this ); } + void reset() { stddev_.reset(); first_ = boost::none; } private: Stddev< double, F > stddev_; boost::optional first_; @@ -809,6 +824,7 @@ namespace Operations void update( const T t ) { moments_.update(t); } void calculate( char* buf ) { if( moments_.count() > 0 ) { comma::csv::format::traits< T, F >::to_bin( static_cast< T >( moments_.value() / ( sample_ ? 
moments_.count() - 1 : moments_.count() ) ), buf ); } } base* clone() const { return new Variance< T, F >( *this ); } + void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 2 > moments_; boost::optional first_; @@ -829,9 +845,10 @@ namespace Operations } void calculate( char* buf ) { variance_.calculate(buf); } base* clone() const { return new Variance< boost::posix_time::ptime, F >( *this ); } + void reset() { variance_.reset(); first_ = boost::none; } private: - Variance< double, F> variance_; - boost::optional first_; + Variance< double, F > variance_; + boost::optional< boost::posix_time::ptime > first_; }; template < typename T, comma::csv::format::types_enum F = comma::csv::format::type_to_enum< T >::value > @@ -852,7 +869,6 @@ namespace Operations if( moments_.count() > 0 ) { typename result_traits< T >::type n = moments_.count(); - // corrected sample skew requires at least 3 samples typename result_traits< T >::type correction = sample_ ? sqrt( n * ( n - 1 ) ) / ( n - 2 ) : 1 ; typename result_traits< T >::type m2 = moments_.previous().value(); @@ -861,9 +877,10 @@ namespace Operations } } base* clone() const { return new Skew< T, F >( *this ); } + void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 3 > moments_; - boost::optional first_; + boost::optional< T > first_; bool sample_; }; @@ -881,9 +898,10 @@ namespace Operations } void calculate( char* buf ) { skew_.calculate(buf); } base* clone() const { return new Skew< boost::posix_time::ptime, F >( *this ); } + void reset() { skew_.reset(); first_ = boost::none; } private: - Skew< double, F> skew_; - boost::optional first_; + Skew< double, F > skew_; + boost::optional< boost::posix_time::ptime > first_; }; template < typename T, comma::csv::format::types_enum F = comma::csv::format::type_to_enum< T >::value > @@ -895,8 +913,8 @@ namespace Operations { for (std::size_t i = 0; i < options.size(); i++) { - if ( options[i] == "sample" ) { sample_ = true; } - 
else if ( options[i] == "excess" ) { excess_ = true; } + if( options[i] == "sample" ) { sample_ = true; } + else if( options[i] == "excess" ) { excess_ = true; } } } void push( const char* buf ) @@ -923,9 +941,10 @@ namespace Operations } } base* clone() const { return new Kurtosis< T, F >( *this ); } + void reset() { moments_.reset(); first_ = boost::none; } private: Moment< T, 4 > moments_; - boost::optional first_; + boost::optional< T > first_; bool sample_; bool excess_; }; @@ -944,9 +963,10 @@ namespace Operations } void calculate( char* buf ) { kurtosis_.calculate(buf); } base* clone() const { return new Kurtosis< boost::posix_time::ptime, F >( *this ); } + void reset() { kurtosis_.reset(); first_ = boost::none; } private: - Kurtosis< double, F> kurtosis_; - boost::optional first_; + Kurtosis< double, F > kurtosis_; + boost::optional< boost::posix_time::ptime > first_; }; template < typename T > struct Diff @@ -968,6 +988,7 @@ namespace Operations void push( const char* buf ) { min_.push( buf ); max_.push( buf ); } void calculate( char* buf ) { if( min_.min_ ) { comma::csv::format::traits< typename Diff< T >::Type >::to_bin( Diff< T >::subtract( *max_.max_, *min_.min_ ), buf ); } } base* clone() const { return new Diameter< T, F >( *this ); } + void reset() { min_ = Min< T, F >(); max_ = Max< T, F >(); } private: Min< T, F > min_; Max< T, F > max_; @@ -980,6 +1001,7 @@ namespace Operations void push( const char* buf ) { min_.push( buf ); max_.push( buf ); } void calculate( char* buf ) { if( min_.min_ ) { comma::csv::format::traits< typename Diff< T >::Type >::to_bin( Diff< T >::subtract( *max_.max_, *min_.min_ ) / 2, buf ); } } base* clone() const { return new Radius< T, F >( *this ); } + void reset() { min_ = Min< T, F >(); max_ = Max< T, F >(); } private: Min< T, F > min_; Max< T, F > max_; @@ -993,6 +1015,7 @@ namespace Operations void push( const char* ) { ++count_; } void calculate( char* buf ) { comma::csv::format::traits< comma::uint32 >::to_bin( 
count_, buf ); } base* clone() const { return new Size< T, F >( *this ); } + void reset() { count_ = 0; } private: std::size_t count_; }; @@ -1048,6 +1071,7 @@ class operation_base virtual void push( const char* buf ) = 0; virtual void calculate() = 0; virtual operation_base* clone() const = 0; + virtual void reset() = 0; const comma::csv::format& output_format() const { return output_format_; } const char* buffer() const { return &buffer_[0]; } @@ -1137,48 +1161,74 @@ struct Operation : public operation_base { for( std::size_t i = 0; i < operations_.size(); ++i ) { operations_[i].calculate( &buffer_[0] + output_elements_[i].offset ); } } + + void reset() { for( auto& o: operations_ ) { o.reset(); } } operation_base* clone() const { Operation< E >* op = new Operation< E >; return deep_copy_to_( op ); } }; -typedef boost::unordered_map< comma::uint32, boost::ptr_vector< operation_base >* > operations_map_t; +typedef boost::unordered_map< comma::uint32, std::vector< operation_base* >* > operations_map_t; typedef boost::unordered_map< comma::uint32, std::string > results_map_t; typedef std::vector< std::pair < comma::uint32, std::string > > Inputs; -static void init_operations( boost::ptr_vector< operation_base >& operations - , const std::vector< Operations::operation_parameters >& operations_parameters - , const comma::csv::format& format ) +class operations_battery_farm_t // all this pain is because operations polymorhism is too slow when there are a lot of ids { - static boost::ptr_vector< operation_base > sample; - if( sample.empty() ) - { - sample.reserve( operations_parameters.size() ); - for( std::size_t i = 0; i < operations_parameters.size(); ++i ) + public: + typedef std::vector< operation_base* > operations_t; + + operations_battery_farm_t(): end_( 0 ) {} + + ~operations_battery_farm_t() + { + for( auto& sample: operations_ ) { for( auto& s: sample ) { delete s; } } // quick and dirty; shame on me + } + + operations_t& make( const std::vector< 
Operations::operation_parameters >& operations_parameters, const comma::csv::format& format ) { - switch( operations_parameters[i].type ) + if( operations_.empty() ) { - case Operations::Enum::min: sample.push_back( new Operation< Operations::Enum::min >( format ) ); break; - case Operations::Enum::max: sample.push_back( new Operation< Operations::Enum::max >( format ) ); break; - case Operations::Enum::centre: sample.push_back( new Operation< Operations::Enum::centre >( format ) ); break; - case Operations::Enum::mean: sample.push_back( new Operation< Operations::Enum::mean >( format ) ); break; - case Operations::Enum::mode: sample.push_back( new Operation< Operations::Enum::mode >( format ) ); break; - case Operations::Enum::percentile: sample.push_back( new Operation< Operations::Enum::percentile >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::radius: sample.push_back( new Operation< Operations::Enum::radius >( format ) ); break; - case Operations::Enum::diameter: sample.push_back( new Operation< Operations::Enum::diameter >( format ) ); break; - case Operations::Enum::variance: sample.push_back( new Operation< Operations::Enum::variance >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::stddev: sample.push_back( new Operation< Operations::Enum::stddev >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::skew: sample.push_back( new Operation< Operations::Enum::skew >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::kurtosis: sample.push_back( new Operation< Operations::Enum::kurtosis >( format, operations_parameters[i].options ) ); break; - case Operations::Enum::sum: sample.push_back( new Operation< Operations::Enum::sum >( format ) ); break; - case Operations::Enum::size: sample.push_back( new Operation< Operations::Enum::size >( format ) ); break; + operations_.push_back( operations_t() ); + operations_[0].reserve( 
operations_parameters.size() ); + for( std::size_t i = 0; i < operations_parameters.size(); ++i ) + { + switch( operations_parameters[i].type ) + { + case Operations::Enum::min: operations_[0].push_back( new Operation< Operations::Enum::min >( format ) ); break; + case Operations::Enum::max: operations_[0].push_back( new Operation< Operations::Enum::max >( format ) ); break; + case Operations::Enum::centre: operations_[0].push_back( new Operation< Operations::Enum::centre >( format ) ); break; + case Operations::Enum::mean: operations_[0].push_back( new Operation< Operations::Enum::mean >( format ) ); break; + case Operations::Enum::mode: operations_[0].push_back( new Operation< Operations::Enum::mode >( format ) ); break; + case Operations::Enum::percentile: operations_[0].push_back( new Operation< Operations::Enum::percentile >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::radius: operations_[0].push_back( new Operation< Operations::Enum::radius >( format ) ); break; + case Operations::Enum::diameter: operations_[0].push_back( new Operation< Operations::Enum::diameter >( format ) ); break; + case Operations::Enum::variance: operations_[0].push_back( new Operation< Operations::Enum::variance >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::stddev: operations_[0].push_back( new Operation< Operations::Enum::stddev >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::skew: operations_[0].push_back( new Operation< Operations::Enum::skew >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::kurtosis: operations_[0].push_back( new Operation< Operations::Enum::kurtosis >( format, operations_parameters[i].options ) ); break; + case Operations::Enum::sum: operations_[0].push_back( new Operation< Operations::Enum::sum >( format ) ); break; + case Operations::Enum::size: operations_[0].push_back( new Operation< Operations::Enum::size >( format ) ); break; + 
} + } } + if( end_ == operations_.size() ) + { + operations_.push_back( operations_t() ); + for( auto& s: operations_[0] ) { operations_.back().push_back( s->clone() ); } + } + for( auto& s: operations_[end_] ) { s->reset(); } + return operations_[ end_++ ]; } - } - operations.clear(); - operations.reserve( sample.size() ); - for( auto& s: sample ) { operations.push_back( s.clone() ); } // todo! this is really slow, if there are many ids -} + + void reset() { end_ = 0; } + + private: + typedef std::deque< operations_t > operations_t_; + operations_t_ operations_; + unsigned int end_; +}; +static operations_battery_farm_t operations_battery_farm; + static void output( const comma::csv::options& csv, results_map_t& results, boost::optional< comma::uint32 > block, bool has_block, bool has_id ) { for( results_map_t::iterator it = results.begin(); it != results.end(); ++it ) @@ -1223,26 +1273,26 @@ static void calculate( const comma::csv::options& csv, operations_map_t& operati if( csv.binary() ) { unsigned int size = 0; - for( std::size_t i = 0; i < it->second->size(); ++i ) { size += ( *it->second )[i].output_format().size(); } + for( std::size_t i = 0; i < it->second->size(); ++i ) { size += ( *it->second )[i]->output_format().size(); } r.reserve( size ); } for( std::size_t i = 0; i < it->second->size(); ++i ) { - ( *it->second )[i].calculate(); + ( *it->second )[i]->calculate(); if( csv.binary() ) { - r.append( ( *it->second )[i].buffer(), ( *it->second )[i].output_format().size() ); + r.append( ( *it->second )[i]->buffer(), ( *it->second )[i]->output_format().size() ); } else { if( i > 0 ) { r += csv.delimiter; } - r.append( ( *it->second )[i].output_format().bin_to_csv( ( *it->second )[i].buffer(), csv.delimiter, csv.precision ) ); + r.append( ( *it->second )[i]->output_format().bin_to_csv( ( *it->second )[i]->buffer(), csv.delimiter, csv.precision ) ); } } results[ it->first ] = r; } - for( operations_map_t::iterator it = operations.begin(); it != 
operations.end(); ++it ) { delete it->second; } // quick and dirty operations.clear(); + operations_battery_farm.reset(); } int main( int ac, char** av ) @@ -1305,15 +1355,11 @@ int main( int ac, char** av ) if( options.exists( "--output-format" ) ) { if ( !format ) { std::cerr << comma::verbose.app_name() << ": option --output-format requires input format to be specified, please use --format or --binary" << std::endl; return 1; } - boost::ptr_vector< operation_base > ops; - init_operations( ops, operations_parameters, Values(csv, *format).format() ); - for ( std::size_t i = 0; i < ops.size(); ++i ) - { - if ( i > 0 ) { std::cout << csv.delimiter; } - std::cout << ops[i].output_format().string(); - } - if( has_id && !append ) { std::cout << csv.delimiter << "ui"; } - if( has_block && !append ) { std::cout << csv.delimiter << "ui"; } + auto ops = operations_battery_farm.make( operations_parameters, Values( csv, *format ).format() ); + std::cout << ops[0]->output_format().string(); + for( std::size_t i = 1; i < ops.size(); ++i ) { std::cout << ',' << ops[i]->output_format().string(); } + if( has_id && !append ) { std::cout << ",ui"; } + if( has_block && !append ) { std::cout << ",ui"; } std::cout << std::endl; return 0; } @@ -1324,7 +1370,7 @@ int main( int ac, char** av ) if( has_block ) { if( block && *block != v->block() ) - { + { calculate( csv, operations, results ); if ( append ) { append_and_output( csv, inputs, results ); inputs.clear(); } else { output( csv, results, block, has_block, has_id ); } @@ -1334,11 +1380,10 @@ int main( int ac, char** av ) operations_map_t::iterator it = operations.find( v->id() ); if( it == operations.end() ) { - it = operations.insert( std::make_pair( v->id(), new boost::ptr_vector< operation_base > ) ).first; - init_operations( *it->second, operations_parameters, v->format() ); + it = operations.insert( std::make_pair( v->id(), &operations_battery_farm.make( operations_parameters, v->format() ) ) ).first; } if( append ) { 
inputs.push_back( std::make_pair( v->id(), csv.binary() ? binary->line() : ascii->line() ) ); } - for( std::size_t i = 0; i < it->second->size(); ++i ) { ( *it->second )[i].push( v->buffer() ); } + for( std::size_t i = 0; i < it->second->size(); ++i ) { ( *it->second )[i]->push( v->buffer() ); } } calculate( csv, operations, results ); if ( append ) { append_and_output( csv, inputs, results ); } From 1b9c02e982dd5dd43d3c8aec058d0d4694d70000 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 7 Feb 2020 13:42:39 +1100 Subject: [PATCH 0131/1056] csv-calc: minor refactoring to improve performance... --- csv/applications/csv-calc.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/csv/applications/csv-calc.cpp b/csv/applications/csv-calc.cpp index 18f2e94ce..69b326619 100644 --- a/csv/applications/csv-calc.cpp +++ b/csv/applications/csv-calc.cpp @@ -596,7 +596,6 @@ namespace Operations void calculate( char* buf ) { std::size_t count = values_.size(); - if( count > 0 ) { comma::verbose << "calculating " << percentile_*100 << "th percentile using "; @@ -1180,7 +1179,7 @@ class operations_battery_farm_t // all this pain is because operations polymorhi ~operations_battery_farm_t() { - for( auto& sample: operations_ ) { for( auto& s: sample ) { delete s; } } // quick and dirty; shame on me + for( auto& operation: operations_ ) { for( auto& o: operation ) { delete o; } } // quick and dirty; shame on me } operations_t& make( const std::vector< Operations::operation_parameters >& operations_parameters, const comma::csv::format& format ) @@ -1212,8 +1211,8 @@ class operations_battery_farm_t // all this pain is because operations polymorhi } if( end_ == operations_.size() ) { - operations_.push_back( operations_t() ); - for( auto& s: operations_[0] ) { operations_.back().push_back( s->clone() ); } + operations_.push_back( operations_t( operations_[0].size() ) ); + for( unsigned int i = 0; i < operations_[0].size(); ++i ) { operations_.back()[i] = 
operations_[0][i]->clone(); } } for( auto& s: operations_[end_] ) { s->reset(); } return operations_[ end_++ ]; From 33eba1b3dffae79421d1af845cf070d5aa74d7f2 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 7 Feb 2020 15:59:17 +1100 Subject: [PATCH 0132/1056] csv-calc: trivial refactoring --- csv/applications/csv-calc.cpp | 120 +++++++++++++++------------------- 1 file changed, 54 insertions(+), 66 deletions(-) diff --git a/csv/applications/csv-calc.cpp b/csv/applications/csv-calc.cpp index 69b326619..7ca448927 100644 --- a/csv/applications/csv-calc.cpp +++ b/csv/applications/csv-calc.cpp @@ -543,7 +543,7 @@ namespace Operations { public: Mean() : count_( 0 ) {} - void reset() { mean_ = boost::none; count_ = 0; } + void reset() { mean_.reset(); count_ = 0; } void push( const char* buf ) { T t = comma::csv::format::traits< T, F >::from_bin( buf ); @@ -565,83 +565,71 @@ namespace Operations Percentile() : percentile_( 0.0 ), method_( nearest ) {} - void push( const char* buf ) - { - values_.insert( comma::csv::format::traits< T, F >::from_bin( buf )); - } + void push( const char* buf ) { values_.insert( comma::csv::format::traits< T, F >::from_bin( buf ) ); } void set_options( const std::vector< std::string >& options ) { - if( options.size() == 0 ) { - std::cerr << comma::verbose.app_name() << ": percentile operation requires a percentile" << std::endl; - exit( 1 ); - } - + if( options.empty() ) { std::cerr << comma::verbose.app_name() << ": percentile operation requires a percentile" << std::endl; exit( 1 ); } percentile_ = boost::lexical_cast< double >( options[0] ); - if( percentile_ < 0.0 || percentile_ > 1.0 ) { - std::cerr << comma::verbose.app_name() << ": percentile value should be between 0 and 1, got " << percentile_ << std::endl; - exit( 1 ); - } - - if( options.size() == 2 ) { - if( options[1] == "nearest" ) method_ = nearest; - else if( options[1] == "interpolate" ) method_ = interpolate; - else { - std::cerr << comma::verbose.app_name() << ": 
expected percentile method, got " << options[1] << std::endl; - exit( 1 ); - } - } + if( percentile_ < 0.0 || percentile_ > 1.0 ) { std::cerr << comma::verbose.app_name() << ": percentile value should be between 0 and 1, got " << percentile_ << std::endl; exit( 1 ); } + if( options.size() < 2 ) { return; } + if( options[1] == "nearest" ) { method_ = nearest; } + else if( options[1] == "interpolate" ) { method_ = interpolate; } + else { std::cerr << comma::verbose.app_name() << ": expected percentile method, got '" << options[1] << "'" << std::endl; exit( 1 ); } } void calculate( char* buf ) { + if( values_.empty() ) { return; } std::size_t count = values_.size(); - if( count > 0 ) + comma::verbose << "calculating " << percentile_*100 << "th percentile using "; + T value; + typename std::multiset< T >::iterator it = values_.begin(); + switch( method_ ) { - comma::verbose << "calculating " << percentile_*100 << "th percentile using "; - T value; - typename std::multiset< T >::iterator it = values_.begin(); - switch( method_ ) - { - std::size_t rank; - - case nearest: - // https://en.wikipedia.org/wiki/Percentile#The_Nearest_Rank_method - comma::verbose << "nearest rank method" << std::endl; - comma::verbose << "see https://en.wikipedia.org/wiki/Percentile#The_Nearest_Rank_method" << std::endl; - rank = ( percentile_ == 0.0 ? 1 : std::ceil( count * percentile_ )); - comma::verbose << "n = " << rank << std::endl; - std::advance( it, rank - 1 ); + std::size_t rank; + + case nearest: + // https://en.wikipedia.org/wiki/Percentile#The_Nearest_Rank_method + comma::verbose << "nearest rank method" << std::endl; + comma::verbose << "see https://en.wikipedia.org/wiki/Percentile#The_Nearest_Rank_method" << std::endl; + rank = ( percentile_ == 0.0 ? 
1 : std::ceil( count * percentile_ )); + comma::verbose << "n = " << rank << std::endl; + std::advance( it, rank - 1 ); + value = *it; + break; + + case interpolate: + // https://en.wikipedia.org/wiki/Percentile#The_Linear_Interpolation_Between_Closest_Ranks_method + // (third method in that section) + comma::verbose << "NIST linear interpolation method" << std::endl; + comma::verbose << "see http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm" << std::endl; + double x = percentile_ * ( count + 1 ); + comma::verbose << "p = " << percentile_ << "; N = " << count << "; p(N + 1) = " << x; + if( x <= 1.0 ) + { + comma::verbose << "; below 1 - choosing smallest value" << std::endl; value = *it; - break; - - case interpolate: - // https://en.wikipedia.org/wiki/Percentile#The_Linear_Interpolation_Between_Closest_Ranks_method - // (third method in that section) - comma::verbose << "NIST linear interpolation method" << std::endl; - comma::verbose << "see http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm" << std::endl; - double x = percentile_ * ( count + 1 ); - comma::verbose << "p = " << percentile_ << "; N = " << count << "; p(N + 1) = " << x; - if( x <= 1.0 ) { - comma::verbose << "; below 1 - choosing smallest value" << std::endl; - value = *it; - } else if( x >= count ) { - comma::verbose << "; above N - choosing largest value" << std::endl; - value = *( values_.rbegin() ); - } else { - rank = x; - double remainder = x - rank; - comma::verbose << "; k = " << rank << "; d = " << remainder << std::endl; - std::advance( it, rank - 1 ); - double v1 = *it; - double v2 = *++it; - value = v1 + ( v2 - v1 ) * remainder; - comma::verbose << "v1 = " << v1 << "; v2 = " << v2 << "; result = " << value << std::endl; - } - break; - } - comma::csv::format::traits< T, F >::to_bin( static_cast< T >( value ), buf ); + } + else if( x >= count ) + { + comma::verbose << "; above N - choosing largest value" << std::endl; + value = *( values_.rbegin() ); + } + else + 
{ + rank = x; + double remainder = x - rank; + comma::verbose << "; k = " << rank << "; d = " << remainder << std::endl; + std::advance( it, rank - 1 ); + double v1 = *it; + double v2 = *++it; + value = v1 + ( v2 - v1 ) * remainder; + comma::verbose << "v1 = " << v1 << "; v2 = " << v2 << "; result = " << value << std::endl; + } + break; } + comma::csv::format::traits< T, F >::to_bin( static_cast< T >( value ), buf ); } base* clone() const { return new Percentile< T, F >( *this ); } From 59ad3efdcb8cd3c14993d2aeb073b7e98ebfd874 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 7 Feb 2020 16:07:30 +1100 Subject: [PATCH 0133/1056] csv-calc: trivial refactoring --- csv/applications/csv-calc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csv/applications/csv-calc.cpp b/csv/applications/csv-calc.cpp index 7ca448927..181e6ab60 100644 --- a/csv/applications/csv-calc.cpp +++ b/csv/applications/csv-calc.cpp @@ -1077,7 +1077,7 @@ class operation_base lhs->output_format_ = output_format_; lhs->output_elements_ = output_elements_; lhs->buffer_ = buffer_; - for( std::size_t i = 0; i < operations_.size(); ++i ) { lhs->operations_.push_back( operations_[i].clone() ); } + for( auto& o: operations_ ) { lhs->operations_.push_back( o.clone() ); } return lhs; } }; From d888ccb61520adf05039898f35f65529b29f0ed1 Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 11 Feb 2020 09:46:21 +1100 Subject: [PATCH 0134/1056] csv-strings: add operation implemented --- csv/applications/csv-strings.cpp | 41 ++++++++++++++++++++++++-------- csv/test/csv-strings/expected | 3 +++ csv/test/csv-strings/input | 3 +++ 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/csv/applications/csv-strings.cpp b/csv/applications/csv-strings.cpp index c1fa2feea..f1140f820 100644 --- a/csv/applications/csv-strings.cpp +++ b/csv/applications/csv-strings.cpp @@ -74,6 +74,7 @@ static void usage( bool verbose ) std::cerr << " usage: cat input.csv | csv-strings [] > output.csv" << std::endl; 
std::cerr << std::endl; std::cerr << "operations" << std::endl; + std::cerr << " add" << std::endl; std::cerr << " path-basename,basename" << std::endl; std::cerr << " path-dirname,dirname" << std::endl; std::cerr << " path-real,path-canonical,canonical" << std::endl; @@ -85,6 +86,11 @@ static void usage( bool verbose ) std::cerr << " default: perform operation on the first field" << std::endl; std::cerr << " --strict; exit on strings on which operation does not make sense" << std::endl; std::cerr << std::endl; + std::cerr << "add" << std::endl; + std::cerr << " options" << std::endl; + std::cerr << " --prefix=[]; add prefix" << std::endl; + std::cerr << " --suffix=[]; add suffix" << std::endl; + std::cerr << std::endl; std::cerr << "path-basename,basename" << std::endl; std::cerr << " options" << std::endl; std::cerr << " --head=; default=0; number of path elements at the beginning of the path to remove" << std::endl; @@ -110,7 +116,7 @@ static void usage( bool verbose ) static bool strict; static comma::csv::options csv; -namespace comma { namespace applications { namespace strings { namespace path { +namespace comma { namespace applications { namespace strings { template < typename T > struct record @@ -121,19 +127,19 @@ struct record typedef record< std::string > input; -} } } } // namespace comma { namespace applications { namespace strings { namespace path { +} } } // namespace comma { namespace applications { namespace strings { namespace comma { namespace visiting { -template < typename T > struct traits< comma::applications::strings::path::record< T > > +template < typename T > struct traits< comma::applications::strings::record< T > > { - template < typename K, typename V > static void visit( const K&, const comma::applications::strings::path::record< T >& p, V& v ) { v.apply( "values", p.values ); } - template < typename K, typename V > static void visit( const K&, comma::applications::strings::path::record< T >& p, V& v ) { v.apply( "values", p.values 
); } + template < typename K, typename V > static void visit( const K&, const comma::applications::strings::record< T >& p, V& v ) { v.apply( "values", p.values ); } + template < typename K, typename V > static void visit( const K&, comma::applications::strings::record< T >& p, V& v ) { v.apply( "values", p.values ); } }; } } // namespace comma { namespace visiting { -namespace comma { namespace applications { namespace strings { namespace path { +namespace comma { namespace applications { namespace strings { template < typename T > static int run( const comma::command_line_options& options ) @@ -179,6 +185,8 @@ static int run( const comma::command_line_options& options ) return run_(); } +namespace path { + struct basename { typedef input output_t; @@ -268,7 +276,19 @@ struct canonical } }; -} } } } // namespace comma { namespace applications { namespace strings { namespace path { +} // namespace path { + +struct add +{ + typedef input output_t; + std::string prefix; + std::string suffix; + static const char* name() { return "add"; } + add( const comma::command_line_options& options ): prefix( options.value( "--prefix", std::string() ) ), suffix( options.value( "--suffix", std::string() ) ) {} + std::string convert( const std::string& t ) { return prefix + t + suffix; } +}; + +} } } // namespace comma { namespace applications { namespace strings { int main( int ac, char** av ) { @@ -280,9 +300,10 @@ int main( int ac, char** av ) std::string operation = unnamed[0]; strict = options.exists( "--strict" ); csv = comma::csv::options( options ); - if( operation == "path-basename" || operation == "basename" ) { return comma::applications::strings::path::run< comma::applications::strings::path::basename >( options ); } - if( operation == "path-dirname" || operation == "dirname" ) { return comma::applications::strings::path::run< comma::applications::strings::path::dirname >( options ); } - if( operation == "path-real" || operation == "path-canonical" || operation == 
"canonical" ) { return comma::applications::strings::path::run< comma::applications::strings::path::canonical >( options ); } + if( operation == "add" ) { return comma::applications::strings::run< comma::applications::strings::add >( options ); } + if( operation == "path-basename" || operation == "basename" ) { return comma::applications::strings::run< comma::applications::strings::path::basename >( options ); } + if( operation == "path-dirname" || operation == "dirname" ) { return comma::applications::strings::run< comma::applications::strings::path::dirname >( options ); } + if( operation == "path-real" || operation == "path-canonical" || operation == "canonical" ) { return comma::applications::strings::run< comma::applications::strings::path::canonical >( options ); } std::cerr << "csv-strings: expection operation; got: '" << operation << "'" << std::endl; return 1; } diff --git a/csv/test/csv-strings/expected b/csv/test/csv-strings/expected index 4432d8d83..88e6f5b44 100644 --- a/csv/test/csv-strings/expected +++ b/csv/test/csv-strings/expected @@ -134,3 +134,6 @@ fields[4]/output/line[2]="m,,a/b,,x/y" fields[5]/output/line[0]="k,,,," fields[5]/output/line[1]="l,,,," fields[5]/output/line[2]="m,,a,,x" + +add[0]/output="a,b,xay,xby" +add[1]/output="xay,xby" diff --git a/csv/test/csv-strings/input b/csv/test/csv-strings/input index b02076f42..ae544b409 100644 --- a/csv/test/csv-strings/input +++ b/csv/test/csv-strings/input @@ -52,3 +52,6 @@ fields[2]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-string fields[3]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-dirname --fields ,,m,,n" fields[4]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-dirname --fields ,,m,,n --emplace" fields[5]="( echo k,,a,,x; echo l,,a/b,,x/y; echo m,,a/b/c,,x/y/z ) | csv-strings path-dirname --fields ,,m,,n --emplace --tail 2" + +add[0]="echo a,b | csv-strings add --prefix x --suffix y --fields a,b" 
+add[1]="echo a,b | csv-strings add --prefix x --suffix y --fields a,b --emplace" From f9d500354fef1c6e953ce98d9c13eb5ab3c8fa71 Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 11 Feb 2020 12:59:46 +1100 Subject: [PATCH 0135/1056] csv-split: --files: if directory in output file path does not exist, create it --- csv/applications/split/split.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/csv/applications/split/split.cpp b/csv/applications/split/split.cpp index 31e93695e..056c82b0c 100644 --- a/csv/applications/split/split.cpp +++ b/csv/applications/split/split.cpp @@ -336,6 +336,11 @@ std::ofstream* split< T >::ofstream_by_id_() else { mode |= std::ofstream::app; } std::string name = filename_from_id_( current_.id ); if( name.empty() ) { return nullptr; } + const auto& dirname = boost::filesystem::path( name ).parent_path(); + if( !( dirname.empty() || boost::filesystem::is_directory( dirname ) || boost::filesystem::create_directories( dirname ) ) ) + { + COMMA_THROW( comma::exception, "failed to create directory '" << dirname << "' for file: '" << name << "'" ); + } std::shared_ptr< std::ofstream > stmp( new std::ofstream( &name[0], mode ) ); it = files_.insert( std::make_pair( current_.id, stmp ) ).first; } From dce6a8af2bd6ba9f5adca41d6ff01f942be0c960 Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 13 Feb 2020 18:04:24 +1100 Subject: [PATCH 0136/1056] comma-call-graph: error message directed to stderr instead of stdout --- bash/applications/comma-call-graph | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bash/applications/comma-call-graph b/bash/applications/comma-call-graph index 3ebef826a..d9e916ace 100755 --- a/bash/applications/comma-call-graph +++ b/bash/applications/comma-call-graph @@ -108,10 +108,10 @@ load_options $@ if [[ $dot_output && $dot_output != "dot" ]]; then type -p dot > /dev/null || { - echo "$basename requires graphviz" - echo "Install on Ubuntu with:" - echo "$ sudo apt-get install graphviz" - exit + echo 
"$basename: requires graphviz" >&2 + echo "$basename: install on ubuntu with:" >&2 + echo "$basename: sudo apt-get install graphviz" >&2 + exit 1 } output_fn="dot -T$dot_output" else From 5b63bc4f0babb8d5aba111d3935b10f442ed5075 Mon Sep 17 00:00:00 2001 From: seva Date: Tue, 25 Feb 2020 19:35:56 +1100 Subject: [PATCH 0137/1056] comma.signal: disabling sigpipe signal commented out; csv-eval: sigpipe disabled by hand --- python/comma/csv/applications/csv_eval.py | 2 ++ python/comma/signal/signal.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/comma/csv/applications/csv_eval.py b/python/comma/csv/applications/csv_eval.py index 7e2ad7a07..37b8d2927 100644 --- a/python/comma/csv/applications/csv_eval.py +++ b/python/comma/csv/applications/csv_eval.py @@ -34,6 +34,7 @@ import numpy as np import os import re +import signal import sys if sys.version_info.major < 3: from itertools import izip else: izip = zip # todo! watch performance! it's reported python3 zip is some 30% slower than izip @@ -619,6 +620,7 @@ def exit_if(stream): def main(): try: + signal.signal( signal.SIGPIPE, signal.SIG_DFL ) comma.csv.time.zone('UTC') args = get_args() prepare_options(args) diff --git a/python/comma/signal/signal.py b/python/comma/signal/signal.py index 34ed994bb..9612127e6 100644 --- a/python/comma/signal/signal.py +++ b/python/comma/signal/signal.py @@ -49,4 +49,4 @@ def __bool__( self ): return self.state __nonzero__ = __bool__ -signal.signal( signal.SIGPIPE, signal.SIG_DFL ) +# signal.signal( signal.SIGPIPE, signal.SIG_DFL ) From d7f45ef9f9d728cf60e9ae0dee9e2f30a8cc8be0 Mon Sep 17 00:00:00 2001 From: Dave Jennings Date: Mon, 24 Feb 2020 14:23:20 +1100 Subject: [PATCH 0138/1056] check.c++.standard.cmake: remove unnecessary loop it looks like it was just there for early debugging --- CMakeFiles/check.c++.standard.cmake | 43 +++++++++++++---------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/CMakeFiles/check.c++.standard.cmake 
b/CMakeFiles/check.c++.standard.cmake index fd1942322..1b188830e 100644 --- a/CMakeFiles/check.c++.standard.cmake +++ b/CMakeFiles/check.c++.standard.cmake @@ -28,27 +28,22 @@ ######################################################################### " ) ENDIF() - FOREACH( STANDARD ${CXX_STANDARDS} ) - # message( "Check if using C++${STANDARD}" ) - IF( ${CXX_STANDARD_TO_USE} MATCHES "${STANDARD}" ) - # message( "Yes, using C++${STANDARD}" ) - IF( NOT ( ${CXX_STANDARD_TO_USE} MATCHES ${CXX_STANDARD_LAST} ) ) - # message( "Have to check if ${CMAKE_CXX_COMPILER} supports C++${STANDARD}" ) - message( "Attempt to use C++ standard ${STANDARD}" ) - UNSET( compiler_supports_standard CACHE ) - UNSET( compiler_flag_to_check CACHE ) - SET( compiler_flag_to_check "-std=c++${STANDARD}" ) - if ( CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND ${STANDARD} MATCHES "11" ) - set( compiler_flag_to_check "${compiler_flag_to_check} -Wc++11-narrowing" ) - endif() - CHECK_CXX_COMPILER_FLAG( "${compiler_flag_to_check}" compiler_supports_standard ) - if( NOT compiler_supports_standard ) - message( FATAL_ERROR "attempt to use C++ standard ${STANDARD} but ${CMAKE_CXX_COMPILER} does not support it" ) - endif() - STRING( REPLACE " ${CXX_STANDARD_FLAGS}" "" CXX_FLAGS_NO_STANDARD "${CMAKE_CXX_FLAGS}" ) - SET( CXX_STANDARD_FLAGS ${compiler_flag_to_check} CACHE STRING "updating compiler flags selecting C++ standard" FORCE ) - SET( CXX_STANDARD_LAST ${CXX_STANDARD_TO_USE} CACHE STRING "updating C++ standard to use option" FORCE ) - set( CMAKE_CXX_FLAGS "${CXX_FLAGS_NO_STANDARD} ${compiler_flag_to_check}" CACHE STRING "" FORCE ) - ENDIF() - ENDIF() - ENDFOREACH() + + IF( NOT ( ${CXX_STANDARD_TO_USE} MATCHES ${CXX_STANDARD_LAST} ) ) + # message( "Have to check if ${CMAKE_CXX_COMPILER} supports C++${CXX_STANDARD_TO_USE}" ) + message( "Attempt to use C++ standard ${CXX_STANDARD_TO_USE}" ) + UNSET( compiler_supports_standard CACHE ) + UNSET( compiler_flag_to_check CACHE ) + SET( compiler_flag_to_check 
"-std=c++${CXX_STANDARD_TO_USE}" ) + if ( CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND ${CXX_STANDARD_TO_USE} MATCHES "11" ) + set( compiler_flag_to_check "${compiler_flag_to_check} -Wc++11-narrowing" ) + endif() + CHECK_CXX_COMPILER_FLAG( "${compiler_flag_to_check}" compiler_supports_standard ) + if( NOT compiler_supports_standard ) + message( FATAL_ERROR "attempt to use C++ standard ${CXX_STANDARD_TO_USE} but ${CMAKE_CXX_COMPILER} does not support it" ) + endif() + STRING( REPLACE " ${CXX_STANDARD_FLAGS}" "" CXX_FLAGS_NO_STANDARD "${CMAKE_CXX_FLAGS}" ) + SET( CXX_STANDARD_FLAGS ${compiler_flag_to_check} CACHE STRING "updating compiler flags selecting C++ standard" FORCE ) + SET( CXX_STANDARD_LAST ${CXX_STANDARD_TO_USE} CACHE STRING "updating C++ standard to use option" FORCE ) + set( CMAKE_CXX_FLAGS "${CXX_FLAGS_NO_STANDARD} ${compiler_flag_to_check}" CACHE STRING "" FORCE ) + ENDIF() From 1dc6bb58f7c71d936bf8c8ad60b9e2a3a68693ce Mon Sep 17 00:00:00 2001 From: Dave Jennings Date: Mon, 24 Feb 2020 14:51:51 +1100 Subject: [PATCH 0139/1056] check.c++.standard.cmake: use CMAKE_CXX_STANDARD to set standard flag rather than directly setting it through CMAKE_CXX_FLAGS. Using CMAKE_CXX_FLAGS breaks when using Qt and a version later than C++11. Qt will set -std=gnu+11 if CMAKE_CXX_STANDARD is not set, overriding the desired setting. 
--- CMakeFiles/check.c++.standard.cmake | 11 +++++------ CMakeLists.txt | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/CMakeFiles/check.c++.standard.cmake b/CMakeFiles/check.c++.standard.cmake index 1b188830e..d734acf41 100644 --- a/CMakeFiles/check.c++.standard.cmake +++ b/CMakeFiles/check.c++.standard.cmake @@ -17,7 +17,6 @@ MARK_AS_ADVANCED( FORCE CXX_STANDARD_FLAGS ) ENDIF() - # A much better way to do this is with CXX_STANDARD but that requires CMake 3.1 include( CheckCXXCompilerFlag ) IF( ${CXX_STANDARD_TO_USE} MATCHES "0x" ) message( WARNING " @@ -36,14 +35,14 @@ UNSET( compiler_flag_to_check CACHE ) SET( compiler_flag_to_check "-std=c++${CXX_STANDARD_TO_USE}" ) if ( CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND ${CXX_STANDARD_TO_USE} MATCHES "11" ) - set( compiler_flag_to_check "${compiler_flag_to_check} -Wc++11-narrowing" ) + set( extra_compiler_flags "${compiler_flag_to_check} -Wc++11-narrowing" ) endif() - CHECK_CXX_COMPILER_FLAG( "${compiler_flag_to_check}" compiler_supports_standard ) + CHECK_CXX_COMPILER_FLAG( "${compiler_flag_to_check} ${extra_compiler_flags}" compiler_supports_standard ) if( NOT compiler_supports_standard ) message( FATAL_ERROR "attempt to use C++ standard ${CXX_STANDARD_TO_USE} but ${CMAKE_CXX_COMPILER} does not support it" ) endif() - STRING( REPLACE " ${CXX_STANDARD_FLAGS}" "" CXX_FLAGS_NO_STANDARD "${CMAKE_CXX_FLAGS}" ) - SET( CXX_STANDARD_FLAGS ${compiler_flag_to_check} CACHE STRING "updating compiler flags selecting C++ standard" FORCE ) SET( CXX_STANDARD_LAST ${CXX_STANDARD_TO_USE} CACHE STRING "updating C++ standard to use option" FORCE ) - set( CMAKE_CXX_FLAGS "${CXX_FLAGS_NO_STANDARD} ${compiler_flag_to_check}" CACHE STRING "" FORCE ) + set( CMAKE_CXX_FLAGS "${extra_compiler_flags}" CACHE STRING "" FORCE ) ENDIF() + + set( CMAKE_CXX_STANDARD ${CXX_STANDARD_TO_USE} ) diff --git a/CMakeLists.txt b/CMakeLists.txt index 980e1a74e..e21f8de81 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ 
endif( USE_ARM_TOOLCHAIN ) PROJECT( "comma" ) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required( VERSION 3.1 ) # option( USE_ARM_TOOLCHAIN "Cross compile using arm toolchain" OFF ) # if( USE_ARM_TOOLCHAIN ) From 201c05d55ff5bdaba8859abeee004e97fb7f97d7 Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 27 Feb 2020 16:02:03 +1100 Subject: [PATCH 0140/1056] csv-random: make operation implemented --- csv/applications/csv-random.cpp | 112 ++++++++++++++++++++++++++++++-- 1 file changed, 108 insertions(+), 4 deletions(-) diff --git a/csv/applications/csv-random.cpp b/csv/applications/csv-random.cpp index aa5e5b038..28ee82acc 100644 --- a/csv/applications/csv-random.cpp +++ b/csv/applications/csv-random.cpp @@ -83,6 +83,17 @@ static void usage( bool verbose ) std::cerr << " --seed=[]; random seed" << std::endl; std::cerr << std::endl; std::cerr << "operations" << std::endl; + std::cerr << " make: output pseudo-random numbers" << std::endl; + std::cerr << std::endl; + std::cerr << " usage: csv-random make > random.csv" << std::endl; + std::cerr << " cat records.csv | csv-random make --append > appended.csv" << std::endl; + std::cerr << std::endl; + std::cerr << " options" << std::endl; + std::cerr << " --append; append random numbers to stdin input" << std::endl; + std::cerr << " --distribution=; default=uniform; values: uniform, more todo, just ask" << std::endl; + std::cerr << " --range=[,]; desired value range, default: whatever stl defines (usually numeric limits)" << std::endl; + std::cerr << " --type=; default=i; supported values: i, ui, f, d" << std::endl; + std::cerr << std::endl; std::cerr << " shuffle: output input records in pseudo-random order" << std::endl; std::cerr << std::endl; std::cerr << " usage: cat records.csv | csv-random shuffle [] > shuffled.csv" << std::endl; @@ -121,7 +132,97 @@ template <> struct traits< comma::applications::random::shuffle::input > } } // namespace comma { namespace visiting { -namespace comma { namespace applications { 
namespace random { namespace shuffle { +namespace comma { namespace applications { namespace random { + +namespace make { + +template < typename T, typename Distribution > +static int run_impl( Distribution& distribution, bool append ) +{ + std::default_random_engine generator = seed ? std::default_random_engine( *seed ) : std::default_random_engine(); + if( !::csv.flush ) { std::cin.tie( NULL ); } + if( append ) + { + if( ::csv.binary() ) + { + std::vector< char > buf( ::csv.format().size() ); + while( std::cin.good() ) + { + std::cin.read( &buf[0], buf.size() ); + if( std::cin.gcount() == 0 ) { break; } + if( std::cin.gcount() != int( buf.size() ) ) { std::cerr << "csv-random: make: expected " << buf.size() << " bytes; got " << std::cin.gcount() << std::endl; return 1; } + std::cout.write( &buf[0], buf.size() ); + T r = distribution( generator ); + std::cout.write( reinterpret_cast< char* >( &r ), sizeof( T ) ); + if( ::csv.flush ) { std::cout.flush(); } + } + } + else + { + while( std::cin.good() ) + { + std::string s; + std::getline( std::cin, s ); + if( s.empty() ) { continue; } + std::cout << s << ::csv.delimiter << distribution( generator ) << std::endl; + if( ::csv.flush ) { std::cout.flush(); } + } + } + } + else + { + while( std::cout.good() ) + { + T r = distribution( generator ); + if( ::csv.binary() ) { std::cout.write( reinterpret_cast< char* >( &r ), sizeof( T ) ); } + else { std::cout << r << std::endl; } + if( ::csv.flush ) { std::cout.flush(); } + } + } + return 0; +} + +template < typename T, template < typename > class Distribution > +static int run_impl( const comma::command_line_options& options ) +{ + bool append = options.exists( "--append" ); + auto r = options.optional< std::string >( "--range" ); + auto range = comma::csv::ascii< std::pair< T, T > >().get( *r ); + auto distribution = r ? 
Distribution< T >( range.first, range.second ) : Distribution< T >(); + return run_impl< T >( distribution, append ); +} + +static int run( const comma::command_line_options& options ) // quick and dirty +{ + auto distribution = options.value< std::string >( "--distribution", "uniform" ); + auto type = options.value< std::string >( "--type", "int" ); + if( type == "i" ) + { + if( distribution == "uniform" ) { return run_impl< comma::int32, std::uniform_int_distribution >( options ); } + std::cerr << "csv-random make: expected distribution; got: '" << distribution << "'" << std::endl; + } + if( type == "ui" ) + { + if( distribution == "uniform" ) { return run_impl< comma::int32, std::uniform_int_distribution >( options ); } + std::cerr << "csv-random make: expected distribution; got: '" << distribution << "'" << std::endl; + } + if( type == "f" ) + { + if( distribution == "uniform" ) { return run_impl< float, std::uniform_real_distribution >( options ); } + std::cerr << "csv-random make: expected distribution; got: '" << distribution << "'" << std::endl; + } + if( type == "d" ) + { + if( distribution == "uniform" ) { return run_impl< double, std::uniform_real_distribution >( options ); } + std::cerr << "csv-random make: expected distribution; got: '" << distribution << "'" << std::endl; + } + std::cerr << "csv-random make: expected type; got: '" << type << "'" << std::endl; + return 1; +} + +} // namespace make { + +namespace shuffle { static int run( const comma::command_line_options& options ) { @@ -194,7 +295,9 @@ static int run( const comma::command_line_options& options ) return 0; } -} } } } // namespace comma { namespace applications { namespace random { namespace shuffle { +} // namespace shuffle { + +} } } // namespace comma { namespace applications { namespace random { int main( int ac, char** av ) { @@ -203,10 +306,11 @@ int main( int ac, char** av ) comma::command_line_options options( ac, av, usage ); const auto& unnamed = options.unnamed( 
"--flush,--verbose,-v", "-.*" ); if( unnamed.empty() ) { std::cerr << "csv-random: please specify operation" << std::endl; return 1; } - csv = comma::csv::options( options ); + ::csv = comma::csv::options( options ); seed = options.optional< int >( "--seed" ); - verbose = options.exists( "--verbose,-v" ); + ::verbose = options.exists( "--verbose,-v" ); std::string operation = unnamed[0]; + if( operation == "make" ) { return comma::applications::random::make::run( options ); } if( operation == "shuffle" ) { return comma::applications::random::shuffle::run( options ); } std::cerr << "csv-random: expection operation; got: '" << operation << "'" << std::endl; return 1; From b05e0878306e83ace3803265abf1bf02fb69e8c2 Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 27 Feb 2020 16:46:23 +1100 Subject: [PATCH 0141/1056] csv-random: make operation: a few bugs fixed; usage semantics fixed --- csv/applications/csv-random.cpp | 69 +++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/csv/applications/csv-random.cpp b/csv/applications/csv-random.cpp index 28ee82acc..5ce089527 100644 --- a/csv/applications/csv-random.cpp +++ b/csv/applications/csv-random.cpp @@ -91,8 +91,9 @@ static void usage( bool verbose ) std::cerr << " options" << std::endl; std::cerr << " --append; append random numbers to stdin input" << std::endl; std::cerr << " --distribution=; default=uniform; values: uniform, more todo, just ask" << std::endl; + std::cerr << " --output-binary; output random numbers as binary, or specify --binary= for stdin input" << std::endl; std::cerr << " --range=[,]; desired value range, default: whatever stl defines (usually numeric limits)" << std::endl; - std::cerr << " --type=; default=i; supported values: i, ui, f, d" << std::endl; + std::cerr << " --type=; default=ui; supported values: b, ub, w, uw, i, ui, f, d" << std::endl; std::cerr << std::endl; std::cerr << " shuffle: output input records in pseudo-random order" << std::endl; 
std::cerr << std::endl; @@ -108,10 +109,12 @@ static void usage( bool verbose ) exit( 0 ); } +static bool output_binary; static bool verbose; static comma::csv::options csv; static boost::optional< int > seed; + namespace comma { namespace applications { namespace random { namespace shuffle { struct input @@ -136,8 +139,26 @@ namespace comma { namespace applications { namespace random { namespace make { +template < typename T > +struct type_traits +{ + static T cast( T t ) { return t; } +}; + +template <> +struct type_traits< char > +{ + static int cast( char t ) { return static_cast< int >( t ); } +}; + +template <> +struct type_traits< unsigned char > +{ + static unsigned int cast( unsigned char t ) { return static_cast< int >( t ); } +}; + template < typename T, typename Distribution > -static int run_impl( Distribution& distribution, bool append ) +static int run_impl( Distribution& distribution, bool append, bool binary ) { std::default_random_engine generator = seed ? std::default_random_engine( *seed ) : std::default_random_engine(); if( !::csv.flush ) { std::cin.tie( NULL ); } @@ -164,7 +185,7 @@ static int run_impl( Distribution& distribution, bool append ) std::string s; std::getline( std::cin, s ); if( s.empty() ) { continue; } - std::cout << s << ::csv.delimiter << distribution( generator ) << std::endl; + std::cout << s << ::csv.delimiter << type_traits< T >::cast( distribution( generator ) ) << std::endl; if( ::csv.flush ) { std::cout.flush(); } } } @@ -174,8 +195,8 @@ static int run_impl( Distribution& distribution, bool append ) while( std::cout.good() ) { T r = distribution( generator ); - if( ::csv.binary() ) { std::cout.write( reinterpret_cast< char* >( &r ), sizeof( T ) ); } - else { std::cout << r << std::endl; } + if( binary ) { std::cout.write( reinterpret_cast< char* >( &r ), sizeof( T ) ); } + else { std::cout << type_traits< T >::cast( r ) << std::endl; } if( ::csv.flush ) { std::cout.flush(); } } } @@ -186,16 +207,42 @@ template < 
typename T, template < typename > class Distribution > static int run_impl( const comma::command_line_options& options ) { bool append = options.exists( "--append" ); - auto r = options.optional< std::string >( "--range" ); - auto range = comma::csv::ascii< std::pair< T, T > >().get( *r ); - auto distribution = r ? Distribution< T >( range.first, range.second ) : Distribution< T >(); - return run_impl< T >( distribution, append ); + bool binary = options.exists( "--output-binary" ) || ::csv.binary(); + auto r = options.optional< std::string >( "--range" ); // todo: parse distribution parameters + if( r ) + { + auto range = comma::csv::ascii< std::pair< T, T > >().get( *r ); + Distribution< T > distribution( range.first, range.second ); + return run_impl< T >( distribution, append, binary ); + } + Distribution< T > distribution; + return run_impl< T >( distribution, append, binary ); } static int run( const comma::command_line_options& options ) // quick and dirty { auto distribution = options.value< std::string >( "--distribution", "uniform" ); - auto type = options.value< std::string >( "--type", "int" ); + auto type = options.value< std::string >( "--type", "ui" ); + if( type == "b" ) + { + if( distribution == "uniform" ) { return run_impl< char, std::uniform_int_distribution >( options ); } + std::cerr << "csv-random make: expected distribution; got: '" << distribution << "'" << std::endl; + } + if( type == "ub" ) + { + if( distribution == "uniform" ) { return run_impl< unsigned char, std::uniform_int_distribution >( options ); } + std::cerr << "csv-random make: expected distribution; got: '" << distribution << "'" << std::endl; + } + if( type == "w" ) + { + if( distribution == "uniform" ) { return run_impl< comma::int16, std::uniform_int_distribution >( options ); } + std::cerr << "csv-random make: expected distribution; got: '" << distribution << "'" << std::endl; + } + if( type == "uw" ) + { + if( distribution == "uniform" ) { return run_impl< comma::uint16, 
std::uniform_int_distribution >( options ); } + std::cerr << "csv-random make: expected distribution; got: '" << distribution << "'" << std::endl; + } if( type == "i" ) { if( distribution == "uniform" ) { return run_impl< comma::int32, std::uniform_int_distribution >( options ); } @@ -304,7 +351,7 @@ int main( int ac, char** av ) try { comma::command_line_options options( ac, av, usage ); - const auto& unnamed = options.unnamed( "--flush,--verbose,-v", "-.*" ); + const auto& unnamed = options.unnamed( "--append,--flush,--verbose,-v", "-.*" ); if( unnamed.empty() ) { std::cerr << "csv-random: please specify operation" << std::endl; return 1; } ::csv = comma::csv::options( options ); seed = options.optional< int >( "--seed" ); From 8735ed3846a6a1972dd918e31d8763e1d4c87b04 Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 27 Feb 2020 17:34:41 +1100 Subject: [PATCH 0142/1056] examples.md added --- examples.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 examples.md diff --git a/examples.md b/examples.md new file mode 100644 index 000000000..b4802fc0d --- /dev/null +++ b/examples.md @@ -0,0 +1,28 @@ +# csv + +## accumulate values + +### take velocities, calculate distance + +generate sample file with velocity a m/sec at each given time: + +``` +cat < velocities.csv +20200101T000000,0.7 +20200101T000001,1.1 +20200101T000002,1.1 +20200101T000003,0.9 +20200101T000004,1.3 +eof + +``` + +append distance travelled to each data point: + +``` +cat velocities.csv \ + | csv-shuffle --fields t,v --output-fields t,t,v \ + | csv-time --to seconds --fields ,t \ + | csv-eval --init-values "prev=0;sum=0" --fields ,cur,v "sum+=(cur-prev)*(prev>0)*v;prev=cur" \ + | csv-shuffle --fields t,,v,d --output-fields t,v,d +``` From 5247fda668c035ffdf600a51230ce93593a33d81 Mon Sep 17 00:00:00 2001 From: Vsevolod Vlaskine Date: Thu, 27 Feb 2020 06:36:41 +0000 Subject: [PATCH 0143/1056] Update examples.md --- examples.md | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/examples.md b/examples.md index b4802fc0d..e0c521e7b 100644 --- a/examples.md +++ b/examples.md @@ -6,7 +6,7 @@ generate sample file with velocity a m/sec at each given time: -``` +```bash cat < velocities.csv 20200101T000000,0.7 20200101T000001,1.1 @@ -19,7 +19,7 @@ eof append distance travelled to each data point: -``` +```bash cat velocities.csv \ | csv-shuffle --fields t,v --output-fields t,t,v \ | csv-time --to seconds --fields ,t \ From 723bdf5e5af1a346df7e5a6d796b2bb6b9c04af8 Mon Sep 17 00:00:00 2001 From: vlaskine Date: Fri, 28 Feb 2020 11:56:21 +1100 Subject: [PATCH 0144/1056] csv-calc: trivial change --- csv/applications/csv-calc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csv/applications/csv-calc.cpp b/csv/applications/csv-calc.cpp index 181e6ab60..25d4a10d5 100644 --- a/csv/applications/csv-calc.cpp +++ b/csv/applications/csv-calc.cpp @@ -1377,7 +1377,7 @@ int main( int ac, char** av ) else { output( csv, results, block, has_block, has_id ); } return 0; } - catch( std::exception& ex ) { std::cerr << comma::verbose.app_name() << ": " << ex.what() << std::endl; } - catch( ... ) { std::cerr << comma::verbose.app_name() << ": unknown exception" << std::endl; } + catch( std::exception& ex ) { std::cerr << "csv-calc: " << ex.what() << std::endl; } + catch( ... 
) { std::cerr << "csv-calc: unknown exception" << std::endl; } return 1; } From 30c202752d9b6e7287eae93befe3254feae8315f Mon Sep 17 00:00:00 2001 From: seva Date: Wed, 4 Mar 2020 09:35:59 +1100 Subject: [PATCH 0145/1056] COPYING: updated --- AUTHORS | 8 ++++---- COPYING | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/AUTHORS b/AUTHORS index 53aaa505a..b3ae456ea 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,4 +1,4 @@ - Vsevolod Vlaskine - Cedric Wohlleber - Matthew Herrmann - James Underwood +Vsevolod Vlaskine +Cedric Wohlleber +Matthew Herrmann +James Underwood diff --git a/COPYING b/COPYING index 5702f519c..d880d7275 100644 --- a/COPYING +++ b/COPYING @@ -1,4 +1,5 @@ Copyright (c) 2011 The University of Sydney +Copyright (c) 2018 Vsevolod Vlaskine All rights reserved. Redistribution and use in source and binary forms, with or without From a26920e36dab3684116d5b6d8b19c8ff0256b95f Mon Sep 17 00:00:00 2001 From: vlaskine Date: Thu, 12 Mar 2020 12:13:35 +1100 Subject: [PATCH 0146/1056] python: comma.csv.time: made permissive on nanoseconds present --- python/comma/csv/test/numpy_time/expected | 2 ++ python/comma/csv/test/numpy_time/input | 1 + python/comma/csv/time.py | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/python/comma/csv/test/numpy_time/expected b/python/comma/csv/test/numpy_time/expected index 4bb01817a..ef29ef5e3 100644 --- a/python/comma/csv/test/numpy_time/expected +++ b/python/comma/csv/test/numpy_time/expected @@ -6,6 +6,8 @@ incomplete/output="20150101T010203.123000" incomplete/status=0 microseconds/output="20140101T010203.123456" microseconds/status=0 +nanoseconds/output="20140101T010203.123456" +nanoseconds/status=0 not_a_date_time/output="not-a-date-time" not_a_date_time/status=0 invalid_month/output="" diff --git a/python/comma/csv/test/numpy_time/input b/python/comma/csv/test/numpy_time/input index 13c35aa3d..57b935473 100644 --- a/python/comma/csv/test/numpy_time/input +++ 
b/python/comma/csv/test/numpy_time/input @@ -2,6 +2,7 @@ basic 20150101T010203 dot 20150101T010203. incomplete 20150101T010203.123 microseconds 20140101T010203.123456 +nanoseconds 20140101T010203.123456789 invalid_month 20150001T000000 invalid_format 20150101T0000 not_a_date_time not-a-date-time diff --git a/python/comma/csv/time.py b/python/comma/csv/time.py index 39c33ad3b..b1deaa33f 100644 --- a/python/comma/csv/time.py +++ b/python/comma/csv/time.py @@ -52,6 +52,7 @@ def is_negative_infinity(numpy_time): return numpy_time == NEGATIVE_INFINITY def to_numpy(t): """ return numpy datetime64 scalar corresponding to the given comma time string + if t has nanoseconds, it will be trunkated (rather than rounded) to microseconds >>> import numpy as np >>> from comma.csv.time import to_numpy @@ -67,7 +68,7 @@ def to_numpy(t): if t in ['', 'not-a-date-time']: return NOT_A_DATE_TIME if t in ['+infinity', '+inf', 'infinity', 'inf']: return POSITIVE_INFINITY if t in ['-infinity', '-inf']: return NEGATIVE_INFINITY - if not (isinstance(t, BASESTRING) and re.match(r'^(\d{8}T\d{6}(\.\d{0,6})?)$', t)): + if not (isinstance(t, BASESTRING) and re.match(r'^(\d{8}T\d{6}(\.\d{0,12})?)$', t)): msg = "expected comma time, got '{}'".format(repr(t)) raise TypeError(msg) v = list(t) From 0defca981e09037ec4e883a03a35f07bcf7385cc Mon Sep 17 00:00:00 2001 From: seva Date: Thu, 19 Mar 2020 19:02:34 +1100 Subject: [PATCH 0147/1056] csv-repeat: --timestamped: implemented; unit test: todo... 
--- csv/applications/csv-repeat.cpp | 103 ++++++++++++++++++++++++++------ 1 file changed, 86 insertions(+), 17 deletions(-) diff --git a/csv/applications/csv-repeat.cpp b/csv/applications/csv-repeat.cpp index 460133a7e..81e2db55b 100644 --- a/csv/applications/csv-repeat.cpp +++ b/csv/applications/csv-repeat.cpp @@ -29,6 +29,7 @@ /// @author dave jennings +#include #include #include #include @@ -73,6 +74,11 @@ void usage( bool verbose = false ) std::cerr << " warning: currently is very simplistic; see todo comments in the code to make it more robust" << std::endl; std::cerr << " --period=[]: period of repeated record" << std::endl; std::cerr << " --timeout,-t=[]: timeout before repeating the last record; if not specified, timeout is set to --period" << std::endl; + std::cerr << " --timestamped: use input timestamp for repeating; currently, would do blocking read" << std::endl; + std::cerr << " convenient for filling holes in data in offline processing" << std::endl; + std::cerr << " --timestamped options" << std::endl; + std::cerr << " --from=[