#!/usr/bin/ruby -w

# Copyright Nick Willson, nick@acrasis.net.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3, as
# published by the Free Software Foundation.  The text of the license is
# at http://www.gnu.org/licenses/gpl.html

require 'csv'
require 'English'
require 'ostruct'
require 'optparse'

ME = File.basename(__FILE__)
###############################################################################

# A nonempty and contiguous range of fields.  Fields are numbered starting at 1.
#
# A range is built from one of four textual forms:
#    "N"   - just the Nth field (lo == hi == N)
#    "N-"  - the Nth field onwards (hi is nil, meaning "no upper bound")
#    "N-M" - the Nth to the Mth field, inclusive
#    "-M"  - the first to the Mth field, inclusive (lo defaults to 1)
class DimRange
   # Since a range may not be empty, lo and hi are inclusive.  lo is always
   # an Integer >= 1; hi is an Integer >= lo, or nil for an open-ended range.
   attr_reader :lo, :hi

   # Parse str into a range.  Surrounding whitespace is ignored.
   #
   # Raises ArgumentError for anything that is not one of the four forms
   # described on the class, for a lower bound below 1, and for an upper
   # bound below the lower bound.
   def initialize(str)
      text = str.to_s.strip
      # \A and \z anchor the whole string; ^ and $ would also accept a valid
      # first line followed by arbitrary further lines.
      if text =~ /\A\d+\z/
         @lo = @hi = text.to_i
      elsif (md = text.match(/\A(\d+)?-(\d+)?\z/))
         @lo = md[1] ? md[1].to_i : 1
         @hi = md[2] ? md[2].to_i : nil
      else
         raise ArgumentError, "Invalid range: `#{str}'"
      end
      raise ArgumentError, "Invalid range: `#{str}'" if
         @lo < 1 || (@hi || @lo) < @lo
   end

   # Return the elements of arr that fall within self, as an array.  The
   # result is empty when the range starts beyond the end of arr.
   def extract(arr)
      first = lo - 1
      picked =
         if hi.nil? then arr.slice(first, arr.size - first)
         else arr.slice(first..(hi - 1))
         end
      # slice returns nil when first lies past the end of arr.
      picked || []
   end

   # The given integer is contained in self.  Raises ArgumentError (via
   # precedes?) when int is not an Integer.
   def include?(int)
      !(precedes?(int) || succeeds?(int))
   end

   # Return the Float value of str (truthy) when Kernel.Float accepts it,
   # otherwise false.  Kept for callers that use it; range parsing itself
   # no longer depends on it because Float also accepts "1.5" and "1e2".
   def numeric?(str)
      Kernel.Float(str)
   rescue ArgumentError, TypeError
      false
   end

   # Self and rhs (another DimRange) have at least one field in common.
   def overlaps?(rhs)
      !(precedes?(rhs.lo) || succeeds?(rhs.hi))
   end

   # Self lies entirely below loval.  Raises ArgumentError unless loval is
   # an Integer.
   def precedes?(loval)
      raise ArgumentError, "Don't understand `#{loval}'" unless
         loval.kind_of?(Integer)
      # An open-ended range (hi is nil) cannot precede anything.
      !hi.nil? && hi < loval
   end

   # Self lies entirely above hival.  hival may be nil, meaning "no upper
   # bound".  Raises ArgumentError for any other non-Integer.
   def succeeds?(hival)
      raise ArgumentError, "Don't understand `#{hival}'" unless
         hival.nil? || hival.kind_of?(Integer)
      # Nothing can succeed an unbounded upper limit.
      !hival.nil? && lo > hival
   end

   # Textual form: "N" for a single field, otherwise "N-M" ("N-" when the
   # range is open-ended, since a nil hi interpolates as the empty string).
   def to_s
      lo == hi ? lo.to_s : "#{lo}-#{hi}"
   end

   # Order ranges by their lower bound.
   def <=>(rhs)
      lo <=> rhs.lo
   end
end
###############################################################################

# An ordered collection of non-overlapping DimRange objects, built from an
# array of range strings and kept sorted by each range's lower bound.
class DimRangeList

   attr_reader :ranges

   # Build the list from an array of range strings (see add).  Raises
   # ArgumentError when any two of the ranges overlap.
   def initialize(str_arr)
      @ranges = []
      str_arr.each { |spec| add(spec) }
      @ranges.sort!
   end

   # Return the elements of arr that fall within the ranges specified in
   # self, range by range in sorted order.
   def extract(arr)
      picked = []
      ranges.each { |range| picked.concat(range.extract(arr)) }
      picked
   end

   # The hi of the last (highest) range, or nil for 'infinity'.
   def hi
      final = ranges.last
      final.hi
   end

   # The given integer is contained in one of self's ranges.
   def include?(int)
      ranges.each { |range| return true if range.include?(int) }
      false
   end

   # The ranges' textual forms joined by commas.
   def to_s
      ranges.map { |range| range.to_s }.join(',')
   end

private
   # Append the range described by str: a string like '3' for a single
   # field, '4-6' for a closed range of fields or '-4' or '7-' for an open
   # range of fields.  Raises ArgumentError if it overlaps a range that is
   # already in the list.
   def add(str)
      candidate = DimRange.new(str)
      if ranges.any? { |existing| existing.overlaps?(candidate) }
         raise ArgumentError.new("Invalid range list")
      end
      @ranges << candidate
   end

end
###############################################################################

# Command-line option handling.
class Optgetter
   # Parse args (destructively, via OptionParser#parse!) and return an
   # OpenStruct with these members:
   #    din     - input delimiter string (default ",")
   #    dout    - output delimiter string (default ",")
   #    fields  - DimRangeList of fields to print (default '1-', i.e. all)
   #    records - DimRangeList of lines to print (default '1-', i.e. all)
   #
   # --help prints the usage text to standard error and exits with status 1.
   # OptionParser errors (unknown option, missing argument) and the
   # exceptions raised while building a DimRangeList propagate to the caller.
   def self.parse(args)
      op_struct = OpenStruct.new
      op_struct.din = ','
      op_struct.dout = ','
      op_struct.fields = nil
      op_struct.records = nil
      help_text = <<EOF
Usage: #{ME} [OPTION...] [FILE...]

Print selected parts of selected lines from each FILE to standard
output.  If you omit FILE or a FILE is -, I read from standard input.
I resemble cut(1) except that I understand how to ignore field
delimiters that are present inside a quoted field.  E.g. in the record
   a,"b,c",d
cut sees four fields, but I see three.

OPTIONS:
   --din=STR
   --dout=STR
      The input and output delimiters, respectively.  Both default to ","
      (comma).

   --fields=R[,R...]
      Optional.  The ranges of fields to extract and print.  If
      omitted, all fields.  The first field in a record is numbered 1.
      Each range R can be in one of four forms:

         N   - the Nth field.
         N-  - all fields from the Nth onwards, inclusive.
         N-M - field from the Nth to the Mth, inclusive.  M may not be
               less than N.
         -M  - all fields to the Mth, inclusive.  Equivalent to 1-M.

      No ranges may overlap.  E.g. "2,2-4" will cause an error.

   --records=R[,R...]
      Optional.  The ranges of lines to extract and print.  If
      omitted, all lines.  The first line in a file is numbered 1.
      Each R is the same as in --fields.

   --help
      Show this help.

Example:

   \$ #{ME} --dout='|' --fields='-3,6,14-17,30-' somefile.csv
EOF
      opar = OptionParser.new do |opts|
         opts.on("--help") { warn(help_text); exit(1) }
         # Declare the switches under their documented names.  The earlier
         # specs ("--din, STR", "--dout, STR", "--recordss []") registered
         # switches literally named "--din,", "--dout," and "--recordss",
         # which only worked through OptionParser's abbreviation matching.
         opts.on("--din=STR") { |s| op_struct.din = s }
         opts.on("--dout=STR") do |s|
            # Nasty but necessary to de-literalize '\n'.
            # SECURITY(review): eval runs the option value as Ruby source,
            # so a crafted --dout can execute arbitrary code, and a value
            # containing a double quote fails to evaluate.  Deliberately
            # left in place; replace with an explicit escape table if this
            # tool is ever fed untrusted arguments.
            op_struct.dout = eval(%Q("#{s}"))
         end
         opts.on("--fields []", Array) do |f|
            # The argument is syntactically optional, so f is nil when
            # --fields is given bare; report that as a parse error.
            raise OptionParser::MissingArgument if f.nil?
            op_struct.fields = DimRangeList.new(f)
         end
         opts.on("--records []", Array) do |r|
            raise OptionParser::MissingArgument if r.nil?
            op_struct.records = DimRangeList.new(r)
         end
      end
      opar.parse!(args)
      # '1-' selects everything from the first field/line onwards.
      op_struct.fields = DimRangeList.new(['1-']) unless op_struct.fields
      op_struct.records = DimRangeList.new(['1-']) unless op_struct.records
      op_struct
   end

end
options = Optgetter.parse(ARGV)
###############################################################################

# Main
#
# Read records from $DEFAULT_INPUT using the input delimiter, and write the
# selected fields of each selected record to STDOUT using the output
# delimiter.
# NOTE(review): CSV::Reader and CSV::Writer look like the Ruby 1.8 csv
# library API; confirm the target interpreter before running on a newer Ruby.

reader = CSV::Reader.parse($DEFAULT_INPUT, options.din)
CSV::Writer.generate(STDOUT, options.dout) do |writer|
   reader.each_with_index do |record, index|
      # Records are numbered from 1, the index from 0.
      line_no = index + 1
      if options.records.nil? || options.records.include?(line_no)
         writer << (options.fields.extract(record) || [])
      end
      # Stop reading once the last requested record has been handled.  A nil
      # hi means the record list is open-ended, so keep going.
      last_wanted = options.records.hi
      break if last_wanted && last_wanted <= line_no
   end
end

exit(0)
__END__

