#!/usr/bin/mawk -We
# *********************************************************************
# tabletocsv: convert a NoSQL table into CSV (comma-separated values).
#
# Copyright (c) 2002,2006 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# *********************************************************************
# $Id: tabletocsv,v 1.5 2006/03/10 11:26:13 carlo Exp $

BEGIN {
  # NULL is the empty string used in comparisons throughout the script.
  # Input and output fields are tab-separated; "cr" (emit a carriage
  # return at the end of each output line) is on unless -n is given.
  NULL = ""
  FS = "\t"
  OFS = FS
  cr = 1

  # Local settings come from the environment; fall back to built-in
  # defaults when a variable is unset or empty.
  nosql_install = ENVIRON["NOSQL_INSTALL"]
  if (nosql_install == NULL) nosql_install = "/usr/local/nosql"

  stdout = ENVIRON["NOSQL_STDOUT"]
  if (stdout == NULL) stdout = "/dev/stdout"

  stderr = ENVIRON["NOSQL_STDERR"]
  if (stderr == NULL) stderr = "/dev/stderr"

  # Walk the command line. The scan stops at the first empty/missing
  # argument; unrecognised arguments are silently skipped.
  for (i = 1; ARGV[i] != NULL; i++) {
    arg = ARGV[i]

    # Informational options: show the requested text, then quit with
    # exit status 1 without reading any input.
    if (arg == "-h" || arg == "--help") {
       system("grep -v '^#' " nosql_install "/help/tabletocsv.txt")
       exit(rc=1)
    }
    if (arg == "--show-copying") {
       system("cat " nosql_install "/doc/COPYING")
       exit(rc=1)
    }
    if (arg == "--show-warranty") {
       system("cat " nosql_install "/doc/WARRANTY")
       exit(rc=1)
    }

    # Options taking a value consume the following argument as well.
    if (arg == "-i" || arg == "--input") i_file = ARGV[++i]
    else if (arg == "-o" || arg == "--output") o_file = ARGV[++i]
    else if (arg == "-n" || arg == "--no-cr") cr = 0
    else if (arg == "-N" || arg == "--no-header") no_hdr = 1
    else if (arg == "-B" || arg == "--no-break") no_brk = 1
  }

  # Output goes to the configured stdout unless -o named a file.
  if (o_file == NULL) o_file = stdout

  # Fix argv[]: hide the options from awk's own input-file processing,
  # leaving at most the file named with -i (otherwise awk reads stdin).
  if (i_file != NULL) { ARGV[1] = i_file; ARGC = 2 }
  else ARGC = 1
}

#
# Main loop                                                                     
#
# Note: the CSV format comes in different flavours; the one implemented
# here is explained at various places, including the following:
# http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm#FileFormat

# Header line: the first input record holds the column names.
# They are cleaned up and, unless the header was suppressed with
# -N/--no-header, written out as a single comma-separated line.
NR == 1 {

   gsub(/\001/, "")				# Drop SOH markers
   gsub(/ +/,"")				# Remove blanks from the names

   if (!no_hdr) {
      gsub(/\t/,",")				# Tab-separated -> comma-separated
      # Append a carriage return only when CR line endings are enabled.
      print $0 (cr ? "\r" : "") > o_file
   }

   next
}

# Data rows: every record after the header is converted into one CSV
# line. Non-empty fields are wrapped in double quotes, embedded double
# quotes are doubled, and the escaped '\t' / '\n' sequences found in the
# input are turned into physical tabs / newlines by nosqlUnescape().
NR > 1 {

   gsub(/ +\t/,"\t"); sub(/ +$/,NULL)		# trim trailing blanks.

   # CSV escapes double-quotes by doubling them.
   gsub(/"/,"\"\"")

   out = NULL

   for (i=1; i<=NF; i++) {

       # Work on a copy: assigning to $i would force awk to rebuild $0
       # on every iteration for no benefit.
       field = $i

       # non-empty fields must be enclosed in double-quotes,
       # or embedded spaces won't be preserved.
       if (field != NULL) field = "\"" field "\""
       if (i > 1) field = "," field
       out = out field
   }

   out = nosqlUnescape(out)

   # -B/--no-break: replace embedded line breaks with a blank.
   # This has to happen after unescaping: an input record is a single
   # line, so physical newlines only exist once the escaped '\n'
   # sequences have been translated.
   if (no_brk) gsub(/\n/," ",out)

   if (cr) {
      # Embedded newlines become CR+LF; the final CR pairs with the
      # newline that print appends.
      gsub(/\n/,"\r\n",out)
      out = out "\r"
   }

   print out > o_file
}

# *********************************************************************
# nosqlUnescape(string)
#
# Returns a copy of the input string in which every unescaped '\n'
# two-character sequence has been replaced by a physical newline and
# every unescaped '\t' by a physical tab. A backslash followed by any
# other character (including another backslash) is copied through
# unchanged, as is a lone backslash at the very end of the string.
# *********************************************************************
function nosqlUnescape(s,		S,i,s_length,a,escaped) {
  # Split into single characters. One extra (empty) slot past the end
  # is scanned too, so that a pending trailing backslash gets flushed
  # by the generic "escaped" branch below.
  s_length = split(s, a, "") + 1

  for (i = 1; i <= s_length; i++) {
    if (escaped) {
      # Previous character was an unescaped backslash.
      escaped = 0
      if (a[i] == "n") S = S "\n"
      else if (a[i] == "t") S = S "\t"
      else S = S "\\" a[i]		# Not ours: keep both characters
    }
    else if (a[i] == "\\") escaped = 1	# Hold back until next character
    else S = S a[i]
  }

  return S
}

#
# End of program.
#
