#!/usr/bin/mawk -We
# *********************************************************************
# istable: checks that a file is a valid NoSQL table.
# Copyright (c) 1998,2006 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# *********************************************************************
# $Id: istable,v 1.8 2006/03/10 11:26:13 carlo Exp $

# Set-up: read local settings from the environment and parse the
# command line.
#
# Environment (each falls back to a default when unset or empty):
#   NOSQL_INSTALL   installation directory   (default /usr/local/nosql)
#   NOSQL_STDOUT    name of the stdout file  (default /dev/stdout)
#   NOSQL_STDERR    name of the stderr file  (default /dev/stderr)
#
# Options and the global each one sets for the rules below:
#   -v, --verbose            verbose = 1
#   -n, --skip-header        skip_hdr = 1
#   -e, --echo               echo = 1
#   -E, --edit               edit = 1
#   -N, --no-header          minrec = 2
#   -i, --input FILE         ifile = FILE (read FILE instead of stdin)
#   -a, --allow-duplicates   dups = 1
#   -r, --relaxed            relaxed = dups = 1
#   -R, --really-relaxed     relaxed = Relaxed = dups = 1
#   -h, --help               show the help text, then exit with status 1
#   --show-copying           show the COPYING file, then exit with status 1
#   --show-warranty          show the WARRANTY file, then exit with status 1
#
# Arguments that match none of the above are silently ignored.
BEGIN {
  NULL = ""; FS = OFS = "\t"

  # Get local settings.
  nosql_install = ENVIRON["NOSQL_INSTALL"]
  stdout = ENVIRON["NOSQL_STDOUT"]
  stderr = ENVIRON["NOSQL_STDERR"]

  # Set default values if necessary.
  if (nosql_install == NULL) nosql_install = "/usr/local/nosql"
  if (stdout == NULL) stdout = "/dev/stdout"
  if (stderr == NULL) stderr = "/dev/stderr"

  # Scan every argument up to ARGC.  Stopping at the first empty string
  # instead would silently drop any option that follows an empty
  # argument on the command line.
  for (i = 1; i < ARGC; i++) {
    if (ARGV[i] == "-v" || ARGV[i] == "--verbose") verbose = 1
    else if (ARGV[i] == "-n" || ARGV[i] == "--skip-header") skip_hdr = 1
    else if (ARGV[i] == "-e" || ARGV[i] == "--echo") echo = 1
    else if (ARGV[i] == "-E" || ARGV[i] == "--edit") edit = 1
    else if (ARGV[i] == "-N" || ARGV[i] == "--no-header") minrec = 2
    # The option value is the next argument; consume it here so that the
    # loop does not try to interpret it as an option.
    else if (ARGV[i] == "-i" || ARGV[i] == "--input") ifile = ARGV[++i]
    else if (ARGV[i] == "-a" || ARGV[i] == "--allow-duplicates") dups = 1
    else if (ARGV[i] == "-r" || ARGV[i] == "--relaxed") relaxed=dups=1
    else if (ARGV[i] == "-R" || ARGV[i] == "--really-relaxed")
      relaxed=Relaxed=dups=1
    # The informational options set rc so that the END rule, which still
    # runs after exit, terminates at once with the same status.
    else if (ARGV[i] == "-h" || ARGV[i] == "--help") {
       system("grep -v '^#' " nosql_install "/help/istable.txt")
       exit(rc=1)
    }
    else if (ARGV[i] == "--show-copying") {
       system("cat " nosql_install "/doc/COPYING")
       exit(rc=1)
    }
    else if (ARGV[i] == "--show-warranty") {
       system("cat " nosql_install "/doc/WARRANTY")
       exit(rc=1)
    }
  }

  # Hide the options from awk's own file-argument processing, so that
  # input comes from stdin unless an input file was requested.
  ARGC = 1

  if (ifile != NULL) { ARGV[1] = ifile; ARGC = 2 }
}

# Main loop

# Echo mode: copy every record whose number exceeds minrec to stdout.
# This rule comes before the header rule, so line 1 is copied as read,
# before the header rule strips anything from it.
# NOTE(review): --no-header sets minrec to 2, which also suppresses input
# line 2, while the header rule treats only line 1 as the header --
# confirm that this is intended.
NR > minrec && echo { print $0 }

# Header record (line 1): remember the column count and validate the
# column names.  On the first problem found, report it on stderr when
# verbose is set, flag the error and exit with status 1.
NR == 1 {
  num_cols = NF

  # gsub() strips the \001 markers from the header and returns how many
  # it removed; there must be exactly one per field.  What is left may
  # contain only letters, digits, underscores, blanks and tabs.
  marks = gsub(/\001/, "")
  if (marks != NF || $0 !~ /^[A-Za-z0-9\t_ ]+$/) {
    if (verbose) print "istable: invalid table header" > stderr
    errors = 1
    exit(1)
  }

  # A column name must begin with a letter; in relaxed mode a leading
  # underscore is accepted as well.
  name_re = "^[A-Za-z]"
  if (relaxed) name_re = "^[_A-Za-z]"

  for (col = 1; col <= NF; col++) {
    if ($col !~ name_re) {
      if (verbose) print "istable: invalid column name '" $col "'" > stderr
      errors = 1
      exit(1)
    }

    # A name that was already recorded is an error unless duplicates
    # are allowed.
    if (!dups && col_names[$col] != "") {
      if (verbose) print "istable: duplicated column name '" $col "'" > stderr
      errors = 1
      exit(1)
    }
    col_names[$col] = $col
  }
}

# Every record after the header must have as many fields as the header.
NR > 1 {
  # A blank line is a valid (empty) row when the table has one column.
  if (num_cols == 1 && $0 == "") next

  if (NF != num_cols) {
    if (verbose) {
      # The reported number is the input line number minus one, lowered
      # by one more when --skip-header was given.
      lineno = NR - 1
      if (skip_hdr) lineno--

      if (NF > num_cols) print "istable: long record at line " lineno > stderr
      else print "istable: short record at line " lineno > stderr
    }

    # Only the really-relaxed mode carries on after a bad record.
    if (!Relaxed) {
      errors = 1
      exit(1)
    }
  }
}

# Final verdict.  This rule also runs after an exit in BEGIN or in a main
# rule, so a status already stored in rc is passed straight through.
END {
  if (rc) exit(rc)

  # No input records at all: not a table.
  if (NR < 1) errors = 1

  if (verbose && errors) {
    print "istable: table not ok" > stderr
    # In edit mode the number of records read, minus one, is also written
    # to standard output.
    if (edit) print "istable: " NR-1
  }
  else if (verbose) print "istable: table ok" > stderr

  if (errors) {
    rc = 1
    exit(rc)
  }
}

# End of program.
