/*  GNU Ocrad - Optical Character Recognition program
    Copyright (C) 2003 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
    Return values: 0 for a normal exit, 1 for environmental problems
    (file not found, invalid flags, I/O errors, etc.), 2 to indicate a
    corrupt or invalid input file, 3 for an internal consistency error
    (e.g., a bug) which caused Ocrad to panic.
*/

#include <cctype>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
#include <getopt.h>
#include "common.h"
#include "rectangle.h"
#include "bitmap.h"
#include "block.h"
#include "blockmap.h"
#include "character.h"
#include "textline.h"
#include "textblock.h"


namespace {

// Date of this version: 2003-12-18

// Version string printed by show_version() and written into the header
// line of the OCR results file by main().
const char *const program_version = "0.6";
// Copyright year printed by show_version().
const char *const program_year    = "2003";

// Print the program name and version, the copyright line and a short
// licensing notice to standard output.
void show_version()
  {
  std::printf( "GNU Ocrad version %s\n"
               "Copyright (C) %s Antonio Diaz.\n",
               program_version, program_year );
  std::fputs( "This program is free software; you may redistribute it under the terms of\n"
              "the GNU General Public License.  This program has absolutely no warranty.\n",
              stdout );
  }

// Report a usage error on standard error.
// 'msg' is the text to print after the "ocrad: " prefix; a null pointer
// or an empty string suppresses that line. The hint pointing the user at
// `--help' is always printed.
// The message is only read, so it is taken as 'const char *'; this lets
// callers pass string literals without a deprecated (ill-formed since
// C++11) conversion to 'char *'.
void show_error( const char * msg )
  {
  if( msg && msg[0] != 0 ) std::fprintf( stderr, "ocrad: %s\n", msg );
  std::fprintf( stderr, "Try `ocrad --help' for more information.\n" );
  }

// Print the usage summary to standard output.
// The regular options are always listed; the debugging options are
// listed as well when 'verbose' is true. The bug report address closes
// the text in both cases.
void show_help( bool verbose )
  {
  std::fputs(
    "GNU Ocrad, Optical Character Recognition program\n"
    "Reads pbm file(s), or standard input, and sends text to standard output\n"
    "\nUsage: ocrad [options] [files]\n"
    "Options:\n"
    "  -h, --help               display this help and exit\n"
    "  -V, --version            output version information and exit\n"
    "  -a, --append             append text to output file\n"
    "  -b, --block=<n>          process only the specified text block\n"
    "  -f, --force              force overwrite of output file\n"
    "  -i, --invert             invert image levels (white on black)\n"
    "  -l, --layout=<n>         layout analysis, 0=none, 1=column, 2=full\n"
    "  -o <file>                place the output into <file>\n"
    "  -v, --verbose            be verbose\n"
    "  -x <file>                export OCR Results File to <file>\n",
    stdout );

  if( verbose )
    std::fputs(
      "  -1, -4                   pbm output file type (debug)\n"
      "  -C, --copy               'copy' input to output (debug)\n"
      "  -D, --debug=<level>      (0-100) output intermediate data (debug)\n"
//    "  -L, --level              level input image before doing OCR\n"
      "  -R <scale>               reduce input image by <scale> (debug)\n"
      "  -S <type>                make a 'special file' (debug)\n"
      "  -T <threshold>           set threshold for -R option (debug)\n",
      stdout );

  std::fputs( "\nReport bugs to bug-ocrad@gnu.org\n", stdout );
  }


// Run the recognizer on rectangle number 'rindex' of 'page_image'.
// A Blockmap is built from the page, the rectangle index and the debug
// level; nothing is written when it reports no blocks.
// What goes to 'outfile' depends on 'debug_level':
//   >= 90      the blockmap itself is printed and the function returns
//   86 to 89   the text block is printed in debug form ('graph' from
//              level 88 up, 'recursive' for odd levels)
//   0          the text block is printed normally
//   otherwise  nothing is printed to 'outfile'
// When 'exportfile' is not null (and debug_level < 90) the text block is
// also written to it with xprint().
void ocr( const Bitmap & page_image, FILE * outfile, FILE * exportfile,
          int rindex, int debug_level ) throw()
  {
  Blockmap blockmap( page_image, rindex, debug_level );
  if( !blockmap.blocks() ) return;

  if( debug_level >= 90 )
    {
    blockmap.print( outfile, debug_level );
    return;
    }

  Textblock textblock( blockmap.block_vector() );

  if( debug_level >= 86 )
    {
    const bool with_graph = ( debug_level >= 88 );
    const bool odd_level = ( ( debug_level & 1 ) != 0 );
    textblock.print( outfile, with_graph, odd_level );
    }
  else if( debug_level == 0 )
    textblock.print( outfile, false, false );

  if( exportfile ) textblock.xprint( exportfile );
  }


int process_file( FILE *infile, FILE *outfile, FILE *exportfile, int rindex,
                  int debug_level, int layout_level, int scale, int threshold,
                  bool copy, bool invert, bool level, bool verbose,
                  char filetype, char specialtype )
  {
  try
    {
    Bitmap page_image( infile, invert );

    page_image.analyse_layout( layout_level );
    if( verbose ) std::fprintf( stderr, "number of text blocks %d\n",
                                page_image.rectangles() );

    if( rindex >= page_image.rectangles() )
      {
      std::fprintf( stderr,"This page has only %d text block(s)\n",
                    page_image.rectangles() );
      return 1;
      }

    if( level )
      std::fprintf( stderr, "\nslope_best = %d\n",
                    page_image.horizontalify( verbose ) );

    if( scale != 0 )
      {
      Bitmap reduced( page_image, scale, threshold );
      reduced.save( outfile, filetype ); return 0;
      }
    else if( specialtype != 0 )
      {
      Bitmap::type t;
      if( specialtype == 'v' ) t = Bitmap::vertical_histogram ;
      else if( specialtype == 'h' ) t = Bitmap::horizontal_histogram ;
      else if( specialtype == 'g' ) t = Bitmap::connected_ground ;
      else { show_error( "bad special type" ); return 1; }
      Bitmap sb( page_image, t );
      sb.save( outfile, filetype ); return 0;
      }
    else if( copy )
      {
      if( rindex < 0 && page_image.rectangles() == 1 )
        { page_image.save( outfile, filetype ); return 0; }
      for( int c = 0; c < page_image.rectangles(); ++c )
        if( rindex < 0 || rindex == c )
          {
          Bitmap bitmap( page_image, page_image.rectangle_vector()[c] );
          bitmap.save( outfile, filetype );
          }
      return 0;
      }

    if( exportfile )
      std::fprintf( exportfile, "total blocks %d\n",
                    ( rindex < 0 ) ? page_image.rectangles() : 1 );

    // call the character recognizer for every rectangle of text
    for( int c = 0; c < page_image.rectangles(); ++c )
      if( rindex < 0 || rindex == c )
        {
        if( exportfile )
          {
          const Rectangle & r = page_image.rectangle_vector()[c];
          std::fprintf( exportfile, "block %d %d %d %d %d\n", c + 1,
                        r.left(), r.top(), r.width(), r.height() );
          }
        ocr( page_image, outfile, exportfile, c, debug_level );
        }
    }
  catch( Bitmap::Error e ) { std::fputs( e.s, stderr ); return 2; }
  return 0;
  }


// Return the last component of the path 'filename': the text following
// its final '/'. The result points into 'filename' itself. It is
// 'filename' unchanged when there is no '/', and an empty string when
// 'filename' ends with '/'. 'filename' must not be null.
const char * my_basename( const char * filename )
  {
  const char * const last_slash = std::strrchr( filename, '/' );
  return last_slash ? last_slash + 1 : filename;
  }

} // end namespace


// Program entry point: parses the command line, opens the output and
// export files, and runs process_file() on every input.
// 'infile' contains the scanned image (in pbm format) to be converted
// to text. Inputs are the non-option arguments, where "-" means standard
// input; standard input is also used when no file is given.
// 'outfile' is the destination for the text version of the scanned
// image (or for a pbm file if debugging). It defaults to standard output.
// 'exportfile' is the OCR Results File (option -x).
// Returns the largest status seen: 0 for success, 1 for bad options or
// files that cannot be opened, written or closed, or the value returned
// by process_file().
int main( int argc, char *argv[] )
  {
  FILE *outfile = stdout, *exportfile = 0;
  const char *outfile_name = 0, *exportfile_name = 0;
  int rindex = -1, debug_level = 0, layout_level = 0;
  int scale = 0, threshold = -1;
  char filetype = 0, specialtype = 0;
  bool append = false, copy = false, force = false, invert = false;
  bool level = false, verbose = false;

  // scan arguments

  while( true )			// process options
    {
    static const struct option long_options[] =
      {
      {"append", no_argument, 0, 'a'},
      {"block", required_argument, 0, 'b'},
      {"copy", no_argument, 0, 'C'},
      {"debug", required_argument, 0, 'D'},
      {"force", no_argument, 0, 'f'},
      {"help", no_argument, 0, 'h'},
      {"invert", no_argument, 0, 'i'},
      {"layout", required_argument, 0, 'l'},
      {"level", no_argument, 0, 'L'},
      {"verbose", no_argument, 0, 'v'},
      {"version", no_argument, 0, 'V'},
      {0, 0, 0, 0}
      };

    const int c = getopt_long( argc, argv, "14ab:CD:fhiLl:o:R:S:T:Vvx:",
                               long_options, 0 );
    if( c == -1 ) break;		// all options processed

    switch( c )
      {
      case 0: break;
      case '?': return 1;  // `getopt_long' already printed an error message.
      case '1':
      case '4': filetype = c; break;
      case 'a': append = true; break;
      // Blocks are numbered from 1 on the command line but from 0
      // internally; a negative index means "all blocks".
      case 'b': rindex = std::strtol( optarg, 0, 0 ) - 1; break;
      case 'C': copy = true; break;
      case 'D': debug_level = std::strtol( optarg, 0, 0 ); break;
      case 'f': force = true; break;
      // The debug options are listed only if -v came before -h.
      case 'h': show_help( verbose ); return 0;
      case 'i': invert = true; break;
      case 'L': level = true; break;
      case 'l': layout_level = std::strtol( optarg, 0, 0 ); break;
      case 'o': outfile_name = optarg; break;
      case 'R': scale = std::strtol( optarg, 0, 0 ); break;
      case 'S': specialtype = optarg[0]; break;
      case 'T': threshold = std::strtol( optarg, 0, 0 ); break;
      case 'V': show_version(); return 0;
      case 'v': verbose = true; break;
      case 'x': exportfile_name = optarg; break;
      default: return 1;
      }
    } // end process options

  if( outfile_name )
    {
    // Without --append or --force the file is created exclusively ("wx"),
    // so an existing file is never overwritten by accident.
    const bool exclusive = ( !append && !force );
    const char * const mode = append ? "a" : ( force ? "w" : "wx" );
    errno = 0;
    outfile = std::fopen( outfile_name, mode );
    if( !outfile )
      {
      // Only claim that the file exists when that is the actual cause;
      // other failures (permissions, missing directory...) are reported
      // as a plain open error.
      if( exclusive && errno == EEXIST )
        std::fprintf( stderr, "Output file %s already exists.\n", outfile_name );
      else
        std::fprintf( stderr, "Cannot open %s\n", outfile_name );
      return 1;
      }
    }

  if( exportfile_name )
    {
    exportfile = std::fopen( exportfile_name, "w" );
    if( !exportfile )
      { std::fprintf( stderr, "Cannot open %s\n", exportfile_name ); return 1; }
    std::fprintf( exportfile, "# Ocr Results File. Created by GNU Ocrad version %s\n",
                  program_version );
    }

// process any remaining command line arguments (input files)
  FILE *infile = (optind < argc) ? 0 : stdin;
  const char *infile_name = "-";
  int retval = 0;
  while( true )
    {
    if( infile == stdin )
      {
      // Standard input is processed repeatedly. Peek one character so
      // that end of file or a read error is noticed before the next call
      // to process_file().
      std::ungetc( std::getc( infile ), infile );
      if( std::feof( infile ) || std::ferror( infile ) ) infile = 0;
      }
    // Advance to the next input that can be opened, closing the previous
    // regular file first. Standard input is never closed here.
    while( infile != stdin )
      {
      if( infile ) std::fclose( infile );
      if( optind >= argc ) { infile = 0; break; }
      infile_name = argv[optind++];
      if( std::strcmp( infile_name, "-" ) == 0 ) infile = stdin;
      else infile = std::fopen( infile_name, "r" );
      if( infile ) break;
      std::fprintf( stderr, "Cannot open %s\n", infile_name );
      if( retval == 0 ) retval = 1;
      }
    if( !infile ) break;

    if( exportfile )
      std::fprintf( exportfile, "source file %s\n", my_basename( infile_name ) );

    const int tmp = process_file( infile, outfile, exportfile, rindex,
                                  debug_level, layout_level, scale, threshold,
                                  copy, invert, level, verbose, filetype,
                                  specialtype );
    // After a failure, stop reading further data from standard input.
    if( tmp > 0 && infile == stdin ) infile = 0;
    if( tmp > retval ) retval = tmp;
    std::fflush( outfile );
    if( exportfile ) std::fflush( exportfile );
    }

  // A failed close means that buffered output could not be written;
  // report it as an I/O error instead of exiting with status 0.
  if( std::fclose( outfile ) != 0 )
    {
    std::fprintf( stderr, "Error closing output file\n" );
    if( retval == 0 ) retval = 1;
    }
  if( exportfile && std::fclose( exportfile ) != 0 )
    {
    std::fprintf( stderr, "Error closing export file\n" );
    if( retval == 0 ) retval = 1;
    }
  return retval;
  }
