/*  GNU OCRAD - Optical Character Recognition program
    Copyright (C) 2003 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <cctype>
#include <cstdio>
#include <list>
#include <map>
#include <vector>
#include "common.h"
#include "rectangle.h"
#include "block.h"
#include "character.h"
#include "iso_8859_1.h"
#include "textline.h"


// Appends a copy of 'c' at the end of the line and returns a reference
// to the stored copy.
const Character & Textline::add_character( const Character & c ) throw()
  {
  data.insert( data.end(), c );
  return data.back();
  }


// Inserts a copy of 'c' before position 'i'.
// 'i' may be equal to characters(), which places the copy at the end.
// Any other index outside [0, characters()] is reported through
// internal_error.
void Textline::insert_character( int i, const Character & c ) throw()
  {
  const bool in_range = ( i >= 0 && i <= characters() );

  if( !in_range )
    internal_error( "insert_character, index out of bounds" );
  data.insert( data.begin() + i, c );
  }


// Removes the character at position 'i'.
// An index outside [0, characters() - 1] is reported through
// internal_error.
void Textline::delete_character( int i ) throw()
  {
  const bool in_range = ( i >= 0 && i < characters() );

  if( !in_range )
    internal_error( "delete_character, index out of bounds" );
  data.erase( data.begin() + i );
  }


// Inserts a copy of 'c' at a position chosen by its left() coordinate.
// The line is scanned backwards from its end; the copy is placed right
// after the first character found whose left() is not greater than
// c.left(), or at the front if there is none.
// Returns a reference to the stored copy.
const Character & Textline::shift_character( const Character & c ) throw()
  {
  int pos = characters();

  // step back over every trailing character that starts to the right of 'c'
  while( pos > 0 && c.left() < data[pos-1].left() ) --pos;

  data.insert( data.begin() + pos, c );
  return data[pos];
  }


void Textline::swap_characters( int i, int j ) throw()
  {
  if( i < 0 || i >= characters() || j < 0 || j >= characters() )
    internal_error( "swap_characters, index out of bounds" );
  Character tmp = data[i]; data[i] = data[j]; data[j] = tmp;
  }


// Returns a reference to the character at position 'i'.
// An index outside [0, characters() - 1] is reported through
// internal_error.
// Note that a modifiable reference is handed out even though this
// member is const.
Character & Textline::character( int i ) const throw()
  {
  const bool in_range = ( i >= 0 && i < characters() );

  if( !in_range ) internal_error( "character, index out of bounds" );
  return data[i];
  }


// Returns the integer mean of height() over all the characters of the
// line, or 0 if the line is empty.
int Textline::mean_height() const throw()
  {
  const int n = characters();
  if( n == 0 ) return 0;

  int total = 0;
  for( int k = 0; k < n; ++k ) total += data[k].height();
  return total / n;
  }


// Returns the integer mean of height() over all the characters of the
// line except the first one, or 0 if the line has fewer than two
// characters.
int Textline::mean_height1() const throw()
  {
  const int n = characters();
  if( n <= 1 ) return 0;

  int total = 0;
  for( int k = 1; k < n; ++k ) total += data[k].height();
  return total / ( n - 1 );
  }


// Returns the integer mean of width() over all the characters of the
// line, or 0 if the line is empty.
int Textline::mean_width() const throw()
  {
  const int n = characters();
  if( n == 0 ) return 0;

  int total = 0;
  for( int k = 0; k < n; ++k ) total += data[k].width();
  return total / n;
  }


// Returns the integer mean width of the gaps between consecutive
// characters, or 0 if the line has fewer than two characters.
// Only pairs where the second character starts to the right of the end
// of the first contribute to the sum, but the sum is always divided by
// the number of consecutive pairs (characters() - 1).
int Textline::mean_gap_width() const throw()
  {
  const int n = characters();
  if( n < 2 ) return 0;

  int total = 0;
  for( int k = 1; k < n; ++k )
    {
    const int cur_left = data[k].left();
    const int prev_right = data[k-1].right();
    if( cur_left > prev_right ) total += cur_left - prev_right - 1;
    }
  return total / ( n - 1 );
  }


// Returns the integer mean of hcenter() over all the characters of the
// line, or 0 if the line is empty.
int Textline::mean_hcenter() const throw()
  {
  const int n = characters();
  if( n == 0 ) return 0;

  int total = 0;
  for( int k = 0; k < n; ++k ) total += data[k].hcenter();
  return total / n;
  }


// Returns the integer mean of vcenter() over all the characters of the
// line, or 0 if the line is empty.
int Textline::mean_vcenter() const throw()
  {
  const int n = characters();
  if( n == 0 ) return 0;

  int total = 0;
  for( int k = 0; k < n; ++k ) total += data[k].vcenter();
  return total / n;
  }


// Moves every character of 'l' to the end of this line, in order, and
// leaves 'l' empty.
// Joining a line with itself is a no-op: appending a container to itself
// element by element would never terminate, because the source grows
// with every append.
void Textline::join( Textline & l ) throw()
  {
  if( &l == this ) return;
  data.insert( data.end(), l.data.begin(), l.data.end() );
  l.data.clear();
  }


// Prints the line to 'outfile' by calling print on every character in
// order, followed by a newline.
// When 'graph' or 'recursive' is set, the mean height of the line is
// printed first. Both flags are passed unchanged to each character.
void Textline::print( FILE * outfile, bool graph, bool recursive ) const throw()
  {
  const bool with_header = ( graph || recursive );
  if( with_header )
    fprintf( outfile, "mean_height = %d\n", mean_height() );

  int k = 0;
  while( k < characters() )
    {
    character( k ).print( outfile, graph, recursive );
    ++k;
    }
  fputs( "\n", outfile );
  }


// Writes the line to 'exportfile' by calling xprint on every character
// in order.
void Textline::xprint( FILE * exportfile ) const throw()
  {
  int k = 0;
  while( k < characters() )
    {
    character( k ).xprint( exportfile );
    ++k;
    }
  }


// First recognition pass over the line.
// Two reference characters are chosen, one searched from the left end
// towards the middle and one from the right end towards the middle:
//   - first, characters whose height is similar to the mean height
//     (similar() is called with 20 as its third argument);
//   - if the two found are closer than half the line, characters taller
//     than the mean height;
//   - if those are still too close, the first and last characters.
// The straight line through the centers of both references gives, for
// each character, the vertical position passed to Character::recognize1.
void Textline::recognize1() const throw()
  {
  const int n = characters();
  if( n == 0 ) return;

  const int half = n / 2;
  const int mh = mean_height();
  int l = 0, r = n - 1;

  // references with a height similar to the mean
  while( l < half && !similar( character( l ).height(), mh, 20 ) ) ++l;
  while( r > half && !similar( character( r ).height(), mh, 20 ) ) --r;

  if( r - l < half )
    {
    // too close together; look for characters taller than the mean
    l = 0; r = n - 1;
    while( l < half && !( character( l ).height() > mh ) ) ++l;
    while( r > half && !( character( r ).height() > mh ) ) --r;
    if( r - l < half ) { l = 0; r = n - 1; }	// last resort: both ends
    }

  const int xl = character( l ).hcenter(), yl = character( l ).vcenter();
  const int xr = character( r ).hcenter(), yr = character( r ).vcenter();
  const int dx = xr - xl, dy = yr - yl;

  for( int i = 0; i < characters(); ++i )
    {
    Character & c = character( i );
    // interpolate along the reference line; a zero dx means both
    // references share the same hcenter, so yl is used as is
    const int offset = dx ? ( dy * ( c.hcenter() - xl ) / dx ) : 0;
    c.recognize1( yl + offset );
    }
  }


void Textline::recognize2() throw()
  {
  if( characters() == 0 ) return;

  // separate merged characters recognized by recognize1
  for( int i = 0; i < characters(); )
    {
    const Character & c = character( i );
    if( c.guesses() > 2 && c.guess( 0 ).ch == 0 && c.blocks() >= 1 )
      {
      int g, left = c.guess( 0 ).value;
      for( g = 1; g < c.guesses(); ++g )
        {
        const Character & c = character( i );
        const Block & b = c.block_list().front();
        Rectangle r( left, b.top(), c.guess( g ).value, b.bottom() );
        Character c1( Block( r, *c.blockmap(), b.id() ) );
        std::list< Block >::const_iterator p = c.block_list().begin();
        for( ++p; p != c.block_list().end(); ++p )
          if( r.includes_hcenter( *p ) ) c1.add_block( *p );
        c1.add_guess( c.guess( g ).ch, 0 );
        left = c.guess( g ).value + 1;
        insert_character( i + g, c1 );
        }
      delete_character( i ); i += g - 1;
      }
    else ++i;
    }

  // try to recognize separately the 2 overlapped blocks of an
  // unrecognized character
  for( int i = 0; i < characters(); ++i )
    {
    Character & c = character( i );
    if( c.guesses() == 0 && c.block_list().size() == 2 &&
        c.block_list().front().v_overlaps( c.block_list().back() ) )
      {
      Character c1( c.block_list().front() );
      Character c2( c.block_list().back() );
      c1.recognize1( c.vcenter() );
      c2.recognize1( c.vcenter() );
      if( c1.left() <= c2.left() ) { c = c1; insert_character( i + 1, c2 ); }
      else { c = c2; insert_character( i + 1, c1 ); }
      ++i;
      }
    }

  // transform some small letters to capitals
  for( int i = 0, begin = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.guesses() == 1 )
      {
      unsigned char ch = c1.guess( 0 ).ch;
      if( isspace( ch ) ) { begin = i + 1 ; continue; }
      if( ch != 'c' && ch != 'o' && ch != 's' && ch != 'u' && ch != 'v' &&
          ch != 'w' && ch != 'x' && ch != 'z' ) continue;
      if( 4 * c1.height() > 5 * mean_height() )
        { c1.only_guess( toupper( ch ), 0 ); continue; }
      if( 5 * c1.height() < 4 * mean_height() ) continue;
      for( int j = begin; j < characters(); ++j ) if( j != i )
        {
        Character & c2 = character( j );
        if( c2.guesses() >= 1 )
          {
          unsigned char ch2 = c2.guess( 0 ).ch;
          if( isspace( ch2 ) ) break;
          if( ( isalpha( ch2 ) && 4 * c1.height() > 5 * c2.height() ) ||
              ( isupper( ch2 ) && ch2 != 'B' &&
                ( c1.height() >= c2.height() ||
                  similar( c1.height(), c2.height(), 10 ) ) ) )
            { c1.insert_guess( 0, toupper( ch ), 1 ); break; }
          }
        }
      }
    }

  // transform 'i' into 'j'
  for( int i = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.guesses() == 1 && c1.guess( 0 ).ch == 'i' )
      {
      int j = i + 1;
      if( j >= characters() || !character( j ).guesses() )
        { j = i - 1; if( j < 0 || !character( j ).guesses() ) continue; }
      Character & c2 = character( j );
      if( ISO_8859_1::isvowel( c2.guess( 0 ).ch ) &&
          c1.bottom() >= c2.bottom() + ( c2.height() / 4 ) )
        c1.insert_guess( 0, 'j', 1 );
      }
    }

  // transform a small p to a capital p
  for( int i = characters() - 1; i > 0; --i )
    {
    Character & c1 = character( i - 1 );
    if( c1.guesses() == 1 && c1.guess( 0 ).ch == 'p' )
      {
      Character & c2 = character( i );
      if( c2.guesses() == 0 || !isalnum( c2.guess( 0 ).ch ) ) continue;
      switch( c2.guess( 0 ).ch )
        {
        case 'g': case 'j': case 'p': case 'q': case 'y':
                  if( c1.bottom() + 2 > c2.bottom() ) continue; break;
        case 'Q': if( abs( c1.top() - c2.top() ) > 2 ) continue; break;
        default : if( abs( c1.bottom() - c2.bottom() ) > 2 ) continue; break;
        }
      c1.only_guess( 'P', 0 );
      }
    }

  // transform small o or u with accent or diaeresis to capital
  for( int i = 0, begin = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.guesses() >= 1 )
      {
      unsigned char ch = c1.guess( 0 ).ch;
      if( isspace( ch ) ) { begin = i + 1 ; continue; }
      if( ch <= 127 || c1.block_list().size() < 2 ) continue;
      int chb = ISO_8859_1::base_letter( ch );
      if( chb != 'o' && chb != 'u' ) continue;
      const Block & b1 = c1.block_list().front();
      for( int j = begin; j < characters(); ++j ) if( j != i )
        {
        Character & c2 = character( j );
        if( c2.guesses() >= 1 )
          {
          unsigned char ch2 = c2.guess( 0 ).ch;
          int ch2b = ISO_8859_1::base_letter( ch2 );
          if( !ch2b && ch2 > 127 ) continue;
          if( isspace( ch2 ) ) break;
          if( ( isalpha( ch2 ) && 4 * b1.height() > 5 * c2.height() ) ||
              ( isupper( ch2 ) && similar( b1.height(), c2.height(), 10 ) ) ||
              ( isalpha( ch2b ) && 4 * c1.height() > 5 * c2.height() ) ||
              ( isupper( ch2b ) && similar( c1.height(), c2.height(), 10 ) ) )
            { c1.insert_guess( 0, ISO_8859_1::toupper( ch ), 1 ); break; }
          }
        }
      }
    }

  // transform a vertical bar into l or I
  for( int i = 0; i < characters(); ++i )
    {
    Character & c = character( i );
    if( c.guesses() == 1 && c.guess( 0 ).ch == '|' )
      {
      unsigned char lch = 0, rch = 0;
      if( i > 0 && character( i - 1 ).guesses() )
        lch = character( i - 1 ).guess( 0 ).ch;
      if( i < characters() - 1 && character( i + 1 ).guesses() )
        rch = character( i + 1 ).guess( 0 ).ch;
      if( ISO_8859_1::isupper( rch ) &&
          ( !lch || ISO_8859_1::isupper( lch ) || isspace( lch ) ) )
        { c.insert_guess( 0, 'I', 1 ); continue; }
      if( ISO_8859_1::isalpha( lch ) || ISO_8859_1::isalpha( rch ) )
        { c.insert_guess( 0, 'l', 1 ); continue; }
      }
    }

  // join two adjacent single quotes into a double quote
  for( int i = 0; i < characters() - 1; ++i )
    {
    Character & c1 = character( i );
    Character & c2 = character( i + 1 );
    if( c1.guesses() == 1 && c2.guesses() == 1 )
      {
      unsigned char ch1 = c1.guess( 0 ).ch;
      unsigned char ch2 = c2.guess( 0 ).ch;
      if( ( ch1 == '\'' || ch1 == '`' ) && ch1 == ch2 &&
          2 * ( c2.left() - c1.right() ) < 3 * c1.width() )
        { c1.join( c2 ); c1.only_guess( '"', 0 ); delete_character( i + 1 ); }
      }
    }

  // join a comma followed by a period into a semicolon
  for( int i = 0; i < characters() - 1; ++i )
    {
    Character & c1 = character( i );
    Character & c2 = character( i + 1 );
    if( c1.guesses() == 1 && c2.guesses() == 1 )
      {
      unsigned char ch1 = c1.guess( 0 ).ch;
      unsigned char ch2 = c2.guess( 0 ).ch;
      if( ch1 == ',' && ch2 == '.' && c1.top() > c2.bottom() &&
          c2.left() - c1.right() < c2.width() )
        { c1.join( c2 ); c1.only_guess( ';', 0 ); delete_character( i + 1 ); }
      }
    }

  // transform 'O' or 'l' into '0' or '1'
  for( int i = 0, begin = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.guesses() == 1 )
      {
      unsigned char ch = c1.guess( 0 ).ch;
      if( isspace( ch ) ) { begin = i + 1 ; continue; }
      if( ch != 'o' && ch != 'O' && ch != 'l' ) continue;
      for( int j = begin; j < characters(); ++j ) if( j != i )
        {
        Character & c2 = character( j );
        if( c2.guesses() >= 1 )
          {
          unsigned char ch2 = c2.guess( 0 ).ch;
          if( isspace( ch2 ) ) break;
          if( isdigit( ch2 ) && similar( c1.height(), c2.height(), 10 ) )
            { c1.insert_guess( 0, (ch == 'l') ? '1' : '0', 1 ); break; }
          }
        }
      }
    }
  }
