/*  GNU Ocrad - Optical Character Recognition program
    Copyright (C) 2003 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <vector>
#include "common.h"
#include "rectangle.h"
#include "block.h"
#include "character.h"
#include "iso_8859_1.h"
#include "textline.h"


/* Second recognition pass over the characters of this line.
   Each step below is an independent heuristic that looks at the guesses,
   sizes and positions of neighbouring characters in order to split, join
   or re-guess characters.  The steps run in this order:
     - recognize separately the 2 overlapped blocks of an unrecognized
       character
     - separate merged characters recognized by recognize1
     - transform some small letters to capitals
     - transform 'i' into 'j'
     - transform small o or u with accent or diaeresis to capital
     - transform 'O' or 'l' into '0' or '1'
     - transform a small p to a capital P
     - transform words like 'lO.OOO' into numbers like '10.000'
     - transform a vertical bar into l or I (or a l into an I)
     - join two adjacent single quotes into a double quote
     - join a comma followed by a period into a semicolon
     - choose between 'a' and 'Q', between '.' and '-', between 'B' and 'a'
   Several steps treat a character whose first guess is a space as a word
   separator; 'begin' then holds the index of the first character of the
   current word.
   Does nothing if the line has no characters.
*/
void Textline::recognize2() throw()
  {
  if( characters() == 0 ) return;

  // try to recognize separately the 2 overlapped blocks of an
  // unrecognized character
  for( int i = 0; i < characters(); ++i )
    {
    Character & c = character( i );
    if( c.guesses() == 0 && c.blocks() == 2 &&
        c.block_vector().front().v_overlaps( c.block_vector().back() ) )
      {
      Character c1( c.block_vector().front() );
      Character c2( c.block_vector().back() );
      c1.recognize1( c.vcenter() );
      c2.recognize1( c.vcenter() );
      // the half with the smaller left() replaces 'c', the other follows it
      if( c1.left() <= c2.left() ) { c = c1; insert_character( i + 1, c2 ); }
      else { c = c2; insert_character( i + 1, c1 ); }
      ++i;			// do not revisit the character just inserted
      }
    }

  // separate merged characters recognized by recognize1
  // A merged character is identified here by a first guess with ch == 0.
  // The value of that guess is used as the left edge of the first piece;
  // every following guess supplies the code (ch) and the right edge
  // (value) of one piece.
  for( int i = 0; i < characters(); ++i )
    {
    int j = i, guesses = data[j].guesses();
    if( guesses >= 1 && data[j].guess( 0 ).ch == 0 )
      {
      if( guesses >= 3 && data[j].blocks() >= 1 )
        {
        int left = data[j].guess( 0 ).value;
        for( int g = 1; g < guesses; ++g )
          {
          // 'data' is indexed again on every iteration because
          // insert_character below may move the stored characters
          const Block & b = data[j].block_vector().back();
          Rectangle r( left, b.top(), data[j].guess( g ).value, b.bottom() );
          Character c1( Block( r, *data[j].blockmap(), b.id() ) );
          // give the piece every other block (all but the first one)
          // whose horizontal center lies inside the piece
          std::vector< Block >::const_iterator p = data[j].block_vector().begin();
          for( ++p; p != data[j].block_vector().end(); ++p )
            if( r.includes_hcenter( *p ) ) c1.shift_block( *p );
          c1.add_guess( data[j].guess( g ).ch, 0 );
          left = data[j].guess( g ).value + 1;
          insert_character( j + g, c1 );
          }
        // guesses - 1 pieces were inserted and the original is deleted
        // below, so the ++i of the loop lands just past the last piece
        i += guesses - 2;
        }
      else
        // Nothing was inserted; after the deletion below the following
        // character occupies slot j, so step back to examine it too
        // instead of skipping it.
        --i;
      delete_character( j );
      }
    }

  // transform some small letters to capitals
  for( int i = 0, begin = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.guesses() == 1 )
      {
      unsigned char ch = c1.guess( 0 ).ch;
      if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
      if( !ISO_8859_1::islower_small_ambiguous( ch ) ) continue;
      // more than 5/4 of the mean height: take the capital right away
      if( 4 * c1.height() > 5 * mean_height() )
        { c1.only_guess( std::toupper( ch ), 0 ); continue; }
      // less than 4/5 of the mean height: leave the small letter alone
      if( 5 * c1.height() < 4 * mean_height() ) continue;
      // otherwise compare against the other letters of the same word
      bool capital = false, small = false;
      for( int j = begin; j < characters(); ++j ) if( j != i )
        {
        Character & c2 = character( j );
        if( !c2.guesses() ) continue;
        unsigned char ch2 = c2.guess( 0 ).ch;
        if( std::isspace( ch2 ) ) break;		// end of word
        if( !std::isalpha( ch2 ) ) continue;
        if( !capital )
          {
          if( 4 * c1.height() > 5 * c2.height() ) capital = true;
          else if( std::isupper( ch2 ) &&
                   ch2 != 'B' && ch2 != 'Q' &&
                   ( c1.height() >= c2.height() ||
                     Ocrad::similar( c1.height(), c2.height(), 10 ) ) )
            capital = true;
          }
        if( !small && std::islower( ch2 ) &&  ch2 != 'l' )
          {
          if( 5 * c1.height() < 4 * c2.height() ) small = true;
          else if( ISO_8859_1::islower_small( ch2 ) &&
                   ( j < i || !ISO_8859_1::islower_small_ambiguous( ch2 ) ) &&
                   Ocrad::similar( c1.height(), c2.height(), 10 ) )
            small = true;
          }
        }
      if( capital && !small ) c1.insert_guess( 0, std::toupper( ch ), 1 );
      }
    }

  // transform 'i' into 'j'
  for( int i = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.guesses() == 1 && c1.guess( 0 ).ch == 'i' )
      {
      // compare with the next character, or with the previous one if the
      // next one does not exist or has no guesses
      int j = i + 1;
      if( j >= characters() || !character( j ).guesses() )
        { j = i - 1; if( j < 0 || !character( j ).guesses() ) continue; }
      Character & c2 = character( j );
      // the bottom of the 'i' must exceed the bottom of a neighbouring
      // vowel by at least a quarter of the vowel's height
      if( ISO_8859_1::isvowel( c2.guess( 0 ).ch ) &&
          c1.bottom() >= c2.bottom() + ( c2.height() / 4 ) )
        c1.insert_guess( 0, 'j', 1 );
      }
    }

  // transform small o or u with accent or diaeresis to capital
  for( int i = 0, begin = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.guesses() >= 1 )
      {
      unsigned char ch = c1.guess( 0 ).ch;
      if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
      // only codes above 127 made of at least 2 blocks are candidates
      if( ch <= 127 || c1.blocks() < 2 ) continue;
      int chb = ISO_8859_1::base_letter( ch );
      if( chb != 'o' && chb != 'u' ) continue;
      const Block & b1 = c1.block_vector().back();	// lower block
      for( int j = begin; j < characters(); ++j ) if( j != i )
        {
        Character & c2 = character( j );
        if( c2.guesses() >= 1 )
          {
          unsigned char ch2 = c2.guess( 0 ).ch;
          int ch2b = ISO_8859_1::base_letter( ch2 );
          if( !ch2b && ch2 > 127 ) continue;	// code > 127 without base letter
          if( std::isspace( ch2 ) ) break;		// end of word
          // compare both the lower block alone and the whole character
          // with the other character of the word
          if( ( std::isalpha( ch2 ) && 4 * b1.height() > 5 * c2.height() ) ||
              ( std::isupper( ch2 ) && Ocrad::similar( b1.height(), c2.height(), 10 ) ) ||
              ( std::isalpha( ch2b ) && 4 * c1.height() > 5 * c2.height() ) ||
              ( std::isupper( ch2b ) && Ocrad::similar( c1.height(), c2.height(), 10 ) ) )
            { c1.insert_guess( 0, ISO_8859_1::toupper( ch ), 1 ); break; }
          }
        }
      }
    }

  // transform 'O' or 'l' into '0' or '1'
  for( int i = 0, begin = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.guesses() >= 1 )
      {
      unsigned char ch = c1.guess( 0 ).ch;
      if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
      if( ch != 'o' && ch != 'O' && ch != 'l' ) continue;
      // scan the word: the first digit found decides (if its height is
      // similar); any letter other than 'o', 'O' or 'l' stops the search
      for( int j = begin; j < characters(); ++j ) if( j != i )
        {
        Character & c2 = character( j );
        if( c2.guesses() >= 1 )
          {
          unsigned char ch2 = c2.guess( 0 ).ch;
          if( std::isspace( ch2 ) ) break;
          if( std::isdigit( ch2 ) )
            {
            if( Ocrad::similar( c1.height(), c2.height(), 10 ) )
              c1.insert_guess( 0, (ch == 'l') ? '1' : '0', c1.guess(0).value + 1 );
            break;
            }
          if( ISO_8859_1::isalpha( ch2 ) &&
              ch2 != 'o' && ch2 != 'O' && ch2 != 'l' ) break;
          }
        }
      }
    }

  // transform a small p to a capital P
  // (walks backwards; c1 is the 'p' and c2 the character following it)
  for( int i = characters() - 1; i > 0; --i )
    {
    Character & c1 = character( i - 1 );
    if( c1.guesses() == 1 && c1.guess( 0 ).ch == 'p' )
      {
      Character & c2 = character( i );
      if( !c2.guesses() ) continue;
      unsigned char ch = c2.guess( 0 ).ch;
      if( !std::isalnum( ch ) && ch != '.' && ch != '|' ) continue;
      // the position check depends on the kind of the following character;
      // 'continue' leaves the 'p' unchanged
      switch( ch )
        {
        case 'g': case 'j': case 'p': case 'q': case 'y':
                  if( c1.bottom() + 2 > c2.bottom() ) continue; break;
        case 'Q': if( std::abs( c1.top() - c2.top() ) > 2 ) continue; break;
        default : if( std::abs( c1.bottom() - c2.bottom() ) > 2 ) continue; break;
        }
      c1.only_guess( 'P', 0 );
      }
    }

  // transform words like 'lO.OOO' into numbers like '10.000'
  // [begin, end) is one word, 'end' being the index given by find_space
  for( int begin = 0, end = 0; begin < characters(); begin = end + 1 )
    {
    end = find_space( begin );
    if( end - begin < 2 ) continue;
    Character & c1 = character( begin );
    if( !c1.guesses() ) continue;
    int height = c1.height();
    unsigned char ch = c1.guess( 0 ).ch;
    if( ch == 'l' || ch == 'O' || ch == 'o' )
      {
      // first check that the whole word is made only of 'l', 'O', 'o'
      // (with a height similar to the first one), '.' and ','
      int digits = 1;
      int i = begin + 1;
      for( ; i < end; ++i )
        {
        Character & c = character( i );
        if( !c.guesses() ) break;
        bool valid = false;
        const unsigned char chi = c.guess(0).ch;
        if( ( chi == 'l' || chi == 'O' || chi == 'o' ) &&
            Ocrad::similar( c.height(), height, 10 ) )
          { valid = true; ++digits; }
        if( chi == '.' || chi == ',' ) valid = true;
        if( !valid ) break;
        }
      // i >= end means that no character of the word was rejected
      if( i >= end && digits >= 2 )
        for( i = begin; i < end; ++i )
        {
        Character & c = character( i );
        const unsigned char old_ch = c.guess(0).ch;
        unsigned char digit = 0;		// 0 leaves '.' and ',' untouched
        if( old_ch == 'l' ) digit = '1';
        else if( old_ch == 'O' || old_ch == 'o' ) digit = '0';
        if( digit ) c.insert_guess( 0, digit, c.guess(0).value + 1 );
        }
      }
    }

  // transform a vertical bar into l or I (or a l into an I)
  for( int i = 0; i < characters(); ++i )
    {
    Character & c = character( i );
    if( c.guesses() != 1 ) continue;
    unsigned char ch = c.guess( 0 ).ch;
    if( ch == '|' || ch == 'l' )
      {
      // first guess of the left and right neighbours; 0 if there is no
      // neighbour or it has no guesses
      unsigned char lch = 0, rch = 0;
      if( i > 0 && character( i - 1 ).guesses() )
        lch = character( i - 1 ).guess( 0 ).ch;
      if( i < characters() - 1 && character( i + 1 ).guesses() )
        rch = character( i + 1 ).guess( 0 ).ch;
      // followed by a capital and not preceded by anything else than a
      // capital or a space: 'I'
      if( ISO_8859_1::isupper( rch ) &&
          ( !lch || ISO_8859_1::isupper( lch ) || std::isspace( lch ) ) )
        { c.insert_guess( 0, 'I', 1 ); continue; }
      if( ch == 'l' ) continue;		// the rest applies only to '|'
      if( ISO_8859_1::isalpha( lch ) || ISO_8859_1::isalpha( rch ) )
        { c.insert_guess( 0, 'l', 1 ); continue; }
      // two bars at the beginning of a word followed by a letter
      if( rch == '|' && ( !lch || std::isspace( lch ) ) &&
          i < characters() - 2 && character( i + 2 ).guesses() &&
          std::isalpha( character( i + 2 ).guess( 0 ).ch ) )
        { c.insert_guess( 0, 'l', 1 ); continue; }
      }
    }

  // join two adjacent single quotes into a double quote
  for( int i = 0; i < characters() - 1; ++i )
    {
    Character & c1 = character( i );
    Character & c2 = character( i + 1 );
    if( c1.guesses() == 1 && c2.guesses() == 1 )
      {
      unsigned char ch1 = c1.guess( 0 ).ch;
      unsigned char ch2 = c2.guess( 0 ).ch;
      // same kind of quote and a gap smaller than 1.5 times the width of
      // the first one
      if( ( ch1 == '\'' || ch1 == '`' ) && ch1 == ch2 &&
          2 * ( c2.left() - c1.right() ) < 3 * c1.width() )
        { c1.join( c2 ); c1.only_guess( '"', 0 ); delete_character( i + 1 ); }
      }
    }

  // join a comma followed by a period into a semicolon
  for( int i = 0; i < characters() - 1; ++i )
    {
    Character & c1 = character( i );
    Character & c2 = character( i + 1 );
    if( c1.guesses() == 1 && c2.guesses() == 1 )
      {
      unsigned char ch1 = c1.guess( 0 ).ch;
      unsigned char ch2 = c2.guess( 0 ).ch;
      // the top of the comma must be beyond the bottom of the period and
      // the horizontal gap smaller than the width of the period
      if( ch1 == ',' && ch2 == '.' && c1.top() > c2.bottom() &&
          c2.left() - c1.right() < c2.width() )
        { c1.join( c2 ); c1.only_guess( ';', 0 ); delete_character( i + 1 ); }
      }
    }

  // choose between 'a' and 'Q'
  // (only for the first character of a word that is taller than 5/4 of
  // the mean height)
  for( int i = 0, begin = 0; i < characters(); ++i )
    {
    Character & c = character( i );
    if( c.guesses() )
      {
      unsigned char ch = c.guess( 0 ).ch;
      if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
      if( i == begin && ch == 'a' && c.guesses() == 2 &&
          c.guess( 1 ).ch == 'Q' && 4 * c.height() > 5 * mean_height() )
        c.swap_guesses( 0, 1 );
      }
    }

  // choose between '.' and '-'
  // (only for the last character of the line, when it follows a letter)
  if( characters() >= 2 )
    {
    Character & c = character( characters() - 1 );
    if( c.guesses() >= 2 && c.guess(0).ch == '.' && c.guess(1).ch == '-' )
      {
      Character & lc = character( characters() - 2 );
      if( lc.guesses() && ISO_8859_1::isalpha( lc.guess(0).ch ) )
        c.swap_guesses( 0, 1 );
      }
    }

  // choose between 'B' and 'a'
  for( int i = 0, begin = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.guesses() )
      {
      unsigned char ch = c1.guess(0).ch;
      if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
      if( c1.guesses() != 2 || ch != 'B' || c1.guess(1).ch != 'a' ) continue;
      // taller than 5/4 of the mean height: keep the 'B'
      if( 4 * c1.height() > 5 * mean_height() ) continue;
      // prefer 'a' if some letter of the word is clearly taller, or if a
      // small letter is at least as tall or of similar height
      for( int j = begin; j < characters(); ++j ) if( j != i )
        {
        Character & c2 = character( j );
        if( c2.guesses() >= 1 )
          {
          unsigned char ch2 = c2.guess(0).ch;
          if( std::isspace( ch2 ) ) break;
          if( ( std::isalpha( ch2 ) && 5 * c1.height() < 4 * c2.height() ) ||
              ( std::islower( ch2 ) &&
                ( c1.height() <= c2.height() ||
                  Ocrad::similar( c1.height(), c2.height(), 10 ) ) ) )
            { c1.swap_guesses( 0, 1 ); break; }
          }
        }
      }
    }
  }
