/*  GNU Ocrad - Optical Character Recognition program
    Copyright (C) 2003, 2004, 2005 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <algorithm>
#include <cctype>
#include <climits>
#include <cstdio>
#include <stack>
#include <vector>
#include "common.h"
#include "rectangle.h"
#include "vrhomboid.h"
#include "track.h"
#include "bitmap.h"


namespace {

// Reads one raw byte from `f` and returns it as a char.
// Throws Bitmap::Error if the end of the file has been reached.
//
char pbm_getrawbyte( FILE * f ) throw( Bitmap::Error )
  {
  const int byte = std::fgetc( f );

  if( byte != EOF ) return static_cast< char > (byte);
  throw Bitmap::Error( "end-of-file reading pbm file." );
  }


// Returns the next character of `f` that is not part of a comment.
// A comment runs from a '#' up to the end of the line; the newline that
// closes the comment is the character returned in that case.
// Throws Bitmap::Error (through pbm_getrawbyte) at end of file.
//
char pbm_getc( FILE * f ) throw( Bitmap::Error )
  {
  bool in_comment = false;

  while( true )
    {
    const char c = pbm_getrawbyte( f );
    if( c == '#' ) in_comment = true;
    else if( c == '\n' ) in_comment = false;
    if( !in_comment ) return c;
    }
  }


// Reads an unsigned decimal integer from the header of a pbm file.
// Whitespace before the number is skipped (comments are already removed
// by pbm_getc). The character that terminates the number is consumed.
// Throws Bitmap::Error if the first non-space character is not a digit,
// if the value does not fit in an int, or at end of file.
//
int pbm_getint( FILE * f ) throw( Bitmap::Error )
  {
  char ch;
  int i = 0;

  // The <cctype> functions need a value representable as unsigned char;
  // passing a negative plain char (any byte >= 0x80 where char is signed)
  // is undefined behavior, hence the casts.
  do ch = pbm_getc( f );
  while( std::isspace( static_cast< unsigned char > (ch) ) );
  if( !std::isdigit( static_cast< unsigned char > (ch) ) )
    throw Bitmap::Error( "junk in pbm file where an integer should be." );
  do {
    const int digit = ch - '0';
    // true when (i * 10) + digit would exceed INT_MAX
    if( ( INT_MAX - digit ) / 10 < i )
      throw Bitmap::Error( "number too big in pbm file." );
    i = (i * 10) + digit;
    ch = pbm_getc( f );
    }
  while( std::isdigit( static_cast< unsigned char > (ch) ) );
  return i;
  }


// Reads the next bit of a plain ("P1") pbm file.
// Whitespace before the bit is skipped (comments are already removed by
// pbm_getc). Returns false for '0' and true for '1'.
// Throws Bitmap::Error on any other character or at end of file.
//
bool pbm_getbit( FILE * f ) throw( Bitmap::Error )
  {
  char ch;

  // std::isspace needs a value representable as unsigned char; a negative
  // plain char would be undefined behavior, hence the cast.
  do ch = pbm_getc( f );
  while( std::isspace( static_cast< unsigned char > (ch) ) );

  if( ch == '0' ) return false;
  if( ch == '1' ) return true;
  throw Bitmap::Error( "junk in pbm file where bits should be." );
  }


// Forward declaration: find_columns and find_rows call each other when
// 'recursive' is true, so find_rows must be visible before find_columns
// is defined.
//
void find_rows( const std::vector< std::vector< bool > > & data,
                const Rectangle & rdata, const Rectangle & rin,
                std::vector< Rectangle > & rvout, bool recursive ) throw();

// Splits the rectangle 'rin' into columns of text and appends the
// resulting rectangles to 'rvout'.
//
// 'data' holds the dots; it is indexed as data[row][col] with both
// indexes relative to the top left corner of 'rdata'.
// 'rin' must be included in 'rdata', else nothing is done.
// If 'recursive' is true, every column found is handed to find_rows
// instead of being stored directly.
//
// Nothing is appended if more than 80% of rin.size() dots are set
// (presumably size() is the area of the rectangle; such a zone is taken
// for an image). If 'rin' is narrower than two minimum columns plus a
// minimum gap, it is appended unchanged.
//
void find_columns( const std::vector< std::vector< bool > > & data,
                   const Rectangle & rdata, const Rectangle & rin,
                   std::vector< Rectangle > & rvout, bool recursive ) throw()
  {
  if( !rdata.includes( rin ) ) return;
  // minimum widths of a column and of a gap, derived from the width of 'rdata'
  const int colmin = ( rdata.width() > 100 ) ? rdata.width() / 10 : 10;
  const int gapmin = ( rdata.width() > 300 ) ? rdata.width() / 100 : 3;
  const int min_width = ( 2 * colmin ) + gapmin;
  if( rin.width() < min_width ) { rvout.push_back( rin ); return; }
  const unsigned int rvout_size_orig = rvout.size();
  // offsets of 'rin' inside 'data'
  const int ldiff = rin.left()-rdata.left();
  const int tdiff = rin.top()-rdata.top();
  // h_outline[col] = number of dots set in column 'col' of 'rin'
  std::vector< int > h_outline( rin.width(), 0 );

  for( int row = 0; row < rin.height(); ++row )
    {
    const std::vector< bool > & datarow = data[row+tdiff];
    for( int col = 0; col < rin.width(); ++col )
      if( datarow[col+ldiff] ) ++h_outline[col];
    }
  int total_dots = 0;
  for( int col = 0; col < rin.width(); ++col ) total_dots += h_outline[col];
  if( 10 * total_dots > 8 * rin.size() ) return;	// eliminates images

  // dot counts separating text from gap, derived from the mean dots per column
  const int threshold_col = std::max( total_dots / ( rin.width() * 4 ), 10 );
  const int threshold_gap = std::max( total_dots / ( rin.width() * 20 ), 1 );
  // 'left' and 'right' are column indexes relative to rin.left()
  int left = 0, right = rin.width() - 1;

  while( left < right && h_outline[left] <= 0 ) ++left;	// cut left border
  if( rin.left() == rdata.left() && 10 * left < right )	// cut left border noise
    {
    int l = left + gapmin;
    while( l < right && h_outline[l] <= 0 ) ++l;
    if( l > left + ( 2 * gapmin ) ) left = l;
    }
  left = std::max( 0, left - gapmin );	// keep a margin of up to gapmin columns

  while( right > left && h_outline[right] <= 0 ) --right; // cut right border
  right = std::min( right + gapmin, rin.width() - 1 );

  // each iteration finds one column followed by a gap and cuts after it
  while( right - left >= 2 * colmin )
    {
    int l, r;
    for( l = r = left; r < right; ++r )			// look for text
      if( h_outline[r] < threshold_col )
        { if( r - l >= colmin ) break; else l = r; }
    if( r - l < colmin ) break;

    for( l = r; r < right; ++r )			// look for gap
      if( h_outline[r] > threshold_gap )
        { if( r - l >= gapmin ) break; else l = r; }
    if( r - l < gapmin ) break;

    if( r < right )			// cut by a minimum near the center
      {
      // search outwards from the middle of the gap [l,r] for the column
      // with the fewest dots; stop as soon as an empty column is found
      int mid = ( r + l ) / 2, half = ( r - l ) / 2;
      r = mid;
      for( int i = 1; i <= half && h_outline[r] > 0; ++i )
        {
        if( h_outline[mid+i] < h_outline[r] ) r = mid + i;
        if( h_outline[mid-i] < h_outline[r] ) r = mid - i;
        }
      }
    Rectangle re( rin.left() + left, rin.top(), rin.left() + r, rin.bottom() );
    if( recursive ) find_rows( data, rdata, re, rvout, re.width() >= min_width );
    else rvout.push_back( re );
    left = r;
    }
  // what remains to the right of the last cut
  if( right - left > gapmin )
    {
    Rectangle re( rin.left() + left, rin.top(), rin.left() + right, rin.bottom() );
    if( recursive && ( rvout.size() > rvout_size_orig || rvout.size() == 0 ) )
      find_rows( data, rdata, re, rvout, re.width() >= min_width );
    else rvout.push_back( re );
    }
  }


// Splits the rectangle 'rin' into horizontal bands separated by gaps and
// appends the resulting rectangles to 'rvout'.
//
// 'data' holds the dots; it is indexed as data[row][col] with both
// indexes relative to the top left corner of 'rdata'.
// 'rin' must be included in 'rdata', else nothing is done.
// If 'recursive' is true, every band found is handed to find_columns
// instead of being stored directly.
//
// Nothing is appended if more than 80% of rin.size() dots are set
// (presumably size() is the area of the rectangle; such a zone is taken
// for an image). If 'rin' is lower than two minimum rows plus a minimum
// gap, it is appended unchanged.
//
void find_rows( const std::vector< std::vector< bool > > & data,
                const Rectangle & rdata, const Rectangle & rin,
                std::vector< Rectangle > & rvout, bool recursive ) throw()
  {
  if( !rdata.includes( rin ) ) return;
  // minimum heights of a band and of a gap, derived from the height of 'rdata'
  const int rowmin = ( rdata.height() > 100 ) ? rdata.height() / 10 : 10;
  const int gapmin = ( rdata.height() > 300 ) ? rdata.height() / 100 : 3;
  const int min_height = ( 2 * rowmin ) + gapmin;
  if( rin.height() < min_height ) { rvout.push_back( rin ); return; }
  const unsigned int rvout_size_orig = rvout.size();
  // offsets of 'rin' inside 'data'
  const int ldiff = rin.left()-rdata.left();
  const int tdiff = rin.top()-rdata.top();
  // v_outline[row] = number of dots set in row 'row' of 'rin'
  std::vector< int > v_outline( rin.height(), 0 );

  int total_dots = 0;
  for( int row = 0; row < rin.height(); ++row )
    {
    const std::vector< bool > & datarow = data[row+tdiff];
    for( int col = 0; col < rin.width(); ++col )
      if( datarow[col+ldiff] ) ++v_outline[row];
    total_dots += v_outline[row];
    }
  if( 10 * total_dots > 8 * rin.size() ) return;	// eliminates images

  // rows with fewer dots than this are considered part of a gap
  const int threshold_gap = ( total_dots / ( rin.height() * 20 ) ) + 1;
  // 'top' and 'bottom' are row indexes relative to rin.top()
  int top = 0, bottom = rin.height() - 1;

  while( top < bottom && v_outline[top] <= 0 ) ++top;	// cut top border
  top = std::max( 0, top - gapmin );	// keep a margin of up to gapmin rows
  while( bottom > top && v_outline[bottom] <= 0 ) --bottom; // cut bottom border
  bottom = std::min( bottom + gapmin, rin.height() - 1 );

  // each iteration finds the next gap and cuts inside it
  while( bottom - top >= min_height )
    {
    int t, b;					// top and bottom of gap
    for( t = b = top + gapmin; t < bottom - gapmin; ++t )
      if( v_outline[t] < threshold_gap )
        {
        // extend the gap downwards while rows stay below the threshold
        for( b = t + 1; b < bottom && v_outline[b] < threshold_gap; ++b );
        if( b - t >= gapmin ) break; else t = b;
        }
    if( b - t < gapmin ) break;

    if( b < bottom )			// cut by a minimum near the center
      {
      // search outwards from the middle of the gap [t,b] for the row
      // with the fewest dots; stop as soon as an empty row is found
      int mid = ( b + t ) / 2, half = ( b - t ) / 2;
      b = mid;
      for( int i = 1; i <= half && v_outline[b] > 0; ++i )
        {
        if( v_outline[mid+i] < v_outline[b] ) b = mid + i;
        if( v_outline[mid-i] < v_outline[b] ) b = mid - i;
        }
      }
    Rectangle re( rin.left(), rin.top() + top, rin.right(), rin.top() + b );
    if( recursive ) find_columns( data, rdata, re, rvout, re.height() >= min_height && bottom - b > gapmin );
    else rvout.push_back( re );
    top = b;
    }
  // what remains below the last cut
  if( bottom - top > gapmin )
    {
    Rectangle re( rin.left(), rin.top() + top, rin.right(), rin.top() + bottom );
    if( recursive && rvout.size() > rvout_size_orig )
      find_columns( data, rdata, re, rvout, re.height() >= min_height );
    else rvout.push_back( re );
    }
  }

} // end namespace


// Builds a Bitmap by reading a pbm file from 'f'.
// The accepted formats are "P1" (plain pbm) and "P4" (pbm RAWBITS).
// If 'invert' is true, the dots set are those that are 0 in the file.
// Throws Error on a bad magic number, a zero width or height, junk in
// the header or in the plain data, or a premature end of file.
//
Bitmap::Bitmap( FILE * f, const bool invert ) throw( Bitmap::Error )
  : Rectangle( 0, 0, 0, 0 )
  {
  // magic number: 'P' followed by the format digit
  char filetype = 0;
  if( pbm_getrawbyte( f ) == 'P' )
    {
    const char kind = pbm_getrawbyte( f );
    if( kind == '1' || kind == '4' ) filetype = kind;
    }
  if( filetype == 0 )
    throw Error( "bad magic number - not a pbm file." );

  const int file_width = pbm_getint( f );
  if( file_width == 0 ) throw Error( "zero width in pbm file." );
  Rectangle::width( file_width );
  const int file_height = pbm_getint( f );
  if( file_height == 0 ) throw Error( "zero height in pbm file." );
  Rectangle::height( file_height );

  const bool raw = ( filetype == '4' );
  data.resize( height() );
  for( int row = top(); row <= bottom(); ++row )
    {
    data[row-top()].resize( width(), false );

    if( !raw )				// plain format, one character per dot
      {
      for( int col = left(); col <= right(); ++col )
        if( pbm_getbit( f ) != invert ) set_bit( row, col, true );
      continue;
      }

    // raw format, 8 dots per byte, most significant bit first;
    // every row begins at a new byte
    unsigned char byte = 0, mask = 0;
    for( int col = left(); col <= right(); ++col )
      {
      if( mask == 0 )
        {
        byte = pbm_getrawbyte( f );
        mask = 0x80;
        // an all-zero byte sets no dot when not inverting; skip its 8 columns
        if( !invert && byte == 0 ) { mask = 0; col += 7; continue; }
        }
      if( ( ( byte & mask ) != 0 ) != invert ) set_bit( row, col, true );
      mask >>= 1;
      }
    }
  }


// Builds a Bitmap that is 'source' reduced by a factor of 'scale'.
// Every dot of the new bitmap stands for a square of scale x scale dots
// of 'source' and is set if more than 'th' dots of that square are set.
// A negative 'th' selects the default of ( scale * scale - 1 ) / 2.
// Calls Ocrad::internal_error if 'scale' is smaller than 2 or larger
// than either dimension of 'source', or if 'th' >= scale * scale.
//
Bitmap::Bitmap( const Bitmap & source, const int scale, int th ) throw()
  : Rectangle( source )
  {
  if( scale < 2 || scale > source.width() || scale > source.height() ||
      th >= scale * scale )
    Ocrad::internal_error( "bad parameter building a reduced Bitmap" );

  if( th < 0 ) th = ( ( scale * scale ) - 1 ) / 2;
  Rectangle::height( source.height() / scale );
  Rectangle::width( source.width() / scale );

  data.resize( height() );
  for( int row = 0; row < height(); ++row )
    {
    std::vector< bool > & reduced_row = data[row];
    reduced_row.reserve( width() );
    const int first_srow = row * scale;		// first source row of the square
    for( int col = 0; col < width(); ++col )
      {
      const int first_scol = col * scale;	// first source column of the square
      int black = 0;
      // stop counting as soon as the threshold has been exceeded
      for( int dy = 0; dy < scale && black <= th; ++dy )
        {
        const std::vector< bool > & source_row = source.data[first_srow+dy];
        for( int dx = 0; dx < scale && black <= th; ++dx )
          if( source_row[first_scol+dx] ) ++black;
        }
      reduced_row.push_back( black > th );
      }
    }
  }


// Divides the bitmap in zones and stores them in 'rv' (whose previous
// contents are discarded). Returns the number of zones stored, which is
// always at least 1.
//
// Layout analysis is attempted only if 'layout_level' is 1 or 2, the
// bitmap starts at (0,0) and is larger than 200x200. It is made on a copy
// of the bitmap reduced by a factor of 10; level 2 enables the recursive
// search of rows inside columns. If the analysis is not attempted, or does
// not find at least two zones, the whole bitmap is returned as the only
// zone.
//
int Bitmap::analyse_layout( std::vector< Rectangle > & rv,
                            const int layout_level ) const throw()
  {
  rv.clear();

  if( layout_level >= 1 && layout_level <= 2 &&
      left() == 0 && top() == 0 && width() > 200 && height() > 200 )
    {
    Bitmap reduced( *this, 10, 9 );
    find_columns( reduced.data, reduced, reduced, rv, layout_level >= 2 );
    if( rv.size() > 1 )
      for( unsigned int i = 0; i < rv.size(); ++i )
        rv[i].enlarge( 10 );		// back to the scale of this bitmap
    // A single rectangle is still in the coordinates of the reduced
    // bitmap; discard it so that the whole bitmap is used below.
    else rv.clear();
    }
  if( rv.size() == 0 ) rv.push_back( *this );
  return rv.size();
  }


// Draws the outline of 're' on the bitmap.
// The rectangle is clipped to the area of the bitmap; an edge of 're' is
// drawn only if it lies inside the bitmap. Nothing is drawn if 're' does
// not overlap the bitmap.
//
void Bitmap::draw_rectangle( const Rectangle & re ) throw()
  {
  const int l = std::max( left(), re.left() );
  const int t = std::max( top(), re.top() );
  const int r = std::min( right(), re.right() );
  const int b = std::min( bottom(), re.bottom() );

  // Empty intersection. Without this test an edge of a rectangle lying
  // entirely at one side of the bitmap would pass the tests below and be
  // drawn at a coordinate outside of the bitmap.
  if( l > r || t > b ) return;

  if( l == re.left() )			// left edge not clipped
    for( int row = t; row <= b; ++row ) set_bit( row, l, true );
  if( t == re.top() )			// top edge not clipped
    for( int col = l; col <= r; ++col ) set_bit( t, col, true );
  if( r == re.right() )			// right edge not clipped
    for( int row = t; row <= b; ++row ) set_bit( row, r, true );
  if( b == re.bottom() )		// bottom edge not clipped
    for( int col = l; col <= r; ++col ) set_bit( b, col, true );
  }


// Draws the outline of the track 'tr' on the bitmap.
// The top and bottom limits of the track are drawn for every column of
// the bitmap between tr.left() and tr.right(); the left and right ends are
// drawn only if they lie inside the bitmap. Dots falling above or below
// the bitmap are skipped. Nothing is drawn if the track does not overlap
// the bitmap horizontally.
//
void Bitmap::draw_track( const Track & tr ) throw()
  {
  const int l = std::max( left(), tr.left() );
  const int r = std::min( right(), tr.right() );

  // No column in common. Without this test an end of a track lying
  // entirely at one side of the bitmap would pass the tests below and be
  // drawn at a column outside of the bitmap.
  if( l > r ) return;

  if( l == tr.left() )			// left end not clipped
    for( int row = tr.top( l ); row <= tr.bottom( l ); ++row )
      if( row >= top() && row <= bottom() ) set_bit( row, l, true );
  if( r == tr.right() )			// right end not clipped
    for( int row = tr.top( r ); row <= tr.bottom( r ); ++row )
      if( row >= top() && row <= bottom() ) set_bit( row, r, true );
  for( int col = l; col <= r; ++col )
    {
    int row = tr.top( col );
    if( row >= top() && row <= bottom() ) set_bit( row, col, true );
    row = tr.bottom( col );
    if( row >= top() && row <= bottom() ) set_bit( row, col, true );
    }
  }


// Converts the bitmap into a histogram of its dots.
// If 'vertical' is true, the dots of every column are moved to the bottom
// of the column; else the dots of every row are moved to the left of the
// row. The number of dots in every column (or row) is preserved.
//
void Bitmap::histogramize( const bool vertical ) throw()
  {
  if( vertical )
    {
    // Scan from the bottom, the same end where dots are piled, so that
    // the destination 'y' never gets ahead of the row being read. Scanning
    // from the top would read again the dots just written.
    for( int col = left(); col <= right(); ++col )
      for( int row = bottom(), y = bottom(); row >= top(); --row )
        if( get_bit( row, col ) )
          { set_bit( row, col, false ); set_bit( y--, col, true ); }
    }
  else
    {
    // Scan from the left, the same end where dots are piled ( x <= col ).
    for( int row = top(); row <= bottom(); ++row )
      for( int col = left(), x = left(); col <= right(); ++col )
        if( get_bit( row, col ) )
          { set_bit( row, col, false ); set_bit( row, x++, true ); }
    }
  }


// Writes the bitmap to 'f' as a pbm file.
// 'filetype' selects the format: '1' for plain pbm or '4' for pbm RAWBITS.
// Any other value is treated as '4'.
//
void Bitmap::save( FILE * f, char filetype ) const throw()
  {
  if( filetype != '1' && filetype != '4' ) filetype = '4';
  std::fprintf( f, "P%c\n%d %d\n", filetype, width(), height() );
  const bool raw = ( filetype == '4' );

  for( int row = top(); row <= bottom(); ++row )
    {
    if( !raw )				// one character per dot, one line per row
      {
      for( int col = left(); col <= right(); ++col )
        std::putc( get_bit( row, col ) ? '1' : '0', f );
      std::putc( '\n', f );
      continue;
      }

    // 8 dots per byte, most significant bit first
    unsigned char packed = 0;
    int nbits = 0;			// dots already stored in 'packed'
    for( int col = left(); col <= right(); ++col )
      {
      packed <<= 1;
      if( get_bit( row, col ) ) packed |= 1;
      if( ++nbits == 8 ) { std::putc( packed, f ); packed = 0; nbits = 0; }
      }
    // incomplete byte at end of row, padded with zeros at the low end
    if( nbits != 0 ) std::putc( packed << ( 8 - nbits ), f );
    }
  }
