#!/usr/bin/python
import glob
import os
import os.path
import random
import re
import string
import subprocess
import sys

# Path to the ocropus command-line executable (relative to the working directory).
ocropus = "../ocropus-cmd/ocropus"
# Matches runs of characters outside the kept set (ASCII letters, digits and
# the punctuation , . + = / * ? ! -); these runs are substituted away before
# ground truth and OCR output are compared.
simp_re = re.compile(r'[^a-zA-Z0-9,.+=/*?!-]+')

def die(s):
    """Write a ``FATAL:`` message for `s` to stderr and exit with status 1."""
    message = "FATAL: %s\n" % s
    sys.stderr.write(message)
    raise SystemExit(1)
def ensure(s):
    """Terminate the program through die() unless the path `s` exists."""
    if os.path.exists(s):
        return
    die("%s: file not found" % s)

# Refuse to start if the recognizer executable is missing.
ensure(ocropus)

# The directory of line images may be overridden by the first command-line
# argument; otherwise the default data directory is used.
directory = "../data/lines"
if len(sys.argv) > 1:
    directory = sys.argv[1]

# Keep only the PNG line images that have a matching ".txt" ground-truth file.
# glob returns names in arbitrary order, so sort them to make the report
# reproducible from one run to the next.
lines = sorted(glob.glob(directory + "/*.png"))
lines = [line for line in lines
         if os.path.exists(os.path.splitext(line)[0] + ".txt")]
sys.stderr.write("evaluating the %d lines with ground truth in %s\n"
                 % (len(lines), directory))

# Report header: identifies the directory this evaluation covers.
# (Emits the same bytes as the former print statement.)
sys.stdout.write("directory %s\n" % directory)

# The recognizer is configured through environment variables; extend a copy
# of our own environment rather than going through the `env` utility.
recognizer_env = dict(os.environ, hocr="0", remove_hyphens="0")

for line in lines:
    root = os.path.splitext(line)[0]
    with open(root + ".txt", "r") as truth_file:
        truth = truth_file.read()

    # Run the recognizer with an argument list (no shell), so file names
    # containing spaces or shell metacharacters are passed through intact.
    # stderr is captured in memory instead of a shared "_err" scratch file.
    proc = subprocess.Popen([ocropus, "linerec", line],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            env=recognizer_env,
                            universal_newlines=True)
    ocr, err = proc.communicate()

    # we allow two characters because something outputs a newline
    if len(err) > 2:
        # Flatten the message to a single line and keep only its first 40
        # characters so each report entry stays on one line.
        err = err.replace("\n", " ")[:40]
        sys.stdout.write("badmsg %s\t%s\n" % (line, err))

    # Compare case-insensitively after dropping every character outside the
    # set kept by simp_re (whitespace included).
    simp_truth = simp_re.sub('', truth).lower()
    simp_ocr = simp_re.sub('', ocr).lower()
    if simp_truth != simp_ocr:
        sys.stdout.write("badocr %s\t%s\t%s\n" % (line, simp_truth, simp_ocr))
    sys.stdout.flush()
