#!/usr/bin/python

import glob,re,string,os,sys,time

os.environ["OCROSCRIPTS"] = "../../ocroscript/scripts"

files = glob.glob("*.png")
files.sort()

# FIXME use rec-bpnet script too
for toplevel in ["ocroscript rec-tess --output-mode=text"]:
    print "# testing:",toplevel
    count = 0
    total = 0
    total_layout = 0
    total_time = 0.0
    for file in files:
        base = re.sub(".png","",file)
        start = time.time()
        os.system("%s %s > %s"%(toplevel,file,base+".out.txt"))
        duration = time.time()-start
        total_time += duration
        stream = os.popen("ocroscript editdist %s %s"%(base+".txt",base+".out.txt"))
        [njumps, miss1, miss2, errs] = [int(i) for i in stream.read().split()]
        stream.close()
        print "# %s  OCR err: %d  LA err: %d  time: %f" % (file, errs, njumps+miss1+miss2, duration)
        total_layout += njumps
        total += errs
        count += 1
    print "OCR errors",re.sub(r'\s',"-",toplevel),total,total_time,count
    print "layout errors",re.sub(r'\s',"-",toplevel),total_layout,total_time,count

