/*
    Copyright (C) 2012 Oleksiy Chernyavskyy

    This file is part of XDClient.

    XDClient is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    XDClient is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with XDClient.  If not, see <http://www.gnu.org/licenses/>.
*/

#define _GNU_SOURCE

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <wchar.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <wctype.h>
#include <tre/tre.h>
#include <limits.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "xdc.h"
#include "regexp_utf8.h"
#include "common.h"
#include "hash.h"
#include "md5.h"
#include "conf.h"
#include "adt_id.h"
#include "file.h"
#include "utf8.h"
#include "aprint.h"

/* Copyright line printed by print_version(). */
const char *XDC_Copyright = "Copyright (C) 2012 Oleksiy Chernyavskyy";
/* Version string printed by print_version(); empty when VERSION is not defined at build time. */
#ifdef VERSION
const char *XDC_Version = VERSION;
#else
const char *XDC_Version = "";
#endif

/* Approximate-matching parameters passed to tre_regawexec(); filled in by main(). */
static regaparams_t aparams;
/* Set to 1 by main() when any cost option is given; makes lookup_article() use tre_regawexec(). */
static int fuzzy_match = 0;
/* Scratch buffer used by lookup_article() to assemble article text; freed by main() on exit. */
static wchar_t *tag_val_buf = NULL;
/* Capacity of tag_val_buf, counted in wchar_t elements. */
static int tag_val_buf_len = 0;

/* Shared narrow-character scratch buffer (used by get_cfg_dir() in this file). */
char scbuf[BUFSZ];

/* Forward declaration: print_version() is defined further down in this file. */
void print_version(void);

/*
 * Program entry point.
 *
 * Builds the configuration from the command line with create_xdc_conf() and
 * then runs exactly one mode:
 *   - print the version or the usage text;
 *   - list / describe dictionaries (xdc_conf->list_dicts, xdc_conf->describe);
 *   - generate indexes (cmd_arg_gen_index);
 *   - look up a key (cmd_arg_key), optionally with approximate matching
 *     configured from the cost options.
 *
 * Returns 1 when no arguments are given or the configuration cannot be
 * created, 0 in every other case.
 */
int main(int argc, char *argv[])
{
  int skip_abbrev;
  xdc_conf_t *xdc_conf;
  nameval_t *nv;
  fstat_lt *flist, *dict_flist_start;
  fstat_lt *flist2, *idx_flist_start;
  wchar_t *full_name;
  wchar_t *descr;
  char *lang_from, *lang_to;
  index_lt *index;
  wchar_t *key_wcs;
  wcs_ll_t *subre, *subre_start;
  wchar_t *re_norm;

  setlocale(LC_CTYPE, "en_US.UTF-8");

  if (argc < 2) {
	print_usage();
	return 1;
  }

  if (! (xdc_conf = create_xdc_conf(argc, argv))) {
	fprintf(stderr, "\n");
	print_usage();
	return 1;
  }

  if (hlookup(xdc_conf->symtab, "cmd_arg_print_version", 0)) {
	print_version();
	free_xdc_conf(xdc_conf);
	return 0;
  }

  if (hlookup(xdc_conf->symtab, "cmd_arg_print_help", 0)) {
	print_usage();
	free_xdc_conf(xdc_conf);
	return 0;
  }

  /* Debug switches: dump the symbol table and/or stop before doing any work. */
  if (hlookup(xdc_conf->symtab, "cmd_arg_debug_print_htable", 0))
	print_htable(xdc_conf->symtab, print_nv, 0);
  if (hlookup(xdc_conf->symtab, "cmd_arg_debug_no_exec", 0))
	goto MAIN_RETURN;

  if (xdc_conf->list_dicts || xdc_conf->describe) {
	/* --- list / describe mode --- */
	flist = dict_flist_start = search_dicts(xdc_conf, 0);

	idx_flist_start = search_indexes(xdc_conf, 0);
	load_indexes(idx_flist_start);
	hash_indexes(xdc_conf, idx_flist_start, H_IDX_MD5_PART);

	while (flist) {
	  if (file_exist(flist->path) && is_rfile(flist->path) && !is_dir(flist->path) \
		  && check_lang_from(xdc_conf, flist) && check_lang_to(xdc_conf, flist) && check_dict_name(xdc_conf, flist)) {
		if (!flist->md5_part)
		  fstat_update(flist, FSTAT_MD5_PART);

		/* Pre-set the out parameter so a getter that leaves it untouched is safe. */
		full_name = NULL;
		get_full_name(xdc_conf, flist, &full_name);

		if (!xdc_conf->verbose && !xdc_conf->describe) {
		  /* Plain listing: one dictionary name per line. */
		  if (full_name)
			fwprintf(stdout, L"%S\n", full_name);
		} else {
		  fwprintf(stdout, L"Name: ");
		  if (full_name)
			fwprintf(stdout, L"%S", full_name);
		  fwprintf(stdout, L"\n");
		  fwprintf(stdout, L"Path: %s\n", flist->path);

		  lang_from = NULL;
		  lang_to = NULL;
		  get_lang(xdc_conf, flist, &lang_from, &lang_to);
		  fwprintf(stdout, L"Lang: ");
		  if (lang_from)
			fwprintf(stdout, L"%s", lang_from);
		  fwprintf(stdout, L"-");
		  if (lang_to)
			fwprintf(stdout, L"%s", lang_to);
		  fwprintf(stdout, L"\n");

		  if ((nv = hlookup(xdc_conf->symtab, pfmbs("index:dict_md5_part:%s", flist->md5_part), 0))) {
			flist2 = (fstat_lt*) nv->val;
			if (flist2 && (index = (index_lt*) flist2->udata))
			  fwprintf(stdout, L"Number of articles: %u\n", index->arnum);
			else
			  fwprintf(stdout, L"Number of articles: unknown\n");

			/* Report every loaded index whose partial MD5 matches this dictionary. */
			flist2 = idx_flist_start;
			while(flist2) {
			  if ((index = (index_lt*) flist2->udata)) {
				if (!strcmp(index->dict_md5_part, flist->md5_part))
				  fwprintf(stdout, L"Index: %s\n", flist2->path);
			  }
			  flist2 = flist2->next;
			}
		  } else {
			fwprintf(stdout, L"Index: no index found\n");
		  }

		  if (xdc_conf->describe) {
			fwprintf(stdout, L"Description:\n");
			descr = NULL;
			get_description(xdc_conf, flist, &descr);
			if (descr) {
			  print_xml_body_double_conv(descr);
			  fwprintf(stdout, L"\n");

			  free(descr);
			  descr = NULL;
			}
		  }

		  fwprintf(stdout, L"\n");

		  free(lang_from);
		  free(lang_to);
		}

		/* Released in both branches; the plain listing used to leak it. */
		free(full_name);
	  }
	  flist = flist->next;
	}
	unhash_indexes(xdc_conf);
	flist_free(idx_flist_start, 0);

	if (dict_flist_start)
	  flist_free(dict_flist_start, 0);
  } else if (hlookup(xdc_conf->symtab, "cmd_arg_gen_index", 0)) {
	/* --- index generation: abbreviation skipping is disabled for its duration --- */
	skip_abbrev = xdc_conf->skip_abbrev;
	xdc_conf->skip_abbrev = 0;
	gen_index(xdc_conf);
	xdc_conf->skip_abbrev = skip_abbrev;
  } else if ((nv = hlookup(xdc_conf->symtab, "cmd_arg_key", 0))) {
	/* --- key lookup --- */
	key_wcs = nv->wcs_val;
	if (!key_wcs)
	  goto MAIN_RETURN;

	regaparams_set_defaults(&aparams);

	/*
	 * Any cost option turns approximate matching on.  Unless max-cost is
	 * given explicitly it ends up as the largest individual cost supplied.
	 */
	aparams.max_cost = 0;
	nv = hlookup(xdc_conf->symtab, "cmd_arg_delete_cost", 0);
	if (nv) {
	  aparams.cost_del = atoi(nv->mbs_val);
	  fuzzy_match = 1;
	  if (aparams.cost_del > aparams.max_cost)
		aparams.max_cost = aparams.cost_del;
	}
	nv = hlookup(xdc_conf->symtab, "cmd_arg_insert_cost", 0);
	if (nv) {
	  aparams.cost_ins = atoi(nv->mbs_val);
	  fuzzy_match = 1;
	  if (aparams.cost_ins > aparams.max_cost)
		aparams.max_cost = aparams.cost_ins;
	}
	nv = hlookup(xdc_conf->symtab, "cmd_arg_substitute_cost", 0);
	if (nv) {
	  aparams.cost_subst = atoi(nv->mbs_val);
	  fuzzy_match = 1;
	  if (aparams.cost_subst > aparams.max_cost)
		aparams.max_cost = aparams.cost_subst;
	}
	nv = hlookup(xdc_conf->symtab, "cmd_arg_max_cost", 0);
	if (nv) {
	  aparams.max_cost = atoi(nv->mbs_val);
	  fuzzy_match = 1;
	}

	/* Each sub-expression is searched in its raw and in its normalised form. */
	subre_start = re_get_subre(xdc_conf, key_wcs);
	subre = subre_start;
	while(subre) {
	  re_norm = subre->wcs ? wcsdup(subre->wcs) : NULL;
	  if (re_norm) {
		norm_re(re_norm);
		key_to_art(xdc_conf, subre->wcs, re_norm);
		free(re_norm);
	  }
	  subre = subre->next;
	}
	wcsl_free(subre_start);
  }

MAIN_RETURN:
  free_xdc_conf(xdc_conf);
  free(tag_val_buf);

  return 0;
}

void key_to_art(xdc_conf_t *xdc_conf, wchar_t *re, wchar_t *re_norm)
{
  int nhit = 0;
  int ret;
  fstat_lt *dict_flist_start, *idx_flist_start, *flist;
  nameval_t *nv;
  unsigned mnum;
  regex_t preg;
  regex_t preg_norm;

  if (!xdc_conf)
	return;

  ret = tre_regwcomp(&preg_norm, re_norm, REG_EXTENDED | REG_ICASE);
  if (ret > 0) {
	switch(ret) {
	  case REG_BADPAT:
		fprintf(stderr, "xdclient: regexp error: Invalid regexp. Multibyte character set is used in the current locale, and regex contained an invalid multibyte sequence\n");
		break;
	  case REG_ECOLLATE:
		fprintf(stderr, "xdclient: regexp error: Invalid collating element referenced. Equivalence classes or multicharacter collating elements are used in bracket expressions (they are not supported yet)\n");
		break;
	  case REG_ECTYPE:
		fprintf(stderr, "xdclient: regexp error: Unknown character class name in [[:name:]]\n");
		break;
	  case REG_EESCAPE:
		fprintf(stderr, "xdclient: regexp error: The last character of regex was a backslash (\\)\n");
		break;
	  case REG_ESUBREG:
		fprintf(stderr, "xdclient: regexp error: Invalid back reference; number in i\\digit invalid\n");
		break;
	  case REG_EBRACK:
		fprintf(stderr, "xdclient: regexp error: [] imbalance\n");
		break;
	  case REG_EPAREN:
		fprintf(stderr, "xdclient: regexp error: \\(\\) or () imbalance\n");
		break;
	  case REG_EBRACE:
		fprintf(stderr, "xdclient: regexp error: \\{\\} or {} imbalance\n");
		break;
	  case REG_BADBR:
		fprintf(stderr, "xdclient: regexp error: {} content invalid: not a number, more than two numbers, first larger than second, or number too large\n");
		break;
	  case REG_ERANGE:
		fprintf(stderr, "xdclient: regexp error: Invalid character range, e.g. ending point is earlier in the collating order than the starting point\n");
		break;
	  case REG_ESPACE:
		fprintf(stderr, "xdclient: regexp error: Out of memory, or an internal limit exceeded\n");
		break;
	  case REG_BADRPT:
		fprintf(stderr, "xdclient: regexp error: Invalid use of repetition operators: two or more repetition operators have been chained in an undefined way\n");
		break;
	}
	return;
  }
  ret = tre_regwcomp(&preg, re, REG_EXTENDED | REG_ICASE);

  dict_flist_start = search_dicts(xdc_conf, 0);
  idx_flist_start = search_indexes(xdc_conf, 0);
  load_indexes(idx_flist_start);
  dup_index_remove(xdc_conf, idx_flist_start);
  hash_indexes(xdc_conf, idx_flist_start, H_IDX_MD5_PART);

  mnum = 0;
  flist = dict_flist_start;
  while(flist) {
	if (file_exist(flist->path) && is_rfile(flist->path) && !is_dir(flist->path)) {
	  if (!flist->md5_part)
		fstat_update(flist, FSTAT_MD5_PART);
	  nv = hlookup(xdc_conf->symtab, pfmbs("dict_passed:md5_part:%s", flist->md5_part), 1);
	  if (!nv->ival) {
		nv->ival = 1;
		if ((nv = hlookup(xdc_conf->symtab, pfmbs("index:dict_md5_part:%s", flist->md5_part), 0))) {
		  ret = lookup_article(xdc_conf, flist, (fstat_lt*) nv->val, &preg, re, &preg_norm, re_norm);
		  if (ret > 0)
			nhit += ret;
		} else {
		  mnum++;
		}
	  }
	}
	flist = flist->next;
  }

  if (mnum && xdc_conf->warn) {
	fprintf(stdout, "\n\n\nWARNING: missing indexes for the following dictionaries:\n");
	flist = dict_flist_start;
	while (flist) {
	  if (! hlookup(xdc_conf->symtab, pfmbs("index:dict_md5_part:%s", flist->md5_part), 0))
		fprintf(stdout, "  %s\n", flist->path);
	  flist = flist->next;
	}
	fprintf(stdout, "\nto generate indexes run: ");
	nv = hlookup(xdc_conf->symtab, "argv0", 0);
	fprintf(stdout, "%s -g", nv->mbs_val);
	nv = hlookup(xdc_conf->symtab, "cmd_arg_conf", 0);
	if (nv && nv->wcs_val)
	  fwprintf(stdout, L" --conf=%S", nv->wcs_val);
	nv = hlookup(xdc_conf->symtab, "cmd_arg_path", 0);
	if (nv && nv->mbs_val)
	  fprintf(stdout, " -p %s", nv->mbs_val);
	nv = hlookup(xdc_conf->symtab, "cmd_arg_add_path", 0);
	if (nv && nv->mbs_val)
	  fprintf(stdout, " --add-path=%s", nv->mbs_val);
	fprintf(stdout, "\n");
  }

  flist_free(dict_flist_start, 0);
  flist_free(idx_flist_start, 0);
  unhash_indexes(xdc_conf);
  htable_regex_free(xdc_conf->symtab, "dict_passed:md5_part:.*", 0);
  whtable_regex_free(xdc_conf->wsymtab, L"lookup_key:.*", 0);
  tre_regfree(&preg);
  tre_regfree(&preg_norm);
}

int lookup_article(xdc_conf_t *xdc_conf, fstat_lt *dict_fst, fstat_lt *index_fst, regex_t *preg, wchar_t *re, regex_t *preg_norm, wchar_t *re_norm)
{
  long istart_pos;
  const int ret_error = -1;
  FILE *ifp;
  FILE *dfp;
  long _pos;
  int j;
  xdxf_idx_t xdxf_idx;
  DTag *tag_ar;
  int rret1, rret2;
  wchar_t *_key;
  static int nhit_all = 0;
  int nhit = 0;
  wnameval_t *wnv;
  index_lt *index;
  wchar_t *key_start = NULL;
  int kstart_len;
  regmatch_t pmatch[RE_NMATCH];
  regamatch_t amatch;
  wchar_t *key_norm = NULL;
  regmatch_t **pmatch_set = NULL;
  regamatch_t fs_amatch;
  regmatch_t *pmatch2;
  int full_search = 0;
  int rret3 = 0;
  wnameval_t *wnv_article;
  int fs_found;
  int i;
  DTag *tag_ar_fs;
  DTag *tag;
  DTag *tag_prev, *tag_next;
  int len;

  amatch.pmatch = pmatch;
  amatch.nmatch = RE_NMATCH;

  if (!xdc_conf || !dict_fst || !index_fst)
	return ret_error;

  index = (index_lt*) index_fst->udata;

  if (!index->dict_size || !index->dict_md5_full || !index->dict_md5_part || \
	  !index->index_size || !index->index_body_start)
	return ret_error;

  if (! check_lang_from(xdc_conf, dict_fst))
	return 0;
  if (! check_lang_to(xdc_conf, dict_fst))
	return 0;
  if (! check_dict_name(xdc_conf, dict_fst))
	return 0;

  if (dict_fst->size != index->dict_size || strcmp(dict_fst->md5_part, index->dict_md5_part)) {
	fprintf(stderr, "xdclient: dict file %s was changes. Index must be regenerated\n", dict_fst->path);
	return 0;
  }
  
  if (dict_fst->mtime != index->dict_mtime || dict_fst->ctime != index->dict_ctime) {
	fstat_update(dict_fst, FSTAT_MD5_FULL | FSTAT_MD5_PART);
	if (strcmp(dict_fst->md5_full, index->dict_md5_full)) {
	  fprintf(stderr, "xdclient: dict file %s was changes. Index must be regenerated!\n", dict_fst->path);
	  return 0;
	}
	if (! update_idx_header(index, dict_fst))
	  return 0;
  }

  if (hlookup(xdc_conf->symtab, "cmd_arg_full_search", 0))
	full_search = 1;

  if (!fuzzy_match && !full_search) {
	key_start = re_start_literals(re_norm);
	if (key_start) {
	  norm_key(key_start);
	  istart_pos = get_istart_pos(dict_fst, index, key_start);
	} else {
	  istart_pos = index->index_body_start;
	}
  } else {
	istart_pos = index->index_body_start;
  }

  if (istart_pos != -1) {

	if (! (ifp = fopen(index->index_path, "r"))) {
	  return ret_error;
	}

	if (! (dfp = fopen(dict_fst->path, "r"))) {
	  fclose (ifp);
	  return ret_error;
	}

	xdxf_idx.index = index;
	xdxf_idx.shift = 0;
	xdxf_idx.size = 0;
	xdxf_idx.ktag_index = 0;
	xdxf_idx.mask = 0;

	_pos = istart_pos;
	fseek(ifp, _pos, SEEK_SET);

	fread(&xdxf_idx.shift, sizeof(uint32_t), 1, ifp);
	fread(&xdxf_idx.size, sizeof(uint32_t), 1, ifp);
	fread(&xdxf_idx.ktag_index, ktag_index_len(), 1, ifp);
	fread(&xdxf_idx.mask, opt_mask_len(), 1, ifp);

	_key = read_key_by_idx(dict_fst, &xdxf_idx, dfp);

	key_norm = wcsdup(_key);
	norm_key(key_norm);
	if (key_start) {
	  kstart_len = wcslen(key_start);
	  rret1 = wcsncmp(key_norm, key_start, kstart_len) ? 0 : 1;
	} else {
	  rret1 = 1;
	}

	j = 1;
	while(rret1) {
	  rret3 = 0;
	  fs_found = 0;

	  if (fuzzy_match)
		rret2 = tre_regawexec(preg_norm, key_norm, &amatch, aparams, 0) ? 0 : 1;
	  else
		rret2 = tre_regwexec(preg_norm, key_norm, RE_NMATCH, pmatch, 0) ? 0 : 1;

	  if (full_search) {
		if (!pmatch_set) {
		  pmatch_set = (regmatch_t**) malloc(sizeof(regmatch_t*) * PMATCH_SET_NUM);
		  for (i=0; i < PMATCH_SET_NUM; i++)
			pmatch_set[i] = (regmatch_t*) malloc(sizeof(regmatch_t) * RE_NMATCH);

		  fs_amatch.nmatch = RE_NMATCH;
		}

		tag_ar_fs = read_xdxf(dict_fst->path, xdxf_idx.shift, xdxf_idx.size, dfp);
		fs_found = 0;
		tag = tag_rnext(tag_ar_fs, tag_ar_fs, S_ALL);
		while(tag) {
		  if (is_tag_name(tag, L"tr")) {
			tag = tag_rnext(tag_ar_fs, tag, S_DOWN | S_UP);
		  } else {
			if (tag->type == DT_Value) {
			  if (!tag_val_buf) {
				tag_val_buf_len = tag->vlen * 10;
				tag_val_buf = (wchar_t*) malloc(sizeof(wchar_t) * tag_val_buf_len);
			  } else if (tag_val_buf_len <= tag->vlen) {
				free(tag_val_buf);
				tag_val_buf_len = tag->vlen * 10;
				tag_val_buf = (wchar_t*) malloc(sizeof(wchar_t) * tag_val_buf_len);
			  }

			  wcsncpy(tag_val_buf, tag->value, tag->vlen);
			  tag_val_buf[tag->vlen] = L'\0';

			  tag_prev = tag;
			  tag_next = tag_rnext(tag_ar_fs, tag, S_ALL);
			  while(tag_next) {
				if (tag_next->type == DT_Value) {
				  len = wcslen(tag_val_buf);
				  if (len + tag_next->vlen >= tag_val_buf_len) {
					free(tag_val_buf);
					tag_val_buf_len = (len + tag_next->vlen) * 10;
					tag_val_buf = (wchar_t*) malloc(sizeof(wchar_t) * tag_val_buf_len);
				  }
				  wcsncpy(&tag_val_buf[len], tag_next->value, tag_next->vlen);
				  tag_val_buf[len + tag_next->vlen] = L'\0';
				} else if (is_tag_name(tag_next, L"i") ||
					is_tag_name(tag_next, L"c") ||
					is_tag_name(tag_next, L"co") ||
					is_tag_name(tag_next, L"sup") ||
					is_tag_name(tag_next, L"sub") ||
					is_tag_name(tag_next, L"b") ||
					is_tag_name(tag_next, L"tt") ||
					is_tag_name(tag_next, L"big") ||
					is_tag_name(tag_next, L"small") ||
					is_tag_name(tag_next, L"blockquote")) {
				} else {
				  break;
				}
				tag_prev = tag_next;
				tag_next = tag_rnext(tag_ar_fs, tag_next, S_ALL);
			  }

			  convert_xml_escape(tag_val_buf);

			  if (fuzzy_match) {
				fs_amatch.pmatch = pmatch2 = pmatch_set[fs_found];
				rret3 = tre_regawexec(preg, tag_val_buf, &fs_amatch, aparams, 0) ? 0 : 1;
			  } else {
				pmatch2 = pmatch_set[fs_found];
				rret3 = tre_regwexec(preg, tag_val_buf, RE_NMATCH, pmatch2, 0) ? 0 : 1;
			  }
			  if (rret3) {
				tag->udata = (void*) pmatch2;
				tag->udata_type = Tregmatch;
				fs_found++;
			  }

			  tag = tag_prev;
			}

			if (fs_found == PMATCH_SET_NUM)
			  tag = NULL;
			else
			  tag = tag_rnext(tag_ar_fs, tag, S_ALL);
		  }
		}
		if (!fs_found)
		  tag_free(tag_ar_fs);
	  }

	  if (rret2 || fs_found) {
		wnv = whlookup(xdc_conf->wsymtab, pfwcs(L"lookup_key:%S", _key), 1);
		wnv->ival++;
		wnv_article = whlookup(xdc_conf->wsymtab, pfwcs(L"lookup_article:%ld", xdxf_idx.shift), 1);
		wnv_article->ival++;

		if (wnv_article->ival == 1) {
		  nhit_all++;
		  nhit++;
		  if (fs_found)
			tag_ar = tag_ar_fs;
		  else
			tag_ar = read_xdxf(dict_fst->path, xdxf_idx.shift, xdxf_idx.size, dfp);
		  if (tag_ar) {
			if (!xdc_conf->cmd_list || xdc_conf->verbose) {
			  if (nhit_all > 1 && nhit == 1)
				fwprintf(stdout, L"\n-------------------------------------------------\n");
			  if (nhit == 1) {
				print_dict_descr(xdc_conf, dict_fst);
				fwprintf(stdout, L"[%d] ", nhit);
			  } else {
				if (xdc_conf->cmd_list)
				  fwprintf(stdout, L"[%d] ", nhit);
				else
				  fwprintf(stdout, L"\n\n[%d] ", nhit);
			  }
			}
			if (xdc_conf->cmd_list) {
			  if (xdc_conf->verbose)
				print_key(xdc_conf, _key);
			  else {
				wnv = whlookup(xdc_conf->wsymtab, pfwcs(L"lookup_key:%S", _key), 0);
				if (wnv && wnv->ival == 1)
				  fwprintf(stdout, L"%S\n", _key);
			  }
			} else {
			  if (rret2 && full_search)
				print_tag_article(xdc_conf, tag_ar, xdxf_idx.mask, pmatch);
			  else
				print_tag_article(xdc_conf, tag_ar, xdxf_idx.mask, NULL);
			}
			tag_free(tag_ar);
		  } else {
			fprintf(stderr, "xdc: %s:%d: could not get article\n", __FUNCTION__, __LINE__);
		  }

		  if (!xdc_conf->cmd_list) {
			fwprintf(stdout, L"\n");
			fflush(stdout);
		  }
		}
	  }

	  _pos = istart_pos + j * (8 + index->ktag_index_len + index->mask_len);
	  j++;

	  if (_pos >= index->index_size)
		break;

	  fseek(ifp, _pos, SEEK_SET);
	  fread(&xdxf_idx.shift, sizeof(uint32_t), 1, ifp);
	  fread(&xdxf_idx.size, sizeof(uint32_t), 1, ifp);
	  fread(&xdxf_idx.ktag_index, ktag_index_len(), 1, ifp);
	  fread(&xdxf_idx.mask, opt_mask_len(), 1, ifp);

	  if (key_norm)
		free(key_norm);
	  if (_key)
		free(_key);

	  _key = read_key_by_idx(dict_fst, &xdxf_idx, dfp);

	  if (_key) {
		key_norm = wcsdup(_key);
		norm_key(key_norm);
		if (key_start)
		  rret1 = wcsncmp(key_norm, key_start, kstart_len) ? 0 : 1;
	  } else {
		rret1 = 0;
	  }
	}

	if (key_norm)
	  free(key_norm);
	if (_key)
	  free(_key);

	if (nhit >0)
	  fflush(stdout);

	fclose(ifp);
	fclose(dfp);

	if (full_search && pmatch_set) {
	  for (i=0; i<PMATCH_SET_NUM; i++)
		free(pmatch_set[i]);
	  free(pmatch_set);
	}

  } else {
	if (key_start) {
	  free(key_start);
	  key_start = NULL;
	}
	return 0;
  }

  whtable_regex_free(xdc_conf->wsymtab, L"lookup_article:.*", 0);

  if (key_start)
	free(key_start);

  return nhit;
}

/*
 * Find the index record at which a scan for `key` should begin.
 *
 * dict  - dictionary file (its path is opened for reading keys).
 * index - loaded index description (path, body start/size, record layout).
 * key   - non-empty prefix to look for; compared with wcsncmp() against
 *         keys passed through norm_key().
 *
 * The first record of the index body is tested first.  Otherwise the
 * records are bisected (presumably the index is sorted by normalised key -
 * TODO confirm against the index generator); once a record whose key starts
 * with `key` is found, the search steps backwards one record at a time
 * while the preceding records still match.
 *
 * Returns the byte offset (within the index file) of the last matching
 * record visited, or -1 when an argument is invalid, a file cannot be
 * opened, a key cannot be read, or no record matches.
 */
long get_istart_pos(fstat_lt *dict, index_lt *index, wchar_t *key)
{
  long start, end;
  int cmpret;
  int j;
  FILE *ifp;
  FILE *dfp;
  long _pos, _ipos;
  wchar_t *_key;
  xdxf_idx_t xdxf_idx;
  const long ret_error = -1;
  int klen;

  if (!dict || !index || !key)
	return ret_error;

  /* an empty key cannot select a starting position */
  if (! (klen = wcslen(key)))
	return ret_error;

  if (!dict->path || !index->index_path)
	return ret_error;

  if (! is_rfile(dict->path) || ! is_rfile(index->index_path))
	return ret_error;

  if (! (ifp = fopen(index->index_path, "r")))
	return ret_error;

  if (! (dfp = fopen(dict->path, "r"))) {
	fclose (ifp);
	return ret_error;
  }

  xdxf_idx.index = index;
  xdxf_idx.shift = 0;
  xdxf_idx.size = 0;
  xdxf_idx.ktag_index = 0;
  xdxf_idx.mask = 0;

  /* check first key in index */
  fseek(ifp, index->index_body_start, SEEK_SET);

  /* one record: two 32-bit fields followed by the key-tag index and the option mask */
  fread(&xdxf_idx.shift, sizeof(uint32_t), 1, ifp);
  fread(&xdxf_idx.size, sizeof(uint32_t), 1, ifp);
  fread(&xdxf_idx.ktag_index, ktag_index_len(), 1, ifp);
  fread(&xdxf_idx.mask, opt_mask_len(), 1, ifp);

  _key = read_key_by_idx(dict, &xdxf_idx, dfp);
  if (_key) {
	norm_key(_key);
	if (! wcsncmp(_key, key, klen)) {
/*	if (reg_test(_key, key, RE_IGN_CASE | RE_COMP_SPACES | RE_STOP_WILDCARD | RE_SKIP_NONALPHA \
		  | RE_SKIP_START_SPC | RE_PREFIX_DASH | RE_SKIP_END_SPC, NULL)) {
		  */
	  /* the very first record already matches: start there */
	  fclose(ifp);
	  fclose(dfp);
	  free(_key);
	  return index->index_body_start;
	}
	free(_key);
	_key = NULL;
  } else {
	fclose(ifp);
	fclose(dfp);
	return ret_error;
  }


  /* start/end are record numbers relative to the index body; j is the probe offset from start */
  _ipos = -1;
  start = 0;
  end = index->index_body_size / (8 + index->ktag_index_len + index->mask_len);

  for (j = (end - start)/2; ;) {
	_pos = index->index_body_start + (start+j) * (8 + index->ktag_index_len + index->mask_len);
	fseek(ifp, _pos, SEEK_SET);

	fread(&xdxf_idx.shift, sizeof(uint32_t), 1, ifp);
	fread(&xdxf_idx.size, sizeof(uint32_t), 1, ifp);
	fread(&xdxf_idx.ktag_index, ktag_index_len(), 1, ifp);
	fread(&xdxf_idx.mask, opt_mask_len(), 1, ifp);

	_key = read_key_by_idx(dict, &xdxf_idx, dfp);

	if (_key) {
	  /*
	  if (! reg_test(_key, key, RE_IGN_CASE | RE_COMP_SPACES | RE_STOP_WILDCARD | RE_SKIP_NONALPHA \
			| RE_SKIP_START_SPC | RE_PREFIX_DASH | RE_SKIP_END_SPC, &retr)) {
		if (retr.cmp_is_valid)
		  cmpret = retr.cmp_res;
		else
		  break;
	  } else {
		cmpret = 0;
	  }
	  */

	  norm_key(_key);
	  cmpret = wcsncmp(key, _key, klen);

	  /* interval exhausted, or the backward walk left the matching run */
	  if (cmpret != 0 && j == 0)
		break;

	  if (cmpret > 0) {
		/* key sorts after the probed record: continue in the upper half */
		if (_ipos != -1)
		  break;
		start += j;
		j = (end - start)/2;
	  } else if (cmpret < 0) {
		/* key sorts before the probed record: continue in the lower half */
		if (_ipos != -1)
		  break;
		end -= j;
		j = (end - start)/2;
	  }  else {
		/* match: remember it, then probe the record immediately before it */
		_ipos = _pos;
		start+= j;
		j=0;
		if (start > 0)
		  start--;
		else
		  break;
	  }
	} else {
	  fclose(ifp);
	  fclose(dfp);
	  return ret_error;
	}

	if (_key) {
	  free(_key);
	  _key = NULL;
	}
  }

  /* a break above leaves the last key allocated */
  if (_key) {
	free(_key);
	_key = NULL;
  }

  fclose(ifp);
  fclose(dfp);

  return _ipos;
}


/*
 * Copy `wcs` into `ppbuf` (at most ppbuf_len-1 characters plus terminator)
 * and apply the transformations selected by `flags`, in this order:
 *   PP_TO_LOWER      - lower-case every character while copying;
 *   PP_CHOP_SPACES   - remove leading and trailing whitespace;
 *   PP_CROP_NONALPHA - crop_non_alnum();
 *   PP_CROP_SPC      - shrink_norm_spaces();
 *   PP_ADD_PREF_DASH - insert '^' in front, dropping the last character if
 *                      the buffer is full (skipped when the buffer cannot
 *                      hold '^' plus the terminator).
 *
 * Returns 1 on success, 0 when an argument is NULL or ppbuf_len is not
 * positive.
 */
int pp_wcs(wchar_t *ppbuf, int ppbuf_len, wchar_t *wcs, unsigned flags)
{
  wchar_t wc, wc_next;
  int i, h;

  if (!ppbuf || ppbuf_len <= 0 || !wcs)
	return 0;

  for (i=0; wcs[i] != L'\0' && i < ppbuf_len-1; i++) {
	wc = wcs[i];
	if (flags & PP_TO_LOWER)
	  wc = towlower(wc);
	ppbuf[i] = wc;
  }
  ppbuf[i] = L'\0';

  if (flags & PP_CHOP_SPACES) {
	/* skip leading whitespace, then shift the rest to the front */
	for (i=0; ppbuf[i] != L'\0' && iswspace(ppbuf[i]); i++) ;

	for (h=0; ppbuf[i] != L'\0'; i++, h++)
	  ppbuf[h] = ppbuf[i];
	ppbuf[h] = L'\0';

	/* trim trailing whitespace; the index is tested before the element is read */
	while (h > 0 && iswspace(ppbuf[h-1]))
	  h--;
	ppbuf[h] = L'\0';
  }

  if (flags & PP_CROP_NONALPHA)
	crop_non_alnum(ppbuf);

  if (flags & PP_CROP_SPC)
	shrink_norm_spaces(ppbuf);

  if ((flags & PP_ADD_PREF_DASH) && ppbuf_len >= 2) {
	/* shift everything one position to the right behind a leading '^' */
	wc = ppbuf[0];
	ppbuf[0] = L'^';
	for (i=1; wc != L'\0' && i < ppbuf_len-1; i++) {
	  wc_next = ppbuf[i];
	  ppbuf[i] = wc;
	  wc = wc_next;
	}
	ppbuf[i] = L'\0';
  }

  return 1;
}

/*
 * Normalise a regular expression in place: whitespace is collapsed first
 * (re_shrink_norm_spaces), then non-alphanumeric literals are cropped
 * (re_crop_non_alnum).  A NULL argument is ignored.
 */
void norm_re(wchar_t *re)
{
  if (re != NULL) {
	re_shrink_norm_spaces(re);
	re_crop_non_alnum(re);
  }
}

/*
 * Collapse whitespace in a regular expression, in place.
 *
 * Outside bracket expressions every run of whitespace becomes a single
 * space; the run is dropped entirely at the very beginning and right after
 * '^'.  Text inside [...] is copied untouched.  Finally, whitespace that
 * directly precedes the trailing run of '$' characters (or the end of the
 * string) is removed.  A NULL argument is ignored.
 */
void re_shrink_norm_spaces(wchar_t *re)
{
  int in = 0;			/* read position */
  int out = 0;			/* write position, never ahead of `in` */
  int in_bracket = 0;
  int tail, keep;
  wchar_t prev = 0;		/* previous character; reset after an escaped backslash */
  wchar_t cur;

  if (re == NULL)
	return;

  while (re[in] != L'\0') {
	cur = re[in];

	if (!in_bracket && cur == L'[' && prev != L'\\') {
	  in_bracket = 1;
	  re[out++] = cur;
	} else if (in_bracket && cur == L']' && prev != L'[') {
	  in_bracket = 0;
	  re[out++] = cur;
	} else if (in_bracket || !iswspace(cur)) {
	  re[out++] = cur;
	} else {
	  if (out > 0 && re[out-1] != L'^')
		re[out++] = L' ';
	  /* stop on the last character of the whitespace run */
	  while (iswspace(re[in+1]))
		in++;
	}

	prev = (cur == L'\\' && prev == L'\\') ? 0 : cur;
	in++;
  }
  re[out] = L'\0';

  /* tail: first character of the trailing '$' run (or the terminator) */
  tail = out;
  while (tail > 0 && re[tail-1] == L'$')
	tail--;

  /* keep: end of the text once whitespace in front of that run is dropped */
  keep = tail;
  while (keep > 0 && iswspace(re[keep-1]))
	keep--;

  while (re[tail] != L'\0')
	re[keep++] = re[tail++];
  re[keep] = L'\0';
}

/*
 * Remove non-alphanumeric literal characters from a regular expression,
 * in place.  A character is kept when it
 *   - opens/closes or lies inside a bracket expression [...] or a
 *     repetition {...};
 *   - is alphanumeric or whitespace;
 *   - is a backslash or one of the regex metacharacters . * + ? ( ) [ ] { } | ^ $;
 *   - is directly followed by a quantifier (* + ? {).
 * Everything else is dropped.  A NULL argument is ignored.
 */
void re_crop_non_alnum(wchar_t *re)
{
  int in, out = 0;
  int in_bracket = 0;
  int in_brace = 0;
  int keep;
  wchar_t prev = 0;		/* previous character; reset after an escaped backslash */
  wchar_t cur, next;

  if (re == NULL)
	return;

  for (in = 0; re[in] != L'\0'; in++) {
	cur = re[in];
	next = re[in+1];

	if (cur == L'[' && prev != L'\\' && !in_bracket) {
	  in_bracket = 1;
	  keep = 1;
	} else if (cur == L']' && in_bracket && prev != L'[') {
	  in_bracket = 0;
	  keep = 1;
	} else if (cur == L'{' && prev != L'\\' && !in_bracket && !in_brace) {
	  in_brace = 1;
	  keep = 1;
	} else if (cur == L'}' && prev != L'\\' && !in_bracket && in_brace) {
	  in_brace = 0;
	  keep = 1;
	} else if (in_bracket || in_brace) {
	  keep = 1;
	} else {
	  keep = iswalnum(cur) || iswspace(cur)
		  /* backslash and metacharacters pass through, escaped or not */
		  || cur == L'\\'
		  || wcschr(L".*+?()[]{}|^$", cur) != NULL
		  /* a quantified literal is left alone: the user asked for it */
		  || next == L'*' || next == L'+' || next == L'?' || next == L'{';
	}

	if (keep)
	  re[out++] = cur;

	prev = (cur == L'\\' && prev == L'\\') ? 0 : cur;
  }

  re[out] = L'\0';
}


/*
 * Normalise a dictionary key in place: collapse whitespace, drop characters
 * that are neither alphanumeric nor whitespace, then lower-case the result.
 * A NULL argument is ignored.
 */
void norm_key(wchar_t *key)
{
  if (key != NULL) {
	shrink_norm_spaces(key);
	crop_non_alnum(key);
	wcs_lcase(key);
  }
}

/* Lower-case a wide string in place with towlower(); NULL is ignored. */
void wcs_lcase(wchar_t *str)
{
  wchar_t *p;

  if (str == NULL)
	return;

  for (p = str; *p != L'\0'; p++)
	*p = towlower(*p);
}

/*
 * Normalise whitespace in place: leading and trailing whitespace is removed
 * and every inner run of whitespace becomes one space character.
 * A NULL argument is ignored.
 */
void shrink_norm_spaces(wchar_t *str)
{
  int rd = 0;	/* read position */
  int wr = 0;	/* write position, never ahead of rd */

  if (str == NULL)
	return;

  while (str[rd] != L'\0') {
	if (!iswspace(str[rd])) {
	  str[wr++] = str[rd];
	  rd++;
	  continue;
	}

	/* a whitespace run: emit one space unless nothing has been written yet */
	if (wr > 0)
	  str[wr++] = L' ';
	do {
	  rd++;
	} while (iswspace(str[rd]));
  }

  /* a trailing run left one space behind; drop it */
  while (wr > 0 && iswspace(str[wr-1]))
	wr--;
  str[wr] = L'\0';
}

/*
 * Remove, in place, every character that is neither alphanumeric nor
 * whitespace.  A NULL argument is ignored.
 */
void crop_non_alnum(wchar_t *str)
{
  const wchar_t *in;
  wchar_t *out;

  if (str == NULL)
	return;

  out = str;
  for (in = str; *in != L'\0'; in++) {
	if (!iswalnum(*in) && !iswspace(*in))
	  continue;
	*out++ = *in;
  }
  *out = L'\0';
}

/* Write the command-line synopsis to stdout. */
void print_usage(void)
{
  static const char *const usage_lines[] = {
	"Usage: xdc [-V|--version] | [-h|--help] | [--conf=path] [--list-dicts] [--describe]\n",
	"           [-v|--verbose] [-p path|--path=path] [--add-path=path] [-i path|--index-path=path]\n",
	"           [-W|--no-warn] [-C|-c|--use-colors=yes|y|no|n] [--skip-abbrev|--no-skip-abbrev]\n",
	"           [ -g|--gen-index [--clean-output] [-o dir|--output-dir=dir] ]\n",
	"           [ [-D num|--delete-cost=num] [-I num|--insert-cost=num] [-S num|--substitute-cost=num] [-E num|--max-errors=num]\n",
	"             [-F|--full] [-l|--list] [-d name|--dict_name=name] [-f lang|--from=lang] [-t lang|--to=lang] -k key|key ]\n"
  };
  size_t line;

  for (line = 0; line < sizeof(usage_lines) / sizeof(usage_lines[0]); line++)
	fputs(usage_lines[line], stdout);
}

/*
 * Return the per-user configuration directory, "$HOME/.xdc".
 *
 * The path is built on the first successful call (through the shared scbuf
 * buffer), duplicated and cached for later calls.  Returns NULL while HOME
 * is not set or the duplication fails.  The returned string must not be
 * freed by the caller.
 */
char *get_cfg_dir(void)
{
  static char *cfg_dir = NULL;
  const char *home;

  if (cfg_dir)
	return cfg_dir;

  home = getenv("HOME");
  if (!home)
	return cfg_dir;

  snprintf(scbuf, BUFSZ, "%s/.xdc", home);
  if (strlen(scbuf) != 0)
	cfg_dir = strdup(scbuf);

  return cfg_dir;
}

/*
 * Free a whole linked list of xdcdb_t nodes.
 *
 * Besides the path, the checksum strings are released as well:
 * get_db_stat() sets md5_full / md5_part either to a heap string or to
 * NULL, and both are valid arguments to free().  A NULL list is a no-op.
 */
void xdcdb_free_all(xdcdb_t *db)
{
  xdcdb_t *_db;

  while (db) {
	_db = db->next;
	free(db->path);
	free(db->md5_full);
	free(db->md5_part);
	free(db);
	db = _db;
  }
}

/*
 * Print a summary of a database list to stdout: a "found N databases"
 * header, a 100-character rule, an empty line, then one path per line.
 */
void print_xdcdbs(xdcdb_t *db_ll)
{
  char printstr[PSTRMAX];
  xdcdb_t *node;
  int count = 0;

  for (node = db_ll; node; node = node->next)
	count++;

  snprintf(printstr, PSTRMAX, "xdc: found %d databases", count);
  fprintf(stdout, "%s\n", printstr);
  print_line(100);
  fprintf(stdout, "\n");

  for (node = db_ll; node; node = node->next)
	fprintf(stdout, "%s\n", node->path);
}

/* Print `len` dashes followed by a newline to stdout; only the newline when len <= 0. */
void print_line(int len)
{
  int remaining;

  for (remaining = len; remaining > 0; remaining--)
	fprintf(stdout, "-");
  fprintf(stdout, "\n");
}

/*
 * Allocate and fill an xdcdb_t describing the file at `path`.
 *
 * path  - file to describe; copied into the node.
 * flags - XDXF_MD5_FULL and/or XDXF_MD5_PART select which checksums are
 *         computed; a checksum that is not requested is stored as NULL.
 *
 * Returns the new node (next == NULL), or NULL when `path` is NULL or
 * memory cannot be allocated.  Release the node with xdcdb_free_all().
 */
xdcdb_t* get_db_stat(char *path, unsigned flags)
{
  xdcdb_t *_db;

  if (!path)
	return NULL;

  _db = (xdcdb_t*) malloc(sizeof(xdcdb_t));
  if (!_db)
	return NULL;

  _db->path = strdup(path);
  if (!_db->path) {
	free(_db);
	return NULL;
  }

  _db->size = file_size(path);
  _db->mtime = file_mtime(path);
  _db->ctime = file_ctime(path);

  if (flags & XDXF_MD5_FULL)
	_db->md5_full = gen_md5sum_full(path);
  else
	_db->md5_full = NULL;

  if (flags & XDXF_MD5_PART)
	_db->md5_part = gen_md5sum_part2(path, MD5_PART_SIZE, MD5_NPARTS);
  else
	_db->md5_part = NULL;

  _db->next = NULL;

  return _db;
}

/*
 * Classify `path` as an XDXF dictionary file.
 *
 * Returns 0 when is_rfile() rejects the path or it does not match
 * ".*\.xdxf$", 1 when it also matches "^dict\.xdxf$", and 2 for any other
 * name ending in ".xdxf".
 */
int is_xdxf(char *path)
{
  if (! is_rfile(path))
	return 0;

  if (! reg_test_8(path, L".*\\.xdxf$", 0, NULL))
	return 0;

  return reg_test_8(path, L"^dict\\.xdxf$", 0, NULL) ? 1 : 2;
}

/* Return 1 when is_rfile() accepts `path` and it matches ".*\.idx$", otherwise 0. */
int is_idx(char *path)
{
  if (! is_rfile(path))
	return 0;

  return reg_test_8(path, L".*\\.idx$", 0, NULL) ? 1 : 0;
}

/*
 * Strip trailing '/' characters from `path`, in place.
 *
 * The first character is never removed, so a path made only of slashes
 * becomes "/".  NULL and empty strings are left alone.
 */
void chomp_path(char* path)
{
  size_t len;

  if (!path)
	return;

  len = strlen(path);
  while (len > 1 && path[len-1] == '/') {
	len--;
	path[len] = '\0';
  }
}

/*
 * Replace the five predefined XML entities (&lt; &gt; &amp; &apos; &quot;)
 * by the characters they stand for, in place.
 *
 * The result is never longer than the input, so the whole string is
 * converted regardless of its length.  An '&' that does not start one of
 * the known entities is copied unchanged and a warning is written to
 * stderr.  A NULL argument is ignored.
 */
void convert_xml_escape(wchar_t *str)
{
  static const struct {
	const wchar_t *name;
	size_t len;
	wchar_t ch;
  } entities[] = {
	{ L"&lt;",   4, L'<'  },
	{ L"&gt;",   4, L'>'  },
	{ L"&amp;",  5, L'&'  },
	{ L"&apos;", 6, L'\'' },
	{ L"&quot;", 6, L'"'  }
  };
  const size_t nentities = sizeof(entities) / sizeof(entities[0]);
  size_t rd = 0;	/* read position */
  size_t wr = 0;	/* write position, never ahead of rd */
  size_t e;

  if (!str)
	return;

  while (str[rd] != L'\0') {
	wchar_t out = str[rd];
	size_t consumed = 1;

	if (out == L'&') {
	  for (e = 0; e < nentities; e++) {
		if (wcsncmp(&str[rd], entities[e].name, entities[e].len) == 0)
		  break;
	  }
	  if (e < nentities) {
		out = entities[e].ch;
		consumed = entities[e].len;
	  } else {
		fwprintf(stderr, L"xdc: warning: invalid xml: undefined escape code\n");
	  }
	}

	str[wr++] = out;
	rd += consumed;
  }
  str[wr] = L'\0';
}

/* Print the program name with its version, then the copyright line, to stdout. */
void print_version(void)
{
  printf("xdclient (XDC) %s\n", XDC_Version);
  printf("%s\n", XDC_Copyright);
}

/* Value of one hexadecimal digit, or -1 when `c` is not a hex digit. */
static int md5_hex_nibble(char c)
{
  if (c >= '0' && c <= '9')
	return c - '0';
  if (c >= 'a' && c <= 'f')
	return c - 'a' + 10;
  if (c >= 'A' && c <= 'F')
	return c - 'A' + 10;
  return -1;
}

/*
 * Convert a 32-character hexadecimal MD5 string into its 16-byte digest.
 *
 * Returns a newly allocated 16-byte array that the caller must free(), or
 * NULL when the argument is NULL, is not exactly 32 characters long,
 * contains a character that is not a hex digit, or memory is exhausted.
 */
md5_byte_t* md5_string2byte(char *md5_string)
{
  md5_byte_t *digest;
  int hi, lo;
  int i;

  if (!md5_string)
	return NULL;
  if (strlen(md5_string) != 32)
	return NULL;

  digest = (md5_byte_t*) malloc(sizeof(md5_byte_t) * 16);
  if (!digest)
	return NULL;

  for (i=0; i<16; i++) {
	hi = md5_hex_nibble(md5_string[i*2]);
	lo = md5_hex_nibble(md5_string[i*2+1]);
	if (hi < 0 || lo < 0) {
	  free(digest);
	  return NULL;
	}
	digest[i] = (md5_byte_t) (hi * 16 + lo);
  }

  return digest;
}

/*
 * Format a 16-byte MD5 digest as 32 lower-case hexadecimal characters.
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(), or NULL when `digest` is NULL or memory is exhausted.
 */
char* md5_byte2string(md5_byte_t *digest)
{
  const size_t hex_len = 16 * 2;
  char *hash_return;
  int di;

  if (!digest)
	return NULL;

  hash_return = (char*) malloc(hex_len + 1);
  if (!hash_return)
	return NULL;

  /* every call writes two digits plus a terminator; the last one ends the string */
  for (di = 0; di < 16; ++di)
	snprintf(hash_return + di * 2, 3, "%02x", (unsigned) digest[di]);

  return hash_return;
}

/*
 * Compute the MD5 checksum of the whole file at `path`.
 *
 * The file is read in BUFSZ-sized pieces.  Returns the checksum as a
 * string produced by md5_byte2string(), or NULL when `path` is NULL or the
 * file cannot be opened (perror() reports the reason in that case).
 */
char* gen_md5sum_full(char *path)
{
  md5_byte_t digest[16];
  md5_state_t state;
  char buf[BUFSZ+1];
  FILE *src_stream;
  int n;

  if (path == NULL)
	return NULL;

  src_stream = fopen(path, "r");
  if (src_stream == NULL) {
	perror("xdclient");
	return NULL;
  }

  md5_init(&state);
  for (;;) {
	n = fread(buf, sizeof(char), BUFSZ, src_stream);
	if (!n)
	  break;
	buf[n] = '\0';
	md5_append(&state, (const md5_byte_t *)buf, n);
  }
  md5_finish(&state, digest);

  fclose(src_stream);

  return md5_byte2string(digest);
}

/*
 * Compute an MD5 checksum over samples of the file at `path`.
 *
 * path      - file to read.
 * part_size - number of bytes hashed per sample; must be non-zero.
 * shift_len - number of bytes skipped after each sample.  Skipping only
 *             happens when the file is larger than 2*part_size + shift_len;
 *             smaller files are hashed completely.
 *
 * Returns the checksum as a string produced by md5_byte2string(), or NULL
 * when an argument is invalid, the file cannot be opened (reported with
 * perror()) or the sample buffer cannot be allocated.
 */
char* gen_md5sum_part(char *path, unsigned part_size, unsigned shift_len)
{
  FILE *src_stream;
  md5_state_t state;
  md5_byte_t digest[16];
  int n;
  unsigned long fsize;
  char *buf;
  int jump;

  if (!path || !part_size)
	return NULL;

  src_stream = fopen(path, "r");
  if (!src_stream) {
	perror("xdclient");
	return NULL;
  }

  buf = (char*) malloc(sizeof(char) * (part_size + 1));
  if (!buf) {
	fclose(src_stream);
	return NULL;
  }

  fsize = file_size(path);
  jump = 0;
  if (fsize > part_size * 2 + shift_len)
	jump = 1;

  md5_init(&state);
  while ((n = fread(buf, sizeof(char), part_size, src_stream))) {
	buf[n] = '\0';
	md5_append(&state, (const md5_byte_t *)buf, n);
	if (jump)
	  fseek(src_stream, shift_len, SEEK_CUR);
  }
  md5_finish(&state, digest);

  fclose(src_stream);
  free(buf);

  return md5_byte2string(digest);
}

/*
 * Compute an MD5 checksum over samples of the file at `path`, with the
 * distance between samples derived from the file size.
 *
 * path      - file to read.
 * part_size - number of bytes hashed per sample; must be non-zero.
 * nparts    - divisor for the gap: after each sample file_size/nparts
 *             bytes are skipped (nparts is capped at the file size).
 *             Skipping only happens when the file is larger than
 *             2*part_size + gap; smaller files are hashed completely.
 *
 * An empty file or nparts == 0 yields a gap of 0 instead of dividing by
 * zero.  Returns the checksum as a string produced by md5_byte2string(),
 * or NULL when an argument is invalid, the file cannot be opened (reported
 * with perror()) or the sample buffer cannot be allocated.
 */
char* gen_md5sum_part2(char *path, unsigned part_size, unsigned nparts)
{
  FILE *src_stream;
  md5_state_t state;
  md5_byte_t digest[16];
  int n;
  unsigned long fsize;
  char *buf;
  int jump;
  int shift_len;

  if (!path || !part_size)
	return NULL;

  src_stream = fopen(path, "r");
  if (!src_stream) {
	perror("xdclient");
	return NULL;
  }

  buf = (char*) malloc(sizeof(char) * (part_size + 1));
  if (!buf) {
	fclose(src_stream);
	return NULL;
  }

  fsize = file_size(path);
  if (nparts > fsize)
	nparts = fsize;

  shift_len = nparts ? fsize/nparts : 0;

  jump = 0;
  if (fsize > part_size * 2 + shift_len)
	jump = 1;

  md5_init(&state);
  while ((n = fread(buf, sizeof(char), part_size, src_stream))) {
	buf[n] = '\0';
	md5_append(&state, (const md5_byte_t *)buf, n);
	if (jump)
	  fseek(src_stream, shift_len, SEEK_CUR);
  }
  md5_finish(&state, digest);

  fclose(src_stream);
  free(buf);

  return md5_byte2string(digest);
}

/*
 * Print the list held by a name/value entry to stdout.
 *
 * Only entries whose val_type is Tmbsll or Twcsll are printed: a padded
 * "type" line followed by the list itself.  Any other type, or a NULL
 * entry, prints nothing.  `shift` is forwarded to ppad() and to the list
 * printers.
 */
void print_nv(nameval_t *nv, int shift)
{
  if (nv == NULL)
	return;

  if (nv->val_type == Tmbsll) {
	ppad(shift);
	fputs("type: mbs_ll_t\n", stdout);
	print_mbsll((mbs_ll_t*) nv->val, shift);
	return;
  }

  if (nv->val_type == Twcsll) {
	ppad(shift);
	fputs("type:wcs_ll_t\n", stdout);
	print_wcsll((wcs_ll_t*) nv->val, shift);
  }
}

/*
 * Helper for search_dicts(): if `key` is present in the symbol table, search
 * its path list for dictionaries and, when `search_cache` is set, for cache
 * files as well, appending the results to *flist_start.
 *
 * Returns 1 when the key was found, 0 otherwise.
 */
static int search_dicts_add(xdc_conf_t *xdc_conf, fstat_lt **flist_start, char *key, int search_cache, unsigned flags)
{
  nameval_t *nv;

  nv = hlookup(xdc_conf->symtab, key, 0);
  if (!nv)
	return 0;

  search_dict_path(xdc_conf, flist_start, (mbs_ll_t*) nv->val, flags);
  if (search_cache)
	search_cache_path(xdc_conf, flist_start, (mbs_ll_t*) nv->val, CC_DICT_PATH | CC_VALID_PATH, flags);

  return 1;
}

/*
 * Build the list of dictionary files to work with.
 *
 * Path sources are consulted in priority order: command line, then
 * environment, then config file, then "default_path".  At each level the
 * "*_add_path" list is always searched, while a "*_path" entry stops the
 * descent to lower-priority levels.  Cache files are consulted too unless
 * "cmd_arg_no_search_cache" is set.  When an index is being generated without
 * "cmd_arg_clean_output", the output directory's cache is searched as well.
 *
 * The list is then passed to flist_dup_clean() and, if xdc_conf->skip_abbrev
 * is set, entries whose full name matches "^abbrev$" are removed.
 *
 * Returns the head of the resulting list, or NULL if `xdc_conf` is NULL or
 * nothing was found.
 */
fstat_lt* search_dicts(xdc_conf_t *xdc_conf, unsigned flags)
{
  fstat_lt *flist_start;
  fstat_lt *flist, *flist_next, *flist_prev;
  int search_cache = 1;
  int is_abbrev;
  mbs_ll_t *mbsl;
  wchar_t *name;

  if (!xdc_conf)
	return NULL;

  if (hlookup(xdc_conf->symtab, "cmd_arg_no_search_cache", 0))
	search_cache = 0;

  flist_start = NULL;

  search_dicts_add(xdc_conf, &flist_start, "cmd_arg_add_path", search_cache, flags);
  if (!search_dicts_add(xdc_conf, &flist_start, "cmd_arg_path", search_cache, flags)) {
	search_dicts_add(xdc_conf, &flist_start, "env_arg_add_path", search_cache, flags);
	if (!search_dicts_add(xdc_conf, &flist_start, "env_arg_path", search_cache, flags)) {
	  search_dicts_add(xdc_conf, &flist_start, "cfg_arg_add_path", search_cache, flags);
	  if (!search_dicts_add(xdc_conf, &flist_start, "cfg_arg_path", search_cache, flags))
		search_dicts_add(xdc_conf, &flist_start, "default_path", search_cache, flags);
	}
  }

  if (hlookup(xdc_conf->symtab, "cmd_arg_gen_index", 0) && !hlookup(xdc_conf->symtab, "cmd_arg_clean_output", 0)) {
	mbsl = mbsl_create(find_output_dir(xdc_conf));
	search_cache_path(xdc_conf, &flist_start, mbsl, 0, flags);
	mbsl_free(mbsl);
  }

  flist_dup_clean(xdc_conf, flist_start);

  if (xdc_conf->skip_abbrev) {
	flist = flist_start;
	flist_start = NULL;
	flist_next = flist_prev = NULL;
	while(flist) {
	  /* Start from NULL so a lookup that stores nothing is not read as garbage. */
	  name = NULL;
	  get_full_name(xdc_conf, flist, &name);
	  is_abbrev = name && reg_test(name, L"^abbrev$", RE_IGN_CASE | RE_SKIP_START_SPC | RE_SKIP_END_SPC, NULL);
	  /* The name is released by the caller, as check_dict_name() does. */
	  free(name);

	  if (is_abbrev) {
		/* Unlink the entry before freeing so the rest of the list survives. */
		flist_next = flist->next;
		flist->next = NULL;
		flist_free(flist, 0);
		flist = flist_next;
		if (flist_prev)
		  flist_prev->next = flist;
	  } else {
		if (!flist_start)
		  flist_start = flist;
		flist_prev = flist;
		flist = flist->next;
	  }
	}
  }

  return flist_start;
}

/*
 * Determine the directory that output files should be written to.
 *
 * An explicitly configured directory is looked up as "cmd_arg_output_dir",
 * then "env_arg_output_dir", then "cfg_arg_output_dir".  If one is set it
 * must be usable (it is created with mode 0775 when missing, and must be a
 * readable and writable directory); otherwise an error is printed and NULL
 * is returned.  When none is configured, the first usable entry of
 * "default_output_dir" is taken.
 *
 * Returns a newly allocated copy of the path with trailing '/' cropped (the
 * caller frees it), or NULL when `xdc_conf` is NULL, no usable directory
 * exists, or memory cannot be allocated.
 */
char* find_output_dir(xdc_conf_t *xdc_conf)
{
  nameval_t *nv;
  char *output_dir = NULL;
  char *mbs_ret;
  mbs_ll_t *mbsl;

  if (!xdc_conf)
	return NULL;

  nv = hlookup(xdc_conf->symtab, "cmd_arg_output_dir", 0);
  if (!nv)
	nv = hlookup(xdc_conf->symtab, "env_arg_output_dir", 0);
  if (!nv)
	nv = hlookup(xdc_conf->symtab, "cfg_arg_output_dir", 0);

  if (nv) {
	if (!nv->mbs_val) {
	  fprintf(stdout, "xdclient: error: no output dir path set\n");
	  return NULL;
	}
	if (!file_exist(nv->mbs_val))
	  mkdir(nv->mbs_val, 0775);
	if (is_dir(nv->mbs_val) && can_rw(nv->mbs_val))
	  output_dir = nv->mbs_val;
	if (!output_dir) {
	  fprintf(stdout, "xdclient: error: invalid path: %s. Check permissions\n", nv->mbs_val);
	  return NULL;
	}
  }

  if (!output_dir && (nv = hlookup(xdc_conf->symtab, "default_output_dir", 0))) {
	mbsl = (mbs_ll_t*) nv->val;
	while(mbsl && !output_dir) {
	  if (!file_exist(mbsl->mbs))
		mkdir(mbsl->mbs, 0775);
	  if (is_dir(mbsl->mbs) && can_rw(mbsl->mbs))
		output_dir = mbsl->mbs;
	  mbsl = mbsl->next;
	}
  }

  /* Nothing configured and no usable default: strdup(NULL) would be undefined. */
  if (!output_dir) {
	fprintf(stdout, "xdclient: error: no output dir path set\n");
	return NULL;
  }

  mbs_ret = strdup(output_dir);
  if (!mbs_ret)
	return NULL;
  mbs_sufx_crop(mbs_ret, '/');

  return mbs_ret;
}

/*
 * Search every directory in `mbsl` for files matching ".*\.xdxf" and append
 * whatever fs_find() returns to the end of the list at *dict_flist_start
 * (which becomes the head if the list was empty).
 *
 * Returns 1 after the search, or 0 if any pointer argument is NULL.
 */
int search_dict_path(xdc_conf_t *xdc_conf, fstat_lt **dict_flist_start, mbs_ll_t *mbsl, unsigned flags)
{
  fstat_lt **tail;
  mbs_ll_t *dir;

  if (xdc_conf == NULL || dict_flist_start == NULL || mbsl == NULL)
	return 0;

  tail = dict_flist_start;
  for (dir = mbsl; dir != NULL; dir = dir->next) {
	/* Advance to the slot that terminates the list before appending. */
	while (*tail != NULL)
	  tail = &(*tail)->next;

	*tail = fs_find(dir->mbs, ".*\\.xdxf", flags);
  }

  return 1;
}

/*
 * Collect file entries listed in "xdc.cache" files.
 *
 * Every directory in `mbsl` is searched for files matching "xdc\.cache".
 * Each match that is a readable file and not a directory is parsed with
 * read_cache_file(), and the entries it returns are appended to the list at
 * *dict_flist_start.  `cache_flags` is forwarded to read_cache_file();
 * FF_MD5_FULL / FF_MD5_PART in `flags` are translated to the corresponding
 * FSTAT_MD5_* stat flags.
 *
 * Returns 1 after the search, or 0 if any pointer argument is NULL.
 */
int search_cache_path(xdc_conf_t *xdc_conf, fstat_lt **dict_flist_start, mbs_ll_t *mbsl, unsigned cache_flags, unsigned flags)
{
  fstat_lt *cache_files = NULL;
  fstat_lt *cache;
  fstat_lt **tail;
  mbs_ll_t *dir;
  unsigned stat_flags = 0;

  if (xdc_conf == NULL || dict_flist_start == NULL || mbsl == NULL)
	return 0;

  /* Pass 1: gather the cache files themselves into a temporary list. */
  tail = &cache_files;
  for (dir = mbsl; dir != NULL; dir = dir->next) {
	while (*tail != NULL)
	  tail = &(*tail)->next;

	*tail = fs_find(dir->mbs, "xdc\\.cache", 0);
  }

  if (flags & FF_MD5_FULL)
	stat_flags |= FSTAT_MD5_FULL;
  if (flags & FF_MD5_PART)
	stat_flags |= FSTAT_MD5_PART;

  /* Pass 2: append the entries of every usable cache file to the caller's list. */
  tail = dict_flist_start;
  for (cache = cache_files; cache != NULL; cache = cache->next) {
	if (!is_rfile(cache->path) || is_dir(cache->path))
	  continue;

	while (*tail != NULL)
	  tail = &(*tail)->next;

	*tail = read_cache_file(cache->path, cache_flags, stat_flags);
  }
  flist_free(cache_files, 0);

  return 1;
}

/*
 * Parse a cache file and return the list of paths it names.
 *
 * The file holds one path per line (lines end at '\n', a NUL byte or end of
 * file).  Each line is cropped of surrounding spaces and tabs; lines that are
 * empty after cropping are skipped.  Paths starting with '/' or '~' are
 * resolved on their own, all others relative to the directory that contains
 * the cache file.
 *
 * `flags` filters the entries:
 *   CC_DICT_PATH  - drop entries that end in '/' or resolve to a directory;
 *   CC_VALID_PATH - drop entries that do not exist or are not readable files.
 * `stat_flags` is forwarded to fstat_update() for every entry kept.
 *
 * Returns the head of a newly built list, or NULL if `path` is NULL/empty,
 * is not a readable regular file, or yields no entries.
 */
fstat_lt* read_cache_file(char *path, unsigned flags, unsigned stat_flags)
{
  FILE *fp;
  char *mbs;            /* reusable line buffer, owned by this function */
  char *mbs_rel;        /* temporary "<cache dir>/<entry>" string */
  char *mbs_abs;
  char *abs_path, *abs_dir;
  int len, mbs_sz;
  fstat_lt *flist_start, *flist;
  long pos, pos_next;
  int c;                /* int, so EOF is distinguishable from every byte value */
  size_t got, slen;
  int mbs_is_dir;

  if (!path || !path[0])
	return NULL;

  abs_path = abs_path_8(towcs(path));
  if (!file_exist(abs_path) || is_dir(abs_path) || !is_rfile(abs_path)) {
	free(abs_path);
	return NULL;
  }

  fp = fopen(abs_path, "r");
  if (!fp) {
	free(abs_path);
	return NULL;
  }

  abs_dir = mbs_dirname(abs_path);

  flist_start = flist = NULL;
  len = 0;
  mbs = NULL;
  mbs_sz = 0;
  pos = ftell(fp);
  while(1) {
	c = fgetc(fp);
	if (c != '\n' && c != EOF && c != '\0') {
	  /* Still inside a line: only measure it, it is re-read in one go below. */
	  len++;
	  continue;
	}

	if (len == 0) {
	  /* Empty line, or end of input (also covers a read error). */
	  if (c == EOF)
		break;
	  pos = ftell(fp);
	  continue;
	}

	/* Grow the line buffer when the current line does not fit. */
	if (mbs && mbs_sz <= len) {
	  free(mbs);
	  mbs = NULL;
	}
	if (!mbs) {
	  mbs_sz = len*10;
	  mbs = (char*) malloc(sizeof(char) * mbs_sz);
	  if (!mbs)
		break;
	}

	/* Rewind to the start of the line and read it in full. */
	pos_next = ftell(fp);
	fseek(fp, pos, SEEK_SET);
	got = fread(mbs, sizeof(char), len, fp);
	mbs[got] = '\0';
	mbs_pref_crop(mbs, ' ');
	mbs_sufx_crop(mbs, ' ');
	mbs_pref_crop(mbs, '\t');
	mbs_sufx_crop(mbs, '\t');

	slen = strlen(mbs);
	if (slen > 0) {
	  mbs_is_dir = (mbs[slen-1] == '/') ? 1 : 0;

	  mbs_sufx_crop(mbs, '/');
	  if (mbs[0] == '/' || mbs[0] == '~') {
		mbs_abs = abs_path_8(towcs(mbs));
	  } else {
		/* Use a separate temporary so the line buffer stays valid for reuse. */
		mbs_rel = strdup(pfmbs("%s/%s", abs_dir, mbs));
		mbs_abs = abs_path_8(towcs(mbs_rel));
		free(mbs_rel);
	  }

	  if ((flags & CC_DICT_PATH) && (mbs_is_dir ||  is_dir(mbs_abs))) {
		free(mbs_abs);
	  } else if ((flags & CC_VALID_PATH) && (!file_exist(mbs_abs) || !is_rfile(mbs_abs))) {
		free(mbs_abs);
	  } else {
		if (flist) {
		  flist->next = fstat_create();
		  flist = flist->next;
		} else {
		  flist = fstat_create();
		}
		flist->path = mbs_abs;
		fstat_update(flist, stat_flags);
		if (!flist_start)
		  flist_start = flist;
	  }
	}

	/* Continue scanning right after the line terminator. */
	fseek(fp, pos_next, SEEK_SET);
	pos = pos_next;
	len = 0;
  }
  fclose(fp);
  free(mbs);

  free(abs_dir);
  free(abs_path);
  return flist_start;
}

/*
 * Build the list of index files to work with.
 *
 * Locations are searched in this order: the output directory (unless
 * "cmd_arg_clean_output" is set), every path list registered under the
 * command-line, environment, config-file and default path keys, and finally
 * the directory of each dictionary in "dict_flist".  The combined list is
 * passed to flist_dup_clean() before it is returned.
 *
 * Returns the head of the list, or NULL if `xdc_conf` is NULL or nothing was
 * found.
 */
fstat_lt* search_indexes(xdc_conf_t *xdc_conf, unsigned flags)
{
  /* Symbol-table keys holding path lists, in search order. */
  static char *path_keys[] = {
	"cmd_arg_add_path",
	"cmd_arg_path",
	"cmd_arg_index_path",
	"env_arg_add_path",
	"env_arg_path",
	"env_arg_index_path",
	"cfg_arg_add_path",
	"cfg_arg_path",
	"cfg_arg_index_path",
	"default_path",
	"default_index_path"
  };
  fstat_lt *index_files = NULL;
  fstat_lt *dict;
  nameval_t *nv;
  mbs_ll_t *dir_list;
  char *dict_dir;
  size_t k;

  if (xdc_conf == NULL)
	return NULL;

  if (!hlookup(xdc_conf->symtab, "cmd_arg_clean_output", 0)) {
	dir_list = mbsl_create(find_output_dir(xdc_conf));
	search_index_path(xdc_conf, &index_files, dir_list, flags);
	mbsl_free(dir_list);
  }

  for (k = 0; k < sizeof(path_keys) / sizeof(path_keys[0]); k++) {
	nv = hlookup(xdc_conf->symtab, path_keys[k], 0);
	if (nv)
	  search_index_path(xdc_conf, &index_files, (mbs_ll_t*) nv->val, flags);
  }

  nv = hlookup(xdc_conf->symtab, "dict_flist", 0);
  if (nv) {
	for (dict = (fstat_lt*) nv->val; dict != NULL; dict = dict->next) {
	  dict_dir = mbs_dirname(dict->path);
	  if (!dict_dir)
		continue;

	  dir_list = mbsl_create(dict_dir);
	  search_index_path(xdc_conf, &index_files, dir_list, flags);
	  mbsl_free(dir_list);
	}
  }

  flist_dup_clean(xdc_conf, index_files);

  return index_files;
}

/*
 * Search every directory in `mbsl` for files matching ".*\.idx" and append
 * whatever fs_find() returns to the end of the list at *index_flist_start
 * (which becomes the head if the list was empty).
 *
 * Returns 1 after the search, or 0 if any pointer argument is NULL.
 */
int search_index_path(xdc_conf_t *xdc_conf, fstat_lt **index_flist_start, mbs_ll_t *mbsl, unsigned flags)
{
  fstat_lt **tail;
  mbs_ll_t *dir;

  if (xdc_conf == NULL || index_flist_start == NULL || mbsl == NULL)
	return 0;

  tail = index_flist_start;
  for (dir = mbsl; dir != NULL; dir = dir->next) {
	/* Advance to the slot that terminates the list before appending. */
	while (*tail != NULL)
	  tail = &(*tail)->next;

	*tail = fs_find(dir->mbs, ".*\\.idx", flags);
  }

  return 1;
}

/*
 * Check whether a dictionary's source language passes the configured
 * "language from" filter.
 *
 * The filter is taken from the first level that specifies one: command line,
 * then environment, then config file.  A level whose "*_lang_from_all" flag
 * is set accepts every dictionary; a level whose "*_lang_from" flag is set
 * accepts the dictionary only if "<level>_val_lang_from_<lang>" exists in
 * the symbol table.  With no filter configured every dictionary is accepted.
 *
 * Returns 1 if the dictionary is accepted, 0 if it is rejected, its source
 * language cannot be determined, or an argument is NULL.
 */
int check_lang_from(xdc_conf_t *xdc_conf, fstat_lt *dict_fst)
{
  enum { LANG_SRC_NONE, LANG_SRC_CMD, LANG_SRC_ENV, LANG_SRC_CFG } src = LANG_SRC_NONE;
  char *lang_from = NULL;
  int accepted = 0;

  if (!xdc_conf || !dict_fst)
	return 0;

  /* Pick the highest-priority level that defines a filter. */
  if (xdc_conf->cmd_lang_from_all)
	return 1;
  if (xdc_conf->cmd_lang_from)
	src = LANG_SRC_CMD;
  else if (xdc_conf->env_lang_from_all)
	return 1;
  else if (xdc_conf->env_lang_from)
	src = LANG_SRC_ENV;
  else if (!xdc_conf->cfg_lang_from_all && xdc_conf->cfg_lang_from)
	src = LANG_SRC_CFG;

  if (src == LANG_SRC_NONE)
	return 1;

  get_lang(xdc_conf, dict_fst, &lang_from, NULL);
  if (!lang_from)
	return 0;

  switch (src) {
  case LANG_SRC_CMD:
	accepted = hlookup(xdc_conf->symtab, pfmbs("cmd_val_lang_from_%s", lang_from), 0) != NULL;
	break;
  case LANG_SRC_ENV:
	accepted = hlookup(xdc_conf->symtab, pfmbs("env_val_lang_from_%s", lang_from), 0) != NULL;
	break;
  case LANG_SRC_CFG:
	accepted = hlookup(xdc_conf->symtab, pfmbs("cfg_val_lang_from_%s", lang_from), 0) != NULL;
	break;
  default:
	break;
  }

  /* Release the language string on the rejecting path too. */
  free(lang_from);
  return accepted;
}

/*
 * Check whether a dictionary's target language passes the configured
 * "language to" filter.
 *
 * The filter is taken from the first level that specifies one: command line,
 * then environment, then config file.  A level whose "*_lang_to_all" flag is
 * set accepts every dictionary; a level whose "*_lang_to" flag is set accepts
 * the dictionary only if "<level>_val_lang_to_<lang>" exists in the symbol
 * table.  With no filter configured every dictionary is accepted.
 *
 * Returns 1 if the dictionary is accepted, 0 if it is rejected, its target
 * language cannot be determined, or an argument is NULL.
 */
int check_lang_to(xdc_conf_t *xdc_conf, fstat_lt *dict_fst)
{
  enum { LANG_DST_NONE, LANG_DST_CMD, LANG_DST_ENV, LANG_DST_CFG } src = LANG_DST_NONE;
  char *lang_to = NULL;
  int accepted = 0;

  if (!xdc_conf || !dict_fst)
	return 0;

  /* Pick the highest-priority level that defines a filter. */
  if (xdc_conf->cmd_lang_to_all)
	return 1;
  if (xdc_conf->cmd_lang_to)
	src = LANG_DST_CMD;
  else if (xdc_conf->env_lang_to_all)
	return 1;
  else if (xdc_conf->env_lang_to)
	src = LANG_DST_ENV;
  else if (!xdc_conf->cfg_lang_to_all && xdc_conf->cfg_lang_to)
	src = LANG_DST_CFG;

  if (src == LANG_DST_NONE)
	return 1;

  get_lang(xdc_conf, dict_fst, NULL, &lang_to);
  if (!lang_to)
	return 0;

  switch (src) {
  case LANG_DST_CMD:
	accepted = hlookup(xdc_conf->symtab, pfmbs("cmd_val_lang_to_%s", lang_to), 0) != NULL;
	break;
  case LANG_DST_ENV:
	accepted = hlookup(xdc_conf->symtab, pfmbs("env_val_lang_to_%s", lang_to), 0) != NULL;
	break;
  case LANG_DST_CFG:
	accepted = hlookup(xdc_conf->symtab, pfmbs("cfg_val_lang_to_%s", lang_to), 0) != NULL;
	break;
  default:
	break;
  }

  /* Release the language string on the rejecting path too. */
  free(lang_to);
  return accepted;
}

/*
 * Check whether a dictionary's full name passes the configured name filter.
 *
 * The pattern list is taken from the first level whose flag is set: command
 * line ("cmd_arg_dict_name"), then environment ("env_arg_dict_name"), then
 * config file ("cfg_arg_dict_name").  The dictionary is accepted when any
 * pattern matches its full name, or when any pattern matches the word "all".
 * With no filter configured every dictionary is accepted.
 *
 * Returns 1 if the dictionary is accepted, 0 if it is rejected, its full
 * name cannot be determined, the pattern list is missing from the symbol
 * table, or an argument is NULL.
 */
int check_dict_name(xdc_conf_t *xdc_conf, fstat_lt *dict_fst)
{
  const unsigned re_flags = RE_IGN_CASE | RE_COMP_SPACES | RE_SKIP_NONALPHA | RE_SKIP_START_SPC | RE_PREFIX_DASH;
  wchar_t *full_name;
  int found;
  wcs_ll_t *wcsl;
  nameval_t *nv;
  char *key;

  if (!xdc_conf || !dict_fst)
	return 0;

  if (xdc_conf->cmd_dict_name)
	key = "cmd_arg_dict_name";
  else if (xdc_conf->env_dict_name)
	key = "env_arg_dict_name";
  else if (xdc_conf->cfg_dict_name)
	key = "cfg_arg_dict_name";
  else
	return 1;

  full_name = NULL;
  get_full_name(xdc_conf, dict_fst, &full_name);
  if (!full_name)
	return 0;

  /* A set flag without a stored pattern list is treated as "no match". */
  nv = hlookup(xdc_conf->symtab, key, 0);
  wcsl = nv ? (wcs_ll_t*) nv->val : NULL;

  found = 0;
  while (wcsl && !found) {
	if (reg_test(full_name, wcsl->wcs, re_flags, NULL))
	  found = 1;
	else if (reg_test(L"all", wcsl->wcs, re_flags, NULL))
	  found = 1;
	wcsl = wcsl->next;
  }
  free(full_name);

  return found;
}

/*
 * Extract the literal text that every match of the regular expression `key`
 * must begin with.
 *
 * Leading '^' anchors are skipped.  Characters are then copied until the
 * first construct that is not a plain literal:
 *   - an unescaped  . [ $ ^ ( |  ends the prefix;
 *   - an unescaped  * ? {  ends the prefix and also removes the atom it
 *     applies to, because that atom may occur zero times;
 *   - an unescaped  +  ends the prefix but keeps the atom (one occurrence is
 *     mandatory);
 *   - a backslash followed by one of the characters above or by another
 *     backslash is copied in its escaped form (both characters);
 *   - any other backslash sequence, or a trailing lone backslash, ends the
 *     prefix without being copied.
 * Expressions containing an unescaped '|' anywhere have no common prefix and
 * yield NULL.
 *
 * Returns a newly allocated string (the caller frees it), or NULL when `key`
 * is NULL or empty, no literal prefix exists, or memory cannot be allocated.
 */
wchar_t* re_start_literals(wchar_t *key)
{
  static const wchar_t stoppers[] = L".[$^(|";
  static const wchar_t quantifiers[] = L"*+?{";
  int len;
  wchar_t *buf;
  int bi;
  int atom_start;   /* index in buf where the most recent atom begins */
  int escaped;
  int i;
  wchar_t wc;
  wchar_t next;

  if (!key)
	return NULL;

  len = wcslen(key);
  if (!len)
	return NULL;

  /* Reject alternation; track escapes so that "\\|" counts as a real '|'. */
  escaped = 0;
  for (i=0; i<len; i++) {
	if (escaped)
	  escaped = 0;
	else if (key[i] == L'\\')
	  escaped = 1;
	else if (key[i] == L'|')
	  return NULL;
  }

  i=0;
  while (i<len && key[i] == L'^')
	i++;

  if (i == len)
	return NULL;

  buf = (wchar_t*) malloc(sizeof(wchar_t) * (len+1));
  if (!buf)
	return NULL;
  bi = 0;
  atom_start = 0;

  while (i < len) {
	wc = key[i];

	if (wc == L'\\') {
	  if (i+1 >= len)
		break;
	  next = key[i+1];
	  if (next != L'\\' && !wcschr(stoppers, next) && !wcschr(quantifiers, next))
		break;
	  /* Escaped metacharacter: keep the pair as one literal atom. */
	  atom_start = bi;
	  buf[bi++] = wc;
	  buf[bi++] = next;
	  i += 2;
	  continue;
	}

	if (wcschr(stoppers, wc))
	  break;

	if (wcschr(quantifiers, wc)) {
	  if (wc != L'+')
		bi = atom_start;
	  break;
	}

	atom_start = bi;
	buf[bi++] = wc;
	i++;
  }

  if (bi == 0) {
	free(buf);
	return NULL;
  }
  buf[bi] = L'\0';

  return buf;
}

/*
 * Split a regular expression into its top-level alternatives.
 *
 * Leading '^' and trailing '$' anchors are stripped, followed by matching
 * pairs of an opening '(' at the start and a closing ')' at the end.  The
 * remainder is cut at every '|' that is not escaped and not inside a bracket
 * expression or parentheses; empty pieces are dropped.
 *
 * Unless "cmd_arg_full_search" is set in the symbol table, every piece is
 * wrapped as "^<piece>$"; otherwise pieces are returned as they are.
 *
 * Returns the head of a newly allocated list of pieces, or NULL when an
 * argument is NULL, `re` is empty or consists only of anchors/parentheses,
 * or memory cannot be allocated.
 */
wcs_ll_t* re_get_subre(xdc_conf_t *xdc_conf, wchar_t *re)
{
  int len;
  int i, j, h;
  wchar_t wc_prev;
  wchar_t wc;
  int start;
  int hook, par;
  wcs_ll_t *subre, *subre_first, *subre_last;
  wchar_t *wcs;
  wchar_t *re2;
  int sub_len;
  int full_search;

  full_search = 0;

  if (!xdc_conf || !re)
	return NULL;

  len = wcslen(re);
  if (!len)
	return NULL;

  if (hlookup(xdc_conf->symtab, "cmd_arg_full_search", 0))
	full_search = 1;

  /* Strip leading anchors. */
  i=0;
  while (i<len && re[i] == L'^')
	i++;
  if (i == len)
	return NULL;

  /* Strip trailing anchors. */
  j=len-1;
  while (j>=0 && re[j] == L'$')
	j--;
  if (j < 0 || i > j)
	return NULL;

  /* Strip parentheses that enclose the whole expression. */
  while (i<len && re[i] == L'(' && j>=0 && re[j] == L')') {
	i++;
	j--;
  }

  if (i==len || j<0 || i>j)
	return NULL;

  /*
   * If stripping stopped on a backslash, the character that was stripped
   * after it was escaped: take it back.  Never step past the last character
   * of `re` (a trailing lone backslash has nothing to take back).
   */
  if (re[j] == L'\\' && (j==0 || re[j-1] != L'\\') && j < len-1)
	j++;

  re2 = (wchar_t*) malloc(sizeof(wchar_t) * (len+1));
  if (!re2)
	return NULL;

  for (h=0; i<=j; i++, h++)
	re2[h] = re[i];

  re2[h] = L'\0';
  len = wcslen(re2);

  subre_first = subre_last = NULL;
  start = 0;
  wc_prev = 0;
  hook = par = 0;
  /* Runs up to and including the terminator so the last piece is emitted too. */
  for (i=0; i<=len; i++) {
	wc = re2[i];
	if (wc == L'(' && wc_prev != L'\\' && !hook) {
	  par++;
	} else if (wc == L')' && wc_prev != L'\\' && !hook && par) {
	  par--;
	} else if (wc == L'[' && wc_prev != L'\\' && !hook) {
	  hook++;
	} else if (wc == L']' && hook && wc_prev != L'[') {
	  hook--;
	} else if (wc == L'\0' || (wc == L'|' && wc_prev != L'\\' && !hook && !par)) {
	  sub_len = i - start;
	  if (sub_len) {
		subre = (wcs_ll_t*) malloc(sizeof(wcs_ll_t));
		if (!subre)
		  break;
		subre->next = NULL;
		wcs = wcsndup(&re2[start], sub_len);
		if (! full_search) {
		  subre->wcs = wcsdup(pfwcs(L"^%S$", wcs));
		  free(wcs);
		} else {
		  subre->wcs = wcs;
		}
		if (subre_last)
		  subre_last->next = subre;
		subre_last = subre;
		if (!subre_first)
		  subre_first = subre;
	  }
	  if (wc == L'|')
		start = i+1;
	}

	/* An escaped backslash does not escape the character after it. */
	if (wc == L'\\' && wc_prev == L'\\')
	  wc_prev = 0;
	else
	  wc_prev = wc;
  }

  free(re2);
  return subre_first;
}

/*
 * Reset approximate-matching parameters to their defaults: the structure is
 * zeroed, every limit is set to INT_MAX and every edit operation (insert,
 * delete, substitute) gets a cost of 1.
 */
void regaparams_set_defaults(regaparams_t *params)
{
  memset(params, 0, sizeof(regaparams_t));

  /* No upper bound on total cost or on any kind of edit. */
  params->max_cost = INT_MAX;
  params->max_err = INT_MAX;
  params->max_ins = INT_MAX;
  params->max_del = INT_MAX;
  params->max_subst = INT_MAX;

  /* Unit cost per edit operation. */
  params->cost_ins = 1;
  params->cost_del = 1;
  params->cost_subst = 1;
}

