/*
    Copyright (C) 2009, 2012 Oleksiy Chernyavskyy

    This file is part of XDClient.

    XDClient is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    XDClient is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with XDClient.  If not, see <http://www.gnu.org/licenses/>.
*/

#define _GNU_SOURCE

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#include <sys/time.h>
#include <sys/stat.h>
#include "xml_utf8.h"
#include "utf8.h"
#include "common.h"

/* Timestamp type; in this file it is only referenced by the commented-out
   get_timestamp() helper. */
typedef unsigned long long timestamp_t;
/*static timestamp_t get_timestamp(); */

/* Capacity, in wide characters, of the file-scope scratch buffers
   (buf, buf2, tag_name) declared further down. May be overridden at
   build time. */
#ifndef BUFSZ
#define    BUFSZ 1024
#endif
/* Forward declarations of helpers that are defined later in this file. */
DAttr* atexpose(wchar_t *wcs_xml, int len);
void attr_free(DAttr* attr);
int _isalnum(wchar_t c);
int islvalue(wchar_t c);
int _isalnum_full(wchar_t c);
int _isspace(wchar_t c);
int str_skip_space(wchar_t* str, int len);
wchar_t* space_clean(wchar_t* text);
wchar_t* str_skip_xml_header(wchar_t* wcs_xml);
wchar_t* str_skip_doctype(wchar_t* wcs_xml);
wchar_t* find_open_tag(wchar_t *wcs_xml, int len, wchar_t *tag);
DTag* _dexml(pse_param_t *param, unsigned long *pos, int *err_ret, DTag *parent);
DTag* create_tag(void);
int get_comment_tag(wchar_t *wcs_xml, int i, int len);
int get_name_len(wchar_t *wcs_xml, int i, int len);
int _isdigit(wchar_t wc);
int str_skip_space2(wchar_t *wcs_xml, int i, int len);
int get_cdata(wchar_t *wcs_xml, int i, int len);
long xml_file_size(char* path);

/* Nesting depth of _dexml(): reset to 0 by dexml()/seek_tag() before the
   top-level call, incremented on every entry to _dexml() and decremented
   after a nested call returns normally. Being a global, it makes the
   parser non-reentrant. */
int ncall = 0;

/* Status codes reported through the err_ret out-parameter of _dexml(). */
#define CODE_NO_ERROR 0
#define CODE_INVALID_XML 1

/* Read wide characters from fp into dst until cap-1 characters are stored
   or fgetwc() reports WEOF (end of file or read error). The result is
   always NUL-terminated; WEOF itself is never stored. cap must be >= 1. */
static void fseek_tag_fill(FILE *fp, wchar_t *dst, unsigned cap)
{
  unsigned n = 0;
  wint_t wc;

  while (n + 1 < cap && (wc = fgetwc(fp)) != WEOF)
    dst[n++] = (wchar_t) wc;
  dst[n] = L'\0';
}

/*
 * Scan the file at `path` in chunks until the text "</stag" or "<stop_tag"
 * shows up, then load the file from its beginning up to (a little past)
 * that point and hand the text to seek_tag().
 *
 *   path      file to read; NULL or "" yields NULL
 *   stag      name of the element to look for (may be NULL)
 *   stop_tag  name of an element that ends the search (may be NULL);
 *             at least one of stag / stop_tag must be given
 *   flags     accepted for interface compatibility; seek_tag() is called
 *             with flags 0, exactly as before.
 *             NOTE(review): confirm whether flags should be forwarded.
 *
 * Returns the tree produced by seek_tag(), or NULL when the file cannot be
 * opened, memory runs out, nothing matches, or parsing fails. On success
 * the text buffer is owned by the returned tree (seek_tag() stores it in
 * source_xml); in every other case it is released here.
 */
DTag* fseek_tag(char *path, wchar_t *stag, wchar_t *stop_tag, unsigned flags)
{
  FILE *fp = NULL;
  wchar_t *buf = NULL;
  wchar_t *seek_wcs = NULL;
  wchar_t *stop_wcs = NULL;
  DTag *tag = NULL;
  unsigned len = 0, len2 = 0, len_max;
  unsigned buf_sz;
  unsigned extra;
  int found = 0;
  long pos;
  long shift;

  (void) flags;

  if (!path || !path[0] || (!stag && !stop_tag))
    return NULL;

  if (!(fp = fopen(path, "r")))
    return NULL;

  if (stag) {
    /* "</" + name + NUL */
    len = wcslen(stag) + 3;
    seek_wcs = malloc(sizeof *seek_wcs * len);
    if (!seek_wcs)
      goto out;
    swprintf(seek_wcs, len, L"</%ls", stag);
    len = wcslen(seek_wcs);
  }

  if (stop_tag) {
    /* "<" + name + NUL */
    len2 = wcslen(stop_tag) + 2;
    stop_wcs = malloc(sizeof *stop_wcs * len2);
    if (!stop_wcs)
      goto out;
    swprintf(stop_wcs, len2, L"<%ls", stop_tag);
    len2 = wcslen(stop_wcs);
  }

  len_max = len > len2 ? len : len2;

  buf_sz = len_max * 100;
  buf = malloc(sizeof *buf * buf_sz);
  if (!buf)
    goto out;

  /* ferror() is tested as well: after a read error feof() never becomes
     true and the scan would otherwise spin forever. */
  while (!found && !feof(fp) && !ferror(fp)) {
    fseek_tag_fill(fp, buf, buf_sz);

    if (seek_wcs && wcsstr(buf, seek_wcs))
      extra = len + 100;
    else if (stop_wcs && wcsstr(buf, stop_wcs))
      extra = len2 + 1000;
    else
      extra = 0;

    if (extra) {
      /* Pattern seen: reload everything from the start of the file up to
         the current offset plus some slack for the rest of the tag. */
      pos = ftell(fp);
      if (pos < 0)
        break;
      buf_sz = (unsigned) pos + extra;
      free(buf);
      buf = malloc(sizeof *buf * buf_sz);
      if (!buf)
        break;
      rewind(fp);
      fseek_tag_fill(fp, buf, buf_sz);
      found = 1;
    } else if (!feof(fp) && !ferror(fp)) {
      /* Step back so a pattern straddling two chunks is not missed. */
      pos = ftell(fp);
      shift = -10;
      if (pos > (long) len_max) {
        pos -= len_max;
        shift = -(long) len_max;
      }
      fseek(fp, shift, SEEK_CUR);
      if (pos > 0 && ftell(fp) == 0) {
        /* The relative seek ended at offset 0: reopen the file and seek
           to the absolute offset instead. */
        fclose(fp);
        fp = fopen(path, "r");
        if (!fp)
          break;
        fseek(fp, pos, SEEK_SET);
      }
    }
  }

out:
  if (fp)
    fclose(fp);

  free(seek_wcs);
  free(stop_wcs);

  if (found && buf)
    tag = seek_tag(buf, stag, stop_tag, 0);

  /* Without a tree nobody owns the text buffer. */
  if (!tag)
    free(buf);

  return tag;
}

/*
 * Parse `wcs_xml` until the element named `stag` has been read completely,
 * or until an element named `stop_tag` is opened.
 *
 *   wcs_xml   NUL-terminated document text; NULL or "" yields NULL
 *   stag      element to look for (may be NULL)
 *   stop_tag  element that ends parsing (may be NULL); at least one of
 *             stag / stop_tag must be given
 *   flags     parser flags, passed to _dexml() unchanged
 *
 * Returns the root of the tree built so far. When `stag` was requested but
 * not found, the partial tree is freed and NULL is returned. On success the
 * root's source_xml is set to `wcs_xml`, so tag_free() on the tree also
 * frees the text; on a NULL return the text stays with the caller.
 */
DTag* seek_tag(wchar_t *wcs_xml, wchar_t *stag, wchar_t *stop_tag, unsigned flags)
{
  wchar_t *xml_start;
  wchar_t *cursor;
  unsigned long *wc_mb_addr;
  unsigned long mb_addr;
  unsigned long pos;
  size_t len, j;
  int skip;
  int _err_ret;
  DTag *tag_root;
  pse_param_t param;

  if (!wcs_xml || !wcs_xml[0] || (!stag && !stop_tag))
    return NULL;

  xml_start = wcs_xml;
  len = wcslen(wcs_xml);

  /* wc_mb_addr[k] is the sum of wc_mb_len() over characters 0..k-1.
     One extra entry is kept because _dexml() reads the entry just past a
     tag, which is index len when the text ends with that tag. */
  wc_mb_addr = malloc(sizeof *wc_mb_addr * (len + 1));
  if (!wc_mb_addr)
    return NULL;

  for (j = 0, mb_addr = 0; j < len; j++) {
    wc_mb_addr[j] = mb_addr;
    mb_addr += wc_mb_len(wcs_xml[j]);
  }
  wc_mb_addr[len] = mb_addr;

  /* Skip leading blanks, the <?xml ...?> header and the DOCTYPE. */
  if ((skip = str_skip_space(wcs_xml, -1)) == -1) {
    free(wc_mb_addr);
    return NULL;
  }
  cursor = str_skip_xml_header(&wcs_xml[skip]);
  if (!cursor) {
    free(wc_mb_addr);
    return NULL;
  }
  if ((skip = str_skip_space(cursor, -1)) == -1) {
    free(wc_mb_addr);
    return NULL;
  }
  cursor = str_skip_doctype(&cursor[skip]);
  if (!cursor) {
    free(wc_mb_addr);
    return NULL;
  }

  /* Parse relative to the start of the buffer and pass the prolog length
     as the starting position, so the indices used by _dexml() line up
     with wc_mb_addr (same arrangement as dexml()). */
  param.xml = xml_start;
  param.xml_len = len;
  param.wc_mb_addr = wc_mb_addr;
  param.flags = flags;
  param.seek_tag = stag;
  param.stop_tag = stop_tag;
  param.stop_parsing = 0;
  param.found = 0;

  _err_ret = CODE_NO_ERROR;
  ncall = 0;
  pos = (unsigned long) (cursor - xml_start);
  tag_root = _dexml(&param, &pos, &_err_ret, NULL);

  free(wc_mb_addr);

  if (stag && !param.found) {
    /* tag_root may be NULL here; tag_free() accepts that. No node owns
       the text yet (source_xml is still NULL everywhere), so the caller's
       buffer is left untouched. */
    tag_free(tag_root);
    return NULL;
  }

  if (tag_root)
    tag_root->source_xml = xml_start;

  return tag_root;
}

/*
 * Parse a complete XML document held in a wide-character string.
 *
 *   wcs_xml  NUL-terminated document text; NULL or "" yields NULL
 *   flags    parser flags, passed to _dexml() unchanged
 *
 * Returns the first node of the top-level node list, or NULL when the text
 * is blank, the prolog is malformed, memory runs out, or _dexml() fails.
 * On success the root's source_xml is set to `wcs_xml`, so tag_free() on
 * the tree also frees the text; on a NULL return the text stays with the
 * caller. Node names and values point into `wcs_xml`, they are not copied.
 */
DTag* dexml(wchar_t* wcs_xml, unsigned flags)
{
  wchar_t *xml_start;
  wchar_t *cursor;
  unsigned long *wc_mb_addr;
  unsigned long mb_addr;
  unsigned long pos;
  size_t len, j;
  int skip;
  int _err_ret;
  DTag *tag_root;
  pse_param_t param;

  PRDEBUG("enter")

  if (!wcs_xml || !wcs_xml[0])
    return NULL;

  xml_start = wcs_xml;
  len = wcslen(wcs_xml);

  /* wc_mb_addr[k] is the sum of wc_mb_len() over characters 0..k-1.
     One extra entry is kept because _dexml() reads the entry just past a
     tag, which is index len when the document ends with that tag. */
  wc_mb_addr = malloc(sizeof *wc_mb_addr * (len + 1));
  if (!wc_mb_addr)
    return NULL;

  for (j = 0, mb_addr = 0; j < len; j++) {
    wc_mb_addr[j] = mb_addr;
    mb_addr += wc_mb_len(wcs_xml[j]);
  }
  wc_mb_addr[len] = mb_addr;

  /* Skip leading blanks, the <?xml ...?> header and the DOCTYPE. */
  if ((skip = str_skip_space(wcs_xml, -1)) == -1) {
    free(wc_mb_addr);
    return NULL;
  }
  cursor = str_skip_xml_header(&wcs_xml[skip]);
  if (!cursor) {
    free(wc_mb_addr);
    return NULL;
  }
  if ((skip = str_skip_space(cursor, -1)) == -1) {
    free(wc_mb_addr);
    return NULL;
  }
  cursor = str_skip_doctype(&cursor[skip]);
  if (!cursor) {
    free(wc_mb_addr);
    return NULL;
  }

  /* The parser works on the whole buffer and starts at the end of the
     prolog, so its indices line up with wc_mb_addr. */
  param.xml = xml_start;
  param.xml_len = len;
  param.wc_mb_addr = wc_mb_addr;
  param.flags = flags;
  param.seek_tag = NULL;
  param.stop_tag = NULL;
  param.stop_parsing = 0;
  param.found = 0;

  ncall = 0;
  pos = (unsigned long) (cursor - xml_start);
  _err_ret = CODE_NO_ERROR;
  tag_root = _dexml(&param, &pos, &_err_ret, NULL);

  if (tag_root)
    tag_root->source_xml = xml_start;

  free(wc_mb_addr);

  PRDEBUG("exit")
  return tag_root;

}

/* File-scope scratch buffers of BUFSZ wide characters each. Being shared
   globals, they make the functions that use them non-reentrant.
   buf:      search pattern ("<name") built by find_open_tag().
   buf2:     expected closing tag ("</name>") built by _dexml().
   tag_name: not referenced by the functions in this file (parameters of
             the same name shadow it where the identifier appears). */
wchar_t buf[BUFSZ];
wchar_t buf2[BUFSZ];
wchar_t tag_name[BUFSZ];

/*
 * Recursive worker of the parser: builds the list of sibling nodes that
 * starts at *pos and descends into element bodies.
 *
 *   param    parser state shared by all recursion levels: text, its length,
 *            the offset table, flags, optional seek/stop tag names and the
 *            stop_parsing / found result flags
 *   pos      in: index to start at (NULL means 0);
 *            out: index of the closing tag that ended this level, or the
 *            end of the text
 *   err_ret  set to CODE_INVALID_XML when malformed input is detected
 *   parent   node stored as `parent` in every node created at this level
 *
 * Returns the first node of the sibling list (NULL when the level is empty
 * or on error). Nodes point into param->xml; nothing is copied. Parsing
 * stops early, with param->stop_parsing set, when an element named
 * param->stop_tag is opened or an element named param->seek_tag has been
 * read completely (param->found is set as well in the latter case). Both
 * names must match the element name exactly.
 *
 * Uses the globals ncall and buf2, so the function is not reentrant.
 */
DTag* _dexml(pse_param_t *param, unsigned long *pos, int *err_ret, DTag *parent)
{
  unsigned long i, i1, i2;
  int _l, _dq, _sq;
  wchar_t *attribs;
  DTag *tag, *tag_root, *_tag;
  int bi;
  int j;
  int buf2_len;
  int _err_ret;
  int len2;
  unsigned long i_start;
  unsigned long i_tag_start;
  unsigned long i_name_start;
  unsigned long i_attr_start;
  wchar_t *wcs_xml;
  unsigned long len;
  unsigned long *wc_mb_addr;
  unsigned flags;
  wchar_t *seek_name;
  wchar_t *stop_tag;

  if (!param)
    return NULL;

  wcs_xml = param->xml;
  len = param->xml_len;
  wc_mb_addr = param->wc_mb_addr;
  flags = param->flags;
  seek_name = param->seek_tag;
  stop_tag = param->stop_tag;


  ncall++;

  if (pos)
    i_start = *pos;
  else
    i_start = 0;

  /* Top level only: drop blanks in front of the first tag. */
  if (ncall == 1) {
    i1 = str_skip_space2(wcs_xml, i_start, len);
    if (i1 == (unsigned long) -1) {
      fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
      *err_ret = CODE_INVALID_XML;
      return NULL;
    }
    if (i1 == len)
      return NULL;
    if (wcs_xml[i1] == '<')
      i_start = i1;
  }

  tag_root = NULL;
  tag = NULL;
  i=0;
  while (1) {
    i1 = str_skip_space2(wcs_xml, i_start, len);
    if (i1 == (unsigned long) -1) {
      fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
      tag_free(tag_root);
      *err_ret = CODE_INVALID_XML;
      return NULL;
    } else if (i1 >= len) {
      /* End of text: this level is complete. */
      if (pos)
        *pos = i1;
      return tag_root;
    }

/*	if (wcs_xml[i1] != L'<' || ((flags & XML_SAVE_SPC_BODY) && i1 > i_start && i1+2 <= len && wcs_xml[i1] == L'<' && wcs_xml[i1 + 1] == L'/')) { */
    if (wcs_xml[i1] != L'<' || ((flags & XML_SAVE_SPC_BODY) && i1 > i_start && i1+2 <= len && wcs_xml[i1] == L'<')) {
      /* Text node; with XML_SAVE_SPC_BODY a blank-only run in front of a
         tag is kept as a value as well. */
      _tag = create_tag();
      if (tag)
        tag->next = _tag;
      tag = _tag;

      if (! tag_root)
        tag_root = tag;

      tag->parent = parent;

      if (wcs_xml[i1] != L'<')
        for (i2=i1+1; i2<len && wcs_xml[i2] != L'<' && wcs_xml[i2] != L'\0'; i2++);
      else
        i2 = i1;

      /* The value starts at i_start, so leading blanks are part of it. */
      tag->value = &wcs_xml[i_start];
      tag->vlen = i2 - i_start;
      tag->type = DT_Value;
      i_start = i2;
    } else {
      i_tag_start = i1;

      /* A closing tag ends this level; the caller checks its name. */
      if (wcs_xml[i_tag_start+1] == L'/') {
        if (pos)
          *pos = i_tag_start;
        return tag_root;
      }

      i_name_start = str_skip_space2(wcs_xml, i_tag_start+1, len);
      if (i_name_start == (unsigned long) -1 || i_name_start >= len-1) {
        fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
        tag_free(tag_root);
        *err_ret = CODE_INVALID_XML;
        return NULL;
      }

      len2 = get_name_len(wcs_xml, i_name_start, len);
      i_attr_start = i2 = str_skip_space2(wcs_xml, i_name_start+len2, len);
      if(i2 >= len) {
        fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
        tag_free(tag_root);
        *err_ret = CODE_INVALID_XML;
        return NULL;
      } else if (!len2) {
        /* parse comment or CDATA */

        i2 = get_comment_tag(wcs_xml, i_tag_start, len);
        if (i2 != (unsigned long) -1) {
          /* Comments produce no node. */
          i_start = i2;
        } else {
          i2 = get_cdata(wcs_xml, i_tag_start, len);
          if (i2 != (unsigned long) -1) {
            _tag = create_tag();
            if (tag)
              tag->next = _tag;
            tag = _tag;

            if (! tag_root)
              tag_root = tag;

            tag->parent = parent;

            /* 9 = length of "<![CDATA[", 12 = that plus "]]>" */
            tag->value = &wcs_xml[i_tag_start+9];
            tag->vlen = i2 - i_tag_start - 12;
            tag->type = DT_Cdata;
            i_start = i2;
          }else {
            fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
            tag_free(tag_root);
            *err_ret = CODE_INVALID_XML;
            return NULL;
          }
        }

      } else {
        /* parse tag */

        _tag = create_tag();
        if (tag)
          tag->next = _tag;
        tag = _tag;

        if (! tag_root)
          tag_root = tag;

        tag->parent = parent;

        tag->name = &wcs_xml[i_name_start];
        tag->nlen = len2;
        tag->type = DT_Tag;
        tag->pos = wc_mb_addr[i_tag_start];

        /* Find the '>' that ends the opening tag while tracking quote
           state; the quote state is not consulted when '>' is seen, only
           checked for balance afterwards. */
        _sq = _dq = 0;
        _l = 1;
        i = i2 = i_attr_start;
        while (_l) {
          switch (wcs_xml[i]) {
            case L'>':
              _l = 0;
              break;
            case L'"':
              if (!_sq)	_dq = !_dq;
              i++;
              break;
            case L'\'':
              if (!_dq) _sq = !_sq;
              i++;
              break;
            case L'\0':
              fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
              tag_free(tag_root);
              *err_ret = CODE_INVALID_XML;
              return NULL;
              break;
            default:
              i++;
              break;
          }
          if (i>= len) {
            fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
            tag_free(tag_root);
            *err_ret = CODE_INVALID_XML;
            return NULL;
          }
        }

        if (_sq || _dq) {
          fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
          tag_free(tag_root);
          *err_ret = CODE_INVALID_XML;
          return NULL;
        }

        attribs = &wcs_xml[i_attr_start];
        if (wcs_xml[i-1] == L'/') {
          /* Self-closing element: the '/' is not part of the attributes. */
          tag->attributes = atexpose(attribs, i - i_attr_start - 1);
          i_start = i+1;
        } else {
          tag->attributes = atexpose(attribs, i - i_attr_start);
        }

        /* Exact name match: the lengths must agree too, otherwise an
           element "ab" would stop a search for "abc". */
        if (stop_tag && !wcsncmp(stop_tag, tag->name, tag->nlen)
            && stop_tag[tag->nlen] == L'\0') {
          param->stop_parsing = 1;
          return tag_root;
        }

        if (wcs_xml[i-1] != L'/') {
          i++;
          _err_ret = CODE_NO_ERROR;
          tag->body = _dexml(param, &i, &_err_ret, tag);

          if (param->stop_parsing) {
            tag->len = 0;
            return tag_root;
          }

          if (!tag->body && _err_ret == CODE_INVALID_XML) {
            *err_ret = _err_ret;
            tag_free(tag_root);
            return NULL;
          }
          ncall--;

          /* Build "</name>" in buf2 and require it at the position where
             the nested call stopped. */
          bi=0;
          buf2[bi++] = L'<';
          buf2[bi++] = L'/';
          for(j=0; j<tag->nlen && bi<BUFSZ-2; j++, bi++)
            buf2[bi] = tag->name[j];
          buf2[bi++] = L'>';
          buf2[bi] = L'\0';
          buf2_len = bi;

          if (i + buf2_len > len) {
            fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
            tag_free(tag_root);
            *err_ret = CODE_INVALID_XML;
            return NULL;
          }

          if (wcsncmp(&wcs_xml[i], buf2, buf2_len)) {
            fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
            tag_free(tag_root);
            *err_ret = CODE_INVALID_XML;
            return NULL;
          }

          i += buf2_len;
          i_start = i;
        }

        /* i_start may equal len here; the offset table built by the
           callers is expected to provide that entry. */
        tag->len = wc_mb_addr[i_start] - tag->pos;

        /* Exact name match, see the stop_tag test above. */
        if (seek_name && !wcsncmp(seek_name, &wcs_xml[i_name_start], len2)
            && seek_name[len2] == L'\0') {
          param->stop_parsing = 1;
          param->found = 1;
          return tag_root;
        }
      }
    }
  }
  return tag_root;
}

/* Blank test used by space_clean(): space or horizontal tab, the same set
   that _isspace() accepts. */
static int space_clean_is_blank(wchar_t c)
{
  return c == L' ' || c == L'\t';
}

/*
 * Return a newly allocated copy of `text` with whitespace normalised:
 *   - a run of spaces/tabs becomes one space; a newline that directly
 *     follows such a run is dropped;
 *   - a newline together with the indentation after it becomes a single
 *     newline; it is dropped entirely at the start of the output, at the
 *     end of the text, and in front of another newline (so blank lines
 *     collapse).
 *
 * Returns NULL when `text` is NULL or memory cannot be allocated. The
 * caller releases the result with free().
 */
wchar_t* space_clean(wchar_t* text)
{
  size_t len, i, j;
  wchar_t *out;

  if (!text)
    return NULL;

  len = wcslen(text);
  /* The output never grows: every character written corresponds to at
     least one character consumed. */
  out = malloc(sizeof *out * (len + 1));
  if (!out)
    return NULL;

  i = 0;
  j = 0;
  while (i < len) {
    if (space_clean_is_blank(text[i])) {
      out[j++] = L' ';
      while (space_clean_is_blank(text[i]))
        i++;
      if (text[i] == L'\0')
        break;
      if (text[i] != L'\n')
        out[j++] = text[i];
      i++;
    } else if (text[i] == L'\n') {
      i++;
      while (space_clean_is_blank(text[i]))
        i++;
      if (text[i] == L'\0')
        break;
      /* Another newline: let the next round handle it. The output index
         only moves when a character is actually written, so no slot is
         ever left uninitialised. */
      if (text[i] == L'\n')
        continue;
      if (j > 0)
        out[j++] = L'\n';
      out[j++] = text[i];
      i++;
    } else {
      out[j++] = text[i++];
    }
  }
  out[j] = L'\0';
  return out;
}

/*
 * Attributes Expose: parse the attribute section of an opening tag.
 *
 *   wcs_xml  first character after the tag name (and the blanks after it)
 *   len      number of characters that belong to the attribute section;
 *            values <= 0 mean "nothing to parse"
 *
 * Returns a linked list of DAttr in document order, or NULL when there are
 * no attributes or the first one is malformed. Names and values point into
 * `wcs_xml`; nothing is copied. An attribute without a value gets
 * value == NULL and vlen == 0. Blanks around '=' are accepted. A malformed
 * attribute further down the list simply ends the list. Release the list
 * with attr_free().
 */
DAttr* atexpose(wchar_t *wcs_xml, int len)
{
  int i;
  int name_i, name_len, name_end;
  int value_i = 0, value_len = 0;
  wchar_t quote;
  DAttr* attr;

  if (!wcs_xml || len <= 0)
    return NULL;

  if ((i = str_skip_space(wcs_xml, len)) == -1) return NULL;

  /* Bounds are tested before each read so nothing past `len` is used. */
  name_i = i;
  while (i < len && islvalue(wcs_xml[i]))
    i++;
  name_end = i;
  name_len = name_end - name_i;

  while (i < len && iswspace(wcs_xml[i]))
    i++;

  if (i < len && wcs_xml[i] == L'=') {
    i++;
    while (i < len && iswspace(wcs_xml[i]))
      i++;

    if (i < len && (wcs_xml[i] == L'"' || wcs_xml[i] == L'\'')) {
      /* Quoted value: ends at the same quote character, unless that
         quote is preceded by a backslash. */
      quote = wcs_xml[i];
      value_i = ++i;
      while (i < len && wcs_xml[i] != L'\0' && !(wcs_xml[i] == quote && wcs_xml[i-1] != L'\\'))
        i++;
      if (i >= len || wcs_xml[i] == L'\0') {
        fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
        return NULL;
      }
      value_len = i - value_i;
      i++;
    } else {
      /* Unquoted value: printable ASCII except '<' and '>'. */
      value_i = i;
      while (i < len && _isalnum_full(wcs_xml[i]))
        i++;
      value_len = i - value_i;
    }
  } else if (i < len && i == name_end) {
    /* The name is followed by something that is neither a blank nor '='. */
    fwprintf(stderr, L"xml parser: error: %s:%d: attributes parsing: syntax error\n", __FUNCTION__, __LINE__);
    return NULL;
  } else if (name_len == 0) {
    /* Only stray whitespace was consumed: continue with the rest. */
    return atexpose(&wcs_xml[i], len - i);
  }

  attr = (DAttr*) malloc(sizeof(DAttr));
  if (!attr)
    return NULL;

  attr->name = &wcs_xml[name_i];
  attr->nlen = name_len;
  if (value_len > 0)
    attr->value = &wcs_xml[value_i];
  else
    attr->value = NULL;
  attr->vlen = value_len;
  attr->next = atexpose(&wcs_xml[i], len - i);
  return attr;
}

/* Release a node list: every sibling reachable through `next`, each node's
   attribute list, its body subtree and, where set, the source text kept in
   source_xml. A NULL argument is a no-op. */
void tag_free(DTag* tag)
{
  DTag *cur;
  DTag *following;

  for (cur = tag; cur != NULL; cur = following) {
    /* Remember the sibling before the node itself is released. */
    following = cur->next;

    if (cur->source_xml != NULL)
      free(cur->source_xml);

    attr_free(cur->attributes);
    tag_free(cur->body);
    free(cur);
  }
}

/* Release every element of an attribute list. Only the list nodes are
   freed; name and value are left alone. A NULL argument is a no-op. */
void attr_free(DAttr* attr)
{
  while (attr != NULL) {
    DAttr *following = attr->next;

    free(attr);
    attr = following;
  }
}

/* Return 1 for an ASCII digit or ASCII letter, 0 for anything else. */
int _isalnum(wchar_t c)
{
  if (c >= L'0' && c <= L'9')
    return 1;
  if (c >= L'A' && c <= L'Z')
    return 1;
  if (c >= L'a' && c <= L'z')
    return 1;
  return 0;
}

/*
 * Length of the tag name that starts at wcs_xml[i].
 *
 * The first character must be accepted by islvalue() or be ':', and must
 * not be a digit; following characters may also be '.' and '-'. Characters
 * at index `len` and beyond are never read.
 *
 * Returns the number of name characters, or 0 when no valid name starts at
 * `i` or the arguments are invalid (a diagnostic is printed for the
 * latter).
 */
int get_name_len(wchar_t *wcs_xml, int i, int len)
{
  int name_len;

  if (!wcs_xml || len <= 0 || i < 0 || i >= len) {
    fwprintf(stderr, L"xml parser: error: %s:%d: invalid arguments\n", __FUNCTION__, __LINE__);
    return 0;
  }

  if (!(islvalue(wcs_xml[i]) || wcs_xml[i] == L':') || _isdigit(wcs_xml[i]))
    return 0;

  name_len = 1;
  /* The bound is tested before the character is read. */
  for (i++; i < len; i++) {
    wchar_t wc = wcs_xml[i];

    if (!(islvalue(wc) || wc == L':' || wc == L'.' || wc == L'-'))
      break;
    name_len++;
  }
  return name_len;
}

/* Return 1 when wc is an ASCII decimal digit, 0 otherwise. */
int _isdigit(wchar_t wc)
{
  return (wc < L'0' || wc > L'9') ? 0 : 1;
}

/* Return 1 for a character allowed in an identifier: ASCII digit, ASCII
   letter or underscore. Returns 0 otherwise. */
int islvalue(wchar_t c)
{
  if (c == L'_')
    return 1;
  if (c >= L'0' && c <= L'9')
    return 1;
  if (c >= L'A' && c <= L'Z')
    return 1;
  if (c >= L'a' && c <= L'z')
    return 1;
  return 0;
}

/* Return 1 for a printable ASCII character ('!' through '~') other than
   '<' and '>', 0 otherwise. */
int _isalnum_full(wchar_t c)
{
  if (c < L'!' || c > L'~')
    return 0;
  if (c == L'<' || c == L'>')
    return 0;
  return 1;
}

/* Return 1 for a space or a horizontal tab, 0 otherwise. Newlines do not
   count as space here. */
int _isspace(wchar_t c)
{
  switch (c) {
    case L' ':
    case L'\t':
      return 1;
    default:
      return 0;
  }
}

/*
 * Starting at index i, skip spaces, tabs and newlines and return the index
 * of the first other character (a NUL included), or `len` when the end of
 * the range is reached first.
 *
 * When len <= 0 or i >= len, i is returned unchanged. A NULL string or a
 * negative i prints a diagnostic and returns -1.
 */
int str_skip_space2(wchar_t *wcs_xml, int i, int len)
{
  if (!wcs_xml || i < 0) {
    fwprintf(stderr, L"xml parser: error: %s:%d: invalid arguments\n", __FUNCTION__, __LINE__);
    return -1;
  }

  if (len <= 0)
    return i;

  while (i < len && (wcs_xml[i] == L' ' || wcs_xml[i] == L'\t' || wcs_xml[i] == L'\n'))
    i++;

  return i;
}


/*
 * Return the index of the first character of wcs_xml that is not a space,
 * tab or newline. Returns -1 when the terminating NUL, or index `len`, is
 * reached first. A negative `len` never equals an index, so the scan is
 * then limited by the NUL terminator only.
 */
int str_skip_space(wchar_t *wcs_xml, int len)
{
  int idx;

  for (idx = 0; idx != len; idx++) {
    wchar_t wc = wcs_xml[idx];

    if (wc == L'\0')
      return -1;
    if (wc != L' ' && wc != L'\t' && wc != L'\n')
      return idx;
  }
  return -1;
}

/*
 * Skip an XML declaration ("<?xml ... ?>") at the start of wcs_xml.
 *
 * Returns wcs_xml unchanged when it does not start with "<?xml", the
 * address of the character after the closing "?>" otherwise, or NULL (with
 * a diagnostic) when the declaration is not terminated or its closing '>'
 * is not preceded by '?'.
 *
 * A '>' inside a quoted value is skipped, and a quoted value is closed only
 * by the quote character that opened it. As elsewhere in this parser, a
 * quote preceded by a backslash is ignored.
 */
wchar_t* str_skip_xml_header(wchar_t *wcs_xml)
{
  int i;
  wchar_t quote = L'\0';   /* currently open quote character, if any */

  if (!wcs_xml)
    return NULL;

  if (wcsncmp(wcs_xml, L"<?xml", 5) != 0)
    return wcs_xml;

  for (i = 5; wcs_xml[i] != L'\0'; i++) {
    wchar_t wc = wcs_xml[i];

    if (quote != L'\0') {
      if (wc == quote && wcs_xml[i-1] != L'\\')
        quote = L'\0';
    } else if (wc == L'"' || wc == L'\'') {
      if (wcs_xml[i-1] != L'\\')
        quote = wc;
    } else if (wc == L'>') {
      if (wcs_xml[i-1] != L'?') {
        fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml header\n", __FUNCTION__, __LINE__);
        return NULL;
      }
      return &wcs_xml[i+1];
    }
  }

  fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
  return NULL;
}

/*
 * Skip a document type declaration ("<!DOCTYPE ... >") at the start of
 * wcs_xml.
 *
 * Returns wcs_xml unchanged when it does not start with "<!DOCTYPE", the
 * address of the character after the closing '>' otherwise, or NULL (with
 * a diagnostic) when the declaration is not terminated.
 *
 * A '>' inside a quoted literal or inside an internal subset ("[ ... ]")
 * does not end the declaration. A quoted literal is closed only by the
 * quote character that opened it; as elsewhere in this parser, a quote
 * preceded by a backslash is ignored. Quote characters inside comments of
 * an internal subset are not treated specially.
 */
wchar_t* str_skip_doctype(wchar_t *wcs_xml)
{
  int i;
  int depth = 0;           /* nesting level of '[' ... ']' */
  wchar_t quote = L'\0';   /* currently open quote character, if any */

  if (!wcs_xml)
    return NULL;

  if (wcsncmp(wcs_xml, L"<!DOCTYPE", 9) != 0)
    return wcs_xml;

  for (i = 9; wcs_xml[i] != L'\0'; i++) {
    wchar_t wc = wcs_xml[i];

    if (quote != L'\0') {
      if (wc == quote && wcs_xml[i-1] != L'\\')
        quote = L'\0';
    } else if (wc == L'"' || wc == L'\'') {
      if (wcs_xml[i-1] != L'\\')
        quote = wc;
    } else if (wc == L'[') {
      depth++;
    } else if (wc == L']') {
      if (depth > 0)
        depth--;
    } else if (wc == L'>' && depth == 0) {
      return &wcs_xml[i+1];
    }
  }

  fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
  return NULL;
}

/*
 * Find the first opening tag "<tag" in the first `len` characters of
 * wcs_xml that is followed by a space or '>' and is not self-closing
 * (its closing '>' is not preceded by '/').
 *
 * Returns a pointer to the '<' of that tag, or NULL when there is none or
 * the arguments are invalid (a diagnostic is printed for the latter).
 * The search pattern is built in the file-scope buffer `buf`, so the tag
 * name is cut off at BUFSZ-2 characters.
 */
wchar_t* find_open_tag(wchar_t *wcs_xml, int len, wchar_t *tag)
{
  wchar_t *hit;
  wchar_t *limit;
  int patlen;
  int k;

  if (!wcs_xml|| !tag || len <= 0) {
    fwprintf(stderr, L"xml parser: error: %s:%d: invalid arguments\n", __FUNCTION__, __LINE__);
    return NULL;
  }

  /* Build the pattern "<tag". */
  buf[0] = L'<';
  patlen = 1;
  for (k = 0; tag[k] != L'\0' && patlen < BUFSZ-1; k++)
    buf[patlen++] = tag[k];
  buf[patlen] = L'\0';

  limit = wcs_xml + len;
  for (hit = wcs_xml; ; hit++) {
/*	hit = wcsstr(hit, buf); */
    hit = wcsstr_fast(hit, buf);
    if (!hit)
      return NULL;
    if (hit + patlen > limit)
      return NULL;

    /* Require a delimiter so "<tagx" does not match "<tag". */
    if (hit[patlen] != L' ' && hit[patlen] != L'>')
      continue;

    /* Walk to the '>' that closes this tag, staying inside the range. */
    k = patlen;
    while (hit[k] != L'>') {
      if (hit + k >= limit)
        return NULL;
      k++;
    }

    if (hit[k-1] != L'/')
      return hit;
  }
}

/* Allocate a node with every field set to its "empty" state: numeric
   fields 0, pointers NULL, pos and len -1. Allocation failure is reported
   with perror() and terminates the process with exit status 1, so the
   result is never NULL. */
DTag* create_tag(void)
{
  DTag *fresh = (DTag*) malloc(sizeof(DTag));

  if (fresh == NULL) {
	perror("WTF");
	exit(1);
  }

  /* identity and payload */
  fresh->type = 0;
  fresh->sid = 0;
  fresh->name = NULL;
  fresh->nlen = 0;
  fresh->value = NULL;
  fresh->vlen = 0;
  fresh->attributes = NULL;

  /* -1 marks position and length as not yet known */
  fresh->pos = -1;
  fresh->len = -1;

  /* user data */
  fresh->udata = NULL;
  fresh->udata_type = 0;

  /* tree links */
  fresh->parent = NULL;
  fresh->body = NULL;
  fresh->next = NULL;

  fresh->source_xml = NULL;

  return fresh;
}

/*
 * If a comment ("<!-- ... -->") starts at wcs_xml[i], return the index of
 * the character that follows its closing '>'.
 *
 * Returns -1 when no comment starts at i, when fewer than 7 characters are
 * left (the shortest comment is "<!---->"), when the comment is not
 * terminated before `len`, or when the arguments are invalid; the last two
 * cases also print a diagnostic.
 */
int get_comment_tag(wchar_t *wcs_xml, int i, int len)
{
  int end;

  if (!wcs_xml || len <= 0 || i < 0 || i >= len) {
	fwprintf(stderr, L"xml parser: error: %s:%d: invalid arguments\n", __FUNCTION__, __LINE__);
	return -1;
  }

  if (len-i < 7)
	return -1;

  if (wcsncmp(&wcs_xml[i], L"<!--", 4) != 0)
	return -1;

  /* `end` is the candidate position of the '>' in "-->". */
  for (end = i+6; end < len; end++) {
	if (wcs_xml[end] == L'>' && wcs_xml[end-1] == L'-' && wcs_xml[end-2] == L'-')
	  return end+1;
  }

  fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
  return -1;
}

/*
 * If a CDATA section ("<![CDATA[ ... ]]>") starts at wcs_xml[i], return the
 * index of the character that follows its closing "]]>".
 *
 * Returns -1 when no CDATA section starts at i, when fewer than 12
 * characters are left (the shortest section is "<![CDATA[]]>"), when the
 * section is not terminated before `len`, or when the arguments are
 * invalid; the last two cases also print a diagnostic.
 */
int get_cdata(wchar_t *wcs_xml, int i, int len)
{
  int end;

  if (!wcs_xml || len <= 0 || i < 0 || i >= len) {
	fwprintf(stderr, L"xml parser: error: %s:%d: invalid arguments\n", __FUNCTION__, __LINE__);
	return -1;
  }

  if (len-i < 12)
	return -1;

  if (wcsncmp(&wcs_xml[i], L"<![CDATA[", 9) != 0)
	return -1;

  /* `end` is the candidate position of the '>' in "]]>". */
  for (end = i+11; end < len; end++) {
	if (wcs_xml[end] == L'>' && wcs_xml[end-1] == L']' && wcs_xml[end-2] == L']')
	  return end+1;
  }

  fwprintf(stderr, L"xml parser: error: %s:%d: invalid xml\n", __FUNCTION__, __LINE__);
  return -1;
}

/*
 * Return the node that follows tag_root in a traversal steered by `flags`:
 *   S_DEEP  go to the body (first child) when there is one;
 *   S_DOWN  otherwise go to the next sibling when there is one;
 *   S_UP    otherwise climb through the parents and take the next sibling
 *           of the first ancestor that has one.
 * tag_vroot bounds the traversal: neither its siblings nor its ancestors
 * are visited. Returns NULL when there is no such node.
 */
DTag* tag_rnext(DTag *tag_vroot, DTag *tag_root, unsigned flags)
{
  DTag *ancestor;

  if (!tag_root)
	return NULL;

  if ((flags & S_DEEP) && tag_root->body)
	return tag_root->body;

  /* The boundary node may be entered but never left sideways or upwards. */
  if (tag_root == tag_vroot)
	return NULL;

  if ((flags & S_DOWN) && tag_root->next)
	return tag_root->next;

  if (!(flags & S_UP))
	return NULL;

  for (ancestor = tag_root->parent; ancestor; ancestor = ancestor->parent) {
	if (ancestor == tag_vroot)
	  return NULL;
	if (ancestor->next)
	  return ancestor->next;
  }

  return NULL;
}

/*
 * Search for an element named tag_name, starting at tag_root.
 *
 *   tag_vroot  during a search with S_UP set, this node is an upper
 *              boundary that is not crossed when moving up in the
 *              hierarchy. Pass NULL when no boundary is needed.
 *   tag_root   node to start from; NULL yields NULL
 *   tag_name   NUL-terminated name to look for; NULL yields NULL
 *   flags      S_SKIP: start at the node after tag_root (see tag_rnext());
 *              S_DOWN / S_DEEP: as understood by _tag_seek();
 *              S_UP: when the current level is exhausted, continue with
 *              the next sibling of the nearest ancestor that has one.
 *
 * Returns the matching node or NULL.
 */
DTag* tag_seek(DTag *tag_vroot, DTag *tag_root, wchar_t *tag_name, unsigned flags)
{
  DTag *tag_ret;

  if (!tag_root || !tag_name)
	return NULL;

  if (flags & S_SKIP) {
	tag_root = tag_rnext(tag_vroot, tag_root, flags);
	/* Nothing follows the start node: without this test the climbing
	   loop below would dereference NULL when S_UP is set and
	   tag_vroot is not NULL. */
	if (!tag_root)
	  return NULL;
  }

  /* Siblings of the boundary node are outside the search area. */
  if (tag_root == tag_vroot)
	flags &= ~S_DOWN;

  for (;;) {
	tag_ret = _tag_seek(tag_root, tag_name, flags);
	if (tag_ret || !(flags & S_UP))
	  return tag_ret;

	if (tag_root == tag_vroot)
	  return NULL;

	/* Climb to the nearest ancestor that has a next sibling. */
	while ((tag_root = tag_root->parent)) {
	  if (tag_root == tag_vroot)
		return NULL;
	  if (tag_root->next) {
		tag_root = tag_root->next;
		break;
	  }
	}
	if (!tag_root)
	  return NULL;
  }
}

/*
 * Look for an element whose name equals tag_name.
 *
 *   tag_root  first node to examine (NULL yields NULL)
 *   tag_name  NUL-terminated name to look for (NULL yields NULL)
 *   flags     S_DOWN: continue with the following siblings;
 *             S_DEEP: also search the body of every element, depth first
 *             (siblings inside a body are always examined).
 *
 * The name must match exactly: the lengths have to agree as well, so an
 * element "ab" is not returned for the name "abc". Returns the first match
 * or NULL.
 */
DTag* _tag_seek(DTag* tag_root, wchar_t *tag_name, unsigned flags)
{
  DTag *tag;
  DTag *hit;

  if (!tag_name)
	return NULL;

  for (tag = tag_root; tag; tag = tag->next) {
	if (tag->type == DT_Tag) {
	  /* tag->name is not NUL-terminated: compare nlen characters, then
	     make sure the requested name ends there too. */
	  if (wcsncmp(tag->name, tag_name, tag->nlen) == 0 && tag_name[tag->nlen] == L'\0')
		return tag;

	  if (flags & S_DEEP) {
		hit = _tag_seek(tag->body, tag_name, flags | S_DOWN);
		if (hit)
		  return hit;
	  }
	}

	/* Without S_DOWN only the first node is examined. */
	if (!(flags & S_DOWN))
	  return NULL;
  }
  return NULL;
}

/*
 * Return 1 when `tag` is an element (DT_Tag) whose name equals the
 * NUL-terminated string `str`, 0 otherwise (including NULL arguments).
 * The comparison is exact: an element "ab" does not match "abc".
 */
int is_tag_name(DTag *tag, wchar_t *str)
{
  if (!tag || !str)
	return 0;

  if (tag->type != DT_Tag)
	return 0;

  /* tag->name is not NUL-terminated: compare nlen characters, then make
     sure `str` ends there too. */
  if (wcsncmp(tag->name, str, tag->nlen) == 0 && str[tag->nlen] == L'\0')
	return 1;

  return 0;
}

/*
static timestamp_t get_timestamp()
{
  struct timeval now;
  gettimeofday(&now, NULL);
  return now.tv_usec + (timestamp_t)now.tv_sec * 1000000;
}
*/

/* Return the size in bytes that stat() reports for `path`, or -1 when
   stat() fails. */
long xml_file_size(char* path)
{
  struct stat info;
  int rc = stat(path, &info);

  if (rc == -1)
	return -1;

  return info.st_size;
}

