/*
 * wordlib.c
 * Copyright (C) 1998-2001 A.J. van Os; Released under GPL
 *
 * Description:
 * Deal with the internals of a MS Word file
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "antiword.h"

/*
 * Private type for Property Set Storage entries
 *
 * bGetPPS() fills one element per entry it reads; the offsets mentioned
 * below are the positions inside the raw entry from which bGetPPS()
 * takes each field.
 */
typedef struct pps_entry_tag {
	char	szName[32];	/* entry name as narrowed by vName2String() */
	int	iType;	/* byte at 0x42; 5 selects the root, 2 a stream candidate */
	int	iNext;	/* long at 0x48; index of another entry, -1 when absent */
	int	iPrev;	/* long at 0x44; index of another entry, -1 when absent */
	int	iDir;	/* long at 0x4c; index of an entry one level deeper, or -1 */
	long	lSb;	/* long at 0x74; presumably the start block - confirm */
	long	lSize;	/* long at 0x78; entries with lSize <= 0 are ignored */
	int	iLevel;	/* INT_MAX until vComputePPSlevels() assigns a level */
} pps_entry_type;


/*
 * Macro to make sure all such statements will be identical
 *
 * Used by iInitDocument(): it refers to that function's local pointers
 * alRootList, alSbdList, alBbdList, alSBD and alBBD by name. The small
 * block list is destroyed and every pointer is replaced by the value
 * returned from xfree().
 */
#define FREE_ALL()		\
	do {\
		vDestroySmallBlockList();\
		alRootList = xfree(alRootList);\
		alSbdList = xfree(alSbdList);\
		alBbdList = xfree(alBbdList);\
		alSBD = xfree(alSBD);\
		alBBD = xfree(alBBD);\
	} while(0)


/*
 * ulReadLong - fetch the four-byte value stored at lOffset in pFile
 *
 * The bytes are obtained with bReadBytes() and decoded with ulGetLong().
 * When the read fails werr() is called with a first argument of 1
 * (presumably fatal - confirm against werr's definition).
 */
static unsigned long
ulReadLong(FILE *pFile, long lOffset)
{
	unsigned char	aucRaw[4];

	fail(pFile == NULL || lOffset < 0);

	if (bReadBytes(aucRaw, sizeof(aucRaw), lOffset, pFile)) {
		return ulGetLong(0, aucRaw);
	}
	werr(1, "Read long %ld not possible", lOffset);
	return ulGetLong(0, aucRaw);
} /* end of ulReadLong */

/*
 * vName2String - turn the name into a proper string.
 *
 * szName	destination; the caller must provide room for at least
 *		iNameSize characters (one character when iNameSize <= 0)
 * aucBytes	source; every second byte, starting with the first, is kept
 * iNameSize	number of characters to produce, terminator included
 *
 * The last character produced is always replaced by a NUL.
 */
static void
vName2String(char *szName, const unsigned char *aucBytes, int iNameSize)
{
	int	iPos;

	fail(aucBytes == NULL || szName == NULL);

	if (iNameSize <= 0) {
		szName[0] = '\0';
		return;
	}
	iPos = 0;
	while (iPos < iNameSize) {
		/* Only the first byte of each two-byte pair is used */
		szName[iPos] = (char)aucBytes[2 * iPos];
		iPos++;
	}
	szName[iNameSize - 1] = '\0';
} /* end of vName2String */

/*
 * tReadBlockIndices - read the Big/Small Block Depot indices
 *
 * One big block is read from lOffset and split into four-byte indices.
 * At most tMaxRec of them (and never more than fit in one big block) are
 * stored in alBlockDepot.
 *
 * Returns the number of indices read, or 0 when the block is unreadable
 */
static size_t
tReadBlockIndices(FILE *pFile, long *alBlockDepot, size_t tMaxRec, long lOffset)
{
	unsigned char	aucBlock[BIG_BLOCK_SIZE];
	size_t	tCount, tPerBlock;
	int	iEntry, iByte;

	fail(pFile == NULL || alBlockDepot == NULL);
	fail(tMaxRec == 0);
	fail(lOffset < 0);

	/* Read a big block with BBD or SBD indices */
	if (!bReadBytes(aucBlock, BIG_BLOCK_SIZE, lOffset, pFile)) {
		werr(0, "Reading big block from %ld is not possible", lOffset);
		return 0;
	}

	/* An index is four bytes, so this is the capacity of one block */
	tPerBlock = (size_t)BIG_BLOCK_SIZE / 4;
	if (tMaxRec < tPerBlock) {
		tCount = tMaxRec;
	} else {
		tCount = tPerBlock;
	}

	iEntry = 0;
	while (iEntry < (int)tCount) {
		iByte = 4 * iEntry;
		alBlockDepot[iEntry] = (long)ulGetLong(iByte, aucBlock);
		NO_DBG_DEC(alBlockDepot[iEntry]);
		iEntry++;
	}
	return tCount;
} /* end of tReadBlockIndices */

/*
 * bGetBBD - get the Big Block Depot indices from the index-blocks
 *
 * alDepot holds tDepotLen block numbers; each of those blocks is read in
 * turn and its indices are appended to alBBD until tBBDLen indices have
 * been collected.
 *
 * Returns TRUE when exactly tBBDLen indices were collected, FALSE when a
 * block could not be read or the depot list ran out first
 */
static BOOL
bGetBBD(FILE *pFile, const long *alDepot, size_t tDepotLen,
	long *alBBD, size_t tBBDLen)
{
	long	lBlockStart;
	size_t	tRemaining, tRead;
	int	iBlock;

	fail(pFile == NULL || alDepot == NULL || alBBD == NULL);

	DBG_MSG("bGetBBD");

	tRemaining = tBBDLen;
	iBlock = 0;
	while (iBlock < (int)tDepotLen && tRemaining != 0) {
		/* Block numbers do not count the block at the file start */
		lBlockStart = (alDepot[iBlock] + 1) * BIG_BLOCK_SIZE;
		NO_DBG_HEX(lBlockStart);
		tRead = tReadBlockIndices(pFile, alBBD,
						tRemaining, lBlockStart);
		fail(tRead > tRemaining);
		if (tRead == 0) {
			return FALSE;
		}
		alBBD += tRead;
		tRemaining -= tRead;
		iBlock++;
	}
	return tRemaining == 0;
} /* end of bGetBBD */

/*
 * bGetSBD - get the Small Block Depot indices from the index-blocks
 *
 * alDepot holds tDepotLen block numbers; the indices found in those
 * blocks are stored consecutively in alSBD until tSBDLen indices are
 * present.
 *
 * Returns TRUE when exactly tSBDLen indices were stored, FALSE when a
 * block could not be read or the depot list ran out first
 */
static BOOL
bGetSBD(FILE *pFile, const long *alDepot, size_t tDepotLen,
	long *alSBD, size_t tSBDLen)
{
	long	lBlockStart;
	size_t	tFilled, tRead;
	int	iBlock;

	fail(pFile == NULL || alDepot == NULL || alSBD == NULL);

	DBG_MSG("bGetSBD");

	tFilled = 0;
	for (iBlock = 0; iBlock < (int)tDepotLen; iBlock++) {
		if (tFilled == tSBDLen) {
			/* Everything that was asked for is present */
			break;
		}
		lBlockStart = (alDepot[iBlock] + 1) * BIG_BLOCK_SIZE;
		NO_DBG_HEX(lBlockStart);
		tRead = tReadBlockIndices(pFile, alSBD + tFilled,
					tSBDLen - tFilled, lBlockStart);
		fail(tRead > tSBDLen - tFilled);
		if (tRead == 0) {
			return FALSE;
		}
		tFilled += tRead;
	}
	return tFilled == tSBDLen;
} /* end of bGetSBD */

/*
 * vComputePPSlevels - compute the levels of the Property Set Storage entries
 *
 * atPPSlist		all entries
 * pNode		the entry to label
 * iLevel		the level to give to pNode
 * iRecursionLevel	current depth of the recursion
 *
 * The entry reached through iDir gets iLevel + 1, the entries reached
 * through iNext and iPrev get iLevel. An entry is only (re)labelled when
 * that lowers its level, and the recursion stops beyond depth 25.
 */
static void
vComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode,
			int iLevel, int iRecursionLevel)
{
	int	aiLink[3], aiLinkLevel[3];
	int	iLink;

	fail(atPPSlist == NULL || pNode == NULL);
	fail(iLevel < 0 || iRecursionLevel < 0);

	if (iRecursionLevel > 25) {
		/* This removes the possibility of an infinite recursion */
		DBG_DEC(iRecursionLevel);
		return;
	}
	if (pNode->iLevel <= iLevel) {
		/* Avoid entering a loop */
		DBG_DEC(iLevel);
		DBG_DEC(pNode->iLevel);
		return;
	}

	pNode->iLevel = iLevel;

	/* The links are visited in this order: iDir, iNext, iPrev */
	aiLink[0] = pNode->iDir;
	aiLinkLevel[0] = iLevel + 1;
	aiLink[1] = pNode->iNext;
	aiLinkLevel[1] = iLevel;
	aiLink[2] = pNode->iPrev;
	aiLinkLevel[2] = iLevel;

	for (iLink = 0; iLink < 3; iLink++) {
		if (aiLink[iLink] == -1) {
			/* No entry on this side */
			continue;
		}
		vComputePPSlevels(atPPSlist,
				atPPSlist + aiLink[iLink],
				aiLinkLevel[iLink],
				iRecursionLevel + 1);
	}
} /* end of vComputePPSlevels */

/*
 * bGetPPS - search the Property Set Storage for the Word streams
 *
 * pFile	the file to read from
 * alRootList	the big blocks that hold the Property Set Storage entries
 * tRootListLen	the number of elements in alRootList
 * pPPS		cleared first; receives lSb and lSize of the level 1
 *		entries named "WordDocument", "Data", "0Table" and "1Table"
 *
 * Every entry is read and checked, the entries are given a level relative
 * to the root entry (type 5) and the level 1 entries of type 2 are then
 * searched by name.
 *
 * Return TRUE if the WordDocument PPS is found
 */
static BOOL
bGetPPS(FILE *pFile,
	const long *alRootList, size_t tRootListLen, pps_info_type *pPPS)
{
	pps_entry_type	*atPPSlist;
	long	lBegin, lTmp;
	size_t	tNbrOfPPS;
	int	iIndex, iStartBlock, iOffset;
	int	iNameSize, iRootIndex;
	BOOL	bWord, bExcel;
	unsigned char	aucBytes[PROPERTY_SET_STORAGE_SIZE];

	fail(pFile == NULL || pPPS == NULL || alRootList == NULL);

	DBG_MSG("bGetPPS");
	NO_DBG_DEC(tRootListLen);

	bWord = FALSE;
	bExcel = FALSE;
	(void)memset(pPPS, 0, sizeof(*pPPS));

	/* Read and store all the Property Set Storage entries */
	tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE;
	atPPSlist = xmalloc(tNbrOfPPS * sizeof(pps_entry_type));
	iRootIndex = 0;
	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
		/* Locate the entry: which root block, and where inside it */
		lTmp = (long)iIndex * PROPERTY_SET_STORAGE_SIZE;
		iStartBlock = (int)(lTmp / BIG_BLOCK_SIZE);
		iOffset = (int)(lTmp % BIG_BLOCK_SIZE);
		lBegin = (alRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE +
			iOffset;
		NO_DBG_HEX(lBegin);
		if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE,
							lBegin, pFile)) {
			werr(0, "Reading PPS %d is not possible", iIndex);
			atPPSlist = xfree(atPPSlist);
			return FALSE;
		}
		/* The stored size is in bytes, two bytes per character */
		iNameSize = (int)usGetWord(0x40, aucBytes);
		iNameSize = (iNameSize + 1) / 2;
		if (iNameSize > (int)sizeof(atPPSlist[iIndex].szName)) {
			/*
			 * The size comes straight from the file. Without
			 * this limit a damaged or hostile entry makes
			 * vName2String write beyond szName and read
			 * beyond aucBytes.
			 */
			DBG_DEC(iNameSize);
			iNameSize = (int)sizeof(atPPSlist[iIndex].szName);
		}
		vName2String(atPPSlist[iIndex].szName, aucBytes, iNameSize);
		atPPSlist[iIndex].iType = (int)ucGetByte(0x42, aucBytes);
		if (atPPSlist[iIndex].iType == 5) {
			/* Type 5 is the entry the levels are relative to */
			iRootIndex = iIndex;
		}
		atPPSlist[iIndex].iPrev = (int)ulGetLong(0x44, aucBytes);
		atPPSlist[iIndex].iNext = (int)ulGetLong(0x48, aucBytes);
		atPPSlist[iIndex].iDir = (int)ulGetLong(0x4c, aucBytes);
		atPPSlist[iIndex].lSb = (long)ulGetLong(0x74, aucBytes);
		atPPSlist[iIndex].lSize = (long)ulGetLong(0x78, aucBytes);
		atPPSlist[iIndex].iLevel = INT_MAX;
		/* Links are used as array indices later on: -1 or in range */
		if (atPPSlist[iIndex].iPrev < -1 ||
		    atPPSlist[iIndex].iPrev >= (int)tNbrOfPPS ||
		    atPPSlist[iIndex].iNext < -1 ||
		    atPPSlist[iIndex].iNext >= (int)tNbrOfPPS ||
		    atPPSlist[iIndex].iDir < -1 ||
		    atPPSlist[iIndex].iDir >= (int)tNbrOfPPS) {
			DBG_DEC(iIndex);
			DBG_DEC(atPPSlist[iIndex].iPrev);
			DBG_DEC(atPPSlist[iIndex].iNext);
			DBG_DEC(atPPSlist[iIndex].iDir);
			DBG_DEC(tNbrOfPPS);
			werr(0, "The Property Set Storage is damaged");
			atPPSlist = xfree(atPPSlist);
			return FALSE;
		}
	}

#if 0 /* defined(DEBUG) */
	DBG_MSG("Before");
	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
		DBG_MSG(atPPSlist[iIndex].szName);
		DBG_HEX(atPPSlist[iIndex].iDir);
		DBG_HEX(atPPSlist[iIndex].iPrev);
		DBG_HEX(atPPSlist[iIndex].iNext);
		DBG_DEC(atPPSlist[iIndex].lSb);
		DBG_HEX(atPPSlist[iIndex].lSize);
		DBG_DEC(atPPSlist[iIndex].iLevel);
	}
#endif /* DEBUG */

	/* Add level information to each entry */
	vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0);

	/* Check the entries on level 1 for the required information */
	NO_DBG_MSG("After");
	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
#if 0 /* defined(DEBUG) */
		DBG_MSG(atPPSlist[iIndex].szName);
		DBG_HEX(atPPSlist[iIndex].iDir);
		DBG_HEX(atPPSlist[iIndex].iPrev);
		DBG_HEX(atPPSlist[iIndex].iNext);
		DBG_DEC(atPPSlist[iIndex].lSb);
		DBG_HEX(atPPSlist[iIndex].lSize);
		DBG_DEC(atPPSlist[iIndex].iLevel);
#endif /* DEBUG */
		if (atPPSlist[iIndex].iLevel != 1 ||
		    atPPSlist[iIndex].iType != 2 ||
		    atPPSlist[iIndex].szName[0] == '\0' ||
		    atPPSlist[iIndex].lSize <= 0) {
			continue;
		}
		/* Of entries with the same name only the first one counts */
		if (pPPS->tWordDocument.lSize <= 0 &&
		    STREQ(atPPSlist[iIndex].szName, "WordDocument")) {
			pPPS->tWordDocument.lSb = atPPSlist[iIndex].lSb;
			pPPS->tWordDocument.lSize = atPPSlist[iIndex].lSize;
			bWord = TRUE;
		} else if (pPPS->tData.lSize <= 0 &&
			   STREQ(atPPSlist[iIndex].szName, "Data")) {
			pPPS->tData.lSb = atPPSlist[iIndex].lSb;
			pPPS->tData.lSize = atPPSlist[iIndex].lSize;
		} else if (pPPS->t0Table.lSize <= 0 &&
			   STREQ(atPPSlist[iIndex].szName, "0Table")) {
			pPPS->t0Table.lSb = atPPSlist[iIndex].lSb;
			pPPS->t0Table.lSize = atPPSlist[iIndex].lSize;
		} else if (pPPS->t1Table.lSize <= 0 &&
			   STREQ(atPPSlist[iIndex].szName, "1Table")) {
			pPPS->t1Table.lSb = atPPSlist[iIndex].lSb;
			pPPS->t1Table.lSize = atPPSlist[iIndex].lSize;
		} else if (STREQ(atPPSlist[iIndex].szName, "Book") ||
			   STREQ(atPPSlist[iIndex].szName, "Workbook")) {
			/* Only used to give a more helpful message below */
			bExcel = TRUE;
		}
	}

	/* Free the space for the Property Set Storage entries */
	atPPSlist = xfree(atPPSlist);

	/* Draw your conclusions */
	if (bWord) {
		return TRUE;
	}
	if (bExcel) {
		werr(0, "Sorry, but this is an Excel spreadsheet");
	} else {
		werr(0, "This OLE file does not contain a Word document");
	}
	return FALSE;
} /* end of bGetPPS */

/*
 * vGetBbdList - make a list of the places to find big blocks
 *
 * Reads iNbr consecutive four-byte values from pFile, starting at
 * lOffset, and stores them in alBbdList[0] .. alBbdList[iNbr - 1].
 * Nothing is read when iNbr is zero or negative.
 */
static void
vGetBbdList(FILE *pFile, int iNbr, long *alBbdList, long lOffset)
{
	long	lPosition;
	int	iEntry;

	fail(pFile == NULL);
	fail(iNbr > 127);
	fail(alBbdList == NULL);
	fail(lOffset < 0);

	NO_DBG_DEC(iNbr);
	iEntry = 0;
	while (iEntry < iNbr) {
		/* Each list entry occupies four bytes in the file */
		lPosition = lOffset + 4 * (long)iEntry;
		alBbdList[iEntry] = (long)ulReadLong(pFile, lPosition);
		NO_DBG_DEC(iEntry);
		NO_DBG_HEX(alBbdList[iEntry]);
		iEntry++;
	}
} /* end of vGetBbdList */

/*
 * bGetDocumentText - make a list of the text blocks of a Word document
 *
 * Return TRUE when succesful, otherwise FALSE
 */
static BOOL
bGetDocumentText(FILE *pFile, const pps_info_type *pPPS,
	const long *alBBD, size_t tBBDLen,
	const long *alSBD, size_t tSBDLen,
	const unsigned char *aucHeader, int iWordVersion)
{
	long	lBeginOfText;
	long	lTextLen, lFootnoteLen, lEndnoteLen;
	long	lHeaderLen, lMacroLen, lAnnotationLen;
	long	lTextBoxLen, lHdrTextBoxLen;
	unsigned int	uiQuickSaves;
	BOOL	bFarEastWord, bFastSaved, bEncrypted, bSuccess;
	unsigned short	usDocStatus, usIdent;

	fail(pFile == NULL || pPPS == NULL);
	fail(alBBD == NULL);
	fail(alSBD == NULL);

	DBG_MSG("bGetDocumentText");

	/* Get the "magic number" from the header */
	usIdent = usGetWord(0x00, aucHeader);
	DBG_HEX(usIdent);
	bFarEastWord = usIdent == 0x8098 || usIdent == 0x8099 ||
			usIdent == 0xa697 || usIdent == 0xa699;
	/* Get the status flags from the header */
	usDocStatus = usGetWord(0x0a, aucHeader);
	DBG_HEX(usDocStatus);
	bFastSaved = (usDocStatus & BIT(2)) != 0;
	uiQuickSaves = (usDocStatus & 0x00f0) >> 4;
	DBG_MSG_C(bFastSaved, "This document is Fast Saved");
	DBG_DEC_C(bFastSaved, uiQuickSaves);
	bEncrypted = (usDocStatus & BIT(8)) != 0;
	if (bEncrypted) {
		werr(0, "Encrypted documents are not supported");
		return FALSE;
	}

	/* Get length information */
	lBeginOfText = (long)ulGetLong(0x18, aucHeader);
	DBG_HEX(lBeginOfText);
	if (iWordVersion == 6 || iWordVersion == 7) {
		lTextLen = (long)ulGetLong(0x34, aucHeader);
		lFootnoteLen = (long)ulGetLong(0x38, aucHeader);
		lHeaderLen = (long)ulGetLong(0x3c, aucHeader);
		lMacroLen = (long)ulGetLong(0x40, aucHeader);
		lAnnotationLen = (long)ulGetLong(0x44, aucHeader);
		lEndnoteLen = (long)ulGetLong(0x48, aucHeader);
		lTextBoxLen = (long)ulGetLong(0x4c, aucHeader);
		lHdrTextBoxLen = (long)ulGetLong(0x50, aucHeader);
	} else {
		lTextLen = (long)ulGetLong(0x4c, aucHeader);
		lFootnoteLen = (long)ulGetLong(0x50, aucHeader);
		lHeaderLen = (long)ulGetLong(0x54, aucHeader);
		lMacroLen = (long)ulGetLong(0x58, aucHeader);
		lAnnotationLen = (long)ulGetLong(0x5c, aucHeader);
		lEndnoteLen = (long)ulGetLong(0x60, aucHeader);
		lTextBoxLen = (long)ulGetLong(0x64, aucHeader);
		lHdrTextBoxLen = (long)ulGetLong(0x68, aucHeader);
	}
	DBG_DEC(lTextLen);
	DBG_DEC(lFootnoteLen);
	DBG_DEC(lHeaderLen);
	DBG_DEC(lMacroLen);
	DBG_DEC(lAnnotationLen);
	DBG_DEC(lEndnoteLen);
	DBG_DEC(lTextBoxLen);
	DBG_DEC(lHdrTextBoxLen);

	/* Make a list of the text blocks */
	switch (iWordVersion) {
	case 6:
	case 7:
		if (bFastSaved) {
			bSuccess = bGet6DocumentText(pFile,
					bFarEastWord,
					pPPS->tWordDocument.lSb,
					alBBD, tBBDLen,
					aucHeader);
		} else {
		  	bSuccess = bAddTextBlocks(lBeginOfText,
				lTextLen +
				lFootnoteLen +
				lHeaderLen + lMacroLen + lAnnotationLen +
				lEndnoteLen +
				lTextBoxLen + lHdrTextBoxLen,
				bFarEastWord,
				IGNORE_PROPMOD,
				pPPS->tWordDocument.lSb,
				alBBD, tBBDLen);
		}
		break;
	case 8:
		bSuccess = bGet8DocumentText(pFile,
				pPPS,
				alBBD, tBBDLen, alSBD, tSBDLen,
				aucHeader);
		break;
	default:
		werr(0, "This version of Word is not supported");
		bSuccess = FALSE;
		break;
	}

	if (bSuccess) {
		vSplitBlockList(lTextLen,
				lFootnoteLen,
				lHeaderLen + lMacroLen + lAnnotationLen,
				lEndnoteLen,
				lTextBoxLen + lHdrTextBoxLen,
				!bFastSaved && iWordVersion == 8);
	} else {
		vDestroyTextBlockList();
		werr(0, "I can't find the text of this document");
	}
	return bSuccess;
} /* end of bGetDocumentText */

/*
 * vGetDocumentData - make a list of the data blocks of a Word document
 *
 * The data block list is emptied and nothing else is done when the
 * status word in the header reports no images, when outline fonts are
 * not in use or when the image level option asks for no images. The
 * list is also emptied when the data blocks cannot be found.
 */
static void
vGetDocumentData(FILE *pFile, const pps_info_type *pPPS,
	const long *alBBD, size_t tBBDLen,
	const unsigned char *aucHeader, int iWordVersion)
{
	options_type	tOptions;
	long	lBeginOfText;
	BOOL	bFastSaved, bHasImages, bImagesWanted, bFound;
	unsigned short	usDocStatus;

	fail(pFile == NULL);
	fail(pPPS == NULL);
	fail(alBBD == NULL);

	/* Get the options */
	vGetOptions(&tOptions);

	/* Get the status flags from the header */
	usDocStatus = usGetWord(0x0a, aucHeader);
	DBG_HEX(usDocStatus);
	bFastSaved = (usDocStatus & BIT(2)) != 0;
	bHasImages = (usDocStatus & BIT(3)) != 0;

	bImagesWanted = bHasImages &&
			tOptions.bUseOutlineFonts &&
			tOptions.eImageLevel != level_no_images;
	if (!bImagesWanted) {
		/*
		 * No images in the document or text-only output or
		 * no images wanted, so no data blocks will be needed
		 */
		vDestroyDataBlockList();
		return;
	}

	/* Get length information */
	lBeginOfText = (long)ulGetLong(0x18, aucHeader);
	DBG_HEX(lBeginOfText);

	/* Make a list of the data blocks */
	if (iWordVersion == 6 || iWordVersion == 7) {
		/*
		 * The data blocks are in the text stream. The text stream
		 * is in "fast saved" format or "normal saved" format
		 */
		if (bFastSaved) {
			bFound = bGet6DocumentData(pFile,
					pPPS->tWordDocument.lSb,
					alBBD, tBBDLen,
					aucHeader);
		} else {
			bFound = bAddDataBlocks(lBeginOfText, LONG_MAX,
				pPPS->tWordDocument.lSb, alBBD, tBBDLen);
		}
	} else if (iWordVersion == 8) {
		/*
		 * The data blocks are in the data stream. The data stream
		 * is always in "normal saved" format
		 */
		bFound = bAddDataBlocks(0, LONG_MAX,
				pPPS->tData.lSb, alBBD, tBBDLen);
	} else {
		werr(0, "This version of Word is not supported");
		bFound = FALSE;
	}

	if (bFound) {
		return;
	}
	vDestroyDataBlockList();
	werr(0, "I can't find the data of this document");
} /* end of vGetDocumentData */

/*
 * iInitDocument - initialize the document
 *
 * pFile	the opened document
 * lFilesize	the size of the document in bytes
 *
 * Reads the block bookkeeping from the start of the file, builds the Big
 * Block Depot, the Small Block Depot and the root list, looks up the Word
 * streams with bGetPPS() and then collects the text, the data, the
 * default tab width, the property information and the notes information.
 * All depots and lists allocated here are released before returning.
 *
 * Returns the version of Word that made the document or -1
 */
int
iInitDocument(FILE *pFile, long lFilesize)
{
	pps_info_type	PPS_info;
	long	*alBBD, *alSBD;
	long	*alRootList, *alBbdList, *alSbdList;
	long	lRootStartblock, lSbdStartblock, lSBLstartblock;
	long	lAdditionalBBDlist;
	long	lBdbListStart, lMaxBlock, lTmp;
	size_t	tBBDLen, tSBDLen, tNumBbdBlocks, tRootListLen;
	int	iWordVersion, iIndex, iStart, iToGo;
	int	iMaxSmallBlock;
	BOOL	bSuccess;
	unsigned short	usIdent;
	unsigned char	aucHeader[HEADER_SIZE];

	fail(pFile == NULL);

	/* The first block of the file is not part of the numbered blocks */
	lMaxBlock = lFilesize / BIG_BLOCK_SIZE - 2;
	DBG_DEC(lMaxBlock);
	if (lMaxBlock < 1) {
		return -1;
	}
	tBBDLen = (size_t)(lMaxBlock + 1);
	tNumBbdBlocks = (size_t)ulReadLong(pFile, 0x2c);
	DBG_DEC(tNumBbdBlocks);
	lRootStartblock = (long)ulReadLong(pFile, 0x30);
	DBG_DEC(lRootStartblock);
	lSbdStartblock = (long)ulReadLong(pFile, 0x3c);
	DBG_DEC(lSbdStartblock);
	lAdditionalBBDlist = (long)ulReadLong(pFile, 0x44);
	DBG_DEC(lAdditionalBBDlist);
	DBG_HEX(lAdditionalBBDlist);
	lSBLstartblock = (long)ulReadLong(pFile,
		(lRootStartblock + 1) * BIG_BLOCK_SIZE + 0x74);
	DBG_DEC(lSBLstartblock);
	iMaxSmallBlock = (int)(ulReadLong(pFile,
		(lRootStartblock + 1) *
		BIG_BLOCK_SIZE + 0x78) / SMALL_BLOCK_SIZE) - 1;
	DBG_DEC(iMaxSmallBlock);
	tSBDLen = (size_t)(iMaxSmallBlock + 1);
	/* All to be xmalloc-ed pointers to NULL */
	alRootList = NULL;
	alSbdList = NULL;
	alBbdList = NULL;
	alSBD = NULL;
	alBBD = NULL;
/* Big Block Depot */
	if (tNumBbdBlocks > tBBDLen) {
		/*
		 * The depot blocks are blocks of this file, so there can
		 * never be more of them than there are blocks. The count
		 * comes straight from the file; refusing it here keeps
		 * the allocation and the list filling below in step.
		 */
		DBG_DEC(tNumBbdBlocks);
		DBG_DEC(tBBDLen);
		werr(0, "The Big Block Depot is damaged");
		FREE_ALL();
		return -1;
	}
	alBbdList = xmalloc(tNumBbdBlocks * sizeof(long));
	if (tNumBbdBlocks != 0) {
		/*
		 * When the chain of additional lists ends early the tail
		 * is never written, but bGetBBD may still read it. Make
		 * sure it reads zeros instead of indeterminate values.
		 */
		(void)memset(alBbdList, 0, tNumBbdBlocks * sizeof(long));
	}
	alBBD = xmalloc(tBBDLen * sizeof(long));
	iToGo = (int)tNumBbdBlocks;
	/* The first 109 list entries are stored at 0x4c in the first block */
	vGetBbdList(pFile, min(iToGo, 109),  alBbdList, 0x4c);
	iStart = 109;
	iToGo -= 109;
	while (lAdditionalBBDlist != END_OF_CHAIN && iToGo > 0) {
		/* 127 more entries, followed by the number of the next block */
		lBdbListStart = (lAdditionalBBDlist + 1) * BIG_BLOCK_SIZE;
		vGetBbdList(pFile, min(iToGo, 127),
					alBbdList + iStart, lBdbListStart);
		lAdditionalBBDlist = (long)ulReadLong(pFile,
					lBdbListStart + 4 * 127);
		DBG_DEC(lAdditionalBBDlist);
		DBG_HEX(lAdditionalBBDlist);
		iStart += 127;
		iToGo -= 127;
	}
	if (!bGetBBD(pFile, alBbdList, tNumBbdBlocks, alBBD, tBBDLen)) {
		FREE_ALL();
		return -1;
	}
	alBbdList = xfree(alBbdList);
/* Small Block Depot */
	alSbdList = xmalloc(tBBDLen * sizeof(long));
	/*
	 * Only the entries of the chain followed below are written, yet
	 * bGetSBD is given the full length. Zero the list so a chain that
	 * is too short does not lead to reading indeterminate values.
	 */
	(void)memset(alSbdList, 0, tBBDLen * sizeof(long));
	alSBD = xmalloc(tSBDLen * sizeof(long));
	for (iIndex = 0, lTmp = lSbdStartblock;
	     iIndex < (int)tBBDLen && lTmp != END_OF_CHAIN;
	     iIndex++, lTmp = alBBD[lTmp]) {
		/* lTmp must be checked before it is used to index alBBD */
		if (lTmp < 0 || lTmp >= (long)tBBDLen) {
			DBG_DEC(lTmp);
			DBG_DEC(tBBDLen);
			werr(1, "The Big Block Depot is damaged");
		}
		alSbdList[iIndex] = lTmp;
		NO_DBG_HEX(alSbdList[iIndex]);
	}
	if (!bGetSBD(pFile, alSbdList, tBBDLen, alSBD, tSBDLen)) {
		FREE_ALL();
		return -1;
	}
	alSbdList = xfree(alSbdList);
/* Root list */
	/* First pass: count the blocks in the chain */
	for (tRootListLen = 0, lTmp = lRootStartblock;
	     tRootListLen < tBBDLen && lTmp != END_OF_CHAIN;
	     tRootListLen++, lTmp = alBBD[lTmp]) {
		if (lTmp < 0 || lTmp >= (long)tBBDLen) {
			DBG_DEC(lTmp);
			DBG_DEC(tBBDLen);
			werr(1, "The Big Block Depot is damaged");
		}
	}
	if (tRootListLen == 0) {
		werr(0, "No Rootlist found");
		FREE_ALL();
		return -1;
	}
	/* Second pass: store the block numbers */
	alRootList = xmalloc(tRootListLen * sizeof(long));
	for (iIndex = 0, lTmp = lRootStartblock;
	     iIndex < (int)tBBDLen && lTmp != END_OF_CHAIN;
	     iIndex++, lTmp = alBBD[lTmp]) {
		if (lTmp < 0 || lTmp >= (long)tBBDLen) {
			DBG_DEC(lTmp);
			DBG_DEC(tBBDLen);
			werr(1, "The Big Block Depot is damaged");
		}
		alRootList[iIndex] = lTmp;
		NO_DBG_DEC(alRootList[iIndex]);
	}
	fail(tRootListLen != (size_t)iIndex);
	bSuccess = bGetPPS(pFile, alRootList, tRootListLen, &PPS_info);
	alRootList = xfree(alRootList);
	if (!bSuccess) {
		FREE_ALL();
		return -1;
	}
/* Small block list */
	if (!bCreateSmallBlockList(lSBLstartblock, alBBD, tBBDLen)) {
		FREE_ALL();
		return -1;
	}

	if (PPS_info.tWordDocument.lSize < MIN_SIZE_FOR_BBD_USE) {
		DBG_DEC(PPS_info.tWordDocument.lSize);
		FREE_ALL();
		werr(0, "I'm afraid the text stream of this file "
			"is too small to handle.");
		return -1;
	}
	/* Read the headerblock */
	if (!bReadBuffer(pFile, PPS_info.tWordDocument.lSb,
			alBBD, tBBDLen, BIG_BLOCK_SIZE,
			aucHeader, 0, HEADER_SIZE)) {
		FREE_ALL();
		return -1;
	}
	usIdent = usGetWord(0x00, aucHeader);
	DBG_HEX(usIdent);
	fail(usIdent != 0x8098 &&	/* Word 7 for oriental languages */
	     usIdent != 0x8099 &&	/* Word 7 for oriental languages */
	     usIdent != 0xa5dc &&	/* Word 6 & 7 */
	     usIdent != 0xa5ec &&	/* Word 7 & 97 & 98 */
	     usIdent != 0xa697 &&	/* Word 7 for oriental languages */
	     usIdent != 0xa699);	/* Word 7 for oriental languages */
	iWordVersion = iGetVersionNumber(aucHeader);
	if (iWordVersion < 6) {
		FREE_ALL();
		werr(0, "This file is from a version of Word before Word 6.");
		return -1;
	}

	bSuccess = bGetDocumentText(pFile, &PPS_info,
			alBBD, tBBDLen, alSBD, tSBDLen,
			aucHeader, iWordVersion);
	if (bSuccess) {
		/* The remaining information is only useful with the text */
		vGetDocumentData(pFile, &PPS_info,
			alBBD, tBBDLen, aucHeader, iWordVersion);
		vSetDefaultTabWidth(pFile, &PPS_info,
			alBBD, tBBDLen, alSBD, tSBDLen,
			aucHeader, iWordVersion);
		vGetPropertyInfo(pFile, &PPS_info,
			alBBD, tBBDLen, alSBD, tSBDLen,
			aucHeader, iWordVersion);
		vGetNotesInfo(pFile, &PPS_info,
			alBBD, tBBDLen, alSBD, tSBDLen,
			aucHeader, iWordVersion);
	}
	FREE_ALL();
	return bSuccess ? iWordVersion : -1;
} /* end of iInitDocument */

/*
 * vFreeDocument - free a document by free-ing its parts
 *
 * Calls the destroy routine of every per-document list and table: the
 * text and data block lists, the row, style, font and picture information
 * lists, the property modifier list, the notes information lists and the
 * font table. The routines are called in the order shown.
 */
void
vFreeDocument(void)
{
	DBG_MSG("vFreeDocument");

	/* Free the memory */
	vDestroyTextBlockList();
	vDestroyDataBlockList();
	vDestroyRowInfoList();
	vDestroyStyleInfoList();
	vDestroyFontInfoList();
	vDestroyPicInfoList();
	vDestroyPropModList();
	vDestroyNotesInfoLists();
	vDestroyFontTable();
} /* end of vFreeDocument */

/*
 * Common part of the file checking functions
 *
 * The file is rewound and its first tBytes bytes are compared, one at a
 * time, with aucBytes.
 *
 * Returns TRUE when all bytes match, FALSE at the first difference or
 * when the file ends too soon
 */
static BOOL
bCheckBytes(FILE *pFile, const unsigned char *aucBytes, size_t tBytes)
{
	int	iIndex, iChar;

	fail(pFile == NULL || aucBytes == NULL || tBytes == 0);

	rewind(pFile);

	iIndex = 0;
	while (iIndex < (int)tBytes) {
		iChar = getc(pFile);
		if (iChar != EOF && iChar == (int)aucBytes[iIndex]) {
			/* This byte is fine, on to the next one */
			iIndex++;
			continue;
		}
		DBG_HEX(iChar);
		DBG_HEX(aucBytes[iIndex]);
		return FALSE;
	}
	return TRUE;
} /* end of bCheckBytes */

/*
 * This function checks whether the given file is or is not a Word6 (or later)
 * document
 *
 * The file must be at least three big blocks long, its size must be a
 * multiple of the big block size and it must start with the eight bytes
 * d0 cf 11 e0 a1 b1 1a e1.
 */
BOOL
bIsSupportedWordFile(FILE *pFile, long lFilesize)
{
	static unsigned char	aucStart[] =
		{ 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };

	if (pFile != NULL && lFilesize >= 0) {
		if (lFilesize >= (long)BIG_BLOCK_SIZE * 3 &&
		    lFilesize % BIG_BLOCK_SIZE == 0) {
			return bCheckBytes(pFile,
					aucStart, elementsof(aucStart));
		}
		DBG_DEC(lFilesize);
		DBG_MSG("File size mismatch");
		return FALSE;
	}
	DBG_MSG("No proper file given");
	return FALSE;
} /* end of bIsSupportedWordFile */

/*
 * This function checks whether the given file is or is not a "Word2, 4, 5"
 * document
 *
 * The start of the file is compared with six known eight-byte patterns,
 * in table order; the first match ends the search.
 */
BOOL
bIsWord245File(FILE *pFile)
{
	static unsigned char	aucStart[6][8] = {
		{ 0x31, 0xbe, 0x00, 0x00, 0x00, 0xab, 0x00, 0x00 },
		{ 0xdb, 0xa5, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x04 },
		{ 0xdb, 0xa5, 0x2d, 0x00, 0x31, 0x40, 0x09, 0x08 },
		{ 0xdb, 0xa5, 0x2d, 0x00, 0x31, 0x40, 0x09, 0x0c },
		{ 0xfe, 0x37, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00 },
		{ 0xfe, 0x37, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00 },
	};
	int	iPattern;
	BOOL	bMatch;

	DBG_MSG("bIsWord245File");

	bMatch = FALSE;
	for (iPattern = 0;
	     !bMatch && iPattern < (int)elementsof(aucStart);
	     iPattern++) {
		bMatch = bCheckBytes(pFile,
				aucStart[iPattern],
				elementsof(aucStart[iPattern]));
	}
	return bMatch;
} /* end of bIsWord245File */

/*
 * This function checks whether the given file is or is not a RTF document
 *
 * A file counts as RTF when it starts with the six characters {\rtf1
 */
BOOL
bIsRtfFile(FILE *pFile)
{
	static unsigned char	aucStart[] =
		{ '{', '\\', 'r', 't', 'f', '1' };
	BOOL	bMatch;

	DBG_MSG("bIsRtfFile");

	bMatch = bCheckBytes(pFile, aucStart, elementsof(aucStart));
	return bMatch;
} /* end of bIsRtfFile */
