windows-nt/Source/XPSP1/NT/enduser/netmeeting/as/cpi32/gdc.cpp

1456 lines
40 KiB
C++
Raw Normal View History

2020-09-26 03:20:57 -05:00
#include "precomp.h"
//
// GDC.CPP
// General Data Compressor
//
// Copyright(c) Microsoft 1997-
//
#define MLZ_FILE_ZONE ZONE_NET
//
// Tables used by the compression / decompression algorithms
//
//
// Extra length bits carried after each length code.  Codes 0..7 encode
// lengths directly; codes 8..15 are followed by 1..8 extra bits.
//
const BYTE s_gdcExLenBits[GDC_LEN_SIZE] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8
};
//
// Base length for each length code; the extra bits (above) are added to
// this base to form the final length value.
//
const WORD s_gdcLenBase[GDC_LEN_SIZE] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 14, 22, 38, 70, 134, 262
};
//
// Dist: Bits, Coded, Decoded
//
// Bit count of each distance code.  Shorter codes are assigned to the
// smaller (more frequent) distances.
//
const BYTE s_gdcDistBits[GDC_DIST_SIZE] =
{
    2, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
};
//
// Bit pattern emitted for each distance code (low s_gdcDistBits[i] bits).
//
const BYTE s_gdcDistCode[GDC_DIST_SIZE] =
{
    0x03, 0x0d, 0x05, 0x19, 0x09, 0x11, 0x01, 0x3e,
    0x1e, 0x2e, 0x0e, 0x36, 0x16, 0x26, 0x06, 0x3a,
    0x1a, 0x2a, 0x0a, 0x32, 0x12, 0x22, 0x42, 0x02,
    0x7c, 0x3c, 0x5c, 0x1c, 0x6c, 0x2c, 0x4c, 0x0c,
    0x74, 0x34, 0x54, 0x14, 0x64, 0x24, 0x44, 0x04,
    0x78, 0x38, 0x58, 0x18, 0x68, 0x28, 0x48, 0x08,
    0xf0, 0x70, 0xb0, 0x30, 0xd0, 0x50, 0x90, 0x10,
    0xe0, 0x60, 0xa0, 0x20, 0xc0, 0x40, 0x80, 0x00
};
//
// Len: Bits, Coded, Decoded
//
// Bit count of each length code.
//
const BYTE s_gdcLenBits[GDC_LEN_SIZE] =
{
    3, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7
};
//
// Bit pattern emitted for each length code (low s_gdcLenBits[i] bits).
//
const BYTE s_gdcLenCode[GDC_LEN_SIZE] =
{
    0x05, 0x03, 0x01, 0x06, 0x0A, 0x02, 0x0C, 0x14,
    0x04, 0x18, 0x08, 0x30, 0x10, 0x20, 0x40, 0x00
};
//
// GDC_Init()
//
// BOGUS LAURABU:
// Having one global scratch compression buffer is lousy in multiple
// conference situations. Maybe allocate it or use caching scheme in
// future, then get rid of mutex.
//
void GDC_Init(void)
{
UINT i, j, k;
DebugEntry(GDC_Init);
//
// Set up the binary data used for PDC compression. We 'calculate'
// these since putting this in raw const data is too complicated!
// The LitBits/LitCodes arrays have 774 entries each, and
// the LenBits/DistBits arrays have 256 entries.
//
// Non-compressed chars take 9 bits in the compressed version: one
// bit (zero) to indicate that what follows is not a distance/size
// code, then the 8 bits of the char.
//
for (k = 0; k < GDC_DECODED_SIZE; k++)
{
s_gdcLitBits[k] = 9;
s_gdcLitCode[k] = (WORD)(k << 1);
}
for (i = 0; i < GDC_LEN_SIZE; i++)
{
for (j = 0; j < (1U << s_gdcExLenBits[i]); j++, k++)
{
s_gdcLitBits[k] = (BYTE)(s_gdcLenBits[i] + s_gdcExLenBits[i] + 1);
s_gdcLitCode[k] = (WORD)((j << (s_gdcLenBits[i] + 1)) |
(s_gdcLenCode[i] << 1) | 1);
}
}
GDCCalcDecode(s_gdcLenBits, s_gdcLenCode, GDC_LEN_SIZE, s_gdcLenDecode);
GDCCalcDecode(s_gdcDistBits, s_gdcDistCode, GDC_DIST_SIZE, s_gdcDistDecode);
DebugExitVOID(GDC_Init);
}
//
// GDCCalcDecode()
// This calculates 'const' arrays for s_gdcLenDecode and s_gdcDistDecode.
//
void GDCCalcDecode
(
const BYTE * pSrcBits,
const BYTE * pSrcCodes,
UINT cSrc,
LPBYTE pDstDecodes
)
{
UINT j;
UINT Incr;
int i;
DebugEntry(GDC_CalcDecode);
for (i = cSrc-1; i >= 0; i--)
{
Incr = 1 << pSrcBits[i];
j = pSrcCodes[i];
do
{
pDstDecodes[j] = (BYTE)i;
j += Incr;
}
while (j < GDC_DECODED_SIZE);
}
DebugExitVOID(GDC_CalcDecode);
}
//
// Optimize compilation for speed (not space)
//
#pragma optimize ("s", off)
#pragma optimize ("t", on)
//
// GDC_Compress()
// Compresses data based on different options.
// This compresses data using PKZIP for both persistent and non-persistent
// types. The differences between the algorithms are few:
// * Persistent compression is never used for sources > 4096 bytes
// * We copy in & update saved dictionary data before starting
// * We copy back updated dictionary data after ending
// * One byte of the used DistBits is used for PDC, 2 bytes for
// plain PKZIP compression in the resulting compressed packet.
//
//
// GDC_Compress()
//
// Compresses cbSrcSize bytes at pSrc into pDst, using pWorkBuf as the
// GDC_IMPLODE scratch area.  If pDictionary is non-NULL, persistent
// (PDC) compression is used and the dictionary contents are restored
// before and saved back after compression.  On entry *pcbDstSize is the
// destination capacity; on successful return it holds the compressed
// size.  Returns FALSE if the compressed output would not fit.
//
BOOL GDC_Compress
(
    PGDC_DICTIONARY pDictionary,    // NULL if not persistent
    UINT            Options,        // Not meaningful if pDictionary
    LPBYTE          pWorkBuf,
    LPBYTE          pSrc,
    UINT            cbSrcSize,
    LPBYTE          pDst,
    UINT *          pcbDstSize
)
{
    BOOL            rc = FALSE;
    UINT            Len;
    UINT            cbRaw;
    UINT            Passes;
    LPBYTE          pCur;
    LPBYTE          pMax;
    PGDC_IMPLODE    pgdcImp;
#ifdef _DEBUG
    UINT            cbSrcOrg;
#endif // _DEBUG
    DebugEntry(GDC_Compress);
    pgdcImp = (PGDC_IMPLODE)pWorkBuf;
    ASSERT(pgdcImp);
#ifdef _DEBUG
    cbSrcOrg = cbSrcSize;
#endif // _DEBUG
    //
    // Figure out what size dictionary to use.
    //
    if (pDictionary)
        pgdcImp->cbDictSize = GDC_DATA_MAX;
    else if (Options == GDCCO_MAXSPEED)
    {
        //
        // Use the smallest for max speed.
        //
        pgdcImp->cbDictSize = GDC_DATA_SMALL;
    }
    else
    {
        ASSERT(Options == GDCCO_MAXCOMPRESSION);
        //
        // Use the nearest dictionary size to the source size.
        //
        if (cbSrcSize <= GDC_DATA_SMALL)
            pgdcImp->cbDictSize = GDC_DATA_SMALL;
        else if (cbSrcSize <= GDC_DATA_MEDIUM)
            pgdcImp->cbDictSize = GDC_DATA_MEDIUM;
        else
            pgdcImp->cbDictSize = GDC_DATA_MAX;
    }
    //
    // How many bits of distance are needed to back the dictionary size
    // # of bytes?
    //
    switch (pgdcImp->cbDictSize)
    {
        case GDC_DATA_SMALL:
            pgdcImp->ExtDistBits = EXT_DIST_BITS_MIN;
            break;
        case GDC_DATA_MEDIUM:
            pgdcImp->ExtDistBits = EXT_DIST_BITS_MEDIUM;
            break;
        case GDC_DATA_MAX:
            pgdcImp->ExtDistBits = EXT_DIST_BITS_MAC;
            break;
    }
    pgdcImp->ExtDistMask = 0xFFFF >> (16 - pgdcImp->ExtDistBits);
    //
    // We need at least 4 bytes (2 max for ExtDistBits, 2 for EOF code).
    //
    ASSERT(*pcbDstSize > 4);
    //
    // Now save the destination info in our struct.  That way we can just
    // pass a pointer to our GDC_IMPLODE routine around with everything
    // we need.
    //
    pgdcImp->pDst = pDst;
    pgdcImp->cbDst = *pcbDstSize;
    //
    // For non PDC compression, the first little-endian WORD is the ExtDistBits
    // used in decompression.  For PDC compression, just the first BYTE is
    // the ExtDistBits.
    //
    if (!pDictionary)
    {
        // High byte of the WORD is always zero (ExtDistBits is 4..6).
        *(pgdcImp->pDst)++ = 0;
        --(pgdcImp->cbDst);
    }
    *(pgdcImp->pDst)++ = (BYTE)pgdcImp->ExtDistBits;
    --(pgdcImp->cbDst);
    //
    // Since pDst could be huge, we don't zero it all out before using.
    // As the pointer into the destination advances, we zero out a byte
    // just before we start writing bits into it.
    //
    pgdcImp->iDstBit = 0;
    *(pgdcImp->pDst) = 0;
    //
    // Now, if we have a dictionary, restore the contents into our scratch
    // buffer.
    //
    if (pDictionary && pDictionary->cbUsed)
    {
        TRACE_OUT(("Restoring %u dictionary bytes before compression",
            pDictionary->cbUsed));
        //
        // NOTE:  the data saved in pDictionary->pData is front aligned.
        // But the data in RawData is end aligned so that we can slide up
        // new data chunk by chunk when compressing.  Therefore only copy
        // the part that is valid, but make it end at the back of the
        // space for the dictionary data.
        //
        ASSERT(pDictionary->cbUsed <= pgdcImp->cbDictSize);
        memcpy(pgdcImp->RawData + GDC_MAXREP + pgdcImp->cbDictSize - pDictionary->cbUsed,
            pDictionary->pData, pDictionary->cbUsed);
        pgdcImp->cbDictUsed = pDictionary->cbUsed;
    }
    else
    {
        pgdcImp->cbDictUsed = 0;
    }
    //
    // We only compress GDC_DATA_MAX bytes at a time.  Therefore we have
    // this loop to grab at most that amount each time around.  Since we
    // only persistently compress packets <= GDC_DATA_MAX, we should never
    // go through it more than once for that compression type.  But normal
    // compression, you betcha since the max packet size is 32K.
    //
    Passes = 0;
    pCur = pgdcImp->RawData + GDC_MAXREP + pgdcImp->cbDictSize;
    do
    {
        //
        // cbRaw will either be GDC_DATA_MAX (if source has >= that to go)
        // or remainder.  Copy that much uncompressed data into our
        // working RawData buffer in the 'new data' space.
        //
        ASSERT(cbSrcSize);
        cbRaw = min(cbSrcSize, GDC_DATA_MAX);
        memcpy(pgdcImp->RawData + GDC_MAXREP + pgdcImp->cbDictSize,
            pSrc, cbRaw);
        pSrc += cbRaw;
        cbSrcSize -= cbRaw;
        //
        // Now get a pointer just past the end of the data we read.  Well,
        // almost.  We fed in cbRaw bytes starting at GDC_MAXREP +
        // pgdcImp->cbDictSize.  So unless this is the last chunk of raw
        // data to process, pMax is GDC_MAXREP before the end of the
        // new raw data.
        //
        // NOTE that in several of the functions that follow, we read
        // a byte or two past the end and the beginning of the valid new
        // raw data.  THIS IS INTENTIONAL.
        //
        // Doing so is the only way to get the beginning and ending bytes
        // indexed, since the hash function uses TWO bytes.  We won't
        // GPF because of padding in our RawData buffer.
        //
        pMax = pgdcImp->RawData + pgdcImp->cbDictSize + cbRaw;
        if (!cbSrcSize)
        {
            pMax += GDC_MAXREP;
        }
        else
        {
            //
            // This better NOT be persistent compression, since we don't
            // let you compress packets bigger than the chunk size we
            // process (GDC_DATA_MAX).
            //
            ASSERT(!pDictionary);
        }
        //
        // Generate the sort buffer, which orders the raw data according
        // to an index calculated using pairs of contiguous bytes that
        // occur within it.  Without a dictionary yet, the first pass
        // only indexes the current chunk.  With a dictionary (a second or
        // greater pass--or PERSISTENT COMPRESSION has saved enough data
        // from last time), we look back into the previous chunk (what we
        // call the dictionary).
        //
        // This takes longer since we go through more bytes, but produces
        // better results.  Hence the dictionary size controls the speed/
        // resulting size.
        //
        switch (Passes)
        {
            case 0:
            {
                if (pgdcImp->cbDictUsed > GDC_MAXREP)
                {
                    //
                    // On the zeroth pass, cbDictUsed is always ZERO
                    // for non-persistent PKZIP.
                    //
                    ASSERT(pDictionary);
                    GDCSortBuffer(pgdcImp, pCur - pgdcImp->cbDictUsed + GDC_MAXREP,
                        pMax + 1);
                }
                else
                {
                    GDCSortBuffer(pgdcImp, pCur, pMax + 1);
                }
                ++Passes;
                //
                // After completing a pass we slide the raw data up into
                // the dictionary slot, bumping out the older dictionary
                // data.
                //
                // For dictionaries smaller than GDC_DATA_MAX, skip the
                // Passes == 1 case entirely (jump straight to default
                // next time around).
                //
                if (pgdcImp->cbDictSize != GDC_DATA_MAX)
                {
                    ASSERT(pgdcImp->cbDictUsed == 0);
                    ASSERT(!pDictionary);
                    ++Passes;
                }
            }
            break;
            case 1:
            {
                //
                // Start sorting GDC_MAXREP bytes after the start.  NOTE
                // that this is exactly what PERSISTENT compression does
                // on the zeroth pass--it acts like we already have
                // dictionary data, using the bytes from the last time
                // we compressed something.
                //
                GDCSortBuffer(pgdcImp, pCur - pgdcImp->cbDictSize + GDC_MAXREP,
                    pMax + 1);
                ++Passes;
            }
            break;
            default:
            {
                //
                // Start sort from the beginning of the dictionary.
                // This works because we copy raw data around before
                // starting the next pass.
                //
                GDCSortBuffer(pgdcImp, pCur - pgdcImp->cbDictSize, pMax + 1);
            }
            break;
        }
        //
        // Now compress the raw data chunk we are working on.
        //
        while (pCur < pMax)
        {
            Len = GDCFindRep(pgdcImp, pCur);
SkipFindRep:
            //
            // A run of length GDC_MINREP that reaches far back is not
            // worth encoding; emit a literal byte instead.
            //
            if (!Len || (Len == GDC_MINREP && pgdcImp->Distance >= GDC_DECODED_SIZE))
            {
                if (!GDCOutputBits(pgdcImp, s_gdcLitBits[*pCur],
                        s_gdcLitCode[*pCur]))
                    DC_QUIT;
                pCur++;
                continue;
            }
            //
            // Only do this if we're on the last chunk
            //
            if (!cbSrcSize && (pCur + Len > pMax))
            {
                //
                // Peg run size so it doesn't go past end of raw data.
                //
                Len = (UINT)(pMax - pCur);
                if ((Len < GDC_MINREP) ||
                    (Len == GDC_MINREP && pgdcImp->Distance >= GDC_DECODED_SIZE))
                {
                    if (!GDCOutputBits(pgdcImp, s_gdcLitBits[*pCur],
                            s_gdcLitCode[*pCur]))
                        DC_QUIT;
                    pCur++;
                    continue;
                }
            }
            else if ((Len < 8) && (pCur + 1 < pMax))
            {
                UINT    Save_Distance;
                UINT    Save_Len;
                //
                // Make temp copies of Distance and Len so we can
                // look ahead and see if a better compression run is
                // looming.  If so, we won't bother starting it here,
                // we'll grab the better one next time around.
                //
                Save_Distance = pgdcImp->Distance;
                Save_Len = Len;
                Len = GDCFindRep(pgdcImp, pCur + 1);
                if ((Len > Save_Len) &&
                    ((Len > Save_Len + 1) || (Save_Distance > (GDC_DECODED_SIZE/2))))
                {
                    // The run one byte ahead is better; emit this byte
                    // as a literal and take that run instead.
                    if (!GDCOutputBits(pgdcImp, s_gdcLitBits[*pCur],
                            s_gdcLitCode[*pCur]))
                        DC_QUIT;
                    ++pCur;
                    goto SkipFindRep;
                }
                //
                // Put back old Len and Distance, we'll take this one.
                //
                Len = Save_Len;
                pgdcImp->Distance = Save_Distance;
            }
            //
            // Emit the length code (lengths live at 256 + Len - GDC_MINREP
            // in the literal tables), then the distance.
            //
            if (!GDCOutputBits(pgdcImp, s_gdcLitBits[256 + Len - GDC_MINREP],
                    s_gdcLitCode[256 + Len - GDC_MINREP]))
                DC_QUIT;
            if (Len == GDC_MINREP)
            {
                //
                // GDC_MINREP is 2, so we right shift Distance by 2
                // (divide by 4).  Then we mask out the last 2 bits
                // of Distance.
                //
                if (!GDCOutputBits(pgdcImp,
                        s_gdcDistBits[pgdcImp->Distance >> GDC_MINREP],
                        s_gdcDistCode[pgdcImp->Distance >> GDC_MINREP]))
                    DC_QUIT;
                if (!GDCOutputBits(pgdcImp, GDC_MINREP, (WORD)(pgdcImp->Distance & 3)))
                    DC_QUIT;
            }
            else
            {
                if (!GDCOutputBits(pgdcImp,
                        s_gdcDistBits[pgdcImp->Distance >> pgdcImp->ExtDistBits],
                        s_gdcDistCode[pgdcImp->Distance >> pgdcImp->ExtDistBits]))
                    DC_QUIT;
                if (!GDCOutputBits(pgdcImp, (WORD)pgdcImp->ExtDistBits,
                        (WORD)(pgdcImp->Distance & pgdcImp->ExtDistMask)))
                    DC_QUIT;
            }
            pCur += Len;
        }
        if (cbSrcSize)
        {
            //
            // There's more data to process.  Here's where we slide up the
            // current raw data into the dictionary space.  This is simply
            // the final cbDictSize + GDC_MAXREP bytes of data.  It
            // begins GDC_DATA_MAX after the start of the buffer.
            //
            // For example, if the dict size is 1K, the current data goes
            // from 1K to 5K, and we slide up the data from 4K to 5K.
            //
            memcpy(pgdcImp->RawData, pgdcImp->RawData + GDC_DATA_MAX,
                pgdcImp->cbDictSize + GDC_MAXREP);
            //
            // Now move our raw data pointer back and update the
            // dictionary used amount.  Since we have GDC_DATA_MAX of data,
            // we fill the dictionary completely.
            //
            pCur -= GDC_DATA_MAX;
            pgdcImp->cbDictUsed = pgdcImp->cbDictSize;
        }
    }
    while (cbSrcSize);
    //
    // Add the end code.
    //
    if (!GDCOutputBits(pgdcImp, s_gdcLitBits[EOF_CODE], s_gdcLitCode[EOF_CODE]))
        DC_QUIT;
    //
    // Return the resulting compressed data size.
    //
    // NOTE that partial bits are already in the destination.  But we
    // need to account for any in the total size.
    //
    if (pgdcImp->iDstBit)
        ++(pgdcImp->pDst);
    *pcbDstSize = (UINT)(pgdcImp->pDst - pDst);
    //
    // We're done.  If we have a persistent dictionary, copy back our
    // last block of raw data into it.  We only copy as much as is actually
    // valid however.
    //
    // We can only get here on successful compression.  NOTE that we do not
    // wipe out our dictionary on failure like we used to.  This helps us
    // by permitting better compression the next time.  The receiver will
    // be OK, since his receive dictionary won't be altered upon reception
    // of a non-compressed packet.
    //
    if (pDictionary)
    {
        pDictionary->cbUsed = min(pgdcImp->cbDictSize, pgdcImp->cbDictUsed + cbRaw);
        TRACE_OUT(("Copying back %u dictionary bytes after compression",
            pDictionary->cbUsed));
        memcpy(pDictionary->pData, pgdcImp->RawData + GDC_MAXREP +
            pgdcImp->cbDictSize + cbRaw - pDictionary->cbUsed,
            pDictionary->cbUsed);
    }
    TRACE_OUT(("%sCompressed %u bytes to %u",
        (pDictionary ? "PDC " : ""), cbSrcOrg, *pcbDstSize));
    rc = TRUE;
DC_EXIT_POINT:
    if (!rc && !pgdcImp->cbDst)
    {
        TRACE_OUT(("GDC_Compress: compressed size is bigger than decompressed size %u.",
            cbSrcOrg));
    }
    DebugExitBOOL(GDC_Compress, rc);
    return(rc);
}
//
// GDCSortBuffer()
//
//
// Counting sort over the two-byte hashes of the raw data in
// [pStart, pEnd).  Afterwards, SortArray holds RawData offsets ordered
// by ascending hash (stable), and HashArray[h] is the index of the
// first SortArray slot for hash h.
//
void GDCSortBuffer
(
    PGDC_IMPLODE    pgdcImp,
    LPBYTE          pStart,
    LPBYTE          pEnd
)
{
    WORD        total;
    WORD *      pCount;
    LPBYTE      pScan;
    DebugEntry(GDCSortBuffer);
    ASSERT(pStart >= pgdcImp->RawData + pgdcImp->cbDictSize - pgdcImp->cbDictUsed);
    //
    // Pass 1: histogram.  Hash each byte together with its successor
    // (GDC_HASHFN looks at a two-byte window) and count how many times
    // each of the GDC_HASH_SIZE hash values occurs.
    //
    ZeroMemory(pgdcImp->HashArray, sizeof(pgdcImp->HashArray));
    pScan = pStart;
    do
    {
        ++(pgdcImp->HashArray[GDC_HASHFN(pScan)]);
    }
    while (++pScan < pEnd);
    //
    // Pass 2: prefix sums.  Convert each count into a cumulative total
    // of the occurrences of all hash values up to and including itself.
    //
    total = 0;
    pCount = pgdcImp->HashArray;
    do
    {
        total += *pCount;
        *pCount = total;
    }
    while (++pCount < pgdcImp->HashArray + GDC_HASH_SIZE);
    //
    // Pass 3: scatter.  Walk the data backwards.  Each byte we see must
    // have been counted above, so its slot's cumulative total is >= 1.
    // Pre-decrement that total and use it as the SortArray index for
    // this byte's RawData offset.  Scanning backwards means bytes with
    // equal hashes land in SortArray in the same order they appear in
    // RawData, and when we finish, each HashArray entry points at the
    // first SortArray slot for its hash.
    //
    pScan = pEnd - 1;
    do
    {
        pCount = pgdcImp->HashArray + GDC_HASHFN(pScan);
        ASSERT(*pCount > 0);
        --(*pCount);
        //
        // SortArray entries are offsets from the start of RawData.
        //
        pgdcImp->SortArray[*pCount] = (WORD)(pScan - pgdcImp->RawData);
    }
    while (--pScan >= pStart);
    DebugExitVOID(GDCSortBuffer);
}
//
// GDCFindRep
//
// This looks for byte patterns in the uncompressed data that can be
// represented in the compressed data with smaller sequences. The biggest
// wins come from repeating byte sequences; later sequences can be
// compressed into a few bytes referring to an earlier sequence (how big,
// how many bytes back).
//
// This returns the length of the uncompressed data to be replaced.
//
//
// GDCFindRep()
//
// Searches earlier data (including the dictionary) for the longest byte
// sequence matching the data at pDataStart, using the hash/sort tables
// built by GDCSortBuffer.  On success, sets pgdcImp->Distance to how far
// back the match starts and returns its length (>= GDC_MINREP); returns
// 0 when no worthwhile match exists.  For candidate runs longer than
// KMP_THRESHOLD it switches to a Knuth-Morris-Pratt style scan using
// pgdcImp->Next as the failure table.
//
UINT GDCFindRep
(
    PGDC_IMPLODE    pgdcImp,
    LPBYTE          pDataStart
)
{
    UINT        CurLen;
    UINT        Len;
    LPBYTE      pDataPat;
    LPBYTE      pData;
    UINT        iDataMin;
    UINT        SortIndex;
    LPBYTE      pDataMax;
    UINT        HashVal;
    UINT        i1;
    short       j1;
    LPBYTE      pBase;
    DebugEntry(GDCFindRep);
    //
    // See GDCSortBuffer for a description of the contents of the
    // Index array.  GDC_HASHFN() returns a hash value for a byte
    // using it and its successor in the uncompressed data stream.
    //
    HashVal = GDC_HASHFN(pDataStart);
    ASSERT(HashVal < GDC_HASH_SIZE);
    SortIndex = pgdcImp->HashArray[HashVal];
    //
    // Find the minimum sort buffer value.  This is the offset of the
    // first byte of data.
    //
    iDataMin = (UINT)(pDataStart - pgdcImp->cbDictSize + 1 - pgdcImp->RawData);
    if (pgdcImp->SortArray[SortIndex] < iDataMin)
    {
        //
        // The SortArray is referencing stale data, data that is no
        // longer in the range we are processing.  Move forward until
        // we hit the first entry that's in the current chunk.
        //
        do
        {
            ++SortIndex;
        }
        while (pgdcImp->SortArray[SortIndex] < iDataMin);
        //
        // Save this new sort value in the hash.
        //
        pgdcImp->HashArray[HashVal] = (WORD)SortIndex;
    }
    //
    // Need more than 2 bytes with the same index before processing it.
    //
    pDataMax = pDataStart - 1;
    //
    // Get a Ptr to the first byte in the compression buffer referenced by
    // the SortBuffer offset indexed by the SortIndex we just calculated.
    // If this Ptr is not at least 2 bytes before pDataStart then return 0.
    // This means that the byte pointed to by Start does not share the
    // index with earlier bytes.
    //
    pData = pgdcImp->RawData + pgdcImp->SortArray[SortIndex];
    if (pData >= pDataMax)
        return 0;
    //
    // Now the current bytes have the same index as at least 2 other
    // sequences.  Ptr points to the first compress buffer byte with
    // the same index as that pointed to by pDataStart.
    //
    pDataPat = pDataStart;
    CurLen = 1;
    do
    {
        // Quick filter: require the first byte and the byte at the end
        // of the best-so-far run to match before counting the run.
        if (*(pData + CurLen - 1) == *(pDataPat + CurLen - 1) &&
            *(pData) == *(pDataPat))
        {
            //
            // This processes a sequence of identical bytes, one starting
            // at pDataPat, the other at pData.
            //
            ++pData;
            ++pDataPat;
            Len = 2;
            // Skip past matching bytes, keeping a count.
            while ((*++pData == *++pDataPat) && (++Len < GDC_MAXREP))
                ;
            pDataPat = pDataStart;
            if (Len >= CurLen)
            {
                // pData was advanced during the scan; this expression
                // recovers the backwards distance to the match start.
                pgdcImp->Distance = (UINT)(pDataPat - pData + Len - 1);
                if ((CurLen = Len) > KMP_THRESHOLD)
                {
                    if (Len == GDC_MAXREP)
                    {
                        --(pgdcImp->Distance);
                        return Len;
                    }
                    goto DoKMP;
                }
            }
        }
        //
        // Get a pointer to the next compress buffer byte having the same
        // hash.  If this byte comes before pDataMax, go back around the
        // loop and look for a matching sequence.
        //
        pData = pgdcImp->RawData + pgdcImp->SortArray[++SortIndex];
    }
    while (pData < pDataMax);
    return (CurLen >= GDC_MINREP) ? CurLen : 0;
DoKMP:
    //
    // KMP-style extension for long runs.  Bail out immediately if there
    // are no further candidates with the same hash in range.
    //
    if (pgdcImp->RawData + pgdcImp->SortArray[SortIndex+1] >= pDataMax)
        return CurLen;
    //
    // Build the KMP failure table (Next) for the pattern at pDataPat up
    // to the current best length.
    //
    j1 = pgdcImp->Next[1] = 0;
    pgdcImp->Next[0] = -1;
    i1 = 1;
    do
    {
        if ((pDataPat[i1] == pDataPat[j1]) || ((j1 = pgdcImp->Next[j1]) == -1))
            pgdcImp->Next[++i1] = ++j1;
    }
    while (i1 < CurLen);
    Len = CurLen;
    pData = pgdcImp->RawData + pgdcImp->SortArray[SortIndex] + CurLen;
    while (TRUE)
    {
        // NOTE(review): Len is UINT; the == -1 compare relies on the
        // usual unsigned conversion of -1 from the Next table entry.
        if ((Len = pgdcImp->Next[Len]) == -1)
            Len = 0;
        //
        // Advance to the next candidate whose start could still extend
        // the partial match at pData.
        //
        do
        {
            pBase = pgdcImp->RawData + pgdcImp->SortArray[++SortIndex];
            if (pBase >= pDataMax)
                return CurLen;
        }
        while (pBase + Len < pData);
        if (*(pBase + CurLen - 2) != *(pDataPat + CurLen - 2))
        {
            //
            // Candidate fails the quick end-byte/first-byte filter; keep
            // skipping candidates until one passes, then restart the
            // match two bytes in.
            //
            do
            {
                pBase = pgdcImp->RawData + pgdcImp->SortArray[++SortIndex];
                if (pBase >= pDataMax)
                    return CurLen;
            }
            while ((*(pBase + CurLen - 2) != *(pDataPat + CurLen - 2)) ||
                   (*(pBase) != *(pDataPat)));
            Len = 2;
            pData = pBase + Len;
        }
        else if (pBase + Len != pData)
        {
            // Candidate is not contiguous with the partial match; start
            // matching it from scratch.
            Len = 0;
            pData = pBase;
        }
        // Extend the match as far as it goes (capped at GDC_MAXREP).
        while ((*pData == pDataPat[Len]) && (++Len < GDC_MAXREP))
            pData++;
        if (Len >= CurLen)
        {
            ASSERT(pBase < pDataStart);
            pgdcImp->Distance = (UINT)(pDataStart - pBase - 1);
            if (Len > CurLen)
            {
                if (Len == GDC_MAXREP)
                    return Len;
                // New best length; extend the failure table to cover it.
                CurLen = Len;
                do
                {
                    if ((pDataPat[i1] == pDataPat[j1]) ||
                        ((j1 = pgdcImp->Next[j1]) == -1))
                        pgdcImp->Next[++i1] = ++j1;
                }
                while (i1 < CurLen);
            }
        }
    }
    // NOTE(review): the while (TRUE) loop above only exits via return,
    // so this trace call is unreachable.
    DebugExitVOID(GDCFindRep);
}
//
// GDCOutputBits()
//
// This writes compressed output into our output buffer. If the total
// goes past the max compressed chunk we have workspace for, we flush
// our buffer into the apps'destination.
//
// It returns FALSE on failure, i.e. we would go past the end of the
// destination.
//
//
// Appends the low Cnt bits of Code to the compressed output stream.
// Returns FALSE if doing so would run past the end of the destination
// buffer.
//
BOOL GDCOutputBits
(
    PGDC_IMPLODE    pgdcImp,
    WORD            Cnt,
    WORD            Code
)
{
    UINT    startBit;
    BOOL    rc = FALSE;
    DebugEntry(GDCOutputBits);
    //
    // Handle at most 8 bits per invocation.  For larger counts, recurse
    // once to emit the low 8 bits first.  Code is a WORD, so Cnt never
    // exceeds 16 and a single level of recursion always suffices.
    //
    if (Cnt > 8)
    {
        if (!GDCOutputBits(pgdcImp, 8, Code))
            DC_QUIT;
        Cnt -= 8;
        Code >>= 8;
    }
    ASSERT(pgdcImp->cbDst > 0);
    startBit = pgdcImp->iDstBit;
    ASSERT(startBit < 8);
    //
    // OR the bits into the current output byte at the current bit
    // position.  This relies on the current byte having been zeroed
    // when we advanced onto it.
    //
    *(pgdcImp->pDst) |= (BYTE)(Code << startBit);
    pgdcImp->iDstBit += Cnt;
    if (pgdcImp->iDstBit >= 8)
    {
        //
        // The current byte is full; advance to the next one.
        //
        ++(pgdcImp->pDst);
        if (--(pgdcImp->cbDst) == 0)
        {
            //
            // We just filled the last byte of the destination and have
            // nowhere to go.  Fail.
            //
            DC_QUIT;
        }
        if (pgdcImp->iDstBit > 8)
        {
            //
            // Some of the bits didn't fit in the previous byte; start
            // the new byte with the carried-over bits.
            //
            *(pgdcImp->pDst) = (BYTE)(Code >> (8 - startBit));
        }
        else
        {
            //
            // Nothing carried over; just zero-initialize the new byte.
            //
            *(pgdcImp->pDst) = 0;
        }
        pgdcImp->iDstBit &= 7;
    }
    rc = TRUE;
DC_EXIT_POINT:
    DebugExitBOOL(GDCOutputBits, rc);
    return(rc);
}
//
// GDC_Decompress()
//
//
// GDC_Decompress()
//
// Decompresses cbSrcSize bytes at pSrc into pDst, using pWorkBuf as the
// GDC_EXPLODE scratch area.  If pDictionary is non-NULL, persistent
// (PDC) decompression is used: the dictionary is restored beforehand
// and updated afterwards.  On entry *pcbDstSize is the destination
// capacity; on successful return it holds the decompressed size.
// Returns FALSE on malformed input or if the output would not fit.
//
BOOL GDC_Decompress
(
    PGDC_DICTIONARY pDictionary,
    LPBYTE          pWorkBuf,
    LPBYTE          pSrc,
    UINT            cbSrcSize,
    LPBYTE          pDst,
    UINT *          pcbDstSize
)
{
    BOOL            rc = FALSE;
    UINT            Len;
    UINT            Dist;
    UINT            i;
    UINT            cbDstSize;
    LPBYTE          pDstOrg;
    LPBYTE          pEarlier;
    LPBYTE          pNow;
    PGDC_EXPLODE    pgdcExp;
#ifdef _DEBUG
    UINT            cbSrcOrg;
#endif // _DEBUG
    DebugEntry(GDC_Decompress);
    pgdcExp = (PGDC_EXPLODE)pWorkBuf;
    ASSERT(pgdcExp);
#ifdef _DEBUG
    cbSrcOrg = cbSrcSize;
#endif // _DEBUG
    //
    // This shouldn't be possible--but since this compressed data
    // comes from another machine, we want to make sure _we_ don't blow
    // up if that machine flaked out.
    //
    if (cbSrcSize <= 4)
    {
        ERROR_OUT(("GDC_Decompress: bogus compressed data"));
        DC_QUIT;
    }
    //
    // Get the distance bits and calculate the mask needed for that many.
    //
    // NOTE:  For PDC compression, the ExtDistBits are just in the first
    // byte.  For plain compression, the ExtDistBits are in the first
    // little-endian word.  Either way, we only allow from 4 to 6, so
    // the high byte in the non-PDC case is not useful.
    //
    if (!pDictionary)
    {
        // First byte better be zero
        if (*pSrc != 0)
        {
            ERROR_OUT(("GDC_Decompress: unrecognized distance bits"));
            DC_QUIT;
        }
        ++pSrc;
        --cbSrcSize;
    }
    pgdcExp->ExtDistBits = *pSrc;
    if ((pgdcExp->ExtDistBits < EXT_DIST_BITS_MIN) ||
        (pgdcExp->ExtDistBits > EXT_DIST_BITS_MAC))
    {
        ERROR_OUT(("GDC_Decompress: unrecognized distance bits"));
        DC_QUIT;
    }
    pgdcExp->ExtDistMask = 0xFFFF >> (16 - pgdcExp->ExtDistBits);
    //
    // Set up source data info (compressed goop).  SrcByte is the current
    // byte & bits we're reading from.  pSrc is the pointer to the next
    // byte.
    //
    pgdcExp->SrcByte = *(pSrc+1);
    pgdcExp->SrcBits = 0;
    pgdcExp->pSrc = pSrc + 2;
    pgdcExp->cbSrc = cbSrcSize - 2;
    //
    // Save the beginning of the result buffer so we can calculate how
    // many bytes we wrote into it afterwards.
    //
    pDstOrg = pDst;
    cbDstSize = *pcbDstSize;
    //
    // If we have a dictionary, put its data into our work area--the
    // compression might be referencing byte sequences in it (that's the
    // whole point, you get better compression that way when you send
    // packets with the same info over and over).
    //
    // We remember and update cbDictUsed to do the minimal dictionary
    // byte copying back and forth.
    //
    if (pDictionary && pDictionary->cbUsed)
    {
        TRACE_OUT(("Restoring %u dictionary bytes before decompression",
            pDictionary->cbUsed));
        memcpy(pgdcExp->RawData + GDC_DATA_MAX - pDictionary->cbUsed,
            pDictionary->pData, pDictionary->cbUsed);
        pgdcExp->cbDictUsed = pDictionary->cbUsed;
    }
    else
    {
        pgdcExp->cbDictUsed = 0;
    }
    //
    // The decompressed data starts filling in at GDC_DATA_MAX bytes into
    // the RawData array.  We have to double buffer the output (just
    // like we double buffer the input during compression) because
    // decompressing may require reaching backwards into the decompressed
    // byte stream to pull out sequences.
    //
    pgdcExp->iRawData = GDC_DATA_MAX;
    while ((Len = GDCDecodeLit(pgdcExp)) < EOF_CODE)
    {
        if (Len < 256)
        {
            // Plain literal byte.
            pgdcExp->RawData[pgdcExp->iRawData++] = (BYTE)Len;
        }
        else
        {
            // Length/distance pair: copy Len bytes from Dist bytes back.
            Len -= (256 - GDC_MINREP);
            Dist = GDCDecodeDist(pgdcExp, Len);
            if (!Dist)
                DC_QUIT;
            //
            // Now we're reaching back, this may in fact spill into the
            // dictionary data that preceded us.
            //
            pNow = pgdcExp->RawData + pgdcExp->iRawData;
            pEarlier = pNow - Dist;
            ASSERT(pEarlier >= pgdcExp->RawData + GDC_DATA_MAX - pgdcExp->cbDictUsed);
            pgdcExp->iRawData += Len;
            // Byte-at-a-time copy: source and destination may overlap
            // when Dist < Len (a repeating pattern), so memcpy would be
            // wrong here.
            do
            {
                *pNow++ = *pEarlier++;
            }
            while (--Len > 0);
        }
        //
        // We've gone past the end of our workspace, flush the decompressed
        // data out.  This is why RawData in GDC_EXPLODE has an extra pad of
        // GDC_MAXREP at the end.  This prevents us from spilling out of
        // the RawData buffer, we will never go more than GDC_MAXREP beyond
        // the last GDC_DATA_MAX chunk.
        //
        if (pgdcExp->iRawData >= 2*GDC_DATA_MAX)
        {
            //
            // Do we have enough space left in the destination?
            //
            if (cbDstSize < GDC_DATA_MAX)
            {
                cbDstSize = 0;
                DC_QUIT;
            }
            // Yup.
            memcpy(pDst, pgdcExp->RawData + GDC_DATA_MAX, GDC_DATA_MAX);
            pDst += GDC_DATA_MAX;
            cbDstSize -= GDC_DATA_MAX;
            //
            // Slide decoded data up to be used for decoding the next
            // chunk of compressed source.  It's convenient that the
            // dictionary size and flush size are the same.
            //
            pgdcExp->iRawData -= GDC_DATA_MAX;
            memcpy(pgdcExp->RawData, pgdcExp->RawData + GDC_DATA_MAX,
                pgdcExp->iRawData);
            pgdcExp->cbDictUsed = GDC_DATA_MAX;
        }
    }
    if (Len == ABORT_CODE)
        DC_QUIT;
    i = pgdcExp->iRawData - GDC_DATA_MAX;
    if (i > 0)
    {
        //
        // This is the remaining decompressed data--can we write it
        // out?
        //
        if (cbDstSize < i)
        {
            cbDstSize = 0;
            DC_QUIT;
        }
        memcpy(pDst, pgdcExp->RawData + GDC_DATA_MAX, i);
        //
        // Advance pDst so that the delta between it and the original is
        // the resulting uncompressed size.
        //
        pDst += i;
        //
        // And update the dictionary used size
        //
        pgdcExp->cbDictUsed = min(pgdcExp->cbDictUsed + i, GDC_DATA_MAX);
    }
    //
    // If we make it to here, we've successfully decompressed the input.
    // So fill in the resulting uncompressed size.
    //
    *pcbDstSize = (UINT)(pDst - pDstOrg);
    //
    // If a persistent dictionary was passed in, save the current contents
    // back into the thing for next time.
    //
    if (pDictionary)
    {
        TRACE_OUT(("Copying back %u dictionary bytes after decompression",
            pgdcExp->cbDictUsed));
        memcpy(pDictionary->pData, pgdcExp->RawData + GDC_DATA_MAX +
            i - pgdcExp->cbDictUsed, pgdcExp->cbDictUsed);
        pDictionary->cbUsed = pgdcExp->cbDictUsed;
    }
    TRACE_OUT(("%sExploded %u bytes from %u",
        (pDictionary ? "PDC " : ""), *pcbDstSize, cbSrcOrg));
    rc = TRUE;
DC_EXIT_POINT:
    if (!rc && !cbDstSize)
    {
        ERROR_OUT(("GDC_Decompress: decompressed data too big"));
    }
    DebugExitBOOL(GDC_Decompress, rc);
    return(rc);
}
//
// GDCDecodeLit()
//
//
// Decodes the next item from the compressed stream.  Returns the plain
// character (0..255), a length value biased by 256, EOF_CODE at end of
// stream, or ABORT_CODE on a malformed stream.
//
UINT GDCDecodeLit
(
    PGDC_EXPLODE    pgdcExp
)
{
    UINT    result;
    UINT    extra;
    UINT    exBits;
    if (pgdcExp->SrcByte & 0x01)
    {
        //
        // Leading 1 bit: a length code follows.
        //
        if (!GDCWasteBits(pgdcExp, 1))
            return ABORT_CODE;
        result = s_gdcLenDecode[pgdcExp->SrcByte & 0xFF];
        if (!GDCWasteBits(pgdcExp, s_gdcLenBits[result]))
            return ABORT_CODE;
        exBits = s_gdcExLenBits[result];
        if (exBits)
        {
            extra = pgdcExp->SrcByte & ((1 << exBits) - 1);
            if (!GDCWasteBits(pgdcExp, exBits))
            {
                // Running out of source bits here is only legal if this
                // turns out to be the EOF code.
                if (result + extra != 15 + 255)
                    return ABORT_CODE;
            }
            result = s_gdcLenBase[result] + extra;
        }
        // Bias lengths by 256 to distinguish them from plain characters.
        result += 256;
    }
    else
    {
        //
        // Leading 0 bit: an 8-bit plain character follows.
        //
        if (!GDCWasteBits(pgdcExp, 1))
            return ABORT_CODE;
        result = (pgdcExp->SrcByte & 0xFF);
        if (!GDCWasteBits(pgdcExp, 8))
            return ABORT_CODE;
    }
    return result;
}
//
// GDCDecodeDist()
//
//
// Decodes a backwards distance for a run of length Len.  Returns the
// distance + 1 (so zero can signal failure/truncated stream).
//
UINT GDCDecodeDist
(
    PGDC_EXPLODE    pgdcExp,
    UINT            Len
)
{
    UINT    distance;
    //
    // The high part of the distance is a variable-length code.
    //
    distance = s_gdcDistDecode[pgdcExp->SrcByte & 0xFF];
    if (!GDCWasteBits(pgdcExp, s_gdcDistBits[distance]))
        return 0;
    if (Len == GDC_MINREP)
    {
        //
        // Minimum-length runs carry only GDC_MINREP (2) extra low
        // distance bits.
        //
        distance = (distance << GDC_MINREP) | (pgdcExp->SrcByte & 3);
        if (!GDCWasteBits(pgdcExp, GDC_MINREP))
            return 0;
    }
    else
    {
        //
        // Longer runs carry the full ExtDistBits of extra low bits.
        //
        distance = (distance << pgdcExp->ExtDistBits) |
            (pgdcExp->SrcByte & pgdcExp->ExtDistMask);
        if (!GDCWasteBits(pgdcExp, pgdcExp->ExtDistBits))
            return 0;
    }
    //
    // Zero is reserved for failure, so return the distance + 1.
    //
    return distance + 1;
}
//
// GDCWasteBits()
//
//
// Consumes cBits bits from the compressed input.  SrcByte buffers the
// not-yet-consumed bits; SrcBits says how many of them are valid.
// Returns FALSE when the compressed source is exhausted.
//
BOOL GDCWasteBits
(
    PGDC_EXPLODE    pgdcExp,
    UINT            cBits
)
{
    if (cBits <= pgdcExp->SrcBits)
    {
        // Enough bits are already buffered; just shift them out.
        pgdcExp->SrcByte >>= cBits;
        pgdcExp->SrcBits -= cBits;
    }
    else
    {
        UINT    deficit;
        // Use up what's buffered, then refill from the next source byte.
        pgdcExp->SrcByte >>= pgdcExp->SrcBits;
        if (!pgdcExp->cbSrc)
            return(FALSE);
        pgdcExp->SrcByte |= (*pgdcExp->pSrc) << 8;
        ++(pgdcExp->pSrc);
        --(pgdcExp->cbSrc);
        // Shift out the bits still owed and account for the fresh byte.
        deficit = cBits - pgdcExp->SrcBits;
        pgdcExp->SrcByte >>= deficit;
        pgdcExp->SrcBits = 8 - deficit;
    }
    return(TRUE);
}