#include "precomp.h"


//
// GDC.CPP
// General Data Compressor
//
// Copyright(c) Microsoft 1997-
//

#define MLZ_FILE_ZONE  ZONE_NET



//
// Tables used by the compression / decompression algorithms
//

//
// Count of extra bits that follow each length code (indexed by length
// code).  The decoder masks this many bits out of the stream and adds them
// to the matching s_gdcLenBase entry.
//
const BYTE s_gdcExLenBits[GDC_LEN_SIZE] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8
};

//
// Base value for each length code; the extra bits are added to it.
//
const WORD s_gdcLenBase[GDC_LEN_SIZE] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 14, 22, 38, 70, 134, 262
};


//
// Dist:  Bits, Coded, Decoded
//
const BYTE s_gdcDistBits[GDC_DIST_SIZE] =
{
    2, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
};

const BYTE s_gdcDistCode[GDC_DIST_SIZE] =
{
    0x03, 0x0d, 0x05, 0x19, 0x09, 0x11, 0x01, 0x3e,
    0x1e, 0x2e, 0x0e, 0x36, 0x16, 0x26, 0x06, 0x3a,
    0x1a, 0x2a, 0x0a, 0x32, 0x12, 0x22, 0x42, 0x02,
    0x7c, 0x3c, 0x5c, 0x1c, 0x6c, 0x2c, 0x4c, 0x0c,
    0x74, 0x34, 0x54, 0x14, 0x64, 0x24, 0x44, 0x04,
    0x78, 0x38, 0x58, 0x18, 0x68, 0x28, 0x48, 0x08,
    0xf0, 0x70, 0xb0, 0x30, 0xd0, 0x50, 0x90, 0x10,
    0xe0, 0x60, 0xa0, 0x20, 0xc0, 0x40, 0x80, 0x00
};


//
// Len:  Bits, Coded, Decoded
//
const BYTE s_gdcLenBits[GDC_LEN_SIZE] =
{
    3, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7
};

const BYTE s_gdcLenCode[GDC_LEN_SIZE] =
{
    0x05, 0x03, 0x01, 0x06, 0x0A, 0x02, 0x0C, 0x14,
    0x04, 0x18, 0x08, 0x30, 0x10, 0x20, 0x40, 0x00
};




//
// GDC_Init()
//
// Builds the run-time lookup tables: the literal/length encode tables
// (s_gdcLitBits / s_gdcLitCode) and the two decode tables
// (s_gdcLenDecode / s_gdcDistDecode).
//
// BOGUS LAURABU:
// Having one global scratch compression buffer is lousy in multiple
// conference situations.  Maybe allocate it or use caching scheme in
// future, then get rid of mutex.
//
void GDC_Init(void)
{
    UINT    iLit;
    UINT    iLen;

    DebugEntry(GDC_Init);

    //
    // The encode tables are computed rather than spelled out as const
    // data because they are large (the original note puts them at 774
    // entries each).
    //

    //
    // Plain characters first.  Each costs 9 bits: a zero flag bit meaning
    // "not a length/distance pair", then the 8 bits of the character.
    //
    iLit = 0;
    while (iLit < GDC_DECODED_SIZE)
    {
        s_gdcLitCode[iLit] = (WORD)(iLit << 1);
        s_gdcLitBits[iLit] = 9;
        iLit++;
    }

    //
    // Then one entry per (length code, extra-bits value) combination,
    // continuing at the index where the characters stopped.  Each entry is
    // a set flag bit, the length code, and the extra bits above that.
    //
    for (iLen = 0; iLen < GDC_LEN_SIZE; iLen++)
    {
        const UINT  cExtraBits  = s_gdcExLenBits[iLen];
        const UINT  cPrefixBits = s_gdcLenBits[iLen] + 1;
        const UINT  prefix      = (s_gdcLenCode[iLen] << 1) | 1;
        const UINT  cVariants   = 1U << cExtraBits;
        UINT        extra;

        for (extra = 0; extra < cVariants; extra++)
        {
            s_gdcLitBits[iLit] = (BYTE)(cPrefixBits + cExtraBits);
            s_gdcLitCode[iLit] = (WORD)((extra << cPrefixBits) | prefix);
            iLit++;
        }
    }

    //
    // Finally the reverse (code -> symbol) tables used when decompressing.
    //
    GDCCalcDecode(s_gdcLenBits, s_gdcLenCode, GDC_LEN_SIZE, s_gdcLenDecode);
    GDCCalcDecode(s_gdcDistBits, s_gdcDistCode, GDC_DIST_SIZE, s_gdcDistDecode);

    DebugExitVOID(GDC_Init);
}



//
// GDCCalcDecode()
// This calculates 'const' arrays for s_gdcLenDecode and s_gdcDistDecode.
//
// pSrcBits     bit length of each symbol's code
// pSrcCodes    the code of each symbol
// cSrc         number of symbols in the two arrays above
// pDstDecodes  table to fill; written at indices below GDC_DECODED_SIZE
//
// For every symbol, each table index whose low pSrcBits[] bits equal the
// symbol's code is set to that symbol.  Symbols are visited from the last
// to the first, so if two symbols ever claimed the same index the
// lower-numbered symbol would be the one left in the table.
//
void GDCCalcDecode
(
    const BYTE *    pSrcBits,
    const BYTE *    pSrcCodes,
    UINT            cSrc,
    LPBYTE          pDstDecodes
)
{
    UINT    iSym;
    UINT    slot;
    UINT    stride;

    DebugEntry(GDC_CalcDecode);

    iSym = cSrc;
    while (iSym-- > 0)
    {
        stride = 1 << pSrcBits[iSym];
        slot   = pSrcCodes[iSym];

        // The code's own slot is always written, then every stride-th one.
        pDstDecodes[slot] = (BYTE)iSym;
        for (slot += stride; slot < GDC_DECODED_SIZE; slot += stride)
        {
            pDstDecodes[slot] = (BYTE)iSym;
        }
    }

    DebugExitVOID(GDC_CalcDecode);
}




//
// Optimize compilation for speed (not space)
//
#pragma optimize ("s", off)
#pragma optimize ("t", on)



//
// GDC_Compress()
// Compresses data based on different options.
// This compresses data using PKZIP for both persistent and non-persistent
// types.  The differences between the algorithms are few:
//      * Persistent compression is never used for sources > 4096 bytes
//      * We copy in & update saved dictionary data before starting
//      * We copy back updated dictionary data after ending
//      * One byte of the used DistBits is used for PDC, 2 bytes for
//          plain PKZIP compression in the resulting compressed packet.
// BOOL GDC_Compress ( PGDC_DICTIONARY pDictionary, // NULL if not persistent UINT Options, // Not meaningful if pDictionary LPBYTE pWorkBuf, LPBYTE pSrc, UINT cbSrcSize, LPBYTE pDst, UINT * pcbDstSize ) { BOOL rc = FALSE; UINT Len; UINT cbRaw; UINT Passes; LPBYTE pCur; LPBYTE pMax; PGDC_IMPLODE pgdcImp; #ifdef _DEBUG UINT cbSrcOrg; #endif // _DEBUG DebugEntry(GDC_Compress); pgdcImp = (PGDC_IMPLODE)pWorkBuf; ASSERT(pgdcImp); #ifdef _DEBUG cbSrcOrg = cbSrcSize; #endif // _DEBUG // // Figure out what size dictionary to use. // if (pDictionary) pgdcImp->cbDictSize = GDC_DATA_MAX; else if (Options == GDCCO_MAXSPEED) { // // Use the smallest for max speed. // pgdcImp->cbDictSize = GDC_DATA_SMALL; } else { ASSERT(Options == GDCCO_MAXCOMPRESSION); // // Use the nearest dictionary size to the source size. // if (cbSrcSize <= GDC_DATA_SMALL) pgdcImp->cbDictSize = GDC_DATA_SMALL; else if (cbSrcSize <= GDC_DATA_MEDIUM) pgdcImp->cbDictSize = GDC_DATA_MEDIUM; else pgdcImp->cbDictSize = GDC_DATA_MAX; } // // How many bits of distance are needed to back the dictionary size // # of bytes? // switch (pgdcImp->cbDictSize) { case GDC_DATA_SMALL: pgdcImp->ExtDistBits = EXT_DIST_BITS_MIN; break; case GDC_DATA_MEDIUM: pgdcImp->ExtDistBits = EXT_DIST_BITS_MEDIUM; break; case GDC_DATA_MAX: pgdcImp->ExtDistBits = EXT_DIST_BITS_MAC; break; } pgdcImp->ExtDistMask = 0xFFFF >> (16 - pgdcImp->ExtDistBits); // // We need at least 4 bytes (2 max for ExtDistBits, 2 for EOF code). // ASSERT(*pcbDstSize > 4); // // Now save the destination info in our struct. That we we can just // pass a pointer to our GDC_IMPLODE routine around with everything // we need. // pgdcImp->pDst = pDst; pgdcImp->cbDst = *pcbDstSize; // // For non PDC compression, the first little-endian WORD is the ExtDistBits // used in decompression. For PDC compression, just the first BYTE is // the ExtDistBits. 
// if (!pDictionary) { *(pgdcImp->pDst)++ = 0; --(pgdcImp->cbDst); } *(pgdcImp->pDst)++ = (BYTE)pgdcImp->ExtDistBits; --(pgdcImp->cbDst); // // Since pDst could be huge, we don't zero it all out before using. // As the pointer into the destination advances, we zero out a byte // just before we start writing bits into it. // pgdcImp->iDstBit = 0; *(pgdcImp->pDst) = 0; // // Now, if we have a dictonary, restore the contents into our scratch // buffer. // if (pDictionary && pDictionary->cbUsed) { TRACE_OUT(("Restoring %u dictionary bytes before compression", pDictionary->cbUsed)); // // NOTE: the data saved in pDictionary->pData is front aligned. // But the data in RawData is end aligned so that we can slide up // new data chunk by chunk when compressing. Therefore only copy // the part that is valid, but make it end at the back of the // space for the dictionary data. // ASSERT(pDictionary->cbUsed <= pgdcImp->cbDictSize); memcpy(pgdcImp->RawData + GDC_MAXREP + pgdcImp->cbDictSize - pDictionary->cbUsed, pDictionary->pData, pDictionary->cbUsed); pgdcImp->cbDictUsed = pDictionary->cbUsed; } else { pgdcImp->cbDictUsed = 0; } // // We only compress GDC_DATA_MAX bytes at a time. Therefore we have // this loop to grab at most that amount each time around. Since we // only persistently compress packets <= GDC_DATA_MAX, we should never // go through it more than once for that compression type. But normal // compression, you betcha since the max packet size is 32K. // Passes = 0; pCur = pgdcImp->RawData + GDC_MAXREP + pgdcImp->cbDictSize; do { // // cbRaw will either be GDC_DATA_MAX (if source has >= that to go) // or remainder. Copy that much uncompressed data into our // working RawData buffer in the 'new data' space. // ASSERT(cbSrcSize); cbRaw = min(cbSrcSize, GDC_DATA_MAX); memcpy(pgdcImp->RawData + GDC_MAXREP + pgdcImp->cbDictSize, pSrc, cbRaw); pSrc += cbRaw; cbSrcSize -= cbRaw; // // Now get a pointer just past the end of the data we read. Well, // almost. 
We fed in cbRaw bytes starting at GDC_MAXREP + // pgdcImp->cbDictSize. So unless this is the last chunk of raw // data to process, pMax is GDC_MAXREP before the end of the // new raw data. // // NOTE that in several of the functions that follow, we read // a byte or two past the end and the beginning of the valid new // raw data. THIS IS INTENTIONAL. // // Doing so is the only way to get the beginning and ending bytes // indexed, since the hash function uses TWO bytes. We won't // GPF because of padding in our RawData buffer. // pMax = pgdcImp->RawData + pgdcImp->cbDictSize + cbRaw; if (!cbSrcSize) { pMax += GDC_MAXREP; } else { // // This better NOT be persistent compression, since we don't // let you compress packets bigger than the chunk size we // process (GDC_DATA_MAX). // ASSERT(!pDictionary); } // // Generate the sort buffer, which orders the raw data according // to an index calculated using pairs of contiguous bytes that // occur within it. Without a dictionary yet, the first pass // only indexes the current chunk. With a dictionary (a second or // greater pass--or PERSISTENT COMPRESSION has saved enough data // from last time), we look back into the previous chunk (what we // call the dictionary). // // This takes longer since we go through more bytes, but produces // better results. Hence the dictionary size controls the speed/ // resulting size. // switch (Passes) { case 0: { if (pgdcImp->cbDictUsed > GDC_MAXREP) { // // On the zeroth pass, cbDictUsed is always ZERO // for non-persistent PKZIP. // ASSERT(pDictionary); GDCSortBuffer(pgdcImp, pCur - pgdcImp->cbDictUsed + GDC_MAXREP, pMax + 1); } else { GDCSortBuffer(pgdcImp, pCur, pMax + 1); } ++Passes; // // After completing a pass we slide the raw data up into // the dictionary slot, bumping out the older dictionary // data. 
// if (pgdcImp->cbDictSize != GDC_DATA_MAX) { ASSERT(pgdcImp->cbDictUsed == 0); ASSERT(!pDictionary); ++Passes; } } break; case 1: { // // Start sorting GDC_MAXREP bytes after the start. NOTE // that this is exactly what PERSISTENT compression does // on the zeroth pass--it acts like we already have // dictionary data, using the bytes from the last time // we compressed something. // GDCSortBuffer(pgdcImp, pCur - pgdcImp->cbDictSize + GDC_MAXREP, pMax + 1); ++Passes; } break; default: { // // Start sort from the beginning of the dictionary. // This works because we copy raw data around before // starting the next pass. // GDCSortBuffer(pgdcImp, pCur - pgdcImp->cbDictSize, pMax + 1); } break; } // // Now compress the raw data chunk we ar working on. // while (pCur < pMax) { Len = GDCFindRep(pgdcImp, pCur); SkipFindRep: if (!Len || (Len == GDC_MINREP && pgdcImp->Distance >= GDC_DECODED_SIZE)) { if (!GDCOutputBits(pgdcImp, s_gdcLitBits[*pCur], s_gdcLitCode[*pCur])) DC_QUIT; pCur++; continue; } // // Only do this if we're on the last chunk // if (!cbSrcSize && (pCur + Len > pMax)) { // // Peg run size so it doesn't go past end of raw data. // Len = (UINT)(pMax - pCur); if ((Len < GDC_MINREP) || (Len == GDC_MINREP && pgdcImp->Distance >= GDC_DECODED_SIZE)) { if (!GDCOutputBits(pgdcImp, s_gdcLitBits[*pCur], s_gdcLitCode[*pCur])) DC_QUIT; pCur++; continue; } } else if ((Len < 8) && (pCur + 1 < pMax)) { UINT Save_Distance; UINT Save_Len; // // Make temp copies of Distance and Len so we can // look ahead and see if a better compression run is // looming. If so, we won't bother starting it here, // we'll grab the better one next time around. 
// Save_Distance = pgdcImp->Distance; Save_Len = Len; Len = GDCFindRep(pgdcImp, pCur + 1); if ((Len > Save_Len) && ((Len > Save_Len + 1) || (Save_Distance > (GDC_DECODED_SIZE/2)))) { if (!GDCOutputBits(pgdcImp, s_gdcLitBits[*pCur], s_gdcLitCode[*pCur])) DC_QUIT; ++pCur; goto SkipFindRep; } // // Put back old Len and Distance, we'll take this one. // Len = Save_Len; pgdcImp->Distance = Save_Distance; } if (!GDCOutputBits(pgdcImp, s_gdcLitBits[256 + Len - GDC_MINREP], s_gdcLitCode[256 + Len - GDC_MINREP])) DC_QUIT; if (Len == GDC_MINREP) { // // GDC_MINREP is 2, so we right shift Distance by 2 // (divide by 4). Then we mask out the last 2 bits // of Distance. // if (!GDCOutputBits(pgdcImp, s_gdcDistBits[pgdcImp->Distance >> GDC_MINREP], s_gdcDistCode[pgdcImp->Distance >> GDC_MINREP])) DC_QUIT; if (!GDCOutputBits(pgdcImp, GDC_MINREP, (WORD)(pgdcImp->Distance & 3))) DC_QUIT; } else { if (!GDCOutputBits(pgdcImp, s_gdcDistBits[pgdcImp->Distance >> pgdcImp->ExtDistBits], s_gdcDistCode[pgdcImp->Distance >> pgdcImp->ExtDistBits])) DC_QUIT; if (!GDCOutputBits(pgdcImp, (WORD)pgdcImp->ExtDistBits, (WORD)(pgdcImp->Distance & pgdcImp->ExtDistMask))) DC_QUIT; } pCur += Len; } if (cbSrcSize) { // // There's more data to process. Here's where we slide up the // current raw data into the dictionary space. This is simply // the final cbDictSize + GDC_MAXREP bytes of data. It // begins GDC_DATA_MAX after the start of the bufer. // // For example, if the dict size is 1K, the current data goes // from 1K to 5K, and we slide up the data from 4K to 5K. // memcpy(pgdcImp->RawData, pgdcImp->RawData + GDC_DATA_MAX, pgdcImp->cbDictSize + GDC_MAXREP); // // Now move our raw data pointer back and update the // dictonary used amount. Since we have GDC_DATA_MAX of data, // we fill the dictionary completely. // pCur -= GDC_DATA_MAX; pgdcImp->cbDictUsed = pgdcImp->cbDictSize; } } while (cbSrcSize); // // Add the end code. 
// if (!GDCOutputBits(pgdcImp, s_gdcLitBits[EOF_CODE], s_gdcLitCode[EOF_CODE])) DC_QUIT; // // Return the resulting compressed data size. // // NOTE that partial bits are already in the destination. But we // need to account for any in the total size. // if (pgdcImp->iDstBit) ++(pgdcImp->pDst); *pcbDstSize = (UINT)(pgdcImp->pDst - pDst); // // We're done. If we have a persistent dictionary, copy back our // last block of raw data into it. We only copy as much as is actually // valid however. // // We can only get here on successful compression. NOTE that we do not // wipe out our dictionary on failure like we used to. This helps us // by permitting better compression the next time. The receiver will // be OK, since his receive dictionary won't be altered upon reception // of a non-compressed packet. // if (pDictionary) { pDictionary->cbUsed = min(pgdcImp->cbDictSize, pgdcImp->cbDictUsed + cbRaw); TRACE_OUT(("Copying back %u dictionary bytes after compression", pDictionary->cbUsed)); memcpy(pDictionary->pData, pgdcImp->RawData + GDC_MAXREP + pgdcImp->cbDictSize + cbRaw - pDictionary->cbUsed, pDictionary->cbUsed); } TRACE_OUT(("%sCompressed %u bytes to %u", (pDictionary ? "PDC " : ""), cbSrcOrg, *pcbDstSize)); rc = TRUE; DC_EXIT_POINT: if (!rc && !pgdcImp->cbDst) { TRACE_OUT(("GDC_Compress: compressed size is bigger than decompressed size %u.", cbSrcOrg)); } DebugExitBOOL(GDC_Compress, rc); return(rc); } // // GDCSortBuffer() // void GDCSortBuffer ( PGDC_IMPLODE pgdcImp, LPBYTE pStart, LPBYTE pEnd ) { WORD Accum; WORD * pHash; LPBYTE pTmp; DebugEntry(GDCSortBuffer); ASSERT(pStart >= pgdcImp->RawData + pgdcImp->cbDictSize - pgdcImp->cbDictUsed); // // For each pair of bytes in the raw data, from pStart to pEnd, // calculate the hash value for the pair . The hash value ranges from // 0 to GDC_HASH_SIZE-1. Thus the HashArray structure is an array of // GDC_HASH_SIZE WORDs. Keep a count of how many times a particular // hash value occurs in the uncompressed data. 
// // ZeroMemory(pgdcImp->HashArray, sizeof(pgdcImp->HashArray)); pTmp = pStart; do { ++(pgdcImp->HashArray[GDC_HASHFN(pTmp)]); } while (++pTmp < pEnd); // // Now go back and make each HashArray entry a cumulative total of the // occurrences of the hash values up to and including itself. Kind // of like the Fibonacci sequence actually. // Accum = 0; pHash = pgdcImp->HashArray; do { Accum += *pHash; *pHash = Accum; } while (++pHash < pgdcImp->HashArray + GDC_HASH_SIZE); // // Find the entry in the HashArray containing the accumulated // instance count for the current data WORD. Since these values are // calculated from the data in the passed in range, we know that the // value in any slot we get to by hashing some bytes in the range is // at least 1. // // We start at the end and work towards the beginning so that we // end up with the first instance of such an occurrence in the SortArray. // pTmp = pEnd - 1; do { pHash = pgdcImp->HashArray + GDC_HASHFN(pTmp); ASSERT(*pHash > 0); // // The count (*pHash) is to be used as an array index, so subtract // one from it. If there was only one instance, put it in array // element 0. If there is more than one instance of a particular // hash, then next time we will start with a lower accumulated // total. The array element will be one back, and so on. // --(*pHash); // // Store an offset from the beginning of the RawData buffer to // each byte of data into the SortArray. This is inserted // using the hash instance count as the index. // // In other words, the buffer is sorted in ascending order of hash // for a particular piece of data. Where two bytes of data have // the same hash, they are referenced in the SortBuffer in the // same order as in the RawData since we are scanning backwards. 
// pgdcImp->SortArray[*pHash] = (WORD)(pTmp - pgdcImp->RawData); } while (--pTmp >= pStart); // // Now all entries in the HashArray index the first occurrence of a byte // in the workspace which has a particular index, via the SortArray // offset. That is, the above do-while loop decrements each HashArray // entry until all data bytes for that entry are written to SortBuffer. // DebugExitVOID(GDCSortBuffer); } // // GDCFindRep // // This looks for byte patterns in the uncompressed data that can be // represented in the compressed data with smaller sequences. The biggest // wins come from repeating byte sequences; later sequences can be // compressed into a few bytes referring to an earlier sequence (how big, // how many bytes back). // // This returns the length of the uncompressed data to be replaced. // UINT GDCFindRep ( PGDC_IMPLODE pgdcImp, LPBYTE pDataStart ) { UINT CurLen; UINT Len; LPBYTE pDataPat; LPBYTE pData; UINT iDataMin; UINT SortIndex; LPBYTE pDataMax; UINT HashVal; UINT i1; short j1; LPBYTE pBase; DebugEntry(GDCFindRep); // // See GDCSortBuffer for a description of the contents of the // Index array. GDC_HASHFN() returns a hash value for a byte // using it and its successor in the uncompressed data stream. // HashVal = GDC_HASHFN(pDataStart); ASSERT(HashVal < GDC_HASH_SIZE); SortIndex = pgdcImp->HashArray[HashVal]; // // Find the minimum sort buffer value. This is the offset of the // first byte of data. // iDataMin = (UINT)(pDataStart - pgdcImp->cbDictSize + 1 - pgdcImp->RawData); if (pgdcImp->SortArray[SortIndex] < iDataMin) { // // The SortArray is referencing stale data, data that is no // longer in the range we are processing. Move forward until // we hit the first entry that's in the current chunk. // do { ++SortIndex; } while (pgdcImp->SortArray[SortIndex] < iDataMin); // // Save this new sort value in the hash. // pgdcImp->HashArray[HashVal] = (WORD)SortIndex; } // // Need more than 2 bytes with the same index before processing it. 
// pDataMax = pDataStart - 1; // // Get a Ptr to the first byte in the compression buffer referenced by // the SortBuffer offset indexed by the SortIndex we just calculated. // If this Ptr is not at least 2 bytes before pDataStart then return 0. // This means that the byte pointed to by Start does not share the // index with earlier bytes. // pData = pgdcImp->RawData + pgdcImp->SortArray[SortIndex]; if (pData >= pDataMax) return 0; // // Now the current bytes have the same index as at least 2 other // sequences. Ptr points to the first compress buffer byte with // the same index as that pointed to by pDataStart. // pDataPat = pDataStart; CurLen = 1; do { if (*(pData + CurLen - 1) == *(pDataPat + CurLen - 1) && *(pData) == *(pDataPat)) { // // This processes a sequence of identical bytes, one starting // at pDataPat, the other at pData. // ++pData; ++pDataPat; Len = 2; // Skip past matching bytes, keeping a count. while ((*++pData == *++pDataPat) && (++Len < GDC_MAXREP)) ; pDataPat = pDataStart; if (Len >= CurLen) { pgdcImp->Distance = (UINT)(pDataPat - pData + Len - 1); if ((CurLen = Len) > KMP_THRESHOLD) { if (Len == GDC_MAXREP) { --(pgdcImp->Distance); return Len; } goto DoKMP; } } } // // Get a pointer to the next compress buffer byte having the same // hash. If this byte comes before pDataMax, go back around the // loop and look for a matching sequence. // pData = pgdcImp->RawData + pgdcImp->SortArray[++SortIndex]; } while (pData < pDataMax); return (CurLen >= GDC_MINREP) ? 
CurLen : 0; DoKMP: if (pgdcImp->RawData + pgdcImp->SortArray[SortIndex+1] >= pDataMax) return CurLen; j1 = pgdcImp->Next[1] = 0; pgdcImp->Next[0] = -1; i1 = 1; do { if ((pDataPat[i1] == pDataPat[j1]) || ((j1 = pgdcImp->Next[j1]) == -1)) pgdcImp->Next[++i1] = ++j1; } while (i1 < CurLen); Len = CurLen; pData = pgdcImp->RawData + pgdcImp->SortArray[SortIndex] + CurLen; while (TRUE) { if ((Len = pgdcImp->Next[Len]) == -1) Len = 0; do { pBase = pgdcImp->RawData + pgdcImp->SortArray[++SortIndex]; if (pBase >= pDataMax) return CurLen; } while (pBase + Len < pData); if (*(pBase + CurLen - 2) != *(pDataPat + CurLen - 2)) { do { pBase = pgdcImp->RawData + pgdcImp->SortArray[++SortIndex]; if (pBase >= pDataMax) return CurLen; } while ((*(pBase + CurLen - 2) != *(pDataPat + CurLen - 2)) || (*(pBase) != *(pDataPat))); Len = 2; pData = pBase + Len; } else if (pBase + Len != pData) { Len = 0; pData = pBase; } while ((*pData == pDataPat[Len]) && (++Len < GDC_MAXREP)) pData++; if (Len >= CurLen) { ASSERT(pBase < pDataStart); pgdcImp->Distance = (UINT)(pDataStart - pBase - 1); if (Len > CurLen) { if (Len == GDC_MAXREP) return Len; CurLen = Len; do { if ((pDataPat[i1] == pDataPat[j1]) || ((j1 = pgdcImp->Next[j1]) == -1)) pgdcImp->Next[++i1] = ++j1; } while (i1 < CurLen); } } } DebugExitVOID(GDCFindRep); } // // GDCOutputBits() // // This writes compressed output into our output buffer. If the total // goes past the max compressed chunk we have workspace for, we flush // our buffer into the apps'destination. // // It returns FALSE on failure, i.e. we would go past the end of the // destination. // BOOL GDCOutputBits ( PGDC_IMPLODE pgdcImp, WORD Cnt, WORD Code ) { UINT iDstBit; BOOL rc = FALSE; DebugEntry(GDCOutputBits); // // If we are writing more than a byte's worth of bits, call ourself // recursively to write just 8. NOTE THAT WE NEVER OUTPUT MORE THAN // A WORD'S WORTH, since Code is a WORD sized object. 
// if (Cnt > 8) { if (!GDCOutputBits(pgdcImp, 8, Code)) DC_QUIT; Cnt -= 8; Code >>= 8; } ASSERT(pgdcImp->cbDst > 0); // // OR on the bits of the Code (Cnt of them). Then advance our // current bit pointer and current byte pointer in the output buffer. // iDstBit = pgdcImp->iDstBit; ASSERT(iDstBit < 8); // // NOTE: This is why it is extremely important to have zeroed out // the current destination byte when we advance. We OR on bit // sequences to the current byte. // *(pgdcImp->pDst) |= (BYTE)(Code << iDstBit); pgdcImp->iDstBit += Cnt; if (pgdcImp->iDstBit >= 8) { // // We've gone past a byte. Advance the destination ptr to the next // one. // ++(pgdcImp->pDst); if (--(pgdcImp->cbDst) == 0) { // // We just filled the last byte and are trying to move past // the end of the destination. Bail out now // DC_QUIT; } // // Phew, we have room left. Carry over the slop bits. // if (pgdcImp->iDstBit > 8) { // // Carry over slop. // *(pgdcImp->pDst) = (BYTE)(Code >> (8 - iDstBit)); } else *(pgdcImp->pDst) = 0; // Now the new byte is fullly initialized. pgdcImp->iDstBit &= 7; } rc= TRUE; DC_EXIT_POINT: DebugExitBOOL(GDCOutputBits, rc); return(rc); } // // GDC_Decompress() // BOOL GDC_Decompress ( PGDC_DICTIONARY pDictionary, LPBYTE pWorkBuf, LPBYTE pSrc, UINT cbSrcSize, LPBYTE pDst, UINT * pcbDstSize ) { BOOL rc = FALSE; UINT Len; UINT Dist; UINT i; UINT cbDstSize; LPBYTE pDstOrg; LPBYTE pEarlier; LPBYTE pNow; PGDC_EXPLODE pgdcExp; #ifdef _DEBUG UINT cbSrcOrg; #endif // _DEBUG DebugEntry(GDC_Decompress); pgdcExp = (PGDC_EXPLODE)pWorkBuf; ASSERT(pgdcExp); #ifdef _DEBUG cbSrcOrg = cbSrcSize; #endif // _DEBUG // // This shouldn't be possible--but since this compressed data // comes from another machine, we want to make sure _we_ don't blow // up if that machine flaked out. // if (cbSrcSize <= 4) { ERROR_OUT(("GDC_Decompress: bogus compressed data")); DC_QUIT; } // // Get the distance bits and calculate the mask needed for that many. 
// // NOTE: For PDC compression, the ExtDistBits are just in the first // byte. For plain compression, the ExtDistBits are in the first // little-endian word. Either way, we only allow from 4 to 6, so // the high byte in the non-PDC case is not useful. // if (!pDictionary) { // First byte better be zero if (*pSrc != 0) { ERROR_OUT(("GDC_Decompress: unrecognized distance bits")); DC_QUIT; } ++pSrc; --cbSrcSize; } pgdcExp->ExtDistBits = *pSrc; if ((pgdcExp->ExtDistBits < EXT_DIST_BITS_MIN) || (pgdcExp->ExtDistBits > EXT_DIST_BITS_MAC)) { ERROR_OUT(("GDC_Decompress: unrecognized distance bits")); DC_QUIT; } pgdcExp->ExtDistMask = 0xFFFF >> (16 - pgdcExp->ExtDistBits); // // Set up source data info (compressed goop). SrcByte is the current // byte & bits we're reading from. pSrc is the pointer to the next // byte. // pgdcExp->SrcByte = *(pSrc+1); pgdcExp->SrcBits = 0; pgdcExp->pSrc = pSrc + 2; pgdcExp->cbSrc = cbSrcSize - 2; // // Save the beginning of the result buffer so we can calculate how // many bytes we wrote into it afterwards. // pDstOrg = pDst; cbDstSize = *pcbDstSize; // // If we have a dictionary, put its data into our work area--the // compression might be referencing byte sequences in it (that's the // whole point, you get better compression that way when you send // packets with the same info over and over). // // We remember and update cbDictUsed to do the minimal dictionary // byte copying back and forth. // if (pDictionary && pDictionary->cbUsed) { TRACE_OUT(("Restoring %u dictionary bytes before decompression", pDictionary->cbUsed)); memcpy(pgdcExp->RawData + GDC_DATA_MAX - pDictionary->cbUsed, pDictionary->pData, pDictionary->cbUsed); pgdcExp->cbDictUsed = pDictionary->cbUsed; } else { pgdcExp->cbDictUsed = 0; } // // The decompressed data starts filling in at GDC_DATA_MAX bytes into // the RawData array. 
We have to double buffer the output (just // like we double buffer the input during compression) because // decompressing may require reaching backwards into the decompressed // byte stream to pull out sequences. // pgdcExp->iRawData = GDC_DATA_MAX; while ((Len = GDCDecodeLit(pgdcExp)) < EOF_CODE) { if (Len < 256) { pgdcExp->RawData[pgdcExp->iRawData++] = (BYTE)Len; } else { Len -= (256 - GDC_MINREP); Dist = GDCDecodeDist(pgdcExp, Len); if (!Dist) DC_QUIT; // // Now we're reaching back, this may in fact spill into the // dictionary data that preceded us. // pNow = pgdcExp->RawData + pgdcExp->iRawData; pEarlier = pNow - Dist; ASSERT(pEarlier >= pgdcExp->RawData + GDC_DATA_MAX - pgdcExp->cbDictUsed); pgdcExp->iRawData += Len; do { *pNow++ = *pEarlier++; } while (--Len > 0); } // // We've gone past the end of our workspace, flush the decompressed // data out. This is why RawData in GDC_EXPLODE has an extra pad of // GDC_MAXREP at the end. This prevents us from spilling out of // the RawData buffer, we will never go more than GDC_MAXREP beyond // the last GDC_DATA_MAX chunk. // if (pgdcExp->iRawData >= 2*GDC_DATA_MAX) { // // Do we have enough space left in the destination? // if (cbDstSize < GDC_DATA_MAX) { cbDstSize = 0; DC_QUIT; } // Yup. memcpy(pDst, pgdcExp->RawData + GDC_DATA_MAX, GDC_DATA_MAX); pDst += GDC_DATA_MAX; cbDstSize -= GDC_DATA_MAX; // // Slide decoded data up to be used for decoding the next // chunk ofcompressed source. It's convenient that the // dictionary size and flush size are the same. // pgdcExp->iRawData -= GDC_DATA_MAX; memcpy(pgdcExp->RawData, pgdcExp->RawData + GDC_DATA_MAX, pgdcExp->iRawData); pgdcExp->cbDictUsed = GDC_DATA_MAX; } } if (Len == ABORT_CODE) DC_QUIT; i = pgdcExp->iRawData - GDC_DATA_MAX; if (i > 0) { // // This is the remaining decompressed data--can we we right it // out? 
// if (cbDstSize < i) { cbDstSize = 0; DC_QUIT; } memcpy(pDst, pgdcExp->RawData + GDC_DATA_MAX, i); // // Advance pDst so that the delta between it and the original is // the resulting uncompressed size. // pDst += i; // // And update the dictionary used size // pgdcExp->cbDictUsed = min(pgdcExp->cbDictUsed + i, GDC_DATA_MAX); } // // If we make it to here, we've successfully decompressed the input. // So fill in the resulting uncompressed size. // *pcbDstSize = (UINT)(pDst - pDstOrg); // // If a persistent dictionary was passed in, save the current contents // back into the thing for next time. // if (pDictionary) { TRACE_OUT(("Copying back %u dictionary bytes after decompression", pgdcExp->cbDictUsed)); memcpy(pDictionary->pData, pgdcExp->RawData + GDC_DATA_MAX + i - pgdcExp->cbDictUsed, pgdcExp->cbDictUsed); pDictionary->cbUsed = pgdcExp->cbDictUsed; } TRACE_OUT(("%sExploded %u bytes from %u", (pDictionary ? "PDC " : ""), *pcbDstSize, cbSrcOrg)); rc = TRUE; DC_EXIT_POINT: if (!rc && !cbDstSize) { ERROR_OUT(("GDC_Decompress: decompressed data too big")); } DebugExitBOOL(GDC_Decompress, rc); return(rc); } // // GDCDecodeLit() // UINT GDCDecodeLit ( PGDC_EXPLODE pgdcExp ) { UINT LitChar, i; if (pgdcExp->SrcByte & 0x01) { // Length found if (!GDCWasteBits(pgdcExp, 1)) return ABORT_CODE; LitChar = s_gdcLenDecode[pgdcExp->SrcByte & 0xFF]; if (!GDCWasteBits(pgdcExp, s_gdcLenBits[LitChar])) return ABORT_CODE; if (s_gdcExLenBits[LitChar]) { i = pgdcExp->SrcByte & ((1 << s_gdcExLenBits[LitChar]) - 1); if (!GDCWasteBits(pgdcExp, s_gdcExLenBits[LitChar])) { // If this isn't EOF, something is wrong if (LitChar + i != 15 + 255) return ABORT_CODE; } LitChar = s_gdcLenBase[LitChar] + i; } LitChar += 256; } else { // Char found if (!GDCWasteBits(pgdcExp, 1)) return ABORT_CODE; LitChar = (pgdcExp->SrcByte & 0xFF); if (!GDCWasteBits(pgdcExp, 8)) return ABORT_CODE; } return LitChar; } // // GDCDecodeDist() // UINT GDCDecodeDist ( PGDC_EXPLODE pgdcExp, UINT Len ) { UINT Dist; Dist = 
s_gdcDistDecode[pgdcExp->SrcByte & 0xFF]; if (!GDCWasteBits(pgdcExp, s_gdcDistBits[Dist])) return 0; if (Len == GDC_MINREP) { // GDC_MINREP is 2, hence we shift over by 2 then mask the low 2 bits Dist <<= GDC_MINREP; Dist |= (pgdcExp->SrcByte & 3); if (!GDCWasteBits(pgdcExp, GDC_MINREP)) return 0; } else { Dist <<= pgdcExp->ExtDistBits; Dist |=( pgdcExp->SrcByte & pgdcExp->ExtDistMask); if (!GDCWasteBits(pgdcExp, pgdcExp->ExtDistBits)) return 0; } return Dist+1; } // // GDCWasteBits() // BOOL GDCWasteBits ( PGDC_EXPLODE pgdcExp, UINT cBits ) { if (cBits <= pgdcExp->SrcBits) { pgdcExp->SrcByte >>= cBits; pgdcExp->SrcBits -= cBits; } else { pgdcExp->SrcByte >>= pgdcExp->SrcBits; // // We need to advance to the next source byte. Can we, or have // we reached the end already? // if (!pgdcExp->cbSrc) return(FALSE); pgdcExp->SrcByte |= (*pgdcExp->pSrc) << 8; // // Move these to the next byte in the compressed source // ++(pgdcExp->pSrc); --(pgdcExp->cbSrc); pgdcExp->SrcByte >>= (cBits - pgdcExp->SrcBits); pgdcExp->SrcBits = 8 - (cBits - pgdcExp->SrcBits); } return(TRUE); }