windows-nt/Source/XPSP1/NT/base/ntos/rtl/xencode.i

602 lines
19 KiB
OpenEdge ABL
Raw Normal View History

2020-09-26 03:20:57 -05:00
#if _MSC_FULL_VER >= 13008827 && defined(_M_IX86)
#pragma warning(disable:4731) // EBP modified with inline asm
#endif
#if CHAIN >= 2
INLINE void find_match (prs *p)
{
const uchar *p1;
xint k, n, m;
#if CHAIN >= 3
xint chain = v.chain;
#endif
p->x.z_next[0] = (z_index_t) (k = n = v.orig.pos);
do
{
m = p->x.z_next[k];
{
uint16 c = *(__unaligned uint16 *)((p1 = v.orig.ptr + v.match.len - 1) + n);
#if CHAIN >= 3
do
{
if (--chain < 0)
return;
#endif
k = p->x.z_next[m]; if (*(__unaligned uint16 *) (p1 + m) == c) goto same_m;
m = p->x.z_next[k]; if (*(__unaligned uint16 *) (p1 + k) == c) goto same_k;
k = p->x.z_next[m]; if (*(__unaligned uint16 *) (p1 + m) == c) goto same_m;
m = p->x.z_next[k]; if (*(__unaligned uint16 *) (p1 + k) == c) goto same_k;
k = p->x.z_next[m]; if (*(__unaligned uint16 *) (p1 + m) == c) goto same_m;
m = p->x.z_next[k]; if (*(__unaligned uint16 *) (p1 + k) == c) goto same_k;
k = p->x.z_next[m]; if (*(__unaligned uint16 *) (p1 + m) == c) goto same_m;
m = p->x.z_next[k]; if (*(__unaligned uint16 *) (p1 + k) == c) goto same_k;
#if CHAIN < 3
if (*(__unaligned uint16 *) (p1 + m) == c) goto same_m;
return;
#else
}
while (1);
#endif
same_m:
k = m;
same_k:
if (k == n)
return;
#if MAX_OFFSET < BUFF_SIZE_LOG
if (n - k >= (1 << MAX_OFFSET))
return;
#endif
}
{
const uchar *p2;
p1 = v.orig.ptr;
p2 = p1 + k;
p1 += n;
if ((m = *(__unaligned uint32 *)p2 ^ *(__unaligned uint32 *)p1) != 0)
{
#if MIN_MATCH <= 3
if ((m & 0xffffff) == 0 && v.match.len <= 2 && p1 + 3 <= v.orig.end)
{
v.match.len = 3;
v.match.pos = k;
}
#endif
goto cont;
}
if (p1 <= v.orig.end_16)
{
goto entry4;
do
{
#define X(i) if (p1[i] != p2[i]) {p1 += i; goto chk;}
X(0); X(1); X(2); X(3);
entry4:
X(4); X(5); X(6); X(7); X(8);
X(9); X(10); X(11); X(12); X(13); X(14); X(15);
#undef X
p1 += 16; p2 += 16;
}
while (p1 <= v.orig.end_16);
}
while (p1 != v.orig.end)
{
if (*p1 != *p2)
goto chk;
++p1;
++p2;
}
#define SET_LENGTH() \
n = -n; \
n += (xint) (p1 - v.orig.ptr); \
if (n > v.match.len) \
{ \
v.match.len = n; \
v.match.pos = k; \
}
SET_LENGTH ();
return;
}
chk:
SET_LENGTH ();
cont:
n = v.orig.pos;
}
while (CHAIN >= 3);
}
static void encode_pass1 (prs *p)
{
uchar *ptr = v.temp.ptr;
do
{
if (p->x.z_next[v.orig.pos] == 0)
goto literal;
v.match.len = MIN_MATCH-1;
find_match (p);
if (v.match.len <= MIN_MATCH-1)
{
literal:
write_lit (p, ptr, v.orig.ptr[v.orig.pos]);
v.orig.pos += 1;
}
else
{
ptr = write_ptr (p, ptr, v.orig.pos - v.match.pos, v.match.len);
v.orig.pos += v.match.len;
}
}
while (v.orig.pos < v.orig.stop);
v.temp.ptr = ptr;
}
#endif /* CHAIN >= 2 */
#if CHAIN < 2
#if CODING != CODING_DIRECT2 || !defined (i386)
static void encode_pass1 (prs *p)
{
const uchar *b, *b1, *stop;
uchar *ptr;
#if CHAIN > 0
xint pos = v.orig.pos;
#endif
b = v.orig.ptr;
v.orig.ptr_stop = stop = b + v.orig.stop;
b += v.orig.pos;
ptr = v.temp.ptr;
if (b != v.orig.ptr)
goto literal_entry;
for (;;)
{
do
{
#if MAX_OFFSET < BUFF_SIZE_LOG
next:
#endif
write_lit (p, ptr, *b);
++b;
#if CHAIN > 0
++pos;
#endif
literal_entry:
if (b >= stop)
goto ret;
{
uxint h;
#if CHAIN <= 0
h = Q_HASH_SUM (b);
b1 = p->x.q_last[h];
p->x.q_last[h] = b;
#else
assert (pos == b - v.orig.ptr);
h = Z_HASH_SUM (b);
b1 = v.orig.ptr + p->x.z_next[h];
p->x.z_next[h] = (z_index_t) pos;
#endif
}
#if MAX_OFFSET < BUFF_SIZE_LOG
if (b1 <= b - (1 << MAX_OFFSET))
goto next;
#endif
}
while (b1 == 0 || b1[0] != b[0] || b1[1] != b[1] || b1[2] != b[2]);
assert (v.orig.ptr + v.orig.size - b > 7);
{
const uchar *b0 = b;
if (b <= v.orig.end_16)
goto match_entry_3;
goto match_careful;
do
{
#define X(i) if (b1[i] != b[i]) {b += i; b1 += i; goto eval_len;}
X(0); X(1); X(2);
match_entry_3:
X(3); X(4); X(5); X(6); X(7);
X(8); X(9); X(10); X(11);
X(12); X(13); X(14); X(15);
#undef X
b += 16; b1 += 16;
}
while (b <= v.orig.end_16);
match_careful:
while (b != v.orig.end && *b1 == *b)
{
++b;
++b1;
}
eval_len:
#if BUFF_SIZE_LOG > 16
#error
#endif
ptr = write_ptr (p, ptr, (xint)(b - b1), (xint)(b - b0));
b1 = b0;
}
++b1;
#if CHAIN > 0
++pos;
#endif
if (b > v.orig.end_3)
{
while (b1 < v.orig.end_3)
{
#if CHAIN <= 0
p->x.q_last[Q_HASH_SUM (b1)] = b1;
#else
assert (pos == b1 - v.orig.ptr);
p->x.z_next[Z_HASH_SUM (b1)] = (z_index_t) pos;
++pos;
#endif
++b1;
}
goto literal_entry;
}
do
{
#if CHAIN <= 0
p->x.q_last[Q_HASH_SUM (b1)] = b1;
#else
assert (pos == b1 - v.orig.ptr);
p->x.z_next[Z_HASH_SUM (b1)] = (z_index_t) pos;
++pos;
#endif
++b1;
}
while (b1 != b);
goto literal_entry;
}
ret:
v.orig.pos = (xint)(b - v.orig.ptr);
v.temp.ptr = ptr;
}
#else /* CODING != CODING_DIRECT2 */
static void encode_pass1 (prs *PrsPtr)
{
#define PRS edx
#define TAG ebp
#define TAGW bp
// access to prs structure fields
#define V [PRS - SIZE prs] prs.c
// TAG = tag_mask; adjusts TAG (tag_mask), V.temp.tag_ptr, and ebx (output pointer)
#define WRITE_TAG_MASK() \
__asm mov ecx, V.temp.tag_ptr \
__asm mov V.temp.tag_ptr, ebx \
__asm add ebx, 4 \
__asm mov [ecx], TAG \
__asm mov TAG, 1
#if CHAIN <= 0
// access to respective hash table entry
#define Q_HTABLE(idx) dword ptr [PRS + idx*4] prs.x.q_last
// evaluate hash sum of [esi] on eax; spoils eax, ecx, TAG
#define Q_HASH_SUM_ASM() \
__asm movzx eax, byte ptr [esi] \
__asm movzx ecx, byte ptr [esi+1] \
__asm movzx edi, byte ptr [esi+2] \
__asm lea ecx, [ecx + eax * (1 << (Q_HASH_SH1 - Q_HASH_SH2))] \
__asm lea eax, [edi + ecx * (1 << Q_HASH_SH2)]
#else
// access to respective hash table entry
#define Z_HTABLE(idx) word ptr [PRS + idx*2] prs.x.z_next
// evaluate hash sum of [esi] on eax; spoils eax, ecx, edi
#define Z_HASH_SUM_ASM() \
__asm movzx eax, byte ptr [esi] \
__asm movzx ecx, byte ptr [esi+1] \
__asm movzx edi, byte ptr [esi+2] \
__asm movzx eax, word ptr z_hash_map[eax*2] \
__asm movzx ecx, word ptr z_hash_map[ecx*2][512] \
__asm movzx edi, word ptr z_hash_map[edi*2][1024] \
__asm xor eax, ecx \
__asm xor eax, edi
#endif /* CHAIN <= 0 */
__asm
{
push ebp // save ebp
mov PRS, PrsPtr // PRS = PrsPtr (globally)
// esi = b
// edi = b1
// ebx = V.prs.temp.ptr
// TAG = V.temp.tag_mask
mov esi, V.orig.ptr // obtain b, b1, temp.ptr, and temp.mask
mov eax, V.orig.stop
add eax, esi
mov V.orig.ptr_stop, eax // and set orig.ptr_stop by orig.stop
add esi, V.orig.pos
mov ebx, V.temp.ptr
mov TAG, V.temp.tag_mask
cmp esi, V.orig.ptr // if beginning of buffer
jne write_literal_entry // then write literal immediately
write_literal:
mov al, [esi] // read the literal
inc ebx // shift dst ptr in advance
inc esi // shift src ptr to next character
mov [ebx-1], al // emit literal
add TAG, TAG // write tag bit 0
jc write_literal_tag_new // save tag word if it is full
write_literal_tag_done:
write_literal_entry:
cmp esi, V.orig.ptr_stop // processed everything?
jae pass1_stop // yes, stop
#if CHAIN <= 0
Q_HASH_SUM_ASM () // evaluate hash sum
#if MAX_OFFSET < BUFF_SIZE_LOG
lea ecx, [esi - (1 << MAX_OFFSET) + 1] // min. allowed left bound
#endif
mov edi, Q_HTABLE (eax) // edi = candidate ptr
mov Q_HTABLE (eax), esi // save current ptr
#else
Z_HASH_SUM_ASM () // evaluate hash sum
mov ecx,V.orig.ptr
movzx di, Z_HTABLE (eax) // edi = offset to candidate ptr
sub esi, ecx // esi = offset to current ptr
add edi, ecx // edi = candidate ptr
mov Z_HTABLE (eax), si // store current ptr offset
add esi, ecx // restore current ptr
#if MAX_OFFSET < BUFF_SIZE_LOG
lea ecx, [esi - (1 << MAX_OFFSET) + 1] // min. allowed left bound
#endif
#endif /* CHAIN <= 0 */
#if MAX_OFFSET < BUFF_SIZE_LOG
cmp edi, ecx // canidate is in window?
js write_literal // no, then emit literal
#endif
test edi, edi // is it NULL?
jz write_literal // emit literal if so
mov eax, [esi] // get first 4 src bytes
sub eax, [edi] // diff them with first 4 candidate bytes
je length_4 // if no diff then match is at least 4 bytes
test eax, 0xffffff // is there any difference in first 3 bytes?
jne write_literal // if yes emit literal
mov ecx, 3 // save match ptr of length ECX
sub edi, esi // edi = -offset
write_small_ptr:
lea eax, [esi+ecx] // eax = end of src match
not edi // edi = offset-1
add ebx, 2 // adjust output ptr in advance
shl edi, DIRECT2_LEN_LOG // make room for length
inc esi // esi = next substring (current already inserted)
lea edi, [edi + ecx - MIN_MATCH] // combine offset and shoft length
stc // set carry bit
mov [ebx-2], di // save packed pointer
adc TAG, TAG // write tag bit 1
jc write_pointer_tag_new // write tag word when it is full
write_pointer_tag_done:
cmp eax, V.orig.end_3 // is it too close to end of buffer?
ja insert_tail // if yes process is specially avoiding read overrun
#if CHAIN <= 0
push TAG // save tag_mask
mov TAG, eax // eax = end-of-match
insert_all:
Q_HASH_SUM_ASM () // evaluate hash sum
mov Q_HTABLE (eax), esi // save current ptr
inc esi // shift to next position
cmp esi, TAG // inserted all substrings in the match?
jne insert_all // continue until finished
pop TAG // restore tag_mask value
jmp write_literal_entry // process next substring
#else
push TAG // save tag_mask
push eax // save end-of-match
mov TAG, esi // TAG = current ptr
sub TAG, V.orig.ptr // TAG = current ptr offset
insert_all:
Z_HASH_SUM_ASM () // evaluate hash sum
mov Z_HTABLE (eax), TAGW // save current offset
inc esi // shift to next position
inc TAG // increase offset
cmp esi, [esp] // inserted all substrings in the match?
jne insert_all // continue until finished
pop eax // remove end-of-match ptr from the stack
pop TAG // restore tag_mask
jmp write_literal_entry // process next substring
#endif /* CHAIN <= 0 */
length_4:
#define KNOWN_LENGTH 4 // we know that first 4 bytes match
#if DIRECT2_MAX_LEN + MIN_MATCH >= 8
mov eax, [esi+4] // fetch next 4 bytes
sub eax, [edi+4] // get the diff between src and candidate
jz length_8 // do long compare if 8+ bytes match
bsf ecx, eax // ecx = # of first non-zero bit
sub edi, esi // edi = -offset
shr ecx, 3 // ecx = # of first non-zero byte
not edi // edi = offset-1
add ecx, 4 // plus previous 4 matching bytes = match length
add ebx, 2 // adjust output ptr in advance
lea eax, [esi+ecx] // eax = end of src match
shl edi, DIRECT2_LEN_LOG // make room for length
inc esi // esi = next substring (current already inserted)
lea edi,[edi+ecx-MIN_MATCH] // combine offset and shoft length
stc // set carry bit
mov [ebx-2], di // save packed pointer
adc TAG, TAG // write tag bit 1
jnc write_pointer_tag_done // write tag word when it is full
WRITE_TAG_MASK ()
jmp write_pointer_tag_done
length_8:
#undef KNOWN_LENGTH
#define KNOWN_LENGTH 8 // we know that first 8 bytes match
#endif /* DIRECT2_MAX_LEN + MIN_MATCH >= 8 */
mov eax, esi // eax = beginning of the string
mov ecx, V.orig.end // ecx = end of buffer
add esi, KNOWN_LENGTH // shift to first untested src byte
add edi, KNOWN_LENGTH // shift to first untested candidate
sub ecx, esi // ecx = max compare length
rep cmpsb // compare src and candidate
je match_complete // if eq then match till end of buffer
match_complete_done:
lea ecx, [esi-1] // ecx = end of match
sub edi, esi // edi = -offset
sub ecx, eax // ecx = match length
mov esi, eax // esi = src ptr
cmp ecx, DIRECT2_MAX_LEN+MIN_MATCH // small length?
jb write_small_ptr // write ptr if so
not edi // edi = offset-1
lea eax, [esi+ecx] // eax = end of match
shl edi, DIRECT2_LEN_LOG // make room for length
sub ecx, DIRECT2_MAX_LEN+MIN_MATCH // decrease the length
add edi, DIRECT2_MAX_LEN // mark length as long
push eax // save end of match
mov [ebx], di // write packed pointer
mov al, cl // al = (ecx <= 15 ? cl : 15)
cmp ecx, 15
jbe match_less_15
mov al, 15
match_less_15:
mov edi, V.stat.ptr // edi = quad_ptr
add ebx, 2 // wrote 2 bytes, move output ptr
test edi, edi // if quad_ptr != NULL write upper 4 bits
jne match_have_ptr
mov V.stat.ptr, ebx // make new tag_ptr
mov [ebx], al // write lower 4 bits
inc ebx // wrote 1 byte, move output ptr
jmp match_done_ptr // continue execution
match_have_ptr:
shl al, 4 // will write into upper 4 bits
mov dword ptr V.stat.ptr, 0 // no more space in this quad_bit[0]
or [edi], al // write upper 4 bits
match_done_ptr:
sub ecx, 15 // adjusted length < 15?
jae match_long_long_length // if not continue encoding
match_finish_2:
inc esi // shift to next output position
pop eax // restore eax = end-of-match
stc // set carry flag
adc TAG, TAG // write tag bit 1
jnc write_pointer_tag_done // continue execution if do not need to flush
write_pointer_tag_new: // write tag word and return to pointers
WRITE_TAG_MASK ()
jmp write_pointer_tag_done
match_long_long_length:
mov [ebx], cl // write the length as a byte
inc ebx // move output ptr
cmp ecx, 255 // adjusted length fits in byte?
jb match_finish_2 // if so ptr is written
add ecx, DIRECT2_MAX_LEN+15 // restore full length - MIN_MATCH
mov byte ptr [ebx-1], 255 // mark byte length as "to be continued"
mov [ebx], cx // write full length
add ebx, 2 // move output ptr
jmp match_finish_2
write_literal_tag_new: // write tag word and return to literals
WRITE_TAG_MASK ()
jmp write_literal_tag_done
match_complete: // cmpsb compared till end of buffer
inc esi // increase esi
inc edi // increase edi
jmp match_complete_done // resume execution
insert_tail:
push eax // save end-of-match
jmp insert_tail_1
insert_tail_next:
#if CHAIN <= 0
Q_HASH_SUM_ASM () // evaluate hash sum
mov Q_HTABLE (eax), esi // insert current src pointer
#else
Z_HASH_SUM_ASM () // evaluate hash sum
mov ecx, esi
sub ecx, V.orig.ptr // ecx = current ptr offset
mov Z_HTABLE (eax), cx // save offset in hash table
#endif /* CHAIN <= 0 */
inc esi // and move it to next substring
insert_tail_1: // end of match exceeds end_3 -- be careful
cmp esi, V.orig.end_3 // inserted up to end_3?
jb insert_tail_next // if not continue
pop esi // esi = end of match
jmp write_literal_entry
pass1_stop:
mov V.temp.ptr, ebx // save register variables
mov V.temp.tag_mask, TAG
sub esi, V.orig.ptr
mov V.orig.pos, esi
pop ebp // restore ebp
} /* __asm */
}
#undef V
#undef PRS
#undef TAG
#undef TAGW
#undef Q_HTABLE
#undef Q_HASH_SUM_ASM
#undef Z_HTABLE
#undef Z_HASH_SUM_ASM
#undef WRITE_SMALL_PTR
#undef KNOWN_LENGTH
#endif /* CODING != CODING_DIRECT2 */
#endif /* CHAIN < 2 */
#undef CHAIN
#undef find_match
#undef encode_pass1