windows-nt/Source/XPSP1/NT/enduser/stuff/itss/lzx/decoder/dv.asm
2020-09-26 16:20:57 +08:00

513 lines
12 KiB
NASM

;
; DV.ASM
;
; jforbes
;
TITLE DV.ASM
; Enable the 80386 instruction set (the P suffix also permits privileged
; instructions) and the flat 32-bit memory model.
.386P
.model FLAT
; Everything below, including the lookup table, is placed in _TEXT.
_TEXT SEGMENT
; The OFF_* field offsets and the *_TABLE_BITS constants used by the code
; below are not defined in this file; presumably they come from offsets.i
; (verify against that include).
INCLUDE offsets.i
; External tables, both indexed by the match-position slot m:
;   _MP_POS_minus2  - DWORD entries, read as _MP_POS_minus2[m*4] and added
;                     to the extra bits taken from the bit buffer
;   _dec_extra_bits - BYTE entries, read as _dec_extra_bits[m] and used as
;                     the number of bits to shift out of the bit buffer
EXTRN _MP_POS_minus2:DWORD
EXTRN _dec_extra_bits:BYTE
;
; Right-shift counts, one byte per match-position slot m (51 entries).
; The decoder loads entry m into CL and does "bitbuf >> CL" so that the
; extra-bit field sitting at the top of the 32-bit bit buffer ends up
; right-aligned.  Going by the name, entry m is 32 minus the extra-bit
; count for slot m; the extra-bit counts themselves live in the external
; _dec_extra_bits table, so confirm the two tables agree.
;
local_32_minus_extra_bits:
; m = 0..3
DB 4 DUP (32)
; m = 4..35 : each value is used for two consecutive slots
DB 31,31, 30,30, 29,29, 28,28
DB 27,27, 26,26, 25,25, 24,24
DB 23,23, 22,22, 21,21, 20,20
DB 19,19, 18,18, 17,17, 16,16
; m = 36..50
DB 15 DUP (15)
PUBLIC _fast_decode_verbatim_block
;
; Stack frame layout for _fast_decode_verbatim_block.
;
; Each symbol is a byte offset from ESP once the frame has been allocated.
; Every slot is one DWORD except last_offset, which occupies 12 bytes:
; 4 for each of the 3 repeated match offsets.  Offsets are written
; relative to the preceding slot so the layout stays contiguous.
;
$bitbuf = 0
$bufposend = $bitbuf + 4
$context = $bufposend + 4
$mem_window = $context + 4
$last_offset = $mem_window + 4
$bitcount = $last_offset + 12
; total frame size in bytes
$stackvars = $bitcount + 4
;
; fast_decode_verbatim_block(context, bufpos, amount_to_decode)
;
; Decodes symbols from the input stream into the memory window, starting
; at window index bufpos, until bufpos reaches bufpos + amount_to_decode.
; A symbol below 256 is stored as a literal byte; any other symbol is a
; match that copies previously decoded bytes within the window.
;
; Arguments (DWORDs on the stack, read after the six register pushes):
;   context          - pointer to the decoder state; its fields are
;                      accessed through the OFF_* offsets
;   bufpos           - starting index into the memory window
;   amount_to_decode - added to bufpos to form the end position
;
; Returns in EAX:
;    0 - bufpos ended exactly on the end position
;   -1 - either the input pointer had reached OFF_INPUT_ENDPOS when a
;        main-tree symbol was about to be consumed (nothing is written
;        back to context in that case), or decoding finished with bufpos
;        different from the end position (state IS still written back)
;
; EDX, ECX, EBX, EDI, ESI and EBP are saved and restored.  "ret 0" leaves
; the arguments on the stack for the caller to remove.
;
; Register roles inside the main loop:
;   esi = input data pointer        edi = bufpos
;   dl  = bit count                 ebx = decoded symbol, later matchpos
;   ebp = input end pointer, later the match length counter
; The bit buffer itself lives on the stack at [esp + $bitbuf].
;
; The u / v / NP notes on individual instructions look like Pentium
; U-pipe / V-pipe / non-pairing scheduling annotations.
;
; NOTE(review): the loop is entered unconditionally, so at least one symbol
; is decoded even if amount_to_decode is 0 - confirm callers never pass 0.
;
_fast_decode_verbatim_block PROC NEAR
; save registers
push edx
push ecx
push ebx
push edi
push esi
push ebp
; load parameters and initialise
; (6 pushes + return address = 28 bytes, so the first argument is at esp+28)
mov edx, [esp + 28] ; context
mov edi, [esp + 32] ; bufpos
mov eax, [esp + 36] ; amount_to_decode
mov esi, [edx + OFF_INPUT_CURPOS] ; input data ptr
add eax, edi ; eax := bufpos_end = bufpos + amt
sub esp, $stackvars ; allocate stack space for variables
; store variables on stack
mov [esp + $context], edx ; u
mov [esp + $bufposend], eax ; v
mov ecx, [edx + OFF_MEM_WINDOW] ; u
mov eax, [edx + OFF_BITBUF] ; v
mov [esp + $mem_window], ecx ; u
mov [esp + $bitbuf], eax ; v
; copy repeated offsets onto stack for quicker accessing (<128 byte offset)
mov ecx, [edx + OFF_LAST_MATCHPOS_OFFSET] ; u
mov ebx, [edx + OFF_LAST_MATCHPOS_OFFSET + 4] ; v
mov eax, [edx + OFF_LAST_MATCHPOS_OFFSET + 8] ; u
mov [esp + $last_offset], ecx ; v
mov [esp + $last_offset + 4], ebx ; u
mov [esp + $last_offset + 8], eax ; v
; store other variables
; bitcount is read as a single byte and kept in dl from here on; the copy
; stored at [esp + $bitcount] is not read again anywhere in this procedure
xor ecx, ecx
mov cl, BYTE PTR [edx + OFF_BITCOUNT]
mov [esp + $bitcount], ecx
mov edx, ecx
; start
jmp SHORT loop_top
;
; end of init
;
;
; Decoder input overflow error!
;
; Reached when the input pointer (esi) is at or beyond OFF_INPUT_ENDPOS.
; Nothing is written back to the context on this path.
;
$fillbuf1:
; restore stack and return -1
add esp, $stackvars
pop ebp
pop esi
pop edi
pop ebx
pop ecx
pop edx
mov eax, -1
ret 0
;
; Handle codes > table bits in length, for main tree
;
; In:  ebx = negative table entry, ecx = context
; Out: ebx = non-negative symbol; eax is trashed
;
; eax holds the bit buffer with the bits already used for the table index
; shifted out.  Each pass negates ebx, doubles it, adds the next bit
; (moved into the carry flag by "add eax, eax") and uses the result to
; index the left/right array, until a non-negative entry is found.
;
main_tree_long_code:
mov eax, [esp + $bitbuf] ; u
shl eax, MAIN_TREE_TABLE_BITS ; u
; negation loop
$L19975:
neg ebx ; NP
add ebx, ebx ; u
add eax, eax ; v test MSB of eax
; ADC takes 3 clocks, which allows it to overshadow the 0F prefix
; in the next instruction (saving 1 clock)
adc ebx, 0 ; u
; won't pair
movsx ebx, WORD PTR [ecx + OFF_MAIN_TREE_LEFTRIGHT + ebx*2]
test ebx, ebx ; u
jl SHORT $L19975 ; v
jmp SHORT back_main_tree_long_code
;
; Handle codes > table bits in length, for secondary tree
;
; In:  ebp = negative table entry, eax = context
; Out: ebp = non-negative symbol; ecx is trashed
; Same walk as the main-tree version, with ecx holding the shifted bits.
;
secondary_tree_long_code:
mov ecx, [esp + $bitbuf]
shl ecx, SECONDARY_LEN_TREE_TABLE_BITS
$L19990:
neg ebp
add ebp, ebp
add ecx, ecx
adc ebp, 0
; won't pair
movsx ebp, WORD PTR [eax + OFF_SECONDARY_TREE_LEFTRIGHT + ebp*2]
test ebp, ebp
jl SHORT $L19990
jmp back_secondary_tree_long_code
;
; loop top
;
loop_top:
; DECODE_DDMTREE(c);
; ebx = table[ bitbuf >> (32-MAIN_TREE_TABLE_BITS) ]
mov ecx, [esp + $context] ; u1
mov eax, [esp + $bitbuf] ; v1
shr eax, 32-MAIN_TREE_TABLE_BITS ; u1
mov ebp, DWORD PTR [ecx + OFF_INPUT_ENDPOS] ; v1
; The literal path jumps straight here after loading ecx (context),
; eax (table index) and ebp (input end pointer) itself.
loop_top_after_char:
movsx ebx, WORD PTR [ecx + OFF_MAIN_TREE_TABLE + eax*2] ; NP
test ebx, ebx ; u
jl SHORT main_tree_long_code ; v
back_main_tree_long_code:
; check for end of input
; (unsigned compare: error out when input end <= current input pointer)
cmp ebp, esi ; u1
jbe SHORT $fillbuf1 ; v1
mov cl, [ebx + ecx + OFF_MAIN_TREE_LEN] ; u1 cl = len[x]
xor eax, eax ; v1
shl DWORD PTR [esp + $bitbuf], cl ; NP bitbuf <<= len
sub dl, cl ; u1 bitcount -= len
jg SHORT bitcount_gt_0 ; v1
; otherwise fill buffer
; Read two input bytes as a little-endian 16-bit value (al = first byte,
; ah = second), shift it left by -bitcount (xor cl,-1 / inc cl negates cl),
; OR it into the bit buffer, and add 16 to bitcount.
mov al, [esi] ; u1
mov cl, dl ; v1
mov ah, [esi+1] ; u1
xor cl, -1 ; v1
add esi, 2 ; u1
inc cl ; v1
shl eax, cl ; NP
or eax, [esp + $bitbuf] ; u1
add dl, 16 ; v1
mov [esp + $bitbuf], eax ; u1
nop ; v1
bitcount_gt_0:
;
; is it a match or a character?
;
; symbols 0..255 are literals; after the subtraction ebx is negative for a
; literal (its low byte bl is still the character) and >= 0 for a match
sub ebx, 256 ; u1
jns SHORT $L19985 ; v1
;
; it's a character
;
; The loads marked "for next iteration" set up ecx, eax and ebp exactly as
; loop_top would, so the loop can re-enter at loop_top_after_char.
mov ebp, [esp + $mem_window] ; u1 get mem_window ptr
inc edi ; v1 bufpos++
mov eax, [esp + $bitbuf] ; u1 for next iteration
mov ecx, [esp + $context] ; v1 for next iteration
shr eax, 32-MAIN_TREE_TABLE_BITS ; u1 for next iteration
mov [ebp + edi - 1], bl ; u1 store current character
mov ebp, DWORD PTR [ecx + OFF_INPUT_ENDPOS] ; v1 for next iteration
cmp [esp + $bufposend], edi ; u1
ja SHORT loop_top_after_char ; v1
jmp $cleanup
; m == 3: the match position is the constant 1; it still goes through
; skipover so that it is pushed onto the repeated-offset list
m_is_3:
mov ebx, 1 ; == _MP_POS_minus2[3*4]
jmp skipover
; m == 1 or 2: swap last_offset[0] with last_offset[m] and use the old
; last_offset[m] as the match position
m_not_zero:
cmp bl, 3 ; u1
je SHORT m_is_3 ; v1
mov eax, [esp + $last_offset] ; u1 eax = t = last[0]
mov ecx, [esp + $last_offset + ebx*4] ; v1 ecx = last[m]
mov [esp + $last_offset], ecx ; u1 last[0] = last[m]
mov [esp + $last_offset + ebx*4], eax ; v1 last[m] = t
mov ebx, ecx ; u
jmp $L20003 ; too far, won't pair
;
; m = 0, 1, 2, 3
;
m_is_0123:
test ebx, ebx ; u1
jnz SHORT m_not_zero ; v1
; m == 0
; reuse last_offset[0]; the repeated-offset list is left unchanged
mov ebx, [esp + $last_offset] ;
jmp $L20003 ; NP
;
; it's a match: ebx = symbol - 256
; low 3 bits -> ebp (primary match length), remaining bits -> ebx (m)
;
$L19985:
mov ebp, ebx ; u
mov eax, [esp + $context] ; v
shr ebx, 3 ; u
and ebp, 7 ; v
; a primary length of 7 means the length comes from the secondary tree
cmp ebp, 7 ; u
jne SHORT $L19987 ; v
mov ecx, [esp + $bitbuf] ; u
shr ecx, 32-SECONDARY_LEN_TREE_TABLE_BITS ; u
movsx ebp, WORD PTR [eax + OFF_SECONDARY_TREE_TABLE + ecx*2] ; NP
test ebp, ebp ; u1
jnge secondary_tree_long_code ; v1
back_secondary_tree_long_code:
mov cl, BYTE PTR [eax + OFF_SECONDARY_TREE_LEN + ebp] ; u1
add ebp, 7 ; v1
shl DWORD PTR [esp + $bitbuf], cl ; NP bitbuf <<= len
; if (bitcount > 0) we're ok, otherwise fill buffer
; (no end-of-input check is made on this refill)
sub dl, cl ; u1 bitcount -= len
jg SHORT $L19987 ; v1
xor eax, eax ; u1
mov cl, dl ; v1
; NEG does not pair, so we replace it with XOR CL,-1 ; INC CL
mov al, [esi] ; u1
xor cl, -1 ; v1
mov ah, [esi+1] ; u1
inc cl ; v1
shl eax, cl ; NP
or eax, [esp + $bitbuf] ; u2
add dl, 16 ; v1
add esi, 2 ; u1
mov [esp + $bitbuf], eax ; v1
$L19987:
; if m == 3 then extra_bits == 0, and shifts don't work
; with a count of zero
; (slots 0..3 are therefore dispatched to m_is_0123 instead; the flags
; from "cmp bl, 3" survive the "mov al, bl" that follows)
xor eax, eax ; u1
cmp bl, 3 ; v1
mov al, bl ; u1
jle SHORT m_is_0123 ; v1
; m > 3: take the extra bits from the top of the bit buffer, add the
; _MP_POS_minus2[m] entry to form the match position in ebx, then shift
; the consumed bits out of the bit buffer
mov cl, BYTE PTR local_32_minus_extra_bits [eax] ; u1
mov ebx, [esp + $bitbuf] ; v1
shr ebx, cl ; NP
add ebx, _MP_POS_minus2[eax*4] ; u2
mov cl, _dec_extra_bits [eax] ; v1
shl DWORD PTR [esp + $bitbuf], cl ; NP
; now we can trash eax (m)
sub dl, cl ; u1
jg SHORT preskipover ; v1
; otherwise fill buffer
; no need to xor eax, eax since everything but the low order
; byte is already zero
mov al, [esi] ; u1
mov cl, dl ; v1
mov ah, [esi+1] ; u1
xor cl, -1 ; v1
add esi, 2 ; u1
inc cl ; v1
shl eax, cl ; NP
or eax, [esp + $bitbuf] ; u2
add dl, 16 ; v1
; remember that this can execute twice, if we grab 17 bits
; (the jg below tests the flags from "add dl, 16"; the mov in between
; does not change them)
mov [esp + $bitbuf], eax ; u1
jg SHORT preskipover ; v1
;
; Second iteration
;
xor eax, eax ; u1
mov cl, dl ; v1
mov al, [esi] ; u1
xor cl, -1 ; v1
mov ah, [esi+1] ; u1
inc cl ; v1
shl eax, cl ; NP
or eax, [esp + $bitbuf] ; u2
add dl, 16 ; v1
mov [esp + $bitbuf], eax ; u1
add esi, 2 ; v1
; A new (non-repeated) match position in ebx: push it onto the front of
; the three-entry repeated-offset list, dropping the oldest entry.
preskipover:
skipover:
mov eax, [esp + $last_offset] ; u EAX = R0
mov ecx, [esp + $last_offset + 4] ; v ECX = R1
mov [esp + $last_offset + 4], eax ; u R1 := R0
mov [esp + $last_offset + 8], ecx ; v R2 := R1
mov [esp + $last_offset], ebx ; u R0 := matchpos
$L20003:
;
; On entry:
; ebx = matchpos
; edi = bufpos
; ebp = matchlen (ebp=0 means "ML2", ebp=1 means "ML3", ...)
; (eax and ecx are loaded below: eax = mem_window, ecx = source index)
;
; The first byte is copied here; only this first source index is ANDed
; with the window mask - copy_loop just increments it.
;
mov ecx, edi ; u1 ecx = bufpos
mov eax, [esp + $context] ; v1 eax = context ptr
inc edi ; u1 bufpos++ for first character
sub ecx, ebx ; v1 ecx := bufpos - matchpos
and ecx, [eax + OFF_WINDOW_MASK] ; u1 ecx &= window_mask
mov eax, [eax + OFF_MEM_WINDOW] ; v1 eax = mem_window
mov bl, [eax + ecx] ; u1 AGI bl = window[src]
inc ecx ; v1 for next iteration
mov [eax + edi - 1], bl ; u store in window[dst]
nop ; v
;
; second and later characters...
;
; eax = mem_window dl = bitcount (the bit buffer is on the stack)
; ebx = BL used for character esi = input_pos
; ecx = source index (bufpos - matchpos, advanced per byte)
; ebp = matchlen count
; edi = bufpos
;
; The jge tests the flags left by "dec ebp"; the mov between them does not
; alter flags.  The loop body runs ebp+1 times, so together with the first
; byte above a match copies ebp+2 bytes.
;
copy_loop:
inc edi ; u1
mov bl, [eax + ecx] ; v1 bl = dec_window[(bp-mp)&mask]
inc ecx ; u1
dec ebp ; v1
mov [eax + edi - 1], bl ; u1 dec_window[bufpos] = bl
jge SHORT copy_loop ; v1
cmp [esp + $bufposend], edi ; u1
ja loop_top ; NP
; fall through
;
; Normal exit: eax = 0 if bufpos stopped exactly at the end position,
; otherwise -1.  Either way the decoder state (bit count, bufpos ANDed
; with the window mask, input pointer, bit buffer and the three repeated
; offsets) is written back to the context.
;
$cleanup:
mov ebx, DWORD PTR [esp + $context]
xor eax, eax
cmp edi, [esp + $bufposend]
je SHORT successful
mov eax, -1 ; failure
successful:
and edi, [ebx + OFF_WINDOW_MASK]
mov [ebx + OFF_BITCOUNT], dl
mov [ebx + OFF_BUFPOS], edi
mov [ebx + OFF_INPUT_CURPOS], esi
mov edi, [esp + $bitbuf]
; copy repeated offsets into context structure
mov ecx, [esp + $last_offset]
mov ebp, [esp + $last_offset + 4]
mov esi, [esp + $last_offset + 8]
mov [ebx + OFF_LAST_MATCHPOS_OFFSET], ecx
mov [ebx + OFF_LAST_MATCHPOS_OFFSET+4], ebp
mov [ebx + OFF_LAST_MATCHPOS_OFFSET+8], esi
mov [ebx + OFF_BITBUF], edi
; restore stack
add esp, $stackvars
pop ebp
pop esi
pop edi
pop ebx
pop ecx
pop edx
ret 0
_fast_decode_verbatim_block ENDP
_TEXT ENDS
END