513 lines
12 KiB
NASM
513 lines
12 KiB
NASM
;
|
|
; DV.ASM
|
|
;
|
|
; jforbes
|
|
;
|
|
TITLE   DV.ASM

;
; Assembler set-up: 80386 instruction set (the P suffix also enables
; privileged opcodes) and the flat 32-bit memory model.
;
.386P
.model  FLAT

;
; Everything that follows is assembled into the code segment.
;
_TEXT   SEGMENT

;
; NOTE(review): offsets.i is not visible here; it presumably supplies the
; OFF_* structure offsets and the *_TABLE_BITS constants used by the
; decoder -- confirm against that file.
;
INCLUDE offsets.i

;
; Lookup tables defined in another module; the decoder indexes both of
; them by the match position slot.
;
EXTRN   _MP_POS_minus2:DWORD
EXTRN   _dec_extra_bits:BYTE
|
|
|
|
;
; Right-shift counts used to pull a match's extra position bits out of the
; top of the 32-bit bit buffer, indexed by position slot (51 entries).
; The decoder only indexes this table for slots above 3; slots 0-3 are
; handled on a separate path.
; NOTE(review): by its name each entry is 32 - extra_bits(slot); this
; presumably mirrors _dec_extra_bits -- confirm.  The floor of 15 matches
; the decoder's "17 bits" remark about the largest extra-bit count.
;
local_32_minus_extra_bits:
        DB      32, 32, 32, 32                  ; slots  0 -  3
        DB      31, 31, 30, 30, 29, 29, 28, 28  ; slots  4 - 11
        DB      27, 27, 26, 26, 25, 25, 24, 24  ; slots 12 - 19
        DB      23, 23, 22, 22, 21, 21, 20, 20  ; slots 20 - 27
        DB      19, 19, 18, 18, 17, 17, 16, 16  ; slots 28 - 35
        DB      15, 15, 15, 15, 15, 15, 15, 15  ; slots 36 - 43
        DB      15, 15, 15, 15, 15, 15, 15      ; slots 44 - 50
|
|
|
|
PUBLIC  _fast_decode_verbatim_block

;
; Local stack frame of _fast_decode_verbatim_block.
;
; Each symbol is a byte offset from ESP once the procedure has reserved
; $stackvars bytes.  $last_offset occupies 12 bytes: one dword for each
; of the three repeated match offsets.
;
$bitbuf         = 0             ; bit buffer
$bufposend      = 4             ; output position at which decoding stops
$context        = 8             ; pointer to the decoder context
$mem_window     = 12            ; pointer to the output window
$last_offset    = 16            ; three repeated offsets (16, 20, 24)
$bitcount       = 28            ; bit count as loaded on entry
$stackvars      = 32            ; total size of the frame in bytes
|
|
|
|
;-----------------------------------------------------------------------
; fast_decode_verbatim_block(context, bufpos, amount_to_decode)
;
; Decodes symbols into the output window, starting at bufpos, until the
; output position reaches bufpos + amount_to_decode.
;
; In (on the stack; offsets are valid after the six pushes below):
;   [esp+28]  context           pointer to the decoder state (OFF_* fields)
;   [esp+32]  bufpos            starting output position in the window
;   [esp+36]  amount_to_decode  number of output bytes to produce
;
; Out:
;   eax =  0  the output position ended exactly on bufpos + amount
;   eax = -1  the output position overshot that end, or the input pointer
;             reached OFF_INPUT_ENDPOS (in the latter case the context
;             is left unmodified)
;
; Preserves edx, ecx, ebx, edi, esi and ebp.  Returns with "ret 0", so
; the arguments stay on the stack for the caller to remove.
;
; Register roles inside the decode loop:
;   esi = input read pointer            edi = bufpos (output position)
;   dl  = bit count of the bit buffer   [esp+$bitbuf] = bit buffer
;   ebx = decoded symbol, later the match position
;   ebp = input end pointer while decoding a symbol, later the
;         match-length counter
;
; The instruction order was hand-scheduled (the original author's notes
; talk about instruction pairing and clock counts); several conditional
; jumps rely on flags surviving the MOVs placed between them and the
; arithmetic that set them, so do not reorder casually.
;-----------------------------------------------------------------------
_fast_decode_verbatim_block PROC NEAR

        ; ---- prologue: save every register used except eax ------------
        push    edx
        push    ecx
        push    ebx
        push    edi
        push    esi
        push    ebp

        ; ---- arguments: 6 pushes + return address = 28 bytes ----------
        mov     edx, [esp + 28]                 ; edx = context
        mov     edi, [esp + 32]                 ; edi = bufpos

        mov     eax, [esp + 36]                 ; eax = amount_to_decode
        mov     esi, [edx + OFF_INPUT_CURPOS]   ; esi = input read pointer

        add     eax, edi                        ; eax = bufpos + amount
        sub     esp, $stackvars                 ; reserve the local frame

        ; ---- cache context fields in the local frame ------------------
        mov     [esp + $context], edx
        mov     [esp + $bufposend], eax

        mov     ecx, [edx + OFF_MEM_WINDOW]
        mov     eax, [edx + OFF_BITBUF]

        mov     [esp + $mem_window], ecx
        mov     [esp + $bitbuf], eax

        ; The three repeated offsets are copied to the frame so that they
        ; can be addressed with a short (<128 byte) displacement.
        mov     ecx, [edx + OFF_LAST_MATCHPOS_OFFSET]
        mov     ebx, [edx + OFF_LAST_MATCHPOS_OFFSET + 4]

        mov     eax, [edx + OFF_LAST_MATCHPOS_OFFSET + 8]
        mov     [esp + $last_offset], ecx

        mov     [esp + $last_offset + 4], ebx
        mov     [esp + $last_offset + 8], eax

        ; Bit count: zero-extended byte.  From here on the live value is
        ; kept in dl; the $bitcount slot is written but is not read again
        ; anywhere in this procedure.
        xor     ecx, ecx
        mov     cl, BYTE PTR [edx + OFF_BITCOUNT]
        mov     [esp + $bitcount], ecx

        mov     edx, ecx                        ; dl = bit count

        jmp     SHORT dv_next_symbol

;-----------------------------------------------------------------------
; Input exhausted: the read pointer has reached OFF_INPUT_ENDPOS.
; Drop the frame, restore registers and return -1.  Nothing is written
; back to the context on this path.
;-----------------------------------------------------------------------
dv_input_overrun:
        add     esp, $stackvars

        pop     ebp
        pop     esi
        pop     edi
        pop     ebx
        pop     ecx
        pop     edx

        mov     eax, -1

        ret     0

;-----------------------------------------------------------------------
; Main tree, code longer than MAIN_TREE_TABLE_BITS.
; On entry ebx is the negative table entry and ecx = context.  eax gets
; the bit buffer with the table-index bits shifted out, so its MSB is
; the next code bit.  Each step computes index = (-ebx)*2 + next_bit and
; reloads ebx from the left/right array until ebx is non-negative.
;-----------------------------------------------------------------------
dv_main_long_code:
        mov     eax, [esp + $bitbuf]
        shl     eax, MAIN_TREE_TABLE_BITS

dv_main_walk:
        neg     ebx

        add     ebx, ebx                        ; ebx = 2 * node
        add     eax, eax                        ; CF = next code bit

        ; Original author's note: ADC takes 3 clocks, which lets it hide
        ; the 0F prefix of the following instruction (saving 1 clock).
        adc     ebx, 0                          ; ebx = 2 * node + bit

        movsx   ebx, WORD PTR [ecx + OFF_MAIN_TREE_LEFTRIGHT + ebx*2]

        test    ebx, ebx
        jl      SHORT dv_main_walk              ; still an internal node

        jmp     SHORT dv_main_decoded

;-----------------------------------------------------------------------
; Secondary (length) tree, code longer than
; SECONDARY_LEN_TREE_TABLE_BITS.  Same walk as above with ebp as the
; node, ecx as the shifted bit buffer and eax = context.
;-----------------------------------------------------------------------
dv_len_long_code:
        mov     ecx, [esp + $bitbuf]
        shl     ecx, SECONDARY_LEN_TREE_TABLE_BITS

dv_len_walk:
        neg     ebp

        add     ebp, ebp                        ; ebp = 2 * node
        add     ecx, ecx                        ; CF = next code bit

        adc     ebp, 0                          ; ebp = 2 * node + bit

        movsx   ebp, WORD PTR [eax + OFF_SECONDARY_TREE_LEFTRIGHT + ebp*2]

        test    ebp, ebp
        jl      SHORT dv_len_walk               ; still an internal node

        jmp     dv_len_decoded

;-----------------------------------------------------------------------
; Top of the decode loop: decode one main-tree symbol.
;   ebx = table[ bitbuf >> (32 - MAIN_TREE_TABLE_BITS) ]
;-----------------------------------------------------------------------
dv_next_symbol:
        mov     ecx, [esp + $context]
        mov     eax, [esp + $bitbuf]

        shr     eax, 32-MAIN_TREE_TABLE_BITS    ; eax = table index
        mov     ebp, DWORD PTR [ecx + OFF_INPUT_ENDPOS]

        ; The literal path jumps straight here with ecx, eax and ebp
        ; already loaded for the next symbol.
dv_next_symbol_primed:
        movsx   ebx, WORD PTR [ecx + OFF_MAIN_TREE_TABLE + eax*2]

        test    ebx, ebx
        jl      SHORT dv_main_long_code         ; negative: long code

dv_main_decoded:

        ; End-of-input check: error out if input_endpos <= input pointer.
        ; NOTE(review): this is the only input check per symbol, while up
        ; to four 16-bit refills may follow before the next one; this
        ; presumably relies on padding after the input data -- confirm
        ; with the caller.
        cmp     ebp, esi
        jbe     SHORT dv_input_overrun

        mov     cl, [ebx + ecx + OFF_MAIN_TREE_LEN]     ; cl = len[symbol]
        xor     eax, eax                        ; clear eax for the refill

        shl     DWORD PTR [esp + $bitbuf], cl   ; bitbuf <<= len

        sub     dl, cl                          ; bitcount -= len
        jg      SHORT dv_main_bits_ok           ; still > 0: no refill

        ; Refill: read a 16-bit little-endian word, shift it left by
        ; -bitcount and OR it into the bit buffer; bitcount += 16.
        ; cl = -dl is computed as XOR/INC.
        mov     al, [esi]
        mov     cl, dl

        mov     ah, [esi+1]
        xor     cl, -1

        add     esi, 2
        inc     cl                              ; cl = -bitcount

        shl     eax, cl

        or      eax, [esp + $bitbuf]
        add     dl, 16

        mov     [esp + $bitbuf], eax
        nop

dv_main_bits_ok:

        ; Symbols below 256 are literal bytes; the rest are matches.
        ; Subtracting 256 leaves bl unchanged, so bl is the literal.
        sub     ebx, 256
        jns     SHORT dv_match

        ; ---- literal byte ---------------------------------------------
        ; Stores bl at window[bufpos] while preloading ecx/eax/ebp for
        ; the next symbol decode.
        mov     ebp, [esp + $mem_window]        ; ebp = window base
        inc     edi                             ; bufpos++

        mov     eax, [esp + $bitbuf]            ; for the next iteration
        mov     ecx, [esp + $context]           ; for the next iteration

        shr     eax, 32-MAIN_TREE_TABLE_BITS    ; for the next iteration

        mov     [ebp + edi - 1], bl             ; store the literal
        mov     ebp, DWORD PTR [ecx + OFF_INPUT_ENDPOS] ; for the next iteration

        cmp     [esp + $bufposend], edi
        ja      SHORT dv_next_symbol_primed     ; bufpos_end > bufpos

        jmp     dv_done

;-----------------------------------------------------------------------
; Position slots 0..3 (reached from dv_have_length with ebx = slot).
;   slot 0     : match position = last_offset[0]; offsets unchanged
;   slot 1, 2  : match position = last_offset[slot], swapped with [0]
;   slot 3     : match position = 1 (per the original comment this
;                equals _MP_POS_minus2[3*4]); pushed as a new offset
;-----------------------------------------------------------------------
dv_slot_3:
        mov     ebx, 1
        jmp     dv_push_offset

dv_slot_123:
        cmp     bl, 3
        je      SHORT dv_slot_3

        mov     eax, [esp + $last_offset]               ; eax = t = last[0]
        mov     ecx, [esp + $last_offset + ebx*4]       ; ecx = last[slot]

        mov     [esp + $last_offset], ecx               ; last[0] = last[slot]
        mov     [esp + $last_offset + ebx*4], eax       ; last[slot] = t

        mov     ebx, ecx                        ; ebx = match position
        jmp     dv_copy_match

dv_slot_0123:
        test    ebx, ebx
        jnz     SHORT dv_slot_123

        ; slot == 0
        mov     ebx, [esp + $last_offset]
        jmp     dv_copy_match

;-----------------------------------------------------------------------
; Match.  ebx = symbol - 256: the low 3 bits are the length header and
; the remaining bits are the position slot.
;-----------------------------------------------------------------------
dv_match:
        mov     ebp, ebx
        mov     eax, [esp + $context]

        shr     ebx, 3                          ; ebx = position slot
        and     ebp, 7                          ; ebp = length header

        cmp     ebp, 7
        jne     SHORT dv_have_length            ; 0..6: length is final

        ; Length header 7: decode a symbol from the secondary length
        ; tree and add 7 to it.
        mov     ecx, [esp + $bitbuf]

        shr     ecx, 32-SECONDARY_LEN_TREE_TABLE_BITS

        movsx   ebp, WORD PTR [eax + OFF_SECONDARY_TREE_TABLE + ecx*2]

        test    ebp, ebp
        jl      dv_len_long_code                ; negative: long code

dv_len_decoded:

        mov     cl, BYTE PTR [eax + OFF_SECONDARY_TREE_LEN + ebp]
        add     ebp, 7

        shl     DWORD PTR [esp + $bitbuf], cl   ; bitbuf <<= len

        ; If bitcount is still > 0 we are fine, otherwise refill.
        sub     dl, cl                          ; bitcount -= len
        jg      SHORT dv_have_length

        xor     eax, eax
        mov     cl, dl

        ; Original author's note: NEG does not pair, so it is replaced
        ; with XOR CL,-1 ; INC CL.
        mov     al, [esi]
        xor     cl, -1

        mov     ah, [esi+1]
        inc     cl                              ; cl = -bitcount

        shl     eax, cl

        or      eax, [esp + $bitbuf]
        add     dl, 16

        add     esi, 2
        mov     [esp + $bitbuf], eax

dv_have_length:

        ; Slots 0..3 are dispatched separately.  Per the original note,
        ; slot 3 has zero extra bits and the shifts below do not work
        ; with a count of zero.  The MOV keeps the flags set by the CMP.
        xor     eax, eax
        cmp     bl, 3

        mov     al, bl                          ; eax = slot
        jle     SHORT dv_slot_0123

        ; Slot > 3: the match position is the top extra bits of the bit
        ; buffer plus _MP_POS_minus2[slot].
        mov     cl, BYTE PTR local_32_minus_extra_bits [eax]
        mov     ebx, [esp + $bitbuf]

        shr     ebx, cl                         ; ebx = extra bits

        add     ebx, _MP_POS_minus2[eax*4]      ; ebx = match position
        mov     cl, _dec_extra_bits [eax]

        shl     DWORD PTR [esp + $bitbuf], cl   ; consume the extra bits

        ; eax (the slot) is no longer needed after this point.
        sub     dl, cl                          ; bitcount -= extra bits
        jg      SHORT dv_extra_bits_ok

        ; First refill.  No "xor eax, eax" is needed: everything in eax
        ; above the low byte is already zero.
        mov     al, [esi]
        mov     cl, dl

        mov     ah, [esi+1]
        xor     cl, -1

        add     esi, 2
        inc     cl                              ; cl = -bitcount

        shl     eax, cl

        or      eax, [esp + $bitbuf]
        add     dl, 16

        ; The refill can be needed twice when 17 bits were consumed.  The
        ; JG below tests the flags from "add dl, 16"; the MOV in between
        ; leaves them intact.
        mov     [esp + $bitbuf], eax
        jg      SHORT dv_extra_bits_ok

        ; Second refill.
        xor     eax, eax
        mov     cl, dl

        mov     al, [esi]
        xor     cl, -1

        mov     ah, [esi+1]
        inc     cl                              ; cl = -bitcount

        shl     eax, cl

        or      eax, [esp + $bitbuf]
        add     dl, 16

        mov     [esp + $bitbuf], eax
        add     esi, 2

dv_extra_bits_ok:
dv_push_offset:
        ; New match position: shift the repeated offsets down,
        ; R2 := R1, R1 := R0, R0 := match position.
        mov     eax, [esp + $last_offset]       ; eax = R0
        mov     ecx, [esp + $last_offset + 4]   ; ecx = R1

        mov     [esp + $last_offset + 4], eax   ; R1 := R0
        mov     [esp + $last_offset + 8], ecx   ; R2 := R1

        mov     [esp + $last_offset], ebx       ; R0 := match position

;-----------------------------------------------------------------------
; Copy the match.
;   ebx = match position            edi = bufpos
;   ebp = length counter (0 copies 2 bytes, 1 copies 3 bytes, ...)
; The source index is (bufpos - matchpos) & window_mask.
; NOTE(review): only this initial source index is masked; afterwards the
; source and destination indices simply increment.  This presumably
; relies on how the window buffer is allocated -- confirm.
;-----------------------------------------------------------------------
dv_copy_match:

        mov     ecx, edi                        ; ecx = bufpos
        mov     eax, [esp + $context]           ; eax = context

        inc     edi                             ; bufpos++ for the first byte
        sub     ecx, ebx                        ; ecx = bufpos - matchpos

        and     ecx, [eax + OFF_WINDOW_MASK]    ; ecx &= window_mask
        mov     eax, [eax + OFF_MEM_WINDOW]     ; eax = window base

        mov     bl, [eax + ecx]                 ; bl = window[src]
        inc     ecx                             ; src++

        mov     [eax + edi - 1], bl             ; window[dst] = bl
        nop

        ; Second and later bytes.  Registers at this point:
        ;   eax = window base     ecx = source index     edi = bufpos
        ;   ebp = length counter  bl  = byte in flight
        ;   dl  = bit count       esi = input read pointer
        ; The loop runs ebp + 1 times; JGE tests the flags from
        ; "dec ebp", which the following MOV does not change.
dv_copy_more:
        inc     edi
        mov     bl, [eax + ecx]                 ; bl = window[src]

        inc     ecx
        dec     ebp

        mov     [eax + edi - 1], bl             ; window[bufpos - 1] = bl
        jge     SHORT dv_copy_more

        cmp     [esp + $bufposend], edi
        ja      dv_next_symbol                  ; bufpos_end > bufpos

        ; otherwise fall through

;-----------------------------------------------------------------------
; Done: bufpos has reached or passed bufpos_end.
; eax = 0 if bufpos == bufpos_end, otherwise -1.  The decoder state is
; written back to the context in both cases.
;-----------------------------------------------------------------------
dv_done:
        mov     ebx, DWORD PTR [esp + $context]
        xor     eax, eax

        cmp     edi, [esp + $bufposend]
        je      SHORT dv_write_back

        mov     eax, -1                         ; overshot: failure

dv_write_back:
        and     edi, [ebx + OFF_WINDOW_MASK]    ; store bufpos masked

        mov     [ebx + OFF_BITCOUNT], dl
        mov     [ebx + OFF_BUFPOS], edi

        mov     [ebx + OFF_INPUT_CURPOS], esi
        mov     edi, [esp + $bitbuf]

        ; Copy the repeated offsets back into the context.  edi, ecx, ebp
        ; and esi are free to use as scratch; they are restored below.
        mov     ecx, [esp + $last_offset]
        mov     ebp, [esp + $last_offset + 4]

        mov     esi, [esp + $last_offset + 8]
        mov     [ebx + OFF_LAST_MATCHPOS_OFFSET], ecx

        mov     [ebx + OFF_LAST_MATCHPOS_OFFSET+4], ebp
        mov     [ebx + OFF_LAST_MATCHPOS_OFFSET+8], esi

        mov     [ebx + OFF_BITBUF], edi

        ; ---- epilogue: drop the frame and restore registers -----------
        add     esp, $stackvars

        pop     ebp
        pop     esi
        pop     edi
        pop     ebx
        pop     ecx
        pop     edx

        ret     0

_fast_decode_verbatim_block ENDP
|
|
_TEXT   ENDS                            ; closes the _TEXT code segment

        END                             ; end of the source module
|