;
; DV.ASM
;
; jforbes
;
; Assembly inner loop that decodes a "verbatim" block into the decoder's
; memory window.  MASM syntax, 32-bit flat model.
;
; The instruction order in this file is hand-scheduled: the original
; author's notes refer to U/V pipe pairing and to ADC hiding a 0F prefix.
; Several conditional jumps also consume flags that were produced a few
; instructions earlier, with only flag-preserving MOVs in between.
; Do not reorder instructions without re-checking both properties.
;
        TITLE   DV.ASM

        .386P
.model FLAT

_TEXT   SEGMENT

INCLUDE offsets.i

EXTRN   _MP_POS_minus2:DWORD
EXTRN   _dec_extra_bits:BYTE

;
; Shift counts indexed by position slot m (51 entries).  The decoder does
; "bitbuf >> table[m]" to pull the slot's extra bits down from the top of
; the bit buffer.  The first four entries are 32 and are never used as a
; shift count: slots 0..3 are diverted before the table is read.
;
local_32_minus_extra_bits:
        DB      32,32,32,32,31,31,30,30
        DB      29,29,28,28,27,27,26,26
        DB      25,25,24,24,23,23,22,22
        DB      21,21,20,20,19,19,18,18
        DB      17,17,16,16,15,15,15,15
        DB      15,15,15,15,15,15,15,15
        DB      15,15,15

PUBLIC  _fast_decode_verbatim_block

;
; Stack frame, as offsets from ESP after "sub esp, $stackvars".
;
; $last_offset occupies 12 bytes: one DWORD for each of the three
; repeated match offsets.
;
; $bitcount is written once in the prologue and is not read again anywhere
; in this file; the live bit count is carried in DL.
;
$bitbuf=0
$bufposend=4
$context=8
$mem_window=12
$last_offset=16
$bitcount=28
$stackvars=32

;-----------------------------------------------------------------------
; fast_decode_verbatim_block(context, bufpos, amount_to_decode)
;
; In:    three DWORD stack arguments, in that order.
;        "ret 0" is used, so the arguments stay on the stack for the caller.
; Out:   eax =  0  decoding stopped with bufpos exactly at bufpos+amount
;        eax = -1  input pointer reached OFF_INPUT_ENDPOS (context is NOT
;                  updated in that case), or decoding stopped with bufpos
;                  different from bufpos+amount (context IS updated)
; Saves: edx, ecx, ebx, edi, esi, ebp (pushed on entry, popped on exit)
;
; Register roles inside the main loop:
;        esi = input pointer          edi = bufpos (unmasked)
;        dl  = bit count (signed)     [esp+$bitbuf] = bit buffer
;        ebx = decoded symbol, then position slot m, then match offset
;        ebp = input end pointer during the main-tree step, later the
;              match length minus 2
;
; Context fields written on the normal exit path: OFF_BITCOUNT,
; OFF_BUFPOS, OFF_INPUT_CURPOS, OFF_LAST_MATCHPOS_OFFSET[0..2], OFF_BITBUF.
;-----------------------------------------------------------------------
_fast_decode_verbatim_block PROC NEAR

        ; Preserve every register this routine uses except eax.
        push    edx
        push    ecx
        push    ebx
        push    edi
        push    esi
        push    ebp

        ; Six pushes plus the return address put the first argument at +28.
        mov     edx, [esp + 28]                         ; edx = context
        mov     edi, [esp + 32]                         ; edi = bufpos
        mov     eax, [esp + 36]                         ; eax = amount_to_decode

        mov     esi, [edx + OFF_INPUT_CURPOS]           ; esi = current input pointer
        add     eax, edi                                ; eax = bufpos + amount (stop position)

        sub     esp, $stackvars                         ; reserve the local frame

        ; Cache frequently used context values in the frame.
        mov     [esp + $context], edx
        mov     [esp + $bufposend], eax

        mov     ecx, [edx + OFF_MEM_WINDOW]
        mov     eax, [edx + OFF_BITBUF]

        mov     [esp + $mem_window], ecx
        mov     [esp + $bitbuf], eax

        ; Bring the three repeated offsets into the frame so they can be
        ; reached with a short displacement from esp.
        mov     ecx, [edx + OFF_LAST_MATCHPOS_OFFSET]
        mov     ebx, [edx + OFF_LAST_MATCHPOS_OFFSET + 4]

        mov     eax, [edx + OFF_LAST_MATCHPOS_OFFSET + 8]
        mov     [esp + $last_offset], ecx

        mov     [esp + $last_offset + 4], ebx
        mov     [esp + $last_offset + 8], eax

        ; Zero-extend the byte-sized bit count; from here on it lives in dl.
        xor     ecx, ecx
        mov     cl, BYTE PTR [edx + OFF_BITCOUNT]
        mov     [esp + $bitcount], ecx
        mov     edx, ecx

        jmp     SHORT dv_decode_next

;
; ---- out-of-line helpers, kept here within SHORT-jump range of the loop ----
;

;
; Input exhausted: the input pointer is at or past OFF_INPUT_ENDPOS.
; Drop the frame, restore registers and return -1.  Nothing is written
; back to the context on this path.
;
dv_input_overrun:
        add     esp, $stackvars
        pop     ebp
        pop     esi
        pop     edi
        pop     ebx
        pop     ecx
        pop     edx
        mov     eax, -1
        ret     0

;
; Main tree, code longer than MAIN_TREE_TABLE_BITS.
; On entry ebx is the negative value read from the lookup table and
; ecx = context.  Each step computes index = 2*(-ebx) + next bit of the
; bit buffer and reloads ebx from the left/right array, until the value
; read is no longer negative.
;
dv_main_walk:
        mov     eax, [esp + $bitbuf]
        shl     eax, MAIN_TREE_TABLE_BITS               ; discard the bits the table already used

dv_main_walk_step:
        neg     ebx
        add     ebx, ebx                                ; ebx = 2 * node
        add     eax, eax                                ; CF = next bit (old MSB of eax)

        ; Original timing note: ADC takes 3 clocks, which lets it
        ; overshadow the 0F prefix of the MOVSX that follows.
        adc     ebx, 0                                  ; ebx = 2 * node + bit

        movsx   ebx, WORD PTR [ecx + OFF_MAIN_TREE_LEFTRIGHT + ebx*2]
        test    ebx, ebx
        jl      SHORT dv_main_walk_step                 ; still negative: keep walking
        jmp     SHORT dv_main_decoded

;
; Secondary length tree, code longer than SECONDARY_LEN_TREE_TABLE_BITS.
; Same walk as above, with ebp as the node, ecx as the shifted bit buffer
; and eax = context.
;
dv_len_walk:
        mov     ecx, [esp + $bitbuf]
        shl     ecx, SECONDARY_LEN_TREE_TABLE_BITS

dv_len_walk_step:
        neg     ebp
        add     ebp, ebp
        add     ecx, ecx                                ; CF = next bit
        adc     ebp, 0

        movsx   ebp, WORD PTR [eax + OFF_SECONDARY_TREE_LEFTRIGHT + ebp*2]
        test    ebp, ebp
        jl      SHORT dv_len_walk_step
        jmp     dv_len_decoded

;
; ---- main decode loop ----
;
dv_decode_next:
        ; Prepare the main-tree table lookup:
        ;   ecx = context, eax = top MAIN_TREE_TABLE_BITS bits of bitbuf,
        ;   ebp = end-of-input pointer.
        mov     ecx, [esp + $context]
        mov     eax, [esp + $bitbuf]

        shr     eax, 32-MAIN_TREE_TABLE_BITS
        mov     ebp, DWORD PTR [ecx + OFF_INPUT_ENDPOS]

; The literal path sets up ecx/eax/ebp itself and re-enters here.
dv_lookup_main:
        movsx   ebx, WORD PTR [ecx + OFF_MAIN_TREE_TABLE + eax*2]
        test    ebx, ebx
        jl      SHORT dv_main_walk                      ; negative entry: long code

dv_main_decoded:
        ; The only end-of-input test in the loop: fail when endpos <= esi.
        ; NOTE(review): the refills further down read input without another
        ; check - presumably the input buffer is padded; confirm with caller.
        cmp     ebp, esi
        jbe     SHORT dv_input_overrun

        mov     cl, [ebx + ecx + OFF_MAIN_TREE_LEN]     ; cl = code length of the symbol
        xor     eax, eax                                ; clear eax for a possible refill

        shl     DWORD PTR [esp + $bitbuf], cl           ; consume the code bits

        sub     dl, cl                                  ; bit count -= length
        jg      SHORT dv_classify                       ; still positive: no refill

        ; Refill: read the next two input bytes as a little-endian word,
        ; shift it left by -bitcount, OR it into bitbuf and add 16 to the
        ; bit count.  cl = -dl is formed with XOR -1 / INC.
        mov     al, [esi]
        mov     cl, dl

        mov     ah, [esi+1]
        xor     cl, -1

        add     esi, 2
        inc     cl

        shl     eax, cl

        or      eax, [esp + $bitbuf]
        add     dl, 16

        mov     [esp + $bitbuf], eax
        nop                                             ; filler kept from the original schedule

dv_classify:
        ; Symbols below 256 are literal bytes; the rest encode a match.
        sub     ebx, 256
        jns     SHORT dv_match

        ;
        ; Literal: bl still holds the byte value.  Store it at
        ; mem_window[bufpos], advance bufpos, and set up eax/ecx/ebp for
        ; the next lookup so the loop can re-enter at dv_lookup_main.
        ;
        mov     ebp, [esp + $mem_window]
        inc     edi                                     ; bufpos++

        mov     eax, [esp + $bitbuf]                    ; for the next lookup
        mov     ecx, [esp + $context]                   ; for the next lookup

        shr     eax, 32-MAIN_TREE_TABLE_BITS            ; for the next lookup
        mov     [ebp + edi - 1], bl                     ; write the literal

        mov     ebp, DWORD PTR [ecx + OFF_INPUT_ENDPOS] ; for the next lookup

        cmp     [esp + $bufposend], edi
        ja      SHORT dv_lookup_main                    ; stop position not reached yet

        jmp     dv_finish

;
; Position slot 3: the match offset is the constant 1 (the original note
; equates this with the _MP_POS_minus2 entry for slot 3).  It is pushed
; onto the repeated-offset list like any other new offset.
;
dv_slot3:
        mov     ebx, 1
        jmp     dv_push_offset

;
; Position slot 1 or 2 (slot 3 is filtered out first): reuse repeated
; offset m and swap it with entry 0.  The list is not shifted.
;
dv_slot_nonzero:
        cmp     bl, 3
        je      SHORT dv_slot3

        mov     eax, [esp + $last_offset]               ; eax = last[0]
        mov     ecx, [esp + $last_offset + ebx*4]       ; ecx = last[m]

        mov     [esp + $last_offset], ecx               ; last[0] = old last[m]
        mov     [esp + $last_offset + ebx*4], eax       ; last[m] = old last[0]

        mov     ebx, ecx                                ; match offset = old last[m]
        jmp     dv_copy_match

;
; Position slot 0..3: none of these read extra bits from the input.
;
dv_slot_small:
        test    ebx, ebx
        jnz     SHORT dv_slot_nonzero

        ; Slot 0: reuse last[0]; the list is left untouched.
        mov     ebx, [esp + $last_offset]
        jmp     dv_copy_match

;
; Match.  ebx = symbol - 256.  The low three bits are the length header,
; the remaining bits are the position slot m.
;
dv_match:
        mov     ebp, ebx
        mov     eax, [esp + $context]

        shr     ebx, 3                                  ; ebx = m
        and     ebp, 7                                  ; ebp = length header

        cmp     ebp, 7
        jne     SHORT dv_offset                         ; header 0..6 is the final length value

        ; Header 7: decode one symbol from the secondary length tree and
        ; add 7 to it.
        mov     ecx, [esp + $bitbuf]
        shr     ecx, 32-SECONDARY_LEN_TREE_TABLE_BITS

        movsx   ebp, WORD PTR [eax + OFF_SECONDARY_TREE_TABLE + ecx*2]
        test    ebp, ebp
        jnge    dv_len_walk                             ; negative entry: long code

dv_len_decoded:
        mov     cl, BYTE PTR [eax + OFF_SECONDARY_TREE_LEN + ebp]
        add     ebp, 7

        shl     DWORD PTR [esp + $bitbuf], cl           ; consume the code bits

        sub     dl, cl                                  ; bit count -= length
        jg      SHORT dv_offset                         ; still positive: no refill

        ; Refill, same scheme as in the main-tree step.  Original note:
        ; NEG does not pair, hence XOR CL,-1 followed by INC CL.
        xor     eax, eax
        mov     cl, dl

        mov     al, [esi]
        xor     cl, -1

        mov     ah, [esi+1]
        inc     cl

        shl     eax, cl

        or      eax, [esp + $bitbuf]
        add     dl, 16

        add     esi, 2
        mov     [esp + $bitbuf], eax

dv_offset:
        ; Slots 0..3 are handled separately.  For slot 3 the extra-bit
        ; count is zero, and the shift-based extraction below does not
        ; work with a zero count (original note).
        xor     eax, eax
        cmp     bl, 3

        mov     al, bl                                  ; eax = m; MOV keeps the CMP flags
        jle     SHORT dv_slot_small

        ; m > 3: offset = _MP_POS_minus2[m] + (top extra bits of bitbuf).
        mov     cl, BYTE PTR local_32_minus_extra_bits [eax]
        mov     ebx, [esp + $bitbuf]

        shr     ebx, cl                                 ; ebx = extra bits

        add     ebx, _MP_POS_minus2[eax*4]
        mov     cl, _dec_extra_bits [eax]               ; cl = number of extra bits

        shl     DWORD PTR [esp + $bitbuf], cl           ; consume the extra bits

        sub     dl, cl                                  ; eax (m) is no longer needed after this
        jg      SHORT dv_offset_ready

        ; First refill.  eax needs no clearing: it holds m, so only its
        ; low byte is non-zero, and al/ah are both overwritten here.
        mov     al, [esi]
        mov     cl, dl

        mov     ah, [esi+1]
        xor     cl, -1

        add     esi, 2
        inc     cl

        shl     eax, cl

        or      eax, [esp + $bitbuf]
        add     dl, 16

        ; Up to 17 extra bits can be consumed, so one 16-bit refill may
        ; not be enough.  MOV leaves the flags alone: JG tests the ADD.
        mov     [esp + $bitbuf], eax
        jg      SHORT dv_offset_ready

        ; Second refill.
        xor     eax, eax
        mov     cl, dl

        mov     al, [esi]
        xor     cl, -1

        mov     ah, [esi+1]
        inc     cl

        shl     eax, cl

        or      eax, [esp + $bitbuf]
        add     dl, 16

        mov     [esp + $bitbuf], eax
        add     esi, 2

; Two labels at one address: the first is the target of the extra-bits
; path above, the second of the slot-3 path.
dv_offset_ready:
dv_push_offset:
        ; New offset in ebx: shift the repeated-offset list down by one.
        mov     eax, [esp + $last_offset]               ; eax = R0
        mov     ecx, [esp + $last_offset + 4]           ; ecx = R1

        mov     [esp + $last_offset + 4], eax           ; R1 = old R0
        mov     [esp + $last_offset + 8], ecx           ; R2 = old R1

        mov     [esp + $last_offset], ebx               ; R0 = new offset

dv_copy_match:
        ;
        ; Copy the match.  On entry:
        ;   ebx = match offset
        ;   edi = bufpos
        ;   ebp = match length - 2 (0 means two bytes, 1 means three, ...)
        ; eax and ecx are loaded below.
        ;
        ; Only the initial source index is masked with OFF_WINDOW_MASK;
        ; after that both indices just increment.
        ; NOTE(review): presumably the window allocation tolerates running
        ; past the masked range - confirm against the allocator.
        ;
        mov     ecx, edi
        mov     eax, [esp + $context]

        inc     edi                                     ; bufpos++ for the first byte
        sub     ecx, ebx                                ; ecx = bufpos - offset

        and     ecx, [eax + OFF_WINDOW_MASK]            ; wrap the source index
        mov     eax, [eax + OFF_MEM_WINDOW]             ; eax = mem_window base

        mov     bl, [eax + ecx]                         ; first source byte
        inc     ecx

        mov     [eax + edi - 1], bl                     ; first destination byte
        nop                                             ; filler kept from the original schedule

        ;
        ; Remaining bytes: the loop body runs ebp + 1 times.
        ;   eax = mem_window base     ecx = source index
        ;   edi = bufpos              bl  = byte in flight
        ;   dl  = bit count           esi = input pointer
        ;
dv_copy_more:
        inc     edi
        mov     bl, [eax + ecx]

        inc     ecx
        dec     ebp

        mov     [eax + edi - 1], bl                     ; MOV keeps the flags set by DEC
        jge     SHORT dv_copy_more

        cmp     [esp + $bufposend], edi
        ja      dv_decode_next                          ; stop position not reached yet

        ; otherwise fall through

;
; Normal exit.  eax = 0 when bufpos landed exactly on the stop position,
; -1 otherwise.  The decoder state is written back in both cases.
;
dv_finish:
        mov     ebx, DWORD PTR [esp + $context]
        xor     eax, eax

        cmp     edi, [esp + $bufposend]
        je      SHORT dv_write_back

        mov     eax, -1                                 ; did not stop on the requested position

dv_write_back:
        and     edi, [ebx + OFF_WINDOW_MASK]            ; store bufpos wrapped to the window

        mov     [ebx + OFF_BITCOUNT], dl
        mov     [ebx + OFF_BUFPOS], edi
        mov     [ebx + OFF_INPUT_CURPOS], esi

        mov     edi, [esp + $bitbuf]

        ; Repeated offsets go back into the context structure.
        mov     ecx, [esp + $last_offset]
        mov     ebp, [esp + $last_offset + 4]
        mov     esi, [esp + $last_offset + 8]

        mov     [ebx + OFF_LAST_MATCHPOS_OFFSET], ecx
        mov     [ebx + OFF_LAST_MATCHPOS_OFFSET+4], ebp
        mov     [ebx + OFF_LAST_MATCHPOS_OFFSET+8], esi

        mov     [ebx + OFF_BITBUF], edi

        ; Drop the frame and restore the saved registers.
        add     esp, $stackvars
        pop     ebp
        pop     esi
        pop     edi
        pop     ebx
        pop     ecx
        pop     edx
        ret     0

_fast_decode_verbatim_block ENDP
_TEXT   ENDS
END