;
; tableasm.asm
;
; Assembly version of make_table()
;
; jforbes 07/20/96
;
; Note, this is not optimised for the Pentium at all; very few
; instructions will execute two at a time.
;
; Syntax/target: MASM, 32-bit flat model (.386P / .model FLAT).
;
; Trailing "u" / "v" / "NP" markers on instructions are the original
; author's scheduling notes (presumably Pentium U-pipe / V-pipe /
; not-pairable -- confirm).  "WAR ok" / "moved up" are likewise the
; author's notes about instruction ordering; do not reorder casually.
;
TITLE   TABLEASM.ASM

        .386P
.model FLAT

PUBLIC  _make_table

;       COMDAT @_make_table

_TEXT   SEGMENT

;
; Local stack frame, addressed as [esp + $name] after "sub esp, $last".
;
;   start  [esp + 0]    72 bytes = 18 dwords (indices 0..17 are used)
;   weight [esp + 72]   72 bytes = 18 dwords
;   count  [esp + 144]  72 bytes = 18 dwords
;   followed by one dword per scalar local / cached parameter.
;
$start          = 0
$weight         = $start + 72
$count          = $weight + 72
$nchar          = $count + 72
$bitlen         = $nchar + 4
$ch             = $bitlen + 4           ; reserved slot; never referenced in the code shown here
$leftright      = $ch + 4
$avail          = $leftright + 4
$k              = $avail + 4
$table          = $k + 4
$tablebits      = $table + 4
$jutbits        = $tablebits + 4
$context        = $jutbits + 4          ; stored below but not read in the code shown here
$last           = $context + 4          ; total size of the local frame in bytes

_make_table PROC NEAR
; COMDAT
;
;void make_table(
;       t_decoder_context *context,
;       USHORT          nchar,
;       UBYTE           *bitlen,
;       USHORT          tablebits,
;       short           *table,
;       short           *leftright)
;
; Builds a decoding table from an array of code bit-lengths (see the
; C pseudo-code quoted in the comments throughout).
;
; In:    six dword arguments on the stack, in the order of the prototype.
; Out:   eax = 1 on success; eax = 0 on the bad_table2 path.  (The
;        not_65536 / bad_table path is not fully shown here.)
; Saved: ebx, ecx, edx, ebp, esi, edi are pushed on entry and popped at
;        "cleanup"; "ret 0" leaves the arguments for the caller to remove.
; Note:  rep stosd / rep stosw are used without touching the direction
;        flag -- assumes DF is clear on entry (TODO confirm).
;
; count         [esp+72+72]
; weight        [esp+72]
; start         [esp]

; 6 regs * 4 = 24 bytes
        push    ebx
        push    ecx
        push    edx
        push    ebp
        push    esi
        push    edi

        sub     esp, $last

; how to access the parameters off the stack
; skip over 24 bytes of pushed registers, and $last local
; variables, and the 4 byte return address.
$parms  = $last+28

; Copy the arguments into the local frame.
        mov     eax, [esp + $parms + 4]
        and     eax, 65535                      ; nchar: keep the low 16 bits (USHORT)
        mov     [esp + $nchar], eax

        mov     eax, [esp + $parms]
        mov     [esp + $context], eax

        mov     eax, [esp + $parms + 8]
        mov     [esp + $bitlen], eax

        mov     eax, [esp + $parms + 12]
        and     eax, 255                        ; tablebits: only the low 8 bits are kept,
                                                ; although the prototype says USHORT
        mov     [esp + $tablebits], eax

        mov     eax, [esp + $parms + 16]
        mov     [esp + $table], eax

        mov     eax, [esp + $parms + 20]
        mov     [esp + $leftright], eax

; for (i = 1; i <= 16; i++)
;       count[i] = 0;

; clear 64 bytes starting at &count[1]
        xor     eax, eax
        lea     edi, [esp + $count + 4]
        mov     ecx, 16
        rep     stosd

; for (i = 0; i < nchar; i++)
;       count[bitlen[i]]++;

; Do it in reverse
;
; count[0] is not cleared above; it may be incremented here but is
; never read in the code shown.
;
; NOTE(review): the loop is bottom-tested, so with nchar == 0 it would
; still execute once with ecx = -1; and bl is used as an index into
; count[] without a range check.  Presumably callers guarantee
; nchar >= 1 and bitlen[i] <= 16 -- confirm.
        mov     ecx, [esp + $nchar]             ; u
        mov     esi, [esp + $bitlen]            ; v

        xor     ebx, ebx                        ; u   upper 24 bits stay zero for the index below
        dec     ecx                             ; v   ecx = i

loop1:
        mov     bl, [esi + ecx]                 ; bl = bitlen[i]
        inc     DWORD PTR [esp + $count + ebx*4] ; NP

        dec     ecx                             ; u
        jge     SHORT loop1                     ; v

; start[1] = 0;
;
; for (i = 1; i <= 16; i++)
;       start[i + 1] = start[i] + (count[i] << (16 - i));
;
; ecx counts 15 down to 0 (16 iterations), so the last store is start[17].
        lea     ebp, [esp + $start + 4]         ; u   ebp = &start[1]
        lea     esi, [esp + $count + 4]         ; v   esi = &count[1]

        xor     edx, edx                        ; u   edx = start[i]
        mov     ecx, 15                         ; v   ecx = 16 - i

        mov     [ebp], edx                      ; u   start[1] = 0
        nop                                     ; v

loop2:
        mov     eax, [esi]                      ; u   eax = count[i]
        add     ebp, 4                          ; v

        shl     eax, cl                         ; u
        add     esi, 4                          ; v

        add     eax, edx                        ; u   edx = start[i]

; stall
        mov     [ebp], eax                      ; u   start[i+1]
        mov     edx, eax                        ; v   edx <- start[i+1]

        dec     ecx                             ; u
        jge     SHORT loop2                     ; v

; if (start[17] != 65536)
        mov     edx, [esp + 68 + $start]        ; edx = start[17] (still live at not_65536)
        cmp     edx, 65536
        jne     not_65536

; jutbits = 16 - tablebits;
;
; for (i = 1; i <= tablebits; i++)
; {
;       start[i] >>= jutbits;
;       weight[i] = 1 << (tablebits - i);
; }
;
; The loop below is bottom-tested: the body runs for i = 1 before
; i is compared against tablebits.
        mov     edx, [esp + $tablebits]         ; u   edx = tablebits
        mov     eax, 1                          ; v   eax = i

        lea     ecx, [edx - 1]                  ; u   ecx = tablebits - i(=1)
        mov     ebp, eax                        ; v   ebp = 1

        shl     ebp, cl                         ; u   ebp = 1 << (tablebits - i)
        mov     ebx, ecx                        ; v   ebx = tablebits - i(=1)

        mov     cl, 16                          ; upper bits of ecx are zero
        sub     ecx, edx                        ; ecx = jutbits = 16 - tablebits

        mov     [esp + $jutbits], ecx

loop3:
        shr     DWORD PTR [esp + $start + eax*4], cl    ; u  start[i] >>= jutbits
        mov     DWORD PTR [esp + $weight + eax*4], ebp  ; v  weight[i] = 1 << (tablebits - i)

        shr     ebp, 1                          ; u   next weight is half the current one
        inc     eax                             ; v   i++

        cmp     eax, edx                        ; u
        jle     SHORT loop3                     ; v

; while (i <= 16)
; {
;       weight[i] = 1 << (16 - i);
;       i++;
; }
        cmp     al, 16                          ; u
        jg      SHORT exit_loop4                ; v

loop4:
        mov     ecx, 16                         ; u
        mov     ebx, 1                          ; v

        sub     ecx, eax                        ; u   ecx = 16 - i
        inc     eax                             ; v   WAR ok

        shl     ebx, cl                         ; u   ebx = 1 << (16 - i)
        ; eax was already incremented, hence the -4 to address weight[i]
        mov     DWORD PTR [esp + $weight + eax*4 - 4], ebx ; v

        cmp     al, 16                          ; u
        jle     SHORT loop4                     ; v

exit_loop4:

; i = start[tablebits+1] >> jutbits

; ecx = jutbits
        mov     ecx, [esp + $jutbits]

; edx = tablebits
        mov     edx, [esp + $tablebits]

; eax = start[tablebits+1]
        mov     eax, [esp + $start + 4 + edx*4]

; eax = start[tablebits+1] >> jutbits
        shr     eax, cl

; if (i != 65536)
        cmp     eax, 65536
        je      SHORT i_is_zero                 ; taken when i == 65536: nothing to clear

;
; memset(&table[i], 0, sizeof(ushort)*((1 << tablebits)-i);
;

; ecx = tablebits
        mov     ecx, edx

; edx = 1 << tablebits
        mov     edx, 1
        shl     edx, cl

; edx = (1 << tablebits) - i
        sub     edx, eax

; count = (1 << tablebits) - i words
        mov     ecx, edx

; dest = edi = &table[i]
        mov     edi, [esp + $table]
        lea     edi, [edi + eax*2]

; value = 0
        xor     eax, eax

        rep     stosw                           ; no stores when ecx == 0

i_is_zero:

;
; avail = nchar;
;
        mov     eax, [esp + $nchar]             ; u
        xor     edi, edi                        ; v   edi = ch

;
; for (ch = 0; ch < nchar; ch++)
;
; Register roles for the main loop: edi = ch throughout.
; NOTE(review): main_loop is entered unconditionally and the test
; against nchar is at the bottom, so the body runs for ch = 0 even if
; nchar == 0 -- presumably never the case; confirm.
        mov     [esp + $avail], eax             ; u
        jmp     SHORT main_loop                 ; v

; for short jump
; (placed here so the "jg SHORT bad_table2" below can reach it)
bad_table2:
        xor     eax, eax                        ; return failure
        jmp     cleanup

main_loop:

; if ((len = bitlen[ch]) == 0)
;       continue;

; eax = &bitlen[0]
        mov     eax, [esp + $bitlen]

; ebp = len = bitlen[ch]
        movzx   ebp, BYTE PTR [eax + edi]

; if (len == 0)
;       continue
        test    ebp, ebp
        jz      loop_bottom

; nextcode = start[len] + weight[len];
; (len indexes start[] and weight[] without a range check)

; ebx = start[len]
        mov     ebx, [esp + $start + ebp*4]     ; u
        mov     ecx, [esp + $tablebits]         ; v   ecx = tablebits

; ebx = nextcode = start[len] + weight[len]
        mov     eax, ebx                        ; u   eax = start[len]
        add     ebx, [esp + $weight + ebp*4]    ; v   WAR ok

; if (len <= tablebits)
        cmp     ebp, ecx                        ; u
        jg      SHORT len_g_tablebits           ; v

; if (nextcode > (1 << tablebits))
;       bad_table();

; edx = 1 << tablebits
        mov     edx, 1

        shl     edx, cl                         ; u
        mov     ecx, ebx                        ; v   ecx = nextcode

; if (nextcode > (1 << tablebits))
        cmp     ebx, edx                        ; u
        jg      SHORT bad_table2                ; v

; for (i = start[len]; i < nextcode; i++)
;       table[i] = ch;

; ecx = nextcode - start[len]
        sub     ecx, eax                        ; u
        add     eax, eax                        ; v   WAR ok   (eax = start[len] * 2, a byte offset)

; eax = &table[ start[len] ]
        add     eax, [esp + $table]             ; u

; start[len] = nextcode (moved up)
        mov     [esp + $start + ebp*4], ebx     ; v

; For this loop:
;       eax = &table[ start[len] ]
;       edi = ch
;       ecx = nextcode - start[len]
;
; Bottom-tested: stores at least one entry (ecx equals weight[len] here).
loop6:
        mov     WORD PTR [eax], di              ; table[i] = ch
        add     eax, 2                          ; i++

        dec     ecx
        jnz     SHORT loop6

; ch++
        inc     edi                             ; moved up

; loop bottom
; (a copy of loop_bottom, so this path does not need a jump to it)
        cmp     edi, [esp + $nchar]
        jl      SHORT main_loop

        mov     eax, 1                          ; success
        jmp     cleanup

;
; len > tablebits
;
; on entry:     eax = start[len]
;               ebx = nextcode
;               ecx = tablebits
;               ebp = len
;
; Walks/extends the tree stored in leftright[] for the (len - tablebits)
; code bits that do not fit in the direct table.
;
len_g_tablebits:
        mov     esi, ebp                        ; u   esi = len
        mov     edx, eax                        ; v   edx = start[len]

        sub     esi, ecx                        ; u   esi = len - tablebits
        add     cl, 16                          ; v   cl = tablebits + 16

; edx = k << tablebits
; shift left another 16 because we want to use a DWORD
; for testing the negative bit
; (the shift count is tablebits + 16; presumably < 32 because
;  tablebits < len <= 16 on this path -- confirm)
        shl     edx, cl                         ; u
        mov     [esp + $k], eax                 ; v   k = start[len]

; start[len] = nextcode;
        mov     [esp + $start + ebp*4], ebx     ; u
        nop                                     ; v

; p = &table[k >> jutbits];
        mov     ecx, [esp + $jutbits]           ; u   ecx = jutbits
        mov     ebx, [esp + $k]                 ; v   ebx = k >> jutbits (after the shr below)

        shr     ebx, cl                         ; u
        mov     eax, [esp + $table]             ; v

        lea     ebx, [eax + ebx*2]              ; u   ebx = p = &table[k >> jutbits]
        mov     ebp, [esp + $avail]             ; v   ebp = avail

; Loop state: ebx = p, edx = remaining code bits (next bit in bit 31),
;             esi = bits left to consume, ebp = avail, edi = ch.
bottom_loop:

; if (*p == 0)

; eax = &leftright[0]
        mov     eax, [esp + $leftright]

; ecx = *p
        movsx   ecx, WORD PTR [ebx]             ; NP

; *p == 0 ?
        test    ecx, ecx                        ; u
        jne     SHORT p_not_zero                ; v

; left_right[avail*2] = left_right[avail*2+1] = 0;
; *p = -avail;
; avail++;

        mov     WORD PTR [ebx], bp              ; *p = avail  (negated below)

; sets left and right to zero (remember that ecx == 0)
; one dword store clears both 16-bit entries of the pair
        mov     [eax + ebp*4], ecx              ; u
        inc     ebp                             ; v   avail++

; *p = -avail
; (the value stored above, i.e. avail before the increment)
        neg     WORD PTR [ebx]

p_not_zero:

; if ((signed short) k < 0)
;       p = &right[-(*p)];
; else
;       p = &left[-(*p)];

; ecx = -(*p)
        movsx   ecx, WORD PTR [ebx]
        neg     ecx

; ebx = p = &ptr[-(*p)]
; (4 bytes per node: the first word is the "left" entry, the second "right")
        lea     ebx, [ecx*4 + eax]

; if (k becomes -ve when we shift out a bit)
        add     edx, edx                        ; shifts the top bit of edx into CF
        jnc     SHORT go_left

; right
        add     ebx, 2

go_left:
        dec     esi                             ; i--
        jnz     SHORT bottom_loop

; *p = ch;
        mov     WORD PTR [ebx], di

; store avail
        mov     [esp + $avail], ebp

loop_bottom:

; ch++
        inc     edi

        cmp     edi, [esp + $nchar]
        jl      main_loop

        mov     eax, 1                          ; success

; Common exit: eax already holds the return value.
; Pops mirror the pushes at entry, in reverse order.
cleanup:
        add     esp, $last

        pop     edi
        pop     esi
        pop     ebp
        pop     edx
        pop     ecx
        pop     ebx

        ret     0

; Reached when start[17] != 65536; edx still holds start[17].
; A non-zero value branches to bad_table.
not_65536:
        test    edx, edx
        jnz     SHORT bad_table

; memset(table, 0, sizeof(ushort)*(1<