windows-nt/Source/XPSP1/NT/base/ntdll/i386/emfprem.asm
2020-09-26 16:20:57 +08:00

408 lines
12 KiB
NASM

subttl emfprem.asm - FPREM and FPREM1 instructions
page
;*******************************************************************************
;emfprem.asm - FPREM and FPREM1 instructions
; by Tim Paterson
;
; Microsoft Confidential
;
; Copyright (c) Microsoft Corporation 1991
; All Rights Reserved
;
;Inputs:
; edi = [CURstk]
; ST(1) loaded into ebx:esi & ecx
;
;Revision History:
;
; [] 09/05/91 TP Initial 32-bit version.
;
;*******************************************************************************
;Dispatch table for remainder
;
;One operand has been loaded into ecx:ebx:esi ("source"), the other is
;pointed to by edi ("dest").
;
;Tag of source is shifted. Tag values are as follows:
.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
.erre TAG_VALID eq 1
.erre TAG_ZERO eq 2
.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
;Any special case routines not found in this file are in emarith.asm
;Divisor Dividend
tFpremDisp label dword ;Source(ST(1)) Dest (ST(0))
dd PremDouble ;single single
dd PremDouble ;single double
dd PremX ;single zero
dd PremSpclDest ;single special
dd PremDouble ;double single
dd PremDouble ;double double
dd PremX ;double zero
dd PremSpclDest ;double special
dd ReturnIndefinite ;zero single
dd ReturnIndefinite ;zero double
dd ReturnIndefinite ;zero zero
dd PremSpclDest ;zero special
dd PremSpclSource ;special single
dd PremSpclSource ;special double
dd PremSpclSource ;special zero
dd TwoOpBothSpcl ;special special
dd ReturnIndefinite ;Two infinites
PremSpclDone:
add sp,4 ;Clean off return address for normal
ret
;***
PremSpclDest:
mov al,EMSEG:[edi].bTag ;Pick up tag
cmp al,bTAG_INF ;Dividing infinity?
jz ReturnIndefinite ;Invalid operation if so
jmp SpclDest ;In emarith.asm
;***
PremSpclSource:
cmp cl,bTAG_INF ;Dividing by infinity?
jnz SpclSource ;in emarith.asm
PremX:
;Return Dest unchanged, quotient = 0
mov EMSEG:[SWcc],0
ret
;*******************************************************************************
;Map quotient bits to condition codes
Q0 equ C1
Q1 equ C3
Q2 equ C0
MapQuo label byte
db 0
db Q0
db Q1
db Q1+Q0
db Q2
db Q2+Q0
db Q2+Q1
db Q2+Q1+Q0
Prem1Cont:
;edx:eax = remainder, normalized
;ebx:esi = divisor
;ebp = quotient
;edi = exponent difference, zero or less
;ecx = 0 (positive sign)
;
;At this point, 0 <= remainder < divisor. However, for FPREM1 we need
; -divisor/2 <= remainder <= divisor/2. If remainder = divisor/2, whether
;we choose + or - is dependent on whichever gives us an even quotient
;(the usual IEEE rounding rule). Quotient must be incremented if we
;use negative remainder.
cmp edi,-1
jl PremCont ;Remainder < divisor/2
jg NegRemainExp0 ;Remainder > divisor/2
;Exponent is -1
cmp edx,ebx
jl PremCont ;Remainder < divisor/2
jg NegRemain ;Remainder > divisor/2
cmp eax,esi
jl PremCont ;Remainder < divisor/2
jg NegRemain ;Remainder > divisor/2
;Remainder = divisor/2. Ensure quotient is even
test ebp,1 ;Even?
jz PremCont
NegRemain:
;Theoretically we subtract divisor from remainder once more, leaving us
;with a negative remainder. But since we use sign/magnitude representation,
;we want the abs() of that with sign bit set--so subtract remainder from
;(larger) divisor. Note that exponent difference is -1, so we must align
;binary points first.
add esi,esi
adc ebx,ebx ;Double divisor to align binary points
NegRemainExp0:
sub esi,eax
sbb ebx,edx ;Subtract remainder
mov eax,esi
mov edx,ebx ;Result in edx:eax
mov ch,bSign ;Flip sign of remainder
inc ebp ;Increase quotient
;Must normalize result of subtraction
bsr ecx,edx ;Look for 1 bit
jnz @F
sub edi,32
xchg edx,eax ;Shift left 32 bits
bsr ecx,edx
@@:
lea edi,[edi+ecx-31] ;Fix up exponent for normalization
not cl
shld edx,eax,cl
shl eax,cl
mov ch,bSign ;Flip sign of remainder
PremCont:
;edx:eax = remainder, normalized
;ebp = quotient
;edi = exponent difference, zero or less
;ch = sign
or eax,eax ;Low bits zero?
.erre bTAG_VALID eq 1
.erre bTAG_SNGL eq 0
setnz cl ;if low half==0 then cl=0 else cl=1
mov esi,EMSEG:[CURstk]
mov ebx,esi
NextStackElem ebx,Prem
add di,EMSEG:[ebx].wExp ;Compute result exponent
cmp di,IexpMin-IexpBias
jle PremUnderflow
SavePremResult:
mov EMSEG:[esi].lManLo,eax
xor EMSEG:[esi].bSgn,ch
mov EMSEG:[esi].lManHi,edx
and ebp,7 ;Keep last 3 bits of quotient only
; and give write buffers a break
mov EMSEG:[esi].wExp,di
mov EMSEG:[esi].bTag,cl
mov al,MapQuo[ebp] ;Get cond. codes for this quotient
mov EMSEG:[SWcc],al
ret
NextStackWrap ebx,Prem ;Tied to NextStackElem above
PremUnderflow:
test EMSEG:[CWmask],Underflow ;Is exception unmasked?
jz UnmaskedPremUnder
mov cl,bTAG_DEN
jmp SavePremResult
UnmaskedPremUnder:
add edi,UnderBias ;Additional exp. bias for unmasked resp.
or EMSEG:[CURerr],Underflow
jmp SavePremResult
;*******************************************************************************
PremDouble:
;edi = [CURstk]
;ebx:esi = ST(1) mantissa, ecx = ExpSgn
add sp,4 ;Clean off return address for special
mov eax,EMSEG:[edi].lManLo
mov edx,EMSEG:[edi].lManHi
movsx edi,EMSEG:[edi].wExp
xor ebp,ebp ;Quotient, in case we skip stage 1
sar ecx,16 ;Bring exponent down
sub edi,ecx ;Get exponent difference
jl ExitPremLoop ;If dividend is smaller, return it.
;FPREM is performed in two stages. The first stage is used only if the
;exponent difference is greater than 31. It reduces the exponent difference
;by 32, and repeats until the difference is less than 32. Note that
;unlike the hardware FPREM instruction, we are not limited to reducing
;the exponent by only 63--we just keep looping until it's done.
;
;The second stage performs ordinary 1-bit-at-a-time long division.
;It stops when the exponent difference is zero, meaning we have an
;integer quotient and the final remainder.
;
;edx:eax = dividend
;ebx:esi = divisor
;edi = exponent difference
;ebp = 0 (initial quotient)
cmp edi,32 ;Do we need to do stage 1?
jl FitDivisor ;No, start stage 2
;FPREM stage 1
;
;Exponent difference is at least 32. Use 32-bit division to compute
;quotient and exact remainder, reducing exponent difference by 32.
;DIV instruction will overflow if dividend >= divisor. In this case,
;subtract divisor from dividend to ensure no overflow. This will change
;the quotient, but that doesn't matter because we only need the last
;3 bits of the quotient (and we're about to calculate 32 quotient bits).
;This subtraction will not affect the remainder.
sub eax,esi
sbb edx,ebx
jnc FpremReduce32 ;Was dividend big?
add eax,esi ;Restore dividend, it was smaller
adc edx,ebx
;Division algorithm from Knuth vol. 2, p. 237, using 32-bit "digits":
;Guess a quotient digit by dividing two MSDs of dividend by the MSD of
;divisor. If divisor is >= 1/2 the radix (radix = 2^32 in this case), then
;this guess will be no more than 2 larger than the correct value of that
;quotient digit (and never smaller). Divisor meets magnitude condition
;because it's normalized.
;
;This loop typically takes 117 clocks.
;edx:eax = dividend
;ebx:esi = divisor
;edi = exponent difference
;ebp = quotient (zero)
FpremReduce32:
;We know that dividend < divisor, but it is still possible that
;high dividend == high divisor, which will cause the DIV instruction
;to overflow.
cmp edx,ebx ;Will DIV instruction overflow?
jae PremOvfl
div ebx ;Guess a quotient "digit"
;Currently, remainder in edx = dividend - (quotient * high half divisor).
;The definition of remainder is dividend - (quotient * all divisor). So
;if we subtract (quotient * low half divisor) from edx, we'll get
;the true remainder. If it's negative, our guess was too big.
mov ebp,eax ;Save quotient
mov ecx,edx ;Save remainder
mul esi ;Quotient * low half divisor
neg eax ;Subtract from dividend extended with 0
sbb ecx,edx ;Subtract from remainder
mov edx,ecx ;Remainder back to edx:eax
jnc HavPremQuo ;Was quotient OK?
FpremCorrect:
dec ebp ;Quotient was too big
add eax,esi ;Add divisor back into remainder
adc edx,ebx
jnc FpremCorrect ;Repeat if quotient is still too big
HavPremQuo:
sub edi,32 ;Exponent reduced
cmp edi,32 ;Exponent difference within 31?
jl PremNormalize ;Do it a bit a time
or edx,edx ;Check for zero remainder
jnz FpremReduce32
or eax,eax ;Remainder 0?
jz ExactPrem
xchg edx,eax ;Shift left 32 bits
sub edi,32 ;Another 32 bits reduced
cmp edi,32
jge FpremReduce32
xor ebp,ebp ;No quotient bits are valid
jmp PremNormalize
PremOvfl:
;edx:eax = dividend
;ebx:esi = divisor
;On exit, ebp = second quotient "digit"
;
;Come here if divide instruction would overflow. This must mean that edx == ebx,
;i.e., the high halves of the dividend and divisor are equal. Assume a result
;of 2^32-1, thus remainder = dividend - ( divisor * (2^32-1) )
; = dividend - divisor * 2^32 + divisor. Since the high halves of the dividend
;and divisor are equal, dividend - divisor * 2^32 can be computed by
;subtracting only the low halves. When adding divisor (in ebx) to this, note
;that edx == ebx, and we want the result in edx anyway.
;
;Note also that since dividend < divisor, the
;dividend - divisor * 2^32 calculation must always be negative. Thus the
;addition of divisor back to it should generate a carry if it goes positive.
mov ebp,-1 ;Max quotient digit
sub eax,esi ;Calculate correct remainder
add edx,eax ;Should set CY if quotient fit
mov eax,esi ;edx:eax has new remainder
jc HavPremQuo ;Remainder was positive
jmp FpremCorrect
ExactPrem:
;eax = 0
mov esi,EMSEG:[CURstk]
mov EMSEG:[esi].lManLo,eax
mov EMSEG:[esi].lManHi,eax
add sp,4 ;Clean off first return address
mov EMSEG:[esi].wExp,ax
mov EMSEG:[esi].bTag,bTAG_ZERO
ret
;FPREM stage 2
;
;Exponent difference is less than 32. Use restoring long division to
;compute quotient bits until exponent difference is zero. Note that we
;often get more than one bit/loop: BSR is used to scan off leading
;zeros each time around. Since the divisor is normalized, we can
;instantly compute a zero quotient bit for each leading zero bit.
;
;For reductions of 1 to 31 bits per loop, this loop requires 41 or 59 clocks
;plus 3 clocks/bit (BSR time). If we had to use this for 32-bit reductions
;(without stage 1), we could expect (50+6)*16 = 896 clocks typ (2 bits/loop)
;instead of the 112 required by stage 1!
FpremLoop:
;edx:eax = dividend (remainder) minus divisor
;ebx:esi = divisor
;ebp = quotient
;edi = exponent difference, less than 32
;
;If R is current remainder and d is divisor, then we have edx:eax = R - d,
;which is negative. We want 2*R - d, which is positive.
;2*R - d = 2*(R - d) + d.
add eax,eax ;2*(R - d)
adc edx,edx
add eax,esi ;2*(R-d) + d = 2*R - d
adc edx,ebx
add ebp,ebp ;Double quotient too
dec edi ;Decrement exponent difference
DivisorFit:
inc ebp ;Count one in quotient
PremNormalize:
bsr ecx,edx ;Find first 1 bit
jz PremHighZero
not cl
and cl,1FH ;Convert bit no. to shift count
shld edx,eax,cl ;Normalize
shl eax,cl
sub edi,ecx ;Reduce exponent difference
jl PremTooFar
shl ebp,cl ;Shift quotient
FitDivisor:
;Dividend could be larger or smaller than divisor
sub eax,esi
sbb edx,ebx
jnc DivisorFit
;Couldn't subtract divisor from dividend.
or edi,edi ;Is exponent difference zero or less?
jg FpremLoop
add eax,esi ;Restore dividend
adc edx,ebx
xor ecx,ecx ;Sign is positive
ret
PremTooFar:
;Exponent difference in edi went negative when reduced by shift count in ecx.
;We need a quotient corresponding to exponent difference of zero.
add ecx,edi ;Restore exponent difference
shl ebp,cl ;Fix up quotient
ExitPremLoop:
;edx:eax = remainder, normalized
;ebp = quotient
;edi = exponent difference, zero or less
xor ecx,ecx ;Sign is positive
ret
PremHighZero:
;High half of remainder is all zero, so we've reduced exponent difference
;by 32 bits and overshot. We need a quotient corresponding to exponent
;difference of zero, so we just shift it by the original difference. Then
;we need to normalize the low half remainder.
mov ecx,edi
shl ebp,cl ;Fix up quotient
bsr ecx,eax
jz ExactPrem
lea edi,[edi+ecx-63] ;Fix up exponent for normalization
xchg eax,edx ;Shift by 32 bits
not cl
shl edx,cl ;Normalize remainder
xor ecx,ecx ;Sign is positive
ret