1207 lines
36 KiB
NASM
1207 lines
36 KiB
NASM
|
subttl emftran.asm - Transcendental instructions
|
||
|
page
|
||
|
;*******************************************************************************
|
||
|
; Copyright (c) Microsoft Corporation 1991
|
||
|
; All Rights Reserved
|
||
|
;
|
||
|
;emftran.asm - Transcendental instructions
|
||
|
; by Tim Paterson
|
||
|
;
|
||
|
;Purpose:
|
||
|
; F2XM1, FPATAN, FYL2X, FYL2XP1 instructions
|
||
|
;Inputs:
|
||
|
; edi = [CURstk]
|
||
|
;
|
||
|
;Revision History:
|
||
|
;
|
||
|
; [] 09/05/91 TP Initial 32-bit version.
|
||
|
;
|
||
|
;*******************************************************************************
|
||
|
|
||
|
|
||
|
;********************* Polynomial Coefficients *********************
|
||
|
|
||
|
;These polynomial coefficients were all taken from "Computer Approximations"
|
||
|
;by J.F. Hart (reprinted 1978 w/corrections). All calculations and
|
||
|
;conversions to hexadecimal were done with a character-string calculator
|
||
|
;written in Visual Basic with precision set to 30 digits. Once the constants
|
||
|
;were typed into this file, all transfers were done with cut-and-paste
|
||
|
;operations to and from the calculator to help eliminate any typographical
|
||
|
;errors.
|
||
|
|
||
|
|
||
|
tAtanPoly label word
|
||
|
|
||
|
;These constants are from Hart #5056: atan(x) = x * P(x^2) / Q(x^2),
|
||
|
;accurate to 20.78 digits over interval [0, tan(pi/12)].
|
||
|
|
||
|
dd 4 ;P() is degree four
|
||
|
|
||
|
; Hart constant
|
||
|
;
|
||
|
;+.16241 70218 72227 96595 08 E0
|
||
|
;Hex value: 0.A650A5D5050DE43A2C25A8C00 HFFFE
|
||
|
dq 0A650A5D5050DE43AH
|
||
|
dw bTAG_VALID,0FFFEH-1
|
||
|
|
||
|
;+.65293 76545 29069 63960 675 E1
|
||
|
;Hex value: 0.D0F0A714A9604993AC4AC49A0 H3
|
||
|
dq 0D0F0A714A9604994H
|
||
|
dw bTAG_VALID,03H-1
|
||
|
|
||
|
;+.39072 57269 45281 71734 92684 E2
|
||
|
;Hex value: 0.9C4A507F16530AC3CDDEFA3DE H6
|
||
|
dq 09C4A507F16530AC4H
|
||
|
dw bTAG_VALID,06H-1
|
||
|
|
||
|
;+.72468 55912 17450 17145 90416 9 E2
|
||
|
;Hex value: 0.90EFE6FB30465042CF089D1310 H7
|
||
|
dq 090EFE6FB30465043H
|
||
|
dw bTAG_VALID,07H-1
|
||
|
|
||
|
;+.41066 29181 34876 24224 77349 62 E2
|
||
|
;Hex value: 0.A443E2004BB000B84A5154D44 H6
|
||
|
dq 0A443E2004BB000B8H
|
||
|
dw bTAG_VALID,06H-1
|
||
|
|
||
|
dd 4 ;Q() is degree four
|
||
|
|
||
|
; Hart constant
|
||
|
;
|
||
|
;+.15023 99905 56978 85827 4928 E2
|
||
|
;Hex value: 0.F0624CD575B782643AFB912D0 H4
|
||
|
dq 0F0624CD575B78264H
|
||
|
dw bTAG_VALID,04H-1
|
||
|
|
||
|
;+.59578 42201 83554 49303 22456 E2
|
||
|
;Hex value: 0.EE504DDC907DEAEB7D7473B82 H6
|
||
|
dq 0EE504DDC907DEAEBH
|
||
|
dw bTAG_VALID,06H-1
|
||
|
|
||
|
;+.86157 32305 95742 25062 42472 E2
|
||
|
;Hex value: 0.AC508CA5E78E504AB2032E864 H7
|
||
|
dq 0AC508CA5E78E504BH
|
||
|
dw bTAG_VALID,07H-1
|
||
|
|
||
|
;+.41066 29181 34876 24224 84140 84 E2
|
||
|
;Hex value: 0.A443E2004BB000B84F542813C H6
|
||
|
dq 0A443E2004BB000B8H
|
||
|
dw bTAG_VALID,06H-1
|
||
|
|
||
|
|
||
|
;tan(pi/12) = tan(15 deg.) = 2 - sqrt(3)
|
||
|
;= 0.26794 91924 31122 70647 25536 58494 12763 ;From Hart appendix
|
||
|
;Hex value: 0.8930A2F4F66AB189B517A51F2 HFFFF
|
||
|
Tan15Hi equ 08930A2F4H
|
||
|
Tan15Lo equ 0F66AB18AH
|
||
|
Tan15exp equ 0FFFFH-1
|
||
|
|
||
|
;1/tan(pi/6) = sqrt(3) = 1.73205 08075 68877 29352 74463 41505 87236 ;From Hart appendix
|
||
|
;Hex value: 0.DDB3D742C265539D92BA16B8 H1
|
||
|
Sqrt3Hi equ 0DDB3D742H
|
||
|
Sqrt3Lo equ 0C265539EH
|
||
|
Sqrt3exp equ 01H-1
|
||
|
|
||
|
;pi = +3.14159265358979323846264338328
|
||
|
;Hex value: 0.C90FDAA22168C234C4C6628B8 H2
|
||
|
PiHi equ 0C90FDAA2H
|
||
|
PiLo equ 02168C235H
|
||
|
PiExp equ 02H-1
|
||
|
|
||
|
;3*pi = +9.42477796076937971538793014984
|
||
|
;Hex value: 0.96CBE3F9990E91A79394C9E890 H4
|
||
|
XThreePiHi equ 096CBE3F9H
|
||
|
XThreePiMid equ 0990E91A7H
|
||
|
XThreePiLo equ 090000000H
|
||
|
ThreePiExp equ 04H-1
|
||
|
|
||
|
|
||
|
;This is a table of multiples of pi/6. It is used to adjust the
|
||
|
;final result angle after atan(). Derived from Hart appendix
|
||
|
;pi/180 = 0.01745 32925 19943 29576 92369 07684 88612
|
||
|
;
|
||
|
;When the reduced argument for atan() is very small, these correction
|
||
|
;constants simply become the result. These constants have all been
|
||
|
;rounded to nearest, but the user may have selected a different rounding
|
||
|
;mode. The tag byte is not needed for these constants, so its space
|
||
|
;is used to indicate if it was rounded. To determine if a constant
|
||
|
;was rounded, 7FH is subtracted from this flag; CY set means it was
|
||
|
;rounded up.
|
||
|
|
||
|
RoundedUp equ 040H
|
||
|
RoundedDown equ 0C0H
|
||
|
|
||
|
tAtanPiFrac label dword
|
||
|
;pi/2 = +1.57079632679489661923132169163
|
||
|
;Hex value: 0.C90FDAA22168C234C4C6628B0 H1
|
||
|
dq 0C90FDAA22168C235H
|
||
|
dw RoundedUp,01H-1
|
||
|
|
||
|
;2*pi/3 = +2.09439510239319549230842892218
|
||
|
;Hex value: 0.860A91C16B9B2C232DD997078 H2
|
||
|
dq 0860A91C16B9B2C23H
|
||
|
dw RoundedDown,02H-1
|
||
|
|
||
|
;none
|
||
|
dd 0,0,0
|
||
|
|
||
|
;pi/6 = +0.523598775598298873077107230544E0
|
||
|
;Hex value: 0.860A91C16B9B2C232DD99707A H0
|
||
|
dq 0860A91C16B9B2C23H
|
||
|
dw RoundedDown,00H-1
|
||
|
|
||
|
;pi/2 = +1.57079632679489661923132169163
|
||
|
;Hex value: 0.C90FDAA22168C234C4C6628B0 H1
|
||
|
dq 0C90FDAA22168C235H
|
||
|
dw RoundedUp,01H-1
|
||
|
|
||
|
;pi/3 = +1.04719755119659774615421446109
|
||
|
;Hex value: 0.860A91C16B9B2C232DD997078 H1
|
||
|
dq 0860A91C16B9B2C23H
|
||
|
dw RoundedDown,01H-1
|
||
|
|
||
|
;pi = +3.14159265358979323846264338328
|
||
|
;Hex value: 0.C90FDAA22168C234C4C6628B8 H2
|
||
|
dq 0C90FDAA22168C235H
|
||
|
dw RoundedUp,02H-1
|
||
|
|
||
|
;5*pi/6 = +2.61799387799149436538553615272
|
||
|
;Hex value: 0.A78D3631C681F72BF94FFCC96 H2
|
||
|
dq 0A78D3631C681F72CH
|
||
|
dw RoundedUp,02H-1
|
||
|
|
||
|
;*********************
|
||
|
|
||
|
tExpPoly label word
|
||
|
|
||
|
;These constants are from Hart #1324: 2^x - 1 =
|
||
|
; 2 * x * P(x^2) / ( Q(x^2) - x * P(x^2) )
|
||
|
;accurate to 21.54 digits over interval [0, 0.5].
|
||
|
|
||
|
dd 2 ;P() is degree two
|
||
|
|
||
|
; Hart constant
|
||
|
;
|
||
|
;+.60613 30790 74800 42574 84896 07 E2
|
||
|
;Hex value: 0.F27406FCF405189818F68BB78 H6
|
||
|
dq 0F27406FCF4051898H
|
||
|
dw bTAG_VALID,06H-1
|
||
|
|
||
|
;+.30285 61978 21164 59206 24269 927 E5
|
||
|
;Hex value: 0.EC9B3D5414E1AD0852E432A18 HF
|
||
|
dq 0EC9B3D5414E1AD08H
|
||
|
dw bTAG_VALID,0FH-1
|
||
|
|
||
|
;+.20802 83036 50596 27128 55955 242 E7
|
||
|
;Hex value: 0.FDF0D84AC3A35FAF89A690CC4 H15
|
||
|
dq 0FDF0D84AC3A35FB0H
|
||
|
dw bTAG_VALID,015H-1
|
||
|
|
||
|
dd 3 ;Q() is degree three. First
|
||
|
;coefficient is 1.0 and is not listed.
|
||
|
; Hart constant
|
||
|
;
|
||
|
;+.17492 20769 51057 14558 99141 717 E4
|
||
|
;Hex value: 0.DAA7108B387B776F212ECFBEC HB
|
||
|
dq 0DAA7108B387B776FH
|
||
|
dw bTAG_VALID,0BH-1
|
||
|
|
||
|
;+.32770 95471 93281 18053 40200 719 E6
|
||
|
;Hex value: 0.A003B1829B7BE85CC81BD5309 H13
|
||
|
dq 0A003B1829B7BE85DH
|
||
|
dw bTAG_VALID,013H-1
|
||
|
|
||
|
;+.60024 28040 82517 36653 36946 908 E7
|
||
|
;Hex value: 0.B72DF814E709837E066855BDD H17
|
||
|
dq 0B72DF814E709837EH
|
||
|
dw bTAG_VALID,017H-1
|
||
|
|
||
|
|
||
|
;sqrt(2) = 1.41421 35623 73095 04880 16887 24209 69808 ;From Hart appendix
|
||
|
;Hex value: 0.B504F333F9DE6484597D89B30 H1
|
||
|
Sqrt2Hi equ 0B504F333H
|
||
|
Sqrt2Lo equ 0F9DE6484H
|
||
|
Sqrt2Exp equ 01H-1
|
||
|
|
||
|
;sqrt(2) - 1 = +0.4142135623730950488016887242E0
|
||
|
;Hex value: 0.D413CCCFE779921165F626CC4 HFFFF
|
||
|
Sqrt2m1Hi equ 0D413CCCFH
|
||
|
Sqrt2m1Lo equ 0E7799211H
|
||
|
XSqrt2m1Lo equ 060000000H
|
||
|
Sqrt2m1Exp equ 0FFFFH-1
|
||
|
|
||
|
;2 - sqrt(2) = +0.5857864376269049511983112758E0
|
||
|
;Hex value: 0.95F619980C4336F74D04EC9A0 H0
|
||
|
TwoMinusSqrt2Hi equ 095F61998H
|
||
|
TwoMinusSqrt2Lo equ 00C4336F7H
|
||
|
TwoMinusSqrt2Exp equ 00H-1
|
||
|
|
||
|
;*********************
|
||
|
|
||
|
tLogPoly label dword
|
||
|
|
||
|
;These constants are derived from Hart #2355: log2(x) = z * P(z^2) / Q(z^2),
|
||
|
; z = (x+1) / (x-1) accurate to 19.74 digits over interval
|
||
|
;[1/sqrt(2), sqrt(2)]. The original Hart coefficients were for log10();
|
||
|
;the P() coefficients have been scaled by log2(10) to compute log2().
|
||
|
;
|
||
|
;log2(10) = 3.32192 80948 87362 34787 03194 29489 39017 ;From Hart appendix
|
||
|
|
||
|
dd 3 ;P() is degree three
|
||
|
|
||
|
; Original Hart constant Scaled value
|
||
|
;
|
||
|
;+.18287 59212 09199 9337 E0 +0.607500660543248917834110566373E0
|
||
|
;Hex value: 0.9B8529CD54E72022A12BAEC53 H0
|
||
|
dq 09B8529CD54E72023H
|
||
|
dw bTAG_VALID,00H-1
|
||
|
|
||
|
;-.41855 96001 31266 20633 E1 -13.9042489506087332809657007634
|
||
|
;Hex value: 0.DE77CDBF64E8C53F0DCD458D0 H4
|
||
|
dq 0DE77CDBF64E8C53FH
|
||
|
dw bSign shl 8 + bTAG_VALID,04H-1
|
||
|
|
||
|
;+.13444 58152 27503 62236 E2 +44.6619330844279438866067340334
|
||
|
;Hex value: 0.B2A5D1C95708A0C9FE50F6F97 H6
|
||
|
dq 0B2A5D1C95708A0CAH
|
||
|
dw bTAG_VALID,06H-1
|
||
|
|
||
|
;-.10429 11213 72526 69497 44122 E2 -34.6447606134704282123622236943
|
||
|
;Hex value: 0.8A943C20526AE439A98B30F6A H6
|
||
|
dq 08A943C20526AE43AH
|
||
|
dw bSign shl 8 + bTAG_VALID,06H-1
|
||
|
|
||
|
|
||
|
dd 3 ;Q() is degree three. First
|
||
|
;coefficient is 1.0 and is not listed.
|
||
|
; Hart constant
|
||
|
;
|
||
|
;-.89111 09060 90270 85654 E1
|
||
|
;Hex value: 0.8E93E7183AA998D74F45CDFF0 H4
|
||
|
dq 08E93E7183AA998D7H
|
||
|
dw bSign shl 8 + bTAG_VALID,04H-1
|
||
|
|
||
|
;+.19480 96618 79809 36524 155 E2
|
||
|
;Hex value: 0.9BD904CCFEE118D4BEF319716 H5
|
||
|
dq 09BD904CCFEE118D5H
|
||
|
dw bTAG_VALID,05H-1
|
||
|
|
||
|
;-.12006 95907 02006 34243 4218 E2
|
||
|
;Hex value: 0.C01C811D2EC1B5806304B1858 H4
|
||
|
dq 0C01C811D2EC1B580H
|
||
|
dw bSign shl 8 + bTAG_VALID,04H-1
|
||
|
|
||
|
;Log2(e) = 1.44269 50408 88963 40735 99246 81001 89213 ;From Hart appendix
|
||
|
;Hex value: 0.B8AA3B295C17F0BBBE87FED04 H1
|
||
|
Log2OfEHi equ 0B8AA3B29H
|
||
|
Log2OfELo equ 05C17F0BCH
|
||
|
Log2OfEexp equ 01H-1
|
||
|
|
||
|
|
||
|
;********************* Generic polynomial evaluation *********************
|
||
|
;
|
||
|
;EvalPoly, EvalPolyAdd, EvalPolySetup, Eval2Poly
|
||
|
;
|
||
|
;Inputs:
|
||
|
; ebx:esi,ecx = floating point number, internal format
|
||
|
; edi = pointer to polynomial degree and coefficients
|
||
|
;Outputs:
|
||
|
; result in ebx:esi,ecx
|
||
|
; edi incremented to start of last coefficient in list
|
||
|
;
|
||
|
;EvalPoly is the basic polynomial evaluator, using Horner's rule. The
|
||
|
;polynomial pointer in edi points to a list: the first dword in the list
|
||
|
;is the degree of the polynomial (n); it is followed by the n+1
|
||
|
;coefficients in internal (12-byte) format. The argment for EvalPoly
|
||
|
;must be stored in the static FloatTemp in addition to being in
|
||
|
;registers.
|
||
|
;
|
||
|
;EvalPolyAdd is an alternate entry point into the middle of EvalPoly.
|
||
|
;It is used when the first coefficient is 1.0, so it skips the first
|
||
|
;multiplication. It requires that the degree of the polynomial be
|
||
|
;already loaded into ebp.
|
||
|
;
|
||
|
;EvalPolySetup store a copy of the argument in the static ArgTemp,
|
||
|
;and stores the square of the argument in the static FloatTemp.
|
||
|
;Then it falls into EvalPoly to evaluate the polynomial on the square.
|
||
|
;
|
||
|
;Eval2Poly evaluate two polynomials on its argument. The first
|
||
|
;polynomial is x * P(x^2), and its result is left at [[CURstk]].
|
||
|
;The second polynomial is Q(x^2), and its result is left in registers.
|
||
|
;The most significant coefficient of Q() is 1.
|
||
|
;
|
||
|
;Polynomial evaluation uses a slight variation on the standard add
|
||
|
;and multiply routines. PolyAddDouble and PolyMulDouble both check
|
||
|
;to see if the argument in registers (the current accumulation) is
|
||
|
;zero. The argument pointed to by edi is a coefficient and is never
|
||
|
;zero.
|
||
|
;
|
||
|
;In addition, the [RoundMode] and [ZeroVector] vectors are "trapped",
|
||
|
;i.e., redirected to special handlers for polynomial evaluation.
|
||
|
;[RoundMode] ordinarily points to the routine that handles the
|
||
|
;the current rounding mode and precision control; however, during
|
||
|
;polynomial evaluation, we always want full precision and round
|
||
|
;nearest. The normal rounding routines also store their result
|
||
|
;at [[Result]], but we want the result left in registers.
|
||
|
;[ZeroVector] exists solely so polynomial evaluation can trap
|
||
|
;when AddDouble results of zero. The normal response is to store
|
||
|
;a zero at [[Result]], but we need the zero left in registers.
|
||
|
;PolyRound and PolyZero handle these traps.
|
||
|
|
||
|
|
||
|
EvalPolySetup:
|
||
|
;Save x in ArgTemp
|
||
|
mov EMSEG:[ArgTemp].ExpSgn,ecx
|
||
|
mov EMSEG:[ArgTemp].lManHi,ebx
|
||
|
mov EMSEG:[ArgTemp].lManLo,esi
|
||
|
mov EMSEG:[RoundMode],offset PolyRound
|
||
|
mov EMSEG:[ZeroVector],offset PolyZero
|
||
|
push edi ;Save pointer to polynomials
|
||
|
;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
|
||
|
mov edx,ebx
|
||
|
mov edi,esi
|
||
|
mov eax,ecx
|
||
|
;op2 mantissa in edx:edi, exponent in high eax, sign in ah bit 7
|
||
|
call MulDoubleReg ;Compute x^2
|
||
|
;Save x^2 in FloatTemp
|
||
|
mov EMSEG:[FloatTemp].ExpSgn,ecx
|
||
|
mov EMSEG:[FloatTemp].lManHi,ebx
|
||
|
mov EMSEG:[FloatTemp].lManLo,esi
|
||
|
pop edi
|
||
|
EvalPoly:
|
||
|
;ebx:esi,ecx = arg to evaluate, also in FloatTemp
|
||
|
;edi = pointer to degree and list of coefficients.
|
||
|
push edi
|
||
|
mov eax,cs:[edi+4].ExpSgn
|
||
|
mov edx,cs:[edi+4].lManHi
|
||
|
mov edi,cs:[edi+4].lManLo
|
||
|
call MulDoubleReg ;Multiply arg by first coef.
|
||
|
pop edi
|
||
|
mov ebp,cs:[edi] ;Get polynomial degree
|
||
|
add edi,4+Reg87Len ;Point to second coefficient
|
||
|
jmp EvalPolyAdd
|
||
|
|
||
|
PolyLoop:
|
||
|
push ebp ;Save loop count
|
||
|
ifdef NT386
|
||
|
mov edi,YFloatTemp
|
||
|
else
|
||
|
mov edi,offset edata:FloatTemp
|
||
|
endif
|
||
|
call PolyMulDouble
|
||
|
pop ebp
|
||
|
pop edi
|
||
|
add di,Reg87Len
|
||
|
EvalPolyAdd:
|
||
|
push edi
|
||
|
mov eax,cs:[edi].ExpSgn
|
||
|
mov edx,cs:[edi].lManHi
|
||
|
mov edi,cs:[edi].lManLo
|
||
|
cmp cl,bTAG_ZERO ;Adding to zero?
|
||
|
jz AddToZero
|
||
|
call AddDoubleReg ;ebp preserved
|
||
|
ContPolyLoop:
|
||
|
dec ebp
|
||
|
jnz PolyLoop
|
||
|
pop edi
|
||
|
ret
|
||
|
|
||
|
AddToZero:
|
||
|
;Number in registers is zero, so just return value from memory.
|
||
|
mov ecx,eax
|
||
|
mov ebx,edx
|
||
|
mov esi,edi
|
||
|
jmp ContPolyLoop
|
||
|
|
||
|
|
||
|
Eval2Poly:
|
||
|
call EvalPolySetup
|
||
|
push edi
|
||
|
ifdef NT386
|
||
|
mov edi,YArgTemp
|
||
|
else
|
||
|
mov edi,offset edata:ArgTemp
|
||
|
endif
|
||
|
call PolyMulDouble ;Multiply first result by argument
|
||
|
pop edi
|
||
|
;Save result of first polynomial at [[CURstk]]
|
||
|
mov edx,EMSEG:[CURstk]
|
||
|
mov EMSEG:[edx].ExpSgn,ecx
|
||
|
mov EMSEG:[edx].lManHi,ebx
|
||
|
mov EMSEG:[edx].lManLo,esi
|
||
|
;Load x^2 back into registers
|
||
|
mov ecx,EMSEG:[FloatTemp].ExpSgn
|
||
|
mov ebx,EMSEG:[FloatTemp].lManHi
|
||
|
mov esi,EMSEG:[FloatTemp].lManLo
|
||
|
;Start second polynomial evaluation
|
||
|
add edi,4+Reg87Len ;Point to coefficient
|
||
|
mov ebp,cs:[edi-4] ;Get polynomial degree
|
||
|
jmp EvalPolyAdd
|
||
|
|
||
|
|
||
|
PolyRound:
|
||
|
;This routine handles all rounding during polynomial evaluation.
|
||
|
;It performs 64-but round nearest, with result left in registers.
|
||
|
;
|
||
|
;Inputs:
|
||
|
; mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
|
||
|
;Outputs:
|
||
|
; same, plus tag in cl.
|
||
|
;
|
||
|
;To perform "round even" when the round bit is set and the sticky bits
|
||
|
;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
|
||
|
;is set, that will always force a round up (to even) if the round bit is
|
||
|
;set. If the LSB is zero, then the sticky bits remain zero and we always
|
||
|
;round down. This rounding rule is implemented by adding RoundBit-1
|
||
|
;(7F..FFH), setting CY if round up.
|
||
|
;
|
||
|
;This routine needs to be reversible in case we're at the last step
|
||
|
;in the polynomial and final rounding uses a different rounding mode.
|
||
|
;We do this by copying the LSB of esi into al. While the rounding is
|
||
|
;reversible, you can't tell if the answer was exact.
|
||
|
|
||
|
mov edx,esi
|
||
|
and dl,1 ;Look at LSB
|
||
|
or al,dl ;Set LSB as sticky bit
|
||
|
add eax,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up
|
||
|
adc esi,0
|
||
|
adc ebx,0
|
||
|
jc PolyBumpExponent ;Overflowed, increment exponent
|
||
|
or esi,esi ;Any bits in low half?
|
||
|
.erre bTAG_VALID eq 1
|
||
|
.erre bTAG_SNGL eq 0
|
||
|
setnz cl ;if low half==0 then cl=0 else cl=1
|
||
|
ret
|
||
|
|
||
|
PolyBumpExponent:
|
||
|
add ecx,1 shl 16 ;Mantissa overflowed, bump exponent
|
||
|
or ebx,1 shl 31 ;Set MSB
|
||
|
mov cl,bTAG_SNGL
|
||
|
PolyZero:
|
||
|
;Enter here when result is zero
|
||
|
ret
|
||
|
|
||
|
;*******************************************************************************
|
||
|
|
||
|
;FPATAN instruction
|
||
|
|
||
|
;Actual instruction entry point is in emarith.asm
|
||
|
|
||
|
tFpatanDisp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
|
||
|
dd AtanDouble ;single single
|
||
|
dd AtanDouble ;single double
|
||
|
dd AtanZeroDest ;single zero
|
||
|
dd AtanSpclDest ;single special
|
||
|
dd AtanDouble ;double single
|
||
|
dd AtanDouble ;double double
|
||
|
dd AtanZeroDest ;double zero
|
||
|
dd AtanSpclDest ;double special
|
||
|
dd AtanZeroSource ;zero single
|
||
|
dd AtanZeroSource ;zero double
|
||
|
dd AtanZeroDest ;zero zero
|
||
|
dd AtanSpclDest ;zero special
|
||
|
dd AtanSpclSource ;special single
|
||
|
dd AtanSpclSource ;special double
|
||
|
dd AtanSpclSource ;special zero
|
||
|
dd TwoOpBothSpcl ;special special
|
||
|
dd AtanTwoInf ;Two infinites
|
||
|
|
||
|
;Compute atan( st(1)/st(0) ). Neither st(0) or st(1) are zero or
|
||
|
;infinity at this point.
|
||
|
;
|
||
|
;Argument reduction starts by dividing the smaller by the larger,
|
||
|
;ensuring that the result x is <= 1. The absolute value of the quotient
|
||
|
;is used and the quadrant is fixed up later. If x = st(0)/st(1), then
|
||
|
;the final atan result is subtracted from pi/2 (and normalized for the
|
||
|
;correct range of -pi to +pi).
|
||
|
;
|
||
|
;The range of x is further reduced using the formulas:
|
||
|
; t = (x - k) / (1 + kx)
|
||
|
; atan(x) = atan(k) + atan(t)
|
||
|
;
|
||
|
;Given that x <= 1, if we choose k = tan(pi/6) = 1/sqrt(3), then we
|
||
|
;are assured that t <= tan(pi/12) = 2 - sqrt(3), and
|
||
|
;for x >= tan(pi/12) = 2 - sqrt(3), t >= -tan(pi/12).
|
||
|
;Thus we can always reduce the argument to abs(t) <= tan(pi/12).
|
||
|
;
|
||
|
;Since k = 1/sqrt(3), it is convenient to multiply the numerator
|
||
|
;and denominator of t by 1/k, which gives
|
||
|
;t = (x/k - 1) / (1/k + x) = ( x*sqrt(3) - 1 ) / ( sqrt(3) + x ).
|
||
|
;This is the form found in Cody and Waite and in previous versions
|
||
|
;of the emulator. It requires one each add, subtract, multiply, and
|
||
|
;divide.
|
||
|
;
|
||
|
;Hart has derived a simpler version of this formula:
|
||
|
;t = 1/k - (1/k^2 + 1) / (1/k + x) = sqrt(3) - 4 / ( sqrt(3) + x ).
|
||
|
;Note that this computation requires one each add, subtract, and
|
||
|
;divide, but no multiply.
|
||
|
|
||
|
;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
|
||
|
;[edi] points to st(1), where result is returned
|
||
|
|
||
|
AtanDouble:
|
||
|
mov EMSEG:[Result],edi
|
||
|
mov EMSEG:[RoundMode],offset PolyRound
|
||
|
mov EMSEG:[ZeroVector],offset PolyZero
|
||
|
mov ah,EMSEG:[edi].bSgn ;Sign of result
|
||
|
mov al,ch ;Affects quadrant of result
|
||
|
and al,bSign ;Zero other bits, used as flags
|
||
|
push eax ;Save flag
|
||
|
;First figure out which is larger
|
||
|
push offset AtanQuo ;Return address for DivDouble
|
||
|
shld edx,ecx,16 ;Get exponent to ax
|
||
|
cmp dx,EMSEG:[edi].wExp ;Compare exponents
|
||
|
jl DivrDoubleSetFlag ;ST(0) is smaller, make it dividend
|
||
|
jg DivDouble ; ...is bigger, make it divisor
|
||
|
;Exponents are equal, compare mantissas
|
||
|
cmp ebx,EMSEG:[edi].lManHi
|
||
|
jb DivrDoubleSetFlag ;ST(0) is smaller, make it dividend
|
||
|
ja DivDouble ; ...is bigger, make it divisor
|
||
|
cmp esi,EMSEG:[edi].lManLo
|
||
|
jbe DivrDoubleSetFlag ;ST(0) is smaller, make it dividend
|
||
|
jmp DivDouble
|
||
|
|
||
|
TinyAtan:
|
||
|
;Come here if the angle was reduced to zero, or the divide resulted in
|
||
|
;unmasked underflow so that the quotient exponent was biased.
|
||
|
;Note that an angle of zero means reduction was performed, and the
|
||
|
;result will be corrected to a non-zero value.
|
||
|
mov dl,[esp] ;Get flag byte
|
||
|
or dl,dl ;No correction needed?
|
||
|
jz AtanSetSign ;Just return result of divide
|
||
|
and EMSEG:[CURerr],not Underflow
|
||
|
;Angle in registers is too small to affect correction amount. Just
|
||
|
;load up correction angle instead of adding it in.
|
||
|
add dl,40H ;Change flags for correction lookup
|
||
|
shr dl,5-2 ;Now in bits 2,3,4
|
||
|
and edx,7 shl 2
|
||
|
mov ebx,[edx+2*edx+tAtanPiFrac].lManHi
|
||
|
mov esi,[edx+2*edx+tAtanPiFrac].lManLo
|
||
|
mov ecx,[edx+2*edx+tAtanPiFrac].ExpSgn
|
||
|
shrd eax,ecx,8 ;Copy rounding flag to high eax
|
||
|
jmp AtanSetSign
|
||
|
|
||
|
AtanQuo:
|
||
|
;Return here after divide. Underflow flag is set only for "big underflow",
|
||
|
;meaning the (15-bit) exponent couldn't even be kept in 16 bits. This can
|
||
|
;only happen dividing a denormal by one of the largest numbers.
|
||
|
;
|
||
|
;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx
|
||
|
test EMSEG:[CURerr],Underflow;Did we underflow?
|
||
|
jnz TinyAtan
|
||
|
;Now compare quotient in ebx:esi,ecx with tan(pi/12) = 2 - sqrt(3)
|
||
|
xor cx,cx ;Use absolute value
|
||
|
cmp ecx,Tan15exp shl 16
|
||
|
jg AtnNeedReduce
|
||
|
jl AtnReduced
|
||
|
cmp ebx,Tan15Hi
|
||
|
ja AtnNeedReduce
|
||
|
jb AtnReduced
|
||
|
cmp esi,Tan15Lo
|
||
|
jbe AtnReduced
|
||
|
AtnNeedReduce:
|
||
|
or byte ptr [esp],20H ;Note reduction in flags on stack
|
||
|
;Compute t = sqrt(3) - 4 / ( sqrt(3) + x ).
|
||
|
mov eax,Sqrt3exp shl 16
|
||
|
mov edx,Sqrt3Hi
|
||
|
mov edi,Sqrt3Lo
|
||
|
call AddDoubleReg ;x + sqrt(3)
|
||
|
mov edi,esi
|
||
|
mov esi,ebx ;Mantissa in esi:edi
|
||
|
mov ebx,ecx ;ExpSgn to ebx
|
||
|
mov ecx,(2+TexpBias) shl 16
|
||
|
mov edx,1 shl 31
|
||
|
xor eax,eax ;edx:edi,eax = 4.0
|
||
|
;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
|
||
|
;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
|
||
|
call DivDoubleReg ;4 / ( x + sqrt(3) )
|
||
|
not ch ;Flip sign
|
||
|
mov eax,Sqrt3exp shl 16
|
||
|
mov edx,Sqrt3Hi
|
||
|
mov edi,Sqrt3Lo
|
||
|
call AddDoubleReg ;sqrt(3) - 4 / ( x + sqrt(3) )
|
||
|
;Result in ebx:esi,ecx could be very small (or zero) if arg was near tan(pi/6).
|
||
|
cmp cl,bTAG_ZERO
|
||
|
jz TinyAtan
|
||
|
AtnReduced:
|
||
|
;If angle is small, skip the polynomial. atan(x) = x when x - x^3/3 = x
|
||
|
;[or 1 - x^2/3 = 1], which happens when x < 2^-32. This prevents underflow
|
||
|
;in computing x^2.
|
||
|
TinyAtanArg equ -32
|
||
|
cmp ecx,TinyAtanArg shl 16
|
||
|
jl AtanCorrection
|
||
|
mov edi,offset tAtanPoly
|
||
|
call Eval2Poly
|
||
|
mov edi,EMSEG:[CURstk] ;Point to first result
|
||
|
call DivDouble ;x * P(x^2) / Q(x^2)
|
||
|
AtanCorrection:
|
||
|
;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx
|
||
|
;
|
||
|
;Correct sign and add fraction of pi to account for various angle reductions:
|
||
|
;
|
||
|
; flag bit indicates correction
|
||
|
;----------------------------------------------------
|
||
|
; 5 arg > tan(pi/12) add pi/6
|
||
|
; 6 st(1) > st(0) sub from pi/2
|
||
|
; 7 st(0) < 0 sub from pi
|
||
|
;
|
||
|
;This results in the following correction for the result R:
|
||
|
;
|
||
|
;bit 7 6 5 correction
|
||
|
;---------------------------
|
||
|
; 0 0 0 none
|
||
|
; 0 0 1 pi/6 + R
|
||
|
; 0 1 0 pi/2 - R
|
||
|
; 0 1 1 pi/3 - R
|
||
|
; 1 0 0 pi - R
|
||
|
; 1 0 1 5*pi/6 - R
|
||
|
; 1 1 0 pi/2 + R
|
||
|
; 1 1 1 2*pi/3 + R
|
||
|
|
||
|
mov dl,[esp] ;Get flag byte
|
||
|
or dl,dl ;No correction needed?
|
||
|
jz AtanSetSign
|
||
|
add dl,40H ;Set bit 7 for all -R cases
|
||
|
|
||
|
;This changes the meaning of the flag bits to the following:
|
||
|
;
|
||
|
;bit 7 6 5 correction
|
||
|
;---------------------------
|
||
|
; 0 0 0 pi/2 + R
|
||
|
; 0 0 1 2*pi/3 + R
|
||
|
; 0 1 0 none
|
||
|
; 0 1 1 pi/6 + R
|
||
|
; 1 0 0 pi/2 - R
|
||
|
; 1 0 1 pi/3 - R
|
||
|
; 1 1 0 pi - R
|
||
|
; 1 1 1 5*pi/6 - R
|
||
|
|
||
|
xor ch,dl ;Flip sign bit in cases 4 - 7
|
||
|
shr dl,5-2 ;Now in bits 2,3,4
|
||
|
and edx,7 shl 2
|
||
|
mov eax,[edx+2*edx+tAtanPiFrac].ExpSgn
|
||
|
mov edi,[edx+2*edx+tAtanPiFrac].lManLo
|
||
|
mov edx,[edx+2*edx+tAtanPiFrac].lManHi
|
||
|
call AddDoubleReg ;Add in correction angle
|
||
|
AtanSetSign:
|
||
|
pop edx ;Get flags again
|
||
|
mov ch,dh ;Set sign to original ST(1)
|
||
|
;Rounded mantissa in ebx:esi:eax, exp/sign in ecx
|
||
|
jmp TransUnround
|
||
|
|
||
|
|
||
|
;***
|
||
|
AtanSpclDest:
|
||
|
mov al,EMSEG:[edi].bTag ;Pick up tag
|
||
|
; cmp cl,bTAG_INF ;Is argument infinity?
|
||
|
cmp al,bTAG_INF ;Is argument infinity?
|
||
|
jnz SpclDest ;In emarith.asm
|
||
|
AtanZeroSource:
|
||
|
;Dividend is infinity or divisor is zero. Return pi/2 with
|
||
|
;same sign as dividend.
|
||
|
mov ecx,(PiExp-1) shl 16 + bTAG_VALID ;Exponent for pi/2
|
||
|
PiMant:
|
||
|
;For storing multiples of pi. Exponent/tag is in ecx.
|
||
|
mov ch,EMSEG:[edi].bSgn ;Get dividend's sign
|
||
|
mov ebx,XPiHi
|
||
|
mov esi,XPiMid
|
||
|
mov eax,XPiLo
|
||
|
;A jump through [TransRound] is only valid if the number is known not to
|
||
|
;underflow. Unmasked underflow requires [RoundMode] be set.
|
||
|
jmp EMSEG:[TransRound]
|
||
|
|
||
|
;***
|
||
|
AtanSpclSource:
|
||
|
cmp cl,bTAG_INF ;Scaling by infinity?
|
||
|
jnz SpclSource ;in emarith.asm
|
||
|
AtanZeroDest:
|
||
|
;Divisor is infinity or dividend is zero. Return zero for +divisor,
|
||
|
;pi for -divisor. Result sign is same is dividend.
|
||
|
or ch,ch ;Check divisor's sign
|
||
|
mov ecx,PiExp shl 16 + bTAG_VALID ;Exponent for pi
|
||
|
js PiMant ;Store pi
|
||
|
;Result is zero
|
||
|
mov EMSEG:[edi].lManHi,0
|
||
|
mov EMSEG:[edi].lManLo,0
|
||
|
mov EMSEG:[edi].wExp,0
|
||
|
mov EMSEG:[edi].bTAG,bTAG_ZERO
|
||
|
ret
|
||
|
|
||
|
;***
|
||
|
AtanTwoInf:
|
||
|
;Return pi/4 for +infinity divisor, 3*pi/4 for -infinity divisor.
|
||
|
;Result sign is same is dividend infinity.
|
||
|
or ch,ch ;Check divisor's sign
|
||
|
mov ecx,(PiExp-2) shl 16 + bTAG_VALID ;Exponent for pi/4
|
||
|
jns PiMant ;Store pi/4
|
||
|
mov ecx,(ThreePiExp-2) shl 16 + bTAG_VALID ;Exponent for 3*pi/4
|
||
|
mov ch,EMSEG:[edi].bSgn ;Get dividend's sign
|
||
|
mov ebx,XThreePiHi
|
||
|
mov esi,XThreePiMid
|
||
|
mov eax,XThreePiLo
|
||
|
;A jump through [TransRound] is only valid if the number is known not to
|
||
|
;underflow. Unmasked underflow requires [RoundMode] be set.
|
||
|
jmp EMSEG:[TransRound]
|
||
|
|
||
|
;*******************************************************************************
|
||
|
|
||
|
ExpSpcl:
|
||
|
;Tagged special
|
||
|
cmp cl,bTAG_DEN
|
||
|
jz ExpDenorm
|
||
|
cmp cl,bTAG_INF
|
||
|
mov al, cl
|
||
|
jnz SpclDestNotDen ;Check for Empty or NAN
|
||
|
;Have infinity, check its sign.
|
||
|
;Return -1 for -infinity, no change if +infinity
|
||
|
or ch,ch ;Check sign
|
||
|
jns ExpRet ;Just return the +inifinity
|
||
|
mov EMSEG:[edi].lManLo,0
|
||
|
mov EMSEG:[edi].lManHi,1 shl 31
|
||
|
mov EMSEG:[edi].ExpSgn,bSign shl 8 + bTAG_SNGL ;-1.0 (exponent is zero)
|
||
|
ret
|
||
|
|
||
|
ExpDenorm:
|
||
|
mov EMSEG:[CURerr],Denormal
|
||
|
test EMSEG:[CWmask],Denormal ;Is denormal exception masked?
|
||
|
jnz ExpCont ;Yes, continue
|
||
|
ExpRet:
|
||
|
ret
|
||
|
|
||
|
EM_ENTRY eF2XM1
|
||
|
eF2XM1:
|
||
|
;edi = [CURstk]
|
||
|
mov ecx,EMSEG:[edi].ExpSgn
|
||
|
cmp cl,bTAG_ZERO
|
||
|
jz ExpRet ;Return same zero
|
||
|
ja ExpSpcl
|
||
|
ExpCont:
|
||
|
|
||
|
;The input range specified for the function is (-1, +1). The polynomial
|
||
|
;used for this function is valid only over the range [0, +0.5], so range
|
||
|
;reduction is needed. Range reduction is based on the identity:
|
||
|
;
|
||
|
; 2^(a+b) = 2^a * 2^b
|
||
|
;
|
||
|
;1.0 or 0.5 can be added/subtracted from the argument to bring it into
|
||
|
;range. We calculate 2^x - 1 with a polynomial, and then adjust the
|
||
|
;result according to the amount added or subtracted, as shown in the table:
|
||
|
;
|
||
|
;Arg range Adj Polynomial result Required result, 2^x - 1
|
||
|
;
|
||
|
; (-1, -0.5] +1 P = 2^(x+1) - 1 (P - 1)/2
|
||
|
;
|
||
|
; (-0.5, 0) +0.5 P = 2^(x+0.5) - 1 P * sqrt(2)/2 + (sqrt(2)/2 - 1)
|
||
|
;
|
||
|
; (0, 0.5) 0 P = 2^x - 1 P
|
||
|
;
|
||
|
; [0.5, 1) -0.5 P = 2^(x-0.5) - 1 P * sqrt(2) + (sqrt(2)-1)
|
||
|
;
|
||
|
;Since the valid input range does not include +1.0 or -1.0, and zero is
|
||
|
;handled separately, the precision exception will always be set.
|
||
|
|
||
|
mov EMSEG:[Result],edi
|
||
|
mov EMSEG:[RoundMode],offset PolyRound
|
||
|
mov EMSEG:[ZeroVector],offset PolyZero
|
||
|
push offset TransUnround ;Always exit through here
|
||
|
mov ebx,EMSEG:[edi].lManHi
|
||
|
mov esi,EMSEG:[edi].lManLo
|
||
|
;Check for small argument, so that x^2 does not underflow. Note that
|
||
|
;e^x = 1+x for small x, where small x means x + x^2/2 = x [or 1 + x/2 = 1],
|
||
|
;which happens when x < 2^-64, so 2^x - 1 = x * ln(2) for small x.
|
||
|
TinyExpArg equ -64
|
||
|
cmp ecx,TinyExpArg shl 16
|
||
|
jl TinyExp
|
||
|
cmp ecx,-1 shl 16 + bSign shl 8 ;See if positive, < 0.5
|
||
|
jl ExpReduced
|
||
|
;Argument was not in range (0, 0.5), so we need some kind of reduction
|
||
|
or ecx,ecx ;Exp >= 0 means arg >= 1.0 --> too big
|
||
|
;CONSIDER: this returns through TransUnround which restores the rounding
|
||
|
;vectors, but it also randomly rounds the result becase eax is not set.
|
||
|
jge ExpRet ;Give up if arg out of range
|
||
|
;We're going to need to add/subtract 1.0 or 0.5, so load up the constant
|
||
|
mov edx,1 shl 31
|
||
|
xor edi,edi
|
||
|
mov eax,-1 shl 16 + bSign shl 8 ;edx:edi,eax = -0.5
|
||
|
mov ebp,offset ExpReducedMinusHalf
|
||
|
or ch,ch ;If it's positive, must be [0.5, 1)
|
||
|
jns ExpReduction
|
||
|
xor ah,ah ;edx:edi,eax = +0.5
|
||
|
mov ebp,offset ExpReducedPlusHalf
|
||
|
cmp ecx,eax ;See if abs(arg) >= 0.5
|
||
|
jl ExpReduction ;No, adjust by .5
|
||
|
xor eax,eax ;edx:edi,eax = 1.0
|
||
|
mov ebp,offset ExpReducedPlusOne
|
||
|
ExpReduction:
|
||
|
call AddDoubleReg ;Argument now in range [0, 0.5]
|
||
|
cmp cl,bTAG_ZERO ;Did reduction result in zero?
|
||
|
jz ExpHalf ;If so, must have been exactly 0.5
|
||
|
push ebp ;Address of reduction cleanup
|
||
|
ExpReduced:
|
||
|
mov edi,offset tExpPoly
|
||
|
call Eval2Poly
|
||
|
;2^x - 1 is approximated with 2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) )
|
||
|
;Q(x^2) is in registers, P(x^2) is at [[CURstk]]
|
||
|
mov edi,EMSEG:[CURstk]
|
||
|
mov dx,bSign shl 8 ;Subtract memory operand
|
||
|
;Note that Q() and P() have no roots over the input range
|
||
|
;(they will never be zero).
|
||
|
call AddDouble ;Q(x^2) - x*P(x^2)
|
||
|
sub ecx,1 shl 16 ;Divide by two
|
||
|
mov edi,EMSEG:[CURstk]
|
||
|
jmp DivDouble ;2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) )
|
||
|
;Returns to correct argument reduction correction routine or TransUnround
|
||
|
|
||
|
TinyExp:
|
||
|
;Exponent is very small (and was not reduced)
|
||
|
mov edx,cFLDLN2hi
|
||
|
mov edi,cFLDLN2lo
|
||
|
mov eax,cFLDLN2exp shl 16
|
||
|
;This could underflow (but not big time)
|
||
|
jmp MulDoubleReg ;Returns to TransUnround
|
||
|
|
||
|
ExpHalf:
|
||
|
;Argument of exactly 0.5 was reduced to zero. Just return result.
|
||
|
mov ebx,Sqrt2m1Hi
|
||
|
mov esi,Sqrt2m1Lo
|
||
|
mov eax,XSqrt2m1Lo + 1 shl 31 - 1
|
||
|
mov ecx,Sqrt2m1Exp shl 16
|
||
|
ret ;Exit through TransUnround
|
||
|
|
||
|
ExpReducedPlusOne:
|
||
|
;Correct result is (P - 1)/2
|
||
|
sub ecx,1 shl 16 ;Divide by two
|
||
|
mov edx,1 shl 31
|
||
|
xor edi,edi
|
||
|
mov eax,-1 shl 16 + bSign shl 8 ;edx:edi,eax = -0.5
|
||
|
jmp AddDoubleReg
|
||
|
|
||
|
ExpReducedPlusHalf:
|
||
|
;Correct result is P * sqrt(2)/2 - (1 - sqrt(2)/2)
|
||
|
mov edx,Sqrt2Hi
|
||
|
mov edi,Sqrt2Lo
|
||
|
mov eax,Sqrt2exp-1 shl 16 ;sqrt(2)/2
|
||
|
call MulDoubleReg
|
||
|
mov edx,TwoMinusSqrt2Hi
|
||
|
mov edi,TwoMinusSqrt2Lo
|
||
|
mov eax,(TwoMinusSqrt2Exp-1) shl 16 + bSign shl 8 ;(2-sqrt(2))/2
|
||
|
jmp AddDoubleReg
|
||
|
|
||
|
ExpReducedMinusHalf:
|
||
|
;Correct result is P * sqrt(2) + (sqrt(2)-1)
|
||
|
mov edx,Sqrt2Hi
|
||
|
mov edi,Sqrt2Lo
|
||
|
mov eax,Sqrt2exp shl 16
|
||
|
call MulDoubleReg
|
||
|
mov edx,Sqrt2m1Hi
|
||
|
mov edi,Sqrt2m1Lo
|
||
|
mov eax,Sqrt2m1Exp shl 16
|
||
|
jmp AddDoubleReg
|
||
|
|
||
|
;*******************************************************************************
|
||
|
|
||
|
;Dispatch table for log(x+1)
|
||
|
;
|
||
|
;One operand has been loaded into ecx:ebx:esi ("source"), the other is
|
||
|
;pointed to by edi ("dest").
|
||
|
;
|
||
|
;Tag of source is shifted. Tag values are as follows:
|
||
|
|
||
|
.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
|
||
|
.erre TAG_VALID eq 1
|
||
|
.erre TAG_ZERO eq 2
|
||
|
.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
|
||
|
|
||
|
;Any special case routines not found in this file are in emarith.asm
|
||
|
|
||
|
tFyl2xp1Disp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
|
||
|
dd LogP1Double ;single single
|
||
|
dd LogP1Double ;single double
|
||
|
dd LogP1ZeroDest ;single zero
|
||
|
dd LogP1SpclDest ;single special
|
||
|
dd LogP1Double ;double single
|
||
|
dd LogP1Double ;double double
|
||
|
dd LogP1ZeroDest ;double zero
|
||
|
dd LogP1SpclDest ;double special
|
||
|
dd XorSourceSign ;zero single
|
||
|
dd XorSourceSign ;zero double
|
||
|
dd XorDestSign ;zero zero
|
||
|
dd LogP1SpclDest ;zero special
|
||
|
dd LogSpclSource ;special single
|
||
|
dd LogSpclSource ;special double
|
||
|
dd LogSpclSource ;special zero
|
||
|
dd TwoOpBothSpcl ;special special
|
||
|
dd LogTwoInf ;Two infinites
|
||
|
|
||
|
|
||
|
LogP1Double:
|
||
|
;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
|
||
|
;[edi] points to st(1), where result is returned
|
||
|
;
|
||
|
;This instruction is defined only for x+1 in the range [1/sqrt(2), sqrt(2)]
|
||
|
;The approximation used (valid over exactly this range) is
|
||
|
; log2(x) = z * P(z^2) / Q(z^2), z = (x-1) / (x+1), which is
|
||
|
; log2(x+1) = r * P(r^2) / Q(r^2), r = x / (x+2)
|
||
|
;
|
||
|
;We're not too picky about this range check because the function is simply
|
||
|
;"undefined" if out of range--EXCEPT, we're supposed to check for -1 and
|
||
|
;signal Invalid if less, -infinity if equal.
|
||
|
or ecx,ecx ;abs(x) >= 1.0?
|
||
|
jge LogP1OutOfRange ;Valid range is approx [-0.3, +0.4]
|
||
|
mov EMSEG:[Result],edi
|
||
|
mov EMSEG:[RoundMode],offset PolyRound
|
||
|
mov EMSEG:[ZeroVector],offset PolyZero
|
||
|
mov eax,1 shl 16 ;Exponent of 1 for adding 2.0
|
||
|
push offset TotalLog ;Return address for BasicLog
|
||
|
; jmp BasicLog ;Fall into BasicLog
|
||
|
;.erre BasicLog eq $
|
||
|
|
||
|
;BasicLog is used by eFYL2X and eFYL2XP1.
|
||
|
;eax has exponent and sign to add 1.0 or 2.0 to argument
|
||
|
;ebx:esi,ecx has argument, non-zero, tag not set
|
||
|
;ST has argument to take log2 of, minus 1. (This is the actual argument
|
||
|
;of eFYL2XP1, or argument minus 1 of eFYL2X.)
|
||
|
|
||
|
BasicLog:
|
||
|
mov edx,1 shl 31
|
||
|
xor edi,edi ;edx:edi,eax = +1.0 or +2.0
|
||
|
call AddDoubleReg
|
||
|
mov edi,EMSEG:[CURstk] ;Point to x-1
|
||
|
call DivDouble ;Compute (x-1) / (x+1)
|
||
|
;Result in registers is z = (x-1)/(x+1). For tiny z, ln(x) = 2*z, so
|
||
|
; log2(x) = 2 * log2(e) * z. Tiny z is such that z + z^3/3 = z.
|
||
|
cmp ecx,-32 shl 16 ;Smallest exponent to bother with
|
||
|
jl LogSkipPoly
|
||
|
mov edi,offset tLogPoly
|
||
|
call Eval2Poly
|
||
|
mov edi,EMSEG:[CURstk] ;Point to first result, r * P(r^2)
|
||
|
jmp DivDouble ;Compute r * P(r^2) / Q(r^2)
|
||
|
|
||
|
LogSkipPoly:
|
||
|
;Multiply r by 2 * log2(e)
|
||
|
mov edx,Log2OfEHi
|
||
|
mov edi,Log2OfELo
|
||
|
mov eax,(Log2OfEexp+1) shl 16
|
||
|
jmp MulDoubleReg
|
||
|
|
||
|
LogP1OutOfRange:
|
||
|
;Input range isn't valid, so we can return anything we want--EXCEPT, for
|
||
|
;numbers < -1 we must signal Invalid Operation, and Divide By Zero for
|
||
|
;-1. Otherwise, we return an effective log of one by just leaving the
|
||
|
;second operand as the return value.
|
||
|
;
|
||
|
;Exponent in ecx >= 0 ( abs(x) >= 1 )
|
||
|
or ch,ch ;Is it positive?
|
||
|
jns LogP1Ret ;If so, skip it
|
||
|
and ecx,0FFFFH shl 16 ;Look at exponent only: 0 for -1.0
|
||
|
sub ebx,1 shl 31 ;Kill MSB
|
||
|
or ebx,esi
|
||
|
or ebx,ecx
|
||
|
jnz ReturnIndefinite ;Must be < -1.0
|
||
|
jmp DivideByMinusZero
|
||
|
|
||
|
LogP1Ret:
|
||
|
ret
|
||
|
|
||
|
;***
|
||
|
LogP1ZeroDest:
|
||
|
or ch,ch ;Is it negative?
|
||
|
jns LogP1Ret ;If not, just leave it zero
|
||
|
or ecx,ecx ;abs(x) >= 1.0?
|
||
|
jl XorDestSign ;Flip sign of zero
|
||
|
;Argument is <= -1
|
||
|
jmp ReturnIndefinite ;Have 0 * log( <=0 )
|
||
|
|
||
|
;***
|
||
|
LogP1SpclDest:
|
||
|
mov al,EMSEG:[edi].bTag ;Pick up tag
|
||
|
cmp al,bTAG_INF ;Is argument infinity?
|
||
|
jnz SpclDest ;In emarith.asm
|
||
|
;Multiplying log(x+1) * infinity.
|
||
|
;If x > 0, return original infinity.
|
||
|
;If -1 <= x < 0, return infinity with sign flipped.
|
||
|
;If x < -1 or x == 0, invalid operation.
|
||
|
cmp cl,bTAG_ZERO
|
||
|
jz ReturnIndefinite
|
||
|
or ch,ch ;Is it positive?
|
||
|
jns LogP1Ret
|
||
|
test ecx,0FFFFH shl 16 ;Is exponent zero?
|
||
|
jl XorDestSign
|
||
|
jg ReturnIndefinite
|
||
|
sub ebx,1 shl 31 ;Kill MSB
|
||
|
or ebx,esi
|
||
|
jnz ReturnIndefinite ;Must be < -1.0
|
||
|
jmp XorDestSign
|
||
|
|
||
|
;***
|
||
|
LogSpclSource:
|
||
|
cmp cl,bTAG_INF ;Is argument infinity?
|
||
|
jnz SpclSource ;in emarith.asm
|
||
|
or ch,ch ;Is it negative infinity?
|
||
|
js ReturnIndefinite
|
||
|
jmp MulByInf
|
||
|
|
||
|
;***
|
||
|
LogTwoInf:
|
||
|
or ch,ch ;Is it negative infinity?
|
||
|
js ReturnIndefinite
|
||
|
jmp XorDestSign
|
||
|
|
||
|
;*******************************************************************************
|
||
|
|
||
|
;Dispatch table for log(x)
|
||
|
;
|
||
|
;One operand has been loaded into ecx:ebx:esi ("source"), the other is
|
||
|
;pointed to by edi ("dest").
|
||
|
;
|
||
|
;Tag of source is shifted. Tag values are as follows:
|
||
|
|
||
|
.erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
|
||
|
.erre TAG_VALID eq 1
|
||
|
.erre TAG_ZERO eq 2
|
||
|
.erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
|
||
|
|
||
|
;Any special case routines not found in this file are in emarith.asm
|
||
|
|
||
|
tFyl2xDisp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
|
||
|
dd LogDouble ;single single
|
||
|
dd LogDouble ;single double
|
||
|
dd LogZeroDest ;single zero
|
||
|
dd LogSpclDest ;single special
|
||
|
dd LogDouble ;double single
|
||
|
dd LogDouble ;double double
|
||
|
dd LogZeroDest ;double zero
|
||
|
dd LogSpclDest ;double special
|
||
|
dd DivideByMinusZero ;zero single
|
||
|
dd DivideByMinusZero ;zero double
|
||
|
dd ReturnIndefinite ;zero zero
|
||
|
dd LogSpclDest ;zero special
|
||
|
dd LogSpclSource ;special single
|
||
|
dd LogSpclSource ;special double
|
||
|
dd LogSpclSource ;special zero
|
||
|
dd TwoOpBothSpcl ;special special
|
||
|
dd LogTwoInf ;Two infinites
|
||
|
|
||
|
|
||
|
LogDouble:
|
||
|
;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
|
||
|
;[edi] points to st(1), where result is returned
|
||
|
;
|
||
|
;Must reduce the argument to the range [1/sqrt(2), sqrt(2)]
|
||
|
or ch,ch ;Is it positive?
|
||
|
js ReturnIndefinite ;Can't take log of negative number
|
||
|
mov EMSEG:[Result],edi
|
||
|
mov EMSEG:[RoundMode],offset PolyRound
|
||
|
mov EMSEG:[ZeroVector],offset PolyZero
|
||
|
shld eax,ecx,16 ;Save exponent in ax as int part of log2
|
||
|
xor ecx,ecx ;Zero exponent: 1 <= x < 2
|
||
|
cmp ebx,Sqrt2Hi ;x > sqrt(2)?
|
||
|
jb LogReduced
|
||
|
ja LogReduceOne
|
||
|
cmp esi,Sqrt2Lo
|
||
|
jb LogReduced
|
||
|
LogReduceOne:
|
||
|
sub ecx,1 shl 16 ;1/sqrt(2) < x < 1
|
||
|
inc eax
|
||
|
LogReduced:
|
||
|
push eax ;Save integer part of log2
|
||
|
mov ebp,ecx ;Save reduced exponent (tag is wrong!)
|
||
|
mov edx,1 shl 31
|
||
|
mov eax,bSign shl 8 ;Exponent of 0, negaitve
|
||
|
xor edi,edi ;edx:edi,eax = -1.0
|
||
|
call AddDoubleReg
|
||
|
cmp cl,bTAG_ZERO ;Was it exact power of two?
|
||
|
jz LogDone ;Skip log if power of two
|
||
|
;Save (x - 1), reload x with reduced exponent
|
||
|
mov edi,EMSEG:[CURstk] ;Point to original x again
|
||
|
xchg EMSEG:[edi].lManHi,ebx
|
||
|
xchg EMSEG:[edi].lManLo,esi
|
||
|
mov EMSEG:[edi].ExpSgn,ecx
|
||
|
mov ecx,ebp ;Get reduced exponent
|
||
|
xor eax,eax ;Exponent of 0, positive
|
||
|
call BasicLog
|
||
|
LogDone:
|
||
|
pop eax ;Get integer part back
|
||
|
cwde
|
||
|
or eax,eax ;Is it zero?
|
||
|
jz TotalLog
|
||
|
;Next 3 instructions take abs() of integer
|
||
|
cdq ;Extend sign through edx
|
||
|
xor eax,edx ;Complement...
|
||
|
sub eax,edx ; and increment if negative
|
||
|
bsr dx,ax ;Look for MSB to normalize integer
|
||
|
;Bit number in dx ranges from 0 to 15
|
||
|
mov cl,dl
|
||
|
not cl ;Convert to shift count
|
||
|
shl eax,cl ;Normalize
|
||
|
.erre TexpBias eq 0
|
||
|
rol edx,16 ;Move exponent high, sign low
|
||
|
or ebx,ebx ;Was log zero?
|
||
|
jz ExactPower
|
||
|
xchg edx,eax ;Exp/sign to eax, mantissa to edx
|
||
|
xor edi,edi ;Extend with zero
|
||
|
call AddDoubleReg
|
||
|
TotalLog:
|
||
|
;Registers could be zero if input was exactly 1.0
|
||
|
cmp cl,bTAG_ZERO
|
||
|
jz ZeroLog
|
||
|
TotalLogNotZero:
|
||
|
mov edi,EMSEG:[Result] ;Point to second arg
|
||
|
push offset TransUnround
|
||
|
jmp MulDouble
|
||
|
|
||
|
ExactPower:
|
||
|
;Arg was a power of two, so log is exact (but not zero).
|
||
|
mov ebx,eax ;Mantissa to ebx
|
||
|
mov ecx,edx ;Exponent to ecx
|
||
|
xor esi,esi ;Extend with zero
|
||
|
;Exponent of arg [= log2(arg)] is now normalized in ebx:esi,ecx
|
||
|
;
|
||
|
;The result log is exact, so we don't want TransUnround, which is designed
|
||
|
;to ensure the result is never exact. Instead we set the [RoundMode]
|
||
|
;vector to [TransRound] before the final multiply.
|
||
|
mov eax,EMSEG:[TransRound]
|
||
|
mov EMSEG:[RoundMode],eax
|
||
|
mov edi,EMSEG:[Result] ;Point to second arg
|
||
|
push offset RestoreRound ;Return addr. for MulDouble in emtrig.asm
|
||
|
jmp MulDouble
|
||
|
|
||
|
ZeroLog:
|
||
|
mov eax,EMSEG:[SavedRoundMode]
|
||
|
mov EMSEG:[RoundMode],eax
|
||
|
mov EMSEG:[ZeroVector],offset SaveResult
|
||
|
jmp SaveResult
|
||
|
|
||
|
;***
|
||
|
LogZeroDest:
|
||
|
or ch,ch ;Is it negative?
|
||
|
js ReturnIndefinite ;Can't take log of negative numbers
|
||
|
;See if log is + or - so we can get correct sign of zero
|
||
|
or ecx,ecx ;Is exponent >= 0?
|
||
|
jge LogRet ;If so, keep present zero sign
|
||
|
FlipDestSign:
|
||
|
not EMSEG:[edi].bSgn
|
||
|
ret
|
||
|
|
||
|
;***
|
||
|
LogSpclDest:
|
||
|
mov al,EMSEG:[edi].bTag ;Pick up tag
|
||
|
cmp al,bTAG_INF ;Is argument infinity?
|
||
|
jnz SpclDest ;In emarith.asm
|
||
|
;Multiplying log(x) * infinity.
|
||
|
;If x > 1, return original infinity.
|
||
|
;If 0 <= x < 1, return infinity with sign flipped.
|
||
|
;If x < 0 or x == 1, invalid operation.
|
||
|
cmp cl,bTAG_ZERO
|
||
|
jz FlipDestSign
|
||
|
or ch,ch ;Is it positive?
|
||
|
js ReturnIndefinite
|
||
|
test ecx,0FFFFH shl 16 ;Is exponent zero?
|
||
|
jg LogRet ;x > 1, just return infinity
|
||
|
jl FlipDestSign
|
||
|
sub ebx,1 shl 31 ;Kill MSB
|
||
|
or ebx,esi
|
||
|
jz ReturnIndefinite ;x == 1.0
|
||
|
LogRet:
|
||
|
ret
|