windows-nt/Source/XPSP1/NT/multimedia/opengl/server/generic/i386/texspanr.asm

641 lines
13 KiB
NASM
Raw Normal View History

2020-09-26 03:20:57 -05:00
;---------------------------Module-Header------------------------------;
; Module Name: texspanr.asm
;
; Fast replace-mode texturing.
;
; Created: 11/15/1995
; Author: Otto Berkes [ottob]
;
; Copyright (c) 1995 Microsoft Corporation
;----------------------------------------------------------------------;
;;
;; Per-channel bit masks and shift adjustments, computed at assembly time.
;; rBits/gBits/bBits and rShift/gShift/bShift are not defined in the visible
;; part of this file and must already be set when this point is reached.
;;
;; xMask = a run of xBits one-bits whose lowest bit sits at xShift
;; xRightShiftAdj = 16 - (xShift + xBits)
;;
;; NOTE(review): none of these six values is referenced by the code visible
;; here; presumably they serve other code sharing these definitions -- confirm.
;;
rMask = ((1 SHL rBits) - 1) SHL rShift
gMask = ((1 SHL gBits) - 1) SHL gShift
bMask = ((1 SHL bBits) - 1) SHL bShift
rRightShiftAdj = 16 - (rShift + rBits)
gRightShiftAdj = 16 - (gShift + gBits)
bRightShiftAdj = 16 - (bShift + bBits)
;;
;; Shorthand for the texture state fields addressed through ESI, plus the
;; texel-size selector used by the address macros.
;;
;; TSHIFT_SUBDIV / TMASK_SUBDIV : shift count and mask applied to t
;; TEXPALETTE : always GENGC_texImageReplace
;; TEXIMAGE : GENGC_texImage when PALETTE_ONLY is set,
;; GENGC_texImageReplace otherwise
;; TEX_BPP_LOG2 : 0 when PALETTE_ONLY is set or BPP is 8, else 1.
;; A value of 1 makes the address macros shift one
;; bit less, doubling the computed texel offset.
;;
;; Only FAST_REPLACE builds are supported; anything else stops the assembly.
;;
TSHIFT_SUBDIV equ [esi].GENGC_tShiftSubDiv
TMASK_SUBDIV equ [esi].GENGC_tMaskSubDiv
ife FAST_REPLACE
.error
else
TEXPALETTE equ [esi].GENGC_texImageReplace
if PALETTE_ONLY
TEXIMAGE equ [esi].GENGC_texImage
TEX_BPP_LOG2 = 0
elseif (BPP eq 8)
TEXIMAGE equ [esi].GENGC_texImageReplace
TEX_BPP_LOG2 = 0
else
TEXIMAGE equ [esi].GENGC_texImageReplace
TEX_BPP_LOG2 = 1
endif
endif
;;
;; Palette indirection, applied by the address code after EDX has been
;; pointed at a texel.
;;
;; PALETTE_ONLY builds:
;; HANDLE_PALETTE reads the byte at [edx], zero-extends it in EBX, loads
;; the value held in TEXPALETTE and leaves EDX = that value + 4*byte.
;; EBX is overwritten.
;; Other builds:
;; HANDLE_PALETTE expands to nothing.
;;
if PALETTE_ONLY
HANDLE_PALETTE MACRO
mov bl, [edx] ; V
and ebx, 0ffh ;U
mov edx, TEXPALETTE ; V
lea edx, [edx+4*ebx] ;U
ENDM
;;
;; NOTE(review): HANDLE_PALETTEX is not invoked anywhere in the visible code,
;; and TEMP2 is not defined in the visible code. As written it forms the
;; palette entry address in EBX, but then reloads EDX from TEMP2 (and stores
;; it straight back) before GET_TEXEL_ACCUM reads through [edx] -- confirm
;; whether this macro is still needed.
;;
HANDLE_PALETTEX MACRO
mov al, [edx] ; V
and eax, 0ffh ;U
mov ebx, TEXPALETTE ; V
mov edx, TEMP2 ;U
lea ebx, [ebx+4*eax] ; V
mov TEMP2, edx ;U
GET_TEXEL_ACCUM ; V
;U
ENDM
else
HANDLE_PALETTE MACRO
ENDM
endif
;;
;; TEMP is a scratch slot that aliases GENGC_sResult. GET_TEXEL_ADDRESS2
;; parks EAX in it, so the in-memory sResult value is overwritten whenever
;; that macro runs.
;;
TEMP equ [esi].GENGC_sResult
;;
;; PIXADVANCE var
;;
;; Steps `var` forward by the size of one destination pixel:
;; BPP = 8 : increment by one
;; BPP = 16 : add BPP/8
;; otherwise: add the run-time value stored in GENGC_bytesPerPixel
;;
;; The visible code only ever passes EDI.
;;
PIXADVANCE MACRO var
if (BPP eq 8)
inc var
elseif (BPP eq 16)
add var, (BPP / 8)
else
add var, [esi].GENGC_bytesPerPixel
endif
ENDM
;;
;; GET_TEXEL_ADDRESS
;;
;; Get pointer to current texel value in EDX, then step s and t.
;;
;; This is the memory-based variant: s and t are read from, and written
;; back to, GENGC_sResult / GENGC_tResult.
;;
;; Out: edx = TEXIMAGE value
;; + ((tResult AND TMASK_SUBDIV) SHR (6-TEX_BPP_LOG2))
;; + ((sResult AND sMask) SHR (16-TEX_BPP_LOG2))
;; using the s/t values held on entry, then passed through
;; HANDLE_PALETTE.
;; GENGC_sResult += GENGC_subDs
;; GENGC_tResult += GENGC_subDt
;; Uses: eax, ebx, ecx, edx are all overwritten; flags are modified.
;;
GET_TEXEL_ADDRESS MACRO
mov eax, TMASK_SUBDIV ;U
mov edx, [esi].GENGC_tResult ; V
mov ebx, [esi].GENGC_sResult ;U
and edx, eax ; V
shr edx, (6-TEX_BPP_LOG2) ;U
mov ecx, [esi].GENGC_sMask ; V
and ebx, ecx ;U
mov eax, DWORD PTR [esi].GENGC_sResult ; V
shr ebx, (16-TEX_BPP_LOG2) ;U
mov ecx, [esi].GENGC_subDs ; V
add eax, ecx ;U
add edx, ebx ; V
mov ecx, TEXIMAGE ;U
mov ebx, [esi].GENGC_subDt ; V
add edx, ecx ;U
mov ecx, DWORD PTR [esi].GENGC_tResult ; V
add ecx, ebx ;U
mov DWORD PTR [esi].GENGC_sResult, eax ; V
mov DWORD PTR [esi].GENGC_tResult, ecx ;U
HANDLE_PALETTE
ENDM
;;
;; GET_TEXEL
;;
;; Loads the texel that EDX points at into AL (BPP = 8) or AX (BPP = 16).
;; The remaining bits of EAX keep their previous contents. No definition
;; is supplied for any other BPP.
;;
if (BPP eq 16)
GET_TEXEL MACRO
mov ax, WORD PTR [edx]
ENDM
elseif (BPP eq 8)
GET_TEXEL MACRO
mov al, BYTE PTR [edx] ; V fetch the texel byte
ENDM
endif
;;
;; GET_TEXEL_ADDRESS2
;;
;; Register-based variant of the texel address computation: t is carried in
;; ECX and s in EDI instead of being reloaded from memory.
;;
;; Out: edx = TEXIMAGE value
;; + ((ecx AND TMASK_SUBDIV) SHR (6-TEX_BPP_LOG2))
;; + ((edi AND sMask) SHR (16-TEX_BPP_LOG2))
;; using the ecx/edi values held on entry, then passed through
;; HANDLE_PALETTE.
;; edi += GENGC_subDs, ecx += GENGC_subDt
;; EAX is preserved by saving it in TEMP and reloading it; TEMP aliases
;; GENGC_sResult, so that memory location is overwritten.
;; EBX is overwritten; flags are modified.
;;
;; The `count` parameter is never referenced in the body, and the visible
;; invocations pass no argument.
;;
GET_TEXEL_ADDRESS2 MACRO count
;; input : ecx = GENGC_tResult, edi = GENGC_sResult
;; output: edx = final texel address
;; free : ebx, edx are free
mov ebx, TMASK_SUBDIV ;U
mov edx, ecx ; V
mov TEMP, eax ;U
and edx, ebx ; V
mov eax, edi ;U
mov ebx, [esi].GENGC_sMask ; V
shr edx, (6-TEX_BPP_LOG2) ;U
and eax, ebx ; V
shr eax, (16-TEX_BPP_LOG2) ;U
mov ebx, TEXIMAGE ; V
add edx, eax ;U
mov eax, [esi].GENGC_subDs ; V
add edx, ebx ;U
add edi, eax ; V
mov ebx, [esi].GENGC_subDt ;U
mov eax, TEMP ; V
add ecx, ebx ;U
HANDLE_PALETTE
ENDM
;;
;; GET_TEXEL_ACCUM
;;
;; Loads the texel at [edx] into the low BPP bits of EAX and then rotates
;; EAX right by BPP bits. After 32/BPP consecutive invocations EAX holds
;; that many texels, with the first one fetched in the lowest-order bits.
;; No definition is supplied for BPP values other than 8 and 16.
;;
if (BPP eq 16)
GET_TEXEL_ACCUM MACRO
mov ax, WORD PTR [edx]
ror eax, BPP
ENDM
elseif (BPP eq 8)
GET_TEXEL_ACCUM MACRO
mov al, BYTE PTR [edx] ; V fetch the texel byte
ror eax, BPP ;U
ENDM
endif
;;
;; WRITE_TEXEL_DECEBP
;;
;; Copies one texel from [edx] to the pixel just behind EDI (EDI has already
;; been advanced by the caller) and decrements EBP. The store follows the
;; decrement and does not alter the flags, so the caller can branch on the
;; result of `dec ebp` immediately after the macro. AL/AX is overwritten.
;; No definition is supplied for BPP values other than 8 and 16.
;;
if (BPP eq 16)
WRITE_TEXEL_DECEBP MACRO
mov ax, WORD PTR [edx]
dec ebp
mov WORD PTR [edi-2], ax
ENDM
elseif (BPP eq 8)
WRITE_TEXEL_DECEBP MACRO
mov al, BYTE PTR [edx]
dec ebp
mov BYTE PTR [edi-1], al
ENDM
endif
;;----------------------------------------------------------------------
;;
;; This is the start of the texture routine. Kick off the divide, and use
;; the dead time to set up all of the accumulators and other variables.
;;
;; On entry ECX addresses the structure whose fields are named GENGC_*.
;; NOTE(review): the PROC header / calling convention is not visible in
;; this part of the file -- confirm how ECX is supplied.
;;
;; State at the end of this section:
;; esi = structure pointer (copy of the entry ECX)
;; edi = destination pixel pointer
;; ebp = GENGC_SPAN_length
;; eax = GENGC_flags
;; FPU = "1/qw qwAccum", or "qwAccum qwAccum" when GEN_TEXTURE_ORTHO
;; is set (the divide is skipped in that case)
;; the entry values of ebx, esi, edi, ebp are on the stack
;;
;;----------------------------------------------------------------------
;;
;; Start the divide:
;;
mov eax, [ecx].GENGC_flags
fld DWORD PTR [ecx].GENGC_SPAN_qw ;qwAccum
fld DWORD PTR [ecx].GENGC_SPAN_qw ;qwAccum qwAccum
test eax, GEN_TEXTURE_ORTHO
jne @f
fdivr __One ;1/qw qwAccum
@@:
;;
;; Save the registers that we need to:
;;
push ebx ;U
push esi ; V
push edi ;U
push ebp ; V
mov esi, ecx ;U
;;
;; Set up accumulators:
;;
;; sAccum, tAccum and qwAccum start from the span's s, t and qw values.
;;
mov eax, [ecx].GENGC_SPAN_s ; V
mov ebx, [ecx].GENGC_SPAN_t ;U
mov [ecx].GENGC_sAccum, eax ; V
mov [esi].GENGC_tAccum, ebx ;U
mov ecx, [esi].GENGC_SPAN_qw ; V
mov edi, [esi].GENGC_SPAN_ppix ;U
mov [esi].GENGC_qwAccum, ecx ; V
mov eax, [esi].GENGC_flags ;U
mov ebx, [esi].GENGC_SPAN_x ; V
;;
;; Pick the destination pointer. With SURFACE_TYPE_DIB set, it is
;; SPAN_ppix plus SPAN_x pixels; otherwise it is GENGC_ColorsBits.
;;
test eax, SURFACE_TYPE_DIB ;U
jne @f ; V
mov edi, [esi].GENGC_ColorsBits ;U
jmp short @pixAccumDone
@@:
if (BPP eq 8)
add edi, ebx ; V
elseif (BPP eq 16)
lea edi, [edi + 2*ebx]
endif
@pixAccumDone:
mov ebp, [esi].GENGC_SPAN_length ;U
;;
;; Before we get into the main loop, do pixel-by-pixel writes until
;; we're DWORD aligned:
;;
;; Each pass computes s and t for the current pixel, writes one texel,
;; steps the accumulators by the per-pixel deltas (SPAN_ds / SPAN_dt /
;; SPAN_dqwdx) and decrements the pixel count in EBP. The routine exits
;; through spanExit if the count reaches zero (or below) here.
;;
;; NOTE(review): the pixel count is not checked before the first write, so
;; this code presumably relies on the caller passing a non-empty span --
;; confirm.
;;
test edi, 3
je alignmentDone
getAligned:
test eax, GEN_TEXTURE_ORTHO
je @f
;;
;; Ortho: no perspective divide, the accumulators are used directly.
;;
mov edx, [esi].GENGC_sAccum
mov eax, [esi].GENGC_tAccum
mov DWORD PTR [esi].GENGC_sResult, edx
mov DWORD PTR [esi].GENGC_tResult, eax
jmp short @stResultDone1
@@:
;;
;; Perspective: scale s and t by 1/qw, store them as integers (the QWORD
;; stores write 8 bytes starting at each field), step qw by one pixel and
;; start the divide for the next pixel.
;;
fild DWORD PTR [esi].GENGC_sAccum ; s 1/qw qwAccum
fmul ST, ST(1) ; s/qw 1/qw qwAccum
fild DWORD PTr [esi].GENGC_tAccum ; t s/qw 1/qw qwAccum
fmulp ST(2), ST ; s/qw t/qw qwAccum
fistp QWORD PTR [esi].GENGC_sResult ; t/qw qwAccum
fistp QWORD PTR [esi].GENGC_tResult ; qwAccum
fadd DWORD PTR [esi].GENGC_SPAN_dqwdx; qwAccum
fld ST(0) ; qwAccum qwAccum
fdivr __One ; 1/qw qwAccum
@stResultDone1:
;;
;; t is shifted right (arithmetic) by TSHIFT_SUBDIV, its low three bits are
;; cleared, and the result is written back to tResult. The texel address is
;; then formed in EDX the same way GET_TEXEL_ADDRESS does it.
;;
mov cl, TSHIFT_SUBDIV ;U
mov edx, [esi].GENGC_tResult ; V
sar edx, cl ;UV (4)
and edx, NOT 7 ;U
mov ebx, [esi].GENGC_sResult ; V
mov [esi].GENGC_tResult, edx ;U
and edx, TMASK_SUBDIV ; V
shr edx, (6-TEX_BPP_LOG2) ;U
and ebx, [esi].GENGC_sMask ; V
shr ebx, (16-TEX_BPP_LOG2) ;U
mov ecx, TEXIMAGE ; V
add edx, ecx ;U
PIXADVANCE edi ; V
add edx, ebx ;U
HANDLE_PALETTE
;;
;; Step s and t by one pixel.
;;
mov eax, [esi].GENGC_sAccum ; V
mov ebx, [esi].GENGC_tAccum ;U
add eax, [esi].GENGC_SPAN_ds ; V
add ebx, [esi].GENGC_SPAN_dt ;U
mov [esi].GENGC_sAccum, eax ; V
mov [esi].GENGC_tAccum, ebx ;U
;;
;; Write the pixel; the branch below uses the flags from the macro's
;; "dec ebp".
;;
WRITE_TEXEL_DECEBP
jle spanExit ; V
;;
;; The reload of EAX sits between the test and the branch; MOV does not
;; change the flags, so the branch still reflects "test edi, 3".
;;
test edi, 3
mov eax, [esi].GENGC_flags
jne getAligned
alignmentDone:
;;
;; Set up the first subdivision: compute s/t at the current position
;; (sResult/tResult) and at the position one StepX further on
;; (sResultNew/tResultNew), then derive the per-pixel deltas subDs/subDt
;; from their difference.
;;
;; EAX holds GENGC_flags on every visible path into this label.
;;
;;
;; Kick off the next divide:
;;
test eax, GEN_TEXTURE_ORTHO
je @f
mov edx, [esi].GENGC_sAccum
mov eax, [esi].GENGC_tAccum
mov DWORD PTR [esi].GENGC_sResult, edx
mov DWORD PTR [esi].GENGC_tResult, eax
jmp short @stResultDone2
@@:
fild DWORD PTR [esi].GENGC_sAccum ; s 1/qw qwAccum
fmul ST, ST(1) ; s/qw 1/qw qwAccum
fild DWORD PTr [esi].GENGC_tAccum ; t s/qw 1/qw qwAccum
fmulp ST(2), ST ; s/qw t/qw qwAccum
fistp QWORD PTR [esi].GENGC_sResult ; t/qw qwAccum
fistp QWORD PTR [esi].GENGC_tResult ; qwAccum
fadd DWORD PTR [esi].GENGC_qwStepX ; qwAccum
fld ST(0) ; qwAccum qwAccum
fdivr __One ; 1/qw qwAccum
@stResultDone2:
;;
;; Advance the accumulators by one StepX while the divide runs, and pick
;; up the starting values: EAX = sResult, EBX = tResult shifted right by
;; TSHIFT_SUBDIV with its low three bits cleared (also stored back).
;;
mov eax, [esi].GENGC_sAccum ; V
mov ebx, [esi].GENGC_tAccum ;U
add eax, [esi].GENGC_sStepX ; V
add ebx, [esi].GENGC_tStepX ;U
mov [esi].GENGC_sAccum, eax ; V
mov [esi].GENGC_tAccum, ebx ;U
mov eax, [esi].GENGC_sResult ; V
mov ebx, [esi].GENGC_tResult ;U
mov cl, TSHIFT_SUBDIV ; V
sar ebx, cl ;UV (4)
and ebx, NOT 7 ;U
mov ecx, [esi].GENGC_flags ; V
mov [esi].GENGC_tResult, ebx ;U
test ecx, GEN_TEXTURE_ORTHO ; V
je @f
mov ecx, [esi].GENGC_sAccum
mov edx, [esi].GENGC_tAccum
mov DWORD PTR [esi].GENGC_sResultNew, ecx
mov DWORD PTR [esi].GENGC_tResultNew, edx
jmp short @stResultDone3
;; We may have to burn some cycles here...
@@:
;;
;; Perspective: this consumes the 1/qw value and does not start another
;; divide, so only qwAccum is left on the FPU stack afterwards.
;;
fild DWORD PTR [esi].GENGC_sAccum ; s 1/qw qwAccum
fmul ST, ST(1) ; s/qw 1/qw qwAccum
fild DWORD PTr [esi].GENGC_tAccum ; t s/qw 1/qw qwAccum
fmulp ST(2), ST ; s/qw t/qw qwAccum
fistp QWORD PTR [esi].GENGC_sResultNew; t/qw qwAccum
fistp QWORD PTR [esi].GENGC_tResultNew; qwAccum
fadd DWORD PTR [esi].GENGC_qwStepX ; qwAccum
@stResultDone3:
;;
;; subDs = (sResultNew - sResult) SAR 3
;; subDt = (tResultNew - tResult) SAR 3, both t values already shifted by
;; TSHIFT_SUBDIV and masked with NOT 7.
;;
mov cl, TSHIFT_SUBDIV ;U
mov edx, [esi].GENGC_tResultNew ; V
sar edx, cl ;UV (4)
and edx, NOT 7 ;U
mov ecx, [esi].GENGC_sResultNew ; V
mov [esi].GENGC_tResultNew, edx ;U
sub ecx, eax ; V
sar ecx, 3 ;U
sub edx, ebx ; V
sar edx, 3 ;U
mov [esi].GENGC_subDs, ecx ; V
mov [esi].GENGC_subDt, edx ;U
;;
;;
;;
;; If we have fewer than 4 (or 2) pixels, just do right edge...
;;
;; Only the low 16 bits of EBP are examined by these tests.
;; NOTE(review): this assumes the span length fits in 16 bits -- confirm.
;;
if (BPP eq 8)
test ebp, 0fffch ;U
else
test ebp, 0fffeh ;U
endif
je singlePixels
;;
;; From here on EBP is a packed counter: the low 16 bits hold the pixels
;; remaining and the high 16 bits hold a countdown that starts at 7. Every
;; DWORD written subtracts the same amount (4 or 2) from both halves, so the
;; second (8 bpp) or fourth (16 bpp) DWORD drives EBP negative and sends
;; control to doSubDiv.
;;
;; Register use inside the loop:
;; edi = s, ecx = t, eax = packed texels, GENGC_pixAccum = destination
;;
add ebp, 070000h
mov [esi].GENGC_pixAccum, edi
mov ecx, [esi].GENGC_tResult
mov eax, [esi].GENGC_flags
mov edi, [esi].GENGC_sResult
loopTop:
;;
;; This is the start of the outer loop. We come back here on each
;; subdivision. The key thing is to kick off the next divide:
;;
;; EAX holds GENGC_flags on entry to loopTop. In the perspective case the
;; FPU stack holds a single qwAccum here; it is duplicated, one copy is
;; advanced by qwStepX for the following subdivision, and the divide is
;; started on the other.
;;
test eax, GEN_TEXTURE_ORTHO
jne @f
fld ST(0) ; qwAccum qwAccum
fadd DWORD PTR [esi].GENGC_qwStepX ; qwAccum+ qwAccum
fxch ST(1) ; qwAccum qwAccum+
fdivr __One ; 1/qw qwAccum+ -- let the divide rip!
@@:
loopTopNoDiv:
;;
;; Fetch 4 (8 bpp) or 2 (16 bpp) texels, packing them into EAX, and write
;; them to the destination as a single DWORD. The pointer held in
;; GENGC_pixAccum is advanced by 4 bytes.
;;
if (BPP eq 8)
GET_TEXEL_ADDRESS2
GET_TEXEL_ACCUM
GET_TEXEL_ADDRESS2
GET_TEXEL_ACCUM
GET_TEXEL_ADDRESS2
GET_TEXEL_ACCUM
GET_TEXEL_ADDRESS2
mov ebx, [esi].GENGC_pixAccum ; V
add ebx, 4 ;U
GET_TEXEL_ACCUM ; V
;U
sub ebp, 040004h ; V
mov [esi].GENGC_pixAccum, ebx ;U
mov [ebx-4], eax ; V
else
GET_TEXEL_ADDRESS2
GET_TEXEL_ACCUM
GET_TEXEL_ADDRESS2
mov ebx, [esi].GENGC_pixAccum
add ebx, 4
GET_TEXEL_ACCUM
sub ebp, 020002h
mov [esi].GENGC_pixAccum, ebx
mov [ebx-4], eax
endif
;;
;; The flags tested here are still those of "sub ebp, ..." above; the two
;; MOVs in between do not change them.
;;
jle doSubDiv ;U
;;
;; Still inside the current subdivision: keep going while at least one full
;; DWORD of pixels remains, otherwise finish pixel by pixel.
;;
if (BPP eq 8)
test ebp, 0fffch ; V
else
test ebp, 0fffeh
endif
je doRightEdgePixels ;U
jmp loopTopNoDiv ; V
;;
;; Right edge: fewer pixels remain than fit in one DWORD write. Leave
;; immediately if none remain; otherwise move s/t from registers back to
;; GENGC_sResult / GENGC_tResult (GET_TEXEL_ADDRESS works from memory),
;; reload the destination pointer, and write the rest one pixel at a time.
;;
doRightEdgePixels:
test ebp, 0ffffh ; V
je spanExit ;U
mov [esi].GENGC_sResult, edi
mov [esi].GENGC_tResult, ecx
mov edi, [esi].GENGC_pixAccum
rightEdgePixels:
PIXADVANCE edi ;U
GET_TEXEL_ADDRESS
GET_TEXEL
;;
;; The subtraction takes one from both halves of the packed counter in EBP;
;; only the low half (pixels remaining) is tested afterwards.
;;
if (BPP eq 8)
sub ebp, 010001h ;U
mov [edi-1], al ; V
elseif (BPP eq 16)
sub ebp, 010001h
mov [edi-2], ax
endif
test ebp, 0ffffh ;U
jne rightEdgePixels ; V
;;
;; This is the exit point. We need to pop the unused floating-point
;; registers off the stack, and return:
;;
;; Two FPU registers are popped here; every visible path that reaches
;; spanExit has two values on the FPU stack. The saved ebp, edi, esi and
;; ebx are then restored in the reverse of the order they were pushed.
;;
spanExit:
fstp ST(0)
fstp ST(0)
pop ebp
pop edi
pop esi
pop ebx
ret 0
;;
;; Short-span path. In the visible code this label is reached only from the
;; test that follows @stResultDone3, when fewer than 4 (8 bpp) or 2 (16 bpp)
;; pixels remain after alignment. EBP is a plain pixel count here (nothing
;; has been added to its high half yet).
;;
;; NOTE(review): the first pixel is written before the count is examined,
;; so a count of zero on arrival would still write one pixel -- confirm
;; that cannot happen.
;;
singlePixels:
PIXADVANCE edi ;U
GET_TEXEL_ADDRESS
GET_TEXEL
;;
;; The store below does not change the flags, so "jg" still tests the
;; result of "dec ebp".
;;
dec ebp
if (BPP eq 8)
mov [edi-1], al ; V
elseif (BPP eq 16)
mov [edi-2], ax
endif
jg singlePixels ; V
;;
;; Exit for the short-span path. We need to pop the unused floating-point
;; registers off the stack, and return:
;;
;; The FPU stack depth differs by mode on this path: the perspective code
;; before @stResultDone3 consumed 1/qw and left only qwAccum (one pop),
;; whereas the ortho code never touched the two values loaded at entry
;; (two pops).
;;
fstp ST(0)
mov eax, [esi].GENGC_flags
pop ebp
pop edi
pop esi
pop ebx
test eax, GEN_TEXTURE_ORTHO
je @f
fstp ST(0)
@@:
ret 0
;;
;; This is the subdivision code. After the required number of steps, the
;; routine will jump here to calculate the next set of interpolants based
;; on subdivision:
;;
;; On entry the high half of EBP (the subdivision countdown) has gone
;; negative and the FPU stack holds "1/qw qwAccum+" (perspective) or the two
;; untouched entry values (ortho).
;;
;; On exit to loopTop:
;; edi = s at the start of the new subdivision (previous sResultNew)
;; ecx = t at the start of the new subdivision (previous tResultNew,
;; which was stored already shifted and masked)
;; eax = GENGC_flags
;; GENGC_subDs / GENGC_subDt = new per-pixel deltas
;; GENGC_sResultNew / GENGC_tResultNew = s/t at the end of the new
;; subdivision
;; perspective: FPU stack holds the single advanced qwAccum
;;
doSubDiv:
;;
;; Re-arm the countdown in the high half of EBP (back to 7).
;;
add ebp, 080000h
mov eax, [esi].GENGC_sAccum
;;
;; If less than one full DWORD of pixels remains, finish at the right edge.
;; A separate "no pixels left" test is not needed here: a zero pixel count
;; also fails this test, and doRightEdgePixels checks for zero itself before
;; writing anything.
;;
if (BPP eq 8)
test ebp, 0fffch ; V
else
test ebp, 0fffeh
endif
je doRightEdgePixels ;U
mov ecx, [esi].GENGC_flags
mov ebx, [esi].GENGC_tAccum
;;
;; Increment the big S and T steps:
;;
add eax, [esi].GENGC_sStepX
add ebx, [esi].GENGC_tStepX
mov [esi].GENGC_sAccum, eax
mov [esi].GENGC_tAccum, ebx
;;
;; The previous end-of-subdivision values become the new starting values.
;;
mov edi, [esi].GENGC_sResultNew
mov ebx, [esi].GENGC_tResultNew
test ecx, GEN_TEXTURE_ORTHO
je @f
;;
;; Handle ortho case (easy)
;;
mov edx, DWORD PTR [esi].GENGC_tAccum
mov DWORD PTR [esi].GENGC_sResultNew, eax
mov DWORD PTR [esi].GENGC_tResultNew, edx
jmp short @stResultDone4
;;
;; Do the floating-point computation for perspective:
;;
;; This consumes the 1/qw started at loopTop; the advanced qwAccum is left
;; on the FPU stack for loopTop to duplicate.
;;
@@:
fild DWORD PTR [esi].GENGC_sAccum ; s 1/qw qwAccum
fmul ST, ST(1) ; s/qw 1/qw qwAccum
fild DWORD PTR [esi].GENGC_tAccum ; t s/qw 1/qw qwAccum
fmulp ST(2), ST ; s/qw t/qw qwAccum
fistp QWORD PTR [esi].GENGC_sResultNew; t/qw qwAccum
fistp QWORD PTR [esi].GENGC_tResultNew; qwAccum
@stResultDone4:
;;
;; Now, calculate the per-pixel deltas:
;;
;; subDs = (sResultNew - edi) SAR 3
;; subDt = (tResultNew - ebx) SAR 3, with tResultNew first shifted right by
;; TSHIFT_SUBDIV and masked with NOT 7 (and stored back in that form).
;;
mov cl, TSHIFT_SUBDIV ;U
mov edx, [esi].GENGC_tResultNew ; V
sar edx, cl ;UV (4)
mov ecx, [esi].GENGC_sResultNew ;U
and edx, NOT 7 ; V
sub ecx, edi ;U
mov [esi].GENGC_tResultNew, edx ; V
sar ecx, 3 ;U
sub edx, ebx ; V
sar edx, 3 ;U
mov [esi].GENGC_subDs, ecx ; V
mov [esi].GENGC_subDt, edx ;U
;;
;; Hand t to the loop in ECX and reload the flags that loopTop tests.
;;
mov ecx, ebx ; V
mov eax, [esi].GENGC_flags ;U
jmp loopTop ; V