/* ** Copyright 1991, 1992, 1993, Silicon Graphics, Inc. ** All Rights Reserved. ** ** This is UNPUBLISHED PROPRIETARY SOURCE CODE of Silicon Graphics, Inc.; ** the contents of this file may not be disclosed to third parties, copied or ** duplicated in any form, in whole or in part, without the prior written ** permission of Silicon Graphics, Inc. ** ** RESTRICTED RIGHTS LEGEND: ** Use, duplication or disclosure by the Government is subject to restrictions ** as set forth in subdivision (c)(1)(ii) of the Rights in Technical Data ** and Computer Software clause at DFARS 252.227-7013, and/or in similar or ** successor clauses in the FAR, DOD or NASA FAR Supplement. Unpublished - ** rights reserved under the Copyright Laws of the United States. */ #include "precomp.h" #pragma hdrstop #ifdef _X86_ #define SHADER __GLcontext.polygon.shader #define GENGCACCEL __GLGENcontext.genAccel #define SPANDELTA __GLGENcontext.genAccel.spanDelta #define SPANVALUE __GLGENcontext.genAccel.spanValue #endif #define ENABLE_ASM 1 #if DBG //#define FORCE_NPX_DEBUG 1 #endif /**************************************************************************\ \**************************************************************************/ /* This routine sets gc->polygon.shader.cfb to gc->drawBuffer */ void FASTCALL __fastGenFillSubTriangle(__GLcontext *gc, GLint iyBottom, GLint iyTop) { GLint ixLeft, ixRight; GLint ixLeftFrac, ixRightFrac; GLint spanWidth, clipY0, clipY1; ULONG ulSpanVisibility; GLint cWalls; GLint *Walls; #ifdef NT __GLstippleWord stackWords[__GL_MAX_STACK_STIPPLE_WORDS]; __GLstippleWord *words; GLuint maxWidth; #else __GLstippleWord words[__GL_MAX_STIPPLE_WORDS]; #endif BOOL bSurfaceDIB; BOOL bClipped; GLint xScr, yScr; GLint zFails; __GLzValue *zbuf, z; GLint r, g, b, s, t; __GLGENcontext *gengc = (__GLGENcontext *)gc; __genSpanFunc cSpanFunc = GENACCEL(gc).__fastSpanFuncPtr; __GLspanFunc zSpanFunc = GENACCEL(gc).__fastZSpanFuncPtr; int scansize; #ifdef NT maxWidth = (gc->transform.clipX1 - gc->transform.clipX0) + 31; if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS) { words = gcTempAlloc(gc, (maxWidth+__GL_STIPPLE_BITS-1)/8); if (words == NULL) { return; } } else { words = stackWords; } #endif gc->polygon.shader.stipplePat = words; scansize = gc->polygon.shader.cfb->buf.outerWidth; bSurfaceDIB = (gc->polygon.shader.cfb->buf.flags & DIB_FORMAT) != 0; bClipped = (!(gc->drawBuffer->buf.flags & NO_CLIP)) && bSurfaceDIB; if (bSurfaceDIB) GENACCEL(gc).flags |= SURFACE_TYPE_DIB; else GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB); ixLeft = gc->polygon.shader.ixLeft; ixLeftFrac = gc->polygon.shader.ixLeftFrac; ixRight = gc->polygon.shader.ixRight; ixRightFrac = gc->polygon.shader.ixRightFrac; clipY0 = gc->transform.clipY0; clipY1 = gc->transform.clipY1; r = GENACCEL(gc).spanValue.r; g = GENACCEL(gc).spanValue.g; b = GENACCEL(gc).spanValue.b; s = GENACCEL(gc).spanValue.s; t = GENACCEL(gc).spanValue.t; if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { z = gc->polygon.shader.frag.z; if( gc->modes.depthBits == 32 ) zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*), ixLeft, iyBottom); else zbuf = (__GLzValue *)__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*), ixLeft, iyBottom); } else if ((gc->polygon.shader.modeFlags & __GL_SHADE_STIPPLE) == 0) { GLuint w; if (w = ((gc->transform.clipX1 - gc->transform.clipX0) + 31) >> 3) RtlFillMemoryUlong(words, w, ~((ULONG)0)); GENACCEL(gc).flags &= ~(HAVE_STIPPLE); } // // render the spans // while (iyBottom < iyTop) { spanWidth = ixRight - ixLeft; /* ** Only render spans that have non-zero width and which are ** not scissored out vertically. */ if ((spanWidth > 0) && (iyBottom >= clipY0) && (iyBottom < clipY1)) { gc->polygon.shader.frag.x = ixLeft; gc->polygon.shader.frag.y = iyBottom; gc->polygon.shader.zbuf = zbuf; gc->polygon.shader.frag.z = z; GENACCEL(gc).spanValue.r = r; GENACCEL(gc).spanValue.g = g; GENACCEL(gc).spanValue.b = b; GENACCEL(gc).spanValue.s = s; GENACCEL(gc).spanValue.t = t; // take care of horizontal scissoring if (!gc->transform.reasonableViewport) { GLint clipX0 = gc->transform.clipX0; GLint clipX1 = gc->transform.clipX1; // see if we skip entire span if ((ixRight <= clipX0) || (ixLeft >= clipX1)) goto advance; // now clip right and left if (ixRight > clipX1) spanWidth = (clipX1 - ixLeft); if (ixLeft < clipX0) { GLuint delta; delta = clipX0 - ixLeft; spanWidth -= delta; if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { GENACCEL(gc).spanValue.r += delta * GENACCEL(gc).spanDelta.r; if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) { GENACCEL(gc).spanValue.g += delta * GENACCEL(gc).spanDelta.g; GENACCEL(gc).spanValue.b += delta * GENACCEL(gc).spanDelta.b; } } if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) { GENACCEL(gc).spanValue.s += delta * GENACCEL(gc).spanDelta.s; GENACCEL(gc).spanValue.t += delta * GENACCEL(gc).spanDelta.t; } gc->polygon.shader.frag.x = clipX0; if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) { if( gc->modes.depthBits == 32 ) gc->polygon.shader.zbuf += delta; else (__GLz16Value *)gc->polygon.shader.zbuf += delta; gc->polygon.shader.frag.z += (gc->polygon.shader.dzdx * delta); } } } // now have span length gc->polygon.shader.length = spanWidth; // If a stipple is active, process it first if (gc->polygon.shader.modeFlags & __GL_SHADE_STIPPLE) { // If no pixels are left after stippling and depth // testing then we can skip the span // Note that this function handles the no-depth- // testing case also gc->polygon.shader.done = GL_FALSE; if (!(*GENACCEL(gc).__fastStippleDepthTestSpan)(gc) || gc->polygon.shader.done) { goto advance; } GENACCEL(gc).flags |= HAVE_STIPPLE; } // Do z-buffering if needed, and short-circuit rest of span // operations if nothing will be drawn. else if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) { // initially assume no stippling GENACCEL(gc).flags &= ~(HAVE_STIPPLE); if ((zFails = (*zSpanFunc)(gc)) == 1) goto advance; else if (zFails) GENACCEL(gc).flags |= HAVE_STIPPLE; } if (gc->state.raster.drawBuffer == GL_FRONT_AND_BACK) { gc->polygon.shader.cfb = &gc->frontBuffer; xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) + gc->frontBuffer.buf.xOrigin; yScr = __GL_UNBIAS_Y(gc, iyBottom) + gc->frontBuffer.buf.yOrigin; // If the front buffer is a DIB, we're drawing straight to // the screen, so we must check clipping. if ((gc->frontBuffer.buf.flags & (DIB_FORMAT | NO_CLIP)) == DIB_FORMAT) { ulSpanVisibility = wglSpanVisible(xScr, yScr, spanWidth, &cWalls, &Walls); // If the span is completely visible, we can treat the // screen as a DIB. if (ulSpanVisibility == WGL_SPAN_ALL) { GENACCEL(gc).flags |= SURFACE_TYPE_DIB; (*cSpanFunc)(gengc); } else if (ulSpanVisibility == WGL_SPAN_PARTIAL) { GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB); if (GENACCEL(gc).flags & HAVE_STIPPLE) (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, FALSE); GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB); (*cSpanFunc)(gengc); (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, TRUE); } } else { GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB); if (GENACCEL(gc).flags & HAVE_STIPPLE) (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, FALSE); GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB); (*cSpanFunc)(gengc); (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, TRUE); } // The back buffer is always DIB-compatible gc->polygon.shader.cfb = &gc->backBuffer; GENACCEL(gc).flags |= SURFACE_TYPE_DIB; (*cSpanFunc)(gengc); } else { if (bClipped) { xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) + gc->drawBuffer->buf.xOrigin; yScr = __GL_UNBIAS_Y(gc, iyBottom) + gc->drawBuffer->buf.yOrigin; ulSpanVisibility = wglSpanVisible(xScr, yScr, spanWidth, &cWalls, &Walls); if (ulSpanVisibility == WGL_SPAN_ALL) { GENACCEL(gc).flags |= SURFACE_TYPE_DIB; (*cSpanFunc)(gengc); } else if (ulSpanVisibility == WGL_SPAN_PARTIAL) { GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB); if (GENACCEL(gc).flags & HAVE_STIPPLE) (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, FALSE); GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB); (*cSpanFunc)(gengc); (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, TRUE); } } else if (bSurfaceDIB) { (*cSpanFunc)(gengc); } else { xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) + gc->drawBuffer->buf.xOrigin; yScr = __GL_UNBIAS_Y(gc, iyBottom) + gc->drawBuffer->buf.yOrigin; GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB); if (GENACCEL(gc).flags & HAVE_STIPPLE) (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, FALSE); (*cSpanFunc)(gengc); if (!bSurfaceDIB) (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, TRUE); } } } advance: GENACCEL(gc).pPix += scansize; /* Advance right edge fixed point, adjusting for carry */ ixRightFrac += gc->polygon.shader.dxRightFrac; if (ixRightFrac < 0) { /* Carry/Borrow'd. Use large step */ ixRight += gc->polygon.shader.dxRightBig; ixRightFrac &= ~0x80000000; } else { ixRight += gc->polygon.shader.dxRightLittle; } iyBottom++; ixLeftFrac += gc->polygon.shader.dxLeftFrac; if (ixLeftFrac < 0) { /* Carry/Borrow'd. Use large step */ ixLeft += gc->polygon.shader.dxLeftBig; ixLeftFrac &= ~0x80000000; if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) { if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { r += *((GLint *)&gc->polygon.shader.rBig); g += *((GLint *)&gc->polygon.shader.gBig); b += *((GLint *)&gc->polygon.shader.bBig); } if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) { s += *((GLint *)&gc->polygon.shader.sBig); t += *((GLint *)&gc->polygon.shader.tBig); } } else { if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { r += *((GLint *)&gc->polygon.shader.rBig); } } if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { z += gc->polygon.shader.zBig; /* The implicit multiply is taken out of the loop */ zbuf = (__GLzValue*)((GLubyte*)zbuf + gc->polygon.shader.zbufBig); } } else { /* Use small step */ ixLeft += gc->polygon.shader.dxLeftLittle; if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) { if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { r += *((GLint *)&gc->polygon.shader.rLittle); g += *((GLint *)&gc->polygon.shader.gLittle); b += *((GLint *)&gc->polygon.shader.bLittle); } if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) { s += *((GLint *)&gc->polygon.shader.sLittle); t += *((GLint *)&gc->polygon.shader.tLittle); } } else { if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { r += *((GLint *)&gc->polygon.shader.rLittle); } } if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { z += gc->polygon.shader.zLittle; /* The implicit multiply is taken out of the loop */ zbuf = (__GLzValue*)((GLubyte*)zbuf + gc->polygon.shader.zbufLittle); } } } gc->polygon.shader.ixLeft = ixLeft; gc->polygon.shader.ixLeftFrac = ixLeftFrac; gc->polygon.shader.ixRight = ixRight; gc->polygon.shader.ixRightFrac = ixRightFrac; gc->polygon.shader.frag.z = z; GENACCEL(gc).spanValue.r = r; GENACCEL(gc).spanValue.g = g; GENACCEL(gc).spanValue.b = b; GENACCEL(gc).spanValue.s = s; GENACCEL(gc).spanValue.t = t; #ifdef NT if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS) { gcTempFree(gc, words); } #endif } void FASTCALL __fastGenFillSubTriangleTexRGBA(__GLcontext *gc, GLint iyBottom, GLint iyTop) { GLint ixLeft, ixRight; GLint ixLeftFrac, ixRightFrac; GLint spanWidth, clipY0, clipY1; ULONG ulSpanVisibility; GLint cWalls; GLint *Walls; BOOL bSurfaceDIB; BOOL bClipped; GLint xScr, yScr; __GLzValue *zbuf, z; GLint r, g, b, a, s, t; __GLfloat qw; __GLGENcontext *gengc = (__GLGENcontext *)gc; __genSpanFunc cSpanFunc = GENACCEL(gc).__fastSpanFuncPtr; int scansize; BOOL bReadPixels = (gc->state.enables.general & __GL_BLEND_ENABLE) || (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST); #ifdef _MCD_ GLboolean bMcdZ = ((gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) && (gengc->pMcdState != NULL) && (gengc->pMcdState->pDepthSpan != NULL) && (gengc->pMcdState->pMcdSurf != NULL) && !(gengc->pMcdState->McdBuffers.mcdDepthBuf.bufFlags & MCDBUF_ENABLED)); #endif scansize = gc->polygon.shader.cfb->buf.outerWidth; bSurfaceDIB = (gc->polygon.shader.cfb->buf.flags & DIB_FORMAT) != 0; bClipped = (!(gc->drawBuffer->buf.flags & NO_CLIP)) && bSurfaceDIB; if (bSurfaceDIB) GENACCEL(gc).flags |= SURFACE_TYPE_DIB; else GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB); ixLeft = gc->polygon.shader.ixLeft; ixLeftFrac = gc->polygon.shader.ixLeftFrac; ixRight = gc->polygon.shader.ixRight; ixRightFrac = gc->polygon.shader.ixRightFrac; clipY0 = gc->transform.clipY0; clipY1 = gc->transform.clipY1; r = GENACCEL(gc).spanValue.r; g = GENACCEL(gc).spanValue.g; b = GENACCEL(gc).spanValue.b; a = GENACCEL(gc).spanValue.a; s = GENACCEL(gc).spanValue.s; t = GENACCEL(gc).spanValue.t; qw = gc->polygon.shader.frag.qw; if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { z = gc->polygon.shader.frag.z; #ifdef _MCD_ if (bMcdZ) { zbuf = (__GLzValue *)gengc->pMcdState->pMcdSurf->McdDepthBuf.pv; } else #endif { if( gc->modes.depthBits == 32 ) zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*), ixLeft, iyBottom); else zbuf = (__GLzValue *)__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*), ixLeft, iyBottom); } } // // render the spans // while (iyBottom < iyTop) { spanWidth = ixRight - ixLeft; /* ** Only render spans that have non-zero width and which are ** not scissored out vertically. */ if ((spanWidth > 0) && (iyBottom >= clipY0) && (iyBottom < clipY1)) { gc->polygon.shader.frag.x = ixLeft; gc->polygon.shader.frag.y = iyBottom; gc->polygon.shader.zbuf = zbuf; gc->polygon.shader.frag.z = z; GENACCEL(gc).spanValue.r = r; GENACCEL(gc).spanValue.g = g; GENACCEL(gc).spanValue.b = b; GENACCEL(gc).spanValue.a = a; GENACCEL(gc).spanValue.s = s; GENACCEL(gc).spanValue.t = t; gc->polygon.shader.frag.qw = qw; // take care of horizontal scissoring if (!gc->transform.reasonableViewport) { GLint clipX0 = gc->transform.clipX0; GLint clipX1 = gc->transform.clipX1; // see if we skip entire span if ((ixRight <= clipX0) || (ixLeft >= clipX1)) goto advance; // now clip right and left if (ixRight > clipX1) spanWidth = (clipX1 - ixLeft); if (ixLeft < clipX0) { GLuint delta; delta = clipX0 - ixLeft; spanWidth -= delta; if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { GENACCEL(gc).spanValue.r += delta * GENACCEL(gc).spanDelta.r; GENACCEL(gc).spanValue.g += delta * GENACCEL(gc).spanDelta.g; GENACCEL(gc).spanValue.b += delta * GENACCEL(gc).spanDelta.b; GENACCEL(gc).spanValue.a += delta * GENACCEL(gc).spanDelta.a; } if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) { GENACCEL(gc).spanValue.s += delta * GENACCEL(gc).spanDelta.s; GENACCEL(gc).spanValue.t += delta * GENACCEL(gc).spanDelta.t; gc->polygon.shader.frag.qw += delta * gc->polygon.shader.dqwdx; } gc->polygon.shader.frag.x = clipX0; if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) { if( gc->modes.depthBits == 32 ) gc->polygon.shader.zbuf += delta; else (__GLz16Value *)gc->polygon.shader.zbuf += delta; gc->polygon.shader.frag.z += (gc->polygon.shader.dzdx * delta); } } } // now have span length gc->polygon.shader.length = spanWidth; #ifdef _MCD_ // read from driver z buffer into z span buffer if (bMcdZ) { GenMcdReadZRawSpan(&gc->depthBuffer, gc->polygon.shader.frag.x, iyBottom, spanWidth); } #endif if (bClipped) { xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) + gc->drawBuffer->buf.xOrigin; yScr = __GL_UNBIAS_Y(gc, iyBottom) + gc->drawBuffer->buf.yOrigin; ulSpanVisibility = wglSpanVisible(xScr, yScr, spanWidth, &cWalls, &Walls); if (ulSpanVisibility == WGL_SPAN_ALL) { GENACCEL(gc).flags |= SURFACE_TYPE_DIB; (*cSpanFunc)(gengc); } else if (ulSpanVisibility == WGL_SPAN_PARTIAL) { GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB); if (bReadPixels) (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, FALSE); (*cSpanFunc)(gengc); (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, TRUE); } } else if (bSurfaceDIB) { (*cSpanFunc)(gengc); } else { xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) + gc->drawBuffer->buf.xOrigin; yScr = __GL_UNBIAS_Y(gc, iyBottom) + gc->drawBuffer->buf.yOrigin; GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB); if (bReadPixels) (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, FALSE); (*cSpanFunc)(gengc); if (!bSurfaceDIB) (*gengc->pfnCopyPixels)(gengc, gc->polygon.shader.cfb, xScr, yScr, spanWidth, TRUE); } #ifdef _MCD_ // write z span buffer back to driver z buffer if (bMcdZ) { GenMcdWriteZRawSpan(&gc->depthBuffer, gc->polygon.shader.frag.x, iyBottom, spanWidth); } #endif } advance: GENACCEL(gc).pPix += scansize; /* Advance right edge fixed point, adjusting for carry */ ixRightFrac += gc->polygon.shader.dxRightFrac; if (ixRightFrac < 0) { /* Carry/Borrow'd. Use large step */ ixRight += gc->polygon.shader.dxRightBig; ixRightFrac &= ~0x80000000; } else { ixRight += gc->polygon.shader.dxRightLittle; } iyBottom++; ixLeftFrac += gc->polygon.shader.dxLeftFrac; if (ixLeftFrac < 0) { /* Carry/Borrow'd. Use large step */ ixLeft += gc->polygon.shader.dxLeftBig; ixLeftFrac &= ~0x80000000; if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { r += *((GLint *)&gc->polygon.shader.rBig); g += *((GLint *)&gc->polygon.shader.gBig); b += *((GLint *)&gc->polygon.shader.bBig); a += *((GLint *)&gc->polygon.shader.aBig); } if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) { s += *((GLint *)&gc->polygon.shader.sBig); t += *((GLint *)&gc->polygon.shader.tBig); qw += gc->polygon.shader.qwBig; } if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { z += gc->polygon.shader.zBig; /* The implicit multiply is taken out of the loop */ #ifdef _MCD_ if (!bMcdZ) #endif { zbuf = (__GLzValue*)((GLubyte*)zbuf + gc->polygon.shader.zbufBig); } } } else { /* Use small step */ ixLeft += gc->polygon.shader.dxLeftLittle; if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { r += *((GLint *)&gc->polygon.shader.rLittle); g += *((GLint *)&gc->polygon.shader.gLittle); b += *((GLint *)&gc->polygon.shader.bLittle); a += *((GLint *)&gc->polygon.shader.aLittle); } if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) { s += *((GLint *)&gc->polygon.shader.sLittle); t += *((GLint *)&gc->polygon.shader.tLittle); qw += gc->polygon.shader.qwLittle; } if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { z += gc->polygon.shader.zLittle; /* The implicit multiply is taken out of the loop */ #ifdef _MCD_ if (!bMcdZ) #endif { zbuf = (__GLzValue*)((GLubyte*)zbuf + gc->polygon.shader.zbufLittle); } } } } gc->polygon.shader.ixLeft = ixLeft; gc->polygon.shader.ixLeftFrac = ixLeftFrac; gc->polygon.shader.ixRight = ixRight; gc->polygon.shader.ixRightFrac = ixRightFrac; gc->polygon.shader.frag.z = z; gc->polygon.shader.zbuf = zbuf; GENACCEL(gc).spanValue.r = r; GENACCEL(gc).spanValue.g = g; GENACCEL(gc).spanValue.b = b; GENACCEL(gc).spanValue.a = a; GENACCEL(gc).spanValue.s = s; GENACCEL(gc).spanValue.t = t; gc->polygon.shader.frag.qw = qw; } /**************************************************************************\ \**************************************************************************/ void FASTCALL GenDrvFillSubTriangle(__GLcontext *gc, GLint iyBottom, GLint iyTop) { GLint ixLeft, ixRight; GLint ixLeftFrac, ixRightFrac; GLint spanWidth, clipY0, clipY1; #ifdef NT __GLstippleWord stackWords[__GL_MAX_STACK_STIPPLE_WORDS]; __GLstippleWord *words; GLuint maxWidth; #else __GLstippleWord words[__GL_MAX_STIPPLE_WORDS]; #endif GLint zFails; __GLzValue *zbuf = NULL, z; GLint r, g, b, a, s, t; __GLGENcontext *gengc = (__GLGENcontext *)gc; __genSpanFunc cSpanFunc = GENACCEL(gc).__fastSpanFuncPtr; __GLspanFunc zSpanFunc = GENACCEL(gc).__fastZSpanFuncPtr; #ifdef NT maxWidth = (gc->transform.clipX1 - gc->transform.clipX0) + 31; if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS) { words = gcTempAlloc(gc, (maxWidth+__GL_STIPPLE_BITS-1)/8); if (words == NULL) { return; } } else { words = stackWords; } #endif gc->polygon.shader.stipplePat = words; gc->polygon.shader.cfb = gc->drawBuffer; ixLeft = gc->polygon.shader.ixLeft; ixLeftFrac = gc->polygon.shader.ixLeftFrac; ixRight = gc->polygon.shader.ixRight; ixRightFrac = gc->polygon.shader.ixRightFrac; clipY0 = gc->transform.clipY0; clipY1 = gc->transform.clipY1; r = GENACCEL(gc).spanValue.r; g = GENACCEL(gc).spanValue.g; b = GENACCEL(gc).spanValue.b; a = GENACCEL(gc).spanValue.a; s = GENACCEL(gc).spanValue.s; t = GENACCEL(gc).spanValue.t; if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { z = gc->polygon.shader.frag.z; if( gc->modes.depthBits == 32 ) zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*), ixLeft, iyBottom); else zbuf = (__GLzValue *)__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*), ixLeft, iyBottom); } else { GLuint w; if (w = ((gc->transform.clipX1 - gc->transform.clipX0) + 31) >> 3) RtlFillMemoryUlong(words, w, ~((ULONG)0)); GENACCEL(gc).flags &= ~(HAVE_STIPPLE); } while (iyBottom < iyTop) { spanWidth = ixRight - ixLeft; /* ** Only render spans that have non-zero width and which are ** not scissored out vertically. */ if ((spanWidth > 0) && (iyBottom >= clipY0) && (iyBottom < clipY1)) { gc->polygon.shader.frag.x = ixLeft; gc->polygon.shader.frag.y = iyBottom; gc->polygon.shader.zbuf = zbuf; gc->polygon.shader.frag.z = z; GENACCEL(gc).spanValue.r = r; GENACCEL(gc).spanValue.g = g; GENACCEL(gc).spanValue.b = b; GENACCEL(gc).spanValue.a = a; GENACCEL(gc).spanValue.s = s; GENACCEL(gc).spanValue.t = t; // take care of horizontal scissoring if (!gc->transform.reasonableViewport) { GLint clipX0 = gc->transform.clipX0; GLint clipX1 = gc->transform.clipX1; // see if we skip entire span if ((ixRight <= clipX0) || (ixLeft >= clipX1)) goto advance; // now clip right and left if (ixRight > clipX1) spanWidth = (clipX1 - ixLeft); if (ixLeft < clipX0) { GLuint delta; delta = clipX0 - ixLeft; spanWidth -= delta; if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { GENACCEL(gc).spanValue.r += delta * GENACCEL(gc).spanDelta.r; if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) { GENACCEL(gc).spanValue.g += delta * GENACCEL(gc).spanDelta.g; GENACCEL(gc).spanValue.b += delta * GENACCEL(gc).spanDelta.b; } } if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) { GENACCEL(gc).spanValue.s += delta * GENACCEL(gc).spanDelta.s; GENACCEL(gc).spanValue.t += delta * GENACCEL(gc).spanDelta.t; } gc->polygon.shader.frag.x = clipX0; if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) { if( gc->modes.depthBits == 32 ) gc->polygon.shader.zbuf += delta; else (__GLz16Value *)gc->polygon.shader.zbuf += delta; gc->polygon.shader.frag.z += (gc->polygon.shader.dzdx * delta); } } } // now have span length gc->polygon.shader.length = spanWidth; // Do z-buffering if needed, and short-circuit rest of span // operations if nothing will be drawn. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) { // initially assume no stippling GENACCEL(gc).flags &= ~(HAVE_STIPPLE); if ((zFails = (*zSpanFunc)(gc)) == 1) goto advance; else if (zFails) GENACCEL(gc).flags |= HAVE_STIPPLE; } (*cSpanFunc)(gengc); } advance: /* Advance right edge fixed point, adjusting for carry */ ixRightFrac += gc->polygon.shader.dxRightFrac; if (ixRightFrac < 0) { /* Carry/Borrow'd. Use large step */ ixRight += gc->polygon.shader.dxRightBig; ixRightFrac &= ~0x80000000; } else { ixRight += gc->polygon.shader.dxRightLittle; } iyBottom++; ixLeftFrac += gc->polygon.shader.dxLeftFrac; if (ixLeftFrac < 0) { /* Carry/Borrow'd. Use large step */ ixLeft += gc->polygon.shader.dxLeftBig; ixLeftFrac &= ~0x80000000; if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) { if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { r += *((GLint *)&gc->polygon.shader.rBig); g += *((GLint *)&gc->polygon.shader.gBig); b += *((GLint *)&gc->polygon.shader.bBig); a += *((GLint *)&gc->polygon.shader.aBig); } if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) { s += *((GLint *)&gc->polygon.shader.sBig); t += *((GLint *)&gc->polygon.shader.tBig); } } else { if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { r += *((GLint *)&gc->polygon.shader.rBig); } } if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { z += gc->polygon.shader.zBig; /* The implicit multiply is taken out of the loop */ zbuf = (__GLzValue*)((GLubyte*)zbuf + gc->polygon.shader.zbufBig); } } else { /* Use small step */ ixLeft += gc->polygon.shader.dxLeftLittle; if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) { if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { r += *((GLint *)&gc->polygon.shader.rLittle); g += *((GLint *)&gc->polygon.shader.gLittle); b += *((GLint *)&gc->polygon.shader.bLittle); a += *((GLint *)&gc->polygon.shader.aLittle); } if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) { s += *((GLint *)&gc->polygon.shader.sLittle); t += *((GLint *)&gc->polygon.shader.tLittle); } } else { if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { r += *((GLint *)&gc->polygon.shader.rLittle); } } if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { z += gc->polygon.shader.zLittle; /* The implicit multiply is taken out of the loop */ zbuf = (__GLzValue*)((GLubyte*)zbuf + gc->polygon.shader.zbufLittle); } } } gc->polygon.shader.ixLeft = ixLeft; gc->polygon.shader.ixLeftFrac = ixLeftFrac; gc->polygon.shader.ixRight = ixRight; gc->polygon.shader.ixRightFrac = ixRightFrac; gc->polygon.shader.frag.z = z; GENACCEL(gc).spanValue.r = r; GENACCEL(gc).spanValue.g = g; GENACCEL(gc).spanValue.b = b; GENACCEL(gc).spanValue.a = a; GENACCEL(gc).spanValue.s = s; GENACCEL(gc).spanValue.t = t; #ifdef NT if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS) { gcTempFree(gc, words); } #endif } /**************************************************************************\ \**************************************************************************/ void GenSnapXLeft(__GLcontext *gc, __GLfloat xLeft, __GLfloat dxdyLeft) { GLint ixLeft, ixLeftFrac; ixLeft = __GL_VERTEX_FLOAT_TO_INT(xLeft); ixLeftFrac = __GL_VERTEX_PROMOTED_FRACTION(xLeft) + 0x40000000; gc->polygon.shader.ixLeftFrac = ixLeftFrac & ~0x80000000; gc->polygon.shader.ixLeft = ixLeft + (((GLuint) ixLeftFrac) >> 31); /* Compute big and little steps */ gc->polygon.shader.dxLeftLittle = FTOL(dxdyLeft); gc->polygon.shader.dxLeftFrac = FLT_FRACTION(dxdyLeft - gc->polygon.shader.dxLeftLittle); if (gc->polygon.shader.dxLeftFrac < 0) { gc->polygon.shader.dxLeftBig = gc->polygon.shader.dxLeftLittle - 1; } else { gc->polygon.shader.dxLeftBig = gc->polygon.shader.dxLeftLittle + 1; } if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { /* ** Compute the big and little depth buffer steps. We walk the ** memory pointers for the depth buffer along the edge of the ** triangle as we walk the edge. This way we don't have to ** recompute the buffer address as we go. */ if (gc->depthBuffer.buf.elementSize == 2) { gc->polygon.shader.zbufLittle = (gc->depthBuffer.buf.outerWidth + gc->polygon.shader.dxLeftLittle) << 1; gc->polygon.shader.zbufBig = (gc->depthBuffer.buf.outerWidth + gc->polygon.shader.dxLeftBig) << 1; } else { gc->polygon.shader.zbufLittle = (gc->depthBuffer.buf.outerWidth + gc->polygon.shader.dxLeftLittle) << 2; gc->polygon.shader.zbufBig = (gc->depthBuffer.buf.outerWidth + gc->polygon.shader.dxLeftBig) << 2; } } } /**************************************************************************\ \**************************************************************************/ void GenSnapXRight(__GLcontext *gc, __GLfloat xRight, __GLfloat dxdyRight) { GLint ixRight, ixRightFrac; ixRight = __GL_VERTEX_FLOAT_TO_INT(xRight); ixRightFrac = __GL_VERTEX_PROMOTED_FRACTION(xRight) + 0x40000000; gc->polygon.shader.ixRightFrac = ixRightFrac & ~0x80000000; gc->polygon.shader.ixRight = ixRight + (((GLuint) ixRightFrac) >> 31); /* Compute big and little steps */ gc->polygon.shader.dxRightLittle = FTOL(dxdyRight); gc->polygon.shader.dxRightFrac = FLT_FRACTION(dxdyRight - gc->polygon.shader.dxRightLittle); if (gc->polygon.shader.dxRightFrac < 0) { gc->polygon.shader.dxRightBig = gc->polygon.shader.dxRightLittle - 1; } else { gc->polygon.shader.dxRightBig = gc->polygon.shader.dxRightLittle + 1; } } /**************************************************************************\ \**************************************************************************/ void __fastGenSetInitialParameters( __GLcontext *gc, const __GLvertex *a, __GLfloat fdx, __GLfloat fdy) { #define sh gc->polygon.shader #define bPolygonOffset \ (gc->state.enables.general & __GL_POLYGON_OFFSET_FILL_ENABLE) __GLfloat zOffset; __GLfloat dxLeftLittle; #if _X86_ && ENABLE_ASM LARGE_INTEGER temp; _asm{ mov edx, gc fild DWORD PTR [OFFSET(SHADER.dxLeftLittle)][edx] mov edi, [OFFSET(SHADER.modeFlags)][edx] test edi, __GL_SHADE_DEPTH_ITER fstp dxLeftLittle je noZ } _asm{ mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx] mov ecx, __glZero test ebx, __GL_POLYGON_OFFSET_FILL_ENABLE mov zOffset, ecx je noPolyOffset } zOffset = __glPolygonOffsetZ(gc); _asm{ mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] } noPolyOffset: _asm{ mov eax, a fld fdx fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx] fld fdy fmul DWORD PTR [OFFSET(SHADER.dzdyf)][edx] // zy zx fxch ST(1) // zx zy fadd DWORD PTR [OFFSET(__GLvertex.window.z)][eax] fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx] // zl zy zx fxch ST(1) // zy zl zx fadd zOffset fxch ST(1) // zl zy zx fadd DWORD PTR [OFFSET(SHADER.dzdyf)][edx] // zl zy zx (+1) fxch ST(1) // zy zl zx faddp ST(2), ST // zl z fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx] // ZL z (+1) fxch ST(1) // z ZL fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx] // Z ZL fxch ST(1) // ZL Z fistp temp mov eax, DWORD PTR temp mov DWORD PTR [OFFSET(SHADER.zLittle)][edx], eax fistp temp mov eax, DWORD PTR temp mov DWORD PTR [OFFSET(SHADER.frag.z)][edx], eax } #if FORCE_NPX_DEBUG { ULONG fragZ = FTOL((a->window.z + fdx*sh.dzdxf + (fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale); __GLfloat zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale; LONG shZLittle = FTOL(zLittle); if (sh.frag.z != fragZ) DbgPrint("fragZ %x %x\n", fragZ, sh.frag.z); if (sh.zLittle != shZLittle) DbgPrint("sh.zLittle %x %x\n", shZLittle, sh.zLittle); } _asm { mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] } #endif // FORCE_NPX_DEBUG noZ: _asm{ test edi, __GL_SHADE_SMOOTH je done test edi, __GL_SHADE_RGB jne rgbShade } // ciShade: { CASTFIX(sh.rLittle) = FLT_TO_FIX(gc->polygon.shader.drdy + dxLeftLittle * gc->polygon.shader.drdx); GENACCEL(gc).spanValue.r = FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy); } _asm{ mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] jmp done } rgbShade: _asm { mov eax, a fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.drdx)][edx] fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.dgdx)][edx] // g r fxch ST(1) // r g fadd DWORD PTR [OFFSET(SHADER.drdy)][edx] // R g fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.dbdx)][edx] // b R g fxch ST(2) // g R b fadd DWORD PTR [OFFSET(SHADER.dgdy)][edx] // G R b fxch ST(2) // b R G fadd DWORD PTR [OFFSET(SHADER.dbdy)][edx] // B R G fxch ST(1) // R B G fmul __glVal65536 // sR B G fxch ST(2) // G B sR fmul __glVal65536 // sG B sR fxch ST(1) // B sG sR fmul __glVal65536 // sB sG sR fxch ST(2) // sR sG sB fistp DWORD PTR [OFFSET(SHADER.rLittle)][edx] fistp DWORD PTR [OFFSET(SHADER.gLittle)][edx] fistp DWORD PTR [OFFSET(SHADER.bLittle)][edx] fld DWORD PTR [OFFSET(SHADER.drdx)][edx] mov eax, [OFFSET(__GLvertex.color)][eax] fmul fdx fld DWORD PTR [OFFSET(SHADER.drdy)][edx] fmul fdy // r r fxch ST(1) // r r fadd DWORD PTR [OFFSET(__GLcolor.r)][eax] fld DWORD PTR [OFFSET(SHADER.dgdx)][edx] fmul fdx fld DWORD PTR [OFFSET(SHADER.dgdy)][edx] fmul fdy // g g r r fxch ST(1) // g g r r fadd DWORD PTR [OFFSET(__GLcolor.g)][eax] fld DWORD PTR [OFFSET(SHADER.dbdx)][edx] fmul fdx fld DWORD PTR [OFFSET(SHADER.dbdy)][edx] fmul fdy // b b g g r r fxch ST(1) // b b g g r r fadd DWORD PTR [OFFSET(__GLcolor.b)][eax] fxch ST(4) // r b g g b r faddp ST(5), ST // b g g b r fxch ST(2) // g g b b r faddp ST(1), ST // g b b r fxch ST(2) // b b g r faddp ST(1), ST // b g r fxch ST(2) // r g b fmul __glVal65536 // R g b fxch ST(1) // g R b fmul __glVal65536 // G R b fxch ST(2) // b R G fmul __glVal65536 // B R G fxch ST(1) // R B G fadd __glVal128 // R B G fxch ST(2) // G B R fadd __glVal128 // G B R fxch ST(1) // B G R fadd __glVal128 // B G R fxch ST(2) // R G B fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.r)][edx] fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.g)][edx] fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.b)][edx] } #if FORCE_NPX_DEBUG { LONG rLittle = FLT_TO_FIX(gc->polygon.shader.drdy + (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.drdx); LONG gLittle = FLT_TO_FIX(gc->polygon.shader.dgdy + (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dgdx); LONG bLittle = FLT_TO_FIX(gc->polygon.shader.dbdy + (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dbdx); LONG spanR = FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy)+0x0080; LONG spanG = FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy)+0x0080; LONG spanB = FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy)+0x0080; if (CASTFIX(sh.rLittle) != rLittle) DbgPrint("rLittle: %x %x\n", rLittle, sh.rLittle); if (CASTFIX(sh.gLittle) != gLittle) DbgPrint("gLittle: %x %x\n", gLittle, sh.gLittle); if (CASTFIX(sh.bLittle) != bLittle) DbgPrint("bLittle: %x %x\n", bLittle, sh.bLittle); if (spanR != GENACCEL(gc).spanValue.r) DbgPrint("spanR: %x %x\n", spanR, GENACCEL(gc).spanValue.r); if (spanG != GENACCEL(gc).spanValue.g) DbgPrint("spanG: %x %x\n", spanG, GENACCEL(gc).spanValue.g); if (spanB != GENACCEL(gc).spanValue.b) DbgPrint("spanB: %x %x\n", spanB, GENACCEL(gc).spanValue.b); } _asm { mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] } #endif // FORCE_NPX_DEBUG done: _asm { mov eax, [OFFSET(SHADER.dxLeftBig)][edx] mov ecx, [OFFSET(SHADER.dxLeftLittle)][edx] cmp eax, ecx jle littleGreater test edi, __GL_SHADE_SMOOTH je bigNoSmooth mov eax, [OFFSET(SHADER.rLittle)][edx] mov ecx, [OFFSET(SPANDELTA.r)][edx] mov esi, [OFFSET(SHADER.gLittle)][edx] mov ebx, [OFFSET(SPANDELTA.g)][edx] add eax, ecx add esi, ebx mov [OFFSET(SHADER.rBig)][edx], eax mov [OFFSET(SHADER.gBig)][edx], esi mov eax, [OFFSET(SHADER.bLittle)][edx] mov ecx, [OFFSET(SPANDELTA.b)][edx] mov esi, [OFFSET(SHADER.zLittle)][edx] mov ebx, [OFFSET(SHADER.dzdx)][edx] add eax, ecx add esi, ebx mov [OFFSET(SHADER.bBig)][edx], eax mov [OFFSET(SHADER.zBig)][edx], esi bigNoSmooth: test edi, __GL_SHADE_DEPTH_ITER je done2 mov eax, [OFFSET(SHADER.zLittle)][edx] mov ecx, [OFFSET(SHADER.dzdx)][edx] add eax, ecx mov [OFFSET(SHADER.zBig)][edx], eax jmp done2 littleGreater: test edi, __GL_SHADE_SMOOTH je smallNoSmooth mov eax, [OFFSET(SHADER.rLittle)][edx] mov ecx, [OFFSET(SPANDELTA.r)][edx] mov esi, [OFFSET(SHADER.gLittle)][edx] mov ebx, [OFFSET(SPANDELTA.g)][edx] sub eax, ecx sub esi, ebx mov [OFFSET(SHADER.rBig)][edx], eax mov [OFFSET(SHADER.gBig)][edx], esi mov eax, [OFFSET(SHADER.bLittle)][edx] mov ecx, [OFFSET(SPANDELTA.b)][edx] mov esi, [OFFSET(SHADER.zLittle)][edx] mov ebx, [OFFSET(SHADER.dzdx)][edx] sub eax, ecx sub esi, ebx mov [OFFSET(SHADER.bBig)][edx], eax mov [OFFSET(SHADER.zBig)][edx], esi smallNoSmooth: test edi, __GL_SHADE_DEPTH_ITER je done2 mov eax, [OFFSET(SHADER.zLittle)][edx] mov ecx, [OFFSET(SHADER.dzdx)][edx] sub eax, ecx mov [OFFSET(SHADER.zBig)][edx], eax done2: } #else _X86_ __GLfloat zLittle; dxLeftLittle = (__GLfloat)sh.dxLeftLittle; if (sh.modeFlags & __GL_SHADE_SMOOTH) { if (sh.modeFlags & __GL_SHADE_RGB) { CASTFIX(sh.rLittle) = FLT_TO_FIX(gc->polygon.shader.drdy + dxLeftLittle * gc->polygon.shader.drdx); CASTFIX(sh.gLittle) = FLT_TO_FIX(gc->polygon.shader.dgdy + dxLeftLittle * gc->polygon.shader.dgdx); CASTFIX(sh.bLittle) = FLT_TO_FIX(gc->polygon.shader.dbdy + dxLeftLittle * gc->polygon.shader.dbdx); GENACCEL(gc).spanValue.r = FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy); GENACCEL(gc).spanValue.g = FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy); GENACCEL(gc).spanValue.b = FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy); } else { CASTFIX(sh.rLittle) = FLT_TO_FIX(gc->polygon.shader.drdy + dxLeftLittle * gc->polygon.shader.drdx); GENACCEL(gc).spanValue.r = FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy); } } if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) { zOffset = bPolygonOffset ? __glPolygonOffsetZ(gc) : 0.0f; sh.frag.z = FTOL((a->window.z + fdx*sh.dzdxf + (fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale); zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale; sh.zLittle = FTOL(zLittle); } if (sh.dxLeftBig > sh.dxLeftLittle) { if (sh.modeFlags & __GL_SHADE_SMOOTH) { CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) + GENACCEL(gc).spanDelta.r; CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) + GENACCEL(gc).spanDelta.g; CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) + GENACCEL(gc).spanDelta.b; } if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) { sh.zBig = sh.zLittle + sh.dzdx; } } else { if (sh.modeFlags & __GL_SHADE_SMOOTH) { CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) - GENACCEL(gc).spanDelta.r; CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) - GENACCEL(gc).spanDelta.g; CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) - GENACCEL(gc).spanDelta.b; } if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) { sh.zBig = sh.zLittle - sh.dzdx; } } #endif } void __fastGenSetInitialParametersTexRGBA( __GLcontext *gc, const __GLvertex *a, __GLfloat fdx, __GLfloat fdy) { #define sh gc->polygon.shader __GLfloat zOffset; __GLfloat dxLeftLittle; #if _X86_ && ENABLE_ASM LARGE_INTEGER temp; _asm{ mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] fild DWORD PTR [OFFSET(SHADER.dxLeftLittle)][edx] test edi, __GL_SHADE_TEXTURE mov eax, [OFFSET(__GLcontext.state.texture.env)][edx] je notTexture mov ebx, [OFFSET(__GLtextureEnvState.mode)][eax] cmp ebx, GL_REPLACE je fastReplace cmp ebx, GL_DECAL jne notTexture fastReplace: fstp dxLeftLittle jmp colorDone notTexture: test edi, __GL_SHADE_SMOOTH fstp dxLeftLittle je colorDone } _asm { mov eax, a fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.drdx)][edx] fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.dgdx)][edx] // g r fxch ST(1) // r g fadd DWORD PTR [OFFSET(SHADER.drdy)][edx] // R g fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.dbdx)][edx] // b R g fxch ST(2) // g R b fadd DWORD PTR [OFFSET(SHADER.dgdy)][edx] // G R b fxch ST(2) // b R G fadd DWORD PTR [OFFSET(SHADER.dbdy)][edx] // B R G fxch ST(1) // R B G fmul __glVal65536 // sR B G fxch ST(2) // G B sR fmul __glVal65536 // sG B sR fxch ST(1) // B sG sR fmul __glVal65536 // sB sG sR fxch ST(2) // sR sG sB fistp DWORD PTR [OFFSET(SHADER.rLittle)][edx] mov eax, [OFFSET(__GLvertex.color)][eax] fistp DWORD PTR [OFFSET(SHADER.gLittle)][edx] fistp DWORD PTR [OFFSET(SHADER.bLittle)][edx] fld DWORD PTR [OFFSET(SHADER.drdx)][edx] fmul fdx fld DWORD PTR [OFFSET(SHADER.drdy)][edx] fmul fdy // r r fxch ST(1) // r r fadd DWORD PTR [OFFSET(__GLcolor.r)][eax] fld DWORD PTR [OFFSET(SHADER.dgdx)][edx] fmul fdx fld DWORD PTR [OFFSET(SHADER.dgdy)][edx] fmul fdy // g g r r fxch ST(1) // g g r r fadd DWORD PTR [OFFSET(__GLcolor.g)][eax] fld DWORD PTR [OFFSET(SHADER.dbdx)][edx] fmul fdx fld DWORD PTR [OFFSET(SHADER.dbdy)][edx] fmul fdy // b b g g r r fxch ST(1) // b b g g r r fadd DWORD PTR [OFFSET(__GLcolor.b)][eax] fxch ST(4) // r b g g b r faddp ST(5), ST // b g g b r fxch ST(2) // g g b b r faddp ST(1), ST // g b b r fxch ST(2) // b b g r faddp ST(1), ST // b g r fxch ST(2) // r g b fmul __glVal65536 // R g b fxch ST(1) // g R b fmul __glVal65536 // G R b fxch ST(2) // b R G fmul __glVal65536 // B R G fxch ST(1) // R B G fadd __glVal128 // R B G fxch ST(2) // G B R fadd __glVal128 // G B R fxch ST(1) // B G R fadd __glVal128 // B G R fxch ST(2) // R G B fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.r)][edx] fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.g)][edx] mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx] fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.b)][edx] } _asm{ test ebx, __GL_BLEND_ENABLE je noBlend } _asm{ mov eax, a fld DWORD PTR [OFFSET(SHADER.dadx)][edx] mov eax, DWORD PTR [OFFSET(__GLvertex.color)][eax] fmul fdx fld DWORD PTR [OFFSET(SHADER.dady)][edx] fmul fdy // a a fxch ST(1) fadd DWORD PTR [OFFSET(__GLcolor.a)][eax] // a a fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.dadx)][edx] // al a a fxch ST(1) // a al a faddp ST(2), ST // al a fadd DWORD PTR [OFFSET(SHADER.dady)][edx] // al a (+1) fxch ST(1) // a al fmul DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx] // A al fxch ST(1) // al A fmul DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx] // AL A (+1) fxch ST(1) // A AL fadd __glVal128 // A AL (+1) fxch ST(1) // AL A fistp DWORD PTR [OFFSET(SHADER.aLittle)][edx] fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.a)][edx] } #if FORCE_NPX_DEBUG { LONG aLittle = FTOL((gc->polygon.shader.dady + (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dadx) * GENACCEL(gc).aAccelScale); LONG spanA = FTOL((a->color->a + fdx * sh.dadx + fdy * sh.dady) * GENACCEL(gc).aAccelScale)+0x0080; if (aLittle != CASTFIX(sh.aLittle)) DbgPrint("sh.aLittle %x %x\n", aLittle, CASTFIX(sh.aLittle)); if (spanA != GENACCEL(gc).spanValue.a) DbgPrint("spanValue.a %x %x\n", spanA, GENACCEL(gc).spanValue.a); } _asm { mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] } #endif // FORCE_NPX_DEBUG noBlend: #if FORCE_NPX_DEBUG { LONG rLittle = FLT_TO_FIX(gc->polygon.shader.drdy + (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.drdx); LONG gLittle = FLT_TO_FIX(gc->polygon.shader.dgdy + (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dgdx); LONG bLittle = FLT_TO_FIX(gc->polygon.shader.dbdy + (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dbdx); LONG spanR = FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy)+0x0080; LONG spanG = FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy)+0x0080; LONG spanB = FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy)+0x0080; if (CASTFIX(sh.rLittle) != rLittle) DbgPrint("rLittle: %x %x\n", rLittle, sh.rLittle); if (CASTFIX(sh.gLittle) != gLittle) DbgPrint("gLittle: %x %x\n", gLittle, sh.gLittle); if (CASTFIX(sh.bLittle) != bLittle) DbgPrint("bLittle: %x %x\n", bLittle, sh.bLittle); if (spanR != GENACCEL(gc).spanValue.r) DbgPrint("spanR: %x %x\n", spanR, GENACCEL(gc).spanValue.r); if (spanG != GENACCEL(gc).spanValue.g) DbgPrint("spanG: %x %x\n", spanG, GENACCEL(gc).spanValue.g); if (spanB != GENACCEL(gc).spanValue.b) DbgPrint("spanB: %x %x\n", spanB, GENACCEL(gc).spanValue.b); } _asm { mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] } #endif // FORCE_NPX_DEBUG colorDone: _asm{ test edi, __GL_SHADE_TEXTURE je doneTexture mov ebx, [OFFSET(__GLcontext.state.hints.perspectiveCorrection)][edx] cmp ebx, GL_NICEST je nicestTex } _asm{ mov eax, a fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx] fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx] // dt ds fld fdx fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx] fld fdy fmul DWORD PTR [OFFSET(SHADER.dsdy)][edx] fxch ST(1) // s s dt ds fadd DWORD PTR [OFFSET(__GLvertex.texture.x)][eax] fxch ST(3) // ds s dt s fadd DWORD PTR [OFFSET(SHADER.dsdy)][edx] fld fdx fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx] fld fdy fmul DWORD PTR [OFFSET(SHADER.dtdy)][edx] fxch ST(1) // t t ds s dt s fadd DWORD PTR [OFFSET(__GLvertex.texture.y)][eax] fxch ST(4) // dt t ds s t s fadd DWORD PTR [OFFSET(SHADER.dtdy)][edx] fxch ST(5) // s t ds s t dt faddp ST(3), ST // t ds s t dt faddp ST(3), ST // ds s t dt fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx] // DS s t dt fxch ST(3) // dt s t DS fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx] // DT s t DS fxch ST(1) // s DT t DS fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx] // S DT t DS fxch ST(2) // t DT S DS fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx] // T DT S DS fxch ST(3) // DS DT S T fistp DWORD PTR [OFFSET(SHADER.sLittle)][edx] fistp DWORD PTR [OFFSET(SHADER.tLittle)][edx] fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.s)][edx] fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.t)][edx] #if !FORCE_NPX_DEBUG jmp doneTexture #endif } #if FORCE_NPX_DEBUG { LONG sLittle = FTOL((gc->polygon.shader.dsdy + (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale); LONG tLittle = FTOL((gc->polygon.shader.dtdy + (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale); LONG spanS = FTOL((a->texture.x + (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale); LONG spanT = FTOL((a->texture.y + (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale); if (sLittle != CASTFIX(sh.sLittle)) DbgPrint("sLittle %x %x\n", sLittle, CASTFIX(sh.sLittle)); if (tLittle != CASTFIX(sh.tLittle)) DbgPrint("tLittle %x %x\n", tLittle, CASTFIX(sh.tLittle)); if (GENACCEL(gc).spanValue.s != spanS) DbgPrint("spanValue.s %x %x\n", spanS, GENACCEL(gc).spanValue.s); if (GENACCEL(gc).spanValue.t != spanT) DbgPrint("spanValue.t %x %x\n", spanT, GENACCEL(gc).spanValue.t); } _asm { mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] jmp doneTexture; } #endif // FORCE_NPX_DEBUG nicestTex: _asm{ mov eax, a fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx] fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx] // dt ds fld DWORD PTR fdx fmul DWORD PTR [OFFSET(SHADER.dqwdx)][edx] // qwx dt ds fxch ST(2) // ds dt qwx fadd DWORD PTR [OFFSET(SHADER.dsdy)][edx] fxch ST(1) // dt ds qwx fadd DWORD PTR [OFFSET(SHADER.dtdy)][edx] fxch ST(2) // qwx ds dt fld DWORD PTR fdy fmul DWORD PTR [OFFSET(SHADER.dqwdy)][edx] // qwy qwx ds dt fxch ST(2) // ds qwx qwy dt fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx] fxch ST(3) // dt qwx qwy ds fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx] fxch ST(2) // qwy qwx dt ds fld DWORD PTR [OFFSET(__GLvertex.texture.w)][eax] fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax] // qww qwy qwx dt ds fxch ST(4) // ds qwy qwx dt qww fistp DWORD PTR [OFFSET(SHADER.sLittle)][edx] // qwy qwx dt qww faddp ST(1), ST // qw dt qww fxch ST(1) // dt qw qww fistp DWORD PTR [OFFSET(SHADER.tLittle)][edx] // qw qww fld DWORD PTR [OFFSET(SHADER.dqwdx)][edx] fmul dxLeftLittle // lt qw qww fxch ST(1) // qw lt qww faddp ST(2), ST // lt qw fld DWORD PTR [OFFSET(__GLvertex.texture.x)][eax] fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax] // s lt qw fxch ST(1) // lt s qw fadd DWORD PTR [OFFSET(SHADER.dqwdy)][edx] fxch ST(1) // s lt qw fld fdx fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx] fld fdy fmul DWORD PTR [OFFSET(SHADER.dsdy)][edx] fxch ST(1) // s s s lt qw faddp ST(2), ST // s s lt qw fld DWORD PTR [OFFSET(__GLvertex.texture.y)][eax] fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax] fld fdx fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx] // t t s s lt qw fxch ST(2) // s t t s lt qw faddp ST(3), ST // t t s lt qw fld fdy fmul DWORD PTR [OFFSET(SHADER.dtdy)][edx] fxch ST(1) // t t t s lt qw faddp ST(2), ST // t t s lt qw fxch ST(2) // s t t lt qw fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx] // S t t lt qw fxch ST(4) // qw t t lt S fstp DWORD PTR [OFFSET(SHADER.frag.qw)][edx] faddp ST(1), ST // t lt S fxch ST(1) // lt t S fstp DWORD PTR [OFFSET(SHADER.qwLittle)][edx] // t S fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx] // (+1) // T S fxch ST(1) // S T fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.s)][edx] fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.t)][edx] } #if FORCE_NPX_DEBUG { LONG sLittle = FTOL((gc->polygon.shader.dsdy + dxLeftLittle * gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale); LONG tLittle = FTOL((gc->polygon.shader.dtdy + dxLeftLittle * gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale); __GLfloat qw = (a->texture.w * a->window.w) + (fdx * sh.dqwdx) + (fdy * sh.dqwdy); __GLfloat qwLittle = sh.dqwdy + dxLeftLittle * sh.dqwdx; LONG spanS = FTOL(((a->texture.x * a->window.w) + (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale); LONG spanT = FTOL(((a->texture.y * a->window.w) + (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale); if (sLittle != CASTFIX(sh.sLittle)) DbgPrint("sLittle %x %x\n", sLittle, CASTFIX(sh.sLittle)); if (tLittle != CASTFIX(sh.tLittle)) DbgPrint("tLittle %x %x\n", tLittle, CASTFIX(sh.tLittle)); if (qw != sh.frag.qw) DbgPrint("qw %f %f\n", qw, sh.frag.qw); if (qwLittle != sh.qwLittle) DbgPrint("qw %f %f\n", qwLittle, sh.qwLittle); if (GENACCEL(gc).spanValue.s != spanS) DbgPrint("spanValue.s %x %x\n", spanS, GENACCEL(gc).spanValue.s); if (GENACCEL(gc).spanValue.t != spanT) DbgPrint("spanValue.t %x %x\n", spanT, GENACCEL(gc).spanValue.t); } _asm { mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] } #endif // FORCE_NPX_DEBUG doneTexture: _asm{ test edi, __GL_SHADE_DEPTH_ITER je noZ } _asm{ mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx] mov ecx, __glZero test ebx, __GL_POLYGON_OFFSET_FILL_ENABLE mov zOffset, ecx je noPolyOffset } zOffset = __glPolygonOffsetZ(gc); _asm{ mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] } noPolyOffset: _asm{ mov eax, a fld fdx fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx] fld fdy fmul DWORD PTR [OFFSET(SHADER.dzdyf)][edx] // zy zx fxch ST(1) // zx zy fadd DWORD PTR [OFFSET(__GLvertex.window.z)][eax] fld dxLeftLittle fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx] // zl zy zx fxch ST(1) // zy zl zx fadd zOffset fxch ST(1) // zl zy zx fadd DWORD PTR [OFFSET(SHADER.dzdyf)][edx] // zl zy zx (+1) fxch ST(1) // zy zl zx faddp ST(2), ST // zl z fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx] // ZL z (+1) fxch ST(1) // z ZL fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx] // Z ZL fxch ST(1) // ZL Z fistp temp mov eax, DWORD PTR temp mov DWORD PTR [OFFSET(SHADER.zLittle)][edx], eax fistp temp mov eax, DWORD PTR temp mov DWORD PTR [OFFSET(SHADER.frag.z)][edx], eax } #if FORCE_NPX_DEBUG { ULONG fragZ = FTOL((a->window.z + fdx*sh.dzdxf + (fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale); __GLfloat zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale; LONG shZLittle = FTOL(zLittle); if (sh.frag.z != fragZ) DbgPrint("fragZ %x %x\n", fragZ, sh.frag.z); if (sh.zLittle != shZLittle) DbgPrint("sh.zLittle %x %x\n", shZLittle, sh.zLittle); } _asm { mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] } #endif // FORCE_NPX_DEBUG noZ: _asm { mov eax, [OFFSET(SHADER.dxLeftBig)][edx] mov ecx, [OFFSET(SHADER.dxLeftLittle)][edx] cmp eax, ecx jle littleGreater test edi, __GL_SHADE_SMOOTH je bigNoSmooth mov eax, [OFFSET(SHADER.rLittle)][edx] mov ecx, [OFFSET(SPANDELTA.r)][edx] mov esi, [OFFSET(SHADER.gLittle)][edx] mov ebx, [OFFSET(SPANDELTA.g)][edx] add eax, ecx add esi, ebx mov [OFFSET(SHADER.rBig)][edx], eax mov [OFFSET(SHADER.gBig)][edx], esi mov eax, [OFFSET(SHADER.bLittle)][edx] mov ecx, [OFFSET(SPANDELTA.b)][edx] mov esi, [OFFSET(SHADER.aLittle)][edx] mov ebx, [OFFSET(SPANDELTA.a)][edx] add eax, ecx add esi, ebx mov [OFFSET(SHADER.bBig)][edx], eax mov [OFFSET(SHADER.aBig)][edx], esi bigNoSmooth: test edi, __GL_SHADE_TEXTURE je bigNoTexture fld DWORD PTR [OFFSET(SHADER.qwLittle)][edx] mov eax, [OFFSET(SHADER.sLittle)][edx] fadd DWORD PTR [OFFSET(SHADER.dqwdx)][edx] mov ecx, [OFFSET(SPANDELTA.s)][edx] mov esi, [OFFSET(SHADER.tLittle)][edx] mov ebx, [OFFSET(SPANDELTA.t)][edx] add eax, ecx add esi, ebx mov [OFFSET(SHADER.sBig)][edx], eax mov [OFFSET(SHADER.tBig)][edx], esi fstp DWORD PTR [OFFSET(SHADER.qwBig)][edx] bigNoTexture: test edi, __GL_SHADE_DEPTH_ITER je done mov eax, [OFFSET(SHADER.zLittle)][edx] mov ecx, [OFFSET(SHADER.dzdx)][edx] add eax, ecx mov [OFFSET(SHADER.zBig)][edx], eax jmp done littleGreater: test edi, __GL_SHADE_SMOOTH je smallNoSmooth mov eax, [OFFSET(SHADER.rLittle)][edx] mov ecx, [OFFSET(SPANDELTA.r)][edx] mov esi, [OFFSET(SHADER.gLittle)][edx] mov ebx, [OFFSET(SPANDELTA.g)][edx] sub eax, ecx sub esi, ebx mov [OFFSET(SHADER.rBig)][edx], eax mov [OFFSET(SHADER.gBig)][edx], esi mov eax, [OFFSET(SHADER.bLittle)][edx] mov ecx, [OFFSET(SPANDELTA.b)][edx] mov esi, [OFFSET(SHADER.aLittle)][edx] mov ebx, [OFFSET(SPANDELTA.a)][edx] sub eax, ecx sub esi, ebx mov [OFFSET(SHADER.bBig)][edx], eax mov [OFFSET(SHADER.aBig)][edx], esi smallNoSmooth: test edi, __GL_SHADE_TEXTURE je smallNoTexture fld DWORD PTR [OFFSET(SHADER.qwLittle)][edx] mov eax, [OFFSET(SHADER.sLittle)][edx] fsub DWORD PTR [OFFSET(SHADER.dqwdx)][edx] mov ecx, [OFFSET(SPANDELTA.s)][edx] mov esi, [OFFSET(SHADER.tLittle)][edx] mov ebx, [OFFSET(SPANDELTA.t)][edx] sub eax, ecx sub esi, ebx mov [OFFSET(SHADER.sBig)][edx], eax mov [OFFSET(SHADER.tBig)][edx], esi fstp DWORD PTR [OFFSET(SHADER.qwBig)][edx] smallNoTexture: test edi, __GL_SHADE_DEPTH_ITER je done mov eax, [OFFSET(SHADER.zLittle)][edx] mov ecx, [OFFSET(SHADER.dzdx)][edx] sub eax, ecx mov [OFFSET(SHADER.zBig)][edx], eax done: } #else __GLfloat zLittle; __GLfloat tmp1, tmp2; dxLeftLittle = (float)sh.dxLeftLittle; // Don't bother with the color deltas if we're decaling or replacing // with textures. if ((gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) && ((gc->state.texture.env[0].mode == GL_REPLACE) || (gc->state.texture.env[0].mode == GL_DECAL))) { ; } else if (sh.modeFlags & __GL_SHADE_SMOOTH) { CASTFIX(sh.rLittle) = FLT_TO_FIX(gc->polygon.shader.drdy + dxLeftLittle * gc->polygon.shader.drdx); CASTFIX(sh.gLittle) = FLT_TO_FIX(gc->polygon.shader.dgdy + dxLeftLittle * gc->polygon.shader.dgdx); CASTFIX(sh.bLittle) = FLT_TO_FIX(gc->polygon.shader.dbdy + dxLeftLittle * gc->polygon.shader.dbdx); GENACCEL(gc).spanValue.r = FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy)+0x0080; GENACCEL(gc).spanValue.g = FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy)+0x0080; GENACCEL(gc).spanValue.b = FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy)+0x0080; if (gc->state.enables.general & __GL_BLEND_ENABLE) { CASTFIX(sh.aLittle) = FTOL((gc->polygon.shader.dady + dxLeftLittle * gc->polygon.shader.dadx) * GENACCEL(gc).aAccelScale); GENACCEL(gc).spanValue.a = FTOL((a->color->a + fdx * sh.dadx + fdy * sh.dady) * GENACCEL(gc).aAccelScale)+0x0080; } } if (sh.modeFlags & __GL_SHADE_TEXTURE) { if (gc->state.hints.perspectiveCorrection != GL_NICEST) { tmp1 = (gc->polygon.shader.dsdy + dxLeftLittle * gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale; tmp2 = (gc->polygon.shader.dtdy + dxLeftLittle * gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale; CASTFIX(sh.sLittle) = FTOL(tmp1); CASTFIX(sh.tLittle) = FTOL(tmp2); tmp1 = (a->texture.x + (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale; tmp2 = (a->texture.y + (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale; GENACCEL(gc).spanValue.s = FTOL(tmp1); GENACCEL(gc).spanValue.t = FTOL(tmp2); } else { tmp1 = (gc->polygon.shader.dsdy + dxLeftLittle * gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale; tmp2 = (gc->polygon.shader.dtdy + dxLeftLittle * gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale; CASTFIX(sh.sLittle) = FTOL(tmp1); CASTFIX(sh.tLittle) = FTOL(tmp2); sh.frag.qw = (a->texture.w * a->window.w) + (fdx * sh.dqwdx) + (fdy * sh.dqwdy); sh.qwLittle = sh.dqwdy + dxLeftLittle * sh.dqwdx; tmp1 = ((a->texture.x * a->window.w) + (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale; tmp2 = ((a->texture.y * a->window.w) + (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale; GENACCEL(gc).spanValue.s = FTOL(tmp1); GENACCEL(gc).spanValue.t = FTOL(tmp2); } } if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) { zOffset = bPolygonOffset ? __glPolygonOffsetZ(gc) : 0.0f; sh.frag.z = FTOL((a->window.z + fdx*sh.dzdxf + (fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale); zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale; sh.zLittle = FTOL(zLittle); } if (sh.dxLeftBig > sh.dxLeftLittle) { if (sh.modeFlags & __GL_SHADE_SMOOTH) { CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) + GENACCEL(gc).spanDelta.r; CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) + GENACCEL(gc).spanDelta.g; CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) + GENACCEL(gc).spanDelta.b; if (gc->state.enables.general & __GL_BLEND_ENABLE) CASTFIX(sh.aBig) = CASTFIX(sh.aLittle) + GENACCEL(gc).spanDelta.a; } if (sh.modeFlags & __GL_SHADE_TEXTURE) { CASTFIX(sh.sBig) = CASTFIX(sh.sLittle) + GENACCEL(gc).spanDelta.s; CASTFIX(sh.tBig) = CASTFIX(sh.tLittle) + GENACCEL(gc).spanDelta.t; sh.qwBig = sh.qwLittle + sh.dqwdx; } if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) { sh.zBig = sh.zLittle + sh.dzdx; } } else { if (sh.modeFlags & __GL_SHADE_SMOOTH) { CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) - GENACCEL(gc).spanDelta.r; CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) - GENACCEL(gc).spanDelta.g; CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) - GENACCEL(gc).spanDelta.b; if (gc->state.enables.general & __GL_BLEND_ENABLE) CASTFIX(sh.aBig) = CASTFIX(sh.aLittle) - GENACCEL(gc).spanDelta.a; } if (sh.modeFlags & __GL_SHADE_TEXTURE) { CASTFIX(sh.sBig) = CASTFIX(sh.sLittle) - GENACCEL(gc).spanDelta.s; CASTFIX(sh.tBig) = CASTFIX(sh.tLittle) - GENACCEL(gc).spanDelta.t; sh.qwBig = sh.qwLittle - sh.dqwdx; } if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) { sh.zBig = sh.zLittle - sh.dzdx; } } #endif } /**************************************************************************\ \**************************************************************************/ void FASTCALL __fastGenCalcDeltas( __GLcontext *gc, __GLvertex *a, __GLvertex *b, __GLvertex *c) { __GLfloat oneOverArea, t1, t2, t3, t4; #if _X86_ && ENABLE_ASM LARGE_INTEGER temp; _asm{ mov edx, gc fld __glOne fdiv DWORD PTR [OFFSET(SHADER.area)][edx] mov edi, [OFFSET(SHADER.modeFlags)][edx] test edi, __GL_SHADE_RGB je notRGB test edi, __GL_SHADE_SMOOTH je notSmoothRGB } _asm{ mov eax, a mov ebx, b mov ecx, c fstp oneOverArea // finish divide fld DWORD PTR [OFFSET(SHADER.dyAC)][edx] mov eax, [OFFSET(__GLvertex.color)][eax] fmul oneOverArea fld DWORD PTR [OFFSET(SHADER.dyBC)][edx] mov ebx, [OFFSET(__GLvertex.color)][ebx] fmul oneOverArea // dyBC dyAC fld DWORD PTR [OFFSET(SHADER.dxAC)][edx] mov ecx, [OFFSET(__GLvertex.color)][ecx] fmul oneOverArea // dxAC dyBC dyAC fxch ST(1) // dyBC dxAC dyAC fld DWORD PTR [OFFSET(SHADER.dxBC)][edx] fmul oneOverArea // dxBC dyBC dxAC dyAC fxch ST(3) // dyAC dyBC dxAC dxBC fstp t1 fstp t2 fstp t3 fstp t4 // Now, calculate deltas: // Red fld DWORD PTR [OFFSET(__GLcolor.r)][eax] fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx] fld DWORD PTR [OFFSET(__GLcolor.r)][ebx] fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx] // drBC drAC fld ST(1) // drAC drBC drAC fmul t2 // drACt2 drBC drAC fld ST(1) // drBC drACt2 drBC drAC fmul t1 // drBCt1 drACt2 drBC drAC fxch ST(2) // drBC drACt2 drBCt1 drAC fmul t3 // drBCt3 drACt2 drBCt1 drAC fxch ST(3) // drAC drACt2 drBCt1 drBCt3 fmul t4 // drACt4 drACt2 drBCt1 drBCt3 fxch ST(2) // drBCt1 drACt2 drACt4 drBCt3 fsubp ST(1), ST // drACBC drACt4 drBCt3 fld DWORD PTR [OFFSET(__GLcolor.g)][ebx] fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx] // dgBC drACBC drACt4 drBCt3 fxch ST(2) // drACt4 drACBC dgBC drBCt3 fsubp ST(3), ST // drACBC dgBC drBCAC fst DWORD PTR [OFFSET(SHADER.drdx)][edx] fmul __glVal65536 // DRACBC dgBC drBCAC fxch ST(2) // drBCAC dgBC DRACBC fstp DWORD PTR [OFFSET(SHADER.drdy)][edx] // dgBC DRACBC fld DWORD PTR [OFFSET(__GLcolor.g)][eax] fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx] // dgAC dgBC DRACBC fxch ST(2) // DRACBC dgBC dgAC fistp DWORD PTR [OFFSET(SPANDELTA.r)][edx] // Green // dgBC dgAC fld ST(1) // dgAC dgBC dgAC fmul t2 // dgACt2 dgBC dgAC fld ST(1) // dgBC dgACt2 dgBC dgAC fmul t1 // dgBCt1 dgACt2 dgBC dgAC fxch ST(2) // dgBC dgACt2 dgBCt1 dgAC fmul t3 // dgBCt3 dgACt2 dgBCt1 dgAC fxch ST(3) // dgAC dgACt2 dgBCt1 dgBCt3 fmul t4 // dgACt4 dgACt2 dgBCt1 dgBCt3 fxch ST(2) // dgBCt1 dgACt2 dgACt4 dgBCt3 fsubp ST(1), ST // dgACBC dgACt4 dgBCt3 fld DWORD PTR [OFFSET(__GLcolor.b)][ebx] fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx] // dbBC dgACBC dgACt4 dgBCt3 fxch ST(2) // dgACt4 dgACBC dbBC dgBCt3 fsubp ST(3), ST // dgACBC dbBC dgBCAC fst DWORD PTR [OFFSET(SHADER.dgdx)][edx] fmul __glVal65536 // DGACBC dbBC dgBCAC fxch ST(2) // dgBCAC dbBC DGACBC fstp DWORD PTR [OFFSET(SHADER.dgdy)][edx] // dbBC DGACBC fld DWORD PTR [OFFSET(__GLcolor.b)][eax] fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx] // dbAC dbBC DGACBC fxch ST(2) // DGACBC dbBC dbAC fistp DWORD PTR [OFFSET(SPANDELTA.g)][edx] // Blue // dbBC dbAC fld ST(1) // dbAC dbBC dbAC fmul t2 // dbACt2 dbBC dbAC fld ST(1) // dbBC dbACt2 dbBC dbAC fmul t1 // dbBCt1 dbACt2 dbBC dbAC fxch ST(2) // dbBC dbACt2 dbBCt1 dbAC fmul t3 // dbBCt3 dbACt2 dbBCt1 dbAC fxch ST(3) // dbAC dbACt2 dbBCt1 dbBCt3 fmul t4 // dbACt4 dbACt2 dbBCt1 dbBCt3 fxch ST(2) // dbBCt1 dbACt2 dbACt4 dbBCt3 fsubp ST(1), ST // dbACBC dbACt4 dbBCt3 fxch ST(1) // dbACt4 dbACBC dbBCt3 fsubp ST(2), ST // dbACBC dbBCAC (+1) fst DWORD PTR [OFFSET(SHADER.dbdx)][edx] fmul __glVal65536 // DBACBC dbBCAC fxch ST(1) // dbBCAC DBACBC fstp DWORD PTR [OFFSET(SHADER.dbdy)][edx] fistp DWORD PTR [OFFSET(SPANDELTA.b)][edx] mov ebx, [OFFSET(GENGCACCEL.__fastSmoothSpanFuncPtr)][edx] mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx mov eax, [OFFSET(SPANDELTA.r)][edx] mov ebx, [OFFSET(SPANDELTA.g)][edx] mov ecx, [OFFSET(SPANDELTA.b)][edx] or eax, ebx or eax, ecx jne notZeroDelta mov eax, [OFFSET(GENGCACCEL.flags)][edx] test eax, GEN_FASTZBUFFER jne notZeroDelta mov ebx, [OFFSET(GENGCACCEL.__fastFlatSpanFuncPtr)][edx] mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx notZeroDelta: #if !FORCE_NPX_DEBUG jmp colorDone #endif } #if FORCE_NPX_DEBUG { __GLfloat drAC, dgAC, dbAC, daAC; __GLfloat drBC, dgBC, dbBC, daBC; __GLcolor *ac, *bc, *cc; __GLfloat ft1 = gc->polygon.shader.dyAC * oneOverArea; __GLfloat ft2 = gc->polygon.shader.dyBC * oneOverArea; __GLfloat ft3 = gc->polygon.shader.dxAC * oneOverArea; __GLfloat ft4 = gc->polygon.shader.dxBC * oneOverArea; __GLfloat drdx; __GLfloat drdy; __GLfloat dgdx; __GLfloat dgdy; __GLfloat dbdx; __GLfloat dbdy; LONG spanR, spanG, spanB; ac = a->color; bc = b->color; cc = c->color; drAC = ac->r - cc->r; drBC = bc->r - cc->r; dgAC = ac->g - cc->g; dgBC = bc->g - cc->g; dbAC = ac->b - cc->b; dbBC = bc->b - cc->b; drdx = drAC * t2 - drBC * t1; drdy = drBC * t3 - drAC * t4; dgdx = dgAC * t2 - dgBC * t1; dgdy = dgBC * t3 - dgAC * t4; dbdx = dbAC * t2 - dbBC * t1; dbdy = dbBC * t3 - dbAC * t4; spanR = FLT_TO_FIX(drdx); spanG = FLT_TO_FIX(dgdx); spanB = FLT_TO_FIX(dbdx); if (ft1 != t1) DbgPrint("t1 %f %f\n", t1, ft1); if (ft2 != t2) DbgPrint("t2 %f %f\n", t2, ft2); if (ft3 != t3) DbgPrint("t3 %f %f\n", t3, ft3); if (ft4 != t4) DbgPrint("t4 %f %f\n", t4, ft4); if (drdx != gc->polygon.shader.drdx) DbgPrint("drdx %f %f\n", drdx, gc->polygon.shader.drdx); if (drdy != gc->polygon.shader.drdy) DbgPrint("drdy %f %f\n", drdy, gc->polygon.shader.drdy); if (dgdx != gc->polygon.shader.dgdx) DbgPrint("dgdx %f %f\n", dgdx, gc->polygon.shader.dgdx); if (dgdy != gc->polygon.shader.dgdy) DbgPrint("dgdy %f %f\n", dgdy, gc->polygon.shader.dgdy); if (dbdx != gc->polygon.shader.dbdx) DbgPrint("dbdx %f %f\n", dbdx, gc->polygon.shader.dbdx); if (dbdy != gc->polygon.shader.dbdy) DbgPrint("dbdy %f %f\n", dbdy, gc->polygon.shader.dbdy); if (spanR != GENACCEL(gc).spanDelta.r) DbgPrint("spanDelta.r %x %x\n", spanR, GENACCEL(gc).spanDelta.r); if (spanG!= GENACCEL(gc).spanDelta.g) DbgPrint("spanDelta.g %x %x\n", spanG, GENACCEL(gc).spanDelta.g); if (spanB != GENACCEL(gc).spanDelta.b) DbgPrint("spanDelta.b %x %x\n", spanB, GENACCEL(gc).spanDelta.b); } _asm{ mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] jmp colorDone } #endif // FORCE_NPX_DEBUG notSmoothRGB: _asm{ mov eax, [OFFSET(__GLcontext.vertex.provoking)][edx] fld __glVal65536 mov eax, [OFFSET(__GLvertex.color)][eax] fmul DWORD PTR [OFFSET(__GLcolor.r)][eax] fld __glVal65536 fmul DWORD PTR [OFFSET(__GLcolor.g)][eax] fld __glVal65536 fmul DWORD PTR [OFFSET(__GLcolor.b)][eax] // B G R fxch ST(2) // R G B fistp DWORD PTR [OFFSET(SPANVALUE.r)][edx] // G B fistp DWORD PTR [OFFSET(SPANVALUE.g)][edx] fistp DWORD PTR [OFFSET(SPANVALUE.b)][edx] mov ebx, [OFFSET(GENGCACCEL.__fastFlatSpanFuncPtr)][edx] mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx jmp colorDone } notRGB: if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { __GLfloat drAC; __GLfloat drBC; __GLcolor *ac, *bc, *cc; ac = a->color; bc = b->color; cc = c->color; drAC = ac->r - cc->r; drBC = bc->r - cc->r; __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea); t1 = gc->polygon.shader.dyAC * oneOverArea; t2 = gc->polygon.shader.dyBC * oneOverArea; t3 = gc->polygon.shader.dxAC * oneOverArea; t4 = gc->polygon.shader.dxBC * oneOverArea; gc->polygon.shader.drdx = drAC * t2 - drBC * t1; gc->polygon.shader.drdy = drBC * t3 - drAC * t4; GENACCEL(gc).spanDelta.r = FLT_TO_FIX(gc->polygon.shader.drdx); if (GENACCEL(gc).spanDelta.r == 0) { GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr; } else { GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastSmoothSpanFuncPtr; } } else { GENACCEL(gc).spanValue.r = FLT_TO_FIX(gc->vertex.provoking->color->r); GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr; } _asm{ mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] } colorDone: _asm{ test edi, __GL_SHADE_DEPTH_ITER je noZ test edi, __GL_SHADE_SMOOTH jne areaOK } _asm{ fstp oneOverArea // finish divide fld DWORD PTR [OFFSET(SHADER.dyAC)][edx] fmul oneOverArea fld DWORD PTR [OFFSET(SHADER.dyBC)][edx] fmul oneOverArea // dyBC dyAC fld DWORD PTR [OFFSET(SHADER.dxAC)][edx] fmul oneOverArea // dxAC dyBC dyAC fxch ST(1) // dyBC dxAC dyAC fld DWORD PTR [OFFSET(SHADER.dxBC)][edx] fmul oneOverArea // dxBC dyBC dxAC dyAC fxch ST(3) // dyAC dyBC dxAC dxBC fstp t1 fstp t2 fstp t3 fstp t4 } #if FORCE_NPX_DEBUG { __GLfloat ft1 = gc->polygon.shader.dyAC * oneOverArea; __GLfloat ft2 = gc->polygon.shader.dyBC * oneOverArea; __GLfloat ft3 = gc->polygon.shader.dxAC * oneOverArea; __GLfloat ft4 = gc->polygon.shader.dxBC * oneOverArea; if (ft1 != t1) DbgPrint("zt1 %f %f\n", t1, ft1); if (ft2 != t2) DbgPrint("zt2 %f %f\n", t2, ft2); if (ft3 != t3) DbgPrint("zt3 %f %f\n", t3, ft3); if (ft4 != t4) DbgPrint("zt4 %f %f\n", t4, ft4); } _asm{ mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] } #endif // FORCE_NPX_DEBUG areaOK: _asm{ mov ecx, c mov eax, a mov ebx, b fld DWORD PTR [OFFSET(__GLvertex.window.z)][eax] fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx] fld DWORD PTR [OFFSET(__GLvertex.window.z)][ebx] fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx] // dzBC dzAC fld ST(1) // dzAC dzBC dzAC fmul t2 // ACt2 dzBC dzAC fld ST(1) // dzBC ACt2 dzBC dzAC fmul t1 // BCt1 ACt2 dzBC dzAC fxch ST(3) // dzAC ACt2 dzBC BCt1 fmul t4 // ACt4 ACt2 dzBC BCt1 fxch ST(2) // dzBC ACt2 ACt4 BCt1 fmul t3 // BCt3 ACt2 ACt4 BCt1 fsubrp ST(2),ST // ACt2 BCAC BCt1 fsubrp ST(2),ST // BCAC ACBC fxch ST(1) // ACBC BCAC // dzdx dzdy fld ST(0) // dzdx dzdx dzdy fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx] // dzdxS dzdx dzdy fxch ST(2) // dzdy dzdx dzdxS fstp DWORD PTR [OFFSET(SHADER.dzdyf)][edx] fstp DWORD PTR [OFFSET(SHADER.dzdxf)][edx] fistp temp mov ebx, DWORD PTR temp mov DWORD PTR [OFFSET(SHADER.dzdx)][edx], ebx mov DWORD PTR [OFFSET(SPANDELTA.z)][edx], ebx #if !FORCE_NPX_DEBUG jmp deltaDone #endif } #if FORCE_NPX_DEBUG { __GLfloat dzdxf; __GLfloat dzdyf; __GLfloat dzAC, dzBC; ULONG spanDeltaZ; dzAC = a->window.z - c->window.z; dzBC = b->window.z - c->window.z; dzdxf = dzAC * t2 - dzBC * t1; dzdyf = dzBC * t3 - dzAC * t4; spanDeltaZ = FTOL(dzdxf * GENACCEL(gc).zScale); if (dzdxf != gc->polygon.shader.dzdxf) DbgPrint("dzdxf %f %f\n", dzdxf, gc->polygon.shader.dzdxf); if (dzdyf != gc->polygon.shader.dzdyf) DbgPrint("dzdyf %f %f\n", dzdyf, gc->polygon.shader.dzdyf); if (spanDeltaZ != GENACCEL(gc).spanDelta.z) DbgPrint("spanDeltaZ %x %x\n", spanDeltaZ, GENACCEL(gc).spanDelta.z); goto deltaDone; } #endif // FORCE_NPX_DEBUG noZ: _asm{ test edi, __GL_SHADE_SMOOTH jne deltaDone fstp ST(0) } deltaDone: return; #else // _X86_ /* Pre-compute one over polygon area */ __GL_FLOAT_BEGIN_DIVIDE(__glOne, gc->polygon.shader.area, &oneOverArea); /* ** t1-4 are delta values for unit changes in x or y for each ** parameter. */ if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) { if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { __GLfloat drAC, dgAC, dbAC, daAC; __GLfloat drBC, dgBC, dbBC, daBC; __GLcolor *ac, *bc, *cc; ac = a->color; bc = b->color; cc = c->color; drAC = ac->r - cc->r; drBC = bc->r - cc->r; dgAC = ac->g - cc->g; dgBC = bc->g - cc->g; dbAC = ac->b - cc->b; dbBC = bc->b - cc->b; __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea); t1 = gc->polygon.shader.dyAC * oneOverArea; t2 = gc->polygon.shader.dyBC * oneOverArea; t3 = gc->polygon.shader.dxAC * oneOverArea; t4 = gc->polygon.shader.dxBC * oneOverArea; gc->polygon.shader.drdx = drAC * t2 - drBC * t1; gc->polygon.shader.drdy = drBC * t3 - drAC * t4; gc->polygon.shader.dgdx = dgAC * t2 - dgBC * t1; gc->polygon.shader.dgdy = dgBC * t3 - dgAC * t4; gc->polygon.shader.dbdx = dbAC * t2 - dbBC * t1; gc->polygon.shader.dbdy = dbBC * t3 - dbAC * t4; GENACCEL(gc).spanDelta.r = FLT_TO_FIX(gc->polygon.shader.drdx); GENACCEL(gc).spanDelta.g = FLT_TO_FIX(gc->polygon.shader.dgdx); GENACCEL(gc).spanDelta.b = FLT_TO_FIX(gc->polygon.shader.dbdx); if ( ((GENACCEL(gc).spanDelta.r | GENACCEL(gc).spanDelta.g | GENACCEL(gc).spanDelta.b) == 0) && ((GENACCEL(gc).flags & GEN_FASTZBUFFER) == 0)) { GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr; } else { GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastSmoothSpanFuncPtr; } } else { __GLcolor *flatColor = gc->vertex.provoking->color; GENACCEL(gc).spanValue.r = FLT_TO_FIX(flatColor->r); GENACCEL(gc).spanValue.g = FLT_TO_FIX(flatColor->g); GENACCEL(gc).spanValue.b = FLT_TO_FIX(flatColor->b); GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr; } } else { if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { __GLfloat drAC; __GLfloat drBC; __GLcolor *ac, *bc, *cc; ac = a->color; bc = b->color; cc = c->color; drAC = ac->r - cc->r; drBC = bc->r - cc->r; __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea); t1 = gc->polygon.shader.dyAC * oneOverArea; t2 = gc->polygon.shader.dyBC * oneOverArea; t3 = gc->polygon.shader.dxAC * oneOverArea; t4 = gc->polygon.shader.dxBC * oneOverArea; gc->polygon.shader.drdx = drAC * t2 - drBC * t1; gc->polygon.shader.drdy = drBC * t3 - drAC * t4; GENACCEL(gc).spanDelta.r = FLT_TO_FIX(gc->polygon.shader.drdx); if (GENACCEL(gc).spanDelta.r == 0) { GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr; } else { GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastSmoothSpanFuncPtr; } } else { GENACCEL(gc).spanValue.r = FLT_TO_FIX(gc->vertex.provoking->color->r); GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr; } } if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) { __GLfloat dzAC, dzBC; if ((gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) == 0) { __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea); t1 = gc->polygon.shader.dyAC * oneOverArea; t2 = gc->polygon.shader.dyBC * oneOverArea; t3 = gc->polygon.shader.dxAC * oneOverArea; t4 = gc->polygon.shader.dxBC * oneOverArea; } dzAC = a->window.z - c->window.z; dzBC = b->window.z - c->window.z; gc->polygon.shader.dzdxf = dzAC * t2 - dzBC * t1; gc->polygon.shader.dzdyf = dzBC * t3 - dzAC * t4; GENACCEL(gc).spanDelta.z = gc->polygon.shader.dzdx = FTOL(gc->polygon.shader.dzdxf * GENACCEL(gc).zScale); } else if ((gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) == 0) { // In this case the divide hasn't been terminated yet so // we need to complete it even though we don't use the result __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea); } #endif // _X86_ } void FASTCALL __fastGenCalcDeltasTexRGBA( __GLcontext *gc, __GLvertex *a, __GLvertex *b, __GLvertex *c) { __GLfloat oneOverArea, t1, t2, t3, t4; GLboolean oneOverAreaDone; #if _X86_ && ENABLE_ASM LARGE_INTEGER temp; _asm{ mov edx, gc xor eax, eax mov oneOverAreaDone, al mov edi, [OFFSET(SHADER.modeFlags)][edx] fld __glOne fdiv DWORD PTR [OFFSET(SHADER.area)][edx] mov ebx, [OFFSET(GENGCACCEL.__fastTexSpanFuncPtr)][edx] test edi, __GL_SHADE_TEXTURE mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx mov eax, [OFFSET(__GLcontext.state.texture.env)][edx] je notReplace mov ebx, [OFFSET(__GLtextureEnvState.mode)][eax] cmp ebx, GL_REPLACE je fastReplace cmp ebx, GL_DECAL jne notReplace } fastReplace: _asm{ mov eax, [OFFSET(GENGCACCEL.constantR)][edx] mov ebx, [OFFSET(GENGCACCEL.constantG)][edx] mov [OFFSET(SPANVALUE.r)][edx], eax mov [OFFSET(SPANVALUE.g)][edx], ebx mov eax, [OFFSET(GENGCACCEL.constantB)][edx] mov ebx, [OFFSET(GENGCACCEL.constantA)][edx] mov [OFFSET(SPANVALUE.b)][edx], eax mov [OFFSET(SPANVALUE.a)][edx], ebx jmp colorDone } notReplace: _asm{ test edi, __GL_SHADE_SMOOTH je doFlat mov al, 1 mov oneOverAreaDone, al } // smooth: _asm{ mov eax, a mov ebx, b mov ecx, c fstp oneOverArea // finish divide fld DWORD PTR [OFFSET(SHADER.dyAC)][edx] mov eax, [OFFSET(__GLvertex.color)][eax] fmul oneOverArea fld DWORD PTR [OFFSET(SHADER.dyBC)][edx] mov ebx, [OFFSET(__GLvertex.color)][ebx] fmul oneOverArea // dyBC dyAC fld DWORD PTR [OFFSET(SHADER.dxAC)][edx] mov ecx, [OFFSET(__GLvertex.color)][ecx] fmul oneOverArea // dxAC dyBC dyAC fxch ST(1) // dyBC dxAC dyAC fld DWORD PTR [OFFSET(SHADER.dxBC)][edx] fmul oneOverArea // dxBC dyBC dxAC dyAC fxch ST(3) // dyAC dyBC dxAC dxBC fstp t1 fstp t2 fstp t3 fstp t4 // Now, calculate deltas: // Red fld DWORD PTR [OFFSET(__GLcolor.r)][eax] fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx] fld DWORD PTR [OFFSET(__GLcolor.r)][ebx] fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx] // drBC drAC fld ST(1) // drAC drBC drAC fmul t2 // drACt2 drBC drAC fld ST(1) // drBC drACt2 drBC drAC fmul t1 // drBCt1 drACt2 drBC drAC fxch ST(2) // drBC drACt2 drBCt1 drAC fmul t3 // drBCt3 drACt2 drBCt1 drAC fxch ST(3) // drAC drACt2 drBCt1 drBCt3 fmul t4 // drACt4 drACt2 drBCt1 drBCt3 fxch ST(2) // drBCt1 drACt2 drACt4 drBCt3 fsubp ST(1), ST // drACBC drACt4 drBCt3 fld DWORD PTR [OFFSET(__GLcolor.g)][ebx] fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx] // dgBC drACBC drACt4 drBCt3 fxch ST(2) // drACt4 drACBC dgBC drBCt3 fsubp ST(3), ST // drACBC dgBC drBCAC fst DWORD PTR [OFFSET(SHADER.drdx)][edx] fmul __glVal65536 // DRACBC dgBC drBCAC fxch ST(2) // drBCAC dgBC DRACBC fstp DWORD PTR [OFFSET(SHADER.drdy)][edx] // dgBC DRACBC fld DWORD PTR [OFFSET(__GLcolor.g)][eax] fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx] // dgAC dgBC DRACBC fxch ST(2) // DRACBC dgBC dgAC fistp DWORD PTR [OFFSET(SPANDELTA.r)][edx] // Green // dgBC dgAC fld ST(1) // dgAC dgBC dgAC fmul t2 // dgACt2 dgBC dgAC fld ST(1) // dgBC dgACt2 dgBC dgAC fmul t1 // dgBCt1 dgACt2 dgBC dgAC fxch ST(2) // dgBC dgACt2 dgBCt1 dgAC fmul t3 // dgBCt3 dgACt2 dgBCt1 dgAC fxch ST(3) // dgAC dgACt2 dgBCt1 dgBCt3 fmul t4 // dgACt4 dgACt2 dgBCt1 dgBCt3 fxch ST(2) // dgBCt1 dgACt2 dgACt4 dgBCt3 fsubp ST(1), ST // dgACBC dgACt4 dgBCt3 fld DWORD PTR [OFFSET(__GLcolor.b)][ebx] fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx] // dbBC dgACBC dgACt4 dgBCt3 fxch ST(2) // dgACt4 dgACBC dbBC dgBCt3 fsubp ST(3), ST // dgACBC dbBC dgBCAC fst DWORD PTR [OFFSET(SHADER.dgdx)][edx] fmul __glVal65536 // DGACBC dbBC dgBCAC fxch ST(2) // dgBCAC dbBC DGACBC fstp DWORD PTR [OFFSET(SHADER.dgdy)][edx] // dbBC DGACBC fld DWORD PTR [OFFSET(__GLcolor.b)][eax] fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx] // dbAC dbBC DGACBC fxch ST(2) // DGACBC dbBC dbAC fistp DWORD PTR [OFFSET(SPANDELTA.g)][edx] // Blue // dbBC dbAC fld ST(1) // dbAC dbBC dbAC fmul t2 // dbACt2 dbBC dbAC fld ST(1) // dbBC dbACt2 dbBC dbAC fmul t1 // dbBCt1 dbACt2 dbBC dbAC fxch ST(2) // dbBC dbACt2 dbBCt1 dbAC fmul t3 // dbBCt3 dbACt2 dbBCt1 dbAC fxch ST(3) // dbAC dbACt2 dbBCt1 dbBCt3 fmul t4 // dbACt4 dbACt2 dbBCt1 dbBCt3 fxch ST(2) // dbBCt1 dbACt2 dbACt4 dbBCt3 fsubp ST(1), ST // dbACBC dbACt4 dbBCt3 fxch ST(1) // dbACt4 dbACBC dbBCt3 fsubp ST(2), ST // dbACBC dbBCAC (+1) fst DWORD PTR [OFFSET(SHADER.dbdx)][edx] fmul __glVal65536 // DBACBC dbBCAC fxch ST(1) // dbBCAC DBACBC fstp DWORD PTR [OFFSET(SHADER.dbdy)][edx] test [OFFSET(__GLcontext.state.enables.general)][edx], __GL_BLEND_ENABLE fistp DWORD PTR [OFFSET(SPANDELTA.b)][edx] je colorDone fld DWORD PTR [OFFSET(__GLcolor.a)][eax] fsub DWORD PTR [OFFSET(__GLcolor.a)][ecx] // daAC fld DWORD PTR [OFFSET(__GLcolor.a)][ebx] fsub DWORD PTR [OFFSET(__GLcolor.a)][ecx] // daBC daAC fld ST(1) // daAC daBC daAC fmul t2 // daACt2 daBC daAC fld ST(1) // daBC daACt2 daBC daAC fmul t1 // daBCt1 daACt2 daBC daAC fxch ST(3) // daAC daACt2 daBC daBCt1 fmul t4 // daACt4 daACt2 daBC daBCt1 fxch ST(2) // daBC daACt2 daACt4 daBCt1 fmul t3 // daBCt3 daACt2 daACt4 daBCt1 fxch ST(3) // daBCt1 daACt2 daACt4 daBCt3 fsubp ST(1), ST // daACBC daACt4 daBCt3 fxch ST(1) // daACt4 daACBC daBCt3 fsubp ST(2), ST // daACBC daBCAC (+1) fst DWORD PTR [OFFSET(SHADER.dadx)][edx] fmul DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx] fxch ST(1) fstp DWORD PTR [OFFSET(SHADER.dady)][edx] fistp DWORD PTR [OFFSET(SPANDELTA.a)][edx] // (+1) #if !FORCE_NPX_DEBUG jmp colorDone #endif } #if FORCE_NPX_DEBUG { __GLfloat drAC, dgAC, dbAC, daAC; __GLfloat drBC, dgBC, dbBC, daBC; __GLcolor *ac, *bc, *cc; __GLfloat ft1 = gc->polygon.shader.dyAC * oneOverArea; __GLfloat ft2 = gc->polygon.shader.dyBC * oneOverArea; __GLfloat ft3 = gc->polygon.shader.dxAC * oneOverArea; __GLfloat ft4 = gc->polygon.shader.dxBC * oneOverArea; __GLfloat drdx; __GLfloat drdy; __GLfloat dgdx; __GLfloat dgdy; __GLfloat dbdx; __GLfloat dbdy; LONG spanR, spanG, spanB; ac = a->color; bc = b->color; cc = c->color; drAC = ac->r - cc->r; drBC = bc->r - cc->r; dgAC = ac->g - cc->g; dgBC = bc->g - cc->g; dbAC = ac->b - cc->b; dbBC = bc->b - cc->b; drdx = drAC * t2 - drBC * t1; drdy = drBC * t3 - drAC * t4; dgdx = dgAC * t2 - dgBC * t1; dgdy = dgBC * t3 - dgAC * t4; dbdx = dbAC * t2 - dbBC * t1; dbdy = dbBC * t3 - dbAC * t4; spanR = FLT_TO_FIX(drdx); spanG = FLT_TO_FIX(dgdx); spanB = FLT_TO_FIX(dbdx); if (ft1 != t1) DbgPrint("t1 %f %f\n", t1, ft1); if (ft2 != t2) DbgPrint("t2 %f %f\n", t2, ft2); if (ft3 != t3) DbgPrint("t3 %f %f\n", t3, ft3); if (ft4 != t4) DbgPrint("t4 %f %f\n", t4, ft4); if (drdx != gc->polygon.shader.drdx) DbgPrint("drdx %f %f\n", drdx, gc->polygon.shader.drdx); if (drdy != gc->polygon.shader.drdy) DbgPrint("drdy %f %f\n", drdy, gc->polygon.shader.drdy); if (dgdx != gc->polygon.shader.dgdx) DbgPrint("dgdx %f %f\n", dgdx, gc->polygon.shader.dgdx); if (dgdy != gc->polygon.shader.dgdy) DbgPrint("dgdy %f %f\n", dgdy, gc->polygon.shader.dgdy); if (dbdx != gc->polygon.shader.dbdx) DbgPrint("dbdx %f %f\n", dbdx, gc->polygon.shader.dbdx); if (dbdy != gc->polygon.shader.dbdy) DbgPrint("dbdy %f %f\n", dbdy, gc->polygon.shader.dbdy); if (spanR != GENACCEL(gc).spanDelta.r) DbgPrint("spanDelta.r %x %x\n", spanR, GENACCEL(gc).spanDelta.r); if (spanG!= GENACCEL(gc).spanDelta.g) DbgPrint("spanDelta.g %x %x\n", spanG, GENACCEL(gc).spanDelta.g); if (spanB != GENACCEL(gc).spanDelta.b) DbgPrint("spanDelta.b %x %x\n", spanB, GENACCEL(gc).spanDelta.b); if (gc->state.enables.general & __GL_BLEND_ENABLE) { __GLfloat dadx; __GLfloat dady; LONG a; daAC = ac->a - cc->a; daBC = bc->a - cc->a; dadx = daAC * t2 - daBC * t1; dady = daBC * t3 - daAC * t4; a = FTOL(gc->polygon.shader.dadx * GENACCEL(gc).aAccelScale); if (dadx != gc->polygon.shader.dadx) DbgPrint("dadx %f %f\n", dadx, gc->polygon.shader.dadx); if (dady != gc->polygon.shader.dady) DbgPrint("dady %f %f\n", dady, gc->polygon.shader.dady); if (a != GENACCEL(gc).spanDelta.a) DbgPrint("spanDelta.a %x %x\n", a, GENACCEL(gc).spanDelta.a); } } _asm { mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] jmp colorDone } #endif // FORCE_NPX_DEBUG doFlat: _asm{ mov eax, [OFFSET(__GLcontext.vertex.provoking)][edx] fld __glVal65536 mov eax, [OFFSET(__GLvertex.color)][eax] fmul DWORD PTR [OFFSET(__GLcolor.r)][eax] fld __glVal65536 fmul DWORD PTR [OFFSET(__GLcolor.g)][eax] mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx] fld __glVal65536 test ebx, __GL_BLEND_ENABLE fmul DWORD PTR [OFFSET(__GLcolor.b)][eax] je noFlatBlend fld DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx] fmul DWORD PTR [OFFSET(__GLcolor.a)][eax] // A B G R fxch ST(3) // R B G A fistp DWORD PTR [OFFSET(SPANVALUE.r)][edx] fistp DWORD PTR [OFFSET(SPANVALUE.b)][edx] fistp DWORD PTR [OFFSET(SPANVALUE.g)][edx] fistp DWORD PTR [OFFSET(SPANVALUE.a)][edx] jmp short flatDone noFlatBlend: // B G R fxch ST(2) // R G B fistp DWORD PTR [OFFSET(SPANVALUE.r)][edx] // G B fistp DWORD PTR [OFFSET(SPANVALUE.g)][edx] fistp DWORD PTR [OFFSET(SPANVALUE.b)][edx] flatDone: } colorDone: _asm{ test edi, __GL_SHADE_TEXTURE mov eax, [OFFSET(GENGCACCEL.texImage)][edx] je texDone test eax, eax je texDone } _asm{ mov al, oneOverAreaDone mov ebx, [OFFSET(__GLcontext.state.hints.perspectiveCorrection)][edx] test al, al jne areaDoneAlready } _asm{ fstp oneOverArea // finish divide fld DWORD PTR [OFFSET(SHADER.dyAC)][edx] fmul oneOverArea fld DWORD PTR [OFFSET(SHADER.dyBC)][edx] fmul oneOverArea // dyBC dyAC fld DWORD PTR [OFFSET(SHADER.dxAC)][edx] fmul oneOverArea // dxAC dyBC dyAC fxch ST(1) // dyBC dxAC dyAC fld DWORD PTR [OFFSET(SHADER.dxBC)][edx] fmul oneOverArea // dxBC dyBC dxAC dyAC fxch ST(3) // dyAC dyBC dxAC dxBC fstp t1 inc eax fstp t2 mov oneOverAreaDone, al fstp t3 fstp t4 } areaDoneAlready: _asm{ cmp ebx, GL_NICEST je doNicest } _asm{ mov eax, a mov ecx, c mov ebx, b fld DWORD PTR [OFFSET(__GLvertex.texture.x)][eax] fsub DWORD PTR [OFFSET(__GLvertex.texture.x)][ecx] // dsAC fld DWORD PTR [OFFSET(__GLvertex.texture.x)][ebx] fsub DWORD PTR [OFFSET(__GLvertex.texture.x)][ecx] // dsBC dsAC fld ST(1) // dsAC dsBC dsAC fmul t2 fxch ST(2) // dsAC dsBC dsACt2 fmul t4 // dsACt4 dsBC dsACt2 fld ST(1) // dsBC dsACt4 dsBC dsACt2 fmul t1 // dsBCt1 dsACt4 dsBC dsACt2 fxch ST(2) // dsBC dsACt4 dsBCt1 dsACt2 fmul t3 // dsBCt3 dsACt4 dsBCt1 dsACt2 fxch ST(2) // dsBCt1 dsACt4 dsBCt3 dsACt2 fsubp ST(3), ST // dsACt4 dsBCt3 dsACBC fld DWORD PTR [OFFSET(__GLvertex.texture.y)][ebx] fsub DWORD PTR [OFFSET(__GLvertex.texture.y)][ecx] // dtBC dsACt4 dsBCt3 dsACBC fxch ST(1) // dsACt4 dtBC dsBCt3 dsACBC fsubp ST(2), ST // dtBC dsBCAC dsACBC fxch ST(2) // dsACBC dsBCAC dtBC fst DWORD PTR [OFFSET(SHADER.dsdx)][edx] // dsdx dsBCAC dtBC fld DWORD PTR [OFFSET(__GLvertex.texture.y)][eax] fsub DWORD PTR [OFFSET(__GLvertex.texture.y)][ecx] // dtAC dsdx dsBCAC dtBC fxch ST(2) // dsBCAC dsdx dtAC dtBC fstp DWORD PTR [OFFSET(SHADER.dsdy)][edx] // dsdx dtAC dtBC fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx] // deltaS dtAC dtBC fxch ST(2) // dtBC dtAC deltaS fld ST(1) // dtAC dtBC dtAC deltaS fmul t2 // dtACt2 dtBC dtAC deltaS fxch ST(2) // dtAC dtBC dtACt2 deltaS fmul t4 // dtACt4 dtBC dtACt2 deltaS fld ST(1) // dtBC dtACt4 dtBC dtACt2 deltaS fmul t1 // dtBCt1 dtACt4 dtBC dtACt2 deltaS fxch ST(2) // dtBC dtACt4 dtBCt1 dtACt2 deltaS fmul t3 // dtBCt3 dtACt4 dtBCt1 dtACt2 deltaS fxch ST(2) // dtBCt1 dtACt4 dtBCt3 dtACt2 deltaS fsubp ST(3), ST // dtACt4 dtBCt3 dtACBC deltaS fxch ST(3) // deltaS dtBCt3 dtACBC dtACt4 fistp DWORD PTR [OFFSET(SPANDELTA.s)][edx] // dtBCt3 dtACBC dtACt4 fsubrp ST(2), ST // dtACBC dtBCAC fst DWORD PTR [OFFSET(SHADER.dtdx)][edx] fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx] fxch ST(1) // dtBCAC deltaT fstp DWORD PTR [OFFSET(SHADER.dtdy)][edx] mov eax, [OFFSET(SPANDELTA.s)][edx] fistp DWORD PTR [OFFSET(SPANDELTA.t)][edx] shl eax, TEX_SUBDIV_LOG2 mov ebx, [OFFSET(SPANDELTA.t)][edx] shl ebx, TEX_SUBDIV_LOG2 mov [OFFSET(GENGCACCEL.sStepX)][edx], eax mov [OFFSET(GENGCACCEL.tStepX)][edx], ebx #if !FORCE_NPX_DEBUG jmp texDone #endif } #if FORCE_NPX_DEBUG { __GLfloat awinv, bwinv, cwinv, scwinv, tcwinv, qwcwinv; __GLfloat dsAC, dsBC, dtAC, dtBC, dqwAC, dqwBC; __GLfloat dsdx, dsdy; __GLfloat dtdx, dtdy; LONG spanDeltaS, spanDeltaT; dsAC = a->texture.x - c->texture.x; dsBC = b->texture.x - c->texture.x; dsdx = dsAC * t2 - dsBC * t1; dsdy = dsBC * t3 - dsAC * t4; dtAC = a->texture.y - c->texture.y; dtBC = b->texture.y - c->texture.y; dtdx = dtAC * t2 - dtBC * t1; dtdy = dtBC * t3 - dtAC * t4; spanDeltaS = FTOL(dsdx * GENACCEL(gc).texXScale); spanDeltaT = FTOL(dtdx * GENACCEL(gc).texYScale); if (gc->polygon.shader.dsdx != dsdx) DbgPrint("dsdx %f %f\n", dsdx, gc->polygon.shader.dsdx); if (gc->polygon.shader.dsdy != dsdy) DbgPrint("dsdy %f %f\n", dsdy, gc->polygon.shader.dsdy); if (gc->polygon.shader.dtdx != dtdx) DbgPrint("dtdx %f %f\n", dtdx, gc->polygon.shader.dtdx); if (gc->polygon.shader.dtdy != dtdy) DbgPrint("dtdy %f %f\n", dtdy, gc->polygon.shader.dtdy); if (spanDeltaS != GENACCEL(gc).spanDelta.s) DbgPrint("spanDelta.s %x %x\n", spanDeltaS, GENACCEL(gc).spanDelta.s); if (spanDeltaT != GENACCEL(gc).spanDelta.t) DbgPrint("spanDelta.t %x %x\n", spanDeltaT, GENACCEL(gc).spanDelta.t); } _asm { mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] jmp texDone } #endif // FORCE_NPX_DEBUG doNicest: // LATER - remove store/read of dsdx, dydx _asm{ mov ecx, c mov ebx, b mov eax, a fld DWORD PTR [OFFSET(__GLvertex.texture.x)][ecx] // sc fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ecx] fld DWORD PTR [OFFSET(__GLvertex.texture.x)][eax] fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax] // dsA sc fld DWORD PTR [OFFSET(__GLvertex.texture.x)][ebx] fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ebx] // dsB dsA sc fxch ST(2) // sc dsA dsB fsub ST(1), ST // sc dsAC dsB fld DWORD PTR [OFFSET(__GLvertex.texture.y)][ecx] // tcwinv fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ecx] // tc sc dsAC dsB fxch ST(1) // sc tc dsAC dsB fsubp ST(3), ST // tc dsAC dsBC fxch ST(2) // dsBC dsAC tc fld ST(1) // dsAC dsBC dsAC tc fmul t2 fxch ST(2) // dsAC dsBC dsACt2 tc fmul t4 // dsACt4 dsBC dsACt2 tc fld ST(1) // dsBC dsACt4 dsBC dsACt2 tc fmul t1 // dsBCt1 dsACt4 dsBC dsACt2 tc fxch ST(2) // dsBC dsACt4 dsBCt1 dsACt2 tc fmul t3 // dsBCt3 dsACt4 dsBCt1 dsACt2 tc fxch ST(2) // dsBCt1 dsACt4 dsBCt3 dsACt2 tc fsubp ST(3), ST // dsACt4 dsBCt3 dsACBC tc fld DWORD PTR [OFFSET(__GLvertex.texture.y)][eax] fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax] // dtA dsACt4 dsBCt3 dsACBC tc fxch ST(1) // dsACt4 dtA dsBCt3 dsACBC tc fsubp ST(2), ST // dtA dsBCAC dsACBC tc fxch ST(2) // dsACBC dsBCAC dtA tc fstp DWORD PTR [OFFSET(SHADER.dsdx)][edx] // dsBCAC dtA tc fld DWORD PTR [OFFSET(__GLvertex.texture.y)][ebx] fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ebx] // dtB dsBCAC dtA tc fxch ST(1) // dsBCAC dtB dtA tc fstp DWORD PTR [OFFSET(SHADER.dsdy)][edx] // dtB dtA tc fxch ST(2) // tc dtA dtB fsub ST(1), ST // tc dtAC dtB fsubp ST(2), ST // dtAC dtBC fld DWORD PTR [OFFSET(__GLvertex.texture.w)][ecx] fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ecx] // qw dtAC dtBC fxch ST(2) // dtBC dtAC qw fld ST(1) // dtAC dtBC dtAC qw fmul t2 // dtACt2 dtBC dtAC qw fxch ST(2) // dtAC dtBC dtACt2 qw fmul t4 // dtACt4 dtBC dtACt2 qw fld ST(1) // dtBC dtACt4 dtBC dtACt2 qw fmul t1 // dtBCt1 dtACt4 dtBC dtACt2 qw fxch ST(2) // dtBC dtACt4 dtBCt1 dtACt2 qw fmul t3 // dtBCt3 dtACt4 dtBCt1 dtACt2 qw fxch ST(2) // dtBCt1 dtACt4 dtBCt3 dtACt2 qw fsubp ST(3), ST // dtACt4 dtBCt3 dtACBC qw fld DWORD PTR [OFFSET(__GLvertex.texture.w)][eax] fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax] // dqA dtACt4 dtBCt3 dtACBC qw fxch ST(1) // dtACt4 dqA dtBCt3 dtACBC qw fsubp ST(2), ST // dqA dtBCAC dtACBC qw fxch ST(2) // dtACBC dtBCAC dqA qw fstp DWORD PTR [OFFSET(SHADER.dtdx)][edx] // dsBCAC dqA qw fld DWORD PTR [OFFSET(__GLvertex.texture.w)][ebx] fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ebx] // dqB dsBCAC dqA qw fxch ST(3) // qw dsBCAC dqA dqB fsub ST(2), ST // qw dsBCAC dqAC dqB fxch ST(1) // dsBCAC qw dqAC dqB fstp DWORD PTR [OFFSET(SHADER.dtdy)][edx] // qw dqAC dqB fsubp ST(2), ST // dqAC dqBC fxch ST(1) // dqBC dqAC fld ST(1) // dqAC dqBC dqAC fmul t2 // dqACt2 dqBC dqAC fxch ST(2) // dqAC dqBC dqACt2 fmul t4 // dqACt4 dqBC dqACt2 fld ST(1) // dqBC dqACt4 dqBC dqACt2 fmul t1 // dqBCt1 dqACt4 dqBC dqACt2 fxch ST(2) // dqBC dqACt4 dqBCt1 dqACt2 fmul t3 // dqBCt3 dqACt4 dqBCt1 dqACt2 fxch ST(2) // dqBCt1 dqACt4 dqBCt3 dqACt2 fsubp ST(3), ST // dqACt4 dqBCt3 dqACBC fxch ST(2) // dqACBC dqBCt3 dqACt4 fld DWORD PTR [OFFSET(SHADER.dsdx)][edx] fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx] // deltaS dqACBC dqBCt3 dqACt4 fxch ST(3) // dqACt4 dqACBC dqBCt3 deltaS fsubp ST(2), ST // dqACBC dqBCAC deltaS fld DWORD PTR [OFFSET(SHADER.dtdx)][edx] fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx] // deltaT dqACBC dqBCAC deltaS fld __glTexSubDiv fmul ST, ST(2) // qwStep deltaT dqACBC dqBCAC deltaS fxch ST(4) // deltaS deltaT dqACBC dqBCAC qwStep fistp DWORD PTR [OFFSET(SPANDELTA.s)][edx] fistp DWORD PTR [OFFSET(SPANDELTA.t)][edx] // dqACBC dqBCAC qwStep fstp DWORD PTR [OFFSET(SHADER.dqwdx)][edx] fstp DWORD PTR [OFFSET(SHADER.dqwdy)][edx] mov eax, [OFFSET(SPANDELTA.s)][edx] fstp DWORD PTR [OFFSET(GENGCACCEL.qwStepX)][edx] shl eax, TEX_SUBDIV_LOG2 mov ebx, [OFFSET(SPANDELTA.t)][edx] shl ebx, TEX_SUBDIV_LOG2 mov [OFFSET(GENGCACCEL.sStepX)][edx], eax mov [OFFSET(GENGCACCEL.tStepX)][edx], ebx } #if FORCE_NPX_DEBUG { __GLfloat awinv, bwinv, cwinv, scwinv, tcwinv, qwcwinv; __GLfloat dsAC, dsBC, dtAC, dtBC, dqwAC, dqwBC; __GLfloat dsdx, dsdy; __GLfloat dtdx, dtdy; __GLfloat dqwdx, dqwdy; __GLfloat qwStepX; LONG spanDeltaS, spanDeltaT; awinv = a->window.w; bwinv = b->window.w; cwinv = c->window.w; scwinv = c->texture.x * cwinv; tcwinv = c->texture.y * cwinv; qwcwinv = c->texture.w * cwinv; dsAC = a->texture.x * awinv - scwinv; dsBC = b->texture.x * bwinv - scwinv; dsdx = dsAC * t2 - dsBC * t1; dsdy = dsBC * t3 - dsAC * t4; dtAC = a->texture.y * awinv - tcwinv; dtBC = b->texture.y * bwinv - tcwinv; dtdx = dtAC * t2 - dtBC * t1; dtdy = dtBC * t3 - dtAC * t4; dqwAC = a->texture.w * awinv - qwcwinv; dqwBC = b->texture.w * bwinv - qwcwinv; dqwdx = dqwAC * t2 - dqwBC * t1; dqwdy = dqwBC * t3 - dqwAC * t4; spanDeltaS = FTOL(dsdx * GENACCEL(gc).texXScale); spanDeltaT = FTOL(dtdx * GENACCEL(gc).texYScale); qwStepX = (gc->polygon.shader.dqwdx * (__GLfloat)TEX_SUBDIV); if (gc->polygon.shader.dsdx != dsdx) DbgPrint("dsdx %f %f\n", dsdx, gc->polygon.shader.dsdx); if (gc->polygon.shader.dsdy != dsdy) DbgPrint("dsdy %f %f\n", dsdy, gc->polygon.shader.dsdy); if (gc->polygon.shader.dtdx != dtdx) DbgPrint("dtdx %f %f\n", dtdx, gc->polygon.shader.dtdx); if (gc->polygon.shader.dtdy != dtdy) DbgPrint("dtdy %f %f\n", dtdy, gc->polygon.shader.dtdy); if (gc->polygon.shader.dqwdx != dqwdx) DbgPrint("dqdx %f %f\n", dqwdx, gc->polygon.shader.dqwdx); if (gc->polygon.shader.dqwdy != dqwdy) DbgPrint("dqdy %f %f\n", dqwdy, gc->polygon.shader.dqwdy); if (spanDeltaS != GENACCEL(gc).spanDelta.s) DbgPrint("spanDelta.s %x %x\n", spanDeltaS, GENACCEL(gc).spanDelta.s); if (spanDeltaT != GENACCEL(gc).spanDelta.t) DbgPrint("spanDelta.t %x %x\n", spanDeltaT, GENACCEL(gc).spanDelta.t); if (qwStepX != GENACCEL(gc).qwStepX) DbgPrint("qwStepX %f %f\n", qwStepX, GENACCEL(gc).qwStepX); } _asm { mov edx, gc mov edi, [OFFSET(SHADER.modeFlags)][edx] } #endif // FORCE_NPX_DEBUG texDone: _asm{ test edi, __GL_SHADE_DEPTH_ITER je noZ mov al, oneOverAreaDone test al, al jne areaDoneAlready2 } _asm{ fstp oneOverArea // finish divide fld DWORD PTR [OFFSET(SHADER.dyAC)][edx] fmul oneOverArea fld DWORD PTR [OFFSET(SHADER.dyBC)][edx] fmul oneOverArea // dyBC dyAC fld DWORD PTR [OFFSET(SHADER.dxAC)][edx] fmul oneOverArea // dxAC dyBC dyAC fxch ST(1) // dyBC dxAC dyAC fld DWORD PTR [OFFSET(SHADER.dxBC)][edx] fmul oneOverArea // dxBC dyBC dxAC dyAC fxch ST(3) // dyAC dyBC dxAC dxBC fstp t1 inc eax fstp t2 mov oneOverAreaDone, al fstp t3 fstp t4 } areaDoneAlready2: _asm{ mov ecx, c mov eax, a mov ebx, b fld DWORD PTR [OFFSET(__GLvertex.window.z)][eax] fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx] fld DWORD PTR [OFFSET(__GLvertex.window.z)][ebx] fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx] // dzBC dzAC fld ST(1) // dzAC dzBC dzAC fmul t2 // ACt2 dzBC dzAC fld ST(1) // dzBC ACt2 dzBC dzAC fmul t1 // BCt1 ACt2 dzBC dzAC fxch ST(3) // dzAC ACt2 dzBC BCt1 fmul t4 // ACt4 ACt2 dzBC BCt1 fxch ST(2) // dzBC ACt2 ACt4 BCt1 fmul t3 // BCt3 ACt2 ACt4 BCt1 fsubrp ST(2),ST // ACt2 BCAC BCt1 fsubrp ST(2),ST // BCAC ACBC fxch ST(1) // ACBC BCAC // dzdx dzdy fld ST(0) // dzdx dzdx dzdy fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx] // dzdxS dzdx dzdy fxch ST(2) // dzdy dzdx dzdxS fstp DWORD PTR [OFFSET(SHADER.dzdyf)][edx] fstp DWORD PTR [OFFSET(SHADER.dzdxf)][edx] fistp temp mov ebx, DWORD PTR temp mov DWORD PTR [OFFSET(SHADER.dzdx)][edx], ebx mov DWORD PTR [OFFSET(SPANDELTA.z)][edx], ebx #if !FORCE_NPX_DEBUG jmp deltaDone #endif } #if FORCE_NPX_DEBUG { __GLfloat dzAC, dzBC; __GLfloat dzdxf; __GLfloat dzdyf; ULONG spanDeltaZ; dzAC = a->window.z - c->window.z; dzBC = b->window.z - c->window.z; dzdxf = dzAC * t2 - dzBC * t1; dzdyf = dzBC * t3 - dzAC * t4; spanDeltaZ = FTOL(dzdxf * GENACCEL(gc).zScale); if (dzdxf != gc->polygon.shader.dzdxf) DbgPrint("dzdxf %f %f\n", dzdxf, gc->polygon.shader.dzdxf); if (dzdyf != gc->polygon.shader.dzdyf) DbgPrint("dzdyf %f %f\n", dzdyf, gc->polygon.shader.dzdyf); if (spanDeltaZ != GENACCEL(gc).spanDelta.z) DbgPrint("spanDeltaZ %x %x\n", spanDeltaZ, GENACCEL(gc).spanDelta.z); } #endif // FORCE_NPX_DEBUG noZ: _asm{ mov al, oneOverAreaDone test al, al jne deltaDone fstp ST(0) } deltaDone: return; #else /* Pre-compute one over polygon area */ __GL_FLOAT_BEGIN_DIVIDE(__glOne, gc->polygon.shader.area, &oneOverArea); oneOverAreaDone = GL_FALSE; /* ** Compute delta values for unit changes in x or y for each ** parameter. */ GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastTexSpanFuncPtr; if ((gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) && ((gc->state.texture.env[0].mode == GL_REPLACE) || (gc->state.texture.env[0].mode == GL_DECAL))) { GENACCEL(gc).spanValue.r = GENACCEL(gc).constantR; GENACCEL(gc).spanValue.g = GENACCEL(gc).constantG; GENACCEL(gc).spanValue.b = GENACCEL(gc).constantB; GENACCEL(gc).spanValue.a = GENACCEL(gc).constantA; } else if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) { __GLfloat drAC, dgAC, dbAC, daAC; __GLfloat drBC, dgBC, dbBC, daBC; __GLcolor *ac, *bc, *cc; oneOverAreaDone = GL_TRUE; ac = a->color; bc = b->color; cc = c->color; drAC = ac->r - cc->r; drBC = bc->r - cc->r; dgAC = ac->g - cc->g; dgBC = bc->g - cc->g; dbAC = ac->b - cc->b; dbBC = bc->b - cc->b; __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea); t1 = gc->polygon.shader.dyAC * oneOverArea; t2 = gc->polygon.shader.dyBC * oneOverArea; t3 = gc->polygon.shader.dxAC * oneOverArea; t4 = gc->polygon.shader.dxBC * oneOverArea; gc->polygon.shader.drdx = drAC * t2 - drBC * t1; gc->polygon.shader.drdy = drBC * t3 - drAC * t4; gc->polygon.shader.dgdx = dgAC * t2 - dgBC * t1; gc->polygon.shader.dgdy = dgBC * t3 - dgAC * t4; gc->polygon.shader.dbdx = dbAC * t2 - dbBC * t1; gc->polygon.shader.dbdy = dbBC * t3 - dbAC * t4; GENACCEL(gc).spanDelta.r = FLT_TO_FIX(gc->polygon.shader.drdx); GENACCEL(gc).spanDelta.g = FLT_TO_FIX(gc->polygon.shader.dgdx); GENACCEL(gc).spanDelta.b = FLT_TO_FIX(gc->polygon.shader.dbdx); if (gc->state.enables.general & __GL_BLEND_ENABLE) { daAC = ac->a - cc->a; daBC = bc->a - cc->a; gc->polygon.shader.dadx = daAC * t2 - daBC * t1; gc->polygon.shader.dady = daBC * t3 - daAC * t4; GENACCEL(gc).spanDelta.a = FTOL(gc->polygon.shader.dadx * GENACCEL(gc).aAccelScale); } #ifdef GENERIC_CAN_BLEND //!! Note: this is not enabled in the assembly code above if ( ((GENACCEL(gc).spanDelta.r | GENACCEL(gc).spanDelta.g | GENACCEL(gc).spanDelta.b) == 0) && ((GENACCEL(gc).flags & GEN_FASTZBUFFER) == 0) ) { GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr; } else { GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastSmoothSpanFuncPtr; } #endif } else { __GLcolor *flatColor = gc->vertex.provoking->color; GENACCEL(gc).spanValue.r = FLT_TO_FIX(flatColor->r); GENACCEL(gc).spanValue.g = FLT_TO_FIX(flatColor->g); GENACCEL(gc).spanValue.b = FLT_TO_FIX(flatColor->b); if (gc->state.enables.general & __GL_BLEND_ENABLE) GENACCEL(gc).spanValue.a = FTOL(flatColor->a * GENACCEL(gc).aAccelScale); #ifdef GENERIC_CAN_BLEND //!! Note: this is not enabled in the assembly code above GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr; #endif } if ((gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) && (GENACCEL(gc).texImage)) { __GLfloat awinv, bwinv, cwinv, scwinv, tcwinv, qwcwinv; __GLfloat dsAC, dsBC, dtAC, dtBC, dqwAC, dqwBC; #ifdef GENERIC_CAN_BLEND GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastTexSpanFuncPtr; #endif if (!oneOverAreaDone) { oneOverAreaDone = GL_TRUE; __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea); t1 = gc->polygon.shader.dyAC * oneOverArea; t2 = gc->polygon.shader.dyBC * oneOverArea; t3 = gc->polygon.shader.dxAC * oneOverArea; t4 = gc->polygon.shader.dxBC * oneOverArea; } if (gc->state.hints.perspectiveCorrection != GL_NICEST) { dsAC = a->texture.x - c->texture.x; dsBC = b->texture.x - c->texture.x; gc->polygon.shader.dsdx = dsAC * t2 - dsBC * t1; gc->polygon.shader.dsdy = dsBC * t3 - dsAC * t4; dtAC = a->texture.y - c->texture.y; dtBC = b->texture.y - c->texture.y; gc->polygon.shader.dtdx = dtAC * t2 - dtBC * t1; gc->polygon.shader.dtdy = dtBC * t3 - dtAC * t4; GENACCEL(gc).spanDelta.s = FTOL(gc->polygon.shader.dsdx * GENACCEL(gc).texXScale); GENACCEL(gc).spanDelta.t = FTOL(gc->polygon.shader.dtdx * GENACCEL(gc).texYScale); GENACCEL(gc).sStepX = (GENACCEL(gc).spanDelta.s * TEX_SUBDIV); GENACCEL(gc).tStepX = (GENACCEL(gc).spanDelta.t * TEX_SUBDIV); } else { awinv = a->window.w; bwinv = b->window.w; cwinv = c->window.w; scwinv = c->texture.x * cwinv; tcwinv = c->texture.y * cwinv; qwcwinv = c->texture.w * cwinv; dsAC = a->texture.x * awinv - scwinv; dsBC = b->texture.x * bwinv - scwinv; gc->polygon.shader.dsdx = dsAC * t2 - dsBC * t1; gc->polygon.shader.dsdy = dsBC * t3 - dsAC * t4; dtAC = a->texture.y * awinv - tcwinv; dtBC = b->texture.y * bwinv - tcwinv; gc->polygon.shader.dtdx = dtAC * t2 - dtBC * t1; gc->polygon.shader.dtdy = dtBC * t3 - dtAC * t4; dqwAC = a->texture.w * awinv - qwcwinv; dqwBC = b->texture.w * bwinv - qwcwinv; gc->polygon.shader.dqwdx = dqwAC * t2 - dqwBC * t1; gc->polygon.shader.dqwdy = dqwBC * t3 - dqwAC * t4; GENACCEL(gc).spanDelta.s = FTOL(gc->polygon.shader.dsdx * GENACCEL(gc).texXScale); GENACCEL(gc).spanDelta.t = FTOL(gc->polygon.shader.dtdx * GENACCEL(gc).texYScale); GENACCEL(gc).qwStepX = (gc->polygon.shader.dqwdx * (__GLfloat)TEX_SUBDIV); GENACCEL(gc).sStepX = (GENACCEL(gc).spanDelta.s * TEX_SUBDIV); GENACCEL(gc).tStepX = (GENACCEL(gc).spanDelta.t * TEX_SUBDIV); } } if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) { __GLfloat dzAC, dzBC; if (!oneOverAreaDone) { oneOverAreaDone = GL_TRUE; __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea); t1 = gc->polygon.shader.dyAC * oneOverArea; t2 = gc->polygon.shader.dyBC * oneOverArea; t3 = gc->polygon.shader.dxAC * oneOverArea; t4 = gc->polygon.shader.dxBC * oneOverArea; } dzAC = a->window.z - c->window.z; dzBC = b->window.z - c->window.z; gc->polygon.shader.dzdxf = dzAC * t2 - dzBC * t1; gc->polygon.shader.dzdyf = dzBC * t3 - dzAC * t4; GENACCEL(gc).spanDelta.z = gc->polygon.shader.dzdx = FTOL(gc->polygon.shader.dzdxf * GENACCEL(gc).zScale); } if (!oneOverAreaDone) { // In this case the divide hasn't been terminated yet so // we need to complete it even though we don't use the result __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea); } #endif // _X86_ } /**************************************************************************\ \**************************************************************************/ void FASTCALL __fastGenFillTriangle( __GLcontext *gc, __GLvertex *a, __GLvertex *b, __GLvertex *c, GLboolean ccw) { GLint aIY, bIY, cIY; __GLfloat dxdyAC, dxdyBC, dxdyBA; __GLfloat dx, dy; __GLfloat invDyAB, invDyBC, invDyAC; #if DBG && CHECK_FPU { USHORT cw; __asm { _asm fnstcw cw _asm mov ax, cw _asm and ah, (~0x3f) _asm mov cw,ax _asm fldcw cw } } #endif // // Snap each y coordinate to its pixel center // aIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(a->window.y)+ __GL_VERTEX_FRAC_HALF); cIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(c->window.y)+ __GL_VERTEX_FRAC_HALF); if (aIY == cIY) { return; } bIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(b->window.y)+ __GL_VERTEX_FRAC_HALF); if (cIY - aIY > __GL_MAX_INV_TABLE) goto bigTriangle; gc->polygon.shader.cfb = gc->drawBuffer; CASTFIX(invDyAB) = CASTFIX(invTable[CASTFIX(b->window.y) - CASTFIX(a->window.y)]) | 0x80000000; CASTFIX(invDyBC) = CASTFIX(invTable[CASTFIX(c->window.y) - CASTFIX(b->window.y)]) | 0x80000000; CASTFIX(invDyAC) = CASTFIX(invTable[CASTFIX(c->window.y) - CASTFIX(a->window.y)]) | 0x80000000; // // Calculate delta values for unit changes in x or y // GENACCEL(gc).__fastCalcDeltaPtr(gc, a, b, c); // // calculate the destination address // GENACCEL(gc).pPix = (BYTE *)gc->polygon.shader.cfb->buf.base + ( gc->polygon.shader.cfb->buf.outerWidth * ( aIY - gc->constants.viewportYAdjust + gc->polygon.shader.cfb->buf.yOrigin ) ) + ( GENACCEL(gc).xMultiplier * ( - gc->constants.viewportXAdjust + gc->polygon.shader.cfb->buf.xOrigin ) ); // Calculate destination Z if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*), 0, aIY); } else { gc->polygon.shader.zbuf = (__GLzValue *) __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*), 0, aIY); } } /* ** This algorithm always fills from bottom to top, left to right. ** Because of this, ccw triangles are inherently faster because ** the parameter values need not be recomputed. */ if (ccw) { dy = (aIY + __glHalf) - a->window.y; dxdyAC = gc->polygon.shader.dxAC * invDyAC; GenSnapXLeft(gc, a->window.x + dy*dxdyAC, dxdyAC); dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x; GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy); if (aIY != bIY) { dxdyBA = (a->window.x - b->window.x) * invDyAB; GenSnapXRight(gc, a->window.x + dy*dxdyBA, dxdyBA); if (bIY == cIY) gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI; GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY); if (bIY != cIY) { if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 2)); } else { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 1)); } } } } if (bIY != cIY) { dy = (bIY + __glHalf) - b->window.y; dxdyBC = (b->window.x - c->window.x) * invDyBC; GenSnapXRight(gc, b->window.x + dy*dxdyBC, dxdyBC); gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI; GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY); } } else { dy = (aIY + __glHalf) - a->window.y; dxdyAC = gc->polygon.shader.dxAC * invDyAC; GenSnapXRight(gc, a->window.x + dy*dxdyAC, dxdyAC); if (aIY != bIY) { dxdyBA = (a->window.x - b->window.x) * invDyAB; GenSnapXLeft(gc, a->window.x + dy*dxdyBA, dxdyBA); dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x; GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy); if (bIY == cIY) gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI; GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY); if (bIY != cIY) { if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 2)); } else { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 1)); } } } } if (bIY != cIY) { dy = (bIY + __glHalf) - b->window.y; dxdyBC = gc->polygon.shader.dxBC * invDyBC; GenSnapXLeft(gc, b->window.x + dy*dxdyBC, dxdyBC); dx = (gc->polygon.shader.ixLeft + __glHalf) - b->window.x; GENACCEL(gc).__fastSetInitParamPtr(gc, b, dx, dy); gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI; GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY); } } gc->polygon.shader.modeFlags &= ~(__GL_SHADE_LAST_SUBTRI); return; bigTriangle: __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxAC, gc->polygon.shader.dyAC, &dxdyAC); gc->polygon.shader.cfb = gc->drawBuffer; // // Calculate delta values for unit changes in x or y // GENACCEL(gc).__fastCalcDeltaPtr(gc, a, b, c); // // calculate the destination address // GENACCEL(gc).pPix = (BYTE *)gc->polygon.shader.cfb->buf.base + ( gc->polygon.shader.cfb->buf.outerWidth * ( aIY - gc->constants.viewportYAdjust + gc->polygon.shader.cfb->buf.yOrigin ) ) + ( GENACCEL(gc).xMultiplier * ( - gc->constants.viewportXAdjust + gc->polygon.shader.cfb->buf.xOrigin ) ); // Calculate destination Z if ((gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) && aIY != bIY) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*), 0, aIY); } else { gc->polygon.shader.zbuf = (__GLzValue *) __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*), 0, aIY); } } /* ** This algorithm always fills from bottom to top, left to right. ** Because of this, ccw triangles are inherently faster because ** the parameter values need not be recomputed. */ if (ccw) { dy = (aIY + __glHalf) - a->window.y; __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC); GenSnapXLeft(gc, a->window.x + dy*dxdyAC, dxdyAC); dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x; GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy); if (aIY != bIY) { dxdyBA = (a->window.x - b->window.x) / (a->window.y - b->window.y); GenSnapXRight(gc, a->window.x + dy*dxdyBA, dxdyBA); if (bIY != cIY) { GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY); __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC, gc->polygon.shader.dyBC, &dxdyBC); if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 2)); } else { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 1)); } } } else { GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY); } } else if (bIY != cIY) { if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*), 0, bIY); } else { gc->polygon.shader.zbuf = (__GLzValue *) __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*), 0, bIY); } } __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC, gc->polygon.shader.dyBC, &dxdyBC); } if (bIY != cIY) { dy = (bIY + __glHalf) - b->window.y; __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC); GenSnapXRight(gc, b->window.x + dy*dxdyBC, dxdyBC); GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY); } } else { dy = (aIY + __glHalf) - a->window.y; __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC); GenSnapXRight(gc, a->window.x + dy*dxdyAC, dxdyAC); if (aIY != bIY) { dxdyBA = (a->window.x - b->window.x) / (a->window.y - b->window.y); GenSnapXLeft(gc, a->window.x + dy*dxdyBA, dxdyBA); dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x; GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy); if (bIY != cIY) { __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC, gc->polygon.shader.dyBC, &dxdyBC); GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY); if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 2)); } else { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 1)); } } } else { GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY); } } else if (bIY != cIY) { if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*), 0, bIY); } else { gc->polygon.shader.zbuf = (__GLzValue *) __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*), 0, bIY); } } __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC, gc->polygon.shader.dyBC, &dxdyBC); } if (bIY != cIY) { dy = (bIY + __glHalf) - b->window.y; __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC); GenSnapXLeft(gc, b->window.x + dy*dxdyBC, dxdyBC); dx = (gc->polygon.shader.ixLeft + __glHalf) - b->window.x; GENACCEL(gc).__fastSetInitParamPtr(gc, b, dx, dy); GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY); } } CHOP_ROUND_OFF(); } /**************************************************************************\ * __fastGenMcdFillTriangle * * Just like __fastGenFillTriangle, except that the floating point macros * __GL_FLOAT_BEGIN_DIVIDE and __GL_FLOAT_SIMPLE_END_DIVIDE are not allowed * to straddle a function call to the driver (i.e., __fastFillSubTrianglePtr * calls the display driver span functions if direct frame buffer access is * not available. \**************************************************************************/ void FASTCALL __fastGenMcdFillTriangle( __GLcontext *gc, __GLvertex *a, __GLvertex *b, __GLvertex *c, GLboolean ccw) { GLint aIY, bIY, cIY; __GLfloat dxdyAC, dxdyBC, dxdyBA; __GLfloat dx, dy; CHOP_ROUND_ON(); // // Calculate delta values for unit changes in x or y // GENACCEL(gc).__fastCalcDeltaPtr(gc, a, b, c); __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxAC, gc->polygon.shader.dyAC, &dxdyAC); // // can this be moved up even farther? // gc->polygon.shader.cfb = gc->drawBuffer; // // Snap each y coordinate to its pixel center // aIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(a->window.y)+ __GL_VERTEX_FRAC_HALF); bIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(b->window.y)+ __GL_VERTEX_FRAC_HALF); cIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(c->window.y)+ __GL_VERTEX_FRAC_HALF); // // calculate the destination address // GENACCEL(gc).pPix = (BYTE *)gc->polygon.shader.cfb->buf.base + ( gc->polygon.shader.cfb->buf.outerWidth * ( aIY - gc->constants.viewportYAdjust + gc->polygon.shader.cfb->buf.yOrigin ) ) + ( GENACCEL(gc).xMultiplier * ( - gc->constants.viewportXAdjust + gc->polygon.shader.cfb->buf.xOrigin ) ); // Calculate destination Z if ((gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) && aIY != bIY) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*), 0, aIY); } else { gc->polygon.shader.zbuf = (__GLzValue *) __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*), 0, aIY); } } /* ** This algorithm always fills from bottom to top, left to right. ** Because of this, ccw triangles are inherently faster because ** the parameter values need not be recomputed. */ if (ccw) { dy = (aIY + __glHalf) - a->window.y; __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC); GenSnapXLeft(gc, a->window.x + dy*dxdyAC, dxdyAC); dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x; GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy); if (aIY != bIY) { dxdyBA = (a->window.x - b->window.x) / (a->window.y - b->window.y); GenSnapXRight(gc, a->window.x + dy*dxdyBA, dxdyBA); if (bIY != cIY) { GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY); __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC, gc->polygon.shader.dyBC, &dxdyBC); if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 2)); } else { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 1)); } } } else { GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY); } } else if (bIY != cIY) { if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*), 0, bIY); } else { gc->polygon.shader.zbuf = (__GLzValue *) __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*), 0, bIY); } } __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC, gc->polygon.shader.dyBC, &dxdyBC); } if (bIY != cIY) { dy = (bIY + __glHalf) - b->window.y; __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC); GenSnapXRight(gc, b->window.x + dy*dxdyBC, dxdyBC); GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY); } } else { dy = (aIY + __glHalf) - a->window.y; __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC); GenSnapXRight(gc, a->window.x + dy*dxdyAC, dxdyAC); if (aIY != bIY) { dxdyBA = (a->window.x - b->window.x) / (a->window.y - b->window.y); GenSnapXLeft(gc, a->window.x + dy*dxdyBA, dxdyBA); dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x; GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy); if (bIY != cIY) { GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY); __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC, gc->polygon.shader.dyBC, &dxdyBC); if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 2)); } else { gc->polygon.shader.zbuf = (__GLzValue *) ((GLubyte *)gc->polygon.shader.zbuf- (gc->polygon.shader.ixLeft << 1)); } } } else { GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY); } } else if (bIY != cIY) { if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) { if ( gc->modes.depthBits == 32 ) { gc->polygon.shader.zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*), 0, bIY); } else { gc->polygon.shader.zbuf = (__GLzValue *) __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*), 0, bIY); } } __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC, gc->polygon.shader.dyBC, &dxdyBC); } if (bIY != cIY) { dy = (bIY + __glHalf) - b->window.y; __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC); GenSnapXLeft(gc, b->window.x + dy*dxdyBC, dxdyBC); dx = (gc->polygon.shader.ixLeft + __glHalf) - b->window.x; GENACCEL(gc).__fastSetInitParamPtr(gc, b, dx, dy); GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY); } } CHOP_ROUND_OFF(); }