373 lines
14 KiB
C++
373 lines
14 KiB
C++
|
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) Microsoft Corporation, 2000.
//
// psexec.cpp
//
// Direct3D Reference Device - Pixel Shader Execution
//
///////////////////////////////////////////////////////////////////////////////
|
||
|
#include "pch.cpp"
|
||
|
#pragma hdrstop
|
||
|
|
||
|
//-----------------------------------------------------------------------------
|
||
|
//
|
||
|
// ExecShader - Executes the current pixel shader.
|
||
|
//
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefRast::ExecShader( void )
|
||
|
{
|
||
|
#define _InstParam(__INST) (*(__INST##_PARAMS UNALIGNED64*)pRDPSInstBuffer)
|
||
|
#define _StepOverInst(__INST) pRDPSInstBuffer += sizeof(__INST##_PARAMS);
|
||
|
#define _DeclArgs(__INST) __INST##_PARAMS& Args = _InstParam(__INST);
|
||
|
|
||
|
#define _PerChannel(__STATEMENT) \
|
||
|
for( iChn=0; iChn<4; iChn++ ) \
|
||
|
{ \
|
||
|
__STATEMENT \
|
||
|
} \
|
||
|
|
||
|
#define _PerChannelMasked(__STATEMENT) \
|
||
|
for( iChn=0; iChn<4; iChn++ ) \
|
||
|
{ \
|
||
|
if( !(Args.WriteMask & ComponentMask[iChn] ) ) \
|
||
|
continue; \
|
||
|
__STATEMENT \
|
||
|
} \
|
||
|
|
||
|
#define _Dst Args.DstReg.GetRegPtr()[m_iPix][iChn]
|
||
|
#define _DstC(__chn) Args.DstReg.GetRegPtr()[m_iPix][__chn]
|
||
|
|
||
|
#define _Src0 Args.SrcReg0.GetRegPtr()[m_iPix][iChn]
|
||
|
#define _Src1 Args.SrcReg1.GetRegPtr()[m_iPix][iChn]
|
||
|
#define _Src2 Args.SrcReg2.GetRegPtr()[m_iPix][iChn]
|
||
|
#define _Src0C(__chn) Args.SrcReg0.GetRegPtr()[m_iPix][__chn]
|
||
|
#define _Src1C(__chn) Args.SrcReg1.GetRegPtr()[m_iPix][__chn]
|
||
|
#define _Src2C(__chn) Args.SrcReg2.GetRegPtr()[m_iPix][__chn]
|
||
|
|
||
|
#define _Src0N (Args.bSrcReg0_Negate?(-_Src0):_Src0)
|
||
|
#define _Src1N (Args.bSrcReg1_Negate?(-_Src1):_Src1)
|
||
|
#define _Src2N (Args.bSrcReg2_Negate?(-_Src2):_Src2)
|
||
|
#define _Src0NC(__chn) (Args.bSrcReg0_Negate?(-_Src0C(__chn)):_Src0C(__chn))
|
||
|
#define _Src1NC(__chn) (Args.bSrcReg1_Negate?(-_Src1C(__chn)):_Src1C(__chn))
|
||
|
#define _Src2NC(__chn) (Args.bSrcReg2_Negate?(-_Src2C(__chn)):_Src2C(__chn))
|
||
|
|
||
|
BYTE ComponentMask[4] = {RDPS_COMPONENTMASK_0, RDPS_COMPONENTMASK_1, RDPS_COMPONENTMASK_2, RDPS_COMPONENTMASK_3};
|
||
|
BYTE* pRDPSInstBuffer = &m_pCurrentPixelShader->m_RDPSInstBuffer[0]; // Buffer of "RISC" RDPS_* instructions to execute.
|
||
|
int QueueIndex[4] = {-1,-1,-1,-1}; // For simulating co-issue sequentially ("parallel" writes staged in queue)
|
||
|
int iChn; // For macros
|
||
|
|
||
|
#if DBG
|
||
|
PixelShaderInstruction* pCurrD3DPSInst = NULL; // Current true D3DSIO_ instruction being simulated.
|
||
|
#endif
|
||
|
|
||
|
m_bPixelDiscard[0] = m_bPixelDiscard[1] = m_bPixelDiscard[2] = m_bPixelDiscard[3] = FALSE;
|
||
|
|
||
|
while(RDPSINST_END != _InstParam(RDPSINST_BASE).Inst)
|
||
|
{
|
||
|
switch(_InstParam(RDPSINST_BASE).Inst)
|
||
|
{
|
||
|
case RDPSINST_EVAL:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_EVAL)
|
||
|
m_Attr[RDATTR_TEXTURE0+Args.uiCoordSet].Sample( Args.DstReg.GetRegPtr()[m_iPix],
|
||
|
(FLOAT)m_iX[m_iPix], (FLOAT)m_iY[m_iPix],
|
||
|
Args.bIgnoreD3DTTFF_PROJECTED, Args.bClamp );
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_EVAL)
|
||
|
break;
|
||
|
case RDPSINST_SAMPLE:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_SAMPLE)
|
||
|
ComputeTextureFilter( Args.uiStage, Args.CoordReg.GetRegPtr()[m_iPix] );
|
||
|
SampleTexture( Args.uiStage, Args.DstReg.GetRegPtr()[m_iPix] );
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_SAMPLE)
|
||
|
break;
|
||
|
case RDPSINST_KILL:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_KILL)
|
||
|
DWORD TexKillFlags = 0x0; // TODO: get these from TSS or per-instruction
|
||
|
_PerChannel(
|
||
|
// compare against zero according to kill flags
|
||
|
if ( TexKillFlags & (1<<iChn) )
|
||
|
{
|
||
|
if ( _Dst >= 0. )
|
||
|
m_bPixelDiscard[m_iPix] |= 0x1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if ( _Dst < 0. )
|
||
|
m_bPixelDiscard[m_iPix] |= 0x1;
|
||
|
}
|
||
|
)
|
||
|
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_KILL)
|
||
|
break;
|
||
|
case RDPSINST_BEM:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_BEM)
|
||
|
|
||
|
RDTextureStageState* pTSS = &m_pRD->m_TextureStageState[Args.uiStage];
|
||
|
// Just assuming Args.WriteMask is .rg
|
||
|
|
||
|
_DstC(0) = _Src0NC(0) +
|
||
|
pTSS->m_fVal[D3DTSS_BUMPENVMAT00] * _Src1NC(0) +
|
||
|
pTSS->m_fVal[D3DTSS_BUMPENVMAT10] * _Src1NC(1);
|
||
|
_DstC(1) = _Src0NC(1) +
|
||
|
pTSS->m_fVal[D3DTSS_BUMPENVMAT01] * _Src1NC(0) +
|
||
|
pTSS->m_fVal[D3DTSS_BUMPENVMAT11] * _Src1NC(1);
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_BEM)
|
||
|
break;
|
||
|
case RDPSINST_LUMINANCE:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_LUMINANCE)
|
||
|
RDTextureStageState* pTSS = &m_pRD->m_TextureStageState[Args.uiStage];
|
||
|
|
||
|
FLOAT fLum = _Src1NC(2) *
|
||
|
pTSS->m_fVal[D3DTSS_BUMPENVLSCALE] +
|
||
|
pTSS->m_fVal[D3DTSS_BUMPENVLOFFSET];
|
||
|
|
||
|
fLum = min(max(fLum, 0.0f), 1.0F);
|
||
|
|
||
|
// apply luminance modulation to RGB only
|
||
|
_DstC(0) = _Src0C(0)*fLum;
|
||
|
_DstC(1) = _Src0C(1)*fLum;
|
||
|
_DstC(2) = _Src0C(2)*fLum;
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_LUMINANCE)
|
||
|
break;
|
||
|
case RDPSINST_DEPTH:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_DEPTH)
|
||
|
|
||
|
FLOAT result;
|
||
|
|
||
|
FLOAT* pDstReg = Args.DstReg.GetRegPtr()[m_iPix];
|
||
|
if( pDstReg[1] )
|
||
|
result = pDstReg[0] / pDstReg[1];
|
||
|
else
|
||
|
result = 1.0f;
|
||
|
|
||
|
// clamp
|
||
|
m_Depth[m_iPix] = MAX(0, MIN(1, result));
|
||
|
|
||
|
// snap off extra bits by converting to/from buffer format - necessary
|
||
|
// to make depth buffer equality tests function correctly
|
||
|
SnapDepth();
|
||
|
|
||
|
do
|
||
|
{
|
||
|
m_SampleDepth[m_CurrentSample][m_iPix] = m_Depth[m_iPix];
|
||
|
}
|
||
|
while (NextSample());
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_DEPTH)
|
||
|
break;
|
||
|
case RDPSINST_SRCMOD:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_SRCMOD)
|
||
|
_PerChannelMasked(
|
||
|
|
||
|
if( Args.bComplement )
|
||
|
_Dst = 1 - _Src0;
|
||
|
else if( Args.bBias && Args.bTimes2 )
|
||
|
_Dst = 2*(_Src0 - 0.5);
|
||
|
else if( Args.bBias )
|
||
|
_Dst = _Src0 - 0.5f;
|
||
|
else if( Args.bTimes2 )
|
||
|
_Dst = 2*_Src0;
|
||
|
else
|
||
|
_Dst = _Src0;
|
||
|
|
||
|
_Dst = MAX( _Dst, Args.fRangeMin );
|
||
|
_Dst = MIN( _Dst, Args.fRangeMax );
|
||
|
)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_SRCMOD)
|
||
|
break;
|
||
|
case RDPSINST_SWIZZLE:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_SWIZZLE)
|
||
|
BYTE Swizzle = Args.Swizzle;
|
||
|
_PerChannelMasked(
|
||
|
_Dst = _Src0C(Swizzle&0x3);
|
||
|
Swizzle >>= 2;
|
||
|
)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_SWIZZLE)
|
||
|
break;
|
||
|
case RDPSINST_DSTMOD:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_DSTMOD)
|
||
|
|
||
|
_PerChannelMasked(
|
||
|
_Dst *= Args.fScale;
|
||
|
// clamp to range
|
||
|
_Dst = MAX( _Dst, Args.fRangeMin );
|
||
|
_Dst = MIN( _Dst, Args.fRangeMax );
|
||
|
)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_DSTMOD)
|
||
|
break;
|
||
|
case RDPSINST_MOV:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_MOV)
|
||
|
_PerChannelMasked(_Dst = _Src0N;)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_MOV)
|
||
|
break;
|
||
|
case RDPSINST_RCP:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_RCP)
|
||
|
_PerChannelMasked(_Dst = _Src0N ? 1/_Src0N : 1.0f;)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_RCP)
|
||
|
break;
|
||
|
case RDPSINST_FRC:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_FRC)
|
||
|
_PerChannelMasked(_Dst = _Src0N - (float)floor(_Src0N);)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_FRC)
|
||
|
break;
|
||
|
case RDPSINST_ADD:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_ADD)
|
||
|
_PerChannelMasked(_Dst = _Src0N + _Src1N;)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_ADD)
|
||
|
break;
|
||
|
case RDPSINST_SUB:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_SUB)
|
||
|
_PerChannelMasked(_Dst = _Src0N - _Src1N;)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_SUB)
|
||
|
break;
|
||
|
case RDPSINST_MUL:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_MUL)
|
||
|
_PerChannelMasked(_Dst = _Src0N * _Src1N;);
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_MUL)
|
||
|
break;
|
||
|
case RDPSINST_DP3:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_DP3)
|
||
|
FLOAT dp3 = _Src0NC(0) * _Src1NC(0) +
|
||
|
_Src0NC(1) * _Src1NC(1) +
|
||
|
_Src0NC(2) * _Src1NC(2);
|
||
|
_PerChannelMasked(_Dst = dp3;)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_DP3)
|
||
|
break;
|
||
|
case RDPSINST_DP4:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_DP4)
|
||
|
FLOAT dp4 = _Src0NC(0) * _Src1NC(0) +
|
||
|
_Src0NC(1) * _Src1NC(1) +
|
||
|
_Src0NC(2) * _Src1NC(2) +
|
||
|
_Src0NC(3) * _Src1NC(3);
|
||
|
_PerChannelMasked(_Dst = dp4;)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_DP4)
|
||
|
break;
|
||
|
case RDPSINST_MAD:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_MAD)
|
||
|
_PerChannelMasked(_Dst = _Src0N * _Src1N + _Src2N;)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_MAD)
|
||
|
break;
|
||
|
case RDPSINST_LRP:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_LRP)
|
||
|
_PerChannelMasked(_Dst = (_Src0N*(_Src1N - _Src2N)) + _Src2N;)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_LRP)
|
||
|
break;
|
||
|
case RDPSINST_CND:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_CND)
|
||
|
_PerChannelMasked(_Dst = _Src0N > 0.5f ? _Src1N : _Src2N;)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_CND)
|
||
|
break;
|
||
|
case RDPSINST_CMP:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_CMP)
|
||
|
_PerChannelMasked(_Dst = _Src0N >= 0.f ? _Src1N : _Src2N;)
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_CMP)
|
||
|
break;
|
||
|
case RDPSINST_TEXCOVERAGE:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_TEXCOVERAGE);
|
||
|
Args.pGradients[0][0] = *Args.pDUDX_0 - *Args.pDUDX_1; // du/dx
|
||
|
Args.pGradients[0][1] = *Args.pDUDY_0 - *Args.pDUDY_1; // du/dy
|
||
|
Args.pGradients[1][0] = *Args.pDVDX_0 - *Args.pDVDX_1; // dv/dx
|
||
|
Args.pGradients[1][1] = *Args.pDVDY_0 - *Args.pDVDY_1; // dv/dy
|
||
|
Args.pGradients[2][0] = *Args.pDWDX_0 - *Args.pDWDX_1; // dw/dx
|
||
|
Args.pGradients[2][1] = *Args.pDWDY_0 - *Args.pDWDY_1; // dw/dy
|
||
|
ComputeTextureCoverage( Args.uiStage, Args.pGradients );
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_TEXCOVERAGE)
|
||
|
break;
|
||
|
case RDPSINST_QUADLOOPBEGIN:
|
||
|
m_iPix = 0;
|
||
|
_StepOverInst(RDPSINST_QUADLOOPBEGIN)
|
||
|
break;
|
||
|
case RDPSINST_QUADLOOPEND:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_QUADLOOPEND);
|
||
|
if( 4 > ++m_iPix )
|
||
|
pRDPSInstBuffer -= Args.JumpBackByOffset;
|
||
|
else
|
||
|
_StepOverInst(RDPSINST_QUADLOOPEND)
|
||
|
}
|
||
|
break;
|
||
|
case RDPSINST_QUEUEWRITE:
|
||
|
{
|
||
|
_DeclArgs(RDPSINST_QUEUEWRITE);
|
||
|
QueueIndex[m_iPix]++;
|
||
|
m_QueuedWriteDst[QueueIndex[m_iPix]].DstReg = Args.DstReg;
|
||
|
m_QueuedWriteDst[QueueIndex[m_iPix]].WriteMask = Args.WriteMask;
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_QUEUEWRITE)
|
||
|
break;
|
||
|
case RDPSINST_FLUSHQUEUE:
|
||
|
{
|
||
|
_ASSERT(QueueIndex[m_iPix] >= 0, "Nothing in pixelshader write queue to flush. Refrast mistranslated this pixelshader." );
|
||
|
_ASSERT(QueueIndex[m_iPix] < RDPS_MAX_NUMQUEUEDWRITEREG, "Pixelshader write queue overflow. Refrast mistranslated this pixelshader." );
|
||
|
for( int i = 0; i <= QueueIndex[m_iPix]; i++ )
|
||
|
{
|
||
|
_PerChannel(
|
||
|
if (m_QueuedWriteDst[i].WriteMask & ComponentMask[iChn])
|
||
|
m_QueuedWriteDst[i].DstReg.GetRegPtr()[m_iPix][iChn] = m_QueuedWriteReg[i][m_iPix][iChn];
|
||
|
)
|
||
|
}
|
||
|
QueueIndex[m_iPix] = -1;
|
||
|
}
|
||
|
_StepOverInst(RDPSINST_FLUSHQUEUE)
|
||
|
break;
|
||
|
case RDPSINST_NEXTD3DPSINST:
|
||
|
#if DBG
|
||
|
if (m_pRD->m_pDbgMon)
|
||
|
m_pRD->m_pDbgMon->NextEvent( D3DDM_EVENT_PIXELSHADERINST );
|
||
|
pCurrD3DPSInst = _InstParam(RDPSINST_NEXTD3DPSINST).pInst; // Handy to look at when debugging.
|
||
|
#endif
|
||
|
_StepOverInst(RDPSINST_NEXTD3DPSINST)
|
||
|
break;
|
||
|
default:
|
||
|
_ASSERT(FALSE,"Refrast::ExecShader() - Unrecognized micro-instruction!");
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
// end
|