windows-nt/Source/XPSP1/NT/multimedia/directx/dxg/d3d8/tnl/vvm.cpp

2169 lines
73 KiB
C++
Raw Normal View History

2020-09-26 03:20:57 -05:00
/*==========================================================================
*
* Copyright (C) 1999 Microsoft Corporation. All Rights Reserved.
*
* File: vvm.cpp
* Content: Virtual Vertex Machine implementation
*
* History:
* 6/16/00
* Added LOGP, EXPP, NM3
* RCP, RSQ, LOG, LOGP, EXP, EXPP take input value from W instead of X
* 7/11/00
* Removed NM3 macro
*
*
***************************************************************************/
#include "pch.cpp"
#pragma hdrstop
#include <stdio.h>
#include "vvm.h"
#include "d3dexcept.hpp"
#include "float.h"
#if DBG
#include "rtdmon.hpp"
#endif
// Upper bound (in DWORD tokens, comments excluded) accepted by
// ComputeShaderCodeSize before the stream is declared unterminated.
const DWORD __MAX_CODE_SIZE = 4096;
//-----------------------------------------------------------------------------
// ComputeShaderCodeSize - walks a shader token stream up to its END token
// (0x0000FFFF) and measures it.
//
// pCode                 - token stream; the first DWORD is the version token
// pdwCodeOnlySize       - out: size in BYTES with comment blocks excluded
// pdwCodeAndCommentSize - out: size in BYTES with comment blocks included
// pdwNumConstDefs       - out, optional (may be NULL): number of DEF
//                         instructions encountered
//
// Both size outputs are zeroed up front so they are defined on error.
// Returns S_OK, or D3DERR_INVALIDCALL for a bad version token or when the
// comment-free size exceeds __MAX_CODE_SIZE tokens.
//
HRESULT ComputeShaderCodeSize(
    CONST DWORD* pCode,
    DWORD* pdwCodeOnlySize,
    DWORD* pdwCodeAndCommentSize,
    DWORD* pdwNumConstDefs)
{
    *pdwCodeOnlySize = 0;
    *pdwCodeAndCommentSize = 0;

    // The version token is counted in both totals.
    CONST DWORD* pCursor = pCode;
    const DWORD  Version = *pCursor++;
    DWORD cCodeTokens = 1;      // tokens, comments excluded
    DWORD cAllTokens  = 1;      // tokens, comments included
    DWORD cConstDefs  = 0;

    const DWORD VersionType = Version >> 16;
    if ( ((VersionType != 0xFFFF) && (VersionType != 0xFFFE)) ||
         ((Version & 0xFFFF) == 0x0) )
    {
        D3D_ERR("invalid version token");
        return D3DERR_INVALIDCALL;
    }

    // Coarse scan: only comment and DEF instructions need special stepping,
    // every other token (instruction or parameter) advances by one.
    while ( (*pCursor != 0x0000FFFF) && (cCodeTokens <= __MAX_CODE_SIZE) )
    {
        DWORD cStep     = 1;
        bool  bIsCode   = true;
        if (IsInstructionToken(*pCursor))
        {
            const DWORD opCode = (*pCursor) & D3DSI_OPCODE_MASK;
            if (opCode == D3DSIO_COMMENT)
            {
                // instruction token + comment payload; not counted as code
                const UINT cPayload =
                    ((*pCursor) & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
                cStep   = 1 + cPayload;
                bIsCode = false;
            }
            else if (opCode == D3DSIO_DEF)
            {
                cStep = 6;
                cConstDefs++;
            }
        }
        pCursor    += cStep;
        cAllTokens += cStep;
        if (bIsCode)
            cCodeTokens += cStep;
    }

    // Account for the END token.
    cCodeTokens++;
    cAllTokens++;

    if (cCodeTokens > __MAX_CODE_SIZE)
    {
        D3D_ERR("Shader code size is too big. Possibly, missing D3DVS_END()");
        return D3DERR_INVALIDCALL;
    }

    // Token counts -> byte counts.
    *pdwCodeOnlySize       = 4 * cCodeTokens;
    *pdwCodeAndCommentSize = 4 * cAllTokens;
    if (pdwNumConstDefs != NULL)
        *pdwNumConstDefs = cConstDefs;
    return S_OK;
}
//-----------------------------------------------------------------------------
// Value this file uses to stand in for negative infinity: the most negative
// finite float (-FLT_MAX), not an IEEE infinity.
float MINUS_INFINITY()
{
    const float mostNegativeFinite = -FLT_MAX;
    return mostNegativeFinite;
}
// Value this file uses to stand in for positive infinity: the largest
// finite float (FLT_MAX), not an IEEE infinity.
float PLUS_INFINITY()
{
    const float largestFinite = FLT_MAX;
    return largestFinite;
}
//-----------------------------------------------------------------------------
// Returns the number of SOURCE operands taken by the given op-code
// (the instruction token and the destination token are not counted).
// Unknown op-codes are reported via PrintInstCount / D3D_THROW_FAIL.
//
UINT CVertexVM::GetNumSrcOperands(UINT opcode)
{
    switch (opcode)
    {
    // no source operands
    case D3DSIO_NOP:
        return 0;

    // one source operand
    case D3DSIO_MOV:
    case D3DSIO_RCP:
    case D3DSIO_RSQ:
    case D3DSIO_EXP:
    case D3DSIO_LOG:
    case D3DSIO_EXPP:
    case D3DSIO_LOGP:
    case D3DSIO_LIT:
    case D3DSIO_FRC:
        return 1;

    // two source operands
    case D3DSIO_ADD:
    case D3DSIO_MUL:
    case D3DSIO_DP3:
    case D3DSIO_DP4:
    case D3DSIO_MIN:
    case D3DSIO_MAX:
    case D3DSIO_SLT:
    case D3DSIO_SGE:
    case D3DSIO_DST:
    case D3DSIO_M4x4:
    case D3DSIO_M4x3:
    case D3DSIO_M3x4:
    case D3DSIO_M3x3:
    case D3DSIO_M3x2:
        return 2;

    // three source operands
    case D3DSIO_MAD:
        return 3;

    default:
        PrintInstCount();
        D3D_THROW_FAIL("Illegal instruction");
    }
    return 0;
}
//-----------------------------------------------------------------------------
// Describes, for one source operand of an instruction, which source
// components feed each output component.
//
// The result holds one 4-bit group per output component:
//      bits  0-3   output X
//      bits  4-7   output Y
//      bits  8-11  output Z
//      bits 12-15  output W
// Inside a group, bit 0 = source X, bit 1 = Y, bit 2 = Z, bit 3 = W.
//
// opcode      - instruction op-code
// SourceIndex - sequential index of the source operand (only DST
//               distinguishes between its operands)
//
// Unknown op-codes are reported via PrintInstCount / D3D_THROW_FAIL.
//
UINT CVertexVM::GetRegisterUsage(UINT opcode, UINT SourceIndex)
{
    // Source component bits.
    const UINT kSrcX = 1, kSrcY = 2, kSrcZ = 4, kSrcW = 8;
    const UINT kSrcXYZ  = 7;
    const UINT kSrcXYZW = 0xF;

    // Each output component depends on the same-named source component.
    const UINT kComponentWise =
        kSrcX | (kSrcY << 4) | (kSrcZ << 8) | (kSrcW << 12);
    // Every output component depends on source W only.
    const UINT kScalarW_xyzw =
        kSrcW | (kSrcW << 4) | (kSrcW << 8) | (kSrcW << 12);
    const UINT kScalarW_xyz  = kSrcW | (kSrcW << 4) | (kSrcW << 8);
    // Outputs depend on the first three / all four source components.
    const UINT kXYZ_xy    = kSrcXYZ | (kSrcXYZ << 4);
    const UINT kXYZ_xyz   = kXYZ_xy | (kSrcXYZ << 8);
    const UINT kXYZ_xyzw  = kXYZ_xyz | (kSrcXYZ << 12);
    const UINT kXYZW_xyz  = kSrcXYZW | (kSrcXYZW << 4) | (kSrcXYZW << 8);
    const UINT kXYZW_xyzw = kXYZW_xyz | (kSrcXYZW << 12);

    switch (opcode)
    {
    case D3DSIO_MOV:
    case D3DSIO_ADD:
    case D3DSIO_MAD:
    case D3DSIO_MUL:
    case D3DSIO_MIN:
    case D3DSIO_MAX:
    case D3DSIO_SLT:
    case D3DSIO_SGE:
    case D3DSIO_FRC:
        return kComponentWise;

    case D3DSIO_RCP:
    case D3DSIO_RSQ:
    case D3DSIO_EXP:
    case D3DSIO_LOG:
        return kScalarW_xyzw;

    case D3DSIO_EXPP:
    case D3DSIO_LOGP:
        return kScalarW_xyz;

    case D3DSIO_DP3:
    case D3DSIO_M3x4:
        return kXYZ_xyzw;

    case D3DSIO_DP4:
    case D3DSIO_M4x4:
        return kXYZW_xyzw;

    case D3DSIO_M4x3:
        return kXYZW_xyz;

    case D3DSIO_M3x3:
        return kXYZ_xyz;

    case D3DSIO_M3x2:
        return kXYZ_xy;

    case D3DSIO_LIT:
        // Y <- X;  Z <- X, Y, W
        return (kSrcX << 4) | ((kSrcX | kSrcY | kSrcW) << 8);

    case D3DSIO_DST:
        // first operand:  Y <- Y, Z <- Z
        // other operand:  Y <- Y, W <- W
        return (SourceIndex == 0) ? ((kSrcY << 4) | (kSrcZ << 8))
                                  : ((kSrcY << 4) | (kSrcW << 12));

    case D3DSIO_NOP:
        return 0;

    default:
        PrintInstCount();
        D3D_THROW_FAIL("Illegal instruction");
    }
    return 0;
}
//-----------------------------------------------------------------------------
// Returns the length of an instruction in DWORDs, given its instruction
// token:
//   COMMENT - instruction token plus the payload size encoded in the token
//   NOP     - the instruction token alone
//   other   - instruction token + destination token + source operands
//
UINT CVertexVM::GetInstructionLength(DWORD inst)
{
    const DWORD opcode = D3DSI_GETOPCODE(inst);

    if (opcode == D3DSIO_COMMENT)
    {
        const DWORD cPayload =
            (inst & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
        return cPayload + 1;
    }
    if (opcode == D3DSIO_NOP)
    {
        return 1;
    }
    return GetNumSrcOperands(opcode) + 2;
}
//-----------------------------------------------------------------------------
// VertexShaderInstDisAsm - Generates human-readable character string for a
// single vertex shader instruction. String interface is similar to _snprintf.
//-----------------------------------------------------------------------------
static int VertexShaderInstDisAsm(
char* pStrRet, int StrSizeRet, DWORD* pShader, DWORD Flags )
{
DWORD* pToken = pShader;
// stage in local string, then copy
char pStr[256] = "";
#define _ADDSTR( _Str ) { _snprintf( pStr, 256, "%s" _Str , pStr ); }
#define _ADDSTRP( _Str, _Param ) { _snprintf( pStr, 256, "%s" _Str , pStr, _Param ); }
DWORD Inst = *pToken++;
DWORD Opcode = (Inst & D3DSI_OPCODE_MASK);
switch (Opcode)
{
case D3DSIO_NOP: _ADDSTR("NOP"); break;
case D3DSIO_MOV: _ADDSTR("MOV"); break;
case D3DSIO_ADD: _ADDSTR("ADD"); break;
case D3DSIO_MAD: _ADDSTR("MAD"); break;
case D3DSIO_MUL: _ADDSTR("MUL"); break;
case D3DSIO_RCP: _ADDSTR("RCP"); break;
case D3DSIO_RSQ: _ADDSTR("RSQ"); break;
case D3DSIO_DP3: _ADDSTR("DP3"); break;
case D3DSIO_DP4: _ADDSTR("DP4"); break;
case D3DSIO_MIN: _ADDSTR("MIN"); break;
case D3DSIO_MAX: _ADDSTR("MAX"); break;
case D3DSIO_SLT: _ADDSTR("SLT"); break;
case D3DSIO_SGE: _ADDSTR("SGE"); break;
case D3DSIO_EXP: _ADDSTR("EXP"); break;
case D3DSIO_LOG: _ADDSTR("LOG"); break;
case D3DSIO_EXPP:_ADDSTR("EXPP"); break;
case D3DSIO_LOGP:_ADDSTR("LOGP"); break;
case D3DSIO_LIT: _ADDSTR("LIT"); break;
case D3DSIO_DST: _ADDSTR("DST"); break;
case D3DSIO_COMMENT: _ADDSTR("COMMENT"); break;
default : _ADDSTR("???"); break;
}
if (*pToken & (1L<<31))
{
DWORD DstParam = *pToken++;
switch (DstParam & D3DSP_REGTYPE_MASK)
{
case D3DSPR_TEMP : _ADDSTRP(" T%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
case D3DSPR_ADDR : _ADDSTR(" Addr"); break;
case D3DSPR_RASTOUT : _ADDSTRP(" R%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
case D3DSPR_ATTROUT : _ADDSTRP(" A%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
case D3DSPR_TEXCRDOUT: _ADDSTRP(" T%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
}
if (*pToken & (1L<<31)) _ADDSTR(" ");
while (*pToken & (1L<<31))
{
DWORD SrcParam = *pToken++;
switch (SrcParam & D3DSP_REGTYPE_MASK)
{
case D3DSPR_TEMP : _ADDSTRP(" T%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
case D3DSPR_INPUT : _ADDSTRP(" I%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
case D3DSPR_CONST : _ADDSTRP(" C%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
}
if (*pToken & (1L<<31)) _ADDSTR(",");
}
}
return _snprintf( pStrRet, StrSizeRet, "%s", pStr );
}
//-----------------------------------------------------------------------------
#if DBG
// Per-instruction debug record; only compiled when DBG is non-zero.
// One record per instruction (comments included) is filled in by
// CVertexVM::ValidateShader and handed out by the CVShaderCodeI::Inst*
// accessors.
typedef struct _VShaderInst
{
// Copy of the instruction's tokens (for a comment only the instruction
// token itself is stored).
DWORD m_Tokens[D3DDM_MAX_VSINSTDWORD];
// Disassembly text produced by VertexShaderInstDisAsm (left zeroed for
// comment instructions).
char m_String[D3DDM_MAX_VSINSTSTRING];
// For comment instructions: points at the comment payload inside the
// shader's own code buffer (not a copy); NULL otherwise.
DWORD* m_pComment;
// For comment instructions: payload size in DWORDs; 0 otherwise.
DWORD m_cdwComment;
} VShaderInst;
#endif
//-----------------------------------------------------------------------------
class CVShaderCodeI: public CVShaderCode
{
public:
CVShaderCodeI()
{
m_pdwCode = NULL;
m_InstCount = 0;
#if DBG
m_pInst = NULL;
#endif
}
~CVShaderCodeI()
{
delete m_pdwCode;
#if DBG
if (m_pInst) delete m_pInst;
#endif
}
DWORD* m_pdwCode; // Pointer to the original code
DWORD m_dwSize; // Size of the code in DWORDs
DWORD m_InstCount;
#if DBG
VShaderInst* m_pInst;
#endif
DWORD InstCount( void ) { return m_InstCount; }
DWORD* InstTokens( DWORD Inst );
char* InstDisasm( DWORD Inst );
DWORD* InstComment( DWORD Inst );
DWORD InstCommentSize( DWORD Inst );
};
//-----------------------------------------------------------------------------
// Returns the stored tokens of instruction number Inst, or NULL when Inst
// is out of range. Always NULL when DBG is off.
DWORD* CVShaderCodeI::InstTokens( DWORD Inst )
{
#if DBG
    return (Inst < m_InstCount) ? m_pInst[Inst].m_Tokens : NULL;
#else
    return NULL;
#endif
}
//-----------------------------------------------------------------------------
// Returns the disassembly text of instruction number Inst, or NULL when
// Inst is out of range. Always NULL when DBG is off.
char* CVShaderCodeI::InstDisasm( DWORD Inst )
{
#if DBG
    return (Inst < m_InstCount) ? m_pInst[Inst].m_String : NULL;
#else
    return NULL;
#endif
}
//-----------------------------------------------------------------------------
// Returns the comment payload pointer recorded for instruction number Inst,
// or NULL when Inst is out of range. Always NULL when DBG is off.
DWORD* CVShaderCodeI::InstComment( DWORD Inst )
{
#if DBG
    return (Inst < m_InstCount) ? m_pInst[Inst].m_pComment : NULL;
#else
    return NULL;
#endif
}
//-----------------------------------------------------------------------------
// Returns the comment payload size (in DWORDs) recorded for instruction
// number Inst, or 0 when Inst is out of range. Always 0 when DBG is off.
//
// The result is a count, so the "nothing" value is spelled 0 rather than
// the pointer constant NULL.
DWORD CVShaderCodeI::InstCommentSize( DWORD Inst )
{
#if DBG
    if ( Inst >= m_InstCount ) return 0;
    return m_pInst[Inst].m_cdwComment;
#else
    return 0;
#endif
}
//-----------------------------------------------------------------------------
// Vertex Virtual Machine object implementation
//
//-----------------------------------------------------------------------------
// Starts with no active shader and the instruction counter at zero. In DBG
// builds every software constant register is marked as not yet written.
CVertexVM::CVertexVM()
{
    m_CurInstIndex = 0;
    m_pCurrentShader = NULL;
#if DBG
    UINT remaining = D3DVS_CONSTREG_MAX_V1_1;
    while (remaining != 0)
    {
        --remaining;
        m_c_initialized[remaining] = FALSE;
    }
#endif
}
//-----------------------------------------------------------------------------
// Destructor; performs no work of its own.
// NOTE(review): Init() allocates m_reg.m_c with new[] and nothing here
// releases it - confirm that it is freed elsewhere (e.g. by the type of
// m_reg), otherwise it leaks.
CVertexVM::~CVertexVM()
{
}
//-----------------------------------------------------------------------------
// Allocates the constant register array. The array always holds at least
// D3DVS_CONSTREG_MAX_V1_1 entries, more if MaxVertexShaderConst asks for it.
// A failed allocation is reported via D3D_THROW_FAIL.
void CVertexVM::Init(UINT MaxVertexShaderConst)
{
    UINT cConstRegs = D3DVS_CONSTREG_MAX_V1_1;
    if (MaxVertexShaderConst > cConstRegs)
    {
        cConstRegs = MaxVertexShaderConst;
    }
    m_MaxVertexShaderConst = cConstRegs;

    m_reg.m_c = new VVM_WORD[m_MaxVertexShaderConst];
    if (NULL == m_reg.m_c)
    {
        D3D_THROW_FAIL("Not enough memory to allocate vertex shader constant array");
    }
}
//-----------------------------------------------------------------------------
// Returns the address of the first vertex of element dwElementIndex in the
// register file selected by dwRegType. For constant registers this is the
// address of the constant itself. An unknown register type is reported via
// D3D_THROW(D3DERR_INVALIDCALL, ...).
//
VVM_WORD * CVertexVM::GetDataAddr(DWORD dwRegType, DWORD dwElementIndex)
{
    VVM_WORD* pAddr = NULL;
    switch (dwRegType)
    {
    case D3DSPR_TEMP:
        pAddr = &m_reg.m_r[dwElementIndex][0];
        break;
    case D3DSPR_INPUT:
        pAddr = &m_reg.m_v[dwElementIndex][0];
        break;
    case D3DSPR_CONST:
        pAddr = &m_reg.m_c[dwElementIndex];
        break;
    case D3DSPR_ADDR:
        pAddr = &m_reg.m_a[dwElementIndex][0];
        break;
    case D3DSPR_RASTOUT:
        pAddr = &m_reg.m_output[dwElementIndex][0];
        break;
    case D3DSPR_ATTROUT:
        pAddr = &m_reg.m_color[dwElementIndex][0];
        break;
    case D3DSPR_TEXCRDOUT:
        pAddr = &m_reg.m_texture[dwElementIndex][0];
        break;
    default:
        D3D_THROW(D3DERR_INVALIDCALL, "Invalid register type");
    }
    return pAddr;
}
//-----------------------------------------------------------------------------
// Writes dwCount VVM_WORD values from pBuffer into the register file
// dwMemType, starting at register dwStart.
//
// Constant registers are stored one VVM_WORD per register and are copied as
// one contiguous run. Every other register type is laid out as a batch of
// VVMVERTEXBATCH vertices per register, and only the first vertex of each
// register is written.
//
// In DBG builds a write past the constant array is rejected and the
// "initialized" flags of the touched software constants are set.
// Returns D3D_OK; failures raised inside are handled by D3D_CATCH.
//
HRESULT CVertexVM::SetData(DWORD dwMemType, DWORD dwStart, DWORD dwCount,
                           LPVOID pBuffer)
{
    try
    {
        VVM_WORD* pDst = this->GetDataAddr(dwMemType, dwStart);
        if (dwMemType != D3DSPR_CONST)
        {
            // First vertex of each consecutive register.
            const VVM_WORD* pSrc = (const VVM_WORD*)pBuffer;
            for (UINT reg = 0; reg < dwCount; ++reg)
            {
                pDst[reg * VVMVERTEXBATCH] = pSrc[reg];
            }
        }
        else
        {
#if DBG
            if ((dwStart + dwCount) > m_MaxVertexShaderConst)
            {
                D3D_THROW_FAIL("Attemt to write outside constant register array");
            }
            // Initialized flags exist only for the software constant
            // registers; clamp the flagged range to them.
            if (dwStart < D3DVS_CONSTREG_MAX_V1_1)
            {
                const UINT cFlags =
                    ((dwStart + dwCount) >= D3DVS_CONSTREG_MAX_V1_1)
                        ? (D3DVS_CONSTREG_MAX_V1_1 - dwStart)
                        : dwCount;
                BOOL* pFlag = &m_c_initialized[dwStart];
                for (UINT k = 0; k < cFlags; ++k, ++pFlag)
                {
                    *pFlag = TRUE;
                }
            }
#endif
            const UINT cbCopy = dwCount * sizeof(VVM_WORD);
            memcpy(pDst, pBuffer, cbCopy);
        }
    }
    D3D_CATCH;
    return D3D_OK;
}
//-----------------------------------------------------------------------------
// Reads dwCount VVM_WORD values into pBuffer from the register file
// dwMemType, starting at register dwStart.
//
// Constant registers are copied as one contiguous run. For every other
// register type only the first vertex of each register's batch is read.
// Returns D3D_OK; failures raised inside are handled by D3D_CATCH.
//
HRESULT CVertexVM::GetData(DWORD dwMemType, DWORD dwStart, DWORD dwCount,
                           LPVOID pBuffer)
{
    try
    {
        VVM_WORD* pSrc = this->GetDataAddr(dwMemType, dwStart);
        if (dwMemType != D3DSPR_CONST)
        {
            // First vertex of each consecutive register.
            VVM_WORD* pOut = (VVM_WORD*)pBuffer;
            for (UINT reg = 0; reg < dwCount; ++reg)
            {
                pOut[reg] = pSrc[reg * VVMVERTEXBATCH];
            }
        }
        else
        {
            memcpy(pBuffer, pSrc, dwCount * sizeof(VVM_WORD));
        }
    }
    D3D_CATCH;
    return D3D_OK;
}
//-----------------------------------------------------------------------------
// ValidateShader
// - allocates memory for the shader and copies the token stream into it
// - validates shader code (version, op-codes, destination registers,
//   instruction count)
// - in DBG builds, builds the per-instruction token / disassembly table
// - computes output FVF, output vertex size and vertex element offsets
//
// shader    - receives the code copy and all computed layout fields
// orgShader - token stream: version token, instructions, D3DVS_END()
//
// Every failure is reported through D3D_THROW / D3D_THROW_FAIL; memory
// already attached to 'shader' stays owned by 'shader'.
//
void CVertexVM::ValidateShader(CVShaderCodeI* shader, DWORD* orgShader)
{
    // shader will be already stripped of comments upon reaching here if stripping
    // is necessary, so always use CodeAndComment size
    DWORD dwCodeOnlySize;
    DWORD dwCodeAndCommentSize;
    HRESULT hr = ComputeShaderCodeSize(orgShader, &dwCodeOnlySize,
                                       &dwCodeAndCommentSize, NULL);
    if (hr != S_OK)
        D3D_THROW(hr, "");

    // Initialize shader header and allocate memory for the shader code
    shader->m_dwSize = dwCodeAndCommentSize >> 2; // Size in DWORDs
    shader->m_pdwCode = new DWORD[shader->m_dwSize];
    if (shader->m_pdwCode == NULL)
    {
        D3D_THROW_FAIL("Cannot allocate memory for shader code");
    }
    memcpy(shader->m_pdwCode, orgShader, dwCodeAndCommentSize);

    // Based on the what output registers are modified, we compute the
    // corresponding FVF id. The id will be used for memory allocation
    // of the output buffer and will be passed to the rasterizer
    DWORD dwOutFVF = 0;
    DWORD nTexCoord = 0; // Number of output texture coordinates
    // For each texture register stores the combined write mask.
    // Used to find how many floats are written to each texture coordinates
    DWORD TextureWritten[8];
    const DWORD dwMaxTexRegs = sizeof(TextureWritten) / sizeof(TextureWritten[0]);
    memset(TextureWritten, 0, sizeof(TextureWritten));

    m_pdwCurToken = shader->m_pdwCode;
    DWORD* pEnd = shader->m_pdwCode + shader->m_dwSize;
    shader->m_dwOutRegs = 0;
    shader->m_InstCount = 0;
    m_CurInstIndex = 0;

    if ((*m_pdwCurToken != D3DVS_VERSION(1, 1)) &&
        (*m_pdwCurToken != D3DVS_VERSION(1, 0)) )
    {
        D3D_THROW_FAIL("Invalid vertex shader code version");
    }
    m_pdwCurToken++;

    while (m_pdwCurToken < pEnd && *m_pdwCurToken != D3DVS_END())
    {
        // Remember where the instruction starts: its length is applied from
        // here, independent of how far EvalDestination advances the cursor.
        DWORD * pdwNextToken = m_pdwCurToken;
        DWORD dwInst = *m_pdwCurToken;
        if (!IsInstructionToken(dwInst))
        {
            PrintInstCount();
            D3D_THROW_FAIL("Intruction token has 31 bit set");
        }
        DWORD dwOpCode = D3DSI_GETOPCODE(dwInst);
        m_pdwCurToken++;
        switch (dwOpCode)
        {
        case D3DSIO_COMMENT:
        case D3DSIO_NOP  : ; break;
        case D3DSIO_MOV  :
        case D3DSIO_ADD  :
        case D3DSIO_MAD  :
        case D3DSIO_MUL  :
        case D3DSIO_RCP  :
        case D3DSIO_RSQ  :
        case D3DSIO_DP3  :
        case D3DSIO_DP4  :
        case D3DSIO_MIN  :
        case D3DSIO_MAX  :
        case D3DSIO_SLT  :
        case D3DSIO_SGE  :
        case D3DSIO_EXP  :
        case D3DSIO_LOG  :
        case D3DSIO_EXPP :
        case D3DSIO_LOGP :
        case D3DSIO_LIT  :
        case D3DSIO_DST  :
        case D3DSIO_FRC  :
        case D3DSIO_M4x4 :
        case D3DSIO_M4x3 :
        case D3DSIO_M3x4 :
        case D3DSIO_M3x3 :
        case D3DSIO_M3x2 :
            {
                // Find out if output register are modified by the command and
                // update the output FVF
                DWORD dwOffset;
                EvalDestination();
                // Base of the register file the destination lives in.
                VVM_WORD* pOutRegister = NULL;
                if ((m_pDest - m_dwOffset * VVMVERTEXBATCH) != m_reg.m_r[0])
                {
                    dwOffset = m_dwOffset;
                    pOutRegister = m_pDest - m_dwOffset * VVMVERTEXBATCH;
                    if (pOutRegister == m_reg.m_output[0])
                    {
                        if (dwOffset == D3DSRO_POSITION)
                        {
                            dwOutFVF |= D3DFVF_XYZRHW;
                            shader->m_dwOutRegs |= CPSGPShader_POSITION;
                        }
                        else
                        if (dwOffset == D3DSRO_FOG)
                        {
                            dwOutFVF |= D3DFVF_FOG;
                            shader->m_dwOutRegs |= CPSGPShader_FOG;
                        }
                        else
                        if (dwOffset == D3DSRO_POINT_SIZE)
                        {
                            dwOutFVF |= D3DFVF_PSIZE;
                            shader->m_dwOutRegs |= CPSGPShader_PSIZE;
                        }
                    }
                    else
                    if (pOutRegister == m_reg.m_color[0])
                    {
                        if (dwOffset == 0)
                        {
                            dwOutFVF |= D3DFVF_DIFFUSE;
                            shader->m_dwOutRegs |= CPSGPShader_DIFFUSE;
                        }
                        else
                        {
                            dwOutFVF |= D3DFVF_SPECULAR;
                            shader->m_dwOutRegs |= CPSGPShader_SPECULAR;
                        }
                    }
                    else
                    if (pOutRegister == m_reg.m_texture[0])
                    {
                        // The register number comes straight from the token
                        // stream; reject it before it indexes the local
                        // write-mask table.
                        if (dwOffset >= dwMaxTexRegs)
                        {
                            PrintInstCount();
                            D3D_THROW_FAIL("Invalid output register offset");
                        }
                        if (TextureWritten[dwOffset] == 0)
                        {
                            nTexCoord++;
                        }
                        TextureWritten[dwOffset] |= m_WriteMask;
                    }
                    else
                    if (pOutRegister == m_reg.m_a[0])
                    {
                        // Address register: nothing to record for the FVF.
                    }
                    else
                    {
                        PrintInstCount();
                        D3D_THROW_FAIL("Invalid output register offset");
                    }
                }
            }
            break;
        default:
            {
                PrintInstCount();
                D3D_THROW_FAIL("Invalid shader opcode");
            }
        }
        m_pdwCurToken = pdwNextToken + GetInstructionLength(dwInst);
        shader->m_InstCount++;
        // Comments do not count against the instruction limit.
        if (dwOpCode != D3DSIO_COMMENT)
        {
            m_CurInstIndex++;
            if (m_CurInstIndex > D3DVS_MAXINSTRUCTIONCOUNT_V1_1)
            {
                D3D_THROW_FAIL("Too many instructions in the shader");
            }
        }
    }

    // Guarded with "#if DBG" (not "#ifdef"): m_pInst and VShaderInst are
    // declared under "#if DBG", so this must not be compiled when DBG is
    // defined as 0.
#if DBG
    // compute per-instruction stuff for shader
    if (shader->m_InstCount)
    {
        shader->m_pInst = new VShaderInst[shader->m_InstCount];
        if (shader->m_pInst == NULL)
        {
            D3D_THROW_FAIL("Cannot allocate memory for shader instructions");
        }
        memset( shader->m_pInst, 0, sizeof(VShaderInst)*shader->m_InstCount );
        DWORD dwCurInst = 0;
        // Remove version
        m_pdwCurToken = shader->m_pdwCode + 1;
        pEnd = shader->m_pdwCode + shader->m_dwSize;
        while( m_pdwCurToken < pEnd && *m_pdwCurToken != D3DVS_END())
        {
            UINT ilength = GetInstructionLength(*m_pdwCurToken);
            DWORD dwOpCode = D3DSI_GETOPCODE(*m_pdwCurToken);
            if (dwOpCode == D3DSIO_COMMENT)
            {
                // Comment payload is referenced in place, not copied.
                shader->m_pInst[dwCurInst].m_Tokens[0] = *m_pdwCurToken;
                shader->m_pInst[dwCurInst].m_pComment = (m_pdwCurToken+1);
                shader->m_pInst[dwCurInst].m_cdwComment = ilength - 1;
            }
            else
            {
                memcpy( shader->m_pInst[dwCurInst].m_Tokens, m_pdwCurToken,
                        4*ilength );
                VertexShaderInstDisAsm( shader->m_pInst[dwCurInst].m_String,
                    D3DDM_MAX_VSINSTSTRING, shader->m_pInst[dwCurInst].m_Tokens, 0x0 );
            }
            m_pdwCurToken += ilength;
            dwCurInst++;
        }
    }
#endif

    dwOutFVF |= nTexCoord << D3DFVF_TEXCOUNT_SHIFT;

    // Compute output vertex offsets and size
    shader->m_dwOutVerSize = 4 * sizeof(float); // X, Y, Z, RHW
    shader->m_nOutTexCoord = nTexCoord;
    DWORD dwOffset = 4 * sizeof(float); // Current offset in the output vertex
    if ((dwOutFVF & D3DFVF_XYZRHW) == 0)
    {
        D3D_THROW_FAIL("Position is not written by shader");
    }
    // Each optional element records the current offset and, when present,
    // takes 4 bytes of the output vertex.
    shader->m_dwPointSizeOffset = dwOffset;
    if (dwOutFVF & D3DFVF_PSIZE)
    {
        dwOffset += 4;
        shader->m_dwOutVerSize += 4;
    }
    shader->m_dwDiffuseOffset = dwOffset;
    if (dwOutFVF & D3DFVF_DIFFUSE)
    {
        shader->m_dwOutVerSize += 4;
        dwOffset += 4;
    }
    shader->m_dwSpecularOffset = dwOffset;
    if (dwOutFVF & D3DFVF_SPECULAR)
    {
        dwOffset += 4;
        shader->m_dwOutVerSize += 4;
    }
    shader->m_dwFogOffset = dwOffset;
    if (dwOutFVF & D3DFVF_FOG)
    {
        dwOffset += 4;
        shader->m_dwOutVerSize += 4;
    }

    // Initialize texture coordinates
    shader->m_dwTextureOffset = dwOffset;
    if (nTexCoord)
    {
        for (DWORD i = 0; i < nTexCoord; i++)
        {
            DWORD n = 0; // Size of texture coordinates
            // nTexCoord registers were written; they must be 0..nTexCoord-1.
            if (TextureWritten[i] == 0)
            {
                D3D_THROW_FAIL("Texture coordinates are not continuous");
            }
            switch (TextureWritten[i])
            {
            case D3DSP_WRITEMASK_ALL:
                dwOutFVF |= D3DFVF_TEXCOORDSIZE4(i);
                n = 4 * sizeof(float);
                break;
            case D3DSP_WRITEMASK_0 | D3DSP_WRITEMASK_1 | D3DSP_WRITEMASK_2:
                dwOutFVF |= D3DFVF_TEXCOORDSIZE3(i);
                n = 3 * sizeof(float);
                break;
            case D3DSP_WRITEMASK_0 | D3DSP_WRITEMASK_1:
                dwOutFVF |= D3DFVF_TEXCOORDSIZE2(i);
                n = 2 * sizeof(float);
                break;
            case D3DSP_WRITEMASK_0:
                dwOutFVF |= D3DFVF_TEXCOORDSIZE1(i);
                n = 1 * sizeof(float);
                break;
            default:
                D3D_THROW_FAIL("Invalid write mask for texture register");
            }
            shader->m_dwOutVerSize += n;
            shader->m_dwOutTexCoordSize[i] = n;
            dwOffset += n;
        }
    }
    shader->m_dwOutFVF = dwOutFVF;
}
//-----------------------------------------------------------------------------
// Creates a shader object from the token stream pCode and validates it.
// pElements and dwNumElements are not used here.
// Returns the new object, or NULL (after logging) when allocation or
// validation raised an HRESULT; the partially built object is destroyed.
CVShaderCode* CVertexVM::CreateShader(CVElement* pElements, DWORD dwNumElements,
                                      DWORD* pCode)
{
    CVShaderCodeI* pNewShader = NULL;
    try
    {
        pNewShader = new CVShaderCodeI();
        if (NULL == pNewShader)
        {
            D3D_THROW(E_OUTOFMEMORY, "Cannot allocate memory");
        }
        ValidateShader(pNewShader, pCode);
    }
    catch (HRESULT)
    {
        delete pNewShader;
        pNewShader = NULL;
        D3D_ERR("Error in shader code creation");
    }
    return pNewShader;
}
//-----------------------------------------------------------------------------
// Makes pCode the current shader. The pointer is stored as given (no copy,
// no validation). Always returns D3D_OK.
HRESULT CVertexVM::SetActiveShader(CVShaderCode* pCode)
{
    CVShaderCodeI* const pImpl = static_cast<CVShaderCodeI*>(pCode);
    m_pCurrentShader = pImpl;
    return D3D_OK;
}
//-----------------------------------------------------------------------------
// - parses destination token
// - computes m_pDest, m_WrideMask, m_dwOffset for the destination
// - current token pointer is andvanced to the next token
//
void CVertexVM::EvalDestination()
{
DWORD dwCurToken = *m_pdwCurToken;
DWORD dwRegType = D3DSI_GETREGTYPE(dwCurToken);
m_dwOffset = D3DSI_GETREGNUM(dwCurToken);
m_WriteMask = D3DSI_GETWRITEMASK(dwCurToken);
switch (dwRegType)
{
case D3DSPR_TEMP:
m_pDest = m_reg.m_r[0];
break;
case D3DSPR_RASTOUT:
m_pDest = m_reg.m_output[0];
break;
case D3DSPR_ATTROUT:
m_pDest = m_reg.m_color[0];
break;
case D3DSPR_TEXCRDOUT:
m_pDest = m_reg.m_texture[0];
break;
case D3DSPR_ADDR:
m_pDest = m_reg.m_a[0];
break;
default:
PrintInstCount();
D3D_THROW_FAIL("Invalid register for destination");
}
m_pdwCurToken++;
m_pDest += m_dwOffset * VVMVERTEXBATCH;
}
//---------------------------------------------------------------------
// Logs (via D3D_ERR) the one-based number of the instruction currently being
// processed; m_CurInstIndex itself is zero-based. Called right before an
// instruction-level error is raised.
void CVertexVM::PrintInstCount()
{
D3D_ERR("Error in instruction number: %d", m_CurInstIndex + 1);
}
//---------------------------------------------------------------------
// Evaluates the source operand at m_pdwCurToken into m_Source[index]
// (m_count vertices) and advances m_pdwCurToken.
//
// Handles, in this order:
//   - constant registers, either relative (per-vertex offset taken from the
//     integer stored in address register a0.x) or absolute (the same
//     constant replicated for every vertex)
//   - all other register types, read per vertex
//   - the swizzle, applied while copying
//   - the NEG source modifier, applied afterwards
//
// DBG builds additionally reject out-of-range relative indices and reads of
// constants that were never written.
//
void CVertexVM::EvalSource(DWORD index)
{
    const DWORD token     = *m_pdwCurToken;
    const DWORD regType   = D3DSI_GETREGTYPE(token);
    const DWORD regNum    = D3DSI_GETREGNUM(token);
    const bool  bNoSwizzle = (D3DVS_GETSWIZZLE(token) == D3DVS_NOSWIZZLE);
    // Which source component feeds X, Y, Z and W of the result.
    const DWORD selX = D3DVS_GETSWIZZLECOMP(token, 0);
    const DWORD selY = D3DVS_GETSWIZZLECOMP(token, 1);
    const DWORD selZ = D3DVS_GETSWIZZLECOMP(token, 2);
    const DWORD selW = D3DVS_GETSWIZZLECOMP(token, 3);

    VVM_WORD* const pOut = m_Source[index];

    if (regType != D3DSPR_CONST)
    {
        // Per-vertex register data.
        VVM_WORD* pReg = this->GetDataAddr(regType, regNum);
        if (bNoSwizzle)
        {
            memcpy(pOut, pReg, m_count * sizeof(VVM_WORD));
        }
        else
        {
            for (UINT v = 0; v < m_count; v++)
            {
                const float* pComp = (float*)&pReg[v];
                pOut[v].x = pComp[selX];
                pOut[v].y = pComp[selY];
                pOut[v].z = pComp[selZ];
                pOut[v].w = pComp[selW];
            }
        }
    }
    else
    {
        const D3DVS_ADDRESSMODE_TYPE addrMode =
            (D3DVS_ADDRESSMODE_TYPE)D3DVS_GETADDRESSMODE(token);
        if (addrMode == D3DVS_ADDRMODE_RELATIVE)
        {
            // c[regNum + a0.x], resolved separately for every vertex.
            for (UINT v = 0; v < m_count; v++)
            {
                const int relOffset = *(int*)&m_reg.m_a[0][v].x;
                const int regIdx = (int)regNum + relOffset;
#if DBG
                if (regIdx < 0 || regIdx >= D3DVS_CONSTREG_MAX_V1_1)
                {
                    PrintInstCount();
                    D3D_THROW_FAIL("Constant register index is out of bounds");
                }
                if (!m_c_initialized[regIdx])
                {
                    PrintInstCount();
                    D3D_ERR("Attempt to read from uninitialized constant register %d", regIdx);
                    D3D_THROW_FAIL("");
                }
#endif
                VVM_WORD* pConst = &m_reg.m_c[regIdx];
                if (bNoSwizzle)
                {
                    pOut[v] = *pConst;
                }
                else
                {
                    const float* pComp = (float*)pConst;
                    pOut[v].x = pComp[selX];
                    pOut[v].y = pComp[selY];
                    pOut[v].z = pComp[selZ];
                    pOut[v].w = pComp[selW];
                }
            }
        }
        else
        {
            const int regIdx = (int)regNum;
#if DBG
            if (!m_c_initialized[regIdx])
            {
                PrintInstCount();
                D3D_ERR("Attempt to read from uninitialized constant register %d", regIdx);
                D3D_THROW_FAIL("");
            }
#endif
            // One constant, replicated for the whole batch.
            VVM_WORD value = m_reg.m_c[regIdx];
            if (!bNoSwizzle)
            {
                const float* pComp = (float*)&m_reg.m_c[regIdx];
                value.x = pComp[selX];
                value.y = pComp[selY];
                value.z = pComp[selZ];
                value.w = pComp[selW];
            }
            for (UINT v = 0; v < m_count; v++)
            {
                pOut[v] = value;
            }
        }
    }

    if (D3DVS_GETSRCMODIFIER(token) == D3DSPSM_NEG)
    {
        for (UINT v = 0; v < m_count; v++)
        {
            pOut[v].x = -pOut[v].x;
            pOut[v].y = -pOut[v].y;
            pOut[v].z = -pOut[v].z;
            pOut[v].w = -pOut[v].w;
        }
    }
    m_pdwCurToken++;
}
//---------------------------------------------------------------------
// Evaluates 'count' consecutive registers, starting with the one named by
// the token at m_pdwCurToken, into m_Source[index] .. m_Source[index+count-1]
// (m_count vertices each) and advances m_pdwCurToken by one token.
//
// Parameters:
//      index - index of the first source operand
//      count - number of source operands (consecutive registers)
//
// Swizzle and source modifiers of the token are not applied by this
// overload.
//
// DBG builds validate every constant register that is actually read
// (base + j), and report the real register number in the message.
//
void CVertexVM::EvalSource(DWORD index, DWORD count)
{
    const DWORD dwCurToken = *m_pdwCurToken;
    const DWORD dwRegType = D3DSI_GETREGTYPE(dwCurToken);
    const DWORD dwOffset = D3DSI_GETREGNUM(dwCurToken);
    VVM_WORD *src;
    VVM_WORD *outsrc = m_Source[index];
    if (dwRegType == D3DSPR_CONST)
    {
        D3DVS_ADDRESSMODE_TYPE am;
        am = (D3DVS_ADDRESSMODE_TYPE)D3DVS_GETADDRESSMODE(dwCurToken);
        int offset = (int)dwOffset;
        if (am == D3DVS_ADDRMODE_RELATIVE)
        {
            // c[dwOffset + a0.x + j], resolved separately for every vertex.
            for (UINT j=0; j < count; j++)
            {
                VVM_WORD *pRow = m_Source[index + j];
                for (UINT i=0; i < m_count; i++)
                {
                    int relOffset = *(int*)&m_reg.m_a[0][i].x;
                    offset = (int)dwOffset + relOffset;
#if DBG
                    // Check the register that is really read below.
                    const int regIdx = offset + (int)j;
                    if (regIdx < 0 || regIdx >= D3DVS_CONSTREG_MAX_V1_1)
                    {
                        PrintInstCount();
                        D3D_THROW_FAIL("Constant register index is out of bounds");
                    }
                    if (!m_c_initialized[regIdx])
                    {
                        PrintInstCount();
                        D3D_ERR("Attempt to read from uninitialized constant register %d", regIdx);
                        D3D_THROW_FAIL("");
                    }
#endif // DBG
                    src = &m_reg.m_c[offset] + j;
                    *pRow = *src;
                    pRow++;
                }
            }
        }
        else
        {
#if DBG
            for (UINT i = 0; i < count; i++)
            {
                if (!m_c_initialized[offset + i])
                {
                    PrintInstCount();
                    // Report the register number, not the loop counter.
                    D3D_ERR("Attempt to read from uninitialized constant register %d", offset + (int)i);
                    D3D_THROW_FAIL("");
                }
            }
#endif
            // Each constant is replicated across its operand's batch.
            src = &m_reg.m_c[offset];
            for (UINT j=0; j < count; j++)
            {
                for (UINT i=0; i < m_count; i++)
                {
                    outsrc[i] = *src;
                }
                src++;
                outsrc += VVMVERTEXBATCH;
            }
        }
    }
    else
    {
        // Per-vertex registers: consecutive registers are VVMVERTEXBATCH
        // entries apart, in the source file and in m_Source alike.
        src = this->GetDataAddr(dwRegType, dwOffset);
        UINT size = m_count * sizeof(VVM_WORD);
        for (UINT i=0; i < count; i++)
        {
            memcpy(outsrc, src, size);
            outsrc += VVMVERTEXBATCH;
            src += VVMVERTEXBATCH;
        }
    }
    m_pdwCurToken++;
}
//-----------------------------------------------------------------------------
void CVertexVM::InstMov()
{
EvalDestination();
EvalSource(0);
if (m_pDest == m_reg.m_a[0])
{
for (UINT i=0; i < m_count; i++)
{
float p = (float)floor(m_Source[0][i].x);
*(int*)&m_pDest[i].x = FTOI(p);
}
}
else
{
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
memcpy(m_pDest, m_Source[0], m_BatchSize);
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = m_Source[0][i].x;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].y;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = m_Source[0][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = m_Source[0][i].w;
}
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstAdd()
{
EvalDestination();
EvalSource(0);
EvalSource(1);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = m_Source[0][i].x + m_Source[1][i].x;
m_pDest[i].y = m_Source[0][i].y + m_Source[1][i].y;
m_pDest[i].z = m_Source[0][i].z + m_Source[1][i].z;
m_pDest[i].w = m_Source[0][i].w + m_Source[1][i].w;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = m_Source[0][i].x + m_Source[1][i].x;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].y + m_Source[1][i].y;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = m_Source[0][i].z + m_Source[1][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = m_Source[0][i].w + m_Source[1][i].w;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstMad()
{
EvalDestination();
EvalSource(0);
EvalSource(1);
EvalSource(2);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x + m_Source[2][i].x;
m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y + m_Source[2][i].y;
m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z + m_Source[2][i].z;
m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w + m_Source[2][i].w;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x + m_Source[2][i].x;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y + m_Source[2][i].y;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z + m_Source[2][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w + m_Source[2][i].w;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstMul()
{
EvalDestination();
EvalSource(0);
EvalSource(1);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x;
m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z;
m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstDP3()
{
EvalDestination();
EvalSource(0);
EvalSource(1);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x =
m_pDest[i].y =
m_pDest[i].z =
m_pDest[i].w = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
float v = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = v;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = v;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = v;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = v;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstDP4()
{
EvalDestination();
EvalSource(0);
EvalSource(1);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x =
m_pDest[i].y =
m_pDest[i].z =
m_pDest[i].w = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z +
m_Source[0][i].w * m_Source[1][i].w;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
float v = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z +
m_Source[0][i].w * m_Source[1][i].w;
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = v;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = v;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = v;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = v;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstRcp()
{
EvalDestination();
EvalSource(0);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
float v = m_Source[0][i].w;
if (v == 1.0f)
{
// Must be exactly 1.0
m_pDest[i].x =
m_pDest[i].y =
m_pDest[i].z =
m_pDest[i].w = 1.0f;
}
else
if (v == 0)
{
m_pDest[i].x =
m_pDest[i].y =
m_pDest[i].z =
m_pDest[i].w = PLUS_INFINITY();
}
else
{
m_pDest[i].x =
m_pDest[i].y =
m_pDest[i].z =
m_pDest[i].w = 1.0f/v;
}
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
float v = m_Source[0][i].w;
if (FLOAT_EQZ(v))
v = PLUS_INFINITY();
else
if (v != 1.0f)
v = 1.0f/v;
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = v;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = v;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = v;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = v;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstRsq()
{
EvalDestination();
EvalSource(0);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
float v = ABSF(m_Source[0][i].w);
if (v == 1.0f)
{
m_pDest[i].x =
m_pDest[i].y =
m_pDest[i].z =
m_pDest[i].w = 1.0f;
}
else
if (v == 0)
{
m_pDest[i].x =
m_pDest[i].y =
m_pDest[i].z =
m_pDest[i].w = PLUS_INFINITY();
}
else
{
v = (float)(1.0f / sqrt(v));
m_pDest[i].x =
m_pDest[i].y =
m_pDest[i].z =
m_pDest[i].w = v;
}
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
float v = ABSF(m_Source[0][i].w);
if (FLOAT_EQZ(v))
v = PLUS_INFINITY();
else
if (FLOAT_CMP_PONE(v, !=))
v = (float)(1.0f / sqrt(v));
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = v;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = v;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = v;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = v;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstSlt()
{
EvalDestination();
EvalSource(0);
EvalSource(1);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = (m_Source[0][i].x < m_Source[1][i].x) ? 1.0f : 0.0f;
m_pDest[i].y = (m_Source[0][i].y < m_Source[1][i].y) ? 1.0f : 0.0f;
m_pDest[i].z = (m_Source[0][i].z < m_Source[1][i].z) ? 1.0f : 0.0f;
m_pDest[i].w = (m_Source[0][i].w < m_Source[1][i].w) ? 1.0f : 0.0f;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = (m_Source[0][i].x < m_Source[1][i].x) ? 1.0f : 0.0f;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = (m_Source[0][i].y < m_Source[1][i].y) ? 1.0f : 0.0f;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = (m_Source[0][i].z < m_Source[1][i].z) ? 1.0f : 0.0f;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = (m_Source[0][i].w < m_Source[1][i].w) ? 1.0f : 0.0f;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstSge()
{
EvalDestination();
EvalSource(0);
EvalSource(1);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = (m_Source[0][i].x >= m_Source[1][i].x) ? 1.0f : 0.0f;
m_pDest[i].y = (m_Source[0][i].y >= m_Source[1][i].y) ? 1.0f : 0.0f;
m_pDest[i].z = (m_Source[0][i].z >= m_Source[1][i].z) ? 1.0f : 0.0f;
m_pDest[i].w = (m_Source[0][i].w >= m_Source[1][i].w) ? 1.0f : 0.0f;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = (m_Source[0][i].x >= m_Source[1][i].x) ? 1.0f : 0.0f;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = (m_Source[0][i].y >= m_Source[1][i].y) ? 1.0f : 0.0f;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = (m_Source[0][i].z >= m_Source[1][i].z) ? 1.0f : 0.0f;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = (m_Source[0][i].w >= m_Source[1][i].w) ? 1.0f : 0.0f;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstMin()
{
EvalDestination();
EvalSource(0);
EvalSource(1);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x=(m_Source[0][i].x < m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
m_pDest[i].y=(m_Source[0][i].y < m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
m_pDest[i].z=(m_Source[0][i].z < m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
m_pDest[i].w=(m_Source[0][i].w < m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x=(m_Source[0][i].x < m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y=(m_Source[0][i].y < m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z=(m_Source[0][i].z < m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w=(m_Source[0][i].w < m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstMax()
{
EvalDestination();
EvalSource(0);
EvalSource(1);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x=(m_Source[0][i].x >= m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
m_pDest[i].y=(m_Source[0][i].y >= m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
m_pDest[i].z=(m_Source[0][i].z >= m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
m_pDest[i].w=(m_Source[0][i].w >= m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x=(m_Source[0][i].x >= m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y=(m_Source[0][i].y >= m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z=(m_Source[0][i].z >= m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w=(m_Source[0][i].w >= m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
}
}
}
//-----------------------------------------------------------------------------
// Approximation 2**x
//
float ExpApprox(float x)
{
float tmp = (float)pow(2, x);
// Artificially reduce precision
DWORD tmpd = *(DWORD*)&tmp & 0xFFFFFF00;
return *(float*)&tmpd;
}
//-----------------------------------------------------------------------------
// Approximation Log2(x)
//
const float LOG2 = (float)(1.0f/log(2));
float LogApprox(float x)
{
float tmp = (float)(log(x) * LOG2);
// Artificially reduce precision
DWORD tmpd = *(DWORD*)&tmp & 0xFFFFFF00;
return *(float*)&tmpd;
}
//-----------------------------------------------------------------------------
// Full precision EXP
//
void CVertexVM::InstExp()
{
EvalDestination();
EvalSource(0);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
float v = (float)pow(2, m_Source[0][i].w);
m_pDest[i].x = v;
m_pDest[i].y = v;
m_pDest[i].z = v;
m_pDest[i].w = v;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
float v = (float)pow(2, m_Source[0][i].w);
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = v;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = v;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = v;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = v;
}
}
}
//-----------------------------------------------------------------------------
// Low precision EXP
//
void CVertexVM::InstExpP()
{
EvalDestination();
EvalSource(0);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
float w = m_Source[0][i].w; // Input value
float v = (float)floor(w);
m_pDest[i].x = (float)pow(2, v);
m_pDest[i].y = w - v;
m_pDest[i].z = ExpApprox(w);
m_pDest[i].w = 1;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
float w = m_Source[0][i].w; // Input value
float v = (float)floor(w);
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = (float)pow(2, v);
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = w - v;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = ExpApprox(w);
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = 1;
}
}
}
//-----------------------------------------------------------------------------
// Full precision LOG
//
void CVertexVM::InstLog()
{
EvalDestination();
EvalSource(0);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
float v = ABSF(m_Source[0][i].w);
if (v != 0)
{
m_pDest[i].x =
m_pDest[i].y =
m_pDest[i].z =
m_pDest[i].w = (float)(log(v) * LOG2);
}
else
{
m_pDest[i].x =
m_pDest[i].y =
m_pDest[i].z =
m_pDest[i].w = MINUS_INFINITY();
}
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
float v = ABSF(m_Source[0][i].w);
if (v != 0)
{
v = (float)(log(v) * LOG2);
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = v;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = v;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = v;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = v;
}
else
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = MINUS_INFINITY();
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = MINUS_INFINITY();
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = MINUS_INFINITY();
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = MINUS_INFINITY();
}
}
}
}
//-----------------------------------------------------------------------------
// Low precision LOG
//
void CVertexVM::InstLogP()
{
EvalDestination();
EvalSource(0);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
float v = ABSF(m_Source[0][i].w);
if (v != 0)
{
// -128.0 <= exponent < 127.0
int p = (int)(*(DWORD*)&v >> 23) - 127;
m_pDest[i].x = (float)p;
// 1.0 <= mantissa < 2.0
p = (*(DWORD*)&v & 0x7FFFFF) | 0x3F800000;
m_pDest[i].y = *(float*)&p;
m_pDest[i].z = LogApprox(v);
m_pDest[i].w = 1.0f;
}
else
{
m_pDest[i].x = MINUS_INFINITY();
m_pDest[i].y = 1.0f;
m_pDest[i].z = MINUS_INFINITY();
m_pDest[i].w = 1.0f;
}
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
float v = ABSF(m_Source[0][i].w);
if (v != 0)
{
// -128.0 <= exponent < 127.0
int p = (int)(*(DWORD*)&v >> 23) - 127;
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = (float)p;
// 1.0 <= mantissa < 2.0
p = (*(DWORD*)&v & 0x7FFFFF) | 0x3F800000;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = *(float*)&p;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = LogApprox(v);
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = 1.0f;
}
else
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = MINUS_INFINITY();
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = 1.0f;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = MINUS_INFINITY();
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = 1.0f;
}
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstFrc()
{
EvalDestination();
EvalSource(0);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = m_Source[0][i].x - (float)floor(m_Source[0][i].x);
m_pDest[i].y = m_Source[0][i].y - (float)floor(m_Source[0][i].y);
m_pDest[i].z = m_Source[0][i].z - (float)floor(m_Source[0][i].z);
m_pDest[i].w = m_Source[0][i].w - (float)floor(m_Source[0][i].w);
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = m_Source[0][i].x - (float)floor(m_Source[0][i].x);
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].y - (float)floor(m_Source[0][i].y);
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = m_Source[0][i].z - (float)floor(m_Source[0][i].z);
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = m_Source[0][i].w - (float)floor(m_Source[0][i].w);
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstLit()
{
EvalDestination();
EvalSource(0);
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = 1;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = 0;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = 0;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = 1;
float power = m_Source[0][i].w;
const float MAXPOWER = 127.9961f;
if (power < -MAXPOWER)
power = -MAXPOWER; // Fits into 8.8 fixed point format
else
if (power > MAXPOWER)
power = MAXPOWER; // Fits into 8.8 fixed point format
if (m_Source[0][i].x > 0)
{
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].x;
if (m_WriteMask & D3DSP_WRITEMASK_2)
if (m_Source[0][i].y > 0)
{
// Allowed approximation is EXP(power * LOG(m_Source[0].y))
m_pDest[i].z = (float)(pow(m_Source[0][i].y, power));
}
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstDst()
{
EvalDestination();
EvalSource(0);
EvalSource(1);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = 1;
m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
m_pDest[i].z = m_Source[0][i].z;
m_pDest[i].w = m_Source[1][i].w;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = 1;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = m_Source[0][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = m_Source[1][i].w;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstM4x4()
{
EvalDestination();
EvalSource(0);
EvalSource(1, 4);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z +
m_Source[0][i].w * m_Source[1][i].w;
m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
m_Source[0][i].y * m_Source[2][i].y +
m_Source[0][i].z * m_Source[2][i].z +
m_Source[0][i].w * m_Source[2][i].w;
m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
m_Source[0][i].y * m_Source[3][i].y +
m_Source[0][i].z * m_Source[3][i].z +
m_Source[0][i].w * m_Source[3][i].w;
m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
m_Source[0][i].y * m_Source[4][i].y +
m_Source[0][i].z * m_Source[4][i].z +
m_Source[0][i].w * m_Source[4][i].w;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z +
m_Source[0][i].w * m_Source[1][i].w;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
m_Source[0][i].y * m_Source[2][i].y +
m_Source[0][i].z * m_Source[2][i].z +
m_Source[0][i].w * m_Source[2][i].w;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
m_Source[0][i].y * m_Source[3][i].y +
m_Source[0][i].z * m_Source[3][i].z +
m_Source[0][i].w * m_Source[3][i].w;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
m_Source[0][i].y * m_Source[4][i].y +
m_Source[0][i].z * m_Source[4][i].z +
m_Source[0][i].w * m_Source[4][i].w;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstM4x3()
{
EvalDestination();
EvalSource(0);
EvalSource(1, 3);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z +
m_Source[0][i].w * m_Source[1][i].w;
m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
m_Source[0][i].y * m_Source[2][i].y +
m_Source[0][i].z * m_Source[2][i].z +
m_Source[0][i].w * m_Source[2][i].w;
m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
m_Source[0][i].y * m_Source[3][i].y +
m_Source[0][i].z * m_Source[3][i].z +
m_Source[0][i].w * m_Source[3][i].w;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z +
m_Source[0][i].w * m_Source[1][i].w;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
m_Source[0][i].y * m_Source[2][i].y +
m_Source[0][i].z * m_Source[2][i].z +
m_Source[0][i].w * m_Source[2][i].w;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
m_Source[0][i].y * m_Source[3][i].y +
m_Source[0][i].z * m_Source[3][i].z +
m_Source[0][i].w * m_Source[3][i].w;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstM3x4()
{
EvalDestination();
EvalSource(0);
EvalSource(1, 4);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z;
m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
m_Source[0][i].y * m_Source[2][i].y +
m_Source[0][i].z * m_Source[2][i].z;
m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
m_Source[0][i].y * m_Source[3][i].y +
m_Source[0][i].z * m_Source[3][i].z;
m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
m_Source[0][i].y * m_Source[4][i].y +
m_Source[0][i].z * m_Source[4][i].z;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
m_Source[0][i].y * m_Source[2][i].y +
m_Source[0][i].z * m_Source[2][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
m_Source[0][i].y * m_Source[3][i].y +
m_Source[0][i].z * m_Source[3][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_3)
m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
m_Source[0][i].y * m_Source[4][i].y +
m_Source[0][i].z * m_Source[4][i].z;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstM3x3()
{
EvalDestination();
EvalSource(0);
EvalSource(1, 3);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z;
m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
m_Source[0][i].y * m_Source[2][i].y +
m_Source[0][i].z * m_Source[2][i].z;
m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
m_Source[0][i].y * m_Source[3][i].y +
m_Source[0][i].z * m_Source[3][i].z;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
m_Source[0][i].y * m_Source[2][i].y +
m_Source[0][i].z * m_Source[2][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_2)
m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
m_Source[0][i].y * m_Source[3][i].y +
m_Source[0][i].z * m_Source[3][i].z;
}
}
}
//-----------------------------------------------------------------------------
void CVertexVM::InstM3x2()
{
EvalDestination();
EvalSource(0);
EvalSource(1, 2);
if (m_WriteMask == D3DSP_WRITEMASK_ALL)
{
for (UINT i=0; i < m_count; i++)
{
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z;
m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
m_Source[0][i].y * m_Source[2][i].y +
m_Source[0][i].z * m_Source[2][i].z;
}
}
else
{
for (UINT i=0; i < m_count; i++)
{
if (m_WriteMask & D3DSP_WRITEMASK_0)
m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
m_Source[0][i].y * m_Source[1][i].y +
m_Source[0][i].z * m_Source[1][i].z;
if (m_WriteMask & D3DSP_WRITEMASK_1)
m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
m_Source[0][i].y * m_Source[2][i].y +
m_Source[0][i].z * m_Source[2][i].z;
}
}
}
//-----------------------------------------------------------------------------
// Runs the current shader over a batch of vertexCount vertices.
//
// Fails with D3DERR_INVALIDCALL when no shader is current. Otherwise the
// output position register (m_reg.m_output[0]) is initialized to
// (0, 0, 0, 1) for every vertex, and the token stream is walked from the
// token after the version token up to (not including) the last token of
// the shader, dispatching each opcode to its Inst*() handler. Comment
// blocks are skipped and are not counted in m_CurInstIndex. An unknown
// opcode is reported through D3D_THROW_FAIL; D3D_CATCH is presumably what
// turns thrown failures into the returned HRESULT.
//
// pv is only referenced by the debug-monitor hook (DBG builds without
// PSGPDLL).
HRESULT CVertexVM::ExecuteShader(LPD3DFE_PROCESSVERTICES pv, UINT vertexCount)
{
    if (m_pCurrentShader == NULL)
    {
        D3D_ERR("No current shader set in the Virtual Shader Machine");
        return D3DERR_INVALIDCALL;
    }
    try
    {
        m_count = vertexCount;
        m_BatchSize = vertexCount * sizeof(VVM_WORD);
        m_CurInstIndex = 0;

        // Start right after the version token and stop before the last
        // token of the shader
        m_pdwCurToken = m_pCurrentShader->m_pdwCode + 1;
        DWORD* pLastToken = m_pCurrentShader->m_pdwCode + m_pCurrentShader->m_dwSize;
        --pLastToken;

        // Initialize position register
        for (UINT vtx = 0; vtx < m_count; ++vtx)
        {
            m_reg.m_output[0][vtx].x = 0;
            m_reg.m_output[0][vtx].y = 0;
            m_reg.m_output[0][vtx].z = 0;
            m_reg.m_output[0][vtx].w = 1;
        }

        while (m_pdwCurToken < pLastToken)
        {
            // Fetch the instruction token and step past it; the handlers
            // continue parsing from m_pdwCurToken
            DWORD dwToken = *m_pdwCurToken;
            m_pdwCurToken++;
            DWORD dwOp = D3DSI_GETOPCODE(dwToken);

            switch (dwOp)
            {
            case D3DSIO_COMMENT:
                // Skip the remaining tokens of the comment block
                m_pdwCurToken += ((GetInstructionLength(dwToken))-1);
                break;
            case D3DSIO_NOP:
                break;
            case D3DSIO_MOV:    InstMov();  break;
            case D3DSIO_ADD:    InstAdd();  break;
            case D3DSIO_MAD:    InstMad();  break;
            case D3DSIO_MUL:    InstMul();  break;
            case D3DSIO_RCP:    InstRcp();  break;
            case D3DSIO_RSQ:    InstRsq();  break;
            case D3DSIO_DP3:    InstDP3();  break;
            case D3DSIO_DP4:    InstDP4();  break;
            case D3DSIO_MIN:    InstMin();  break;
            case D3DSIO_MAX:    InstMax();  break;
            case D3DSIO_SLT:    InstSlt();  break;
            case D3DSIO_SGE:    InstSge();  break;
            case D3DSIO_EXP:    InstExp();  break;
            case D3DSIO_LOG:    InstLog();  break;
            case D3DSIO_EXPP:   InstExpP(); break;
            case D3DSIO_LOGP:   InstLogP(); break;
            case D3DSIO_LIT:    InstLit();  break;
            case D3DSIO_DST:    InstDst();  break;
            case D3DSIO_FRC:    InstFrc();  break;
            case D3DSIO_M4x4:   InstM4x4(); break;
            case D3DSIO_M4x3:   InstM4x3(); break;
            case D3DSIO_M3x4:   InstM3x4(); break;
            case D3DSIO_M3x3:   InstM3x3(); break;
            case D3DSIO_M3x2:   InstM3x2(); break;
            default:
                {
                    PrintInstCount();
                    D3D_THROW_FAIL("Invalid shader opcode");
                }
            }
#ifndef PSGPDLL
#if DBG
            if (pv->pDbgMon) pv->pDbgMon->NextEvent(D3DDM_EVENT_VERTEXSHADERINST);
#endif
#endif // PSGPDLL
            // Comments do not count as executed instructions
            if (dwOp != D3DSIO_COMMENT)
                m_CurInstIndex++;
        }
        m_CurInstIndex = 0;
    }
    D3D_CATCH;
    return D3D_OK;
}
//-----------------------------------------------------------------------------
// Stores in *pData the address returned by GetDataAddr(dwMemType, 0).
// Returns D3D_OK on success. When GetDataAddr() throws an HRESULT,
// *pData is set to NULL and D3DERR_INVALIDCALL is returned (the thrown
// code itself is not propagated).
HRESULT CVertexVM::GetDataPointer(DWORD dwMemType, VVM_WORD ** pData)
{
    HRESULT hr = D3D_OK;
    try
    {
        *pData = GetDataAddr(dwMemType, 0);
    }
    catch (HRESULT)
    {
        *pData = NULL;
        hr = D3DERR_INVALIDCALL;
    }
    return hr;
}
//---------------------------------------------------------------------
// Returns a pointer to this machine's register file (m_reg). The pointer
// refers to a member of the object, so it is not a copy.
VVM_REGISTERS* CVertexVM::GetRegisters()
{
    VVM_REGISTERS* pRegisters = &m_reg;
    return pRegisters;
}