1440 lines
44 KiB
C++
1440 lines
44 KiB
C++
|
/*==========================================================================
|
||
|
*
|
||
|
* Copyright (C) 1999 Microsoft Corporation. All Rights Reserved.
|
||
|
*
|
||
|
* File: vvm.cpp
|
||
|
* Content: Virtual Vertex Machine implementation
|
||
|
*
|
||
|
*
|
||
|
***************************************************************************/
|
||
|
#include "pch.cpp"
|
||
|
#pragma hdrstop
|
||
|
|
||
|
float MINUS_MAX()
|
||
|
{
|
||
|
DWORD v = 0xFF7FFFFF;
|
||
|
return *(float*)&v;
|
||
|
}
|
||
|
|
||
|
float PLUS_MAX()
|
||
|
{
|
||
|
DWORD v = 0x7F7FFFFF;
|
||
|
return *(float*)&v;
|
||
|
}
|
||
|
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// Returns instruction size, based on the op-code
|
||
|
//
|
||
|
// Returns the length, in tokens, of the instruction whose first token is
// 'inst'. For ordinary instructions this is the opcode token plus the
// destination token plus one token per source operand. A comment occupies
// its own token plus the comment size encoded in the token. NOP and any
// unrecognized opcode are reported as a single token.
UINT GetInstructionLength(DWORD inst)
{
    const UINT cOpcodeAndDest = 2;  // opcode token + destination token

    switch (D3DSI_GETOPCODE( inst ))
    {
    // Three source operands
    case D3DSIO_MAD :
        return 3 + cOpcodeAndDest;

    // Two source operands
    case D3DSIO_ADD :
    case D3DSIO_MUL :
    case D3DSIO_DP3 :
    case D3DSIO_DP4 :
    case D3DSIO_MIN :
    case D3DSIO_MAX :
    case D3DSIO_SLT :
    case D3DSIO_SGE :
    case D3DSIO_DST :
    case D3DSIO_M4x4:
    case D3DSIO_M4x3:
    case D3DSIO_M3x4:
    case D3DSIO_M3x3:
    case D3DSIO_M3x2:
        return 2 + cOpcodeAndDest;

    // One source operand
    case D3DSIO_MOV :
    case D3DSIO_RCP :
    case D3DSIO_RSQ :
    case D3DSIO_EXP :
    case D3DSIO_LOG :
    case D3DSIO_EXPP:
    case D3DSIO_LOGP:
    case D3DSIO_LIT :
    case D3DSIO_FRC :
        return 1 + cOpcodeAndDest;

    // Comment: the token itself followed by the embedded comment payload
    case D3DSIO_COMMENT:
        return 1 + ((inst & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT);

    // NOP and everything unknown: just the opcode token
    case D3DSIO_NOP :
    default:
        return 1;
    }
}
|
||
|
|
||
|
#if 0
|
||
|
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// Stores in *pData the address of element 0 of the register file selected
// by dwMemType. If GetDataAddr throws a CD3DException, *pData is set to NULL
// and DDERR_INVALIDPARAMS is returned; otherwise D3D_OK is returned.
// NOTE: this function sits inside an "#if 0" region and is not compiled.
HRESULT CVertexVM::GetDataPointer(DWORD dwMemType, RDVECTOR4 ** pData)
{
    try
    {
        *pData = this->GetDataAddr(dwMemType, 0);
    }
    catch (const CD3DException&)    // by const reference: no copy, no slicing
    {
        *pData = NULL;
        return DDERR_INVALIDPARAMS;
    }
    return D3D_OK;
}
|
||
|
#endif
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
// RefVM implementation.
|
||
|
//
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// Vertex Virtual Machine Opcode implementations
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::WriteResult()
|
||
|
{
|
||
|
if( m_WriteMask == D3DSP_WRITEMASK_ALL)
|
||
|
{
|
||
|
*m_pDest = m_TmpReg;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if( m_WriteMask & D3DSP_WRITEMASK_0)
|
||
|
m_pDest->x = m_TmpReg.x;
|
||
|
if( m_WriteMask & D3DSP_WRITEMASK_1)
|
||
|
m_pDest->y = m_TmpReg.y;
|
||
|
if( m_WriteMask & D3DSP_WRITEMASK_2)
|
||
|
m_pDest->z = m_TmpReg.z;
|
||
|
if( m_WriteMask & D3DSP_WRITEMASK_3)
|
||
|
m_pDest->w = m_TmpReg.w;
|
||
|
}
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstMov()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
|
||
|
if( m_pDest == m_reg.m_a )
|
||
|
{
|
||
|
float p = (float)floor(m_Source[0].x);
|
||
|
*(int*)&m_pDest->x = FTOI(p);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
m_TmpReg = m_Source[0];
|
||
|
WriteResult();
|
||
|
}
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstAdd()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1);
|
||
|
|
||
|
m_TmpReg.x = m_Source[0].x + m_Source[1].x;
|
||
|
m_TmpReg.y = m_Source[0].y + m_Source[1].y;
|
||
|
m_TmpReg.z = m_Source[0].z + m_Source[1].z;
|
||
|
m_TmpReg.w = m_Source[0].w + m_Source[1].w;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstMad()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1);
|
||
|
SetSrcReg(2);
|
||
|
|
||
|
m_TmpReg.x = m_Source[0].x * m_Source[1].x + m_Source[2].x;
|
||
|
m_TmpReg.y = m_Source[0].y * m_Source[1].y + m_Source[2].y;
|
||
|
m_TmpReg.z = m_Source[0].z * m_Source[1].z + m_Source[2].z;
|
||
|
m_TmpReg.w = m_Source[0].w * m_Source[1].w + m_Source[2].w;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstMul()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1);
|
||
|
|
||
|
m_TmpReg.x = m_Source[0].x * m_Source[1].x;
|
||
|
m_TmpReg.y = m_Source[0].y * m_Source[1].y;
|
||
|
m_TmpReg.z = m_Source[0].z * m_Source[1].z;
|
||
|
m_TmpReg.w = m_Source[0].w * m_Source[1].w;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstRcp()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
|
||
|
if( m_Source[0].w == 1.0f )
|
||
|
{
|
||
|
// Must be exactly 1.0
|
||
|
m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = 1.0f;
|
||
|
}
|
||
|
else if( m_Source[0].w == 0 )
|
||
|
{
|
||
|
m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = PLUS_MAX();
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = 1.0f/m_Source[0].w;
|
||
|
}
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstRsq()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
|
||
|
float v = ABSF(m_Source[0].w);
|
||
|
if( v == 1.0f )
|
||
|
{
|
||
|
m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = 1.0f;
|
||
|
}
|
||
|
else if( v == 0 )
|
||
|
{
|
||
|
m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = PLUS_MAX();
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
v = (float)(1.0f / sqrt(v));
|
||
|
m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = v;
|
||
|
}
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstDP3()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1);
|
||
|
|
||
|
m_TmpReg.x =
|
||
|
m_TmpReg.y =
|
||
|
m_TmpReg.z =
|
||
|
m_TmpReg.w = m_Source[0].x * m_Source[1].x +
|
||
|
m_Source[0].y * m_Source[1].y +
|
||
|
m_Source[0].z * m_Source[1].z;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstDP4()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1);
|
||
|
|
||
|
m_TmpReg.x =
|
||
|
m_TmpReg.y =
|
||
|
m_TmpReg.z =
|
||
|
m_TmpReg.w = m_Source[0].x * m_Source[1].x +
|
||
|
m_Source[0].y * m_Source[1].y +
|
||
|
m_Source[0].z * m_Source[1].z +
|
||
|
m_Source[0].w * m_Source[1].w;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstSlt()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1);
|
||
|
|
||
|
m_TmpReg.x = (m_Source[0].x < m_Source[1].x) ? 1.0f : 0.0f;
|
||
|
m_TmpReg.y = (m_Source[0].y < m_Source[1].y) ? 1.0f : 0.0f;
|
||
|
m_TmpReg.z = (m_Source[0].z < m_Source[1].z) ? 1.0f : 0.0f;
|
||
|
m_TmpReg.w = (m_Source[0].w < m_Source[1].w) ? 1.0f : 0.0f;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstSge()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1);
|
||
|
|
||
|
m_TmpReg.x = (m_Source[0].x >= m_Source[1].x) ? 1.0f : 0.0f;
|
||
|
m_TmpReg.y = (m_Source[0].y >= m_Source[1].y) ? 1.0f : 0.0f;
|
||
|
m_TmpReg.z = (m_Source[0].z >= m_Source[1].z) ? 1.0f : 0.0f;
|
||
|
m_TmpReg.w = (m_Source[0].w >= m_Source[1].w) ? 1.0f : 0.0f;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstMin()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1);
|
||
|
|
||
|
m_TmpReg.x=(m_Source[0].x < m_Source[1].x) ? m_Source[0].x : m_Source[1].x;
|
||
|
m_TmpReg.y=(m_Source[0].y < m_Source[1].y) ? m_Source[0].y : m_Source[1].y;
|
||
|
m_TmpReg.z=(m_Source[0].z < m_Source[1].z) ? m_Source[0].z : m_Source[1].z;
|
||
|
m_TmpReg.w=(m_Source[0].w < m_Source[1].w) ? m_Source[0].w : m_Source[1].w;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstMax()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1);
|
||
|
|
||
|
m_TmpReg.x=(m_Source[0].x >= m_Source[1].x) ? m_Source[0].x : m_Source[1].x;
|
||
|
m_TmpReg.y=(m_Source[0].y >= m_Source[1].y) ? m_Source[0].y : m_Source[1].y;
|
||
|
m_TmpReg.z=(m_Source[0].z >= m_Source[1].z) ? m_Source[0].z : m_Source[1].z;
|
||
|
m_TmpReg.w=(m_Source[0].w >= m_Source[1].w) ? m_Source[0].w : m_Source[1].w;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstExp()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
|
||
|
float v = m_Source[0].w;
|
||
|
|
||
|
m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = (float)pow(2, v);
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstExpP()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
|
||
|
float w = m_Source[0].w;
|
||
|
float v = (float)floor(m_Source[0].w);
|
||
|
|
||
|
m_TmpReg.x = (float)pow(2, v);
|
||
|
m_TmpReg.y = w - v;
|
||
|
// Reduced precision exponent
|
||
|
float tmp = (float)pow(2, w);
|
||
|
DWORD tmpd = *(DWORD*)&tmp & 0xffffff00;
|
||
|
m_TmpReg.z = *(float*)&tmpd;
|
||
|
m_TmpReg.w = 1;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstLog()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
|
||
|
float v = ABSF(m_Source[0].w);
|
||
|
if (v != 0)
|
||
|
{
|
||
|
m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w =
|
||
|
(float)(log(v)/log(2));
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = MINUS_MAX();
|
||
|
}
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstLogP()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
|
||
|
float v = ABSF(m_Source[0].w);
|
||
|
if (v != 0)
|
||
|
{
|
||
|
int p = (int)(*(DWORD*)&v >> 23) - 127;
|
||
|
m_TmpReg.x = (float)p; // exponent
|
||
|
p = (*(DWORD*)&v & 0x7FFFFF) | 0x3f800000;
|
||
|
m_TmpReg.y = *(float*)&p;// mantissa;
|
||
|
float tmp = (float)(log(v)/log(2));
|
||
|
DWORD tmpd = *(DWORD*)&tmp & 0xffffff00;
|
||
|
m_TmpReg.z = *(float*)&tmpd;
|
||
|
m_TmpReg.w = 1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
m_TmpReg.x = MINUS_MAX();
|
||
|
m_TmpReg.y = 1.0f;
|
||
|
m_TmpReg.z = MINUS_MAX();
|
||
|
m_TmpReg.w = 1.0f;
|
||
|
}
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstLit()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
|
||
|
m_TmpReg.x = 1;
|
||
|
m_TmpReg.y = 0;
|
||
|
m_TmpReg.z = 0;
|
||
|
m_TmpReg.w = 1;
|
||
|
float power = m_Source[0].w;
|
||
|
const float MAXPOWER = 127.9961f;
|
||
|
if (power < -MAXPOWER)
|
||
|
power = -MAXPOWER; // Fits into 8.8 fixed point format
|
||
|
else
|
||
|
if (power > MAXPOWER)
|
||
|
power = MAXPOWER; // Fits into 8.8 fixed point format
|
||
|
|
||
|
if (m_Source[0].x > 0)
|
||
|
{
|
||
|
m_TmpReg.y = m_Source[0].x;
|
||
|
if (m_Source[0].y > 0)
|
||
|
{
|
||
|
// Allowed approximation is EXP(power * LOG(m_Source[0].y))
|
||
|
m_TmpReg.z = (float)(pow(m_Source[0].y, power));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstFrc()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
|
||
|
m_TmpReg.x = m_Source[0].x - (float)floor(m_Source[0].x);
|
||
|
m_TmpReg.y = m_Source[0].y - (float)floor(m_Source[0].y);
|
||
|
m_TmpReg.z = m_Source[0].z - (float)floor(m_Source[0].z);
|
||
|
m_TmpReg.w = m_Source[0].w - (float)floor(m_Source[0].w);
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstDst()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1);
|
||
|
|
||
|
m_TmpReg.x = 1;
|
||
|
m_TmpReg.y = m_Source[0].y * m_Source[1].y;
|
||
|
m_TmpReg.z = m_Source[0].z;
|
||
|
m_TmpReg.w = m_Source[1].w;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstM4x4()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1, 4);
|
||
|
m_TmpReg.x = m_Source[0].x * m_Source[1].x +
|
||
|
m_Source[0].y * m_Source[1].y +
|
||
|
m_Source[0].z * m_Source[1].z +
|
||
|
m_Source[0].w * m_Source[1].w;
|
||
|
m_TmpReg.y = m_Source[0].x * m_Source[2].x +
|
||
|
m_Source[0].y * m_Source[2].y +
|
||
|
m_Source[0].z * m_Source[2].z +
|
||
|
m_Source[0].w * m_Source[2].w;
|
||
|
m_TmpReg.z = m_Source[0].x * m_Source[3].x +
|
||
|
m_Source[0].y * m_Source[3].y +
|
||
|
m_Source[0].z * m_Source[3].z +
|
||
|
m_Source[0].w * m_Source[3].w;
|
||
|
m_TmpReg.w = m_Source[0].x * m_Source[4].x +
|
||
|
m_Source[0].y * m_Source[4].y +
|
||
|
m_Source[0].z * m_Source[4].z +
|
||
|
m_Source[0].w * m_Source[4].w;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstM4x3()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1, 3);
|
||
|
m_TmpReg.x = m_Source[0].x * m_Source[1].x +
|
||
|
m_Source[0].y * m_Source[1].y +
|
||
|
m_Source[0].z * m_Source[1].z +
|
||
|
m_Source[0].w * m_Source[1].w;
|
||
|
m_TmpReg.y = m_Source[0].x * m_Source[2].x +
|
||
|
m_Source[0].y * m_Source[2].y +
|
||
|
m_Source[0].z * m_Source[2].z +
|
||
|
m_Source[0].w * m_Source[2].w;
|
||
|
m_TmpReg.z = m_Source[0].x * m_Source[3].x +
|
||
|
m_Source[0].y * m_Source[3].y +
|
||
|
m_Source[0].z * m_Source[3].z +
|
||
|
m_Source[0].w * m_Source[3].w;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void
|
||
|
RefVM::InstM3x4()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1, 4);
|
||
|
m_TmpReg.x = m_Source[0].x * m_Source[1].x +
|
||
|
m_Source[0].y * m_Source[1].y +
|
||
|
m_Source[0].z * m_Source[1].z;
|
||
|
m_TmpReg.y = m_Source[0].x * m_Source[2].x +
|
||
|
m_Source[0].y * m_Source[2].y +
|
||
|
m_Source[0].z * m_Source[2].z;
|
||
|
m_TmpReg.z = m_Source[0].x * m_Source[3].x +
|
||
|
m_Source[0].y * m_Source[3].y +
|
||
|
m_Source[0].z * m_Source[3].z;
|
||
|
m_TmpReg.w = m_Source[0].x * m_Source[4].x +
|
||
|
m_Source[0].y * m_Source[4].y +
|
||
|
m_Source[0].z * m_Source[4].z;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void RefVM::InstM3x3()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1, 3);
|
||
|
m_TmpReg.x = m_Source[0].x * m_Source[1].x +
|
||
|
m_Source[0].y * m_Source[1].y +
|
||
|
m_Source[0].z * m_Source[1].z;
|
||
|
m_TmpReg.y = m_Source[0].x * m_Source[2].x +
|
||
|
m_Source[0].y * m_Source[2].y +
|
||
|
m_Source[0].z * m_Source[2].z;
|
||
|
m_TmpReg.z = m_Source[0].x * m_Source[3].x +
|
||
|
m_Source[0].y * m_Source[3].y +
|
||
|
m_Source[0].z * m_Source[3].z;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
void RefVM::InstM3x2()
|
||
|
{
|
||
|
SetDestReg();
|
||
|
SetSrcReg(0);
|
||
|
SetSrcReg(1, 2);
|
||
|
m_TmpReg.x = m_Source[0].x * m_Source[1].x +
|
||
|
m_Source[0].y * m_Source[1].y +
|
||
|
m_Source[0].z * m_Source[1].z;
|
||
|
m_TmpReg.y = m_Source[0].x * m_Source[2].x +
|
||
|
m_Source[0].y * m_Source[2].y +
|
||
|
m_Source[0].z * m_Source[2].z;
|
||
|
|
||
|
WriteResult();
|
||
|
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// RefVM::SetData
|
||
|
// Save data into the specified registers.
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// Copies dwCount RDVECTOR4 elements from pBuffer into the register file
// selected by dwMemType, starting at element dwStart.
// Returns D3D_OK on success, or E_FAIL when dwMemType does not name a known
// register file (GetDataAddr returns NULL in that case, which previously
// went straight into memcpy).
// NOTE(review): dwStart + dwCount is not validated against the size of the
// register file here; callers are presumably expected to pass a valid range.
HRESULT
RefVM::SetData( DWORD dwMemType, DWORD dwStart, DWORD dwCount,
                LPVOID pBuffer )
{
    RDVECTOR4* pDst = GetDataAddr( dwMemType, dwStart );
    if( pDst == NULL )
    {
        DPFERR( "Invalid register type in SetData" );
        return E_FAIL;
    }

    memcpy( pDst, pBuffer, dwCount * sizeof(RDVECTOR4) );
    return D3D_OK;
}
|
||
|
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// RefVM::GetData
|
||
|
// Fetch data from the specified registers.
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// Copies dwCount RDVECTOR4 elements from the register file selected by
// dwMemType, starting at element dwStart, into pBuffer.
// Returns D3D_OK on success, or E_FAIL when dwMemType does not name a known
// register file (GetDataAddr returns NULL in that case, which previously
// went straight into memcpy).
// NOTE(review): dwStart + dwCount is not validated against the size of the
// register file here; callers are presumably expected to pass a valid range.
HRESULT
RefVM::GetData( DWORD dwMemType, DWORD dwStart, DWORD dwCount,
                LPVOID pBuffer )
{
    const RDVECTOR4* pSrc = GetDataAddr( dwMemType, dwStart );
    if( pSrc == NULL )
    {
        DPFERR( "Invalid register type in GetData" );
        return E_FAIL;
    }

    memcpy( pBuffer, pSrc, dwCount * sizeof(RDVECTOR4) );
    return D3D_OK;
}
|
||
|
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// RefVM::SetDestReg
|
||
|
// - parses destination token
|
||
|
// - computes m_pDest, m_WriteMask, m_dwRegOffset for the destination
|
||
|
// - current token pointer is advanced to the next token
|
||
|
//-----------------------------------------------------------------------------
|
||
|
#undef RET_ERR
|
||
|
#define RET_ERR( a ) \
|
||
|
{ \
|
||
|
DPFERR( a ); \
|
||
|
return E_FAIL; \
|
||
|
}
|
||
|
// Decodes the destination parameter token at m_pCurToken.
// On success: m_dwRegOffset and m_WriteMask hold the register number and
// write mask from the token, m_pDest points at that register inside the
// selected register file, m_pCurToken has been advanced by one token, and
// S_OK is returned.
// On an unsupported register type RET_ERR logs the message and returns;
// m_dwRegOffset and m_WriteMask have already been updated at that point,
// while m_pDest and m_pCurToken are left unchanged.
HRESULT
RefVM::SetDestReg()
{
    const DWORD dwToken = *m_pCurToken;

    m_dwRegOffset = D3DSI_GETREGNUM(dwToken);
    m_WriteMask = D3DSI_GETWRITEMASK(dwToken);

    // Pick the base of the register file the destination lives in
    RDVECTOR4* pRegFile = NULL;
    switch( D3DSI_GETREGTYPE(dwToken) )
    {
    case D3DSPR_TEMP:       pRegFile = m_reg.m_t;   break;
    case D3DSPR_RASTOUT:    pRegFile = m_reg.m_out; break;
    case D3DSPR_ATTROUT:    pRegFile = m_reg.m_col; break;
    case D3DSPR_TEXCRDOUT:  pRegFile = m_reg.m_tex; break;
    case D3DSPR_ADDR:       pRegFile = m_reg.m_a;   break;
    default:
        RET_ERR( "Invalid register for destination" );
    }

    m_pCurToken++;
    m_pDest = pRegFile + m_dwRegOffset;
    return S_OK;
}
|
||
|
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// RefVM::SetSrcReg
|
||
|
// Computes m_Source[index] and advances m_pCurToken
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// Decodes the source parameter token at m_pCurToken and loads the referenced
// register into m_Source[index], applying the token's swizzle and, when the
// source modifier is D3DSPSM_NEG, negating all four components.
//
// Constant registers may use relative addressing, in which case the integer
// stored in the x slot of address register 0 is added to the register number.
// The resulting constant index is range-checked against RD_MAX_NUMCONSTREG in
// both addressing modes (the check used to be applied in relative mode only).
// A register type that GetDataAddr does not know is reported as an error
// instead of being caught only by the debug assert.
//
// Returns S_OK and advances m_pCurToken by one token on success. On failure
// RET_ERR logs the message and returns; m_pCurToken and m_Source[index] are
// left unchanged.
HRESULT
RefVM::SetSrcReg( DWORD index )
{
    const DWORD dwCurToken = *m_pCurToken;
    const DWORD dwRegType = D3DSI_GETREGTYPE( dwCurToken );
    const DWORD dwOffset = D3DSI_GETREGNUM( dwCurToken );
    RDVECTOR4 *src = NULL;
    if( dwRegType == D3DSPR_CONST )
    {
        D3DVS_ADDRESSMODE_TYPE am;
        am = (D3DVS_ADDRESSMODE_TYPE)D3DVS_GETADDRESSMODE( dwCurToken );
        int offset = (int)dwOffset;
        if( am == D3DVS_ADDRMODE_RELATIVE )
        {
            // The address register keeps an integer in its x slot
            int relOffset = *(int*)&m_reg.m_a[0].x;
            offset += relOffset;
        }
        if( offset < 0 || offset >= RD_MAX_NUMCONSTREG )
        {
            RET_ERR( "Constant register index is out of bounds" );
        }
        src = &m_reg.m_c[offset];
    }
    else
    {
        src = this->GetDataAddr(dwRegType, dwOffset);
    }

    _ASSERT( src != NULL, "src is NULL" );
    if( src == NULL )
    {
        // Release builds compile the assert out; fail instead of
        // dereferencing NULL below.
        RET_ERR( "Invalid register for source" );
    }

    RDVECTOR4 *outsrc = &m_Source[index];
    DWORD swizzle = D3DVS_GETSWIZZLE(dwCurToken);
    if( swizzle == D3DVS_NOSWIZZLE )
        *outsrc = *src;
    else
    {
        // For each output component, the index of the source component
        // it is taken from
        const DWORD dwSrcX = D3DVS_GETSWIZZLECOMP(dwCurToken, 0);
        const DWORD dwSrcY = D3DVS_GETSWIZZLECOMP(dwCurToken, 1);
        const DWORD dwSrcZ = D3DVS_GETSWIZZLECOMP(dwCurToken, 2);
        const DWORD dwSrcW = D3DVS_GETSWIZZLECOMP(dwCurToken, 3);
        outsrc->x = ((float*)src)[dwSrcX];
        outsrc->y = ((float*)src)[dwSrcY];
        outsrc->z = ((float*)src)[dwSrcZ];
        outsrc->w = ((float*)src)[dwSrcW];
    }
    if( D3DVS_GETSRCMODIFIER( dwCurToken ) == D3DSPSM_NEG)
    {
        outsrc->x = -outsrc->x;
        outsrc->y = -outsrc->y;
        outsrc->z = -outsrc->z;
        outsrc->w = -outsrc->w;
    }
    m_pCurToken++;
    return S_OK;
}
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// RefVM::SetSrcReg
|
||
|
// Computes m_Source[index .. index+count-1] and advances m_pCurToken
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// Decodes the source parameter token at m_pCurToken and loads 'count'
// consecutive registers, starting at the referenced one, into
// m_Source[index] .. m_Source[index + count - 1]. The token's swizzle and
// negate modifier are applied to every loaded register.
//
// Constant registers may use relative addressing, in which case the integer
// stored in the x slot of address register 0 is added to the register number.
// The whole range [offset, offset + count) is checked against
// RD_MAX_NUMCONSTREG in both addressing modes; previously only the first
// register was checked, and only in relative mode, so a multi-register read
// could run past the end of the constant file. A register type that
// GetDataAddr does not know is reported as an error instead of being
// dereferenced.
//
// Returns S_OK and advances m_pCurToken by one token on success. On failure
// RET_ERR logs the message and returns; m_pCurToken and m_Source are left
// unchanged.
// NOTE(review): index + count is not validated against the size of m_Source;
// the callers visible in this file pass at most index 1 with count 4.
HRESULT
RefVM::SetSrcReg( DWORD index, DWORD count )
{
    const DWORD dwCurToken = *m_pCurToken;
    const DWORD dwRegType = D3DSI_GETREGTYPE(dwCurToken);
    const DWORD dwOffset = D3DSI_GETREGNUM(dwCurToken);
    RDVECTOR4 *src = NULL;
    if (dwRegType == D3DSPR_CONST)
    {
        D3DVS_ADDRESSMODE_TYPE am;
        am = (D3DVS_ADDRESSMODE_TYPE)D3DVS_GETADDRESSMODE(dwCurToken);
        int offset = (int)dwOffset;
        if (am == D3DVS_ADDRMODE_RELATIVE)
        {
            // The address register keeps an integer in its x slot
            int relOffset = *(int*)&m_reg.m_a[0].x;
            offset += relOffset;
        }
        // Validate the first register, then the remaining room for 'count'
        // registers (written as a subtraction to avoid overflow).
        if (offset < 0 || offset >= RD_MAX_NUMCONSTREG ||
            count > (DWORD)(RD_MAX_NUMCONSTREG - offset))
        {
            RET_ERR( "Constant register index is out of bounds" );
        }
        src = &m_reg.m_c[offset];
    }
    else
    {
        // NOTE(review): the original code applies the constant-register
        // limit to non-constant registers as well; kept as is.
        if (dwOffset >= RD_MAX_NUMCONSTREG)
        {
            RET_ERR( "Constant register index is out of bounds" );
        }
        src = this->GetDataAddr(dwRegType, dwOffset);
        if (src == NULL)
        {
            RET_ERR( "Invalid register for source" );
        }
    }

    RDVECTOR4 *outsrc = &m_Source[index];

    // Everything derived from the token is the same for every register
    const bool bNoSwizzle = (D3DVS_GETSWIZZLE(dwCurToken) == D3DVS_NOSWIZZLE);
    const bool bNegate = (D3DVS_GETSRCMODIFIER(dwCurToken) == D3DSPSM_NEG);
    // For each output component, the index of the source component
    // it is taken from
    const DWORD dwSrcX = D3DVS_GETSWIZZLECOMP(dwCurToken, 0);
    const DWORD dwSrcY = D3DVS_GETSWIZZLECOMP(dwCurToken, 1);
    const DWORD dwSrcZ = D3DVS_GETSWIZZLECOMP(dwCurToken, 2);
    const DWORD dwSrcW = D3DVS_GETSWIZZLECOMP(dwCurToken, 3);

    for (UINT i=0; i < count; i++, outsrc++, src++)
    {
        if (bNoSwizzle)
            *outsrc = *src;
        else
        {
            outsrc->x = ((float*)src)[dwSrcX];
            outsrc->y = ((float*)src)[dwSrcY];
            outsrc->z = ((float*)src)[dwSrcZ];
            outsrc->w = ((float*)src)[dwSrcW];
        }
        if (bNegate)
        {
            outsrc->x = -outsrc->x;
            outsrc->y = -outsrc->y;
            outsrc->z = -outsrc->z;
            outsrc->w = -outsrc->w;
        }
    }
    m_pCurToken++;
    return S_OK;
}
|
||
|
|
||
|
//---------------------------------------------------------------------
|
||
|
// RefVM::GetDataAddr
|
||
|
// Returns the address of element dwElementIndex of the register array
|
||
|
// selected by dwRegType, or NULL if the register type is not recognized
|
||
|
//---------------------------------------------------------------------
|
||
|
// Maps a register type to its register array in m_reg and returns the
// address of element dwElementIndex inside it. Returns NULL when dwRegType
// is not one of the handled register types. dwElementIndex is not
// range-checked here.
RDVECTOR4*
RefVM::GetDataAddr(DWORD dwRegType, DWORD dwElementIndex)
{
    switch( dwRegType )
    {
    case D3DSPR_TEMP      : return &m_reg.m_t[dwElementIndex];
    case D3DSPR_INPUT     : return &m_reg.m_i[dwElementIndex];
    case D3DSPR_CONST     : return &m_reg.m_c[dwElementIndex];
    case D3DSPR_ADDR      : return &m_reg.m_a[dwElementIndex];
    case D3DSPR_RASTOUT   : return &m_reg.m_out[dwElementIndex];
    case D3DSPR_ATTROUT   : return &m_reg.m_col[dwElementIndex];
    case D3DSPR_TEXCRDOUT : return &m_reg.m_tex[dwElementIndex];
    default:
        break;
    }
    return NULL;
}
|
||
|
|
||
|
//---------------------------------------------------------------------
|
||
|
// RefVM::ExecuteShader()
|
||
|
// Executes the shader once per vertex.
|
||
|
//---------------------------------------------------------------------
|
||
|
// Runs the current shader (m_pCurrentShaderCode) from its first instruction
// until the END token or the end of the token buffer.
// The first token of the buffer is skipped before decoding starts. Each
// instruction token is dispatched to the matching Inst* handler, which
// consumes the instruction's parameter tokens through m_pCurToken. Comments
// are skipped using GetInstructionLength. After every token processed
// (comments included) the debug monitor, if present, is notified; the
// instruction counter m_CurInstIndex is advanced for everything except
// comments and is reset to 0 when execution finishes.
// Returns D3D_OK on completion. When no shader is set or an unknown opcode
// is met, RET_ERR logs the message and returns.
HRESULT
RefVM::ExecuteShader(RefDev *pRD)
{
    if( m_pCurrentShaderCode == NULL )
    {
        RET_ERR( "No current shader set in the Virtual Shader Machine" );
    }

    DWORD* const pFirstToken = m_pCurrentShaderCode->m_pRawBits;
    DWORD* const pLimit = pFirstToken + m_pCurrentShaderCode->m_dwSize;

    m_pCurToken = pFirstToken + 1;      // step over the first token
    m_CurInstIndex = 0;

    for( ; m_pCurToken < pLimit; )
    {
        const DWORD dwToken = *m_pCurToken;
        if( dwToken == D3DVS_END() )
            break;

        const DWORD dwOp = D3DSI_GETOPCODE( dwToken );
        m_pCurToken++;

        switch( dwOp )
        {
        case D3DSIO_COMMENT:
            // Jump over the comment payload; the opcode token itself has
            // already been consumed above.
            m_pCurToken += (GetInstructionLength( dwToken ) - 1);
            break;
        case D3DSIO_NOP:
            break;
        case D3DSIO_MOV:    InstMov();  break;
        case D3DSIO_ADD:    InstAdd();  break;
        case D3DSIO_MAD:    InstMad();  break;
        case D3DSIO_MUL:    InstMul();  break;
        case D3DSIO_RCP:    InstRcp();  break;
        case D3DSIO_RSQ:    InstRsq();  break;
        case D3DSIO_DP3:    InstDP3();  break;
        case D3DSIO_DP4:    InstDP4();  break;
        case D3DSIO_MIN:    InstMin();  break;
        case D3DSIO_MAX:    InstMax();  break;
        case D3DSIO_SLT:    InstSlt();  break;
        case D3DSIO_SGE:    InstSge();  break;
        case D3DSIO_EXPP:   InstExpP(); break;
        case D3DSIO_LOGP:   InstLogP(); break;
        case D3DSIO_EXP:    InstExp();  break;
        case D3DSIO_LOG:    InstLog();  break;
        case D3DSIO_LIT:    InstLit();  break;
        case D3DSIO_DST:    InstDst();  break;
        case D3DSIO_FRC:    InstFrc();  break;
        case D3DSIO_M4x4:   InstM4x4(); break;
        case D3DSIO_M4x3:   InstM4x3(); break;
        case D3DSIO_M3x4:   InstM3x4(); break;
        case D3DSIO_M3x3:   InstM3x3(); break;
        case D3DSIO_M3x2:   InstM3x2(); break;
        default:
            RET_ERR( "Invalid shader opcode" );
        }

        if (pRD->m_pDbgMon)
        {
            pRD->m_pDbgMon->NextEvent( D3DDM_EVENT_VERTEXSHADERINST );
        }
        if( dwOp != D3DSIO_COMMENT )
        {
            m_CurInstIndex++;
        }
    }

    m_CurInstIndex = 0;
    return D3D_OK;
}
|
||
|
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// VertexShaderInstDisAsm - Generates human-readable character string for a
|
||
|
// single vertex shader instruction. String interface is similar to _snprintf.
|
||
|
//-----------------------------------------------------------------------------
|
||
|
static int VertexShaderInstDisAsm(
|
||
|
char* pStrRet, int StrSizeRet, DWORD* pShader, DWORD Flags )
|
||
|
{
|
||
|
DWORD* pToken = pShader;
|
||
|
|
||
|
// stage in local string, then copy
|
||
|
char pStr[256] = "";
|
||
|
#define _ADDSTR( _Str ) { _snprintf( pStr, 256, "%s" _Str , pStr ); }
|
||
|
#define _ADDSTRP( _Str, _Param ) { _snprintf( pStr, 256, "%s" _Str , pStr, _Param ); }
|
||
|
|
||
|
DWORD Inst = *pToken++;
|
||
|
DWORD Opcode = (Inst & D3DSI_OPCODE_MASK);
|
||
|
switch (Opcode)
|
||
|
{
|
||
|
case D3DSIO_NOP: _ADDSTR("NOP"); break;
|
||
|
case D3DSIO_MOV: _ADDSTR("MOV"); break;
|
||
|
case D3DSIO_ADD: _ADDSTR("ADD"); break;
|
||
|
case D3DSIO_MAD: _ADDSTR("MAD"); break;
|
||
|
case D3DSIO_MUL: _ADDSTR("MUL"); break;
|
||
|
case D3DSIO_RCP: _ADDSTR("RCP"); break;
|
||
|
case D3DSIO_RSQ: _ADDSTR("RSQ"); break;
|
||
|
case D3DSIO_DP3: _ADDSTR("DP3"); break;
|
||
|
case D3DSIO_DP4: _ADDSTR("DP4"); break;
|
||
|
case D3DSIO_MIN: _ADDSTR("MIN"); break;
|
||
|
case D3DSIO_MAX: _ADDSTR("MAX"); break;
|
||
|
case D3DSIO_SLT: _ADDSTR("SLT"); break;
|
||
|
case D3DSIO_SGE: _ADDSTR("SGE"); break;
|
||
|
case D3DSIO_EXP: _ADDSTR("EXP"); break;
|
||
|
case D3DSIO_LOG: _ADDSTR("LOG"); break;
|
||
|
case D3DSIO_EXPP: _ADDSTR("EXPP"); break;
|
||
|
case D3DSIO_LOGP: _ADDSTR("LOGP"); break;
|
||
|
case D3DSIO_LIT: _ADDSTR("LIT"); break;
|
||
|
case D3DSIO_DST: _ADDSTR("DST"); break;
|
||
|
default : _ADDSTR("???"); break;
|
||
|
}
|
||
|
if (*pToken & (1L<<31))
|
||
|
{
|
||
|
DWORD DstParam = *pToken++;
|
||
|
switch (DstParam & D3DSP_REGTYPE_MASK)
|
||
|
{
|
||
|
case D3DSPR_TEMP : _ADDSTRP(" T%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
|
||
|
case D3DSPR_ADDR : _ADDSTR(" Addr"); break;
|
||
|
case D3DSPR_RASTOUT : _ADDSTRP(" R%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
|
||
|
case D3DSPR_ATTROUT : _ADDSTRP(" A%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
|
||
|
case D3DSPR_TEXCRDOUT: _ADDSTRP(" T%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
|
||
|
}
|
||
|
if (*pToken & (1L<<31)) _ADDSTR(" ");
|
||
|
while (*pToken & (1L<<31))
|
||
|
{
|
||
|
DWORD SrcParam = *pToken++;
|
||
|
switch (SrcParam & D3DSP_REGTYPE_MASK)
|
||
|
{
|
||
|
case D3DSPR_TEMP : _ADDSTRP(" T%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
|
||
|
case D3DSPR_INPUT : _ADDSTRP(" I%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
|
||
|
case D3DSPR_CONST : _ADDSTRP(" C%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
|
||
|
}
|
||
|
if (*pToken & (1L<<31)) _ADDSTR(",");
|
||
|
}
|
||
|
}
|
||
|
return _snprintf( pStrRet, StrSizeRet, "%s", pStr );
|
||
|
}
|
||
|
|
||
|
//---------------------------------------------------------------------
|
||
|
// RefVM::CompileCode
|
||
|
// Parses binary shader representation, compiles it and returns
|
||
|
// compiled object
|
||
|
//---------------------------------------------------------------------
|
||
|
#undef RET_ERR
|
||
|
#define RET_ERR( a ) \
|
||
|
{ \
|
||
|
DPFERR( a ); \
|
||
|
delete pShaderCode; \
|
||
|
return NULL; \
|
||
|
}
|
||
|
|
||
|
|
||
|
// Compiles a binary vertex shader token stream into an RDVShaderCode object.
//
// Parameters:
//   dwSize - size of the token stream in bytes. Only whole DWORDs are used;
//            any trailing 1-3 bytes are ignored.
//   pBits  - token stream: a version token followed by instruction tokens,
//            normally terminated by D3DVS_END().
//
// Returns:
//   A newly allocated RDVShaderCode holding a private copy of the tokens,
//   the per-instruction array and the output FVF computed from the output
//   registers the shader writes; NULL on any error (RET_ERR logs the
//   message and deletes the partially built object).
//
// The stream is walked twice: pass 1 validates the opcodes, counts the
// instructions and accumulates the output FVF; pass 2 fills m_pInst.
RDVShaderCode*
RefVM::CompileCode( DWORD dwSize, LPDWORD pBits )
{
    RDVShaderCode* pShaderCode = new RDVShaderCode();
    if( pShaderCode == NULL )
        RET_ERR( "Out of memory allocating ShaderCode" );

    pShaderCode->m_dwSize = dwSize >> 2; // #DWORDs
    pShaderCode->m_pRawBits = new DWORD[pShaderCode->m_dwSize];
    if( pShaderCode->m_pRawBits == NULL )
        RET_ERR( "Out of memory allocating RawBits" );

    if( pShaderCode->m_dwSize != 0 )
    {
        if( pBits == NULL )
            RET_ERR( "Invalid shader code pointer" );

        // Copy whole DWORDs only. The buffer holds (dwSize >> 2) DWORDs, so
        // copying dwSize bytes would overrun it whenever dwSize is not a
        // multiple of 4.
        memcpy( pShaderCode->m_pRawBits, (LPBYTE)pBits,
                pShaderCode->m_dwSize * sizeof(DWORD) );
    }

    // Based on the what output registers are modified, we compute the
    // corresponding FVF id. The id will be used for memory allocation
    // of the output buffer and will be passed to the rasterizer
    UINT64  qwOutFVF = 0;
    DWORD   nTexCoord = 0;      // Number of output texture coordinates

    // For each texture register stores the combined write mask.
    // Used to find how many floats are written to each texture coordinates
    DWORD   TextureWritten[8];
    const DWORD cTexRegs = sizeof(TextureWritten) / sizeof(TextureWritten[0]);
    memset( TextureWritten, 0, sizeof(TextureWritten) );

    LPDWORD pEnd = pShaderCode->m_pRawBits + pShaderCode->m_dwSize;

    //
    // Pass 1: validate, count instructions, accumulate the output FVF
    //
    m_pCurToken = pShaderCode->m_pRawBits;
    m_pCurToken++; // Skip the version number
    pShaderCode->m_InstCount = 0;
    while( m_pCurToken < pEnd )
    {
        if( *m_pCurToken == D3DVS_END() ) break;

        DWORD* pInstStart = m_pCurToken;
        DWORD  dwInst     = *m_pCurToken;
        DWORD  dwOpCode   = D3DSI_GETOPCODE(dwInst);
        BOOL   bHasDest   = FALSE;
        m_pCurToken++;  // now points at the token following the opcode

        // Reject unknown opcodes before asking for the instruction length.
        switch( dwOpCode )
        {
        case D3DSIO_COMMENT:
        case D3DSIO_NOP  :
            break;
        case D3DSIO_MOV  :
        case D3DSIO_ADD  :
        case D3DSIO_MAD  :
        case D3DSIO_MUL  :
        case D3DSIO_RCP  :
        case D3DSIO_RSQ  :
        case D3DSIO_DP3  :
        case D3DSIO_DP4  :
        case D3DSIO_MIN  :
        case D3DSIO_MAX  :
        case D3DSIO_SLT  :
        case D3DSIO_SGE  :
        case D3DSIO_EXP  :
        case D3DSIO_LOG  :
        case D3DSIO_EXPP :
        case D3DSIO_LOGP :
        case D3DSIO_LIT  :
        case D3DSIO_DST  :
        case D3DSIO_FRC  :
        case D3DSIO_M4x4 :
        case D3DSIO_M4x3 :
        case D3DSIO_M3x4 :
        case D3DSIO_M3x3 :
        case D3DSIO_M3x2 :
            bHasDest = TRUE;
            break;
        default:
            RET_ERR( "Invalid shader opcode" );
        }

        // The whole instruction must lie inside the copied stream, otherwise
        // the operand parsing here and the memcpy in pass 2 would read past
        // the end of m_pRawBits. A zero length would never advance the loop.
        const UINT uInstLen = GetInstructionLength( dwInst );
        if( uInstLen == 0 || uInstLen > (UINT)(pEnd - pInstStart) )
            RET_ERR( "Shader instruction extends past the end of the code" );

        if( bHasDest )
        {
            // Find out if output register are modified by the command and
            // update the output FVF
            if( FAILED( SetDestReg() ) )
                RET_ERR( "Invalid shader opcode" );

            // Base of the register array the destination belongs to, and
            // the destination's offset within that array.
            RDVECTOR4*  pRegBase = m_pDest - m_dwRegOffset;
            const DWORD dwOffset = m_dwRegOffset;

            // A destination in m_reg.m_t does not modify an output register.
            if( pRegBase != m_reg.m_t )
            {
                if( pRegBase == m_reg.m_out )
                {
                    if( dwOffset == D3DSRO_POSITION )
                    {
                        qwOutFVF |= D3DFVF_XYZRHW;
                    }
                    else if( dwOffset == D3DSRO_FOG )
                    {
                        qwOutFVF |= D3DFVFP_FOG;
                    }
                    else if( dwOffset == D3DSRO_POINT_SIZE )
                    {
                        qwOutFVF |= D3DFVF_PSIZE;
                    }
                }
                else if( pRegBase == m_reg.m_col )
                {
                    if( dwOffset == 0 )
                    {
                        qwOutFVF |= D3DFVF_DIFFUSE;
                    }
                    else
                    {
                        qwOutFVF |= D3DFVF_SPECULAR;
                    }
                }
                else if( pRegBase == m_reg.m_tex )
                {
                    // Guard the fixed-size write-mask table.
                    if( dwOffset >= cTexRegs )
                        RET_ERR( "Invalid output register offset" );

                    if( TextureWritten[dwOffset] == 0 )
                    {
                        nTexCoord++;
                    }
                    TextureWritten[dwOffset] |= m_WriteMask;
                }
                else if( pRegBase != m_reg.m_a )
                {
                    RET_ERR( "Invalid output register offset" );
                }
            }
        }

        pShaderCode->m_InstCount++;
        m_pCurToken = pInstStart + uInstLen;
    }

    //
    // Pass 2: allocate and set instruction array
    //
    if( pShaderCode->m_InstCount )
    {
        pShaderCode->m_pInst = new RDVShaderInst[pShaderCode->m_InstCount];
        if( pShaderCode->m_pInst == NULL )
            RET_ERR( "Out of memory allocating Instructions" );
        memset( pShaderCode->m_pInst, 0,
                sizeof(RDVShaderInst)*pShaderCode->m_InstCount );

        DWORD dwCurInst = 0;
        m_pCurToken = pShaderCode->m_pRawBits;
        m_pCurToken++; // Skip the version number
        // Pass 1 already validated every instruction; the index bound is
        // purely defensive so m_pInst can never be overrun.
        while( m_pCurToken < pEnd && dwCurInst < pShaderCode->m_InstCount )
        {
            if( *m_pCurToken == D3DVS_END() ) break;

            const DWORD dwInst  = *m_pCurToken;
            const UINT  ilength = GetInstructionLength( dwInst );
            RDVShaderInst& inst = pShaderCode->m_pInst[dwCurInst];

            if( D3DSI_GETOPCODE( dwInst ) == D3DSIO_COMMENT )
            {
                // Comments are not copied: m_pComment points into the
                // m_pRawBits copy, just past the comment opcode token.
                inst.m_Tokens[0]   = dwInst;
                inst.m_pComment    = (m_pCurToken+1);
                inst.m_CommentSize = ilength - 1;
            }
            else
            {
                memcpy( inst.m_Tokens, m_pCurToken, sizeof(DWORD)*ilength );
                VertexShaderInstDisAsm( inst.m_String,
                    RD_MAX_SHADERINSTSTRING, inst.m_Tokens, 0x0 );
            }
            m_pCurToken += ilength;
            dwCurInst++;
        }
    }

    //
    // Texture coordinate count and per-coordinate size bits of the FVF.
    // The written texture registers must be 0..nTexCoord-1 without gaps and
    // each must be written with a mask covering its first 1, 2, 3 or 4
    // components.
    //
    qwOutFVF |= nTexCoord << D3DFVF_TEXCOUNT_SHIFT;
    for( DWORD i = 0; i < nTexCoord; i++ )
    {
        if( TextureWritten[i] == 0 )
            RET_ERR( "Texture coordinates are not continuous" );

        switch( TextureWritten[i] )
        {
        case D3DSP_WRITEMASK_ALL:
            qwOutFVF |= D3DFVF_TEXCOORDSIZE4(i);
            break;
        case D3DSP_WRITEMASK_0 | D3DSP_WRITEMASK_1 | D3DSP_WRITEMASK_2:
            qwOutFVF |= D3DFVF_TEXCOORDSIZE3(i);
            break;
        case D3DSP_WRITEMASK_0 | D3DSP_WRITEMASK_1:
            qwOutFVF |= D3DFVF_TEXCOORDSIZE2(i);
            break;
        case D3DSP_WRITEMASK_0:
            qwOutFVF |= D3DFVF_TEXCOORDSIZE1(i);
            break;
        default:
            RET_ERR( "Invalid write mask for texture register" );
        }
    }

    pShaderCode->m_qwFVFOut = qwOutFVF;
    return pShaderCode;
}
|
||
|
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
// RefDev implementation.
|
||
|
//
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
//---------------------------------------------------------------------
|
||
|
// RefDev::ProcessPrimitiveVVM()
|
||
|
// Processes and draws the current primitive using the VVM
|
||
|
//---------------------------------------------------------------------
|
||
|
HRESULT
|
||
|
RefDev::ProcessPrimitiveVVM( D3DPRIMITIVETYPE primType,
|
||
|
DWORD dwStartVertex,
|
||
|
DWORD cVertices,
|
||
|
DWORD dwStartIndex,
|
||
|
DWORD cIndices )
|
||
|
{
|
||
|
HRESULT hr = S_OK;
|
||
|
RDCLIPCODE clipIntersection = ~0;
|
||
|
RDCLIPCODE clipUnion = 0;
|
||
|
|
||
|
// Save Prim Type for later use
|
||
|
m_primType = primType;
|
||
|
m_dwNumVertices = cVertices;
|
||
|
m_dwStartVertex = dwStartVertex;
|
||
|
m_dwNumIndices = cIndices;
|
||
|
m_dwStartIndex = dwStartIndex;
|
||
|
|
||
|
RDVDeclaration* pDecl = &(m_pCurrentVShader->m_Declaration);
|
||
|
RDVShaderCode* pCode = m_pCurrentVShader->m_pCode;
|
||
|
RDVVMREG* pRegisters = m_RefVM.GetRegisters();
|
||
|
|
||
|
// Output FVF that was computed at the compile time
|
||
|
m_qwFVFOut = pCode->m_qwFVFOut;
|
||
|
|
||
|
//
|
||
|
// Clipping information depends both on the output FVF computation
|
||
|
// and the other State, so do it here after both have been computed
|
||
|
//
|
||
|
HR_RET( UpdateClipper());
|
||
|
|
||
|
D3DVALUE scaleX = m_Clipper.scaleX;
|
||
|
D3DVALUE scaleY = m_Clipper.scaleY;
|
||
|
D3DVALUE scaleZ = m_Clipper.scaleZ;
|
||
|
|
||
|
D3DVALUE offsetX = m_Clipper.offsetX;
|
||
|
D3DVALUE offsetY = m_Clipper.offsetY;
|
||
|
D3DVALUE offsetZ = m_Clipper.offsetZ;
|
||
|
|
||
|
//
|
||
|
// Grow buffers to the requisite size
|
||
|
//
|
||
|
|
||
|
// Grow TLVArray if required
|
||
|
if( FAILED( this->m_TLVArray.Grow( m_dwNumVertices ) ) )
|
||
|
{
|
||
|
DPFERR( "Could not grow TL vertex buffer" );
|
||
|
hr = DDERR_OUTOFMEMORY;
|
||
|
return hr;
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Process Vertices
|
||
|
//
|
||
|
for( DWORD i = 0; i < m_dwNumVertices; i++ )
|
||
|
{
|
||
|
RDVertex& Vout = m_TLVArray[i];
|
||
|
Vout.SetFVF( pCode->m_qwFVFOut | D3DFVFP_CLIP );
|
||
|
|
||
|
// Copy vertex elements to the input vertex registers
|
||
|
for( DWORD j = 0; j < pDecl->m_dwNumElements; j++ )
|
||
|
{
|
||
|
RDVElement& ve = pDecl->m_VertexElements[j];
|
||
|
RDVStream* pStream = &m_VStream[ve.m_dwStreamIndex];
|
||
|
LPBYTE pData = (LPBYTE)pStream->m_pData + ve.m_dwOffset +
|
||
|
pStream->m_dwStride * (m_dwStartVertex + i);
|
||
|
RDVECTOR4* pReg = m_RefVM.GetDataAddr( D3DSPR_INPUT,
|
||
|
ve.m_dwRegister );
|
||
|
|
||
|
ve.m_pfnCopy( pData, pReg );
|
||
|
}
|
||
|
|
||
|
if (m_pDbgMon) m_pDbgMon->NextEvent( D3DDM_EVENT_VERTEX );
|
||
|
|
||
|
// Execute the shader
|
||
|
m_RefVM.ExecuteShader(this);
|
||
|
|
||
|
// Get the result from the output VVM registers
|
||
|
float x, y, z, w, inv_w_clip = 0.0f;
|
||
|
|
||
|
w = pRegisters->m_out[D3DSRO_POSITION].w;
|
||
|
z = pRegisters->m_out[D3DSRO_POSITION].z;
|
||
|
|
||
|
// Make clipping rules 0 < x < w; 0 < y < w
|
||
|
|
||
|
x = (pRegisters->m_out[D3DSRO_POSITION].x + w) * 0.5f;
|
||
|
y = (pRegisters->m_out[D3DSRO_POSITION].y + w) * 0.5f;
|
||
|
|
||
|
// Save the clip coordinates
|
||
|
Vout.m_clip_x = x;
|
||
|
Vout.m_clip_y = y;
|
||
|
Vout.m_clip_z = z;
|
||
|
Vout.m_clip_w = w;
|
||
|
|
||
|
//
|
||
|
// Compute clip codes if needed
|
||
|
//
|
||
|
if( GetRS()[D3DRENDERSTATE_CLIPPING] )
|
||
|
{
|
||
|
RDCLIPCODE clip = m_Clipper.ComputeClipCodes(
|
||
|
&clipIntersection, &clipUnion, x, y, z, w);
|
||
|
if( clip == 0 )
|
||
|
{
|
||
|
Vout.m_clip = 0;
|
||
|
inv_w_clip = 1.0f/w;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if( m_Clipper.UseGuardBand() )
|
||
|
{
|
||
|
if( (clip & ~RDCLIP_INGUARDBAND) == 0 )
|
||
|
{
|
||
|
// If vertex is inside the guardband we have to compute
|
||
|
// screen coordinates
|
||
|
inv_w_clip = 1.0f/w;
|
||
|
Vout.m_clip = (RDCLIPCODE)clip;
|
||
|
goto l_DoScreenCoord;
|
||
|
}
|
||
|
}
|
||
|
Vout.m_clip = (RDCLIPCODE)clip;
|
||
|
// If vertex is outside the frustum we can not compute screen
|
||
|
// coordinates, hence store the clip coordinates
|
||
|
#if 0
|
||
|
Vout.m_pos.x = x;
|
||
|
Vout.m_pos.y = y;
|
||
|
Vout.m_pos.z = z;
|
||
|
Vout.m_rhw = w;
|
||
|
#endif
|
||
|
goto l_DoLighting;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// We have to check this only for DONOTCLIP case, because otherwise
|
||
|
// the vertex with "we = 0" will be clipped and screen coordinates
|
||
|
// will not be computed
|
||
|
// "clip" is not zero, if "we" is zero.
|
||
|
if( !FLOAT_EQZ(w) )
|
||
|
inv_w_clip = D3DVAL(1)/w;
|
||
|
else
|
||
|
inv_w_clip = __HUGE_PWR2;
|
||
|
}
|
||
|
l_DoScreenCoord:
|
||
|
Vout.m_pos.x = x * inv_w_clip * scaleX + offsetX;
|
||
|
Vout.m_pos.y = y * inv_w_clip * scaleY + offsetY;
|
||
|
Vout.m_pos.z = z * inv_w_clip * scaleZ + offsetZ;
|
||
|
Vout.m_rhw = inv_w_clip;
|
||
|
|
||
|
l_DoLighting:
|
||
|
if( m_qwFVFOut & D3DFVF_DIFFUSE )
|
||
|
{
|
||
|
// Clamp the colors before copying.
|
||
|
if( FLOAT_LTZ(pRegisters->m_col[0].a) )
|
||
|
pRegisters->m_col[0].a = 0.0f;
|
||
|
else if( FLOAT_CMP_PONE(pRegisters->m_col[0].a, >) )
|
||
|
pRegisters->m_col[0].a = 1.0f;
|
||
|
|
||
|
if( FLOAT_LTZ(pRegisters->m_col[0].r) )
|
||
|
pRegisters->m_col[0].r = 0.0f;
|
||
|
else if( FLOAT_CMP_PONE(pRegisters->m_col[0].r, >) )
|
||
|
pRegisters->m_col[0].r = 1.0f;
|
||
|
|
||
|
if( FLOAT_LTZ(pRegisters->m_col[0].g) )
|
||
|
pRegisters->m_col[0].g = 0.0f;
|
||
|
else if( FLOAT_CMP_PONE(pRegisters->m_col[0].g, >) )
|
||
|
pRegisters->m_col[0].g = 1.0f;
|
||
|
|
||
|
if( FLOAT_LTZ(pRegisters->m_col[0].b) )
|
||
|
pRegisters->m_col[0].b = 0.0f;
|
||
|
else if( FLOAT_CMP_PONE(pRegisters->m_col[0].b, >) )
|
||
|
pRegisters->m_col[0].b = 1.0f;
|
||
|
|
||
|
memcpy( &Vout.m_diffuse,&(pRegisters->m_col[0]),
|
||
|
sizeof(RDVECTOR4) );
|
||
|
}
|
||
|
if( m_qwFVFOut & D3DFVF_SPECULAR )
|
||
|
{
|
||
|
if( FLOAT_LTZ(pRegisters->m_col[1].a) )
|
||
|
pRegisters->m_col[1].a = 0.0f;
|
||
|
else if( FLOAT_CMP_PONE(pRegisters->m_col[1].a, >) )
|
||
|
pRegisters->m_col[1].a = 1.0f;
|
||
|
|
||
|
if( FLOAT_LTZ(pRegisters->m_col[1].r) )
|
||
|
pRegisters->m_col[1].r = 0.0f;
|
||
|
else if( FLOAT_CMP_PONE(pRegisters->m_col[1].r, >) )
|
||
|
pRegisters->m_col[1].r = 1.0f;
|
||
|
|
||
|
if( FLOAT_LTZ(pRegisters->m_col[1].g) )
|
||
|
pRegisters->m_col[1].g = 0.0f;
|
||
|
else if( FLOAT_CMP_PONE(pRegisters->m_col[1].g, >) )
|
||
|
pRegisters->m_col[1].g = 1.0f;
|
||
|
|
||
|
if( FLOAT_LTZ(pRegisters->m_col[1].b) )
|
||
|
pRegisters->m_col[1].b = 0.0f;
|
||
|
else if( FLOAT_CMP_PONE(pRegisters->m_col[1].b, >) )
|
||
|
pRegisters->m_col[1].b = 1.0f;
|
||
|
|
||
|
memcpy( &Vout.m_specular,&(pRegisters->m_col[1]),
|
||
|
sizeof(RDVECTOR4) );
|
||
|
}
|
||
|
if( m_qwFVFOut & D3DFVFP_FOG )
|
||
|
{
|
||
|
if( FLOAT_LTZ(pRegisters->m_out[D3DSRO_FOG].x) )
|
||
|
pRegisters->m_out[D3DSRO_FOG].x = 0.0f;
|
||
|
|
||
|
if( FLOAT_CMP_PONE(pRegisters->m_out[D3DSRO_FOG].x, >) )
|
||
|
pRegisters->m_out[D3DSRO_FOG].x = 1.0f;
|
||
|
|
||
|
Vout.m_fog = pRegisters->m_out[D3DSRO_FOG].x;
|
||
|
}
|
||
|
|
||
|
// Copy the textures over
|
||
|
if( m_qwFVFOut & D3DFVF_PSIZE )
|
||
|
{
|
||
|
Vout.m_pointsize = pRegisters->m_out[D3DSRO_POINT_SIZE].x;
|
||
|
}
|
||
|
|
||
|
|
||
|
// Copy the textures over
|
||
|
|
||
|
{
|
||
|
DWORD i, j;
|
||
|
DWORD numTex = FVF_TEXCOORD_NUMBER(m_qwFVFOut);
|
||
|
for( i = 0; i < numTex; i++ )
|
||
|
{
|
||
|
DWORD n = GetTexCoordDim( m_qwFVFOut, i );
|
||
|
// DWORD n = (DWORD)(m_dwTexCoordSizeArray[i] >> 2);
|
||
|
float *pCoordDest = (float *)&Vout.m_tex[i];
|
||
|
float *pCoordSrc = (float *)&pRegisters->m_tex[i];
|
||
|
for( j = 0; j < n; j++ )
|
||
|
{
|
||
|
pCoordDest[j] = pCoordSrc[j];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if( GetRS()[D3DRENDERSTATE_CLIPPING] )
|
||
|
{
|
||
|
m_Clipper.m_clipIntersection = clipIntersection;
|
||
|
m_Clipper.m_clipUnion = clipUnion;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
m_Clipper.m_clipIntersection = 0;
|
||
|
m_Clipper.m_clipUnion = 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
//
|
||
|
// Clip and Draw the primitives
|
||
|
//
|
||
|
|
||
|
if( m_dwNumIndices )
|
||
|
{
|
||
|
if( !NeedClipping((m_Clipper.UseGuardBand()), m_Clipper.m_clipUnion) )
|
||
|
{
|
||
|
if( m_IndexStream.m_dwStride == 4 )
|
||
|
hr = DrawOneIndexedPrimitive(
|
||
|
m_TLVArray,
|
||
|
0,
|
||
|
(LPDWORD)m_IndexStream.m_pData,
|
||
|
m_dwStartIndex,
|
||
|
m_dwNumIndices,
|
||
|
m_primType );
|
||
|
else
|
||
|
hr = DrawOneIndexedPrimitive(
|
||
|
m_TLVArray,
|
||
|
0,
|
||
|
(LPWORD)m_IndexStream.m_pData,
|
||
|
m_dwStartIndex,
|
||
|
m_dwNumIndices,
|
||
|
m_primType );
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if( m_IndexStream.m_dwStride == 4 )
|
||
|
hr = m_Clipper.DrawOneIndexedPrimitive(
|
||
|
m_TLVArray,
|
||
|
0,
|
||
|
(LPDWORD)m_IndexStream.m_pData,
|
||
|
m_dwStartIndex,
|
||
|
m_dwNumIndices,
|
||
|
m_primType );
|
||
|
else
|
||
|
hr = m_Clipper.DrawOneIndexedPrimitive(
|
||
|
m_TLVArray,
|
||
|
0,
|
||
|
(LPWORD)m_IndexStream.m_pData,
|
||
|
m_dwStartIndex,
|
||
|
m_dwNumIndices,
|
||
|
m_primType );
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if( !NeedClipping((m_Clipper.UseGuardBand()), m_Clipper.m_clipUnion) )
|
||
|
{
|
||
|
hr = DrawOnePrimitive(
|
||
|
m_TLVArray,
|
||
|
0,
|
||
|
m_primType,
|
||
|
m_dwNumVertices );
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
hr = m_Clipper.DrawOnePrimitive(
|
||
|
m_TLVArray,
|
||
|
0,
|
||
|
m_primType,
|
||
|
m_dwNumVertices );
|
||
|
}
|
||
|
}
|
||
|
return hr;
|
||
|
}
|