windows-nt/Source/XPSP1/NT/base/wow64/mscpu/analysis/analysis.c

1143 lines
38 KiB
C
Raw Normal View History

2020-09-26 03:20:57 -05:00
/*++
Copyright (c) 1992-2000 Microsoft Corporation
Module Name:
analysis.c
Abstract:
This module contains the main file of the analysis
module.
Author:
Ori Gershony (t-orig) creation-date 6-July-1995
Revision History:
24-Aug-1999 [askhalid] copied from 32-bit wx86 directory and make work for 64bit.
--*/
#include <nt.h>
#include <ntrtl.h>
#include <nturtl.h>
#include <windows.h>
#include <wx86.h>
#include <wx86nt.h>
#include <wx86cpu.h>
#include <cpuassrt.h>
#include <threadst.h>
#include <instr.h>
#include <analysis.h>
#include <decoder.h>
#include <frag.h>
#include <config.h>
#include <compiler.h>
ASSERTNAME;
//
// Macro to determine when to stop looking ahead during compilation.
// Evaluates to non-zero when the Fragments[] entry selected by the
// instruction's Operation has OPFL_STOP_COMPILE set in its Flags.
// The argument is parenthesized so that any expression yielding an
// instruction (eg. *pInstr) may be passed, not only a plain lvalue name.
//
#define STOP_DECODING(inst) (Fragments[(inst).Operation].Flags & OPFL_STOP_COMPILE)
//
// Map a REG_ constant (offset into cpu struct) into register bit map
// used by instruction data.
//
// The table has 0x1e (30) entries and is indexed directly by an operand's
// Reg / IndexReg value (see UpdateRegs).  Callers must filter out NO_REG
// before indexing; no bounds check is performed on the index.
//
const DWORD MapRegNumToRegBits[0x1e] =
{REGEAX, REGECX, REGEDX, REGEBX, REGESP, REGEBP, REGESI, REGEDI, // 0x00-0x07: 32-bit registers
0, 0, 0, 0, 0, 0, // 0x08-0x0d: no register bits (presumably the segment registers -- TODO confirm)
REGAX, REGCX, REGDX, REGBX, REGSP, REGBP, REGSI, REGDI, // 0x0e-0x15: 16-bit registers
REGAL, REGCL, REGDL, REGBL, REGAH, REGCH, REGDH, REGBH }; // 0x16-0x1d: 8-bit registers
ULONG
LocateEntryPoints(
    PINSTRUCTION InstructionStream,
    ULONG NumberOfInstructions
    )
/*++
Routine Description:
    This function scans the InstructionStream and marks instructions
    which begin an entrypoint.  An instruction begins an entrypoint if its
    EntryPoint field has a different value than the previous instruction's
    value.  On return no instruction has a NULL EntryPoint field.
    Note that in this pass, the EntryPoint field does *not* point to an
    ENTRYPOINT structure... it is only a marker (a small integer cast to
    a pointer).
Arguments:
    InstructionStream    -- The decoded instructions to examine.  Each
                            instruction's EntryPoint field is overwritten.
    NumberOfInstructions -- The number of valid entries in
                            InstructionStream.
Return Value:
    Count of EntryPoints located (0 for an empty stream).
--*/
{
    ULONG i, j, k, intelDest;
    ULONG EntryPointCounter;
    ULONG IntelStart;
    ULONG IntelEnd;

    //
    // Nothing to mark.  Bail out before NumberOfInstructions-1 can wrap.
    //
    if (NumberOfInstructions == 0) {
        return 0;
    }

    if (CompilerFlags & COMPFL_SLOW) {
        //
        // The compiler is supposed to generate slowmode code.  Each
        // x86 instruction gets its own ENTRYPOINT
        //
        EntryPointCounter=1;
        for (i=0; i<NumberOfInstructions; i++) {
            //
            // Mark all instructions which don't correspond to 0-byte NOPs
            // following optimized instructions as starting EntryPoints.
            //
            if (InstructionStream[i].Size) {
                EntryPointCounter++;
            }
            InstructionStream[i].EntryPoint = (PENTRYPOINT)EntryPointCounter;
        }
    } else {
        //
        // Find all instructions which need Entrypoints.
        // Performance is O(n^2) in the worst case, although
        // it will be typically much closer to O(n)
        //
        // Instructions which mark the starts of Entrypoints have
        // their .EntryPoint pointer set to non-NULL.  Instructions which
        // don't require entrypoints have it set to NULL;
        //
        // IntelStart/IntelEnd bound the Intel addresses covered by the
        // stream.  IntelAddress values are absolute, so the end is simply
        // the address just past the last instruction; IntelStart must not
        // be added in again (doing so produced a meaningless bound that
        // could wrap around 32 bits).
        //
        IntelStart = InstructionStream[0].IntelAddress;
        IntelEnd = InstructionStream[NumberOfInstructions-1].IntelAddress +
                   InstructionStream[NumberOfInstructions-1].Size;

        //
        // The first instruction always gets an entrypoint
        //
        InstructionStream[0].EntryPoint = (PENTRYPOINT)1;

        //
        // Visit each instruction in turn
        //
        for (i=0; i<NumberOfInstructions; i++) {

            if (((i+1) < NumberOfInstructions) &&
                (Fragments[InstructionStream[i].Operation].Flags & OPFL_END_NEXT_EP)) {
                //
                // This instruction marks the end of an Entrypoint.  The next
                // instruction gets a new Entrypoint.
                //
                CPUASSERT(i < CpuInstructionLookahead-1 && i < NumberOfInstructions-1);
                InstructionStream[i+1].EntryPoint = (PENTRYPOINT)1;
            }

            //
            // Now see if it is a direct control transfer instruction with a
            // destination that lies within this instruction stream.  If it is,
            // we want to create an Entry Point at the destination so that the
            // control transfer will be compiled directly to the patched form,
            // and won't have to be patched later.
            //
            if (!(Fragments[InstructionStream[i].Operation].Flags & OPFL_CTRLTRNS)) {
                continue;
            }

            //
            // Get the intel destination from the instruction structure.
            //
            if (InstructionStream[i].Operand1.Type == OPND_IMM ||
                InstructionStream[i].Operand1.Type == OPND_NOCODEGEN) {
                // The destination is stored directly as an immediate.
                intelDest = InstructionStream[i].Operand1.Immed;
            } else {
                CPUASSERT(InstructionStream[i].Operand1.Type == OPND_ADDRREF );
                // A FAR instruction - Operand1 is a ptr to a SEL:OFFSET pair
                intelDest = *(UNALIGNED PULONG)(InstructionStream[i].Operand1.Immed);
            }

            if ((intelDest < IntelStart) || (intelDest > IntelEnd)) {
                // Destination lies outside of this instruction stream.
                continue;
            }

            //
            // Destination of the control-transfer is within the
            // instructionstream.  Find the destination instruction.
            // j == NumberOfInstructions means "no exact match".
            //
            j = NumberOfInstructions;
            if (intelDest > InstructionStream[i].IntelAddress) {
                //
                // The dest. address is at a higher address.
                //
                for (k=i+1; k<NumberOfInstructions; ++k) {
                    if (InstructionStream[k].IntelAddress == intelDest) {
                        j = k;
                        break;
                    }
                }
            } else {
                //
                // The dest. address is at this instruction or a lower
                // address.  Walk from i down to 0 inclusive; the index is
                // unsigned, so decrement before testing rather than
                // comparing against a negative value.
                //
                k = i + 1;
                while (k > 0) {
                    --k;
                    if (InstructionStream[k].IntelAddress == intelDest) {
                        j = k;
                        break;
                    }
                }
            }

            //
            // An exact match may not be found in the event that the
            // app is punning (either a real pun or the app is jumping
            // into the middle of an optimized instruction).  In
            // either of the cases, defer entrypoint creation until
            // the branch is actually taken.
            //
            if (j < NumberOfInstructions) {
                //
                // Exact match was found.  Create an Entrypoint.
                //
                InstructionStream[j].EntryPoint = (PENTRYPOINT)1;
            }
        } // for ()

        //
        // Convert the EntryPoint field from NULL/non-NULL to a unique
        // value for each range of instructions.
        //
        EntryPointCounter=1;
        i=0;
        while (i<NumberOfInstructions) {
            //
            // This instruction marks the beginning of a basic block
            //
            InstructionStream[i].EntryPoint = (PENTRYPOINT)EntryPointCounter;

            //
            // Extend the block until we run out of instructions or hit an
            // instruction which marks the start of the next basic block.
            // Note that 0-byte NOP instructions are not allowed to start
            // basic blocks as that violates the rules of OPT_ instructions.
            //
            for (j=i+1; j<NumberOfInstructions; j++) {
                if (InstructionStream[j].Size && InstructionStream[j].EntryPoint) {
                    break;
                }
                InstructionStream[j].EntryPoint = (PENTRYPOINT)EntryPointCounter;
            }
            EntryPointCounter++;
            i = j;
        }
    } // if not COMPFL_SLOW

    //
    // At this point, EntryPointCounter holds the number of EntryPoints
    // plus one, because we started the counter at 1, not 0.  Correct
    // that now.
    //
    EntryPointCounter--;
    return EntryPointCounter;
}
VOID
UpdateRegs(
    PINSTRUCTION pInstr,
    POPERAND Operand
    )
/*++
Routine Description:
    Folds the registers named by one operand into the instruction's
    RegsSet / RegsNeeded bit masks.  Bits are only ever ORed in; nothing
    is cleared here.
        OPND_NOCODEGEN, OPND_REGREF      -- Reg is added to RegsSet
        OPND_REGVALUE                    -- Reg is added to RegsNeeded
        OPND_ADDRREF, OPND_ADDRVALUE8/16/32
                                         -- Reg and IndexReg are added to
                                            RegsNeeded
    Any other operand type leaves the instruction untouched.  Register
    fields equal to NO_REG are skipped.
Arguments:
    pInstr  -- the instruction whose register masks are updated
    Operand -- the operand of the instruction to examine
Return Value:
    return-value - none
--*/
{
    if (Operand->Type == OPND_NOCODEGEN || Operand->Type == OPND_REGREF) {
        // The operand names a register as a destination.
        if (Operand->Reg != NO_REG) {
            pInstr->RegsSet |= MapRegNumToRegBits[Operand->Reg];
        }
        return;
    }

    if (Operand->Type == OPND_REGVALUE) {
        // The operand reads the value of a register.
        if (Operand->Reg != NO_REG) {
            pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->Reg];
        }
        return;
    }

    if (Operand->Type == OPND_ADDRREF ||
        Operand->Type == OPND_ADDRVALUE8 ||
        Operand->Type == OPND_ADDRVALUE16 ||
        Operand->Type == OPND_ADDRVALUE32) {
        // Memory operand: base and index registers are both read to
        // form the address.
        if (Operand->Reg != NO_REG) {
            pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->Reg];
        }
        if (Operand->IndexReg != NO_REG) {
            pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->IndexReg];
        }
    }
}
VOID
CacheIntelRegs(
PINSTRUCTION InstructionStream,
ULONG numInstr)
/*++
Routine Description:
This function determines what x86 registers, if any, can be cached in
RISC preserved registers.
The stream is walked from the last instruction to the first.  For every
instruction the RegsSet and RegsNeeded masks are (re)computed, and a
per-register read counter (RegUsage[]) tracks how many instructions below
the current one read each 32-bit register since it was last written or
since the last entrypoint boundary.  Registers read more than once are
recorded in the RegsToCache mask of the instruction that follows the
current one.
Note that RegsNeeded and RegsToCache are only ever ORed into by this
routine (RegsNeeded via UpdateRegs); it never clears them first.
Arguments:
InstructionStream -- The instruction stream returned by the decoder
numInstr -- The length of InstructionStream.  The routine indexes
numInstr-1 unconditionally, so numInstr must be non-zero.
Return Value:
return-value - none
--*/
{
PINSTRUCTION pInstr;
BYTE RegUsage[REGCOUNT]; // read count per 32-bit register (BYTE-sized counter)
DWORD RegsToCache;
int i;
PENTRYPOINT PrevEntryPoint; // EntryPoint marker of the instruction visited previously (one below)
//
// Calculate the RegsSet and RegsNeeded for the bottommost instruction
//
pInstr = &InstructionStream[numInstr-1];
pInstr->RegsSet = Fragments[pInstr->Operation].RegsSet;
PrevEntryPoint = pInstr->EntryPoint;
UpdateRegs(pInstr, &pInstr->Operand1);
UpdateRegs(pInstr, &pInstr->Operand2);
UpdateRegs(pInstr, &pInstr->Operand3);
//
// For each 32-bit register used as a parameter to this instruction,
// set the usage count to 1.
//
for (i=0; i<REGCOUNT; ++i) {
if (pInstr->RegsNeeded & (REGMASK<<(REGSHIFT*i))) {
RegUsage[i] = 1;
} else {
RegUsage[i] = 0;
}
}
//
// Loop over instruction stream from bottom to top, starting at the
// second-to-last instruction
//
for (pInstr--; pInstr >= InstructionStream; pInstr--) {
//
// Calculate the RegsSet and RegsNeeded values for this instruction
//
pInstr->RegsSet = Fragments[pInstr->Operation].RegsSet;
UpdateRegs(pInstr, &pInstr->Operand1);
UpdateRegs(pInstr, &pInstr->Operand2);
UpdateRegs(pInstr, &pInstr->Operand3);
RegsToCache = 0;
if (PrevEntryPoint != pInstr->EntryPoint) {
//
// The current instruction marks the end of an Entrypoint.
// (pInstr[1] is therefore the first instruction of the block that
// was just scanned.)
//
PrevEntryPoint = pInstr->EntryPoint;
//
// For all x86 registers which have been read more than once
// but not modified in the basic block, load them into the
// cache before executing the first instruction in the basic
// block.
//
for (i=0; i<REGCOUNT; ++i) {
if (RegUsage[i] > 1) {
RegsToCache |= (REGMASK<<(REGSHIFT*i));
}
}
//
// Reset the RegUsage[] array to indicate no registers are
// cached.
//
RtlZeroMemory(RegUsage, REGCOUNT);
} else {
//
// For each 32-bit x86 register modified by this instruction,
// update the caching info.
//
for (i=0; i<REGCOUNT; ++i) {
DWORD RegBits = pInstr->RegsSet & (REGMASK<<(REGSHIFT*i));
if (RegBits) {
//
// The ith 32-bit x86 register has been modified by this
// instruction
//
if (RegUsage[i] > 1) {
//
// There is more than one consumer of the modified
// value so it is worth caching.
//
RegsToCache |= RegBits;
}
//
// Since this x86 register was dirtied by this instruction,
// its usage count must be reset to 0.
//
RegUsage[i] = 0;
}
}
}
//
// Update the list of x86 registers which can be loaded into
// cache registers before the next instruction executes.
//
pInstr[1].RegsToCache |= RegsToCache;
//
// For each 32-bit register used as a parameter to this instruction,
// bump the usage count.  This happens after the reset above, so the
// current instruction's own reads count towards the block it belongs to.
//
for (i=0; i<REGCOUNT; ++i) {
if (pInstr->RegsNeeded & (REGMASK<<(REGSHIFT*i))) {
RegUsage[i]++;
}
}
}
}
//
// Turn an instruction that has been merged into its predecessor into a
// 0-byte OP_Nop with no first or second operand.
//
static VOID
AnalysisMakeNop(
    PINSTRUCTION pInstr
    )
{
    pInstr->Operation = OP_Nop;
    pInstr->Operand1.Type = OPND_NONE;
    pInstr->Operand2.Type = OPND_NONE;
    pInstr->Size = 0;
}

VOID
OptimizeInstructionStream(
    PINSTRUCTION IS,
    ULONG numInstr
    )
/*++
Routine Description:
    This function performs various optimizations on the instruction stream
    returned by the decoder.  Single x86 instructions are replaced with
    special-case instructions, and recognized sequences of two or three
    x86 instructions are folded into one OPT_ meta-instruction.  When a
    sequence is folded, the first instruction absorbs the Size of the
    others and the consumed instructions become 0-byte OP_Nop entries, so
    the number of entries in the stream never changes.
    All look-ahead (IS[i+1], IS[i+2]) is bounds-checked against numInstr
    using "i + k < numInstr", which cannot wrap for small streams.
Arguments:
    IS       -- The instruction stream returned by the decoder
    numInstr -- The length of IS (must be non-zero)
Return Value:
    return-value - none
--*/
{
    ULONG i;

    CPUASSERTMSG(numInstr, "Cannot optimize 0-length instruction stream");

    //
    // Pass 1: Optimize x86 instruction stream, replacing single x86
    //         instructions with special-case instructions, and replacing
    //         multiple x86 instructions with single special-case OPT_
    //         instructions
    //
    for (i=0; i<numInstr; ++i) {

        switch (IS[i].Operation) {
        case OP_Push32:
            if (i + 2 < numInstr
                && IS[i].Operand1.Type == OPND_REGVALUE){

                if (IS[i].Operand1.Reg == GP_EBP) {
                    // OP_OPT_SetupStack --
                    //      push ebp
                    //      mov ebp, esp
                    //      sub esp, x
                    if ((IS[i+1].Operation == OP_Mov32) &&
                        (IS[i+1].Operand1.Type == OPND_REGREF) &&
                        (IS[i+1].Operand1.Reg == GP_EBP) &&
                        (IS[i+1].Operand2.Type == OPND_REGVALUE) &&
                        (IS[i+1].Operand2.Reg == GP_ESP) &&
                        (IS[i+2].Operation == OP_Sub32) &&
                        (IS[i+2].Operand1.Type == OPND_REGREF) &&
                        (IS[i+2].Operand1.Reg == GP_ESP) &&
                        (IS[i+2].Operand2.Type == OPND_IMM)){

                        IS[i].Operation = OP_OPT_SetupStack;
                        IS[i].Operand1.Type = OPND_IMM;
                        IS[i].Operand1.Immed = IS[i+2].Operand2.Immed;
                        IS[i].Size += IS[i+1].Size + IS[i+2].Size;
                        IS[i].Operand2.Type = OPND_NONE;
                        AnalysisMakeNop(&IS[i+1]);
                        AnalysisMakeNop(&IS[i+2]);
                        i+=2;
                        break;
                    }
                } else if (IS[i].Operand1.Reg == GP_EBX) {
                    // OP_OPT_PushEbxEsiEdi --
                    //      push ebx
                    //      push esi
                    //      push edi
                    if ((IS[i+1].Operation == OP_Push32) &&
                        (IS[i+1].Operand1.Type == OPND_REGVALUE) &&
                        (IS[i+1].Operand1.Reg == GP_ESI) &&
                        (IS[i+2].Operation == OP_Push32) &&
                        (IS[i+2].Operand1.Type == OPND_REGVALUE) &&
                        (IS[i+2].Operand1.Reg == GP_EDI)){

                        IS[i].Operation = OP_OPT_PushEbxEsiEdi;
                        IS[i].Size += IS[i+1].Size + IS[i+2].Size;
                        IS[i].Operand1.Type = OPND_NONE;
                        IS[i].Operand2.Type = OPND_NONE;
                        AnalysisMakeNop(&IS[i+1]);
                        AnalysisMakeNop(&IS[i+2]);
                        i+=2;
                        break;
                    }
                }
            }

            //
            // It is not one of the other special PUSH sequences, so see
            // if there are two consecutive PUSHes to merge together.  Note:
            // If the second PUSH references ESP, the two cannot be merged
            // because the value is computed before 4 is subtracted from ESP.
            // ie. the following is disallowed:
            //      PUSH EAX
            //      PUSH ESP        ; second operand to Push2 would have been
            //                      ; built before the PUSH EAX was executed.
            //
            if (i + 1 < numInstr &&
                !IS[i].FsOverride &&
                !IS[i+1].FsOverride &&
                IS[i+1].Operation == OP_Push32 &&
                IS[i+1].Operand1.Reg != GP_ESP &&
                IS[i+1].Operand1.IndexReg != GP_ESP) {

                IS[i].Operation = OP_OPT_Push232;
                IS[i].Operand2 = IS[i+1].Operand1;
                IS[i].Size += IS[i+1].Size;
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Operand1.Type = OPND_NONE;
                IS[i+1].Size = 0;
                i++;
            }
            break;

        case OP_Pop32:
            // OP_OPT_PopEdiEsiEbx
            //      pop edi
            //      pop esi
            //      pop ebx
            if (i + 2 < numInstr &&
                (IS[i].Operand1.Type == OPND_REGREF) &&
                (IS[i].Operand1.Reg == GP_EDI) &&
                (IS[i+1].Operation == OP_Pop32) &&
                (IS[i+1].Operand1.Type == OPND_REGREF) &&
                (IS[i+1].Operand1.Reg == GP_ESI) &&
                (IS[i+2].Operation == OP_Pop32) &&
                (IS[i+2].Operand1.Type == OPND_REGREF) &&
                (IS[i+2].Operand1.Reg == GP_EBX)){

                IS[i].Operation = OP_OPT_PopEdiEsiEbx;
                IS[i].Size += IS[i+1].Size + IS[i+2].Size;
                IS[i].Operand1.Type = OPND_NONE;
                IS[i].Operand2.Type = OPND_NONE;
                AnalysisMakeNop(&IS[i+1]);
                AnalysisMakeNop(&IS[i+2]);
                i+=2;
            } else if (i + 1 < numInstr &&
                !IS[i].FsOverride &&
                !IS[i+1].FsOverride &&      // check BOTH pops, as the PUSH merge does
                IS[i].Operand1.Type == OPND_REGREF &&
                IS[i+1].Operation == OP_Pop32 &&
                IS[i+1].Operand1.Type == OPND_REGREF) {

                // Fold the two POPs together.  Both operands are REGREF,
                // so there is no problem with interdependencies between
                // memory touched by the first POP modifying the address
                // of the second POP.  ie. the following is not merged:
                //      POP EAX
                //      POP [EAX]   ; depends on results of first POP
                IS[i].Operation = OP_OPT_Pop232;
                IS[i].Operand2 = IS[i+1].Operand1;
                IS[i].Size += IS[i+1].Size;
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Operand1.Type = OPND_NONE;
                IS[i+1].Size = 0;
                i++;
            }
            break;

        case OP_Xor32:
        case OP_Sub32:
            if (IS[i].Operand1.Type == OPND_REGREF &&
                IS[i].Operand2.Type == OPND_REGVALUE &&
                IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
                // Instruction is XOR samereg, samereg  (ie. XOR EAX, EAX),
                //             or SUB samereg, samereg  (ie. SUB ECX, ECX).
                // Emit OP_OPT_ZERO32 samereg
                IS[i].Operand2.Type = OPND_NONE;
                IS[i].Operation = OP_OPT_ZERO32;
            }
            break;

        case OP_Test8:
            if (IS[i].Operand1.Type == OPND_REGVALUE &&
                IS[i].Operand2.Type == OPND_REGVALUE &&
                IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
                // Instruction is TEST samereg, samereg (ie. TEST AL, AL)
                // Emit OP_OPT_FastTest8
                IS[i].Operand1.Type = OPND_REGVALUE;
                IS[i].Operand2.Type = OPND_NONE;
                IS[i].Operation = OP_OPT_FastTest8;
            }
            break;

        case OP_Test16:
            if (IS[i].Operand1.Type == OPND_REGVALUE &&
                IS[i].Operand2.Type == OPND_REGVALUE &&
                IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
                // Instruction is TEST samereg, samereg (ie. TEST AX, AX)
                // Emit OP_OPT_FastTest16
                IS[i].Operand1.Type = OPND_REGVALUE;
                IS[i].Operand2.Type = OPND_NONE;
                IS[i].Operation = OP_OPT_FastTest16;
            }
            break;

        case OP_Test32:
            if (IS[i].Operand1.Type == OPND_REGVALUE &&
                IS[i].Operand2.Type == OPND_REGVALUE &&
                IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
                // Instruction is TEST samereg, samereg (ie. TEST EAX, EAX)
                // Emit OP_OPT_FastTest32
                IS[i].Operand1.Type = OPND_REGVALUE;
                IS[i].Operand2.Type = OPND_NONE;
                IS[i].Operation = OP_OPT_FastTest32;
            }
            break;

        case OP_Cmp32:
            if (i + 1 < numInstr && IS[i+1].Operation == OP_Sbb32 &&
                IS[i+1].Operand1.Type == OPND_REGREF &&
                IS[i+1].Operand2.Type == OPND_REGVALUE &&
                IS[i+1].Operand1.Reg == IS[i+1].Operand2.Reg) {
                // The two instructions are:
                //     CMP anything1, anything2
                //     SBB samereg, samereg
                // The optimized instruction is:
                //     Operation = either CmpSbb32 or CmpSbbNeg32
                //     Operand1 = &samereg  (passed as REGREF)
                //     Operand2 = anything1 (passed as ADDRVAL32 or REGVAL)
                //     Operand3 = anything2 (passed as ADDRVAL32 or REGVAL)
                IS[i].Operand3 = IS[i].Operand2;
                IS[i].Operand2 = IS[i].Operand1;
                IS[i].Operand1 = IS[i+1].Operand1;

                // The merged instruction covers the bytes of the SBB too,
                // exactly as every other merge in this routine does.
                IS[i].Size += IS[i+1].Size;

                if (i + 2 < numInstr && IS[i+2].Operation == OP_Neg32 &&
                    IS[i+2].Operand1.Type == OPND_REGREF &&
                    IS[i+2].Operand1.Reg == IS[i+1].Operand1.Reg) {
                    // The third instruction is NEG samereg
                    IS[i].Operation = OP_OPT_CmpSbbNeg32;
                    IS[i].Size += IS[i+2].Size;
                    AnalysisMakeNop(&IS[i+2]);
                    AnalysisMakeNop(&IS[i+1]);
                    i+=2;
                } else {
                    IS[i].Operation = OP_OPT_CmpSbb32;
                    AnalysisMakeNop(&IS[i+1]);
                    i++;
                }
            }
            break;

        case OP_Cwd16:
            if (i + 1 < numInstr && IS[i+1].Operation == OP_Idiv16) {
                IS[i].Operation = OP_OPT_CwdIdiv16;
                IS[i].Operand1 = IS[i+1].Operand1;
                IS[i].Size += IS[i+1].Size;
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Operand1.Type = OPND_NONE;
                IS[i+1].Size = 0;
                i++;
            }
            break;

        case OP_Cwd32:
            if (i + 1 < numInstr && IS[i+1].Operation == OP_Idiv32) {
                IS[i].Operation = OP_OPT_CwdIdiv32;
                IS[i].Operand1 = IS[i+1].Operand1;
                IS[i].Size += IS[i+1].Size;
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Operand1.Type = OPND_NONE;
                IS[i+1].Size = 0;
                i++;
            }
            break;

        case OP_FP_FNSTSW:
            if (i + 1 < numInstr && IS[i+1].Operation == OP_Sahf &&
                IS[i].Operand1.Type == OPND_REGREF &&
                IS[i].Operand1.Reg == GP_AX) {
                // Replace FNSTSW AX / SAHF by one instruction
                IS[i].Operation = OP_OPT_FNSTSWAxSahf;
                IS[i].Operand1.Type = OPND_NONE;
                IS[i].Size += IS[i+1].Size;
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Size = 0;
                i++;
            }
            break;

        case OP_FP_FSTP_STi:
            if (IS[i].Operand1.Immed == 0) {
                IS[i].Operand1.Type = OPND_NONE;
                IS[i].Operation = OP_OPT_FSTP_ST0;
            }
            break;

        default:
            // No special-case form for this operation.
            break;
        }
    }
}
VOID
OptimizeIntelFlags(
    PINSTRUCTION IS,
    ULONG numInstr
    )
/*++
Routine Description:
    This function analyzes x86 flag register usage and switches instructions
    to use NoFlags versions if possible.
    The stream is scanned from the last instruction to the first, carrying
    the set of flags still needed by later instructions.  An instruction
    whose generated flags are all dead, and whose fragment is marked
    OPFL_HASNOFLAGS, has its Operation advanced to the NoFlags fragment
    (+2 if the fragment is marked OPFL_ALIGN, +1 otherwise).
    For control transfers the flags needed at the destination are merged
    in; a destination that cannot be located in the stream is treated as
    needing all flags.
Arguments:
    IS       -- The instruction stream returned by the decoder
    numInstr -- The length of IS (at most MAX_INSTR_COUNT)
Return Value:
    return-value - none
--*/
{
    USHORT FlagsNeeded;     // flags required to execute current x86 instr
    USHORT FlagsToGenerate; // flags which current x86 instr must generate
    PFRAGDESCR pFragDesc;   // ptr to Fragments[] array for current instr
    ULONG i;                // instruction index
    BOOL fPassNeeded = TRUE;// TRUE if the outer loop needs to loop once more
    ULONG PassNumber = 0;   // number of times outer loop has looped
    USHORT KnownFlagsNeeded[MAX_INSTR_COUNT]; // flags needed for each instr

    //
    // An empty stream has nothing to optimize; the do/while below would
    // otherwise decrement i past zero.
    //
    if (numInstr == 0) {
        return;
    }
    CPUASSERT(numInstr <= MAX_INSTR_COUNT);

    while (fPassNeeded) {

        //
        // This loop is executed at most two times.  The second pass is only
        // required if there is a control-transfer instruction whose
        // destination is within the Instruction Stream and at a lower
        // Intel address (ie. a backwards JMP).
        //
        fPassNeeded = FALSE;
        PassNumber++;
        CPUASSERT(PassNumber <= 2);

        //
        // Iterate over all x86 instructions decoded, from bottom to top,
        // propagating flags info up.  Start off by assuming all x86 flags
        // must be up-to-date at the end of the last basic block.
        //
        FlagsNeeded = ALLFLAGS;
        i = numInstr;
        do {
            i--;
            pFragDesc = &Fragments[IS[i].Operation];

            //
            // Calculate what flags will need to be computed by this
            // instruction and ones before this.
            //
            KnownFlagsNeeded[i] = FlagsNeeded | pFragDesc->FlagsNeeded;
            FlagsToGenerate = FlagsNeeded & pFragDesc->FlagsSet;

            //
            // Calculate what flags this instruction will need to have
            // computed before it can be executed.
            //
            FlagsNeeded = (FlagsNeeded & ~FlagsToGenerate) |
                           pFragDesc->FlagsNeeded;

            if (pFragDesc->Flags & OPFL_CTRLTRNS) {
                ULONG IntelDest = IS[i].Operand1.Immed;

                //
                // For control-transfer instructions, FlagsNeeded also includes
                // the flags required for the destination of the transfer.
                //
                if (IS[0].IntelAddress <= IntelDest &&
                    i > 0 && IS[i-1].IntelAddress >= IntelDest) {
                    //
                    // The destination of the control-transfer is at a lower
                    // address in the Instruction Stream.
                    //

                    if (PassNumber == 1) {
                        //
                        // Need to make a second pass over the flags
                        // optimizations in order to determine what flags are
                        // needed for the destination address.
                        //
                        fPassNeeded = TRUE;
                        FlagsNeeded = ALLFLAGS; // assume all flags are needed
                    } else {
                        ULONG j;
                        USHORT NewFlagsNeeded;

                        //
                        // Search for the IntelDest within the Instruction
                        // Stream.  IntelDest may not be found if there is
                        // a pun.
                        //
                        NewFlagsNeeded = ALLFLAGS;  // assume there is a pun
                        for (j=0; j < i; ++j) {
                            if (IS[j].IntelAddress == IntelDest) {
                                NewFlagsNeeded = KnownFlagsNeeded[j];
                                break;
                            }
                        }

                        FlagsNeeded |= NewFlagsNeeded;
                    }
                } else if (i + 1 < numInstr &&
                           IS[i+1].IntelAddress <= IntelDest &&
                           IntelDest <= IS[numInstr-1].IntelAddress) {
                    //
                    // The destination of the control-transfer is at a higher
                    // address in the Instruction Stream.  Pick up the
                    // already-computed FlagsNeeded for the destination.
                    // (The i + 1 < numInstr test keeps IS[i+1] inside the
                    // decoded stream when this is the last instruction;
                    // that case falls through to "unknown" below.)
                    //
                    ULONG j;
                    USHORT NewFlagsNeeded = ALLFLAGS;   // assume a pun

                    for (j=i+1; j < numInstr; ++j) {
                        if (IS[j].IntelAddress == IntelDest) {
                            NewFlagsNeeded = KnownFlagsNeeded[j];
                            break;
                        }
                    }

                    FlagsNeeded |= NewFlagsNeeded;
                } else {
                    //
                    // Destination of the control-transfer is unknown.  Assume
                    // the worst:  all flags are required.
                    //
                    FlagsNeeded = ALLFLAGS;
                }
            }

            if (!(FlagsToGenerate & pFragDesc->FlagsSet) &&
                (pFragDesc->Flags & OPFL_HASNOFLAGS)) {
                //
                // This instruction is not required to generate any flags, and
                // it has a NOFLAGS version.  Update the flags that need to be
                // computed by instructions before this one, and modify the
                // Operation number to point at the NoFlags fragment.
                //
                FlagsToGenerate &= pFragDesc->FlagsSet;
                if (pFragDesc->Flags & OPFL_ALIGN) {
                    IS[i].Operation += 2;
                } else {
                    IS[i].Operation ++;
                }

                if (IS[i].Operation == OP_OPT_ZERONoFlags32) {
                    //
                    // Special-case this to be a "mov [value], zero" so it is
                    // inlined.
                    //
                    IS[i].Operation = OP_Mov32;
                    IS[i].Operand2.Type = OPND_IMM;
                    IS[i].Operand2.Immed = 0;
                }
            }
        } while (i);
    }
}
VOID
DetermineEbpAlignment(
    PINSTRUCTION InstructionStream,
    ULONG numInstr
    )
/*++
Routine Description:
    Walks InstructionStream[] from first to last and records in each
    instruction's EbpAligned field whether EBP is assumed to be
    DWORD-aligned at that point.  The assumption starts out FALSE.  It
    becomes TRUE at an instruction whose RegsSet includes REGEBP and which
    is OP_OPT_SetupStack, OP_OPT_SetupStackNoFlags, or an OP_Mov32 whose
    second operand is the value of ESP ("MOV EBP, ESP").  Any other
    instruction whose RegsSet includes REGEBP turns it back to FALSE.
    Instructions that do not set EBP inherit the current assumption.
Arguments:
    InstructionStream -- The instruction stream returned by the decoder
    numInstr          -- The length of InstructionStream
Return Value:
    return-value - none
--*/
{
    PINSTRUCTION pCur;
    PINSTRUCTION pStop = InstructionStream + numInstr;
    BOOL fAligned = FALSE;

    for (pCur = InstructionStream; pCur < pStop; ++pCur) {

        if (pCur->RegsSet & REGEBP) {
            //
            // EBP is written here.  Only the "EBP = ESP" forms leave it
            // in a state we assume to be aligned.
            //
            if (pCur->Operation == OP_OPT_SetupStack) {
                fAligned = TRUE;
            } else if (pCur->Operation == OP_OPT_SetupStackNoFlags) {
                fAligned = TRUE;
            } else if (pCur->Operation == OP_Mov32 &&
                       pCur->Operand2.Type == OPND_REGVALUE &&
                       pCur->Operand2.Reg == GP_ESP) {
                fAligned = TRUE;
            } else {
                fAligned = FALSE;
            }
        }

        pCur->EbpAligned = fAligned;
    }
}
ULONG
GetInstructionStream(
    PINSTRUCTION InstructionStream,
    PULONG NumberOfInstructions,
    PVOID pIntelInstruction,
    PVOID pLastIntelInstruction
    )
/*++
Routine Description:
    Decodes a run of Intel instructions into InstructionStream and runs
    the analysis passes over it.  Decoding stops when the buffer is full,
    when an instruction whose fragment is flagged OPFL_STOP_COMPILE has
    been decoded, or when the instruction just decoded lies at or beyond
    pLastIntelInstruction.
    Unless the compiler is in slow mode (COMPFL_SLOW) the stream is then
    optimized; entrypoints are located in either mode; and, for fast-mode
    streams of more than two instructions, the flag, register-cache and
    EBP-alignment analyses run unless individually disabled.
Arguments:
    InstructionStream     -- A pointer to the buffer where the decoded
                             instructions are stored.  The whole buffer is
                             zero-filled first.
    NumberOfInstructions  -- Upon entry, the maximal number of instructions
                             the buffer can hold.  It is rewritten with the
                             number actually decoded when decoding stops
                             before the buffer fills; if the buffer fills,
                             it is left at the capacity, which is then also
                             the decoded count.
    pIntelInstruction     -- A pointer to the first real intel instruction
                             to be decoded.
    pLastIntelInstruction -- A pointer to the last intel instruction to be
                             compiled, 0xffffffff if not used.
Return Value:
    Number of entrypoints required to describe the decoded instruction
    stream.
--*/
{
    ULONG numInstr=0;
    ULONG maxBufferSize;
    ULONG cEntryPoints;

    maxBufferSize = (*NumberOfInstructions);

    //
    // The decoder depends on starting from an all-zero buffer.
    //
    RtlZeroMemory(InstructionStream, maxBufferSize*sizeof(INSTRUCTION));

#if DBG
    //
    // Debug-only sanity check on the address about to be decoded:  query
    // the memory it lives in and log a warning when it is not inside an
    // image whose machine type is I386.  Any exception raised while
    // checking is swallowed.
    //
    try {
        USHORT Instr;

        //
        // NOTE(review): the opcode is not actually read from memory here;
        // Instr is fixed at 0, so the BOP (0xc4c4) filter below never
        // suppresses the check.
        //
        Instr = 0;

        if (Instr != 0xc4c4) {
            NTSTATUS Status;
            MEMORY_BASIC_INFORMATION MemInfo;

            Status = NtQueryVirtualMemory(NtCurrentProcess(),
                                          pIntelInstruction,
                                          MemoryBasicInformation,
                                          &MemInfo,
                                          sizeof(MemInfo),
                                          NULL);
            if (NT_SUCCESS(Status)) {
                PIMAGE_NT_HEADERS NtHeaders;

                NtHeaders = RtlImageNtHeader(MemInfo.AllocationBase);
                if (!NtHeaders || NtHeaders->FileHeader.Machine != IMAGE_FILE_MACHINE_I386) {
                    LOGPRINT((TRACELOG, "CPU Analysis warning: jumping from Intel to non-intel code at 0x%X\r\n", pIntelInstruction));
                }
            }
            // On failure the address could not be queried; nothing is logged.
        }
    } except(EXCEPTION_EXECUTE_HANDLER) {
        ;
    }
#endif //DBG

    //
    // Decode one instruction per iteration until the buffer is full or a
    // terminating instruction has been stored.
    //
    for (numInstr = 0; numInstr < maxBufferSize; ) {
        PINSTRUCTION pDecoded = &InstructionStream[numInstr];
        ULONG fLastOne;

        DecodeInstruction ((DWORD) (ULONGLONG)pIntelInstruction, pDecoded);

        fLastOne = (STOP_DECODING(InstructionStream[numInstr])) ||
                   (pIntelInstruction >= pLastIntelInstruction);

        // The instruction just decoded is always kept.
        numInstr++;

        if (fLastOne) {
            // Reached a stop instruction or the last requested address.
            (*NumberOfInstructions) = numInstr;
            break;
        }

        // Step to the next Intel instruction.
        pIntelInstruction = (PVOID) ((ULONGLONG)pIntelInstruction + pDecoded->Size);
    }

    //
    // Merge x86 instructions into meta-instructions and clean up special
    // x86 idioms.  Skipped entirely in slow mode.
    //
    if (!(CompilerFlags & COMPFL_SLOW)) {
        OptimizeInstructionStream (InstructionStream, numInstr);
    }

    //
    // Fill in the EntryPoint field of each instruction.  This runs after
    // OptimizeInstructionStream() so that EntryPoints don't fall into the
    // middle of meta-instructions.
    //
    cEntryPoints = LocateEntryPoints(InstructionStream, numInstr);

    //
    // The remaining optimizations need EntryPoint knowledge and are only
    // worthwhile for fast-mode streams of more than two instructions.
    //
    if (numInstr <= 2 || (CompilerFlags & COMPFL_SLOW)) {
        return cEntryPoints;
    }

    if (!CpuDisableNoFlags) {
        OptimizeIntelFlags(InstructionStream, numInstr);
    }
    if (!CpuDisableRegCache) {
        CacheIntelRegs(InstructionStream, numInstr);
    }
    if (!CpuDisableEbpAlign) {
        DetermineEbpAlignment(InstructionStream, numInstr);
    }

    return cEntryPoints;
}