1143 lines
38 KiB
C
1143 lines
38 KiB
C
/*++
|
|
|
|
Copyright (c) 1992-2000 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
analysis.c
|
|
|
|
Abstract:
|
|
|
|
This module contains the main file of the analysis
|
|
module.
|
|
|
|
Author:
|
|
|
|
Ori Gershony (t-orig) creation-date 6-July-1995
|
|
|
|
Revision History:
|
|
|
|
24-Aug-1999 [askhalid] copied from 32-bit wx86 directory and make work for 64bit.
|
|
|
|
|
|
--*/
|
|
|
|
#include <nt.h>
|
|
#include <ntrtl.h>
|
|
#include <nturtl.h>
|
|
#include <windows.h>
|
|
#include <wx86.h>
|
|
#include <wx86nt.h>
|
|
#include <wx86cpu.h>
|
|
#include <cpuassrt.h>
|
|
#include <threadst.h>
|
|
#include <instr.h>
|
|
#include <analysis.h>
|
|
#include <decoder.h>
|
|
#include <frag.h>
|
|
#include <config.h>
|
|
#include <compiler.h>
|
|
|
|
ASSERTNAME;
|
|
|
|
|
|
|
|
//
// Macro to determine when to stop looking ahead during compilation.
// The argument is an instruction structure (not a pointer to one).  It is
// parenthesized inside the expansion so that any expression of that type,
// e.g. *(Stream+n), can be passed safely.
//
#define STOP_DECODING(inst) \
    (Fragments[(inst).Operation].Flags & OPFL_STOP_COMPILE)
|
|
|
|
//
|
|
// Map a REG_ constant (offset into cpu struct) into register bit map
|
|
// used by instruction data.
|
|
//
|
|
const DWORD MapRegNumToRegBits[0x1e] =
|
|
{REGEAX, REGECX, REGEDX, REGEBX, REGESP, REGEBP, REGESI, REGEDI,
|
|
0, 0, 0, 0, 0, 0,
|
|
REGAX, REGCX, REGDX, REGBX, REGSP, REGBP, REGSI, REGDI,
|
|
REGAL, REGCL, REGDL, REGBL, REGAH, REGCH, REGDH, REGBH };
|
|
|
|
|
|
ULONG
|
|
LocateEntryPoints(
|
|
PINSTRUCTION InstructionStream,
|
|
ULONG NumberOfInstructions
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function scans the InstructionStream and marks instructions
|
|
which begin entrypoint. An instruction begins an entrypoint if its
|
|
EntryPoint field has a different value than the previous instruction's
|
|
value. No instruction will have a NULL pointer.
|
|
|
|
Note that in this pass, the EntryPoint field does *not* point to an
|
|
ENTRYPOINT structure... it is only a marker.
|
|
|
|
Arguments:
|
|
|
|
IntelStart -- The intel address of the first instruction in the stream
|
|
|
|
IntelStart -- The last byte of the last intel instruction in the stream
|
|
|
|
Return Value:
|
|
|
|
Count of EntryPoints located.
|
|
|
|
--*/
|
|
{
|
|
ULONG i, j, intelDest;
|
|
ULONG EntryPointCounter;
|
|
ULONG IntelStart;
|
|
ULONG IntelEnd;
|
|
|
|
if (CompilerFlags & COMPFL_SLOW) {
|
|
//
|
|
// The compiler is supposed to generate slowmode code. Each
|
|
// x86 instruction gets its own ENTRYPOINT
|
|
//
|
|
EntryPointCounter=1;
|
|
for (i=0; i<NumberOfInstructions; i++) {
|
|
//
|
|
// Mark all instructions which don't correspond to 0-byte NOPs
|
|
// following optimized instructions as starting EntryPoints.
|
|
//
|
|
if (InstructionStream[i].Size) {
|
|
EntryPointCounter++;
|
|
}
|
|
InstructionStream[i].EntryPoint = (PENTRYPOINT)EntryPointCounter;
|
|
}
|
|
|
|
} else {
|
|
|
|
//
|
|
// Find all instructions which need Entrypoints.
|
|
// Performance is O(n^2) in the worst case, although
|
|
// it will be typically much closer to O(n)
|
|
//
|
|
// Instructions which mark the starts of Entrypoints have
|
|
// their .EntryPoint pointer set to non-NULL. Instructions which
|
|
// don't require entrypoints have it set to NULL;
|
|
//
|
|
|
|
IntelStart = InstructionStream[0].IntelAddress;
|
|
IntelEnd = IntelStart +
|
|
InstructionStream[NumberOfInstructions-1].IntelAddress +
|
|
InstructionStream[NumberOfInstructions-1].Size;
|
|
|
|
//
|
|
// The first instruction always gets an entrypoint
|
|
//
|
|
InstructionStream[0].EntryPoint = (PENTRYPOINT)1;
|
|
|
|
//
|
|
// Visit each instruction in turn
|
|
//
|
|
for (i=0; i<NumberOfInstructions; i++) {
|
|
|
|
if (((i+1) < NumberOfInstructions) &&
|
|
(Fragments[InstructionStream[i].Operation].Flags & OPFL_END_NEXT_EP)) {
|
|
//
|
|
// This instruction marks the end of an Entrypoint. The next
|
|
// instruction gets a new Entrypoint.
|
|
//
|
|
CPUASSERT(i < CpuInstructionLookahead-1 && i < NumberOfInstructions-1);
|
|
InstructionStream[i+1].EntryPoint = (PENTRYPOINT)1;
|
|
}
|
|
|
|
// Now see if it is a direct control transfer instruction with a
|
|
// destination that lies within this instruction stream. If it is,
|
|
// we want to create an Entry Point at the destination so that the
|
|
// control transfer will be compiled directly to the patched form,
|
|
// and won't have to be patched later.
|
|
//
|
|
if (Fragments[InstructionStream[i].Operation].Flags & OPFL_CTRLTRNS) {
|
|
//
|
|
// The instruction is a direct control-transfer. If the
|
|
// destination is within the InstructionStream, create an
|
|
// Entrypoint at the destination.
|
|
//
|
|
|
|
if (InstructionStream[i].Operand1.Type == OPND_IMM ||
|
|
InstructionStream[i].Operand1.Type == OPND_NOCODEGEN) {
|
|
// Get the intel destination from the instruction structure.
|
|
intelDest = InstructionStream[i].Operand1.Immed;
|
|
} else {
|
|
CPUASSERT(InstructionStream[i].Operand1.Type == OPND_ADDRREF );
|
|
// A FAR instruction - Operand1 is a ptr to a SEL:OFFSET pair
|
|
intelDest = *(UNALIGNED PULONG)(InstructionStream[i].Operand1.Immed);
|
|
}
|
|
|
|
// Get the intel destination from the instruction structure.
|
|
// It is always an immediate with direct control transfers.
|
|
|
|
if ((intelDest >= IntelStart) && (intelDest <= IntelEnd)) {
|
|
//
|
|
// Destination of the control-transfer is within the
|
|
// instructionstream. Find the destination instruction.
|
|
//
|
|
if (intelDest > InstructionStream[i].IntelAddress) {
|
|
//
|
|
// The dest. address is at a higher address.
|
|
//
|
|
for (j=i+1; j<NumberOfInstructions; ++j) {
|
|
if (InstructionStream[j].IntelAddress == intelDest) {
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
//
|
|
// The dest. address is at a lower address.
|
|
//
|
|
for (j=i; j>0; --j) {
|
|
if (InstructionStream[j].IntelAddress == intelDest) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// An exact match may not be found in the event that the
|
|
// app is punning (either a real pun or the app is jumping
|
|
// into the middle of an optimized instruction). In
|
|
// either of the cases, defer entrypoint creation until
|
|
// the branch is actually taken.
|
|
//
|
|
if (j >= 0 && j < NumberOfInstructions) {
|
|
//
|
|
// Exact match was found. Create an Entrypoint.
|
|
//
|
|
InstructionStream[j].EntryPoint = (PENTRYPOINT)1;
|
|
}
|
|
}
|
|
} // if OPFL_CTRLTRNS
|
|
} // for ()
|
|
|
|
//
|
|
// Convert the EntryPoint field from NULL/non-NULL to a unique
|
|
// value for each range of instructions.
|
|
//
|
|
EntryPointCounter=1;
|
|
i=0;
|
|
while (i<NumberOfInstructions) {
|
|
//
|
|
// This instruction marks the beginning of a basic block
|
|
//
|
|
InstructionStream[i].EntryPoint = (PENTRYPOINT)EntryPointCounter;
|
|
j=i+1;
|
|
while (j < NumberOfInstructions) {
|
|
if ((j >= NumberOfInstructions) ||
|
|
(InstructionStream[j].Size && InstructionStream[j].EntryPoint)) {
|
|
//
|
|
// Either ran out of instructions, or encountered an instruction
|
|
// which marks the start of the next basic block. Note that
|
|
// 0-byte NOP instructions are not allowed to start basic blocks
|
|
// as that violates the rules of OPT_ instructions.
|
|
//
|
|
break;
|
|
}
|
|
InstructionStream[j].EntryPoint = (PENTRYPOINT)EntryPointCounter;
|
|
j++;
|
|
}
|
|
EntryPointCounter++;
|
|
i = j;
|
|
}
|
|
} // if not COMPFL_SLOW
|
|
|
|
//
|
|
// At this point, EntryPointCounter holds the number of EntryPoints
|
|
// plus one, because we started the counter at 1, not 0. Correct
|
|
// that now.
|
|
//
|
|
EntryPointCounter--;
|
|
|
|
return EntryPointCounter;
|
|
}
|
|
|
|
|
|
VOID
|
|
UpdateRegs(
|
|
PINSTRUCTION pInstr,
|
|
POPERAND Operand
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Updates the list of registers referenced and/or modified based on the
|
|
Operand.
|
|
|
|
Arguments:
|
|
|
|
pInstr -- the instruction to examine
|
|
|
|
Operand -- the operand of the instruction to examine
|
|
|
|
Return Value:
|
|
|
|
return-value - none
|
|
|
|
--*/
|
|
{
|
|
switch (Operand->Type) {
|
|
case OPND_NOCODEGEN:
|
|
case OPND_REGREF:
|
|
if (Operand->Reg != NO_REG) {
|
|
pInstr->RegsSet |= MapRegNumToRegBits[Operand->Reg];
|
|
}
|
|
break;
|
|
|
|
case OPND_REGVALUE:
|
|
if (Operand->Reg != NO_REG) {
|
|
pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->Reg];
|
|
}
|
|
break;
|
|
|
|
case OPND_ADDRREF:
|
|
case OPND_ADDRVALUE8:
|
|
case OPND_ADDRVALUE16:
|
|
case OPND_ADDRVALUE32:
|
|
if (Operand->Reg != NO_REG) {
|
|
pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->Reg];
|
|
}
|
|
if (Operand->IndexReg != NO_REG) {
|
|
pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->IndexReg];
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
VOID
|
|
CacheIntelRegs(
|
|
PINSTRUCTION InstructionStream,
|
|
ULONG numInstr)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function deterimes what x86 registers, if any, can be cached in
|
|
RISC preserved registers.
|
|
|
|
Arguments:
|
|
|
|
InstructionStream -- The instruction stream returned by the decoder
|
|
|
|
numInstr -- The length of InstructionStream
|
|
|
|
Return Value:
|
|
|
|
return-value - none
|
|
|
|
--*/
|
|
{
|
|
PINSTRUCTION pInstr;
|
|
BYTE RegUsage[REGCOUNT];
|
|
DWORD RegsToCache;
|
|
int i;
|
|
PENTRYPOINT PrevEntryPoint;
|
|
|
|
//
|
|
// Calculate the RegsSet and RegsNeeded for the bottommost instruction
|
|
//
|
|
pInstr = &InstructionStream[numInstr-1];
|
|
pInstr->RegsSet = Fragments[pInstr->Operation].RegsSet;
|
|
PrevEntryPoint = pInstr->EntryPoint;
|
|
UpdateRegs(pInstr, &pInstr->Operand1);
|
|
UpdateRegs(pInstr, &pInstr->Operand2);
|
|
UpdateRegs(pInstr, &pInstr->Operand3);
|
|
|
|
//
|
|
// For each 32-bit register used as a parameter to this instruction,
|
|
// set the usage count to 1.
|
|
//
|
|
for (i=0; i<REGCOUNT; ++i) {
|
|
if (pInstr->RegsNeeded & (REGMASK<<(REGSHIFT*i))) {
|
|
RegUsage[i] = 1;
|
|
} else {
|
|
RegUsage[i] = 0;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Loop over instruction stream from bottom to top, starting at the
|
|
// second-to-last instruction
|
|
//
|
|
for (pInstr--; pInstr >= InstructionStream; pInstr--) {
|
|
|
|
//
|
|
// Calculate the RegsSet and RegsNeeded values for this instruction
|
|
//
|
|
pInstr->RegsSet = Fragments[pInstr->Operation].RegsSet;
|
|
UpdateRegs(pInstr, &pInstr->Operand1);
|
|
UpdateRegs(pInstr, &pInstr->Operand2);
|
|
UpdateRegs(pInstr, &pInstr->Operand3);
|
|
|
|
RegsToCache = 0;
|
|
|
|
if (PrevEntryPoint != pInstr->EntryPoint) {
|
|
|
|
//
|
|
// The current instruction marks the end of an Entrypoint.
|
|
//
|
|
PrevEntryPoint = pInstr->EntryPoint;
|
|
|
|
//
|
|
// For all x86 registers which have been read more than once
|
|
// but not modified in the basic block, load them into the
|
|
// cache before executing the first instruction in the basic
|
|
// block.
|
|
//
|
|
for (i=0; i<REGCOUNT; ++i) {
|
|
if (RegUsage[i] > 1) {
|
|
RegsToCache |= (REGMASK<<(REGSHIFT*i));
|
|
}
|
|
}
|
|
|
|
//
|
|
// Reset the RegUsage[] array to indicate no registers are
|
|
// cached.
|
|
//
|
|
RtlZeroMemory(RegUsage, REGCOUNT);
|
|
|
|
} else {
|
|
|
|
//
|
|
// For each 32-bit x86 register modified by this instruction,
|
|
// update the caching info.
|
|
//
|
|
for (i=0; i<REGCOUNT; ++i) {
|
|
DWORD RegBits = pInstr->RegsSet & (REGMASK<<(REGSHIFT*i));
|
|
if (RegBits) {
|
|
//
|
|
// The ith 32-bit x86 register has been modified by this
|
|
// instruction
|
|
//
|
|
if (RegUsage[i] > 1) {
|
|
//
|
|
// There is more than one consumer of the modified
|
|
// value so it is worth caching.
|
|
//
|
|
RegsToCache |= RegBits;
|
|
}
|
|
|
|
//
|
|
// Since this x86 register was dirtied by this instruction,
|
|
// it usage count must be reset to 0.
|
|
//
|
|
RegUsage[i] = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Update the list of x86 registers which can be loaded into
|
|
// cache registers before the next instruction executes.
|
|
//
|
|
pInstr[1].RegsToCache |= RegsToCache;
|
|
|
|
//
|
|
// For each 32-bit register used as a parameter to this instruction,
|
|
// bump the usage count.
|
|
//
|
|
for (i=0; i<REGCOUNT; ++i) {
|
|
if (pInstr->RegsNeeded & (REGMASK<<(REGSHIFT*i))) {
|
|
RegUsage[i]++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
VOID
|
|
OptimizeInstructionStream(
|
|
PINSTRUCTION IS,
|
|
ULONG numInstr
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function performs various optimization on the instruction stream
|
|
retured by the decoder.
|
|
|
|
Arguments:
|
|
|
|
IS -- The instruction stream returned by the decoder
|
|
|
|
numInstr -- The length of IS
|
|
|
|
Return Value:
|
|
|
|
return-value - none
|
|
|
|
--*/
|
|
{
|
|
ULONG i;
|
|
|
|
CPUASSERTMSG(numInstr, "Cannot optimize 0-length instruction stream");
|
|
|
|
//
|
|
// Pass 1: Optimize x86 instruction stream, replacing single x86
|
|
// instructions with special-case instructions, and replacing
|
|
// multiple x86 instructions with single special-case OPT_
|
|
// instructions
|
|
//
|
|
for (i=0; i<numInstr; ++i) {
|
|
|
|
switch (IS[i].Operation) {
|
|
case OP_Push32:
|
|
if (i < numInstr-2
|
|
&& IS[i].Operand1.Type == OPND_REGVALUE){
|
|
|
|
if (IS[i].Operand1.Reg == GP_EBP) {
|
|
// OP_OPT_SetupStack --
|
|
// push ebp
|
|
// mov ebp, esp
|
|
// sub esp, x
|
|
if ((IS[i+1].Operation == OP_Mov32) &&
|
|
(IS[i+1].Operand1.Type == OPND_REGREF) &&
|
|
(IS[i+1].Operand1.Reg == GP_EBP) &&
|
|
(IS[i+1].Operand2.Type == OPND_REGVALUE) &&
|
|
(IS[i+1].Operand2.Reg == GP_ESP) &&
|
|
(IS[i+2].Operation == OP_Sub32) &&
|
|
(IS[i+2].Operand1.Type == OPND_REGREF) &&
|
|
(IS[i+2].Operand1.Reg == GP_ESP) &&
|
|
(IS[i+2].Operand2.Type == OPND_IMM)){
|
|
|
|
IS[i].Operation = OP_OPT_SetupStack;
|
|
IS[i].Operand1.Type = OPND_IMM;
|
|
IS[i].Operand1.Immed = IS[i+2].Operand2.Immed;
|
|
IS[i].Size += IS[i+1].Size + IS[i+2].Size;
|
|
IS[i].Operand2.Type = OPND_NONE;
|
|
IS[i+1].Operation = OP_Nop;
|
|
IS[i+1].Operand1.Type = OPND_NONE;
|
|
IS[i+1].Operand2.Type = OPND_NONE;
|
|
IS[i+1].Size = 0;
|
|
IS[i+2].Operation = OP_Nop;
|
|
IS[i+2].Operand1.Type = OPND_NONE;
|
|
IS[i+2].Operand2.Type = OPND_NONE;
|
|
IS[i+2].Size = 0;
|
|
i+=2;
|
|
break;
|
|
}
|
|
} else if (IS[i].Operand1.Reg == GP_EBX) {
|
|
// OP_OPT_PushEbxEsiEdi --
|
|
// push ebx
|
|
// push esi
|
|
// push edi
|
|
if ((IS[i+1].Operation == OP_Push32) &&
|
|
(IS[i+1].Operand1.Type == OPND_REGVALUE) &&
|
|
(IS[i+1].Operand1.Reg == GP_ESI) &&
|
|
(IS[i+2].Operation == OP_Push32) &&
|
|
(IS[i+2].Operand1.Type == OPND_REGVALUE) &&
|
|
(IS[i+2].Operand1.Reg == GP_EDI)){
|
|
|
|
IS[i].Operation = OP_OPT_PushEbxEsiEdi;
|
|
IS[i].Size += IS[i+1].Size + IS[i+2].Size;
|
|
IS[i].Operand1.Type = OPND_NONE;
|
|
IS[i].Operand2.Type = OPND_NONE;
|
|
IS[i+1].Operation = OP_Nop;
|
|
IS[i+1].Operand1.Type = OPND_NONE;
|
|
IS[i+1].Operand2.Type = OPND_NONE;
|
|
IS[i+1].Size = 0;
|
|
IS[i+2].Operation = OP_Nop;
|
|
IS[i+2].Operand1.Type = OPND_NONE;
|
|
IS[i+2].Operand2.Type = OPND_NONE;
|
|
IS[i+2].Size = 0;
|
|
i+=2;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// It is not one of the other special PUSH sequences, so see
|
|
// if there are two consecutive PUSHes to merge together. Note:
|
|
// If the second PUSH references ESP, the two cannot be merged
|
|
// because the value is computed before 4 is subtracted from ESP.
|
|
// ie. the following is disallowed:
|
|
// PUSH EAX
|
|
// PUSH ESP ; second operand to Push2 would have been
|
|
// ; built before the PUSH EAX was executed.
|
|
//
|
|
if (i < numInstr-1 &&
|
|
!IS[i].FsOverride &&
|
|
!IS[i+1].FsOverride &&
|
|
IS[i+1].Operation == OP_Push32 &&
|
|
IS[i+1].Operand1.Reg != GP_ESP &&
|
|
IS[i+1].Operand1.IndexReg != GP_ESP) {
|
|
|
|
IS[i].Operation = OP_OPT_Push232;
|
|
IS[i].Operand2 = IS[i+1].Operand1;
|
|
IS[i].Size += IS[i+1].Size;
|
|
IS[i+1].Operation = OP_Nop;
|
|
IS[i+1].Operand1.Type = OPND_NONE;
|
|
IS[i+1].Size = 0;
|
|
i++;
|
|
}
|
|
|
|
break;
|
|
|
|
case OP_Pop32:
|
|
// OP_OPT_PopEdiEsiEbx
|
|
// pop edi
|
|
// pop esi
|
|
// pop ebx
|
|
if (i < numInstr-2 &&
|
|
(IS[i].Operand1.Type == OPND_REGREF) &&
|
|
(IS[i].Operand1.Reg == GP_EDI) &&
|
|
(IS[i+1].Operation == OP_Pop32) &&
|
|
(IS[i+1].Operand1.Type == OPND_REGREF) &&
|
|
(IS[i+1].Operand1.Reg == GP_ESI) &&
|
|
(IS[i+2].Operation == OP_Pop32) &&
|
|
(IS[i+2].Operand1.Type == OPND_REGREF) &&
|
|
(IS[i+2].Operand1.Reg == GP_EBX)){
|
|
|
|
IS[i].Operation = OP_OPT_PopEdiEsiEbx;
|
|
IS[i].Size += IS[i+1].Size + IS[i+2].Size;
|
|
IS[i].Operand1.Type = OPND_NONE;
|
|
IS[i].Operand2.Type = OPND_NONE;
|
|
IS[i+1].Operation = OP_Nop;
|
|
IS[i+1].Operand1.Type = OPND_NONE;
|
|
IS[i+1].Operand2.Type = OPND_NONE;
|
|
IS[i+1].Size = 0;
|
|
IS[i+2].Operation = OP_Nop;
|
|
IS[i+2].Operand1.Type = OPND_NONE;
|
|
IS[i+2].Operand2.Type = OPND_NONE;
|
|
IS[i+2].Size = 0;
|
|
i+=2;
|
|
} else if (i < numInstr-1 &&
|
|
!IS[i].FsOverride &&
|
|
!IS[i].FsOverride &&
|
|
IS[i].Operand1.Type == OPND_REGREF &&
|
|
IS[i+1].Operation == OP_Pop32 &&
|
|
IS[i+1].Operand1.Type == OPND_REGREF) {
|
|
|
|
// Fold the two POPs together. Both operands are REGREF,
|
|
// so there is no problem with interdependencies between
|
|
// memory touched by the first POP modifying the address
|
|
// of the second POP. ie. the following is not merged:
|
|
// POP EAX
|
|
// POP [EAX] ; depends on results of first POP
|
|
IS[i].Operation = OP_OPT_Pop232;
|
|
IS[i].Operand2 = IS[i+1].Operand1;
|
|
IS[i].Size += IS[i+1].Size;
|
|
IS[i+1].Operation = OP_Nop;
|
|
IS[i+1].Operand1.Type = OPND_NONE;
|
|
IS[i+1].Size = 0;
|
|
i++;
|
|
}
|
|
break;
|
|
|
|
case OP_Xor32:
|
|
case OP_Sub32:
|
|
if (IS[i].Operand1.Type == OPND_REGREF &&
|
|
IS[i].Operand2.Type == OPND_REGVALUE &&
|
|
IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
|
|
// Instruction is XOR samereg, samereg (ie. XOR EAX, EAX),
|
|
// or SUB samereg, samereg (ie. SUB ECX, ECX).
|
|
// Emit OP_OPT_ZERO32 samereg
|
|
IS[i].Operand2.Type = OPND_NONE;
|
|
IS[i].Operation = OP_OPT_ZERO32;
|
|
}
|
|
break;
|
|
|
|
case OP_Test8:
|
|
if (IS[i].Operand1.Type == OPND_REGVALUE &&
|
|
IS[i].Operand2.Type == OPND_REGVALUE &&
|
|
IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
|
|
// Instruction is TEST samereg, samereg (ie. TEST EAX, EAX)
|
|
// Emit OP_OPT_FastTest8/16/32
|
|
IS[i].Operand1.Type = OPND_REGVALUE;
|
|
IS[i].Operand2.Type = OPND_NONE;
|
|
IS[i].Operation = OP_OPT_FastTest8;
|
|
}
|
|
break;
|
|
|
|
case OP_Test16:
|
|
if (IS[i].Operand1.Type == OPND_REGVALUE &&
|
|
IS[i].Operand2.Type == OPND_REGVALUE &&
|
|
IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
|
|
// Instruction is TEST samereg, samereg (ie. TEST EAX, EAX)
|
|
// Emit OP_OPT_FastTest8/16/32
|
|
IS[i].Operand1.Type = OPND_REGVALUE;
|
|
IS[i].Operand2.Type = OPND_NONE;
|
|
IS[i].Operation = OP_OPT_FastTest16;
|
|
}
|
|
break;
|
|
|
|
case OP_Test32:
|
|
if (IS[i].Operand1.Type == OPND_REGVALUE &&
|
|
IS[i].Operand2.Type == OPND_REGVALUE &&
|
|
IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
|
|
// Instruction is TEST samereg, samereg (ie. TEST EAX, EAX)
|
|
// Emit OP_OPT_FastTest8/16/32
|
|
IS[i].Operand1.Type = OPND_REGVALUE;
|
|
IS[i].Operand2.Type = OPND_NONE;
|
|
IS[i].Operation = OP_OPT_FastTest32;
|
|
}
|
|
break;
|
|
|
|
case OP_Cmp32:
|
|
if (i<numInstr+1 && IS[i+1].Operation == OP_Sbb32 &&
|
|
IS[i+1].Operand1.Type == OPND_REGREF &&
|
|
IS[i+1].Operand2.Type == OPND_REGVALUE &&
|
|
IS[i+1].Operand1.Reg == IS[i+1].Operand2.Reg) {
|
|
// The two instructions are:
|
|
// CMP anything1, anything2
|
|
// SBB samereg, samereg
|
|
// The optimized instruction is:
|
|
// Operation = either CmpSbb32 or CmpSbbNeg32
|
|
// Operand1 = &samereg (passed as REGREF)
|
|
// Operand2 = anything1 (passed as ADDRVAL32 or REGVAL)
|
|
// Operand3 = anything2 (passed as ADDRVAL32 or REGVAL)
|
|
IS[i].Operand3 = IS[i].Operand2;
|
|
IS[i].Operand2 = IS[i].Operand1;
|
|
IS[i].Operand1 = IS[i+1].Operand1;
|
|
if (i<numInstr+2 && IS[i+2].Operation == OP_Neg32 &&
|
|
IS[i+2].Operand1.Type == OPND_REGREF &&
|
|
IS[i+2].Operand1.Reg == IS[i+1].Operand1.Reg) {
|
|
// The third instruction is NEG samereg, samereg
|
|
IS[i].Operation = OP_OPT_CmpSbbNeg32;
|
|
IS[i+2].Operation = OP_Nop;
|
|
IS[i+2].Operand1.Type = OPND_NONE;
|
|
IS[i+2].Operand2.Type = OPND_NONE;
|
|
IS[i+2].Size = 0;
|
|
} else {
|
|
IS[i].Operation = OP_OPT_CmpSbb32;
|
|
}
|
|
IS[i+1].Operation = OP_Nop;
|
|
IS[i+1].Operand1.Type = OPND_NONE;
|
|
IS[i+1].Operand2.Type = OPND_NONE;
|
|
IS[i+1].Size = 0;
|
|
i++;
|
|
}
|
|
break;
|
|
|
|
case OP_Cwd16:
|
|
if (i<numInstr+1 && IS[i+1].Operation == OP_Idiv16) {
|
|
IS[i].Operation = OP_OPT_CwdIdiv16;
|
|
IS[i].Operand1 = IS[i+1].Operand1;
|
|
IS[i].Size += IS[i+1].Size;
|
|
IS[i+1].Operation = OP_Nop;
|
|
IS[i+1].Operand1.Type = OPND_NONE;
|
|
IS[i+1].Size = 0;
|
|
i++;
|
|
}
|
|
break;
|
|
|
|
case OP_Cwd32:
|
|
if (i<numInstr+1 && IS[i+1].Operation == OP_Idiv32) {
|
|
IS[i].Operation = OP_OPT_CwdIdiv32;
|
|
IS[i].Operand1 = IS[i+1].Operand1;
|
|
IS[i].Size += IS[i+1].Size;
|
|
IS[i+1].Operation = OP_Nop;
|
|
IS[i+1].Operand1.Type = OPND_NONE;
|
|
IS[i+1].Size = 0;
|
|
i++;
|
|
}
|
|
break;
|
|
|
|
case OP_FP_FNSTSW:
|
|
if (i<numInstr+1 && IS[i+1].Operation == OP_Sahf &&
|
|
IS[i].Operand1.Type == OPND_REGREF &&
|
|
IS[i].Operand1.Reg == GP_AX) {
|
|
|
|
// Replace FNSTSW AX / SAHF by one instruction
|
|
IS[i].Operation = OP_OPT_FNSTSWAxSahf;
|
|
IS[i].Operand1.Type = OPND_NONE;
|
|
IS[i].Size += IS[i+1].Size;
|
|
IS[i+1].Operation = OP_Nop;
|
|
IS[i+1].Size = 0;
|
|
i++;
|
|
}
|
|
break;
|
|
|
|
case OP_FP_FSTP_STi:
|
|
if (IS[i].Operand1.Immed == 0) {
|
|
IS[i].Operand1.Type = OPND_NONE;
|
|
IS[i].Operation = OP_OPT_FSTP_ST0;
|
|
}
|
|
break;
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
VOID
|
|
OptimizeIntelFlags(
|
|
PINSTRUCTION IS,
|
|
ULONG numInstr
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function analysis x86 flag register usage and switches instructions
|
|
to use NoFlags versions if possible.
|
|
|
|
Arguments:
|
|
|
|
IS -- The instruction stream returned by the decoder
|
|
|
|
numInstr -- The length of IS
|
|
|
|
Return Value:
|
|
|
|
return-value - none
|
|
|
|
--*/
|
|
{
|
|
USHORT FlagsNeeded; // flags required to execute current x86 instr
|
|
USHORT FlagsToGenerate; // flags which current x86 instr must generate
|
|
PFRAGDESCR pFragDesc; // ptr to Fragments[] array for current instr
|
|
ULONG i; // instruction index
|
|
BOOL fPassNeeded = TRUE;// TRUE if the outer loop needs to loop once more
|
|
ULONG PassNumber = 0; // number of times outer loop has looped
|
|
PENTRYPOINT pEPDest; // Entrypoint for destination of a ctrl transfer
|
|
USHORT KnownFlagsNeeded[MAX_INSTR_COUNT]; // flags needed for each instr
|
|
|
|
while (fPassNeeded) {
|
|
|
|
//
|
|
// This loop is executed at most two times. The second pass is only
|
|
// required if there is a control-transfer instruction whose
|
|
// destination is within the Instruction Stream and at a lower
|
|
// Intel address (ie. a backwards JMP).
|
|
//
|
|
fPassNeeded = FALSE;
|
|
PassNumber++;
|
|
CPUASSERT(PassNumber <= 2);
|
|
|
|
//
|
|
// Iterate over all x86 instructions decoded, from bottom to top,
|
|
// propagating flags info up. Start off by assuming all x86 flags
|
|
// must be up-to-date at the end of the last basic block.
|
|
//
|
|
FlagsNeeded = ALLFLAGS;
|
|
i = numInstr;
|
|
do {
|
|
i--;
|
|
pFragDesc = &Fragments[IS[i].Operation];
|
|
|
|
//
|
|
// Calculate what flags will need to be computed by this
|
|
// instruction and ones before this.
|
|
//
|
|
KnownFlagsNeeded[i] = FlagsNeeded | pFragDesc->FlagsNeeded;
|
|
FlagsToGenerate = FlagsNeeded & pFragDesc->FlagsSet;
|
|
|
|
//
|
|
// Calculate what flags this instruction will need to have
|
|
// computed before it can be executed.
|
|
//
|
|
FlagsNeeded = (FlagsNeeded & ~FlagsToGenerate) |
|
|
pFragDesc->FlagsNeeded;
|
|
|
|
if (pFragDesc->Flags & OPFL_CTRLTRNS) {
|
|
ULONG IntelDest = IS[i].Operand1.Immed;
|
|
|
|
//
|
|
// For control-transfer instructions, FlagsNeeded also includes
|
|
// the flags required for the destination of the transfer.
|
|
//
|
|
if (IS[0].IntelAddress <= IntelDest &&
|
|
i > 0 && IS[i-1].IntelAddress >= IntelDest) {
|
|
//
|
|
// The destination of the control-transfer is at a lower
|
|
// address in the Instruction Stream.
|
|
//
|
|
|
|
if (PassNumber == 1) {
|
|
//
|
|
// Need to make a second pass over the flags
|
|
// optimizations in order to determine what flags are
|
|
// needed for the destination address.
|
|
//
|
|
fPassNeeded = TRUE;
|
|
FlagsNeeded = ALLFLAGS; // assume all flags are needed
|
|
} else {
|
|
ULONG j;
|
|
USHORT NewFlagsNeeded;
|
|
|
|
//
|
|
// Search for the IntelDest within the Instruction
|
|
// Stream. IntelDest may not be found if there is
|
|
// a pun.
|
|
//
|
|
NewFlagsNeeded = ALLFLAGS; // assume there is a pun
|
|
for (j=0; j < i; ++j) {
|
|
if (IS[j].IntelAddress == IntelDest) {
|
|
NewFlagsNeeded = KnownFlagsNeeded[j];
|
|
break;
|
|
}
|
|
}
|
|
|
|
FlagsNeeded |= NewFlagsNeeded;
|
|
}
|
|
} else if (IS[i+1].IntelAddress <= IntelDest &&
|
|
IntelDest <= IS[numInstr-1].IntelAddress) {
|
|
//
|
|
// The destination of the control-transfer is at a higher
|
|
// address in the Instruction Stream. Pick up the
|
|
// already-computed FlagsNeeded for the destination.
|
|
//
|
|
ULONG j;
|
|
USHORT NewFlagsNeeded = ALLFLAGS; // assume a pun
|
|
|
|
for (j=i+1; j < numInstr; ++j) {
|
|
if (IS[j].IntelAddress == IntelDest) {
|
|
NewFlagsNeeded = KnownFlagsNeeded[j];
|
|
break;
|
|
}
|
|
}
|
|
|
|
FlagsNeeded |= NewFlagsNeeded;
|
|
|
|
} else {
|
|
//
|
|
// Destination of the control-transfer is unknown. Assume
|
|
// the worst: all flags are required.
|
|
//
|
|
FlagsNeeded = ALLFLAGS;
|
|
}
|
|
}
|
|
|
|
if (!(FlagsToGenerate & pFragDesc->FlagsSet) &&
|
|
(pFragDesc->Flags & OPFL_HASNOFLAGS)) {
|
|
//
|
|
// This instruction is not required to generate any flags, and
|
|
// it has a NOFLAGS version. Update the flags that need to be
|
|
// computed by instructions before this one, and modify the
|
|
// Operation number to point at the NoFlags fragment.
|
|
//
|
|
FlagsToGenerate &= pFragDesc->FlagsSet;
|
|
if (pFragDesc->Flags & OPFL_ALIGN) {
|
|
IS[i].Operation += 2;
|
|
} else {
|
|
IS[i].Operation ++;
|
|
}
|
|
|
|
if (IS[i].Operation == OP_OPT_ZERONoFlags32) {
|
|
//
|
|
// Special-case this to be a "mov [value], zero" so it is
|
|
// inlined.
|
|
//
|
|
IS[i].Operation = OP_Mov32;
|
|
IS[i].Operand2.Type = OPND_IMM;
|
|
IS[i].Operand2.Immed = 0;
|
|
}
|
|
}
|
|
} while (i);
|
|
}
|
|
}
|
|
|
|
VOID
|
|
DetermineEbpAlignment(
|
|
PINSTRUCTION InstructionStream,
|
|
ULONG numInstr
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
For each instruction in InstructionStream[], sets Instruction->EbpAligned
|
|
based on whether EBP is assumed to be DWORD-aligned or not. EBP is
|
|
assumed to be DWORD-aligned if a "MOV EBP, ESP" instruction is seen, and
|
|
it is assumed to become unaligned at the first instruction which is
|
|
flagged as modifying EBP.
|
|
|
|
Arguments:
|
|
|
|
InstructionStream -- The instruction stream returned by the decoder
|
|
|
|
numInstr -- The length of InstructionStream
|
|
|
|
Return Value:
|
|
|
|
return-value - none
|
|
|
|
--*/
|
|
{
|
|
ULONG i;
|
|
BOOL EbpAligned = FALSE;
|
|
|
|
for (i=0; i<numInstr; ++i) {
|
|
if (InstructionStream[i].RegsSet & REGEBP) {
|
|
//
|
|
// This instruction modified EBP
|
|
//
|
|
if (InstructionStream[i].Operation == OP_OPT_SetupStack ||
|
|
InstructionStream[i].Operation == OP_OPT_SetupStackNoFlags ||
|
|
(InstructionStream[i].Operation == OP_Mov32 &&
|
|
InstructionStream[i].Operand2.Type == OPND_REGVALUE &&
|
|
InstructionStream[i].Operand2.Reg == GP_ESP)) {
|
|
//
|
|
// The instruction is either "MOV EBP, ESP" or one of the
|
|
// SetupStack fragments (which contains a "MOV EBP, ESP")
|
|
// assume Ebp is aligned from now on.
|
|
//
|
|
EbpAligned = TRUE;
|
|
} else {
|
|
EbpAligned = FALSE;
|
|
}
|
|
}
|
|
|
|
InstructionStream[i].EbpAligned = EbpAligned;
|
|
}
|
|
}
|
|
|
|
ULONG
|
|
GetInstructionStream(
|
|
PINSTRUCTION InstructionStream,
|
|
PULONG NumberOfInstructions,
|
|
PVOID pIntelInstruction,
|
|
PVOID pLastIntelInstruction
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Returns an instruction stream to the compiler. The instruction
|
|
stream is terminated either when the buffer is full, or when
|
|
we reach a control transfer instruction.
|
|
|
|
Arguments:
|
|
|
|
InstructionStream -- A pointer to the buffer where the decoded
|
|
instructions are stored.
|
|
|
|
NumberOfInstructions -- Upon entry, this variable contains the
|
|
maximal number of instructions the buffer can hold. When
|
|
returning, it contains the actual number of instructions
|
|
decoded.
|
|
|
|
pIntelInstruction -- A pointer to the first real intel instruction
|
|
to be decoded.
|
|
|
|
pLastIntelInstruction -- A pointer to the last intel instruction to be
|
|
compiled, 0xffffffff if not used.
|
|
|
|
Return Value:
|
|
|
|
Number of entrypoints required to describe the decoded instruction
|
|
stream.
|
|
|
|
--*/
|
|
{
|
|
ULONG numInstr=0;
|
|
ULONG maxBufferSize;
|
|
ULONG cEntryPoints;
|
|
|
|
maxBufferSize = (*NumberOfInstructions);
|
|
|
|
//
|
|
// Zero-fill the InstructionStream. The decoder depends on this.
|
|
//
|
|
RtlZeroMemory(InstructionStream, maxBufferSize*sizeof(INSTRUCTION));
|
|
|
|
#if DBG
|
|
//
|
|
// Do a little analysis on the address we're about to decode. If
|
|
// the address is part of a non-x86 image, log that to the debugger.
|
|
// That probably indicates a thunking problem. If the address is not
|
|
// part of an image, warn that the app is running generated code.
|
|
//
|
|
try {
|
|
USHORT Instr;
|
|
|
|
//
|
|
// Try to read the instruction about to be executed. If we get
|
|
// an access violation, use 0 as the value of the instruction.
|
|
//
|
|
Instr = 0;
|
|
|
|
//
|
|
// Ignore BOP instructions - we assume we know what's going on with
|
|
// them.
|
|
//
|
|
if (Instr != 0xc4c4) {
|
|
|
|
NTSTATUS st;
|
|
MEMORY_BASIC_INFORMATION mbi;
|
|
|
|
st = NtQueryVirtualMemory(NtCurrentProcess(),
|
|
pIntelInstruction,
|
|
MemoryBasicInformation,
|
|
&mbi,
|
|
sizeof(mbi),
|
|
NULL);
|
|
if (NT_SUCCESS(st)) {
|
|
PIMAGE_NT_HEADERS Headers;
|
|
|
|
Headers = RtlImageNtHeader(mbi.AllocationBase);
|
|
if (!Headers || Headers->FileHeader.Machine != IMAGE_FILE_MACHINE_I386) {
|
|
LOGPRINT((TRACELOG, "CPU Analysis warning: jumping from Intel to non-intel code at 0x%X\r\n", pIntelInstruction));
|
|
}
|
|
} else {
|
|
// Eip isn't pointing anywhere???
|
|
}
|
|
}
|
|
} except(EXCEPTION_EXECUTE_HANDLER) {
|
|
;
|
|
}
|
|
#endif //DBG
|
|
|
|
while (numInstr < maxBufferSize) {
|
|
|
|
DecodeInstruction ((DWORD) (ULONGLONG)pIntelInstruction, InstructionStream+numInstr);
|
|
if ((STOP_DECODING(InstructionStream[numInstr])) ||
|
|
(pIntelInstruction >= pLastIntelInstruction)) {
|
|
|
|
// We reached a control transfer instruction
|
|
numInstr++;
|
|
(*NumberOfInstructions) = numInstr;
|
|
break; // SUCCESS
|
|
}
|
|
pIntelInstruction = (PVOID) ((ULONGLONG)pIntelInstruction + (InstructionStream+numInstr)->Size);
|
|
|
|
numInstr++;
|
|
}
|
|
|
|
//
|
|
// Optimize x86 code by merging x86 instructions into meta-instructions
|
|
// and cleaning up special x86 idioms.
|
|
//
|
|
if (!(CompilerFlags & COMPFL_SLOW)) {
|
|
OptimizeInstructionStream (InstructionStream, numInstr);
|
|
}
|
|
|
|
//
|
|
// Determine where all basic blocks are by filling in the EntryPoint
|
|
// field in each instruction. This must be done after
|
|
// OptimizeInstructionStream() runs so that EntryPoints don't fall
|
|
// into the middle of meta-instructions.
|
|
//
|
|
cEntryPoints = LocateEntryPoints(InstructionStream, numInstr);
|
|
|
|
//
|
|
// Perform optimizations which require knowledge of EntryPoints
|
|
//
|
|
if (numInstr > 2 && !(CompilerFlags & COMPFL_SLOW)) {
|
|
if (!CpuDisableNoFlags) {
|
|
OptimizeIntelFlags(InstructionStream, numInstr);
|
|
}
|
|
|
|
if (!CpuDisableRegCache) {
|
|
CacheIntelRegs(InstructionStream, numInstr);
|
|
}
|
|
|
|
if (!CpuDisableEbpAlign) {
|
|
DetermineEbpAlignment(InstructionStream, numInstr);
|
|
}
|
|
}
|
|
|
|
return cEntryPoints;
|
|
}
|