/*++

Copyright (c) 1992-2000  Microsoft Corporation

Module Name:

    analysis.c

Abstract:

    This module contains the main file of the analysis
    module.

Author:

    Ori Gershony (t-orig) creation-date 6-July-1995

Revision History:

    24-Aug-1999 [askhalid] copied from 32-bit wx86 directory and make work for 64bit.

--*/

//
// NOTE(review): this copy of the file is damaged.  Wherever the original
// text contained a '<' immediately followed by a letter, everything up to
// the next '>' appears to have been stripped.  The header names of the
// #include directives below and several loop headers are therefore
// missing.  Each damaged spot is marked with a NOTE(review) comment and the
// surviving tokens are kept exactly as found; nothing has been
// reconstructed.  The file will not compile until it is repaired from a
// clean copy.
//

//
// NOTE(review): the header name of every #include below is missing.
//
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

ASSERTNAME;

//
// Macro to determine when to stop looking ahead during compilation.
// Evaluates to nonzero when the fragment descriptor for the instruction's
// Operation has OPFL_STOP_COMPILE set.
//
#define STOP_DECODING(inst) (Fragments[inst.Operation].Flags & OPFL_STOP_COMPILE)

//
// Map a REG_ constant (offset into cpu struct) into register bit map
// used by instruction data.  The table has 0x1e entries; entries 8
// through 13 are zero.
//
const DWORD MapRegNumToRegBits[0x1e] =
    {REGEAX, REGECX, REGEDX, REGEBX, REGESP, REGEBP, REGESI, REGEDI,
     0, 0, 0, 0, 0, 0,
     REGAX, REGCX, REGDX, REGBX, REGSP, REGBP, REGSI, REGDI,
     REGAL, REGCL, REGDL, REGBL, REGAH, REGCH, REGDH, REGBH };


ULONG
LocateEntryPoints(
    PINSTRUCTION InstructionStream,
    ULONG NumberOfInstructions
    )
/*++

Routine Description:

    This function scans the InstructionStream and marks instructions
    which begin an entrypoint.  An instruction begins an entrypoint if its
    EntryPoint field has a different value than the previous
    instruction's value.  No instruction will have a NULL pointer.

    Note that in this pass, the EntryPoint field does *not* point to an
    ENTRYPOINT structure... it is only a marker.

Arguments:

    InstructionStream    -- The instruction stream to scan; the EntryPoint
                            field of its elements is written
    NumberOfInstructions -- The number of instructions in InstructionStream

Return Value:

    Count of EntryPoints located.

--*/
{
    ULONG i, j, intelDest;
    ULONG EntryPointCounter;
    ULONG IntelStart;
    ULONG IntelEnd;

    if (CompilerFlags & COMPFL_SLOW) {
        //
        // The compiler is supposed to generate slowmode code.  Each
        // x86 instruction gets its own ENTRYPOINT
        //
        EntryPointCounter=1;

        //
        // NOTE(review): text is missing between "i" and "= IntelStart" on
        // the next line.  The rest of this loop header, the remainder of
        // the slow-mode branch, the start of the non-slow branch (where
        // IntelStart, IntelEnd and intelDest would be assigned) and the
        // start of the range test are not present in this copy.
        //
        for (i=0; i= IntelStart) && (intelDest <= IntelEnd)) {
                //
                // Destination of the control-transfer is within the
                // instructionstream.  Find the destination instruction.
                //
                if (intelDest > InstructionStream[i].IntelAddress) {
                    //
                    // The dest. address is at a higher address.
                    //

                    //
                    // NOTE(review): text is missing between "j" and "0" on
                    // the next line.  The forward search loop and the header
                    // of the search used for lower addresses have been fused
                    // together.
                    //
                    for (j=i+1; j0; --j) {
                        if (InstructionStream[j].IntelAddress == intelDest) {
                            break;
                        }
                    }
                }

                //
                // An exact match may not be found in the event that the
                // app is punning (either a real pun or the app is jumping
                // into the middle of an optimized instruction).  In
                // either of the cases, defer entrypoint creation until
                // the branch is actually taken.
                //
                // NOTE(review): j is declared ULONG, so "j >= 0" is always
                // true; only the upper-bound test can reject a value.
                //
                if (j >= 0 && j < NumberOfInstructions) {
                    //
                    // Exact match was found.  Create an Entrypoint.
                    //
                    InstructionStream[j].EntryPoint = (PENTRYPOINT)1;
                }
            }
        }   // if OPFL_CTRLTRNS
    }   // for ()

    //
    // Convert the EntryPoint field from NULL/non-NULL to a unique
    // value for each range of instructions.
    //
    EntryPointCounter=1;
    i=0;

    //
    // NOTE(review): text is missing between "i" and "= NumberOfInstructions"
    // on the next line.  The outer loop condition, the initialization of j
    // and the header of the inner loop are not present in this copy.
    //
    while (i= NumberOfInstructions) ||
                (InstructionStream[j].Size && InstructionStream[j].EntryPoint)) {
                //
                // Either ran out of instructions, or encountered an instruction
                // which marks the start of the next basic block.  Note that
                // 0-byte NOP instructions are not allowed to start basic blocks
                // as that violates the rules of OPT_ instructions.
                //
                break;
            }
            InstructionStream[j].EntryPoint = (PENTRYPOINT)EntryPointCounter;
            j++;
        }
        EntryPointCounter++;
        i = j;
    }
    } // if not COMPFL_SLOW

    //
    // At this point, EntryPointCounter holds the number of EntryPoints
    // plus one, because we started the counter at 1, not 0.  Correct
    // that now.
    //
    EntryPointCounter--;

    return EntryPointCounter;
}


VOID
UpdateRegs(
    PINSTRUCTION pInstr,
    POPERAND Operand
    )
/*++

Routine Description:

    Updates the list of registers referenced and/or modified based on the
    Operand.  The operand's register(s) are translated through
    MapRegNumToRegBits and OR-ed into pInstr->RegsSet (for OPND_NOCODEGEN
    and OPND_REGREF operands) or pInstr->RegsNeeded (for OPND_REGVALUE and
    the OPND_ADDR* operands).  Operands of any other type leave pInstr
    unchanged.

Arguments:

    pInstr  -- the instruction to examine
    Operand -- the operand of the instruction to examine

Return Value:

    return-value - none

--*/
{
    //
    // NOTE(review): Reg and IndexReg are used as indices into
    // MapRegNumToRegBits without a range check; this assumes any value
    // other than NO_REG is below 0x1e -- confirm against the decoder.
    //
    switch (Operand->Type) {
    case OPND_NOCODEGEN:
    case OPND_REGREF:
        //
        // The register is recorded as set by the instruction.
        //
        if (Operand->Reg != NO_REG) {
            pInstr->RegsSet |= MapRegNumToRegBits[Operand->Reg];
        }
        break;

    case OPND_REGVALUE:
        //
        // The register is recorded as needed by the instruction.
        //
        if (Operand->Reg != NO_REG) {
            pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->Reg];
        }
        break;

    case OPND_ADDRREF:
    case OPND_ADDRVALUE8:
    case OPND_ADDRVALUE16:
    case OPND_ADDRVALUE32:
        //
        // Both Reg and IndexReg of an address operand are recorded as
        // needed.
        //
        if (Operand->Reg != NO_REG) {
            pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->Reg];
        }
        if (Operand->IndexReg != NO_REG) {
            pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->IndexReg];
        }
        break;

    default:
        break;
    }
}


VOID
CacheIntelRegs(
    PINSTRUCTION InstructionStream,
    ULONG numInstr)
/*++

Routine Description:

    This function determines what x86 registers, if any, can be cached in
    RISC preserved registers.  The stream is walked from the last
    instruction to the first while a per-register usage count is kept;
    the result is accumulated in the RegsToCache field of the instructions.

Arguments:

    InstructionStream -- The instruction stream returned by the decoder
    numInstr          -- The length of InstructionStream

Return Value:

    return-value - none

--*/
{
    PINSTRUCTION pInstr;
    BYTE RegUsage[REGCOUNT];
    DWORD RegsToCache;
    int i;
    PENTRYPOINT PrevEntryPoint;

    //
    // Calculate the RegsSet and RegsNeeded for the bottommost instruction
    //
    pInstr = &InstructionStream[numInstr-1];
    pInstr->RegsSet = Fragments[pInstr->Operation].RegsSet;
    PrevEntryPoint = pInstr->EntryPoint;
    UpdateRegs(pInstr, &pInstr->Operand1);
    UpdateRegs(pInstr, &pInstr->Operand2);
    UpdateRegs(pInstr, &pInstr->Operand3);

    //
    // For each 32-bit register used as a parameter to this instruction,
    // set the usage count to 1.
    //
    // NOTE(review): text is missing between "i" and "RegsNeeded" on the
    // next line (the rest of the loop header and the start of the test).
    //
    for (i=0; iRegsNeeded & (REGMASK<<(REGSHIFT*i))) {
            RegUsage[i] = 1;
        } else {
            RegUsage[i] = 0;
        }
    }

    //
    // Loop over instruction stream from bottom to top, starting at the
    // second-to-last instruction
    //
    for (pInstr--; pInstr >= InstructionStream; pInstr--) {

        //
        // Calculate the RegsSet and RegsNeeded values for this instruction
        //
        pInstr->RegsSet = Fragments[pInstr->Operation].RegsSet;
        UpdateRegs(pInstr, &pInstr->Operand1);
        UpdateRegs(pInstr, &pInstr->Operand2);
        UpdateRegs(pInstr, &pInstr->Operand3);

        RegsToCache = 0;

        if (PrevEntryPoint != pInstr->EntryPoint) {

            //
            // The current instruction marks the end of an Entrypoint.
            //
            PrevEntryPoint = pInstr->EntryPoint;

            //
            // For all x86 registers which have been read more than once
            // but not modified in the basic block, load them into the
            // cache before executing the first instruction in the basic
            // block.
            //
            // NOTE(review): text is missing between "i" and "1" on the
            // next line (the rest of the loop header and the start of the
            // usage-count test).
            //
            for (i=0; i 1) {
                    RegsToCache |= (REGMASK<<(REGSHIFT*i));
                }
            }

            //
            // Reset the RegUsage[] array to indicate no registers are
            // cached.
            //
            RtlZeroMemory(RegUsage, REGCOUNT);

        } else {

            //
            // For each 32-bit x86 register modified by this instruction,
            // update the caching info.
            //
            // NOTE(review): text is missing between "i" and "RegsSet" on
            // the next line (the rest of the loop header and the start of
            // the statement that assigns RegBits, including its
            // declaration).
            //
            for (i=0; iRegsSet & (REGMASK<<(REGSHIFT*i));
                if (RegBits) {
                    //
                    // The ith 32-bit x86 register has been modified by this
                    // instruction
                    //
                    if (RegUsage[i] > 1) {
                        //
                        // There is more than one consumer of the modified
                        // value so it is worth caching.
                        //
                        RegsToCache |= RegBits;
                    }

                    //
                    // Since this x86 register was dirtied by this instruction,
                    // its usage count must be reset to 0.
                    //
                    RegUsage[i] = 0;
                }
            }
        }

        //
        // Update the list of x86 registers which can be loaded into
        // cache registers before the next instruction executes.
        //
        pInstr[1].RegsToCache |= RegsToCache;

        //
        // For each 32-bit register used as a parameter to this instruction,
        // bump the usage count.
        //
        // NOTE(review): text is missing between "i" and "RegsNeeded" on the
        // next line (the rest of the loop header and the start of the test).
        //
        for (i=0; iRegsNeeded & (REGMASK<<(REGSHIFT*i))) {
                RegUsage[i]++;
            }
        }
    }
}


VOID
OptimizeInstructionStream(
    PINSTRUCTION IS,
    ULONG numInstr
    )
/*++

Routine Description:

    This function performs various optimizations on the instruction stream
    returned by the decoder.

Arguments:

    IS       -- The instruction stream returned by the decoder
    numInstr -- The length of IS

Return Value:

    return-value - none

--*/
{
    ULONG i;

    CPUASSERTMSG(numInstr, "Cannot optimize 0-length instruction stream");

    //
    // Pass 1: Optimize x86 instruction stream, replacing single x86
    //         instructions with special-case instructions, and replacing
    //         multiple x86 instructions with single special-case OPT_
    //         instructions
    //

    //
    // NOTE(review): text is missing between "i" and "FlagsNeeded" on the
    // next line.  The remainder of OptimizeInstructionStream and the
    // beginning of the flags-optimization routine (called as
    // OptimizeIntelFlags at the end of this file) are not present in this
    // copy, including the declarations of pFragDesc, FlagsNeeded,
    // FlagsToGenerate, KnownFlagsNeeded, PassNumber and fPassNeeded and
    // the loop headers that the closing "} while (i);" and the braces
    // after it belong to.  Everything from here to the end of that routine
    // is its surviving body.
    //
    for (i=0; iFlagsNeeded;
            FlagsToGenerate = FlagsNeeded & pFragDesc->FlagsSet;

            //
            // Calculate what flags this instruction will need to have
            // computed before it can be executed.
            //
            FlagsNeeded = (FlagsNeeded & ~FlagsToGenerate) |
                           pFragDesc->FlagsNeeded;

            if (pFragDesc->Flags & OPFL_CTRLTRNS) {
                ULONG IntelDest = IS[i].Operand1.Immed;

                //
                // For control-transfer instructions, FlagsNeeded also includes
                // the flags required for the destination of the transfer.
                //
                if (IS[0].IntelAddress <= IntelDest &&
                    i > 0 && IS[i-1].IntelAddress >= IntelDest) {
                    //
                    // The destination of the control-transfer is at a lower
                    // address in the Instruction Stream.
                    //

                    if (PassNumber == 1) {
                        //
                        // Need to make a second pass over the flags
                        // optimizations in order to determine what flags are
                        // needed for the destination address.
                        //
                        fPassNeeded = TRUE;
                        FlagsNeeded = ALLFLAGS; // assume all flags are needed
                    } else {
                        ULONG j;
                        USHORT NewFlagsNeeded;

                        //
                        // Search for the IntelDest within the Instruction
                        // Stream.  IntelDest may not be found if there is
                        // a pun.
                        //
                        NewFlagsNeeded = ALLFLAGS;  // assume there is a pun
                        for (j=0; j < i; ++j) {
                            if (IS[j].IntelAddress == IntelDest) {
                                NewFlagsNeeded = KnownFlagsNeeded[j];
                                break;
                            }
                        }

                        FlagsNeeded |= NewFlagsNeeded;
                    }
                //
                // NOTE(review): the test below reads IS[i+1].  If i can equal
                // numInstr-1 here, that is one element past the last
                // instruction; the loop bounds are not visible in this copy
                // -- confirm.
                //
                } else if (IS[i+1].IntelAddress <= IntelDest &&
                           IntelDest <= IS[numInstr-1].IntelAddress) {
                    //
                    // The destination of the control-transfer is at a higher
                    // address in the Instruction Stream.  Pick up the
                    // already-computed FlagsNeeded for the destination.
                    //
                    ULONG j;
                    USHORT NewFlagsNeeded = ALLFLAGS;   // assume a pun

                    for (j=i+1; j < numInstr; ++j) {
                        if (IS[j].IntelAddress == IntelDest) {
                            NewFlagsNeeded = KnownFlagsNeeded[j];
                            break;
                        }
                    }

                    FlagsNeeded |= NewFlagsNeeded;

                } else {
                    //
                    // Destination of the control-transfer is unknown.  Assume
                    // the worst:  all flags are required.
                    //
                    FlagsNeeded = ALLFLAGS;
                }
            }

            if (!(FlagsToGenerate & pFragDesc->FlagsSet) &&
                (pFragDesc->Flags & OPFL_HASNOFLAGS)) {
                //
                // This instruction is not required to generate any flags, and
                // it has a NOFLAGS version.  Update the flags that need to be
                // computed by instructions before this one, and modify the
                // Operation number to point at the NoFlags fragment.
                //
                FlagsToGenerate &= pFragDesc->FlagsSet;

                //
                // The Operation number is advanced by 2 when the fragment has
                // OPFL_ALIGN set and by 1 otherwise.
                //
                if (pFragDesc->Flags & OPFL_ALIGN) {
                    IS[i].Operation += 2;
                } else {
                    IS[i].Operation ++;
                }

                if (IS[i].Operation == OP_OPT_ZERONoFlags32) {
                    //
                    // Special-case this to be a "mov [value], zero" so it is
                    // inlined.
                    //
                    IS[i].Operation = OP_Mov32;
                    IS[i].Operand2.Type = OPND_IMM;
                    IS[i].Operand2.Immed = 0;
                }
            }
        } while (i);
    }
}


VOID
DetermineEbpAlignment(
    PINSTRUCTION InstructionStream,
    ULONG numInstr
    )
/*++

Routine Description:

    For each instruction in InstructionStream[], sets Instruction->EbpAligned
    based on whether EBP is assumed to be DWORD-aligned or not.  EBP is
    assumed to be DWORD-aligned if a "MOV EBP, ESP" instruction is seen, and
    it is assumed to become unaligned at the first instruction which is
    flagged as modifying EBP.

Arguments:

    InstructionStream -- The instruction stream returned by the decoder
    numInstr          -- The length of InstructionStream

Return Value:

    return-value - none

--*/
{
    ULONG i;
    BOOL EbpAligned = FALSE;

    //
    // NOTE(review): text is missing between "i" and "FileHeader" on the
    // next line.  The body of DetermineEbpAlignment and the beginning of
    // the following routine are not present in this copy: its name,
    // parameters, header comment, local declarations (numInstr,
    // maxBufferSize, pIntelInstruction, pLastIntelInstruction,
    // cEntryPoints, NumberOfInstructions) and the opening of the
    // debug-only try block.  Everything from here to the end of the file
    // is the surviving body of that routine, which decodes instructions
    // and then runs the analysis passes defined above.
    //
    for (i=0; iFileHeader.Machine != IMAGE_FILE_MACHINE_I386) {
                    LOGPRINT((TRACELOG, "CPU Analysis warning:  jumping from Intel to non-intel code at 0x%X\r\n", pIntelInstruction));
                }
            } else {
                // Eip isn't pointing anywhere???
            }
        }
    } except(EXCEPTION_EXECUTE_HANDLER) {
        //
        // Any exception raised by the debug-only check is ignored.
        //
        ;
    }
#endif //DBG

    //
    // Decode instructions one at a time into InstructionStream until the
    // buffer is full or a stopping condition is met.
    //
    // NOTE(review): in the visible code *NumberOfInstructions is assigned
    // only on the break path below.  If the loop ends because numInstr
    // reached maxBufferSize, it is not assigned here -- confirm whether
    // the missing part of the routine or the caller handles that case.
    //
    while (numInstr < maxBufferSize) {

        DecodeInstruction ((DWORD) (ULONGLONG)pIntelInstruction, InstructionStream+numInstr);
        if ((STOP_DECODING(InstructionStream[numInstr])) ||
            (pIntelInstruction >= pLastIntelInstruction)) {

            // Either the decoded instruction's fragment is flagged
            // OPFL_STOP_COMPILE, or the last instruction address to
            // decode has been reached.
            numInstr++;
            (*NumberOfInstructions) = numInstr;
            break; // SUCCESS
        }

        //
        // Advance to the next x86 instruction by the size of the one just
        // decoded.
        //
        pIntelInstruction = (PVOID) ((ULONGLONG)pIntelInstruction + (InstructionStream+numInstr)->Size);

        numInstr++;
    }

    //
    // Optimize x86 code by merging x86 instructions into meta-instructions
    // and cleaning up special x86 idioms.
    //
    if (!(CompilerFlags & COMPFL_SLOW)) {
        OptimizeInstructionStream (InstructionStream, numInstr);
    }

    //
    // Determine where all basic blocks are by filling in the EntryPoint
    // field in each instruction.  This must be done after
    // OptimizeInstructionStream() runs so that EntryPoints don't fall
    // into the middle of meta-instructions.
    //
    cEntryPoints = LocateEntryPoints(InstructionStream, numInstr);

    //
    // Perform optimizations which require knowledge of EntryPoints.
    // These are skipped in slow mode and for streams of two or fewer
    // instructions, and each can be turned off individually by its
    // CpuDisable* flag.
    //
    if (numInstr > 2 && !(CompilerFlags & COMPFL_SLOW)) {
        if (!CpuDisableNoFlags) {
            OptimizeIntelFlags(InstructionStream, numInstr);
        }

        if (!CpuDisableRegCache) {
            CacheIntelRegs(InstructionStream, numInstr);
        }

        if (!CpuDisableEbpAlign) {
            DetermineEbpAlignment(InstructionStream, numInstr);
        }
    }

    return cEntryPoints;
}