/*++ Copyright (c) 1995-1998 Microsoft Corporation Module Name: optfrag.c Abstract: Instruction Fragments which correspond to optimizations. Author: 6-July-1995 Ori Gershony (t-orig) Revision History: 24-Aug-1999 [askhalid] copied from 32-bit wx86 directory and make work for 64bit. --*/ #include #include #include #include #include #include "cpuassrt.h" #include "fragp.h" #include "optfrag.h" ASSERTNAME; // This fragment corresponds to: // push ebx // push esi // push edi FRAG0(OPT_PushEbxEsiEdiFrag) { ULONG *espval; espval=(ULONG *)esp; *(espval-1) = ebx; *(espval-2) = esi; *(espval-3) = edi; esp=(ULONG)(LONGLONG)espval-12; } // This fragment corresponds to: // pop edi // pop esi // pop ebx FRAG0(OPT_PopEdiEsiEbxFrag) { ULONG *espval; espval=(ULONG *)esp; edi=*espval; esi=*(espval+1); ebx=*(espval+2); esp=(ULONG)(LONGLONG)espval+12; } // This fragment corresponds to: // push ebp // mov ebp,esp // sub esp, op1 FRAG1IMM(OPT_SetupStackFrag, ULONG) { ULONG result, oldespminusfour; oldespminusfour = esp-4; result = oldespminusfour - op1; *(ULONG *)oldespminusfour = ebp; ebp = oldespminusfour; esp = result; SET_FLAGS_SUB32(result, oldespminusfour, op1, 0x80000000); } FRAG1IMM(OPT_SetupStackNoFlagsFrag, ULONG) { ULONG result, oldespminusfour; oldespminusfour = esp-4; result = oldespminusfour - op1; *(ULONG *)oldespminusfour = ebp; ebp = oldespminusfour; esp = result; } FRAG1(OPT_ZEROFrag32, LONG) { // implements: XOR samereg, samereg // SUB samereg, samereg // ie. XOR EAX, EAX or SUB ECX, ECX *pop1 = 0; SET_CFLAG_OFF; SET_OFLAG_OFF; SET_SFLAG_OFF; SET_ZFLAG(0); SET_PFLAG(0); SET_AUXFLAG(0); } FRAG1(OPT_ZERONoFlagsFrag32, LONG) { // implements: XOR samereg, samereg // SUB samereg, samereg // ie. XOR EAX, EAX or SUB ECX, ECX *pop1 = 0; } FRAG3(OPT_CmpSbbFrag32, ULONG, ULONG, ULONG) { ULONG result; ULONG cf; // // implements: CMP op2, op3 // SBB op1, op1 // result = op2-op3; cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result)); result = (ULONG)-(LONG)(cf >> 31); *pop1 = result; // pop1 is a pointer to a reg, so always aligned SET_OFLAG_OFF; SET_CFLAG(result); SET_SFLAG(result); SET_ZFLAG(result); SET_AUXFLAG(result); SET_PFLAG(result); } FRAG3(OPT_CmpSbbNoFlagsFrag32, ULONG, ULONG, ULONG) { ULONG result; ULONG cf; // // implements: CMP op2, op3 // SBB op1, op1 // result = op2-op3; cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result)); *pop1 = (ULONG)-(LONG)(cf >> 31); } FRAG3(OPT_CmpSbbNegFrag32, ULONG, ULONG, ULONG) { ULONG result; ULONG cf; // // implements: CMP op2, op3 // SBB op1, op1 // NEG op1 // result = op2-op3; cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result)); // pop1 is a pointer to a reg, so it is always aligned if (cf >= 0x80000000) { result = 1; *pop1 = result; // store the result before updating flags SET_CFLAG_ON; // set if result != 0 SET_AUXFLAG(0xfe); // this is (BYTE)(0xffffffff ^ 0x00000001) } else { result = 0; *pop1 = result; // store the result before updating flags SET_CFLAG_OFF; // cleared if result==0 SET_AUXFLAG(0); // this is (BYTE)(0x0 ^ 0x0) SET_OFLAG_OFF; // this is (0x0 & 0x0) << 31 } SET_ZFLAG(result); SET_PFLAG(result); SET_SFLAG_OFF; SET_OFLAG_OFF; // this is either (0xffffffff & 0x00000001) or (0 & 0) } FRAG3(OPT_CmpSbbNegNoFlagsFrag32, ULONG, ULONG, ULONG) { ULONG result; ULONG cf; // // implements: CMP op2, op3 // SBB op1, op1 // NEG op1 // result = op2-op3; cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result)); // result is 1 if high bit of cf is set, 0 if high bit is clear *pop1 = cf >> 31; } FRAG2IMM(OPT_Push2Frag32, ULONG, ULONG) { // // implements: PUSH op1 // PUSH op2 // Note that the analysis phase must ensure that the value of op2 does // not depend on the value of ESP, as op2 will be computed before the // first PUSH is excuted. // PUSH_LONG(op1); PUSH_LONG(op2); } FRAG2REF(OPT_Pop2Frag32, ULONG) { // // implements: POP pop1 // POP pop2 // // Note that the analysis phase must ensure that the value of pop2 does // not depend on the value of pop1, as pop1 will not have been popped // when the value of pop2 is computed. // POP_LONG(*pop1); POP_LONG(*pop2); } FRAG1(OPT_CwdIdivFrag16, USHORT) { short op1; short result; // // implements: CWD // IDIV EAX, *pop1 // The CWD sign-extends EAX into EDX:EAX, which means, we can // avoid a 64-bit division and just divide EAX. There is no // possibility of overflow. // op1 = (short)GET_SHORT(pop1); // Must do the divide before modifying edx, in case op1==0 and we fault. result = (short)ax / op1; dx = (short)ax % op1; ax = result; } FRAG1(OPT_CwdIdivFrag16A, USHORT) { short op1; short result; // // implements: CWD // IDIV EAX, *pop1 // The CWD sign-extends EAX into EDX:EAX, which means, we can // avoid a 64-bit division and just divide EAX. There is no // possibility of overflow. // op1 = (short)*pop1; // Must do the divide before modifying edx, in case op1==0 and we fault. result = (short)ax / op1; dx = (short)ax % op1; ax = result; } FRAG1(OPT_CwdIdivFrag32, ULONG) { long op1; long result; // // implements: CWD // IDIV EAX, *pop1 // The CWD sign-extends EAX into EDX:EAX, which means, we can // avoid a 64-bit division and just divide EAX. There is no // possibility of overflow. // op1 = (long)GET_LONG(pop1); // Must do the divide before modifying edx, in case op1==0 and we fault. result = (long)eax / op1; edx = (long)eax % op1; eax = result; } FRAG1(OPT_CwdIdivFrag32A, ULONG) { long op1; long result; // // implements: CWD // IDIV EAX, *pop1 // The CWD sign-extends EAX into EDX:EAX, which means, we can // avoid a 64-bit division and just divide EAX. There is no // possibility of overflow. // op1 = (long)*pop1; // Must do the divide before modifying edx, in case op1==0 and we fault. result = (long)eax / op1; edx = (long)eax % op1; eax = result; } // This fragment should never be called! FRAG0(OPT_OPTIMIZEDFrag) { CPUASSERTMSG(FALSE, "OPTIMIZED fragment should never be called!"); }