307 lines
7 KiB
C
307 lines
7 KiB
C
/*++
|
|
|
|
Copyright (c) 1995-1998 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
optfrag.c
|
|
|
|
Abstract:
|
|
|
|
Instruction Fragments which correspond to optimizations.
|
|
|
|
Author:
|
|
|
|
6-July-1995 Ori Gershony (t-orig)
|
|
|
|
Revision History:
|
|
|
|
24-Aug-1999 [askhalid] copied from 32-bit wx86 directory and make work for 64bit.
|
|
|
|
|
|
--*/
|
|
|
|
#include <nt.h>
|
|
#include <ntrtl.h>
|
|
#include <nturtl.h>
|
|
#include <windows.h>
|
|
#include <stdio.h>
|
|
#include "cpuassrt.h"
|
|
#include "fragp.h"
|
|
#include "optfrag.h"
|
|
|
|
ASSERTNAME;
|
|
|
|
// This fragment corresponds to:
|
|
// push ebx
|
|
// push esi
|
|
// push edi
|
|
FRAG0(OPT_PushEbxEsiEdiFrag)
|
|
{
|
|
ULONG *espval;
|
|
|
|
espval=(ULONG *)esp;
|
|
|
|
*(espval-1) = ebx;
|
|
*(espval-2) = esi;
|
|
*(espval-3) = edi;
|
|
esp=(ULONG)(LONGLONG)espval-12;
|
|
}
|
|
|
|
// This fragment corresponds to:
|
|
// pop edi
|
|
// pop esi
|
|
// pop ebx
|
|
FRAG0(OPT_PopEdiEsiEbxFrag)
|
|
{
|
|
ULONG *espval;
|
|
|
|
espval=(ULONG *)esp;
|
|
|
|
edi=*espval;
|
|
esi=*(espval+1);
|
|
ebx=*(espval+2);
|
|
esp=(ULONG)(LONGLONG)espval+12;
|
|
}
|
|
|
|
// This fragment corresponds to:
|
|
// push ebp
|
|
// mov ebp,esp
|
|
// sub esp, op1
|
|
FRAG1IMM(OPT_SetupStackFrag, ULONG)
|
|
{
|
|
ULONG result, oldespminusfour;
|
|
|
|
oldespminusfour = esp-4;
|
|
result = oldespminusfour - op1;
|
|
|
|
*(ULONG *)oldespminusfour = ebp;
|
|
ebp = oldespminusfour;
|
|
esp = result;
|
|
SET_FLAGS_SUB32(result, oldespminusfour, op1, 0x80000000);
|
|
}
|
|
FRAG1IMM(OPT_SetupStackNoFlagsFrag, ULONG)
|
|
{
|
|
ULONG result, oldespminusfour;
|
|
|
|
oldespminusfour = esp-4;
|
|
result = oldespminusfour - op1;
|
|
|
|
*(ULONG *)oldespminusfour = ebp;
|
|
ebp = oldespminusfour;
|
|
esp = result;
|
|
}
|
|
|
|
FRAG1(OPT_ZEROFrag32, LONG)
|
|
{
|
|
// implements: XOR samereg, samereg
|
|
// SUB samereg, samereg
|
|
// ie. XOR EAX, EAX or SUB ECX, ECX
|
|
|
|
*pop1 = 0;
|
|
SET_CFLAG_OFF;
|
|
SET_OFLAG_OFF;
|
|
SET_SFLAG_OFF;
|
|
SET_ZFLAG(0);
|
|
SET_PFLAG(0);
|
|
SET_AUXFLAG(0);
|
|
}
|
|
|
|
FRAG1(OPT_ZERONoFlagsFrag32, LONG)
|
|
{
|
|
// implements: XOR samereg, samereg
|
|
// SUB samereg, samereg
|
|
// ie. XOR EAX, EAX or SUB ECX, ECX
|
|
|
|
*pop1 = 0;
|
|
}
|
|
|
|
FRAG3(OPT_CmpSbbFrag32, ULONG, ULONG, ULONG)
|
|
{
|
|
ULONG result;
|
|
ULONG cf;
|
|
|
|
//
|
|
// implements: CMP op2, op3
|
|
// SBB op1, op1
|
|
//
|
|
result = op2-op3;
|
|
cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));
|
|
result = (ULONG)-(LONG)(cf >> 31);
|
|
*pop1 = result; // pop1 is a pointer to a reg, so always aligned
|
|
SET_OFLAG_OFF;
|
|
SET_CFLAG(result);
|
|
SET_SFLAG(result);
|
|
SET_ZFLAG(result);
|
|
SET_AUXFLAG(result);
|
|
SET_PFLAG(result);
|
|
}
|
|
FRAG3(OPT_CmpSbbNoFlagsFrag32, ULONG, ULONG, ULONG)
|
|
{
|
|
ULONG result;
|
|
ULONG cf;
|
|
|
|
//
|
|
// implements: CMP op2, op3
|
|
// SBB op1, op1
|
|
//
|
|
result = op2-op3;
|
|
cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));
|
|
*pop1 = (ULONG)-(LONG)(cf >> 31);
|
|
}
|
|
FRAG3(OPT_CmpSbbNegFrag32, ULONG, ULONG, ULONG)
|
|
{
|
|
ULONG result;
|
|
ULONG cf;
|
|
|
|
//
|
|
// implements: CMP op2, op3
|
|
// SBB op1, op1
|
|
// NEG op1
|
|
//
|
|
result = op2-op3;
|
|
cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));
|
|
// pop1 is a pointer to a reg, so it is always aligned
|
|
if (cf >= 0x80000000) {
|
|
result = 1;
|
|
*pop1 = result; // store the result before updating flags
|
|
SET_CFLAG_ON; // set if result != 0
|
|
SET_AUXFLAG(0xfe); // this is (BYTE)(0xffffffff ^ 0x00000001)
|
|
} else {
|
|
result = 0;
|
|
*pop1 = result; // store the result before updating flags
|
|
SET_CFLAG_OFF; // cleared if result==0
|
|
SET_AUXFLAG(0); // this is (BYTE)(0x0 ^ 0x0)
|
|
SET_OFLAG_OFF; // this is (0x0 & 0x0) << 31
|
|
}
|
|
SET_ZFLAG(result);
|
|
SET_PFLAG(result);
|
|
SET_SFLAG_OFF;
|
|
SET_OFLAG_OFF; // this is either (0xffffffff & 0x00000001) or (0 & 0)
|
|
}
|
|
FRAG3(OPT_CmpSbbNegNoFlagsFrag32, ULONG, ULONG, ULONG)
|
|
{
|
|
ULONG result;
|
|
ULONG cf;
|
|
|
|
//
|
|
// implements: CMP op2, op3
|
|
// SBB op1, op1
|
|
// NEG op1
|
|
//
|
|
result = op2-op3;
|
|
cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));
|
|
// result is 1 if high bit of cf is set, 0 if high bit is clear
|
|
*pop1 = cf >> 31;
|
|
}
|
|
|
|
FRAG2IMM(OPT_Push2Frag32, ULONG, ULONG)
|
|
{
|
|
//
|
|
// implements: PUSH op1
|
|
// PUSH op2
|
|
// Note that the analysis phase must ensure that the value of op2 does
|
|
// not depend on the value of ESP, as op2 will be computed before the
|
|
// first PUSH is excuted.
|
|
//
|
|
PUSH_LONG(op1);
|
|
PUSH_LONG(op2);
|
|
}
|
|
FRAG2REF(OPT_Pop2Frag32, ULONG)
|
|
{
|
|
//
|
|
// implements: POP pop1
|
|
// POP pop2
|
|
//
|
|
// Note that the analysis phase must ensure that the value of pop2 does
|
|
// not depend on the value of pop1, as pop1 will not have been popped
|
|
// when the value of pop2 is computed.
|
|
//
|
|
POP_LONG(*pop1);
|
|
POP_LONG(*pop2);
|
|
}
|
|
|
|
FRAG1(OPT_CwdIdivFrag16, USHORT)
|
|
{
|
|
short op1;
|
|
short result;
|
|
|
|
//
|
|
// implements: CWD
|
|
// IDIV EAX, *pop1
|
|
// The CWD sign-extends EAX into EDX:EAX, which means, we can
|
|
// avoid a 64-bit division and just divide EAX. There is no
|
|
// possibility of overflow.
|
|
//
|
|
op1 = (short)GET_SHORT(pop1);
|
|
// Must do the divide before modifying edx, in case op1==0 and we fault.
|
|
result = (short)ax / op1;
|
|
|
|
dx = (short)ax % op1;
|
|
ax = result;
|
|
}
|
|
FRAG1(OPT_CwdIdivFrag16A, USHORT)
|
|
{
|
|
short op1;
|
|
short result;
|
|
|
|
//
|
|
// implements: CWD
|
|
// IDIV EAX, *pop1
|
|
// The CWD sign-extends EAX into EDX:EAX, which means, we can
|
|
// avoid a 64-bit division and just divide EAX. There is no
|
|
// possibility of overflow.
|
|
//
|
|
op1 = (short)*pop1;
|
|
// Must do the divide before modifying edx, in case op1==0 and we fault.
|
|
result = (short)ax / op1;
|
|
|
|
dx = (short)ax % op1;
|
|
ax = result;
|
|
}
|
|
|
|
FRAG1(OPT_CwdIdivFrag32, ULONG)
|
|
{
|
|
long op1;
|
|
long result;
|
|
|
|
//
|
|
// implements: CWD
|
|
// IDIV EAX, *pop1
|
|
// The CWD sign-extends EAX into EDX:EAX, which means, we can
|
|
// avoid a 64-bit division and just divide EAX. There is no
|
|
// possibility of overflow.
|
|
//
|
|
op1 = (long)GET_LONG(pop1);
|
|
// Must do the divide before modifying edx, in case op1==0 and we fault.
|
|
result = (long)eax / op1;
|
|
|
|
edx = (long)eax % op1;
|
|
eax = result;
|
|
}
|
|
FRAG1(OPT_CwdIdivFrag32A, ULONG)
|
|
{
|
|
long op1;
|
|
long result;
|
|
|
|
//
|
|
// implements: CWD
|
|
// IDIV EAX, *pop1
|
|
// The CWD sign-extends EAX into EDX:EAX, which means, we can
|
|
// avoid a 64-bit division and just divide EAX. There is no
|
|
// possibility of overflow.
|
|
//
|
|
op1 = (long)*pop1;
|
|
// Must do the divide before modifying edx, in case op1==0 and we fault.
|
|
result = (long)eax / op1;
|
|
|
|
edx = (long)eax % op1;
|
|
eax = result;
|
|
}
|
|
|
|
// This fragment should never be called!
|
|
FRAG0(OPT_OPTIMIZEDFrag)
|
|
{
|
|
CPUASSERTMSG(FALSE, "OPTIMIZED fragment should never be called!");
|
|
}
|