windows-nt/Source/XPSP1/NT/base/wow64/mscpu/fraglib/optfrag.c
2020-09-26 16:20:57 +08:00

307 lines
7 KiB
C

/*++
Copyright (c) 1995-1998 Microsoft Corporation
Module Name:
optfrag.c
Abstract:
Instruction Fragments which correspond to optimizations.
Author:
6-July-1995 Ori Gershony (t-orig)
Revision History:
24-Aug-1999 [askhalid] copied from the 32-bit wx86 directory and made to work for 64-bit.
--*/
#include <nt.h>
#include <ntrtl.h>
#include <nturtl.h>
#include <windows.h>
#include <stdio.h>
#include "cpuassrt.h"
#include "fragp.h"
#include "optfrag.h"
// NOTE(review): ASSERTNAME presumably comes from cpuassrt.h and sets up the
// per-file state used by the CPUASSERT* macros (see CPUASSERTMSG at the end
// of this file) - confirm against the header.
ASSERTNAME;
// This fragment corresponds to:
// push ebx
// push esi
// push edi
FRAG0(OPT_PushEbxEsiEdiFrag)
{
ULONG *espval;
espval=(ULONG *)esp;
*(espval-1) = ebx;
*(espval-2) = esi;
*(espval-3) = edi;
esp=(ULONG)(LONGLONG)espval-12;
}
// This fragment corresponds to:
// pop edi
// pop esi
// pop ebx
FRAG0(OPT_PopEdiEsiEbxFrag)
{
ULONG *espval;
espval=(ULONG *)esp;
edi=*espval;
esi=*(espval+1);
ebx=*(espval+2);
esp=(ULONG)(LONGLONG)espval+12;
}
// This fragment corresponds to:
// push ebp
// mov ebp,esp
// sub esp, op1
FRAG1IMM(OPT_SetupStackFrag, ULONG)
{
ULONG result, oldespminusfour;
oldespminusfour = esp-4;
result = oldespminusfour - op1;
*(ULONG *)oldespminusfour = ebp;
ebp = oldespminusfour;
esp = result;
SET_FLAGS_SUB32(result, oldespminusfour, op1, 0x80000000);
}
FRAG1IMM(OPT_SetupStackNoFlagsFrag, ULONG)
{
ULONG result, oldespminusfour;
oldespminusfour = esp-4;
result = oldespminusfour - op1;
*(ULONG *)oldespminusfour = ebp;
ebp = oldespminusfour;
esp = result;
}
//
// OPT_ZEROFrag32
//
// Replacement for an instruction that zeroes a 32-bit register by
// combining it with itself:
//      XOR samereg, samereg
//      SUB samereg, samereg
// e.g. XOR EAX, EAX or SUB ECX, ECX.
//
// pop1 points at the destination, which is set to 0.  The flags are then
// updated; see OPT_ZERONoFlagsFrag32 for the variant that skips that.
//
FRAG1(OPT_ZEROFrag32, LONG)
{
*pop1 = 0;
// Carry, overflow and sign are switched off explicitly.
SET_CFLAG_OFF;
SET_OFLAG_OFF;
SET_SFLAG_OFF;
// NOTE(review): the remaining setters are handed the result value (0);
// presumably each derives its flag from that value, which would give
// ZF=1 and PF=1 as x86 specifies for a zero result - confirm in fragp.h.
SET_ZFLAG(0);
SET_PFLAG(0);
SET_AUXFLAG(0);
}
//
// OPT_ZERONoFlagsFrag32
//
// Flag-free variant of OPT_ZEROFrag32.  Replaces
//      XOR samereg, samereg
//      SUB samereg, samereg
// e.g. XOR EAX, EAX or SUB ECX, ECX, by storing 0 through pop1 without
// touching any flags.
//
FRAG1(OPT_ZERONoFlagsFrag32, LONG)
{
*pop1 = 0;
}
FRAG3(OPT_CmpSbbFrag32, ULONG, ULONG, ULONG)
{
ULONG result;
ULONG cf;
//
// implements: CMP op2, op3
// SBB op1, op1
//
result = op2-op3;
cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));
result = (ULONG)-(LONG)(cf >> 31);
*pop1 = result; // pop1 is a pointer to a reg, so always aligned
SET_OFLAG_OFF;
SET_CFLAG(result);
SET_SFLAG(result);
SET_ZFLAG(result);
SET_AUXFLAG(result);
SET_PFLAG(result);
}
FRAG3(OPT_CmpSbbNoFlagsFrag32, ULONG, ULONG, ULONG)
{
ULONG result;
ULONG cf;
//
// implements: CMP op2, op3
// SBB op1, op1
//
result = op2-op3;
cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));
*pop1 = (ULONG)-(LONG)(cf >> 31);
}
//
// OPT_CmpSbbNegFrag32
//
// Fused replacement for:
//      CMP op2, op3
//      SBB op1, op1
//      NEG op1
//
// The SBB leaves 0 or 0xffffffff in op1 depending on the borrow from the
// CMP, and the NEG turns that into 0 or 1.  The value is stored through
// pop1 and the flags are set as the final NEG would have left them.
//
FRAG3(OPT_CmpSbbNegFrag32, ULONG, ULONG, ULONG)
{
    ULONG result;
    ULONG cf;

    result = op2 - op3;
    // Bit 31 of cf is the borrow out of the 32-bit subtraction.
    cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));

    // pop1 is a pointer to a reg, so it is always aligned
    if (cf >= 0x80000000) {
        // Borrow: SBB gives 0xffffffff, NEG gives 1.
        result = 1;
        *pop1 = result;     // store the result before updating flags
        SET_CFLAG_ON;       // NEG sets carry when its result is non-zero
        SET_AUXFLAG(0xfe);  // this is (BYTE)(0xffffffff ^ 0x00000001)
    } else {
        // No borrow: SBB gives 0, NEG gives 0.
        result = 0;
        *pop1 = result;     // store the result before updating flags
        SET_CFLAG_OFF;      // NEG clears carry when its result is zero
        SET_AUXFLAG(0);     // this is (BYTE)(0x0 ^ 0x0)
    }

    // Flags that are handled the same way on both paths.
    SET_ZFLAG(result);
    SET_PFLAG(result);
    SET_SFLAG_OFF;          // result is 0 or 1, never negative
    SET_OFLAG_OFF;          // either (0xffffffff & 0x00000001) or (0 & 0);
                            // done once here rather than also in the else arm
}
FRAG3(OPT_CmpSbbNegNoFlagsFrag32, ULONG, ULONG, ULONG)
{
ULONG result;
ULONG cf;
//
// implements: CMP op2, op3
// SBB op1, op1
// NEG op1
//
result = op2-op3;
cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));
// result is 1 if high bit of cf is set, 0 if high bit is clear
*pop1 = cf >> 31;
}
//
// OPT_Push2Frag32
//
// Fused replacement for two consecutive 32-bit pushes:
//      PUSH op1
//      PUSH op2
//
FRAG2IMM(OPT_Push2Frag32, ULONG, ULONG)
{
//
// Note that the analysis phase must ensure that the value of op2 does
// not depend on the value of ESP, as op2 will be computed before the
// first PUSH is executed.
//
PUSH_LONG(op1);
PUSH_LONG(op2);
}
//
// OPT_Pop2Frag32
//
// Fused replacement for two consecutive 32-bit pops:
//      POP pop1
//      POP pop2
// The first value popped is stored through pop1, the second through pop2.
//
FRAG2REF(OPT_Pop2Frag32, ULONG)
{
//
// Note that the analysis phase must ensure that the value of pop2 does
// not depend on the value of pop1, as pop1 will not have been popped
// when the value of pop2 is computed.
//
POP_LONG(*pop1);
POP_LONG(*pop2);
}
FRAG1(OPT_CwdIdivFrag16, USHORT)
{
short op1;
short result;
//
// implements: CWD
// IDIV EAX, *pop1
// The CWD sign-extends EAX into EDX:EAX, which means, we can
// avoid a 64-bit division and just divide EAX. There is no
// possibility of overflow.
//
op1 = (short)GET_SHORT(pop1);
// Must do the divide before modifying edx, in case op1==0 and we fault.
result = (short)ax / op1;
dx = (short)ax % op1;
ax = result;
}
FRAG1(OPT_CwdIdivFrag16A, USHORT)
{
short op1;
short result;
//
// implements: CWD
// IDIV EAX, *pop1
// The CWD sign-extends EAX into EDX:EAX, which means, we can
// avoid a 64-bit division and just divide EAX. There is no
// possibility of overflow.
//
op1 = (short)*pop1;
// Must do the divide before modifying edx, in case op1==0 and we fault.
result = (short)ax / op1;
dx = (short)ax % op1;
ax = result;
}
FRAG1(OPT_CwdIdivFrag32, ULONG)
{
long op1;
long result;
//
// implements: CWD
// IDIV EAX, *pop1
// The CWD sign-extends EAX into EDX:EAX, which means, we can
// avoid a 64-bit division and just divide EAX. There is no
// possibility of overflow.
//
op1 = (long)GET_LONG(pop1);
// Must do the divide before modifying edx, in case op1==0 and we fault.
result = (long)eax / op1;
edx = (long)eax % op1;
eax = result;
}
FRAG1(OPT_CwdIdivFrag32A, ULONG)
{
long op1;
long result;
//
// implements: CWD
// IDIV EAX, *pop1
// The CWD sign-extends EAX into EDX:EAX, which means, we can
// avoid a 64-bit division and just divide EAX. There is no
// possibility of overflow.
//
op1 = (long)*pop1;
// Must do the divide before modifying edx, in case op1==0 and we fault.
result = (long)eax / op1;
edx = (long)eax % op1;
eax = result;
}
//
// OPT_OPTIMIZEDFrag
//
// This fragment should never be called!  Its body is a single assertion
// that always fails with a message saying so.
//
FRAG0(OPT_OPTIMIZEDFrag)
{
CPUASSERTMSG(FALSE, "OPTIMIZED fragment should never be called!");
}