windows-nt/Source/XPSP1/NT/sdktools/wst/wstdll/ia64/wstutl.s
2020-09-26 16:20:57 +08:00

786 lines
20 KiB
ArmAsm
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// TITLE("Wst Utility")
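//++
// Copyright (c) 1992-1994, Microsoft Corporation.
//
// Description:
// SaveAllRegs ()
// - capture the caller's IA64 register state into a CONTEXT buffer
// (the rotating floating point registers f32 - f127 are not captured)
//
// RestoreAllRegs ()
// - reload the IA64 register state from a CONTEXT buffer
//
// penter ()
// - profiling entry stub that calls the C routine c_penter
//
//--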
#include "ksia64.h"
PublicFunction(c_penter)
//--------------------------------------------------------------------
// Routine:
//
// VOID
// SaveAllRegs(
// IN OUT PDWORDLONG pSaveBuffer)
//
// Description:
//
// This function captures the caller's register state into the supplied
// buffer, using the Cx* (CONTEXT) field offsets.
//
// What is written:
// - integer registers gp, t0 - t22, s0 - s3, v0, teb and sp (spilled,
// so their NaT bits are collected in ar.unat)
// - CxPreds (pr) and CxIntNats (the collected NaT bits, re-aligned)
// - branch registers brp, bs0 - bs4, bt0 and bt1
// - ar.fpsr, ar.fsr, ar.fir and ar.fdr
// - floating point registers fs0 - fs19 and ft0 - ft9
// - ar.unat, ar.lc, ar.ec, ar.ccv, ar.pfs, ar.rsc, ar.rnat, the
// adjusted ar.bsp (stored to both CxRsBSP and CxRsBSPSTORE)
// - CxStIIP (the return address in brp), CxStIPSR (psr or psr.um) and
// CxStIFS (ar.pfs with bit 63 set)
// - CxContextFlags = CONTEXT_FULL
//
// What is NOT written: nothing is stored at CxFltF32 and beyond, i.e.
// f32 - f127 are not captured by this routine.
//
// Input:
//
// a0: pSaveBuffer - Pointer CONTEXT buffer where the registers should be
// saved.
//
// Output:
//
// Stores the registers in the supplied buffer.
//
// Return value:
//
// None
//
// Notes:
//
// Every scratch register is spilled to the buffer before it is reused
// as a temporary. Only ar.rsc and ar.unat are put back before the
// return; the t-registers and pt0 - pt3 used as temporaries keep
// whatever the routine last wrote to them.
//
//--------------------------------------------------------------------
LEAF_ENTRY(SaveAllRegs)
//
// Save all integer registers and flush the RSE
//
.prologue
.regstk 1, 10, 0, 0
rbsp = loc9 // ar.bsp, later adjusted down by the frame size
rpfs = loc8 // ar.pfs as returned by alloc
rbrp = loc7 // caller's return address (brp)
rpr = loc6 // predicates on entry
runat = loc4 // ar.unat on entry
tmpbsp = t20 // address used for the final RNAT store
alloc rpfs = ar.pfs, 1, 10, 0, 0
add loc0 = CxIntGp, a0
add loc1 = CxIntT8, a0
;;
flushrs
.save ar.unat, loc4
mov runat = ar.unat
mov rpr = pr
PROLOGUE_END
//
// Two store pointers walk the integer area in parallel: loc0 starts at
// CxIntGp and loc1 at CxIntT8. Each st8.spill post-increments its
// pointer to the next field by the difference of the two offsets.
//
// The third instruction of each group is unrelated bookkeeping that is
// interleaved with the stores:
//
// t0 = bits 8:3 of the address of the CxIntGp slot. st8.spill
// records a register's NaT bit in ar.unat at the bit position
// given by bits 8:3 of the store address, so t0 is the ar.unat
// bit that belongs to gp.
// pt1 = (t0 <= 1), pt0 = (t0 > 1)
// pt1: t1 = 1 - t0 (left shift count)
// pt0: t1 = t0 - 1 (right shift count)
// t8 = 65 - t0 (left shift count, = 64 - t1)
//
// These counts are used further down to rotate ar.unat so that the NaT
// bit for gp ends up in bit 1 of the value stored at CxIntNats.
//
// Registers are only reused as temporaries after the group in which
// they were spilled (t0, t1, t8 here; t11 - t17 for the branch
// register copies).
//
.mem.offset 0,0
st8.spill.nta [loc0] = gp, CxIntT0 - CxIntGp
.mem.offset 0,8
st8.spill.nta [loc1] = t8, CxIntT9 - CxIntT8
add loc2 = CxIntGp, a0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t0, CxIntT1 - CxIntT0
.mem.offset 0,8
st8.spill.nta [loc1] = t9, CxIntT10 - CxIntT9
shr loc2 = loc2, 3
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t1, CxIntS0 - CxIntT1
.mem.offset 0,8
st8.spill.nta [loc1] = t10, CxIntT11 - CxIntT10
and t0 = 0x3f, loc2
;;
.mem.offset 0,0
st8.spill.nta [loc0] = s0, CxIntS1 - CxIntS0
.mem.offset 0,8
st8.spill.nta [loc1] = t11, CxIntT12 - CxIntT11
cmp4.ge pt1, pt0 = 1, t0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = s1, CxIntS2 - CxIntS1
.mem.offset 0,8
st8.spill.nta [loc1] = t12, CxIntT13 - CxIntT12
(pt1) sub t1 = 1, t0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = s2, CxIntS3 - CxIntS2
.mem.offset 0,8
st8.spill.nta [loc1] = t13, CxIntT14 - CxIntT13
(pt0) add t1 = -1, t0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = s3, CxIntV0 - CxIntS3
.mem.offset 0,8
st8.spill.nta [loc1] = t14, CxIntT15 - CxIntT14
(pt0) sub t8 = 65, t0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = v0, CxIntTeb - CxIntV0
.mem.offset 0,8
st8.spill.nta [loc1] = t15, CxIntT16 - CxIntT15
nop.i 0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = teb, CxIntT2 - CxIntTeb
.mem.offset 0,8
st8.spill.nta [loc1] = t16, CxIntT17 - CxIntT16
mov rbrp = brp
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t2, CxIntT3 - CxIntT2
.mem.offset 0,8
st8.spill.nta [loc1] = t17, CxIntT18 - CxIntT17
mov t11 = bs0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t3, CxIntSp - CxIntT3
.mem.offset 0,8
st8.spill.nta [loc1] = t18, CxIntT19 - CxIntT18
mov t12 = bs1
;;
.mem.offset 0,0
st8.spill.nta [loc0] = sp, CxIntT4 - CxIntSp
.mem.offset 0,8
st8.spill.nta [loc1] = t19, CxIntT20 - CxIntT19
mov t13 = bs2
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t4, CxIntT5 - CxIntT4
.mem.offset 0,8
st8.spill.nta [loc1] = t20, CxIntT21 - CxIntT20
mov t14 = bs3
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t5, CxIntT6 - CxIntT5
.mem.offset 0,8
st8.spill.nta [loc1] = t21, CxIntT22 - CxIntT21
mov t15 = bs4
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t6, CxIntT7 - CxIntT6
.mem.offset 0,8
st8.spill.nta [loc1] = t22, CxPreds - CxIntT22
mov t16 = bt0
;;
st8.spill.nta [loc0] = t7
st8.nta [loc1] = rpr, CxIntNats - CxPreds // save predicates
mov t17 = bt1
;;
//
// All integer spills are done: ar.unat now holds the collected NaT
// bits. Read it into t9 and align it with the shift counts computed
// above (pt1: plain left shift; pt0: rotate right by t1, built from
// t2 = t9 >> t1 and t3 = t9 << t8). loc1 is left pointing at
// CxIntNats for the store further down.
//
mov t9 = ar.unat
mov t4 = ar.fpsr
add loc2 = CxBrRp, a0
;;
add loc3 = CxBrS3, a0
(pt1) shl t9 = t9, t1
(pt0) shr.u t2 = t9, t1
;;
//
// Save branch registers.
//
st8.nta [loc2] = rbrp, CxBrS0 - CxBrRp // save brp
st8.nta [loc3] = t14, CxBrS4 - CxBrS3 // save bs3
(pt0) shl t3 = t9, t8
;;
st8.nta [loc2] = t11, CxBrS1 - CxBrS0 // save bs0
st8.nta [loc3] = t15, CxBrT0 - CxBrS4 // save bs4
(pt0) or t9 = t2, t3
;;
st8.nta [loc2] = t12, CxBrS2 - CxBrS1 // save bs1
st8.nta [loc3] = t16, CxBrT1 - CxBrT0 // save bt0
add loc0 = CxStFPSR, a0
;;
//
// In the next two groups a pointer is used as a store address and
// rewritten in the same instruction group (loc2, then loc1); the store
// uses the value from before the group.
//
st8.nta [loc2] = t13 // save bs2
st8.nta [loc3] = t17 // save bt1
add loc2 = CxStFSR, a0
;;
st8.nta [loc0] = t4 // save fpsr
st8.nta [loc1] = t9 // save nat bits
add loc1 = CxStFIR, a0
;;
//
// Save ar.fsr, ar.fir and ar.fdr.
//
mov t0 = ar.fsr
mov t1 = ar.fir
mov t2 = ar.fdr
;;
st8.nta [loc2] = t0, CxStFDR - CxStFSR
st8.nta [loc1] = t1
;;
st8.nta [loc2] = t2
mov rbsp = ar.bsp
add loc2 = CxFltS0, a0
add loc3 = CxFltS1, a0
;;
//
// Save the floating point registers fs0 - fs19 and ft0 - ft9.
// (loc2 and loc3 end up at CxFltF32 / CxFltF33, but this routine
// stores nothing there: f32 - f127 are not captured.)
//
// Interleaved in the third slot is the computation of how far to move
// rbsp back:
//
// t0 = bits 13:7 of the saved ar.pfs
// t1 = bits 8:3 of ar.bsp
// t2 = t0 - t1
// if t2 > 0 (pt0): t0 += 1 and t2 -= 64, then for as long as t2 is
// still >= 0: t0 += 1 and t2 -= 63 (loop at Rcc10)
//
// After Rcc20, rbsp = ar.bsp - 8 * t0.
// NOTE(review): the extra counts presumably stand for the RNAT
// collection slots interleaved with the frame in the backing store -
// confirm against the RSE description.
//
stf.spill.nta [loc2] = fs0, CxFltS2 - CxFltS0
stf.spill.nta [loc3] = fs1, CxFltS3 - CxFltS1
shr t0 = rpfs, 7
;;
stf.spill.nta [loc2] = fs2, CxFltT0 - CxFltS2
stf.spill.nta [loc3] = fs3, CxFltT1 - CxFltS3
and t0 = 0x7f, t0
;;
stf.spill.nta [loc2] = ft0, CxFltT2 - CxFltT0
stf.spill.nta [loc3] = ft1, CxFltT3 - CxFltT1
shr t1 = rbsp, 3
;;
stf.spill.nta [loc2] = ft2, CxFltT4 - CxFltT2
stf.spill.nta [loc3] = ft3, CxFltT5 - CxFltT3
and t1 = 0x3f, t1
;;
stf.spill.nta [loc2] = ft4, CxFltT6 - CxFltT4
stf.spill.nta [loc3] = ft5, CxFltT7 - CxFltT5
sub t2 = t0, t1
;;
stf.spill.nta [loc2] = ft6, CxFltT8 - CxFltT6
stf.spill.nta [loc3] = ft7, CxFltT9 - CxFltT7
cmp4.le pt1, pt0 = t2, zero
;;
stf.spill.nta [loc2] = ft8, CxFltS4 - CxFltT8
stf.spill.nta [loc3] = ft9, CxFltS5 - CxFltT9
(pt0) add t2 = -1, t2
;;
stf.spill.nta [loc2] = fs4, CxFltS6 - CxFltS4
stf.spill.nta [loc3] = fs5, CxFltS7 - CxFltS5
(pt0) add t0 = 1, t0
;;
stf.spill.nta [loc2] = fs6, CxFltS8 - CxFltS6
stf.spill.nta [loc3] = fs7, CxFltS9 - CxFltS7
(pt0) add t2 = -63, t2
;;
stf.spill.nta [loc2] = fs8, CxFltS10 - CxFltS8
stf.spill.nta [loc3] = fs9, CxFltS11 - CxFltS9
(pt0) cmp4.ge.unc pt2, pt3 = t2, zero
;;
stf.spill.nta [loc2] = fs10, CxFltS12 - CxFltS10
stf.spill.nta [loc3] = fs11, CxFltS13 - CxFltS11
(pt1) br.cond.spnt Rcc20
;;
//
// Loop: each pass with pt2 set adds one to t0 and takes 63 off t2;
// the loop exits to Rcc20 as soon as t2 has gone negative (pt3).
//
Rcc10:
(pt2) add t0 = 1, t0
(pt2) add t2 = -63, t2
(pt3) br.cond.sptk Rcc20
;;
cmp4.ge pt2, pt3 = t2, zero
nop.m 0
br Rcc10
Rcc20:
stf.spill.nta [loc2] = fs12, CxFltS14 - CxFltS12
stf.spill.nta [loc3] = fs13, CxFltS15 - CxFltS13
add tmpbsp = -8, rbsp
;;
stf.spill.nta [loc2] = fs14, CxFltS16 - CxFltS14
stf.spill.nta [loc3] = fs15, CxFltS17 - CxFltS15
shl t0 = t0, 3
;;
stf.spill.nta [loc2] = fs16, CxFltS18 - CxFltS16
stf.spill.nta [loc3] = fs17, CxFltS19 - CxFltS17
sub rbsp = rbsp, t0
;;
stf.spill.nta [loc2] = fs18, CxFltF32 - CxFltS18
stf.spill.nta [loc3] = fs19, CxFltF33 - CxFltS19
nop.i 0
;;
//
// Save application registers, control information and set context flags.
//
// The aliases below rename t-registers whose original values are
// already in the buffer. (sol and is are defined but not referenced.)
//
User=pt0
Krnl=pt1
rdcr=t1
mask=t2
sol=t4
rpsr=t5
is=t6
rccv=t7
rlc=t8
rec=t9
rrsc=t10
rrnat=t11
flag=t16
addr0=t17
addr1=t18
tmp=t19
mov rrsc = ar.rsc
tbit.nz Krnl, User = sp, 62 // bit 62 of sp is 1 when in kernel
mov rlc = ar.lc
;;
mov ar.rsc = zero // put RSE in lazy mode
mov rccv = ar.ccv
mov rec = ar.ec
;;
//
// psr and cr.dcr are only read when Krnl is set; otherwise psr.um is
// read and rdcr is not written here, so the value stored at CxApDCR
// below is whatever t1 last held.
//
(Krnl) mov rpsr = psr
(User) mov rpsr = psr.um
add addr0 = CxApUNAT, a0
mov rrnat = ar.rnat
add addr1 = CxApLC, a0
(Krnl) mov rdcr = cr.dcr
(Krnl) movl tmp = 1 << PSR_BN
;;
st8.nta [addr0] = runat, CxApEC - CxApUNAT
st8.nta [addr1] = rlc, CxApCCV - CxApLC
(Krnl) or rpsr = tmp, rpsr
;;
st8.nta [addr0] = rec, CxApDCR - CxApEC
st8.nta [addr1] = rccv, CxRsPFS - CxApCCV
mov tmp = 1
;;
st8.nta [addr0] = rdcr, CxRsBSP - CxApDCR
st8.nta [addr1] = rpfs, CxRsBSPSTORE - CxRsPFS
shl tmp = tmp, 63
;;
// The adjusted rbsp is recorded as both BSP and BSPSTORE.
st8.nta [addr0] = rbsp, CxRsRSC - CxRsBSP
st8.nta [addr1] = rbsp, CxRsRNAT - CxRsBSPSTORE
or rpfs = rpfs, tmp // validate IFS
;;
st8.nta [addr0] = rrsc, CxStIIP - CxRsRSC
st8.nta [addr1] = rrnat, CxStIFS - CxRsRNAT
mov mask = RNAT_ALIGNMENT
;;
// CxStIIP receives the caller's return address.
st8.nta [addr0] = rbrp, CxStIPSR - CxStIIP
add tmp = CxContextFlags, a0
mov flag = CONTEXT_FULL // full context saved.
;;
st8.nta [addr0] = rpsr // save psr
st8.nta [addr1] = rpfs
or tmpbsp = tmpbsp, mask
;;
//
// tmpbsp = (ar.bsp - 8) | RNAT_ALIGNMENT; the captured ar.rnat is also
// written to memory at that address.
// NOTE(review): presumably this is the RNAT collection slot of the
// backing store - confirm against the definition of RNAT_ALIGNMENT.
//
mov ar.rsc = rrsc // restore RSC
st4.nta [tmp] = flag
mov ar.unat = runat // restore ar.unat
st8.nta [tmpbsp] = rrnat
(p0) br.ret.sptk brp // return to caller.
LEAF_EXIT(SaveAllRegs)
//++
//
// VOID
// RestoreAllRegs (
// IN PCONTEXT ContextRecord
// )
//
// Routine Description:
//
// This function reloads the register state from the specified context
// record and resumes execution at the address held in CxStIIP, with the
// processor status held in CxStIPSR and the frame marker in CxStIFS.
// It does not return to its caller.
//
// Reloaded from the record: the integer registers gp, t0 - t22,
// s0 - s3, v0, teb and sp (with their NaT bits), the predicates, the
// branch registers brp, bs0 - bs4, bt0 and bt1, the floating point
// registers fs0 - fs19 and ft0 - ft9, ar.fpsr, ar.unat, ar.lc, ar.ec,
// ar.ccv, ar.pfs, ar.rsc, ar.rnat, ar.bspstore and cr.dcr.
//
// N.B. The context record is assumed to be 16-byte aligned.
//
// N.B. The final sequence uses rsm, bsw, rfi and moves to control
// registers, all of which are privileged instructions.
// NOTE(review): confirm that every caller runs at privilege level 0.
//
// Arguments:
//
// ContextRecord (a0) - Supplies the address of a context record.
//
// Return Value:
//
// None.
//
//--
LEAF_ENTRY(RestoreAllRegs)
//
// Only the aliases that the body references are defined. The values
// read from the record are held in raw t-registers and loc0 - loc10:
//
// t0 - t7 brp, bs0, bs1, bs2, bs3, bs4, bt0, bt1
// t8 - t11 ar.lc, ar.ec, ar.ccv, ar.pfs
// t16, t17 predicates, integer NaT bits
// loc0 ar.unat loc1 cr.dcr loc2 BSP
// loc3 ar.rsc loc4 sp loc5 ar.rnat
// loc8 ar.fpsr loc9 IIP loc10 IFS
//
tmp=t20
src1=t21
src2=t22
// The frame has 11 locals (loc0 - loc10); .regstk must agree with alloc.
.regstk 2, 11, 2, 0
alloc t4 = ar.pfs, 2, 11, 2, 0
ARGPTR(a0)
add src1 = CxIntNats, a0
add src2 = CxPreds, a0
add tmp = CxIntGp, a0
;;
//
// Two load pointers walk the record. Interleaved in the third slot:
// tmp = bits 8:3 of the address of the CxIntGp slot, which is the
// ar.unat bit that ld8.fill will consult for gp. The saved NaT word
// keeps the bit for gp in bit 1, so it is shifted right by 1 - tmp
// when tmp <= 1 (pt1) and rotated left by tmp - 1 otherwise (pt0).
//
ld8.nt1 t17 = [src1], CxBrRp - CxIntNats
ld8.nt1 t16 = [src2], CxBrS0 - CxPreds
shr tmp = tmp, 3
;;
ld8.nt1 t0 = [src1], CxBrS1 - CxBrRp
ld8.nt1 t1 = [src2], CxBrS2 - CxBrS0
and tmp = 0x3f, tmp
;;
ld8.nt1 t2 = [src1], CxBrS3 - CxBrS1
ld8.nt1 t3 = [src2], CxBrS4 - CxBrS2
cmp4.ge pt1, pt0 = 1, tmp
;;
ld8.nt1 t4 = [src1], CxBrT0 - CxBrS3
ld8.nt1 t5 = [src2], CxBrT1 - CxBrS4
(pt1) sub loc5 = 1, tmp
;;
ld8.nt1 t6 = [src1], CxApUNAT - CxBrT0
ld8.nt1 t7 = [src2], CxApLC - CxBrT1
(pt0) add loc5 = -1, tmp
;;
ld8.nt1 loc0 = [src1], CxApEC - CxApUNAT
ld8.nt1 t8 = [src2], CxApCCV - CxApLC
(pt0) sub loc6 = 65, tmp
;;
ld8.nt1 t9 = [src1], CxApDCR - CxApEC
ld8.nt1 t10 = [src2], CxRsPFS - CxApCCV
(pt1) shr.u t17 = t17, loc5
;;
ld8.nt1 loc1 = [src1], CxRsBSP - CxApDCR
ld8.nt1 t11 = [src2], CxRsRSC - CxRsPFS
(pt0) shl loc7 = t17, loc5
;;
ld8.nt1 loc2 = [src1], CxStIIP - CxRsBSP
ld8.nt1 loc3 = [src2], CxStIFS - CxRsRSC
(pt0) shr.u loc8 = t17, loc6
;;
ld8.nt1 loc9 = [src1]
ld8.nt1 loc10 = [src2]
(pt0) or t17 = loc7, loc8
;;
// ar.unat now holds the NaT bits in the positions ld8.fill expects.
mov ar.unat = t17
add src1 = CxFltS0, a0
shr t12 = loc2, 3
;;
add src2 = CxFltS1, a0
and t12 = 0x3f, t12 // current rnat save index
and t13 = 0x7f, loc10 // total frame size
;;
mov ar.ccv = t10
add t14 = t13, t12
mov ar.pfs = t11
;;
//
// Add one to the frame size for every 63 slots covered by
// (rnat index + frame size), then convert it to bytes and add it to
// the saved BSP.
//
Rrc20:
cmp4.gt pt1, pt0 = 63, t14
;;
(pt0) add t14 = -63, t14
(pt0) add t13 = 1, t13
;;
nop.m 0
(pt1) shl t13 = t13, 3
(pt0) br.cond.spnt Rrc20
;;
//
// From here on no instruction is predicated, so the predicates can be
// reloaded. The branch and loop registers are written while the
// floating point registers are filled.
//
add loc2 = loc2, t13
nop.f 0
mov pr = t16, -1
ldf.fill.nt1 fs0 = [src1], CxFltS2 - CxFltS0
ldf.fill.nt1 fs1 = [src2], CxFltS3 - CxFltS1
mov brp = t0
;;
ldf.fill.nt1 fs2 = [src1], CxFltT0 - CxFltS2
ldf.fill.nt1 fs3 = [src2], CxFltT1 - CxFltS3
mov bs0 = t1
;;
ldf.fill.nt1 ft0 = [src1], CxFltT2 - CxFltT0
ldf.fill.nt1 ft1 = [src2], CxFltT3 - CxFltT1
mov bs1 = t2
;;
ldf.fill.nt1 ft2 = [src1], CxFltT4 - CxFltT2
ldf.fill.nt1 ft3 = [src2], CxFltT5 - CxFltT3
mov bs2 = t3
;;
ldf.fill.nt1 ft4 = [src1], CxFltT6 - CxFltT4
ldf.fill.nt1 ft5 = [src2], CxFltT7 - CxFltT5
mov bs3 = t4
;;
ldf.fill.nt1 ft6 = [src1], CxFltT8 - CxFltT6
ldf.fill.nt1 ft7 = [src2], CxFltT9 - CxFltT7
mov bs4 = t5
;;
ldf.fill.nt1 ft8 = [src1], CxFltS4 - CxFltT8
ldf.fill.nt1 ft9 = [src2], CxFltS5 - CxFltT9
mov bt0 = t6
;;
ldf.fill.nt1 fs4 = [src1], CxFltS6 - CxFltS4
ldf.fill.nt1 fs5 = [src2], CxFltS7 - CxFltS5
mov bt1 = t7
;;
ldf.fill.nt1 fs6 = [src1], CxFltS8 - CxFltS6
ldf.fill.nt1 fs7 = [src2], CxFltS9 - CxFltS7
mov ar.lc = t8
;;
ldf.fill.nt1 fs8 = [src1], CxFltS10 - CxFltS8
ldf.fill.nt1 fs9 = [src2], CxFltS11 - CxFltS9
mov ar.ec = t9
;;
ldf.fill.nt1 fs10 = [src1], CxFltS12 - CxFltS10
ldf.fill.nt1 fs11 = [src2], CxFltS13 - CxFltS11
nop.i 0
;;
ldf.fill.nt1 fs12 = [src1], CxFltS14 - CxFltS12
ldf.fill.nt1 fs13 = [src2], CxFltS15 - CxFltS13
add loc6 = CxIntGp, a0
;;
ldf.fill.nt1 fs14 = [src1], CxFltS16 - CxFltS14
ldf.fill.nt1 fs15 = [src2], CxFltS17 - CxFltS15
add loc7 = CxIntT0, a0
;;
ldf.fill.nt1 fs16 = [src1], CxFltS18 - CxFltS16
ldf.fill.nt1 fs17 = [src2], CxFltS19 - CxFltS17
add t19 = CxRsRNAT, a0
;;
//
// The IFS valid bit is merged into loc10 here, through tmp, while the
// t-registers are still scratch. Doing it after the integer fill (as
// the code used to, through t0) destroyed the value just reloaded
// into t0.
//
ldf.fill.nt1 fs18 = [src1]
ldf.fill.nt1 fs19 = [src2]
add t7 = CxStFPSR, a0
movl tmp = 1 << IFS_V
;;
ld8.nt1 loc8 = [t7] // load fpsr from context
ld8.nt1 loc5 = [t19] // load rnat from context
or loc10 = tmp, loc10 // set ifs valid bit
//
// Fill the integer registers. Nothing below this point may write a
// static register: every one of them now holds its final value.
//
ld8.fill.nt1 gp = [loc6], CxIntT1 - CxIntGp
ld8.fill.nt1 t0 = [loc7], CxIntS0 - CxIntT0
;;
ld8.fill.nt1 t1 = [loc6], CxIntS1 - CxIntT1
ld8.fill.nt1 s0 = [loc7], CxIntS2 - CxIntS0
;;
ld8.fill.nt1 s1 = [loc6], CxIntS3 - CxIntS1
ld8.fill.nt1 s2 = [loc7], CxIntV0 - CxIntS2
;;
ld8.fill.nt1 s3 = [loc6], CxIntTeb - CxIntS3
ld8.fill.nt1 v0 = [loc7], CxIntT2 - CxIntV0
;;
ld8.fill.nt1 teb = [loc6], CxIntT3 - CxIntTeb
ld8.fill.nt1 t2 = [loc7], CxIntSp - CxIntT2
;;
// sp is parked in loc4; it is installed just before the frame is dropped.
ld8.fill.nt1 t3 = [loc6], CxIntT4 - CxIntT3
ld8.fill.nt1 loc4 = [loc7], CxIntT5 - CxIntSp
;;
ld8.fill.nt1 t4 = [loc6], CxIntT6 - CxIntT4
ld8.fill.nt1 t5 = [loc7], CxIntT7 - CxIntT5
;;
ld8.fill.nt1 t6 = [loc6], CxIntT8 - CxIntT6
ld8.fill.nt1 t7 = [loc7], CxIntT9 - CxIntT7
;;
ld8.fill.nt1 t8 = [loc6], CxIntT10 - CxIntT8
ld8.fill.nt1 t9 = [loc7], CxIntT11 - CxIntT9
;;
ld8.fill.nt1 t10 = [loc6], CxIntT12 - CxIntT10
ld8.fill.nt1 t11 = [loc7], CxIntT13 - CxIntT11
;;
ld8.fill.nt1 t12 = [loc6], CxIntT14 - CxIntT12
ld8.fill.nt1 t13 = [loc7], CxIntT15 - CxIntT13
;;
ld8.fill.nt1 t14 = [loc6], CxIntT16 - CxIntT14
ld8.fill.nt1 t15 = [loc7], CxIntT17 - CxIntT15
;;
ld8.fill.nt1 t16 = [loc6], CxIntT18 - CxIntT16
ld8.fill.nt1 t17 = [loc7], CxIntT19 - CxIntT17
;;
ld8.fill.nt1 t18 = [loc6], CxIntT20 - CxIntT18
ld8.fill.nt1 t19 = [loc7], CxIntT21 - CxIntT19
;;
ld8.fill.nt1 t20 = [loc6], CxIntT22 - CxIntT20
ld8.fill.nt1 t21 = [loc7]
;;
rsm 1 << PSR_I
ld8.fill.nt1 t22 = [loc6]
;;
rsm 1 << PSR_IC
;;
// The last ld8.fill has executed, so ar.unat can take its final value.
mov ar.fpsr = loc8 // set fpsr
mov ar.unat = loc0
;;
srlz.d
;;
mov cr.iip = loc9
mov cr.ifs = loc10
bsw.0
;;
//
// After bsw.0 the names r16 - r31 denote the other register bank, so
// r16 - r21 can be used as scratch without disturbing the t7 - t22
// values that were just reloaded.
//
mov cr.dcr = loc1
mov r17 = loc2 // put BSP in a shadow reg
or r16 = 0x3, loc3 // put RSE in eager mode
mov ar.rsc = r0 // put RSE in enforced lazy
nop.m 0
add r20 = CxStIPSR, a0
;;
ld8.nt1 r20 = [r20] // load IPSR
mov r18 = loc4 // put SP in a shadow reg
mov r19 = loc5 // put RNaTs in a shadow reg
;;
//
// Drop the current frame. The old ar.pfs value that alloc returns is
// not needed; it goes to shadow register r21 rather than to t0, which
// already holds its reloaded value.
//
alloc r21 = 0, 0, 0, 0
mov cr.ipsr = r20
mov sp = r18
;;
loadrs
;;
mov ar.bspstore = r17
nop.i 0
;;
mov ar.rnat = r19 // set rnat register
mov ar.rsc = r16 // restore RSC
bsw.1
;;
//
// Resume the restored context. cr.iip, cr.ipsr and cr.ifs are only
// consumed by rfi; without it control would run past the end of this
// procedure into whatever code follows it.
//
invala // drop ALAT entries of the old register frame
nop.i 0
rfi
;;
LEAF_EXIT(RestoreAllRegs)
//--------------------------------------------------------------------
// Routine:
//
// VOID
// penter(
// VOID)
//
// Description:
//
// Profiling entry stub: calls the C version (c_penter) and returns to
// its caller.
//
// The intent is for this routine to load the return address and the
// current address into the two output registers before the call; that
// part is still to be filled in (see below), so out0 and out1 are
// passed to c_penter uninitialized.
//
// Because br.call overwrites both brp and ar.pfs, they are kept in
// loc1 and loc0 across the call and put back before returning.
//
// Input:
//
// None
//
// Output:
//
// None
//
// Return value:
//
// None
//
// NOTE(review): the procedure makes a call, so it is not a leaf.
// LEAF_ENTRY / LEAF_EXIT are kept only so that the symbol is declared
// exactly as before; the unwind directives below describe where brp and
// ar.pfs are saved.
//
//--------------------------------------------------------------------
LEAF_ENTRY(penter)
.prologue
.regstk 0, 2, 2, 0
.save ar.pfs, loc0
alloc loc0 = ar.pfs, 0, 2, 2, 0 // loc0 = caller's ar.pfs
.save rp, loc1
mov loc1 = brp // loc1 = return address of penter
PROLOGUE_END
//
// This to be filled in when the compiler implements /Gh switch
//
br.call.sptk brp = c_penter
;;
mov ar.pfs = loc0 // undo what br.call did to ar.pfs
mov brp = loc1 // and to brp
;;
br.ret.sptk brp // return to caller.
LEAF_EXIT(penter)