windows-nt/Source/XPSP1/NT/sdktools/wst/wstdll/ia64/wstutl.s

786 lines
20 KiB
ArmAsm
Raw Normal View History

2020-09-26 03:20:57 -05:00
// TITLE("Wst Utility")
//++
// Copyright (c) 1992-1994, Microsoft Corporation.
//
// Description:
// SaveAllRegs ()
// - save all IA64 registers
//
// RestoreAllRegs ()
// - restore all IA64 registers
//
//
//--
#include "ksia64.h"
PublicFunction(c_penter)
//--------------------------------------------------------------------
// Routine:
//
// VOID
// SaveAllRegs(
// IN OUT PDWORDLONG pSaveBuffer)
//
// Description:
//
// This function saves the EM registers into the supplied buffer
//
// Input:
//
// a0: pSaveBuffer - Pointer to the CONTEXT buffer where the registers
// should be saved.
//
// Output:
//
// Stores the registers in the supplied buffer.
//
// Return value:
//
// None
//
//
//
//--------------------------------------------------------------------
LEAF_ENTRY(SaveAllRegs)
//
// Save all integer registers and flush the RSE
//
// Overview of this routine (a0 = pointer to the context buffer; all Cx*
// symbols are field offsets supplied by the include file):
//   1. Spill the integer registers to the CxInt* fields and the predicates
//      to CxPreds, while computing how to align the NaT bits collected in
//      ar.unat.
//   2. Store the branch registers, ar.fpsr, the aligned NaT bits and
//      ar.fsr/ar.fir/ar.fdr.
//   3. Spill floating registers fs0-fs3, ft0-ft9 and fs4-fs19 while
//      computing an adjusted backing-store pointer from ar.bsp and ar.pfs.
//   4. Store application/RSE/control registers and CONTEXT_FULL, restore
//      ar.rsc and ar.unat, and return through brp.
//
// Most instruction groups below carry two stores plus one unrelated
// arithmetic/move instruction; temporaries (t0, t1, ...) are reused for
// scratch only after their original value has already been stored.
//
.prologue
// 1 input, 10 locals, 0 outputs, 0 rotating - same values as the alloc below.
.regstk 1, 10, 0, 0
// Symbolic names for the locals/temporary used as long-lived holders.
rbsp = loc9
rpfs = loc8
rbrp = loc7
rpr = loc6
runat = loc4
tmpbsp = t20
alloc rpfs = ar.pfs, 1, 10, 0, 0
// loc0 and loc1 are the two store cursors: loc0 starts at CxIntGp,
// loc1 starts at CxIntT8. Each store post-increments its cursor to the
// next field it will write.
add loc0 = CxIntGp, a0
add loc1 = CxIntT8, a0
;;
flushrs
.save ar.unat, loc4
// Capture the caller's ar.unat and predicates before the spills below
// modify ar.unat and the compares below modify pt0/pt1.
mov runat = ar.unat
mov rpr = pr
PROLOGUE_END
.mem.offset 0,0
st8.spill.nta [loc0] = gp, CxIntT0 - CxIntGp
.mem.offset 0,8
st8.spill.nta [loc1] = t8, CxIntT9 - CxIntT8
// loc2 = address of the CxIntGp field; used only to derive the NaT bit
// position below.
add loc2 = CxIntGp, a0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t0, CxIntT1 - CxIntT0
.mem.offset 0,8
st8.spill.nta [loc1] = t9, CxIntT10 - CxIntT9
shr loc2 = loc2, 3
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t1, CxIntS0 - CxIntT1
.mem.offset 0,8
st8.spill.nta [loc1] = t10, CxIntT11 - CxIntT10
// t0 = (address of CxIntGp >> 3) & 0x3f.
// NOTE(review): this is presumably the ar.unat bit index that st8.spill
// used for the gp slot (address bits 8:3) - confirm against the
// architecture manual.
and t0 = 0x3f, loc2
;;
.mem.offset 0,0
st8.spill.nta [loc0] = s0, CxIntS1 - CxIntS0
.mem.offset 0,8
st8.spill.nta [loc1] = t11, CxIntT12 - CxIntT11
// pt1 = (1 >= t0), pt0 = the complement. These select between a plain
// left shift (pt1) and a 64-bit rotate (pt0) of the NaT bits later on.
cmp4.ge pt1, pt0 = 1, t0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = s1, CxIntS2 - CxIntS1
.mem.offset 0,8
st8.spill.nta [loc1] = t12, CxIntT13 - CxIntT12
// pt1 case: left-shift count = 1 - t0.
(pt1) sub t1 = 1, t0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = s2, CxIntS3 - CxIntS2
.mem.offset 0,8
st8.spill.nta [loc1] = t13, CxIntT14 - CxIntT13
// pt0 case: right-shift count = t0 - 1.
(pt0) add t1 = -1, t0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = s3, CxIntV0 - CxIntS3
.mem.offset 0,8
st8.spill.nta [loc1] = t14, CxIntT15 - CxIntT14
// pt0 case: complementary left-shift count = 65 - t0 = 64 - (t0 - 1).
(pt0) sub t8 = 65, t0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = v0, CxIntTeb - CxIntV0
.mem.offset 0,8
st8.spill.nta [loc1] = t15, CxIntT16 - CxIntT15
nop.i 0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = teb, CxIntT2 - CxIntTeb
.mem.offset 0,8
st8.spill.nta [loc1] = t16, CxIntT17 - CxIntT16
// Branch registers are copied into general registers here (after the
// corresponding temporaries have been stored) and written out later.
mov rbrp = brp
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t2, CxIntT3 - CxIntT2
.mem.offset 0,8
st8.spill.nta [loc1] = t17, CxIntT18 - CxIntT17
mov t11 = bs0
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t3, CxIntSp - CxIntT3
.mem.offset 0,8
st8.spill.nta [loc1] = t18, CxIntT19 - CxIntT18
mov t12 = bs1
;;
.mem.offset 0,0
st8.spill.nta [loc0] = sp, CxIntT4 - CxIntSp
.mem.offset 0,8
st8.spill.nta [loc1] = t19, CxIntT20 - CxIntT19
mov t13 = bs2
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t4, CxIntT5 - CxIntT4
.mem.offset 0,8
st8.spill.nta [loc1] = t20, CxIntT21 - CxIntT20
mov t14 = bs3
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t5, CxIntT6 - CxIntT5
.mem.offset 0,8
st8.spill.nta [loc1] = t21, CxIntT22 - CxIntT21
mov t15 = bs4
;;
.mem.offset 0,0
st8.spill.nta [loc0] = t6, CxIntT7 - CxIntT6
.mem.offset 0,8
// After this store loc1 points at CxPreds.
st8.spill.nta [loc1] = t22, CxPreds - CxIntT22
mov t16 = bt0
;;
st8.spill.nta [loc0] = t7
// After this store loc1 points at CxIntNats (written further below).
st8.nta [loc1] = rpr, CxIntNats - CxPreds // save predicates
mov t17 = bt1
;;
// t9 = ar.unat as left by the spills above; t4 = ar.fpsr.
mov t9 = ar.unat
mov t4 = ar.fpsr
// loc2/loc3 become the two cursors for the branch-register stores.
add loc2 = CxBrRp, a0
;;
add loc3 = CxBrS3, a0
// Align the NaT bits: pt1 -> t9 <<= (1 - t0);
// pt0 -> t9 = (t9 >> (t0 - 1)) | (t9 << (65 - t0)), i.e. a 64-bit rotate
// right by (t0 - 1). Either way the bit that was at index t0 moves to
// bit 1. The pt0 pieces are combined over the next two groups.
(pt1) shl t9 = t9, t1
(pt0) shr.u t2 = t9, t1
;;
//
// Save branch registers.
//
st8.nta [loc2] = rbrp, CxBrS0 - CxBrRp // save brp
st8.nta [loc3] = t14, CxBrS4 - CxBrS3 // save bs3
(pt0) shl t3 = t9, t8
;;
st8.nta [loc2] = t11, CxBrS1 - CxBrS0 // save bs0
st8.nta [loc3] = t15, CxBrT0 - CxBrS4 // save bs4
(pt0) or t9 = t2, t3
;;
st8.nta [loc2] = t12, CxBrS2 - CxBrS1 // save bs1
st8.nta [loc3] = t16, CxBrT1 - CxBrT0 // save bt0
add loc0 = CxStFPSR, a0
;;
st8.nta [loc2] = t13 // save bs2
st8.nta [loc3] = t17 // save bt1
add loc2 = CxStFSR, a0
;;
st8.nta [loc0] = t4 // save fpsr
// loc1 still points at CxIntNats here; it is re-aimed at CxStFIR in the
// same group (the store uses the old value).
st8.nta [loc1] = t9 // save nat bits
add loc1 = CxStFIR, a0
;;
// Read ar.fsr, ar.fir and ar.fdr and store them to CxStFSR, CxStFIR and
// CxStFDR respectively.
mov t0 = ar.fsr
mov t1 = ar.fir
mov t2 = ar.fdr
;;
st8.nta [loc2] = t0, CxStFDR - CxStFSR
st8.nta [loc1] = t1
;;
st8.nta [loc2] = t2
// rbsp = current ar.bsp; adjusted below before being stored.
mov rbsp = ar.bsp
// loc2/loc3 become the two cursors for the floating-point spills.
add loc2 = CxFltS0, a0
add loc3 = CxFltS1, a0
;;
//
// Save floating status and floating registers f0 - f127.
//
// NOTE(review): the code in this routine spills only fs0-fs3, ft0-ft9
// and fs4-fs19; after the last spill the cursors point at CxFltF32 and
// CxFltF33 but nothing is stored there. The "f0 - f127" wording above
// does not match what is visible here.
//
// The third slot of the following groups computes, from the saved
// ar.pfs and ar.bsp:
//   t0 = (rpfs >> 7) & 0x7f     (a 7-bit field of the saved frame marker;
//                                presumably size-of-locals - confirm)
//   t1 = (rbsp >> 3) & 0x3f     (slot index of bsp within a 64-slot group)
//   t2 = t0 - t1
// and, when t2 > 0, adds one to t0 for every 63 slots crossed (see the
// Rcc10 loop). NOTE(review): this looks like accounting for RNAT
// collection slots in the backing store - confirm.
//
stf.spill.nta [loc2] = fs0, CxFltS2 - CxFltS0
stf.spill.nta [loc3] = fs1, CxFltS3 - CxFltS1
shr t0 = rpfs, 7
;;
stf.spill.nta [loc2] = fs2, CxFltT0 - CxFltS2
stf.spill.nta [loc3] = fs3, CxFltT1 - CxFltS3
and t0 = 0x7f, t0
;;
stf.spill.nta [loc2] = ft0, CxFltT2 - CxFltT0
stf.spill.nta [loc3] = ft1, CxFltT3 - CxFltT1
shr t1 = rbsp, 3
;;
stf.spill.nta [loc2] = ft2, CxFltT4 - CxFltT2
stf.spill.nta [loc3] = ft3, CxFltT5 - CxFltT3
and t1 = 0x3f, t1
;;
stf.spill.nta [loc2] = ft4, CxFltT6 - CxFltT4
stf.spill.nta [loc3] = ft5, CxFltT7 - CxFltT5
sub t2 = t0, t1
;;
stf.spill.nta [loc2] = ft6, CxFltT8 - CxFltT6
stf.spill.nta [loc3] = ft7, CxFltT9 - CxFltT7
// pt1 = (t2 <= 0): no extra slot needed; pt0 = (t2 > 0).
cmp4.le pt1, pt0 = t2, zero
;;
stf.spill.nta [loc2] = ft8, CxFltS4 - CxFltT8
stf.spill.nta [loc3] = ft9, CxFltS5 - CxFltT9
(pt0) add t2 = -1, t2
;;
stf.spill.nta [loc2] = fs4, CxFltS6 - CxFltS4
stf.spill.nta [loc3] = fs5, CxFltS7 - CxFltS5
// pt0: count the first extra slot.
(pt0) add t0 = 1, t0
;;
stf.spill.nta [loc2] = fs6, CxFltS8 - CxFltS6
stf.spill.nta [loc3] = fs7, CxFltS9 - CxFltS7
(pt0) add t2 = -63, t2
;;
stf.spill.nta [loc2] = fs8, CxFltS10 - CxFltS8
stf.spill.nta [loc3] = fs9, CxFltS11 - CxFltS9
// pt2 = (t2 >= 0): another extra slot is needed; pt3 = done.
// The .unc form clears both pt2 and pt3 when pt0 is false.
(pt0) cmp4.ge.unc pt2, pt3 = t2, zero
;;
stf.spill.nta [loc2] = fs10, CxFltS12 - CxFltS10
stf.spill.nta [loc3] = fs11, CxFltS13 - CxFltS11
// t2 <= 0 on entry: skip the loop entirely.
(pt1) br.cond.spnt Rcc20
;;
// Loop: while the remainder t2 is still >= 0, add one more slot to t0
// and subtract another 63 from t2.
Rcc10:
(pt2) add t0 = 1, t0
(pt2) add t2 = -63, t2
(pt3) br.cond.sptk Rcc20
;;
cmp4.ge pt2, pt3 = t2, zero
nop.m 0
br Rcc10
Rcc20:
stf.spill.nta [loc2] = fs12, CxFltS14 - CxFltS12
stf.spill.nta [loc3] = fs13, CxFltS15 - CxFltS13
// tmpbsp = (unadjusted) bsp - 8; used at the end as the address for the
// ar.rnat store.
add tmpbsp = -8, rbsp
;;
stf.spill.nta [loc2] = fs14, CxFltS16 - CxFltS14
stf.spill.nta [loc3] = fs15, CxFltS17 - CxFltS15
// Convert the slot count in t0 to bytes (8 bytes per slot).
shl t0 = t0, 3
;;
stf.spill.nta [loc2] = fs16, CxFltS18 - CxFltS16
stf.spill.nta [loc3] = fs17, CxFltS19 - CxFltS17
// rbsp = bsp - 8 * (slot count); this adjusted value is what gets stored
// to both CxRsBSP and CxRsBSPSTORE below.
sub rbsp = rbsp, t0
;;
stf.spill.nta [loc2] = fs18, CxFltF32 - CxFltS18
stf.spill.nta [loc3] = fs19, CxFltF33 - CxFltS19
nop.i 0
;;
//
// Save application registers, control information and set context flags.
//
// Symbolic names for this section. Of these, rdcr, mask, rpsr, rccv, rlc,
// rec, rrsc, rrnat, flag, addr0, addr1, tmp and the predicates User/Krnl
// are referenced below; sol and is are defined but not referenced in this
// routine.
User=pt0
Krnl=pt1
rdcr=t1
mask=t2
sol=t4
rpsr=t5
is=t6
rccv=t7
rlc=t8
rec=t9
rrsc=t10
rrnat=t11
flag=t16
addr0=t17
addr1=t18
tmp=t19
mov rrsc = ar.rsc
// Krnl is set when bit 62 of sp is 1, User otherwise. The original
// comment on this line continues on the stop-bit line a few lines below
// ("bit 62 is 1 when ... in kernel").
tbit.nz Krnl, User = sp, 62 // bit 62 is 1 when
mov rlc = ar.lc
;;
mov ar.rsc = zero // put RSE in lazy mode
mov rccv = ar.ccv
mov rec = ar.ec
;; // in kernel
// Kernel path reads the full psr; user path reads only psr.um.
(Krnl) mov rpsr = psr
(User) mov rpsr = psr.um
// addr0/addr1 are the two store cursors for the remaining fields.
add addr0 = CxApUNAT, a0
mov rrnat = ar.rnat
add addr1 = CxApLC, a0
// NOTE(review): rdcr is written only on the Krnl path, but it is stored
// to CxApDCR unconditionally below; on the User path that store writes
// whatever t1 last held. Confirm this is intended.
(Krnl) mov rdcr = cr.dcr
(Krnl) movl tmp = 1 << PSR_BN
;;
st8.nta [addr0] = runat, CxApEC - CxApUNAT
st8.nta [addr1] = rlc, CxApCCV - CxApLC
// Kernel path: set the PSR_BN bit in the captured psr value.
(Krnl) or rpsr = tmp, rpsr
;;
st8.nta [addr0] = rec, CxApDCR - CxApEC
st8.nta [addr1] = rccv, CxRsPFS - CxApCCV
mov tmp = 1
;;
st8.nta [addr0] = rdcr, CxRsBSP - CxApDCR
st8.nta [addr1] = rpfs, CxRsBSPSTORE - CxRsPFS
// tmp = 1 << 63, OR-ed into rpfs below to form the value stored in CxStIFS.
shl tmp = tmp, 63
;;
// The adjusted rbsp is stored to both CxRsBSP and CxRsBSPSTORE.
st8.nta [addr0] = rbsp, CxRsRSC - CxRsBSP
st8.nta [addr1] = rbsp, CxRsRNAT - CxRsBSPSTORE
or rpfs = rpfs, tmp // validate IFS
;;
st8.nta [addr0] = rrsc, CxStIIP - CxRsRSC
// After this store addr1 points at CxStIFS.
st8.nta [addr1] = rrnat, CxStIFS - CxRsRNAT
mov mask = RNAT_ALIGNMENT
;;
// The caller's return address (brp) is recorded as CxStIIP.
st8.nta [addr0] = rbrp, CxStIPSR - CxStIIP
add tmp = CxContextFlags, a0
mov flag = CONTEXT_FULL // full context saved.
;;
st8.nta [addr0] = rpsr // save psr
// CxStIFS = saved ar.pfs with bit 63 set.
st8.nta [addr1] = rpfs
// tmpbsp = (bsp - 8) | RNAT_ALIGNMENT.
// NOTE(review): presumably the address of the RNAT collection slot for
// the current backing-store region - confirm.
or tmpbsp = tmpbsp, mask
;;
mov ar.rsc = rrsc // restore RSC
// ContextFlags is written as a 4-byte value.
st4.nta [tmp] = flag
mov ar.unat = runat // restore ar.unat
// Write the captured ar.rnat to the address computed in tmpbsp.
st8.nta [tmpbsp] = rrnat
(p0) br.ret.sptk brp // return to caller.
LEAF_EXIT(SaveAllRegs)
//++
//
// VOID
// RestoreAllRegs (
// IN PCONTEXT ContextRecord,
// )
//
// Routine Description:
//
// This function restores the registers of the caller from the specified
// context.
//
// N.B. The context record is assumed to be 16-byte aligned.
//
// Arguments:
//
// ContextRecord (a0) - Supplies the address of a context record.
//
// Return Value:
//
// None.
//
//--
LEAF_ENTRY(RestoreAllRegs)
//
// Overview of this routine (a0 = pointer to the context record; all Cx*
// symbols are field offsets supplied by the include file):
//   1. Load NaT bits, predicates, branch registers and application/RSE/
//      control values from the context into temporaries and locals.
//   2. Re-align the saved NaT bits and load them into ar.unat; compute the
//      backing-store pointer to restore from CxRsBSP and CxStIFS.
//   3. Restore predicates, branch registers, ar.lc/ar.ec, and fill
//      floating registers fs0-fs3, ft0-ft9 and fs4-fs19.
//   4. Fill the integer registers, then program cr.iip/cr.ifs/cr.ipsr/
//      cr.dcr, sp, and the RSE state (loadrs, ar.bspstore, ar.rnat, ar.rsc).
//
// Symbolic names. Only tmp, src1 and src2 are referenced by name in the
// code below; the other aliases are defined but unused here, and the code
// refers to the underlying t-registers directly.
// NOTE(review): rpreds is defined twice (t11, then t16).
//
dest1=t8
dest2=t9
rlc=t10
rpreds=t11
rbrp=t12
rbsp=t13
rpfs=t14
runat=t15
rpreds=t16
rsp=t17
rfpsr=t18
jb=t19
tmp=t20
src1=t21
src2=t22
// NOTE(review): .regstk declares 9 locals while the alloc on the next line
// requests 11 (loc10 is used below). The two disagree - confirm which one
// the assembler honours. Both declare 2 inputs although the routine header
// documents a single argument.
.regstk 2, 9, 2, 0
alloc t4 = ar.pfs, 2, 11, 2, 0
ARGPTR(a0)
// src1/src2 are the two load cursors; each load post-increments its cursor
// to the next field it will read. tmp starts as the address of CxIntGp and
// is reduced to a NaT bit index below.
add src1 = CxIntNats, a0
add src2 = CxPreds, a0
add tmp = CxIntGp, a0
;;
// t17 = saved NaT bits, t16 = saved predicates.
ld8.nt1 t17 = [src1], CxBrRp - CxIntNats
ld8.nt1 t16 = [src2], CxBrS0 - CxPreds
shr tmp = tmp, 3
;;
// t0 = BrRp, t1 = BrS0.
ld8.nt1 t0 = [src1], CxBrS1 - CxBrRp
ld8.nt1 t1 = [src2], CxBrS2 - CxBrS0
// tmp = (address of CxIntGp >> 3) & 0x3f.
// NOTE(review): presumably the ar.unat bit index that ld8.fill will
// consult for the gp slot - confirm against the architecture manual.
and tmp = 0x3f, tmp
;;
// t2 = BrS1, t3 = BrS2.
ld8.nt1 t2 = [src1], CxBrS3 - CxBrS1
ld8.nt1 t3 = [src2], CxBrS4 - CxBrS2
// pt1 = (1 >= tmp), pt0 = the complement; selects plain shift vs rotate.
cmp4.ge pt1, pt0 = 1, tmp
;;
// t4 = BrS3, t5 = BrS4 (t4 previously held the alloc result, which is
// discarded here).
ld8.nt1 t4 = [src1], CxBrT0 - CxBrS3
ld8.nt1 t5 = [src2], CxBrT1 - CxBrS4
// pt1 case: right-shift count = 1 - tmp.
(pt1) sub loc5 = 1, tmp
;;
// t6 = BrT0, t7 = BrT1.
ld8.nt1 t6 = [src1], CxApUNAT - CxBrT0
ld8.nt1 t7 = [src2], CxApLC - CxBrT1
// pt0 case: left-shift count = tmp - 1.
(pt0) add loc5 = -1, tmp
;;
// loc0 = ApUNAT, t8 = ApLC.
ld8.nt1 loc0 = [src1], CxApEC - CxApUNAT
ld8.nt1 t8 = [src2], CxApCCV - CxApLC
// pt0 case: complementary right-shift count = 65 - tmp.
(pt0) sub loc6 = 65, tmp
;;
// t9 = ApEC, t10 = ApCCV.
ld8.nt1 t9 = [src1], CxApDCR - CxApEC
ld8.nt1 t10 = [src2], CxRsPFS - CxApCCV
// Re-align the NaT bits: pt1 -> t17 >>= (1 - tmp);
// pt0 -> t17 = (t17 << (tmp - 1)) | (t17 >> (65 - tmp)), i.e. a 64-bit
// rotate left by (tmp - 1). Either way bit 1 moves to bit index tmp.
(pt1) shr.u t17 = t17, loc5
;;
// loc1 = ApDCR, t11 = RsPFS.
ld8.nt1 loc1 = [src1], CxRsBSP - CxApDCR
ld8.nt1 t11 = [src2], CxRsRSC - CxRsPFS
(pt0) shl loc7 = t17, loc5
;;
// loc2 = RsBSP, loc3 = RsRSC.
ld8.nt1 loc2 = [src1], CxStIIP - CxRsBSP
ld8.nt1 loc3 = [src2], CxStIFS - CxRsRSC
(pt0) shr.u loc8 = t17, loc6
;;
// loc9 = StIIP, loc10 = StIFS.
ld8.nt1 loc9 = [src1]
ld8.nt1 loc10 = [src2]
(pt0) or t17 = loc7, loc8
;;
// Install the re-aligned NaT bits so the ld8.fill instructions below pick
// them up.
mov ar.unat = t17
add src1 = CxFltS0, a0
shr t12 = loc2, 3
;;
add src2 = CxFltS1, a0
and t12 = 0x3f, t12 // current rnat save index
and t13 = 0x7f, loc10 // total frame size
;;
mov ar.ccv = t10
// t14 = frame size + slot index; the loop below adds one slot to t13 for
// every 63 contained in t14.
add t14 = t13, t12
mov ar.pfs = t11
;;
// Loop: while t14 >= 63, subtract 63 from t14 and add 1 to t13. On the
// final pass (t14 < 63) t13 is converted to bytes (<< 3).
Rrc20:
cmp4.gt pt1, pt0 = 63, t14
;;
(pt0) add t14 = -63, t14
(pt0) add t13 = 1, t13
;;
nop.m 0
(pt1) shl t13 = t13, 3
(pt0) br.cond.spnt Rrc20
;;
// loc2 = saved BSP + 8 * (frame size + extra slots).
add loc2 = loc2, t13
nop.f 0
// Restore all predicates (mask -1) from the value loaded from CxPreds.
mov pr = t16, -1
// Fill the floating registers from two interleaved cursors; the third
// slot of each group restores a branch or application register from the
// temporaries loaded above.
ldf.fill.nt1 fs0 = [src1], CxFltS2 - CxFltS0
ldf.fill.nt1 fs1 = [src2], CxFltS3 - CxFltS1
mov brp = t0
;;
ldf.fill.nt1 fs2 = [src1], CxFltT0 - CxFltS2
ldf.fill.nt1 fs3 = [src2], CxFltT1 - CxFltS3
mov bs0 = t1
;;
ldf.fill.nt1 ft0 = [src1], CxFltT2 - CxFltT0
ldf.fill.nt1 ft1 = [src2], CxFltT3 - CxFltT1
mov bs1 = t2
;;
ldf.fill.nt1 ft2 = [src1], CxFltT4 - CxFltT2
ldf.fill.nt1 ft3 = [src2], CxFltT5 - CxFltT3
mov bs2 = t3
;;
ldf.fill.nt1 ft4 = [src1], CxFltT6 - CxFltT4
ldf.fill.nt1 ft5 = [src2], CxFltT7 - CxFltT5
mov bs3 = t4
;;
ldf.fill.nt1 ft6 = [src1], CxFltT8 - CxFltT6
ldf.fill.nt1 ft7 = [src2], CxFltT9 - CxFltT7
mov bs4 = t5
;;
ldf.fill.nt1 ft8 = [src1], CxFltS4 - CxFltT8
ldf.fill.nt1 ft9 = [src2], CxFltS5 - CxFltT9
mov bt0 = t6
;;
ldf.fill.nt1 fs4 = [src1], CxFltS6 - CxFltS4
ldf.fill.nt1 fs5 = [src2], CxFltS7 - CxFltS5
mov bt1 = t7
;;
ldf.fill.nt1 fs6 = [src1], CxFltS8 - CxFltS6
ldf.fill.nt1 fs7 = [src2], CxFltS9 - CxFltS7
mov ar.lc = t8
;;
ldf.fill.nt1 fs8 = [src1], CxFltS10 - CxFltS8
ldf.fill.nt1 fs9 = [src2], CxFltS11 - CxFltS9
mov ar.ec = t9
;;
ldf.fill.nt1 fs10 = [src1], CxFltS12 - CxFltS10
ldf.fill.nt1 fs11 = [src2], CxFltS13 - CxFltS11
nop.i 0
;;
ldf.fill.nt1 fs12 = [src1], CxFltS14 - CxFltS12
ldf.fill.nt1 fs13 = [src2], CxFltS15 - CxFltS13
// loc6/loc7 become the two cursors for the integer fills: loc6 starts at
// CxIntGp, loc7 at CxIntT0, and each steps over the field the other reads.
add loc6 = CxIntGp, a0
;;
ldf.fill.nt1 fs14 = [src1], CxFltS16 - CxFltS14
ldf.fill.nt1 fs15 = [src2], CxFltS17 - CxFltS15
add loc7 = CxIntT0, a0
;;
ldf.fill.nt1 fs16 = [src1], CxFltS18 - CxFltS16
ldf.fill.nt1 fs17 = [src2], CxFltS19 - CxFltS17
add t19 = CxRsRNAT, a0
;;
ldf.fill.nt1 fs18 = [src1]
ldf.fill.nt1 fs19 = [src2]
add t7 = CxStFPSR, a0
;;
ld8.nt1 loc8 = [t7] // load fpsr from context
ld8.nt1 loc5 = [t19] // load rnat from context
nop.i 0
// Integer register fills. From here on the temporaries hold restored
// context values, so the remaining bookkeeping uses locals (loc0-loc10).
ld8.fill.nt1 gp = [loc6], CxIntT1 - CxIntGp
ld8.fill.nt1 t0 = [loc7], CxIntS0 - CxIntT0
;;
ld8.fill.nt1 t1 = [loc6], CxIntS1 - CxIntT1
ld8.fill.nt1 s0 = [loc7], CxIntS2 - CxIntS0
;;
ld8.fill.nt1 s1 = [loc6], CxIntS3 - CxIntS1
ld8.fill.nt1 s2 = [loc7], CxIntV0 - CxIntS2
;;
ld8.fill.nt1 s3 = [loc6], CxIntTeb - CxIntS3
ld8.fill.nt1 v0 = [loc7], CxIntT2 - CxIntV0
;;
ld8.fill.nt1 teb = [loc6], CxIntT3 - CxIntTeb
ld8.fill.nt1 t2 = [loc7], CxIntSp - CxIntT2
;;
ld8.fill.nt1 t3 = [loc6], CxIntT4 - CxIntT3
// The saved sp is parked in loc4; sp itself is written near the end.
ld8.fill.nt1 loc4 = [loc7], CxIntT5 - CxIntSp
;;
ld8.fill.nt1 t4 = [loc6], CxIntT6 - CxIntT4
ld8.fill.nt1 t5 = [loc7], CxIntT7 - CxIntT5
;;
ld8.fill.nt1 t6 = [loc6], CxIntT8 - CxIntT6
ld8.fill.nt1 t7 = [loc7], CxIntT9 - CxIntT7
;;
ld8.fill.nt1 t8 = [loc6], CxIntT10 - CxIntT8
ld8.fill.nt1 t9 = [loc7], CxIntT11 - CxIntT9
;;
ld8.fill.nt1 t10 = [loc6], CxIntT12 - CxIntT10
ld8.fill.nt1 t11 = [loc7], CxIntT13 - CxIntT11
;;
ld8.fill.nt1 t12 = [loc6], CxIntT14 - CxIntT12
ld8.fill.nt1 t13 = [loc7], CxIntT15 - CxIntT13
;;
ld8.fill.nt1 t14 = [loc6], CxIntT16 - CxIntT14
ld8.fill.nt1 t15 = [loc7], CxIntT17 - CxIntT15
;;
ld8.fill.nt1 t16 = [loc6], CxIntT18 - CxIntT16
ld8.fill.nt1 t17 = [loc7], CxIntT19 - CxIntT17
;;
ld8.fill.nt1 t18 = [loc6], CxIntT20 - CxIntT18
ld8.fill.nt1 t19 = [loc7], CxIntT21 - CxIntT19
;;
ld8.fill.nt1 t20 = [loc6], CxIntT22 - CxIntT20
ld8.fill.nt1 t21 = [loc7]
;;
// Clear PSR_I, then PSR_IC, before programming the cr.* registers.
// NOTE(review): rsm, the cr.* moves and bsw below are presumably
// privileged operations - confirm this routine is only reached at a
// privilege level where they are permitted.
rsm 1 << PSR_I
ld8.fill.nt1 t22 = [loc6]
;;
rsm 1 << PSR_IC
// NOTE(review): t0 was filled from the context above and is overwritten
// here (and again by the alloc below); the restored t0 value does not
// survive in this register as far as the visible code shows.
movl t0 = 1 << IFS_V
;;
mov ar.fpsr = loc8 // set fpsr
// Replace the temporary NaT bits with the context's ApUNAT value.
mov ar.unat = loc0
;;
srlz.d
or loc10 = t0, loc10 // set ifs valid bit
;;
// cr.iip = saved StIIP, cr.ifs = saved StIFS with the valid bit set.
mov cr.iip = loc9
mov cr.ifs = loc10
bsw.0
;;
// After bsw.0 the code uses r16-r20 by number as scratch.
mov cr.dcr = loc1
mov r17 = loc2 // put BSP in a shadow reg
or r16 = 0x3, loc3 // put RSE in eager mode
mov ar.rsc = r0 // put RSE in enforced lazy
nop.m 0
add r20 = CxStIPSR, a0
;;
ld8.nt1 r20 = [r20] // load IPSR
mov r18 = loc4 // put SP in a shadow reg
mov r19 = loc5 // put RNaTs in a shadow reg
;;
// Drop the current register frame (0 inputs/locals/outputs); loc* and a0
// are not referenced after this point.
alloc t0 = 0, 0, 0, 0
mov cr.ipsr = r20
mov sp = r18
;;
loadrs
;;
mov ar.bspstore = r17
nop.i 0
;;
mov ar.rnat = r19 // set rnat register
mov ar.rsc = r16 // restore RSC
bsw.1
;;
// NOTE(review): no rfi or br.ret appears between bsw.1 and LEAF_EXIT, so
// as written execution continues past the end of this routine. cr.iip,
// cr.ipsr and cr.ifs have been programmed above, which suggests a
// return-from-interruption was intended here - confirm against the
// routine this was derived from.
LEAF_EXIT(RestoreAllRegs)
//--------------------------------------------------------------------
// Routine:
//
// VOID
// penter(
// VOID)
//
// Description:
//
// This function is intended to load the return address and current address
// into two parameter registers and then call the C version (c_penter).
// The current stub only performs the call.
//
// Input:
//
// None
//
// Output:
//
// None
//
// Return value:
//
// None
//
//
//
//--------------------------------------------------------------------
LEAF_ENTRY(penter)
// Declares 0 inputs, 1 local, 2 outputs and 0 rotating registers. No alloc
// instruction is issued in this stub, and neither the local nor the two
// output registers are written before the call.
.regstk 0, 1, 2, 0
//
// This to be filled in when the compiler implements /Gh switch
//
// Placeholder body: transfers control to c_penter (declared with
// PublicFunction at the top of the file) with brp as the link register.
// NOTE(review): the call overwrites brp without saving it, and nothing
// follows the call before LEAF_EXIT, so there is no return to penter's own
// caller in this stub. The routine is also declared with LEAF_ENTRY even
// though it makes a call. Revisit when the stub is filled in.
//
br.call.sptk brp = c_penter
LEAF_EXIT(penter)