// TITLE("Wst Utility") //++ // Copyright (c) 1992-1994, Microsoft Corporation. // // Description: // SaveAllRegs () // - save all IA64 registers // // RestoreAllRegs () // - restore all IA64 registers // // //-- #include "ksia64.h" PublicFunction(c_penter) //-------------------------------------------------------------------- // Routine: // // VOID // SaveAllRegs( // IN OUT PDWORDLONG pSaveBuffer) // // Description: // // This function saves the EM registers into the supplied buffer // // Input: // // a0: pSaveBuffer - Pointer CONTEXT buffer where the registers should be // saved. // // Output: // // Stores the registers in the supplied buffer. // // Return value: // // None // // // //-------------------------------------------------------------------- LEAF_ENTRY(SaveAllRegs) // // Save all integer registers and flush the RSE // .prologue .regstk 1, 10, 0, 0 rbsp = loc9 rpfs = loc8 rbrp = loc7 rpr = loc6 runat = loc4 tmpbsp = t20 alloc rpfs = ar.pfs, 1, 10, 0, 0 add loc0 = CxIntGp, a0 add loc1 = CxIntT8, a0 ;; flushrs .save ar.unat, loc4 mov runat = ar.unat mov rpr = pr PROLOGUE_END .mem.offset 0,0 st8.spill.nta [loc0] = gp, CxIntT0 - CxIntGp .mem.offset 0,8 st8.spill.nta [loc1] = t8, CxIntT9 - CxIntT8 add loc2 = CxIntGp, a0 ;; .mem.offset 0,0 st8.spill.nta [loc0] = t0, CxIntT1 - CxIntT0 .mem.offset 0,8 st8.spill.nta [loc1] = t9, CxIntT10 - CxIntT9 shr loc2 = loc2, 3 ;; .mem.offset 0,0 st8.spill.nta [loc0] = t1, CxIntS0 - CxIntT1 .mem.offset 0,8 st8.spill.nta [loc1] = t10, CxIntT11 - CxIntT10 and t0 = 0x3f, loc2 ;; .mem.offset 0,0 st8.spill.nta [loc0] = s0, CxIntS1 - CxIntS0 .mem.offset 0,8 st8.spill.nta [loc1] = t11, CxIntT12 - CxIntT11 cmp4.ge pt1, pt0 = 1, t0 ;; .mem.offset 0,0 st8.spill.nta [loc0] = s1, CxIntS2 - CxIntS1 .mem.offset 0,8 st8.spill.nta [loc1] = t12, CxIntT13 - CxIntT12 (pt1) sub t1 = 1, t0 ;; .mem.offset 0,0 st8.spill.nta [loc0] = s2, CxIntS3 - CxIntS2 .mem.offset 0,8 st8.spill.nta [loc1] = t13, CxIntT14 - CxIntT13 (pt0) add t1 = -1, t0 ;; .mem.offset 0,0 st8.spill.nta [loc0] = s3, CxIntV0 - CxIntS3 .mem.offset 0,8 st8.spill.nta [loc1] = t14, CxIntT15 - CxIntT14 (pt0) sub t8 = 65, t0 ;; .mem.offset 0,0 st8.spill.nta [loc0] = v0, CxIntTeb - CxIntV0 .mem.offset 0,8 st8.spill.nta [loc1] = t15, CxIntT16 - CxIntT15 nop.i 0 ;; .mem.offset 0,0 st8.spill.nta [loc0] = teb, CxIntT2 - CxIntTeb .mem.offset 0,8 st8.spill.nta [loc1] = t16, CxIntT17 - CxIntT16 mov rbrp = brp ;; .mem.offset 0,0 st8.spill.nta [loc0] = t2, CxIntT3 - CxIntT2 .mem.offset 0,8 st8.spill.nta [loc1] = t17, CxIntT18 - CxIntT17 mov t11 = bs0 ;; .mem.offset 0,0 st8.spill.nta [loc0] = t3, CxIntSp - CxIntT3 .mem.offset 0,8 st8.spill.nta [loc1] = t18, CxIntT19 - CxIntT18 mov t12 = bs1 ;; .mem.offset 0,0 st8.spill.nta [loc0] = sp, CxIntT4 - CxIntSp .mem.offset 0,8 st8.spill.nta [loc1] = t19, CxIntT20 - CxIntT19 mov t13 = bs2 ;; .mem.offset 0,0 st8.spill.nta [loc0] = t4, CxIntT5 - CxIntT4 .mem.offset 0,8 st8.spill.nta [loc1] = t20, CxIntT21 - CxIntT20 mov t14 = bs3 ;; .mem.offset 0,0 st8.spill.nta [loc0] = t5, CxIntT6 - CxIntT5 .mem.offset 0,8 st8.spill.nta [loc1] = t21, CxIntT22 - CxIntT21 mov t15 = bs4 ;; .mem.offset 0,0 st8.spill.nta [loc0] = t6, CxIntT7 - CxIntT6 .mem.offset 0,8 st8.spill.nta [loc1] = t22, CxPreds - CxIntT22 mov t16 = bt0 ;; st8.spill.nta [loc0] = t7 st8.nta [loc1] = rpr, CxIntNats - CxPreds // save predicates mov t17 = bt1 ;; mov t9 = ar.unat mov t4 = ar.fpsr add loc2 = CxBrRp, a0 ;; add loc3 = CxBrS3, a0 (pt1) shl t9 = t9, t1 (pt0) shr.u t2 = t9, t1 ;; // // Save branch registers. // st8.nta [loc2] = rbrp, CxBrS0 - CxBrRp // save brp st8.nta [loc3] = t14, CxBrS4 - CxBrS3 // save bs3 (pt0) shl t3 = t9, t8 ;; st8.nta [loc2] = t11, CxBrS1 - CxBrS0 // save bs0 st8.nta [loc3] = t15, CxBrT0 - CxBrS4 // save bs4 (pt0) or t9 = t2, t3 ;; st8.nta [loc2] = t12, CxBrS2 - CxBrS1 // save bs1 st8.nta [loc3] = t16, CxBrT1 - CxBrT0 // save bt0 add loc0 = CxStFPSR, a0 ;; st8.nta [loc2] = t13 // save bs2 st8.nta [loc3] = t17 // save bt1 add loc2 = CxStFSR, a0 ;; st8.nta [loc0] = t4 // save fpsr st8.nta [loc1] = t9 // save nat bits add loc1 = CxStFIR, a0 ;; mov t0 = ar.fsr mov t1 = ar.fir mov t2 = ar.fdr ;; st8.nta [loc2] = t0, CxStFDR - CxStFSR st8.nta [loc1] = t1 ;; st8.nta [loc2] = t2 mov rbsp = ar.bsp add loc2 = CxFltS0, a0 add loc3 = CxFltS1, a0 ;; // // Save floating status and floating registers f0 - f127. // stf.spill.nta [loc2] = fs0, CxFltS2 - CxFltS0 stf.spill.nta [loc3] = fs1, CxFltS3 - CxFltS1 shr t0 = rpfs, 7 ;; stf.spill.nta [loc2] = fs2, CxFltT0 - CxFltS2 stf.spill.nta [loc3] = fs3, CxFltT1 - CxFltS3 and t0 = 0x7f, t0 ;; stf.spill.nta [loc2] = ft0, CxFltT2 - CxFltT0 stf.spill.nta [loc3] = ft1, CxFltT3 - CxFltT1 shr t1 = rbsp, 3 ;; stf.spill.nta [loc2] = ft2, CxFltT4 - CxFltT2 stf.spill.nta [loc3] = ft3, CxFltT5 - CxFltT3 and t1 = 0x3f, t1 ;; stf.spill.nta [loc2] = ft4, CxFltT6 - CxFltT4 stf.spill.nta [loc3] = ft5, CxFltT7 - CxFltT5 sub t2 = t0, t1 ;; stf.spill.nta [loc2] = ft6, CxFltT8 - CxFltT6 stf.spill.nta [loc3] = ft7, CxFltT9 - CxFltT7 cmp4.le pt1, pt0 = t2, zero ;; stf.spill.nta [loc2] = ft8, CxFltS4 - CxFltT8 stf.spill.nta [loc3] = ft9, CxFltS5 - CxFltT9 (pt0) add t2 = -1, t2 ;; stf.spill.nta [loc2] = fs4, CxFltS6 - CxFltS4 stf.spill.nta [loc3] = fs5, CxFltS7 - CxFltS5 (pt0) add t0 = 1, t0 ;; stf.spill.nta [loc2] = fs6, CxFltS8 - CxFltS6 stf.spill.nta [loc3] = fs7, CxFltS9 - CxFltS7 (pt0) add t2 = -63, t2 ;; stf.spill.nta [loc2] = fs8, CxFltS10 - CxFltS8 stf.spill.nta [loc3] = fs9, CxFltS11 - CxFltS9 (pt0) cmp4.ge.unc pt2, pt3 = t2, zero ;; stf.spill.nta [loc2] = fs10, CxFltS12 - CxFltS10 stf.spill.nta [loc3] = fs11, CxFltS13 - CxFltS11 (pt1) br.cond.spnt Rcc20 ;; Rcc10: (pt2) add t0 = 1, t0 (pt2) add t2 = -63, t2 (pt3) br.cond.sptk Rcc20 ;; cmp4.ge pt2, pt3 = t2, zero nop.m 0 br Rcc10 Rcc20: stf.spill.nta [loc2] = fs12, CxFltS14 - CxFltS12 stf.spill.nta [loc3] = fs13, CxFltS15 - CxFltS13 add tmpbsp = -8, rbsp ;; stf.spill.nta [loc2] = fs14, CxFltS16 - CxFltS14 stf.spill.nta [loc3] = fs15, CxFltS17 - CxFltS15 shl t0 = t0, 3 ;; stf.spill.nta [loc2] = fs16, CxFltS18 - CxFltS16 stf.spill.nta [loc3] = fs17, CxFltS19 - CxFltS17 sub rbsp = rbsp, t0 ;; stf.spill.nta [loc2] = fs18, CxFltF32 - CxFltS18 stf.spill.nta [loc3] = fs19, CxFltF33 - CxFltS19 nop.i 0 ;; // // Save application registers, control information and set context flags. // User=pt0 Krnl=pt1 rdcr=t1 mask=t2 sol=t4 rpsr=t5 is=t6 rccv=t7 rlc=t8 rec=t9 rrsc=t10 rrnat=t11 flag=t16 addr0=t17 addr1=t18 tmp=t19 mov rrsc = ar.rsc tbit.nz Krnl, User = sp, 62 // bit 62 is 1 when mov rlc = ar.lc ;; mov ar.rsc = zero // put RSE in lazy mode mov rccv = ar.ccv mov rec = ar.ec ;; // in kernel (Krnl) mov rpsr = psr (User) mov rpsr = psr.um add addr0 = CxApUNAT, a0 mov rrnat = ar.rnat add addr1 = CxApLC, a0 (Krnl) mov rdcr = cr.dcr (Krnl) movl tmp = 1 << PSR_BN ;; st8.nta [addr0] = runat, CxApEC - CxApUNAT st8.nta [addr1] = rlc, CxApCCV - CxApLC (Krnl) or rpsr = tmp, rpsr ;; st8.nta [addr0] = rec, CxApDCR - CxApEC st8.nta [addr1] = rccv, CxRsPFS - CxApCCV mov tmp = 1 ;; st8.nta [addr0] = rdcr, CxRsBSP - CxApDCR st8.nta [addr1] = rpfs, CxRsBSPSTORE - CxRsPFS shl tmp = tmp, 63 ;; st8.nta [addr0] = rbsp, CxRsRSC - CxRsBSP st8.nta [addr1] = rbsp, CxRsRNAT - CxRsBSPSTORE or rpfs = rpfs, tmp // validate IFS ;; st8.nta [addr0] = rrsc, CxStIIP - CxRsRSC st8.nta [addr1] = rrnat, CxStIFS - CxRsRNAT mov mask = RNAT_ALIGNMENT ;; st8.nta [addr0] = rbrp, CxStIPSR - CxStIIP add tmp = CxContextFlags, a0 mov flag = CONTEXT_FULL // full context saved. ;; st8.nta [addr0] = rpsr // save psr st8.nta [addr1] = rpfs or tmpbsp = tmpbsp, mask ;; mov ar.rsc = rrsc // restore RSC st4.nta [tmp] = flag mov ar.unat = runat // restore ar.unat st8.nta [tmpbsp] = rrnat (p0) br.ret.sptk brp // return to caller. LEAF_EXIT(SaveAllRegs) //++ // // VOID // RestoreAllRegs ( // IN PCONTEXT ContextRecord, // ) // // Routine Description: // // This function restores the registers of the caller from the specified // context. // // N.B. The context record is assumed to be 16-byte aligned. // // Arguments: // // ContextRecord (a0) - Supplies the address of a context record. // // Return Value: // // None. // //-- LEAF_ENTRY(RestoreAllRegs) dest1=t8 dest2=t9 rlc=t10 rpreds=t11 rbrp=t12 rbsp=t13 rpfs=t14 runat=t15 rpreds=t16 rsp=t17 rfpsr=t18 jb=t19 tmp=t20 src1=t21 src2=t22 .regstk 2, 9, 2, 0 alloc t4 = ar.pfs, 2, 11, 2, 0 ARGPTR(a0) add src1 = CxIntNats, a0 add src2 = CxPreds, a0 add tmp = CxIntGp, a0 ;; ld8.nt1 t17 = [src1], CxBrRp - CxIntNats ld8.nt1 t16 = [src2], CxBrS0 - CxPreds shr tmp = tmp, 3 ;; ld8.nt1 t0 = [src1], CxBrS1 - CxBrRp ld8.nt1 t1 = [src2], CxBrS2 - CxBrS0 and tmp = 0x3f, tmp ;; ld8.nt1 t2 = [src1], CxBrS3 - CxBrS1 ld8.nt1 t3 = [src2], CxBrS4 - CxBrS2 cmp4.ge pt1, pt0 = 1, tmp ;; ld8.nt1 t4 = [src1], CxBrT0 - CxBrS3 ld8.nt1 t5 = [src2], CxBrT1 - CxBrS4 (pt1) sub loc5 = 1, tmp ;; ld8.nt1 t6 = [src1], CxApUNAT - CxBrT0 ld8.nt1 t7 = [src2], CxApLC - CxBrT1 (pt0) add loc5 = -1, tmp ;; ld8.nt1 loc0 = [src1], CxApEC - CxApUNAT ld8.nt1 t8 = [src2], CxApCCV - CxApLC (pt0) sub loc6 = 65, tmp ;; ld8.nt1 t9 = [src1], CxApDCR - CxApEC ld8.nt1 t10 = [src2], CxRsPFS - CxApCCV (pt1) shr.u t17 = t17, loc5 ;; ld8.nt1 loc1 = [src1], CxRsBSP - CxApDCR ld8.nt1 t11 = [src2], CxRsRSC - CxRsPFS (pt0) shl loc7 = t17, loc5 ;; ld8.nt1 loc2 = [src1], CxStIIP - CxRsBSP ld8.nt1 loc3 = [src2], CxStIFS - CxRsRSC (pt0) shr.u loc8 = t17, loc6 ;; ld8.nt1 loc9 = [src1] ld8.nt1 loc10 = [src2] (pt0) or t17 = loc7, loc8 ;; mov ar.unat = t17 add src1 = CxFltS0, a0 shr t12 = loc2, 3 ;; add src2 = CxFltS1, a0 and t12 = 0x3f, t12 // current rnat save index and t13 = 0x7f, loc10 // total frame size ;; mov ar.ccv = t10 add t14 = t13, t12 mov ar.pfs = t11 ;; Rrc20: cmp4.gt pt1, pt0 = 63, t14 ;; (pt0) add t14 = -63, t14 (pt0) add t13 = 1, t13 ;; nop.m 0 (pt1) shl t13 = t13, 3 (pt0) br.cond.spnt Rrc20 ;; add loc2 = loc2, t13 nop.f 0 mov pr = t16, -1 ldf.fill.nt1 fs0 = [src1], CxFltS2 - CxFltS0 ldf.fill.nt1 fs1 = [src2], CxFltS3 - CxFltS1 mov brp = t0 ;; ldf.fill.nt1 fs2 = [src1], CxFltT0 - CxFltS2 ldf.fill.nt1 fs3 = [src2], CxFltT1 - CxFltS3 mov bs0 = t1 ;; ldf.fill.nt1 ft0 = [src1], CxFltT2 - CxFltT0 ldf.fill.nt1 ft1 = [src2], CxFltT3 - CxFltT1 mov bs1 = t2 ;; ldf.fill.nt1 ft2 = [src1], CxFltT4 - CxFltT2 ldf.fill.nt1 ft3 = [src2], CxFltT5 - CxFltT3 mov bs2 = t3 ;; ldf.fill.nt1 ft4 = [src1], CxFltT6 - CxFltT4 ldf.fill.nt1 ft5 = [src2], CxFltT7 - CxFltT5 mov bs3 = t4 ;; ldf.fill.nt1 ft6 = [src1], CxFltT8 - CxFltT6 ldf.fill.nt1 ft7 = [src2], CxFltT9 - CxFltT7 mov bs4 = t5 ;; ldf.fill.nt1 ft8 = [src1], CxFltS4 - CxFltT8 ldf.fill.nt1 ft9 = [src2], CxFltS5 - CxFltT9 mov bt0 = t6 ;; ldf.fill.nt1 fs4 = [src1], CxFltS6 - CxFltS4 ldf.fill.nt1 fs5 = [src2], CxFltS7 - CxFltS5 mov bt1 = t7 ;; ldf.fill.nt1 fs6 = [src1], CxFltS8 - CxFltS6 ldf.fill.nt1 fs7 = [src2], CxFltS9 - CxFltS7 mov ar.lc = t8 ;; ldf.fill.nt1 fs8 = [src1], CxFltS10 - CxFltS8 ldf.fill.nt1 fs9 = [src2], CxFltS11 - CxFltS9 mov ar.ec = t9 ;; ldf.fill.nt1 fs10 = [src1], CxFltS12 - CxFltS10 ldf.fill.nt1 fs11 = [src2], CxFltS13 - CxFltS11 nop.i 0 ;; ldf.fill.nt1 fs12 = [src1], CxFltS14 - CxFltS12 ldf.fill.nt1 fs13 = [src2], CxFltS15 - CxFltS13 add loc6 = CxIntGp, a0 ;; ldf.fill.nt1 fs14 = [src1], CxFltS16 - CxFltS14 ldf.fill.nt1 fs15 = [src2], CxFltS17 - CxFltS15 add loc7 = CxIntT0, a0 ;; ldf.fill.nt1 fs16 = [src1], CxFltS18 - CxFltS16 ldf.fill.nt1 fs17 = [src2], CxFltS19 - CxFltS17 add t19 = CxRsRNAT, a0 ;; ldf.fill.nt1 fs18 = [src1] ldf.fill.nt1 fs19 = [src2] add t7 = CxStFPSR, a0 ;; ld8.nt1 loc8 = [t7] // load fpsr from context ld8.nt1 loc5 = [t19] // load rnat from context nop.i 0 ld8.fill.nt1 gp = [loc6], CxIntT1 - CxIntGp ld8.fill.nt1 t0 = [loc7], CxIntS0 - CxIntT0 ;; ld8.fill.nt1 t1 = [loc6], CxIntS1 - CxIntT1 ld8.fill.nt1 s0 = [loc7], CxIntS2 - CxIntS0 ;; ld8.fill.nt1 s1 = [loc6], CxIntS3 - CxIntS1 ld8.fill.nt1 s2 = [loc7], CxIntV0 - CxIntS2 ;; ld8.fill.nt1 s3 = [loc6], CxIntTeb - CxIntS3 ld8.fill.nt1 v0 = [loc7], CxIntT2 - CxIntV0 ;; ld8.fill.nt1 teb = [loc6], CxIntT3 - CxIntTeb ld8.fill.nt1 t2 = [loc7], CxIntSp - CxIntT2 ;; ld8.fill.nt1 t3 = [loc6], CxIntT4 - CxIntT3 ld8.fill.nt1 loc4 = [loc7], CxIntT5 - CxIntSp ;; ld8.fill.nt1 t4 = [loc6], CxIntT6 - CxIntT4 ld8.fill.nt1 t5 = [loc7], CxIntT7 - CxIntT5 ;; ld8.fill.nt1 t6 = [loc6], CxIntT8 - CxIntT6 ld8.fill.nt1 t7 = [loc7], CxIntT9 - CxIntT7 ;; ld8.fill.nt1 t8 = [loc6], CxIntT10 - CxIntT8 ld8.fill.nt1 t9 = [loc7], CxIntT11 - CxIntT9 ;; ld8.fill.nt1 t10 = [loc6], CxIntT12 - CxIntT10 ld8.fill.nt1 t11 = [loc7], CxIntT13 - CxIntT11 ;; ld8.fill.nt1 t12 = [loc6], CxIntT14 - CxIntT12 ld8.fill.nt1 t13 = [loc7], CxIntT15 - CxIntT13 ;; ld8.fill.nt1 t14 = [loc6], CxIntT16 - CxIntT14 ld8.fill.nt1 t15 = [loc7], CxIntT17 - CxIntT15 ;; ld8.fill.nt1 t16 = [loc6], CxIntT18 - CxIntT16 ld8.fill.nt1 t17 = [loc7], CxIntT19 - CxIntT17 ;; ld8.fill.nt1 t18 = [loc6], CxIntT20 - CxIntT18 ld8.fill.nt1 t19 = [loc7], CxIntT21 - CxIntT19 ;; ld8.fill.nt1 t20 = [loc6], CxIntT22 - CxIntT20 ld8.fill.nt1 t21 = [loc7] ;; rsm 1 << PSR_I ld8.fill.nt1 t22 = [loc6] ;; rsm 1 << PSR_IC movl t0 = 1 << IFS_V ;; mov ar.fpsr = loc8 // set fpsr mov ar.unat = loc0 ;; srlz.d or loc10 = t0, loc10 // set ifs valid bit ;; mov cr.iip = loc9 mov cr.ifs = loc10 bsw.0 ;; mov cr.dcr = loc1 mov r17 = loc2 // put BSP in a shadow reg or r16 = 0x3, loc3 // put RSE in eager mode mov ar.rsc = r0 // put RSE in enforced lazy nop.m 0 add r20 = CxStIPSR, a0 ;; ld8.nt1 r20 = [r20] // load IPSR mov r18 = loc4 // put SP in a shadow reg mov r19 = loc5 // put RNaTs in a shadow reg ;; alloc t0 = 0, 0, 0, 0 mov cr.ipsr = r20 mov sp = r18 ;; loadrs ;; mov ar.bspstore = r17 nop.i 0 ;; mov ar.rnat = r19 // set rnat register mov ar.rsc = r16 // restore RSC bsw.1 ;; LEAF_EXIT(RestoreAllRegs) //-------------------------------------------------------------------- // Routine: // // VOID // penter( // VOID) // // Description: // // This function loads the return address and current address in to two // parameter registers, then calls the C version (c_penter) // // Input: // // None // // Output: // // None // // Return value: // // None // // // //-------------------------------------------------------------------- LEAF_ENTRY(penter) .regstk 0, 1, 2, 0 // // This to be filled in when the compiler implements /Gh switch // br.call.sptk brp = c_penter LEAF_EXIT(penter)