windows-nt/Source/XPSP1/NT/base/ntos/ke/ia64/ctxswap.s
2020-09-26 16:20:57 +08:00

2133 lines
61 KiB
ArmAsm
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//++
//
// Copyright (c) 1989-2000 Microsoft Corporation
//
// Component Name:
//
// NT / KE
//
// Module Name:
//
// ctxswap.s
//
// Abstract:
//
// This module implements the IA64 Process and Thread Context Swaps.
//
// Author:
//
// David N. Cutler (davec) 5-Mar-1989
//
// Environment:
//
// Kernel mode only
//
// Revision History:
//
// Bernard Lint Jul-12-1995
//
// Initial IA64 version
//
//--
#include "ksia64.h"
.file "ctxswap.s"
.text
//
// Globals imported:
//
.global KiReadySummary
.global KiIdleSummary
.global KiDispatcherReadyListHead
.global KeTickCount
.global KiMasterSequence
.global KiMasterRid
.global PPerfGlobalGroupMask
PublicFunction(KiDeliverApc)
PublicFunction(KiSaveExceptionFrame)
PublicFunction(KiRestoreExceptionFrame)
PublicFunction(KiActivateWaiterQueue)
PublicFunction(KiReadyThread)
PublicFunction(KeFlushEntireTb)
PublicFunction(KiQuantumEnd)
PublicFunction(KiSyncNewRegionId)
PublicFunction(KiCheckForSoftwareInterrupt)
PublicFunction(KiSaveHigherFPVolatileAtDispatchLevel)
PublicFunction(KeAcquireQueuedSpinLockAtDpcLevel)
PublicFunction(KeReleaseQueuedSpinLockFromDpcLevel)
PublicFunction(KeTryToAcquireQueuedSpinLockRaiseToSynch)
PublicFunction(WmiTraceContextSwap)
#if DBG
PublicFunction(KeBugCheckEx)
#endif // DBG
SBTTL("Unlock Dispatcher Database")
//++
//--------------------------------------------------------------------
//
// VOID
// KiUnlockDispatcherDatabase (
// IN KIRQL OldIrql
// )
//
// Routine Description:
//
// This routine is entered at synchronization level with the dispatcher
// database locked. Its function is to either unlock the dispatcher
// database and return or initiate a context switch if another thread
// has been selected for execution.
//
// N.B. A context switch CANNOT be initiated if the previous IRQL
// is greater than or equal to DISPATCH_LEVEL.
//
// N.B. This routine is carefully written to be a leaf function. If,
// however, a context swap should be performed, the routine is
// switched to a nested function (it branches, without linking, to
// KxUnlockDispatcherDatabase, which builds the switch frame).
//
// Arguments:
//
// OldIrql (a0) - Supplies the IRQL when the dispatcher database
// lock was acquired (in low order byte, not zero extended).
//
// Return Value:
//
// None.
//
//--------------------------------------------------------------------
//--
NESTED_ENTRY(KiUnlockDispatcherDatabase)
NESTED_SETUP(1,3,1,0)
//
// Register aliases
//
// N.B. pNotNl and pNoAPC are both aliases of pt2. In the visible code
// pNotNl is only used in this routine, while pNoAPC/pAPC are only
// used by KxUnlockDispatcherDatabase below.
//
rDPC = loc2 // DPC active flag
rpT1 = t1 // temp pointer
rpT2 = t2 // temp pointer
rpT3 = t3 // temp pointer
rT1 = t5 // temp regs
rT2 = t6
rPrcb = t8 // PRCB pointer
pNotNl = pt2 // true if next thread not NULL
pIRQGE = pt3 // true if DISPATCH_LEVEL <= old irql
pIRQLT = pt4 // true if DISPATCH_LEVEL > old irql
pDPC = pt5 // true if no DPC is active (dispatch interrupt must be requested)
pNoAPC = pt2 // do not dispatch APC
pAPC = pt9
PROLOGUE_END
//
// Check if a thread has been scheduled to execute on the current processor
//
movl rPrcb = KiPcr + PcPrcb
;;
LDPTR (rPrcb, rPrcb) // rPrcb -> PRCB
;;
add rpT1 = PbNextThread, rPrcb // -> next thread
add rpT2 = PbDpcRoutineActive,rPrcb // -> DPC active flag
;;
LDPTR (v0, rpT1) // v0 = next thread
;;
cmp.ne pNotNl = zero, v0 // pNotNl = next thread is not NULL
zxt1 a0 = a0 // isolate old IRQL
;;
//
// The .unc compare clears both pIRQGE and pIRQLT when pNotNl is false,
// so with no next thread neither the branch below nor the DPC flag load
// is taken and rDPC keeps its speculated value of 1.
//
(pNotNl) cmp.leu.unc pIRQGE, pIRQLT = DISPATCH_LEVEL, a0
mov rDPC = 1 // speculate that DPC is active
(pIRQLT) br.spnt KxUnlockDispatcherDatabase
;;
//
// Case 1:
// Next thread is NULL:
// Release dispatcher database lock, restore IRQL to its previous level
// and return
//
//
// Case 2:
// A new thread has been selected to run on the current processor, but
// the new IRQL is not below dispatch level. Release the dispatcher
// lock and restore IRQL. If the current processor is
// not executing a DPC, then request a dispatch interrupt on the current
// processor.
//
// At this point pNotNl = 1 if thread not NULL, 0 if NULL
//
(pIRQGE) ld4 rDPC = [rpT2] // rDPC.4 = DPC active flag
#if !defined(NT_UP)
add out0 = (LockQueueDispatcherLock * 16) + PbLockQueue, rPrcb
br.call.sptk brp = KeReleaseQueuedSpinLockFromDpcLevel
#endif // !defined(NT_UP)
;;
LOWER_IRQL(a0)
cmp4.eq pDPC = rDPC, zero // pDPC = request DPC intr
REQUEST_DISPATCH_INT(pDPC) // request DPC interrupt
NESTED_RETURN
NESTED_EXIT(KiUnlockDispatcherDatabase)
//
// N.B. This routine is carefully written as a nested function.
// Control only reaches this routine from above (by a plain branch
// from KiUnlockDispatcherDatabase, so brp still holds the original
// caller's return address).
//
// rPrcb contains the address of PRCB
// v0 contains the next thread
// a0 contains the zero-extended old IRQL
//
// The routine builds a switch frame of SwitchFrameLength bytes on the
// stack, saves the preserved register state into the exception frame
// part of it (at SwExFrame+STACK_SCRATCH_AREA from sp), readies the
// current thread, swaps context to the next thread, optionally delivers
// a kernel APC, then restores the preserved state and returns.
//
NESTED_ENTRY(KxUnlockDispatcherDatabase)
PROLOGUE_BEGIN
.regstk 1, 2, 1, 0
alloc t16 = ar.pfs, 1, 2, 1, 0
.save rp, loc0
mov loc0 = brp
.fframe SwitchFrameLength
add sp = -SwitchFrameLength, sp
;;
.save ar.unat, loc1
mov loc1 = ar.unat
//
// t0 and t1 are two interleaved store pointers; each store post-increments
// its pointer to the slot that the same pointer writes next.
//
add t0 = ExFltS19+SwExFrame+STACK_SCRATCH_AREA, sp
add t1 = ExFltS18+SwExFrame+STACK_SCRATCH_AREA, sp
;;
//
// Spill the preserved floating point registers fs19..fs0, two per
// instruction group. The preserved branch registers and ar.lc are copied
// into temporaries along the way so they can be stored afterwards.
//
.save.gf 0x0, 0xC0000
stf.spill [t0] = fs19, ExFltS17-ExFltS19
stf.spill [t1] = fs18, ExFltS16-ExFltS18
;;
.save.gf 0x0, 0x30000
stf.spill [t0] = fs17, ExFltS15-ExFltS17
stf.spill [t1] = fs16, ExFltS14-ExFltS16
mov t10 = bs4
;;
.save.gf 0x0, 0xC000
stf.spill [t0] = fs15, ExFltS13-ExFltS15
stf.spill [t1] = fs14, ExFltS12-ExFltS14
mov t11 = bs3
;;
.save.gf 0x0, 0x3000
stf.spill [t0] = fs13, ExFltS11-ExFltS13
stf.spill [t1] = fs12, ExFltS10-ExFltS12
mov t12 = bs2
;;
.save.gf 0x0, 0xC00
stf.spill [t0] = fs11, ExFltS9-ExFltS11
stf.spill [t1] = fs10, ExFltS8-ExFltS10
mov t13 = bs1
;;
.save.gf 0x0, 0x300
stf.spill [t0] = fs9, ExFltS7-ExFltS9
stf.spill [t1] = fs8, ExFltS6-ExFltS8
mov t14 = bs0
;;
.save.gf 0x0, 0xC0
stf.spill [t0] = fs7, ExFltS5-ExFltS7
stf.spill [t1] = fs6, ExFltS4-ExFltS6
mov t15 = ar.lc
;;
.save.gf 0x0, 0x30
stf.spill [t0] = fs5, ExFltS3-ExFltS5
stf.spill [t1] = fs4, ExFltS2-ExFltS4
;;
.save.f 0xC
stf.spill [t0] = fs3, ExFltS1-ExFltS3 // save fs3
stf.spill [t1] = fs2, ExFltS0-ExFltS2 // save fs2
;;
.save.f 0x3
stf.spill [t0] = fs1, ExBrS4-ExFltS1 // save fs1
stf.spill [t1] = fs0, ExBrS3-ExFltS0 // save fs0
;;
//
// Store the preserved branch registers captured in t10..t14 above.
//
.save.b 0x18
st8 [t0] = t10, ExBrS2-ExBrS4 // save bs4
st8 [t1] = t11, ExBrS1-ExBrS3 // save bs3
;;
.save.b 0x6
st8 [t0] = t12, ExBrS0-ExBrS2 // save bs2
st8 [t1] = t13, ExIntS2-ExBrS1 // save bs1
;;
.save.b 0x1
st8 [t0] = t14, ExIntS3-ExBrS0 // save bs0
// t12 (bs2 copy) was stored in the previous group and is free for reuse.
movl t12 = KiPcr + PcCurrentThread
;;
//
// Spill the preserved integer registers s3..s0; their NaT bits are
// collected in ar.unat and captured into t4 below.
//
.save.gf 0xC, 0x0
.mem.offset 0,0
st8.spill [t0] = s3, ExIntS1-ExIntS3 // save s3
.mem.offset 8,0
st8.spill [t1] = s2, ExIntS0-ExIntS2 // save s2
;;
.save.gf 0x3, 0x0
.mem.offset 0,0
st8.spill [t0] = s1, ExApLC-ExIntS1 // save s1
.mem.offset 8,0
st8.spill [t1] = s0, ExApEC-ExIntS0 // save s0
;;
//
// t16 holds the previous function state returned by alloc; it is kept in
// the ExApEC slot and reloaded into ar.pfs by the epilogue.
//
.savepsp ar.pfs, ExceptionFrameLength-ExApEC-STACK_SCRATCH_AREA
st8 [t1] = t16, ExIntNats-ExApEC
mov t4 = ar.unat // captured Nats of s0-s3
mov s0 = rPrcb
LDPTR (s1, t12) // current thread
;;
.savepsp ar.lc, ExceptionFrameLength-ExApLC-STACK_SCRATCH_AREA
st8 [t0] = t15
.savepsp @priunat, ExceptionFrameLength-ExIntNats-STACK_SCRATCH_AREA
st8 [t1] = t4 // save Nats of s0-s3
mov s2 = v0
PROLOGUE_END
//
// From here on: s0 = PRCB, s1 = current (old) thread, s2 = next thread,
// which is the register contract documented for SwapContext.
//
add rpT2 = PbNextThread, s0 // -> next thread
add out0 = ThWaitIrql, s1 // -> previous IRQL
;;
STPTRINC (rpT2, zero,PbCurrentThread-PbNextThread) // clear NextThread
st1 [out0] = a0, ThIdleSwapBlock-ThWaitIrql // save old IRQL
mov rpT3 = 1
;;
//
// Reready current thread for execution and swap context to the selected
// thread.
//
// Note: Set IdleSwapBlock in the current thread so no idle processor
// can switch to this processor before it is removed from the current
// processor.
//
// The post-decrement on the IdleSwapBlock store leaves out0 pointing at
// the start of the old thread object, which is the argument passed to
// KiReadyThread.
//
STPTR (rpT2, s2) // set current thread object
st1 [out0] = rpT3, -ThIdleSwapBlock// out0 -> previous thread
br.call.sptk brp = KiReadyThread
;;
br.call.sptk brp = SwapContext
;;
//
// Lower IRQL, deallocate exception/switch frame.
//
// N.B. SwapContext releases the dispatcher database lock.
//
// N.B. v0 contains the kernel APC pending state on return.
//
// N.B. s2 contains the address of the new thread on return.
//
add rpT2 = ThWaitIrql, s2 // -> ThWaitIrql
cmp.ne pAPC, pNoAPC = zero, v0
;;
ld1 a0 = [rpT2] // a0 = original wait IRQL
;;
//
// pNoAPC is already set when no APC is pending; when one is pending it is
// recomputed so that the APC is skipped unless the wait IRQL is zero.
//
(pAPC) cmp.ne pNoAPC = zero, a0 // APC pending and IRQL == 0
(pNoAPC) br.spnt Kudd_Exit
;;
//
// Resize the frame to three output registers for the KiDeliverApc call.
//
.regstk 1, 2, 3, 0
alloc t16 = ar.pfs, 1, 2, 3, 0
mov rT2 = APC_LEVEL
;;
SET_IRQL(rT2)
mov out0 = KernelMode
mov out1 = zero
mov out2 = zero
br.call.sptk brp = KiDeliverApc
;;
//
// Lower IRQL to wait level, set return status, restore registers, and return.
//
Kudd_Exit:
LOWER_IRQL(a0) // a0 = new irql
add out0 = STACK_SCRATCH_AREA+SwExFrame, sp
br.call.sptk brp = KiRestoreExceptionFrame
;;
//
// Reload the function state saved in the ExApEC slot by the prologue,
// then restore brp, ar.unat and ar.pfs and pop the switch frame.
//
add rpT1 = ExApEC+SwExFrame+STACK_SCRATCH_AREA, sp
;;
ld8 rT1 = [rpT1]
mov brp = loc0
;;
mov ar.unat = loc1
nop.f 0
mov ar.pfs = rT1
.restore
add sp = SwitchFrameLength, sp
nop.i 0
br.ret.sptk brp
;;
NESTED_EXIT(KxUnlockDispatcherDatabase)
SBTTL("Swap Thread")
//++
//--------------------------------------------------------------------
//
// BOOLEAN
// KiSwapContext (
// IN PKTHREAD Thread
// )
//
// Routine Description:
//
// This routine saves the non-volatile registers, marshals the
// arguments for SwapContext and calls SwapContext to perform
// the actual thread switch.
//
// Arguments:
//
// Thread (a0) - Supplies the address of the new thread.
//
// Return Value:
//
// If a kernel APC is pending, then a value of TRUE is returned.
// Otherwise, FALSE is returned. The value is forced to FALSE when
// the new thread's saved wait IRQL is nonzero.
//
// Notes:
//
// GP valid on entry -- GP is not switched, just use kernel GP
//--------------------------------------------------------------------
//--
NESTED_ENTRY(KiSwapContext)
//
// Register aliases
//
pNoAPC = pt2 // do not dispatch APC
rpT1 = t0 // temp pointer
rpT2 = t1 // temp pointer
rT1 = t10 // temp regs
//
// The prologue below is the same register-save sequence used by
// KxUnlockDispatcherDatabase: allocate a switch frame and save the
// preserved FP, branch and integer registers, ar.pfs, ar.lc and the
// spill NaT bits into the exception frame at SwExFrame+STACK_SCRATCH_AREA.
//
PROLOGUE_BEGIN
.regstk 1, 2, 1, 0
alloc t16 = ar.pfs, 1, 2, 1, 0
.save rp, loc0
mov loc0 = brp
.fframe SwitchFrameLength
add sp = -SwitchFrameLength, sp
;;
.save ar.unat, loc1
mov loc1 = ar.unat
// t0/t1 are two interleaved, post-incremented store pointers.
add t0 = ExFltS19+SwExFrame+STACK_SCRATCH_AREA, sp
add t1 = ExFltS18+SwExFrame+STACK_SCRATCH_AREA, sp
;;
.save.gf 0x0, 0xC0000
stf.spill [t0] = fs19, ExFltS17-ExFltS19
stf.spill [t1] = fs18, ExFltS16-ExFltS18
;;
.save.gf 0x0, 0x30000
stf.spill [t0] = fs17, ExFltS15-ExFltS17
stf.spill [t1] = fs16, ExFltS14-ExFltS16
mov t10 = bs4
;;
.save.gf 0x0, 0xC000
stf.spill [t0] = fs15, ExFltS13-ExFltS15
stf.spill [t1] = fs14, ExFltS12-ExFltS14
mov t11 = bs3
;;
.save.gf 0x0, 0x3000
stf.spill [t0] = fs13, ExFltS11-ExFltS13
stf.spill [t1] = fs12, ExFltS10-ExFltS12
mov t12 = bs2
;;
.save.gf 0x0, 0xC00
stf.spill [t0] = fs11, ExFltS9-ExFltS11
stf.spill [t1] = fs10, ExFltS8-ExFltS10
mov t13 = bs1
;;
.save.gf 0x0, 0x300
stf.spill [t0] = fs9, ExFltS7-ExFltS9
stf.spill [t1] = fs8, ExFltS6-ExFltS8
mov t14 = bs0
;;
.save.gf 0x0, 0xC0
stf.spill [t0] = fs7, ExFltS5-ExFltS7
stf.spill [t1] = fs6, ExFltS4-ExFltS6
mov t15 = ar.lc
;;
.save.gf 0x0, 0x30
stf.spill [t0] = fs5, ExFltS3-ExFltS5
stf.spill [t1] = fs4, ExFltS2-ExFltS4
;;
.save.f 0xC
stf.spill [t0] = fs3, ExFltS1-ExFltS3 // save fs3
stf.spill [t1] = fs2, ExFltS0-ExFltS2 // save fs2
;;
.save.f 0x3
stf.spill [t0] = fs1, ExBrS4-ExFltS1 // save fs1
stf.spill [t1] = fs0, ExBrS3-ExFltS0 // save fs0
;;
// Store the branch register copies captured in t10..t14 above.
.save.b 0x18
st8 [t0] = t10, ExBrS2-ExBrS4 // save bs4
st8 [t1] = t11, ExBrS1-ExBrS3 // save bs3
;;
.save.b 0x6
st8 [t0] = t12, ExBrS0-ExBrS2 // save bs2
st8 [t1] = t13, ExIntS2-ExBrS1 // save bs1
;;
.save.b 0x1
st8 [t0] = t14, ExIntS3-ExBrS0 // save bs0
;;
// Spill s3..s0; their NaT bits accumulate in ar.unat (captured in t4).
.save.gf 0xC, 0x0
.mem.offset 0,0
st8.spill [t0] = s3, ExIntS1-ExIntS3 // save s3
.mem.offset 8,0
st8.spill [t1] = s2, ExIntS0-ExIntS2 // save s2
;;
.save.gf 0x3, 0x0
.mem.offset 0,0
st8.spill [t0] = s1, ExApLC-ExIntS1 // save s1
.mem.offset 8,0
st8.spill [t1] = s0, ExApEC-ExIntS0 // save s0
;;
// t16 = previous function state from alloc; reloaded into ar.pfs on exit.
.savepsp ar.pfs, ExceptionFrameLength-ExApEC-STACK_SCRATCH_AREA
st8 [t1] = t16, ExIntNats-ExApEC
mov t4 = ar.unat // captured Nats of s0-s3
;;
.savepsp ar.lc, ExceptionFrameLength-ExApLC-STACK_SCRATCH_AREA
st8 [t0] = t15
.savepsp @priunat, ExceptionFrameLength-ExIntNats-STACK_SCRATCH_AREA
st8 [t1] = t4 // save Nats of s0-s3
PROLOGUE_END
//
// For the call to SwapContext-
//
// s0 // Prcb address
// s1 // old thread address
// s2 // new thread address
// pt0 = 1
//
mov s2 = a0 // s2 <- New Thread
movl rpT1 = KiPcr + PcPrcb
;;
// The post-increment moves rpT1 on to the PCR's current thread field.
LDPTRINC (s0, rpT1, PcCurrentThread-PcPrcb)// s0 <- Prcb
;;
LDPTR (s1, rpT1) // s1 <- Old Thread
add rpT2 = PbCurrentThread, s0
;;
//
// Swap context to the next thread.
//
STPTR (rpT2, a0) // Set new thread current
cmp.eq pt0 = zero, zero // indicate lock context swap
br.call.sptk brp = SwapContext // call SwapContext(prcb, OldTh, NewTh)
;;
//
// Deallocate exception/switch frame.
//
// N.B. SwapContext releases the dispatcher database lock.
//
// N.B. v0 contains the kernel APC pending state on return, ie, 0 if
// no APC pending, 1 if APC pending. v0 will be forced to 0 if
// the new IRQL doesn't allow APCs.
//
// N.B. KiRestoreExceptionFrame doesn't touch v0, t21 or t22.
//
// The wait IRQL and the saved PFS are therefore loaded into t21/t22
// before the call and consumed after it.
//
add rpT2 = ThWaitIrql, s2 // -> ThWaitIrql
add rpT1 = ExApEC+SwExFrame+STACK_SCRATCH_AREA, sp
add out0 = STACK_SCRATCH_AREA+SwExFrame, sp
;;
ld1 t21 = [rpT2] // t21 = original wait IRQL
ld8 t22 = [rpT1] // t22 = PFS
br.call.sptk brp = KiRestoreExceptionFrame
;;
mov brp = loc0
cmp.ne pNoAPC = zero, t21 // no APC if IRQL != 0
;;
mov ar.unat = loc1
nop.f 0
mov ar.pfs = t22
.restore
add sp = SwitchFrameLength, sp
(pNoAPC) mov v0 = zero
br.ret.sptk brp
;;
NESTED_EXIT(KiSwapContext)
SBTTL("Swap Context to Next Thread")
//++
//--------------------------------------------------------------------
// Routine:
//
// SwapContext
//
// Routine Description:
//
// This routine is called to swap context from one thread to the next.
//
// The first part of the routine (up to the SwapFromIdle alternate
// entry) saves the old thread's state: return link, ar.pfs, ar.fpsr,
// preserved predicates, ar.bsp and ar.rnat go into the switch frame
// at STACK_SCRATCH_AREA from sp; the IA32 application registers and
// the higher FP registers are saved when required.
//
// Arguments:
//
// s0 - Address of Processor Control Block (PRCB).
// s1 - Address of previous thread object.
// s2 - Address of next thread object.
//
// Return value:
//
// v0 - Kernel APC pending flag
// s0 - Address of Processor Control Block (PRCB).
// s1 - Address of previous thread object.
// s2 - Address of current thread object.
//
// Note:
// Kernel GP is not saved and restored across context switch
//
// !!WARNING!! - Thierry. 03/01/2000.
// Be aware that this implementation is a result of performance analysis.
// Please consider this when you are making changes...
//
//--------------------------------------------------------------------
//--
NESTED_ENTRY(SwapContext)
//
// Register aliases
//
// N.B. rNewBSP/rOldBSP share t16 and rOldRNAT/rNewRNAT share t17; the
// old values are stored before the new values are loaded.
//
rT1 = t1 // temp
rT2 = t2 // temp
rT3 = t3 // temp
rNewproc = t4 // next process object
rOldproc = t5 // previous process object
rpThBSL = t6 // pointer to new thread backing store limit
rpT1 = t7 // temp pointer
rpT2 = t8 // temp pointer
rpT3 = t9 // temp pointer
rAr1 = t10
rAr2 = t11
rAr3 = t12
rAr4 = t13
rNewIKS = t14 // new initial kernel stack
rNewKSL = t15 // new kernel stack limit
rNewBSP = t16 // new thread BSP/BSPSTORE
rOldBSP = t16 // old thread BSP
rOldRNAT = t17 // old thread RNAT
rNewRNAT = t17 // new thread RNAT
rOldSbase = t18 // old thread kstack base
pUsTh = pt4 // is user thread?
pKrTh = pt5 // is kernel thread?
pSave = pt7 // is high fp set dirty?
pDiff = ps4 // if new and old process different
pSame = ps5 // if new and old process same
//
// Set new thread's state to running. Note this must be done
// under the dispatcher lock so that KiSetPriorityThread sees
// the correct state.
//
PROLOGUE_BEGIN
#if !defined(NT_UP)
alloc rT2 = ar.pfs, 0, 0, 4, 0
mov rT1 = brp // move from brp takes 2 cycles
add rpT3 = ThState, s2
;;
lfetch.excl [rpT3]
mov rAr1 = Running
add rpT2 = SwPFS+STACK_SCRATCH_AREA, sp
;;
//
// pfs and the return link are saved before the first call below, since
// rT1/rT2 are temporaries. The context swap lock is then acquired and
// the dispatcher lock released.
//
add out0 = (LockQueueContextSwapLock * 16) + PbLockQueue, s0
.savesp ar.pfs, SwPFS+STACK_SCRATCH_AREA
st8.nta [rpT2] = rT2, SwRp-SwPFS // save pfs
;;
.savesp brp, SwRp+STACK_SCRATCH_AREA
st8.nta [rpT2] = rT1 // save return link
st1.nta [rpT3] = rAr1 // set thread state to Running
br.call.sptk brp = KeAcquireQueuedSpinLockAtDpcLevel
;;
//
// Release DispatcherLock.
//
add out0 = (LockQueueDispatcherLock * 16) + PbLockQueue, s0
br.call.sptk brp = KeReleaseQueuedSpinLockFromDpcLevel
;;
mov out0 = ar.fpsr // move from ar.fpsr takes 12 cycles
movl rpT1 = KiPcr+PcHighFpOwner // setup for prefetching
;;
{ .mmi
lfetch [rpT1]
cmp.ne pUsTh = zero, teb // test for ia32 save required
// must not have a nop.f for next 10 cycles--
// Using temporarely the explicit templating
// for the next cycles.
add out1 = ThStackBase, s1 // move early to start access for rOldSbase
}
{ .mmi
add rpT1 = SwFPSR+STACK_SCRATCH_AREA, sp
add rpT2 = SwPreds+STACK_SCRATCH_AREA, sp
nop.i 0x0
}
;;
{ .mmi
ld8.nta rOldSbase = [out1] // speculative start early for ia32 saves
lfetch.excl [rpT1]
add out2 = ThNumber, s2 // setup for prefetching
}
{ .mmi
mov.m ar.rsc = r0 // put RSE in lazy mode
mov rOldBSP = ar.bsp // move from ar.bsp takes 12 cycles
nop.i 0x0
}
;;
{ .mmi
lfetch [out2]
nop.m 0x0
mov rT1 = pr // move from pr takes 2 cycles
}
;;
{ .mmi
flushrs
mov rT3 = psr.um // move from psr.um takes 12 cycles
nop.i 0x0
}
;;
{ .mmi
lfetch.excl [rpT2]
mov.m rOldRNAT = ar.rnat // move from ar.rnat takes 5 cycles
add out2 = @gprel(PPerfGlobalGroupMask), gp
}
;;
{ .mli
lfetch [out2]
movl out3 = KiPcr + PcInterruptionCount // INTERRUPTION_LOGGING on or off, we are prefetching this line.
// If any real performance problem is detected, we will undef these lines.
}
;;
{ .mmi
lfetch [out3]
add rpT3 = SwRnat+STACK_SCRATCH_AREA, sp
}
;;
#else // NT_UP
alloc rT2 = ar.pfs, 0, 0, 4, 0
cmp.ne pUsTh = zero, teb // test for ia32 save required
;;
mov.m ar.rsc = r0 // put RSE in lazy mode
add out1 = ThStackBase, s1 // move early to start access for rOldSbase
mov out0 = ar.fpsr // move from ar.fpsr takes 12 cycles
// must not have a nop.f for next 10 cycles--
// Using temporarely the explicit templating
// for the next cycles.
;;
{ .mmi
ld8.nta rOldSbase = [out1] // speculative start early for ia32 saves
mov rOldBSP = ar.bsp // move from ar.bsp takes 12 cycles
add rpT1 = SwRp+STACK_SCRATCH_AREA, sp
}
;;
flushrs
mov rT3 = psr.um // move from psr.um takes 12 cycles
add rpT2 = SwPFS+STACK_SCRATCH_AREA, sp
;;
mov.m rOldRNAT = ar.rnat // move from ar.rnat takes 5 cycles
mov rT1 = brp // move from brp takes 2 cycles
add rpT3 = ThState, s2
;;
{ .mmi
mov rAr1 = Running
.savesp brp, SwRp+STACK_SCRATCH_AREA
st8.nta [rpT1] = rT1, SwFPSR-SwRp // save return link
nop.i 0x0
}
;;
{ .mii
st1.nta [rpT3] = rAr1 // set thread state to Running
mov rT1 = pr // move from pr takes 2 cycles
nop.i 0x0
}
;;
{ .mii
.savesp ar.pfs, SwPFS+STACK_SCRATCH_AREA
st8.nta [rpT2] = rT2, SwPreds-SwPFS // save pfs
add rpT3 = SwRnat+STACK_SCRATCH_AREA, sp
nop.i 0x0
}
;;
#endif // NT_UP
//
// Common to both builds. On arrival here:
// rpT1 -> SwFPSR slot, rpT2 -> SwPreds slot, rpT3 -> SwRnat slot,
// out0 = ar.fpsr, rT1 = pr, rT3 = psr.um,
// rOldBSP = ar.bsp, rOldRNAT = ar.rnat, rOldSbase = old thread stack base.
//
{ .mmi
st8.nta [rpT3] = rOldRNAT
nop.m 0x0
nop.i 0x0
}
st8 [rpT1] = out0, SwBsp-SwFPSR // save kernel FPSR
st8 [rpT2] = rT1 // save preserved predicates
;;
st8.nta [rpT1] = rOldBSP
add rpT3 = ThKernelBStore, s1
tbit.nz pSave = rT3, PSR_MFH // check mfh bit
(pUsTh) br.call.spnt brp = SwapContextIA32Save
;;
//
// The old BSP is also recorded in the old thread object; the higher FP
// registers are saved only when the PSR.mfh bit tested above was set.
//
st8.nta [rpT3] = rOldBSP
(pSave) add out0 = -ThreadStateSaveAreaLength+TsHigherFPVolatile, rOldSbase
(pSave) br.call.spnt brp = KiSaveHigherFPVolatileAtDispatchLevel
;;
//
// Acquire the context swap lock so the address space of the old process
// cannot be deleted and then release the dispatcher database lock.
//
// N.B. This lock is used to protect the address space until the context
// switch has sufficiently progressed to the point where the address
// space is no longer needed. This lock is also acquired by the reaper
// thread before it finishes thread termination.
//
// N.B. In this version the acquire/release pair described above is
// performed at the top of the MP (!NT_UP) prologue, not at this
// point; no lock operation follows this comment.
//
PROLOGUE_END
//
// ***** TBD ****** Save performance counters? (user vs. kernel)
//
//
// Accumlate the total time spent in a thread.
//
// N.B. The PERF_DATA block below is unported MIPS code; it is not valid
// IA64 assembly and PERF_DATA must remain undefined.
//
#if defined(PERF_DATA)
**** TBD **** MIPS code
addu a0,sp,ExFltF20 // compute address of result
move a1,zero // set address of optional frequency
jal KeQueryPerformanceCounter // query performance counter
lw t0,ExFltF20(sp) // get current cycle count
lw t1,ExFltF20 + 4(sp) //
lw t2,PbStartCount(s0) // get starting cycle count
lw t3,PbStartCount + 4(s0) //
sw t0,PbStartCount(s0) // set starting cycle count
sw t1,PbStartCount + 4(s0) //
lw t4,EtPerformanceCountLow(s1) // get accumulated cycle count
lw t5,EtPerformanceCountHigh(s1) //
subu t6,t0,t2 // subtract low parts
subu t7,t1,t3 // subtract high parts
sltu v0,t0,t2 // generate borrow from high part
subu t7,t7,v0 // subtract borrow
addu t6,t6,t4 // add low parts
addu t7,t7,t5 // add high parts
sltu v0,t6,t4 // generate carry into high part
addu t7,t7,v0 // add carry
sw t6,EtPerformanceCountLow(s1) // set accumulated cycle count
sw t7,EtPerformanceCountHigh(s1) //
#endif // defined(PERF_DATA)
//
// The following entry point is used to switch from the idle thread to
// another thread.
//
;;
ALTERNATE_ENTRY(SwapFromIdle)
//
// Second half of SwapContext. Register contract at this point:
// s0 = PRCB, s1 = old thread, s2 = new thread.
// Uses the register aliases declared at the top of SwapContext.
//
alloc rT1 = ar.pfs, 2, 0, 2, 0
//
// Check if we are tracing context swaps
//
// WmiTraceContextSwap(old thread, new thread) is called only when
// PPerfGlobalGroupMask is non-NULL and the PERF_CONTEXTSWAP_FLAG bit is
// set in the 4-byte word at PERF_CONTEXTSWAP_OFFSET from it.
//
mov out0 = s1 // assign out0 to old ethread pointer
add rpT3 = @gprel(PPerfGlobalGroupMask), gp
;;
ld8.nta rpT3 = [rpT3] // get value of PperfGlobalGroupMask
mov out1 = s2 // assign out1 to new ethread pointer
;;
add rpT2 = PERF_CONTEXTSWAP_OFFSET, rpT3
cmp.ne pt3 = zero, rpT3 // if it's non-zero, then trace on
;;
(pt3) ld4.nta rpT2 = [rpT2]
;;
(pt3) and rpT2 = PERF_CONTEXTSWAP_FLAG, rpT2
;;
// .unc clears pt4 when pt3 is false, so the call is skipped in that case.
(pt3) cmp.ne.unc pt4 = zero, rpT2
(pt4) br.call.spnt brp = WmiTraceContextSwap // optimize for no tracing case
;;
//
// Get address of old and new process objects.
//
add rpT2 = ThApcState+AsProcess,s2 // -> new thread AsProcess
add rpT1 = ThApcState+AsProcess,s1 // -> old thread AsProcess
;;
LDPTR (rOldproc, rpT1) // old process
LDPTR (rNewproc, rpT2) // new process
#if !defined(NT_UP)
//
// In MP system,
// should a thread address is recycled and the thread is migrated to a
// processor that holds the stale values in the high fp register set,
// set KiPcr->HighFpOwner to zero (i.e. when pt4 is set to TRUE)
//
// rT1 = new thread's ThNumber, rT2 = PcHighFpOwner, rT3 = PcNumber.
// pt4 is true when HighFpOwner equals the new thread but the thread's
// number differs from this processor's number.
//
add rpT1 = ThNumber, s2
movl rpT2 = KiPcr+PcHighFpOwner
;;
ld1 rT1 = [rpT1]
ld8 rT2 = [rpT2], PcNumber-PcHighFpOwner
add out0 = ThIdleSwapBlock, s1
;;
ld1 rT3 = [rpT2], PcHighFpOwner-PcNumber
st1 [out0] = zero // clear OldThread->IdleSwapBlock
cmp.eq pt3 = rT2, s2
;;
(pt3) cmp.ne.unc pt4 = rT1, rT3
;;
(pt4) st8 [rpT2] = zero
#endif // !defined(NT_UP)
;;
flushrs
FAST_DISABLE_INTERRUPTS
;;
//
// Thierry - 03/29/2000
// It should be noticed that the performance analysis for SwapContext
// was done with INTERRUPTION_LOGGING defined as 1.
//
#define INTERRUPTION_LOGGING 1
#if defined(INTERRUPTION_LOGGING)
// For Conditional Interrupt Logging
//
// gp is temporarily replaced with _gp to read KiVectorLogMask and then
// restored. Logging is skipped when bit ContextSwitchBit of the mask is
// clear. Otherwise one record is written: switch type (0x91 process
// switch, 0x90 thread switch), new thread, old thread, old sp. The record
// index is the interruption count masked with
// MAX_NUMBER_OF_IHISTORY_RECORDS - 1, records are 32 bytes apart (shl 5),
// and the history base is at offset 0x1000 from KiPcr.
//
#define ContextSwitchBit 63
.global KiVectorLogMask
mov rT3 = gp
;;
movl gp = _gp
;;
add rpT1 = @gprel(KiVectorLogMask), gp
;;
ld8 rT1 = [rpT1]
mov gp = rT3
;;
tbit.z pt4 = rT1, ContextSwitchBit
(pt4) br.cond.sptk EndOfLogging0
movl rpT1 = KiPcr+PcInterruptionCount
mov rT3 = MAX_NUMBER_OF_IHISTORY_RECORDS - 1
cmp.ne pDiff,pSame=rOldproc,rNewproc
;;
(pDiff) mov rT1 = 0x91 // process switch
ld4.nt1 rT2 = [rpT1] // get current count
;;
(pSame) mov rT1 = 0x90 // thread switch
add rpT3 = 1, rT2 // incr count
and rT2 = rT3, rT2 // index of current entry
add rpT2 = 0x1000-PcInterruptionCount, rpT1 // base of history
;;
st4.nta [rpT1] = rpT3 // save count
shl rT2 = rT2, 5 // offset of current entry
;;
add rpT2 = rpT2, rT2 // address of current entry
;;
st8 [rpT2] = rT1, 8 // save switch type
;;
st8 [rpT2] = s2, 8 // save new thread pointer
;;
st8 [rpT2] = s1, 8 // save old thread
;;
st8 [rpT2] = sp // save old sp
;;
// For Conditional Interrupt Logging
EndOfLogging0:
#endif // INTERRUPTION_LOGGING
mov ar.rsc = r0 // put RSE in lazy mode
add rpT1 = ThInitialStack, s2
add rpT2 = ThKernelStack, s1
;;
//
// Store the kernel stack pointer in the previous thread object,
// load the new kernel stack pointer from the new thread object,
// switch backing store pointers, select new process id and swap
// to the new process.
//
// rpT1 walks the new thread object by post-increment (InitialStack,
// KernelStack, StackLimit, InitialBStore, BStoreLimit, DebugActive, Teb,
// ApcState.KernelApcPending) while rpT2 walks the matching PCR fields.
//
ld8.nta rNewIKS = [rpT1], ThKernelStack-ThInitialStack
st8.nta [rpT2] = sp // save current sp
;;
ld8.nta sp = [rpT1], ThStackLimit-ThKernelStack
movl rpT2 = KiPcr + PcInitialStack
;;
alloc rT1 = 0,0,0,0 // make current frame 0 size
ld8.nta rNewKSL = [rpT1], ThInitialBStore-ThStackLimit
;;
loadrs // invalidate RSE and ALAT
ld8.nta rT1 = [rpT1], ThBStoreLimit-ThInitialBStore
;;
ld8.nta rT2 = [rpT1], ThDebugActive-ThBStoreLimit
st8 [rpT2] = rNewIKS, PcStackLimit-PcInitialStack
;;
// get debugger active state
ld1.nta rT3 = [rpT1], ThTeb-ThDebugActive
st8 [rpT2] = rNewKSL, PcInitialBStore-PcStackLimit
// sp is now the new thread's kernel stack, so this addresses the switch
// frame the new thread saved when it was last swapped out.
add rpT3 = SwBsp+STACK_SCRATCH_AREA, sp
;;
ld8 rNewBSP = [rpT3], SwRnat-SwBsp
st8 [rpT2] = rT1, PcBStoreLimit-PcInitialBStore
;;
ld8 rNewRNAT = [rpT3]
st8 [rpT2] = rT2, PcDebugActive-PcBStoreLimit
;;
// load new teb
ld8 teb = [rpT1], ThApcState+AsKernelApcPending-ThTeb
// set new debugger active state
st1 [rpT2] = rT3, PcCurrentThread-PcDebugActive
invala
//
// Setup PCR intial kernel BSP and BSTORE limit
//
mov ar.bspstore = rNewBSP // load new bspstore
cmp.ne pDiff,pSame=rOldproc,rNewproc // if ne, switch process
;;
mov ar.rnat = rNewRNAT // load new RNATs
;;
mov ar.rsc = RSC_KERNEL // enable RSE
;;
//
// If the new process is not the same as the old process, then swap the
// address space to the new process.
//
// N.B. The context swap lock cannot be dropped until all references to the
// old process address space are complete. This includes any possible
// TB Misses that could occur referencing the new address space while
// still executing in the old address space.
//
// N.B. The process address space swap is executed with interrupts disabled.
//
// A fresh frame with 4 locals and 2 outputs is allocated on the new
// backing store; loc0..loc2 are used below.
//
alloc rT1 = 0,4,2,0
STPTR (rpT2, s2)
;;
mov kteb = teb // update kernel TEB
FAST_ENABLE_INTERRUPTS
ld1 loc0 = [rpT1] // load the ApcPending flag
#if !defined(NT_UP)
//
// Release the context swap lock
// N.B. ContextSwapLock is always released in KxSwapProcess, if called
//
add out0 = (LockQueueContextSwapLock * 16) + PbLockQueue, s0
add loc1 = PcApcInterrupt-PcCurrentThread, rpT2
(pSame) br.call.sptk brp = KeReleaseQueuedSpinLockFromDpcLevel
;;
#else // !defined(NT_UP)
add loc1 = PcApcInterrupt-PcCurrentThread, rpT2
;;
#endif // !defined(NT_UP)
mov out0 = rNewproc // set address of new process
mov out1 = rOldproc // set address of old process
(pDiff) br.call.sptk brp = KxSwapProcess // call swap address space(NewProc, OldProc)
;;
//
// In new address space, if changed.
//
// If the new thread has a kernel mode APC pending, then request an APC
// interrupt: the KernelApcPending flag loaded into loc0 is written to
// the PCR ApcInterrupt field addressed by loc1.
//
st1 [loc1] = loc0 // request (or clear) APC pend.
add rpT1 = PbContextSwitches, s0
add rpT2 = ThContextSwitches, s2
;;
//
// Load the PRCB and new-thread context switch counters.
//
ld4 loc1 = [rpT1]
ld4 loc2 = [rpT2]
;;
//
// Increment context switch counters
//
// (add r1 = r2, r3, 1 computes r2 + r3 + 1.)
//
cmp.ne pUsTh, pKrTh = zero, teb
add loc1 = loc1, zero, 1
add loc2 = loc2, zero, 1
;;
st4 [rpT1] = loc1 // increment # of context switches
st4 [rpT2] = loc2 // increment # of context switches
add rpT1 = SwFPSR+STACK_SCRATCH_AREA, sp
add rpT2 = SwPFS+STACK_SCRATCH_AREA, sp
;;
//
// Reload the new thread's saved state from its switch frame:
// loc1 = FPSR, loc2 = PFS, rT3 = return link, rT2 = predicates.
//
ld8 loc1 = [rpT1], SwRp-SwFPSR // restore brp and pfs
ld8 loc2 = [rpT2], SwPreds-SwPFS
;;
ld8 rT3 = [rpT1]
ld8 rT2 = [rpT2]
mov v0 = loc0 // set v0 = apc pending
(pUsTh) br.call.spnt brp = SwapContextIA32Restore
;;
//
// Note: at this point s0 = Prcb, s1 = previous thread, s2 = current thread
//
mov ar.fpsr = loc1
mov ar.pfs = loc2
mov brp = rT3
mov pr = rT2 // Restore preserved preds
#if 0
//
// Thierry 03/22/2000:
//
// The following memory synchronization of the local processor
// I-cache and D-cache because of I-stream modifications is not
// required if the modifying code is written following the NT
// Core Team specifications:
// - [Allocate VA]
// - Modify the code
// - Call FlushIntructionCache()
// -> calls KiSweepIcache[Range]()
// - Execute the code.
//
// The removal of this instruction eliminates a "> 100 cycle" stall.
//
sync.i
#endif // 0
;;
srlz.i
br.ret.sptk brp
NESTED_EXIT(SwapContext)
//++
//--------------------------------------------------------------------
// Routine:
//
// SwapContextIA32Save
//
// Routine Description:
//
// This function saves the IA32 context on the kernel stack.
// Called from SwapContext.
//
// The application registers ar21, ar24, ar25, ar26, ar28, ar29 and
// ar30 are written to the TsAppRegisters area located at
// -ThreadStateSaveAreaLength from the old thread's stack base.
// ar27 is deliberately not saved.
//
// Arguments:
//
// rOldSbase : old thread kstack base.
//
// Return value:
//
// None.
//
// Note:
//
// SwapContext registers context. Uses the register aliases declared
// in SwapContext and overwrites rAr1-rAr4, rT1-rT3, rpT1 and rpT2.
//
//--------------------------------------------------------------------
//--
LEAF_ENTRY(SwapContextIA32Save)
mov rAr1 = ar21 // IA32 FP control register FCR
;;
mov rAr2 = ar24 // IA32 EFLAG register
;;
mov rAr3 = ar25
;;
mov rAr4 = ar26
;;
//
// we may skip saving ar27 because it cannot be modified by user code
//
mov rT1 = ar30
;;
mov rT2 = ar28
;;
mov rT3 = ar29
;;
// these are separated out due to cache miss potential
//
// Two interleaved store pointers: rpT1 writes the ar21, ar25 and ar29
// slots, rpT2 writes the ar24, ar26, ar28 and ar30 slots.
//
add rpT1 = -ThreadStateSaveAreaLength+TsAppRegisters+TsAr21, rOldSbase
add rpT2 = -ThreadStateSaveAreaLength+TsAppRegisters+TsAr24, rOldSbase
;;
st8 [rpT1] = rAr1, TsAr25-TsAr21
st8 [rpT2] = rAr2, TsAr26-TsAr24
;;
st8 [rpT1] = rAr3, TsAr29-TsAr25
st8 [rpT2] = rAr4, TsAr28-TsAr26
;;
st8 [rpT2] = rT2, TsAr30-TsAr28
;;
st8 [rpT2] = rT1
st8 [rpT1] = rT3
br.ret.sptk.few.clr brp
LEAF_EXIT(SwapContextIA32Save)
//++
//--------------------------------------------------------------------
// Routine:
//
// SwapContextIA32Restore
//
// Routine Description:
//
// This function restores the IA32 registers context.
// Called from SwapContext.
//
// The application registers ar21 and ar24 through ar30 are reloaded
// from the TsAppRegisters area located at -ThreadStateSaveAreaLength
// from the new thread's stack base (ThStackBase).
//
// N.B. ar27 is restored here although SwapContextIA32Save does not
// store it; its slot is presumably written elsewhere (not
// visible in this routine).
//
// Arguments:
//
// s2 - Address of next thread object.
//
// Return value:
//
// None.
//
// Note:
//
// SwapContext registers context. Uses the register aliases declared
// in SwapContext and overwrites rAr1-rAr4 and rpT1-rpT3.
//
//--------------------------------------------------------------------
//--
LEAF_ENTRY(SwapContextIA32Restore)
add rpT1 = ThStackBase, s2
;;
ld8.nta rpT1 = [rpT1]
;;
//
// Two interleaved load pointers: rpT2 reads the ar21, ar25, ar27 and ar29
// slots, rpT3 reads the ar24, ar26, ar28 and ar30 slots.
//
add rpT2 = -ThreadStateSaveAreaLength+TsAppRegisters+TsAr21, rpT1
add rpT3 = -ThreadStateSaveAreaLength+TsAppRegisters+TsAr24, rpT1
;;
ld8.nta rAr1 = [rpT2], TsAr25-TsAr21
ld8.nta rAr2 = [rpT3], TsAr26-TsAr24
;;
ld8.nta rAr3 = [rpT2], TsAr27-TsAr25
ld8.nta rAr4 = [rpT3], TsAr28-TsAr26
;;
//
// rAr1/rAr2 are reloaded in the same instruction group in which their
// previous values are moved to ar21/ar24 (write-after-read only).
//
mov ar21 = rAr1
mov ar24 = rAr2
mov ar25 = rAr3
mov ar26 = rAr4
ld8.nta rAr1 = [rpT2], TsAr29-TsAr27
ld8.nta rAr2 = [rpT3], TsAr30-TsAr28
;;
ld8.nta rAr3 = [rpT2]
ld8.nta rAr4 = [rpT3]
;;
mov ar27 = rAr1
mov ar28 = rAr2
mov ar29 = rAr3
mov ar30 = rAr4
br.ret.sptk.few.clr brp
LEAF_EXIT(SwapContextIA32Restore)
SBTTL("Swap Process")
//++
//--------------------------------------------------------------------
//
// VOID
// KiSwapProcess (
// IN PKPROCESS NewProcess,
// IN PKPROCESS OldProcess
// )
//
// Routine Description:
//
// This function swaps the address space from one process to another by
// assigning a new region id, if necessary, and loading the fixed entry
// in the TB that maps the process page directory page. This routine follows
// the PowerPC design for handling RID wrap.
//
// On entry/exit:
//
// Interrupt enabled.
//
// Arguments:
//
// NewProcess (a0) - Supplies a pointer to a control object of type process
// which represents the new process that is switched to (32-bit address).
//
// OldProcess (a1) - Supplies a pointer to a control object of type process
// which represents the old process that is switched from (32-bit address).
//
// Return Value:
//
// None.
//
//--------------------------------------------------------------------
//--
NESTED_ENTRY(KiSwapProcess)
//
// Prologue for the KiSwapProcess entry point. The NESTED_SETUP arguments
// are presumably (inputs, locals, outputs, rotating) -- confirm against the
// macro definition in ksia64.h.
//
NESTED_SETUP(2,3,3,0)
PROLOGUE_END
//
// Register aliases
//
// N.B. rpCSLock, rMasterSeq, rNewSeq, rOldPsrL and rSessionInfo are defined
//      here but are not referenced anywhere in this routine.
//
rNewProc = a0
rOldProc = a1
rpCSLock = loc2
rpT1 = t0
rpT2 = t1
rProcSet = t2
rNewActive= t3
rOldActive= t4
rMasterSeq= t5
rNewSeq = t6
rOldPsrL = t7
rVa = t8
rPDE0 = t9 // PDE for page directory page 0
rVa2 = t10
rSessionBase = t11
rSessionInfo = t12
rT1 = t13
rT2 = t14
//
// KiSwapProcess must get the context swap lock
// KxSwapProcess is called from SwapContext with the lock held
//
// On MP builds: fetch the PRCB pointer from the PCR, form the address of
// the context swap entry in the PRCB lock queue array, acquire it, and
// then branch around the KxSwapProcess prologue to the common body.
//
// On NT_UP builds none of this is assembled, so control simply falls
// through the KxSwapProcess prologue below.
//
#if !defined(NT_UP)
movl rpT1 = KiPcr+PcPrcb
;;
ld8 rpT1 = [rpT1] // rpT1 -> PRCB
;;
add out0 = (LockQueueContextSwapLock * 16) + PbLockQueue, rpT1 // out0 -> context swap lock queue entry
br.call.sptk brp = KeAcquireQueuedSpinLockAtDpcLevel
;;
br.sptk Ksp_Continue // skip the alternate entry prologue
#endif // !defined(NT_UP)
;;
//
// Alternate entry used when the caller already holds the context swap
// lock. It performs the same frame setup as the primary entry.
//
ALTERNATE_ENTRY(KxSwapProcess)
NESTED_SETUP(2,3,3,0)
PROLOGUE_END
//
// Clear the processor set member number in the old process and set the
// processor member number in the new process.
//
// N.B. The old set is updated with xor, which toggles the bit; this clears
//      the member only because the bit is expected to be set in the old
//      process on entry.
//
Ksp_Continue:
#if !defined(NT_UP)
add rpT2 = PrActiveProcessors, rOldProc // -> old active processor set
movl rpT1 = KiPcr + PcSetMember // -> processor set member
;;
ld4 rProcSet= [rpT1] // rProcSet.4 = processor set member
add rpT1 = PrActiveProcessors, rNewProc // -> new active processor set
;;
ld4 rNewActive = [rpT1] // rNewActive.4 = new active processor set
ld4 rOldActive = [rpT2] // rOldActive.4 = old active processor set
;;
or rNewActive = rNewActive,rProcSet // set processor member in new set
xor rOldActive = rOldActive,rProcSet // clear processor member in old set
;;
st4 [rpT1] = rNewActive // set new active processor set
st4 [rpT2] = rOldActive // set old active processor set
#endif // !defined(NT_UP)
//
// If the process sequence number matches the system sequence number, then
// use the process RID. Otherwise, allocate a new process RID.
//
// N.B. KiMasterRid, KiMasterSequence are changed only when holding the
// KiContextSwapLock.
//
// The work described above is delegated to KiSyncNewRegionId, which is
// called with:
//      out0 = address of the ProcessRegion field of the new process
//      out1 = 8-byte value loaded from the SessionMapInfo field of the
//             new process
//
add rT2 = PrSessionMapInfo, rNewProc
add out0 = PrProcessRegion, rNewProc
;;
ld8 out1 = [rT2]
br.call.sptk brp = KiSyncNewRegionId
;;
//
// Switch address space to new process
// v0 = rRid = new process rid
//
// N.B. v0 is not read by the code that follows.
//
// Gather everything needed for the translation register update while
// interruption collection is still on: the two root page table entries
// from the new process and the two virtual addresses they are mapped at,
// which are read from the PCR.
//
fwb // hint to flush write buffers
FAST_DISABLE_INTERRUPTS
add rpT1 = PrDirectoryTableBase, rNewProc
movl rVa = KiPcr+PcPdeUtbase
add rpT2 = PrSessionParentBase, rNewProc
movl rVa2 = KiPcr+PcPdeStbase
;;
ld8.nta rPDE0 = [rpT1] // rPDE0 = Page directory page 0
ld8.nta rSessionBase = [rpT2] // rSessionBase = session space root entry
ld8.nta rVa = [rVa] // rVa = virtual address mapped by the user DTR
ld8.nta rVa2 = [rVa2] // rVa2 = virtual address mapped by the session DTR
;;
//
// To access IFA, ITDR registers, PSR.ic bit must be 0. Otherwise,
// it causes an illegal operation fault. While PSR.ic=0, any
// interruption can not be afforded. Make sure there will be no
// TLB miss and no interrupt coming in during this period.
//
rsm 1 << PSR_IC // PSR.ic=0
;;
srlz.d // must serialize
mov rT1 = PAGE_SHIFT << IDTR_PS // load page size field for IDTR
;;
mov cr.itir = rT1 // set up IDTR for dirbase
ptr.d rVa, rT1 // remove DTR for user space
;;
mov cr.ifa = rVa // set up IFA for dirbase vaddr
mov rT2 = DTR_UTBASE_INDEX // translation register slot for user space
;;
itr.d dtr[rT2] = rPDE0 // insert PDE0 to DTR
;;
ptr.d rVa2, rT1 // remove DTR for session
;; // to avoid an overlapping error
mov cr.ifa = rVa2 // set up IFA for session root vaddr
mov rT2 = DTR_STBASE_INDEX // translation register slot for session space
;;
itr.d dtr[rT2] = rSessionBase // insert the root for session space
;;
ssm 1 << PSR_IC // PSR.ic=1
;;
srlz.i // must I serialize
//
// Checked builds only: record the two values just inserted into the data
// translation registers in the corresponding KsTrD0-based slots of the
// PRCB processor state (slot offset = 8 * translation register index).
//
// N.B. t0, t1 and t3 are used by their raw names here; they overlay the
//      rpT1, rpT2 and rNewActive aliases, whose values are no longer
//      needed at this point.
//
#if DBG
mov t0 = PbProcessorState+KpsSpecialRegisters+KsTrD0+(8*DTR_UTBASE_INDEX)
movl t3 = KiPcr + PcPrcb
;;
ld8 t3 = [t3] // t3 -> PRCB
mov t1 = PbProcessorState+KpsSpecialRegisters+KsTrD0+(8*DTR_STBASE_INDEX)
;;
add t0 = t3, t0 // t0 -> saved slot for the user DTR
add t1 = t3, t1 // t1 -> saved slot for the session DTR
;;
st8 [t0] = rPDE0
st8 [t1] = rSessionBase
;;
#endif
FAST_ENABLE_INTERRUPTS
//
// Now make sure branch history is enabled for non wow processes
// and disabled for wow processes
//
// This is skipped entirely when KiVectorLogMask is zero, and also when
// bits 24..31 of CPUID register 3 are not equal to 7.
// NOTE(review): per the Itanium architecture that field is the processor
// family; 7 presumably identifies the implementation whose
// model-specific register 675 controls the branch history buffer --
// confirm against the processor documentation.
//
add t1 = @gprel(KiVectorLogMask), gp
;;
ld8 t1 = [t1]
;;
cmp.eq pt0 = t1, r0 // pt0 = KiVectorLogMask is zero
(pt0) br.cond.sptk SkipBranchHistory
mov t1 = 3 // CPUID register index
;;
mov t2 = cpuid[t1]
add t3 = PrWow64Process, rNewProc
;;
extr.u t2 = t2, 24, 8 // t2 = CPUID[3] bits 24..31
ld4 t4 = [t3]; // t4 = Wow64Process field of new process
;;
cmp.ne pt1 = 7, t2 // pt1 = field is not 7
;;
mov t1 = 675 // model-specific register index
(pt1) br.dpnt SkipBranchHistory
;;
mov t2 = msr[t1] // t2 = current MSR value
cmp.eq pt1,pt2 = zero, t4 // Wow64 is non-zero
;;
(pt1) mov t3 = 2 // Enable the HB for ia64 procs
(pt2) mov t3 = 256 // Disable the HB for wow64 procs
;;
dep t2 = t3, t2, 0, 9 // replace bits 0..8 with the value selected above
;;
mov msr[t1] = t2; // write the updated MSR value back
;;
SkipBranchHistory:
#if !defined(NT_UP)
//
// Can now release the context swap lock
//
// N.B. This release is reached from both entry points, so a caller that
//      enters at KxSwapProcess with the lock held returns without it.
//
movl rpT1 = KiPcr+PcPrcb
;;
ld8 rpT1 = [rpT1] // rpT1 -> PRCB
;;
add out0 = (LockQueueContextSwapLock * 16) + PbLockQueue, rpT1 // out0 -> context swap lock queue entry
br.call.sptk brp = KeReleaseQueuedSpinLockFromDpcLevel
;;
#endif // !defined(NT_UP)
NESTED_RETURN
NESTED_EXIT(KiSwapProcess)
SBTTL("Retire Deferred Procedure Call List")
//++
// Routine:
//
// VOID
// KiRetireDpcList (
// IN PKPRCB Prcb
// )
//
// Routine Description:
//
// This routine is called to retire the specified deferred procedure
// call list. DPC routines are called using the idle thread (current)
// stack.
//
// N.B. Interrupts must be disabled on entry to this routine. Control is returned
// to the caller with the same conditions true.
//
// Arguments:
//
// a0 - Address of the current PRCB.
//
// Return value:
//
// None.
//
//--
NESTED_ENTRY(KiRetireDpcList)
//
// The NESTED_SETUP arguments are presumably (inputs, locals, outputs,
// rotating) -- confirm against the macro definition in ksia64.h. The four
// outputs carry the arguments passed to each DPC routine.
//
NESTED_SETUP(1,2,4,0)
PROLOGUE_END
//
// Top of the retire loop. The scratch registers do not survive the call to
// a DPC routine, so the field addresses are recomputed from the PRCB
// pointer (a0) on every pass:
//
//      t0 -> DpcQueueDepth
//      t1 -> DpcRoutineActive
//      t2 -> DpcLock
//      t3 -> DpcListHead.Flink
//      t4 =  current DPC queue depth
//
Krdl_Restart:
add t0 = PbDpcQueueDepth, a0
add t1 = PbDpcRoutineActive, a0
add t2 = PbDpcLock, a0
;;
ld4 t4 = [t0]
add t3 = PbDpcListHead+LsFlink, a0
;;
//
// Secondary loop entry used by the exit path, which sets up t0-t4 itself.
// The queue depth is written to DpcRoutineActive, so the field is nonzero
// exactly when there is work to do; an empty queue goes to the exit path.
//
Krdl_Restart2:
cmp4.eq pt1 = zero, t4
st4 [t1] = t4
(pt1) br.spnt Krdl_Exit
;;
#if !defined(NT_UP)
ACQUIRE_SPINLOCK(t2, a0, Krdl_20)
#endif // !defined(NT_UP)
//
// Re-read the depth now that the DPC lock is held (on MP) and fetch the
// first list entry. If the queue drained in the meantime, unlock and exit;
// otherwise convert the list entry address into the DPC object address.
//
ld4 t4 = [t0]
LDPTR (t5, t3) // -> first DPC entry
;;
cmp4.eq pt1, pt2 = zero, t4
;;
(pt2) add t10 = LsFlink, t5 // t10 -> Flink of first entry
(pt2) add out0 = -DpDpcListEntry, t5 // out0 -> DPC object containing the entry
(pt1) br.spnt Krdl_Unlock
;;
LDPTR (t6, t10) // t6 -> entry following the first one
add t11 = DpDeferredRoutine, out0
add t12 = DpSystemArgument1, out0
;;
//
// Setup call to DPC routine
//
// arguments are:
// dpc object address (out0)
// deferred context (out1)
// system argument 1 (out2)
// system argument 2 (out3)
//
// N.B. the arguments must be loaded from the DPC object BEFORE
// the inserted flag is cleared to prevent the object being
// overwritten before its time.
//
// t11 and t12 walk the DPC object with post-increment addressing:
//      t11: DeferredRoutine -> DeferredContext -> Lock
//      t12: SystemArgument1 -> SystemArgument2
//
ld8.nt1 t13 = [t11], DpDeferredContext-DpDeferredRoutine // t13 = DeferredRoutine pointer
ld8.nt1 out2 = [t12], DpSystemArgument2-DpSystemArgument1
;;
ld8.nt1 out1 = [t11], DpLock-DpDeferredContext // leaves t11 -> Lock field of DPC
ld8.nt1 out3 = [t12]
add t4 = -1, t4 // one fewer DPC queued
//
// Unlink the first entry: the list head's Flink is pointed at the next
// entry. NOTE(review): STPTRINC presumably post-increments t3 by -LsFlink
// so that t3 ends up addressing the list head itself -- confirm against
// the macro definition.
//
STPTRINC (t3, t6, -LsFlink)
//
// The DeferredRoutine pointer is dereferenced as a pair of 8-byte words:
// the first becomes the branch target and the second is loaded into gp.
//
ld8.nt1 t14 = [t13], 8 // t14 = first word (branch target)
add t15 = LsBlink, t6 // t15 -> Blink of the new first entry
;;
ld8.nt1 gp = [t13] // gp = second word
STPTR (t15, t3) // new first entry's Blink = t3 (list head)
STPTR (t11, zero) // zero the Lock field of the DPC object
st4 [t0] = t4 // store decremented queue depth
#if !defined(NT_UP)
RELEASE_SPINLOCK(t2) // set spin lock not owned
#endif //!defined(NT_UP)
//
// The DPC routine runs with interrupts enabled and without the DPC lock.
//
FAST_ENABLE_INTERRUPTS
mov bt0 = t14
br.call.sptk.few.clr brp = bt0 // call DPC routine
;;
//
// Check to determine if any more DPCs are available to process.
//
FAST_DISABLE_INTERRUPTS
br Krdl_Restart
;;
//
// The DPC list became empty while we were acquiring the DPC queue lock.
// Clear DPC routine active. The race condition mentioned above doesn't
// exist here because we hold the DPC queue lock.
//
Krdl_Unlock:
#if !defined(NT_UP)
add t2 = PbDpcLock, a0
;;
RELEASE_SPINLOCK(t2)
#endif // !defined(NT_UP)
//
// Exit path: clear DpcRoutineActive and DpcInterruptRequested, then look
// at the queue depth one more time. If a DPC was queued in the meantime,
// resume at Krdl_Restart2 with t0-t4 set up the way that label expects.
//
Krdl_Exit:
add t0 = PbDpcQueueDepth, a0
add t1 = PbDpcRoutineActive, a0
add out0 = PbDpcInterruptRequested, a0
;;
st4.nta [t1] = zero // DpcRoutineActive = 0
st4.rel.nta [out0] = zero // DpcInterruptRequested = 0 (release store)
add t2 = PbDpcLock, a0
ld4 t4 = [t0] // re-read queue depth
add t3 = PbDpcListHead+LsFlink, a0
;;
cmp4.eq pt1, pt2 = zero, t4
(pt2) br.spnt Krdl_Restart2 // more work arrived; go around again
;;
NESTED_RETURN
NESTED_EXIT(KiRetireDpcList)
SBTTL("Dispatch Interrupt")
//++
//--------------------------------------------------------------------
// Routine:
//
// KiDispatchInterrupt
//
// Routine Description:
//
// This routine is entered as the result of a software interrupt generated
// at DISPATCH_LEVEL. Its function is to process the Deferred Procedure Call
// (DPC) list, and then perform a context switch if a new thread has been
// selected for execution on the processor.
//
// This routine is entered at IRQL DISPATCH_LEVEL with the dispatcher
// database unlocked. When a return to the caller finally occurs, the
// IRQL remains at DISPATCH_LEVEL, and the dispatcher database is still
// unlocked.
//
// N.B. On entry to this routine the volatile states (excluding high
// floating point register set) have been saved.
//
// On entry:
//
// sp - points to stack scratch area.
//
// Arguments:
//
// None
//
// Return Value:
//
// None.
//--------------------------------------------------------------------
//--
NESTED_ENTRY(KiDispatchInterrupt)
//
// Prologue: allocate a register frame with no inputs, four locals and two
// outputs, save the return branch register in loc0 and the caller's
// ar.unat in loc1, and carve a switch frame out of the stack. The preserved
// state (fs0-fs19, bs0-bs4, s0-s3, ar.lc, ar.pfs and the NaT bits of
// s0-s3) is then stored into the exception frame area of that switch
// frame, with unwind directives describing each save.
//
// t0 and t1 are used as two interleaved store pointers that step through
// the frame with post-increment addressing.
//
PROLOGUE_BEGIN
.regstk 0, 4, 2, 0
alloc t16 = ar.pfs, 0, 4, 2, 0 // t16 = previous function state
.save rp, loc0
mov loc0 = brp
.fframe SwitchFrameLength
add sp = -SwitchFrameLength, sp
;;
.save ar.unat, loc1
mov loc1 = ar.unat
add t0 = ExFltS19+SwExFrame+STACK_SCRATCH_AREA, sp
add t1 = ExFltS18+SwExFrame+STACK_SCRATCH_AREA, sp
;;
.save.gf 0x0, 0xC0000
stf.spill [t0] = fs19, ExFltS17-ExFltS19
stf.spill [t1] = fs18, ExFltS16-ExFltS18
;;
.save.gf 0x0, 0x30000
stf.spill [t0] = fs17, ExFltS15-ExFltS17
stf.spill [t1] = fs16, ExFltS14-ExFltS16
mov t10 = bs4 // branch registers are staged in t10-t14
;;
.save.gf 0x0, 0xC000
stf.spill [t0] = fs15, ExFltS13-ExFltS15
stf.spill [t1] = fs14, ExFltS12-ExFltS14
mov t11 = bs3
;;
.save.gf 0x0, 0x3000
stf.spill [t0] = fs13, ExFltS11-ExFltS13
stf.spill [t1] = fs12, ExFltS10-ExFltS12
mov t12 = bs2
;;
.save.gf 0x0, 0xC00
stf.spill [t0] = fs11, ExFltS9-ExFltS11
stf.spill [t1] = fs10, ExFltS8-ExFltS10
mov t13 = bs1
;;
.save.gf 0x0, 0x300
stf.spill [t0] = fs9, ExFltS7-ExFltS9
stf.spill [t1] = fs8, ExFltS6-ExFltS8
mov t14 = bs0
;;
.save.gf 0x0, 0xC0
stf.spill [t0] = fs7, ExFltS5-ExFltS7
stf.spill [t1] = fs6, ExFltS4-ExFltS6
mov t15 = ar.lc // loop count is staged in t15
;;
.save.gf 0x0, 0x30
stf.spill [t0] = fs5, ExFltS3-ExFltS5
stf.spill [t1] = fs4, ExFltS2-ExFltS4
;;
.save.f 0xC
stf.spill [t0] = fs3, ExFltS1-ExFltS3 // save fs3
stf.spill [t1] = fs2, ExFltS0-ExFltS2 // save fs2
;;
.save.f 0x3
stf.spill [t0] = fs1, ExBrS4-ExFltS1 // save fs1
stf.spill [t1] = fs0, ExBrS3-ExFltS0 // save fs0
;;
.save.b 0x18
st8 [t0] = t10, ExBrS2-ExBrS4 // save bs4
st8 [t1] = t11, ExBrS1-ExBrS3 // save bs3
;;
.save.b 0x6
st8 [t0] = t12, ExBrS0-ExBrS2 // save bs2
st8 [t1] = t13, ExIntS2-ExBrS1 // save bs1
;;
.save.b 0x1
st8 [t0] = t14, ExIntS3-ExBrS0 // save bs0
;;
.save.gf 0xC, 0x0
.mem.offset 0,0
st8.spill [t0] = s3, ExIntS1-ExIntS3 // save s3
.mem.offset 8,0
st8.spill [t1] = s2, ExIntS0-ExIntS2 // save s2
;;
.save.gf 0x3, 0x0
.mem.offset 0,0
st8.spill [t0] = s1, ExApLC-ExIntS1 // save s1
.mem.offset 8,0
st8.spill [t1] = s0, ExApEC-ExIntS0 // save s0
;;
.savepsp ar.pfs, ExceptionFrameLength-ExApEC-STACK_SCRATCH_AREA
st8 [t1] = t16, ExIntNats-ExApEC // save ar.pfs captured by alloc
mov t4 = ar.unat // captured Nats of s0-s3
;;
.savepsp ar.lc, ExceptionFrameLength-ExApLC-STACK_SCRATCH_AREA
st8 [t0] = t15 // save ar.lc
.savepsp @priunat, ExceptionFrameLength-ExIntNats-STACK_SCRATCH_AREA
st8 [t1] = t4 // save Nats of s0-s3
PROLOGUE_END
//
// Register aliases
//
// N.B. rPrcb and rKerGP live in stacked locals so that they survive the
//      calls made below. rpDPLock is defined but not referenced in this
//      routine.
//
rPrcb = loc2
rKerGP = loc3
rpT1 = t0
rpT2 = t1
rT1 = t2
rT2 = t3
rpDPLock = t4 // pointer to dispatcher lock
pNoTh = pt1 // No next thread to run
pNext = pt2 // next thread not null
pNull = pt3 // no thread available
pOwned = pt4 // dispatcher lock already owned
pNotOwned = pt5
pQEnd = pt6 // quantum end request pending
pNoQEnd = pt7 // no quantum end request pending
//
// Increment the dispatch interrupt count
//
mov rKerGP = gp // save gp
movl rPrcb = KiPcr + PcPrcb
;;
LDPTR (rPrcb, rPrcb) // rPrcb -> Prcb
;;
add rpT1 = PbDispatchInterruptCount, rPrcb
;;
ld4 rT1 = [rpT1]
;;
add rT1 = rT1, zero, 1 // rT1 = rT1 + 0 + 1
;;
st4 [rpT1] = rT1
// **** TBD **** use alpha optimization to first check Dpc Q depth
//
// Process the DPC list
//
Kdi_PollDpcList:
//
// Process the deferred procedure call list.
//
// Interrupts are briefly enabled and then disabled again before the call,
// since KiRetireDpcList is entered with interrupts disabled.
//
FAST_ENABLE_INTERRUPTS
;;
srlz.d
//
// **** TBD ***** No stack switch as in alpha, mips...
// Save current initial stack address and set new initial stack address.
//
FAST_DISABLE_INTERRUPTS
mov out0 = rPrcb
br.call.sptk brp = KiRetireDpcList
;;
//
// Check to determine if quantum end has occurred.
//
// N.B. If a new thread is selected as a result of processing a quantum
// end request, then the new thread is returned with the dispatcher
// database locked. Otherwise, NULL is returned with the dispatcher
// database unlocked.
//
FAST_ENABLE_INTERRUPTS
add rpT1 = PbQuantumEnd, rPrcb
;;
ld4 rT1 = [rpT1] // get quantum end indicator
;;
cmp4.ne pQEnd, pNoQEnd = rT1, zero // if zero, no quantum end reqs
mov gp = rKerGP // restore gp
;;
(pQEnd) st4 [rpT1] = zero // clear quantum end indicator
(pNoQEnd) br.cond.sptk Kdi_NoQuantumEnd
(pQEnd) br.call.spnt brp = KiQuantumEnd // call KiQuantumEnd (C code)
;;
//
// NOTE(review): v0 is a thread pointer, but cmp4 tests only its low 32
// bits; the equivalent test on PbNextThread below uses a 64-bit cmp.
// Confirm that a non-NULL result can never have zero low-order 32 bits.
//
cmp4.eq pNoTh, pNext = v0, zero // pNoTh = no next thread
(pNoTh) br.dpnt Kdi_Exit // br to exit if no next thread
(pNext) br.dpnt Kdi_Swap // br to swap to next thread
//
// If no quantum end requests:
// Check to determine if a new thread has been selected for execution on
// this processor.
//
Kdi_NoQuantumEnd:
add rpT2 = PbNextThread, rPrcb
;;
LDPTR (rT1, rpT2) // rT1 = address of next thread object
;;
cmp.eq pNull = rT1, zero // pNull => no thread selected
(pNull) br.dpnt Kdi_Exit // exit if no thread selected
#if !defined(NT_UP)
//
// try to acquire the dispatcher database lock.
//
// If the attempt does not return TRUE, go back and poll the DPC list
// again rather than spinning here.
// NOTE(review): out1 presumably points at the location that receives the
// previous IRQL -- confirm against the callee's prototype.
//
mov out0 = LockQueueDispatcherLock
movl out1 = KiPcr+PcSystemReserved+8
br.call.sptk brp = KeTryToAcquireQueuedSpinLockRaiseToSynch
;;
cmp.ne pOwned, pNotOwned = TRUE, v0 // pOwned = 1 if not free
(pOwned) br.dpnt Kdi_PollDpcList // br out if owned
;;
#else
//
// Uniprocessor build: there is no lock to take, so just raise IRQL.
//
mov rT1 = SYNCH_LEVEL
;;
SET_IRQL (rT1)
#endif // !defined(NT_UP)
//
// Reread address of next thread object since it is possible for it to
// change in a multiprocessor system.
//
// s1 and s2 are used for the old and new thread because they are
// preserved registers (saved in the prologue) and must survive the call
// to KiReadyThread.
//
Kdi_Swap:
add rpT2 = PbNextThread, rPrcb // -> next thread
movl rpT1 = KiPcr + PcCurrentThread
;;
LDPTR (s1, rpT1) // current thread object
LDPTR (s2, rpT2) // next thread object
add rpT1 = PbCurrentThread, rPrcb
;;
//
// Reready current thread for execution and swap context to the selected
// thread.
//
// Note: Set IdleSwapBlock in the current thread so no idle processor
// can switch to this thread before it is removed from the current
// processor.
//
STPTR (rpT2, zero) // clear addr of next thread
add out0 = ThIdleSwapBlock, s1 // block swap from idle
mov rT1 = 1
;;
STPTR (rpT1, s2) // set addr of current thread
st1 [out0] = rT1, -ThIdleSwapBlock// IdleSwapBlock = 1; out0 steps back to old thread
br.call.sptk brp = KiReadyThread // call KiReadyThread(OldTh)
;;
//
// SwapContext takes its arguments in preserved registers rather than in
// the output registers: s0 = Prcb, s1 = old thread, s2 = new thread.
// pt0 is cleared by comparing zero against itself.
//
mov s0 = rPrcb // setup call
cmp.ne pt0 = zero, zero // no need to lock context swap
br.call.sptk brp = SwapContext // call SwapContext(Prcb, OldTh, NewTh)
;;
//
// Restore saved registers, and return.
//
// This full restore is performed only on the path that actually swapped
// threads; the paths that branch straight to Kdi_Exit skip it.
//
add out0 = STACK_SCRATCH_AREA+SwExFrame, sp
br.call.sptk brp = KiRestoreExceptionFrame
;;
//
// Common exit: reload ar.pfs from the slot it was saved to in the
// prologue, restore the return branch register and ar.unat from the
// stacked locals, release the switch frame, and return.
//
Kdi_Exit:
add rpT1 = ExApEC+SwExFrame+STACK_SCRATCH_AREA, sp
;;
ld8 rT1 = [rpT1] // rT1 = saved ar.pfs
mov brp = loc0
;;
mov ar.unat = loc1
mov ar.pfs = rT1
.restore
add sp = SwitchFrameLength, sp
br.ret.sptk brp
NESTED_EXIT(KiDispatchInterrupt)