windows-nt/Source/XPSP1/NT/base/ntos/rtl/i386/movemem.asm
2020-09-26 16:20:57 +08:00

1328 lines
28 KiB
NASM
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

title "User Mode Zero and Move Memory functions"
;++
;
; Copyright (c) 1989 Microsoft Corporation
;
; Module Name:
;
; movemem.asm
;
; Abstract:
;
; This module implements functions to zero and copy blocks of memory
;
;
; Author:
;
; Steven R. Wood (stevewo) 25-May-1990
;
; Environment:
;
; User mode only.
;
; Revision History:
;
;--
.386p
.xlist
include ks386.inc
include callconv.inc ; calling convention macros
.list
;
; Checked-build (DBG) only: counters, DbgPrint format strings and the
; external debugger entry points used by the alignment checks below.
;
if DBG
_DATA SEGMENT DWORD PUBLIC 'DATA'
public _RtlpZeroCount
public _RtlpZeroBytes
_RtlpZeroCount dd 0 ; incremented once per RtlMoveMemory call
_RtlpZeroBytes dd 0 ; running sum of the Length passed to RtlMoveMemory
ifndef BLDR_KERNEL_RUNTIME
; Format strings handed to DbgPrint; each takes the offending value as %x.
_MsgUnalignedPtr db 'RTL: RtlCompare/FillMemoryUlong called with unaligned pointer (%x)\n',0
_MsgUnalignedCount db 'RTL: RtlCompare/FillMemoryUlong called with unaligned count (%x)\n',0
endif
_DATA ENDS
ifndef BLDR_KERNEL_RUNTIME
ifdef NTOS_KERNEL_RUNTIME
extrn _KdDebuggerEnabled:BYTE ; compared against 0 before calling DbgBreakPoint
endif
EXTRNP _DbgBreakPoint,0
extrn _DbgPrint:near ; called with the arguments pushed and popped by the caller
endif
endif
;
; Alignment parameters for zeroing and moving memory.
;
; The *_LOG2 values are used as shift counts that turn a byte count into a
; dword count; the *_MASK values extract the 0-3 bytes left over after the
; dword portion has been processed.
;
ZERO_MEMORY_ALIGNMENT = 4
ZERO_MEMORY_ALIGNMENT_LOG2 = 2
ZERO_MEMORY_ALIGNMENT_MASK = ZERO_MEMORY_ALIGNMENT - 1
MEMORY_ALIGNMENT = 4
MEMORY_ALIGNMENT_LOG2 = 2
MEMORY_ALIGNMENT_MASK = MEMORY_ALIGNMENT - 1
;
; Alignment for functions in this module: invoked immediately before a
; cPublicProc so that the entry point starts on a 16-byte boundary.
;
CODE_ALIGNMENT macro
align 16
endm
;
; All routines in this module are assembled into this code segment.
;
_TEXT$00 SEGMENT PARA PUBLIC 'CODE'
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
page , 132
subttl "RtlCompareMemory"
;++
;
; ULONG
; RtlCompareMemory (
;    IN PVOID Source1,
;    IN PVOID Source2,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Compares two blocks of memory and reports how many leading bytes
;    are identical.
;
; Arguments:
;
;    Source1 (esp+4)  - First block to compare.
;
;    Source2 (esp+8)  - Second block to compare.
;
;    Length  (esp+12) - Number of bytes to compare.
;
; Return Value:
;
;    eax = number of bytes, counted from the start of the blocks, that
;    compared equal.  Equals Length when the blocks match completely.
;
;--

;
; Argument offsets as seen after the two register pushes below.
;
RcmSource1      equ     [esp+12]
RcmSource2      equ     [esp+16]
RcmLength       equ     [esp+20]

        CODE_ALIGNMENT
cPublicProc _RtlCompareMemory,3
cPublicFpo 3,0

        push    esi                     ; esi/edi are callee-saved
        push    edi
        cld                             ; string ops must auto-increment
        mov     esi,RcmSource1          ; esi -> first block
        mov     edi,RcmSource2          ; edi -> second block

;
; Phase 1: compare whole dwords.
;
        mov     ecx,RcmLength
        shr     ecx,2                   ; ecx = Length / 4
        jz      RcmTailBytes            ; shorter than one dword
        repe    cmpsd
        jne     RcmLocateByte           ; some dword differed

;
; Phase 2: compare the 0-3 bytes that follow the last whole dword.
;
RcmTailBytes:
        mov     ecx,RcmLength
        and     ecx,3                   ; ecx = Length mod 4
        jz      RcmAllEqual             ; no tail, so everything matched
        repe    cmpsb
        jne     RcmPartial              ; a tail byte differed

;
; Every byte matched: the answer is simply Length.
;
RcmAllEqual:
        mov     eax,RcmLength
        pop     edi
        pop     esi
        stdRET  _RtlCompareMemory

;
; A dword differed.  cmpsd has already stepped esi/edi past it, so step
; back one dword and rescan it bytewise.  One of those four bytes is known
; to differ, so a count of 5 can never run out before the scan stops.
;
RcmLocateByte:
        sub     esi,4
        sub     edi,4
        mov     ecx,5
        repe    cmpsb

;
; esi is now one past the first differing byte, so (esi - 1) - Source1 is
; the number of bytes that matched.
;
RcmPartial:
        lea     eax,[esi-1]
        sub     eax,RcmSource1
        pop     edi
        pop     esi
        stdRET  _RtlCompareMemory

stdENDP _RtlCompareMemory
subttl "RtlCompareMemoryUlong"
;++
;
; ULONG
; RtlCompareMemoryUlong (
;    IN PVOID Source,
;    IN ULONG Length,
;    IN ULONG Pattern
;    )
;
; Routine Description:
;
;    Scans Source one dword at a time and reports how many bytes, counted
;    from the start, consist of consecutive dwords equal to Pattern.
;    Only Length / 4 dwords are examined; the low two bits of Length are
;    ignored.
;
; Arguments:
;
;    Source  - Block to scan.  Checked builds complain through DbgPrint
;              (and break into an attached debugger) if it is not 4-byte
;              aligned.
;
;    Length  - Size of the block in bytes.
;
;    Pattern - 32-bit value each dword is compared against.
;
; Return Value:
;
;    eax = number of bytes that matched Pattern (always a multiple of 4).
;
;--

;
; Argument offsets as seen after the single push of edi.
;
EcmlSource      equ     [esp + 4 + 4]
EcmlLength      equ     [esp + 4 + 8]
EcmlPattern     equ     [esp + 4 + 12]

; end of arguments

        CODE_ALIGNMENT
cPublicProc _RtlCompareMemoryUlong ,3
cPublicFpo 3,1                          ; 3 args, 1 saved register (as the
                                        ; other one-push routines here)

;
; Save the non-volatile register that we will use, without the benefit of
; a frame pointer.  No exception handling in this routine.
;
        push    edi

;
; Register usage for REPE SCASD:
;
;    edi -> memory to scan
;    ecx =  number of dwords to scan
;    eax =  pattern to compare against
;
        mov     edi,EcmlSource

if DBG
ifndef BLDR_KERNEL_RUNTIME
        test    edi,3                   ; pointer dword aligned?
        jz      @F
        push    edi
        push    offset FLAT:_MsgUnalignedPtr
        call    _DbgPrint
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F                      ; no debugger, so just continue
        call    _DbgBreakPoint@0
@@:
endif
endif

;
; ecx and eax are loaded only now because the DbgPrint call above is free
; to destroy them.
;
        mov     ecx,EcmlLength
        mov     eax,EcmlPattern
        cld                             ; scan upward, as every other string
                                        ; routine in this module guarantees
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2

;
; If ecx is zero REPE SCASD executes no iterations and leaves the flags
; alone, so ZF is still set from the SHR and the JE below is taken.
;
        repe    scasd
        je      @F                      ; ran out of dwords: all matched
        sub     edi,4                   ; back up over the mismatching dword
@@:
        sub     edi,EcmlSource          ; bytes scanned that matched
        mov     eax,edi
        pop     edi
        stdRET  _RtlCompareMemoryUlong

stdENDP _RtlCompareMemoryUlong
subttl "RtlFillMemory"
;++
;
; VOID
; RtlFillMemory (
;    IN PVOID Destination,
;    IN ULONG Length,
;    IN UCHAR Fill
;    )
;
; Routine Description:
;
;    Stores the byte value Fill into Length consecutive bytes starting at
;    Destination.
;
; Arguments:
;
;    Destination - Start of the memory to fill.
;
;    Length      - Number of bytes to fill.
;
;    Fill        - Byte value to store.
;
; Return Value:
;
;    None.
;
;--

;
; Argument offsets as seen after the single push of edi.
; (TOS) = Return address
;
EfmDestination  equ     [esp + 4 + 4]
EfmLength       equ     [esp + 4 + 8]
EfmFill         equ     byte ptr [esp + 4 + 12]

; end of arguments

        CODE_ALIGNMENT
cPublicProc _RtlFillMemory ,3
cPublicFpo 3,1

        push    edi                     ; only non-volatile register used

;
; Register usage for REP STOS:
;
;    edi -> next byte to fill
;    eax =  Fill replicated into all four bytes
;    ecx =  count for the current REP
;    edx =  0-3 trailing bytes left after the dword fill
;
        cld                             ; fill upward
        movzx   eax,EfmFill             ; eax = 000000ff
        imul    eax,eax,01010101h       ; eax = ffffffff pattern (byte x 4)
        mov     edi,EfmDestination
        mov     ecx,EfmLength
        mov     edx,ecx
        and     edx,ZERO_MEMORY_ALIGNMENT_MASK
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2

        rep     stosd                   ; bulk of the fill, a dword at a time

;
; ecx is zero after the REP, so the OR both loads the trailing byte count
; and tests it.
;
        or      ecx,edx
        jz      FmDone
        rep     stosb                   ; 1-3 leftover bytes
FmDone:
        pop     edi
        stdRET  _RtlFillMemory

stdENDP _RtlFillMemory
subttl "RtlFillMemoryUlonglong"
;++
;
; VOID
; RtlFillMemoryUlonglong (
;    IN PVOID Destination,
;    IN ULONG Length,
;    IN ULONGLONG Fill
;    )
;
; Routine Description:
;
;    Fills memory with a 64-bit value.  The fill proceeds in dwords
;    (low half, high half, low half, ...) for Length / 4 dwords, so the
;    low two bits of Length are ignored.  Callers are expected to pass an
;    8-byte aligned Destination and a Length that is a multiple of 8;
;    checked builds report a Length that is not a multiple of 8 or a
;    Destination that is not at least 4-byte aligned.
;
; Arguments:
;
;    Destination - Start of the memory to fill.
;
;    Length      - Number of bytes to fill.
;
;    Fill        - 64-bit value to fill with.  It occupies two argument
;                  dwords on the stack (low half first).
;
; Return Value:
;
;    None.
;
;--

;
; Argument offsets as seen after the pushes of esi and edi.
; (TOS) = Return address
;
EfmlDestination equ     [esp + 0ch]
EfmlLength      equ     [esp + 10h]
EfmlFillLow     equ     [esp + 14h]
EfmlFillHigh    equ     [esp + 18h]

; end of arguments

        CODE_ALIGNMENT
cPublicProc _RtlFillMemoryUlonglong ,4
cPublicFpo 4,2                          ; 4 arg dwords, 2 saved registers

;
; Save the non-volatile registers that we will use, without the benefit of
; a frame pointer.  No exception handling in this routine.
;
        push    esi
        push    edi

;
; Register usage:
;
;    esi -> start of destination (source of the ripple copy)
;    edi -> destination + 8 (target of the ripple copy)
;    ecx =  number of dwords still to write
;    eax =  half of the fill value currently being stored
;
        mov     ecx,EfmlLength          ; # of bytes
        mov     esi,EfmlDestination     ; Destination pointer

if DBG
ifndef BLDR_KERNEL_RUNTIME

;
; Complain about a byte count that is not a whole number of quadwords.
;
        test    ecx,7
        jz      @F
        push    ecx
        push    offset FLAT:_MsgUnalignedCount
        call    _DbgPrint
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F
        call    _DbgBreakPoint@0
@@:

;
; Complain about a destination that is not dword aligned.
;
        test    esi,3
        jz      @F
        push    esi
        push    offset FLAT:_MsgUnalignedPtr
        call    _DbgPrint
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F
        call    _DbgBreakPoint@0
@@:

;
; Either DbgPrint call above may have destroyed ecx (it is a volatile
; register), so fetch the byte count again before it is used.
;
        mov     ecx,EfmlLength
endif
endif

        cld                             ; the ripple copy must run upward
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2 ; convert bytes to dwords
        jz      FmqDone                 ; less than one dword: nothing to do

        mov     eax,EfmlFillLow         ; first dword = low half
        mov     [esi],eax
        dec     ecx
        jz      FmqDone                 ; Length was 4-7: only one dword

        mov     eax,EfmlFillHigh        ; second dword = high half
        mov     [esi+04],eax
        dec     ecx                     ; ecx = dwords beyond the first 8 bytes

;
; Ripple the first quadword through the rest of the buffer: each dword is
; copied from 8 bytes behind the store pointer.  A zero count makes the REP
; a no-op, so an 8-byte fill ends here with nothing further written.
;
        lea     edi,[esi+08]
        rep     movsd

FmqDone:
        pop     edi
        pop     esi
        stdRET  _RtlFillMemoryUlonglong

stdENDP _RtlFillMemoryUlonglong
subttl "RtlFillMemoryUlong"
;++
;
; VOID
; RtlFillMemoryUlong (
;    IN PVOID Destination,
;    IN ULONG Length,
;    IN ULONG Fill
;    )
;
; Routine Description:
;
;    Fills memory with a 32-bit value.  The Destination pointer must be
;    aligned on a 4 byte boundary and the low order two bits of the
;    Length parameter are ignored.
;
; Arguments:
;
;    Destination - Start of the memory to fill.  Checked builds complain
;                  through DbgPrint (and break into an attached debugger)
;                  if it is not 4-byte aligned.
;
;    Length      - Number of bytes to fill; Length / 4 dwords are written.
;
;    Fill        - 32-bit value to fill memory with.
;
; Return Value:
;
;    None.
;
;--

;
; Argument offsets as seen after the single push of edi.
; (TOS) = Return address
;
EfmlDestination equ     [esp + 4 + 4]
EfmlLength      equ     [esp + 4 + 8]
EfmlFill        equ     [esp + 4 + 12]

; end of arguments

        CODE_ALIGNMENT
cPublicProc _RtlFillMemoryUlong ,3
cPublicFpo 3,1

;
; Save the non-volatile register that we will use, without the benefit of
; a frame pointer.  No exception handling in this routine.
;
        push    edi

;
; Register usage for REP STOSD:
;
;    edi -> memory to fill
;    ecx =  number of dwords to fill
;    eax =  value to store
;
        mov     edi,EfmlDestination

if DBG
ifndef BLDR_KERNEL_RUNTIME
        test    edi,3                   ; pointer dword aligned?
        jz      @F
        push    edi
        push    offset FLAT:_MsgUnalignedPtr
        call    _DbgPrint
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F                      ; no debugger, so just continue
        call    _DbgBreakPoint@0
@@:
endif
endif

;
; ecx and eax are loaded only now because the DbgPrint call above is free
; to destroy them.
;
        mov     ecx,EfmlLength
        mov     eax,EfmlFill
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2
        cld                             ; fill upward, as RtlFillMemory and
                                        ; RtlZeroMemory guarantee

        rep     stosd                   ; no-op when ecx is zero

        pop     edi
        stdRET  _RtlFillMemoryUlong

stdENDP _RtlFillMemoryUlong
subttl "RtlZeroMemory"
;++
;
; VOID
; RtlZeroMemory (
;    IN PVOID Destination,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Stores zero into Length consecutive bytes starting at Destination.
;
; Arguments:
;
;    Destination - Start of the memory to zero.
;
;    Length      - Number of bytes to zero.
;
; Return Value:
;
;    None.
;
;--

;
; Argument offsets as seen after the single push of edi.
; (TOS) = Return address
;
EzmDestination  equ     [esp + 4 + 4]
EzmLength       equ     [esp + 4 + 8]

; end of arguments

        CODE_ALIGNMENT
cPublicProc _RtlZeroMemory ,2
cPublicFpo 2,1

        push    edi                     ; only non-volatile register used

;
; Register usage for REP STOS:
;
;    edi -> next byte to zero
;    eax =  0, the value stored
;    ecx =  count for the current REP
;    edx =  0-3 trailing bytes left after the dword pass
;
        mov     edi,EzmDestination
        mov     ecx,EzmLength
        xor     eax,eax
        cld                             ; zero upward
        mov     edx,ecx
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2
        and     edx,ZERO_MEMORY_ALIGNMENT_MASK

        rep     stosd                   ; whole dwords first

;
; Finish with the trailing bytes.  A REP with a zero count stores nothing,
; so no test is needed when Length was a multiple of four.
;
        mov     ecx,edx
        rep     stosb

        pop     edi
        stdRET  _RtlZeroMemory

stdENDP _RtlZeroMemory
page , 132
subttl "RtlMoveMemory"
;++
;
; VOID
; RtlMoveMemory (
;    IN PVOID Destination,
;    IN PVOID Source OPTIONAL,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Copies Length bytes from Source to Destination and gives the correct
;    result even when the two ranges overlap.  The copy normally runs
;    upward in dwords followed by 0-3 trailing bytes; when Destination
;    lies inside the source range it runs downward a byte at a time.
;
;    Note that the code below performs no NULL test on Source.
;
; Arguments:
;
;    Destination - Where the bytes are copied to.
;
;    Source      - Where the bytes are copied from.
;
;    Length      - Number of bytes to copy.
;
; Return Value:
;
;    None.
;
;--

;
; Argument offsets as seen after the pushes of esi and edi.
; (TOS) = Return address
;
EmmDestination  equ     [esp + 8 + 4]
EmmSource       equ     [esp + 8 + 8]
EmmLength       equ     [esp + 8 + 12]

; End of arguments

        CODE_ALIGNMENT
cPublicProc _RtlMoveMemory ,3
cPublicFpo 3,2

        push    esi                     ; both are callee-saved
        push    edi

;
; Register usage:
;
;    esi -> next source byte
;    edi -> next destination byte
;    ecx =  count for the current REP
;    edx =  0-3 trailing bytes of a forward copy
;
        mov     esi,EmmSource
        mov     edi,EmmDestination
        mov     ecx,EmmLength

if DBG
        inc     _RtlpZeroCount          ; checked-build call statistics
        add     _RtlpZeroBytes,ecx
endif

        cld
        cmp     esi,edi
        ja      MmForward               ; Source above Destination: an upward
                                        ; copy can never overwrite unread data
        je      MmDone                  ; same address: nothing to copy

;
; Source is below Destination.  The ranges overlap harmfully only when the
; gap between them is smaller than Length.
;
        mov     eax,edi
        sub     eax,esi                 ; eax = Destination - Source
        cmp     ecx,eax
        ja      MmBackward

;
; Forward copy: dwords, then whatever bytes remain.  A REP with a zero
; count moves nothing, so the byte pass needs no guard.
;
MmForward:
        mov     edx,ecx
        shr     ecx,MEMORY_ALIGNMENT_LOG2
        and     edx,MEMORY_ALIGNMENT_MASK
        rep     movsd
        mov     ecx,edx
        rep     movsb

MmDone:
        pop     edi
        pop     esi
        stdRET  _RtlMoveMemory

;
; Backward copy: point at the last byte of each range and move downward a
; byte at a time.  The direction flag is put back to "up" before leaving.
;
MmBackward:
        std
        lea     esi,[esi+ecx-1]
        lea     edi,[edi+ecx-1]
        rep     movsb
        cld
        pop     edi
        pop     esi
        stdRET  _RtlMoveMemory

stdENDP _RtlMoveMemory
subttl "RtlCopyMemoryNonTemporal"
;
; Register Definitions (for instruction macros).
;
; These are the x86 ModRM register numbers.  The macros below add them to
; a ModRM byte as the base register (r/m field).  rESP is not usable as a
; base with any of these macros (r/m = 4 selects a SIB byte), and rEBP is
; not usable with movnti_eax_0_disp (mod = 00, r/m = 5 selects disp32).
;
rEAX            equ     0
rECX            equ     1
rEDX            equ     2
rEBX            equ     3
rESP            equ     4
rEBP            equ     5
rESI            equ     6
rEDI            equ     7

;
; Block size (64 bytes) and dword size used by the prefetching copies.
;
MEMORY_ALIGNMENT_MASK0  = 63
MEMORY_ALIGNMENT_LOG2_0 = 6
MEMORY_ALIGNMENT_MASK1  = 3
MEMORY_ALIGNMENT_LOG2_1 = 2

;
; The instructions below are emitted as raw bytes because the assembler
; level selected by .386p does not know them.
;

;
; sfence (0F AE F8)
;
sfence macro
        db      0FH, 0AEH, 0F8H
        endm

;
; prefetchnta [GeneralReg + Offset]  (0F 18 /0)
;
; Normally emitted with an 8-bit displacement.  The CPU sign-extends that
; displacement, so an Offset outside -128..127 would silently address the
; wrong line (128 would mean -128).  Such offsets are therefore emitted
; with the 32-bit displacement form instead.
;
prefetchnta_short macro GeneralReg, Offset
if ((Offset) gt 127) or ((Offset) lt -128)
        db      0FH, 018H, 080H + GeneralReg
        dd      Offset
else
        db      0FH, 018H, 040H + GeneralReg, Offset
endif
        endm

;
; prefetchnta [GeneralReg + Offset], always with a 32-bit displacement.
;
prefetchnta_long macro GeneralReg, Offset
        db      0FH, 018H, 080h + GeneralReg
        dd      Offset
        endm

;
; movnti [GeneralReg + Offset], eax  (0F C3 /r, 8-bit displacement;
; Offset must be in the range -128..127)
;
movnti_eax macro GeneralReg, Offset
        db      0FH, 0C3H, 040H + GeneralReg, Offset
        endm

;
; movnti [GeneralReg], eax  (no displacement)
;
movnti_eax_0_disp macro GeneralReg
        db      0FH, 0C3H, 000H + GeneralReg
        endm

;
; movnti [GeneralReg + Offset], ebx  (8-bit displacement;
; Offset must be in the range -128..127)
;
movnti_ebx macro GeneralReg, Offset
        db      0FH, 0C3H, 058H + GeneralReg, Offset
        endm
;
; movnticopy64bytes
;
; Copies one 64-byte block from [esi] to [edi] using ordinary loads and
; non-temporal (movnti) stores, eight bytes per step through eax and ebx.
; esi and edi are not advanced.  Destroys eax and ebx.
;
movnticopy64bytes macro

;
; First eight bytes: the store through eax uses the no-displacement form.
;
        mov     eax, [esi]
        mov     ebx, [esi + 4]
        movnti_eax_0_disp rEDI
        movnti_ebx rEDI, 4

;
; Remaining seven 8-byte groups at offsets 8, 16, ... 56.
;
        irp     blk,<8,16,24,32,40,48,56>
        mov     eax, [esi + blk]
        mov     ebx, [esi + blk + 4]
        movnti_eax rEDI, blk
        movnti_ebx rEDI, blk + 4
        endm

        endm
;++
;
; VOID
; RtlCopyMemoryNonTemporal(
;    IN PVOID Destination,
;    IN PVOID Source,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Copies Length bytes between two non-overlapping buffers.  The source
;    is prefetched ahead of the copy with prefetchnta and the destination
;    is written with non-temporal stores (movnti) so that the copy does
;    not pollute the cache.  Whole 64-byte blocks are copied first, then
;    whole dwords, then any remaining bytes (with an ordinary rep movsb).
;    An sfence is issued before returning.
;
; Arguments:
;
;    Destination - Where the bytes are copied to.
;
;    Source      - Where the bytes are copied from.
;
;    Length      - Number of bytes to copy.  A Length of zero touches
;                  neither buffer.
;
; Return Value:
;
;    None.
;
;--

cPublicProc _RtlCopyMemoryNonTemporal ,3

;
; Definitions of arguments, relative to the ebp frame set up below.
; (TOS) = Return address
;
; These equates are also used by RtlPrefetchCopyMemory and
; RtlPrefetchCopyMemory32, which build the same frame.
;
CPNDestination  equ     [ebp + 4 + 4]
CPNSource       equ     [ebp + 4 + 8]
CPNLength       equ     [ebp + 4 + 12]

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx                     ; movnticopy64bytes destroys ebx

        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength
        cld

;
; Nothing to copy: leave without reading the source at all.
;
        test    ecx, ecx
        jz      ExitRoutine

;
; Before prefetching we must guarantee the TLB is valid.  A single byte is
; read so that the touch never reaches past a buffer shorter than 4 bytes.
;
        mov     al, [esi]

;
; edx = number of whole 64-byte blocks, ecx = bytes left over (0-63).
;
        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0
        shr     edx, MEMORY_ALIGNMENT_LOG2_0
        je      Copy4                   ; less than 64 bytes

;
; Prime the prefetcher.  Each time another block is known to exist the
; next line is requested; with fewer than four blocks the loop is skipped
; and the matching number of unrolled copies below is entered directly.
; Offsets of 128 and above need the 32-bit displacement form because an
; 8-bit displacement is sign-extended.
;
        dec     edx
        je      copy64                  ; exactly one block
        prefetchnta_long rESI, 128
        dec     edx
        je      copy128                 ; exactly two blocks
        prefetchnta_long rESI, 192
        dec     edx
        je      copy192                 ; exactly three blocks

;
; Steady state: prefetch four blocks ahead, copy the current block.
; edx = number of blocks beyond the final three.
;
copyLoop:
        prefetchnta_long rESI, 256
        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]
        dec     edx
        jnz     copyLoop

;
; The final three blocks have already been prefetched.
;
copy192:
        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]
copy128:
        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]
copy64:
        movnticopy64bytes

;
; Step past the last block (lea leaves the flags alone), then see whether
; a tail of fewer than 64 bytes remains.  If so, prefetch the line that
; holds it; esi already points at the tail.
;
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]
        or      ecx, ecx                ; anything less than 64 to do?
        jz      ExitRoutine
        prefetchnta_short rESI, 0

;
; Handle extra bytes here in 32-bit chunks and then 8-bit bytes.
; edx = whole dwords in the tail, ecx = bytes left over (0-3).
;
Copy4:
        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK1
        shr     edx, MEMORY_ALIGNMENT_LOG2_1
        jz      RemainingBytes          ; no whole dword in the tail

Copy4Loop:
        mov     eax, [esi]
        movnti_eax_0_disp rEDI
        lea     esi, [esi+4]
        lea     edi, [edi+4]
        dec     edx
        jnz     Copy4Loop

RemainingBytes:
        or      ecx, ecx
        jz      ExitRoutine
        rep     movsb

ExitRoutine:
        sfence                          ; Make all stores globally visible
        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlCopyMemoryNonTemporal

stdENDP _RtlCopyMemoryNonTemporal
;++
;
; VOID
; RtlPrefetchCopyMemory(
;    IN PVOID Destination,
;    IN PVOID Source,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Copies Length bytes between two non-overlapping buffers with
;    rep movsd/movsb, issuing prefetchnta on the source up to 256 bytes
;    ahead of the copy, one prefetch per 64-byte block.
;
; Arguments:
;
;    Destination - Where the bytes are copied to.
;
;    Source      - Where the bytes are copied from.
;
;    Length      - Number of bytes to copy.  A Length of zero touches
;                  neither buffer.
;
; Return Value:
;
;    None.
;
;--

cPublicProc _RtlPrefetchCopyMemory,3

;
; The CPNxxx argument equates (ebp relative) are the ones defined with
; RtlCopyMemoryNonTemporal; the same frame is built here.  ebx is never
; modified by this routine, so it is not saved.
;
        push    ebp
        mov     ebp, esp
        push    esi
        push    edi

        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength
        cld

;
; Nothing to copy: leave without reading the source at all.
;
        test    ecx, ecx
        jz      pcmExitRoutine

;
; Before prefetching we must guarantee the TLB is valid.  A single byte is
; read so that the touch never reaches past a buffer shorter than 4 bytes.
;
        mov     al, [esi]

;
; edx = number of whole 64-byte blocks, ecx = bytes left over (0-63).
;
        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0
        shr     edx, MEMORY_ALIGNMENT_LOG2_0
        je      short pcmCopy4          ; less than 64 bytes

;
; The leftover count is parked on the stack because ecx is needed as the
; REP counter for each block.  PUSH does not disturb the flags set by DEC.
; Offsets of 128 and above need the 32-bit displacement form because an
; 8-bit displacement is sign-extended.
;
        dec     edx
        push    ecx
        je      short pcmcopy64         ; exactly one block
        prefetchnta_long rESI, 128
        dec     edx
        je      short pcmcopy128        ; exactly two blocks
        prefetchnta_long rESI, 192
        dec     edx
        je      short pcmcopy192        ; exactly three blocks

;
; Steady state: prefetch four blocks ahead, copy the current block.
; edx = number of blocks beyond the final three.
;
pcmcopyLoop:
        prefetchnta_long rESI, 256
        mov     ecx, 16
        rep     movsd
        dec     edx
        jnz     short pcmcopyLoop

;
; The final three blocks have already been prefetched.
;
pcmcopy192:
        mov     ecx, 16
        rep     movsd
pcmcopy128:
        mov     ecx, 16
        rep     movsd
pcmcopy64:
        mov     ecx, 16
        rep     movsd

        pop     ecx                     ; ecx = leftover byte count
        test    ecx, ecx                ; anything less than 64 to do?
        jz      short pcmExitRoutine
        prefetchnta_short rESI, 0       ; esi already points at the tail

;
; Copy last part byte by byte.  ecx is non-zero on both paths into here.
;
pcmCopy4:
        rep     movsb

pcmExitRoutine:
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlPrefetchCopyMemory

stdENDP _RtlPrefetchCopyMemory
;++
;
; VOID
; RtlPrefetchCopyMemory32(
;    IN PVOID Destination,
;    IN PVOID Source,
;    IN ULONG Length
;    )
;
; Routine Description:
;
;    Copies Length bytes between two non-overlapping buffers with
;    rep movsd/movsb, issuing prefetchnta on the source up to 256 bytes
;    ahead of the copy.  Unlike RtlPrefetchCopyMemory, two prefetches
;    32 bytes apart are issued per 64-byte block (presumably for
;    processors whose cache line is 32 bytes - the name suggests so).
;
; Arguments:
;
;    Destination - Where the bytes are copied to.
;
;    Source      - Where the bytes are copied from.
;
;    Length      - Number of bytes to copy.  A Length of zero touches
;                  neither buffer.
;
; Return Value:
;
;    None.
;
;--

cPublicProc _RtlPrefetchCopyMemory32,3

;
; The CPNxxx argument equates (ebp relative) are the ones defined with
; RtlCopyMemoryNonTemporal; the same frame is built here.  ebx is never
; modified by this routine, so it is not saved.
;
        push    ebp
        mov     ebp, esp
        push    esi
        push    edi

        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength
        cld

;
; Nothing to copy: leave without reading the source at all.
;
        test    ecx, ecx
        jz      pcm32ExitRoutine

;
; Before prefetching we must guarantee the TLB is valid.  A single byte is
; read so that the touch never reaches past a buffer shorter than 4 bytes.
;
        mov     al, [esi]

;
; edx = number of whole 64-byte blocks, ecx = bytes left over (0-63).
;
        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0
        shr     edx, MEMORY_ALIGNMENT_LOG2_0
        je      short pcm32Copy4        ; less than 64 bytes

;
; The leftover count is parked on the stack because ecx is needed as the
; REP counter for each block.  Neither the prefetch nor the PUSH disturbs
; the flags set by DEC.  Offsets of 128 and above need the 32-bit
; displacement form because an 8-bit displacement is sign-extended.
;
        dec     edx
        prefetchnta_short rESI, 32
        push    ecx
        je      short pcm32copy64       ; exactly one block
        prefetchnta_long rESI, 128
        prefetchnta_long rESI, 160
        dec     edx
        je      short pcm32copy128      ; exactly two blocks
        prefetchnta_long rESI, 192
        prefetchnta_long rESI, 224      ; second half of the block at 192
        dec     edx
        je      short pcm32copy192      ; exactly three blocks

;
; Steady state: prefetch both halves of the block four ahead, copy the
; current block.  edx = number of blocks beyond the final three.
;
pcm32copyLoop:
        prefetchnta_long rESI, 256
        prefetchnta_long rESI, 288
        mov     ecx, 16
        rep     movsd
        dec     edx
        jnz     short pcm32copyLoop

;
; The final three blocks have already been prefetched.
;
pcm32copy192:
        mov     ecx, 16
        rep     movsd
pcm32copy128:
        mov     ecx, 16
        rep     movsd
pcm32copy64:
        mov     ecx, 16
        rep     movsd

        pop     ecx                     ; ecx = leftover byte count
        test    ecx, ecx                ; anything less than 64 to do?
        jz      short pcm32ExitRoutine
        prefetchnta_short rESI, 0       ; esi already points at the tail

;
; Copy last part byte by byte.  ecx is non-zero on both paths into here.
;
pcm32Copy4:
        rep     movsb

pcm32ExitRoutine:
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlPrefetchCopyMemory32

stdENDP _RtlPrefetchCopyMemory32
subttl "RtlPrefetchMemoryNonTemporal"
;++
;
; VOID
; FASTCALL
; RtlPrefetchMemoryNonTemporal(
; IN PVOID Source,
; IN SIZE_T Length
; )
;
; Routine Description:
;
; This function issues a non-temporal prefetch (prefetchnta) for memory
; starting at Source, one prefetch every _KePrefetchNTAGranularity bytes,
; until Length bytes have been covered. No data is copied or modified.
;
; As assembled the routine returns immediately; see the note on the
; leading RET below.
;
; Arguments:
;
; Source - Supplies a pointer to the memory to prefetch (the code
; prefetches through ecx).
;
; Length - Supplies the Length, in bytes, of the memory to be prefetched
; (the code counts this down in edx).
;
; Return Value:
;
; None.
;
;--
; Only assembled for the kernel runtime (not the boot loader, not user mode).
ifndef BLDR_KERNEL_RUNTIME
ifdef NTOS_KERNEL_RUNTIME
extrn _KePrefetchNTAGranularity:DWORD
cPublicFastCall RtlPrefetchMemoryNonTemporal ,2
;
; The following instruction will be patched out at boot time if
; this processor supports the prefetch instruction.
;
; NOTE(review): the patching code is not in this file; the RET must stay
; the first instruction of the routine for that patch to find it.
;
ret ; patched out at boot.
mov eax, _KePrefetchNTAGranularity ; get d-cache line size
; At least one line is always prefetched, even when Length is zero.
@@: prefetchnta_short rECX, 0 ; prefetch line
add ecx, eax ; bump prefetch address
sub edx, eax ; decrement length
ja short @b ; loop if more to get (unsigned: stops at zero or on borrow)
fstRET RtlPrefetchMemoryNonTemporal ; return
fstENDP RtlPrefetchMemoryNonTemporal
endif
endif
_TEXT$00 ends
end