1328 lines
28 KiB
NASM
1328 lines
28 KiB
NASM
title "User Mode Zero and Move Memory functions"
|
||
;++
|
||
;
|
||
; Copyright (c) 1989 Microsoft Corporation
|
||
;
|
||
; Module Name:
|
||
;
|
||
; movemem.asm
|
||
;
|
||
; Abstract:
|
||
;
|
||
; This module implements functions to zero and copy blocks of memory
|
||
;
|
||
;
|
||
; Author:
|
||
;
|
||
; Steven R. Wood (stevewo) 25-May-1990
|
||
;
|
||
; Environment:
|
||
;
|
||
; User mode only.
|
||
;
|
||
; Revision History:
|
||
;
|
||
;--
|
||
.386p
|
||
.xlist
|
||
include ks386.inc
|
||
include callconv.inc ; calling convention macros
|
||
.list
|
||
|
||
if DBG

;
; Checked-build only data: call/byte counters (updated by RtlMoveMemory) and
; the diagnostic format strings handed to DbgPrint by the alignment checks.
;

_DATA   SEGMENT DWORD PUBLIC 'DATA'

_RtlpZeroCount  dd      0               ; number of calls counted
_RtlpZeroBytes  dd      0               ; running total of byte lengths

        public  _RtlpZeroCount, _RtlpZeroBytes

ifndef BLDR_KERNEL_RUNTIME
_MsgUnalignedPtr    db  'RTL: RtlCompare/FillMemoryUlong called with unaligned pointer (%x)\n',0
_MsgUnalignedCount  db  'RTL: RtlCompare/FillMemoryUlong called with unaligned count (%x)\n',0
endif

_DATA   ENDS

;
; External routines (and the kernel debugger flag) used by the checks.
; None of this exists in the boot loader runtime.
;

ifndef BLDR_KERNEL_RUNTIME
ifdef NTOS_KERNEL_RUNTIME
        extrn   _KdDebuggerEnabled:BYTE
endif
        EXTRNP  _DbgBreakPoint,0
        extrn   _DbgPrint:near
endif

endif   ; DBG
|
||
|
||
;
|
||
; Alignment parameters for zeroing and moving memory.
|
||
;
|
||
|
||
;
; Zero/fill routines work in dword units: the byte alignment and its mask
; are derived from the shift count so the three values cannot drift apart.
;

ZERO_MEMORY_ALIGNMENT_LOG2 = 2
ZERO_MEMORY_ALIGNMENT      = 1 SHL ZERO_MEMORY_ALIGNMENT_LOG2
ZERO_MEMORY_ALIGNMENT_MASK = ZERO_MEMORY_ALIGNMENT - 1

;
; Same parameters for the move routine.
;

MEMORY_ALIGNMENT_LOG2 = 2
MEMORY_ALIGNMENT      = 1 SHL MEMORY_ALIGNMENT_LOG2
MEMORY_ALIGNMENT_MASK = MEMORY_ALIGNMENT - 1

;
; Alignment for functions in this module: every entry point is placed on
; a 16-byte boundary.
;

CODE_ALIGNMENT macro
        align   16
        endm
|
||
|
||
|
||
_TEXT$00 SEGMENT PARA PUBLIC 'CODE'
|
||
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
|
||
|
||
page , 132
|
||
subttl "RtlCompareMemory"
|
||
;++
|
||
;
|
||
; ULONG
|
||
; RtlCompareMemory (
|
||
; IN PVOID Source1,
|
||
; IN PVOID Source2,
|
||
; IN ULONG Length
|
||
; )
|
||
;
|
||
; Routine Description:
|
||
;
|
||
; This function compares two blocks of memory and returns the number
|
||
; of bytes that compared equal.
|
||
;
|
||
; Arguments:
|
||
;
|
||
; Source1 (esp+4) - Supplies a pointer to the first block of memory to
|
||
; compare.
|
||
;
|
||
; Source2 (esp+8) - Supplies a pointer to the second block of memory to
|
||
; compare.
|
||
;
|
||
; Length (esp+12) - Supplies the Length, in bytes, of the memory to be
|
||
; compared.
|
||
;
|
||
; Return Value:
|
||
;
|
||
; The number of bytes that compared equal is returned as the function
|
||
; value. If all bytes compared equal, then the length of the original
|
||
; block of memory is returned.
|
||
;
|
||
;--
|
||
|
||
;
; Argument offsets are relative to esp AFTER the two register pushes in
; the prologue (return address + esi + edi = 12 bytes).
;

RcmSource1  equ     [esp+12]
RcmSource2  equ     [esp+16]
RcmLength   equ     [esp+20]

        CODE_ALIGNMENT
cPublicProc _RtlCompareMemory,3
cPublicFpo 3,0

;
; Register use:
;   esi -> current position in Source1
;   edi -> current position in Source2
;   ecx =  remaining dword / byte count for the string compares
;   eax =  return value (count of leading bytes that matched)
;

        push    esi                     ; preserve callee-saved registers
        push    edi
        cld                             ; string compares walk upward
        mov     esi,RcmSource1          ; (esi) -> first buffer
        mov     edi,RcmSource2          ; (edi) -> second buffer

;
; Phase 1: compare whole dwords.
;

        mov     ecx,RcmLength           ; (ecx) = length in bytes
        shr     ecx,2                   ; (ecx) = number of whole dwords
        jz      rcmTail                 ; fewer than 4 bytes, bytes only
        repe    cmpsd                   ; scan while dwords are equal
        jnz     rcmFindByte             ; a dword differed, locate the byte

;
; Phase 2: compare the 0-3 bytes left after the last whole dword.
;

rcmTail:
        mov     ecx,RcmLength           ; (ecx) = length in bytes
        and     ecx,3                   ; (ecx) = leftover byte count
        jz      rcmAllEqual             ; no leftover bytes, all matched
        repe    cmpsb                   ; scan while bytes are equal
        jnz     rcmMismatch             ; a byte differed

;
; Every byte matched: the answer is the full length.
;

rcmAllEqual:
        mov     eax,RcmLength

rcmExit:
        pop     edi                     ; restore registers and return
        pop     esi
        stdRET  _RtlCompareMemory

;
; A dword differed. esi/edi have already stepped past it, so move both
; back one dword and rescan it bytewise. One of its four bytes is known
; to differ, so a count of 5 can never run out before the mismatch.
;

rcmFindByte:
        lea     esi,[esi-4]
        lea     edi,[edi-4]
        mov     ecx,5
        repe    cmpsb

;
; esi is now one past the byte that differed, so (esi - 1) - Source1 is
; the number of leading bytes that matched.
;

rcmMismatch:
        lea     eax,[esi-1]             ; (eax) -> first differing byte
        sub     eax,RcmSource1          ; (eax) = matching byte count
        jmp     short rcmExit

stdENDP _RtlCompareMemory
|
||
|
||
|
||
subttl "RtlCompareMemory"
|
||
EcmlSource equ [esp + 4 + 4]
|
||
EcmlLength equ [esp + 4 + 8]
|
||
EcmlPattern equ [esp + 4 + 12]
|
||
|
||
; end of arguments
|
||
|
||
CODE_ALIGNMENT
|
||
cPublicProc _RtlCompareMemoryUlong ,3
|
||
|
||
;
|
||
; Save the non-volatile registers that we will use, without the benefit of
|
||
; a frame pointer. No exception handling in this routine.
|
||
;
|
||
|
||
push edi
|
||
|
||
;
|
||
; Setup the registers for using REP STOS instruction to zero memory.
|
||
;
|
||
; edi -> memory to zero
|
||
; ecx = number of 32-bit words to zero
|
||
; edx = number of extra 8-bit bytes to zero at the end (0 - 3)
|
||
; eax = value to store in destination
|
||
; direction flag is clear for auto-increment
|
||
;
|
||
|
||
mov edi,EcmlSource
|
||
if DBG
|
||
ifndef BLDR_KERNEL_RUNTIME
|
||
test edi,3
|
||
jz @F
|
||
push edi
|
||
push offset FLAT:_MsgUnalignedPtr
|
||
call _DbgPrint
|
||
add esp, 2 * 4
|
||
ifdef NTOS_KERNEL_RUNTIME
|
||
cmp _KdDebuggerEnabled,0
|
||
else
|
||
mov eax,fs:[PcTeb]
|
||
mov eax,[eax].TebPeb
|
||
cmp byte ptr [eax].PebBeingDebugged,0
|
||
endif
|
||
je @F
|
||
call _DbgBreakPoint@0
|
||
@@:
|
||
endif
|
||
endif
|
||
mov ecx,EcmlLength
|
||
mov eax,EcmlPattern
|
||
shr ecx,ZERO_MEMORY_ALIGNMENT_LOG2
|
||
|
||
|
||
;
|
||
; If number of 32-bit words to compare is non-zero, then do it.
|
||
;
|
||
|
||
repe scasd
|
||
je @F
|
||
sub edi,4
|
||
@@:
|
||
sub edi,EcmlSource
|
||
mov eax,edi
|
||
pop edi
|
||
stdRET _RtlCompareMemoryUlong
|
||
|
||
stdENDP _RtlCompareMemoryUlong
|
||
|
||
|
||
subttl "RtlFillMemory"
|
||
;++
|
||
;
|
||
; VOID
|
||
; RtlFillMemory (
|
||
; IN PVOID Destination,
|
||
; IN ULONG Length,
|
||
; IN UCHAR Fill
|
||
; )
|
||
;
|
||
; Routine Description:
|
||
;
|
||
; This function fills memory with a byte value.
|
||
;
|
||
; Arguments:
|
||
;
|
||
; Destination - Supplies a pointer to the memory to fill.
|
||
;
|
||
; Length - Supplies the Length, in bytes, of the memory to be filled.
|
||
;
|
||
; Fill - Supplies the byte value to fill memory with.
|
||
;
|
||
; Return Value:
|
||
;
|
||
; None.
|
||
;
|
||
;--
|
||
|
||
; definitions for arguments
|
||
; (TOS) = Return address
|
||
|
||
;
; Argument offsets are relative to esp after the single push of edi.
;

EfmDestination  equ     [esp + 4 + 4]
EfmLength       equ     [esp + 4 + 8]
EfmFill         equ     byte ptr [esp + 4 + 12]

        CODE_ALIGNMENT
cPublicProc _RtlFillMemory ,3
cPublicFpo 3,1

;
; Register use:
;   edi -> next byte to fill
;   ecx =  count for the current REP STOS
;   edx =  trailing byte count (Length mod 4)
;   eax =  fill byte replicated into all four byte lanes
;

        push    edi                     ; preserve callee-saved register

        mov     edi,EfmDestination
        mov     ecx,EfmLength

;
; Replicate the fill byte: 000000xx * 01010101h = xxxxxxxx. The product
; always fits in 32 bits because the multiplicand is at most 0FFh.
;

        movzx   eax,EfmFill
        imul    eax,eax,01010101h
        cld                             ; fill upward

        mov     edx,ecx
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2  ; (ecx) = whole dwords
        and     edx,ZERO_MEMORY_ALIGNMENT_MASK  ; (edx) = leftover bytes

;
; Store the whole dwords, then the 0-3 leftover bytes. A REP with a zero
; count stores nothing, so no explicit tests are needed.
;

        rep     stosd
        mov     ecx,edx
        rep     stosb

        pop     edi
        stdRET  _RtlFillMemory

stdENDP _RtlFillMemory
|
||
|
||
subttl "RtlFillMemory"
|
||
;++
|
||
;
|
||
; VOID
|
||
; RtlFillMemoryUlonglong (
|
||
; IN PVOID Destination,
|
||
; IN ULONG Length,
|
||
; IN ULONGLONG Fill
|
||
; )
|
||
;
|
||
; Routine Description:
|
||
;
|
||
; This function fills memory with a 64-bit value. The Destination pointer
|
||
; must be aligned on an 8 byte boundary and the low order two bits of the
|
||
; Length parameter are ignored.
|
||
;
|
||
; Arguments:
|
||
;
|
||
; Destination - Supplies a pointer to the memory to zero.
|
||
;
|
||
; Length - Supplies the Length, in bytes, of the memory to be zeroed.
|
||
;
|
||
; Fill - Supplies the 64-bit value to fill memory with.
|
||
;
|
||
; Return Value:
|
||
;
|
||
; None.
|
||
;
|
||
;--
|
||
|
||
; definitions for arguments
|
||
; (TOS) = Return address
|
||
|
||
;
; Argument offsets are relative to esp after the two register pushes in
; the prologue (return address + esi + edi = 0ch bytes). The 64-bit Fill
; argument occupies two stack dwords, low half first.
;

EfmlDestination equ     [esp + 0ch]
EfmlLength      equ     [esp + 10h]
EfmlFillLow     equ     [esp + 14h]
EfmlFillHigh    equ     [esp + 18h]

        CODE_ALIGNMENT
cPublicProc _RtlFillMemoryUlonglong ,4
cPublicFpo 4,1

;
; Strategy: store the 8-byte pattern once at Destination by hand, then
; let REP MOVSD copy from Destination to Destination+8. Because source
; and destination overlap by design, each dword written becomes the
; source for a later one and the pattern "ripples" through the buffer.
;
; Register use:
;   esi -> start of the buffer (source of the ripple copy)
;   edi -> Destination + 8     (target of the ripple copy)
;   ecx =  number of dwords still to fill
;   eax =  half of the fill value being stored
; The direction flag is assumed to be clear on entry.
;

        push    esi                     ; preserve callee-saved registers
        push    edi

        mov     ecx,EfmlLength          ; # of bytes
        mov     esi,EfmlDestination     ; Destination pointer

if DBG
ifndef BLDR_KERNEL_RUNTIME

;
; Checked builds: complain about a length that is not a multiple of 8.
;

        test    ecx,7
        jz      @F
        push    ecx
        push    offset FLAT:_MsgUnalignedCount  ; this is the COUNT check
        call    _DbgPrint               ; cdecl - caller pops arguments
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F                      ; no debugger, just continue
        call    _DbgBreakPoint@0
@@:

;
; Checked builds: complain about a misaligned destination pointer.
; NOTE(review): the routine header asks for 8-byte alignment but only
; the low two bits are tested here - confirm which is intended.
;

        test    esi,3
        jz      @F
        push    esi
        push    offset FLAT:_MsgUnalignedPtr
        call    _DbgPrint
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      @F
        call    _DbgBreakPoint@0
@@:

;
; ecx is a volatile register and either diagnostic call above may have
; destroyed it, so reload the byte count before it is used.
;

        mov     ecx,EfmlLength          ; # of bytes
endif
endif
        mov     eax,EfmlFillLow         ; get low portion of the fill arg
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2 ; convert bytes to dwords
        sub     ecx,2                   ; first 2 dwords are done by hand
        jb      fmq90                   ; Length < 8: no room for even one
                                        ; pattern, store nothing rather than
                                        ; letting the count wrap to ~4G
        mov     [esi],eax               ; store 1st low part
        mov     eax,EfmlFillHigh        ; get high portion of the fill arg
        lea     edi,[esi+08]            ; initialize the dest pointer
        mov     [esi+04],eax            ; store 1st high part

        rep     movsd                   ; ripple the rest

fmq90:
        pop     edi
        pop     esi
        stdRET  _RtlFillMemoryUlonglong

stdENDP _RtlFillMemoryUlonglong
|
||
|
||
subttl "RtlZeroMemory"
|
||
;++
|
||
;
|
||
; VOID
|
||
; RtlFillMemoryUlong (
|
||
; IN PVOID Destination,
|
||
; IN ULONG Length,
|
||
; IN ULONG Fill
|
||
; )
|
||
;
|
||
; Routine Description:
|
||
;
|
||
; This function fills memory with a 32-bit value. The Destination pointer
|
||
; must be aligned on a 4 byte boundary and the low order two bits of the
|
||
; Length parameter are ignored.
|
||
;
|
||
; Arguments:
|
||
;
|
||
; Destination - Supplies a pointer to the memory to zero.
|
||
;
|
||
; Length - Supplies the Length, in bytes, of the memory to be zeroed.
|
||
;
|
||
; Fill - Supplies the 32-bit value to fill memory with.
|
||
;
|
||
; Return Value:
|
||
;
|
||
; None.
|
||
;
|
||
;--
|
||
|
||
; definitions for arguments
|
||
; (TOS) = Return address
|
||
|
||
;
; Argument offsets are relative to esp after the single push of edi.
; (These text equates replace the same names used by the routine above.)
;

EfmlDestination equ     [esp + 4 + 4]
EfmlLength      equ     [esp + 4 + 8]
EfmlFill        equ     [esp + 4 + 12]

        CODE_ALIGNMENT
cPublicProc _RtlFillMemoryUlong ,3
cPublicFpo 3,1

;
; Register use for REP STOSD:
;   edi -> next dword to store
;   ecx =  number of dwords to store (Length / 4, remainder dropped)
;   eax =  32-bit fill value
; The direction flag is assumed to be clear on entry.
;

        push    edi                     ; preserve callee-saved register

        mov     edi,EfmlDestination
if DBG
ifndef BLDR_KERNEL_RUNTIME

;
; Checked builds: report a destination that is not dword aligned, and
; break in if a debugger is present. eax/ecx are loaded only afterwards,
; so the calls cannot disturb them.
;

        test    edi,3
        jz      fmu10                   ; aligned, skip the diagnostics
        push    edi
        push    offset FLAT:_MsgUnalignedPtr
        call    _DbgPrint               ; cdecl - caller pops arguments
        add     esp, 2 * 4
ifdef NTOS_KERNEL_RUNTIME
        cmp     _KdDebuggerEnabled,0
else
        mov     eax,fs:[PcTeb]
        mov     eax,[eax].TebPeb
        cmp     byte ptr [eax].PebBeingDebugged,0
endif
        je      fmu10                   ; no debugger, just continue
        call    _DbgBreakPoint@0
fmu10:
endif
endif
        mov     eax,EfmlFill
        mov     ecx,EfmlLength
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2 ; bytes -> dwords

;
; Store the dwords; a zero count stores nothing.
;

        rep     stosd

        pop     edi
        stdRET  _RtlFillMemoryUlong

stdENDP _RtlFillMemoryUlong
|
||
|
||
subttl "RtlZeroMemory"
|
||
;++
|
||
;
|
||
; VOID
|
||
; RtlZeroMemory (
|
||
; IN PVOID Destination,
|
||
; IN ULONG Length
|
||
; )
|
||
;
|
||
; Routine Description:
|
||
;
|
||
; This function zeros memory.
|
||
;
|
||
; Arguments:
|
||
;
|
||
; Destination - Supplies a pointer to the memory to zero.
|
||
;
|
||
; Length - Supplies the Length, in bytes, of the memory to be zeroed.
|
||
;
|
||
; Return Value:
|
||
;
|
||
; None.
|
||
;
|
||
;--
|
||
|
||
; definitions for arguments
|
||
; (TOS) = Return address
|
||
|
||
;
; Argument offsets are relative to esp after the single push of edi.
;

EzmDestination  equ     [esp + 4 + 4]
EzmLength       equ     [esp + 4 + 8]

        CODE_ALIGNMENT
cPublicProc _RtlZeroMemory ,2
cPublicFpo 2,1

;
; Register use:
;   edi -> next byte to clear
;   ecx =  count for the current REP STOS
;   edx =  trailing byte count (Length mod 4)
;   eax =  zero
;

        push    edi                     ; preserve callee-saved register

        mov     ecx,EzmLength
        mov     edi,EzmDestination
        cld                             ; clear upward
        xor     eax,eax                 ; value to store

        mov     edx,ecx
        shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2  ; (ecx) = whole dwords
        and     edx,ZERO_MEMORY_ALIGNMENT_MASK  ; (edx) = leftover bytes

;
; Clear the whole dwords, then the 0-3 leftover bytes. A REP with a zero
; count stores nothing, so no explicit tests are needed.
;

        rep     stosd
        mov     ecx,edx
        rep     stosb

        pop     edi
        stdRET  _RtlZeroMemory

stdENDP _RtlZeroMemory
|
||
|
||
page , 132
|
||
subttl "RtlMoveMemory"
|
||
;++
|
||
;
|
||
; VOID
|
||
; RtlMoveMemory (
|
||
; IN PVOID Destination,
|
||
; IN PVOID Source OPTIONAL,
|
||
; IN ULONG Length
|
||
; )
|
||
;
|
||
; Routine Description:
|
||
;
|
||
; This function moves memory either forward or backward, aligned or
|
||
; unaligned, in 4-byte blocks, followed by any remaining bytes.
|
||
;
|
||
; Arguments:
|
||
;
|
||
; Destination - Supplies a pointer to the destination of the move.
|
||
;
|
||
; Source - Supplies a pointer to the memory to move.
|
||
;
|
||
; Length - Supplies the Length, in bytes, of the memory to be moved.
|
||
;
|
||
; Return Value:
|
||
;
|
||
; None.
|
||
;
|
||
;--
|
||
|
||
; Definitions of arguments
|
||
; (TOS) = Return address
|
||
|
||
;
; Argument offsets are relative to esp after the two register pushes in
; the prologue.
;

EmmDestination  equ     [esp + 8 + 4]
EmmSource       equ     [esp + 8 + 8]
EmmLength       equ     [esp + 8 + 12]

        CODE_ALIGNMENT
cPublicProc _RtlMoveMemory ,3
cPublicFpo 3,2

;
; Register use:
;   esi -> source
;   edi -> destination
;   ecx =  count for the current REP MOVS
;   edx =  trailing byte count (Length mod 4), forward copy only
;

        push    esi                     ; preserve callee-saved registers
        push    edi

        mov     esi,EmmSource
        mov     edi,EmmDestination
        mov     ecx,EmmLength
if DBG
        inc     _RtlpZeroCount          ; checked build statistics
        add     _RtlpZeroBytes,ecx
endif
        cld                             ; default to an upward copy

;
; Pick the copy direction:
;   Source == Destination                 nothing to do
;   Source >  Destination                 forward copy is always safe
;   Source <  Destination, and the gap
;     (Destination - Source) >= Length    buffers are disjoint, forward
;   otherwise                             destination overlaps the end of
;                                         the source, copy backward
;

        cmp     esi,edi
        je      mmDone
        ja      mmForward
        mov     eax,edi
        sub     eax,esi                 ; (eax) = Destination - Source
        cmp     ecx,eax
        ja      mmBackward              ; Length > gap, overlapping

;
; Forward copy: whole dwords first, then the 0-3 leftover bytes. A REP
; with a zero count moves nothing, so no explicit tests are needed.
;

mmForward:
        mov     edx,ecx
        shr     ecx,MEMORY_ALIGNMENT_LOG2       ; (ecx) = whole dwords
        and     edx,MEMORY_ALIGNMENT_MASK       ; (edx) = leftover bytes
        rep     movsd
        mov     ecx,edx
        rep     movsb

mmDone:
        pop     edi
        pop     esi
        stdRET  _RtlMoveMemory

;
; Backward copy: point both registers at the LAST byte of their buffers
; and move down a byte at a time, then put the direction flag back.
;

mmBackward:
        std
        lea     esi,[esi+ecx-1]
        lea     edi,[edi+ecx-1]
        rep     movsb
        cld
        jmp     short mmDone

stdENDP _RtlMoveMemory
|
||
|
||
subttl "RtlCopyMemoryNonTemporal"
|
||
;
|
||
; Register Definitions (for instruction macros).
|
||
;
|
||
|
||
;
; Register numbers as they appear in the r/m field of a ModR/M byte.
; NOTE: rESP (4) must not be used with the macros below - an r/m value
; of 4 selects a SIB byte, which these hand encodings do not emit.
;

rEAX    equ     0
rECX    equ     1
rEDX    equ     2
rEBX    equ     3
rESP    equ     4
rEBP    equ     5
rESI    equ     6
rEDI    equ     7

;
; Block size (64 bytes) and dword size (4 bytes) split parameters used by
; the copy routines below.
;

MEMORY_ALIGNMENT_MASK0  = 63
MEMORY_ALIGNMENT_LOG2_0 = 6

MEMORY_ALIGNMENT_MASK1  = 3
MEMORY_ALIGNMENT_LOG2_1 = 2

;
; Hand-assembled encodings for instructions emitted as raw bytes.
;

;
; sfence
;

sfence macro
        db      0FH, 0AEH, 0F8H
        endm

;
; prefetchnta [GeneralReg + Offset]
;
; The compact form carries an 8-bit displacement, which the processor
; SIGN-extends: an Offset of 128 would really address [reg - 128]. Any
; Offset above 127 is therefore emitted in the 32-bit displacement form
; (3 bytes longer) so the instruction addresses what the caller asked for.
;

prefetchnta_short macro GeneralReg, Offset
if (Offset) GT 127
        db      0FH, 018H, 080H + GeneralReg
        dd      Offset
else
        db      0FH, 018H, 040H + GeneralReg, Offset
endif
        endm

;
; prefetchnta [GeneralReg + Offset], always with a 32-bit displacement.
;

prefetchnta_long macro GeneralReg, Offset
        db      0FH, 018H, 080h + GeneralReg
        dd      Offset
        endm

;
; movnti [GeneralReg + Offset], eax      (8-bit displacement, max 127)
;

movnti_eax macro GeneralReg, Offset
        db      0FH, 0C3H, 040H + GeneralReg, Offset
        endm

;
; movnti [GeneralReg], eax               (no displacement)
;

movnti_eax_0_disp macro GeneralReg
        db      0FH, 0C3H, 000H + GeneralReg
        endm

;
; movnti [GeneralReg + Offset], ebx      (8-bit displacement, max 127)
;

movnti_ebx macro GeneralReg, Offset
        db      0FH, 0C3H, 058H + GeneralReg, Offset
        endm
|
||
|
||
;
|
||
;
|
||
; Macro that moves 64bytes (1 cache line using movnti (eax and ebx registers)
|
||
;
|
||
;
|
||
|
||
;
; movnticopy64bytes
;
; Copies the 64 bytes at [esi] to [edi] as eight 8-byte pairs: each pair
; is loaded into eax/ebx with ordinary moves and written out with movnti.
; esi and edi are NOT advanced; eax and ebx are destroyed.
;
; The first pair is spelled out because the store at offset 0 uses the
; no-displacement movnti encoding; the remaining seven pairs are
; generated from the list of their starting offsets.
;

movnticopy64bytes macro
        mov     eax, [esi]
        mov     ebx, [esi + 4]
        movnti_eax_0_disp rEDI
        movnti_ebx rEDI, 4

        irp     Disp,<8,16,24,32,40,48,56>
        mov     eax, [esi + Disp]
        mov     ebx, [esi + Disp + 4]
        movnti_eax rEDI, Disp
        movnti_ebx rEDI, Disp+4
        endm
        endm
|
||
|
||
|
||
;++
|
||
;
|
||
; VOID
|
||
; RtlCopyMemoryNonTemporal(
|
||
; IN PVOID Destination,
|
||
; IN PVOID Source ,
|
||
; IN ULONG Length
|
||
; )
|
||
;
|
||
; Routine Description:
|
||
;
|
||
; This function copies nonoverlapping from one buffer to another
|
||
; using nontemporal moves that do not pollute the cache.
|
||
;
|
||
; Arguments:
|
||
;
|
||
; Destination - Supplies a pointer to the destination of the move.
|
||
;
|
||
; Source - Supplies a pointer to the memory to move.
|
||
;
|
||
; Length - Supplies the Length, in bytes, of the memory to be moved.
|
||
;
|
||
; Return Value:
|
||
;
|
||
; None.
|
||
;
|
||
;--
|
||
|
||
cPublicProc _RtlCopyMemoryNonTemporal ,3

;
; Argument offsets are relative to the ebp frame set up below
; ([ebp] = saved ebp, [ebp+4] = return address). These equates are also
; used by the two RtlPrefetchCopyMemory routines that follow.
;

CPNDestination  equ     [ebp + 4 + 4]
CPNSource       equ     [ebp + 4 + 8]
CPNLength       equ     [ebp + 4 + 12]

;
; Register use:
;   esi -> source, edi -> destination
;   edx =  number of 64-byte blocks (later: number of tail dwords)
;   ecx =  Length mod 64           (later: Length mod 4)
;   eax, ebx = data in flight (ebx is callee-saved, hence the push)
;

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx

        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength

;
; A zero-length request must not touch Source at all.
;

        or      ecx, ecx
        jz      ExitRoutine

;
; Before prefetching we must guarantee the TLB is valid. A single byte
; is enough to touch the page and cannot read past a 1-3 byte buffer.
;

        mov     al, [esi]

        cld

;
; Split the length into 64-byte blocks and a remainder.
;

        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0     ; (ecx) = Length mod 64
        shr     edx, MEMORY_ALIGNMENT_LOG2_0    ; (edx) = 64-byte blocks
        je      Copy4                           ; less than one block

;
; Prime the prefetcher for up to three blocks ahead, dropping into the
; unrolled tail (copy64 / copy128 / copy192) when that is all there is.
; Offsets of 128 and above do not fit a signed 8-bit displacement, so the
; 32-bit displacement encoding is required.
;

        dec     edx
        je      copy64                          ; exactly 1 block

        prefetchnta_long rESI, 128
        dec     edx
        je      copy128                         ; exactly 2 blocks

        prefetchnta_long rESI, 192
        dec     edx
        je      copy192                         ; exactly 3 blocks

;
; Steady state: (edx) = blocks - 3. Copy one block per iteration while
; prefetching the block four ahead; the last three blocks are handled by
; the fall-through code below.
;

copyLoop:
        prefetchnta_long rESI, 256

        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

        dec     edx
        jnz     copyLoop

copy192:
        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

copy128:
        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

copy64:
        movnticopy64bytes

        or      ecx, ecx                ; anything less than 64 to do?
        jz      ExitRoutine

;
; Step past the block just copied, THEN prefetch: the line wanted is the
; one holding the tail bytes, not the one that was just consumed.
;

        lea     esi, [esi + 64]
        lea     edi, [edi + 64]
        prefetchnta_short rESI, 0

;
; Handle the remaining (< 64) bytes: 32-bit chunks first, then bytes.
;

Copy4:
        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK1     ; (ecx) = leftover bytes
        shr     edx, MEMORY_ALIGNMENT_LOG2_1    ; (edx) = whole dwords
        jz      RemainingBytes                  ; no whole dwords

Copy4Loop:
        mov     eax, [esi]
        movnti_eax_0_disp rEDI
        lea     esi, [esi+4]
        lea     edi, [edi+4]
        dec     edx
        jnz     Copy4Loop

RemainingBytes:
        or      ecx, ecx
        jz      ExitRoutine
        rep     movsb

ExitRoutine:
        sfence                          ; Make all stores globally visible
        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlCopyMemoryNonTemporal

stdENDP _RtlCopyMemoryNonTemporal
|
||
|
||
;++
|
||
;
|
||
; VOID
|
||
; RtlPrefetchCopyMemory(
|
||
; IN PVOID Destination,
|
||
; IN PVOID Source ,
|
||
; IN ULONG Length
|
||
; )
|
||
;
|
||
; Routine Description:
|
||
;
|
||
; This function copies nonoverlapping from one buffer to another
|
||
; prefetching the source 256 bytes ahead.
|
||
;
|
||
; Arguments:
|
||
;
|
||
; Destination - Supplies a pointer to the destination of the move.
|
||
;
|
||
; Source - Supplies a pointer to the memory to move.
|
||
;
|
||
; Length - Supplies the Length, in bytes, of the memory to be moved.
|
||
;
|
||
; Return Value:
|
||
;
|
||
; None.
|
||
;
|
||
;--
|
||
|
||
cPublicProc _RtlPrefetchCopyMemory,3

;
; Register use:
;   esi -> source, edi -> destination (both advanced by REP MOVS)
;   edx =  number of 64-byte blocks still to schedule
;   ecx =  Length mod 64, parked on the stack while ecx serves as the
;          REP MOVSD count
; Arguments are addressed through the CPN* equates (ebp frame).
;

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx

        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength

;
; A zero-length request must not touch Source at all.
;

        or      ecx, ecx
        jz      pcmExitRoutine

;
; Before prefetching we must guarantee the TLB is valid. A single byte
; is enough to touch the page and cannot read past a 1-3 byte buffer.
;

        mov     al, [esi]

        cld

;
; Split the length into 64-byte blocks and a remainder.
;

        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0     ; (ecx) = Length mod 64
        shr     edx, MEMORY_ALIGNMENT_LOG2_0    ; (edx) = 64-byte blocks
        je      short pcmCopy4                  ; less than one block

;
; PUSH does not alter the flags, so each JE below still tests the DEC.
; Offsets of 128 and above do not fit a signed 8-bit displacement, so the
; 32-bit displacement encoding is required.
;

        dec     edx
        push    ecx                             ; park the remainder
        je      short pcmcopy64                 ; exactly 1 block

        prefetchnta_long rESI, 128
        dec     edx
        je      short pcmcopy128                ; exactly 2 blocks

        prefetchnta_long rESI, 192
        dec     edx
        je      short pcmcopy192                ; exactly 3 blocks

;
; Steady state: (edx) = blocks - 3. Copy one block per iteration while
; prefetching the block four ahead; the last three blocks are handled by
; the fall-through code below.
;

pcmcopyLoop:
        prefetchnta_long rESI, 256

        mov     ecx, 16                 ; 16 dwords = 64 bytes
        rep     movsd

        dec     edx
        jnz     short pcmcopyLoop

pcmcopy192:
        mov     ecx, 16
        rep     movsd

pcmcopy128:
        mov     ecx, 16
        rep     movsd

pcmcopy64:
        mov     ecx, 16
        rep     movsd

        pop     ecx                     ; recover the remainder
        or      ecx, ecx                ; anything less than 64 to do?
        jz      short pcmExitRoutine

        prefetchnta_short rESI, 0       ; esi already points at the tail

;
; Copy last part byte by byte.
;

pcmCopy4:
        or      ecx, ecx
        jz      short pcmExitRoutine
        rep     movsb

pcmExitRoutine:
        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlPrefetchCopyMemory

stdENDP _RtlPrefetchCopyMemory
|
||
|
||
;++
|
||
;
|
||
; VOID
|
||
; RtlPrefetchCopyMemory32(
|
||
; IN PVOID Destination,
|
||
; IN PVOID Source ,
|
||
; IN ULONG Length
|
||
; )
|
||
;
|
||
; Routine Description:
|
||
;
|
||
; This function copies nonoverlapping from one buffer to another
|
||
; prefetching the source 256 bytes ahead.
|
||
;
|
||
; Arguments:
|
||
;
|
||
; Destination - Supplies a pointer to the destination of the move.
|
||
;
|
||
; Source - Supplies a pointer to the memory to move.
|
||
;
|
||
; Length - Supplies the Length, in bytes, of the memory to be moved.
|
||
;
|
||
; Return Value:
|
||
;
|
||
; None.
|
||
;
|
||
;--
|
||
|
||
cPublicProc _RtlPrefetchCopyMemory32,3

;
; Same algorithm as RtlPrefetchCopyMemory, but two prefetches 32 bytes
; apart are issued for every 64-byte block.
;
; Register use:
;   esi -> source, edi -> destination (both advanced by REP MOVS)
;   edx =  number of 64-byte blocks still to schedule
;   ecx =  Length mod 64, parked on the stack while ecx serves as the
;          REP MOVSD count
; Arguments are addressed through the CPN* equates (ebp frame).
;

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx

        mov     esi, CPNSource
        mov     edi, CPNDestination
        mov     ecx, CPNLength

;
; A zero-length request must not touch Source at all.
;

        or      ecx, ecx
        jz      pcm32ExitRoutine

;
; Before prefetching we must guarantee the TLB is valid. A single byte
; is enough to touch the page and cannot read past a 1-3 byte buffer.
;

        mov     al, [esi]

        cld

;
; Split the length into 64-byte blocks and a remainder.
;

        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0     ; (ecx) = Length mod 64
        shr     edx, MEMORY_ALIGNMENT_LOG2_0    ; (edx) = 64-byte blocks
        je      short pcm32Copy4                ; less than one block

;
; Neither the prefetch nor the PUSH alters the flags, so each JE below
; still tests the preceding DEC. Offsets of 128 and above do not fit a
; signed 8-bit displacement, so the 32-bit displacement encoding is
; required for them.
;

        dec     edx
        prefetchnta_short rESI, 32
        push    ecx                             ; park the remainder
        je      short pcm32copy64               ; exactly 1 block

        prefetchnta_long rESI, 128
        prefetchnta_long rESI, 160
        dec     edx
        je      short pcm32copy128              ; exactly 2 blocks

        prefetchnta_long rESI, 192
        prefetchnta_long rESI, 224              ; second half of that block
        dec     edx
        je      short pcm32copy192              ; exactly 3 blocks

;
; Steady state: (edx) = blocks - 3. Copy one block per iteration while
; prefetching both halves of the block four ahead; the last three blocks
; are handled by the fall-through code below.
;

pcm32copyLoop:
        prefetchnta_long rESI, 256
        prefetchnta_long rESI, 288

        mov     ecx, 16                 ; 16 dwords = 64 bytes
        rep     movsd

        dec     edx
        jnz     short pcm32copyLoop

pcm32copy192:
        mov     ecx, 16
        rep     movsd

pcm32copy128:
        mov     ecx, 16
        rep     movsd

pcm32copy64:
        mov     ecx, 16
        rep     movsd

        pop     ecx                     ; recover the remainder
        or      ecx, ecx                ; anything less than 64 to do?
        jz      short pcm32ExitRoutine

        prefetchnta_short rESI, 0       ; esi already points at the tail

;
; Copy last part byte by byte.
;

pcm32Copy4:
        or      ecx, ecx
        jz      short pcm32ExitRoutine
        rep     movsb

pcm32ExitRoutine:
        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _RtlPrefetchCopyMemory32

stdENDP _RtlPrefetchCopyMemory32
|
||
|
||
subttl "RtlPrefetchMemoryNonTemporal"
|
||
|
||
;++
|
||
;
|
||
; VOID
|
||
; FASTCALL
|
||
; RtlPrefetchMemoryNonTemporal(
|
||
; IN PVOID Source,
|
||
; IN SIZE_T Length
|
||
; )
|
||
;
|
||
; Routine Description:
|
||
;
|
||
; This function prefetches memory at Source, for Length bytes into the
|
||
; closest cache to the processor.
|
||
;
|
||
; Arguments:
|
||
;
|
||
; Source - Supplies a pointer to the memory to prefetch.
|
||
;
|
||
; Length - Supplies the Length, in bytes, of the memory to be prefetched.
|
||
;
|
||
; Return Value:
|
||
;
|
||
; None.
|
||
;
|
||
;--
|
||
|
||
;
; This routine is assembled only for the kernel runtime: it is skipped
; for the boot loader and for any build without NTOS_KERNEL_RUNTIME.
;

ifndef BLDR_KERNEL_RUNTIME
ifdef NTOS_KERNEL_RUNTIME

        extrn   _KePrefetchNTAGranularity:DWORD

cPublicFastCall RtlPrefetchMemoryNonTemporal ,2

;
; Fastcall register arguments:
;   ecx -> memory to prefetch (Source)
;   edx =  number of bytes    (Length)
;
; The RET below must stay the very first instruction: it makes the
; routine a no-op until it is patched out at boot time on processors
; that support the prefetch instruction.
;

        ret                             ; patched out at boot.

        mov     eax, _KePrefetchNTAGranularity ; (eax) = prefetch stride

;
; Issue one prefetch per stride. The loop body runs at least once and
; stops when the remaining length reaches zero or the subtraction
; borrows (unsigned compare).
;

pmnt10: prefetchnta_short rECX, 0       ; prefetch the line at [ecx]
        add     ecx, eax                ; advance to the next line
        sub     edx, eax                ; account for the bytes covered
        ja      short pmnt10            ; more left, keep going

        fstRET  RtlPrefetchMemoryNonTemporal

fstENDP RtlPrefetchMemoryNonTemporal

endif
endif
|
||
|
||
_TEXT$00 ends
|
||
end
|