windows-nt/Source/XPSP1/NT/base/hals/halx86/i386/ixmovnti.asm

285 lines
6.1 KiB
NASM
Raw Normal View History

2020-09-26 03:20:57 -05:00
title "Hal Copy using Movnti"
;++
;
;Copyright (c) 2000 Microsoft Corporation
;
;Module Name:
;
; ixmovnti.asm
;
;Abstract:
;
; HAL routine that uses the movnti instruction to copy a buffer,
; similar to RtlMoveMemory, but does not support backward or
; overlapping moves.
; Based on a previously tested fast copy by Jim Crossland.
;Author:
; Gautham chinya
; Intel Corp
;
;Revision History:
;
;--
.386p
.xlist
include callconv.inc ; calling convention macros
include mac386.inc
.list
;
; Register numbers, as they are added into the ModRM byte by the
; hand-encoded instruction macros below.
;
rEAX    equ     0
rECX    equ     1
rEDX    equ     2
rEBX    equ     3
rESP    equ     4
rEBP    equ     5
rESI    equ     6
rEDI    equ     7

;
; Chunk sizes used to split the byte count: 64-byte blocks (shift 6,
; mask 63) and 4-byte dwords (shift 2, mask 3).  Each mask is derived
; from its shift count so the pair cannot drift apart.
;
MEMORY_ALIGNMENT_LOG2_0 = 6
MEMORY_ALIGNMENT_MASK0  = (1 SHL MEMORY_ALIGNMENT_LOG2_0) - 1
MEMORY_ALIGNMENT_LOG2_1 = 2
MEMORY_ALIGNMENT_MASK1  = (1 SHL MEMORY_ALIGNMENT_LOG2_1) - 1
;
; sfence
;
; Emits the SFENCE instruction as raw bytes (0F AE F8).
;
sfence  macro
        db      00Fh, 0AEh              ;; two-byte opcode 0F AE
        db      0F8h                    ;; ModRM byte selecting SFENCE
endm
;
; prefetchnta_short GeneralReg, Offset
;
; Emits PREFETCHNTA [GeneralReg + Offset] as raw bytes.
;
;   GeneralReg - register number (rEAX..rEDI) used as the base.
;   Offset     - constant byte displacement from the base register.
;
; The compact encoding carries the displacement in a single byte that the
; processor sign-extends, so only -128..127 can be expressed that way
; (128 would otherwise be executed as -128).  When Offset falls outside
; that range the macro emits the 32-bit displacement form instead, so the
; address prefetched is always GeneralReg + Offset.
;
prefetchnta_short macro GeneralReg, Offset
    if ((Offset) GT 127) OR ((Offset) LT -128)
        db      0FH, 018H, 080H + GeneralReg            ;; mod=10: disp32 follows
        dd      Offset
    else
        db      0FH, 018H, 040H + GeneralReg, Offset    ;; mod=01: disp8 follows
    endif
endm
;
; prefetchnta_long BaseReg, Disp32
;
; Emits PREFETCHNTA [BaseReg + Disp32] as raw bytes, using the form that
; carries a full 32-bit displacement.
;
prefetchnta_long macro BaseReg, Disp32
        db      00Fh, 018h              ;; two-byte opcode 0F 18
        db      080h + BaseReg          ;; ModRM: mod=10, reg=/0, rm=BaseReg
        dd      Disp32                  ;; 32-bit displacement
endm
;
; movnti_eax BaseReg, Disp8
;
; Emits MOVNTI [BaseReg + Disp8], eax as raw bytes.  The displacement is
; stored in a single byte, so it must fit in a signed byte.
;
movnti_eax macro BaseReg, Disp8
        db      00Fh, 0C3h                              ;; two-byte opcode 0F C3
        db      040h + (rEAX SHL 3) + BaseReg           ;; ModRM: mod=01, reg=eax, rm=BaseReg
        db      Disp8                                   ;; 8-bit displacement
endm
;
; movnti_eax_0_disp BaseReg
;
; Emits MOVNTI [BaseReg], eax as raw bytes, with no displacement byte.
; NOTE(review): in this ModRM mode rESP and rEBP select other addressing
; forms, so this is only meaningful for the remaining registers; the file
; only ever passes rEDI.
;
movnti_eax_0_disp macro BaseReg
        db      00Fh, 0C3h                              ;; two-byte opcode 0F C3
        db      (rEAX SHL 3) + BaseReg                  ;; ModRM: mod=00, reg=eax, rm=BaseReg
endm
;
; movnti_ebx BaseReg, Disp8
;
; Emits MOVNTI [BaseReg + Disp8], ebx as raw bytes.  The displacement is
; stored in a single byte, so it must fit in a signed byte.
;
movnti_ebx macro BaseReg, Disp8
        db      00Fh, 0C3h                              ;; two-byte opcode 0F C3
        db      040h + (rEBX SHL 3) + BaseReg           ;; ModRM: mod=01, reg=ebx, rm=BaseReg
        db      Disp8                                   ;; 8-bit displacement
endm
;
; movnticopy64bytes
;
; Copies 64 bytes from [esi] to [edi] as eight 8-byte groups.  Each group
; loads two dwords into eax and ebx and writes them out with movnti.
; esi and edi are left unchanged; eax and ebx are overwritten.
;
movnticopy64bytes macro
        ;; First group: the eax store uses the no-displacement encoding.
        mov     eax, [esi]
        mov     ebx, [esi + 4]
        movnti_eax_0_disp rEDI
        movnti_ebx rEDI, 4
        ;; Remaining seven groups at offsets 8, 16, ... 56.
    irp BlockOfs, <8, 16, 24, 32, 40, 48, 56>
        mov     eax, [esi + BlockOfs]
        mov     ebx, [esi + BlockOfs + 4]
        movnti_eax rEDI, BlockOfs
        movnti_ebx rEDI, BlockOfs+4
    endm
endm
_TEXT$03 SEGMENT DWORD PUBLIC 'CODE'
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
page ,132
subttl "HalpMovntiCopyBuffer"
;++
;
; VOID
; HalpMovntiCopyBuffer(
;     IN PVOID Destination,
;     IN PVOID Source,
;     IN ULONG Length
;     )
;
; Routine Description:
;
;     Copies Length bytes from Source to Destination, front to back,
;     in three phases:
;
;       1. whole 64-byte blocks, loaded with ordinary moves and stored
;          with movnti, with prefetchnta issued ahead of the loads;
;       2. the remaining whole dwords, stored with movnti;
;       3. the last 0-3 bytes, with rep movsb.
;
;     An sfence is executed before returning.  The copy always runs
;     forward, so overlapping buffers that need a backward copy are not
;     supported.
;
; Arguments:
;
;     Destination - Supplies a pointer to the destination of the move.
;
;     Source - Supplies a pointer to the memory to move.
;
;     Length - Supplies the Length, in bytes, of the memory to be moved.
;
; Return Value:
;
;     None.
;
; Register usage:
;
;     esi = source cursor          edi = destination cursor
;     edx = block / dword counter  ecx = bytes left for the later phases
;     eax, ebx = data being copied
;
;     esi, edi, ebx and ebp are saved and restored.
;
;--
cPublicProc _HalpMovntiCopyBuffer ,3

; Definitions of arguments
; [ebp] = saved ebp, [ebp + 4] = return address, arguments follow.

EmmDestination  equ     [ebp + 4 + 4]
EmmSource       equ     [ebp + 4 + 8]
EmmLength       equ     [ebp + 4 + 12]

        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx

        mov     esi, EmmSource
        mov     edi, EmmDestination
        mov     ecx, EmmLength
        cld                                     ; rep movsb below must run forward

;
; Nothing to copy: do not touch Source at all.
;
        test    ecx, ecx
        jz      ExitRoutine

;
; Before prefetching we must guarantee the TLB is valid.  Touch a single
; byte only, so that a copy shorter than a dword never reads past the
; end of the source buffer.
;
        movzx   eax, byte ptr [esi]

;
; Split the length: edx = number of 64-byte blocks, ecx = bytes left over.
; Check if less than 64 bytes.
;
        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK0
        shr     edx, MEMORY_ALIGNMENT_LOG2_0
        je      Copy4

;
; One, two or three blocks enter the unrolled tail directly.  Offsets of
; 128 and above do not fit a sign-extended 8-bit displacement (128 would
; be executed as -128), so the 32-bit displacement form is used.
;
        dec     edx
        je      copy64
        prefetchnta_long rESI, 128
        dec     edx
        je      copy128
        prefetchnta_long rESI, 192
        dec     edx
        je      copy192

;
; Four or more blocks: edx = blocks - 3.  Copy one block per iteration
; while prefetching four blocks ahead; the last three blocks are handled
; by the fall-through code below.
;
copyLoop:
        prefetchnta_long rESI, 256
        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]
        dec     edx
        jnz     copyLoop

copy192:
        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

copy128:
        movnticopy64bytes
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

copy64:
        movnticopy64bytes
        test    ecx, ecx                        ; anything less than 64 to do?
        jz      ExitRoutine

;
; The leftover bytes start just past the block copied above; esi has not
; been advanced yet, so they are at esi + 64.
;
        prefetchnta_short rESI, 64

;
; Update pointers for last copy
;
        lea     esi, [esi + 64]
        lea     edi, [edi + 64]

;
; Handle extra bytes here in 32-bit chunks and then 8-bit bytes.
; edx = number of whole dwords, ecx = trailing bytes (0-3).
;
Copy4:
        mov     edx, ecx
        and     ecx, MEMORY_ALIGNMENT_MASK1
        shr     edx, MEMORY_ALIGNMENT_LOG2_1

;
; If the number of 32-bit words to move is non-zero, then do it
;
        jz      RemainingBytes

Copy4Loop:
        mov     eax, [esi]
        movnti_eax_0_disp rEDI
        lea     esi, [esi + 4]
        lea     edi, [edi + 4]
        dec     edx
        jnz     Copy4Loop

RemainingBytes:
        test    ecx, ecx
        jz      ExitRoutine
        rep     movsb                           ; ecx = 1..3 trailing bytes

ExitRoutine:
        sfence                                  ; Make all stores globally visible
        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        stdRET  _HalpMovntiCopyBuffer

stdENDP _HalpMovntiCopyBuffer
_TEXT$03 ends
end