398 lines
10 KiB
C
398 lines
10 KiB
C
#include <windows.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
|
|
BOOL fShowScaling;
|
|
|
|
#define MAX_THREADS 32
|
|
|
|
|
|
//
|
|
// - hStartOfRace is a manual reset event that is signalled when
|
|
// all of the threads are supposed to cut loose and begin working
|
|
//
|
|
// - hEndOfRace is a manual reset event that is signalled once the end time
|
|
// has been retrieved and it is ok for the threads to exit
|
|
//
|
|
|
|
HANDLE hStartOfRace;
|
|
HANDLE hEndOfRace;
|
|
|
|
|
|
//
|
|
// - ThreadReadyDoneEvents are an array of autoclearing events. The threads
|
|
// initially signal these events once they have reached their start routines
|
|
// and are ready to being processing. Once they are done processing, they
|
|
// signal thier event to indicate that they are done processing.
|
|
//
|
|
// - ThreadHandles are an array of thread handles to the worker threads. The
|
|
// main thread waits on these to know when all of the threads have exited.
|
|
//
|
|
|
|
HANDLE ThreadReadyDoneEvents[MAX_THREADS];
|
|
HANDLE ThreadHandles[MAX_THREADS];
|
|
|
|
DWORD WorkerThread(PVOID ThreadIndex);
|
|
DWORD ThreadId;
|
|
DWORD StartTicks, EndTicks;
|
|
HANDLE IoHandle;
|
|
|
|
#define SIXTY_FOUR_K (64*1024)
|
|
#define SIXTEEN_K (16*1024)
|
|
unsigned int InitialBuffer[SIXTY_FOUR_K/sizeof(unsigned int)];
|
|
#define NUMBER_OF_WRITES ((1024*1024*8)/SIXTY_FOUR_K)
|
|
#define BUFFER_MAX (64*1024)
|
|
#define FILE_SIZE ((1024*1024*8)-BUFFER_MAX)
|
|
|
|
/*
|
|
// Each thread has a THREAD_WORK structure. This contains the address
|
|
// of the cells that this thread is responsible for, and the number of
|
|
// cells it is supposed to process.
|
|
*/
|
|
|
|
typedef struct _THREAD_WORK {
|
|
unsigned long *CellVector;
|
|
int NumberOfCells;
|
|
int RecalcResult;
|
|
BOOL GlobalMode;
|
|
} THREAD_WORK, *PTHREAD_WORK;
|
|
|
|
unsigned int GlobalData[MAX_THREADS];
|
|
THREAD_WORK ThreadWork[MAX_THREADS];
|
|
#define ONE_MB (1024*1024)
|
|
|
|
unsigned long Mb = 16;
|
|
unsigned long ExpectedRecalcValue;
|
|
unsigned long ActualRecalcValue;
|
|
unsigned long ContentionValue;
|
|
int fGlobalMode;
|
|
int WorkIndex;
|
|
int BufferSize;
|
|
unsigned long *CellVector;
|
|
|
|
|
|
|
|
DWORD
|
|
DoAnInteration(
|
|
int NumberOfThreads,
|
|
BOOL GlobalMode
|
|
)
|
|
{
|
|
int i;
|
|
int fShowUsage;
|
|
char c, *p, *whocares;
|
|
int NumberOfDwords;
|
|
int CNumberOfDwords;
|
|
int DwordsPerThread;
|
|
char *Answer;
|
|
unsigned long x;
|
|
|
|
fGlobalMode = 0;
|
|
|
|
BufferSize = 1024;
|
|
|
|
hStartOfRace = CreateEvent(NULL,TRUE,FALSE,NULL);
|
|
hEndOfRace = CreateEvent(NULL,TRUE,FALSE,NULL);
|
|
|
|
if ( !hStartOfRace || !hEndOfRace ) {
|
|
fprintf(stderr,"SMPSCALE Race Event Creation Failed\n");
|
|
ExitProcess(1);
|
|
}
|
|
|
|
|
|
/*
|
|
// Prepare the ready done events. These are auto clearing events
|
|
*/
|
|
|
|
for(i=0; i<NumberOfThreads; i++ ) {
|
|
ThreadReadyDoneEvents[i] = CreateEvent(NULL,FALSE,FALSE,NULL);
|
|
if ( !ThreadReadyDoneEvents[i] ) {
|
|
fprintf(stderr,"SMPSCALE Ready Done Event Creation Failed %d\n",GetLastError());
|
|
ExitProcess(1);
|
|
}
|
|
}
|
|
|
|
NumberOfDwords = (Mb*ONE_MB) / sizeof(unsigned long);
|
|
CNumberOfDwords = NumberOfDwords;
|
|
DwordsPerThread = NumberOfDwords / NumberOfThreads;
|
|
|
|
/*
|
|
// Initialize the Cell Vector
|
|
*/
|
|
|
|
for(i=0, ExpectedRecalcValue=0; i<NumberOfDwords; i++ ){
|
|
ExpectedRecalcValue += i;
|
|
CellVector[i] = i;
|
|
}
|
|
|
|
/*
|
|
// Partition the work to the worker threads
|
|
*/
|
|
|
|
for(i=0; i<NumberOfThreads; i++ ){
|
|
ThreadWork[i].CellVector = &CellVector[i*DwordsPerThread];
|
|
ThreadWork[i].NumberOfCells = DwordsPerThread;
|
|
NumberOfDwords -= DwordsPerThread;
|
|
|
|
/*
|
|
// If we have a remainder, give the remaining work to the last thread
|
|
*/
|
|
|
|
if ( NumberOfDwords < DwordsPerThread ) {
|
|
ThreadWork[i].NumberOfCells += NumberOfDwords;
|
|
}
|
|
}
|
|
|
|
/*
|
|
// Create the worker threads
|
|
*/
|
|
|
|
for(i=0; i<NumberOfThreads; i++ ) {
|
|
ThreadWork[i].RecalcResult = 0;
|
|
ThreadWork[i].GlobalMode = GlobalMode;
|
|
GlobalData[i] = 0;
|
|
|
|
ThreadHandles[i] = CreateThread(
|
|
NULL,
|
|
0,
|
|
WorkerThread,
|
|
(PVOID)i,
|
|
0,
|
|
&ThreadId
|
|
);
|
|
if ( !ThreadHandles[i] ) {
|
|
fprintf(stderr,"SMPSCALE Worker Thread Creation Failed %d\n",GetLastError());
|
|
ExitProcess(1);
|
|
}
|
|
CloseHandle(ThreadHandles[i]);
|
|
|
|
}
|
|
|
|
/*
|
|
// All of the worker threads will signal thier ready done event
|
|
// when they are idle and ready to proceed. Once all events have been
|
|
// set, then setting the hStartOfRaceEvent will begin the recalc
|
|
*/
|
|
|
|
i = WaitForMultipleObjects(
|
|
NumberOfThreads,
|
|
ThreadReadyDoneEvents,
|
|
TRUE,
|
|
INFINITE
|
|
);
|
|
|
|
if ( i == WAIT_FAILED ) {
|
|
fprintf(stderr,"SMPSCALE Wait for threads to stabalize Failed %d\n",GetLastError());
|
|
ExitProcess(1);
|
|
}
|
|
|
|
/*
|
|
// Everthing is set to begin the recalc operation
|
|
*/
|
|
|
|
StartTicks = GetTickCount();
|
|
if ( !SetEvent(hStartOfRace) ) {
|
|
fprintf(stderr,"SMPSCALE SetEvent(hStartOfRace) Failed %d\n",GetLastError());
|
|
ExitProcess(1);
|
|
}
|
|
|
|
/*
|
|
// Now just wait for the recalc to complete
|
|
*/
|
|
|
|
i = WaitForMultipleObjects(
|
|
NumberOfThreads,
|
|
ThreadReadyDoneEvents,
|
|
TRUE,
|
|
INFINITE
|
|
);
|
|
|
|
if ( i == WAIT_FAILED ) {
|
|
fprintf(stderr,"SMPSCALE Wait for threads to complete Failed %d\n",GetLastError());
|
|
ExitProcess(1);
|
|
}
|
|
|
|
/*
|
|
// Now pick up the individual recalc values
|
|
*/
|
|
|
|
for(i=0, ActualRecalcValue = 0; i<NumberOfThreads; i++ ){
|
|
ActualRecalcValue += ThreadWork[i].RecalcResult;
|
|
}
|
|
|
|
EndTicks = GetTickCount();
|
|
|
|
if ( ActualRecalcValue != ExpectedRecalcValue ) {
|
|
fprintf(stderr,"SMPSCALE Recalc Failuer !\n");
|
|
ExitProcess(1);
|
|
}
|
|
|
|
return (EndTicks - StartTicks);
|
|
}
|
|
|
|
int __cdecl
|
|
main(
|
|
int argc,
|
|
char *argv[],
|
|
char *envp[]
|
|
)
|
|
{
|
|
DWORD Time,GlobalModeTime;
|
|
DWORD BaseLine;
|
|
DWORD i;
|
|
SYSTEM_INFO SystemInfo;
|
|
|
|
/*
|
|
// Allocate and initialize the CellVector
|
|
*/
|
|
|
|
if ( argc > 1 ) {
|
|
fShowScaling = TRUE;
|
|
}
|
|
else {
|
|
fShowScaling = FALSE;
|
|
}
|
|
|
|
CellVector = (PDWORD)VirtualAlloc(NULL,Mb*ONE_MB,MEM_COMMIT,PAGE_READWRITE);
|
|
if ( !CellVector ) {
|
|
fprintf(stderr,"SMPSCALE Cell Vector Allocation Failed %d\n",GetLastError());
|
|
ExitProcess(1);
|
|
}
|
|
|
|
BaseLine = DoAnInteration(1,FALSE);
|
|
i = 0;
|
|
while(i++<10) {
|
|
Time = DoAnInteration(1,FALSE);
|
|
if ( Time == BaseLine ) {
|
|
break;
|
|
}
|
|
if ( abs(Time-BaseLine) < 2 ) {
|
|
break;
|
|
}
|
|
BaseLine = Time;
|
|
}
|
|
|
|
GetSystemInfo(&SystemInfo);
|
|
|
|
fprintf(stdout,"%d Processor System. Single Processor BaseLine %dms\n\n",
|
|
SystemInfo.dwNumberOfProcessors,
|
|
BaseLine
|
|
);
|
|
|
|
if ( !fShowScaling ) {
|
|
fprintf(stdout," Time Time with Cache Sloshing\n");
|
|
}
|
|
|
|
for ( i=0;i<SystemInfo.dwNumberOfProcessors;i++) {
|
|
Time = DoAnInteration(i+1,FALSE);
|
|
GlobalModeTime = DoAnInteration(i+1,TRUE);
|
|
|
|
if ( fShowScaling ) {
|
|
if ( i > 0 ) {
|
|
fprintf(stdout,"%1d Processors %4dms (%3d%%) %4dms (%3d%%) (with cache contention)\n",
|
|
i+1,Time,((BaseLine*100)/Time-100),GlobalModeTime,((BaseLine*100)/GlobalModeTime-100)
|
|
);
|
|
}
|
|
else {
|
|
fprintf(stdout,"%1d Processors %4dms %4dms (with cache contention)\n",
|
|
i+1,Time,GlobalModeTime
|
|
);
|
|
}
|
|
}
|
|
else {
|
|
fprintf(stdout,"%1d Processors %4dms vs %4dms\n",
|
|
i+1,Time,GlobalModeTime
|
|
);
|
|
}
|
|
}
|
|
|
|
ExitProcess(2);
|
|
}
|
|
|
|
|
|
/*
|
|
// The worker threads perform the recalc operation on their
|
|
// assigned cells. They begin by setting their ready done event
|
|
// to indicate that they are ready to begin the recalc. Then they
|
|
// wait until the hStartOfRace event is signaled. Once this occurs, they
|
|
// do their part of the recalc and when done they signal their ready done
|
|
// event and then wait on the hEndOfRaceEvent
|
|
*/
|
|
|
|
DWORD
|
|
WorkerThread(
|
|
PVOID ThreadIndex
|
|
)
|
|
{
|
|
|
|
unsigned long Me;
|
|
unsigned long *MyCellVectorBase;
|
|
unsigned long *CurrentCellVector;
|
|
unsigned long MyRecalcValue;
|
|
unsigned long MyNumberOfCells;
|
|
unsigned long i,j;
|
|
int GlobalMode;
|
|
HANDLE hEvent;
|
|
BOOL b;
|
|
|
|
Me = (unsigned long)ThreadIndex;
|
|
MyRecalcValue = 0;
|
|
MyCellVectorBase = ThreadWork[Me].CellVector;
|
|
MyNumberOfCells = ThreadWork[Me].NumberOfCells;
|
|
GlobalMode = ThreadWork[Me].GlobalMode;
|
|
|
|
/*
|
|
// Signal that I am ready to go
|
|
*/
|
|
|
|
if ( !SetEvent(ThreadReadyDoneEvents[Me]) ) {
|
|
fprintf(stderr,"SMPSCALE (1) SetEvent(ThreadReadyDoneEvent[%d]) Failed %d\n",Me,GetLastError());
|
|
ExitProcess(1);
|
|
}
|
|
|
|
/*
|
|
// Wait for the master to release us to do the recalc
|
|
*/
|
|
|
|
i = WaitForSingleObject(hStartOfRace,INFINITE);
|
|
if ( i == WAIT_FAILED ) {
|
|
fprintf(stderr,"SMPSCALE Thread %d Wait for start of recalc Failed %d\n",Me,GetLastError());
|
|
ExitProcess(1);
|
|
}
|
|
|
|
/*
|
|
// perform the recalc operation
|
|
*/
|
|
|
|
for (i=0, CurrentCellVector = MyCellVectorBase,j=0; i<MyNumberOfCells; i++ ) {
|
|
if (GlobalMode){
|
|
GlobalData[Me] += *CurrentCellVector++;
|
|
}
|
|
else {
|
|
MyRecalcValue += *CurrentCellVector++;
|
|
}
|
|
}
|
|
if (GlobalMode){
|
|
MyRecalcValue = GlobalData[Me];
|
|
}
|
|
ThreadWork[Me].RecalcResult = MyRecalcValue;
|
|
|
|
/*
|
|
// Signal that I am done and then wait for further instructions
|
|
*/
|
|
|
|
if ( !SetEvent(ThreadReadyDoneEvents[Me]) ) {
|
|
fprintf(stderr,"SMPSCALE (2) SetEvent(ThreadReadyDoneEvent[%d]) Failed %d\n",Me,GetLastError());
|
|
ExitProcess(1);
|
|
}
|
|
|
|
i = WaitForSingleObject(hEndOfRace,INFINITE);
|
|
if ( i == WAIT_FAILED ) {
|
|
fprintf(stderr,"SMPSCALE Thread %d Wait for end of recalc Failed %d\n",Me,GetLastError());
|
|
ExitProcess(1);
|
|
}
|
|
|
|
return MyRecalcValue;
|
|
}
|