vmware-svga/examples/screen-dma-coalesce/main.c

224 lines
6.2 KiB
C

/*
* Demonstration for DMA coalescing in SVGA devices which support SVGA
* Screen Object.
*
* All DMA operations, including Screen Object DMAs and legacy
* "UPDATE" commands, include extra guarantees on hosts which support
* the Screen Object extension. DMAs must occur when and only when
* the guest specifies them in the FIFO. All DMA side-effects must
* occur in FIFO order, and at any FENCE the guest is guaranteed that
* all DMAs prior to that fence have taken place.
*
* So, this is fairly strict compared to pre-Screen-Object hosts, but
* the SVGA device still has room to optimize away redundant DMA
* operations that occur between two FENCEs. If the guest asks the
* SVGA device to perform the same DMA 100 times, there is no way to
* tell the difference between performing one DMA and performing 100
* DMAs if they all occur at exactly the same instant.
*
* Many things can act as a barrier for this optimization: other types
* of DMA operations, including readback. DMA from a different
* GMRFB. DMA with incompatible source/dest offsets. FENCEs or legacy
* SYNCs.
*
* As a simple demonstration of this feature, this demo runs three
* timed tests:
*
* - One DMA followed by a fence
* - Ten overlapping DMAs, followed by one fence
* - Ten overlapping DMAs, each followed by a fence
*
* If this optimization is working correctly, the first two tests
* should take nearly the same amount of time, with the third test
* running at about 1/10th the speed.
*/
#include "svga.h"
#include "gmr.h"
#include "screen.h"
#include "intr.h"
#include "screendraw.h"
#include "vmbackdoor.h"
#include "math.h"
#include "mt19937ar.h"
#define GMRID_SCREEN_DRAW 0
#define GMRID_NOISE 1
/*
 * TestInfo --
 *
 *    Describes one timed test configuration run by main().
 *    Field order matters: testInfoArray uses positional initializers.
 */
typedef struct {
/* Number of blits issued per repeat (bound of main()'s inner DMA loop). */
int numDMAs;
/* When TRUE, main() inserts a fence after every individual blit. */
Bool fencePerDMA;
/* When TRUE, main() inserts one fence after each group of numDMAs blits. */
Bool finalFence;
/* Description printed by main() alongside the measured time. */
const char *text;
} TestInfo;
TestInfo testInfoArray[] = {
{
1, FALSE, TRUE,
"One DMA followed by one Fence."
},
{
10, FALSE, TRUE,
"Ten DMAs followed by one Fence.\nShould be nearly the same as #1."
},
{
10, TRUE, FALSE,
"Ten DMAs, each followed by a Fence.\nShould take 10x as long as #1."
},
};
/*
* allocNoise --
*
* Allocates a new GMR, and fills it with random noise.
*/
static void
allocNoise(void)
{
const uint32 numPages = 500;
const uint32 numWords = numPages * PAGE_SIZE / sizeof(uint32);
PPN pages = GMR_DefineContiguous(GMRID_NOISE, numPages);
uint32 *ptr = PPN_POINTER(pages);
int i;
init_genrand(0);
for (i = 0; i < numWords; i++) {
ptr[i] = genrand_int32();
}
}
/*
 * prepareNoiseRect --
 *
 *    Sets up the noise GMR as the source for a blit: defines the GMRFB
 *    at offset 0 of GMRID_NOISE with 512 bytes per line, and picks a
 *    source origin from one draw of the random number generator.
 *
 *    On return, origin->x and origin->y each lie in the range [0, 127]:
 *    x comes from bits 0-6 of the random word, y from bits 8-14.
 */
static void
prepareNoiseRect(SVGASignedPoint *origin)  // OUT
{
   /* Presumably { bits per pixel, color depth } -- confirm against svga_reg.h. */
   static const SVGAGMRImageFormat noiseFormat = {{{ 32, 24 }}};
   const SVGAGuestPtr noiseBase = { GMRID_NOISE, 0 };
   const uint32 pitch = 512;
   const uint32 noiseBits = genrand_int32();

   Screen_DefineGMRFB(noiseBase, pitch, noiseFormat);

   origin->x = noiseBits & 0x7F;
   origin->y = (noiseBits >> 8) & 0x7F;
}
/*
* main --
*/
int
main(void)
{
Intr_Init();
Intr_SetFaultHandlers(SVGA_DefaultFaultHandler);
SVGA_Init();
GMR_Init();
Heap_Reset();
SVGA_SetMode(0, 0, 32);
Screen_Init();
ScreenDraw_Init(GMRID_SCREEN_DRAW);
allocNoise();
/*
* Define a screen.
*/
SVGAScreenObject myScreen = {
.structSize = sizeof(SVGAScreenObject),
.id = 0,
.flags = SVGA_SCREEN_HAS_ROOT | SVGA_SCREEN_IS_PRIMARY,
.size = { 800, 600 },
.root = { 0, 0 },
};
Screen_Define(&myScreen);
/*
* Draw some intro text.
*/
ScreenDraw_SetScreen(myScreen.id, myScreen.size.width, myScreen.size.height);
Console_Clear();
ScreenDraw_Border(0, 0, myScreen.size.width, myScreen.size.height, 0xFF0000, 1);
Console_WriteString("Screen DMA Coalescing test.\n"
"\n"
"This example demonstrates an optimization which "
"eliminates redundant DMA operations.\n"
"The three tests below each issue a different "
"combination of DMAs and Fences. The text\n"
"below explains the expected peformance of each test.\n");
/*
* Main loop. Alternate between the three tests, timing each.
*/
while (1) {
int testNum;
const int numRepeats = 200;
for (testNum = 0; testNum < arraysize(testInfoArray); testNum++) {
TestInfo *testInfo = &testInfoArray[testNum];
VMTime before, after;
int repeat;
SVGA_SyncToFence(SVGA_InsertFence());
VMBackdoor_GetTime(&before);
for (repeat = 0; repeat < numRepeats; repeat++) {
int numDMA;
SVGASignedPoint blitOrigin;
/*
* To be coalesced, the DMAs below need the same offset
* between source and dest.
*/
prepareNoiseRect(&blitOrigin);
for (numDMA = 0; numDMA < testInfo->numDMAs; numDMA++) {
const uint32 dmaWidth = 256;
const uint32 dmaHeight = 256;
const uint32 margin = 5;
SVGASignedRect blitDest = { myScreen.size.width - margin - dmaWidth,
myScreen.size.height - margin - dmaHeight,
myScreen.size.width - margin,
myScreen.size.height - margin };
/*
* We can redefine the GMRFB, but coalescing will only
* occur if it's exactly the same for each DMA.
*/
Screen_BlitFromGMRFB(&blitOrigin, &blitDest, myScreen.id);
if (testInfo->fencePerDMA) {
SVGA_InsertFence();
}
}
if (testInfo->finalFence) {
SVGA_InsertFence();
}
}
SVGA_SyncToFence(SVGA_InsertFence());
VMBackdoor_GetTime(&after);
Console_MoveTo(10, 150 + 120 * testNum);
Console_Format("Test #%d: %s\n\nSpeed: %d us ",
testNum + 1, testInfo->text,
VMBackdoor_TimeDiffUS(&before, &after) / numRepeats);
}
}
return 0;
}