vmware-svga/examples/screen-dma-coalesce/main.c
Micah Dowty 68478eab4b There are many new tests and examples that we wrote internally at VMware,
but couldn't release immediately since they depended on virtual GPU features
which were not yet publicly released in any products.  This checkin moves those
features from our internal repository to the open source repository. Future   
development on these tests and examples will take place directly in the open
source repository.

The primary feature added by this patch is 'Screen Object', a new dynamic
display management extension supported by Workstation 7.0 and Fusion 3.0.
See the README for a quick explanation.
2009-10-21 20:20:49 +00:00

224 lines
6.2 KiB
C

/*
* Demonstration for DMA coalescing in SVGA devices which support SVGA
* Screen Object.
*
* All DMA operations, including Screen Object DMAs and legacy
* "UPDATE" commands, include extra guarantees on hosts which support
* the Screen Object extension. DMAs must occur when and only when
* the guest specifies them in the FIFO. All DMA side-effects must
* occur in FIFO order, and at any FENCE the guest is guaranteed that
* all DMAs prior to that fence have taken place.
*
* So, this is fairly strict compared to pre-Screen-Object hosts, but
* the SVGA device still has room to optimize away redundant DMA
* operations that occur between two FENCEs. If the guest asks the
* SVGA device to perform the same DMA 100 times, there is no way to
* tell the difference between performing one DMA and performing 100
* DMAs if they all occur at exactly the same instant.
*
* Many things can act as a barrier for this optimization: other types
* of DMA operations, including readback. DMA from a different
* GMRFB. DMA with incompatible source/dest offsets. FENCEs or legacy
* SYNCs.
*
* As a simple demonstration of this feature, this demo runs three
* timed tests:
*
* - One DMA followed by a fence
* - Ten overlapping DMAs, followed by one fence
* - Ten overlapping DMAs, each followed by a fence
*
* If this optimization is working correctly, the first two tests
* should take nearly the same amount of time, with the third test
* running at about 1/10th the speed.
*/
#include "svga.h"
#include "gmr.h"
#include "screen.h"
#include "intr.h"
#include "screendraw.h"
#include "vmbackdoor.h"
#include "math.h"
#include "mt19937ar.h"
/* GMR ID handed to ScreenDraw_Init() in main(). */
#define GMRID_SCREEN_DRAW 0
/* GMR ID of the random-noise buffer that is the source of every test blit. */
#define GMRID_NOISE 1
/*
 * TestInfo --
 *
 *    Describes one timed test: how many blits are issued per
 *    repetition, where fences are placed, and the text shown
 *    next to the measured result.
 *
 *    Field order matters: testInfoArray initializes these
 *    positionally.
 */
typedef struct {
int numDMAs;          /* Blits issued per repetition. */
Bool fencePerDMA;     /* If TRUE, insert a fence after every blit. */
Bool finalFence;      /* If TRUE, insert one fence after the last blit. */
const char *text;     /* Description printed alongside the timing. */
} TestInfo;
TestInfo testInfoArray[] = {
{
1, FALSE, TRUE,
"One DMA followed by one Fence."
},
{
10, FALSE, TRUE,
"Ten DMAs followed by one Fence.\nShould be nearly the same as #1."
},
{
10, TRUE, FALSE,
"Ten DMAs, each followed by a Fence.\nShould take 10x as long as #1."
},
};
/*
* allocNoise --
*
* Allocates a new GMR, and fills it with random noise.
*/
static void
allocNoise(void)
{
const uint32 numPages = 500;
const uint32 numWords = numPages * PAGE_SIZE / sizeof(uint32);
PPN pages = GMR_DefineContiguous(GMRID_NOISE, numPages);
uint32 *ptr = PPN_POINTER(pages);
int i;
init_genrand(0);
for (i = 0; i < numWords; i++) {
ptr[i] = genrand_int32();
}
}
/*
 * prepareNoiseRect --
 *
 *    Sets up the noise GMR as the blit source: defines the GMRFB
 *    at offset 0 of 'GMRID_NOISE' with a 512-byte pitch, and
 *    picks a random source origin.
 *
 *    Both coordinates of the origin come from a single random
 *    word (bits 0-6 for x, bits 8-14 for y), so each lies in the
 *    range 0..127.
 */
static void
prepareNoiseRect(SVGASignedPoint *origin)  // OUT
{
   /* NOTE(review): presumably 32 bits per pixel, 24-bit color depth. */
   static const SVGAGMRImageFormat noiseFormat = {{{ 32, 24 }}};
   const uint32 noisePitch = 512;
   const SVGAGuestPtr noiseBase = { GMRID_NOISE, 0 };
   const uint32 bits = genrand_int32();

   Screen_DefineGMRFB(noiseBase, noisePitch, noiseFormat);

   origin->y = (bits >> 8) & 0x7F;
   origin->x = bits & 0x7F;
}
/*
* main --
*/
int
main(void)
{
Intr_Init();
Intr_SetFaultHandlers(SVGA_DefaultFaultHandler);
SVGA_Init();
GMR_Init();
Heap_Reset();
SVGA_SetMode(0, 0, 32);
Screen_Init();
ScreenDraw_Init(GMRID_SCREEN_DRAW);
allocNoise();
/*
* Define a screen.
*/
SVGAScreenObject myScreen = {
.structSize = sizeof(SVGAScreenObject),
.id = 0,
.flags = SVGA_SCREEN_HAS_ROOT | SVGA_SCREEN_IS_PRIMARY,
.size = { 800, 600 },
.root = { 0, 0 },
};
Screen_Define(&myScreen);
/*
* Draw some intro text.
*/
ScreenDraw_SetScreen(myScreen.id, myScreen.size.width, myScreen.size.height);
Console_Clear();
ScreenDraw_Border(0, 0, myScreen.size.width, myScreen.size.height, 0xFF0000, 1);
Console_WriteString("Screen DMA Coalescing test.\n"
"\n"
"This example demonstrates an optimization which "
"eliminates redundant DMA operations.\n"
"The three tests below each issue a different "
"combination of DMAs and Fences. The text\n"
"below explains the expected peformance of each test.\n");
/*
* Main loop. Alternate between the three tests, timing each.
*/
while (1) {
int testNum;
const int numRepeats = 200;
for (testNum = 0; testNum < arraysize(testInfoArray); testNum++) {
TestInfo *testInfo = &testInfoArray[testNum];
VMTime before, after;
int repeat;
SVGA_SyncToFence(SVGA_InsertFence());
VMBackdoor_GetTime(&before);
for (repeat = 0; repeat < numRepeats; repeat++) {
int numDMA;
SVGASignedPoint blitOrigin;
/*
* To be coalesced, the DMAs below need the same offset
* between source and dest.
*/
prepareNoiseRect(&blitOrigin);
for (numDMA = 0; numDMA < testInfo->numDMAs; numDMA++) {
const uint32 dmaWidth = 256;
const uint32 dmaHeight = 256;
const uint32 margin = 5;
SVGASignedRect blitDest = { myScreen.size.width - margin - dmaWidth,
myScreen.size.height - margin - dmaHeight,
myScreen.size.width - margin,
myScreen.size.height - margin };
/*
* We can redefine the GMRFB, but coalescing will only
* occur if it's exactly the same for each DMA.
*/
Screen_BlitFromGMRFB(&blitOrigin, &blitDest, myScreen.id);
if (testInfo->fencePerDMA) {
SVGA_InsertFence();
}
}
if (testInfo->finalFence) {
SVGA_InsertFence();
}
}
SVGA_SyncToFence(SVGA_InsertFence());
VMBackdoor_GetTime(&after);
Console_MoveTo(10, 150 + 120 * testNum);
Console_Format("Test #%d: %s\n\nSpeed: %d us ",
testNum + 1, testInfo->text,
VMBackdoor_TimeDiffUS(&before, &after) / numRepeats);
}
}
return 0;
}