485 lines
14 KiB
C
485 lines
14 KiB
C
/******************************Module*Header*******************************\
|
|
* Module Name: blt32.c
|
|
*
|
|
* This module contains the low-level blt functions that are specific to
|
|
* 24bpp.
|
|
*
|
|
* Copyright (c) 1992-1996 Microsoft Corporation
|
|
* Copyright (c) 1993-1996 Matrox Electronic Systems, Ltd.
|
|
\**************************************************************************/
|
|
|
|
#include "precomp.h"
|
|
|
|
/******************************Public*Routine******************************\
|
|
* VOID vMgaPatRealize24bpp
|
|
*
|
|
\**************************************************************************/
|
|
|
|
VOID vMgaPatRealize24bpp(
|
|
PDEV* ppdev,
|
|
RBRUSH* prb)
|
|
{
|
|
BYTE* pjBase;
|
|
BRUSHENTRY* pbe;
|
|
LONG iBrushCache;
|
|
LONG i;
|
|
ULONG* pulSrc;
|
|
|
|
pjBase = ppdev->pjBase;
|
|
|
|
// We have to allocate a new off-screen cache brush entry for
|
|
// the brush:
|
|
|
|
iBrushCache = ppdev->iBrushCache;
|
|
pbe = &ppdev->pbe[iBrushCache];
|
|
|
|
iBrushCache++;
|
|
if (iBrushCache >= ppdev->cBrushCache)
|
|
iBrushCache = 0;
|
|
|
|
ppdev->iBrushCache = iBrushCache;
|
|
|
|
// Update our links:
|
|
|
|
pbe->prbVerify = prb;
|
|
prb->apbe[IBOARD(ppdev)] = pbe;
|
|
|
|
CHECK_FIFO_SPACE(pjBase, 11);
|
|
|
|
CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RPL + blockm_OFF +
|
|
bop_SRCCOPY + bltmod_BUCOL + pattern_OFF +
|
|
transc_BG_OPAQUE + hcprs_SRC_24_BPP));
|
|
|
|
if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
|
|
{
|
|
CP_WRITE(pjBase, DWG_SGN, 0);
|
|
}
|
|
|
|
// The SRC0 - SRC3 registers will be trashed by the blt:
|
|
|
|
ppdev->HopeFlags = SIGN_CACHE;
|
|
|
|
// Since our brushes are always interleaved, we want to send down
|
|
// 2 pels, skip 2 pels, send down 2 pels, etc. So we contrive to
|
|
// adjust the blt width and pitch to do that automatically for us:
|
|
|
|
CP_WRITE(pjBase, DWG_AR3, 0); // Source start address, not
|
|
// included in ARX_CACHE
|
|
CP_WRITE(pjBase, DWG_SHIFT, 0);
|
|
CP_WRITE(pjBase, DWG_LEN, 8); // Transfering 8 scans
|
|
CP_WRITE(pjBase, DWG_AR0, 8); // Source width is 9
|
|
CP_WRITE(pjBase, DWG_AR5, 32); // Source pitch is 32
|
|
|
|
// I'm guessing that there was a hardware bug found with FXLEFT
|
|
// or FXRIGHT being 32 or greater, because the old code does a
|
|
// modulo 32 on 'x' for a reason:
|
|
|
|
CP_WRITE(pjBase, DWG_FXLEFT, pbe->ulLeft);
|
|
CP_WRITE(pjBase, DWG_FXRIGHT, pbe->ulLeft + 15);
|
|
CP_WRITE(pjBase, DWG_YDST, pbe->ulYDst);
|
|
CP_START(pjBase, DWG_PITCH, 32 + ylin_LINEARIZE_NOT);
|
|
|
|
CHECK_FIFO_SPACE(pjBase, 32);
|
|
|
|
ASSERTDD(ppdev->iBitmapFormat == BMF_24BPP,
|
|
"Expect 24bpp packed pattern. You may have to change RealizeBrush");
|
|
|
|
for (pulSrc = prb->aulPattern, i = 8; i != 0; i--, pulSrc += 6)
|
|
{
|
|
CP_WRITE_SRC(pjBase, *(pulSrc));
|
|
CP_WRITE_SRC(pjBase, *(pulSrc + 1));
|
|
CP_WRITE_SRC(pjBase, *(pulSrc + 2));
|
|
CP_WRITE_SRC(pjBase, *(pulSrc + 3));
|
|
CP_WRITE_SRC(pjBase, *(pulSrc + 4));
|
|
CP_WRITE_SRC(pjBase, *(pulSrc + 5));
|
|
|
|
// The pattern has to be 9 pixels wide, with an extra copy of
|
|
// the first pixel:
|
|
|
|
CP_WRITE_SRC(pjBase, *(pulSrc ));
|
|
}
|
|
|
|
// Don't forget to restore the pitch:
|
|
|
|
CHECK_FIFO_SPACE(pjBase, 1);
|
|
CP_WRITE(pjBase, DWG_PITCH, ppdev->cxMemory);
|
|
}
|
|
|
|
/******************************Public*Routine******************************\
|
|
* VOID vMgaFillPat24bpp
|
|
*
|
|
\**************************************************************************/
|
|
|
|
VOID vMgaFillPat24bpp( // Type FNFILL
|
|
PDEV* ppdev,
|
|
LONG c, // Can't be zero
|
|
RECTL* prcl, // List of rectangles to be filled, in relative
|
|
// coordinates
|
|
ULONG rop4, // Rop4
|
|
RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
|
|
POINTL* pptlBrush) // Pattern alignment
|
|
{
|
|
BYTE* pjBase;
|
|
BRUSHENTRY* pbe;
|
|
LONG xOffset;
|
|
LONG yOffset;
|
|
CHAR cFifo;
|
|
ULONG ulHwMix;
|
|
LONG xLeft;
|
|
LONG xRight;
|
|
LONG yTop;
|
|
LONG cx;
|
|
LONG cy;
|
|
LONG xBrush;
|
|
LONG yBrush;
|
|
ULONG ulLinear;
|
|
LONG i;
|
|
|
|
ASSERTDD(!(rbc.prb->fl & RBRUSH_2COLOR), "Can't do 2 colour brushes here");
|
|
|
|
ASSERTDD((rbc.prb != NULL) && (rbc.prb->apbe[IBOARD(ppdev)] != NULL),
|
|
"apbe[iBoard] should be initialized to &beUnrealizedBrush");
|
|
|
|
// We have to ensure that no other brush took our spot in off-screen
|
|
// memory, or we might have to realize the brush for the first time:
|
|
|
|
pbe = rbc.prb->apbe[IBOARD(ppdev)];
|
|
if (pbe->prbVerify != rbc.prb)
|
|
{
|
|
vMgaPatRealize24bpp(ppdev, rbc.prb);
|
|
pbe = rbc.prb->apbe[IBOARD(ppdev)];
|
|
}
|
|
|
|
pjBase = ppdev->pjBase;
|
|
xOffset = ppdev->xOffset;
|
|
yOffset = ppdev->yOffset;
|
|
|
|
do {
|
|
cFifo = GET_FIFO_SPACE(pjBase) - 4;
|
|
} while (cFifo < 0);
|
|
|
|
if (rop4 == 0xf0f0) // PATCOPY
|
|
{
|
|
CP_WRITE(pjBase, DWG_DWGCTL, (opcode_BITBLT + atype_RPL + blockm_OFF +
|
|
trans_0 + bltmod_BUCOL + pattern_ON +
|
|
transc_BG_OPAQUE + bop_SRCCOPY));
|
|
}
|
|
else
|
|
{
|
|
ulHwMix = (rop4 & 0x03) + ((rop4 & 0x30) >> 2);
|
|
|
|
CP_WRITE(pjBase, DWG_DWGCTL, (opcode_BITBLT + atype_RSTR + blockm_OFF +
|
|
trans_0 + bltmod_BUCOL + pattern_ON +
|
|
transc_BG_OPAQUE + (ulHwMix << 16)));
|
|
}
|
|
|
|
if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
|
|
{
|
|
CP_WRITE(pjBase, DWG_SGN, 0);
|
|
}
|
|
|
|
ppdev->HopeFlags = SIGN_CACHE;
|
|
|
|
CP_WRITE(pjBase, DWG_SHIFT, 0);
|
|
CP_WRITE(pjBase, DWG_AR5, 32);
|
|
|
|
do {
|
|
yTop = prcl->top;
|
|
cy = prcl->bottom - yTop;
|
|
xLeft = prcl->left;
|
|
cx = prcl->right - xLeft - 1; // Note inclusiveness
|
|
xBrush = (xLeft - pptlBrush->x) & 7;
|
|
yBrush = (yTop - pptlBrush->y) & 7;
|
|
ulLinear = pbe->ulLinear + (yBrush << 5);
|
|
|
|
// Convert to absolute coordinates:
|
|
|
|
xLeft += xOffset;
|
|
yTop += yOffset;
|
|
|
|
// Due to hardware limitations, we have to draw the rectangle
|
|
// in four or five passes. On each pass, a maximum of two columns
|
|
// of the brush can be drawn.
|
|
|
|
if (xLeft & 1)
|
|
{
|
|
// It seems to be a hardware limitation that our passes always
|
|
// to start on an even pixel when the width is more than one.
|
|
// As such, do an initial strip of width one to align to an even
|
|
// pixel:
|
|
|
|
cFifo -= 6;
|
|
while (cFifo < 0)
|
|
{
|
|
cFifo = GET_FIFO_SPACE(pjBase) - 6;
|
|
}
|
|
|
|
CP_WRITE(pjBase, DWG_LEN, cy);
|
|
CP_WRITE(pjBase, DWG_YDST, yTop);
|
|
CP_WRITE(pjBase, DWG_AR3, ulLinear + xBrush);
|
|
CP_WRITE(pjBase, DWG_AR0, ulLinear + xBrush + 3);
|
|
CP_WRITE(pjBase, DWG_FXLEFT, xLeft);
|
|
CP_START(pjBase, DWG_FXRIGHT, xLeft);
|
|
|
|
xBrush = (xBrush + 1) & 7;
|
|
xLeft++;
|
|
cx--;
|
|
if (cx < 0) // Recall inclusiveness
|
|
continue;
|
|
}
|
|
|
|
i = 4;
|
|
do {
|
|
cFifo -= 6;
|
|
while (cFifo < 0)
|
|
{
|
|
cFifo = GET_FIFO_SPACE(pjBase) - 6;
|
|
}
|
|
|
|
CP_WRITE(pjBase, DWG_LEN, cy);
|
|
CP_WRITE(pjBase, DWG_YDST, yTop);
|
|
CP_WRITE(pjBase, DWG_AR3, ulLinear + xBrush);
|
|
CP_WRITE(pjBase, DWG_AR0, ulLinear + xBrush + 3);
|
|
CP_WRITE(pjBase, DWG_FXLEFT, xLeft);
|
|
|
|
xRight = xLeft + (cx & ~7);
|
|
if (cx & 7)
|
|
xRight++;
|
|
|
|
CP_START(pjBase, DWG_FXRIGHT, xRight);
|
|
|
|
if (--i == 0)
|
|
break;
|
|
|
|
xBrush = (xBrush + 2) & 7;
|
|
xLeft += 2;
|
|
cx -= 2;
|
|
|
|
} while (cx >= 0);
|
|
|
|
} while (prcl++, --c != 0);
|
|
}
|
|
|
|
/******************************Public*Routine******************************\
|
|
* VOID vMgaGetBits24bpp
|
|
*
|
|
* Reads the bits from the screen at 24bpp
|
|
\**************************************************************************/
|
|
|
|
VOID vMgaGetBits24bpp(
|
|
PDEV* ppdev, // Current src pdev
|
|
SURFOBJ* psoDst, // Destination surface for the color bits
|
|
RECTL* prclDst, // Area to be modified within the dest surface,
|
|
// in absolute coordinates
|
|
POINTL* pptlSrc) // Upper left corner of source rectangle,
|
|
// in absolute coordinates
|
|
{
|
|
BYTE* pjBase;
|
|
BYTE* pbScan0;
|
|
BYTE* pbDestRect;
|
|
LONG xSrc, ySrc, xTrg, yTrg, cxTrg, cyTrg, lDestDelta;
|
|
ULONG temp, ulSSA, ulSSAIncrement, HstStatus, AbortCnt;
|
|
LONG i, NbDWords;
|
|
ULONG* pulDest;
|
|
ULONG* locpulDest;
|
|
ULONG* pDMAWindow;
|
|
|
|
pjBase = ppdev->pjBase;
|
|
|
|
AbortCnt = 1000;
|
|
|
|
// Calculate the size of the target rectangle, and pick up
|
|
// some convenient locals.
|
|
|
|
// Starting (x,y) and extents within the destination bitmap.
|
|
// If an extent is 0 or negative, we don't have anything to do.
|
|
|
|
cxTrg = prclDst->right - prclDst->left;
|
|
cyTrg = prclDst->bottom - prclDst->top;
|
|
xTrg = prclDst->left;
|
|
yTrg = prclDst->top;
|
|
|
|
ASSERTDD(cxTrg > 0 && cyTrg > 0, "Shouldn't get empty extents");
|
|
|
|
// First scanline of the destination bitmap.
|
|
|
|
pbScan0 = (BYTE*) psoDst->pvScan0;
|
|
|
|
// Starting (x,y) on the screen.
|
|
|
|
xSrc = pptlSrc->x;
|
|
ySrc = pptlSrc->y;
|
|
|
|
// Scan increment within the destination bitmap.
|
|
|
|
lDestDelta = psoDst->lDelta;
|
|
|
|
// Calculate the location of the destination rectangle.
|
|
|
|
pbDestRect = pbScan0 + (yTrg * lDestDelta);
|
|
pbDestRect += 3*xTrg;
|
|
|
|
// Set the registers that can be set now for the operation.
|
|
// SIGN_CACHE=1 and cuts 1 register from the setup.
|
|
|
|
CHECK_FIFO_SPACE(pjBase, 7);
|
|
|
|
// DWGCTL IDUMP+RPL+SRCCOPY+blockm_OFF+bltmod_BUCOL+patt_OFF+BG_OPAQUE
|
|
// SGN 0
|
|
// SHIFT 0
|
|
// AR0 sea: ySrc*pitch + xSrc + cxTrg - 1
|
|
// AR3 ssa: ySrc*pitch + xSrc
|
|
// AR5 Screen pitch
|
|
// FXLEFT 0
|
|
// FXRIGHT cxTrg - 1
|
|
// LEN cyTrg
|
|
|
|
if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
|
|
{
|
|
CP_WRITE(pjBase, DWG_SGN, 0);
|
|
ppdev->HopeFlags |= SIGN_CACHE;
|
|
}
|
|
|
|
CP_WRITE(pjBase, DWG_SHIFT, 0);
|
|
CP_WRITE(pjBase, DWG_YDST, 0);
|
|
|
|
CP_WRITE(pjBase, DWG_FXLEFT, 0);
|
|
CP_WRITE(pjBase, DWG_FXRIGHT, (cxTrg - 1));
|
|
|
|
CP_WRITE(pjBase, DWG_AR5, ppdev->cxMemory);
|
|
|
|
// The SRC0-3 registers are trashed by the blt.
|
|
|
|
ppdev->HopeFlags &= ~(ARX_CACHE | PATTERN_CACHE);
|
|
|
|
CP_WRITE(pjBase, DWG_DWGCTL, (opcode_IDUMP+atype_RPL+blockm_OFF+
|
|
bop_SRCCOPY+bltmod_BUCOL+pattern_OFF+
|
|
transc_BG_OPAQUE));
|
|
|
|
// We won't have a full-speed routine, because we must read 32 bits per
|
|
// pixel and either store only 24 bits (if the destination bitmap is
|
|
// 24bpp), or mask out the eight msb's and then store 32 bits (if the
|
|
// destination bitmap is 32bpp).
|
|
|
|
// Source Start Address of the first slice.
|
|
|
|
ulSSA = ySrc * ppdev->cxMemory + xSrc + ppdev->ulYDstOrg;
|
|
|
|
// Increment to get to the SSA of the next scanline.
|
|
|
|
ulSSAIncrement = ppdev->cxMemory;
|
|
|
|
// Number of full dwords to be read within the loop on each scan.
|
|
|
|
NbDWords = cxTrg - 1;
|
|
|
|
pDMAWindow = (ULONG*) (ppdev->pjBase + DMAWND);
|
|
|
|
locpulDest = (ULONG*) pbDestRect;
|
|
|
|
// No color translation while copying.
|
|
while (cyTrg-- > 0)
|
|
{
|
|
do {
|
|
CHECK_FIFO_SPACE(pjBase, 3);
|
|
|
|
// This is where we'll start storing data.
|
|
|
|
pulDest = locpulDest;
|
|
|
|
// Complete the IDUMP setup.
|
|
|
|
CP_WRITE(pjBase, DWG_AR3, ulSSA);
|
|
CP_WRITE(pjBase, DWG_AR0, ulSSA + cxTrg - 1);
|
|
|
|
// Turn the pseudoDMA on.
|
|
|
|
BLT_READ_ON(ppdev, pjBase);
|
|
|
|
CP_START(pjBase, DWG_LEN, 1);
|
|
|
|
// Make sure the setup is complete.
|
|
|
|
CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
|
|
|
|
if (NbDWords)
|
|
{
|
|
// There is at least one dword left to be read.
|
|
|
|
// Copy a number of full dwords from the current scanline.
|
|
|
|
for (i = 0; i < NbDWords; i++)
|
|
{
|
|
temp = CP_READ_DMA(ppdev, pDMAWindow);
|
|
|
|
* ((UCHAR*)pulDest + 0) = (UCHAR) (temp);
|
|
* ((UCHAR*)pulDest + 1) = (UCHAR) (temp >> 8);
|
|
* ((UCHAR*)pulDest + 2) = (UCHAR) (temp >> 16);
|
|
(UCHAR*)pulDest += 3;
|
|
}
|
|
}
|
|
|
|
// Check for the EngineBusy flag.
|
|
|
|
for (i = 0; i < 7; i++)
|
|
{
|
|
HstStatus = CP_READ_STATUS(pjBase);
|
|
}
|
|
if (HstStatus &= (dwgengsts_MASK >> 16))
|
|
{
|
|
// The drawing engine is still busy, while it should not be:
|
|
// there was a problem with this slice.
|
|
|
|
// Empty the DMA window.
|
|
|
|
do {
|
|
CP_READ_DMA(ppdev, pDMAWindow);
|
|
|
|
// Check for the EngineBusy flag. If the engine is still
|
|
// busy, then we'll have to read another dword.
|
|
|
|
for (i = 0; i < 7; i++)
|
|
{
|
|
temp = CP_READ_STATUS(pjBase);
|
|
}
|
|
} while (temp & (dwgengsts_MASK >> 16));
|
|
|
|
// The DMA window should now be empty.
|
|
|
|
// We cannot check the HST_STATUS two lower bytes anymore,
|
|
// so this is new.
|
|
|
|
if (--AbortCnt > 0)
|
|
{
|
|
// Signal we'll have to do this again.
|
|
HstStatus = 1;
|
|
}
|
|
else
|
|
{
|
|
// We tried hard enough, desist.
|
|
HstStatus = 0;
|
|
}
|
|
}
|
|
// The last dword to be read should be available now.
|
|
|
|
temp = CP_READ_DMA(ppdev, pDMAWindow);
|
|
* ((UCHAR*)pulDest + 0) = (UCHAR) (temp);
|
|
* ((UCHAR*)pulDest + 1) = (UCHAR) (temp >> 8);
|
|
* ((UCHAR*)pulDest + 2) = (UCHAR) (temp >> 16);
|
|
|
|
// Turn the pseudoDMA off.
|
|
|
|
BLT_READ_OFF(ppdev, pjBase);
|
|
|
|
// Redo the whole thing if there was a problem with this slice.
|
|
|
|
} while (HstStatus);
|
|
|
|
// We're done with the current scanline, deal with the next one.
|
|
|
|
(UCHAR*) locpulDest += lDestDelta;
|
|
ulSSA += ulSSAIncrement;
|
|
}
|
|
}
|
|
|