windows-nt/Source/XPSP1/NT/drivers/video/matrox/mga/disp/blt16.c
2020-09-26 16:20:57 +08:00

765 lines
24 KiB
C

/******************************Module*Header*******************************\
* Module Name: blt16.c
*
* This module contains the low-level blt functions that are specific to
* 16bpp.
*
* Copyright (c) 1992-1996 Microsoft Corporation
* Copyright (c) 1993-1996 Matrox Electronic Systems, Ltd.
\**************************************************************************/
#include "precomp.h"
/******************************Public*Routine******************************\
* VOID vMgaPatRealize16bpp
*
\**************************************************************************/
VOID vMgaPatRealize16bpp(
PDEV* ppdev,
RBRUSH* prb)
{
BYTE* pjBase;
BRUSHENTRY* pbe;
LONG iBrushCache;
LONG i;
ULONG* pulSrc;
pjBase = ppdev->pjBase;
// We have to allocate a new off-screen cache brush entry for
// the brush:
iBrushCache = ppdev->iBrushCache;
pbe = &ppdev->pbe[iBrushCache];
iBrushCache++;
if (iBrushCache >= ppdev->cBrushCache)
iBrushCache = 0;
ppdev->iBrushCache = iBrushCache;
// Update our links:
pbe->prbVerify = prb;
prb->apbe[IBOARD(ppdev)] = pbe;
CHECK_FIFO_SPACE(pjBase, 11);
CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RPL + blockm_OFF +
bop_SRCCOPY + bltmod_BFCOL + pattern_OFF +
transc_BG_OPAQUE));
if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
{
CP_WRITE(pjBase, DWG_SGN, 0);
}
// The SRC0 - SRC3 registers will be trashed by the blt:
ppdev->HopeFlags = SIGN_CACHE;
// Since our brushes are always interleaved, we want to send down
// 2 pels, skip 2 pels, send down 2 pels, etc. So we contrive to
// adjust the blt width and pitch to do that automatically for us:
CP_WRITE(pjBase, DWG_AR3, 0); // Source start address, not
// included in ARX_CACHE
CP_WRITE(pjBase, DWG_SHIFT, 0);
CP_WRITE(pjBase, DWG_LEN, 8); // Transfering 8 scans
CP_WRITE(pjBase, DWG_AR0, 15); // Source width is 16
CP_WRITE(pjBase, DWG_AR5, 32); // Source pitch is 32
CP_WRITE(pjBase, DWG_FXLEFT, pbe->ulLeft);
CP_WRITE(pjBase, DWG_FXRIGHT, pbe->ulLeft + 15);
CP_WRITE(pjBase, DWG_YDST, pbe->ulYDst);
CP_START(pjBase, DWG_PITCH, 32 + ylin_LINEARIZE_NOT);
CHECK_FIFO_SPACE(pjBase, 32); // Make sure MGA is ready
for (pulSrc = prb->aulPattern, i = 8; i != 0; i--, pulSrc += 4)
{
CP_WRITE_SRC(pjBase, *(pulSrc));
CP_WRITE_SRC(pjBase, *(pulSrc + 1));
CP_WRITE_SRC(pjBase, *(pulSrc + 2));
CP_WRITE_SRC(pjBase, *(pulSrc + 3));
// Repeat the brush's scan, because the off-screen pattern has to
// be 16 x 8:
CP_WRITE_SRC(pjBase, *(pulSrc));
CP_WRITE_SRC(pjBase, *(pulSrc + 1));
CP_WRITE_SRC(pjBase, *(pulSrc + 2));
CP_WRITE_SRC(pjBase, *(pulSrc + 3));
}
// Don't forget to restore the pitch:
CHECK_FIFO_SPACE(pjBase, 1);
CP_WRITE(pjBase, DWG_PITCH, ppdev->cxMemory);
}
/******************************Public*Routine******************************\
* VOID vMgaFillPat16bpp
*
\**************************************************************************/
VOID vMgaFillPat16bpp( // Type FNFILL
PDEV* ppdev,
LONG c, // Can't be zero
RECTL* prcl, // List of rectangles to be filled, in relative
// coordinates
ULONG rop4, // Rop4
RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure
POINTL* pptlBrush) // Pattern alignment
{
BYTE* pjBase;
BRUSHENTRY* pbe;
LONG xOffset;
LONG yOffset;
ULONG ulHwMix;
LONG yTop;
LONG xLeft;
LONG xBrush;
LONG yBrush;
LONG cx;
LONG cy;
ULONG ulAr3;
ULONG ulAr0;
CHAR cFifo;
LONG xAlign;
LONG cxThis;
ASSERTDD(!(rbc.prb->fl & RBRUSH_2COLOR), "Can't do 2 colour brushes here");
ASSERTDD((rbc.prb != NULL) && (rbc.prb->apbe[IBOARD(ppdev)] != NULL),
"apbe[iBoard] should be initialized to &beUnrealizedBrush");
// We have to ensure that no other brush took our spot in off-screen
// memory, or we might have to realize the brush for the first time:
pbe = rbc.prb->apbe[IBOARD(ppdev)];
if (pbe->prbVerify != rbc.prb)
{
vMgaPatRealize16bpp(ppdev, rbc.prb);
pbe = rbc.prb->apbe[IBOARD(ppdev)];
}
pjBase = ppdev->pjBase;
xOffset = ppdev->xOffset;
yOffset = ppdev->yOffset;
do {
cFifo = GET_FIFO_SPACE(pjBase) - 4;
} while (cFifo < 0);
if (rop4 == 0xf0f0) // PATCOPY
{
CP_WRITE(pjBase, DWG_DWGCTL, (opcode_BITBLT + atype_RPL + blockm_OFF +
trans_0 + bltmod_BFCOL + pattern_ON +
transc_BG_OPAQUE + bop_SRCCOPY));
}
else
{
ulHwMix = (rop4 & 0x03) + ((rop4 & 0x30) >> 2);
CP_WRITE(pjBase, DWG_DWGCTL, (opcode_BITBLT + atype_RSTR + blockm_OFF +
trans_0 + bltmod_BFCOL + pattern_ON +
transc_BG_OPAQUE + (ulHwMix << 16)));
}
if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
{
CP_WRITE(pjBase, DWG_SGN, 0);
}
ppdev->HopeFlags = SIGN_CACHE;
CP_WRITE(pjBase, DWG_SHIFT, 0);
CP_WRITE(pjBase, DWG_AR5, 32);
while (TRUE)
{
// We must be careful here, there are some hardware limitations. We
// must check the width and the alignment of the blt to decide how to
// slice the operation along X. Here are the limitations:
//
// - if the destination is aligned on a 16-pel address, then we are
// limited to 16-pel wide slices;
// - if the destination is not aligned on a 16-pel address, then we
// are limited to 8-pel wide slices.
//
// This means that if the width is 8 or less, we can do it right away;
// if not, then we must first do one or two slices limited to 8 pels,
// then a bunch of 16-pel slices, and maybe a last slice to complete
// the blt.
yTop = prcl->top;
xLeft = prcl->left;
cy = prcl->bottom - yTop;
cx = prcl->right - xLeft;
xBrush = (xLeft - pptlBrush->x) & 7;
yBrush = (yTop - pptlBrush->y) & 7;
ulAr3 = pbe->ulLinear + (yBrush << 5) + xBrush;
ulAr0 = pbe->ulLinear + (yBrush << 5) + 15;
xLeft += xOffset; // Convert to absolute coordinates
yTop += yOffset;
if (cx > 8)
{
xAlign = (xLeft & 7);
if (xAlign != 0)
{
cFifo -= 6;
while (cFifo < 0)
{
cFifo = GET_FIFO_SPACE(pjBase) - 6;
}
cxThis = 8 - xAlign;
CP_WRITE(pjBase, DWG_AR3, ulAr3);
CP_WRITE(pjBase, DWG_AR0, ulAr0);
CP_WRITE(pjBase, DWG_LEN, cy);
CP_WRITE(pjBase, DWG_YDST, yTop);
CP_WRITE(pjBase, DWG_FXLEFT, xLeft);
CP_START(pjBase, DWG_FXRIGHT, xLeft + cxThis - 1);
xLeft += cxThis;
cx -= cxThis;
ulAr3 = ulAr0 - 15 + ((ulAr3 + cxThis) & 7);
}
if (cx > 8)
{
if (xLeft & 15)
{
cFifo -= 6;
while (cFifo < 0)
{
cFifo = GET_FIFO_SPACE(pjBase) - 6;
}
CP_WRITE(pjBase, DWG_AR3, ulAr3);
CP_WRITE(pjBase, DWG_AR0, ulAr0);
CP_WRITE(pjBase, DWG_LEN, cy);
CP_WRITE(pjBase, DWG_YDST, yTop);
CP_WRITE(pjBase, DWG_FXLEFT, xLeft);
CP_START(pjBase, DWG_FXRIGHT, xLeft + 7);
xLeft += 8;
cx -= 8;
}
while (cx > 16)
{
cFifo -= 6;
while (cFifo < 0)
{
cFifo = GET_FIFO_SPACE(pjBase) - 6;
}
CP_WRITE(pjBase, DWG_AR3, ulAr3);
CP_WRITE(pjBase, DWG_AR0, ulAr0);
CP_WRITE(pjBase, DWG_LEN, cy);
CP_WRITE(pjBase, DWG_YDST, yTop);
CP_WRITE(pjBase, DWG_FXLEFT, xLeft);
CP_START(pjBase, DWG_FXRIGHT, xLeft + 15);
xLeft += 16;
cx -= 16;
}
}
}
// Do the final strip:
cFifo -= 6;
while (cFifo < 0)
{
cFifo = GET_FIFO_SPACE(pjBase) - 6;
}
CP_WRITE(pjBase, DWG_AR3, ulAr3);
CP_WRITE(pjBase, DWG_AR0, ulAr0);
CP_WRITE(pjBase, DWG_LEN, cy);
CP_WRITE(pjBase, DWG_YDST, yTop);
CP_WRITE(pjBase, DWG_FXLEFT, xLeft);
CP_START(pjBase, DWG_FXRIGHT, xLeft + cx - 1);
if (--c == 0)
break;
prcl++;
}
}
/******************************Public*Routine******************************\
* VOID vMgaGet16bppSliceFromScreen
*
* Get a limited number of pels from the screen and make sure that the
* transfer went OK. This assumes that the IDUMP is almost fully set up,
* and that a number of dwords must be jumped over at the end of each
* destination scanline.
*
\**************************************************************************/
VOID vMgaGet16bppSliceFromScreen(
PDEV* ppdev, // pdev
ULONG ulSSA, // Source start address for current slice
ULONG ulSEA, // Source end address for current slice
ULONG ulLen, // Nb of scanlines in current slice
LONG NbDWordsPerScan,// Nb of dwords to be read in each scanline
LONG lPreDWordBytes, // Nb bytes before any dword on a scan
LONG lDWords, // Nb dwords to be moved on a scan
LONG lPostDWordBytes,// Nb bytes after all dwords on a scan
LONG lDestDelta, // Increment to get from one dest scan to the next
BYTE bPreShift, // Shift to align first byte to be stored
ULONG** ppulDest) // Ptr to where to store the first dword we read
{
BYTE* pjBase;
ULONG temp, HstStatus, AbortCnt;
ULONG* pulDest;
ULONG* locpulDest;
ULONG* pDMAWindow;
LONG i, TotalDWords, locTotalDWords;
BYTE* pbDest;
pjBase = ppdev->pjBase;
AbortCnt = 1000;
pDMAWindow = (ULONG*) (ppdev->pjBase + DMAWND);
// We want to stop reading just before the last dword is read.
TotalDWords = (NbDWordsPerScan * ulLen) - 1;
do {
CHECK_FIFO_SPACE(pjBase, 3);
// This is where we'll start storing data.
pulDest = *ppulDest;
// Complete the IDUMP setup.
CP_WRITE(pjBase, DWG_AR3, ulSSA);
CP_WRITE(pjBase, DWG_AR0, ulSEA);
// Turn the pseudoDMA on.
BLT_READ_ON(ppdev, pjBase);
CP_START(pjBase, DWG_LEN, ulLen);
// Make sure the setup is complete.
CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
if (TotalDWords)
{
// There is at least one dword left to be read.
// Make a copy so that we can play with it.
locTotalDWords = TotalDWords;
do {
// Make a copy for updating to the next scan.
locpulDest = pulDest;
if (lPreDWordBytes)
{
// There are pixels to be stored as bytes.
// Read 2 pixels and shift them into place.
locTotalDWords--;
temp = CP_READ_DMA(ppdev, pDMAWindow);
temp &= ppdev->ulPlnWt;
temp >>= bPreShift;
pbDest = (BYTE*)pulDest;
for (i = 0; i < lPreDWordBytes; i++)
{
*pbDest = (BYTE) temp;
temp >>= 8;
pbDest++;
}
pulDest = (ULONG*)pbDest;
if (locTotalDWords == 0)
{
// This was the end of the current slice.
// Exit the do-while loop.
if ((NbDWordsPerScan == 1) && (lPreDWordBytes != 0))
{
// Since it was a narrow slice, the next read
// goes on the next scan, so add in the delta:
(UCHAR*) pulDest = (UCHAR*) locpulDest + lDestDelta;
pbDest = (UCHAR*) pulDest;
}
break;
}
}
// We should be dword-aligned in the destination now.
// Copy a number of full dwords from the current scanline.
for (i = 0; i < lDWords; i++)
{
temp = CP_READ_DMA(ppdev, pDMAWindow);
*pulDest++ = temp & ppdev->ulPlnWt;
}
// We're left with this many dwords to be read.
locTotalDWords -= lDWords;
if (locTotalDWords != 0)
{
// This was not the last scanline, so we must read a
// possibly partial dword to end this scan.
if (lPostDWordBytes)
{
// There are pixels to be stored as bytes.
locTotalDWords--;
temp = CP_READ_DMA(ppdev, pDMAWindow);
temp &= ppdev->ulPlnWt;
if (lPostDWordBytes == 4)
{
*pulDest = temp;
}
else
{
pbDest = (BYTE*)pulDest;
*pbDest = (BYTE)temp;
pbDest++;
temp >>= 8;
*pbDest = (BYTE)temp;
}
}
// We should be done with this scan.
// We're done with the current scan, go to the next one.
(UCHAR*) pulDest = (UCHAR*) locpulDest + lDestDelta;
}
} while (locTotalDWords > 0);
}
// Check for the EngineBusy flag.
for (i = 0; i < 7; i++)
{
HstStatus = CP_READ_STATUS(pjBase);
}
if (HstStatus &= (dwgengsts_MASK >> 16))
{
// The drawing engine is still busy, while it should not be:
// there was a problem with this slice.
// Empty the DMA window.
do {
CP_READ_DMA(ppdev, pDMAWindow);
// Check for the EngineBusy flag. If the engine is still
// busy, then we'll have to read another dword.
for (i = 0; i < 7; i++)
{
temp = CP_READ_STATUS(pjBase);
}
} while (temp & (dwgengsts_MASK >> 16));
// The DMA window should now be empty.
// We cannot check the HST_STATUS two lower bytes anymore,
// so this is new.
if (--AbortCnt > 0)
{
// Signal we'll have to do this again.
HstStatus = 1;
}
else
{
// We tried hard enough, desist.
HstStatus = 0;
}
}
// The last dword to be read should be available now.
temp = CP_READ_DMA(ppdev, pDMAWindow);
temp &= ppdev->ulPlnWt;
// We must take some care so as not to write after the end of the
// destination bitmap.
pbDest = (BYTE*)pulDest;
if ((NbDWordsPerScan == 1) && (lPreDWordBytes != 0))
{
// The X extent was smaller than 2.
for (i = 0; i < lPreDWordBytes; i++)
{
*pbDest = (BYTE)temp;
pbDest++;
temp >>= 8;
}
}
else if (lPostDWordBytes > 0)
{
// There are pixels to be stored as bytes.
if (lPostDWordBytes == 4)
{
// We can store a dword.
*pulDest = temp;
}
else
{
*pbDest = (BYTE)temp;
pbDest++;
temp >>= 8;
*pbDest = (BYTE)temp;
}
}
else
{
// Store the last dword.
*pulDest = temp;
}
// Turn the pseudoDMA off.
BLT_READ_OFF(ppdev, pjBase);
// Redo the whole thing if there was a problem with this slice.
} while (HstStatus);
// Update the destination pointer for the calling routine.
*ppulDest += ((ulLen * lDestDelta) / sizeof(ULONG));
}
/******************************Public*Routine******************************\
* VOID vMgaGetBits16bpp
*
* Reads the bits from the screen at 16bpp.
*
\**************************************************************************/
VOID vMgaGetBits16bpp(
PDEV* ppdev, // Current src pdev
SURFOBJ* psoDst, // Destination surface for the color bits
RECTL* prclDst, // Area to be modified within the dest surface,
// in absolute coordinates
POINTL* pptlSrc) // Upper left corner of source rectangle,
// in absolute coordinates
{
BYTE* pjBase;
BYTE* pbScan0;
BYTE* pbDestRect;
LONG xSrc, ySrc, xTrg, yTrg, cxTrg, cyTrg, lDestDelta, cySlice,
xTrgAl, cxTrgAl, lPreDWordBytes, lDWords,
lPostDWordBytes, NbDWordsPerScan, TotalDWords, i;
ULONG ulSSA, ulSEA, ulSSAIncrement, temp,
NbDWords, NbBytesPerScan;
ULONG* pDW;
ULONG* locpDW;
BYTE bPreShift;
DWORD dwi, dwo;
BYTE* pbDest;
pjBase = ppdev->pjBase;
// Calculate the size of the target rectangle, and pick up
// some convenient locals.
// Starting (x,y) and extents within the destination bitmap.
// If an extent is 0 or negative, we don't have anything to do.
cxTrg = prclDst->right - prclDst->left;
cyTrg = prclDst->bottom - prclDst->top;
xTrg = prclDst->left;
yTrg = prclDst->top;
ASSERTDD(cxTrg > 0 && cyTrg > 0, "Shouldn't get empty extents");
// First scanline of the destination bitmap.
pbScan0 = (BYTE*) psoDst->pvScan0;
// Starting (x,y) on the screen.
xSrc = pptlSrc->x;
ySrc = pptlSrc->y;
// Scan increment within the destination bitmap.
lDestDelta = psoDst->lDelta;
// Calculate the location of the destination rectangle.
pbDestRect = pbScan0 + (yTrg * lDestDelta) + (2 * xTrg);
// Set the registers that can be set now for the operation.
// SIGN_CACHE=1 and cuts 1 register from the setup.
CHECK_FIFO_SPACE(pjBase, 6);
// DWGCTL IDUMP+RPL+SRCCOPY+blockm_OFF+bltmod_BFCOL+patt_OFF+BG_OPAQUE
// SGN 0
// SHIFT 0
// AR0 sea: ySrc*pitch + xSrc + cxTrg - 1
// AR3 ssa: ySrc*pitch + xSrc
// AR5 Screen pitch
// FXLEFT 0
// FXRIGHT cxTrg - 1
// LEN cyTrg
// MCTLWTST special value required by IDUMP bug fix
if (!(GET_CACHE_FLAGS(ppdev, SIGN_CACHE)))
{
CP_WRITE(pjBase, DWG_SGN, 0);
}
// The SRC0-3 registers are trashed by the blt.
ppdev->HopeFlags = SIGN_CACHE;
CP_WRITE(pjBase, DWG_SHIFT, 0);
CP_WRITE(pjBase, DWG_FXLEFT, 0);
CP_WRITE(pjBase, DWG_AR5, ppdev->cxMemory);
CP_WRITE(pjBase, DWG_DWGCTL, (opcode_IDUMP+atype_RPL+blockm_OFF+
bop_SRCCOPY+bltmod_BFCOL+pattern_OFF+
transc_BG_OPAQUE));
// Recipe for IDUMP fix. We must break the IDUMP into a number of
// smaller IDUMPS, according to the following formula:
//
// 0 < cx < 256 ==> cYSlice = int(1024/(cx << 2)) << 2 = int( 256/cx)<<2
// 256 < cx < 1024 ==> cYSlice = int(4096/(cx << 2)) << 2 = int(1024/cx)<<2
// 1024 < cx < 1600 ==> cYSlice = int(1600/(cx << 2)) << 2 = int(1600/cx)<<2
//
// We will modify it this way:
//
// 0 < cx <= 256 ==> cYSlice = int(1024/(cx << 2)) << 2 = int( 256/cx)<<2
// 256 < cx <= 512 ==> cYSlice = int(4096/(cx << 2)) << 2 = int(1024/cx)<<2
// 512 < cx ==> cYSlice = 4
if (cxTrg > 512)
{
cySlice = 4;
}
else if (cxTrg > 256)
{
cySlice = (1024 / cxTrg) << 2;
}
else
{
cySlice = (256 / cxTrg) << 2;
}
// Number of bytes, padded to the next dword, to be moved per scanline.
NbBytesPerScan = (2*cxTrg + 3) & -4;
NbDWords = NbBytesPerScan >> 2;
pDW = (ULONG*) pbDestRect;
// There will probably be a number of full slices (of height cySlice).
// Source Start Address of the first slice.
ulSSA = ySrc * ppdev->cxMemory + xSrc + ppdev->ulYDstOrg;
ulSEA = ulSSA + cxTrg - 1;
// Increment to get to the SSA of the next full slice.
ulSSAIncrement = cySlice * ppdev->cxMemory;
// We can't go full speed.
// Compute alignment parameters for the blt. We want to read the
// minimum number of dwords from the screen, and we want to align
// the write into memory on dword boundaries. We want to do it
// this way:
//
// width -> 1 2 3 4 5
// ---- ---- --------- -------------- --------------
// xTrg&1
// 0 --10 DWxx DWxx --10 DWxx DWxx DWxx DWxx --10
// 1 --10 3210 32-- DWxx 32-- DWxx --10 32-- DWxx DWxx
//
// where 0, 1, 2, or 3 means that the corresponding byte of the dword
// that was read in is stored as a byte, and DWxx means that the dword
// that was read in is stored as a dword.
// Compute some useful values.
xTrgAl = xTrg & 0x01; // 0, 1
cxTrgAl = cxTrg - xTrgAl;
if (cxTrgAl < 2)
{
// The width is really small.
// On each scanline:
lPreDWordBytes = 2*cxTrg; // Nb of bytes before the first dword
lDWords = 0; // Nb of dwords to be stored
lPostDWordBytes = 0; // Nb of bytes after the last dword.
NbDWordsPerScan = 1; // Nb of dwords to be read in.
bPreShift = 0; // How to shift the first dword.
}
else
{
// Pixels will be stored as bytes and dwords.
lPreDWordBytes = 2*xTrgAl; // Nb of bytes before the first dword
lDWords = cxTrgAl / 2;
if((lPostDWordBytes = 2 * (cxTrgAl & 1)) == 0)
{
lPostDWordBytes = 4;
lDWords--;
}
NbDWordsPerScan = (xTrgAl + cxTrg + 1)/2;
bPreShift = (BYTE)(16 * xTrgAl);
ulSSA -= xTrgAl;
}
CP_WRITE(pjBase, DWG_FXRIGHT, (bPreShift/16) + cxTrg - 1);
while ((cyTrg -= cySlice) >= 0)
{
// There is another full height slice to be read.
vMgaGet16bppSliceFromScreen(ppdev, ulSSA, ulSEA,
(ULONG) cySlice, NbDWordsPerScan,
lPreDWordBytes, lDWords,
lPostDWordBytes, lDestDelta,
bPreShift, &pDW);
// Bump Source Start Address to the start of the next slice.
ulSSA += ulSSAIncrement;
ulSEA += ulSSAIncrement;
}
// Make cyTrg positive again, and read the last slice, if any.
if ((cyTrg += cySlice) != 0)
{
// There is a last, partial slice to be read.
vMgaGet16bppSliceFromScreen(ppdev, ulSSA, ulSEA,
(ULONG) cyTrg, NbDWordsPerScan,
lPreDWordBytes, lDWords,
lPostDWordBytes, lDestDelta,
bPreShift, &pDW);
}
}