windows-nt/Source/XPSP1/NT/drivers/video/ms/ati/disp/bltm32.c
2020-09-26 16:20:57 +08:00

1094 lines
35 KiB
C

/******************************Module*Header*******************************\
* Module Name: bltm32.c
*
* Contains the low-level memory-mapped I/O blt functions for the Mach32.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines. You shouldn't have to modify much in
* 'bitblt.c'. I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refers to coordinates
* that haven't yet had the offscreen bitmap (DFB) offset applied.
* 'Absolute' coordinates have had the offset applied. For example,
* we may be told to blt to (1, 1) of the bitmap, but the bitmap may
* be sitting in offscreen memory starting at coordinate (0, 768) --
* (1, 1) would be the 'relative' start coordinate, and (1, 769)
* would be the 'absolute' start coordinate.
*
* Copyright (c) 1992-1995 Microsoft Corporation
*
\**************************************************************************/
#include "precomp.h"
/******************************Public*Routine******************************\
* VOID vM32FillSolid
*
* Paints every rectangle in the list with a single solid colour through
* the Mach32 memory-mapped registers.
*
* The colour, the mix and the data-path configuration are programmed once
* up front.  Each rectangle then takes five register writes; the last one
* (DEST_Y_END) is issued immediately after the call to vM32QuietDown.
*
\**************************************************************************/
VOID vM32FillSolid(             // Type FNFILL
PDEV*           ppdev,
LONG            c,              // Can't be zero
RECTL*          prcl,           // List of rectangles to be filled, in relative
                                //   coordinates
ULONG           rop4,           // rop4
RBRUSH_COLOR    rbc,            // Drawing colour is rbc.iSolidColor
POINTL*         pptlBrush)      // Not used
{
    BYTE*   pjMmBase = ppdev->pjMmBase;
    LONG    dxAbs    = ppdev->xOffset;      // Relative -> absolute x
    LONG    dyAbs    = ppdev->yOffset;      // Relative -> absolute y
    LONG    xAbsLeft;

    ASSERTDD(c > 0, "Can't handle zero rectangles");

    // Room for the three set-up writes plus the five writes needed by the
    // first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 8);

    M32_OW(pjMmBase, FRGD_COLOR, rbc.iSolidColor);
    M32_OW(pjMmBase, ALU_FG_FN,  gaul32HwMixFromRop2[(rop4 >> 2) & 0xf]);
    M32_OW(pjMmBase, DP_CONFIG,  FG_COLOR_SRC_FG | WRITE | DRAW);

    for (;; prcl++)
    {
        xAbsLeft = dxAbs + prcl->left;

        M32_OW(pjMmBase, CUR_X,        xAbsLeft);
        M32_OW(pjMmBase, DEST_X_START, xAbsLeft);
        M32_OW(pjMmBase, DEST_X_END,   dxAbs + prcl->right);
        M32_OW(pjMmBase, CUR_Y,        dyAbs + prcl->top);

        vM32QuietDown(ppdev, pjMmBase);

        M32_OW(pjMmBase, DEST_Y_END,   dyAbs + prcl->bottom);

        if (--c == 0)
        {
            break;
        }

        // Five more writes for the next rectangle:

        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5);
    }
}
/******************************Public*Routine******************************\
* VOID vM32FillPatMonochrome
*
* This routine uses the pattern hardware to draw a monochrome patterned
* list of rectangles.
*
* The brush realization (rbc.prb) holds the eight pattern bytes together
* with the alignment (ptlBrush) those bytes are currently rotated to.  When
* the alignment required by this call differs, the bytes are rotated and
* re-ordered and the result is written back into the realization, so the
* realization is modified by this routine.
*
* The foreground mix is looked up from bits 2..5 of 'rop4'.  The background
* uses the same mix, except when the high byte of 'rop4' is 0xAA, in which
* case LEAVE_ALONE is programmed for the background.
*
* See Blt_DS_P8x8_ENG_IO_66_D0 and Blt_DS_P8x8_ENG_IO_66_D1.
*
\**************************************************************************/
VOID vM32FillPatMonochrome(     // Type FNFILL
PDEV*           ppdev,
LONG            c,              // Can't be zero
RECTL*          prcl,           // List of rectangles to be filled, in relative
                                //   coordinates
ULONG           rop4,           // rop4
RBRUSH_COLOR    rbc,            // rbc.prb points to brush realization structure
POINTL*         pptlBrush)      // Pattern alignment
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    ULONG   ulHwForeMix;
    BYTE*   pjSrc;
    BYTE*   pjDst;
    LONG    xPattern;
    LONG    yPattern;
    LONG    xOld;
    LONG    yOld;
    LONG    iLeftShift;
    LONG    iRightShift;
    LONG    i;
    BYTE    j;
    LONG    xLeft;
    ULONG   aulTmp[2];
    WORD*   pwPattern;

    // The rectangle loop below tests '--c == 0' only after drawing, so a
    // zero count would walk off the end of the list.  The sibling fill and
    // transfer routines assert this; do the same here:

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(ppdev->iAsic == ASIC_68800_6 || ppdev->iAsic == ASIC_68800AX,
             "Wrong ASIC type for monochrome 8x8 patterns");

    pjMmBase = ppdev->pjMmBase;
    xOffset  = ppdev->xOffset;
    yOffset  = ppdev->yOffset;

    // Required alignment of the pattern in absolute coordinates:

    xPattern = (pptlBrush->x + xOffset) & 7;
    yPattern = (pptlBrush->y + yOffset) & 7;

    // If the alignment isn't correct, we'll have to change it:

    if ((xPattern != rbc.prb->ptlBrush.x) || (yPattern != rbc.prb->ptlBrush.y))
    {
        // Remember that we've changed the alignment on our cached brush:

        xOld = rbc.prb->ptlBrush.x;
        yOld = rbc.prb->ptlBrush.y;

        rbc.prb->ptlBrush.x = xPattern;
        rbc.prb->ptlBrush.y = yPattern;

        // Now do the alignment.  'yPattern' is re-used as the index of the
        // first source row; it is masked with 7 on every use, so it may be
        // negative here.  Each byte is rotated right by 'iRightShift' bits
        // (when 'iRightShift' is zero the left-shifted part falls outside
        // the byte and the value is unchanged):

        yPattern    = (yOld - yPattern);
        iRightShift = (xPattern - xOld) & 7;
        iLeftShift  = 8 - iRightShift;

        pjSrc = (BYTE*) &rbc.prb->aulPattern[0];
        pjDst = (BYTE*) &aulTmp[0];

        for (i = 0; i < 8; i++)
        {
            j = *(pjSrc + (yPattern++ & 7));
            *pjDst++ = (BYTE) ((j << iLeftShift) | (j >> iRightShift));
        }

        // Write the rotated pattern back to the realization:

        rbc.prb->aulPattern[0] = aulTmp[0];
        rbc.prb->aulPattern[1] = aulTmp[1];
    }

    ulHwForeMix = gaul32HwMixFromRop2[(rop4 >> 2) & 0xf];

    // Eleven set-up writes plus five writes for the first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 16);

    M32_OW(pjMmBase, DP_CONFIG, FG_COLOR_SRC_FG | EXT_MONO_SRC_PATT | DRAW |
                                WRITE);
    M32_OW(pjMmBase, ALU_FG_FN, ulHwForeMix);
    M32_OW(pjMmBase, ALU_BG_FN, ((rop4 & 0xff00) == 0xaa00) ? LEAVE_ALONE
                                                            : ulHwForeMix);

    M32_OW(pjMmBase, FRGD_COLOR, rbc.prb->ulForeColor);
    M32_OW(pjMmBase, BKGD_COLOR, rbc.prb->ulBackColor);

    // NOTE(review): the values 128 and 16 are taken as-is from the original
    // code; their exact meaning should be confirmed against the Mach32
    // register documentation.

    M32_OW(pjMmBase, PATT_LENGTH, 128);
    M32_OW(pjMmBase, PATT_DATA_INDEX, 16);

    // Download the eight pattern bytes as four words:

    pwPattern = (WORD*) &rbc.prb->aulPattern[0];
    M32_OW(pjMmBase, PATT_DATA, *(pwPattern));
    M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 1));
    M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 2));
    M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 3));

    while (TRUE)
    {
        xLeft = xOffset + prcl->left;

        M32_OW(pjMmBase, CUR_X,        xLeft);
        M32_OW(pjMmBase, DEST_X_START, xLeft);
        M32_OW(pjMmBase, DEST_X_END,   xOffset + prcl->right);
        M32_OW(pjMmBase, CUR_Y,        yOffset + prcl->top);
        M32_OW(pjMmBase, DEST_Y_END,   yOffset + prcl->bottom);

        if (--c == 0)
            break;

        prcl++;

        // Five more writes for the next rectangle:

        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 5);
    }
}
/******************************Public*Routine******************************\
* VOID vM32FillPatColor
*
* This routine uses the pattern hardware to draw a colour patterned list of
* rectangles.  It is only valid when the display is 8bpp.
*
* The pattern is eight pixels wide and each of its eight scans occupies
* eight bytes of rbc.prb->aulPattern.  For every destination scan the
* matching pattern scan is downloaded and a one-scan-high fill is drawn.
*
* When the hardware mix is OVERPAINT, only the first (up to) eight scans
* are drawn that way; the remainder of the rectangle is produced by a
* screen-to-screen blt whose source starts eight scans above its
* destination, so that the rows just drawn are replicated downwards.
*
* See Blt_DS_PCOL_ENG_IO_F0_D0 and Blt_DS_PCOL_ENG_IO_F0_D1.
*
\**************************************************************************/
VOID vM32FillPatColor(          // Type FNFILL
PDEV*           ppdev,
LONG            c,              // Can't be zero
RECTL*          prcl,           // List of rectangles to be filled, in relative
                                //   coordinates
ULONG           rop4,           // rop4
RBRUSH_COLOR    rbc,            // rbc.prb points to brush realization structure
POINTL*         pptlBrush)      // Pattern alignment
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    ULONG   ulHwMix;
    LONG    xLeft;
    LONG    xRight;
    LONG    yTop;
    LONG    cy;
    LONG    cyVenetian;
    LONG    cyRoll;
    WORD*   pwPattern;
    LONG    xPattern;
    LONG    yPattern;

    // The rectangle loop below tests '--c == 0' only after drawing, so a
    // zero count would walk off the end of the list.  The sibling fill and
    // transfer routines assert this; do the same here:

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP,
             "Colour patterns work only at 8bpp");

    pjMmBase = ppdev->pjMmBase;
    xOffset  = ppdev->xOffset;
    yOffset  = ppdev->yOffset;
    ulHwMix  = gaul32HwMixFromRop2[(rop4 >> 2) & 0xf];

    // Three set-up writes plus the six per-rectangle writes that precede
    // the scan loop:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9);

    M32_OW(pjMmBase, ALU_FG_FN,   ulHwMix);
    M32_OW(pjMmBase, SRC_Y_DIR,   1);
    M32_OW(pjMmBase, PATT_LENGTH, 7);       // 8 pixel wide pattern

    while (TRUE)
    {
        xLeft  = xOffset + prcl->left;
        xRight = xOffset + prcl->right;
        yTop   = yOffset + prcl->top;
        cy     = prcl->bottom - prcl->top;

        // Position within the 8x8 pattern of the rectangle's top-left
        // pixel, relative to the brush origin:

        xPattern = (xLeft - pptlBrush->x - xOffset) & 7;
        yPattern = (yTop  - pptlBrush->y - yOffset) & 7;

        if (ulHwMix == OVERPAINT)
        {
            // Draw at most one full pattern height scan by scan and let
            // the rolling blt below do the rest:

            cyVenetian = min(cy, 8);
            cyRoll     = cy - cyVenetian;
        }
        else
        {
            // Any other mix: every scan is drawn from the pattern.

            cyVenetian = cy;
            cyRoll     = 0;
        }

        M32_OW(pjMmBase, DP_CONFIG,    FG_COLOR_SRC_PATT | DATA_WIDTH | DRAW | WRITE);
        M32_OW(pjMmBase, PATT_INDEX,   xPattern);
        M32_OW(pjMmBase, DEST_X_START, xLeft);
        M32_OW(pjMmBase, CUR_X,        xLeft);
        M32_OW(pjMmBase, DEST_X_END,   xRight);
        M32_OW(pjMmBase, CUR_Y,        yTop);

        do {
            // Each scan of the pattern is eight bytes:

            pwPattern = (WORD*) ((BYTE*) &rbc.prb->aulPattern[0]
                                 + (yPattern << 3));
            yPattern  = (yPattern + 1) & 7;

            // Five pattern writes plus DEST_Y_END:

            M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 6);

            M32_OW(pjMmBase, PATT_DATA_INDEX, 0);   // Reset index for download
            M32_OW(pjMmBase, PATT_DATA, *(pwPattern));
            M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 1));
            M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 2));
            M32_OW(pjMmBase, PATT_DATA, *(pwPattern + 3));

            yTop++;

            vM32QuietDown(ppdev, pjMmBase);

            M32_OW(pjMmBase, DEST_Y_END, yTop);

        } while (--cyVenetian != 0);

        if (cyRoll != 0)
        {
            // When the ROP is PATCOPY, we can take advantage of the fact
            // that we've just laid down an entire row of the pattern, and
            // can do a 'rolling' screen-to-screen blt to draw the rest.
            // 'cyRoll' is non-zero only when eight scans were drawn above,
            // so 'yTop - 8' is the first scan of this rectangle:

            M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);

            M32_OW(pjMmBase, DP_CONFIG,       FG_COLOR_SRC_BLIT | DATA_WIDTH |
                                              DRAW | WRITE);
            M32_OW(pjMmBase, M32_SRC_X,       xLeft);
            M32_OW(pjMmBase, M32_SRC_X_START, xLeft);
            M32_OW(pjMmBase, M32_SRC_X_END,   xRight);
            M32_OW(pjMmBase, M32_SRC_Y,       yTop - 8);
            M32_OW(pjMmBase, CUR_Y,           yTop);

            vM32QuietDown(ppdev, pjMmBase);

            M32_OW(pjMmBase, DEST_Y_END,      yTop + cyRoll);
        }

        if (--c == 0)
            break;

        prcl++;

        // Six writes precede the scan loop of the next rectangle:

        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 6);
    }
}
/******************************Public*Routine******************************\
* VOID vM32Xfer1bpp
*
* This routine colour expands a monochrome bitmap.  It will be called in
* the following cases:
*
* 1) To colour-expand the monochrome text buffer for the vFastText routine.
* 2) To blt a 1bpp source with a simple Rop2 between the source and
*    destination.
* 3) To blt a true Rop3 when the source is a 1bpp bitmap that expands to
*    white and black, and the pattern is a solid colour.
* 4) To handle a true Rop4 that works out to be Rop2's between the pattern
*    and destination.
*
* As implemented here, the routine asserts that both halves of 'rop4' are
* equal and programs the same hardware mix for foreground and background.
* BKGD_COLOR is loaded from pxlo->pulXlate[0] and FRGD_COLOR from
* pxlo->pulXlate[1].
*
* The source is fed to PIX_TRANS one dword (two word writes) at a time.
* The left scissor is reset to 0 and the right scissor to M32_MAX_SCISSOR
* before returning.
*
* Needless to say, making this routine fast can leverage a lot of
* performance.
*
\**************************************************************************/
VOID vM32Xfer1bpp(      // Type FNXFER
PDEV*       ppdev,
LONG        c,          // Count of rectangles, can't be zero
RECTL*      prcl,       // List of destination rectangles, in relative
                        //   coordinates
ROP4        rop4,       // rop4
SURFOBJ*    psoSrc,     // Source surface
POINTL*     pptlSrc,    // Original unclipped source point
RECTL*      prclDst,    // Original unclipped destination rectangle
XLATEOBJ*   pxlo)       // Translate that provides colour-expansion information
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    ULONG*  pulXlate;
    ULONG   ulHwForeMix;
    LONG    dx;
    LONG    dy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    LONG    xLeft;
    LONG    xRight;
    LONG    yTop;
    LONG    cy;
    LONG    cx;
    LONG    xBias;
    LONG    culScan;
    LONG    lSrcSkip;
    ULONG*  pulSrc;
    LONG    i;
    ULONG   ulFifo;     // NOTE(review): only assigned in the visible code;
                        //   kept in case the FIFO macros reference it

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only a rop2");

    pjMmBase = ppdev->pjMmBase;
    xOffset  = ppdev->xOffset;
    yOffset  = ppdev->yOffset;
    ulFifo   = 0;

    ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];
    pulXlate    = pxlo->pulXlate;

    // Five set-up writes plus seven writes for the first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 12);

    M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_FG | BG_COLOR_SRC_BG | BIT16 |
                                EXT_MONO_SRC_HOST | DRAW | WRITE | LSB_FIRST) );
    M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
    M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );
    M32_OW(pjMmBase, BKGD_COLOR, (WORD) pulXlate[0]);
    M32_OW(pjMmBase, FRGD_COLOR, (WORD) pulXlate[1]);

    // 'dx' and 'dy' are added to a destination coordinate to get the
    // corresponding source coordinate:

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    while (TRUE)
    {
        xLeft  = prcl->left;
        xRight = prcl->right;

        // The Mach32 'bit packs' monochrome transfers, but GDI gives
        // us monochrome bitmaps whose scans are always dword aligned.
        // Consequently, we use the Mach32's clip registers to make
        // our transfers a multiple of 32 to match the dword alignment:

        M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
        M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );

        yTop = prcl->top;
        cy   = prcl->bottom - yTop;

        xBias  = (xLeft + dx) & 31;             // Floor
        xLeft -= xBias;
        cx     = (xRight - xLeft + 31) & ~31;   // Ceiling

        // The cast must cover the whole sum: 'xLeft' may have gone
        // negative after the bias was subtracted, and casting only the
        // first operand would hand a value wider than 16 bits to M32_OW.

        M32_OW(pjMmBase, CUR_X,        (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_START, (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_END,   (WORD) (xLeft + xOffset + cx) );
        M32_OW(pjMmBase, CUR_Y,        (WORD) (yTop + yOffset) );
        M32_OW(pjMmBase, DEST_Y_END,   (WORD) (yTop + yOffset + cy) );

        // '(xLeft + dx)' is now a multiple of 32 pixels, i.e. a multiple
        // of four bytes into the scan:

        pulSrc = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta
                                      + ((xLeft + dx) >> 3));
        culScan  = cx >> 5;                     // Dwords per scan
        lSrcSkip = lSrcDelta - (culScan << 2);  // Bytes from the end of one
                                                //   scan's data to the next

        ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0, "Source should be dword aligned");

        do {
            i = culScan;
            do {
                // Each source dword goes out as two word writes, low word
                // first:

                M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
                M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc) );
                M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc + 1) );
                pulSrc++;

            } while (--i != 0);

            pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);

        } while (--cy != 0);

        if (--c == 0)
            break;

        prcl++;

        // Seven writes set up the next rectangle:

        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);
    }

    // Don't forget to reset the clip register:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
    M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
    M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
}
/******************************Public*Routine******************************\
* VOID vM32XferNative
*
* Transfers a bitmap that is the same colour depth as the display to
* the screen via the data transfer register, with no translation.
*
* For each rectangle the scissors are set to the true destination extent,
* the transfer itself is widened so that it starts and ends on a multiple
* of four pixels, and the source is written to PIX_TRANS one dword (two
* word writes) at a time.  The left scissor is reset to 0 and the right
* scissor to M32_MAX_SCISSOR before returning.
*
\**************************************************************************/
VOID vM32XferNative(    // Type FNXFER
PDEV*       ppdev,
LONG        c,          // Count of rectangles, can't be zero
RECTL*      prcl,       // Array of relative coordinates destination rectangles
ULONG       rop4,       // rop4
SURFOBJ*    psoSrc,     // Source surface
POINTL*     pptlSrc,    // Original unclipped source point
RECTL*      prclDst,    // Original unclipped destination rectangle
XLATEOBJ*   pxlo)       // Not used
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    ULONG   ulHwForeMix;
    LONG    dx;
    LONG    dy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    LONG    xLeft;
    LONG    xRight;
    LONG    yTop;
    LONG    cy;
    LONG    cx;
    LONG    xBias;
    ULONG*  pulSrc;
    ULONG   culScan;
    LONG    lSrcSkip;
    LONG    i;
    ULONG   ulFifo;     // NOTE(review): only assigned in the visible code;
                        //   kept in case the FIFO macros reference it

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only a rop2");

    pjMmBase = ppdev->pjMmBase;
    xOffset  = ppdev->xOffset;
    yOffset  = ppdev->yOffset;
    ulFifo   = 0;

    ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];

    // Three set-up writes plus seven writes for the first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 10);

    M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
                                DRAW | WRITE | LSB_FIRST) );
    M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
    M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );

    // 'dx' and 'dy' are added to a destination coordinate to get the
    // corresponding source coordinate:

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    while (TRUE)
    {
        xLeft  = prcl->left;
        xRight = prcl->right;

        M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
        M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );

        yTop = prcl->top;
        cy   = prcl->bottom - yTop;

        // We compute 'xBias' in order to dword-align the source pointer.
        // This way, we don't have to do unaligned reads of the source,
        // and we're guaranteed not to read even a byte past the end of
        // the bitmap.
        //
        // Note that this bias works at 24bpp, too:

        xBias  = (xLeft + dx) & 3;              // Floor
        xLeft -= xBias;
        cx     = (xRight - xLeft + 3) & ~3;     // Ceiling

        // The cast must cover the whole sum: 'xLeft' may have gone
        // negative after the bias was subtracted, and casting only the
        // first operand would hand a value wider than 16 bits to M32_OW.

        M32_OW(pjMmBase, CUR_X,        (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_START, (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_END,   (WORD) (xLeft + xOffset + cx) );
        M32_OW(pjMmBase, CUR_Y,        (WORD) (yTop + yOffset) );
        M32_OW(pjMmBase, DEST_Y_END,   (WORD) (yTop + yOffset + cy) );

        pulSrc = (ULONG*) (pjSrcScan0 + (yTop + dy) * lSrcDelta
                                      + ((xLeft + dx) * ppdev->cjPelSize));
        culScan  = (cx * ppdev->cjPelSize) >> 2;    // Dwords per scan
        lSrcSkip = lSrcDelta - (culScan << 2);      // Bytes from the end of
                                                    //   one scan's data to
                                                    //   the next

        ASSERTDD(((ULONG_PTR)pulSrc & 3) == 0, "Source should be dword aligned");

        do {
            i = culScan;
            do {
                // Each source dword goes out as two word writes, low word
                // first:

                M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
                M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc) );
                M32_OW(pjMmBase, PIX_TRANS, *((USHORT*) pulSrc + 1) );
                pulSrc++;

            } while (--i != 0);

            pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip);

        } while (--cy != 0);

        if (--c == 0)
            break;

        prcl++;

        // Seven writes set up the next rectangle:

        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);
    }

    // Don't forget to reset the clip register:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
    M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
    M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
}
/******************************Public*Routine******************************\
* VOID vM32Xfer4bpp
*
* Does a 4bpp transfer from a bitmap to the screen.
*
* Every source byte holds two pixels, high nibble first.  Each nibble is
* translated through pxlo->pulXlate and sent to PIX_TRANS: at 8bpp the two
* translated pixels are packed into one word (first pixel in the low
* byte); at 16bpp each translated pixel is written as its own word.
*
* Only 8bpp and 16bpp displays (cjPelSize of 1 or 2) have a transfer loop
* here.  The left scissor is reset to 0 and the right scissor to
* M32_MAX_SCISSOR before returning.
*
* The reason we implement this is that a lot of resources are kept as 4bpp,
* and used to initialize DFBs, some of which we of course keep off-screen.
*
\**************************************************************************/
VOID vM32Xfer4bpp(      // Type FNXFER
PDEV*       ppdev,
LONG        c,          // Count of rectangles, can't be zero
RECTL*      prcl,       // List of destination rectangles, in relative
                        //   coordinates
ULONG       rop4,       // Rop4
SURFOBJ*    psoSrc,     // Source surface
POINTL*     pptlSrc,    // Original unclipped source point
RECTL*      prclDst,    // Original unclipped destination rectangle
XLATEOBJ*   pxlo)       // Translate that provides colour-expansion information
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    LONG    cjPelSize;
    ULONG   ulHwForeMix;
    LONG    xLeft;
    LONG    xRight;
    LONG    yTop;
    LONG    xBias;
    LONG    dx;
    LONG    dy;
    LONG    cx;
    LONG    cy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    BYTE*   pjSrc;
    BYTE    jSrc;
    ULONG*  pulXlate;
    LONG    i;
    USHORT  uw;
    LONG    cjSrc;
    LONG    lSrcSkip;
    ULONG   ulFifo;     // NOTE(review): only assigned in the visible code;
                        //   kept in case the FIFO macros reference it

    ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");

    pjMmBase  = ppdev->pjMmBase;
    xOffset   = ppdev->xOffset;
    yOffset   = ppdev->yOffset;
    cjPelSize = ppdev->cjPelSize;
    pulXlate  = pxlo->pulXlate;
    ulFifo    = 0;

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];

    // Three set-up writes plus seven writes for the first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 10);

    M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
                                DRAW | WRITE | LSB_FIRST) );
    M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
    M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );

    while (TRUE)
    {
        xLeft  = prcl->left;
        xRight = prcl->right;

        M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
        M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );

        yTop = prcl->top;
        cy   = prcl->bottom - yTop;

        // Widen the transfer so that it starts on a source pixel that is
        // a multiple of four and is a multiple of four pixels wide.  The
        // first pixel then sits in the high nibble of a source byte and
        // every scan consumes a whole number of source bytes; the
        // scissors set above bound the real destination extent.
        //
        // (24bpp displays are not handled by this routine; see the
        // assertion above.)

        xBias  = (xLeft + dx) & 3;              // Floor
        xLeft -= xBias;
        cx     = (xRight - xLeft + 3) & ~3;     // Ceiling

        // The cast must cover the whole sum: 'xLeft' may have gone
        // negative after the bias was subtracted, and casting only the
        // first operand would hand a value wider than 16 bits to M32_OW.

        M32_OW(pjMmBase, CUR_X,        (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_START, (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_END,   (WORD) (xLeft + xOffset + cx) );
        M32_OW(pjMmBase, CUR_Y,        (WORD) (yTop + yOffset) );
        M32_OW(pjMmBase, DEST_Y_END,   (WORD) (yTop + yOffset + cy) );

        pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta
                           + ((xLeft + dx) >> 1);
        cjSrc    = cx >> 1;             // Number of source bytes touched
        lSrcSkip = lSrcDelta - cjSrc;

        if (cjPelSize == 1)
        {
            // This part handles 8bpp output.  One source byte becomes one
            // word: the high nibble's pixel in the low byte, the low
            // nibble's pixel in the high byte:

            do {
                i = cjSrc;
                do {
                    jSrc = *pjSrc++;
                    uw   = (USHORT) (pulXlate[jSrc >> 4]);
                    uw  |= (USHORT) (pulXlate[jSrc & 0xf] << 8);
                    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 1);
                    M32_OW(pjMmBase, PIX_TRANS, uw );
                } while (--i != 0);

                pjSrc += lSrcSkip;
            } while (--cy != 0);
        }
        else if (cjPelSize == 2)
        {
            // This part handles 16bpp output.  One source byte becomes two
            // words, high nibble's pixel first:

            do {
                i = cjSrc;
                do {
                    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
                    jSrc = *pjSrc++;
                    uw   = (USHORT) (pulXlate[jSrc >> 4]);
                    M32_OW(pjMmBase, PIX_TRANS, uw );
                    uw   = (USHORT) (pulXlate[jSrc & 0xf]);
                    M32_OW(pjMmBase, PIX_TRANS, uw );
                } while (--i != 0);

                pjSrc += lSrcSkip;
            } while (--cy != 0);
        }

        if (--c == 0)
            break;

        prcl++;

        // Seven writes set up the next rectangle:

        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);
    }

    // Don't forget to reset the clip register:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
    M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
    M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
}
/******************************Public*Routine******************************\
* VOID vM32Xfer8bpp
*
* Does a 8bpp transfer from a bitmap to the screen.
*
* Every source byte is translated through pxlo->pulXlate and sent to
* PIX_TRANS: at 8bpp two translated pixels are packed into one word (first
* pixel in the low byte); at 16bpp each translated pixel is written as its
* own word.
*
* Only 8bpp and 16bpp displays (cjPelSize of 1 or 2) have a transfer loop
* here.  The left scissor is reset to 0 and the right scissor to
* M32_MAX_SCISSOR before returning.
*
* The reason we implement this is that a lot of resources are kept as 8bpp,
* and used to initialize DFBs, some of which we of course keep off-screen.
*
\**************************************************************************/
VOID vM32Xfer8bpp(      // Type FNXFER
PDEV*       ppdev,
LONG        c,          // Count of rectangles, can't be zero
RECTL*      prcl,       // List of destination rectangles, in relative
                        //   coordinates
ULONG       rop4,       // Rop4
SURFOBJ*    psoSrc,     // Source surface
POINTL*     pptlSrc,    // Original unclipped source point
RECTL*      prclDst,    // Original unclipped destination rectangle
XLATEOBJ*   pxlo)       // Translate that provides colour-expansion information
{
    BYTE*   pjMmBase;
    LONG    xOffset;
    LONG    yOffset;
    LONG    cjPelSize;
    ULONG   ulHwForeMix;
    LONG    xLeft;
    LONG    xRight;
    LONG    yTop;
    LONG    xBias;
    LONG    dx;
    LONG    dy;
    LONG    cx;
    LONG    cy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    BYTE*   pjSrc;
    ULONG*  pulXlate;
    LONG    i;
    USHORT  uw;
    LONG    cwSrc;
    LONG    lSrcSkip;
    ULONG   ulFifo;     // NOTE(review): only assigned in the visible code;
                        //   kept in case the FIFO macros reference it

    ASSERTDD(psoSrc->iBitmapFormat == BMF_8BPP, "Source must be 8bpp");
    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(ppdev->iBitmapFormat != BMF_24BPP, "Can't handle 24bpp");

    pjMmBase  = ppdev->pjMmBase;
    xOffset   = ppdev->xOffset;
    yOffset   = ppdev->yOffset;
    cjPelSize = ppdev->cjPelSize;
    pulXlate  = pxlo->pulXlate;
    ulFifo    = 0;

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    ulHwForeMix = gaul32HwMixFromRop2[rop4 & 0xf];

    // Three set-up writes plus seven writes for the first rectangle:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 10);

    M32_OW(pjMmBase, DP_CONFIG, (WORD)(FG_COLOR_SRC_HOST | BIT16 |
                                DRAW | WRITE | LSB_FIRST) );
    M32_OW(pjMmBase, ALU_FG_FN, (WORD) ulHwForeMix );
    M32_OW(pjMmBase, ALU_BG_FN, (WORD) ulHwForeMix );

    while (TRUE)
    {
        xLeft  = prcl->left;
        xRight = prcl->right;

        M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) (xLeft + xOffset) );
        M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) (xRight + xOffset - 1) );

        yTop = prcl->top;
        cy   = prcl->bottom - yTop;

        // Widen the transfer so that it starts on a source pixel that is
        // a multiple of four and is a multiple of four pixels wide.  With
        // one byte per source pixel that keeps each scan's reads within
        // whole dwords of the scan; the scissors set above bound the real
        // destination extent.
        //
        // (24bpp displays are not handled by this routine; see the
        // assertion above.)

        xBias  = (xLeft + dx) & 3;              // Floor
        xLeft -= xBias;
        cx     = (xRight - xLeft + 3) & ~3;     // Ceiling

        // The cast must cover the whole sum: 'xLeft' may have gone
        // negative after the bias was subtracted, and casting only the
        // first operand would hand a value wider than 16 bits to M32_OW.

        M32_OW(pjMmBase, CUR_X,        (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_START, (WORD) (xLeft + xOffset) );
        M32_OW(pjMmBase, DEST_X_END,   (WORD) (xLeft + xOffset + cx) );
        M32_OW(pjMmBase, CUR_Y,        (WORD) (yTop + yOffset) );
        M32_OW(pjMmBase, DEST_Y_END,   (WORD) (yTop + yOffset + cy) );

        pjSrc = pjSrcScan0 + (yTop + dy) * lSrcDelta
                           + (xLeft + dx);
        lSrcSkip = lSrcDelta - cx;

        if (cjPelSize == 1)
        {
            // This part handles 8bpp output.  'cx' is a multiple of four,
            // so it is always even and every scan is exactly 'cx / 2'
            // words; no odd trailing pixel can occur:

            cwSrc = (cx >> 1);

            do {
                for (i = cwSrc; i != 0; i--)
                {
                    uw  = (USHORT) (pulXlate[*pjSrc++]);
                    uw |= (USHORT) (pulXlate[*pjSrc++] << 8);
                    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 1);
                    M32_OW(pjMmBase, PIX_TRANS, uw );
                }

                pjSrc += lSrcSkip;
            } while (--cy != 0);
        }
        else if (cjPelSize == 2)
        {
            // This part handles 16bpp output, one word per source pixel:

            do {
                for (i = cx; i != 0; i--)
                {
                    uw = (USHORT) (pulXlate[*pjSrc++]);
                    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 1);
                    M32_OW(pjMmBase, PIX_TRANS, uw );
                }

                pjSrc += lSrcSkip;
            } while (--cy != 0);
        }

        if (--c == 0)
            break;

        prcl++;

        // Seven writes set up the next rectangle:

        M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 7);
    }

    // Don't forget to reset the clip register:

    M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 2);
    M32_OW(pjMmBase, EXT_SCISSOR_L, (SHORT) 0 );
    M32_OW(pjMmBase, EXT_SCISSOR_R, (SHORT) M32_MAX_SCISSOR );
}
/******************************Public*Routine******************************\
* VOID vM32CopyBlt
*
* Does a screen-to-screen blt of a list of rectangles.
*
* 'dx' and 'dy' are the offsets from a destination coordinate to the
* matching source coordinate.  When the original source and destination
* do not overlap (per the OVERLAP macro), every rectangle is copied
* top-down, left-to-right.  Otherwise the copy direction is picked from the
* relative position of source and destination, giving four variants of the
* same nine-write register sequence:
*
*   dst top <= src y, dst left <= src x : top-down,  left-to-right
*   dst top <= src y, dst left >  src x : top-down,  right-to-left
*   dst top >  src y, dst left <= src x : bottom-up, left-to-right
*   dst top >  src y, dst left >  src x : bottom-up, right-to-left
*
* See Blt_DS_SS_ENG_IO_D0 and Blt_DS_SS_TLBR_ENG_IO_D1.
*
\**************************************************************************/
VOID vM32CopyBlt( // Type FNCOPY
PDEV* ppdev,
LONG c, // Can't be zero
RECTL* prcl, // Array of relative coordinates destination rectangles
ULONG rop4, // rop4
POINTL* pptlSrc, // Original unclipped source point
RECTL* prclDst) // Original unclipped destination rectangle
{
BYTE* pjMmBase;
LONG xOffset;
LONG yOffset;
LONG dx;
LONG dy;
LONG xLeft;
LONG yTop;
LONG cx;
LONG cy;
ASSERTDD(c > 0, "Can't handle zero rectangles");
ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
"Expect only a rop2");
pjMmBase = ppdev->pjMmBase;
xOffset = ppdev->xOffset;
yOffset = ppdev->yOffset;
// Three set-up writes (DP_CONFIG, ALU_FG_FN, SRC_Y_DIR) plus the nine
// writes needed by the first rectangle:
M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 12);
M32_OW(pjMmBase, DP_CONFIG, FG_COLOR_SRC_BLIT | DRAW | WRITE);
M32_OW(pjMmBase, ALU_FG_FN, gaul32HwMixFromRop2[rop4 & 0xf]);
// Add 'dx'/'dy' to a destination coordinate to get the source coordinate:
dx = pptlSrc->x - prclDst->left;
dy = pptlSrc->y - prclDst->top;
// The accelerator may not be as fast at doing right-to-left copies, so
// only do them when the rectangles truly overlap:
if (!OVERLAP(prclDst, pptlSrc))
{
M32_OW(pjMmBase, SRC_Y_DIR, 1);
// Jumps into the top-down, left-to-right loop below:
goto Top_Down_Left_To_Right;
}
// SRC_Y_DIR is written with 1 for a top-down copy and 0 for a bottom-up
// copy:
M32_OW(pjMmBase, SRC_Y_DIR, (prclDst->top <= pptlSrc->y));
if (prclDst->top <= pptlSrc->y)
{
if (prclDst->left <= pptlSrc->x)
{
// Top-down, left-to-right:
Top_Down_Left_To_Right:
while (TRUE)
{
xLeft = xOffset + prcl->left + dx; // Source coordinates (dest + dx/dy)
yTop = yOffset + prcl->top + dy;
cx = prcl->right - prcl->left;
cy = prcl->bottom - prcl->top;
M32_OW(pjMmBase, M32_SRC_X, xLeft);
M32_OW(pjMmBase, M32_SRC_X_START, xLeft);
M32_OW(pjMmBase, M32_SRC_X_END, xLeft + cx);
M32_OW(pjMmBase, M32_SRC_Y, yTop);
xLeft -= dx; // Destination coordinates
yTop -= dy;
M32_OW(pjMmBase, CUR_X, xLeft);
M32_OW(pjMmBase, DEST_X_START, xLeft);
M32_OW(pjMmBase, DEST_X_END, xLeft + cx);
M32_OW(pjMmBase, CUR_Y, yTop);
vM32QuietDown(ppdev, pjMmBase);
M32_OW(pjMmBase, DEST_Y_END, yTop + cy);
if (--c == 0)
break;
prcl++;
// Nine writes for the next rectangle:
M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9);
}
}
else
{
// Top-down, right-to-left: the X start registers get the right edge
// and the X end registers get the left edge.
while (TRUE)
{
xLeft = xOffset + prcl->left + dx; // Source coordinates (dest + dx/dy)
yTop = yOffset + prcl->top + dy;
cx = prcl->right - prcl->left;
cy = prcl->bottom - prcl->top;
M32_OW(pjMmBase, M32_SRC_X, xLeft + cx);
M32_OW(pjMmBase, M32_SRC_X_START, xLeft + cx);
M32_OW(pjMmBase, M32_SRC_X_END, xLeft);
M32_OW(pjMmBase, M32_SRC_Y, yTop);
xLeft -= dx; // Destination coordinates
yTop -= dy;
M32_OW(pjMmBase, CUR_X, xLeft + cx);
M32_OW(pjMmBase, DEST_X_START, xLeft + cx);
M32_OW(pjMmBase, DEST_X_END, xLeft);
M32_OW(pjMmBase, CUR_Y, yTop);
vM32QuietDown(ppdev, pjMmBase);
M32_OW(pjMmBase, DEST_Y_END, yTop + cy);
if (--c == 0)
break;
prcl++;
// Nine writes for the next rectangle:
M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9);
}
}
}
else
{
if (prclDst->left <= pptlSrc->x)
{
// Bottom-up, left-to-right: 'yTop' is biased by -1 so that 'yTop + cy'
// is the last scan of the rectangle and 'yTop' is one scan above its
// first scan.
while (TRUE)
{
xLeft = xOffset + prcl->left + dx; // Source coordinates (dest + dx/dy)
yTop = yOffset + prcl->top + dy - 1;
cx = prcl->right - prcl->left;
cy = prcl->bottom - prcl->top;
M32_OW(pjMmBase, M32_SRC_X, xLeft);
M32_OW(pjMmBase, M32_SRC_X_START, xLeft);
M32_OW(pjMmBase, M32_SRC_X_END, xLeft + cx);
M32_OW(pjMmBase, M32_SRC_Y, yTop + cy);
xLeft -= dx; // Destination coordinates
yTop -= dy;
M32_OW(pjMmBase, CUR_X, xLeft);
M32_OW(pjMmBase, DEST_X_START, xLeft);
M32_OW(pjMmBase, DEST_X_END, xLeft + cx);
M32_OW(pjMmBase, CUR_Y, yTop + cy);
vM32QuietDown(ppdev, pjMmBase);
M32_OW(pjMmBase, DEST_Y_END, yTop);
if (--c == 0)
break;
prcl++;
// Nine writes for the next rectangle:
M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9);
}
}
else
{
// Bottom-up, right-to-left: combines the -1 bias on 'yTop' with the
// swapped X start/end edges.
while (TRUE)
{
xLeft = xOffset + prcl->left + dx; // Source coordinates (dest + dx/dy)
yTop = yOffset + prcl->top + dy - 1;
cx = prcl->right - prcl->left;
cy = prcl->bottom - prcl->top;
M32_OW(pjMmBase, M32_SRC_X, xLeft + cx);
M32_OW(pjMmBase, M32_SRC_X_START, xLeft + cx);
M32_OW(pjMmBase, M32_SRC_X_END, xLeft);
M32_OW(pjMmBase, M32_SRC_Y, yTop + cy);
xLeft -= dx; // Destination coordinates
yTop -= dy;
M32_OW(pjMmBase, CUR_X, xLeft + cx);
M32_OW(pjMmBase, DEST_X_START, xLeft + cx);
M32_OW(pjMmBase, DEST_X_END, xLeft);
M32_OW(pjMmBase, CUR_Y, yTop + cy);
vM32QuietDown(ppdev, pjMmBase);
M32_OW(pjMmBase, DEST_Y_END, yTop);
if (--c == 0)
break;
prcl++;
// Nine writes for the next rectangle:
M32_CHECK_FIFO_SPACE(ppdev, pjMmBase, 9);
}
}
}
}