windows-nt/Source/XPSP1/NT/drivers/video/ms/cirrus/disp/bltio.c
2020-09-26 16:20:57 +08:00

1020 lines
32 KiB
C

/******************************************************************************\
*
* $Workfile: bltio.c $
*
* Contains the low-level IO blt functions.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines. You shouldn't have to modify much in
* 'bitblt.c'. I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refers to coordinates
* that haven't yet had the offscreen bitmap (DFB) offset applied.
* 'Absolute' coordinates have had the offset applied. For example,
* we may be told to blt to (1, 1) of the bitmap, but the bitmap may
* be sitting in offscreen memory starting at coordinate (0, 768) --
* (1, 1) would be the 'relative' start coordinate, and (1, 769)
* would be the 'absolute' start coordinate.
*
* Copyright (c) 1992-1995 Microsoft Corporation
* Copyright (c) 1996 Cirrus Logic, Inc.
*
* $Log: S:/projects/drivers/ntsrc/display/bltio.c_v $
*
* Rev 1.2 Nov 07 1996 16:47:52 unknown
* Clean up CAPS flags
*
* Rev 1.1 Oct 10 1996 15:36:10 unknown
*
*
* Rev 1.1 12 Aug 1996 16:49:42 frido
* Removed unaccessed local parameters.
*
* jl01 10-08-96 Do Transparent BLT w/o Solid Fill. Refer to PDRs#5511/6817.
\******************************************************************************/
#include "precomp.h"
/**************************************************************************
* VOID vIoFastPatRealize
*
* Realizes a pattern into offscreen memory.
*
* If the brush has no usable cache entry (prb->pbe is NULL, or the entry's
* prbVerify no longer points back at this brush), the next slot of
* ppdev->abe is claimed round-robin and linked to the brush. The 8x8
* pattern in prb->aulPattern is then downloaded to the slot through the
* blt engine and replicated once to the right and once downwards, which
* leaves a 16x16 pel block (2x2 tiles) stored with a pitch of 16 pels.
*
* ppdev - Device whose brush cache and blt engine are used
* prb   - Brush realization to download; prb->pbe is updated on a miss
*
**************************************************************************/
VOID vIoFastPatRealize(
    PDEV*   ppdev,
    RBRUSH* prb)        // Points to brush realization structure
{
    BRUSHENTRY* pbe;
    LONG        iBrushCache;
    BYTE*       pjPattern;
    BYTE*       pjPorts = ppdev->pjPorts;
    LONG        lDelta  = ppdev->lDelta;
    LONG        lDeltaPat;
    LONG        xCnt;
    LONG        yCnt;
    ULONG       ulDst;

    DISPDBG((10,"vFastPatRealize called"));

    pbe = prb->pbe;
    if ((pbe == NULL) || (pbe->prbVerify != prb))
    {
        // We have to allocate a new offscreen cache brush entry for
        // the brush.  Slots are handed out round-robin, so whichever
        // brush owned this slot before is implicitly evicted (its
        // prbVerify check will fail the next time it is used):

        iBrushCache = ppdev->iBrushCache;
        pbe         = &ppdev->abe[iBrushCache];

        iBrushCache++;
        if (iBrushCache >= ppdev->cBrushCache)
            iBrushCache = 0;

        ppdev->iBrushCache = iBrushCache;

        // Update our links:

        pbe->prbVerify = prb;
        prb->pbe       = pbe;
    }

    //
    // Download brush into cache.  The destination pitch is two pattern
    // widths because the cached copy is 16 pels wide.
    //

    pjPattern = (PBYTE) &prb->aulPattern[0];    // Copy from brush buffer
    lDeltaPat = PELS_TO_BYTES(8);               // Bytes per pattern scan
    xCnt      = PELS_TO_BYTES(8);               // Bytes per blt scan
    yCnt      = 8;                              // Scans in the pattern

    ulDst = (pbe->y * lDelta) + PELS_TO_BYTES(pbe->x);

    ppdev->pfnBankMap(ppdev, ppdev->lXferBank);

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, (lDeltaPat * 2));
    CP_IO_XCNT(ppdev, pjPorts, (xCnt - 1));
    CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
    CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);
    CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
    CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ulDst);
    CP_IO_START_BLT(ppdev, pjPorts);

    vImageTransfer(ppdev, pjPattern, lDeltaPat, xCnt, yCnt);

    //
    // Duplicate brush horizontally: copy the 8x8 tile to the position
    // eight pels to its right.  The destination pitch programmed above
    // is still in effect.
    //

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
    CP_IO_XCNT(ppdev, pjPorts, (xCnt - 1));
    CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
    CP_IO_BLT_MODE(ppdev, pjPorts, 0);
    CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, (lDeltaPat * 2));
    CP_IO_SRC_ADDR(ppdev, pjPorts, ulDst);
    CP_IO_DST_ADDR_ABS(ppdev, pjPorts, (ulDst + lDeltaPat));
    CP_IO_START_BLT(ppdev, pjPorts);

    //
    // Duplicate brush vertically: copy the 16x8 strip to the position
    // eight scans below it (8 scans * 16 pels = 128 pels further on).
    //

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
    CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, (xCnt * 2));
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, (xCnt * 2));
    CP_IO_BLT_MODE(ppdev, pjPorts, 0);
    CP_IO_XCNT(ppdev, pjPorts, ((xCnt * 2) - 1));
    CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
    CP_IO_SRC_ADDR(ppdev, pjPorts, ulDst);
    CP_IO_DST_ADDR_ABS(ppdev, pjPorts, (ulDst + PELS_TO_BYTES(128)));
    CP_IO_START_BLT(ppdev, pjPorts);

#if 0
    {
        ////////////////////////////////////////////////////////////////
        // DEBUG TILED PATTERNS
        //
        // The following code helps to debug patterns if you break the
        // realization code.  It copies the 2x2 tiled copy of the brush
        // to the visible screen.
        //

        POINTL ptl;
        RECTL  rcl;

        ptl.x = pbe->x;
        ptl.y = pbe->y;

        rcl.left   = 10;
        rcl.right  = 10 + 16;
        rcl.top    = ppdev->cyScreen - 10 - 16;
        rcl.bottom = ppdev->cyScreen - 10;

        //
        // Make sure we can write to the video registers.
        //

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
        CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(16));
        CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);

        //
        // Top to Bottom - Left to Right
        //

        CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor);

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(rcl.right - rcl.left) - 1));
        CP_IO_YCNT(ppdev, pjPorts, (rcl.bottom - rcl.top - 1));
        CP_IO_SRC_ADDR(ppdev, pjPorts, (0 + ((ptl.y) * lDelta) + PELS_TO_BYTES(ptl.x)));
        CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ((rcl.top * lDelta) + PELS_TO_BYTES(rcl.left)));
        CP_IO_START_BLT(ppdev, pjPorts);
    }
#endif
}
/**************************************************************************
* VOID vIoFillPat
*
* Fills a list of rectangles with an 8x8 pattern brush using the blt
* engine's pattern-copy mode.
*
* For every rectangle the cached 16x16 copy of the brush is first blted
* to the scratch location at ppdev->ulAlignedPatternOffset, starting at
* the sub-tile offset that matches the rectangle's origin relative to the
* brush origin; the rectangle is then filled from that aligned copy.
*
* ppdev     - Device to draw on
* c         - Number of rectangles; must be greater than zero
* prcl      - Array of destination rectangles, relative coordinates
* rop4      - Raster operation; bits 2..5 select the hardware mix
* rbc       - rbc.prb is the brush realization to use
* pptlBrush - Brush origin
*
**************************************************************************/
VOID vIoFillPat(
    PDEV*           ppdev,
    LONG            c,          // Can't be zero
    RECTL*          prcl,       // Array of relative coordinate destination rects
    ROP4            rop4,       // Obvious?
    RBRUSH_COLOR    rbc,        // Drawing color is rbc.iSolidColor
    POINTL*         pptlBrush)  //
{
    BYTE*       pjPorts      = ppdev->pjPorts;
    LONG        lDelta       = ppdev->lDelta;
    ULONG       ulAlignedPat = ppdev->ulAlignedPatternOffset;
    ULONG       ulCachedPat;
    BYTE        jFillRop;
    BYTE        jFillMode;
    BRUSHENTRY* pbe;            // Cache entry describing where the
                                //   brush bits live in off-screen memory

    DISPDBG((10,"vFillPat called"));

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(ppdev->cBpp < 3, "vFillPat only works at 8bpp and 16bpp");

    // Make sure the brush is sitting in the off-screen cache; download
    // it if it was never realized or if its slot has been reused:

    pbe = rbc.prb->pbe;
    if ((pbe != NULL) && (pbe->prbVerify == rbc.prb))
    {
        DISPDBG((5, " -- Brush cache hit on brush at (%d,%d)", rbc.prb->pbe->x, rbc.prb->pbe->y));
    }
    else
    {
        vIoFastPatRealize(ppdev, rbc.prb);
        pbe = rbc.prb->pbe;

        DISPDBG((5, " -- Brush cache miss, put it at (%d,%d)", rbc.prb->pbe->x, rbc.prb->pbe->y));
    }

    ulCachedPat = pbe->xy;
    jFillRop    = gajHwMixFromRop2[(rop4 >> 2) & 0xf];
    jFillMode   = ppdev->jModeColor | ENABLE_8x8_PATTERN_COPY;

    //
    // Fill the list of rectangles
    //

    for (;;)
    {
        // Phase of the rectangle's origin within the 8x8 brush.  The
        // cached copy has a 16-pel pitch, hence the shift by four:

        LONG  xPhase   = (prcl->left - pptlBrush->x) & 7;
        LONG  yPhase   = (prcl->top  - pptlBrush->y) & 7;
        ULONG cjRotate = PELS_TO_BYTES((yPhase << 4) + xPhase);

        // Step 1: copy an 8x8 window of the 16x16 cached brush to the
        // scratch area so that the pattern is aligned for this rectangle

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        CP_IO_BLT_MODE(ppdev, pjPorts, 0);
        CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
        CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(16));
        CP_IO_DST_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(8));
        CP_IO_SRC_ADDR(ppdev, pjPorts, (ulCachedPat + cjRotate));
        CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(8) - 1));
        CP_IO_YCNT(ppdev, pjPorts, (8 - 1));
        CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ulAlignedPat);
        CP_IO_START_BLT(ppdev, pjPorts);

        // Step 2: pattern-fill the rectangle from the aligned copy

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        CP_IO_BLT_MODE(ppdev, pjPorts, jFillMode);
        CP_IO_ROP(ppdev, pjPorts, jFillRop);
        CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
        CP_IO_SRC_ADDR(ppdev, pjPorts, ulAlignedPat);
        CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
        CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
        CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
        CP_IO_START_BLT(ppdev, pjPorts);

        if (--c == 0)
            break;

        prcl++;
    }
}
/**************************************************************************
* VOID vIoFillSolid
*
* Fills a list of rectangles with a single color.
*
* The color is replicated to fill a dword at 8bpp and 16bpp, loaded as
* the foreground color, and each rectangle is drawn as a color-expanded
* 8x8 pattern copy sourced from ppdev->ulSolidColorOffset.
*
* ppdev     - Device to draw on
* c         - Number of rectangles; must be greater than zero
* prcl      - Array of destination rectangles, relative coordinates
* rop4      - Raster operation; bits 2..5 select the hardware mix
* rbc       - rbc.iSolidColor is the fill color
* pptlBrush - Not used
*
**************************************************************************/
VOID vIoFillSolid(
    PDEV*           ppdev,
    LONG            c,          // Can't be zero
    RECTL*          prcl,       // Array of relative coordinate destination rects
    ROP4            rop4,       // Obvious?
    RBRUSH_COLOR    rbc,        // Drawing color is rbc.iSolidColor
    POINTL*         pptlBrush)  // Not used
{
    BYTE*   pjPorts = ppdev->pjPorts;
    LONG    lDelta  = ppdev->lDelta;
    ULONG   ulColor;
    BYTE    jFillRop;

    DISPDBG((10,"vFillSolid called"));

    ASSERTDD(c > 0, "Can't handle zero rectangles");

    // Replicate the pel value across the whole dword:

    ulColor = rbc.iSolidColor;

    switch (ppdev->cBpp)
    {
    case 1:
        ulColor |= ulColor << 8;
        ulColor |= ulColor << 16;
        break;

    case 2:
        ulColor |= ulColor << 16;
        break;

    default:
        break;
    }

    jFillRop = gajHwMixFromRop2[(rop4 >> 2) & 0xf];

    //
    // Wait for the engine, then program everything that stays the same
    // for every rectangle.
    //

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

    CP_IO_ROP(ppdev, pjPorts, jFillRop);
    CP_IO_SRC_ADDR(ppdev, pjPorts, ppdev->ulSolidColorOffset);
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
    CP_IO_BLT_MODE(ppdev, pjPorts, ENABLE_COLOR_EXPAND |
                                   ENABLE_8x8_PATTERN_COPY |
                                   ppdev->jModeColor);
    CP_IO_FG_COLOR(ppdev, pjPorts, ulColor);

    //
    // Draw each rectangle.  The engine is already idle for the first
    // one; for every later one we wait for the previous blt to finish.
    //

    for (;;)
    {
        CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
        CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
        CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
        CP_IO_START_BLT(ppdev, pjPorts);

        if (--c == 0)
            break;

        prcl++;
        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
    }
}
/**************************************************************************
* VOID vIoCopyBlt
*
* Does a screen-to-screen blt of a list of rectangles.
*
* All rectangles share the same source-to-destination displacement,
* derived from the unclipped source point and destination rectangle.
* The copy runs top-to-bottom, left-to-right unless source and
* destination overlap in a way that requires the reverse direction.
*
* ppdev   - Device to draw on
* c       - Number of rectangles; must be greater than zero
* prcl    - Array of destination rectangles, relative coordinates
* rop4    - Raster operation; the low four bits select the hardware mix
* pptlSrc - Original unclipped source point
* prclDst - Original unclipped destination rectangle
*
**************************************************************************/
VOID vIoCopyBlt(
    PDEV*   ppdev,
    LONG    c,          // Can't be zero
    RECTL*  prcl,       // Array of relative coordinates destination rectangles
    ROP4    rop4,       // Obvious?
    POINTL* pptlSrc,    // Original unclipped source point
    RECTL*  prclDst)    // Original unclipped destination rectangle
{
    BYTE*   pjPorts  = ppdev->pjPorts;
    LONG    lDelta   = ppdev->lDelta;
    LONG    xyOffset = ppdev->xyOffset;
    LONG    dxSrc;      // Add to a destination x to get the source x
    LONG    dySrc;      // Add to a destination y to get the source y
    BYTE    jCopyRop;

    DISPDBG((10,"vCopyBlt called"));

    ASSERTDD(c > 0, "Can't handle zero rectangles");

    //
    // The src-dst delta will be the same for all rectangles
    //

    dxSrc = pptlSrc->x - prclDst->left;
    dySrc = pptlSrc->y - prclDst->top;

    jCopyRop = gajHwMixFromRop2[rop4 & 0xf];

    //
    // Make sure we can write to the video registers, then program the
    // state shared by every rectangle.
    //

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

    CP_IO_ROP(ppdev, pjPorts, jCopyRop);
    CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, lDelta);
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);

    //
    // The accelerator may not be as fast at doing right-to-left copies,
    // so the reverse direction is used only when the rectangles truly
    // overlap and the destination lies after the source in scan order:
    //

    if (OVERLAP(prclDst, pptlSrc) &&
        (prclDst->top >= pptlSrc->y) &&
        ((prclDst->top != pptlSrc->y) || (prclDst->left > pptlSrc->x)))
    {
        //
        // Bottom to Top - Right to Left.  Addresses name the last byte
        // of the last scan of each rectangle.
        //

        DISPDBG((12,"Bottom to Top - Right to Left"));

        CP_IO_BLT_MODE(ppdev, pjPorts, DIR_BTRL);

        for (;;)
        {
            CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
            CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
            CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((prcl->bottom - 1 + dySrc) * lDelta) + PELS_TO_BYTES(prcl->right + dxSrc) - 1));
            CP_IO_DST_ADDR(ppdev, pjPorts, (((prcl->bottom - 1) * lDelta) + PELS_TO_BYTES(prcl->right) - 1));
            CP_IO_START_BLT(ppdev, pjPorts);

            if (--c == 0)
                break;

            prcl++;
            CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        }
    }
    else
    {
        //
        // Top to Bottom - Left to Right.  Addresses name the first byte
        // of the first scan of each rectangle.
        //

        DISPDBG((12,"Top to Bottom - Left to Right"));

        CP_IO_BLT_MODE(ppdev, pjPorts, DIR_TBLR);

        for (;;)
        {
            CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
            CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
            CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((prcl->top + dySrc) * lDelta) + PELS_TO_BYTES(prcl->left + dxSrc)));
            CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
            CP_IO_START_BLT(ppdev, pjPorts);

            if (--c == 0)
                break;

            prcl++;
            CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        }
    }
}
/******************************Public*Routine******************************\
* VOID vIoXfer1bpp
*
* Low-level routine used to transfer monochrome data to the screen using
* DWORD writes to the blt engine.
*
* This can handle opaque or transparent expansions. It does opaque
* expansions by drawing the opaque rectangle first and then transparently
* expands the foreground bits.
*
* The foreground color is taken from pxlo->pulXlate[1] and the background
* color from pxlo->pulXlate[0]. The background fill pass is skipped when
* rop4 is 0xCCAA; otherwise it uses the hardware mix selected by the low
* four bits of rop4. The foreground rop must be 0xCC (asserted).
*
* Each rectangle is widened so that the source starts on a 32-pel (dword)
* boundary and the width is a multiple of 32 pels; the extra source bits
* at either edge are masked off in software before being sent, and the
* expansion is done with transparency compare enabled.
*
\**************************************************************************/
VOID vIoXfer1bpp(
PDEV* ppdev,
LONG c, // Count of rectangles, can't be zero
RECTL* prcl, // List of destination rectangles, in relative
// coordinates
ROP4 rop4, // Actually had better be a rop3
SURFOBJ* psoSrc, // Source surface
POINTL* pptlSrc, // Original unclipped source point
RECTL* prclDst, // Original unclipped destination rectangle
XLATEOBJ* pxlo) // Translate that provides color-expansion information
{
ULONG* pulXfer;
ULONG* pul;
LONG ix;
LONG iy;
LONG cxWidthInBytes;
BYTE* pjBits;
POINTL ptlDst;
POINTL ptlSrc;
SIZEL sizlDst;
LONG cxLeftMask;
LONG cxRightMask;
ULONG ulDstAddr;
INT nDwords;
ULONG ulLeftMask;
ULONG ulRightMask;
LONG dx;
LONG dy;
BYTE* pjPorts = ppdev->pjPorts;
LONG lDelta = ppdev->lDelta;
LONG lDeltaSrc = psoSrc->lDelta;
LONG cBpp = ppdev->cBpp;
ULONG ulFgColor = pxlo->pulXlate[1];
ULONG ulBgColor = pxlo->pulXlate[0];
// Since the hardware clipping on some of the Cirrus chips is broken, we
// do the clipping by rounding out the edges to dword boundaries and then
// doing the blt transparently. In the event that we want the expansion
// to be opaque, we do the opaquing blt in advance. One side effect of
// this is that the destination bits are no longer valid for processing
// the rop. This could probably be optimized by doing the edges separately
// and then doing the middle section in one pass. However, this is
// complicated by a 5434 bug that breaks blts less than 10 pixels wide.
ASSERTDD(c > 0, "Can't handle zero rectangles");
ASSERTDD(((rop4 & 0xff00) == 0xcc00), "Expected foreground rop of 0xcc");
//
// The src-dst delta will be the same for all rectangles
//
dx = pptlSrc->x - prclDst->left;
dy = pptlSrc->y - prclDst->top;
// Replicate the foreground and background pel values so that they fill
// a whole dword (four copies at 1 byte per pel, two at 2 bytes per pel).
if (cBpp == 1)
{
ulFgColor = (ulFgColor << 8) | (ulFgColor & 0xff);
ulBgColor = (ulBgColor << 8) | (ulBgColor & 0xff);
ulFgColor = (ulFgColor << 16) | (ulFgColor & 0xffff);
ulBgColor = (ulBgColor << 16) | (ulBgColor & 0xffff);
}
else if (cBpp == 2)
{
ulFgColor = (ulFgColor << 16) | (ulFgColor & 0xffff);
ulBgColor = (ulBgColor << 16) | (ulBgColor & 0xffff);
}
pulXfer = ppdev->pulXfer;
ppdev->pfnBankMap(ppdev, ppdev->lXferBank);
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
// Opaquing pass: unless the background rop leaves the destination alone
// (rop4 == 0xCCAA), solid-fill every rectangle with the background color
// first. This walks a private copy of the rectangle list (prclTmp/lCnt)
// so that 'prcl' and 'c' are untouched for the expansion pass below.
if (rop4 != 0xCCAA)
{
LONG lCnt = c;
RECTL* prclTmp = prcl;
BYTE jHwBgRop = gajHwMixFromRop2[rop4 & 0xf];
CP_IO_ROP(ppdev, pjPorts, jHwBgRop);
CP_IO_FG_COLOR(ppdev, pjPorts, ulBgColor);
CP_IO_SRC_ADDR(ppdev, pjPorts, ppdev->ulSolidColorOffset);
CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor |
ENABLE_COLOR_EXPAND |
ENABLE_8x8_PATTERN_COPY);
do
{
// calculate the size of the blt
ptlDst.x = prclTmp->left;
ptlDst.y = prclTmp->top;
sizlDst.cx = prclTmp->right - ptlDst.x;
sizlDst.cy = prclTmp->bottom - ptlDst.y;
//
// Fill the background rectangle with the background color
//
// Set the dest addresses
ulDstAddr = (ptlDst.y * lDelta) + PELS_TO_BYTES(ptlDst.x);
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
//
// Tell the hardware how many bytes we'd like to write:
// sizlDst.cx * sizelDst.cy
//
CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(sizlDst.cx) - 1);
CP_IO_YCNT(ppdev, pjPorts, sizlDst.cy - 1);
CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);
// Start the blt operation
CP_IO_START_BLT(ppdev, pjPorts);
prclTmp++;
} while (--lCnt != 0);
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
}
// Expansion pass setup. The background and transparency-compare colors
// are both programmed to the complement of the foreground color.
// NOTE(review): presumably this makes every expanded '0' bit match the
// transparent color so that only foreground pels are written -- confirm
// against the chip documentation.
CP_IO_FG_COLOR(ppdev, pjPorts, ulFgColor);
CP_IO_BG_COLOR(ppdev, pjPorts, ~ulFgColor);
CP_IO_XPAR_COLOR(ppdev, pjPorts, ~ulFgColor);
CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor |
ENABLE_COLOR_EXPAND |
ENABLE_TRANSPARENCY_COMPARE |
SRC_CPU_DATA);
CP_IO_BLT_EXT_MODE(ppdev, pjPorts, 0); // jl01
do
{
// calculate the size of the blt
ptlDst.x = prcl->left;
ptlDst.y = prcl->top;
sizlDst.cx = prcl->right - ptlDst.x;
sizlDst.cy = prcl->bottom - ptlDst.y;
// calculate the source point that corresponds to this rectangle
ptlSrc.x = prcl->left + dx;
ptlSrc.y = prcl->top + dy;
// Floor the source.
// Extend the width by the amount required to floor to a dword boundary.
// Set the size of the left mask.
// Floor the dest, so it aligns with the floored source.
if ((cxLeftMask = (ptlSrc.x & 31)))
{
sizlDst.cx += cxLeftMask;
ptlSrc.x &= ~31;
ptlDst.x -= cxLeftMask;
}
// Mask for the first dword of each scan, looked up by the number of
// pels the left edge was extended (presumably it clears those bits).
ulLeftMask = gaulLeftClipMask[cxLeftMask];
// Ceil the cx to a dword boundary.
if (cxRightMask = (sizlDst.cx & 31))
{
cxRightMask = 32 - cxRightMask;
sizlDst.cx = (sizlDst.cx + 31) & ~31;
}
// Mask for the last dword of each scan, looked up by the number of
// pels the right edge was extended.
ulRightMask = gaulRightClipMask[cxRightMask];
// A blt exactly one dword wide has a single dword per scan that is both
// the first and the last, so fold both masks into the left one. A zero
// ulRightMask is what selects the single-dword transfer loop below.
if (sizlDst.cx == 32)
{
ulLeftMask &= ulRightMask;
ulRightMask = 0;
}
// Note: At this point sizlDst.cx is the width of the blt in pixels,
// floored to a dword boundary, and ceiled to a dword boundary.
// Calculate the width in Bytes
cxWidthInBytes = sizlDst.cx >> 3;
// Calculate the number of Dwords and any remaining bytes
nDwords = cxWidthInBytes >> 2;
ASSERTDD(((cxWidthInBytes & 0x03) == 0),
"cxWidthInBytes is not a DWORD multiple");
// Calculate the address of the source bitmap
// This is to a byte boundary.
pjBits = (PBYTE) psoSrc->pvScan0;
pjBits += ptlSrc.y * lDeltaSrc;
pjBits += ptlSrc.x >> 3;
ASSERTDD((((ULONG_PTR)pjBits & 0x03) == 0),
"pjBits not DWORD aligned like it should be");
//
// Blt the 1 bpp bitmap
//
ulDstAddr = (ptlDst.y * lDelta) + PELS_TO_BYTES(ptlDst.x);
CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(sizlDst.cx) - 1);
CP_IO_YCNT(ppdev, pjPorts, sizlDst.cy - 1);
//
// The 542x chips require a write to the Src Address Register when
// doing a host transfer with color expansion. The value is
// irrelevant, but the write is crucial. This is documented in
// the manual, not the errata. Go figure.
//
CP_IO_SRC_ADDR(ppdev, pjPorts, 0);
CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);
CP_IO_START_BLT(ppdev, pjPorts);
//
// Transfer the host bitmap. Every dword is written to the same
// address, pulXfer; the pointer is never advanced.
//
if (ulRightMask)
{
//
// Blt is > 1 DWORD wide (nDwords > 1)
//
for (iy = 0; iy < sizlDst.cy; iy++)
{
pul = (ULONG*) pjBits;
// First dword of the scan, with the left-edge mask applied
//*pulXfer++ = *(((ULONG*)pul)++) & ulLeftMask;
WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul) & ulLeftMask));
pul++;
// Middle dwords of the scan (nDwords - 2 of them), sent unmasked
for (ix = 0; ix < (nDwords-2); ix++)
{
//*pulXfer++ = *(((ULONG*)pul)++);
WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul)));
pul++;
}
// Last dword of the scan, with the right-edge mask applied
//*pulXfer++ = *(((ULONG*)pul)++) & ulRightMask;
WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul) & ulRightMask));
pul++;
// Advance to the next source scan
pjBits += lDeltaSrc;
//pulXfer = ppdev->pulXfer;
CP_MEMORY_BARRIER(); // Flush memory cache when we reset the address
}
}
else
{
//
// Blt is 1 DWORD wide (nDwords == 1)
//
for (iy = 0; iy < sizlDst.cy; iy++)
{
// The only dword of the scan; ulLeftMask already holds both edge masks
//*pulXfer = *((ULONG*)pjBits) & ulLeftMask;
WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pjBits) & ulLeftMask));
pjBits += lDeltaSrc;
CP_MEMORY_BARRIER(); // Flush memory cache
}
}
prcl++;
} while (--c != 0);
}
/******************************Public*Routine******************************\
* VOID vIoXfer4bpp
*
* Does a 4bpp transfer from a bitmap to the screen.
*
* NOTE: The screen must be 8bpp for this function to be called!
*
* The reason we implement this is that a lot of resources are kept as 4bpp,
* and used to initialize DFBs, some of which we of course keep off-screen.
*
* Each source scan is translated through pxlo->pulXlate into 8bpp pels in
* batches of at most XLATE_BUFFER_SIZE pels, and every batch is pushed to
* the blt engine as whole dwords.
*
\**************************************************************************/

// XLATE_BUFFER_SIZE is the size of the on-stack buffer used for the
// translation.  Stack buffers should in general be kept as small as
// possible: the OS guarantees only 8k of stack from GDI down to the
// display driver in low-memory situations, and asking for more will
// access violate.  A stack buffer also may never be larger than a page
// (4k) -- otherwise we could miss touching the 'guard page' and access
// violate that way.

#define XLATE_BUFFER_SIZE 256

VOID vIoXfer4bpp(
    PDEV*       ppdev,
    LONG        c,          // Count of rectangles, can't be zero
    RECTL*      prcl,       // List of destination rectangles, in relative
                            //   coordinates
    ULONG       rop4,       // rop4
    SURFOBJ*    psoSrc,     // Source surface
    POINTL*     pptlSrc,    // Original unclipped source point
    RECTL*      prclDst,    // Original unclipped destination rectangle
    XLATEOBJ*   pxlo)       // Translate that provides colour-expansion information
{
    BYTE*   pjPorts = ppdev->pjPorts;
    ULONG*  pulXfer = ppdev->pulXfer;
    LONG    lDelta  = ppdev->lDelta;
    ULONG*  pulXlate;
    BYTE*   pjSrcScan0;
    LONG    lSrcDelta;
    LONG    dx;
    LONG    dy;
    LONG    cx;
    LONG    cy;
    LONG    xSrc;
    ULONG   ulDstAddr;
    BYTE*   pjScan;         // Start of the current source scan
    BYTE*   pjSrc;          // Read cursor within the source scan
    BYTE*   pjDst;          // Write cursor within ajBuf
    BYTE*   pjBuf;
    BYTE    jSrc;
    LONG    cxBatch;        // Pels translated in the current batch
    LONG    cxLeft;         // Pels of the scan not yet batched
    LONG    cPairs;
    LONG    cdwBatch;
    BYTE    ajBuf[XLATE_BUFFER_SIZE];

    ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP, "Screen must be 8bpp");
    ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only a rop2");

    DISPDBG((5, "vXfer4bpp: entry"));

    // Add (dx, dy) to a destination point to get the source point:

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;
    pulXlate   = pxlo->pulXlate;

    ppdev->pfnBankMap(ppdev, ppdev->lXferBank);

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
    CP_IO_ROP(ppdev, pjPorts, gajHwMixFromRop2[rop4 & 0xf]);
    CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);

    for (;;)
    {
        ulDstAddr = (prcl->top * lDelta) + PELS_TO_BYTES(prcl->left);
        cx        = prcl->right - prcl->left;
        cy        = prcl->bottom - prcl->top;

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(cx) - 1);
        CP_IO_YCNT(ppdev, pjPorts, cy - 1);
        CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);

        // Two source pels per byte, hence the shift when locating the
        // first source byte:

        xSrc   = prcl->left + dx;
        pjScan = pjSrcScan0 + (prcl->top + dy) * lSrcDelta + (xSrc >> 1);

        CP_IO_START_BLT(ppdev, pjPorts);

        do {
            pjSrc  = pjScan;
            cxLeft = cx;        // # of pels per scan in 4bpp source

            do {
                // Take a full buffer's worth of pels, or whatever is
                // left of the scan if that is less.  'cxLeft' goes
                // non-positive once the scan has been used up:

                cxBatch = (cxLeft < XLATE_BUFFER_SIZE) ? cxLeft
                                                       : XLATE_BUFFER_SIZE;
                cxLeft -= XLATE_BUFFER_SIZE;

                pjDst = ajBuf;

                // Source alignment is handled here in software rather
                // than by setting/resetting the scissors register.

                if ((xSrc & 1) == 0)
                {
                    // Even start: each source byte yields its high
                    // nibble first and then its low nibble.  An odd
                    // batch translates one extra pel from the final
                    // byte already fetched.

                    cPairs = (cxBatch + 1) >> 1;
                    do {
                        jSrc     = *pjSrc;
                        pjSrc   += 1;
                        pjDst[0] = (BYTE) pulXlate[jSrc >> 4];
                        pjDst[1] = (BYTE) pulXlate[jSrc & 0xf];
                        pjDst   += 2;
                    } while (--cPairs != 0);
                }
                else
                {
                    // Odd start: every pair is the low nibble of one
                    // byte followed by the high nibble of the next.  A
                    // trailing odd pel is taken from the byte already
                    // in hand, so we never read past the end of the
                    // 4bpp bitmap (which could access violate).

                    jSrc = *pjSrc;
                    for (cPairs = cxBatch >> 1; cPairs != 0; cPairs--)
                    {
                        pjDst[0] = (BYTE) pulXlate[jSrc & 0xf];
                        pjSrc   += 1;
                        jSrc     = *pjSrc;
                        pjDst[1] = (BYTE) pulXlate[jSrc >> 4];
                        pjDst   += 2;
                    }

                    if (cxBatch & 1)
                        *pjDst = (BYTE) pulXlate[jSrc & 0xf];
                }

                // One byte per translated pel; the transfer is done in
                // dwords, so round the count up:

                cdwBatch = (cxBatch + 3) >> 2;
                pjBuf    = ajBuf;

                TRANSFER_DWORD_ALIGNED(ppdev, pulXfer, pjBuf, cdwBatch);

            } while (cxLeft > 0);

            // Advance to the next source scan.  Stepping 'pjScan' is
            // less error-prone than working out how far to move 'pjSrc'.

            pjScan += lSrcDelta;

        } while (--cy != 0);

        if (--c == 0)
            break;

        prcl++;
    }
}
/******************************Public*Routine******************************\
* VOID vIoXferNative
*
* Transfers a bitmap that is the same color depth as the display to
* the screen via the data transfer register, with no translation.
*
* For each rectangle the blt engine is programmed for a CPU-data source
* blt of the rectangle's size and the matching source scans are handed
* to vImageTransfer.
*
\**************************************************************************/
VOID vIoXferNative(
    PDEV*       ppdev,
    LONG        c,          // Count of rectangles, can't be zero
    RECTL*      prcl,       // Array of relative coordinates destination rectangles
    ULONG       rop4,       // rop4
    SURFOBJ*    psoSrc,     // Source surface
    POINTL*     pptlSrc,    // Original unclipped source point
    RECTL*      prclDst,    // Original unclipped destination rectangle
    XLATEOBJ*   pxlo)       // Not used
{
    BYTE*   pjPorts = ppdev->pjPorts;
    ULONG*  pulXfer = ppdev->pulXfer;
    LONG    lDelta  = ppdev->lDelta;
    BYTE*   pjSrcScan0;
    LONG    lSrcDelta;
    LONG    dx;
    LONG    dy;
    LONG    cxRect;
    LONG    cyRect;
    LONG    cjScan;         // Bytes of source data per scan
    BYTE*   pjFirst;        // First source byte for the rectangle
    ULONG   ulDstAddr;

    ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
             "Can handle trivial xlate only");
    ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat,
             "Source must be same color depth as screen");
    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only a rop2");

    // Add (dx, dy) to a destination point to get the source point:

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    ppdev->pfnBankMap(ppdev, ppdev->lXferBank);

    // State shared by every rectangle:

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
    CP_IO_ROP(ppdev, pjPorts, gajHwMixFromRop2[rop4 & 0xf]);
    CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);

    for (;;)
    {
        ulDstAddr = (prcl->top * lDelta) + PELS_TO_BYTES(prcl->left);
        cxRect    = prcl->right - prcl->left;
        cyRect    = prcl->bottom - prcl->top;
        cjScan    = PELS_TO_BYTES(cxRect);

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        CP_IO_XCNT(ppdev, pjPorts, cjScan - 1);
        CP_IO_YCNT(ppdev, pjPorts, cyRect - 1);
        CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);

        pjFirst = pjSrcScan0 + (prcl->top + dy) * lSrcDelta
                             + (PELS_TO_BYTES(prcl->left + dx));

        CP_IO_START_BLT(ppdev, pjPorts);

        // Feed the source scans to the engine:

        vImageTransfer(ppdev, pjFirst, lSrcDelta, cjScan, cyRect);

        if (--c == 0)
            break;

        prcl++;
    }
}