/******************************************************************************\
*
* $Workfile:   bltio.c  $
*
* Contains the low-level IO blt functions.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines.  You shouldn't have to modify much in
* 'bitblt.c'.  I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refers to coordinates
*       that haven't yet had the offscreen bitmap (DFB) offset applied.
*       'Absolute' coordinates have had the offset applied.  For example,
*       we may be told to blt to (1, 1) of the bitmap, but the bitmap may
*       be sitting in offscreen memory starting at coordinate (0, 768) --
*       (1, 1) would be the 'relative' start coordinate, and (1, 769)
*       would be the 'absolute' start coordinate'.
*
* Copyright (c) 1992-1995 Microsoft Corporation
* Copyright (c) 1996 Cirrus Logic, Inc.
*
* $Log:   S:/projects/drivers/ntsrc/display/bltio.c_v  $
*
*    Rev 1.2   Nov 07 1996 16:47:52   unknown
* Clean up CAPS flags
*
*    Rev 1.1   Oct 10 1996 15:36:10   unknown
*
*
*    Rev 1.1   12 Aug 1996 16:49:42   frido
* Removed unaccessed local parameters.
*
*    jl01   10-08-96  Do Transparent BLT w/o Solid Fill.  Refer to PDRs#5511/6817.
*
\******************************************************************************/

#include "precomp.h"

/**************************************************************************
* VOID vIoFastPatRealize
*
* Realizes a pattern into offscreen memory.
*
* Steps, in order:
*   1. If the brush realization has no valid cache entry (no entry, or the
*      entry's back-pointer no longer refers to this realization), take
*      the next entry from ppdev->abe, advancing ppdev->iBrushCache and
*      wrapping it to zero at ppdev->cBrushCache.
*   2. Download the 8x8 pattern from prb->aulPattern with a host-to-screen
*      blt whose destination pitch is two pattern widths (16 pels).
*   3. Copy that 8x8 cell one pattern width to the right.
*   4. Copy the resulting 16-pel-wide strip to an offset of 128 pels
*      (8 rows of 16 pels), giving a 2x2 tiled copy of the brush.
*
* The final blt is started but not waited for here.
*
**************************************************************************/

VOID vIoFastPatRealize(
PDEV*   ppdev,
RBRUSH* prb)                    // Points to brush realization structure
{
    BRUSHENTRY* pbe;
    LONG        iBrushCache;
    BYTE*       pjPattern;
    BYTE*       pjPorts = ppdev->pjPorts;
    LONG        lDelta = ppdev->lDelta;
    LONG        lDeltaPat;
    LONG        xCnt;
    LONG        yCnt;
    ULONG       ulDst;

    DISPDBG((10,"vFastPatRealize called"));

    pbe = prb->pbe;
    if ((pbe == NULL) || (pbe->prbVerify != prb))
    {
        // We have to allocate a new offscreen cache brush entry for
        // the brush:

        iBrushCache = ppdev->iBrushCache;
        pbe         = &ppdev->abe[iBrushCache];

        iBrushCache++;

        if (iBrushCache >= ppdev->cBrushCache)
            iBrushCache = 0;

        ppdev->iBrushCache = iBrushCache;

        // Update our links:

        pbe->prbVerify = prb;
        prb->pbe       = pbe;
    }

    //
    // Download brush into cache
    //

    pjPattern = (PBYTE) &prb->aulPattern[0];        // Copy from brush buffer

    lDeltaPat = PELS_TO_BYTES(8);                   // Bytes per pattern row
    xCnt      = PELS_TO_BYTES(8);
    yCnt      = 8;

    ulDst = (pbe->y * lDelta) + PELS_TO_BYTES(pbe->x);

    ppdev->pfnBankMap(ppdev, ppdev->lXferBank);

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

    // The destination pitch is two pattern widths so that there is room
    // for the horizontal duplicate next to each row.

    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, (lDeltaPat * 2));
    CP_IO_XCNT(ppdev, pjPorts, (xCnt - 1));
    CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
    CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);
    CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
    CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ulDst);

    CP_IO_START_BLT(ppdev, pjPorts);

    vImageTransfer(ppdev, pjPattern, lDeltaPat, xCnt, yCnt);

    //
    // Duplicate brush horizontally
    //

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

    CP_IO_XCNT(ppdev, pjPorts, (xCnt - 1));
    CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
    CP_IO_BLT_MODE(ppdev, pjPorts, 0);
    CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, (lDeltaPat * 2));
    CP_IO_SRC_ADDR(ppdev, pjPorts, ulDst);
    CP_IO_DST_ADDR_ABS(ppdev, pjPorts, (ulDst + lDeltaPat));

    CP_IO_START_BLT(ppdev, pjPorts);

    //
    // Duplicate brush vertically
    //

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

    CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, (xCnt * 2));
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, (xCnt * 2));
    CP_IO_BLT_MODE(ppdev, pjPorts, 0);
    CP_IO_XCNT(ppdev, pjPorts, ((xCnt * 2) - 1));
    CP_IO_YCNT(ppdev, pjPorts, (yCnt - 1));
    CP_IO_SRC_ADDR(ppdev, pjPorts, ulDst);
    CP_IO_DST_ADDR_ABS(ppdev, pjPorts, (ulDst + PELS_TO_BYTES(128)));

    CP_IO_START_BLT(ppdev, pjPorts);

    #if 0
    {
        ////////////////////////////////////////////////////////////////
        // DEBUG TILED PATTERNS
        //
        // The following code helps to debug patterns if you break the
        // realization code.  It copies the 2x2 tiled copy of the brush
        // to the visible screen.
        //

        POINTL ptl;
        RECTL rcl;

        ptl.x = pbe->x;
        ptl.y = pbe->y;

        rcl.left = 10;
        rcl.right = 10 + 16;
        rcl.top = ppdev->cyScreen - 10 - 16;
        rcl.bottom = ppdev->cyScreen - 10;

        {
            //
            // Make sure we can write to the video registers.
            //

            CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

            CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
            CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(16));
            CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);

            {
                //
                // Top to Bottom - Left to Right
                //

                CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor);

                {
                    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

                    CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(rcl.right - rcl.left) - 1));
                    CP_IO_YCNT(ppdev, pjPorts, (rcl.bottom - rcl.top - 1));

                    CP_IO_SRC_ADDR(ppdev, pjPorts, (0 + ((ptl.y) * lDelta) + PELS_TO_BYTES(ptl.x)));
                    CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ((rcl.top * lDelta) + PELS_TO_BYTES(rcl.left)));
                    CP_IO_START_BLT(ppdev, pjPorts);
                }
            }
        }
    }
    #endif
}

/**************************************************************************
* VOID vIoFillPat
*
* This routine uses the pattern hardware to draw a patterned list of
* rectangles.
*
* The brush is realized into the offscreen cache first if its cache entry
* is missing or stale.  For every rectangle, the 8x8 window of the cached
* 2x2 tile that lines up with the rectangle's origin (relative to the
* brush origin) is copied to ppdev->ulAlignedPatternOffset, and that
* aligned copy is then used as the 8x8 pattern source for the fill.
*
**************************************************************************/

VOID vIoFillPat(
PDEV*           ppdev,
LONG            c,              // Can't be zero
RECTL*          prcl,           // Array of relative coordinate destination rects
ROP4            rop4,           // Mix is taken from bits 2..5: (rop4 >> 2) & 0xf
RBRUSH_COLOR    rbc,            // Realized brush is rbc.prb
POINTL*         pptlBrush)      // Brush origin, used to align the pattern
{
    BYTE*       pjPorts = ppdev->pjPorts;
    LONG        lDelta = ppdev->lDelta;
    ULONG       ulAlignedPatternOffset = ppdev->ulAlignedPatternOffset;
    ULONG       ulPatternAddrBase;
    BYTE        jHwRop;
    BYTE        jMode;
    BRUSHENTRY* pbe;        // Pointer to brush entry data, which is used
                            //   for keeping track of the location and status
                            //   of the pattern bits cached in off-screen
                            //   memory

    DISPDBG((10,"vFillPat called"));

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(ppdev->cBpp < 3, "vFillPat only works at 8bpp and 16bpp");

    if ((rbc.prb->pbe == NULL) ||
        (rbc.prb->pbe->prbVerify != rbc.prb))
    {
        vIoFastPatRealize(ppdev, rbc.prb);
        DISPDBG((5, " -- Brush cache miss, put it at (%d,%d)", rbc.prb->pbe->x, rbc.prb->pbe->y));
    }
    else
    {
        DISPDBG((5, " -- Brush cache hit on brush at (%d,%d)", rbc.prb->pbe->x, rbc.prb->pbe->y));
    }

    pbe = rbc.prb->pbe;

    //
    // Fill the list of rectangles
    //

    ulPatternAddrBase = pbe->xy;
    jHwRop = gajHwMixFromRop2[(rop4 >> 2) & 0xf];
    jMode = ppdev->jModeColor | ENABLE_8x8_PATTERN_COPY;

    do {
        // Offset (in bytes) of the 8x8 window inside the cached tile.  The
        // tile rows are 16 pels apart, hence the shift by 4 for the row.

        ULONG offset = PELS_TO_BYTES(
                           (((prcl->top-pptlBrush->y)&7) << 4)
                           +((prcl->left-pptlBrush->x)&7)
                       );

        // align the pattern to a new location

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        CP_IO_BLT_MODE(ppdev, pjPorts, 0);
        CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
        CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(16));
        CP_IO_DST_Y_OFFSET(ppdev, pjPorts, PELS_TO_BYTES(8));
        CP_IO_SRC_ADDR(ppdev, pjPorts, (ulPatternAddrBase + offset));
        CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(8) - 1));
        CP_IO_YCNT(ppdev, pjPorts, (8 - 1));
        CP_IO_DST_ADDR_ABS(ppdev, pjPorts, ulAlignedPatternOffset);
        CP_IO_START_BLT(ppdev, pjPorts);

        // fill using aligned pattern

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        CP_IO_BLT_MODE(ppdev, pjPorts, jMode);
        CP_IO_ROP(ppdev, pjPorts, jHwRop);
        CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
        CP_IO_SRC_ADDR(ppdev, pjPorts, ulAlignedPatternOffset);
        CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
        CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
        CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
        CP_IO_START_BLT(ppdev, pjPorts);

        prcl++;

    } while (--c != 0);
}

/**************************************************************************
* VOID vIoFillSolid
*
* Does a solid fill to a list of rectangles.
*
* The solid color is replicated to fill a dword (byte-replicated when
* cBpp is 1, word-replicated when cBpp is 2) and loaded as the foreground
* color.  The blt registers shared by all rectangles are programmed once;
* only the extent and destination address are written per rectangle.
*
**************************************************************************/

VOID vIoFillSolid(
PDEV*           ppdev,
LONG            c,              // Can't be zero
RECTL*          prcl,           // Array of relative coordinate destination rects
ROP4            rop4,           // Mix is taken from bits 2..5: (rop4 >> 2) & 0xf
RBRUSH_COLOR    rbc,            // Drawing color is rbc.iSolidColor
POINTL*         pptlBrush)      // Not used
{
    BYTE*   pjPorts = ppdev->pjPorts;
    LONG    lDelta = ppdev->lDelta;
    LONG    cBpp = ppdev->cBpp;
    ULONG   ulSolidColor;
    BYTE    jHwRop;

    DISPDBG((10,"vFillSolid called"));

    ASSERTDD(c > 0, "Can't handle zero rectangles");

    ulSolidColor = rbc.iSolidColor;

    if (cBpp == 1)
    {
        ulSolidColor |= ulSolidColor << 8;
        ulSolidColor |= ulSolidColor << 16;
    }
    else if (cBpp == 2)
    {
        ulSolidColor |= ulSolidColor << 16;
    }

    jHwRop = gajHwMixFromRop2[(rop4 >> 2) & 0xf];

    //
    // Make sure we can write to the video registers.
    //

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

    CP_IO_ROP(ppdev, pjPorts, jHwRop);
    CP_IO_SRC_ADDR(ppdev, pjPorts, ppdev->ulSolidColorOffset);
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
    CP_IO_BLT_MODE(ppdev, pjPorts, ENABLE_COLOR_EXPAND |
                                   ENABLE_8x8_PATTERN_COPY |
                                   ppdev->jModeColor);
    CP_IO_FG_COLOR(ppdev, pjPorts, ulSolidColor);

    //
    // Fill the list of rectangles
    //

    while (TRUE)
    {
        CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
        CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));
        CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
        CP_IO_START_BLT(ppdev, pjPorts);

        if (--c == 0)
            return;

        prcl++;

        // Wait before reprogramming the registers for the next rectangle.

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
    }
}

/**************************************************************************
* VOID vIoCopyBlt
*
* Does a screen-to-screen blt of a list of rectangles.
*
* The source of each rectangle is found by adding the (source - destination)
* delta of the original unclipped blt.  A top-to-bottom, left-to-right
* copy is used unless source and destination overlap in a way that needs
* the reverse direction; in that case the start addresses are those of the
* last byte of the bottom scan.
*
**************************************************************************/

VOID vIoCopyBlt(
PDEV*   ppdev,
LONG    c,          // Can't be zero
RECTL*  prcl,       // Array of relative coordinates destination rectangles
ROP4    rop4,       // Mix is taken from the low nibble: rop4 & 0xf
POINTL* pptlSrc,    // Original unclipped source point
RECTL*  prclDst)    // Original unclipped destination rectangle
{
    LONG    dx;
    LONG    dy;     // Add delta to destination to get source

    LONG    xyOffset = ppdev->xyOffset;
    BYTE*   pjPorts = ppdev->pjPorts;
    LONG    lDelta = ppdev->lDelta;
    BYTE    jHwRop;

    DISPDBG((10,"vCopyBlt called"));

    ASSERTDD(c > 0, "Can't handle zero rectangles");

    //
    // The src-dst delta will be the same for all rectangles
    //

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;

    //
    // Make sure we can write to the video registers.
    //

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

    jHwRop = gajHwMixFromRop2[rop4 & 0xf];
    CP_IO_ROP(ppdev, pjPorts, jHwRop);

    CP_IO_SRC_Y_OFFSET(ppdev, pjPorts, lDelta);
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);

    //
    // The accelerator may not be as fast at doing right-to-left copies, so
    // only do them when the rectangles truly overlap:
    //

    if (!OVERLAP(prclDst, pptlSrc) ||
        (prclDst->top < pptlSrc->y) ||
        ((prclDst->top == pptlSrc->y) && (prclDst->left <= pptlSrc->x))
        )
    {
        //
        // Top to Bottom - Left to Right
        //

        DISPDBG((12,"Top to Bottom - Left to Right"));

        CP_IO_BLT_MODE(ppdev, pjPorts, DIR_TBLR);

        while (TRUE)
        {
            CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
            CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));

            CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((prcl->top + dy) * lDelta) + PELS_TO_BYTES(prcl->left + dx)));
            CP_IO_DST_ADDR(ppdev, pjPorts, ((prcl->top * lDelta) + PELS_TO_BYTES(prcl->left)));
            CP_IO_START_BLT(ppdev, pjPorts);

            if (--c == 0)
                return;

            prcl++;
            CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        }
    }
    else
    {
        //
        // Bottom to Top - Right to Left
        //

        DISPDBG((12,"Bottom to Top - Right to Left"));

        CP_IO_BLT_MODE(ppdev, pjPorts, DIR_BTRL);

        while (TRUE)
        {
            CP_IO_XCNT(ppdev, pjPorts, (PELS_TO_BYTES(prcl->right - prcl->left) - 1));
            CP_IO_YCNT(ppdev, pjPorts, (prcl->bottom - prcl->top - 1));

            // Addresses point at the last byte of the last scan.

            CP_IO_SRC_ADDR(ppdev, pjPorts, (xyOffset + ((prcl->bottom - 1 + dy) * lDelta) + PELS_TO_BYTES(prcl->right + dx) - 1));
            CP_IO_DST_ADDR(ppdev, pjPorts, (((prcl->bottom - 1) * lDelta) + PELS_TO_BYTES(prcl->right) - 1));
            CP_IO_START_BLT(ppdev, pjPorts);

            if (--c == 0)
                return;

            prcl++;
            CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
        }
    }
}

/******************************Public*Routine******************************\
* VOID vIoXfer1bpp
*
* Low-level routine used to transfer monochrome data to the screen using
* DWORD writes to the blt engine.
*
* This can handle opaque or transparent expansions.  It does opaque
* expansions by drawing the opaque rectangle first and then transparently
* expands the foreground bits.
*
* The foreground color is pxlo->pulXlate[1] and the background color is
* pxlo->pulXlate[0].  The background pass is skipped when rop4 is 0xCCAA.
*
\**************************************************************************/

VOID vIoXfer1bpp(
PDEV*     ppdev,
LONG      c,          // Count of rectangles, can't be zero
RECTL*    prcl,       // List of destination rectangles, in relative
                      //   coordinates
ROP4      rop4,       // Actually had better be a rop3
SURFOBJ*  psoSrc,     // Source surface
POINTL*   pptlSrc,    // Original unclipped source point
RECTL*    prclDst,    // Original unclipped destination rectangle
XLATEOBJ* pxlo)       // Translate that provides color-expansion information
{
    ULONG*  pulXfer;
    ULONG*  pul;
    LONG    ix;
    LONG    iy;
    LONG    cxWidthInBytes;
    BYTE*   pjBits;
    POINTL  ptlDst;
    POINTL  ptlSrc;
    SIZEL   sizlDst;
    LONG    cxLeftMask;
    LONG    cxRightMask;
    ULONG   ulDstAddr;
    INT     nDwords;
    ULONG   ulLeftMask;
    ULONG   ulRightMask;
    LONG    dx;
    LONG    dy;

    BYTE*   pjPorts   = ppdev->pjPorts;
    LONG    lDelta    = ppdev->lDelta;
    LONG    lDeltaSrc = psoSrc->lDelta;
    LONG    cBpp      = ppdev->cBpp;
    ULONG   ulFgColor = pxlo->pulXlate[1];
    ULONG   ulBgColor = pxlo->pulXlate[0];

    // Since the hardware clipping on some of the Cirrus chips is broken, we
    // do the clipping by rounding out the edges to dword boundaries and then
    // doing the blt transparently.  In the event that we want the expansion
    // to be opaque, we do the opaquing blt in advance.  One side effect of
    // this is that the destination bits are no longer valid for processing
    // the rop.  This could probably be optimized by doing the edges separately
    // and then doing the middle section in one pass.  However, this is
    // complicated by a 5434 bug that breaks blts less than 10 pixels wide.

    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) == 0xcc00), "Expected foreground rop of 0xcc");

    //
    // The src-dst delta will be the same for all rectangles
    //

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;

    // Replicate the colors so that they fill a whole dword.

    if (cBpp == 1)
    {
        ulFgColor = (ulFgColor << 8) | (ulFgColor & 0xff);
        ulBgColor = (ulBgColor << 8) | (ulBgColor & 0xff);
        ulFgColor = (ulFgColor << 16) | (ulFgColor & 0xffff);
        ulBgColor = (ulBgColor << 16) | (ulBgColor & 0xffff);
    }
    else if (cBpp == 2)
    {
        ulFgColor = (ulFgColor << 16) | (ulFgColor & 0xffff);
        ulBgColor = (ulBgColor << 16) | (ulBgColor & 0xffff);
    }

    pulXfer = ppdev->pulXfer;
    ppdev->pfnBankMap(ppdev, ppdev->lXferBank);

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);

    if (rop4 != 0xCCAA)
    {
        LONG    lCnt = c;
        RECTL*  prclTmp = prcl;
        BYTE    jHwBgRop = gajHwMixFromRop2[rop4 & 0xf];

        CP_IO_ROP(ppdev, pjPorts, jHwBgRop);
        CP_IO_FG_COLOR(ppdev, pjPorts, ulBgColor);
        CP_IO_SRC_ADDR(ppdev, pjPorts, ppdev->ulSolidColorOffset);
        CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor |
                                       ENABLE_COLOR_EXPAND |
                                       ENABLE_8x8_PATTERN_COPY);

        do
        {
            // calculate the size of the blt

            ptlDst.x = prclTmp->left;
            ptlDst.y = prclTmp->top;

            sizlDst.cx = prclTmp->right - ptlDst.x;
            sizlDst.cy = prclTmp->bottom - ptlDst.y;

            //
            // Fill the background rectangle with the background color
            //

            // Set the dest addresses

            ulDstAddr = (ptlDst.y * lDelta) + PELS_TO_BYTES(ptlDst.x);

            CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

            //
            // Tell the hardware how many bytes we'd like to write:
            // sizlDst.cx * sizlDst.cy
            //
            CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(sizlDst.cx) - 1);
            CP_IO_YCNT(ppdev, pjPorts, sizlDst.cy - 1);
            CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);

            // Start the blt operation

            CP_IO_START_BLT(ppdev, pjPorts);
            prclTmp++;
        } while (--lCnt != 0);

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
    }

    // Background and transparency compare colors are both set to the
    // complement of the foreground color.

    CP_IO_FG_COLOR(ppdev, pjPorts, ulFgColor);
    CP_IO_BG_COLOR(ppdev, pjPorts, ~ulFgColor);
    CP_IO_XPAR_COLOR(ppdev, pjPorts, ~ulFgColor);
    CP_IO_ROP(ppdev, pjPorts, CL_SRC_COPY);
    CP_IO_BLT_MODE(ppdev, pjPorts, ppdev->jModeColor |
                                   ENABLE_COLOR_EXPAND |
                                   ENABLE_TRANSPARENCY_COMPARE |
                                   SRC_CPU_DATA);
    CP_IO_BLT_EXT_MODE(ppdev, pjPorts, 0);          // jl01

    do
    {
        // calculate the size of the blt

        ptlDst.x = prcl->left;
        ptlDst.y = prcl->top;

        sizlDst.cx = prcl->right - ptlDst.x;
        sizlDst.cy = prcl->bottom - ptlDst.y;

        // calculate the source point for this rectangle

        ptlSrc.x = prcl->left + dx;
        ptlSrc.y = prcl->top + dy;

        // Floor the source.
        // Extend the width by the amount required to floor to a dword boundary.
        // Set the size of the left mask.
        // Floor the dest, so it aligns with the floored source.

        if ((cxLeftMask = (ptlSrc.x & 31)))
        {
            sizlDst.cx += cxLeftMask;
            ptlSrc.x &= ~31;
            ptlDst.x -= cxLeftMask;
        }

        ulLeftMask = gaulLeftClipMask[cxLeftMask];

        // Ceil the cx to a dword boundary.

        if ((cxRightMask = (sizlDst.cx & 31)) != 0)
        {
            cxRightMask = 32 - cxRightMask;
            sizlDst.cx = (sizlDst.cx + 31) & ~31;
        }

        ulRightMask = gaulRightClipMask[cxRightMask];

        // A single-dword-wide blt needs both masks applied to the same
        // dword; a zero right mask selects the one-dword transfer loop.

        if (sizlDst.cx == 32)
        {
            ulLeftMask &= ulRightMask;
            ulRightMask = 0;
        }

        // Note: At this point sizlDst.cx is the width of the blt in pixels,
        //       floored to a dword boundary, and ceiled to a dword boundary.

        // Calculate the width in Bytes

        cxWidthInBytes = sizlDst.cx >> 3;

        // Calculate the number of Dwords and any remaining bytes

        nDwords = cxWidthInBytes >> 2;

        ASSERTDD(((cxWidthInBytes & 0x03) == 0),
                 "cxWidthInBytes is not a DWORD multiple");

        // Calculate the address of the source bitmap
        // This is to a byte boundary.

        pjBits  = (PBYTE) psoSrc->pvScan0;
        pjBits += ptlSrc.y * lDeltaSrc;
        pjBits += ptlSrc.x >> 3;

        ASSERTDD((((ULONG_PTR)pjBits & 0x03) == 0),
                 "pjBits not DWORD aligned like it should be");

        //
        // Blt the 1 bpp bitmap
        //

        ulDstAddr = (ptlDst.y * lDelta) + PELS_TO_BYTES(ptlDst.x);

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

        CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(sizlDst.cx) - 1);
        CP_IO_YCNT(ppdev, pjPorts, sizlDst.cy - 1);

        //
        // The 542x chips require a write to the Src Address Register when
        // doing a host transfer with color expansion.  The value is
        // irrelevant, but the write is crucial.  This is documented in
        // the manual, not the errata.  Go figure.
        //

        CP_IO_SRC_ADDR(ppdev, pjPorts, 0);
        CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);

        CP_IO_START_BLT(ppdev, pjPorts);

        //
        // Transfer the host bitmap.
        //
        if (ulRightMask)
        {
            //
            // Blt is > 1 DWORD wide (nDwords > 1)
            //

            for (iy = 0; iy < sizlDst.cy; iy++)
            {
                pul = (ULONG*) pjBits;

                //*pulXfer++ = *(((ULONG*)pul)++) & ulLeftMask;
                WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul) & ulLeftMask));
                pul++;

                for (ix = 0; ix < (nDwords-2); ix++)
                {
                    //*pulXfer++ = *(((ULONG*)pul)++);
                    WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul)));
                    pul++;
                }

                //*pulXfer++ = *(((ULONG*)pul)++) & ulRightMask;
                WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pul) & ulRightMask));
                pul++;

                pjBits += lDeltaSrc;
                //pulXfer = ppdev->pulXfer;
                CP_MEMORY_BARRIER();    // Flush memory cache when we reset the address
            }
        }
        else
        {
            //
            // Blt is 1 DWORD wide (nDwords == 1)
            //

            for (iy = 0; iy < sizlDst.cy; iy++)
            {
                //*pulXfer = *((ULONG*)pjBits) & ulLeftMask;
                WRITE_REGISTER_ULONG(pulXfer, (*((ULONG*)pjBits) & ulLeftMask));
                pjBits += lDeltaSrc;
                CP_MEMORY_BARRIER();    // Flush memory cache
            }
        }

        prcl++;
    } while (--c != 0);
}

/******************************Public*Routine******************************\
* VOID vIoXfer4bpp
*
* Does a 4bpp transfer from a bitmap to the screen.
*
* NOTE: The screen must be 8bpp for this function to be called!
*
* The reason we implement this is that a lot of resources are kept as 4bpp,
* and used to initialize DFBs, some of which we of course keep off-screen.
*
* Each source scan is translated through pxlo->pulXlate in batches of at
* most XLATE_BUFFER_SIZE pels into a stack buffer, and each batch is then
* pushed to the blt engine as dwords.
*
\**************************************************************************/

// XLATE_BUFFER_SIZE defines the size of the stack-based buffer we use
// for doing the translate.  Note that in general stack buffers should
// be kept as small as possible.  The OS guarantees us only 8k for stack
// from GDI down to the display driver in low memory situations; if we
// ask for more, we'll access violate.  Note also that at any time the
// stack buffer cannot be larger than a page (4k) -- otherwise we may
// miss touching the 'guard page' and access violate then too.

#define XLATE_BUFFER_SIZE 256

VOID vIoXfer4bpp(
PDEV*     ppdev,
LONG      c,          // Count of rectangles, can't be zero
RECTL*    prcl,       // List of destination rectangles, in relative
                      //   coordinates
ULONG     rop4,       // rop4
SURFOBJ*  psoSrc,     // Source surface
POINTL*   pptlSrc,    // Original unclipped source point
RECTL*    prclDst,    // Original unclipped destination rectangle
XLATEOBJ* pxlo)       // Translate that provides colour-expansion information
{
    ULONG*  pulXfer = ppdev->pulXfer;
    BYTE*   pjPorts = ppdev->pjPorts;
    LONG    lDelta  = ppdev->lDelta;
    ULONG   ulDstAddr;
    LONG    dx;
    LONG    dy;
    LONG    cx;
    LONG    cy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    BYTE*   pjScan;
    BYTE*   pjSrc;
    BYTE*   pjDst;
    LONG    cxThis;
    LONG    cxToGo;
    LONG    xSrc;
    LONG    iLoop;
    BYTE    jSrc;
    ULONG*  pulXlate;
    LONG    cdwThis;
    BYTE*   pjBuf;
    BYTE    ajBuf[XLATE_BUFFER_SIZE];

    ASSERTDD(ppdev->iBitmapFormat == BMF_8BPP, "Screen must be 8bpp");
    ASSERTDD(psoSrc->iBitmapFormat == BMF_4BPP, "Source must be 4bpp");
    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only a rop2");

    DISPDBG((5, "vXfer4bpp: entry"));

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    ppdev->pfnBankMap(ppdev, ppdev->lXferBank);

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
    CP_IO_ROP(ppdev, pjPorts, gajHwMixFromRop2[rop4 & 0xf]);
    CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);

    while(TRUE)
    {
        ulDstAddr = (prcl->top * lDelta) + PELS_TO_BYTES(prcl->left);
        cx = prcl->right  - prcl->left;
        cy = prcl->bottom - prcl->top;

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

        CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(cx) - 1);
        CP_IO_YCNT(ppdev, pjPorts, cy - 1);
        CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);

        pulXlate = pxlo->pulXlate;
        xSrc     = prcl->left + dx;
        pjScan   = pjSrcScan0 + (prcl->top + dy) * lSrcDelta + (xSrc >> 1);

        CP_IO_START_BLT(ppdev, pjPorts);

        do {
            pjSrc  = pjScan;
            cxToGo = cx;            // # of pels per scan in 4bpp source
            do {
                cxThis  = XLATE_BUFFER_SIZE;
                                    // We can handle XLATE_BUFFER_SIZE number
                                    //   of pels in this xlate batch
                cxToGo -= cxThis;   // cxThis will be the actual number of
                                    //   pels we'll do in this xlate batch
                if (cxToGo < 0)
                    cxThis += cxToGo;

                pjDst = ajBuf;      // Points to our temporary batch buffer

                // We handle alignment ourselves because it's easy to
                // do, rather than pay the cost of setting/resetting
                // the scissors register:

                if (xSrc & 1)
                {
                    // When unaligned, we have to be careful not to read
                    // past the end of the 4bpp bitmap (that could
                    // potentially cause us to access violate):

                    iLoop = cxThis >> 1;        // Each loop handles 2 pels;
                                                //   we'll handle odd pel
                                                //   separately
                    jSrc  = *pjSrc;
                    while (iLoop-- != 0)
                    {
                        *pjDst++ = (BYTE) pulXlate[jSrc & 0xf];
                        jSrc = *(++pjSrc);
                        *pjDst++ = (BYTE) pulXlate[jSrc >> 4];
                    }

                    if (cxThis & 1)
                        *pjDst = (BYTE) pulXlate[jSrc & 0xf];
                }
                else
                {
                    iLoop = (cxThis + 1) >> 1;  // Each loop handles 2 pels
                    do {
                        jSrc = *pjSrc++;

                        *pjDst++ = (BYTE) pulXlate[jSrc >> 4];
                        *pjDst++ = (BYTE) pulXlate[jSrc & 0xf];

                    } while (--iLoop != 0);
                }

                // The number of bytes we'll transfer is equal to the number
                // of pels we've processed in the batch.  Since we're
                // transferring dwords, we have to round up to get the dword
                // count:

                cdwThis = (cxThis + 3) >> 2;
                pjBuf  = ajBuf;

                TRANSFER_DWORD_ALIGNED(ppdev, pulXfer, pjBuf, cdwThis);

            } while (cxToGo > 0);

            pjScan += lSrcDelta;        // Advance to next source scan.  Note
                                        //   that we could have computed the
                                        //   value to advance 'pjSrc' directly,
                                        //   but this method is less
                                        //   error-prone.

        } while (--cy != 0);

        if (--c == 0)
            return;

        prcl++;
    }
}

/******************************Public*Routine******************************\
* VOID vIoXferNative
*
* Transfers a bitmap that is the same color depth as the display to
* the screen via the data transfer register, with no translation.
*
* The blt registers are programmed per rectangle and the source scans are
* handed to vImageTransfer, which performs the actual host transfer.
*
\**************************************************************************/

VOID vIoXferNative(
PDEV*     ppdev,
LONG      c,          // Count of rectangles, can't be zero
RECTL*    prcl,       // Array of relative coordinates destination rectangles
ULONG     rop4,       // rop4
SURFOBJ*  psoSrc,     // Source surface
POINTL*   pptlSrc,    // Original unclipped source point
RECTL*    prclDst,    // Original unclipped destination rectangle
XLATEOBJ* pxlo)       // Not used
{
    BYTE*   pjPorts = ppdev->pjPorts;
    LONG    lDelta  = ppdev->lDelta;
    ULONG   ulDstAddr;
    LONG    dx;
    LONG    dy;
    LONG    cx;
    LONG    cy;
    LONG    lSrcDelta;
    BYTE*   pjSrcScan0;
    BYTE*   pjSrc;
    LONG    cjSrc;

    ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
             "Can handle trivial xlate only");
    ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat,
             "Source must be same color depth as screen");
    ASSERTDD(c > 0, "Can't handle zero rectangles");
    ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff),
             "Expect only a rop2");

    dx = pptlSrc->x - prclDst->left;
    dy = pptlSrc->y - prclDst->top;     // Add to destination to get source

    lSrcDelta  = psoSrc->lDelta;
    pjSrcScan0 = psoSrc->pvScan0;

    ppdev->pfnBankMap(ppdev, ppdev->lXferBank);

    CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);
    CP_IO_DST_Y_OFFSET(ppdev, pjPorts, lDelta);
    CP_IO_ROP(ppdev, pjPorts, gajHwMixFromRop2[rop4 & 0xf]);
    CP_IO_BLT_MODE(ppdev, pjPorts, SRC_CPU_DATA);

    while(TRUE)
    {
        ulDstAddr = (prcl->top * lDelta) + PELS_TO_BYTES(prcl->left);
        cx = prcl->right  - prcl->left;
        cy = prcl->bottom - prcl->top;

        CP_IO_WAIT_FOR_BLT_COMPLETE(ppdev, pjPorts);

        CP_IO_XCNT(ppdev, pjPorts, PELS_TO_BYTES(cx) - 1);
        CP_IO_YCNT(ppdev, pjPorts, cy - 1);
        CP_IO_DST_ADDR(ppdev, pjPorts, ulDstAddr);

        cjSrc = PELS_TO_BYTES(cx);
        pjSrc = pjSrcScan0 + (prcl->top  + dy) * lSrcDelta
                           + (PELS_TO_BYTES(prcl->left + dx));

        CP_IO_START_BLT(ppdev, pjPorts);
        vImageTransfer(ppdev, pjSrc, lSrcDelta, cjSrc, cy);

        if (--c == 0)
            return;

        prcl++;
    }
}