/******************************Module*Header*******************************\
* Module Name: bltmil.c
*
* Contains the low-level blt functions for the Millennium.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines.  You shouldn't have to modify much in
* 'bitblt.c'.  I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refers to coordinates
*       that haven't yet had the offscreen bitmap (DFB) offset applied.
*       'Absolute' coordinates have had the offset applied.  For example,
*       we may be told to blt to (1, 1) of the bitmap, but the bitmap may
*       be sitting in offscreen memory starting at coordinate (0, 768) --
*       (1, 1) would be the 'relative' start coordinate, and (1, 769)
*       would be the 'absolute' start coordinate.
*
* Copyright (c) 1992-1996 Microsoft Corporation
* Copyright (c) 1993-1996 Matrox Electronic Systems, Ltd.
\**************************************************************************/

#include "precomp.h"

/******************************Public*Routine******************************\
* VOID vMilFillSolid
*
* Fills a list of rectangles with a solid colour.
*
* The rectangles are given in relative coordinates; ppdev->xOffset and
* ppdev->yOffset are added before the coordinates are handed to the
* hardware.  One trapezoid command is started per rectangle.
*
* The drawing-control word is the block-mode solid fill unless:
*   - the rop is PATCOPY at 24bpp with a colour whose three low bytes are
*     not all equal, or
*   - the rop is a pattern/destination mix other than WHITENESS or
*     BLACKNESS,
* in which case block mode is turned off.
*
\**************************************************************************/

VOID vMilFillSolid(
PDEV*           ppdev,      // pdev
LONG            c,          // Number of rectangles to be filled,
                            //   can't be zero
RECTL*          prcl,       // List of rectangles to be filled
ULONG           rop4,       // Rop4
RBRUSH_COLOR    rbc,        // rbc.iSolidColor holds the fill colour
POINTL*         pptlBrush)  // Pattern alignment (not referenced here)
{
    BYTE*   pjBase  = ppdev->pjBase;
    LONG    xOffset = ppdev->xOffset;
    LONG    yOffset = ppdev->yOffset;
    ULONG   ulLowByte;
    ULONG   ulHwMix;
    ULONG   ulDwg;

    // Room for DWGCTL, FCOL and the first rectangle's two registers.

    CHECK_FIFO_SPACE(pjBase, 4);

    ppdev->HopeFlags = (SIGN_CACHE | ARX_CACHE | PATTERN_CACHE);

    // Assume the common case, a block-mode solid fill.  The branches
    // below replace this only when block mode can't be used.

    ulDwg = (opcode_TRAP + blockm_ON +
             solid_SOLID + arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
             bop_SRCCOPY + pattern_OFF + transc_BG_OPAQUE);

    if (rop4 != 0xf0f0)
    {
        // The ROP3 is a combination of P and D only:
        //
        //      ROP3  Mga   ROP3  Mga   ROP3  Mga   ROP3  Mga
        //
        //      0x00  0     0x50  4     0xa0  8     0xf0  c
        //      0x05  1     0x55  5     0xa5  9     0xf5  d
        //      0x0a  2     0x5a  6     0xaa  a     0xfa  e
        //      0x0f  3     0x5f  7     0xaf  b     0xff  f

        ulHwMix = (rop4 & 0x03) + ((rop4 & 0x30) >> 2);

        if (ulHwMix == MGA_WHITENESS)
        {
            // All ones, written with the block-mode copy.

            rbc.iSolidColor = 0xffffffff;
        }
        else if (ulHwMix == MGA_BLACKNESS)
        {
            // All zeroes, written with the block-mode copy.

            rbc.iSolidColor = 0x00000000;
        }
        else
        {
            // A real mix with the destination; hand the mix to the
            // hardware.

            ulDwg = (opcode_TRAP + blockm_OFF + atype_RSTR +
                     solid_SOLID + arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
                     pattern_OFF + transc_BG_OPAQUE +
                     (ulHwMix << 16));
        }
    }
    else if (ppdev->iBitmapFormat == BMF_24BPP)
    {
        // PATCOPY at 24bpp.

        ulLowByte = rbc.iSolidColor & 0x000000ff;

        if ((ulLowByte == ((rbc.iSolidColor >> 8)  & 0x000000ff)) &&
            (ulLowByte == ((rbc.iSolidColor >> 16) & 0x000000ff)))
        {
            // The colour is a gray level, so we can use block mode if
            // we prepare our colour.

            rbc.iSolidColor = (rbc.iSolidColor << 8) | ulLowByte;
        }
        else
        {
            // The colour is not a gray level, so we can't use block mode.

            ulDwg = (opcode_TRAP + blockm_OFF + atype_RPL +
                     solid_SOLID + arzero_ZERO + sgnzero_ZERO + shftzero_ZERO +
                     bop_SRCCOPY + pattern_OFF + transc_BG_OPAQUE);
        }
    }

    CP_WRITE(pjBase, DWG_DWGCTL, ulDwg);
    CP_WRITE(pjBase, DWG_FCOL, COLOR_REPLICATE(ppdev, rbc.iSolidColor));

    for (;;)
    {
        LONG xLeft  = prcl->left  + xOffset;
        LONG xRight = prcl->right + xOffset;
        LONG yTop   = prcl->top   + yOffset;
        LONG cyRect = prcl->bottom - prcl->top;

        CP_WRITE(pjBase, DWG_FXBNDRY,
                 ((xRight << bfxright_SHIFT) | (xLeft & bfxleft_MASK)));

        // ylength_MASK is not needed since coordinates are within range

        CP_START(pjBase, DWG_YDSTLEN, ((yTop << yval_SHIFT) | cyRect));

        if (--c == 0)
            break;

        // Two registers are written for every further rectangle.

        CHECK_FIFO_SPACE(pjBase, 2);
        prcl++;
    }
}

/******************************Public*Routine******************************\
* VOID vMilPatRealize
*
* Download the Color Brush to the Color brush cache in the Storm offscreen
* memory.  For 8, 16, and 32 bpp, we download an 8x8 brush; a special
* routine, vPatRealize24bpp, is used for 24bpp brushes.  We'll use direct
* frame buffer access whenever possible.
* * There are some hardware restrictions concerning the way that a pattern * must be stored in memory: * - the first pixel of the pattern must be stored so that the first pixel * address mod 256 is 0, 8, 16, or 24; * - each line of 8 pixels is stored continuously, but there must be a * difference of 32 in the pixel addresses of successive pattern lines. * This means that we will store patterns in the following way: * * +----+---------------+---------------+---------------+---------------+ * | | Pattern 0 | Pattern 1 | Pattern 2 | Pattern 3 | * |Line| | |1 1 1 1 1 1 1 1|1 1 1 1 1 1 1 1| * | |0 1 2 3 4 5 6 7|8 9 a b c d e f|0 1 2 3 4 5 6 7|8 9 a b c d e f| * +----+---------------+---------------+---------------+---------------+ * | 0 |* * * * | X | o o|x x | * | 1 | * * * *| X | o o | x x | * | 2 |* * * * | X | o o | x x | * | 3 | * * * *| X |o o | x x| * | 4 |* * * * |X X X X X X X X| o o|x x | * | 5 | * * * *| X | o o | x x | * | 6 |* * * * | X | o o | x x | * | 7 | * * * *| X |o o | x x| * +----+---------------+---------------+---------------+---------------+ * * where a given pixel address is * FirstPixelAddress + Line*0x20 + Pattern*0x08 + xPat. * \**************************************************************************/ VOID vMilPatRealize( PDEV* ppdev, RBRUSH* prb) { BYTE* pjBase; BRUSHENTRY* pbe; LONG iBrushCache; ULONG culScan; ULONG i; ULONG j; ULONG* pulBrush; ULONG* pulDst; ULONG lDeltaPat; pjBase = ppdev->pjBase; // Allocate a new off-screen cache brush entry for the brush. iBrushCache = ppdev->iBrushCache; pbe = &ppdev->pbe[iBrushCache]; iBrushCache++; if (iBrushCache >= ppdev->cBrushCache) iBrushCache = 0; ppdev->iBrushCache = iBrushCache; // Update our links. pbe->prbVerify = prb; prb->apbe[IBOARD(ppdev)] = pbe; // Point to the pattern bits. pulBrush = prb->aulPattern; // Calculate delta from end of pattern scan 1 to start of pattern scan2. lDeltaPat = 8 * ppdev->cjHwPel; // 8 -> 32? // Convert it to a byte address. 
culScan = 2 * ppdev->cjHwPel; pulDst = (ULONG*) (pbe->pvScan0); START_DIRECT_ACCESS_STORM(ppdev, pjBase); for (i = 8; i != 0 ; i--) { for (j = 0; j < culScan; j++) { pulDst[j] = *pulBrush++; } pulDst += lDeltaPat; } END_DIRECT_ACCESS_STORM(ppdev, pjBase); } /***************************************************************************** * VOID vMilFillPat * * 8, 16, and 32bpp patterned color fills for Storm. ****************************************************************************/ VOID vMilFillPat( PDEV* ppdev, LONG c, // Can't be zero RECTL* prcl, // List of rectangles to be filled, in relative // coordinates ULONG rop4, // Rop4 RBRUSH_COLOR rbc, // rbc.prb points to brush realization structure POINTL* pptlBrush) // Pattern alignment { BRUSHENTRY* pbe; LONG xOffset; LONG yOffset; LONG xLeft; LONG yTop; LONG xBrush; LONG yBrush; LONG lSrcAdd; ULONG ulLinear; BYTE* pjBase; ASSERTDD(!(rbc.prb->fl & RBRUSH_2COLOR), "Can't do 2 colour brushes here"); // We have to ensure that no other brush took our spot in off-screen // memory, or we might have to realize the brush for the first time. pbe = rbc.prb->apbe[IBOARD(ppdev)]; if (pbe->prbVerify != rbc.prb) { vMilPatRealize(ppdev, rbc.prb); pbe = rbc.prb->apbe[IBOARD(ppdev)]; } pjBase = ppdev->pjBase; xOffset = ppdev->xOffset; yOffset = ppdev->yOffset; lSrcAdd = ppdev->lPatSrcAdd; CHECK_FIFO_SPACE(pjBase, 6); CP_WRITE(pjBase, DWG_AR5, 32); // Source (pattern) pitch. ppdev->HopeFlags = SIGN_CACHE; if ((rop4 & 0x000000FF) == 0x000000F0) { // The rop is PATCOPY. CP_WRITE(pjBase, DWG_DWGCTL, (opcode_BITBLT + atype_RPL + sgnzero_ZERO + shftzero_ZERO + bop_SRCCOPY + bltmod_BFCOL + pattern_ON + transc_BG_OPAQUE)); } else { CP_WRITE(pjBase, DWG_DWGCTL, (opcode_BITBLT + atype_RSTR + sgnzero_ZERO + shftzero_ZERO + bltmod_BFCOL + pattern_ON + transc_BG_OPAQUE + (((rop4 & 0x03) + ((rop4 & 0x30) >> 2)) << 16))); } // The pattern setup is complete. while(TRUE) { // There is a problem with Storm. 
We have to program: // AR3: ssa // AR0: sea, where sea<18:3> = ssa<18:3> and // sea< 2:0> = ssa< 2:0> + 2 for 8bpp; // sea< 2:0> = ssa< 2:0> + 4 for 16bpp; // sea< 2:0> = ssa< 2:0> + 6 for 32bpp. // Take into account the brush origin. The upper left pel of the // brush should be aligned here in the destination surface. yTop = prcl->top; xLeft = prcl->left; xBrush = (xLeft - pptlBrush->x) & 7; yBrush = (yTop - pptlBrush->y) & 7; ulLinear = pbe->ulLinear + (yBrush << 5) + xBrush; CP_WRITE(pjBase, DWG_AR3, ulLinear); CP_WRITE(pjBase, DWG_AR0, ((ulLinear & 0xfffffff8) | ((ulLinear+lSrcAdd) & 7))); CP_WRITE(pjBase, DWG_FXBNDRY, (((prcl->right + xOffset - 1) << bfxright_SHIFT) | ((xLeft + xOffset) & bfxleft_MASK))); // ylength_MASK not is needed since coordinates are within range CP_START(pjBase, DWG_YDSTLEN, (((yTop + yOffset ) << yval_SHIFT) | ((prcl->bottom - yTop)))); if (--c == 0) return; CHECK_FIFO_SPACE(pjBase, 4); prcl++; } } /******************************Public*Routine******************************\ * vMilXfer1bpp * * This routine colour expands a monochrome bitmap. 
* \**************************************************************************/ VOID vMilXfer1bpp( // Type FNXFER PDEV* ppdev, LONG c, // Count of rectangles, can't be zero RECTL* prcl, // List of destination rectangles, in relative // coordinates ULONG rop4, // Foreground and background hardware mix SURFOBJ* psoSrc, // Source surface POINTL* pptlSrc, // Original unclipped source point RECTL* prclDst, // Original unclipped destination rectangle XLATEOBJ* pxlo) // Translate that provides colour-expansion information { LONG xOffset; LONG yOffset; ULONG ulBitFlip; LONG dx; LONG dy; LONG xSrc; LONG ySrc; LONG xDst; LONG yDst; LONG cxDst; LONG cyDst; LONG xSrcAlign; LONG lSrcDelta; LONG lSrcSkip; LONG i; LONG k; LONG cdSrc; LONG cdSrcPerScan; ULONG FCol; ULONG BCol; ULONG ul; BYTE* pjDma; ULONG* pulXlate; ULONG* pulSrc; ULONG* pulDst; BYTE* pjSrcScan0; BYTE* pjBase; LONG cFifo; LONG xAlign; ULONG cFullLoops; ULONG cRemLoops; ASSERTDD(((rop4 & 0xff00) >> 8) == (rop4 & 0xff), "Expect only an opaquing rop"); pjBase = ppdev->pjBase; xOffset = ppdev->xOffset; yOffset = ppdev->yOffset; ulBitFlip = 0; dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top; // Add to destination to get source pjSrcScan0 = psoSrc->pvScan0; lSrcDelta = psoSrc->lDelta; pjDma = pjBase + DMAWND; ppdev->HopeFlags = SIGN_CACHE; // Get the foreground and background colors. 
pulXlate = pxlo->pulXlate; FCol = COLOR_REPLICATE(ppdev, pulXlate[1]); BCol = COLOR_REPLICATE(ppdev, pulXlate[0]); CHECK_FIFO_SPACE(pjBase, 10); if (rop4 == 0x0000CCCC) // SRCCOPY { if (ppdev->iBitmapFormat == BMF_24BPP) { CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RPL + sgnzero_ZERO + shftzero_ZERO + bop_SRCCOPY + bltmod_BMONOWF)); } else { CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + blockm_ON + sgnzero_ZERO + shftzero_ZERO + bop_SRCCOPY + bltmod_BMONOWF)); } } else if ((rop4 == 0xb8b8) || (rop4 == 0xe2e2)) { // We special-cased 0xb8b8 and 0xe2e2 in bitblt.c: if (rop4 == 0xb8b8) { // 0xb8 is weird because it says that the '1' bit is leave-alone, // but the '0' bit is the destination color. The Millennium can // only handle transparent blts when the '0' bit is leave-alone, // so we flip the source bits before we give it to the Millennium. // // Since we're limited by the speed of the bus, this additional // overhead of an extra XOR on every write won't be measurable. ulBitFlip = (ULONG) -1; } CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RPL + blockm_OFF + bop_SRCCOPY + trans_0 + bltmod_BMONO + pattern_OFF + hbgr_SRC_WINDOWS + transc_BG_TRANSP)); } else { CP_WRITE(pjBase, DWG_DWGCTL, (opcode_ILOAD + atype_RSTR + sgnzero_ZERO + shftzero_ZERO + ((rop4 & 0xf) << 16) + bltmod_BMONOWF)); } CP_WRITE(pjBase, DWG_BCOL, BCol); CP_WRITE(pjBase, DWG_FCOL, FCol); CP_WRITE(pjBase, DWG_AR5, 0); CP_WRITE(pjBase, DWG_SGN, 0); while (TRUE) { cxDst = prcl->right - prcl->left; cyDst = prcl->bottom - prcl->top; xDst = prcl->left + xOffset; yDst = prcl->top + yOffset; ySrc = prcl->top + dy; xSrc = prcl->left + dx; // Since SSA (AR3) is always zero, we may have to clip the expanded // ILOAD using CXLEFT, and we'll have to modify FXLEFT accordingly. xSrcAlign = xSrc & 0x1F; if (xSrcAlign) { // We'll have to use clipping. CP_WRITE(pjBase, DWG_CXLEFT, xDst); } // Number of pixels per line. 
CP_WRITE(pjBase, DWG_AR0, (cxDst - 1 + xSrcAlign)); CP_WRITE(pjBase, DWG_AR3, 0); CP_WRITE(pjBase, DWG_FXBNDRY, (((xDst + cxDst - 1) << bfxright_SHIFT) | ((xDst - xSrcAlign) & bfxleft_MASK))); // ylength_MASK not needed since coordinates are within range CP_START(pjBase, DWG_YDSTLEN, ((yDst << yval_SHIFT) | cyDst)); // Calculate the location of the source rectangle. This points to the // first dword to be downloaded. It is aligned on a dword boundary. // The first bit of interest in the first dword is at (xSrc & 0x1f). pulSrc = (ULONG*)(pjSrcScan0 + (ySrc * lSrcDelta) + ((xSrc & 0xFFFFFFE0) >> 3)); CHECK_FIFO_SPACE(pjBase, FIFOSIZE); BLT_WRITE_ON(ppdev, pjBase); // Number of bytes, padded to the next dword, to be moved per // scanline. Since we align the starting dword on a dword boundary, // we know that we cannot overflow the end of the bitmap. cdSrc = ((xSrcAlign + cxDst + 0x1F) & 0xFFFFFFE0) >> 3; lSrcSkip = lSrcDelta - cdSrc; if (lSrcSkip == 0) { // There is no line-to-line increment, we can go full speed. // Total number of dwords to be sent. cdSrc = cyDst * (cdSrc >> 2); while ((cdSrc -= FIFOSIZE) > 0) { pulDst = (ULONG*)pjDma; CHECK_FIFO_SPACE(pjBase, FIFOSIZE); for (i = FIFOSIZE; i != 0; i--) { CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip); } } pulDst = (ULONG*)pjDma; cdSrc += FIFOSIZE; CHECK_FIFO_SPACE(pjBase, cdSrc); for (i = cdSrc; i != 0; i--) { CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip); } } else { // We can't go full speed. // Number of full dwords to be moved on each scan. We know that // we won't overflow the end of the bitmap with this. 
cdSrc >>= 2; cdSrcPerScan = cdSrc; for (k = cyDst; k != 0; k--) { pulDst = (ULONG*)pjDma; cdSrc = cdSrcPerScan; while ((cdSrc -= FIFOSIZE) > 0) { CHECK_FIFO_SPACE(pjBase, FIFOSIZE); for (i = FIFOSIZE; i != 0; i--) { CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip); } } cdSrc += FIFOSIZE; CHECK_FIFO_SPACE(pjBase, cdSrc); for (i = cdSrc; i != 0; i--) { CP_WRITE_DMA(ppdev, pulDst++, *pulSrc++ ^ ulBitFlip); } // We're done with the current scan, go to the next one. pulSrc = (ULONG*) ((BYTE*) pulSrc + lSrcSkip); } } BLT_WRITE_OFF(ppdev, pjBase); if (xSrcAlign) { // Restore the clipping: CHECK_FIFO_SPACE(pjBase, 1); CP_WRITE(pjBase, DWG_CXLEFT, 0); } if (--c == 0) break; prcl++; CHECK_FIFO_SPACE(pjBase, 5); } } /******************************Public*Routine******************************\ * LONG lSplitRcl * * WRAM-WRAM blts can't span banks, and this routine does the tough work * of figuring out how much of the blt can be done via WRAM-WRAM in one bank, * then a regular blt over the bank boundary, and again WRAM-WRAM in the * next bank. * \**************************************************************************/ LONG lSplitRcl( RECTL *arclDst, LONG *ayBreak, LONG cyBreak, LONG dy, ULONG flDirCode, LONG *aiCmd) { LONG iBreak = 0; LONG iSrc = 0; LONG iDst = 0; RECTL rcl; LONG lBoundsTop; LONG lBoundsBottom; LONG iCmdLast = 0; /////////////////////////////////////////////////////////////////////////////// // See [WRN] comment below before changing this macro. This macro is // particular to this function. #define NON_EMPTY_RECT(rcl) ((rcl.right > rcl.left) && (rcl.bottom > rcl.top)) aiCmd[0] = 0; if (cyBreak == 0) { return 1; } while (TRUE) { rcl = arclDst[iSrc]; // Find the bounding scans of the union of the source and destination. 
lBoundsTop = min(rcl.top, rcl.top + dy); lBoundsBottom = max(rcl.bottom, rcl.bottom + dy); if ((ayBreak[iBreak] < lBoundsTop) || (ayBreak[iBreak] >= lBoundsBottom)) { // Do nothing iDst++; goto next_break; } // [WRN] For the following, bottom could be less than top and // right could be less than left. These should be considered // empty rectangles, and the macro above reflects this. arclDst[iDst].left = rcl.left; arclDst[iDst].right = rcl.right; arclDst[iDst].top = rcl.top; arclDst[iDst].bottom = min(rcl.bottom, (ayBreak[iBreak] - dy)); if (NON_EMPTY_RECT(arclDst[iDst])) { aiCmd[iDst++] = 0; iCmdLast = 0; } arclDst[iDst].left = rcl.left; arclDst[iDst].right = rcl.right; arclDst[iDst].top = max(rcl.top, (ayBreak[iBreak] - dy)); arclDst[iDst].bottom = min(rcl.bottom, (ayBreak[iBreak] + 1)); if (NON_EMPTY_RECT(arclDst[iDst])) { aiCmd[iDst++] = 1; iCmdLast = 1; } arclDst[iDst].left = rcl.left; arclDst[iDst].right = rcl.right; arclDst[iDst].top = max(rcl.top, (ayBreak[iBreak] + 1)); arclDst[iDst].bottom = rcl.bottom; if (NON_EMPTY_RECT(arclDst[iDst])) { aiCmd[iDst++] = 0; iCmdLast = 0; } next_break: if ((--cyBreak == 0) || (iCmdLast == 1)) { // If we have run out of breaks, we're done. // Once the last rectangle is marked slow, it stays slow. break; } iSrc = --iDst; iBreak++; }; return iDst; } /******************************Public*Routine******************************\ * VOID vMilCopyBlt * * Does a screen-to-screen blt of a list of rectangles. 
* \**************************************************************************/ VOID vMilCopyBlt( // Type FNCOPY PDEV* ppdev, LONG c, // Can't be zero RECTL* prcl, // Array of relative coordinates destination rectangles ULONG rop4, // Rop4 POINTL* pptlSrc, // Original unclipped source point RECTL* prclDst) // Original unclipped destination rectangle { BYTE* pjBase; LONG xOffset; LONG yOffset; LONG dx; LONG dy; FLONG flDirCode; LONG lSignedPitch; ULONG ulHwMix; ULONG ulDwg; LONG yDst; LONG ySrc; LONG cy; LONG xSrc; LONG lSignedWidth; LONG lSrcStart; ULONG ulDwgFast = 0; LONG cjPelSize; pjBase = ppdev->pjBase; xOffset = ppdev->xOffset; yOffset = ppdev->yOffset; cjPelSize = ppdev->cjPelSize; dx = pptlSrc->x - prclDst->left; dy = pptlSrc->y - prclDst->top; // Add to destination to get source flDirCode = DRAWING_DIR_TBLR; lSignedPitch = ppdev->cxMemory; // If the destination and source rectangles overlap, we will have to // tell the accelerator in which direction the copy should be done: if (OVERLAP(prclDst, pptlSrc)) { if (prclDst->left > pptlSrc->x) { flDirCode |= scanleft_RIGHT_TO_LEFT; } if (prclDst->top > pptlSrc->y) { flDirCode |= sdy_BOTTOM_TO_TOP; lSignedPitch = -lSignedPitch; } } if (rop4 == 0xcccc) { ulDwg = opcode_BITBLT | atype_RPL | blockm_OFF | bltmod_BFCOL | pattern_OFF | transc_BG_OPAQUE | bop_SRCCOPY | shftzero_ZERO | sgnzero_NO_ZERO; if ((dy > 0) && (dx == 0)) { // We enable fast WRAM to WRAM blts only for upward scrolls. // We could enable it for more blts, but it has stringent // alignment requirements which aren't likely to be met unless // it's a vertical scroll. 
ulDwgFast = opcode_FBITBLT | atype_RPL | blockm_OFF | bltmod_BFCOL | pattern_OFF | transc_BG_OPAQUE | bop_NOP | shftzero_ZERO | sgnzero_NO_ZERO; } } else { ulHwMix = rop4 & 0xf; ulDwg = opcode_BITBLT + atype_RSTR + blockm_OFF + bltmod_BFCOL + pattern_OFF + transc_BG_OPAQUE + (ulHwMix << 16); } // The SRC0 to SRC3 registers are probably trashed by the blt, and we // may be using a different SGN: ppdev->HopeFlags = 0; CHECK_FIFO_SPACE(pjBase, 8); CP_WRITE(pjBase, DWG_SGN, flDirCode); CP_WRITE(pjBase, DWG_AR5, lSignedPitch); // If the overhead for setting up the fast blt is too high, then we should // have a minimum size for prclDst. if (ulDwgFast) { RECTL arclDst[1+(MAX_WRAM_BARRIERS*2)]; LONG aiCmd[1+(MAX_WRAM_BARRIERS*2)]; LONG ayBreak[MAX_WRAM_BARRIERS]; LONG cyBreak; RECTL *prclDst; LONG crclDst; ULONG aulCmd[2] = {ulDwgFast, ulDwg}; LONG i; cyBreak = ppdev->cyBreak; for (i = 0; i < cyBreak; i++) { // lSplitRcl deals in relative coordinates for the destination and // source rectangles, so convert the break locations to relative // coordinates, too: ayBreak[i] = ppdev->ayBreak[i] - yOffset; } while (TRUE) { arclDst[0] = *prcl; prclDst = arclDst; // split the rectangle at each ayBreak[i] // If the first scan was on a split, start with the slow blt, // otherwise, start with the fast blt and alternate. crclDst = lSplitRcl(arclDst, ayBreak, cyBreak, dy, flDirCode, aiCmd); i = 0; while (TRUE) { LONG xRight; ASSERTDD((aiCmd[i] & ~1) == 0, "Only bit 0 of aiCmd[i] should be set."); CP_WRITE(pjBase, DWG_DWGCTL, aulCmd[aiCmd[i]]); xRight = prclDst->right + xOffset - 1; //////////////////////////////////////////////////////////////// // The following code is a bugfix for the fast WRAM copies // Extend the right edge to a specific value and then // clip to the actual desired edge. 
CP_WRITE(pjBase, DWG_CXRIGHT, xRight); switch(cjPelSize) { case 1: xRight |= 0x40; break; case 2: xRight |= 0x20; break; case 4: xRight |= 0x10; break; case 3: xRight = (((xRight * 3) + 2) | 0x40) / 3; break; } //////////////////////////////////////////////////////////////// CP_WRITE(pjBase, DWG_FXBNDRY, (((xRight) << bfxright_SHIFT) | ((prclDst->left + xOffset) & bfxleft_MASK))); yDst = yOffset + prclDst->top; ySrc = yOffset + prclDst->top + dy; // ylength_MASK not is needed since coordinates are within range CP_WRITE(pjBase, DWG_YDSTLEN, (((yDst) << yval_SHIFT) | ((prclDst->bottom - prclDst->top)))); xSrc = xOffset + prclDst->left + dx; lSignedWidth = prclDst->right - prclDst->left - 1; lSrcStart = ppdev->ulYDstOrg + (ySrc * ppdev->cxMemory) + xSrc; CP_WRITE(pjBase, DWG_AR3, lSrcStart); CP_START(pjBase, DWG_AR0, lSrcStart + lSignedWidth); if (--crclDst == 0) break; prclDst++; i++; CHECK_FIFO_SPACE(pjBase, 6); } if (--c == 0) break; prcl++; CHECK_FIFO_SPACE(pjBase, 6); } // Restore the clipping: CHECK_FIFO_SPACE(pjBase, 1); CP_WRITE(pjBase, DWG_CXRIGHT, (ppdev->cxMemory - 1)); } else { CP_WRITE(pjBase, DWG_DWGCTL, ulDwg); while (TRUE) { CP_WRITE(pjBase, DWG_FXBNDRY, (((prcl->right + xOffset - 1) << bfxright_SHIFT) | ((prcl->left + xOffset) & bfxleft_MASK))); yDst = yOffset + prcl->top; ySrc = yOffset + prcl->top + dy; if (flDirCode & sdy_BOTTOM_TO_TOP) { cy = prcl->bottom - prcl->top - 1; yDst += cy; ySrc += cy; } // ylength_MASK not is needed since coordinates are within range CP_WRITE(pjBase, DWG_YDSTLEN, (((yDst) << yval_SHIFT) | ((prcl->bottom - prcl->top)))); xSrc = xOffset + prcl->left + dx; lSignedWidth = prcl->right - prcl->left - 1; if (flDirCode & scanleft_RIGHT_TO_LEFT) { xSrc += lSignedWidth; lSignedWidth = -lSignedWidth; } lSrcStart = ppdev->ulYDstOrg + (ySrc * ppdev->cxMemory) + xSrc; CP_WRITE(pjBase, DWG_AR3, lSrcStart); CP_START(pjBase, DWG_AR0, lSrcStart + lSignedWidth); if (--c == 0) break; prcl++; CHECK_FIFO_SPACE(pjBase, 4); } } }