2023 lines
64 KiB
C
2023 lines
64 KiB
C
/******************************Module*Header*******************************\
* Module Name: w32blt.c
*
* Contains the low-level memory-mapped IO blt functions.
*
* Hopefully, if you're basing your display driver on this code, to
* support all of DrvBitBlt and DrvCopyBits, you'll only have to implement
* the following routines.  You shouldn't have to modify much in
* 'bitblt.c'.  I've tried to make these routines as few, modular, simple,
* and efficient as I could, while still accelerating as many calls as
* possible that would be cost-effective in terms of performance wins
* versus size and effort.
*
* Note: In the following, 'relative' coordinates refers to coordinates
*       that haven't yet had the offscreen bitmap (DFB) offset applied.
*       'Absolute' coordinates have had the offset applied.  For example,
*       we may be told to blt to (1, 1) of the bitmap, but the bitmap may
*       be sitting in offscreen memory starting at coordinate (0, 768) --
*       (1, 1) would be the 'relative' start coordinate, and (1, 769)
*       would be the 'absolute' start coordinate.
*
* Copyright (c) 1992-1996 Microsoft Corporation
*
\**************************************************************************/
|
|
|
|
#include "precomp.h"
|
|
|
|
/**************************************************************************
|
|
* All functions using the accelerator must...
|
|
* Wait for the ACL queue to be empty before loading any of the registers.
|
|
**************************************************************************/
|
|
|
|
/**************************************************************************
* The following tables are heinous, but required.  The monochrome data
* (also known as Mix-Map or Mask) expander interprets the data such that
* the least significant bit of a byte is pixel 0 and the most significant
* bit is pixel 7.  This is backwards from the way monochrome data is
* interpreted by Windows and Windows NT.  Also, the expander will ONLY
* do 1 to 8 expansion, so we need to replicate each bit by the number of
* bytes per pel in the current color depth.
**************************************************************************/
|
|
|
|
BYTE jReverse[] =
{
    // 256-entry lookup: jReverse[i] is i with its eight bits mirrored
    // (bit 7 <-> bit 0, bit 6 <-> bit 1, and so on).
    //
    // e.g. 10000000 -> 00000001 and
    //      10010100 -> 00101001.
    //
    // The trailing comment on each row is the index of its first element.

    0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,     // 0x00
    0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,     // 0x08
    0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,     // 0x10
    0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,     // 0x18
    0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,     // 0x20
    0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,     // 0x28
    0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,     // 0x30
    0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,     // 0x38
    0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,     // 0x40
    0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,     // 0x48
    0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,     // 0x50
    0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,     // 0x58
    0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,     // 0x60
    0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,     // 0x68
    0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,     // 0x70
    0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,     // 0x78
    0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,     // 0x80
    0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,     // 0x88
    0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,     // 0x90
    0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,     // 0x98
    0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,     // 0xa0
    0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,     // 0xa8
    0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,     // 0xb0
    0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,     // 0xb8
    0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,     // 0xc0
    0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,     // 0xc8
    0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,     // 0xd0
    0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,     // 0xd8
    0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,     // 0xe0
    0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,     // 0xe8
    0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,     // 0xf0
    0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,     // 0xf8
};
|
|
|
|
WORD wReverse2x[] =
{
    // 256-entry lookup: wReverse2x[i] is i with its eight bits mirrored
    // and every bit then written twice, giving a 16-bit result.
    //
    // e.g. 10000000 -> 0000000000000011 and
    //      10010100 -> 0000110011000011.
    //
    // The trailing comment on each row is the index of its first element.

    0x0000, 0xc000, 0x3000, 0xf000, 0x0c00, 0xcc00, 0x3c00, 0xfc00,     // 0x00
    0x0300, 0xc300, 0x3300, 0xf300, 0x0f00, 0xcf00, 0x3f00, 0xff00,     // 0x08
    0x00c0, 0xc0c0, 0x30c0, 0xf0c0, 0x0cc0, 0xccc0, 0x3cc0, 0xfcc0,     // 0x10
    0x03c0, 0xc3c0, 0x33c0, 0xf3c0, 0x0fc0, 0xcfc0, 0x3fc0, 0xffc0,     // 0x18
    0x0030, 0xc030, 0x3030, 0xf030, 0x0c30, 0xcc30, 0x3c30, 0xfc30,     // 0x20
    0x0330, 0xc330, 0x3330, 0xf330, 0x0f30, 0xcf30, 0x3f30, 0xff30,     // 0x28
    0x00f0, 0xc0f0, 0x30f0, 0xf0f0, 0x0cf0, 0xccf0, 0x3cf0, 0xfcf0,     // 0x30
    0x03f0, 0xc3f0, 0x33f0, 0xf3f0, 0x0ff0, 0xcff0, 0x3ff0, 0xfff0,     // 0x38
    0x000c, 0xc00c, 0x300c, 0xf00c, 0x0c0c, 0xcc0c, 0x3c0c, 0xfc0c,     // 0x40
    0x030c, 0xc30c, 0x330c, 0xf30c, 0x0f0c, 0xcf0c, 0x3f0c, 0xff0c,     // 0x48
    0x00cc, 0xc0cc, 0x30cc, 0xf0cc, 0x0ccc, 0xcccc, 0x3ccc, 0xfccc,     // 0x50
    0x03cc, 0xc3cc, 0x33cc, 0xf3cc, 0x0fcc, 0xcfcc, 0x3fcc, 0xffcc,     // 0x58
    0x003c, 0xc03c, 0x303c, 0xf03c, 0x0c3c, 0xcc3c, 0x3c3c, 0xfc3c,     // 0x60
    0x033c, 0xc33c, 0x333c, 0xf33c, 0x0f3c, 0xcf3c, 0x3f3c, 0xff3c,     // 0x68
    0x00fc, 0xc0fc, 0x30fc, 0xf0fc, 0x0cfc, 0xccfc, 0x3cfc, 0xfcfc,     // 0x70
    0x03fc, 0xc3fc, 0x33fc, 0xf3fc, 0x0ffc, 0xcffc, 0x3ffc, 0xfffc,     // 0x78
    0x0003, 0xc003, 0x3003, 0xf003, 0x0c03, 0xcc03, 0x3c03, 0xfc03,     // 0x80
    0x0303, 0xc303, 0x3303, 0xf303, 0x0f03, 0xcf03, 0x3f03, 0xff03,     // 0x88
    0x00c3, 0xc0c3, 0x30c3, 0xf0c3, 0x0cc3, 0xccc3, 0x3cc3, 0xfcc3,     // 0x90
    0x03c3, 0xc3c3, 0x33c3, 0xf3c3, 0x0fc3, 0xcfc3, 0x3fc3, 0xffc3,     // 0x98
    0x0033, 0xc033, 0x3033, 0xf033, 0x0c33, 0xcc33, 0x3c33, 0xfc33,     // 0xa0
    0x0333, 0xc333, 0x3333, 0xf333, 0x0f33, 0xcf33, 0x3f33, 0xff33,     // 0xa8
    0x00f3, 0xc0f3, 0x30f3, 0xf0f3, 0x0cf3, 0xccf3, 0x3cf3, 0xfcf3,     // 0xb0
    0x03f3, 0xc3f3, 0x33f3, 0xf3f3, 0x0ff3, 0xcff3, 0x3ff3, 0xfff3,     // 0xb8
    0x000f, 0xc00f, 0x300f, 0xf00f, 0x0c0f, 0xcc0f, 0x3c0f, 0xfc0f,     // 0xc0
    0x030f, 0xc30f, 0x330f, 0xf30f, 0x0f0f, 0xcf0f, 0x3f0f, 0xff0f,     // 0xc8
    0x00cf, 0xc0cf, 0x30cf, 0xf0cf, 0x0ccf, 0xcccf, 0x3ccf, 0xfccf,     // 0xd0
    0x03cf, 0xc3cf, 0x33cf, 0xf3cf, 0x0fcf, 0xcfcf, 0x3fcf, 0xffcf,     // 0xd8
    0x003f, 0xc03f, 0x303f, 0xf03f, 0x0c3f, 0xcc3f, 0x3c3f, 0xfc3f,     // 0xe0
    0x033f, 0xc33f, 0x333f, 0xf33f, 0x0f3f, 0xcf3f, 0x3f3f, 0xff3f,     // 0xe8
    0x00ff, 0xc0ff, 0x30ff, 0xf0ff, 0x0cff, 0xccff, 0x3cff, 0xfcff,     // 0xf0
    0x03ff, 0xc3ff, 0x33ff, 0xf3ff, 0x0fff, 0xcfff, 0x3fff, 0xffff,     // 0xf8
};
|
|
|
|
// Four-entry table whose element [i] equals (4 - i) & 3.
// NOTE(review): presumably the number of leading bytes needed to reach
// DWORD alignment from an address whose low two bits are i -- confirm
// against the callers, which are not in this part of the file.
ULONG aulLeadCnt[] =
{
    0x0,    // [0]
    0x3,    // [1]
    0x2,    // [2]
    0x1     // [3]
};
|
|
|
|
FNLOWXFER* afnXferI_Narrow[16] =
|
|
{
|
|
NULL,
|
|
vXferI_1_Byte,
|
|
vXferI_2_Bytes,
|
|
vXferI_3_Bytes
|
|
};
|
|
|
|
FNLOWXFER* afnXferP_Narrow[16] =
|
|
{
|
|
NULL,
|
|
vXferP_1_Byte,
|
|
vXferP_2_Bytes,
|
|
vXferP_3_Bytes
|
|
};
|
|
|
|
/**************************************************************************
|
|
*
|
|
* Realizes a pattern into offscreen memory.
|
|
*
|
|
**************************************************************************/
|
|
|
|
VOID vFastPatRealize( // Type FNFASTPATREALIZE
|
|
PDEV* ppdev,
|
|
RBRUSH* prb, // Points to brush realization structure
|
|
POINTL* pptlBrush, // Ignored
|
|
BOOL bTransparent) // FALSE for normal patterns; TRUE for
|
|
// patterns with a mask when the background
|
|
// mix is LEAVE_ALONE.
|
|
{
|
|
BRUSHENTRY* pbe;
|
|
LONG iBrushCache;
|
|
ULONG ulOffset;
|
|
BYTE* pjPattern;
|
|
LONG culPattern;
|
|
LONG cjPattern;
|
|
BYTE* pjDst;
|
|
ULONG ulDstOffset;
|
|
|
|
BYTE* pjBase = ppdev->pjBase;
|
|
|
|
DISPDBG((10,"vFastPatRealize called"));
|
|
|
|
//
|
|
// Make sure we can write to the video registers.
|
|
//
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
pbe = prb->pbe;
|
|
if ((pbe == NULL) || (pbe->prbVerify != prb))
|
|
{
|
|
// We have to allocate a new offscreen cache brush entry for
|
|
// the brush:
|
|
|
|
iBrushCache = ppdev->iBrushCache;
|
|
pbe = &ppdev->abe[iBrushCache];
|
|
|
|
iBrushCache++;
|
|
if (iBrushCache >= ppdev->cBrushCache)
|
|
iBrushCache = 0;
|
|
|
|
ppdev->iBrushCache = iBrushCache;
|
|
|
|
// Update our links:
|
|
|
|
pbe->prbVerify = prb;
|
|
prb->pbe = pbe;
|
|
}
|
|
|
|
prb->bTransparent = bTransparent;
|
|
|
|
ulDstOffset = ((pbe->y * ppdev->lDelta) + (pbe->x * ppdev->cBpp));
|
|
pjPattern = (PBYTE) &prb->aulPattern[0]; // Copy from brush buffer
|
|
cjPattern = PATTERN_SIZE * ppdev->cBpp;
|
|
if ((ppdev->ulChipID != W32P) && (ppdev->ulChipID != ET6000))
|
|
{
|
|
cjPattern *= 4;
|
|
}
|
|
|
|
START_DIRECT_ACCESS(ppdev, pjBase);
|
|
|
|
if (!ppdev->bAutoBanking)
|
|
{
|
|
// Set the address where we're going to put the pattern data.
|
|
// All data transfers to video memory take place through aperature 0.
|
|
|
|
CP_MMU_BP0(ppdev, pjBase, ulDstOffset);
|
|
pjDst = (PBYTE) ppdev->pjMmu0;
|
|
}
|
|
else
|
|
{
|
|
pjDst = ppdev->pjScreen + ulDstOffset;
|
|
}
|
|
|
|
RtlCopyMemory(pjDst, pjPattern, cjPattern);
|
|
|
|
END_DIRECT_ACCESS(ppdev, pjBase);
|
|
}
|
|
|
|
|
|
|
|
/**************************************************************************
|
|
*
|
|
* Does a pattern fill to a list of rectangles.
|
|
*
|
|
**************************************************************************/
|
|
|
|
VOID vPatternFillScr(
|
|
PDEV* ppdev,
|
|
LONG c, // Can't be zero
|
|
RECTL* prcl, // Array of relative coordinate destination rects
|
|
ROP4 rop4, // Obvious?
|
|
RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
|
|
POINTL* pptlBrush) //
|
|
{
|
|
BYTE* pjBase = ppdev->pjBase;
|
|
LONG lDelta = ppdev->lDelta;
|
|
LONG cBpp = ppdev->cBpp;
|
|
BOOL bTransparent;
|
|
ULONG ulPatternAddrBase;
|
|
ULONG cTile = 0;
|
|
BRUSHENTRY* pbe; // Pointer to brush entry data, which is used
|
|
// for keeping track of the location and status
|
|
// of the pattern bits cached in off-screen
|
|
// memory
|
|
|
|
DISPDBG((10,"vPatternFillScr called"));
|
|
|
|
bTransparent = ((rop4 & 0xff) != (rop4 >> 8));
|
|
ASSERTDD(!bTransparent, "We don't handle transparent brushes yet.");
|
|
|
|
if ((ppdev->ulChipID != W32P) && (ppdev->ulChipID != ET6000))
|
|
{
|
|
//
|
|
// Patterns are duplicated horizontally and vertically (4 tiles)
|
|
//
|
|
|
|
cTile = 1; // Look, it means one extra to the right
|
|
}
|
|
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
|
|
if ((rbc.prb->pbe->prbVerify != rbc.prb))
|
|
{
|
|
vFastPatRealize(ppdev, rbc.prb, NULL, FALSE);
|
|
}
|
|
|
|
ASSERTDD(rbc.prb->bTransparent == bTransparent,
|
|
"Not realized with correct transparency");
|
|
|
|
pbe = rbc.prb->pbe;
|
|
|
|
//
|
|
// Make sure we can write to the video registers.
|
|
//
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
|
|
CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff));
|
|
CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
|
|
|
|
//
|
|
// ### precalc & store the PAT_Y_OFFSET const in the pdev
|
|
//
|
|
|
|
CP_PAT_WRAP(ppdev, pjBase, ppdev->w32PatternWrap);
|
|
CP_PAT_Y_OFFSET(ppdev, pjBase, (((PATTERN_OFFSET * cBpp) << cTile) - 1));
|
|
|
|
//
|
|
// Fill the list of rectangles
|
|
//
|
|
|
|
ulPatternAddrBase = (pbe->y * lDelta) + (pbe->x * cBpp);
|
|
|
|
do {
|
|
ULONG offset;
|
|
|
|
offset = cBpp * (
|
|
(((prcl->top-pptlBrush->y)&7) << (3+cTile)) +
|
|
((prcl->left-pptlBrush->x)&7)
|
|
);
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
CP_PAT_ADDR(ppdev, pjBase, (ulPatternAddrBase + offset));
|
|
|
|
CP_XCNT(ppdev, pjBase, (((prcl->right - prcl->left) * cBpp) - 1));
|
|
CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
|
|
|
|
// Set the blit destination address as the base address of MMU aperture 2
|
|
// Then start the accelerated operation by writing something to this
|
|
// aperture.
|
|
|
|
SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->left)));
|
|
START_ACL(ppdev);
|
|
|
|
prcl++;
|
|
|
|
} while (--c != 0);
|
|
}
|
|
|
|
|
|
/**************************************************************************
|
|
*
|
|
* Does a solid fill to a list of rectangles.
|
|
*
|
|
**************************************************************************/
|
|
|
|
VOID vSolidFillScr(
|
|
PDEV* ppdev,
|
|
LONG c, // Can't be zero
|
|
RECTL* prcl, // Array of relative coordinate destination rects
|
|
ROP4 rop4, // Obvious?
|
|
RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
|
|
POINTL* pptlBrush) // Not used
|
|
{
|
|
BYTE* pjBase = ppdev->pjBase;
|
|
LONG lDelta = ppdev->lDelta;
|
|
LONG cBpp = ppdev->cBpp;
|
|
ULONG ulSolidColor;
|
|
|
|
DISPDBG((10,"vSolidFillScr called"));
|
|
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
ASSERTDD((ppdev->cBpp < 3),
|
|
"vSolidFillScr only works for 8bpp and 16bpp");
|
|
|
|
// Make sure we can write to the video registers.
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
|
|
CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff));
|
|
CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
|
|
CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
|
|
CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
|
|
CP_PAT_ADDR(ppdev, pjBase, ppdev->ulSolidColorOffset);
|
|
|
|
ulSolidColor = rbc.iSolidColor;
|
|
|
|
if (cBpp == 1)
|
|
{
|
|
ulSolidColor &= 0x000000FF; // We may get some extraneous data in the
|
|
ulSolidColor |= ulSolidColor << 8;
|
|
}
|
|
if (cBpp <= 2)
|
|
{
|
|
ulSolidColor &= 0x0000FFFF;
|
|
ulSolidColor |= ulSolidColor << 16;
|
|
}
|
|
|
|
// Set the color in offscreen memory
|
|
|
|
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
|
|
|
|
if (ppdev->bAutoBanking)
|
|
{
|
|
*(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset) = ulSolidColor;
|
|
}
|
|
else
|
|
{
|
|
CP_MMU_BP0(ppdev, pjBase, ppdev->ulSolidColorOffset);
|
|
CP_WRITE_MMU_DWORD(ppdev, 0, 0, ulSolidColor);
|
|
}
|
|
|
|
//
|
|
// Fill the list of rectangles
|
|
//
|
|
|
|
do {
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
CP_XCNT(ppdev, pjBase, ((prcl->right - prcl->left) * cBpp - 1));
|
|
CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
|
|
|
|
// Set the blt destination address as the base address of MMU aperture 2
|
|
// Then start the accelerated operation by writing something to this
|
|
// aperture.
|
|
|
|
SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->left)));
|
|
START_ACL(ppdev);
|
|
|
|
prcl++;
|
|
|
|
} while (--c != 0);
|
|
}
|
|
|
|
|
|
VOID vSolidFillScr24(
|
|
PDEV* ppdev,
|
|
LONG c, // Can't be zero
|
|
RECTL* prcl, // Array of relative coordinate destination rects
|
|
ROP4 rop4, // Obvious?
|
|
RBRUSH_COLOR rbc, // Drawing color is rbc.iSolidColor
|
|
POINTL* pptlBrush) // Not used
|
|
{
|
|
BYTE* pjBase = ppdev->pjBase;
|
|
LONG lDelta = ppdev->lDelta;
|
|
ULONG ulSolidColor = rbc.iSolidColor;
|
|
|
|
DISPDBG((10,"vSolidFillScr24 called"));
|
|
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
|
|
ASSERTDD((ppdev->cBpp == 3),
|
|
"vSolidFillScr24 called when not in 24bpp mode");
|
|
|
|
ASSERTDD(((ppdev->ulChipID == W32P) || (ppdev->ulChipID == ET6000)),
|
|
"24bpp solid fills only accelerated for w32p/ET6000");
|
|
|
|
#define CBPP 3
|
|
|
|
//
|
|
// Make sure we can write to the video registers.
|
|
//
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
|
|
CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff));
|
|
CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
|
|
//
|
|
// This must be special cased for the ET6000. I'm not sure why it worked
|
|
// for the others, because we have a 3 byte wide pattern, but were setting the
|
|
// pattern wrap for a 4 byte wide pattern. We were also setting the Y_offset
|
|
// to be 3 when it should be 2, which really means 3 bytes per line. Strange.
|
|
//
|
|
// Anyway, I've left the code for the others in place and it will get executed
|
|
// for them.
|
|
//
|
|
|
|
CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP_24BPP); // 1 line, 3 bytes per line
|
|
CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET_24BPP - 1)); // indicates 3 bytes per line
|
|
|
|
CP_PAT_ADDR(ppdev, pjBase, ppdev->ulSolidColorOffset);
|
|
|
|
// Set the color in offscreen memory
|
|
|
|
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
|
|
|
|
if (ppdev->bAutoBanking)
|
|
{
|
|
*(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset) = ulSolidColor;
|
|
}
|
|
else
|
|
{
|
|
CP_MMU_BP0(ppdev, pjBase, ppdev->ulSolidColorOffset);
|
|
CP_WRITE_MMU_DWORD(ppdev, 0, 0, ulSolidColor);
|
|
}
|
|
|
|
//
|
|
// We know that the ACL is idle now, so no wait
|
|
//
|
|
|
|
CP_PEL_DEPTH(ppdev, pjBase, HW_PEL_DEPTH_24BPP);
|
|
|
|
//
|
|
// Fill the list of rectangles
|
|
//
|
|
|
|
do {
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
if (ppdev->ulChipID == ET6000)
|
|
{
|
|
CP_XCNT(ppdev, pjBase, (((prcl->right - prcl->left) * CBPP) - 1));
|
|
}
|
|
else
|
|
{
|
|
CP_XCNT(ppdev, pjBase, ((prcl->right - prcl->left - 1) * CBPP));
|
|
}
|
|
CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
|
|
|
|
// Set the blt destination address as the base address of MMU aperture 2
|
|
// Then start the accelerated operation by writing something to this
|
|
// aperture.
|
|
|
|
SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (CBPP * prcl->left)));
|
|
START_ACL(ppdev);
|
|
|
|
prcl++;
|
|
|
|
} while (--c != 0);
|
|
|
|
// set pixel depth back to 1
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_PEL_DEPTH(ppdev, pjBase, HW_PEL_DEPTH_8BPP);
|
|
#undef CBPP
|
|
}
|
|
|
|
|
|
/**************************************************************************
|
|
*
|
|
* Does a screen-to-screen blt of a list of rectangles.
|
|
*
|
|
**************************************************************************/
|
|
|
|
VOID vScrToScr(
|
|
PDEV* ppdev,
|
|
LONG c, // Can't be zero
|
|
RECTL* prcl, // Array of relative coordinates destination rectangles
|
|
ROP4 rop4, // Obvious?
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst) // Original unclipped destination rectangle
|
|
{
|
|
LONG dx;
|
|
LONG dy; // Add delta to destination to get source
|
|
|
|
LONG xyOffset = ppdev->xyOffset;
|
|
BYTE* pjBase = ppdev->pjBase;
|
|
LONG lDelta = ppdev->lDelta;
|
|
LONG cBpp = ppdev->cBpp;
|
|
|
|
DISPDBG((10,"vScrToScr called"));
|
|
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
|
|
//
|
|
// The src-dst delta will be the same for all rectangles
|
|
//
|
|
|
|
dx = pptlSrc->x - prclDst->left;
|
|
dy = pptlSrc->y - prclDst->top;
|
|
|
|
//
|
|
// Make sure we can write to the video registers.
|
|
//
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
|
|
CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff));
|
|
CP_SRC_WRAP(ppdev, pjBase, NO_PATTERN_WRAP);
|
|
CP_SRC_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
|
|
CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
|
|
|
|
// ### I don't think this is necessary - WAIT_FOR_IDLE_ACL(ppdev, pjBase);
|
|
|
|
//
|
|
// The accelerator may not be as fast at doing right-to-left copies, so
|
|
// only do them when the rectangles truly overlap:
|
|
//
|
|
|
|
if (!OVERLAP(prclDst, pptlSrc))
|
|
goto Top_Down_Left_To_Right;
|
|
|
|
if (prclDst->top <= pptlSrc->y)
|
|
{
|
|
if (prclDst->left <= pptlSrc->x)
|
|
{
|
|
|
|
Top_Down_Left_To_Right:
|
|
|
|
//
|
|
// Top to Bottom - Left to Right
|
|
//
|
|
|
|
DISPDBG((12,"Top to Bottom - Left to Right"));
|
|
|
|
CP_XY_DIR(ppdev, pjBase, 0); // Top to Bottom - Left to Right
|
|
|
|
do {
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1));
|
|
CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
|
|
|
|
CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->top + dy) * lDelta) + cBpp * (prcl->left + dx)));
|
|
|
|
// Set the blt destination address as the base address of MMU aperture 2
|
|
// Then start the accelerated operation by writing something to this
|
|
// aperture.
|
|
|
|
SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->left)));
|
|
START_ACL(ppdev);
|
|
|
|
prcl++;
|
|
|
|
} while (--c != 0);
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// Top to Bottom - Right to left
|
|
//
|
|
|
|
DISPDBG((12,"Top to Bottom - Right to left"));
|
|
|
|
CP_XY_DIR(ppdev, pjBase, RIGHT_TO_LEFT);
|
|
|
|
do {
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1));
|
|
CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
|
|
|
|
CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->top + dy) * lDelta) + cBpp * (prcl->right + dx) - 1));
|
|
|
|
// Set the blt destination address as the base address of MMU aperture 2
|
|
// Then start the accelerated operation by writing something to this
|
|
// aperture.
|
|
|
|
SET_DEST_ADDR(ppdev, ((prcl->top * lDelta) + (cBpp * prcl->right) - 1));
|
|
START_ACL(ppdev);
|
|
|
|
prcl++;
|
|
|
|
} while (--c != 0);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (prclDst->left <= pptlSrc->x)
|
|
{
|
|
//
|
|
// Bottom to Top - Left to Right
|
|
//
|
|
|
|
DISPDBG((12,"Bottom to Top - Left to Right"));
|
|
|
|
CP_XY_DIR(ppdev, pjBase, BOTTOM_TO_TOP);
|
|
|
|
do {
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1));
|
|
CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
|
|
|
|
CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->bottom - 1 + dy) * lDelta) + cBpp * (prcl->left + dx)));
|
|
|
|
// Set the blt destination address as the base address of MMU aperture 2
|
|
// Then start the accelerated operation by writing something to this
|
|
// aperture.
|
|
|
|
SET_DEST_ADDR(ppdev, (((prcl->bottom - 1) * lDelta) + (cBpp * prcl->left)));
|
|
START_ACL(ppdev);
|
|
|
|
prcl++;
|
|
|
|
} while (--c != 0);
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// Bottom to Top - Right to Left
|
|
//
|
|
|
|
DISPDBG((12,"Bottom to Top - Right to Left"));
|
|
|
|
CP_XY_DIR(ppdev, pjBase, (BOTTOM_TO_TOP | RIGHT_TO_LEFT));
|
|
|
|
do {
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
CP_XCNT(ppdev, pjBase, (cBpp * (prcl->right - prcl->left) - 1));
|
|
CP_YCNT(ppdev, pjBase, (prcl->bottom - prcl->top - 1));
|
|
|
|
CP_SRC_ADDR(ppdev, pjBase, (xyOffset + ((prcl->bottom - 1 + dy) * lDelta) + cBpp * (prcl->right + dx) - 1));
|
|
|
|
// Set the blt destination address as the base address of MMU aperture 2
|
|
// Then start the accelerated operation by writing something to this
|
|
// aperture.
|
|
|
|
SET_DEST_ADDR(ppdev, (((prcl->bottom - 1) * lDelta) + cBpp * (prcl->right) - 1));
|
|
START_ACL(ppdev);
|
|
|
|
prcl++;
|
|
|
|
} while (--c != 0);
|
|
}
|
|
}
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_XY_DIR(ppdev, pjBase, 0); // Top to Bottom - Left to Right
|
|
}
|
|
|
|
/**************************************************************************
|
|
*
|
|
* Does a monochrome expansion to video memory.
|
|
*
|
|
* Make this Xfer1to8bpp and create another for Xfer1to16bpp?
|
|
*
|
|
**************************************************************************/
|
|
|
|
VOID vSlowXfer1bpp( // Type FNXFER
|
|
PDEV* ppdev,
|
|
LONG c, // Count of rectangles, can't be zero
|
|
RECTL* prcl, // List of destination rectangles, in relative
|
|
// coordinates
|
|
ROP4 rop4, // Actually had better be a rop3
|
|
SURFOBJ* psoSrc, // Source surface
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst, // Original unclipped destination rectangle
|
|
XLATEOBJ* pxlo) // Translate that provides color-expansion information
|
|
{
|
|
LONG dx;
|
|
LONG dy;
|
|
LONG lSrcDelta;
|
|
BYTE* pjSrcScan0;
|
|
BYTE* pjSrc;
|
|
LONG cjSrc;
|
|
LONG cjTrail;
|
|
LONG culSrc;
|
|
BYTE jFgRop3;
|
|
BYTE jBgRop3;
|
|
BOOL bW32p;
|
|
|
|
ULONG ulSolidColorOffset = ppdev->ulSolidColorOffset;
|
|
BYTE* pjBase = ppdev->pjBase;
|
|
LONG lDelta = ppdev->lDelta;
|
|
LONG cBpp = ppdev->cBpp;
|
|
ULONG ulFgColor = pxlo->pulXlate[1];
|
|
ULONG ulBgColor = pxlo->pulXlate[0];
|
|
|
|
LONG xyOffset = (ppdev->cBpp * ppdev->xOffset) +
|
|
(ppdev->yOffset * ppdev->lDelta);
|
|
|
|
|
|
|
|
DISPDBG((10,"vSlowXfer1bpp called"));
|
|
|
|
DISPDBG((11,"rop4(%04x)", rop4));
|
|
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources");
|
|
ASSERTDD(ppdev->cBpp <= 2, "vSlowXfer1bpp doesn't work at 24 bpp");
|
|
|
|
bW32p = (ppdev->ulChipID == W32P);
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
jFgRop3 = (BYTE)(rop4 >> 8); // point to src color where src is indicated
|
|
// point to pat color where src is indicated
|
|
|
|
if ((BYTE) rop4 != R3_NOP)
|
|
{
|
|
jBgRop3 = (BYTE)((rop4 & 0xc3) | ((rop4 & 0xf0) >> 2));
|
|
}
|
|
else
|
|
{
|
|
jBgRop3 = (BYTE) rop4;
|
|
}
|
|
|
|
DISPDBG((11,"jFgRop3(%04x), jBgRop3(%04x)", jFgRop3, jBgRop3));
|
|
|
|
CP_FG_ROP(ppdev, pjBase, jFgRop3);
|
|
CP_BK_ROP(ppdev, pjBase, jBgRop3);
|
|
CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
|
|
|
|
CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
|
|
CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
|
|
CP_SRC_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
|
|
CP_SRC_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
|
|
CP_PAT_ADDR(ppdev, pjBase, ulSolidColorOffset + 4);
|
|
CP_SRC_ADDR(ppdev, pjBase, ulSolidColorOffset);
|
|
|
|
{
|
|
//
|
|
// Set the address where we're going to put the solid color data.
|
|
// All data transfers to video memory take place through aperature 0.
|
|
//
|
|
|
|
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
|
|
|
|
CP_MMU_BP0(ppdev, pjBase, ppdev->ulSolidColorOffset);
|
|
|
|
//
|
|
// Set the color in offscreen memory
|
|
//
|
|
|
|
if (cBpp == 1)
|
|
{
|
|
ulFgColor |= ulFgColor << 8;
|
|
ulBgColor |= ulBgColor << 8;
|
|
}
|
|
if (cBpp <= 2)
|
|
{
|
|
ulFgColor |= ulFgColor << 16;
|
|
ulBgColor |= ulBgColor << 16;
|
|
}
|
|
|
|
CP_WRITE_MMU_DWORD(ppdev, 0, 0, ulFgColor);
|
|
CP_WRITE_MMU_DWORD(ppdev, 0, 4, ulBgColor);
|
|
}
|
|
|
|
CP_ROUTING_CTRL(ppdev, pjBase, CPU_MIX_DATA);
|
|
|
|
dx = pptlSrc->x - prclDst->left;
|
|
dy = pptlSrc->y - prclDst->top; // Add to destination to get source
|
|
|
|
pjSrcScan0 = psoSrc->pvScan0;
|
|
|
|
DISPDBG((2,"lSrcDelta(%x)", psoSrc->lDelta));
|
|
|
|
do {
|
|
ULONG ulDst;
|
|
RECTL rclSrc;
|
|
RECTL rclDst;
|
|
LONG xBitsPad;
|
|
LONG xBitsUsed;
|
|
LONG xBytesPad;
|
|
|
|
//
|
|
// load lSrcDelta inside the loop because we adjust it later.
|
|
//
|
|
|
|
lSrcDelta = psoSrc->lDelta;
|
|
|
|
rclDst = *prcl;
|
|
rclSrc.left = rclDst.left + dx;
|
|
rclSrc.right = rclDst.right + dx;
|
|
rclSrc.top = rclDst.top + dy;
|
|
rclSrc.bottom = rclDst.bottom + dy;
|
|
|
|
// x = prcl->left;
|
|
// y = prcl->top;
|
|
|
|
//
|
|
// Calculate number of bits used in first partial.
|
|
//
|
|
|
|
xBitsPad = rclSrc.left & 7;
|
|
xBitsUsed = min((8-xBitsPad),(rclSrc.right-rclSrc.left));
|
|
xBytesPad = rclDst.left & 3;
|
|
|
|
if (xBitsPad != 0) // (0 < xBitsUsed < 8)
|
|
{
|
|
|
|
DISPDBG((2,"xBitsUsed(%d) xBitsPad(%d)", xBitsUsed, xBitsPad));
|
|
DISPDBG((2,"rclSrc(%d,%d,%d,%d) rclDst(%d,%d,%d,%d)",
|
|
rclSrc.left,
|
|
rclSrc.top,
|
|
rclSrc.right,
|
|
rclSrc.bottom,
|
|
rclDst.left,
|
|
rclDst.top,
|
|
rclDst.right,
|
|
rclDst.bottom));
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
// Do the column of the first xBitsUsed pixels
|
|
|
|
if (!bW32p)
|
|
{
|
|
CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_8_BIT);
|
|
}
|
|
|
|
CP_XCNT(ppdev, pjBase, ((xBitsUsed * cBpp) - 1));
|
|
CP_YCNT(ppdev, pjBase, (rclDst.bottom - rclDst.top - 1));
|
|
|
|
pjSrc = pjSrcScan0 + rclSrc.top * lSrcDelta
|
|
+ (rclSrc.left >> 3);
|
|
|
|
ulDst = (rclDst.top * lDelta) + (cBpp * rclDst.left);
|
|
ulDst += xyOffset;
|
|
|
|
if (bW32p)
|
|
{
|
|
// We will align the data ourselves.
|
|
CP_MIX_ADDR(ppdev, pjBase, 0);
|
|
CP_MIX_Y_OFFSET(ppdev, pjBase, -1);
|
|
}
|
|
|
|
CP_MMU_BP2(ppdev, pjBase, ulDst);
|
|
|
|
CP_DST_ADDR(ppdev, pjBase, ulDst);
|
|
|
|
if (bW32p) WAIT_FOR_BUSY_ACL(ppdev, pjBase);
|
|
|
|
if (cBpp == 1)
|
|
{
|
|
LONG i;
|
|
|
|
for (i = rclDst.bottom - rclDst.top; i; i--)
|
|
{
|
|
CP_WRITE_MMU_BYTE(ppdev, 2, 0, jReverse[(*pjSrc << xBitsPad) & 0xff]);
|
|
pjSrc += lSrcDelta;
|
|
}
|
|
}
|
|
else // if (cBpp == 2)
|
|
{
|
|
LONG i;
|
|
WORD wTmp;
|
|
BYTE * pjCvt = (BYTE *) &wTmp;
|
|
|
|
for (i = rclDst.bottom - rclDst.top; i; i--)
|
|
{
|
|
wTmp = wReverse2x[(*pjSrc << xBitsPad) & 0xff];
|
|
CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjCvt[0]);
|
|
if (xBitsUsed > 4)
|
|
{
|
|
CP_WRITE_MMU_BYTE(ppdev, 2, 1, pjCvt[1]);
|
|
}
|
|
pjSrc += lSrcDelta;
|
|
}
|
|
}
|
|
|
|
rclSrc.left += xBitsUsed;
|
|
rclDst.left += xBitsUsed;
|
|
}
|
|
|
|
// If the entire blt wasn't contained in the first partial byte,
|
|
// the we have to do the rest.
|
|
|
|
if (rclSrc.left < rclSrc.right)
|
|
{
|
|
DISPDBG((2,"rclSrc(%d,%d,%d,%d) rclDst(%d,%d,%d,%d)",
|
|
rclSrc.left,
|
|
rclSrc.top,
|
|
rclSrc.right,
|
|
rclSrc.bottom,
|
|
rclDst.left,
|
|
rclDst.top,
|
|
rclDst.right,
|
|
rclDst.bottom));
|
|
|
|
//
|
|
// Legend has it that we need a WAIT_FOR_IDLE_ACL, instead of just
|
|
// a WAIT_FOR_EMPTY_ACL_QUEUE, to prevent hanging W32
|
|
//
|
|
|
|
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
|
|
|
|
if (!bW32p)
|
|
{
|
|
CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_32_BIT);
|
|
}
|
|
|
|
CP_XCNT(ppdev, pjBase, (cBpp * (rclDst.right - rclDst.left) - 1));
|
|
CP_YCNT(ppdev, pjBase, (rclDst.bottom - rclDst.top - 1));
|
|
|
|
cjSrc = (((rclSrc.right * cBpp) + 7) >> 3) -
|
|
((rclSrc.left * cBpp) >> 3); // # bytes to transfer
|
|
|
|
culSrc = (cjSrc >> 2);
|
|
cjTrail = (cjSrc & 3);
|
|
|
|
DISPDBG((2,"cjSrc(%d)", cjSrc));
|
|
DISPDBG((2,"culSrc(%d)", culSrc));
|
|
DISPDBG((2,"cjTrail(%d)", cjTrail));
|
|
|
|
pjSrc = pjSrcScan0 + rclSrc.top * lSrcDelta
|
|
+ (rclSrc.left >> 3);
|
|
|
|
DISPDBG((2,"pjSrc(%x)", pjSrc));
|
|
|
|
ulDst = (rclDst.top * lDelta) + (cBpp * rclDst.left);
|
|
ulDst += xyOffset;
|
|
|
|
if (bW32p)
|
|
{
|
|
// We will align the data ourselves.
|
|
CP_MIX_ADDR(ppdev, pjBase, 0);
|
|
CP_MIX_Y_OFFSET(ppdev, pjBase, -1);
|
|
}
|
|
CP_MMU_BP2(ppdev, pjBase, ulDst);
|
|
|
|
CP_DST_ADDR(ppdev, pjBase, ulDst);
|
|
|
|
if (bW32p) WAIT_FOR_BUSY_ACL(ppdev, pjBase);
|
|
|
|
{
|
|
LONG i;
|
|
LONG j;
|
|
|
|
if (cBpp == 1)
|
|
{
|
|
lSrcDelta -= cjSrc;
|
|
|
|
for (i = rclDst.bottom - rclDst.top; i; i--)
|
|
{
|
|
ULONG cjTmp = cjTrail;
|
|
volatile BYTE * pjTmp;
|
|
volatile ULONG * pulTmp;
|
|
|
|
DISPDBG((2,"pjSrc(%x)", pjSrc));
|
|
|
|
for (j = culSrc; j; j--)
|
|
{
|
|
ULONG ulTmp = 0;
|
|
|
|
ulTmp |= (ULONG)jReverse[*pjSrc++];
|
|
ulTmp |= (ULONG)jReverse[*pjSrc++] << 8;
|
|
ulTmp |= (ULONG)jReverse[*pjSrc++] << 16;
|
|
ulTmp |= (ULONG)jReverse[*pjSrc++] << 24;
|
|
CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp);
|
|
|
|
DISPDBG((2,"Src(%08x) Tmp(%08x)",
|
|
*((ULONG *)(pjSrc-4)),
|
|
ulTmp
|
|
));
|
|
}
|
|
|
|
if (bW32p)
|
|
{
|
|
int ndx = 0;
|
|
while (cjTmp--)
|
|
{
|
|
CP_WRITE_MMU_BYTE(ppdev, 2, ndx, jReverse[*pjSrc]);
|
|
pjSrc++;
|
|
ndx++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (cjTmp)
|
|
{
|
|
ULONG ulTmp = 0;
|
|
if (cjTmp == 1) goto do_1_byte;
|
|
if (cjTmp == 2) goto do_2_bytes;
|
|
|
|
//
|
|
// do all three bytes of the partial
|
|
//
|
|
|
|
ulTmp |= (ULONG)jReverse[pjSrc[2]] << 16;
|
|
do_2_bytes:
|
|
ulTmp |= (ULONG)jReverse[pjSrc[1]] << 8;
|
|
do_1_byte:
|
|
ulTmp |= (ULONG)jReverse[pjSrc[0]];
|
|
|
|
//*pulTmp = ulTmp;
|
|
CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp);
|
|
|
|
pjSrc += cjTmp;
|
|
}
|
|
}
|
|
|
|
pjSrc += lSrcDelta;
|
|
}
|
|
}
|
|
else // if (cBpp == 2)
|
|
{
|
|
lSrcDelta -= (cjSrc + 1) >> 1;
|
|
|
|
for (i = rclDst.bottom - rclDst.top; i; i--)
|
|
{
|
|
ULONG cjTmp = cjTrail;
|
|
int ndx = 0;
|
|
|
|
DISPDBG((2,"pjSrc(%x)", pjSrc));
|
|
|
|
for (j = culSrc; j; j--)
|
|
{
|
|
ULONG ulTmp;
|
|
|
|
ulTmp = (ULONG)wReverse2x[*pjSrc++];
|
|
ulTmp |= (ULONG)wReverse2x[*pjSrc++] << 16;
|
|
CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp);
|
|
}
|
|
|
|
if (bW32p)
|
|
{
|
|
while (cjTmp--)
|
|
{
|
|
WORD wCvt;
|
|
BYTE * pjCvt = (BYTE *) &wCvt;
|
|
|
|
wCvt = wReverse2x[*pjSrc++];
|
|
CP_WRITE_MMU_BYTE(ppdev, 2, ndx, pjCvt[0]);
|
|
ndx++;
|
|
if (cjTmp)
|
|
{
|
|
CP_WRITE_MMU_BYTE(ppdev, 2, ndx, pjCvt[1]);
|
|
ndx++;
|
|
cjTmp--;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (cjTmp)
|
|
{
|
|
ULONG ulTmp;
|
|
|
|
ulTmp = (ULONG)wReverse2x[pjSrc[0]];
|
|
ulTmp |= (ULONG)wReverse2x[pjSrc[1]] << 16;
|
|
CP_WRITE_MMU_DWORD(ppdev, 2, 0, ulTmp);
|
|
|
|
pjSrc += (cjTmp+1) >> 1;
|
|
}
|
|
}
|
|
|
|
pjSrc += lSrcDelta;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
prcl++;
|
|
} while (--c != 0);
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_ROUTING_CTRL(ppdev, pjBase, 0);
|
|
if (!bW32p)
|
|
{
|
|
CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_8_BIT);
|
|
}
|
|
}
|
|
|
|
VOID vXferBlt8i(
|
|
PDEV* ppdev,
|
|
LONG c, // Count of rectangles, can't be zero
|
|
RECTL* prcl, // Array of relative coordinates destination rectangles
|
|
ROP4 rop4, // Obvious?
|
|
SURFOBJ* psoSrc, // Source surface
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst, // Original unclipped destination rectangle
|
|
XLATEOBJ* pxlo) // Not used
|
|
{
|
|
BYTE* pjBase = ppdev->pjBase;
|
|
BYTE* pjSrcScan0 = (BYTE*) psoSrc->pvScan0;
|
|
LONG lDeltaDst = ppdev->lDelta;
|
|
LONG lDeltaSrc = psoSrc->lDelta;
|
|
POINTL ptlSrc = *pptlSrc;
|
|
RECTL rclDst = *prclDst;
|
|
LONG cBpp = ppdev->cBpp;
|
|
SIZEL sizlBlt;
|
|
ULONG ulDstAddr;
|
|
BYTE* pjSrc;
|
|
INT ix, iy;
|
|
LONG dx;
|
|
LONG dy; // Add delta to destination to get source
|
|
LONG cjLead;
|
|
LONG cjTrail;
|
|
LONG culMiddle;
|
|
LONG xyOffset = (cBpp * ppdev->xOffset) +
|
|
(lDeltaDst * ppdev->yOffset);
|
|
|
|
//
|
|
// The src-dst delta will be the same for all rectangles
|
|
//
|
|
|
|
dx = ptlSrc.x - rclDst.left;
|
|
dy = ptlSrc.y - rclDst.top;
|
|
|
|
// Note: Legend has it that if we don't wait for the ACL to become idle,
|
|
// then the code will hang on the W32, but not on the W32i.
|
|
//
|
|
// Since we do a WAIT_FOR_IDLE_ACL we don't need to
|
|
// WAIT_FOR_EMPTY_ACL_QUEUE
|
|
|
|
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
|
|
CP_ROUTING_CTRL(ppdev, pjBase, CPU_SOURCE_DATA);
|
|
CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
|
|
CP_DST_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));
|
|
|
|
do {
|
|
// Calculate blt dimensions in bytes
|
|
|
|
sizlBlt.cx = cBpp * (prcl->right - prcl->left);
|
|
sizlBlt.cy = prcl->bottom - prcl->top;
|
|
|
|
pjSrc = pjSrcScan0 +
|
|
((prcl->top + dy) * lDeltaSrc) +
|
|
((prcl->left + dx) * cBpp);
|
|
|
|
cjTrail = cjLead = (LONG)((ULONG_PTR)pjSrc);
|
|
cjLead = aulLeadCnt[cjLead & 3];
|
|
if (cjLead < sizlBlt.cx)
|
|
{
|
|
cjTrail += sizlBlt.cx;
|
|
cjTrail &= 3;
|
|
culMiddle = (sizlBlt.cx - (cjLead + cjTrail)) >> 2;
|
|
}
|
|
else
|
|
{
|
|
cjLead = sizlBlt.cx;
|
|
cjTrail = 0;
|
|
culMiddle = 0;
|
|
}
|
|
|
|
ASSERTDD(culMiddle >= 0, "vXferBlt8i: culMiddle < 0");
|
|
|
|
ulDstAddr = (prcl->top * lDeltaDst) +
|
|
(prcl->left * cBpp) +
|
|
(xyOffset);
|
|
|
|
if ((sizlBlt.cx - (cjLead + cjTrail)) & 3)
|
|
DISPDBG((0, "WARNING: cx - (cjLead+cjTail) not multiple of 4"));
|
|
|
|
DISPDBG((8, "rclSrc(%d,%d,%d,%d)",
|
|
prcl->left+dx,
|
|
prcl->top+dy,
|
|
prcl->right+dx,
|
|
prcl->bottom+dy
|
|
));
|
|
|
|
DISPDBG((8, "rclDst(%d,%d,%d,%d)",
|
|
prcl->left,
|
|
prcl->top,
|
|
prcl->right,
|
|
prcl->bottom
|
|
));
|
|
|
|
DISPDBG((8, "pjSrc(%x) cx(%d) ulDstAddr(%xh) (%d,%d,%d)",
|
|
pjSrc,
|
|
sizlBlt.cx,
|
|
ulDstAddr,
|
|
cjLead,
|
|
culMiddle,
|
|
cjTrail
|
|
));
|
|
|
|
if (cjLead)
|
|
{
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_XCNT(ppdev, pjBase, (cjLead - 1));
|
|
CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
|
|
CP_MMU_BP2(ppdev, pjBase, (ulDstAddr));
|
|
afnXferI_Narrow[cjLead](ppdev,
|
|
pjSrc,
|
|
0,
|
|
sizlBlt.cy,
|
|
lDeltaSrc);
|
|
}
|
|
|
|
if (cjTrail)
|
|
{
|
|
LONG cjOffset = cjLead + (culMiddle<<2);
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_XCNT(ppdev, pjBase, (cjTrail - 1));
|
|
CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
|
|
CP_MMU_BP2(ppdev, pjBase, (ulDstAddr+cjOffset));
|
|
afnXferI_Narrow[cjTrail](ppdev,
|
|
(pjSrc+cjOffset),
|
|
0,
|
|
sizlBlt.cy,
|
|
lDeltaSrc);
|
|
}
|
|
|
|
if (culMiddle)
|
|
{
|
|
LONG cjOffset = cjLead;
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_XCNT(ppdev, pjBase, ((culMiddle<<2) - 1));
|
|
CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
|
|
CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_32_BIT);
|
|
CP_MMU_BP2(ppdev, pjBase, (ulDstAddr+cjOffset));
|
|
vXfer_DWORDS(ppdev,
|
|
(pjSrc+cjOffset),
|
|
culMiddle,
|
|
sizlBlt.cy,
|
|
lDeltaSrc);
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_BUS_SIZE(ppdev, pjBase, VIRTUAL_BUS_8_BIT);
|
|
}
|
|
|
|
prcl++;
|
|
} while (--c != 0);
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_ROUTING_CTRL(ppdev, pjBase, 0);
|
|
}
|
|
|
|
VOID vXferBlt8p(
|
|
PDEV* ppdev,
|
|
LONG c, // Count of rectangles, can't be zero
|
|
RECTL* prcl, // Array of relative coordinates destination rectangles
|
|
ROP4 rop4, // Obvious?
|
|
SURFOBJ* psoSrc, // Source surface
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst, // Original unclipped destination rectangle
|
|
XLATEOBJ* pxlo) // Not used
|
|
{
|
|
BYTE* pjBase = ppdev->pjBase;
|
|
BYTE* pjSrcScan0 = (BYTE*) psoSrc->pvScan0;
|
|
LONG lDeltaDst = ppdev->lDelta;
|
|
LONG lDeltaSrc = psoSrc->lDelta;
|
|
POINTL ptlSrc = *pptlSrc;
|
|
RECTL rclDst = *prclDst;
|
|
LONG cBpp = ppdev->cBpp;
|
|
SIZEL sizlBlt;
|
|
ULONG ulDstAddr;
|
|
BYTE* pjSrc;
|
|
INT ix, iy;
|
|
LONG dx;
|
|
LONG dy; // Add delta to destination to get source
|
|
LONG iLeadNdx;
|
|
LONG cjLead;
|
|
LONG cjTrail;
|
|
LONG culMiddle;
|
|
LONG xyOffset = (cBpp * ppdev->xOffset) +
|
|
(lDeltaDst * ppdev->yOffset);
|
|
|
|
//
|
|
// The src-dst delta will be the same for all rectangles
|
|
//
|
|
|
|
dx = ptlSrc.x - rclDst.left;
|
|
dy = ptlSrc.y - rclDst.top;
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_ROUTING_CTRL(ppdev, pjBase, CPU_SOURCE_DATA);
|
|
CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
|
|
CP_DST_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));
|
|
CP_SRC_ADDR(ppdev, pjBase, 0);
|
|
CP_SRC_Y_OFFSET(ppdev, pjBase, -1);
|
|
|
|
do {
|
|
// Calculate blt dimensions in bytes
|
|
|
|
sizlBlt.cx = cBpp * (prcl->right - prcl->left);
|
|
sizlBlt.cy = prcl->bottom - prcl->top;
|
|
|
|
pjSrc = pjSrcScan0 +
|
|
((prcl->top + dy) * lDeltaSrc) +
|
|
((prcl->left + dx) * cBpp);
|
|
|
|
cjTrail = iLeadNdx = (LONG)((ULONG_PTR)pjSrc);
|
|
iLeadNdx &= 3;
|
|
cjLead = aulLeadCnt[iLeadNdx];
|
|
if (cjLead < sizlBlt.cx)
|
|
{
|
|
cjTrail += sizlBlt.cx;
|
|
cjTrail &= 3;
|
|
culMiddle = (sizlBlt.cx - (cjLead + cjTrail)) >> 2;
|
|
}
|
|
else
|
|
{
|
|
cjLead = sizlBlt.cx;
|
|
cjTrail = 0;
|
|
culMiddle = 0;
|
|
}
|
|
|
|
ASSERTDD(culMiddle >= 0, "vXferBlt8i: culMiddle < 0");
|
|
|
|
ulDstAddr = (prcl->top * lDeltaDst) +
|
|
(prcl->left * cBpp) +
|
|
(xyOffset);
|
|
|
|
if ((sizlBlt.cx - (cjLead + cjTrail)) & 3)
|
|
DISPDBG((0, "WARNING: cx - (cjLead+cjTail) not multiple of 4"));
|
|
|
|
DISPDBG((8, "rclSrc(%d,%d,%d,%d)",
|
|
prcl->left+dx,
|
|
prcl->top+dy,
|
|
prcl->right+dx,
|
|
prcl->bottom+dy
|
|
));
|
|
|
|
DISPDBG((8, "rclDst(%d,%d,%d,%d)",
|
|
prcl->left,
|
|
prcl->top,
|
|
prcl->right,
|
|
prcl->bottom
|
|
));
|
|
|
|
DISPDBG((8, "pjSrc(%x) cx(%d) ulDstAddr(%xh) (%d,%d,%d)",
|
|
pjSrc,
|
|
sizlBlt.cx,
|
|
ulDstAddr,
|
|
cjLead,
|
|
culMiddle,
|
|
cjTrail
|
|
));
|
|
|
|
if (cjLead)
|
|
{
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_XCNT(ppdev, pjBase, (cjLead - 1));
|
|
CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
|
|
// The next two turn off src to dst alignment
|
|
CP_DST_ADDR(ppdev, pjBase, (ulDstAddr));
|
|
WAIT_FOR_BUSY_ACL(ppdev, pjBase);
|
|
afnXferP_Narrow[cjLead](ppdev,
|
|
pjSrc,
|
|
0,
|
|
sizlBlt.cy,
|
|
lDeltaSrc);
|
|
}
|
|
|
|
if (cjTrail)
|
|
{
|
|
LONG cjOffset = cjLead + (culMiddle<<2);
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_XCNT(ppdev, pjBase, (cjTrail - 1));
|
|
CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
|
|
// The next two turn off src to dst alignment
|
|
CP_DST_ADDR(ppdev, pjBase, (ulDstAddr+cjOffset));
|
|
WAIT_FOR_BUSY_ACL(ppdev, pjBase);
|
|
afnXferP_Narrow[cjTrail](ppdev,
|
|
(pjSrc+cjOffset),
|
|
0,
|
|
sizlBlt.cy,
|
|
lDeltaSrc);
|
|
}
|
|
|
|
if (culMiddle)
|
|
{
|
|
LONG cjOffset = cjLead;
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_XCNT(ppdev, pjBase, ((culMiddle<<2) - 1));
|
|
CP_YCNT(ppdev, pjBase, (sizlBlt.cy - 1));
|
|
// The next two turn off src to dst alignment
|
|
CP_DST_ADDR(ppdev, pjBase, (ulDstAddr+cjOffset));
|
|
WAIT_FOR_BUSY_ACL(ppdev, pjBase);
|
|
vXfer_DWORDS(ppdev,
|
|
(pjSrc+cjOffset),
|
|
culMiddle,
|
|
sizlBlt.cy,
|
|
lDeltaSrc);
|
|
}
|
|
|
|
prcl++;
|
|
} while (--c != 0);
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_ROUTING_CTRL(ppdev, pjBase, 0);
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
// N DWORD low level blt routines for vXferNativeI and vXferNativeP
|
|
|
|
|
|
// A DWORD at a time
|
|
|
|
VOID vXfer_DWORDS(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
{
    // Writes 'cy' scans of 'culX' DWORDs each to offset 0 of MMU aperture 2.
    // 'pjSrc' points at the first DWORD of the first scan and 'lDeltaSrc'
    // is the byte distance from one source scan to the next.
    //
    // We had better be in 32 bit virtual bus mode

    BYTE*   pjBase = ppdev->pjBase;
    ULONG*  pulScan;
    LONG    cScansLeft;
    LONG    iDword;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        pulScan = (ULONG*) pjSrc;

        for (iDword = 0; iDword < culX; iDword++)
        {
            CP_WRITE_MMU_DWORD(ppdev, 2, 0, pulScan[iDword]);
        }

        pjSrc += lDeltaSrc;
    }
}
|
|
|
|
// A BYTE at a time
|
|
|
|
VOID vXfer_BYTES(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
{
    // Same data as vXfer_DWORDS, but written one byte at a time: each of
    // the 'cy' scans sends (culX << 2) bytes to offset 0 of MMU aperture 2.
    //
    // We had better be in 8 bit virtual bus mode

    BYTE*   pjBase      = ppdev->pjBase;
    LONG    cjPerScan   = (culX << 2);
    LONG    cScansLeft;
    LONG    iByte;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        for (iByte = 0; iByte < cjPerScan; iByte++)
        {
            CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[iByte]);
        }

        pjSrc += lDeltaSrc;
    }
}
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
// Narrow low level blt routines for vXferNativeI
|
|
|
|
VOID vXferI_1_Byte(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
{
    // One-byte-wide strip: sends the first byte of each of 'cy' source
    // scans to offset 0 of MMU aperture 2.  'culX' is not used.

    BYTE*   pjBase = ppdev->pjBase;
    LONG    cScansLeft;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[0]);

        pjSrc += lDeltaSrc;
    }
}
|
|
|
|
VOID vXferI_2_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
{
    // Two-byte-wide strip: sends the first two bytes of each of 'cy'
    // source scans, in order, to offset 0 of MMU aperture 2.  'culX' is
    // not used.

    BYTE*   pjBase = ppdev->pjBase;
    LONG    cScansLeft;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[0]);
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[1]);

        pjSrc += lDeltaSrc;
    }
}
|
|
|
|
VOID vXferI_3_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG culX, LONG cy, LONG lDeltaSrc)
{
    // Three-byte-wide strip: sends the first three bytes of each of 'cy'
    // source scans, in order, to offset 0 of MMU aperture 2.  'culX' is
    // not used.

    BYTE*   pjBase = ppdev->pjBase;
    LONG    cScansLeft;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[0]);
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[1]);
        CP_WRITE_MMU_BYTE(ppdev, 2, 0, pjSrc[2]);

        pjSrc += lDeltaSrc;
    }
}
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
// Narrow low level blt routines for vXferNativeP
|
|
|
|
VOID vXferP_1_Byte(PPDEV ppdev, BYTE* pjSrc, LONG index, LONG cy, LONG lDeltaSrc)
{
    // One-byte-wide strip: sends the first byte of each of 'cy' source
    // scans to byte offset 'index' of MMU aperture 2.

    BYTE*   pjBase = ppdev->pjBase;
    LONG    cScansLeft;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        CP_WRITE_MMU_BYTE(ppdev, 2, index, pjSrc[0]);

        pjSrc += lDeltaSrc;
    }
}
|
|
|
|
VOID vXferP_2_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG index, LONG cy, LONG lDeltaSrc)
{
    // Two-byte-wide strip: sends the first two bytes of each of 'cy'
    // source scans as a single WORD write to byte offset 'index' of MMU
    // aperture 2.

    BYTE*   pjBase = ppdev->pjBase;
    LONG    cScansLeft;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        CP_WRITE_MMU_WORD(ppdev, 2, index, *((WORD*)pjSrc));

        pjSrc += lDeltaSrc;
    }
}
|
|
|
|
VOID vXferP_3_Bytes(PPDEV ppdev, BYTE* pjSrc, LONG index, LONG cy, LONG lDeltaSrc)
{
    // Three-byte-wide strip: sends the first three bytes of each of 'cy'
    // source scans to MMU aperture 2 starting at byte offset 'index', as
    // one BYTE write plus one WORD write.  The order is picked from the
    // parity of 'index' so that the WORD write always lands on an even
    // offset.

    BYTE*   pjBase        = ppdev->pjBase;
    LONG    bOddIndex     = (index & 1);
    LONG    cScansLeft;

    for (cScansLeft = cy; cScansLeft > 0; cScansLeft--)
    {
        if (bOddIndex)
        {
            // Odd start: single byte first, then the word.

            CP_WRITE_MMU_BYTE(ppdev, 2, index, pjSrc[0]);
            CP_WRITE_MMU_WORD(ppdev, 2, index+1, *((WORD*)(pjSrc + 1)));
        }
        else
        {
            // Even start: word first, then the remaining byte.

            CP_WRITE_MMU_WORD(ppdev, 2, index, *((WORD*)pjSrc));
            CP_WRITE_MMU_BYTE(ppdev, 2, index+2, pjSrc[2]);
        }

        pjSrc += lDeltaSrc;
    }
}
|
|
|
|
// This routine was added to perform accelerated host to screen blts for the
|
|
// ET6000. The W32 had a path from host memory to display memory which allowed
|
|
// ROPs to be performed as the data was transferred. The ET6000 does not have
|
|
// that feature, so to provide accelerated host to screen support we must
|
|
// buffer each scanline of the source in offscreen memory and then perform
|
|
// a blt to move it into the appropriate area of display memory. This is
|
|
// much more efficient than hand coding each rop or punting to GDI.
|
|
|
|
VOID vXferET6000(
|
|
PDEV* ppdev,
|
|
LONG c, // Count of rectangles, can't be zero
|
|
RECTL* prcl, // Array of relative coordinates destination rectangles
|
|
ROP4 rop4, // Obvious?
|
|
SURFOBJ* psoSrc, // Source surface
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst, // Original unclipped destination rectangle
|
|
XLATEOBJ* pxlo) // Not used
|
|
{
|
|
BYTE* pjBase = ppdev->pjBase;
|
|
BYTE* pjSrcScan0 = (BYTE*) psoSrc->pvScan0;
|
|
LONG lDeltaDst = ppdev->lDelta;
|
|
LONG lDeltaSrc = psoSrc->lDelta;
|
|
POINTL ptlSrc = *pptlSrc;
|
|
RECTL rclDst = *prclDst;
|
|
LONG cBpp = ppdev->cBpp;
|
|
SIZEL sizlBlt;
|
|
ULONG ulDstAddr;
|
|
BYTE* pjSrc;
|
|
BYTE* pjDst;
|
|
INT ix, iy;
|
|
LONG dx;
|
|
LONG dy; // Add delta to destination to get source
|
|
LONG iLeadNdx;
|
|
LONG cjLead;
|
|
LONG cjTrail;
|
|
LONG culMiddle;
|
|
LONG xyOffset = (cBpp * ppdev->xOffset) +
|
|
(lDeltaDst * ppdev->yOffset);
|
|
ULONG ulBltBufferOffset = (cBpp * ppdev->pohBltBuffer->x) +
|
|
(lDeltaDst * ppdev->pohBltBuffer->y);
|
|
ULONG BltScanOffset = 0;
|
|
|
|
//
|
|
// The src-dst delta will be the same for all rectangles
|
|
//
|
|
|
|
dx = ptlSrc.x - rclDst.left;
|
|
dy = ptlSrc.y - rclDst.top;
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_FG_ROP(ppdev, pjBase, (rop4 >> 8));
|
|
CP_BK_ROP(ppdev, pjBase, (rop4 & 0xff));
|
|
CP_SRC_WRAP(ppdev, pjBase, NO_PATTERN_WRAP);
|
|
CP_SRC_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));
|
|
CP_DST_Y_OFFSET(ppdev, pjBase, (lDeltaDst - 1));
|
|
|
|
do
|
|
{
|
|
BYTE* pjTmp;
|
|
|
|
// Calculate blt dimensions in bytes
|
|
|
|
sizlBlt.cx = cBpp * (prcl->right - prcl->left);
|
|
sizlBlt.cy = prcl->bottom - prcl->top;
|
|
|
|
pjSrc = pjSrcScan0 +
|
|
((prcl->top + dy) * lDeltaSrc) +
|
|
((prcl->left + dx) * cBpp);
|
|
|
|
pjTmp = pjSrc;
|
|
|
|
cjTrail = iLeadNdx = (LONG)((ULONG_PTR)pjSrc);
|
|
iLeadNdx &= 3;
|
|
cjLead = aulLeadCnt[iLeadNdx];
|
|
if (cjLead < sizlBlt.cx)
|
|
{
|
|
cjTrail += sizlBlt.cx;
|
|
cjTrail &= 3;
|
|
culMiddle = (sizlBlt.cx - (cjLead + cjTrail)) >> 2;
|
|
}
|
|
else
|
|
{
|
|
cjLead = sizlBlt.cx;
|
|
cjTrail = 0;
|
|
culMiddle = 0;
|
|
}
|
|
|
|
ASSERTDD(culMiddle >= 0, "vXferET6000: culMiddle < 0");
|
|
|
|
ulDstAddr = (prcl->top * lDeltaDst) +
|
|
(prcl->left * cBpp) +
|
|
(xyOffset);
|
|
|
|
if ((sizlBlt.cx - (cjLead + cjTrail)) & 3)
|
|
DISPDBG((0, "WARNING: cx - (cjLead+cjTail) not multiple of 4"));
|
|
|
|
DISPDBG((8, "rclSrc(%d,%d,%d,%d)",
|
|
prcl->left+dx,
|
|
prcl->top+dy,
|
|
prcl->right+dx,
|
|
prcl->bottom+dy
|
|
));
|
|
|
|
DISPDBG((8, "rclDst(%d,%d,%d,%d)",
|
|
prcl->left,
|
|
prcl->top,
|
|
prcl->right,
|
|
prcl->bottom
|
|
));
|
|
|
|
DISPDBG((8, "pjSrc(%x) cx(%d) ulDstAddr(%xh) (%d,%d,%d)",
|
|
pjSrc,
|
|
sizlBlt.cx,
|
|
ulDstAddr,
|
|
cjLead,
|
|
culMiddle,
|
|
cjTrail
|
|
));
|
|
|
|
for (iy = 0; iy < sizlBlt.cy; iy++)
|
|
{
|
|
LONG ix, lScanLineOffset;
|
|
|
|
// We'll first load the first scan line of
|
|
// the BltBuffer and then load the second. The second scan line
|
|
// will be loaded into the BltBuffer while the first is still being
|
|
// processed. We'll alternate between the two segments of our
|
|
// BltBuffer until all scans have been processed.
|
|
|
|
pjDst = ppdev->pjScreen + ulBltBufferOffset + BltScanOffset;
|
|
|
|
if (cjLead)
|
|
{
|
|
for (ix = 0; ix < cjLead; ix++)
|
|
{
|
|
*pjDst++ = *pjTmp++;
|
|
}
|
|
}
|
|
|
|
if (culMiddle)
|
|
{
|
|
for (ix = 0; ix < culMiddle; ix++)
|
|
{
|
|
*((ULONG*)pjDst)++ = *((ULONG*)pjTmp)++;
|
|
}
|
|
}
|
|
if (cjTrail)
|
|
{
|
|
for (ix = 0; ix < cjTrail; ix++)
|
|
{
|
|
*pjDst++ = *pjTmp++;
|
|
}
|
|
}
|
|
|
|
// Now that we've loaded our scanline into a segment of our BltBuffer,
|
|
// we need to trigger an accelerator operation to transfer it into
|
|
// visible screen memory. Our static stuff will have already been setup
|
|
// prior to entering any of our loops.
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_XCNT(ppdev, pjBase, (sizlBlt.cx - 1));
|
|
CP_YCNT(ppdev, pjBase, 0); // Only 1 scan at a time
|
|
|
|
CP_SRC_ADDR(ppdev, pjBase, (ulBltBufferOffset + BltScanOffset));
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_DST_ADDR(ppdev, pjBase, ulDstAddr);
|
|
|
|
BltScanOffset ^= ppdev->lBltBufferPitch;
|
|
pjTmp = (pjSrc += lDeltaSrc);
|
|
|
|
ulDstAddr += lDeltaDst;
|
|
} // next cy
|
|
|
|
prcl++;
|
|
} while (--c != 0);
|
|
}
|
|
/**************************************************************************
|
|
*
|
|
* Does a monochrome expansion to video memory.
|
|
*
|
|
**************************************************************************/
|
|
|
|
VOID vET6000SlowXfer1bpp( // Type FNXFER
|
|
PDEV* ppdev,
|
|
LONG c, // Count of rectangles, can't be zero
|
|
RECTL* prcl, // List of destination rectangles, in relative
|
|
// coordinates
|
|
ROP4 rop4, // Actually had better be a rop3
|
|
SURFOBJ* psoSrc, // Source surface
|
|
POINTL* pptlSrc, // Original unclipped source point
|
|
RECTL* prclDst, // Original unclipped destination rectangle
|
|
XLATEOBJ* pxlo) // Translate that provides color-expansion information
|
|
{
|
|
LONG dx;
|
|
LONG dy;
|
|
LONG lSrcDelta;
|
|
BYTE* pjSrcScan0;
|
|
BYTE* pjSrc;
|
|
LONG cjSrc;
|
|
LONG cjTrail;
|
|
LONG culSrc;
|
|
BYTE jFgRop3;
|
|
BYTE jBgRop3;
|
|
|
|
ULONG ulSolidColorOffset = ppdev->ulSolidColorOffset;
|
|
BYTE* pjBase = ppdev->pjBase;
|
|
LONG lDelta = ppdev->lDelta;
|
|
LONG cBpp = ppdev->cBpp;
|
|
ULONG ulFgColor = pxlo->pulXlate[1];
|
|
ULONG ulBgColor = pxlo->pulXlate[0];
|
|
|
|
LONG xyOffset = (ppdev->cBpp * ppdev->xOffset) +
|
|
(ppdev->yOffset * ppdev->lDelta);
|
|
LONG lBltBuffer = (ppdev->pohBltBuffer->x * ppdev->cBpp) +
|
|
(ppdev->pohBltBuffer->y * ppdev->lDelta);
|
|
|
|
DISPDBG((10,"vET6000SlowXfer1bpp called"));
|
|
|
|
DISPDBG((11,"rop4(%04x)", rop4));
|
|
|
|
ASSERTDD(c > 0, "Can't handle zero rectangles");
|
|
ASSERTDD(pptlSrc != NULL && psoSrc != NULL, "Can't have NULL sources");
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
jFgRop3 = (BYTE)(rop4 >> 8); // point to src color where src is indicated
|
|
// point to pat color where src is indicated
|
|
|
|
if ((BYTE) rop4 != R3_NOP)
|
|
{
|
|
jBgRop3 = (BYTE)((rop4 & 0xc3) | ((rop4 & 0xf0) >> 2));
|
|
}
|
|
else
|
|
{
|
|
jBgRop3 = (BYTE) rop4;
|
|
}
|
|
|
|
DISPDBG((11,"jFgRop3(%04x), jBgRop3(%04x)", jFgRop3, jBgRop3));
|
|
|
|
CP_FG_ROP(ppdev, pjBase, jFgRop3);
|
|
CP_BK_ROP(ppdev, pjBase, jBgRop3);
|
|
CP_DST_Y_OFFSET(ppdev, pjBase, (lDelta - 1));
|
|
|
|
CP_PAT_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
|
|
CP_PAT_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
|
|
CP_SRC_WRAP(ppdev, pjBase, SOLID_COLOR_PATTERN_WRAP);
|
|
CP_SRC_Y_OFFSET(ppdev, pjBase, (SOLID_COLOR_PATTERN_OFFSET - 1));
|
|
CP_PAT_ADDR(ppdev, pjBase, ulSolidColorOffset + 4);
|
|
CP_SRC_ADDR(ppdev, pjBase, ulSolidColorOffset);
|
|
CP_PEL_DEPTH(ppdev, pjBase, (cBpp - 1) << 4);
|
|
|
|
// Here we are going to load the foreground and background colors into
|
|
// display memory. We'll use the area for solid colors that we allocated
|
|
// earlier.
|
|
|
|
{
|
|
// Set the color in offscreen memory
|
|
|
|
if (cBpp == 1)
|
|
{
|
|
ulFgColor &= 0x000000FF; // We may get some extraneous data in the
|
|
ulBgColor &= 0x000000FF; // unused portion of our color. Clear it.
|
|
ulFgColor |= ulFgColor << 8;
|
|
ulBgColor |= ulBgColor << 8;
|
|
}
|
|
if (cBpp <= 2)
|
|
{
|
|
ulFgColor &= 0x0000FFFF;
|
|
ulBgColor &= 0x0000FFFF;
|
|
ulFgColor |= ulFgColor << 16;
|
|
ulBgColor |= ulBgColor << 16;
|
|
}
|
|
|
|
// We don't want to change the colors if the accelerator is active, because
|
|
// a previous oepration might be using them.
|
|
|
|
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
|
|
|
|
*(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset) = ulFgColor;
|
|
*(PULONG)(ppdev->pjScreen + ppdev->ulSolidColorOffset + 4) = ulBgColor;
|
|
}
|
|
|
|
// This is the mix control register for the ET6000. We are setting it to
|
|
// use a mix ROP of 2, which specifies that a 0 in the mixmap selects the
|
|
// background color and 1 selects the foreground color. Bit 7 says that
|
|
// we want bit 7 of each byte in our mix data to be pixel 0. This should
|
|
// be the way that NT wants it. We also have to set our mask ROP so we
|
|
// can get the data onto the screen.
|
|
|
|
CP_ROUTING_CTRL(ppdev, pjBase, 0xB2);
|
|
|
|
dx = pptlSrc->x - prclDst->left;
|
|
dy = pptlSrc->y - prclDst->top; // Add to destination to get source
|
|
|
|
pjSrcScan0 = psoSrc->pvScan0;
|
|
|
|
DISPDBG((2,"lSrcDelta(%x)", psoSrc->lDelta));
|
|
|
|
do
|
|
{
|
|
ULONG ulDst;
|
|
RECTL rclSrc;
|
|
RECTL rclDst;
|
|
BYTE* pjTmp;
|
|
BYTE* pjDst;
|
|
LONG i;
|
|
BYTE *pjMmu1 = ppdev->pjMmu1;
|
|
long lDwords, lBytes, lStart;
|
|
int cBitsToSkip;
|
|
|
|
// load lSrcDelta inside the loop because we adjust it later.
|
|
|
|
lSrcDelta = psoSrc->lDelta;
|
|
|
|
rclDst = *prcl;
|
|
rclSrc.left = rclDst.left + dx;
|
|
rclSrc.right = rclDst.right + dx;
|
|
rclSrc.top = rclDst.top + dy;
|
|
rclSrc.bottom = rclDst.bottom + dy;
|
|
|
|
// x = prcl->left;
|
|
// y = prcl->top;
|
|
|
|
DISPDBG((2,"rclSrc(%d,%d,%d,%d) rclDst(%d,%d,%d,%d)",
|
|
rclSrc.left,
|
|
rclSrc.top,
|
|
rclSrc.right,
|
|
rclSrc.bottom,
|
|
rclDst.left,
|
|
rclDst.top,
|
|
rclDst.right,
|
|
rclDst.bottom));
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
|
|
CP_XCNT(ppdev, pjBase, ((rclSrc.right - rclSrc.left) * cBpp) - 1);
|
|
CP_YCNT(ppdev, pjBase, 0); // 1 scan at a time
|
|
|
|
pjSrc = pjSrcScan0 + rclSrc.top * lSrcDelta
|
|
+ (rclSrc.left >> 3);
|
|
cBitsToSkip = rclSrc.left % 8;
|
|
pjTmp = pjSrc;
|
|
|
|
ulDst = (rclDst.top * lDelta) + (cBpp * rclDst.left);
|
|
ulDst += xyOffset;
|
|
|
|
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
|
|
|
|
// We are going to transfer the mix map into our BltBuffer so
|
|
// we can get it to the screen.
|
|
|
|
CP_MIX_Y_OFFSET(ppdev, pjBase, 0); // 1 scan at a time
|
|
|
|
// We are using the rectangle dimensions to determine how many pixels per line to move. This
|
|
// fixes a bug exposed by the HCT when we had to clip a large temporary buffer and would draw
|
|
// using data close to the end of the buffer. We would get a protection exception depending on
|
|
// whether we ran too close to the end of the buffer. lSrcDelta will still be used when
|
|
// stepping through the source bitmap, but not to determine how many pixels will be drawn.
|
|
//
|
|
// We're adding cBitsToSkip back into here because it's necessary to compute the correct number
|
|
// of bytes to move. We always round to the next byte.
|
|
|
|
// i = abs(lSrcDelta); // this doesn't work
|
|
i = ((rclSrc.right - rclSrc.left) + cBitsToSkip + 7) >> 3; // Round up before shift.
|
|
|
|
lDwords = i / 4;
|
|
lBytes = i % 4;
|
|
lStart = 0;
|
|
|
|
// Here we are going to transfer the monochrome bitmap to the screen.
|
|
// We'll double buffer it to get some more throughput.
|
|
|
|
for (i=0; i < (rclSrc.bottom - rclSrc.top); i++)
|
|
{
|
|
long ix;
|
|
|
|
pjDst = ppdev->pjScreen + lBltBuffer + lStart;
|
|
ix = lDwords;
|
|
|
|
while (ix--)
|
|
{
|
|
*((ULONG*)pjDst)++ = *((ULONG*)pjTmp)++;
|
|
}
|
|
|
|
ix = lBytes;
|
|
while (ix--)
|
|
{
|
|
*pjDst++ = *pjTmp++;
|
|
}
|
|
|
|
WAIT_FOR_IDLE_ACL(ppdev, pjBase);
|
|
|
|
// We have to add in rclSrc.left mod 8 to compensate for the possibility
|
|
// of starting to draw to soon in our bitmap. This generally occurs when
|
|
// clipping text or moving windows where we are only asked to draw
|
|
// part of a monochrome bitmap.
|
|
|
|
CP_MIX_ADDR(ppdev, pjBase, ((lBltBuffer + lStart) * 8) + cBitsToSkip);
|
|
CP_DST_ADDR(ppdev, pjBase, ulDst);
|
|
pjTmp = (pjSrc += lSrcDelta);
|
|
ulDst += lDelta;
|
|
lStart ^= ppdev->lBltBufferPitch;
|
|
}
|
|
prcl++;
|
|
} while (--c != 0);
|
|
|
|
WAIT_FOR_EMPTY_ACL_QUEUE(ppdev, pjBase);
|
|
CP_ROUTING_CTRL(ppdev, pjBase, 0x33);
|
|
CP_PEL_DEPTH(ppdev, pjBase, 0);
|
|
}
|