windows-nt/Source/XPSP1/NT/drivers/video/matrox/mga/disp/stretch.c
2020-09-26 16:20:57 +08:00

1529 lines
45 KiB
C

/******************************Module*Header*******************************\
* Module Name: stretch.c
*
* Copyright (c) 1993-1996 Microsoft Corporation
\**************************************************************************/
#include "precomp.h"
#define STRETCH_MAX_EXTENT 32767
typedef DWORDLONG ULONGLONG;
/******************************Public*Routine******************************\
* VOID vMgaDirectStretch8Narrow
*
* Hardware assisted stretchblt at 8bpp when the width is 7 or less, for
* old MGAs.
*
\**************************************************************************/
VOID vMgaDirectStretch8Narrow(
STR_BLT* pStrBlt)
{
BYTE* pjSrc;
BYTE* pjDstEnd;
ULONG ulDst;
ULONG xAccum;
ULONG xTmp;
ULONG yTmp;
ULONG ulDstScan;
BYTE* pjDstScan;
PDEV* ppdev = pStrBlt->ppdev;
BYTE* pjBase = ppdev->pjBase;
LONG yDst = pStrBlt->YDstStart;
LONG xDst = pStrBlt->XDstStart;
LONG xSrc = pStrBlt->XSrcStart;
BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
LONG yCount = pStrBlt->YDstCount;
LONG WidthX = pStrBlt->XDstEnd - xDst;
ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
ULONG yAccum = pStrBlt->ulYFracAccumulator;
ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
ULONG yInt = 0;
yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
ulDstScan = ppdev->ulYDstOrg + (yDst + ppdev->yOffset) * ppdev->cxMemory
+ (xDst + ppdev->xOffset);
pjDstScan = ppdev->pjBase + SRCWND + (ulDstScan & 31);
// We can't touch the frame buffer while the accelerator is doing
// any drawing:
CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
WAIT_NOT_BUSY(pjBase);
do {
ULONG yTmp = yAccum + yFrac;
BYTE jSrc0;
BYTE* pjDst;
BYTE* pjDstEndNarrow;
CP_WRITE_REGISTER(pjBase + HST_DSTPAGE, ulDstScan);
ulDstScan += ppdev->cxMemory; // Increment to next scan
pjDst = pjDstScan;
pjSrc = pjSrcScan;
xAccum = pStrBlt->ulXFracAccumulator;
pjDstEndNarrow = pjDst + WidthX;
do {
jSrc0 = *pjSrc;
xTmp = xAccum + xFrac;
pjSrc = pjSrc + xInt + (xTmp < xAccum);
*pjDst++ = jSrc0;
xAccum = xTmp;
} while (pjDst != pjDstEndNarrow);
pjSrcScan += yInt;
if (yTmp < yAccum)
{
pjSrcScan += pStrBlt->lDeltaSrc;
}
yAccum = yTmp;
} while (--yCount);
}
/******************************Public*Routine******************************\
* VOID vMilDirectStretch8Narrow
*
* Hardware assisted stretchblt at 8bpp when the width is 7 or less, for
* Millenniums.
*
\**************************************************************************/
VOID vMilDirectStretch8Narrow(
STR_BLT* pStrBlt)
{
BYTE* pjSrc;
BYTE* pjDstEnd;
ULONG ulDst;
ULONG xAccum;
ULONG xTmp;
ULONG yTmp;
ULONG ulDstScan;
BYTE* pjDstScan;
PDEV* ppdev = pStrBlt->ppdev;
BYTE* pjBase = ppdev->pjBase;
LONG yDst = pStrBlt->YDstStart;
LONG xDst = pStrBlt->XDstStart;
LONG xSrc = pStrBlt->XSrcStart;
BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
LONG yCount = pStrBlt->YDstCount;
LONG WidthX = pStrBlt->XDstEnd - xDst;
ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
ULONG yAccum = pStrBlt->ulYFracAccumulator;
ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
ULONG yInt = 0;
yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
ulDstScan = ppdev->ulYDstOrg + (yDst + ppdev->yOffset) * ppdev->cxMemory
+ (xDst + ppdev->xOffset);
pjDstScan = ppdev->pjScreen;
// We can't touch the frame buffer while the accelerator is doing
// any drawing:
CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
WAIT_NOT_BUSY(pjBase);
do {
ULONG yTmp = yAccum + yFrac;
BYTE jSrc0;
BYTE* pjDst;
BYTE* pjDstEndNarrow;
pjDst = pjDstScan + ulDstScan;
ulDstScan += ppdev->cxMemory; // Increment to next scan
pjSrc = pjSrcScan;
xAccum = pStrBlt->ulXFracAccumulator;
pjDstEndNarrow = pjDst + WidthX;
do {
jSrc0 = *pjSrc;
xTmp = xAccum + xFrac;
pjSrc = pjSrc + xInt + (xTmp < xAccum);
*pjDst++ = jSrc0;
xAccum = xTmp;
} while (pjDst != pjDstEndNarrow);
pjSrcScan += yInt;
if (yTmp < yAccum)
{
pjSrcScan += pStrBlt->lDeltaSrc;
}
yAccum = yTmp;
} while (--yCount);
}
/******************************Public*Routine******************************\
* VOID vMilDirectStretch8
*
* Hardware assisted stretchblt at 8bpp when the width is 8 or more,
* for Millenniums.
*
\**************************************************************************/
VOID vMilDirectStretch8(
STR_BLT* pStrBlt)
{
BYTE* pjSrc;
BYTE* pjDstEnd;
LONG WidthXAln;
ULONG ulDst;
ULONG xAccum;
ULONG xTmp;
ULONG yTmp;
BYTE* pjOldScan;
LONG cyDuplicate;
LONG xDuplicate;
LONG yDuplicate;
LONG lDuplicate;
ULONG yInt;
PDEV* ppdev = pStrBlt->ppdev;
BYTE* pjBase = ppdev->pjBase;
LONG cxMemory = ppdev->cxMemory;
LONG xDst = pStrBlt->XDstStart;
LONG xSrc = pStrBlt->XSrcStart;
BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc;
BYTE* pjDst = pStrBlt->pjDstScan + xDst;
LONG yDst = pStrBlt->YDstStart;
LONG yCount = pStrBlt->YDstCount;
ULONG StartAln = (ULONG)((ULONG_PTR)pjDst & 0x03);
LONG WidthX = pStrBlt->XDstEnd - xDst;
ULONG EndAln = (ULONG)((ULONG_PTR)(pjDst + WidthX) & 0x03);
ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
ULONG yAccum = pStrBlt->ulYFracAccumulator;
ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
LONG lDstStride = pStrBlt->lDeltaDst - WidthX;
LONG lDeltaDst = pStrBlt->lDeltaDst;
LONG lDeltaSrc = pStrBlt->lDeltaSrc;
WidthXAln = WidthX - EndAln - ((- (LONG) StartAln) & 0x03);
yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
yDuplicate = yDst + ppdev->yOffset - 1;
xDuplicate = xDst + ppdev->xOffset + ppdev->ulYDstOrg;
START_DIRECT_ACCESS_STORM(ppdev, pjBase);
do {
BYTE jSrc0,jSrc1,jSrc2,jSrc3;
ULONG yTmp;
pjSrc = pjSrcScan;
xAccum = pStrBlt->ulXFracAccumulator;
switch (StartAln) {
case 1:
jSrc0 = *pjSrc;
xTmp = xAccum + xFrac;
pjSrc = pjSrc + xInt;
if (xTmp < xAccum)
pjSrc++;
*pjDst++ = jSrc0;
xAccum = xTmp;
case 2:
jSrc0 = *pjSrc;
xTmp = xAccum + xFrac;
pjSrc = pjSrc + xInt;
if (xTmp < xAccum)
pjSrc++;
*pjDst++ = jSrc0;
xAccum = xTmp;
case 3:
jSrc0 = *pjSrc;
xTmp = xAccum + xFrac;
pjSrc = pjSrc + xInt;
if (xTmp < xAccum)
pjSrc++;
*pjDst++ = jSrc0;
xAccum = xTmp;
}
pjDstEnd = pjDst + WidthXAln;
while (pjDst != pjDstEnd)
{
jSrc0 = *pjSrc;
xTmp = xAccum + xFrac;
pjSrc = pjSrc + xInt;
if (xTmp < xAccum)
pjSrc++;
jSrc1 = *pjSrc;
xAccum = xTmp + xFrac;
pjSrc = pjSrc + xInt;
if (xAccum < xTmp)
pjSrc++;
jSrc2 = *pjSrc;
xTmp = xAccum + xFrac;
pjSrc = pjSrc + xInt;
if (xTmp < xAccum)
pjSrc++;
jSrc3 = *pjSrc;
xAccum = xTmp + xFrac;
pjSrc = pjSrc + xInt;
if (xAccum < xTmp)
pjSrc++;
ulDst = (jSrc3 << 24) | (jSrc2 << 16) | (jSrc1 << 8) | jSrc0;
*(PULONG)pjDst = ulDst;
pjDst += 4;
}
switch (EndAln) {
case 3:
jSrc0 = *pjSrc;
xTmp = xAccum + xFrac;
pjSrc = pjSrc + xInt;
if (xTmp < xAccum)
pjSrc++;
*pjDst++ = jSrc0;
xAccum = xTmp;
case 2:
jSrc0 = *pjSrc;
xTmp = xAccum + xFrac;
pjSrc = pjSrc + xInt;
if (xTmp < xAccum)
pjSrc++;
*pjDst++ = jSrc0;
xAccum = xTmp;
case 1:
jSrc0 = *pjSrc;
*pjDst++ = jSrc0;
}
// Now count the number of duplicate scans:
pjDst += lDstStride;
yDuplicate++;
cyDuplicate = -1;
pjOldScan = pjSrcScan;
do {
cyDuplicate++;
pjSrcScan += yInt;
yTmp = yAccum + yFrac;
if (yTmp < yAccum)
{
pjSrcScan += lDeltaSrc;
}
yAccum = yTmp;
yCount--;
} while ((yCount != 0) && (pjSrcScan == pjOldScan));
// Duplicate the scan 'cyDuplicate' times with one blt:
if (cyDuplicate != 0)
{
lDuplicate = yDuplicate * cxMemory + xDuplicate;
CHECK_FIFO_SPACE(pjBase, 4);
CP_WRITE(pjBase, DWG_AR3, lDuplicate);
CP_WRITE(pjBase, DWG_AR0, lDuplicate + WidthX - 1);
CP_WRITE(pjBase, DWG_DWGCTL, opcode_BITBLT |
atype_RPL |
blockm_OFF |
bltmod_BFCOL |
pattern_OFF |
transc_BG_OPAQUE |
bop_SRCCOPY |
shftzero_ZERO |
sgnzero_ZERO);
CP_START(pjBase, DWG_YDSTLEN, ((yDuplicate + 1) << yval_SHIFT) |
cyDuplicate);
yDuplicate += cyDuplicate;
pjDst += cyDuplicate * lDeltaDst;
}
} while (yCount != 0);
END_DIRECT_ACCESS_STORM(ppdev, pjBase);
}
/******************************Public*Routine******************************\
* VOID vMilDirectStretch16
*
* Hardware assisted stretchblt at 16bpp, for Millenniums.
*
\**************************************************************************/
VOID vMilDirectStretch16(
STR_BLT* pStrBlt)
{
BYTE* pjOldScan;
USHORT* pusSrc;
USHORT* pusDstEnd;
LONG WidthXAln;
ULONG ulDst;
ULONG xAccum;
ULONG xTmp;
ULONG yTmp;
LONG cyDuplicate;
LONG xDuplicate;
LONG yDuplicate;
LONG lDuplicate;
LONG yInt;
PDEV* ppdev = pStrBlt->ppdev;
BYTE* pjBase = ppdev->pjBase;
LONG cxMemory = ppdev->cxMemory;
LONG xDst = pStrBlt->XDstStart;
LONG xSrc = pStrBlt->XSrcStart;
BYTE* pjSrcScan = (pStrBlt->pjSrcScan) + xSrc * 2;
USHORT* pusDst = (USHORT*)(pStrBlt->pjDstScan) + xDst;
LONG yDst = pStrBlt->YDstStart;
LONG yCount = pStrBlt->YDstCount;
ULONG StartAln = ((ULONG)((ULONG_PTR)pusDst & 0x02)) >> 1;
LONG WidthX = pStrBlt->XDstEnd - xDst;
ULONG EndAln = (ULONG)(((ULONG_PTR)(pusDst + WidthX) & 0x02) >> 1);
ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
ULONG yAccum = pStrBlt->ulYFracAccumulator;
ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
LONG lDstStride = pStrBlt->lDeltaDst - 2 * WidthX;
LONG lDeltaDst = pStrBlt->lDeltaDst;
LONG lDeltaSrc = pStrBlt->lDeltaSrc;
WidthXAln = WidthX - EndAln - StartAln;
yInt = pStrBlt->lDeltaSrc * (LONG)(pStrBlt->ulYDstToSrcIntCeil);
yDuplicate = yDst + ppdev->yOffset - 1;
xDuplicate = xDst + ppdev->xOffset + ppdev->ulYDstOrg;
START_DIRECT_ACCESS_STORM(ppdev, pjBase);
// Loop stretching each scan line
do {
USHORT usSrc0,usSrc1;
ULONG yTmp;
pusSrc = (USHORT*) pjSrcScan;
xAccum = pStrBlt->ulXFracAccumulator;
if (StartAln)
{
usSrc0 = *pusSrc;
xTmp = xAccum + xFrac;
pusSrc = pusSrc + xInt;
if (xTmp < xAccum)
pusSrc++;
*pusDst++ = usSrc0;
xAccum = xTmp;
}
pusDstEnd = pusDst + WidthXAln;
while (pusDst != pusDstEnd)
{
usSrc0 = *pusSrc;
xTmp = xAccum + xFrac;
pusSrc = pusSrc + xInt;
if (xTmp < xAccum)
pusSrc++;
usSrc1 = *pusSrc;
xAccum = xTmp + xFrac;
pusSrc = pusSrc + xInt;
if (xAccum < xTmp)
pusSrc++;
ulDst = (ULONG)((usSrc1 << 16) | usSrc0);
*(ULONG*)pusDst = ulDst;
pusDst+=2;
}
if (EndAln)
{
usSrc0 = *pusSrc;
*pusDst++ = usSrc0;
}
// Now count the number of duplicate scans:
pusDst = (USHORT*) ((BYTE*) pusDst + lDstStride);
yDuplicate++;
cyDuplicate = -1;
pjOldScan = pjSrcScan;
do {
cyDuplicate++;
pjSrcScan += yInt;
yTmp = yAccum + yFrac;
if (yTmp < yAccum)
{
pjSrcScan += lDeltaSrc;
}
yAccum = yTmp;
yCount--;
} while ((yCount != 0) && (pjSrcScan == pjOldScan));
// Duplicate the scan 'cyDuplicate' times with one blt:
if (cyDuplicate != 0)
{
lDuplicate = yDuplicate * cxMemory + xDuplicate;
CHECK_FIFO_SPACE(pjBase, 4);
CP_WRITE(pjBase, DWG_AR3, lDuplicate);
CP_WRITE(pjBase, DWG_AR0, lDuplicate + WidthX - 1);
CP_WRITE(pjBase, DWG_DWGCTL, opcode_BITBLT |
atype_RPL |
blockm_OFF |
bltmod_BFCOL |
pattern_OFF |
transc_BG_OPAQUE |
bop_SRCCOPY |
shftzero_ZERO |
sgnzero_ZERO);
CP_START(pjBase, DWG_YDSTLEN, ((yDuplicate + 1) << yval_SHIFT) |
cyDuplicate);
yDuplicate += cyDuplicate;
pusDst = (USHORT*) ((BYTE*) pusDst + cyDuplicate * lDeltaDst);
}
} while (yCount != 0);
END_DIRECT_ACCESS_STORM(ppdev, pjBase);
}
/******************************Public*Routine******************************\
* VOID vMilDirectStretch24
*
* Hardware assisted stretchblt at 24bpp, for Millenniums.
*
* We use the data-transfer register so that we don't have to worry about
* funky alignments.
*
\**************************************************************************/
VOID vMilDirectStretch24(
STR_BLT* pStrBlt)
{
BYTE* pjSrc;
LONG lAddress;
ULONG ulDst;
ULONG xAccum;
ULONG xTmp;
ULONG yTmp;
LONG i;
LONG cyDuplicate;
LONG xDuplicate;
LONG yDuplicate;
LONG lDuplicate;
BYTE* pjOldScan;
LONG xDstLeft;
LONG xDstRight;
LONG xDstRightFast;
LONG yDstTop;
LONG cyBreak;
LONG iBreak;
PDEV* ppdev = pStrBlt->ppdev;
BYTE* pjBase = ppdev->pjBase;
LONG cxMemory = ppdev->cxMemory;
BYTE* pjSrcScan = pStrBlt->pjSrcScan + pStrBlt->XSrcStart * 3;
LONG yCount = pStrBlt->YDstCount;
LONG WidthX = pStrBlt->XDstEnd - pStrBlt->XDstStart;
ULONG xInt = pStrBlt->ulXDstToSrcIntCeil * 3;
ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
ULONG yAccum = pStrBlt->ulYFracAccumulator;
ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
LONG lDeltaSrc = pStrBlt->lDeltaSrc;
ULONG yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
ULONG ulXFracAccumulator = pStrBlt->ulXFracAccumulator;
yDstTop = ppdev->yOffset + pStrBlt->YDstStart;
xDstLeft = ppdev->xOffset + pStrBlt->XDstStart;
xDstRight = ppdev->xOffset + pStrBlt->XDstEnd - 1; // Note inclusive
xDstRightFast = (((xDstRight * 3) + 2) | 0x40) / 3;
// Figure out how many scans we can duplicate before we hit the first
// WRAM boundary:
cyBreak = 0xffff;
for (iBreak = 0; iBreak < ppdev->cyBreak; iBreak++)
{
if (ppdev->ayBreak[iBreak] >= yDstTop)
{
cyBreak = ppdev->ayBreak[iBreak] - yDstTop;
break;
}
}
ppdev->HopeFlags = SIGN_CACHE;
CHECK_FIFO_SPACE(pjBase, 6);
CP_WRITE(pjBase, DWG_FXBNDRY, (xDstRight << bfxright_SHIFT) | xDstLeft);
CP_WRITE(pjBase, DWG_AR5, cxMemory);
CP_WRITE(pjBase, DWG_AR3, 0);
CP_WRITE(pjBase, DWG_AR0, WidthX - 1);
CP_WRITE(pjBase, DWG_YDST, yDstTop);
CP_WRITE(pjBase, DWG_CXRIGHT, xDstRight); // For fast-blt work-around
yDuplicate = yDstTop - 1;
xDuplicate = xDstLeft + ppdev->ulYDstOrg;
do {
CHECK_FIFO_SPACE(pjBase, 2);
CP_WRITE(pjBase, DWG_DWGCTL, opcode_ILOAD |
atype_RPL |
blockm_OFF |
pattern_OFF |
transc_BG_OPAQUE |
bop_SRCCOPY |
bltmod_BFCOL |
shftzero_ZERO |
sgnzero_ZERO);
CP_START(pjBase, DWG_LEN, 1);
// Make sure the MGA is ready to take the data:
CHECK_FIFO_SPACE(pjBase, FIFOSIZE);
pjSrc = pjSrcScan;
xAccum = ulXFracAccumulator;
i = WidthX;
while (TRUE)
{
ulDst = *(pjSrc); // Pixel 0
ulDst |= *(pjSrc + 1) << 8;
ulDst |= *(pjSrc + 2) << 16;
if (--i == 0)
break;
pjSrc += xInt;
xTmp = xAccum + xFrac;
if (xTmp < xAccum)
pjSrc += 3;
ulDst |= *(pjSrc) << 24; // Pixel 1
CP_WRITE_SRC(pjBase, ulDst);
ulDst = *(pjSrc + 1);
ulDst |= *(pjSrc + 2) << 8;
if (--i == 0)
break;
pjSrc += xInt;
xAccum = xTmp + xFrac;
if (xAccum < xTmp)
pjSrc += 3;
ulDst |= *(pjSrc) << 16; // Pixel 2
ulDst |= *(pjSrc + 1) << 24;
CP_WRITE_SRC(pjBase, ulDst);
ulDst = *(pjSrc + 2);
if (--i == 0)
break;
pjSrc += xInt;
xTmp = xAccum + xFrac;
if (xTmp < xAccum)
pjSrc += 3;
ulDst |= *(pjSrc) << 8; // Pixel 3
ulDst |= *(pjSrc + 1) << 16;
ulDst |= *(pjSrc + 2) << 24;
if (--i == 0)
break;
CP_WRITE_SRC(pjBase, ulDst);
pjSrc += xInt;
xAccum = xTmp + xFrac;
if (xAccum < xTmp)
pjSrc += 3;
}
// Write out the remainder of the scan:
CP_WRITE_SRC(pjBase, ulDst);
// Now count the number of duplicate scans:
cyBreak--;
yDuplicate++;
cyDuplicate = -1;
pjOldScan = pjSrcScan;
do {
cyDuplicate++;
pjSrcScan += yInt;
yTmp = yAccum + yFrac;
if (yTmp < yAccum)
{
pjSrcScan += lDeltaSrc;
}
yAccum = yTmp;
yCount--;
} while ((yCount != 0) && (pjSrcScan == pjOldScan));
// Duplicate the scan 'cyDuplicate' times with one blt:
if (cyDuplicate != 0)
{
lDuplicate = (yDuplicate * cxMemory) + xDuplicate;
yDuplicate += cyDuplicate;
CHECK_FIFO_SPACE(pjBase, 8);
CP_WRITE(pjBase, DWG_AR3, lDuplicate);
CP_WRITE(pjBase, DWG_AR0, lDuplicate + WidthX - 1);
cyBreak -= cyDuplicate;
if (cyBreak >= 0)
{
// We haven't crossed a WRAM boundary, so we can use a
// WRAM-WRAM blt to duplicate the scan:
CP_WRITE(pjBase, DWG_DWGCTL, opcode_FBITBLT |
atype_RPL |
blockm_OFF |
bltmod_BFCOL |
pattern_OFF |
transc_BG_OPAQUE |
bop_NOP |
shftzero_ZERO |
sgnzero_ZERO);
CP_WRITE(pjBase, DWG_FXRIGHT, xDstRightFast);
CP_START(pjBase, DWG_LEN, cyDuplicate);
CP_WRITE(pjBase, DWG_FXRIGHT, xDstRight);
}
else
{
CP_WRITE(pjBase, DWG_DWGCTL, opcode_BITBLT |
atype_RPL |
blockm_OFF |
bltmod_BFCOL |
pattern_OFF |
transc_BG_OPAQUE |
bop_SRCCOPY |
shftzero_ZERO |
sgnzero_ZERO);
CP_START(pjBase, DWG_LEN, cyDuplicate);
iBreak++;
if (iBreak >= ppdev->cyBreak)
{
// That was the last break we have to worry about:
cyBreak = 0xffff;
}
else
{
cyBreak += ppdev->ayBreak[iBreak]
- ppdev->ayBreak[iBreak - 1];
}
}
CP_WRITE(pjBase, DWG_AR3, 0);
CP_WRITE(pjBase, DWG_AR0, WidthX - 1);
}
} while (yCount != 0);
CHECK_FIFO_SPACE(pjBase, 1);
CP_WRITE(pjBase, DWG_CXRIGHT, ppdev->cxMemory - 1);
}
/******************************Public*Routine******************************\
* VOID vMilDirectStretch32
*
* Hardware assisted stretchblt at 32bpp, for Millenniums.
*
\**************************************************************************/
VOID vMilDirectStretch32(
STR_BLT* pStrBlt)
{
BYTE* pjOldScan;
ULONG* pulSrc;
ULONG* pulDstEnd;
ULONG ulDst;
ULONG xAccum;
ULONG xTmp;
ULONG yTmp;
LONG cyDuplicate;
LONG xDuplicate;
LONG yDuplicate;
LONG lDuplicate;
ULONG yInt;
PDEV* ppdev = pStrBlt->ppdev;
BYTE* pjBase = ppdev->pjBase;
LONG cxMemory = ppdev->cxMemory;
LONG xDst = pStrBlt->XDstStart;
LONG xSrc = pStrBlt->XSrcStart;
BYTE* pjSrcScan = pStrBlt->pjSrcScan + xSrc * 4;
ULONG* pulDst = (ULONG*)(pStrBlt->pjDstScan) + xDst;
LONG yDst = pStrBlt->YDstStart;
LONG yCount = pStrBlt->YDstCount;
LONG WidthX = pStrBlt->XDstEnd - xDst;
ULONG xInt = pStrBlt->ulXDstToSrcIntCeil;
ULONG xFrac = pStrBlt->ulXDstToSrcFracCeil;
ULONG yAccum = pStrBlt->ulYFracAccumulator;
ULONG yFrac = pStrBlt->ulYDstToSrcFracCeil;
LONG lDstStride = pStrBlt->lDeltaDst - 4*WidthX;
LONG lDeltaDst = pStrBlt->lDeltaDst;
LONG lDeltaSrc = pStrBlt->lDeltaSrc;
yInt = pStrBlt->lDeltaSrc * pStrBlt->ulYDstToSrcIntCeil;
yDuplicate = yDst + ppdev->yOffset - 1;
xDuplicate = xDst + ppdev->xOffset + ppdev->ulYDstOrg;
START_DIRECT_ACCESS_STORM(ppdev, pjBase);
do {
ULONG ulSrc;
ULONG yTmp;
pulSrc = (ULONG*) pjSrcScan;
xAccum = pStrBlt->ulXFracAccumulator;
pulDstEnd = pulDst + WidthX;
while (pulDst != pulDstEnd)
{
ulSrc = *pulSrc;
xTmp = xAccum + xFrac;
pulSrc = pulSrc + xInt;
if (xTmp < xAccum)
pulSrc++;
*(ULONG*)pulDst = ulSrc;
pulDst++;
xAccum = xTmp;
}
// Now count the number of duplicate scans:
pulDst = (ULONG*) ((BYTE*) pulDst + lDstStride);
yDuplicate++;
cyDuplicate = -1;
pjOldScan = pjSrcScan;
do {
cyDuplicate++;
pjSrcScan += yInt;
yTmp = yAccum + yFrac;
if (yTmp < yAccum)
{
pjSrcScan += lDeltaSrc;
}
yAccum = yTmp;
yCount--;
} while ((yCount != 0) && (pjSrcScan == pjOldScan));
// Duplicate the scan 'cyDuplicate' times with one blt:
if (cyDuplicate != 0)
{
lDuplicate = yDuplicate * cxMemory + xDuplicate;
CHECK_FIFO_SPACE(pjBase, 4);
CP_WRITE(pjBase, DWG_AR3, lDuplicate);
CP_WRITE(pjBase, DWG_AR0, lDuplicate + WidthX - 1);
CP_WRITE(pjBase, DWG_DWGCTL, opcode_BITBLT |
atype_RPL |
blockm_OFF |
bltmod_BFCOL |
pattern_OFF |
transc_BG_OPAQUE |
bop_SRCCOPY |
shftzero_ZERO |
sgnzero_ZERO);
CP_START(pjBase, DWG_YDSTLEN, ((yDuplicate + 1) << yval_SHIFT) |
cyDuplicate);
yDuplicate += cyDuplicate;
pulDst = (ULONG*) ((BYTE*) pulDst + cyDuplicate * lDeltaDst);
}
} while (yCount != 0);
END_DIRECT_ACCESS_STORM(ppdev, pjBase);
}
/******************************Public*Routine******************************\
*
* Routine Description:
*
* StretchBlt using integer math. Must be from one surface to another
* surface of the same format.
*
* Arguments:
*
* ppdev - PDEV for device
* prclDst - Pointer to rectangle of Dst extents
* pvSrc - Pointer to start of Src bitmap
* lDeltaSrc - Bytes from start of Src scan line to start of next
* prclSrc - Pointer to rectangle of Src extents
* prclClip - Clip Dest to this rect
*
* Return Value:
*
* Status
*
\**************************************************************************/
VOID vStretchDIB(
PDEV* ppdev,
RECTL* prclDst,
VOID* pvSrc,
LONG lDeltaSrc,
RECTL* prclSrc,
RECTL* prclClip)
{
STR_BLT StrBlt;
ULONG XSrcToDstIntFloor;
ULONG XSrcToDstFracFloor;
ULONG ulXDstToSrcIntCeil;
ULONG ulXDstToSrcFracCeil;
ULONG YSrcToDstIntFloor;
ULONG YSrcToDstFracFloor;
ULONG ulYDstToSrcIntCeil;
ULONG ulYDstToSrcFracCeil;
LONG SrcIntScan;
LONG DstDeltaScanEnd;
ULONG ulXFracAccumulator;
ULONG ulYFracAccumulator;
LONG LeftClipDistance;
LONG TopClipDistance;
BOOL bStretch;
union {
LARGE_INTEGER large;
ULONGLONG li;
} liInit;
PFN_DIRSTRETCH pfnStr;
//
// Calculate exclusive start and end points:
//
LONG WidthDst = prclDst->right - prclDst->left;
LONG HeightDst = prclDst->bottom - prclDst->top;
LONG WidthSrc = prclSrc->right - prclSrc->left;
LONG HeightSrc = prclSrc->bottom - prclSrc->top;
LONG XSrcStart = prclSrc->left;
LONG XSrcEnd = prclSrc->right;
LONG XDstStart = prclDst->left;
LONG XDstEnd = prclDst->right;
LONG YSrcStart = prclSrc->top;
LONG YSrcEnd = prclSrc->bottom;
LONG YDstStart = prclDst->top;
LONG YDstEnd = prclDst->bottom;
//
// Validate parameters:
//
ASSERTDD(pvSrc != (VOID*)NULL, "Bad source bitmap pointer");
ASSERTDD(prclDst != (RECTL*)NULL, "Bad destination rectangle");
ASSERTDD(prclSrc != (RECTL*)NULL, "Bad source rectangle");
ASSERTDD((WidthDst > 0) && (HeightDst > 0) &&
(WidthSrc > 0) && (HeightSrc > 0),
"Can't do mirroring or empty rectangles here");
ASSERTDD((WidthDst <= STRETCH_MAX_EXTENT) &&
(HeightDst <= STRETCH_MAX_EXTENT) &&
(WidthSrc <= STRETCH_MAX_EXTENT) &&
(HeightSrc <= STRETCH_MAX_EXTENT), "Stretch exceeds limits");
ASSERTDD(prclClip != NULL, "Bad clip rectangle");
//
// Calculate X Dst to Src mapping
//
//
// dst->src = ( CEIL( (2k*WidthSrc)/WidthDst) ) / 2k
//
// = ( FLOOR( (2k*WidthSrc -1) / WidthDst) + 1) / 2k
//
// where 2k = 2 ^ 32
//
{
ULONGLONG liWidthSrc;
ULONGLONG liQuo;
ULONG ulTemp;
//
// Work around a compiler bug dealing with the assignment
// 'liHeightSrc = (((LONGLONG)HeightSrc) << 32) - 1':
//
liInit.large.LowPart = (ULONG) -1;
liInit.large.HighPart = WidthSrc - 1;
liWidthSrc = liInit.li;
liQuo = liWidthSrc / (ULONGLONG) WidthDst;
ulXDstToSrcIntCeil = (ULONG)(liQuo >> 32);
ulXDstToSrcFracCeil = (ULONG)liQuo;
//
// Now add 1, use fake carry:
//
ulTemp = ulXDstToSrcFracCeil + 1;
ulXDstToSrcIntCeil += (ulTemp < ulXDstToSrcFracCeil);
ulXDstToSrcFracCeil = ulTemp;
}
//
// Calculate Y Dst to Src mapping
//
//
// dst->src = ( CEIL( (2k*HeightSrc)/HeightDst) ) / 2k
//
// = ( FLOOR( (2k*HeightSrc -1) / HeightDst) + 1) / 2k
//
// where 2k = 2 ^ 32
//
{
ULONGLONG liHeightSrc;
ULONGLONG liQuo;
ULONG ulTemp;
//
// Work around a compiler bug dealing with the assignment
// 'liHeightSrc = (((LONGLONG)HeightSrc) << 32) - 1':
//
liInit.large.LowPart = (ULONG) -1;
liInit.large.HighPart = HeightSrc - 1;
liHeightSrc = liInit.li;
liQuo = liHeightSrc / (ULONGLONG) HeightDst;
ulYDstToSrcIntCeil = (ULONG)(liQuo >> 32);
ulYDstToSrcFracCeil = (ULONG)liQuo;
//
// Now add 1, use fake carry:
//
ulTemp = ulYDstToSrcFracCeil + 1;
ulYDstToSrcIntCeil += (ulTemp < ulYDstToSrcFracCeil);
ulYDstToSrcFracCeil = ulTemp;
}
//
// Now clip Dst in X, and/or calc src clipping effect on dst
//
// adjust left and right edges if needed, record
// distance adjusted for fixing the src
//
if (XDstStart < prclClip->left)
{
XDstStart = prclClip->left;
}
if (XDstEnd > prclClip->right)
{
XDstEnd = prclClip->right;
}
//
// Check for totally clipped out destination:
//
if (XDstEnd <= XDstStart)
{
return;
}
LeftClipDistance = XDstStart - prclDst->left;
{
ULONG ulTempInt;
ULONG ulTempFrac;
//
// Calculate displacement for .5 in destination and add:
//
ulTempFrac = (ulXDstToSrcFracCeil >> 1) | (ulXDstToSrcIntCeil << 31);
ulTempInt = (ulXDstToSrcIntCeil >> 1);
XSrcStart += ulTempInt;
ulXFracAccumulator = ulTempFrac;
if (LeftClipDistance != 0)
{
ULONGLONG ullFraction;
ULONG ulTmp;
ullFraction = UInt32x32To64(ulXDstToSrcFracCeil, LeftClipDistance);
ulTmp = ulXFracAccumulator;
ulXFracAccumulator += (ULONG) (ullFraction);
if (ulXFracAccumulator < ulTmp)
XSrcStart++;
XSrcStart += (ulXDstToSrcIntCeil * LeftClipDistance)
+ (ULONG) (ullFraction >> 32);
}
}
//
// Now clip Dst in Y, and/or calc src clipping effect on dst
//
// adjust top and bottom edges if needed, record
// distance adjusted for fixing the src
//
if (YDstStart < prclClip->top)
{
YDstStart = prclClip->top;
}
if (YDstEnd > prclClip->bottom)
{
YDstEnd = prclClip->bottom;
}
//
// Check for totally clipped out destination:
//
if (YDstEnd <= YDstStart)
{
return;
}
TopClipDistance = YDstStart - prclDst->top;
{
ULONG ulTempInt;
ULONG ulTempFrac;
//
// Calculate displacement for .5 in destination and add:
//
ulTempFrac = (ulYDstToSrcFracCeil >> 1) | (ulYDstToSrcIntCeil << 31);
ulTempInt = ulYDstToSrcIntCeil >> 1;
YSrcStart += (LONG)ulTempInt;
ulYFracAccumulator = ulTempFrac;
if (TopClipDistance != 0)
{
ULONGLONG ullFraction;
ULONG ulTmp;
ullFraction = UInt32x32To64(ulYDstToSrcFracCeil, TopClipDistance);
ulTmp = ulYFracAccumulator;
ulYFracAccumulator += (ULONG) (ullFraction);
if (ulYFracAccumulator < ulTmp)
YSrcStart++;
YSrcStart += (ulYDstToSrcIntCeil * TopClipDistance)
+ (ULONG) (ullFraction >> 32);
}
}
//
// Warm up the hardware if doing an expanding stretch in 'y':
//
bStretch = (HeightDst > HeightSrc);
if (bStretch)
{
BYTE* pjBase = ppdev->pjBase;
LONG xOffset = ppdev->xOffset;
CHECK_FIFO_SPACE(pjBase, 6);
CP_WRITE(pjBase, DWG_DWGCTL, opcode_BITBLT + atype_RPL + blockm_OFF +
bltmod_BFCOL + pattern_OFF +
transc_BG_OPAQUE + bop_SRCCOPY);
CP_WRITE(pjBase, DWG_SHIFT, 0);
CP_WRITE(pjBase, DWG_SGN, 0);
CP_WRITE(pjBase, DWG_AR5, ppdev->cxMemory);
CP_WRITE(pjBase, DWG_FXLEFT, XDstStart + xOffset);
CP_WRITE(pjBase, DWG_FXRIGHT, XDstEnd + xOffset - 1);
ppdev->HopeFlags = SIGN_CACHE;
}
//
// Fill out blt structure, then call format-specific stretch code
//
StrBlt.ppdev = ppdev;
StrBlt.XDstEnd = XDstEnd;
StrBlt.YDstStart = YDstStart;
StrBlt.YDstCount = YDstEnd - YDstStart;
if (StrBlt.YDstCount > 0)
{
//
// Caclulate starting scan line address. Since the inner loop
// routines are format dependent, they must add XDstStart/XSrcStart
// to pjDstScan/pjSrcScan to get the actual starting pixel address.
//
StrBlt.pjSrcScan = (BYTE*) pvSrc + (YSrcStart * lDeltaSrc);
StrBlt.pjDstScan = ppdev->pjScreen
+ (ppdev->yOffset + YDstStart) * ppdev->lDelta
+ (ppdev->xOffset + ppdev->ulYDstOrg) * ppdev->cjPelSize;
StrBlt.lDeltaSrc = lDeltaSrc;
StrBlt.XSrcStart = XSrcStart;
StrBlt.XDstStart = XDstStart;
StrBlt.lDeltaDst = ppdev->lDelta;
StrBlt.ulXDstToSrcIntCeil = ulXDstToSrcIntCeil;
StrBlt.ulXDstToSrcFracCeil = ulXDstToSrcFracCeil;
StrBlt.ulYDstToSrcIntCeil = ulYDstToSrcIntCeil;
StrBlt.ulYDstToSrcFracCeil = ulYDstToSrcFracCeil;
StrBlt.ulXFracAccumulator = ulXFracAccumulator;
StrBlt.ulYFracAccumulator = ulYFracAccumulator;
if (ppdev->ulBoardId == MGA_STORM)
{
if (ppdev->iBitmapFormat == BMF_8BPP)
{
if ((XDstEnd - XDstStart) < 7)
pfnStr = vMilDirectStretch8Narrow;
else
pfnStr = vMilDirectStretch8;
}
else if (ppdev->iBitmapFormat == BMF_16BPP)
{
pfnStr = vMilDirectStretch16;
}
else if (ppdev->iBitmapFormat == BMF_24BPP)
{
pfnStr = vMilDirectStretch24;
}
else
{
ASSERTDD(ppdev->iBitmapFormat == BMF_32BPP, "Expected 32bpp");
pfnStr = vMilDirectStretch32;
}
}
else
{
#if defined(_X86_)
{
if (ppdev->iBitmapFormat == BMF_8BPP)
{
if ((XDstEnd - XDstStart) < 7)
pfnStr = vMgaDirectStretch8Narrow;
else
pfnStr = vMgaDirectStretch8;
}
else
{
ASSERTDD(ppdev->iBitmapFormat == BMF_16BPP, "Expected 16bpp");
pfnStr = vMgaDirectStretch16;
}
}
#endif
}
(*pfnStr)(&StrBlt);
}
}
/******************************Public*Routine******************************\
* BOOL DrvStretchBlt
*
\**************************************************************************/
BOOL DrvStretchBlt(
SURFOBJ* psoDst,
SURFOBJ* psoSrc,
SURFOBJ* psoMsk,
CLIPOBJ* pco,
XLATEOBJ* pxlo,
COLORADJUSTMENT* pca,
POINTL* pptlHTOrg,
RECTL* prclDst,
RECTL* prclSrc,
POINTL* pptlMsk,
ULONG iMode)
{
DSURF* pdsurfSrc;
DSURF* pdsurfDst;
PDEV* ppdev;
OH* poh;
SURFOBJ* psoDstNew;
SURFOBJ* psoSrcNew;
BOOL bPunt = FALSE;
BOOL bRet;
// GDI guarantees us that for a StretchBlt the destination surface
// will always be a device surface, and not a DIB:
ppdev = (PDEV*) psoDst->dhpdev;
pdsurfDst = (DSURF*) psoDst->dhsurf;
pdsurfSrc = (DSURF*) psoSrc->dhsurf;
poh = pdsurfDst->poh;
ppdev->xOffset = poh->x;
ppdev->yOffset = poh->y;
// It's quicker for GDI to do a StretchBlt when the source surface
// is not a device-managed surface, because then it can directly
// read the source bits without having to allocate a temporary
// buffer and call DrvCopyBits to get a copy that it can use.
if (ppdev->ulBoardId == MGA_STORM)
{
psoDstNew = psoDst;
if (psoSrc->iType != STYPE_BITMAP)
{
pdsurfSrc = (DSURF*) psoSrc->dhsurf;
if (pdsurfSrc->dt == DT_SCREEN)
{
// The source is a device bitmap that is currently stored
// in device memory.
psoSrcNew = psoSrc;
bPunt = TRUE;
}
else
{
ASSERTDD(pdsurfSrc->dt == DT_DIB, "Can only handle DIB DFBs here");
// The source was a device bitmap that we just converted
// to a DIB:
psoSrcNew = pdsurfSrc->pso;
}
}
else
{
psoSrcNew = psoSrc;
}
}
else
{
psoDstNew = psoDst;
if (psoSrc->iType == STYPE_DEVBITMAP)
{
pdsurfSrc = (DSURF*) psoSrc->dhsurf;
if (pdsurfSrc->dt == DT_SCREEN)
{
// The source is a device bitmap that is currently stored
// in device memory.
psoSrcNew = psoSrc;
bPunt = TRUE;
}
else
{
ASSERTDD(pdsurfSrc->dt == DT_DIB, "Can only handle DIB DFBs here");
// The source was a device bitmap that we just converted
// to a DIB:
psoSrcNew = pdsurfSrc->pso;
}
}
else
{
psoSrcNew = psoSrc;
}
// With the old MGA's, we have fast stretch code only for 8bpp and
// 16bpp:
if (ppdev->iBitmapFormat > BMF_16BPP)
{
bPunt = TRUE;
}
// With the old MGA's, we have fast stretch code only for x86:
#if !defined(_X86_)
{
bPunt = TRUE;
}
#endif
}
if (pdsurfDst->dt == DT_DIB)
{
// The destination was a device bitmap that we just converted
// to a DIB:
psoDstNew = pdsurfDst->pso;
bPunt = TRUE;
}
if (!bPunt)
{
RECTL rclClip;
RECTL* prclClip;
ULONG cxDst;
ULONG cyDst;
ULONG cxSrc;
ULONG cySrc;
BOOL bMore;
CLIPENUM ce;
LONG c;
LONG i;
if ((psoSrcNew->iType == STYPE_BITMAP) &&
(psoMsk == NULL) &&
((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL)) &&
((psoSrcNew->iBitmapFormat == ppdev->iBitmapFormat)))
{
cxDst = prclDst->right - prclDst->left;
cyDst = prclDst->bottom - prclDst->top;
cxSrc = prclSrc->right - prclSrc->left;
cySrc = prclSrc->bottom - prclSrc->top;
// Our 'vStretchDIB' routine requires that the stretch be
// non-inverting, within a certain size, to have no source
// clipping, and to have no empty rectangles (the latter is the
// reason for the '- 1' on the unsigned compare here):
if (((cxSrc - 1) < STRETCH_MAX_EXTENT) &&
((cySrc - 1) < STRETCH_MAX_EXTENT) &&
((cxDst - 1) < STRETCH_MAX_EXTENT) &&
((cyDst - 1) < STRETCH_MAX_EXTENT) &&
(prclSrc->left >= 0) &&
(prclSrc->top >= 0) &&
(prclSrc->right <= psoSrcNew->sizlBitmap.cx) &&
(prclSrc->bottom <= psoSrcNew->sizlBitmap.cy))
{
// Our snazzy routine only does COLORONCOLOR. But for
// stretching blts, BLACKONWHITE and WHITEONBLACK are also
// equivalent to COLORONCOLOR:
if ((iMode == COLORONCOLOR) ||
((iMode < COLORONCOLOR) && (cxSrc <= cxDst) && (cySrc <= cyDst)))
{
if ((pco == NULL) || (pco->iDComplexity == DC_TRIVIAL))
{
rclClip.left = LONG_MIN;
rclClip.top = LONG_MIN;
rclClip.right = LONG_MAX;
rclClip.bottom = LONG_MAX;
prclClip = &rclClip;
StretchSingleClipRect:
vStretchDIB(ppdev,
prclDst,
psoSrcNew->pvScan0,
psoSrcNew->lDelta,
prclSrc,
prclClip);
return(TRUE);
}
else if (pco->iDComplexity == DC_RECT)
{
prclClip = &pco->rclBounds;
goto StretchSingleClipRect;
}
else
{
CLIPOBJ_cEnumStart(pco, FALSE, CT_RECTANGLES, CD_ANY, 0);
do {
bMore = CLIPOBJ_bEnum(pco, sizeof(ce), (ULONG*) &ce);
c = cIntersect(prclDst, ce.arcl, ce.c);
if (c != 0)
{
for (i = 0; i < c; i++)
{
vStretchDIB(ppdev,
prclDst,
psoSrcNew->pvScan0,
psoSrcNew->lDelta,
prclSrc,
&ce.arcl[i]);
}
}
} while (bMore);
return(TRUE);
}
}
}
}
}
// GDI is nice enough to handle the cases where 'psoDst' and/or 'psoSrc'
// are device-managed surfaces, but it ain't gonna be fast...
if (ppdev->ulBoardId == MGA_STORM)
{
START_DIRECT_ACCESS_STORM(ppdev, ppdev->pjBase);
}
else
{
START_DIRECT_ACCESS_MGA(ppdev, ppdev->pjBase);
}
bRet = EngStretchBlt(psoDstNew, psoSrcNew, psoMsk, pco, pxlo, pca, pptlHTOrg,
prclDst, prclSrc, pptlMsk, iMode);
if (ppdev->ulBoardId == MGA_STORM)
{
END_DIRECT_ACCESS_STORM(ppdev, ppdev->pjBase);
}
else
{
END_DIRECT_ACCESS_MGA(ppdev, ppdev->pjBase);
}
return(bRet);
}