// // Copyright (c) 1996-2000 Microsoft Corporation. All rights reserved. // Mix.cpp // Mix engines for MSSynth #include "common.h" #define STR_MODULENAME "DDKSynth.sys:Mix: " #pragma warning(disable : 4101 4102 4146) #ifdef _ALPHA_ extern "C" { int __ADAWI(short, short *); }; #pragma intrinsic(__ADAWI) #define ALPHA_OVERFLOW 2 #define ALPHA_NEGATIVE 8 #else // !_ALPHA_ // TODO -- overflow detection for ia64? (+ axp64?) #endif // !_ALPHA_ #pragma code_seg() /***************************************************************************** * CDigitalAudio::Mix8() ***************************************************************************** * Implement a stereo eight-bit mix. * Heavily optimized for x86 non-MMX, plus C code for non-x86. */ DWORD CDigitalAudio::Mix8(short * pBuffer, DWORD dwLength, DWORD dwDeltaPeriod, VFRACT vfDeltaLVolume, VFRACT vfDeltaRVolume,PFRACT pfDeltaPitch, PFRACT pfSampleLength,PFRACT pfLoopLength) { DWORD dwI; DWORD dwPosition; long lM, lLM; DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; char * pcWave = (char *) m_pnWave; PFRACT pfSamplePos = m_pfLastSample; VFRACT vfLVolume = m_vfLastLVolume; VFRACT vfRVolume = m_vfLastRVolume; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8; VFRACT vfLVFract = vfLVolume << 8; // Keep high res version around. VFRACT vfRVFract = vfRVolume << 8; dwLength <<= 1; #ifndef _X86_ for (dwI = 0; dwI < dwLength; ) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; vfLVFract += vfDeltaLVolume; vfLVolume = vfLVFract >> 8; vfRVFract += vfDeltaRVolume; vfRVolume = vfRVFract >> 8; } dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch; lLM = pcWave[dwPosition]; lM = ((pcWave[dwPosition + 1] - lLM) * dwFract) >> 12; lM += lLM; lLM = lM; lLM *= vfLVolume; lLM >>= 5; // Signal bumps up to 15 bits. lM *= vfRVolume; lM >>= 5; #ifndef _X86_ #ifdef _ALPHA_ int nBitmask; if( ALPHA_OVERFLOW & (nBitmask = __ADAWI( (short) lLM, &pBuffer[dwI] )) ) { if( ALPHA_NEGATIVE & nBitmask ) { pBuffer[dwI] = 0x7FFF; } else pBuffer[dwI] = (short) 0x8000; } if( ALPHA_OVERFLOW & (nBitmask = __ADAWI( (short) lM, &pBuffer[dwI+1] )) ) { if( ALPHA_NEGATIVE & nBitmask ) { pBuffer[dwI+1] = 0x7FFF; } else pBuffer[dwI+1] = (short) 0x8000; } #else // !_ALPHA_ // TODO -- overflow detection on ia64 (+ axp64?) #endif // !_ALPHA_ #else // _X86_ (dead code) // Keep this around so we can use it to generate new assembly code (see below...) pBuffer[dwI] += (short) lLM; _asm{jno no_oflowl} pBuffer[dwI] = 0x7fff; _asm{js no_oflowl} pBuffer[dwI] = (short) 0x8000; no_oflowl: pBuffer[dwI+1] += (short) lM; _asm{jno no_oflowr} pBuffer[dwI+1] = 0x7fff; _asm{js no_oflowr} pBuffer[dwI+1] = (short) 0x8000; no_oflowr: #endif // _X86_ (dead code) dwI += 2; } #else // _X86_ int i, a, b, c, total; short * pBuf = pBuffer + dwLength, *pBufX; dwI = - dwLength; _asm { ; 979 : for (dwI = 0; dwI < dwLength; ) // Induction variables. mov edi, dwI mov ebx, DWORD PTR pfSamplePos // Previously set up. cmp DWORD PTR dwLength, 0 mov edx, pfPFract mov ecx, DWORD PTR pfPitch je $L30539 $L30536: cmp ebx, DWORD PTR pfSampleLength ; 981 : if (pfSamplePos >= pfSampleLength) mov esi, DWORD PTR dwIncDelta jge SHORT $L30540_ $L30540: ; 987 : else ; 988 : break; ; 990 : dwIncDelta--; dec esi mov DWORD PTR dwIncDelta, esi ; 991 : if (!dwIncDelta) je SHORT $L30541_ $L30541: // esi, edx, edi esi == dwIncDelta mov DWORD PTR i, 0 ; 1010 : b = dwIncDelta; // esi = b == dwIncDelta ; 1011 : c = (pfSampleLength - pfSamplePos) / pfPitch; ; 1009 : a = (dwLength - dwI) / 2; // Remaining span. mov edx, edi neg edx shr edx, 1 // edx = a ; 1017 : if (b < a && b < c) cmp esi, edx jge try_ax mov eax, ecx imul eax, esi add eax, ebx cmp eax, DWORD PTR pfSampleLength jge try_c ; 1019 : i = b; cmp esi, 3 jl got_it mov DWORD PTR i, esi jmp SHORT got_it ; 1013 : if (a < b && a < c) try_a: cmp edx, esi jge try_c try_ax: mov eax, edx imul eax, ecx add eax, ebx cmp eax, DWORD PTR pfSampleLength jge try_c ; 1015 : i = a; cmp edx, 3 jl got_it mov DWORD PTR i, edx jmp SHORT got_it ; 1021 : else if (c < a && c < b) try_c: push edx mov eax, DWORD PTR pfSampleLength sub eax, ebx cdq idiv ecx // eax == c pop edx cmp eax, edx jge got_it try_cx: cmp eax, esi jge got_it ; 1023 : i = c; cmp eax, 3 jl $L30543 mov DWORD PTR i, eax got_it: mov edx, DWORD PTR i mov eax, DWORD PTR pBuf dec edx jl $L30543 sub DWORD PTR dwIncDelta, edx ; 1093 : return (dwI >> 1); ; 1094 : } lea edx, [edx*2+2] // Current span. lea eax, [eax+edi*2] // Starting position. add edi, edx // Remaining span. lea eax, [eax+edx*2] // New ending position. push edi mov edi, edx // Current span. mov DWORD PTR pBufX, eax neg edi $L30797: ; 1005 : do ; 1010 : dwPosition = pfSamplePos >> 12; ; 1011 : dwFract = pfSamplePos & 0xFFF; mov edx, ebx mov esi, ebx add ebx, ecx mov ecx, DWORD PTR pcWave ; 1012 : pfSamplePos += pfPitch; sar edx, 12 ; 0000000cH and esi, 4095 ; 00000fffH ; 1013 : ; 1014 : lLM = (long) pcWave[dwPosition]; movsx eax, BYTE PTR [ecx+edx] ; 1015 : lM = ((pcWave[dwPosition+1] - lLM) * dwFract); ; 1016 : lM >>= 12; ; 1017 : lM += lLM; movsx edx, BYTE PTR [ecx+edx+1] ; 1018 : lLM = lM; ; 1019 : lLM *= vfLVolume; ; 1020 : lLM >>= 5; // Signal bumps up to 15 bits. ; 1022 : pBuffer[dwI] += (short) lLM; ; 1028 : lM *= vfRVolume; ; 1029 : lM >>= 5; ; 1030 : pBuffer[dwI+1] += (short) lM; ; 1036 : ; 1037 : dwI += 2; sub edx, eax imul edx, esi sar edx, 12 ; 0000000cH mov esi, DWORD PTR vfLVolume add edx, eax imul esi, edx sar esi, 5 ; 00000005H mov eax, DWORD PTR pBufX add WORD PTR [eax+edi*2], si mov esi, DWORD PTR vfRVolume jo overflow_lx no_oflowlx: imul esi, edx ; 1038 : } while (--dwIncDelta); sar esi, 5 ; 00000005H mov ecx, DWORD PTR pfPitch add WORD PTR [eax+edi*2+2], si jo overflow_rx no_oflowrx: add edi, 2 jne SHORT $L30797 pop edi ; 1039 : ++dwIncDelta; ; 1040 : continue; mov edx, DWORD PTR pfPFract cmp edi, 0 jl SHORT $L30536 jmp SHORT $L30539 $L30540_: ; 982 : { ; 983 : if (pfLoopLength) cmp DWORD PTR pfLoopLength, 0 je $L30539 ; 984 : { ; 985 : pfSamplePos -= pfLoopLength; sub ebx, DWORD PTR pfLoopLength jmp $L30540 $L30541_: ; 994 : pfPFract += pfDeltaPitch; mov ecx, DWORD PTR pfDeltaPitch mov esi, DWORD PTR vfDeltaLVolume add ecx, edx mov edx, DWORD PTR vfLVFract ; 995 : pfPitch = pfPFract >> 8; ; 996 : vfLVFract += vfDeltaLVolume; mov DWORD PTR pfPFract, ecx add edx, esi ; 997 : vfLVolume = vfLVFract >> 8; ; 998 : vfRVFract += vfDeltaRVolume; sar ecx, 8 mov DWORD PTR vfLVFract, edx sar edx, 8 mov esi, DWORD PTR vfDeltaRVolume mov DWORD PTR vfLVolume, edx mov edx, DWORD PTR vfRVFract add edx, esi mov DWORD PTR pfPitch, ecx mov DWORD PTR vfRVFract, edx mov esi, DWORD PTR dwDeltaPeriod ; 999 : vfRVolume = vfRVFract >> 8; sar edx, 8 mov DWORD PTR dwIncDelta, esi ; 993 : dwIncDelta = dwDeltaPeriod; mov DWORD PTR vfRVolume, edx jmp $L30541 // Handle truncation. overflow_l: mov WORD PTR [eax+edi*2], 0x7fff js no_oflowl mov WORD PTR [eax+edi*2], 0x8000 jmp no_oflowl overflow_r: mov WORD PTR [eax+edi*2+2], 0x7fff js no_oflowr mov WORD PTR [eax+edi*2+2], 0x8000 jmp no_oflowr overflow_lx: mov WORD PTR [eax+edi*2], 0x7fff js no_oflowlx mov WORD PTR [eax+edi*2], 0x8000 jmp no_oflowlx overflow_rx: mov WORD PTR [eax+edi*2+2], 0x7fff js no_oflowrx mov WORD PTR [eax+edi*2+2], 0x8000 jmp no_oflowrx $L30543: ; 1041 : } ; 1044 : dwPosition = pfSamplePos >> 12; mov edx, ebx mov ecx, DWORD PTR pfPitch ; 1045 : dwFract = pfSamplePos & 0xFFF; sar edx, 12 ; 0000000cH mov esi, ebx add ebx, ecx and esi, 4095 ; 00000fffH ; 1046 : pfSamplePos += pfPitch; mov ecx, DWORD PTR pcWave ; 1047 : ; 1048 : lLM = (long) pcWave[dwPosition]; movsx eax, BYTE PTR [ecx+edx] ; 1049 : lM = ((pcWave[dwPosition+1] - lLM) * dwFract); ; 1050 : lM >>= 12; ; 1051 : lM += lLM; movsx edx, BYTE PTR [ecx+edx+1] sub edx, eax imul edx, esi ; 1052 : lLM = lM; ; 1053 : lLM *= vfLVolume; ; 1054 : lLM >>= 5; // Signal bumps up to 15 bits. sar edx, 12 ; 0000000cH mov esi, DWORD PTR vfLVolume add edx, eax ; 1072 : pBuffer[dwI] += (short) lLM; imul esi, edx sar esi, 5 ; 00000005H mov eax, DWORD PTR pBuf add WORD PTR [eax+edi*2], si mov esi, DWORD PTR vfRVolume jo overflow_l no_oflowl: ; 1078 : lM *= vfRVolume; ; 1079 : lM >>= 5; imul esi, edx ; 1080 : pBuffer[dwI+1] += (short) lM; ; 1085 : no_oflowr: ; 1087 : dwI += 2; sar esi, 5 ; 00000005H mov ecx, DWORD PTR pfPitch add WORD PTR [eax+edi*2+2], si mov edx, DWORD PTR pfPFract jo overflow_r no_oflowr: ; 978 : ; 979 : for (dwI = 0; dwI < dwLength; ) add edi, 2 jl $L30536 $L30539: mov DWORD PTR dwI, edi mov DWORD PTR pfSamplePos, ebx } dwI += dwLength; #endif // _X86_ m_vfLastLVolume = vfLVolume; m_vfLastRVolume = vfRVolume; m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos; return (dwI >> 1); } /***************************************************************************** * CDigitalAudio::MixMono8() ***************************************************************************** * Implement a mono eight-bit mix. * Heavily optimized for x86 non-MMX, plus C code for non-x86. */ DWORD CDigitalAudio::MixMono8(short * pBuffer, DWORD dwLength, DWORD dwDeltaPeriod,VFRACT vfDeltaVolume, PFRACT pfDeltaPitch,PFRACT pfSampleLength, PFRACT pfLoopLength) { DWORD dwI; DWORD dwPosition; long lM; DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; char * pcWave = (char *) m_pnWave; PFRACT pfSamplePos = m_pfLastSample; VFRACT vfVolume = m_vfLastLVolume; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8; VFRACT vfVFract = vfVolume << 8; // Keep high res version around. #ifndef _X86_ for (dwI = 0; dwI < dwLength; ) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; vfVFract += vfDeltaVolume; vfVolume = vfVFract >> 8; } dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch; lM = pcWave[dwPosition]; lM += ((pcWave[dwPosition + 1] - lM) * dwFract) >> 12; lM *= vfVolume; lM >>= 5; #ifndef _X86_ #ifdef _ALPHA_ int nBitmask; if( ALPHA_OVERFLOW & (nBitmask = __ADAWI( (short) lM, &pBuffer[dwI] )) ) { if( ALPHA_NEGATIVE & nBitmask ) { pBuffer[dwI] = 0x7FFF; } else pBuffer[dwI] = (short) 0x8000; } #else // !_ALPHA_ // TODO -- overflow code on ia64 (+ axp64?) #endif // !_ALPHA_ #else // _X86_ (dead code) // Keep this around so we can use it to generate new assembly code (see below...) pBuffer[dwI] += (short) lM; _asm{jno no_oflow} pBuffer[dwI] = 0x7fff; _asm{js no_oflow} pBuffer[dwI] = (short) 0x8000; no_oflow: #endif // _X86_ (dead code) dwI++; } #else // _X86_ int i, a, b, c, total; short * pBuf = pBuffer + dwLength, *pBufX; dwI = - dwLength; _asm { ; 979 : for (dwI = 0; dwI < dwLength; ) // Induction variables. mov edi, dwI mov ebx, DWORD PTR pfSamplePos // Previously set up. cmp DWORD PTR dwLength, 0 mov edx, pfPFract mov ecx, DWORD PTR pfPitch je $L30539 $L30536: cmp ebx, DWORD PTR pfSampleLength ; 981 : if (pfSamplePos >= pfSampleLength) mov esi, DWORD PTR dwIncDelta jge SHORT $L30540_ $L30540: ; 987 : else ; 988 : break; ; 990 : dwIncDelta--; dec esi mov DWORD PTR dwIncDelta, esi ; 991 : if (!dwIncDelta) je SHORT $L30541_ $L30541: // esi, edx, edi esi == dwIncDelta mov DWORD PTR i, 0 ; 1010 : b = dwIncDelta; // esi = b == dwIncDelta ; 1011 : c = (pfSampleLength - pfSamplePos) / pfPitch; ; 1009 : a = dwLength - dwI; // Remaining span. mov edx, edi neg edx ; 1017 : if (b < a && b < c) cmp esi, edx jge try_ax mov eax, ecx imul eax, esi add eax, ebx cmp eax, DWORD PTR pfSampleLength jge try_c ; 1019 : i = b; cmp esi, 3 jl got_it mov DWORD PTR i, esi jmp SHORT got_it ; 1013 : if (a < b && a < c) try_a: cmp edx, esi jge try_c try_ax: mov eax, edx imul eax, ecx add eax, ebx cmp eax, DWORD PTR pfSampleLength jge try_c ; 1015 : i = a; cmp edx, 3 jl got_it mov DWORD PTR i, edx jmp SHORT got_it ; 1021 : else if (c < a && c < b) try_c: push edx mov eax, DWORD PTR pfSampleLength sub eax, ebx cdq idiv ecx // eax == c pop edx cmp eax, edx jge got_it try_cx: cmp eax, esi jge got_it ; 1023 : i = c; cmp eax, 3 jl $L30543 mov DWORD PTR i, eax got_it: mov edx, DWORD PTR i mov eax, DWORD PTR pBuf dec edx jl $L30543 sub DWORD PTR dwIncDelta, edx ; 1093 : return (dwI); ; 1094 : } lea edx, [edx+1] // Current span. lea eax, [eax+edi*2] // Starting position. add edi, edx // Remaining span. lea eax, [eax+edx*2] // New ending position. push edi mov edi, edx // Current span. mov DWORD PTR pBufX, eax neg edi $L30797: ; 1005 : do ; 1010 : dwPosition = pfSamplePos >> 12; ; 1011 : dwFract = pfSamplePos & 0xFFF; mov edx, ebx mov esi, ebx add ebx, ecx mov ecx, DWORD PTR pcWave ; 1012 : pfSamplePos += pfPitch; sar edx, 12 ; 0000000cH and esi, 4095 ; 00000fffH ; 1013 : ; 1014 : lLM = (long) pcWave[dwPosition]; movsx eax, BYTE PTR [ecx+edx] ; 1015 : lM = ((pcWave[dwPosition+1] - lLM) * dwFract); ; 1016 : lM >>= 12; ; 1017 : lM += lLM; movsx edx, BYTE PTR [ecx+edx+1] sub edx, eax ; 1018 : lLM = lM; ; 1019 : lLM *= vfLVolume; ; 1020 : lLM >>= 5; // Signal bumps up to 15 bits. ; 1022 : pBuffer[dwI] += (short) lLM; ; 1027 : no_oflowx: ; 1037 : ++dwI; imul edx, esi sar edx, 12 ; 0000000cH mov esi, DWORD PTR vfVolume mov ecx, DWORD PTR pfPitch add edx, eax imul esi, edx sar esi, 5 ; 00000005H mov eax, DWORD PTR pBufX add WORD PTR [eax+edi*2], si jo overflow_x no_oflowx: inc edi jne SHORT $L30797 pop edi ; 1039 : ++dwIncDelta; ; 1040 : continue; mov edx, DWORD PTR pfPFract cmp edi, 0 jl SHORT $L30536 jmp SHORT $L30539 $L30540_: ; 982 : { ; 983 : if (pfLoopLength) cmp DWORD PTR pfLoopLength, 0 je $L30539 ; 984 : { ; 985 : pfSamplePos -= pfLoopLength; sub ebx, DWORD PTR pfLoopLength jmp $L30540 $L30541_: ; 994 : pfPFract += pfDeltaPitch; mov ecx, DWORD PTR pfDeltaPitch mov esi, DWORD PTR vfDeltaVolume add ecx, edx mov edx, DWORD PTR vfVFract ; 995 : pfPitch = pfPFract >> 8; ; 996 : vfVFract += vfDeltaVolume; mov DWORD PTR pfPFract, ecx add edx, esi ; 997 : vfLVolume = vfLVFract >> 8; sar ecx, 8 mov DWORD PTR vfVFract, edx sar edx, 8 mov esi, DWORD PTR dwDeltaPeriod mov DWORD PTR vfVolume, edx mov DWORD PTR pfPitch, ecx mov DWORD PTR dwIncDelta, esi ; 993 : dwIncDelta = dwDeltaPeriod; jmp $L30541 // Handle truncation. overflow_: mov WORD PTR [eax+edi*2], 0x7fff js no_oflow mov WORD PTR [eax+edi*2], 0x8000 jmp no_oflow overflow_x: mov WORD PTR [eax+edi*2], 0x7fff js no_oflowx mov WORD PTR [eax+edi*2], 0x8000 jmp no_oflowx $L30543: ; 1044 : dwPosition = pfSamplePos >> 12; mov edx, ebx mov ecx, DWORD PTR pfPitch ; 1045 : dwFract = pfSamplePos & 0xFFF; sar edx, 12 ; 0000000cH mov esi, ebx add ebx, ecx and esi, 4095 ; 00000fffH ; 1046 : pfSamplePos += pfPitch; mov ecx, DWORD PTR pcWave ; 1047 : ; 1048 : lLM = (long) pcWave[dwPosition]; movsx eax, BYTE PTR [ecx+edx] ; 1049 : lM = ((pcWave[dwPosition+1] - lLM) * dwFract); ; 1050 : lM >>= 12; ; 1051 : lM += lLM; movsx edx, BYTE PTR [ecx+edx+1] sub edx, eax imul edx, esi ; 1052 : lLM = lM; ; 1053 : lLM *= vfLVolume; ; 1054 : lLM >>= 5; // Signal bumps up to 15 bits. sar edx, 12 ; 0000000cH mov esi, DWORD PTR vfVolume add edx, eax ; 1072 : pBuffer[dwI] += (short) lLM; imul esi, edx sar esi, 5 ; 00000005H mov eax, DWORD PTR pBuf add WORD PTR [eax+edi*2], si jo overflow_ no_oflow: inc edi mov edx, DWORD PTR pfPFract ; 979 : for (dwI = 0; dwI < dwLength; ) mov ecx, DWORD PTR pfPitch jl $L30536 $L30539: mov DWORD PTR dwI, edi mov DWORD PTR pfSamplePos, ebx } dwI += dwLength; #endif // _X86_ m_vfLastLVolume = vfVolume; m_vfLastRVolume = vfVolume; // !!! is this right? m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos; return (dwI); } /***************************************************************************** * CDigitalAudio::Mix16() ***************************************************************************** * Implement a stereo sixteen-bit mix. * Heavily optimized for x86 non-MMX, plus C code for non-x86. */ DWORD CDigitalAudio::Mix16(short * pBuffer, DWORD dwLength, DWORD dwDeltaPeriod, VFRACT vfDeltaLVolume, VFRACT vfDeltaRVolume,PFRACT pfDeltaPitch, PFRACT pfSampleLength,PFRACT pfLoopLength) { DWORD dwI; DWORD dwPosition; long lA; long lM; DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; short * pcWave = m_pnWave; PFRACT pfSamplePos = m_pfLastSample; VFRACT vfLVolume = m_vfLastLVolume; VFRACT vfRVolume = m_vfLastRVolume; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8; VFRACT vfLVFract = vfLVolume << 8; // Keep high res version around. VFRACT vfRVFract = vfRVolume << 8; dwLength <<= 1; #ifndef _X86_ for (dwI = 0; dwI < dwLength; ) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) { pfSamplePos -= pfLoopLength; } else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; vfLVFract += vfDeltaLVolume; vfLVolume = vfLVFract >> 8; vfRVFract += vfDeltaRVolume; vfRVolume = vfRVFract >> 8; } dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch; lA = (long) pcWave[dwPosition]; lM = ((pcWave[dwPosition+1] - lA) * dwFract); lM >>= 12; lM += lA; lA = lM; lA *= vfLVolume; lA >>= 13; // Signal bumps up to 15 bits. lM *= vfRVolume; lM >>= 13; #ifndef _X86_ #ifdef _ALPHA_ int nBitmask; if( ALPHA_OVERFLOW & (nBitmask = __ADAWI( (short) lA, &pBuffer[dwI] )) ) { if( ALPHA_NEGATIVE & nBitmask ) { pBuffer[dwI] = 0x7FFF; } else pBuffer[dwI] = (short) 0x8000; } if( ALPHA_OVERFLOW & (nBitmask = __ADAWI( (short) lM, &pBuffer[dwI+1] )) ) { if( ALPHA_NEGATIVE & nBitmask ) { pBuffer[dwI+1] = 0x7FFF; } else pBuffer[dwI+1] = (short) 0x8000; } #else // !_ALPHA_ // TODO -- overflow detection on ia64 (+ axp64?) #endif // !_ALPHA_ #else // _X86_ (dead code) // Keep this around so we can use it to generate new assembly code (see below...) pBuffer[dwI] += (short) lA; _asm{jno no_oflowl} pBuffer[dwI] = 0x7fff; _asm{js no_oflowl} pBuffer[dwI] = (short) 0x8000; no_oflowl: pBuffer[dwI+1] += (short) lM; _asm{jno no_oflowr} pBuffer[dwI+1] = 0x7fff; _asm{js no_oflowr} pBuffer[dwI+1] = (short) 0x8000; no_oflowr: #endif // _X86_ (dead code) dwI += 2; } #else // _X86_ int i, a, b, c, total; short * pBuf = pBuffer + dwLength, *pBufX; dwI = - dwLength; _asm { ; 979 : for (dwI = 0; dwI < dwLength; ) // Induction variables. mov edi, dwI mov ebx, DWORD PTR pfSamplePos // Previously set up. cmp DWORD PTR dwLength, 0 mov edx, pfPFract mov ecx, DWORD PTR pfPitch je $L30539 $L30536: cmp ebx, DWORD PTR pfSampleLength ; 981 : if (pfSamplePos >= pfSampleLength) mov esi, DWORD PTR dwIncDelta jge SHORT $L30540_ $L30540: ; 987 : else ; 988 : break; ; 990 : dwIncDelta--; dec esi mov DWORD PTR dwIncDelta, esi ; 991 : if (!dwIncDelta) je SHORT $L30541_ $L30541: // esi, edx, edi esi == dwIncDelta mov DWORD PTR i, 0 ; 1010 : b = dwIncDelta; // esi = b == dwIncDelta ; 1011 : c = (pfSampleLength - pfSamplePos) / pfPitch; ; 1009 : a = (dwLength - dwI) / 2; // Remaining span. mov edx, edi neg edx shr edx, 1 // edx = a ; 1017 : if (b < a && b < c) cmp esi, edx jge try_ax mov eax, ecx imul eax, esi add eax, ebx cmp eax, DWORD PTR pfSampleLength jge try_c ; 1019 : i = b; cmp esi, 3 jl got_it mov DWORD PTR i, esi jmp SHORT got_it ; 1013 : if (a < b && a < c) try_a: cmp edx, esi jge try_c try_ax: mov eax, edx imul eax, ecx add eax, ebx cmp eax, DWORD PTR pfSampleLength jge try_c ; 1015 : i = a; cmp edx, 3 jl got_it mov DWORD PTR i, edx jmp SHORT got_it ; 1021 : else if (c < a && c < b) try_c: push edx mov eax, DWORD PTR pfSampleLength sub eax, ebx cdq idiv ecx // eax == c pop edx cmp eax, edx jge got_it try_cx: cmp eax, esi jge got_it ; 1023 : i = c; cmp eax, 3 jl $L30543 mov DWORD PTR i, eax got_it: mov edx, DWORD PTR i mov eax, DWORD PTR pBuf dec edx jl $L30543 sub DWORD PTR dwIncDelta, edx ; 1093 : return (dwI >> 1); ; 1094 : } lea edx, [edx*2+2] // Current span. lea eax, [eax+edi*2] // Starting position. add edi, edx // Remaining span. lea eax, [eax+edx*2] // New ending position. push edi mov edi, edx // Current span. mov DWORD PTR pBufX, eax neg edi $L30797: ; 1005 : do ; 1010 : dwPosition = pfSamplePos >> 12; ; 1011 : dwFract = pfSamplePos & 0xFFF; mov edx, ebx mov esi, ebx add ebx, ecx mov ecx, DWORD PTR pcWave ; 1012 : pfSamplePos += pfPitch; sar edx, 12 ; 0000000cH and esi, 4095 ; 00000fffH ; 1014 : lA = (long) pcWave[dwPosition]; movsx eax, WORD PTR [ecx+edx*2] ; 1015 : lM = ((pcWave[dwPosition+1] - lA) * dwFract); ; 1016 : lM >>= 12; ; 1017 : lM += lA; movsx edx, WORD PTR [ecx+edx*2+2] sub edx, eax ; 1018 : lA = lM; ; 1019 : lA *= vfLVolume; ; 1020 : lA >>= 13; // Signal bumps up to 15 bits. ; 1022 : pBuffer[dwI] += (short) lA; ; 1027 : no_oflowlx: ; 1028 : lM *= vfRVolume; ; 1029 : lM >>= 13; ; 1030 : pBuffer[dwI+1] += (short) lM; ; 1035 : no_oflowrx: ; 1037 : dwI += 2; imul edx, esi sar edx, 12 ; 0000000cH mov esi, DWORD PTR vfLVolume add edx, eax mov eax, DWORD PTR pBufX imul esi, edx sar esi, 13 ; 0000000dH add WORD PTR [eax+edi*2], si mov esi, DWORD PTR vfRVolume jo overflow_lx no_oflowlx: imul esi, edx ; 1038 : } while (--dwIncDelta); sar esi, 13 ; 0000000dH mov ecx, DWORD PTR pfPitch add WORD PTR [eax+edi*2+2], si jo overflow_rx no_oflowrx: add edi, 2 jne SHORT $L30797 pop edi ; 1039 : ++dwIncDelta; ; 1040 : continue; mov edx, DWORD PTR pfPFract cmp edi, 0 jl SHORT $L30536 jmp SHORT $L30539 $L30540_: ; 982 : { ; 983 : if (pfLoopLength) cmp DWORD PTR pfLoopLength, 0 je $L30539 ; 985 : pfSamplePos -= pfLoopLength; sub ebx, DWORD PTR pfLoopLength jmp $L30540 $L30541_: ; 994 : pfPFract += pfDeltaPitch; mov ecx, DWORD PTR pfDeltaPitch mov esi, DWORD PTR vfDeltaLVolume add ecx, edx mov edx, DWORD PTR vfLVFract ; 995 : pfPitch = pfPFract >> 8; ; 996 : vfLVFract += vfDeltaLVolume; mov DWORD PTR pfPFract, ecx add edx, esi ; 997 : vfLVolume = vfLVFract >> 8; ; 998 : vfRVFract += vfDeltaRVolume; sar ecx, 8 mov DWORD PTR vfLVFract, edx sar edx, 8 mov esi, DWORD PTR vfDeltaRVolume mov DWORD PTR vfLVolume, edx mov edx, DWORD PTR vfRVFract add edx, esi mov DWORD PTR pfPitch, ecx mov DWORD PTR vfRVFract, edx mov esi, DWORD PTR dwDeltaPeriod ; 999 : vfRVolume = vfRVFract >> 8; sar edx, 8 mov DWORD PTR dwIncDelta, esi ; 993 : dwIncDelta = dwDeltaPeriod; mov DWORD PTR vfRVolume, edx jmp $L30541 // Handle truncation. overflow_l: mov WORD PTR [eax+edi*2], 0x7fff js no_oflowl mov WORD PTR [eax+edi*2], 0x8000 jmp no_oflowl overflow_r: mov WORD PTR [eax+edi*2+2], 0x7fff js no_oflowr mov WORD PTR [eax+edi*2+2], 0x8000 jmp no_oflowr overflow_lx: mov WORD PTR [eax+edi*2], 0x7fff js no_oflowlx mov WORD PTR [eax+edi*2], 0x8000 jmp no_oflowlx overflow_rx: mov WORD PTR [eax+edi*2+2], 0x7fff js no_oflowrx mov WORD PTR [eax+edi*2+2], 0x8000 jmp no_oflowrx $L30543: ; 1044 : dwPosition = pfSamplePos >> 12; mov edx, ebx mov ecx, DWORD PTR pfPitch ; 1045 : dwFract = pfSamplePos & 0xFFF; sar edx, 12 ; 0000000cH mov esi, ebx and esi, 4095 ; 00000fffH add ebx, ecx ; 1046 : pfSamplePos += pfPitch; mov ecx, DWORD PTR pcWave ; 1047 : ; 1048 : lA = (long) pcWave[dwPosition]; movsx eax, WORD PTR [ecx+edx*2] ; 1049 : lM = ((pcWave[dwPosition+1] - lA) * dwFract); ; 1050 : lM >>= 12; ; 1051 : lM += lA; movsx edx, WORD PTR [ecx+edx*2+2] sub edx, eax imul edx, esi ; 1052 : lA = lM; ; 1053 : lA *= vfLVolume; ; 1054 : lA >>= 13; // Signal bumps up to 15 bits. sar edx, 12 ; 0000000cH mov esi, DWORD PTR vfLVolume add edx, eax ; 1072 : pBuffer[dwI] += (short) lA; imul esi, edx sar esi, 13 ; 0000000dH mov eax, DWORD PTR pBuf add WORD PTR [eax+edi*2], si mov esi, DWORD PTR vfRVolume jo overflow_l no_oflowl: ; 1077 : no_oflowl: ; 1078 : lM *= vfRVolume; ; 1079 : lM >>= 13; imul esi, edx ; 1080 : pBuffer[dwI+1] += (short) lM; ; 1085 : no_oflowr: ; 1086 : #endif /* _ALPHA */ ; 1087 : dwI += 2; sar esi, 13 ; 0000000dH mov ecx, DWORD PTR pfPitch add WORD PTR [eax+edi*2+2], si mov edx, DWORD PTR pfPFract jo overflow_r no_oflowr: add edi, 2 ; 978 : ; 979 : for (dwI = 0; dwI < dwLength; ) jl $L30536 $L30539: mov DWORD PTR dwI, edi mov DWORD PTR pfSamplePos, ebx } dwI += dwLength; #endif // _X86_ m_vfLastLVolume = vfLVolume; m_vfLastRVolume = vfRVolume; m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos; return (dwI >> 1); } /***************************************************************************** * CDigitalAudio::MixMono16() ***************************************************************************** * Implement a mono sixteen-bit mix. * Heavily optimized for x86 non-MMX, plus C code for non-x86. */ DWORD CDigitalAudio::MixMono16(short * pBuffer, DWORD dwLength, DWORD dwDeltaPeriod,VFRACT vfDeltaVolume, PFRACT pfDeltaPitch,PFRACT pfSampleLength, PFRACT pfLoopLength) { DWORD dwI; DWORD dwPosition; long lA;//, lB; long lM; DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; short * pcWave = m_pnWave; PFRACT pfSamplePos = m_pfLastSample; VFRACT vfVolume = m_vfLastLVolume; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8; VFRACT vfVFract = vfVolume << 8; // Keep high res version around. #ifndef _X86_ for (dwI = 0; dwI < dwLength;) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; vfVFract += vfDeltaVolume; vfVolume = vfVFract >> 8; } dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch; lA = (long) pcWave[dwPosition]; lM = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA; lM *= vfVolume; lM >>= 13; // Signal bumps up to 12 bits. #ifndef _X86_ #ifdef _ALPHA_ int nBitmask; if( ALPHA_OVERFLOW & (nBitmask = __ADAWI( (short) lM, &pBuffer[dwI] )) ) { if( ALPHA_NEGATIVE & nBitmask ) { pBuffer[dwI] = 0x7FFF; } else pBuffer[dwI] = (short) 0x8000; } #else // !_ALPHA_ // TODO -- overflow detection for ia64 (+ axp64?) #endif // !_ALPHA_ #else // _X86_ (dead code) // Keep this around so we can use it to generate new assembly code (see below...) pBuffer[dwI] += (short) lM; _asm{jno no_oflow} pBuffer[dwI] = 0x7fff; _asm{js no_oflow} pBuffer[dwI] = (short) 0x8000; no_oflow: #endif // _X86 (dead code) dwI++; } #else // _X86_ int i, a, b, c, total; short * pBuf = pBuffer + dwLength, *pBufX; dwI = - dwLength; _asm { ; 979 : for (dwI = 0; dwI < dwLength; ) // Induction variables. mov edi, dwI mov ebx, DWORD PTR pfSamplePos // Previously set up. cmp DWORD PTR dwLength, 0 mov edx, pfPFract mov ecx, DWORD PTR pfPitch je $L30539 $L30536: cmp ebx, DWORD PTR pfSampleLength ; 981 : if (pfSamplePos >= pfSampleLength) mov esi, DWORD PTR dwIncDelta jge SHORT $L30540_ $L30540: ; 987 : else ; 988 : break; ; 990 : dwIncDelta--; dec esi mov DWORD PTR dwIncDelta, esi ; 991 : if (!dwIncDelta) je SHORT $L30541_ $L30541: // esi, edx, edi esi == dwIncDelta mov DWORD PTR i, 0 ; 1010 : b = dwIncDelta; // esi = b == dwIncDelta ; 1011 : c = (pfSampleLength - pfSamplePos) / pfPitch; ; 1009 : a = dwLength - dwI; // Remaining span. mov edx, edi neg edx ; 1017 : if (b < a && b < c) cmp esi, edx jge try_ax mov eax, ecx imul eax, esi add eax, ebx cmp eax, DWORD PTR pfSampleLength jge try_c ; 1019 : i = b; cmp esi, 3 jl got_it mov DWORD PTR i, esi jmp SHORT got_it ; 1013 : if (a < b && a < c) try_a: cmp edx, esi jge try_c try_ax: mov eax, edx imul eax, ecx add eax, ebx cmp eax, DWORD PTR pfSampleLength jge try_c ; 1015 : i = a; cmp edx, 3 jl got_it mov DWORD PTR i, edx jmp SHORT got_it ; 1021 : else if (c < a && c < b) try_c: push edx mov eax, DWORD PTR pfSampleLength sub eax, ebx cdq idiv ecx // eax == c pop edx cmp eax, edx jge got_it try_cx: cmp eax, esi jge got_it ; 1023 : i = c; cmp eax, 3 jl $L30543 mov DWORD PTR i, eax got_it: mov edx, DWORD PTR i mov eax, DWORD PTR pBuf dec edx jl $L30543 sub DWORD PTR dwIncDelta, edx ; 1093 : return (dwI); ; 1094 : } lea edx, [edx+1] // Current span. lea eax, [eax+edi*2] // Starting position. add edi, edx // Remaining span. lea eax, [eax+edx*2] // New ending position. push edi mov edi, edx // Current span. mov DWORD PTR pBufX, eax neg edi $L30797: ; 1005 : do ; 1010 : dwPosition = pfSamplePos >> 12; ; 1011 : dwFract = pfSamplePos & 0xFFF; mov edx, ebx mov esi, ebx add ebx, ecx mov ecx, DWORD PTR pcWave ; 1012 : pfSamplePos += pfPitch; sar edx, 12 ; 0000000cH and esi, 4095 ; 00000fffH ; 1013 : ; 1014 : lA = (long) pcWave[dwPosition]; movsx eax, WORD PTR [ecx+edx*2] ; 1015 : lM = ((pcWave[dwPosition+1] - lA) * dwFract); ; 1016 : lM >>= 12; ; 1017 : lM += lA; movsx edx, WORD PTR [ecx+edx*2+2] sub edx, eax ; 1018 : lA = lM; ; 1019 : lA *= vfLVolume; ; 1020 : lA >>= 13; // Signal bumps up to 15 bits. ; 1022 : pBuffer[dwI] += (short) lA; ; 1027 : no_oflowx: ; 1037 : ++dwI; imul edx, esi sar edx, 12 ; 0000000cH mov esi, DWORD PTR vfVolume add edx, eax mov ecx, DWORD PTR pfPitch imul esi, edx sar esi, 13 ; 0000000dH mov eax, DWORD PTR pBufX add WORD PTR [eax+edi*2], si jo overflow_x no_oflowx: ; 1038 : } while (--dwIncDelta); inc edi jne SHORT $L30797 pop edi ; 1039 : ++dwIncDelta; ; 1040 : continue; mov edx, DWORD PTR pfPFract cmp edi, 0 jl SHORT $L30536 jmp SHORT $L30539 $L30540_: ; 983 : if (pfLoopLength) cmp DWORD PTR pfLoopLength, 0 je $L30539 ; 985 : pfSamplePos -= pfLoopLength; sub ebx, DWORD PTR pfLoopLength jmp $L30540 $L30541_: ; 994 : pfPFract += pfDeltaPitch; mov ecx, DWORD PTR pfDeltaPitch mov esi, DWORD PTR vfDeltaVolume add ecx, edx mov edx, DWORD PTR vfVFract ; 995 : pfPitch = pfPFract >> 8; ; 996 : vfVFract += vfDeltaVolume; mov DWORD PTR pfPFract, ecx add edx, esi ; 997 : vfVolume = vfVFract >> 8; sar ecx, 8 mov DWORD PTR vfVFract, edx sar edx, 8 mov esi, DWORD PTR dwDeltaPeriod mov DWORD PTR vfVolume, edx mov DWORD PTR pfPitch, ecx mov DWORD PTR dwIncDelta, esi ; 993 : dwIncDelta = dwDeltaPeriod; jmp $L30541 // Handle truncation. overflow_: mov WORD PTR [eax+edi*2], 0x7fff js no_oflow mov WORD PTR [eax+edi*2], 0x8000 jmp no_oflow overflow_x: mov WORD PTR [eax+edi*2], 0x7fff js no_oflowx mov WORD PTR [eax+edi*2], 0x8000 jmp no_oflowx $L30543: ; 1044 : dwPosition = pfSamplePos >> 12; mov edx, ebx mov ecx, DWORD PTR pfPitch ; 1045 : dwFract = pfSamplePos & 0xFFF; sar edx, 12 ; 0000000cH mov esi, ebx and esi, 4095 ; 00000fffH add ebx, ecx ; 1046 : pfSamplePos += pfPitch; mov ecx, DWORD PTR pcWave ; 1047 : ; 1048 : lA = (long) pcWave[dwPosition]; movsx eax, WORD PTR [ecx+edx*2] ; 1049 : lM = ((pcWave[dwPosition+1] - lA) * dwFract); ; 1050 : lM >>= 12; ; 1051 : lM += lA; movsx edx, WORD PTR [ecx+edx*2+2] sub edx, eax imul edx, esi ; 1052 : lA = lM; ; 1053 : lA *= vfVolume; ; 1054 : lA >>= 13; // Signal bumps up to 15 bits. sar edx, 12 ; 0000000cH mov esi, DWORD PTR vfVolume add edx, eax ; 1072 : pBuffer[dwI] += (short) lA; imul esi, edx sar esi, 13 ; 0000000dH mov eax, DWORD PTR pBuf add WORD PTR [eax+edi*2], si jo overflow_ no_oflow: ; 1077 : no_oflowl: ; 1087 : ++dwI; inc edi mov edx, DWORD PTR pfPFract ; 979 : for (dwI = 0; dwI < dwLength; ) mov ecx, DWORD PTR pfPitch jl $L30536 $L30539: mov DWORD PTR dwI, edi mov DWORD PTR pfSamplePos, ebx } dwI += dwLength; #endif // _X86_ m_vfLastLVolume = vfVolume; m_vfLastRVolume = vfVolume; // !!! is this right? m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos; return (dwI); }