windows-nt/Source/XPSP1/NT/shell/ext/mlang/utf7obj.cpp

// ============================================================================
// Internet Character Set Conversion: Input from UTF-7
// ============================================================================

#include "private.h"
#include "fechrcnv.h"
#include "utf7obj.h"

//+-----------------------------------------------------------------------
//
//  Function:   IsBase64
//
//  Synopsis:   We use the following table to quickly determine if we have
//              a valid base64 character.
//
//------------------------------------------------------------------------

// Base64 decode table, indexed by the raw input byte.  The entries for
// 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/' hold the 6-bit value (0-63) of that
// character; every other entry is 255.  The table is never written to, so it
// is const-qualified.
static const UCHAR g_aBase64[256] =
{
  /*            0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   A,   B,   C,   D,   E,   F, */

  /* 00-0f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  /* 10-1f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  /* 20-2f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  62, 255, 255, 255,  63,
  /* 30-3f */  52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 255, 255, 255, 255, 255, 255,
  /* 40-4f */ 255,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
  /* 50-5f */  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255, 255,
  /* 60-6f */ 255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
  /* 70-7f */  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, 255, 255, 255, 255, 255,
  /* 80-8f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  /* 90-9f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  /* a0-af */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  /* b0-bf */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  /* c0-cf */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  /* d0-df */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  /* e0-ef */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  /* f0-ff */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
};


// Direct-encoding table, indexed by a 7-bit ASCII code.  An entry of 255 means
// the character may not be written as-is and has to go through a shifted
// Base64 run; any other value marks a character that is copied straight to the
// output.  The directly encodable characters are 'A'-'Z', 'a'-'z', '0'-'9',
// ' ( ) , - . / : ? plus space, tab, LF and CR.  Only the "== 255" test is
// used by the code in this file.  The table is never written to, so it is
// const-qualified.
static const UCHAR g_aDirectChar[128] =
{
  /*            0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   A,   B,   C,   D,   E,   F, */

  /* 00-0f */ 255, 255, 255, 255, 255, 255, 255, 255, 255,  72,  73, 255, 255,  74, 255, 255,
  /* 10-1f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  /* 20-2f */  71, 255, 255, 255, 255, 255, 255,  62,  63,  64, 255, 255,  65,  66,  67,  68,
  /* 30-3f */  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  69, 255, 255, 255, 255,  70,
  /* 40-4f */ 255,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
  /* 50-5f */  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255, 255,
  /* 60-6f */ 255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
  /* 70-7f */  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, 255, 255, 255, 255, 255,
};

// Base64 alphabet, indexed by 6-bit value (0-63) to obtain the output
// character.  The literal also carries '=' at index 64 and the terminating
// NUL; the encoder masks its index with 0x3f, so neither is ever selected.
// The table is never written to, so it is const-qualified.
static const UCHAR g_aInvBase64[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" };

// Returns TRUE when byte t is a character of the Base64 alphabet, i.e. when
// its g_aBase64 entry is a 6-bit value rather than the 255 "invalid" marker.
static inline BOOL
IsBase64(UCHAR t )
{
    const UCHAR sextet = g_aBase64[t];

    return ( sextet < 64 ) ? TRUE : FALSE;
}

/******************************************************************************
**************************   C O N S T R U C T O R   **************************
******************************************************************************/

// Forwards the code page and code set to the base converter, then puts the
// decoder into its initial state via Reset().
CInccUTF7In::CInccUTF7In(UINT uCodePage, int nCodeSet)
    : CINetCodeConverter(uCodePage, nCodeSet)
{
    Reset();
}

/******************************************************************************
*******************************   R E S E T   *********************************
******************************************************************************/

// Returns the decoder to its initial state: the main conversion and clean-up
// handlers are installed, the decoder is in direct (unshifted) mode, and the
// bit accumulator, its bit count and the per-run output counter are cleared.
void CInccUTF7In::Reset()
{
    // A pointer to member must be formed with the explicit &Class::Member
    // syntax; the bare member name is only accepted as a compiler extension.
    m_pfnConv = &CInccUTF7In::ConvMain;
    m_pfnCleanUp = &CInccUTF7In::CleanUpMain;

    m_fUTF7Mode = FALSE ;   // not inside a '+' ... '-' run
    m_nBitCount = 0 ;       // no pending Base64 bits
    m_tcUnicode = 0 ;       // empty bit accumulator
    m_nOutCount = 0 ;       // nothing decoded from the current run yet
}


/******************************************************************************
*************************   C O N V E R T   C H A R   *************************
******************************************************************************/

// Feeds one input byte to the currently installed conversion handler and maps
// its BOOL result to an HRESULT: S_OK on TRUE, E_FAIL on FALSE.
// cchSrc is not used by this converter.
HRESULT CInccUTF7In::ConvertChar(UCHAR tc, int cchSrc)
{
    return (this->*m_pfnConv)(tc) ? S_OK : E_FAIL;
}

/******************************************************************************
*****************************   C L E A N   U P   *****************************
******************************************************************************/

// Runs the currently installed clean-up handler and hands back its result.
BOOL CInccUTF7In::CleanUp()
{
    const BOOL fDone = (this->*m_pfnCleanUp)();

    return fDone;
}

/******************************************************************************
****************************   C O N V   M A I N   ****************************
******************************************************************************/

// Decodes one UTF-7 input byte.
//
// Every decoded character is written as two bytes through Output(), low byte
// first.  The value returned is the result of the last Output() call made for
// this byte, or TRUE when the byte produced no output.
BOOL CInccUTF7In::ConvMain(UCHAR tc)
{
    // ---- direct (unshifted) mode ------------------------------------------
    if ( !m_fUTF7Mode )
    {
        if ( tc == '+' )
        {
            // shift-in char: start a fresh Base64 run, nothing is emitted
            m_fUTF7Mode = TRUE ;
            m_nBitCount = 0 ;
            m_tcUnicode = 0 ;
            m_nOutCount = 0 ;
            return TRUE;
        }

        // plain byte: widen it to 16 bits with a zero high byte
        Output(tc);
        return Output(0);
    }

    // ---- shifted (UTF-7) mode, Base64 payload -----------------------------
    if ( IsBase64(tc) )
    {
        // append the six payload bits to the accumulator
        m_tcUnicode = m_tcUnicode << 6 | g_aBase64[tc] ;
        m_nBitCount += 6 ;

        // fewer than 16 bits pending: no complete character yet
        if ( m_nBitCount < 16 )
            return TRUE;

        // the oldest 16 pending bits form the next character
        const LONG wch = m_tcUnicode >> ( m_nBitCount - 16 ) ;

        Output( (UCHAR) wch );
        const BOOL fWritten = Output( (UCHAR) ( wch >> 8 ) );

        // one more character decoded from this run, 16 bits consumed
        m_nOutCount ++ ;
        m_nBitCount -= 16 ;
        return fWritten;
    }

    // ---- shifted (UTF-7) mode, a non-Base64 byte ends the run -------------
    BOOL fDone = TRUE;

    if ( tc == '-' )
    {
        // "+-" with nothing in between decodes to a literal '+';
        // otherwise the shift-out char '-' is simply absorbed
        if ( m_nOutCount == 0 && m_nBitCount == 0 )
        {
            Output('+');
            fDone = Output(0);
        }
    }
    else
    {
        // any other terminator is itself passed through as a character
        Output(tc);
        fDone = Output(0);
    }

    // back to direct mode; bits still pending in the accumulator are dropped
    m_fUTF7Mode = FALSE ;
    m_nBitCount = 0 ;
    m_tcUnicode = 0 ;
    m_nOutCount = 0 ;

    return fDone;
}

/******************************************************************************
************************   C L E A N   U P   M A I N   ************************
******************************************************************************/

// Clean-up handler installed by Reset().  It writes nothing and always
// reports success.
BOOL CInccUTF7In::CleanUpMain()
{
    const BOOL fDone = TRUE;

    return fDone;
}

// The decoder always reports zero unconverted bytes.
int CInccUTF7In::GetUnconvertBytes()
{
    const int cbPending = 0;

    return cbPending;
}

// Packs the decoder state into a DWORD.
//
//   0         the decoder is in direct (unshifted) mode
//   non-zero  the decoder is inside a Base64 run: the low 16 bits hold the
//             low 16 bits of the accumulator, the bit count sits from bit 16
//             upwards
//
// A shifted state that would otherwise pack to 0 is reported as 1 so that it
// stays distinguishable from direct mode; the bit count is 0 in that case.
DWORD CInccUTF7In::GetConvertMode()
{
    if ( !m_fUTF7Mode )
        return 0 ;

    const DWORD dwPacked = ( m_tcUnicode & 0xffff ) | ( m_nBitCount << 16 ) ;

    return dwPacked ? dwPacked : 1L ;
}

// Restores decoder state from a DWORD of the layout GetConvertMode() returns.
// The decoder is fully reset first; a non-zero mode then re-enters shifted
// mode and restores the bit count and the pending accumulator bits.
//
// NOTE(review): only the low 15 bits (0x7fff) are restored here although
// GetConvertMode() saves 16 (0xffff) -- confirm that this is intentional.
// NOTE(review): m_nOutCount is not part of the mode value, so it restarts at 0
// after Reset(); a '-' that arrives next with a zero bit count is then treated
// by ConvMain as the "+-" escape -- verify against callers that split input.
void CInccUTF7In::SetConvertMode(DWORD mode)
{
    Reset();

    if ( !mode )
    {
        // direct mode
        m_fUTF7Mode = FALSE ;
        return ;
    }

    // shifted mode: unpack bit count and accumulator
    m_fUTF7Mode = TRUE ;
    m_nBitCount = ( mode >> 16 ) & 0xffff ;
    m_tcUnicode = ( mode & 0x7fff );
}

// ============================================================================
// Internet Character Set Conversion: Output to UTF-7
// ============================================================================

/******************************************************************************
**************************   C O N S T R U C T O R   **************************
******************************************************************************/

// Forwards the code page and code set to the base converter, then puts the
// encoder into its initial state via Reset().
CInccUTF7Out::CInccUTF7Out(UINT uCodePage, int nCodeSet)
    : CINetCodeConverter(uCodePage, nCodeSet)
{
    Reset();
}

/******************************************************************************
*******************************   R E S E T   *********************************
******************************************************************************/

// Returns the encoder to its initial state: no half-received input character,
// not inside a Base64 run, empty bit accumulator.  m_tcFirstByte is left
// untouched; it is only read while m_fDoubleByte is TRUE.
void CInccUTF7Out::Reset()
{
    // bit accumulator
    m_tcUnicode = 0 ;
    m_nBitCount = 0 ;

    // mode flags
    m_fUTF7Mode = FALSE ;
    m_fDoubleByte = FALSE;
}

// Encodes the input one byte at a time.  Input characters arrive as two
// bytes, low byte first: the first call of a pair only stores the byte, the
// second call composes the 16-bit character and emits its UTF-7 form.
//
// Returns S_OK when the last Output() call made for this byte succeeded (or
// when nothing was written), E_FAIL otherwise.  cchSrc is not used.
HRESULT CInccUTF7Out::ConvertChar(UCHAR tc, int cchSrc)
{
    // first byte of a pair: remember it and wait for the second one
    if ( !m_fDoubleByte )
    {
        m_tcFirstByte = tc ;
        m_fDoubleByte = TRUE ;
        return S_OK;
    }

    BOOL fOk = TRUE;

    // second byte: tc is the high byte, the stored byte is the low byte
    const WORD wch = ( (WORD) tc << 8 | m_tcFirstByte ) ;

    // anything above 7-bit ASCII, or flagged 255 in the direct table,
    // has to be Base64 encoded
    BOOL fMustEncode ;
    if ( wch > 0x7f )
        fMustEncode = TRUE ;
    else
        fMustEncode = ( g_aDirectChar[(UCHAR)wch] == 255 ) ;

    if ( fMustEncode && !m_fUTF7Mode )
    {
        // shift-in char opens a Base64 run
        fOk = Output('+');

        if ( wch == '+' )
        {
            // a lone '+' is written as the two-character escape "+-"
            // and no run is opened
            fOk = Output('-');
        }
        else
        {
            m_fUTF7Mode = TRUE ;
        }
    }

    if ( m_fUTF7Mode )
    {
        if ( fMustEncode )
        {
            // queue the 16 bits of this character
            m_tcUnicode = m_tcUnicode << 16 | wch ;
            m_nBitCount += 16 ;
        }
        else if ( m_nBitCount % 6 )
        {
            // the run is ending: zero-pad the pending bits up to the next
            // multiple of six so that they fill a whole Base64 char
            const int nPad = 6 - ( m_nBitCount % 6 ) ;
            m_tcUnicode = m_tcUnicode << nPad ;
            m_nBitCount += nPad ;
        }

        // write out every complete group of six bits, oldest first;
        // stop early once an Output() call reports failure
        for ( ; m_nBitCount >= 6 && fOk ; m_nBitCount -= 6 )
        {
            const LONG lBits = m_tcUnicode >> ( m_nBitCount - 6 ) ;
            fOk = Output( g_aInvBase64[ (UCHAR) ( lBits & 0x3f ) ] );
        }

        if ( !fMustEncode )
        {
            // shift-out char closes the run before the direct character
            fOk = Output('-');

            m_fUTF7Mode = FALSE ;
            m_nBitCount = 0 ;
            m_tcUnicode = 0 ;
        }
    }

    // a directly encodable character is written as its single low byte
    if ( !fMustEncode )
        fOk = Output(m_tcFirstByte);

    // the pair has been consumed
    m_fDoubleByte = FALSE ;

    return fOk ? S_OK : E_FAIL;
}

/******************************************************************************
*****************************   C L E A N   U P   *****************************
******************************************************************************/

// Finishes the output stream.  If a Base64 run is still open, the pending
// bits are zero-padded to a whole Base64 char and written out, the shift-out
// char '-' is emitted and the encoder returns to direct mode.
//
// Returns TRUE when no run was open; otherwise the result of the final
// Output('-') call.  A stored first byte of an incomplete pair is not touched.
BOOL CInccUTF7Out::CleanUp()
{
    // nothing is pending unless a Base64 run is open
    if ( !m_fUTF7Mode )
        return TRUE;

    BOOL fOk = TRUE;

    // zero-pad the pending bits up to the next multiple of six
    if ( m_nBitCount % 6 )
    {
        const int nPad = 6 - ( m_nBitCount % 6 ) ;
        m_tcUnicode = m_tcUnicode << nPad ;
        m_nBitCount += nPad ;
    }

    // write out every complete group of six bits, oldest first;
    // stop early once an Output() call reports failure
    for ( ; m_nBitCount >= 6 && fOk ; m_nBitCount -= 6 )
    {
        const LONG lBits = m_tcUnicode >> ( m_nBitCount - 6 ) ;
        fOk = Output( g_aInvBase64[ (UCHAR) ( lBits & 0x3f ) ] );
    }

    // shift-out char closes the run
    fOk = Output('-');

    m_fUTF7Mode = FALSE ;
    m_nBitCount = 0 ;
    m_tcUnicode = 0 ;

    return fOk;
}

// Reports 1 while the first byte of an input pair has been stored and its
// second byte has not arrived yet, 0 otherwise.
int CInccUTF7Out::GetUnconvertBytes()
{
    if ( m_fDoubleByte )
        return 1 ;

    return 0 ;
}

// The encoder does not export any state: the mode value is always 0.
DWORD CInccUTF7Out::GetConvertMode()
{
    const DWORD dwMode = 0 ;

    return dwMode ;
}

// The mode value is ignored; the encoder is simply put back into its initial
// state.
void CInccUTF7Out::SetConvertMode(DWORD mode)
{
    Reset();
}