windows-nt/Source/XPSP1/NT/inetsrv/iis/iisrearc/iisplus/ulw3/ulparse.cxx

/*++

   Copyright    (c)    2000    Microsoft Corporation

   Module Name :
     ulparse.cxx

   Abstract:
     Rip some useful UL code
     
   Author:
     (RIPPED from UL driver code (HenrySa, PaulMcd)

   Environment:
     Win32 - User Mode

   Project:
     ULW3.DLL
--*/

#include "precomp.hxx"

//
// Identifies which part of a URL is currently being scanned.
//
// In the routines visible here, UlCleanAndCopyUrl starts in AbsPath and
// switches to QueryString at the first '?'.  PopChar does not unescape
// %xx sequences and does not convert '\' to '/' when handed QueryString.
// Scheme and HostName are not referenced by the routines visible here.
//
typedef enum _URL_PART
{
    Scheme,
    HostName,
    AbsPath,
    QueryString

} URL_PART;

//
// Non-zero when the top two bits of ch are 10 (10xxxxxx), i.e. ch has the
// form of a UTF-8 trail (continuation) byte.
//
#define IS_UTF8_TRAILBYTE(ch)      (((ch) & 0xc0) == 0x80)

/*++

Routine Description:

    Decodes a single "%xx" escape sequence into the byte it represents.

Arguments:

    pChar    - points at the '%' of the sequence.  pChar[1] and pChar[2]
               are only read if the preceding bytes were acceptable.

    pOutChar - receives the decoded byte on success.

Return Value:

    NTSTATUS - STATUS_SUCCESS, or STATUS_OBJECT_PATH_SYNTAX_BAD if pChar
               does not point at '%' followed by two hex digits.

--*/
NTSTATUS
Unescape(
    IN  PUCHAR pChar,
    OUT PUCHAR pOutChar
    )

{
    UCHAR Value = 0;
    UCHAR Digit;
    int   Index;

    //
    // validate in order, stopping at the first bad byte so that we never
    // look further into the buffer than necessary
    //

    if (pChar[0] != '%')
    {
        return STATUS_OBJECT_PATH_SYNTAX_BAD;
    }

    if (SAFEIsXDigit(pChar[1]) == FALSE)
    {
        return STATUS_OBJECT_PATH_SYNTAX_BAD;
    }

    if (SAFEIsXDigit(pChar[2]) == FALSE)
    {
        return STATUS_OBJECT_PATH_SYNTAX_BAD;
    }

    //
    // fold the two hex digits into one byte, high nibble first
    //

    for (Index = 1; Index <= 2; Index++)
    {
        Digit = pChar[Index];

        //
        // normalise 'a'..'f' to 'A'..'F'
        //

        if (isalpha(Digit))
        {
            Digit = (UCHAR) toupper(Digit);
        }

        if (Digit >= 'A')
        {
            Value = (UCHAR) ((Value << 4) | (Digit - 'A' + 0xA));
        }
        else
        {
            Value = (UCHAR) ((Value << 4) | (Digit - '0'));
        }
    }

    *pOutChar = Value;

    return STATUS_SUCCESS;

}   // Unescape


/*++

Routine Description:

    Pops one logical character off the front of a raw URL buffer and
    returns it as a single UCS-2 character.

    Outside of the query string a "%xx" escape is decoded.  If the decoded
    byte is a UTF-8 lead byte for a 2 or 3 byte sequence, the following
    1 or 2 trail bytes must also be "%xx" escaped and are consumed too.
    A backslash is converted to a forward slash.  Inside the query string
    bytes are passed through without unescaping.

    The following are rejected:
        - a malformed "%xx" escape
        - any byte with the high bit set that is not a properly escaped
          2 or 3 byte UTF-8 sequence
        - overlong UTF-8 encodings (e.g. "%c0%af" for '/'), which are not
          valid UTF-8 and would otherwise alias plain ASCII characters
        - a NUL character

Arguments:

    UrlPart      - part of the URL being scanned; only the distinction
                   between QueryString and everything else matters here.

    pChar        - points at the next unread source byte.  No length is
                   passed in: up to 9 bytes (three "%xx" groups) may be
                   examined, each one only if all bytes before it matched.
                   NOTE(review): this presumably relies on the source
                   being terminated by a byte that is neither '%' nor a
                   hex digit - confirm against the callers.

    pUnicodeChar - receives the decoded character on success.

    pCharToSkip  - receives the number of source bytes consumed
                   (1, 3, 6 or 9) on success.

Return Value:

    NTSTATUS - STATUS_SUCCESS, or STATUS_OBJECT_PATH_SYNTAX_BAD.  The
               output parameters are not written on failure.

--*/
NTSTATUS
PopChar(
    IN URL_PART UrlPart,
    IN PUCHAR pChar,
    OUT WCHAR * pUnicodeChar,
    OUT PULONG pCharToSkip
    )
{
    NTSTATUS Status;
    WCHAR   UnicodeChar;
    UCHAR   Char;
    UCHAR   Trail1;
    UCHAR   Trail2;
    ULONG   CharToSkip;

    //
    // need to unescape ?
    //
    // can't decode the query string.  that would be lossy decoding
    // as '=' and '&' characters might be encoded, but have meaning
    // to the usermode parser.
    //

    if (UrlPart != QueryString && pChar[0] == '%')
    {
        Status = Unescape(pChar, &Char);
        if (NT_SUCCESS(Status) == FALSE)
            goto end;
        CharToSkip = 3;
    }
    else
    {
        Char = pChar[0];
        CharToSkip = 1;
    }

    //
    // convert to unicode, checking for utf8 .
    //
    // 3 byte runs are the largest we can have.  16 bits in UCS-2 =
    // 3 bytes of (4+4,2+6,2+6) where it's code + char.
    // for a total of 6+6+4 char bits = 16 bits.
    //

    //
    // NOTE: we'll only bother to decode utf if it was escaped
    // thus the (CharToSkip == 3)
    //
    if ((CharToSkip == 3) && ((Char & 0xf0) == 0xe0))
    {
        // 3 byte run
        //

        // Unescape the next 2 trail bytes
        //

        Status = Unescape(pChar+CharToSkip, &Trail1);
        if (NT_SUCCESS(Status) == FALSE)
            goto end;

        CharToSkip += 3; // %xx

        Status = Unescape(pChar+CharToSkip, &Trail2);
        if (NT_SUCCESS(Status) == FALSE)
            goto end;

        CharToSkip += 3; // %xx

        if (IS_UTF8_TRAILBYTE(Trail1) == FALSE ||
            IS_UTF8_TRAILBYTE(Trail2) == FALSE)
        {
            // bad utf!
            //
            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
            goto end;
        }

        // handle three byte case
        // 1110xxxx 10xxxxxx 10xxxxxx

        UnicodeChar = (USHORT) (((Char & 0x0f) << 12) |
                                ((Trail1 & 0x3f) << 6) |
                                (Trail2 & 0x3f));

        //
        // anything below U+0800 fits in 1 or 2 bytes, so a 3 byte form
        // of it is an overlong encoding.  reject it, otherwise things
        // like "%e0%80%af" would sneak through as '/'.
        //

        if (UnicodeChar < 0x0800)
        {
            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
            goto end;
        }

    }
    else if ((CharToSkip == 3) && ((Char & 0xe0) == 0xc0))
    {
        // 2 byte run
        //

        // Unescape the next 1 trail byte
        //

        Status = Unescape(pChar+CharToSkip, &Trail1);
        if (NT_SUCCESS(Status) == FALSE)
            goto end;

        CharToSkip += 3; // %xx

        if (IS_UTF8_TRAILBYTE(Trail1) == FALSE)
        {
            // bad utf!
            //
            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
            goto end;
        }

        // handle two byte case
        // 110xxxxx 10xxxxxx

        UnicodeChar = (USHORT) (((Char & 0x1f) << 6) |
                                (Trail1 & 0x3f));

        //
        // anything below U+0080 is 7-bit ascii and must be sent as a
        // single byte.  a 2 byte form of it (e.g. "%c0%af" for '/') is
        // an overlong encoding.  reject it.
        //

        if (UnicodeChar < 0x0080)
        {
            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
            goto end;
        }

    }

    // now this can either be unescaped high-bit (bad)
    // or escaped high-bit.  (also bad)
    //
    // thus not checking CharToSkip
    //

    else if ((Char & 0x80) == 0x80)
    {
        // high bit set !  bad utf!
        //
        Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
        goto end;

    }
    //
    // Normal character (again either escaped or unescaped)
    //
    else
    {
        //
        // Simple conversion to unicode, it's 7-bit ascii.
        //

        UnicodeChar = (USHORT)Char;
    }

    //
    // turn backslashes into forward slashes
    //

    if (UrlPart != QueryString && UnicodeChar == L'\\')
    {
        UnicodeChar = L'/';
    }
    else if (UnicodeChar == 0)
    {
        //
        // we pop'd a NULL.  bad!
        //
        Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
        goto end;
    }

    *pCharToSkip  = CharToSkip;
    *pUnicodeChar = UnicodeChar;

    Status = STATUS_SUCCESS;

end:
    return Status;

}   // PopChar

//
//  Private constants.
//

//
// Action codes stored in pActionTable and dispatched on by the switch in
// UlCleanAndCopyUrl.  "CH" is the character that was just popped.
//
#define ACTION_NOTHING              0x00000000  // emit nothing
#define ACTION_EMIT_CH              0x00010000  // emit CH
#define ACTION_EMIT_DOT_CH          0x00020000  // emit "." then CH
#define ACTION_EMIT_DOT_DOT_CH      0x00030000  // emit ".." then CH
#define ACTION_BACKUP               0x00040000  // drop the last path segment
#define ACTION_MASK                 0xFFFF0000  // not referenced by the code visible here

//
// Private globals
//

//
// this table says what to do based on the current state and the current
// character
//
// Layout: four entries per state, indexed as (state * 4) + character class,
// where the character class is the offset UlCleanAndCopyUrl adds:
//      0 = any other character
//      1 = "."
//      2 = EOS (end of source, or the '?' that starts the query string)
//      3 = "/"
//
// The "state = N" note on each entry is the state that follows, as recorded
// in the matching pNextStateTable entry.
//
ULONG  pActionTable[16] =
{
    //
    // state 0 = fresh, seen nothing exciting yet
    //
    ACTION_EMIT_CH,         // other = emit it                      state = 0
    ACTION_EMIT_CH,         // "."   = emit it                      state = 0
    ACTION_NOTHING,         // EOS   = normal finish                state = 4
    ACTION_EMIT_CH,         // "/"   = we saw the "/", emit it      state = 1

    //
    // state 1 = we saw a "/" !
    //
    ACTION_EMIT_CH,         // other = emit it,                     state = 0
    ACTION_NOTHING,         // "."   = eat it,                      state = 2
    ACTION_NOTHING,         // EOS   = normal finish                state = 4
    ACTION_NOTHING,         // "/"   = extra slash, eat it,         state = 1

    //
    // state 2 = we saw a "/" and ate a "." !
    //
    ACTION_EMIT_DOT_CH,     // other = emit the dot we ate.         state = 0
    ACTION_NOTHING,         // "."   = eat it, a ..                 state = 3
    ACTION_NOTHING,         // EOS   = normal finish                state = 4
    ACTION_NOTHING,         // "/"   = we ate a "/./", swallow it   state = 1

    //
    // state 3 = we saw a "/" and ate a ".." !
    //
    ACTION_EMIT_DOT_DOT_CH, // other = emit the "..".               state = 0
    ACTION_EMIT_DOT_DOT_CH, // "."   = 3 dots, emit the ".."        state = 0
    ACTION_BACKUP,          // EOS   = we have a "/..\0", backup!   state = 4
    ACTION_BACKUP           // "/"   = we have a "/../", backup!    state = 1
};

//
// this table says which newstate to be in given the current state and the
// character we saw
//
// It is indexed exactly like pActionTable: (state * 4) + character class,
// with class 0 = other, 1 = ".", 2 = EOS, 3 = "/".  Backslashes never show
// up here as such: PopChar has already turned them into "/" for the path.
//
// State 4 means "finished".  It has no row of its own in either table;
// UlCleanAndCopyUrl does not consult this table again after an EOS
// transition (the loop ends, or canonical processing is switched off once
// the query string starts).
//
ULONG  pNextStateTable[16] =
{
    // state 0
    0 ,             // other
    0 ,             // "."
    4 ,             // EOS
    1 ,             // "/"

    //  state 1
    0 ,              // other
    2 ,             // "."
    4 ,             // EOS
    1 ,             // "/"

    // state 2
    0 ,             // other
    3 ,             // "."
    4 ,             // EOS
    1 ,             // "/"

    // state 3
    0 ,             // other
    0 ,             // "."
    4 ,             // EOS
    1               // "/"
};

//
// this says how to index into pNextStateTable given our current state.
//
// since max states = 4, we calculate the index by multiplying with 4.
//
#define IndexFromState( st)   ( (st) * 4)


/***************************************************************************++

Routine Description:

    Copies the absolute path (and optional query string) of a raw URL into
    a wide character buffer, cleaning it up on the way:

    Unescape
    Convert backslash to forward slash
    Remove double slashes (empty directory names) - e.g. // or \\
    Handle /./
    Handle /../
    Convert to unicode

    Everything from the first '?' onwards is the query string.  It is
    copied without unescaping and without canonicalization.

Arguments:

    pSource         - raw URL bytes, starting at the absolute path.
                      NOTE(review): PopChar may peek a few bytes beyond
                      SourceLength while checking for "%xx" escapes, so
                      this presumably needs to be a terminated buffer -
                      confirm against the callers.

    SourceLength    - number of bytes in pSource to process.

    pBytesCopied    - receives the number of BYTES (2 per character)
                      written to pDestination, not counting the
                      terminating UNICODE_NULL.

    pDestination    - receives the cleaned, UNICODE_NULL terminated URL.
                      No size is passed in.  Each emitted character
                      corresponds to at least one consumed source byte, so
                      SourceLength + 1 WCHARs is presumably what callers
                      are expected to provide - confirm against the callers.

    ppQueryString   - optional.  receives a pointer to the '?' inside
                      pDestination, or NULL if there is no query string.

Return Value:

    HRESULT - the NTSTATUS of the operation mapped through
              RtlNtStatusToDosError and HRESULT_FROM_WIN32.  On failure
              pBytesCopied and ppQueryString are not written, and
              pDestination may hold a partial, unterminated result.


--***************************************************************************/
HRESULT
UlCleanAndCopyUrl(
    IN      PUCHAR      pSource,
    IN      ULONG       SourceLength,
    OUT     PULONG      pBytesCopied,
    OUT     PWSTR       pDestination,
    OUT     PWSTR *     ppQueryString OPTIONAL
    )
{
    NTSTATUS Status;
    PWSTR   pDest;
    PUCHAR  pChar;
    ULONG   CharToSkip;
    ULONG   BytesCopied;
    PWSTR   pQueryString;
    ULONG   StateIndex;
    WCHAR   UnicodeChar;
    BOOLEAN MakeCanonical;
    URL_PART UrlPart = AbsPath;

//
// a cool local helper macro
//

#define EMIT_CHAR(ch)                                   \
    do {                                                \
        pDest[0] = (ch);                                \
        pDest += 1;                                     \
        BytesCopied += 2;                               \
    } while (0)


    pDest = pDestination;
    pQueryString = NULL;
    BytesCopied = 0;

    pChar = pSource;
    CharToSkip = 0;

    StateIndex = 0;

    MakeCanonical = (UrlPart == AbsPath) ? TRUE : FALSE;

    while (SourceLength > 0)
    {
        //
        // advance !  it's at the top of the loop to enable ANSI_NULL to
        // come through ONCE
        //

        pChar += CharToSkip;
        SourceLength -= CharToSkip;

        //
        // well?  have we hit the end?
        //

        if (SourceLength == 0)
        {
            UnicodeChar = UNICODE_NULL;
        }
        else
        {
            //
            // Nope.  Peek briefly to see if we hit the query string
            //

            if (UrlPart == AbsPath && pChar[0] == '?')
            {
                DBG_ASSERT(pQueryString == NULL);

                //
                // don't remember its location yet: the canonical step
                // below may still back pDest up over a trailing "/..".
                // it is recorded further down, right before the '?'
                // is emitted.
                //

                //
                // let it fall through ONCE to the canonical
                // in order to handle a trailing "/.." like
                // "http://foobar:80/foo/bar/..?v=1&v2"
                //

                UnicodeChar = L'?';
                CharToSkip = 1;

                //
                // now we are cleaning the query string
                //

                UrlPart = QueryString;
            }
            else
            {
                //
                // grab the next char
                //

                Status = PopChar(UrlPart, pChar, &UnicodeChar, &CharToSkip);
                if (NT_SUCCESS(Status) == FALSE)
                    goto end;

                //
                // PopChar doesn't know where the source ends.  if the
                // escape run it consumed spills past the end, fail now:
                // otherwise SourceLength would wrap around at the top of
                // the loop and we would run off both buffers.
                //

                if (CharToSkip > SourceLength)
                {
                    Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
                    goto end;
                }
            }
        }

        if (MakeCanonical)
        {
            //
            // now use the state machine to make it canonical .
            //

            //
            // from the old value of StateIndex, figure out our new base StateIndex
            //
            StateIndex = IndexFromState(pNextStateTable[StateIndex]);

            //
            // did we just hit the query string?  this will only happen once
            // that we take this branch after hitting it, as we stop
            // processing after hitting it.
            //

            if (UrlPart == QueryString)
            {
                //
                // treat this just like we hit a NULL, EOS.
                //

                StateIndex += 2;
            }
            else
            {
                //
                // otherwise based the new state off of the char we
                // just popped.
                //

                switch (UnicodeChar)
                {
                case UNICODE_NULL:      StateIndex += 2;    break;
                case L'.':              StateIndex += 1;    break;
                case L'/':              StateIndex += 3;    break;
                default:                StateIndex += 0;    break;
                }
            }

        }
        else
        {
            //
            // not canonicalizing (query string): index straight into
            // state 0, which emits everything except EOS
            //

            StateIndex = (UnicodeChar == UNICODE_NULL) ? 2 : 0;
        }

        //
        //  Perform the action associated with the state.
        //

        switch (pActionTable[StateIndex])
        {
        case ACTION_EMIT_DOT_DOT_CH:

            EMIT_CHAR(L'.');

            // fall through

        case ACTION_EMIT_DOT_CH:

            EMIT_CHAR(L'.');

            // fall through

        case ACTION_EMIT_CH:

            EMIT_CHAR(UnicodeChar);

            // fall through

        case ACTION_NOTHING:
            break;

        case ACTION_BACKUP:

            //
            // pDest currently points 1 past the last '/'.  backup over it and
            // find the preceding '/', set pDest to 1 past that one.
            //
            // NOTE(review): this assumes the path starts with '/'.  for
            // input such as "a/.." the asserts below fire in debug builds
            // and retail builds quietly produce a mangled result - confirm
            // that callers never pass a relative path.
            //

            //
            // backup to the '/'
            //

            pDest       -= 1;
            BytesCopied -= 2;

            DBG_ASSERT(pDest[0] == L'/');

            //
            // are we at the start of the string?  that's bad, can't go back!
            //

            if (pDest == pDestination)
            {
                DBG_ASSERT(BytesCopied == 0);
                Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
                goto end;
            }

            //
            // back up over the '/'
            //

            pDest       -= 1;
            BytesCopied -= 2;

            DBG_ASSERT(pDest > pDestination);

            //
            // now find the previous slash
            //

            while (pDest > pDestination && pDest[0] != L'/')
            {
                pDest       -= 1;
                BytesCopied -= 2;
            }

            //
            // we already have a slash, so don't have to store 1.
            //

            DBG_ASSERT(pDest[0] == L'/');

            //
            // simply skip it, as if we had emitted it just now
            //

            pDest       += 1;
            BytesCopied += 2;

            break;

        default:
            DBG_ASSERT(!"w3core!UlpCleanAndCopyUrl: Invalid action code in state table!");
            Status = STATUS_OBJECT_PATH_SYNTAX_BAD;
            goto end;
        }

        //
        // Just hit the query string ?
        //

        if (MakeCanonical && UrlPart == QueryString)
        {
            //
            // Stop canonical processing
            //

            MakeCanonical = FALSE;

            //
            // Need to emit the '?', it wasn't emitted above
            //

            DBG_ASSERT(pActionTable[StateIndex] != ACTION_EMIT_CH);

            //
            // remember its location.  this has to happen here, after the
            // action above has run, because a trailing "/.." moves pDest
            // back and the '?' lands at the new position.
            //

            pQueryString = pDest;

            EMIT_CHAR(L'?');

        }

    }

    //
    // terminate the string, it hasn't been done in the loop
    //
    // (nothing precedes pDest when nothing was emitted, e.g. for an empty
    // source, so only look at the previous character if there is one)
    //

    DBG_ASSERT(pDest == pDestination || (pDest-1)[0] != UNICODE_NULL);

    pDest[0] = UNICODE_NULL;

    *pBytesCopied = BytesCopied;
    if (ppQueryString != NULL)
    {
        *ppQueryString = pQueryString;
    }

    Status = STATUS_SUCCESS;


end:
    return HRESULT_FROM_WIN32( RtlNtStatusToDosError( Status ) );

} // UlCleanAndCopyUrl
Add source files 2020-09-26 03:20:57 -05:00			`/*++`

			`Copyright (c) 2000 Microsoft Corporation`

			`Module Name :`
			`ulparse.cxx`

			`Abstract:`
			`Rip some useful UL code`

			`Author:`
			`(RIPPED from UL driver code (HenrySa, PaulMcd)`

			`Environment:`
			`Win32 - User Mode`

			`Project:`
			`ULW3.DLL`
			`--*/`

			`#include "precomp.hxx"`

			`typedef enum _URL_PART`
			`{`
			`Scheme,`
			`HostName,`
			`AbsPath,`
			`QueryString`

			`} URL_PART;`

			`#define IS_UTF8_TRAILBYTE(ch) (((ch) & 0xc0) == 0x80)`

			`NTSTATUS`
			`Unescape(`
			`IN PUCHAR pChar,`
			`OUT PUCHAR pOutChar`
			`)`

			`{`
			`UCHAR Result, Digit;`

			`if (pChar[0] != '%' \|\|`
			`SAFEIsXDigit(pChar[1]) == FALSE \|\|`
			`SAFEIsXDigit(pChar[2]) == FALSE)`
			`{`
			`return STATUS_OBJECT_PATH_SYNTAX_BAD;`
			`}`

			`//`
			`// HexToChar() inlined`
			`//`

			`// uppercase #1`
			`//`
			`if (isalpha(pChar[1]))`
			`Digit = (UCHAR) toupper(pChar[1]);`
			`else`
			`Digit = pChar[1];`

			`Result = ((Digit >= 'A') ? (Digit - 'A' + 0xA) : (Digit - '0')) << 4;`

			`// uppercase #2`
			`//`
			`if (isalpha(pChar[2]))`
			`Digit = (UCHAR) toupper(pChar[2]);`
			`else`
			`Digit = pChar[2];`

			`Result \|= (Digit >= 'A') ? (Digit - 'A' + 0xA) : (Digit - '0');`

			`*pOutChar = Result;`

			`return STATUS_SUCCESS;`

			`} // Unescape`


			`NTSTATUS`
			`PopChar(`
			`IN URL_PART UrlPart,`
			`IN PUCHAR pChar,`
			`OUT WCHAR * pUnicodeChar,`
			`OUT PULONG pCharToSkip`
			`)`
			`{`
			`NTSTATUS Status;`
			`WCHAR UnicodeChar;`
			`UCHAR Char;`
			`UCHAR Trail1;`
			`UCHAR Trail2;`
			`ULONG CharToSkip;`

			`//`
			`// need to unescape ?`
			`//`
			`// can't decode the query string. that would be lossy decodeing`
			`// as '=' and '&' characters might be encoded, but have meaning`
			`// to the usermode parser.`
			`//`

			`if (UrlPart != QueryString && pChar[0] == '%')`
			`{`
			`Status = Unescape(pChar, &Char);`
			`if (NT_SUCCESS(Status) == FALSE)`
			`goto end;`
			`CharToSkip = 3;`
			`}`
			`else`
			`{`
			`Char = pChar[0];`
			`CharToSkip = 1;`
			`}`

			`//`
			`// convert to unicode, checking for utf8 .`
			`//`
			`// 3 byte runs are the largest we can have. 16 bits in UCS-2 =`
			`// 3 bytes of (4+4,2+6,2+6) where it's code + char.`
			`// for a total of 6+6+4 char bits = 16 bits.`
			`//`

			`//`
			`// NOTE: we'll only bother to decode utf if it was escaped`
			`// thus the (CharToSkip == 3)`
			`//`
			`if ((CharToSkip == 3) && ((Char & 0xf0) == 0xe0))`
			`{`
			`// 3 byte run`
			`//`

			`// Unescape the next 2 trail bytes`
			`//`

			`Status = Unescape(pChar+CharToSkip, &Trail1);`
			`if (NT_SUCCESS(Status) == FALSE)`
			`goto end;`

			`CharToSkip += 3; // %xx`

			`Status = Unescape(pChar+CharToSkip, &Trail2);`
			`if (NT_SUCCESS(Status) == FALSE)`
			`goto end;`

			`CharToSkip += 3; // %xx`

			`if (IS_UTF8_TRAILBYTE(Trail1) == FALSE \|\|`
			`IS_UTF8_TRAILBYTE(Trail2) == FALSE)`
			`{`
			`// bad utf!`
			`//`
			`Status = STATUS_OBJECT_PATH_SYNTAX_BAD;`
			`goto end;`
			`}`

			`// handle three byte case`
			`// 1110xxxx 10xxxxxx 10xxxxxx`

			`UnicodeChar = (USHORT) (((Char & 0x0f) << 12) \|`
			`((Trail1 & 0x3f) << 6) \|`
			`(Trail2 & 0x3f));`

			`}`
			`else if ((CharToSkip == 3) && ((Char & 0xe0) == 0xc0))`
			`{`
			`// 2 byte run`
			`//`

			`// Unescape the next 1 trail byte`
			`//`

			`Status = Unescape(pChar+CharToSkip, &Trail1);`
			`if (NT_SUCCESS(Status) == FALSE)`
			`goto end;`

			`CharToSkip += 3; // %xx`

			`if (IS_UTF8_TRAILBYTE(Trail1) == FALSE)`
			`{`
			`// bad utf!`
			`//`
			`Status = STATUS_OBJECT_PATH_SYNTAX_BAD;`
			`goto end;`
			`}`

			`// handle two byte case`
			`// 110xxxxx 10xxxxxx`

			`UnicodeChar = (USHORT) (((Char & 0x1f) << 6) \|`
			`(Trail1 & 0x3f));`

			`}`

			`// now this can either be unescaped high-bit (bad)`
			`// or escaped high-bit. (also bad)`
			`//`
			`// thus not checking CharToSkip`
			`//`

			`else if ((Char & 0x80) == 0x80)`
			`{`
			`// high bit set ! bad utf!`
			`//`
			`Status = STATUS_OBJECT_PATH_SYNTAX_BAD;`
			`goto end;`

			`}`
			`//`
			`// Normal character (again either escaped or unescaped)`
			`//`
			`else`
			`{`
			`//`
			`// Simple conversion to unicode, it's 7-bit ascii.`
			`//`

			`UnicodeChar = (USHORT)Char;`
			`}`

			`//`
			`// turn backslashes into forward slashes`
			`//`

			`if (UrlPart != QueryString && UnicodeChar == L'\\')`
			`{`
			`UnicodeChar = L'/';`
			`}`
			`else if (UnicodeChar == 0)`
			`{`
			`//`
			`// we pop'd a NULL. bad!`
			`//`
			`Status = STATUS_OBJECT_PATH_SYNTAX_BAD;`
			`goto end;`
			`}`

			`*pCharToSkip = CharToSkip;`
			`*pUnicodeChar = UnicodeChar;`

			`Status = STATUS_SUCCESS;`

			`end:`
			`return Status;`

			`} // PopChar`

			`//`
			`// Private constants.`
			`//`

			`#define ACTION_NOTHING 0x00000000`
			`#define ACTION_EMIT_CH 0x00010000`
			`#define ACTION_EMIT_DOT_CH 0x00020000`
			`#define ACTION_EMIT_DOT_DOT_CH 0x00030000`
			`#define ACTION_BACKUP 0x00040000`
			`#define ACTION_MASK 0xFFFF0000`

			`//`
			`// Private globals`
			`//`

			`//`
			`// this table says what to do based on the current state and the current`
			`// character`
			`//`
			`ULONG pActionTable[16] =`
			`{`
			`//`
			`// state 0 = fresh, seen nothing exciting yet`
			`//`
			`ACTION_EMIT_CH, // other = emit it state = 0`
			`ACTION_EMIT_CH, // "." = emit it state = 0`
			`ACTION_NOTHING, // EOS = normal finish state = 4`
			`ACTION_EMIT_CH, // "/" = we saw the "/", emit it state = 1`

			`//`
			`// state 1 = we saw a "/" !`
			`//`
			`ACTION_EMIT_CH, // other = emit it, state = 0`
			`ACTION_NOTHING, // "." = eat it, state = 2`
			`ACTION_NOTHING, // EOS = normal finish state = 4`
			`ACTION_NOTHING, // "/" = extra slash, eat it, state = 1`

			`//`
			`// state 2 = we saw a "/" and ate a "." !`
			`//`
			`ACTION_EMIT_DOT_CH, // other = emit the dot we ate. state = 0`
			`ACTION_NOTHING, // "." = eat it, a .. state = 3`
			`ACTION_NOTHING, // EOS = normal finish state = 4`
			`ACTION_NOTHING, // "/" = we ate a "/./", swallow it state = 1`

			`//`
			`// state 3 = we saw a "/" and ate a ".." !`
			`//`
			`ACTION_EMIT_DOT_DOT_CH, // other = emit the "..". state = 0`
			`ACTION_EMIT_DOT_DOT_CH, // "." = 3 dots, emit the ".." state = 0`
			`ACTION_BACKUP, // EOS = we have a "/..\0", backup! state = 4`
			`ACTION_BACKUP // "/" = we have a "/../", backup! state = 1`
			`};`

			`//`
			`// this table says which newstate to be in given the current state and the`
			`// character we saw`
			`//`
			`ULONG pNextStateTable[16] =`
			`{`
			`// state 0`
			`0 , // other`
			`0 , // "."`
			`4 , // EOS`
			`1 , // "\"`

			`// state 1`
			`0 , // other`
			`2 , // "."`
			`4 , // EOS`
			`1 , // "\"`

			`// state 2`
			`0 , // other`
			`3 , // "."`
			`4 , // EOS`
			`1 , // "\"`

			`// state 3`
			`0 , // other`
			`0 , // "."`
			`4 , // EOS`
			`1 // "\"`
			`};`

			`//`
			`// this says how to index into pNextStateTable given our current state.`
			`//`
			`// since max states = 4, we calculate the index by multiplying with 4.`
			`//`
			`#define IndexFromState( st) ( (st) * 4)`




			`/***************************************************************************++`

			`Routine Description:`


			`Unescape`
			`Convert backslash to forward slash`
			`Remove double slashes (empty directiories names) - e.g. // or \\`
			`Handle /./`
			`Handle /../`
			`Convert to unicode`

			`Arguments:`

			`Return Value:`

			`HRESULT`


			`--***************************************************************************/`
			`HRESULT`
			`UlCleanAndCopyUrl(`
			`IN PUCHAR pSource,`
			`IN ULONG SourceLength,`
			`OUT PULONG pBytesCopied,`
			`OUT PWSTR pDestination,`
			`OUT PWSTR * ppQueryString OPTIONAL`
			`)`
			`{`
			`NTSTATUS Status;`
			`PWSTR pDest;`
			`PUCHAR pChar;`
			`ULONG CharToSkip;`
			`UCHAR Char;`
			`ULONG BytesCopied;`
			`PWSTR pQueryString;`
			`ULONG StateIndex;`
			`WCHAR UnicodeChar;`
			`BOOLEAN MakeCanonical;`
			`URL_PART UrlPart = AbsPath;`

			`//`
			`// a cool local helper macro`
			`//`

			`#define EMIT_CHAR(ch) \`
			`do { \`
			`pDest[0] = (ch); \`
			`pDest += 1; \`
			`BytesCopied += 2; \`
			`} while (0)`


			`pDest = pDestination;`
			`pQueryString = NULL;`
			`BytesCopied = 0;`

			`pChar = pSource;`
			`CharToSkip = 0;`

			`StateIndex = 0;`

			`MakeCanonical = (UrlPart == AbsPath) ? TRUE : FALSE;`

			`while (SourceLength > 0)`
			`{`
			`//`
			`// advance ! it's at the top of the loop to enable ANSI_NULL to`
			`// come through ONCE`
			`//`

			`pChar += CharToSkip;`
			`SourceLength -= CharToSkip;`

			`//`
			`// well? have we hit the end?`
			`//`

			`if (SourceLength == 0)`
			`{`
			`UnicodeChar = UNICODE_NULL;`
			`}`
			`else`
			`{`
			`//`
			`// Nope. Peek briefly to see if we hit the query string`
			`//`

			`if (UrlPart == AbsPath && pChar[0] == '?')`
			`{`
			`DBG_ASSERT(pQueryString == NULL);`

			`//`
			`// remember it's location`
			`//`

			`pQueryString = pDest;`

			`//`
			`// let it fall through ONCE to the canonical`
			`// in order to handle a trailing "/.." like`
			`// "http://foobar:80/foo/bar/..?v=1&v2"`
			`//`

			`UnicodeChar = L'?';`
			`CharToSkip = 1;`

			`//`
			`// now we are cleaning the query string`
			`//`

			`UrlPart = QueryString;`
			`}`
			`else`
			`{`
			`//`
			`// grab the next char`
			`//`

			`Status = PopChar(UrlPart, pChar, &UnicodeChar, &CharToSkip);`
			`if (NT_SUCCESS(Status) == FALSE)`
			`goto end;`
			`}`
			`}`

			`if (MakeCanonical)`
			`{`
			`//`
			`// now use the state machine to make it canonical .`
			`//`

			`//`
			`// from the old value of StateIndex, figure out our new base StateIndex`
			`//`
			`StateIndex = IndexFromState(pNextStateTable[StateIndex]);`

			`//`
			`// did we just hit the query string? this will only happen once`
			`// that we take this branch after hitting it, as we stop`
			`// processing after hitting it.`
			`//`

			`if (UrlPart == QueryString)`
			`{`
			`//`
			`// treat this just like we hit a NULL, EOS.`
			`//`

			`StateIndex += 2;`
			`}`
			`else`
			`{`
			`//`
			`// otherwise based the new state off of the char we`
			`// just popped.`
			`//`

			`switch (UnicodeChar)`
			`{`
			`case UNICODE_NULL: StateIndex += 2; break;`
			`case L'.': StateIndex += 1; break;`
			`case L'/': StateIndex += 3; break;`
			`default: StateIndex += 0; break;`
			`}`
			`}`

			`}`
			`else`
			`{`
			`StateIndex = (UnicodeChar == UNICODE_NULL) ? 2 : 0;`
			`}`

			`//`
			`// Perform the action associated with the state.`
			`//`

			`switch (pActionTable[StateIndex])`
			`{`
			`case ACTION_EMIT_DOT_DOT_CH:`

			`EMIT_CHAR(L'.');`

			`// fall through`

			`case ACTION_EMIT_DOT_CH:`

			`EMIT_CHAR(L'.');`

			`// fall through`

			`case ACTION_EMIT_CH:`

			`EMIT_CHAR(UnicodeChar);`

			`// fall through`

			`case ACTION_NOTHING:`
			`break;`

			`case ACTION_BACKUP:`

			`//`
			`// pDest currently points 1 past the last '/'. backup over it and`
			`// find the preceding '/', set pDest to 1 past that one.`
			`//`

			`//`
			`// backup to the '/'`
			`//`

			`pDest -= 1;`
			`BytesCopied -= 2;`

			`DBG_ASSERT(pDest[0] == L'/');`

			`//`
			`// are we at the start of the string? that's bad, can't go back!`
			`//`

			`if (pDest == pDestination)`
			`{`
			`DBG_ASSERT(BytesCopied == 0);`
			`Status = STATUS_OBJECT_PATH_SYNTAX_BAD;`
			`goto end;`
			`}`

			`//`
			`// back up over the '/'`
			`//`

			`pDest -= 1;`
			`BytesCopied -= 2;`

			`DBG_ASSERT(pDest > pDestination);`

			`//`
			`// now find the previous slash`
			`//`

			`while (pDest > pDestination && pDest[0] != L'/')`
			`{`
			`pDest -= 1;`
			`BytesCopied -= 2;`
			`}`

			`//`
			`// we already have a slash, so don't have to store 1.`
			`//`

			`DBG_ASSERT(pDest[0] == L'/');`

			`//`
			`// simply skip it, as if we had emitted it just now`
			`//`

			`pDest += 1;`
			`BytesCopied += 2;`

			`break;`

			`default:`
			`DBG_ASSERT(!"w3core!UlpCleanAndCopyUrl: Invalid action code in state table!");`
			`Status = STATUS_OBJECT_PATH_SYNTAX_BAD;`
			`goto end;`
			`}`

			`//`
			`// Just hit the query string ?`
			`//`

			`if (MakeCanonical && UrlPart == QueryString)`
			`{`
			`//`
			`// Stop canonical processing`
			`//`

			`MakeCanonical = FALSE;`

			`//`
			`// Need to emit the '?', it wasn't emitted above`
			`//`

			`DBG_ASSERT(pActionTable[StateIndex] != ACTION_EMIT_CH);`

			`EMIT_CHAR(L'?');`

			`}`

			`}`

			`//`
			`// terminate the string, it hasn't been done in the loop`
			`//`

			`DBG_ASSERT((pDest-1)[0] != UNICODE_NULL);`

			`pDest[0] = UNICODE_NULL;`

			`*pBytesCopied = BytesCopied;`
			`if (ppQueryString != NULL)`
			`{`
			`*ppQueryString = pQueryString;`
			`}`

			`Status = STATUS_SUCCESS;`


			`end:`
			`return HRESULT_FROM_WIN32( RtlNtStatusToDosError( Status ) );`

			`} // UlCleanAndCopyUrl`