windows-nt/Source/XPSP1/NT/inetsrv/iis/svcs/infocomm/kisfecnv/detjpncs.c
2020-09-26 16:20:57 +08:00

235 lines
8.7 KiB
C

// File Name: detjpncs.c
// Owner: Tetsuhide Akaishi
// Revision: 1.00 02/21/'93 Tetsuhide Akaishi
//
// Modified by v-chikos
#include "win32.h"
#include "fechrcnv.h"
// The DetectJPNCode function determines which Japanese code set, if any, is
// used in a character string.
//
//
// UCHAR *string Points to the character string to be checked.
//
// int count Specifies the size in bytes of the string pointed
// to by the string parameter.
//
// Return Value
// The function returns one of the following values.
//
// Value Meaning
// CODE_ONLY_SBCS There are no Japanese characters in the
// string.
// CODE_JPN_JIS JIS Code Set. There are JIS Code Set
// characters in the string.
// CODE_JPN_EUC EUC Code Set. There are EUC Code Set
// characters in the string.
// CODE_JPN_SJIS Shift JIS Code Set. There are Shift JIS
// Code Set characters in the string.
//
//
// Note: CODE_UNKNOWN == CODE_ONLY_SBCS
// added by v-chikos for IIS 2.0J
// GetNextChar(r)
//
// Steps the enclosing scan forward by one byte.  The macro works on the
// variables `count`, `string` and `c` of the function it is expanded in:
// `count` is decremented first; if that leaves no bytes, the enclosing
// function returns (r); otherwise `string` is advanced and the byte it
// now points at is stored in `c`.
//
// The expansion is a brace-enclosed block, so it is written at the call
// site without a trailing semicolon.
#define GetNextChar(r) \
{ \
    if ( --count == 0 ) \
        return (r); \
    c = *++string; \
}
// DetectJPNCode
//
// Scans the buffer one byte at a time and returns as soon as a byte
// sequence settles which Japanese code set is in use.  Sequences that are
// accepted by both the Shift JIS and the EUC tests are skipped and the scan
// resumes behind them.
//
// Parameters
//   string   Points to the bytes to be checked.  A null pointer is treated
//            like an empty buffer.
//   count    Number of bytes available at string.  Nothing is scanned when
//            count <= 0.
//
// Return Value
//   CODE_JPN_JIS    The first ESC in the buffer starts one of the escape
//                   sequences tested below (ESC $ B, ESC $ @, ESC ( B,
//                   ESC ( J).
//   CODE_JPN_SJIS   A sequence accepted only by the Shift JIS tests was found.
//   CODE_JPN_EUC    A sequence accepted only by the EUC tests was found.
//   CODE_UNKNOWN    The first ESC is not followed by one of the tested
//                   sequences (or fewer than three bytes remain at the ESC),
//                   an illegal sequence was found, or the buffer ended right
//                   after a lead byte of case 2 or case 5.
//   CODE_ONLY_SBCS  The end of the buffer was reached without a decision.
//
// Lead byte classes used by the main loop (case numbers match the branch
// comments):
//
//   0x81-0x8d, 0x8f-0x9f   case 1   sjis 1st only              -> SJIS
//   0x8e                   case 2   sjis 1st or euc Kana 1st (SS2)
//   0xa1-0xdf              case 3   sjis Kana or euc 1st
//   0xe0-0xef              case 5   sjis 1st or euc 1st
//   0xf0-0xfe              case 4   euc 1st only               -> EUC
//   anything else                   skipped
//
// The byte following the lead byte is fetched with the GetNextChar macro,
// which returns its argument from this function when the buffer is
// exhausted.
int DetectJPNCode ( UCHAR *string, int count )
{
    int c;

    // No buffer: report the same result as for an empty buffer.
    if ( !string )
        return CODE_ONLY_SBCS;

    for ( ; count > 0; count--, string++ ) {
        c = *string;

        if ( c == ESC ) { // check for jis (iso-2022-jp)
            // The tested escape sequences are three bytes long, ESC included.
            if ( count < 3 )
                return CODE_UNKNOWN;
            c = *++string; count--;
            if ( c == KANJI_IN_1ST_CHAR &&
                 ( *(string+1) == KANJI_IN_2ND_CHAR1 ||       // ESC $ B
                   *(string+1) == KANJI_IN_2ND_CHAR2 ))       // ESC $ @
                return CODE_JPN_JIS;
            else if ( c == KANJI_OUT_1ST_CHAR &&
                      ( *(string+1) == KANJI_OUT_2ND_CHAR1 || // ESC ( B
                        *(string+1) == KANJI_OUT_2ND_CHAR2 )) // ESC ( J
                return CODE_JPN_JIS;
            else
                return CODE_UNKNOWN;
        } else if ( (0x81 <= c && c <= 0x8d) || (0x8f <= c && c <= 0x9f) ) { // 1
            // found sjis 1st
            return CODE_JPN_SJIS;
        } else if ( 0x8e == c ) { // 2
            // found sjis 1st || euc Kana 1st (SS2)
            //
            // second byte:
            //   0x40-0x7e, 0x80-0xa0, 0xe0-0xfc   2-1   sjis 2nd only
            //   0xa1-0xdf                         2-2   sjis 2nd or euc Kana 2nd
            GetNextChar( CODE_UNKNOWN )
            if ( (0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xa0) || (0xe0 <= c && c <= 0xfc) ) // 2-1
                // found sjis 2nd
                return CODE_JPN_SJIS;
            else if ( 0xa1 <= c && c <= 0xdf ) // 2-2
                // found sjis 2nd || euc Kana 2nd (sjis || euc)
                continue;
            else
                // illegal character code sequence
                return CODE_UNKNOWN;
        } else if ( 0xf0 <= c && c <= 0xfe ) { // 4
            // found euc 1st
            return CODE_JPN_EUC;
        } else if ( 0xe0 <= c && c <= 0xef ) { // 5
            // found sjis 1st || euc 1st
            //
            // second byte:
            //   0x40-0x7e, 0x80-0xa0   5-1   sjis 2nd only
            //   0xfd-0xfe              5-2   euc 2nd only
            //   0xa1-0xfc              5-3   sjis 2nd or euc 2nd
            GetNextChar( CODE_UNKNOWN )
            if ( (0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xa0) ) // 5-1
                // found sjis 2nd
                return CODE_JPN_SJIS;
            else if ( 0xfd <= c && c <= 0xfe ) // 5-2
                // found euc 2nd
                return CODE_JPN_EUC;
            else if ( 0xa1 <= c && c <= 0xfc ) // 5-3
                // found sjis 2nd || euc 2nd (sjis || euc)
                continue;
            else
                // illegal character code sequence
                return CODE_UNKNOWN;
        } else if ( 0xa1 <= c && c <= 0xdf ) { // 3
            // found sjis Kana || euc 1st
            //
            // following byte:
            //   <= 0x9f      3-4   not euc 2nd            -> SJIS
            //   0xa1-0xdf    3-2   sjis Kana or euc 2nd
            //   0xe0-0xef    3-3   sjis 1st or euc 2nd
            //   0xf0-0xfe    3-1   euc 2nd only           -> EUC
            //
            // If the buffer ends here the result is SJIS.
            GetNextChar( CODE_JPN_SJIS )
            if ( c <= 0x9f ) // 3-4
                // not euc 2nd byte
                return CODE_JPN_SJIS;
            else if ( 0xa1 <= c && c <= 0xdf ) // 3-2
                // found sjis kana || euc 2nd (sjis || euc)
                continue;
            else if ( 0xe0 <= c && c <= 0xef ) { // 3-3
                // found sjis 1st || euc 2nd
                //
                // following byte:
                //   0x40-0x7e              3-3-1   sjis 2nd or sbcs
                //   0x80-0x8d, 0x8f-0xa0   3-3-2   sjis 2nd only     -> SJIS
                //   0x8e                   3-3-3   sjis 2nd or euc Kana 1st
                //   0xa1-0xfc              3-3-4   sjis 2nd or euc 1st
                //   0xfd-0xfe              3-3-5   euc 1st only      -> EUC
                //
                // Case 3-3-4-2 jumps back to this label when it meets
                // another byte in 0xe0-0xef.  If the buffer ends here the
                // result is EUC.
            sjis1stOReuc2nd:
                GetNextChar( CODE_JPN_EUC )
                if ( 0xfd <= c && c <= 0xfe ) // 3-3-5
                    // found euc 1st
                    return CODE_JPN_EUC;
                else if ( (0x80 <= c && c <= 0x8d) || (0x8f <= c && c <= 0xa0) ) // 3-3-2
                    // found sjis 2nd
                    return CODE_JPN_SJIS;
                else if ( 0x40 <= c && c <= 0x7e ) // 3-3-1
                    // found sjis 2nd || sbcs (sjis || euc)
                    continue;
                else if ( 0x8e == c ) { // 3-3-3
                    // found sjis 2nd || euc kana 1st
                    GetNextChar( CODE_JPN_SJIS )
                    if ( 0xa1 <= c && c <= 0xdf )
                        // found sjis Kana || euc Kana 2nd (sjis || euc)
                        continue;
                    else
                        // not found euc kana 2nd
                        return CODE_JPN_SJIS;
                } else if ( 0xa1 <= c && c <= 0xfc ) { // 3-3-4
                    // found sjis 2nd || euc 1st
                    //
                    // following byte:
                    //   0xa1-0xdf   3-3-4-1   sjis Kana or euc 2nd
                    //   0xe0-0xef   3-3-4-2   sjis 1st or euc 2nd
                    //   0xf0-0xfe   3-3-4-3   euc 2nd only   -> EUC
                    GetNextChar( CODE_JPN_SJIS )
                    if ( 0xa1 <= c && c <= 0xdf ) // 3-3-4-1
                        // found sjis kana || euc 2nd (sjis || euc)
                        continue;
                    if ( 0xe0 <= c && c <= 0xef ) // 3-3-4-2
                        // found sjis 1st || euc 2nd
                        goto sjis1stOReuc2nd;
                    if ( 0xf0 <= c && c <= 0xfe ) // 3-3-4-3
                        // found euc 2nd
                        return CODE_JPN_EUC;
                    else
                        // not found euc 2nd
                        return CODE_JPN_SJIS;
                } else
                    // not found sjis 2nd
                    return CODE_JPN_EUC;
            } else if ( 0xf0 <= c && c <= 0xfe ) // 3-1
                return CODE_JPN_EUC;
            else
                return CODE_UNKNOWN;
        }
    }

    // Reached the end of the buffer without a deciding byte sequence.
    return CODE_ONLY_SBCS;
}