/*----------------------------------------------------------------------------
    %%File:     msencode.h
    %%Unit:     fechmap
    %%Contact:  jpick

    External header file for MsEncode character conversion module.
----------------------------------------------------------------------------*/

#ifndef MSENCODE_H
#define MSENCODE_H


// ----------------------------------------------------------------------------
//
//                              Error Returns
//
// ----------------------------------------------------------------------------

//
//  Return Type for API Functions
//
//  Every code below other than cceSuccess is negative.  Note that two of
//  the negative codes (cceRequestedStop and cceMayBeAscii) are documented
//  as "not an error", so a plain "< 0" test does not identify failures.
//
typedef int CCE;

//
//  Error:          cceSuccess
//  Explanation:    Function succeeded (no error).
//
#define cceSuccess                      0

//
//  Error:          cceRequestedStop
//  Explanation:    Function succeeded (no error).  Caller
//                  requested function to be run in iterator mode
//                  (stop on each character or stop on ASCII) and
//                  function is making requested stop.  (Stream
//                  conversion functions only).
//
#define cceRequestedStop                (-1)

//
//  Error:          cceInsufficientBuffer
//  Explanation:    Buffer provided to function is too small.
//
#define cceInsufficientBuffer           (-2)

//
//  Error:          cceInvalidFlags
//  Explanation:    An invalid flag or combination of flags was
//                  given to function.
//
#define cceInvalidFlags                 (-3)

//
//  Error:          cceInvalidParameter
//  Explanation:    Invalid parameter passed to function (null
//                  pointer, invalid encoding specified, etc.).
//
#define cceInvalidParameter             (-4)

//
//  Error:          cceRead
//  Explanation:    User read-callback function failed.
//
#define cceRead                         (-5)

//
//  Error:          cceWrite
//  Explanation:    User write-callback function failed.
//
#define cceWrite                        (-6)

//
//  Error:          cceUnget
//  Explanation:    User unget-callback function failed.
//
#define cceUnget                        (-7)

//
//  Error:          cceNoCodePage
//  Explanation:    Requested encoding requires an installed
//                  code page (NLS file) for conversion.  That
//                  file is not installed.
//
#define cceNoCodePage                   (-8)

//
//  Error:          cceEndOfInput
//  Explanation:    Unexpected end-of-input occurred within a
//                  multi-byte character in conversion function.
//                  (Returned only if user requested errors for
//                  invalid characters).
//
#define cceEndOfInput                   (-9)

//
//  Error:          cceNoTranslation
//  Explanation:    Character in input stream or string has no
//                  equivalent Unicode (multi-byte to Unicode) or
//                  multi-byte (Unicode to multi-byte) character.
//                  (Returned only if user requested errors for
//                  invalid characters).
//
#define cceNoTranslation                (-10)

//
//  Error:          cceInvalidChar
//  Explanation:    Converter found a single or multi-byte character
//                  that is outside the legal range for the given
//                  encoding.  (Returned only if user requested
//                  errors for invalid characters).
//
#define cceInvalidChar                  (-11)

//
//  Error:          cceAmbiguousInput
//  Explanation:    CceDetectInputCode(), only.  Data matches more
//                  than one of the supported encoding types.
//                  (Returned only if function told to not resolve
//                  ambiguity).
//
#define cceAmbiguousInput               (-12)

//
//  Error:          cceUnknownInput
//  Explanation:    CceDetectInputCode(), only.  Data matches none
//                  of the supported encoding types.
//
#define cceUnknownInput                 (-13)

//
//  Error:          cceMayBeAscii
//  Explanation:    CceDetectInputCode(), only.  Technically, data
//                  matches at least one of the supported encoding
//                  types, but may not be a true match.  (For example,
//                  an ASCII file with only a few scattered extended
//                  characters).  (Returned only if function told to
//                  resolve ambiguity).
//
//                  This is not an error, only a flag to the calling
//                  application.  CceDetectInputCode() will still set
//                  the encoding type if it returns this value.
//
#define cceMayBeAscii                   (-14)

//
//  Error:          cceInternal
//  Explanation:    Unrecoverable internal error.
//
#define cceInternal                     (-15)

//
//  Error:          cceConvert
//  Explanation:    Unexpected DBCS function conversion error.
//
#define cceConvert                      (-16)

//
//  Error:          cceEncodingNotImplemented
//  Explanation:    Temporary integration error.  Requested encoding
//                  is not implemented.
//
#define cceEncodingNotImplemented       (-100)

//
//  Error:          cceFunctionNotImplemented
//  Explanation:    Temporary integration error.  Function
//                  is not implemented.
//
#define cceFunctionNotImplemented       (-101)


// ----------------------------------------------------------------------------
//
//          General Definitions for Modules Using these Routines
//
// ----------------------------------------------------------------------------

//
//  Calling-convention and linkage helpers.
//
//  MSENAPI         calling convention used by EXPIMPL/EXPDECL below.
//  MSENCBACK       second alias for the same convention (presumably for
//                  user callbacks -- confirm against the converter API).
//  EXPIMPL(type)   prefix for defining a function returning "type".
//  EXPDECL(type)   prefix for declaring such a function (adds "extern").
//
//  PASCAL is not defined by this header; the including module must
//  provide it before any of these macros is expanded.
//
#define MSENAPI                 PASCAL
#define MSENCBACK               PASCAL
#define EXPIMPL(type)           type MSENAPI
#define EXPDECL(type)           extern type MSENAPI

//
//  Basic types, in case these are not already defined.
//
//  FAR is only supplied when the including module has not defined it.
//  NOTE(review): __far is a 16-bit segmented-memory keyword, so selecting
//  it when _WIN32 is defined looks inverted.  The logic is left exactly as
//  found because platform headers normally define FAR first -- confirm
//  before changing.
//
#ifndef FAR
#ifdef _WIN32
#define FAR     __far
#else
#define FAR
#endif
#endif

// Unsigned byte and pointers to unsigned byte strings.
typedef unsigned char UCHAR;
typedef UCHAR *PUCHAR;
typedef UCHAR FAR *LPUSTR;
typedef const UCHAR FAR *LPCUSTR;

// Wide character (unsigned short here) and pointers to wide strings.
#ifndef UNIX // IEUNIX uses 4 bytes WCHAR, these are already defined in winnt.h
typedef unsigned short WCHAR;
typedef WCHAR *PWCHAR;
typedef WCHAR FAR *LPWSTR;
typedef const WCHAR FAR *LPCWSTR;
#endif

//
//  Character encoding types supported by this module.
//
//  The real encodings are numbered consecutively from 0, so ceCount
//  (which follows the last one) equals the number of encodings.
//  ceNil (-1) lies outside that range.
//
//  The enumeration is declared without "typedef": the original
//  "typedef enum _cenc { ... };" named no type, which makes the storage
//  class useless and triggers a compiler warning.  The tag and all
//  enumerator values are unchanged.
//
enum _cenc
{
    ceNil = -1,
    ceEucCn = 0,
    ceEucJp,
    ceEucKr,
    ceIso2022Jp,
    ceIso2022Kr,
    ceBig5,
    ceGbk,
    ceHz,
    ceShiftJis,
    ceWansung,
    ceUtf7,
    ceUtf8,
    ceCount
};

// Type used to hold an encoding value; a plain short, not the enum type.
typedef short CEnc;

//
//  Encoding "families" (for CceDetectInputCode() preferences).
//
//  efNone is 0, the value CceDetectInputCode() documents as
//  "no preference" for its efPref argument.
//
typedef enum _efam
{
    efNone = 0,
    efDbcs,
    efEuc,
    efIso2022,
    efUtf8
} EFam;

//
|
|
// API private/reserved structure. For most API functions,
|
|
// this structure must be zero-filled by calling application.
|
|
// See converter function documentation, below, for more
|
|
// information.
|
|
//
|
|
#define cdwReserved 4
|
|
typedef struct _ars
|
|
{
|
|
DWORD rgdw[cdwReserved];
|
|
} ARS;
|
|
|
|
// For GetProcAddress()
|
|
typedef void (MSENAPI *PFNMSENCODEVER)(WORD FAR *, WORD FAR *);
|
|
|
|
// ----------------------------------------------------------------------------
//
//                  Input Code Auto-Detection Routine
//
// ----------------------------------------------------------------------------

//
//  Configuration Flags for Auto Detection Routine
//
//  Each flag occupies its own bit of the dwFlags argument.
//
//  grfDetectResolveAmbiguity
//      The default is to return cceAmbiguousInput if the auto
//      detection code cannot definitely determine the encoding
//      of the input stream.  If this flag is set, the function
//      will use optional user preferences and the system code
//      page to pick an encoding (note that in this case, the
//      "lpfGuess" flag will be set to fTrue upon return).
//
//  grfDetectUseCharMapping
//      The default action of the auto-detection code is to
//      parse the input against the known encoding types.  Legal
//      character sequences are not analyzed for anything
//      beyond syntactic correctness.  If this flag is set,
//      auto-detect will map recognized sequences to flush out
//      invalid characters.
//
//      This option will cause auto-detection to run more
//      slowly, but also yield more accurate results.
//
//  grfDetectIgnoreEof
//      Because auto-detect parses byte sequences against the
//      known encoding types, end-of-input in the middle of a
//      sequence is obviously an error.  If the calling application
//      will artificially limit the sample size, set this flag
//      to ignore such end-of-input errors.
//
#define grfDetectResolveAmbiguity       0x1
#define grfDetectUseCharMapping         0x2
#define grfDetectIgnoreEof              0x4

//
|
|
// Entry Point -- Attempt to Detect the Encoding
|
|
//
|
|
// Return cceAmbiguousInput if input is ambiguous or cceUnknownInput
|
|
// if encoding type matches none of the known types.
|
|
//
|
|
// Detected encoding is returned in lpCe. lpfGuess used to return
|
|
// a flag indicating whether or not the function "guessed" at an
|
|
// encoding (chose default from ambiguous state).
|
|
//
|
|
// User preferences for encoding family (efPref) and code page
|
|
// (nPrefCp) are optional, even if caller chooses to have
|
|
// this function attempt to resolve ambiguity. If either has
|
|
// the value 0, they will be ignored.
|
|
//
|
|
EXPDECL(CCE)
|
|
CceDetectInputCode(
|
|
IStream *pstmIn, // input stream
|
|
DWORD dwFlags, // configuration flags
|
|
EFam efPref, // optional: preferred encoding family
|
|
int nPrefCp, // optional: preferred code page
|
|
UINT *lpCe, // set to detected encoding
|
|
BOOL *lpfGuess // set to fTrue if function "guessed"
|
|
);
|
|
|
|
#endif // #ifndef MSENCODE_H