/* Optional extra entry placed after PREFIX(cdataSectionTok) in the
   initializer list that follows: the first definition appends
   ", PREFIX(ignoreSectionTok)", the second expands to nothing.  The
   preprocessor conditional that selects between the two definitions is not
   visible in this excerpt. */
65# define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
67# define IGNORE_SECTION_TOK_VTABLE
71 {PREFIX(prologTok), PREFIX(contentTok), \
72 PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE}, \
73 {PREFIX(attributeValueTok), PREFIX(entityValueTok)}, \
74 PREFIX(nameMatchesAscii), PREFIX(nameLength), PREFIX(skipS), \
75 PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName), \
76 PREFIX(updatePosition), PREFIX(isPublicId)
78#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
/* Test one bit of namingBitmap for the two-byte unit whose bytes are hi/lo.
   (pages)[hi] selects a group of eight bitmap words (hence the << 3), the
   top three bits of lo pick the word inside that group, and the low five
   bits of lo pick the bit within the word.  Evaluates to non-zero when the
   bit is set, 0 otherwise.
   Every parameter is parenthesized so that expression arguments such as
   `table + 1` expand as intended.  Note that lo is evaluated twice. */
#define UCS2_GET_NAMING(pages, hi, lo) \
  (namingBitmap[((pages)[(hi)] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
/* Naming-bitmap test for the two bytes at `byte`.
   The lookup has the same shape as UCS2_GET_NAMING: the page index is bits
   2..4 of byte[0]; the word offset inside the page combines the low two
   bits of byte[0] (shifted left by one) with bit 5 of byte[1]; the bit that
   is tested is selected by the low five bits of byte[1].
   Evaluates to non-zero when that bit of namingBitmap is set. */
87#define UTF8_GET_NAMING2(pages, byte) \
88 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
89 + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)] \
90 & (1u << (((byte)[1]) & 0x1F)))
97#define UTF8_GET_NAMING3(pages, byte) \
99 [((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)] \
101 + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \
102 & (1u << (((byte)[2]) & 0x1F)))
/* Non-zero when the two bytes at p do not form an acceptable 2-byte UTF-8
   sequence: the lead byte is below 0xC2 (0xC0 and 0xC1 could only start
   overlong encodings), or the second byte is not of the form 10xxxxxx
   (top bit clear, or both top bits set).
   p must point at unsigned bytes: with a plain, possibly signed, char the
   comparison against 0xC2 would be true for every byte >= 0x80.
   The argument is parenthesized before it is dereferenced so that pointer
   expressions such as `s + 1` test the intended byte. */
#define UTF8_INVALID2(p) \
  (*(p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
117#define UTF8_INVALID3(p) \
118 (((p)[2] & 0x80) == 0 \
119 || ((*p) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD \
120 : ((p)[2] & 0xC0) == 0xC0) \
122 ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
123 : ((p)[1] & 0x80) == 0 \
124 || ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))
126#define UTF8_INVALID4(p) \
127 (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0 \
128 || ((p)[2] & 0xC0) == 0xC0 \
130 ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
131 : ((p)[1] & 0x80) == 0 \
132 || ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
153#define utf8_isName4 isNever
167#define utf8_isNmstrt4 isNever
/* Reinterpret enc as a `const struct normal_encoding *`.  This is a plain
   cast; nothing checks that enc really refers to a normal_encoding. */
208#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc))
/* Expands either to the five token-pasted names E##byteType, E##isNameMin,
   E##isNmstrtMin, E##byteToAscii and E##charMatches (each followed by a
   comma, so further list entries can follow directly), or to nothing.
   The preprocessor conditional choosing between the two definitions is not
   visible in this excerpt. */
212# define STANDARD_VTABLE(E) \
213 E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches,
217# define STANDARD_VTABLE(E)
/* Expands to a comma-separated list of nine names built by pasting the
   prefix E onto isName2/3/4, isNmstrt2/3/4 and isInvalid2/3/4, in exactly
   that order (no trailing comma).
   NOTE(review): presumably consumed by a struct normal_encoding
   initializer, in which case the order must match the member order of that
   struct -- confirm against the struct definition. */
221#define NORMAL_VTABLE(E) \
222 E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3, \
223 E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4
236# define sb_isNameMin isNever
237# define sb_isNmstrtMin isNever
/* Minimum bytes per character: either read from enc->minBytesPerChar at run
   time or fixed at 1.  The conditional selecting between the two
   definitions is not visible in this excerpt. */
241# define MINBPC(enc) ((enc)->minBytesPerChar)
244# define MINBPC(enc) 1
/* Single-byte classification: index the `type` table of the
   normal_encoding behind enc with the byte at p.  The byte is cast to
   unsigned char first so the index is never negative. */
247#define SB_BYTE_TYPE(enc, p) \
248 (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
/* BYTE_TYPE: classify the character at p, either through the byteType
   function pointer of the normal_encoding or by the direct SB_BYTE_TYPE
   table lookup.  The selecting conditional is not visible in this excerpt. */
255# define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
257# define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
/* BYTE_TO_ASCII: either call the byteToAscii function pointer of the
   normal_encoding, or simply read the byte at p. */
261# define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
268# define BYTE_TO_ASCII(enc, p) (*(p))
/* Multi-byte name tests: call the isName<n> / isNmstrt<n> function pointer
   stored in the normal_encoding behind enc (n is token-pasted onto the
   member name), passing enc and p through unchanged. */
271#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
272#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))
/* Invalid-sequence test through the isInvalid<n> function pointer of the
   normal_encoding behind enc.  Two alternative definitions follow (the
   conditional selecting between them is not visible in this excerpt):
   the first tolerates a null isInvalid<n> pointer, in which case it
   evaluates to 0; the second calls the pointer unconditionally. */
274# define IS_INVALID_CHAR(enc, p, n) \
275 (AS_NORMAL_ENCODING(enc)->isInvalid##n \
276 && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
278# define IS_INVALID_CHAR(enc, p, n) \
279 (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
/* Name / name-start tests for a minimum-width character.  The first pair
   dispatches through the isNameMin / isNmstrtMin function pointers of the
   normal_encoding behind enc; the second pair is the constant 0.  The
   conditional selecting between the pairs is not visible in this excerpt. */
283# define IS_NAME_CHAR_MINBPC(enc, p) \
284 (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
285# define IS_NMSTRT_CHAR_MINBPC(enc, p) \
286 (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
288# define IS_NAME_CHAR_MINBPC(enc, p) (0)
289# define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
/* Compare the character at p with c: either through the charMatches
   function pointer of the normal_encoding behind enc, or by comparing the
   single byte at p with c directly.  The conditional selecting between the
   two definitions is not visible in this excerpt. */
293# define CHAR_MATCHES(enc, p, c) \
294 (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
302# define CHAR_MATCHES(enc, p, c) (*(p) == (c))
305#define PREFIX(ident) normal_##ident
306#define XML_TOK_IMPL_C
315#undef IS_NAME_CHAR_MINBPC
317#undef IS_NMSTRT_CHAR_MINBPC
318#undef IS_INVALID_CHAR
332 for (; fromLim > from; fromLim--, walked++) {
333 const unsigned char prev = (
unsigned char)fromLim[-1];
336 if (walked + 1 >= 4) {
342 }
else if ((prev & 0xf0u)
344 if (walked + 1 >= 3) {
350 }
else if ((prev & 0xe0u)
352 if (walked + 1 >= 2) {
358 }
else if ((prev & 0x80u)
368 char **toP,
const char *toLim) {
407 unsigned short **toP,
const unsigned short *toLim) {
409 unsigned short *
to = *toP;
410 const char *from = *fromP;
411 while (from < fromLim &&
to < toLim) {
414 if (fromLim - from < 2) {
418 *
to++ = (
unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
422 if (fromLim - from < 3) {
426 *
to++ = (
unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6)
432 if (toLim -
to < 2) {
436 if (fromLim - from < 4) {
440 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
441 | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
443 to[0] = (
unsigned short)((
n >> 10) | 0xD800);
444 to[1] = (
unsigned short)((
n & 0x3FF) | 0xDC00);
474#define BT_COLON BT_NMSTRT
496#define BT_COLON BT_NMSTRT
505 char **toP,
const char *toLim) {
509 if (*fromP == fromLim)
511 c = (
unsigned char)**fromP;
513 if (toLim - *toP < 2)
516 *(*toP)++ = (
char)((
c & 0x3f) | 0x80);
521 *(*toP)++ = *(*fromP)++;
528 unsigned short **toP,
const unsigned short *toLim) {
530 while (*fromP < fromLim && *toP < toLim)
531 *(*toP)++ = (
unsigned char)*(*fromP)++;
533 if ((*toP == toLim) && (*fromP < fromLim))
554#define BT_COLON BT_NMSTRT
563 char **toP,
const char *toLim) {
565 while (*fromP < fromLim && *toP < toLim)
566 *(*toP)++ = *(*fromP)++;
568 if ((*toP == toLim) && (*fromP < fromLim))
589#define BT_COLON BT_NMSTRT
598 switch ((
unsigned char)
hi) {
612 switch ((
unsigned char)
lo) {
622#define DEFINE_UTF16_TO_UTF8(E) \
623 static enum XML_Convert_Result PTRCALL E##toUtf8( \
624 const ENCODING *enc, const char **fromP, const char *fromLim, \
625 char **toP, const char *toLim) { \
626 const char *from = *fromP; \
628 fromLim = from + (((fromLim - from) >> 1) << 1); \
629 for (; from < fromLim; from += 2) { \
632 unsigned char lo = GET_LO(from); \
633 unsigned char hi = GET_HI(from); \
637 if (*toP == toLim) { \
639 return XML_CONVERT_OUTPUT_EXHAUSTED; \
652 if (toLim - *toP < 2) { \
654 return XML_CONVERT_OUTPUT_EXHAUSTED; \
656 *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
657 *(*toP)++ = ((lo & 0x3f) | 0x80); \
660 if (toLim - *toP < 3) { \
662 return XML_CONVERT_OUTPUT_EXHAUSTED; \
665 *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
666 *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
667 *(*toP)++ = ((lo & 0x3f) | 0x80); \
673 if (toLim - *toP < 4) { \
675 return XML_CONVERT_OUTPUT_EXHAUSTED; \
677 if (fromLim - from < 4) { \
679 return XML_CONVERT_INPUT_INCOMPLETE; \
681 plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
682 *(*toP)++ = (char)((plane >> 2) | UTF8_cval4); \
683 *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
685 lo2 = GET_LO(from); \
686 *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2) \
687 | (lo2 >> 6) | 0x80); \
688 *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
693 if (from < fromLim) \
694 return XML_CONVERT_INPUT_INCOMPLETE; \
696 return XML_CONVERT_COMPLETED; \
699#define DEFINE_UTF16_TO_UTF16(E) \
700 static enum XML_Convert_Result PTRCALL E##toUtf16( \
701 const ENCODING *enc, const char **fromP, const char *fromLim, \
702 unsigned short **toP, const unsigned short *toLim) { \
703 enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
705 fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); \
707 if (fromLim - *fromP > ((toLim - *toP) << 1) \
708 && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
710 res = XML_CONVERT_INPUT_INCOMPLETE; \
712 for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
713 *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
714 if ((*toP == toLim) && (*fromP < fromLim)) \
715 return XML_CONVERT_OUTPUT_EXHAUSTED; \
/* Byte accessors for a two-byte unit at ptr; both cast to unsigned char.
   First pair: low byte at ptr[0], high byte at ptr[1]. */
720#define GET_LO(ptr) ((unsigned char)(ptr)[0])
721#define GET_HI(ptr) ((unsigned char)(ptr)[1])
/* Second pair: the byte positions are swapped (high byte at ptr[0]).
   NOTE(review): presumably the little-endian and big-endian variants
   respectively; the lines between the two pairs (including any #undef)
   are not visible in this excerpt. */
729#define GET_LO(ptr) ((unsigned char)(ptr)[1])
730#define GET_HI(ptr) ((unsigned char)(ptr)[0])
/* Classify the two-byte unit at p whose second byte, (p)[1], is the high
   byte: when the high byte is zero use the single-byte table lookup on the
   first byte, otherwise call unicode_byte_type(high, low). */
738#define LITTLE2_BYTE_TYPE(enc, p) \
739 ((p)[1] == 0 ? SB_BYTE_TYPE(enc, p) : unicode_byte_type((p)[1], (p)[0]))
/* The first byte when the second byte is zero, otherwise -1. */
740#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1)
/* True when the second byte is zero and the first byte equals c. */
741#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c))
/* Name-character test for the two-byte unit at p: the high byte is (p)[1]
   and the low byte is (p)[0], both taken as unsigned char and looked up in
   namePages through UCS2_GET_NAMING.
   p is parenthesized before it is subscripted so that pointer expressions
   such as `ptr + 2` are valid arguments. */
#define LITTLE2_IS_NAME_CHAR_MINBPC(p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
/* Name-start-character test for the two-byte unit at p: the high byte is
   (p)[1] and the low byte is (p)[0], both taken as unsigned char and looked
   up in nmstrtPages through UCS2_GET_NAMING.
   p is parenthesized before it is subscripted so that pointer expressions
   such as `ptr + 2` are valid arguments. */
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
779# define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
784# define PREFIX(ident) little2_##ident
785# define MINBPC(enc) 2
787# define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
788# define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p)
789# define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c)
790# define IS_NAME_CHAR(enc, p, n) 0
791# define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p)
792# define IS_NMSTRT_CHAR(enc, p, n) (0)
793# define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)
795# define XML_TOK_IMPL_C
797# undef XML_TOK_IMPL_C
804# undef IS_NAME_CHAR_MINBPC
805# undef IS_NMSTRT_CHAR
806# undef IS_NMSTRT_CHAR_MINBPC
807# undef IS_INVALID_CHAR
815# if BYTEORDER == 1234
838#define BT_COLON BT_NMSTRT
862# define BT_COLON BT_NMSTRT
/* Classify the two-byte unit at p whose first byte, (p)[0], is the high
   byte: when the high byte is zero use the single-byte table lookup on the
   second byte, otherwise call unicode_byte_type(high, low).
   p is parenthesized before 1 is added so that arguments with lower
   precedence than `+` (for example a conditional expression) still address
   the second byte of the selected unit. */
#define BIG2_BYTE_TYPE(enc, p) \
  ((p)[0] == 0 ? SB_BYTE_TYPE(enc, (p) + 1) : unicode_byte_type((p)[0], (p)[1]))
/* The second byte when the first byte is zero, otherwise -1. */
873#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)
/* True when the first byte is zero and the second byte equals c. */
874#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c))
/* Name-character test for the two-byte unit at p: the high byte is (p)[0]
   and the low byte is (p)[1], both taken as unsigned char and looked up in
   namePages through UCS2_GET_NAMING.
   p is parenthesized before it is subscripted so that pointer expressions
   such as `ptr + 2` are valid arguments. */
#define BIG2_IS_NAME_CHAR_MINBPC(p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
/* Name-start-character test for the two-byte unit at p: the high byte is
   (p)[0] and the low byte is (p)[1], both taken as unsigned char and looked
   up in nmstrtPages through UCS2_GET_NAMING.
   p is parenthesized before it is subscripted so that pointer expressions
   such as `ptr + 2` are valid arguments. */
#define BIG2_IS_NMSTRT_CHAR_MINBPC(p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
912# define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
917# define PREFIX(ident) big2_##ident
918# define MINBPC(enc) 2
920# define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
921# define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p)
922# define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c)
923# define IS_NAME_CHAR(enc, p, n) 0
924# define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p)
925# define IS_NMSTRT_CHAR(enc, p, n) (0)
926# define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p)
928# define XML_TOK_IMPL_C
930# undef XML_TOK_IMPL_C
937# undef IS_NAME_CHAR_MINBPC
938# undef IS_NMSTRT_CHAR
939# undef IS_NMSTRT_CHAR_MINBPC
940# undef IS_INVALID_CHAR
948# if BYTEORDER == 4321
971#define BT_COLON BT_NMSTRT
995# define BT_COLON BT_NMSTRT
1155 const char *
end,
const char **badPtr,
const char **
versionPtr,
1273 buf[1] = (
char)((
c & 0x3f) | 0x80);
1278 buf[1] = (
char)(((
c >> 6) & 0x3f) | 0x80);
1279 buf[2] = (
char)((
c & 0x3f) | 0x80);
1284 buf[1] = (
char)(((
c >> 12) & 0x3f) | 0x80);
1285 buf[2] = (
char)(((
c >> 6) & 0x3f) | 0x80);
1286 buf[3] = (
char)((
c & 0x3f) | 0x80);
/* Reinterpret enc as a `const struct unknown_encoding *`.  This is a plain
   cast; nothing checks that enc really refers to an unknown_encoding. */
1317#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc))
1351 char **toP,
const char *toLim) {
1357 if (*fromP == fromLim)
1362 int c =
uenc->convert(
uenc->userData, *fromP);
1364 if (
n > toLim - *toP)
1370 if (
n > toLim - *toP)
1381 unsigned short **toP,
const unsigned short *toLim) {
1383 while (*fromP < fromLim && *toP < toLim) {
1384 unsigned short c =
uenc->utf16[(
unsigned char)**fromP];
1394 if ((*toP == toLim) && (*fromP < fromLim))
1406 for (
i = 0;
i < 128;
i++)
1410 for (
i = 0;
i < 256;
i++) {
1415 e->utf16[
i] = 0xFFFF;
1427 }
else if (
c < 0x80) {
1434 e->utf16[
i] = (
unsigned short)(
c == 0 ? 0xFFFF :
c);
1438 e->utf16[
i] = 0xFFFF;
1469 return &(
e->normal.enc);
/* Read the isUtf16 member of enc->initEnc, converted to int. */
1521#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
/* Store i, converted to char, in the isUtf16 member of enc->initEnc; the
   macro evaluates to the assigned value.
   i is parenthesized so the cast applies to the whole argument expression
   rather than only to its first operand. */
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
1533 int state,
const char *ptr,
const char *
end,
const char **
nextTokPtr) {
1539 if (ptr + 1 ==
end) {
1554 switch ((
unsigned char)*ptr) {
1566 switch (((
unsigned char)ptr[0] << 8) | (
unsigned char)ptr[1]) {
1603 if ((
unsigned char)ptr[2] == 0xBF) {
1610 if (ptr[0] ==
'\0') {
1621 }
else if (ptr[1] ==
'\0') {
1654# define ns(x) x##_ns
1656# define XML_TOK_NS_C
const unsigned char * buf
apr_pool_t apr_dbd_t apr_dbd_results_t ** res
apr_array_header_t ** result
apr_vformatter_buff_t * c
const apr_bucket_type_t * type
int(* isInvalid2)(const ENCODING *, const char *)
int(* isNmstrt4)(const ENCODING *, const char *)
int(* isNmstrt3)(const ENCODING *, const char *)
int(* isName2)(const ENCODING *, const char *)
int(* isName4)(const ENCODING *, const char *)
int(* isNmstrt2)(const ENCODING *, const char *)
int(* isInvalid3)(const ENCODING *, const char *)
int(* isInvalid4)(const ENCODING *, const char *)
int(* isName3)(const ENCODING *, const char *)
unsigned short utf16[256]
struct normal_encoding normal
typedef int(WSAAPI *apr_winapi_fpt_WSAPoll)(IN OUT LPWSAPOLLFD fdArray
#define XmlInitUnknownEncodingNS
static int checkCharRefNumber(int result)
static const struct normal_encoding internal_utf8_encoding
#define BIG2_CHAR_MATCHES(p, c)
#define STANDARD_VTABLE(E)
static const char KW_encoding[]
static int utf8_isNmstrt2(const ENCODING *enc, const char *p)
static const char KW_ISO_8859_1[]
int XmlUtf8Encode(int c, char *buf)
static int unknown_isInvalid(const ENCODING *enc, const char *p)
#define UTF8_GET_NAMING3(pages, byte)
#define LITTLE2_BYTE_TO_ASCII(p)
static const char KW_UTF_16LE[]
static int streqci(const char *s1, const char *s2)
static int utf8_isNmstrt3(const ENCODING *enc, const char *p)
#define DEFINE_UTF16_TO_UTF8(E)
static int isSpace(int c)
#define BIG2_BYTE_TYPE(enc, p)
static int toAscii(const ENCODING *enc, const char *ptr, const char *end)
static const struct normal_encoding ascii_encoding
static const char KW_UTF_16[]
static int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *, const char *), int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **versionEndPtr, const char **encodingName, const ENCODING **encoding, int *standalone)
static int isNever(const ENCODING *enc, const char *p)
static enum XML_Convert_Result unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static int utf8_isName3(const ENCODING *enc, const char *p)
static const struct normal_encoding latin1_encoding
static const struct normal_encoding internal_big2_encoding
static int unknown_isNmstrt(const ENCODING *enc, const char *p)
static enum XML_Convert_Result latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
static int unicode_byte_type(char hi, char lo)
static int utf8_isInvalid2(const ENCODING *enc, const char *p)
void _INTERNAL_trim_to_complete_utf8_characters(const char *from, const char **fromLimRef)
static int utf8_isInvalid3(const ENCODING *enc, const char *p)
#define INIT_ENC_INDEX(enc)
static int utf8_isName2(const ENCODING *enc, const char *p)
static enum XML_Convert_Result utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)
static int parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end, const char **namePtr, const char **nameEndPtr, const char **valPtr, const char **nextTokPtr)
static const struct normal_encoding internal_little2_encoding
ENCODING * XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, void *userData)
static const char KW_yes[]
int XmlUtf16Encode(int charNum, unsigned short *buf)
int XmlSizeOfUnknownEncoding(void)
static int utf8_isInvalid4(const ENCODING *enc, const char *p)
static const struct normal_encoding utf8_encoding
#define BIG2_BYTE_TO_ASCII(p)
#define LITTLE2_IS_NAME_CHAR_MINBPC(p)
#define SB_BYTE_TYPE(enc, p)
static enum XML_Convert_Result unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim)
#define BIG2_IS_NAME_CHAR_MINBPC(p)
static enum XML_Convert_Result ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static const char KW_standalone[]
static int getEncodingIndex(const char *name)
#define LITTLE2_BYTE_TYPE(enc, p)
static enum XML_Convert_Result utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static int initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc, int state, const char *ptr, const char *end, const char **nextTokPtr)
#define AS_UNKNOWN_ENCODING(enc)
static const char KW_UTF_8[]
#define UCS2_GET_NAMING(pages, hi, lo)
static const char KW_US_ASCII[]
#define UTF8_GET_NAMING2(pages, byte)
static const char KW_no[]
static int unknown_isName(const ENCODING *enc, const char *p)
static const struct normal_encoding little2_encoding
#define DEFINE_UTF16_TO_UTF16(E)
static void initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end, POSITION *pos)
static const char KW_UTF_16BE[]
#define BIG2_IS_NMSTRT_CHAR_MINBPC(p)
#define AS_NORMAL_ENCODING(enc)
static enum XML_Convert_Result latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim)
static const char KW_version[]
#define LITTLE2_CHAR_MATCHES(p, c)
static const struct normal_encoding big2_encoding
#define XML_CONTENT_STATE
#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim)
#define XML_UTF8_ENCODE_MAX
#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2)
int(XMLCALL * CONVERTER)(void *userData, const char *p)
@ XML_CONVERT_OUTPUT_EXHAUSTED
@ XML_CONVERT_INPUT_INCOMPLETE
#define XmlTok(enc, state, ptr, end, nextTokPtr)