Apache HTTPD
basic_tests.c
Go to the documentation of this file.
1/* Tests in the "basic" test case for the Expat test suite
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 2001-2006 Fred L. Drake, Jr. <[email protected]>
10 Copyright (c) 2003 Greg Stein <[email protected]>
11 Copyright (c) 2005-2007 Steven Solie <[email protected]>
12 Copyright (c) 2005-2012 Karl Waclawek <[email protected]>
13 Copyright (c) 2016-2024 Sebastian Pipping <[email protected]>
14 Copyright (c) 2017-2022 Rhodri James <[email protected]>
15 Copyright (c) 2017 Joe Orton <[email protected]>
16 Copyright (c) 2017 José Gutiérrez de la Concha <[email protected]>
17 Copyright (c) 2018 Marco Maggi <[email protected]>
18 Copyright (c) 2019 David Loffredo <[email protected]>
19 Copyright (c) 2020 Tim Gates <[email protected]>
20 Copyright (c) 2021 Donghee Na <[email protected]>
21 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <[email protected]>
22 Licensed under the MIT license:
23
24 Permission is hereby granted, free of charge, to any person obtaining
25 a copy of this software and associated documentation files (the
26 "Software"), to deal in the Software without restriction, including
27 without limitation the rights to use, copy, modify, merge, publish,
28 distribute, sublicense, and/or sell copies of the Software, and to permit
29 persons to whom the Software is furnished to do so, subject to the
30 following conditions:
31
32 The above copyright notice and this permission notice shall be included
33 in all copies or substantial portions of the Software.
34
35 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41 USE OR OTHER DEALINGS IN THE SOFTWARE.
42*/
43
44#if defined(NDEBUG)
45# undef NDEBUG /* because test suite relies on assert(...) at the moment */
46#endif
47
48#include <assert.h>
49
50#include <stdio.h>
51#include <string.h>
52#include <time.h>
53
54#if ! defined(__cplusplus)
55# include <stdbool.h>
56#endif
57
58#include "expat_config.h"
59
60#include "expat.h"
61#include "internal.h"
62#include "minicheck.h"
63#include "structdata.h"
64#include "common.h"
65#include "dummy.h"
66#include "handlers.h"
67#include "siphash.h"
68#include "basic_tests.h"
69
70static void
73 if (g_parser == NULL)
74 fail("Parser not created.");
75}
76
77/*
78 * Character & encoding tests.
79 */
80
82 char text[] = "<doc>\0</doc>";
83
84 /* test that a NUL byte (in US-ASCII data) is an error */
87 fail("Parser did not report error on NUL-byte.");
90}
92
94 /* test that a character reference to NUL (&#0;) is an error */
95 expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
96 "Parser did not report error on NUL-byte.");
97}
99
101 if (! sip24_valid())
102 fail("SipHash self-test failed");
103}
105
107 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109 "\x0a\x0b\x0c\x0d\x0e";
110 const size_t len = sizeof(message) - 1;
111 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112 struct siphash state;
113 struct sipkey key;
114
115 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116 "\x0a\x0b\x0c\x0d\x0e\x0f");
117 sip24_init(&state, &key);
118
119 /* Cover spread across calls */
120 sip24_update(&state, message, 4);
121 sip24_update(&state, message + 4, len - 4);
122
123 /* Cover null length */
124 sip24_update(&state, message, 0);
125
126 if (sip24_final(&state) != expected)
127 fail("sip24_final failed spec test\n");
128
129 /* Cover wrapper */
130 if (siphash24(message, len, &key) != expected)
131 fail("siphash24 failed spec test\n");
132}
134
136 /* This test is really just making sure we don't core on a UTF-8 BOM. */
137 const char *text = "\357\273\277<e/>";
138
142}
144
146 char text[] = "\376\377\0<\0e\0/\0>";
147
151}
153
155 char text[] = "\377\376<\0e\0/\0>\0";
156
160}
162
164 char text[] = " \0<\0e\0/\0>\0";
165
166 if (g_chunkSize == 1) {
167 // TODO: with just the first byte, we can't tell the difference between
168 // UTF-16-LE and UTF-8. Avoid the failure for now.
169 return;
170 }
171
175}
177
179 /* For full coverage of the lookup routine, we need to ensure a
180 * hash collision even though we can only tell that we have one
181 * through breakpoint debugging or coverage statistics. The
182 * following will cause a hash collision on machines with a 64-bit
183 * long type; others will have to experiment. The full coverage
184 * tests invoked from qa.sh usually provide a hash collision, but
185 * not always. This is an attempt to provide insurance.
186 */
187#define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188 const char *text
189 = "<doc>\n"
190 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192 "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195 "<d8>This triggers the table growth and collides with b2</d8>\n"
196 "</doc>\n";
197
202}
204#undef COLLIDING_HASH_SALT
205
206/* Regression test for SF bug #491986. */
208 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210#ifdef XML_UNICODE
211 const XML_Char *expected
212 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213#else
214 const XML_Char *expected
215 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216#endif
217 run_character_check(text, expected);
218}
220
221/* Regression test for SF bug #514281. */
223 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224 "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
225#ifdef XML_UNICODE
226 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227#else
228 const XML_Char *expected
229 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230#endif
231 run_character_check(text, expected);
232}
234
236 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237 "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
238#ifdef XML_UNICODE
239 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240#else
241 const XML_Char *expected
242 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243#endif
244 run_character_check(text, expected);
245}
247
249 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251#ifdef XML_UNICODE
252 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253#else
254 const XML_Char *expected
255 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256#endif
257 run_character_check(text, expected);
258}
260
262 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263 "<doc>\xC3\xA9</doc>";
264#ifdef XML_UNICODE
265 const XML_Char *expected = XCS("\x00e9");
266#else
267 const XML_Char *expected = XCS("\xC3\xA9");
268#endif
269 run_character_check(text, expected);
270}
272
273/* Regression test for SF bug #600479.
274 XXX There should be a test that exercises all legal XML Unicode
275 characters as PCDATA and attribute value content, and XML Name
276 characters as part of element and attribute names.
277*/
279 const char *text = "<doc>\xEF\xBA\xBF</doc>";
280#ifdef XML_UNICODE
281 const XML_Char *expected = XCS("\xfebf");
282#else
283 const XML_Char *expected = XCS("\xEF\xBA\xBF");
284#endif
285 run_character_check(text, expected);
286}
288
289/* Regression test for SF bug #477667.
290 This test assures that any 8-bit character followed by a 7-bit
291 character will not be mistakenly interpreted as a valid UTF-8
292 sequence.
293*/
295 char text[100];
296 int i;
297
298 for (i = 128; i <= 255; ++i) {
299 snprintf(text, sizeof(text), "<e>%ccd</e>", i);
301 == XML_STATUS_OK) {
302 snprintf(text, sizeof(text),
303 "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304 i);
305 fail(text);
308 /* Reset the parser since we use the same parser repeatedly. */
310 }
311}
313
314/* Examples, not masks: */
315#define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
316#define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
317#define UTF8_LEAD_3 "\xef" /* 0b11101111 */
318#define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
319#define UTF8_FOLLOW "\xbf" /* 0b10111111 */
320
322 struct TestCase {
324 const char *input;
325 };
326
327 struct TestCase cases[] = {
328 {00, ""},
329
330 {00, UTF8_LEAD_1},
331
332 {-1, UTF8_LEAD_2},
334
335 {-1, UTF8_LEAD_3},
338
339 {-1, UTF8_LEAD_4},
343 };
344
345 size_t i = 0;
346 bool success = true;
347 for (; i < sizeof(cases) / sizeof(*cases); i++) {
348 const char *fromLim = cases[i].input + strlen(cases[i].input);
349 const char *const fromLimInitially = fromLim;
351
353
356 size_t j = 0;
357 success = false;
358 printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359 ", actually moved by %2d chars: \"",
360 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
362 for (; j < strlen(cases[i].input); j++) {
363 printf("\\x%02x", (unsigned char)cases[i].input[j]);
364 }
365 printf("\"\n");
366 }
367 }
368
369 if (! success) {
370 fail("UTF-8 auto-alignment is not bullet-proof\n");
371 }
372}
374
376 /* <?xml version="1.0" encoding="UTF-16"?>
377 * <doc a='123'>some {A} text</doc>
378 *
379 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380 */
381 char text[]
382 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385 "\000'\000?\000>\000\n"
386 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388 "<\000/\000d\000o\000c\000>";
389#ifdef XML_UNICODE
390 const XML_Char *expected = XCS("some \xff21 text");
391#else
392 const XML_Char *expected = XCS("some \357\274\241 text");
393#endif
395
403}
405
407 unsigned int first_chunk_bytes = 17;
408 char text[] = "\xFF\xFE" /* BOM */
409 "<\000e\000/\000>\000" /* document element */
410 "\r\000\n\000\r\000\n\000"; /* epilog */
411
412 if (first_chunk_bytes >= sizeof(text) - 1)
413 fail("bad value of first_chunk_bytes");
417 else {
418 enum XML_Status rc;
420 sizeof(text) - first_chunk_bytes - 1,
421 XML_TRUE);
422 if (rc == XML_STATUS_ERROR)
424 }
425}
427
428/* Test that an outright lie in the encoding is faulted */
430 const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431 "<doc>Hi</doc>";
432
433 /* Use a handler to provoke the appropriate code paths */
436 "UTF-16 declared in UTF-8 not faulted");
437}
439
440/* Test that an unknown encoding is rejected */
442 const char *text = "<doc>Hi</doc>";
443
444 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445 fail("XML_SetEncoding failed");
447 "Unknown encoding not faulted");
448}
450
451/* Regression test for SF bug #481609, #774028. */
453 const char *text
454 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455 "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
456 " >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
457#ifdef XML_UNICODE
458 /* Expected results in UTF-16 */
459 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461#else
462 /* Expected results in UTF-8 */
463 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465#endif
466
467 run_character_check(text, expected);
469 run_attribute_check(text, expected);
470 /* Repeat with a default handler */
473 run_character_check(text, expected);
476 run_attribute_check(text, expected);
477}
479
480/* Test that an element name with a 4-byte UTF-8 character is rejected */
482 const char *text
483 = "<?xml version='1.0' encoding='utf-8'?>\n"
484 /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485 "<do\xf0\x90\x80\x80/>";
487 "4-byte UTF-8 character in element name not faulted");
488}
490
491/* Test that a long latin-1 attribute (too long to convert in one go)
492 * is correctly converted
493 */
495 const char *text
496 = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497 "<doc att='"
498 /* 64 characters per line */
499 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515 /* Last character splits across a buffer boundary */
516 "\xe4'>\n</doc>";
517
518 const XML_Char *expected =
519 /* 64 characters per line */
520 /* clang-format off */
521 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537 /* clang-format on */
538#ifdef XML_UNICODE
539 XCS("\x00e4");
540#else
541 XCS("\xc3\xa4");
542#endif
543
544 run_attribute_check(text, expected);
545}
547
548/* Test that a long ASCII attribute (too long to convert in one go)
549 * is correctly converted
550 */
552 const char *text
553 = "<?xml version='1.0' encoding='us-ascii'?>\n"
554 "<doc att='"
555 /* 64 characters per line */
556 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572 "01234'>\n</doc>";
573 const XML_Char *expected =
574 /* 64 characters per line */
575 /* clang-format off */
576 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592 XCS("01234");
593 /* clang-format on */
594
595 run_attribute_check(text, expected);
596}
598
599/* Regression test #1 for SF bug #653180. */
601 const char *text = "<tag>\n"
602 "\n"
603 "\n</tag>";
604 XML_Size lineno;
605
610 if (lineno != 4) {
611 char buffer[100];
612 snprintf(buffer, sizeof(buffer),
613 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614 fail(buffer);
615 }
616}
618
619/* Regression test #2 for SF bug #653180. */
621 const char *text = "<tag></tag>";
623
628 if (colno != 11) {
629 char buffer[100];
630 snprintf(buffer, sizeof(buffer),
631 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632 fail(buffer);
633 }
634}
636
637/* Regression test #3 for SF bug #653180. */
639 const char *text = "<a>\n" /* Unix end-of-line */
640 " <b>\r\n" /* Windows end-of-line */
641 " <c/>\r" /* Mac OS end-of-line */
642 " </b>\n"
643 " <d>\n"
644 " <f/>\n"
645 " </d>\n"
646 "</a>";
647 const StructDataEntry expected[]
648 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG},
651 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}};
653 const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
655
663
666}
668
669/* Regression test #4 for SF bug #653180. */
671 const char *text = "<a>\n"
672 " <b>\n"
673 " </a>"; /* missing </b> */
674 XML_Size lineno;
677 fail("Expected a parse error");
678
680 if (lineno != 3) {
681 char buffer[100];
682 snprintf(buffer, sizeof(buffer),
683 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684 fail(buffer);
685 }
686}
688
689/* Regression test #5 for SF bug #653180. */
691 const char *text = "<a>\n"
692 " <b>\n"
693 " </a>"; /* missing </b> */
697 fail("Expected a parse error");
698
700 if (colno != 4) {
701 char buffer[100];
702 snprintf(buffer, sizeof(buffer),
703 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704 fail(buffer);
705 }
706}
708
709/* Regression test for SF bug #478332. */
711 /* This parses an input line of more than INIT_DATA_BUF_SIZE
712 characters (defined to be 1024 in xmlparse.c). We take a
713 really cheesy approach to building the input buffer, because
714 this avoids writing bugs in buffer-filling code.
715 */
716 const char *text
717 = "<e>"
718 /* 64 chars */
719 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720 /* until we have at least 1024 characters on the line: */
721 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737 "</e>";
741}
743
744/* Test cdata processing across a buffer boundary */
746 /* As above, except that we want to provoke an output buffer
747 * overflow with a non-trivial encoding. For this we need to pass
748 * the whole cdata in one go, not byte-by-byte.
749 */
750 void *buffer;
751 const char *text
752 = "<?xml version='1.0' encoding='iso-8859-1'?>"
753 "<e>"
754 /* 64 chars */
755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756 /* until we have at least 1024 characters on the line: */
757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773 "</e>";
774 int parse_len = (int)strlen(text);
775
776 /* Need a cdata handler to provoke the code path we want to test */
778 buffer = XML_GetBuffer(g_parser, parse_len);
779 if (buffer == NULL)
780 fail("Could not allocate parse buffer");
781 assert(buffer != NULL);
782 memcpy(buffer, text, parse_len);
785}
787
788/*
789 * Element event tests.
790 */
791
793 const char *text = "<a><b><c/></b><d><f/></d></a>";
794 const XML_Char *expected = XCS("/c/b/f/d/a");
796
804}
806
807/*
808 * Attribute tests.
809 */
810
811/* Helper used by the following tests; this checks any "attr", "ents" and
812 "refs" attributes to make sure whitespace has been normalized.
813
814 Return true if whitespace has been normalized in a string, using
815 the rules for attribute value normalization. The 'is_cdata' flag
816 is needed since CDATA attributes don't need to have multiple
817 whitespace characters collapsed to a single space, while other
818 attribute data types do. (Section 3.3.3 of the recommendation.)
819*/
820static int
822 int blanks = 0;
823 int at_start = 1;
824 while (*s) {
825 if (*s == XCS(' '))
826 ++blanks;
827 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828 return 0;
829 else {
830 if (at_start) {
831 at_start = 0;
832 if (blanks && ! is_cdata)
833 /* illegal leading blanks */
834 return 0;
835 } else if (blanks > 1 && ! is_cdata)
836 return 0;
837 blanks = 0;
838 }
839 ++s;
840 }
841 if (blanks && ! is_cdata)
842 return 0;
843 return 1;
844}
845
846/* Check the attribute whitespace checker: */
850 assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
851 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
852 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
853 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
854 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0));
855 assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
856 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
857 assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
866 assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
867}
869
870static void XMLCALL
872 const XML_Char **atts) {
873 int i;
874 UNUSED_P(userData);
875 UNUSED_P(name);
876 for (i = 0; atts[i] != NULL; i += 2) {
877 const XML_Char *attrname = atts[i];
878 const XML_Char *value = atts[i + 1];
879 if (xcstrcmp(XCS("attr"), attrname) == 0
880 || xcstrcmp(XCS("ents"), attrname) == 0
881 || xcstrcmp(XCS("refs"), attrname) == 0) {
883 char buffer[256];
884 snprintf(buffer, sizeof(buffer),
885 "attribute value not normalized: %" XML_FMT_STR
886 "='%" XML_FMT_STR "'",
887 attrname, value);
888 fail(buffer);
889 }
890 }
891 }
892}
893
895 const char *text
896 = "<!DOCTYPE doc [\n"
897 " <!ATTLIST doc\n"
898 " attr NMTOKENS #REQUIRED\n"
899 " ents ENTITIES #REQUIRED\n"
900 " refs IDREFS #REQUIRED>\n"
901 "]>\n"
902 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
903 " ents=' ent-1 \t\r\n"
904 " ent-2 ' >\n"
905 " <e id='id-1'/>\n"
906 " <e id='id-2'/>\n"
907 "</doc>";
908
914}
916
917/*
918 * XML declaration tests.
919 */
920
922 expect_failure("\n"
923 "<?xml version='1.0'?>\n"
924 "<a/>",
926 "failed to report misplaced XML declaration");
927}
929
931 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932 "Failed to report invalid XML declaration");
933}
935
937 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938 "Failed to report missing XML declaration attribute");
939}
941
943 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944 "<doc/>",
946 "Failed to report missing attribute value");
947}
949
950/* Regression test for SF bug #584832. */
952 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954 "<test a='&foo;'/>";
955
960}
962
963/* Test unrecognised encoding handler */
965 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967 "<test a='&foo;'/>";
968
972 fail("Unrecognised encoding not rejected");
973}
975
976/* Regression test for SF bug #620106. */
978 const char *text = "<!DOCTYPE doc [\n"
979 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980 "]>\n"
981 "<doc>&en;</doc>";
983 = {/* This text says it's an unsupported encoding, but it's really
984 UTF-8, which we tell Expat using XML_SetEncoding().
985 */
986 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987#ifdef XML_UNICODE
988 const XML_Char *expected = XCS("\x00e9");
989#else
990 const XML_Char *expected = XCS("\xc3\xa9");
991#endif
992
995}
997
998/* Test external entities with no handler */
1000 const char *text = "<!DOCTYPE doc [\n"
1001 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002 "]>\n"
1003 "<doc>&en;</doc>";
1004
1007}
1009
1010/* Test UTF-8 BOM is accepted */
1012 const char *text = "<!DOCTYPE doc [\n"
1013 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014 "]>\n"
1015 "<doc>&en;</doc>";
1016 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017 "<?xml encoding='iso-8859-3'?>"
1018 "\xC3\xA9",
1019 XCS("utf-8"), NULL};
1020#ifdef XML_UNICODE
1021 const XML_Char *expected = XCS("\x00e9");
1022#else
1023 const XML_Char *expected = XCS("\xc3\xa9");
1024#endif
1025
1028}
1030
1031/* Test that bad encodings are faulted */
1033 const char *text = "<!DOCTYPE doc [\n"
1034 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035 "]>\n"
1036 "<doc>&en;</doc>";
1038 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040
1044 "Bad encoding should not have been accepted");
1045}
1047
1048/* Try handing an invalid encoding to an external entity parser */
1050 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052 "<doc>&entity;</doc>";
1054 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056
1061 "Bad encoding not faulted in external entity handler");
1062}
1064
1065/* Test that no error is reported for unknown entities if we don't
1066 read an external subset. This was fixed in Expat 1.95.5.
1067*/
1069 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070 "<doc>&entity;</doc>";
1071
1075}
1077
1078/* Test that an error is reported for unknown entities if we don't
1079 have an external subset.
1080*/
1082 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083 "Parser did not report undefined entity w/out a DTD.");
1084}
1086
1087/* Test that an error is reported for unknown entities if we don't
1088 read an external subset, but have been declared standalone.
1089*/
1091 const char *text
1092 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094 "<doc>&entity;</doc>";
1095
1097 "Parser did not report undefined entity (standalone).");
1098}
1100
1101/* Test that an error is reported for unknown entities if we have read
1102 an external subset, and standalone is true.
1103*/
1105 const char *text
1106 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108 "<doc>&entity;</doc>";
1109 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110
1115 "Parser did not report undefined entity (external DTD).");
1116}
1118
1119/* Test that external entity handling is not done if the parsing flag
1120 * is set to UNLESS_STANDALONE and the document is declared standalone
1121 */
1123 const char *text
1124 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126 "<doc>&entity;</doc>";
1127 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128
1134 "Parser did not report undefined entity");
1135}
1137
1138/* Test that no error is reported for unknown entities if we have read
1139 an external subset, and standalone is false.
1140*/
1142 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144 "<doc>&entity;</doc>";
1145 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146
1150}
1152
1153/* Test that an error is reported if our NotStandalone handler fails */
1155 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157 "<doc>&entity;</doc>";
1158 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159
1165 "NotStandalone handler failed to reject");
1166
1167 /* Try again but without external entity handling */
1171 "NotStandalone handler failed to reject");
1172}
1174
1175/* Test that no error is reported if our NotStandalone handler succeeds */
1177 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178 "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179 "<doc>&entity;</doc>";
1180 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181
1186
1187 /* Repeat without the external entity handler */
1191}
1193
1195 const char *text = "<!DOCTYPE doc [\n"
1196 " <!ENTITY entity '&#38;entity;'>\n"
1197 "]>\n"
1198 "<doc>&entity;</doc>";
1199
1201 "Parser did not report recursive entity reference.");
1202}
1204
1206 struct TestCase {
1207 const char *doc;
1209 };
1210
1211 struct TestCase cases[] = {
1212 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1213 {"<!ENTITY % p1 '%p1;'>"
1214 "<!ENTITY % p1 'first declaration wins'>",
1216 {"<!ENTITY % p1 'first declaration wins'>"
1217 "<!ENTITY % p1 '%p1;'>",
1219 {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
1220 };
1221
1222 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1223 const char *const doc = cases[i].doc;
1224 const enum XML_Status expectedStatus = cases[i].expectedStatus;
1225 set_subtest("%s", doc);
1226
1229
1232
1233 const enum XML_Status actualStatus
1234 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1235
1237 if (actualStatus != XML_STATUS_OK) {
1240 }
1241
1244 }
1245}
1247
1248/* Test incomplete external entities are faulted */
1250 const char *text = "<!DOCTYPE doc [\n"
1251 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1252 "]>\n"
1253 "<doc>&en;</doc>";
1254 const ExtFaults faults[]
1255 = {{"<", "Incomplete element declaration not faulted", NULL,
1257 {"<\xe2\x82", /* First two bytes of a three-byte char */
1258 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1259 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1262 const ExtFaults *fault = faults;
1263
1264 for (; fault->parse_text != NULL; fault++) {
1265 set_subtest("\"%s\"", fault->parse_text);
1268 XML_SetUserData(g_parser, (void *)fault);
1270 "Parser did not report external entity error");
1272 }
1273}
1275
1276/* Regression test for SF bug #483514. */
1301
1302/* Test handling of attribute declarations */
1304 const char *prolog = "<!DOCTYPE doc [\n"
1305 "<!ELEMENT doc EMPTY>\n";
1307 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1308 "]>"
1309 "<doc a='two'/>",
1310 XCS("doc"), XCS("a"),
1311 XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1312 NULL, XML_TRUE},
1313 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1314 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1315 "]>"
1316 "<doc/>",
1317 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1318 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1319 "]>"
1320 "<doc/>",
1321 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1322 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1323 "]>"
1324 "<doc/>",
1325 XCS("doc"), XCS("a"), XCS("CDATA"),
1326#ifdef XML_UNICODE
1327 XCS("\x06f2"),
1328#else
1329 XCS("\xdb\xb2"),
1330#endif
1331 XML_FALSE},
1332 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1333 AttTest *test;
1334
1335 for (test = attr_data; test->definition != NULL; test++) {
1336 set_subtest("%s", test->definition);
1339 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1340 XML_FALSE)
1343 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1344 (int)strlen(test->definition), XML_TRUE)
1348 }
1349}
1351
1352/* See related SF bug #673791.
1353 When namespace processing is enabled, setting the namespace URI for
1354 a prefix is not allowed; this test ensures that it *is* allowed
1355 when namespace processing is not enabled.
1356 (See Namespaces in XML, section 2.)
1357*/
1359 const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1360 " <e xmlns:prefix=''/>\n"
1361 "</doc>";
1362
1366}
1368
1369/* Regression test for SF bug #824420.
1370 Checks that an xmlns:prefix attribute set in an attribute's default
1371 value isn't misinterpreted.
1372*/
1374 const char *text = "<!DOCTYPE e:element [\n"
1375 " <!ATTLIST e:element\n"
1376 " xmlns:e CDATA 'http://example.org/'>\n"
1377 " ]>\n"
1378 "<e:element/>";
1379
1383}
1385
1386/* Regression test for SF bug #1515266: missing check of stopped
1387 parser in doContent() 'for' loop. */
1389 /* The sample data must be big enough that there are two calls to
1390 the character data handler from within the inner "for" loop of
1391 the XML_TOK_DATA_CHARS case in doContent(), and the character
1392 handler must stop the parser and clear the character data
1393 handler.
1394 */
1395 const char *text = long_character_data_text;
1396
1404}
1406
1407/* Regression test for SF bug #1515266: missing check of stopped
1408 parser in doContent() 'for' loop. */
1410 /* The sample data must be big enough that there are two calls to
1411 the character data handler from within the inner "for" loop of
1412 the XML_TOK_DATA_CHARS case in doContent(), and the character
1413 handler must stop the parser and clear the character data
1414 handler.
1415 */
1416 const char *text = long_character_data_text;
1417
1425 /* Try parsing directly */
1428 fail("Attempt to continue parse while suspended not faulted");
1430 fail("Suspended parse not faulted with correct error");
1431}
1433
1434/* Test repeated calls to XML_StopParser are handled correctly */
1462
1464 const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1465 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1466
1471 /* Add start and end handlers for coverage */
1474
1478 CharData_CheckXMLChars(&storage, expected);
1479
1480 /* Try again, this time with a default handler */
1486
1490 CharData_CheckXMLChars(&storage, expected);
1491}
1493
1495 /* Test data is:
1496 * <?xml version='1.0' encoding='utf-16'?>
1497 * <a><![CDATA[hello]]></a>
1498 */
1499 const char text[]
1500 = "\0<\0?\0x\0m\0l\0"
1501 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1502 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1503 "1\0"
1504 "6\0'"
1505 "\0?\0>\0\n"
1506 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1507 const XML_Char *expected = XCS("hello");
1508
1513
1514 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1517 CharData_CheckXMLChars(&storage, expected);
1518}
1520
1522 /* Test data is:
1523 * <?xml version='1.0' encoding='utf-16'?>
1524 * <a><![CDATA[hello]]></a>
1525 */
1526 const char text[]
1527 = "<\0?\0x\0m\0l\0"
1528 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1529 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1530 "1\0"
1531 "6\0'"
1532 "\0?\0>\0\n"
1533 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1534 const XML_Char *expected = XCS("hello");
1535
1540
1541 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1544 CharData_CheckXMLChars(&storage, expected);
1545}
1547
1548/* Test UTF16 conversion of a long cdata string */
1549
1550/* 16 characters ('A' through 'P'), each written as a zero byte followed
 * by the ASCII byte, i.e. one big-endian UTF-16 code unit per character:
 * handy macro to reduce visual clutter */
1551#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1552
1554 /* Test data is:
1555 * <?xml version='1.0' encoding='utf-16'?>
1556 * <a><![CDATA[
1557 * ABCDEFGHIJKLMNOP
1558 * ]]></a>
1559 */
1560 const char text[]
1561 = "\0<\0?\0x\0m\0l\0 "
1562 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1563 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1564 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1565 /* 64 characters per line */
1566 /* clang-format off */
1584 /* clang-format on */
1585 "\0]\0]\0>\0<\0/\0a\0>";
1586 const XML_Char *expected =
1587 /* clang-format off */
1588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1592 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1593 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1594 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1595 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1596 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1597 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1598 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1599 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1600 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1601 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1602 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1603 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1604 XCS("ABCDEFGHIJKLMNOP");
1605 /* clang-format on */
1607 void *buffer;
1608
1612 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1613 if (buffer == NULL)
1614 fail("Could not allocate parse buffer");
1615 assert(buffer != NULL);
1616 memcpy(buffer, text, sizeof(text) - 1);
1619 CharData_CheckXMLChars(&storage, expected);
1620}
1622
1623/* Test handling of multiple unit UTF-16 characters */
1625 /* Test data is:
1626 * <?xml version='1.0' encoding='utf-16'?>
1627 * <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1628 *
1629 * where {MINIM} is U+1d15e (a minim or half-note)
1630 * UTF-16: 0xd834 0xdd5e
1631 * UTF-8: 0xf0 0x9d 0x85 0x9e
1632 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1633 * UTF-16: 0xd834 0xdd5f
1634 * UTF-8: 0xf0 0x9d 0x85 0x9f
1635 */
1636 const char text[] = "\0<\0?\0x\0m\0l\0"
1637 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1638 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1639 "1\0"
1640 "6\0'"
1641 "\0?\0>\0\n"
1642 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1643 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1644 "\0]\0]\0>\0<\0/\0a\0>";
1645#ifdef XML_UNICODE
1646 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1647#else
1648 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1649#endif
1651
1655
1656 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1659 CharData_CheckXMLChars(&storage, expected);
1660}
1662
1663/* Test that a reversed UTF-16 surrogate pair in a CDATA section is rejected */
1665 /* Test data is:
1666 * <?xml version='1.0' encoding='utf-16'?>
1667 * <a><![CDATA[{BADLINB}]]></a>
1668 *
1669 * where {BADLINB} is U+10000 (the first Linear B character)
1670 * with the UTF-16 surrogate pair in the wrong order, i.e.
1671 * 0xdc00 0xd800
1672 */
1673 const char text[] = "\0<\0?\0x\0m\0l\0"
1674 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1675 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1676 "1\0"
1677 "6\0'"
1678 "\0?\0>\0\n"
1679 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1680 "\xdc\x00\xd8\x00"
1681 "\0]\0]\0>\0<\0/\0a\0>";
1682
1683 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1685 fail("Reversed UTF-16 surrogate pair not faulted");
1688}
1690
1692 struct CaseData {
1693 const char *text;
1695 };
1696
1697 struct CaseData cases[]
1698 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1699 {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1700 {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1701 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1702 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1703 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1704 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1705 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1706
1707 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1708 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1709 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1710
1711 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1712 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1713 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1714 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1715 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1716 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1717 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1718
1719 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1720 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1721 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1722
1723 size_t i = 0;
1724 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1725 set_subtest("%s", cases[i].text);
1727 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1729
1731
1732 if (actualError != cases[i].expectedError) {
1733 char message[100];
1734 snprintf(message, sizeof(message),
1735 "Expected error %d but got error %d for case %u: \"%s\"\n",
1736 cases[i].expectedError, actualError, (unsigned int)i + 1,
1737 cases[i].text);
1738 fail(message);
1739 }
1740
1742 }
1743}
1745
1746/* Test failures in UTF-16 CDATA */
1748 struct CaseData {
1749 size_t text_bytes;
1750 const char *text;
1752 };
1753
1754 const char prolog[] = "\0<\0?\0x\0m\0l\0"
1755 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1756 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1757 "1\0"
1758 "6\0'"
1759 "\0?\0>\0\n"
1760 "\0<\0a\0>";
1761 struct CaseData cases[] = {
1762 {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1763 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1764 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1765 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1766 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1767 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1768 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1769 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1770 {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1771 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1772 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1773 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1774 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1775 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1776 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1777 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1778 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1779 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1780 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1781 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1782 /* Now add a four-byte UTF-16 character */
1783 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1785 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1786 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1788 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1790 size_t i;
1791
1792 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1793 set_subtest("case %lu", (long unsigned)(i + 1));
1796
1797 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1798 XML_FALSE)
1802 (int)cases[i].text_bytes, XML_TRUE);
1806 char message[1024];
1807
1808 snprintf(message, sizeof(message),
1809 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1810 ") for case %lu\n",
1813 XML_ErrorString(actual_error), (long unsigned)(i + 1));
1814 fail(message);
1815 }
1817 }
1818}
1820
1821/* Test stopping the parser in cdata handler */
1830
1831/* Test suspending the parser in cdata handler */
1848
1849/* Test memory allocation functions */
1851 char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1852 char *p;
1853
1854 if (buffer == NULL) {
1855 fail("Allocation failed");
1856 } else {
1857 /* Try writing to memory; some OSes try to cheat! */
1858 buffer[0] = 'T';
1859 buffer[1] = 'E';
1860 buffer[2] = 'S';
1861 buffer[3] = 'T';
1862 buffer[4] = '\0';
1863 if (strcmp(buffer, "TEST") != 0) {
1864 fail("Memory not writable");
1865 } else {
1866 p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1867 if (p == NULL) {
1868 fail("Reallocation failed");
1869 } else {
1870 /* Write again, just to be sure */
1871 buffer = p;
1872 buffer[0] = 'V';
1873 if (strcmp(buffer, "VEST") != 0) {
1874 fail("Reallocated memory not writable");
1875 }
1876 }
1877 }
1879 }
1880}
1882
1883/* Test XML_DefaultCurrent() passes handling on correctly */
1885 const char *text = "<doc>hell]</doc>";
1886 const char *entity_text = "<!DOCTYPE doc [\n"
1887 "<!ENTITY entity '&#37;'>\n"
1888 "]>\n"
1889 "<doc>&entity;</doc>";
1890
1891 set_subtest("with defaulting");
1892 {
1894 storage.count = 0;
1901 int i = 0;
1902 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1903 // we should have gotten one or more cdata callbacks, totaling 5 chars
1904 int cdata_len_remaining = 5;
1905 while (cdata_len_remaining > 0) {
1906 const struct handler_record_entry *c_entry
1908 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1909 assert_true(c_entry->arg > 0);
1912 // default handler must follow, with the exact same len argument.
1913 assert_record_handler_called(&storage, i++, "record_default_handler",
1914 c_entry->arg);
1915 }
1916 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1917 assert_true(storage.count == i);
1918 }
1919
1920 /* Again, without the defaulting */
1921 set_subtest("no defaulting");
1922 {
1924 storage.count = 0;
1932 int i = 0;
1933 assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1934 // we should have gotten one or more cdata callbacks, totaling 5 chars
1935 int cdata_len_remaining = 5;
1936 while (cdata_len_remaining > 0) {
1937 const struct handler_record_entry *c_entry
1939 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1940 assert_true(c_entry->arg > 0);
1943 }
1944 assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1945 assert_true(storage.count == i);
1946 }
1947
1948 /* Now with an internal entity to complicate matters */
1949 set_subtest("with internal entity");
1950 {
1952 storage.count = 0;
1958 XML_TRUE)
1961 /* The default handler suppresses the entity */
1962 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1963 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1964 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1965 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1966 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1967 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1968 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1969 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1970 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1971 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1972 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1973 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1974 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1975 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1976 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1977 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1978 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1979 assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1980 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1981 assert_true(storage.count == 19);
1982 }
1983
1984 /* Again, with a skip handler */
1985 set_subtest("with skip handler");
1986 {
1988 storage.count = 0;
1995 XML_TRUE)
1998 /* The default handler suppresses the entity */
1999 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2000 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2001 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2002 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2003 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2004 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2005 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2006 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2007 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2008 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2009 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2010 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2011 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2012 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2013 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2014 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2015 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2016 assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2017 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2018 assert_true(storage.count == 19);
2019 }
2020
2021 /* This time, allow the entity through */
2022 set_subtest("allow entity");
2023 {
2025 storage.count = 0;
2031 XML_TRUE)
2034 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2035 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2036 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2037 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2038 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2039 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2040 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2041 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2042 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2043 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2044 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2045 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2046 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2047 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2048 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2049 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2050 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2051 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2052 assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2053 assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2054 assert_true(storage.count == 20);
2055 }
2056
2057 /* Finally, without passing the cdata to the default handler */
2058 set_subtest("not passing cdata");
2059 {
2061 storage.count = 0;
2067 XML_TRUE)
2070 assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2071 assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2072 assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2073 assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2074 assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2075 assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2076 assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2077 assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2078 assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2079 assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2080 assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2081 assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2082 assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2083 assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2084 assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2085 assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2086 assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2087 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2088 1);
2089 assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2090 assert_true(storage.count == 19);
2091 }
2092}
2094
2095/* Test DTD element parsing code paths */
2097 const char *text = "<!DOCTYPE doc [\n"
2098 "<!ELEMENT doc (chapter)>\n"
2099 "<!ELEMENT chapter (#PCDATA)>\n"
2100 "]>\n"
2101 "<doc><chapter>Wombats are go</chapter></doc>";
2102
2107}
2109
2110static void XMLCALL
2112 XML_Content *model) {
2113 UNUSED_P(userData);
2114 uint32_t errorFlags = 0;
2115
2116 /* Expected model array structure is this:
2117 * [0] (type 6, quant 0)
2118 * [1] (type 5, quant 0)
2119 * [3] (type 4, quant 0, name "bar")
2120 * [4] (type 4, quant 0, name "foo")
2121 * [5] (type 4, quant 3, name "xyz")
2122 * [2] (type 4, quant 2, name "zebra")
2123 */
2124 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2125 errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2126
2127 if (model != NULL) {
2128 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2129 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2130 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2131 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2132 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2133
2134 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2135 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2136 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2137 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2138 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2139
2140 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2141 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2142 errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2143 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2145 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2146
2147 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2148 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2149 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2150 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2151 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2152
2153 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2154 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2155 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2156 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2157 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2158
2159 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2160 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2161 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2162 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2163 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2164 }
2165
2168}
2169
2171 // Payload inspired by a test in Perl's XML::Parser
2172 const char *text = "<!DOCTYPE foo [\n"
2173 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2174 "]>\n"
2175 "<foo/>";
2176
2177 XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2178
2183
2185 fail("Element declaration model regression detected");
2186}
2188
2189/* Test foreign DTD handling */
2191 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2192 const char *text2 = "<doc>&entity;</doc>";
2193 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2194
2195 /* Check hash salt is passed through too */
2196 XML_SetHashSalt(g_parser, 0x12345678);
2200 /* Add a default handler to exercise more code paths */
2203 fail("Could not set foreign DTD");
2207
2208 /* Ensure that trying to set the DTD after parsing has started
2209 * is faulted, even if it's the same setting.
2210 */
2213 fail("Failed to reject late foreign DTD setting");
2214 /* Ditto for the hash salt */
2215 if (XML_SetHashSalt(g_parser, 0x23456789))
2216 fail("Failed to reject late hash salt change");
2217
2218 /* Now finish the parse */
2222}
2224
2225/* Test foreign DTD handling with a failing NotStandalone handler */
2227 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2228 "<doc>&entity;</doc>";
2229 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2230
2236 fail("Could not set foreign DTD");
2238 "NotStandalonehandler failed to reject");
2239}
2241
2242/* Test invalid character in a foreign DTD is faulted */
2244 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2245 "<doc>&entity;</doc>";
2247 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2248
2254 "Bad DTD should not have been accepted");
2255}
2257
2258/* Test foreign DTD use with a doctype */
2260 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2261 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2262 const char *text2 = "<doc>&entity;</doc>";
2263 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2264
2265 /* Check hash salt is passed through too */
2266 XML_SetHashSalt(g_parser, 0x12345678);
2270 /* Add a default handler to exercise more code paths */
2273 fail("Could not set foreign DTD");
2277
2278 /* Ensure that trying to set the DTD after parsing has started
2279 * is faulted, even if it's the same setting.
2280 */
2283 fail("Failed to reject late foreign DTD setting");
2284 /* Ditto for the hash salt */
2285 if (XML_SetHashSalt(g_parser, 0x23456789))
2286 fail("Failed to reject late hash salt change");
2287
2288 /* Now finish the parse */
2292}
2294
2295/* Test XML_UseForeignDTD with no external subset present */
2309
2311 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2312 "<doc>&entity;</doc>";
2313
2318 "Undefined entity not faulted");
2319}
2321
2322/* Test XML Base is set and unset appropriately */
2324 const XML_Char *old_base;
2325 const XML_Char *new_base = XCS("/local/file/name.xml");
2326
2329 fail("Unable to set base");
2331 fail("Base setting not correct");
2333 fail("Unable to NULL base");
2334 if (XML_GetBase(g_parser) != NULL)
2335 fail("Base setting not nulled");
2337}
2339
2340/* Test attribute counts, indexing, etc */
2342 const char *text = "<!DOCTYPE doc [\n"
2343 "<!ELEMENT doc (tag)>\n"
2344 "<!ATTLIST doc id ID #REQUIRED>\n"
2345 "]>"
2346 "<doc a='1' id='one' b='2'>"
2347 "<tag c='3'/>"
2348 "</doc>";
2349 AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2350 {XCS("b"), XCS("2")},
2351 {XCS("id"), XCS("one")},
2352 {NULL, NULL}};
2353 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2354 ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2355 {XCS("tag"), 1, NULL, NULL},
2356 {NULL, 0, NULL, NULL}};
2358 info[1].attributes = tag_info;
2359
2363 parser,
2364 info,
2365 };
2366
2369 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
2372
2374}
2376
2377/* Test reset works correctly in the middle of processing an internal
2378 * entity. Exercises some obscure code in XML_ParserReset().
2379 */
2381 const char *text = "<!DOCTYPE doc [\n"
2382 "<!ENTITY wombat 'wom'>\n"
2383 "<!ENTITY entity 'hi &wom; there'>\n"
2384 "]>\n"
2385 "<doc>&entity;</doc>";
2387
2394 if (status.parsing != XML_SUSPENDED)
2395 fail("Parsing status not SUSPENDED");
2398 if (status.parsing != XML_INITIALIZED)
2399 fail("Parsing status doesn't reset to INITIALIZED");
2400}
2402
2403/* Test that resume correctly passes through parse errors */
2405 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2406
2409 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2413 fail("Resumed invalid parse not faulted");
2415 fail("Invalid parse not correctly faulted");
2416}
2418
2419/* Test that re-suspended parses are correctly passed through */
2421 const char *text = "<doc>Hello<meep/>world</doc>";
2422
2425 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2431 fail("Resumption not suspended");
2432 /* This one should succeed and finish up */
2435}
2437
2438/* Test that CDATA shows up correctly through a default handler */
2440 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2441 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2443
2447
2451 CharData_CheckXMLChars(&storage, expected);
2452}
2454
2455/* Test resetting a subordinate parser does exactly nothing */
2457 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2458 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2459 "<doc>&entity;</doc>";
2460
2466}
2468
2469/* Test suspending a subordinate parser */
2471 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2472 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2473 "<doc>&entity;</doc>";
2474
2480}
2482
2483/* Test suspending a subordinate parser from an XML declaration */
2484/* Increases code coverage of the tests */
2485
2487 const char *text
2488 = "<!DOCTYPE doc [\n"
2489 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2490 "]>\n"
2491 "<doc>&entity;</doc>";
2492
2499}
2501
2503 const char *text
2504 = "<!DOCTYPE doc [\n"
2505 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2506 "]>\n"
2507 "<doc>&entity;</doc>";
2508
2515}
2517
2518/* Test external entity fault handling with suspension */
2520 const char *text = "<!DOCTYPE doc [\n"
2521 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2522 "]>\n"
2523 "<doc>&en;</doc>";
2525 = {{"<?xml version='1.0' encoding='us-ascii'?><",
2526 "Incomplete element declaration not faulted", NULL,
2528 {/* First two bytes of a three-byte char */
2529 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2530 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2533
2534 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2535 set_subtest("%s", fault->parse_text);
2541 "Parser did not report external entity error");
2543 }
2544}
2546
2547/* Test setting an explicit encoding */
2549 const char *text1 = "<doc>Hello ";
2550 const char *text2 = " World</doc>";
2551
2552 /* Just check that we can set the encoding to NULL before starting */
2554 fail("Failed to initialise encoding to NULL");
2555 /* Say we are UTF-8 */
2556 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2557 fail("Failed to set explicit encoding");
2561 /* Try to switch encodings mid-parse */
2562 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2563 fail("Allowed encoding change");
2567 /* Try again now that the parse is over */
2569 fail("Failed to unset encoding");
2570}
2572
2573/* Test handling of trailing CR (rather than newline) */
2575 const char *text = "<doc>\r";
2576 int found_cr;
2577
2578 /* Try with a character handler, for code coverage */
2581 found_cr = 0;
2583 == XML_STATUS_OK)
2584 fail("Failed to fault unclosed doc");
2585 if (found_cr == 0)
2586 fail("Did not catch the carriage return");
2588
2589 /* Now with a default handler instead */
2592 found_cr = 0;
2594 == XML_STATUS_OK)
2595 fail("Failed to fault unclosed doc");
2596 if (found_cr == 0)
2597 fail("Did not catch default carriage return");
2598}
2600
2601/* Test trailing CR in an external entity parse */
2603 const char *text = "<!DOCTYPE doc [\n"
2604 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2605 "]>\n"
2606 "<doc>&en;</doc>";
2607 int found_cr;
2608
2612 found_cr = 0;
2614 != XML_STATUS_OK)
2616 if (found_cr == 0)
2617 fail("No carriage return found");
2619
2620 /* Try again with a different trailing CR */
2624 found_cr = 0;
2626 != XML_STATUS_OK)
2628 if (found_cr == 0)
2629 fail("No carriage return found");
2630}
2632
2633/* Test handling of trailing square bracket */
2635 const char *text8 = "<doc>]";
2636 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2637 int found_rsqb;
2638 int text8_len = (int)strlen(text8);
2639
2642 found_rsqb = 0;
2644 == XML_STATUS_OK)
2645 fail("Failed to fault unclosed doc");
2646 if (found_rsqb == 0)
2647 fail("Did not catch the right square bracket");
2648
2649 /* Try again with a different encoding */
2653 found_rsqb = 0;
2654 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2655 XML_TRUE)
2656 == XML_STATUS_OK)
2657 fail("Failed to fault unclosed doc");
2658 if (found_rsqb == 0)
2659 fail("Did not catch the right square bracket");
2660
2661 /* And finally with a default handler */
2665 found_rsqb = 0;
2666 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2667 XML_TRUE)
2668 == XML_STATUS_OK)
2669 fail("Failed to fault unclosed doc");
2670 if (found_rsqb == 0)
2671 fail("Did not catch the right square bracket");
2672}
2674
2675/* Test trailing right square bracket in an external entity parse */
2677 const char *text = "<!DOCTYPE doc [\n"
2678 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2679 "]>\n"
2680 "<doc>&en;</doc>";
2681 int found_rsqb;
2682
2686 found_rsqb = 0;
2688 != XML_STATUS_OK)
2690 if (found_rsqb == 0)
2691 fail("No right square bracket found");
2692}
2694
2695/* Test CDATA handling in an external entity */
2697 const char *text = "<!DOCTYPE doc [\n"
2698 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2699 "]>\n"
2700 "<doc>&en;</doc>";
2701
2705 != XML_STATUS_OK)
2707}
2709
2710/* Test user parameter settings */
2712 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2713 "<!-- Primary parse -->\n"
2714 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2715 "<doc>&entity;";
2716 const char *epilog = "<!-- Back to primary parser -->\n"
2717 "</doc>";
2718
2719 g_comment_count = 0;
2720 g_skip_count = 0;
2721 g_xdecl_count = 0;
2728 XML_SetUserData(g_parser, (void *)1);
2733 /* Ensure we can't change policy mid-parse */
2735 fail("Changed param entity parsing policy while parsing");
2739 if (g_comment_count != 3)
2740 fail("Comment handler not invoked enough times");
2741 if (g_skip_count != 1)
2742 fail("Skip handler not invoked enough times");
2743 if (g_xdecl_count != 1)
2744 fail("XML declaration handler not invoked");
2745}
2747
2748/* Test that an explicit external entity handler argument replaces
2749 * the parser as the first argument.
2750 *
2751 * We do not call the first parameter to the external entity handler
2752 * 'parser' for once, since the first time the handler is called it
2753 * will actually be a text string. We need to be able to access the
2754 * global 'parser' variable to create our external entity parser from,
2755 * since there are code paths we need to ensure get executed.
2756 */
2758 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2759 "<!DOCTYPE doc SYSTEM 'foo'>\n"
2760 "<doc>&entity;</doc>";
2761
2764 /* Set a handler arg that is not NULL and not parser (which is
2765 * what NULL would cause to be passed).
2766 */
2772
2773 /* Now try again with unset args */
2782}
2784
2785/* Test the parsing of an empty string */
2787 const char *text = "<doc></doc>";
2788 const char *partial = "<doc>";
2789
2791 fail("Parsing empty string faulted");
2793 fail("Parsing final empty string not faulted");
2795 fail("Parsing final empty string faulted for wrong reason");
2796
2797 /* Now try with valid text before the empty end */
2803 fail("Parsing final empty string faulted");
2804
2805 /* Now try with invalid text before the empty end */
2808 XML_FALSE)
2812 fail("Parsing final incomplete empty string not faulted");
2813}
2815
2816/* Test XML_Parse for len < 0 */
2818 const char *const doc = "<root/>";
2819 for (int isFinal = 0; isFinal < 2; isFinal++) {
2820 set_subtest("isFinal=%d", isFinal);
2821
2823
2825 fail("There was not supposed to be any initial parse error.");
2826
2827 const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
2828
2829 if (status != XML_STATUS_ERROR)
2830 fail("Negative len was expected to fail the parse but did not.");
2831
2833 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2834
2836 }
2837}
2839
2840/* Test XML_ParseBuffer for len < 0 */
2842 const char *const doc = "<root/>";
2843 for (int isFinal = 0; isFinal < 2; isFinal++) {
2844 set_subtest("isFinal=%d", isFinal);
2845
2847
2849 fail("There was not supposed to be any initial parse error.");
2850
2851 void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
2852
2853 if (buffer == NULL)
2854 fail("XML_GetBuffer failed.");
2855
2856 memcpy(buffer, doc, strlen(doc));
2857
2858 const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
2859
2860 if (status != XML_STATUS_ERROR)
2861 fail("Negative len was expected to fail the parse but did not.");
2862
2864 fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
2865
2867 }
2868}
2870
2871/* Helper: find feature_id in XML_GetFeatureList() and store its value in *presult */
2872static enum XML_Status
2874 const XML_Feature *feature = XML_GetFeatureList();
2875
2876 if (feature == NULL)
2877 return XML_STATUS_ERROR;
2878 for (; feature->feature != XML_FEATURE_END; feature++) {
2879 if (feature->feature == feature_id) {
2880 *presult = feature->value;
2881 return XML_STATUS_OK;
2882 }
2883 }
2884 return XML_STATUS_ERROR;
2885}
2886
2887/* Test odd corners of the XML_GetBuffer interface */
2889 const char *text = get_buffer_test_text;
2890 void *buffer;
2891 long context_bytes;
2892
2893 /* Attempt to allocate a negative length buffer */
2894 if (XML_GetBuffer(g_parser, -12) != NULL)
2895 fail("Negative length buffer not failed");
2896
2897 /* Now get a small buffer and extend it past valid length */
2898 buffer = XML_GetBuffer(g_parser, 1536);
2899 if (buffer == NULL)
2900 fail("1.5K buffer failed");
2901 assert(buffer != NULL);
2902 memcpy(buffer, text, strlen(text));
2903 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2907 fail("INT_MAX buffer not failed");
2908
2909 /* Now try extending it a more reasonable but still too large
2910 * amount. The allocator in XML_GetBuffer() doubles the buffer
2911 * size until it exceeds the requested amount or INT_MAX. If it
2912 * exceeds INT_MAX, it rejects the request, so we want a request
2913 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
2914 * with an extra byte just to ensure that the request is off any
2915 * boundary. The request will be inflated internally by
2916 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2917 * request.
2918 */
2920 context_bytes = 0;
2921 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2922 fail("INT_MAX- buffer not failed");
2923
2924 /* Now try extending it a carefully crafted amount */
2925 if (XML_GetBuffer(g_parser, 1000) == NULL)
2926 fail("1000 buffer failed");
2927}
2929
2930/* Test more corners of the XML_GetBuffer interface */
2932 const char *text = get_buffer_test_text;
2933 void *buffer;
2934
2935 /* Now get a decent buffer */
2936 buffer = XML_GetBuffer(g_parser, 1536);
2937 if (buffer == NULL)
2938 fail("1.5K buffer failed");
2939 assert(buffer != NULL);
2940 memcpy(buffer, text, strlen(text));
2941 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2944
2945 /* Extend it, to catch a different code path */
2946 if (XML_GetBuffer(g_parser, 1024) == NULL)
2947 fail("1024 buffer failed");
2948}
2950
2951/* Test for signed integer overflow CVE-2022-23852 */
2952#if XML_CONTEXT_BYTES > 0
2955 assert(parser != NULL);
2956
2957 const char *const text = "\n";
2958 const int expectedKeepValue = (int)strlen(text);
2959
2960 // After this call, variable "keep" in XML_GetBuffer will
2961 // have value expectedKeepValue
2962 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
2963 XML_FALSE /* isFinal */)
2966
2969 fail("enlarging buffer not failed");
2970
2972}
2974#endif // XML_CONTEXT_BYTES > 0
2975
2977 const char *const prefixes[] = {
2978 "",
2979 "<",
2980 "<x a='",
2981 "<doc><x a='",
2982 "<document><x a='",
2983 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2984 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2985 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2986 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2987 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2988 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2989 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2990#if defined(__MINGW32__) && ! defined(__MINGW64__)
2991 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2992 // Can we make a big allocation?
2993 void *big = malloc(maxbuf);
2994 if (! big) {
2995 // The big allocation failed. Let's be a little lenient.
2996 maxbuf = maxbuf / 2;
2997 }
2998 free(big);
2999#endif
3000
3001 for (int i = 0; i < num_prefixes; ++i) {
3002 set_subtest("\"%s\"", prefixes[i]);
3004 const int prefix_len = (int)strlen(prefixes[i]);
3005 const enum XML_Status s
3007 if (s != XML_STATUS_OK)
3009
3010 // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
3011 // subtracting the whole prefix is easiest, and close enough.
3013 // The limit should be consistent; no prefix should allow us to
3014 // reach above the max buffer size.
3017 }
3018}
3020
3034
3035/* Test position information macros */
3037 const char *text = "<doc></doc>";
3038
3041 fail("Byte index/count incorrect at start of parse");
3045 /* At end, the count will be zero and the index the end of string */
3047 fail("Terminal byte count incorrect");
3049 fail("Terminal byte index incorrect");
3050}
3052
3053/* Test position information from errors */
3054#define PRE_ERROR_STR "<doc></"
3055#define POST_ERROR_STR "wombat></doc>"
3057 const char *text = PRE_ERROR_STR POST_ERROR_STR;
3058
3060 == XML_STATUS_OK)
3061 fail("Syntax error not faulted");
3063 fail("Error byte count incorrect");
3065 fail("Error byte index incorrect");
3066}
3068#undef PRE_ERROR_STR
3069#undef POST_ERROR_STR
3070
3071/* Test position information in handler */
3072#define START_ELEMENT "<e>"
3073#define CDATA_TEXT "Hello"
3074#define END_ELEMENT "</e>"
3077 int offset, size;
3079
3080 /* Check initial context is empty */
3082 fail("Unexpected context at start of parse");
3083
3084 data.start_element_len = (int)strlen(START_ELEMENT);
3085 data.cdata_len = (int)strlen(CDATA_TEXT);
3086 data.total_string_len = (int)strlen(text);
3089 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3091}
3093#undef START_ELEMENT
3094#undef CDATA_TEXT
3095#undef END_ELEMENT
3096
3097/* Test predefined entities are correctly recognised */
3099 const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
3100 const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
3101 const XML_Char *result = XCS("<>&\"'");
3103
3105 /* run_character_check uses XML_SetCharacterDataHandler(), which
3106 * unfortunately heads off a code path that we need to exercise.
3107 */
3113 /* The default handler doesn't translate the entities */
3114 CharData_CheckXMLChars(&storage, expected);
3115
3116 /* Now try again and check the translation */
3119}
3121
3122/* Regression test that an invalid tag in an external parameter
3123 * reference in an external DTD is correctly faulted.
3124 *
3125 * Ignoring comments and processing instructions, only a few specific
3126 * tags are legal in DTDs, all of which begin with an exclamation
3127 * mark. "<el/>" is not one of them, so the parser should raise an
3128 * error on encountering it.
3129 */
3131 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3132 "<doc></doc>\n";
3133
3137 "Invalid tag IN DTD external param not rejected");
3138}
3140
3141/* Test entities not quite the predefined ones are not mis-recognised */
3143 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3144 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3145 int i = 0;
3146
3147 while (text[i] != NULL) {
3149 "Undefined entity not rejected");
3151 i++;
3152 }
3153}
3155
3156/* Test conditional inclusion (IGNORE) */
3180
3182 const char text[] =
3183 /* <!DOCTYPE d SYSTEM 's'> */
3184 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3185 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3186 /* <d><e>&en;</e></d> */
3187 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3188 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3190
3201 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3204 CharData_CheckXMLChars(&storage, expected);
3205}
3207
3209 const char text[] =
3210 /* <!DOCTYPE d SYSTEM 's'> */
3211 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3212 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3213 /* <d><e>&en;</e></d> */
3214 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3215 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3217
3229 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3232 CharData_CheckXMLChars(&storage, expected);
3233}
3235
3236/* Test mis-formatted conditional exclusion */
3238 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3239 "<doc><e>&entity;</e></doc>";
3241 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3243 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3245 {/* First two bytes of a three-byte char */
3246 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3250
3251 for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3252 set_subtest("%s", fault->parse_text);
3257 "Incomplete IGNORE section not failed");
3259 }
3260}
3262
3268
3269static int XMLCALL
3271 const XML_Char *base, const XML_Char *systemId,
3272 const XML_Char *publicId) {
3273 const char *text;
3274 UNUSED_P(base);
3275 UNUSED_P(systemId);
3276 UNUSED_P(publicId);
3277
3279 if (ext_parser == NULL)
3280 fail("Could not create external entity parser");
3281
3282 if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3283 struct bom_testdata *const testdata
3284 = (struct bom_testdata *)XML_GetUserData(parser);
3285 const char *const external = testdata->external;
3286 const int split = testdata->split;
3287 testdata->nested_callback_happened = XML_TRUE;
3288
3290 != XML_STATUS_OK) {
3292 }
3293 text = external + split; // the parse below will continue where we left off.
3294 } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3295 text = "<!ELEMENT doc EMPTY>\n"
3296 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3297 "<!ENTITY % e2 '%e1;'>\n";
3298 } else {
3299 fail("unknown systemId");
3300 }
3301
3303 != XML_STATUS_OK)
3305
3307 return XML_STATUS_OK;
3308}
3309
3310/* regression test: BOM should be consumed when followed by a partial token. */
3312 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3313 "<doc></doc>\n";
3314 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3315 const int len = (int)strlen(external);
3316 for (int split = 0; split <= len; ++split) {
3317 set_subtest("split at byte %d", split);
3318
3319 struct bom_testdata testdata;
3321 testdata.split = split;
3322 testdata.nested_callback_happened = XML_FALSE;
3323
3325 if (parser == NULL) {
3326 fail("Couldn't create parser");
3327 }
3331 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3334 if (! testdata.nested_callback_happened) {
3335 fail("ref handler not called");
3336 }
3338 }
3339}
3341
3342/* Test recursive parsing */
3344 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3345 "<doc></doc>\n";
3346 ExtFaults data_004_2[] = {
3347 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3348 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3350 {"'wombat", "Unterminated string not faulted", NULL,
3352 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3354 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3355 {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3357 {/* UTF-8 BOM */
3358 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3360 {"<?xml version='1.0' encoding='utf-8'?>\n$",
3361 "Invalid token after text declaration not faulted", NULL,
3363 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3364 "Unterminated string after text decl not faulted", NULL,
3366 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3367 "Partial UTF-8 character after text decl not faulted", NULL,
3369 {"%e1;", "Recursive parameter entity not faulted", NULL,
3372 int i;
3373
3374 for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3375 set_subtest("%s", data_004_2[i].parse_text);
3383 }
3384}
3386
3387/* Test the recursive parse interacts with a not standalone handler */
3389 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3390 "<doc></doc>";
3391
3395 "Standalone rejection not caught");
3396}
3398
3411
3413 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3414 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3415 "<doc></doc>";
3416
3417 /* Setting a handler provokes a particular code path */
3420 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3421}
3423
3424/* Test based on ibm/valid/P32/ibm32v04.xml */
3426 const char *text = "<?xml version='1.0' standalone='no'?>\n"
3427 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3428 "<animal>This is a \n <a/> \n\nyellow tiger</animal>";
3430 = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3431 "<!ELEMENT a EMPTY>\n"
3432 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3433 NULL, NULL};
3434 const XML_Char *expected = XCS("This is a \n \n\nyellow tiger");
3435
3439 /* An attribute list handler provokes a different code path */
3442}
3444
3445/* Slightly bizarrely, the library seems to silently ignore entity
3446 * definitions for predefined entities, even when they are wrong. The
3447 * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3448 * to happen, so this is currently treated as acceptable.
3449 */
3451 const char *text = "<!DOCTYPE doc [\n"
3452 "<!ENTITY apos 'foo'>\n"
3453 "]>\n"
3454 "<doc>&apos;</doc>";
3456}
3458
3459/* Test that the parser stops processing the DTD after an unresolved
3460 * parameter entity is encountered.
3461 */
3463 const char *text = "<!DOCTYPE doc [\n"
3464 "%foo;\n"
3465 "<!ENTITY bar 'bas'>\n"
3466 "]><doc/>";
3467
3473 if (get_dummy_handler_flags() != 0)
3474 fail("DTD processing still going after undefined PE");
3475}
3477
3478/* Test public notations with no system ID */
3480 const char *text = "<!DOCTYPE doc [\n"
3481 "<!NOTATION note PUBLIC 'foo'>\n"
3482 "<!ELEMENT doc EMPTY>\n"
3483 "]>\n<doc/>";
3484
3491 fail("Notation declaration handler not called");
3492}
3494
3496 const char *text
3497 = "<!DOCTYPE doc [\n"
3498 "<!ELEMENT doc "
3499 /* Sixteen elements per line */
3500 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3501 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3502 "))))))))))))))))))))))))))))))))>\n"
3503 "<!ELEMENT e EMPTY>"
3504 "]>\n"
3505 "<doc><e/></doc>";
3507
3518 fail("Element handler not fired");
3519}
3521
3523 const char *text = "<!DOCTYPE doc [\n"
3524 "<!ELEMENT doc (a|b|c)+>\n"
3525 "<!ELEMENT a EMPTY>\n"
3526 "<!ELEMENT b (#PCDATA)>\n"
3527 "<!ELEMENT c ANY>\n"
3528 "]>\n"
3529 "<doc>\n"
3530 "<a/>\n"
3531 "<b attr='foo'>This is a foo</b>\n"
3532 "<c></c>\n"
3533 "</doc>\n";
3534
3541 fail("Element handler flag not raised");
3542}
3544
3546 const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3547 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3548 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3549 "%entity;\n"
3550 "]>\n"
3551 "<doc></doc>";
3552 char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3553
3560}
3562
3563/* Test skipping of parameter entity in an external DTD */
3564/* Derived from ibm/invalid/P69/ibm69i01.xml */
3566 const char *text = "<?xml version='1.0'?>\n"
3567 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3568 "<!ELEMENT root (#PCDATA|a)* >\n"
3569 "]>\n"
3570 "<root></root>";
3571 ExtTest dtd_data = {"%pe2;", NULL, NULL};
3572
3582 fail("Skip handler not executed");
3583}
3585
3586/* Test recursive parameter entity definition rejected in external DTD */
3588 const char *text = "<?xml version='1.0'?>\n"
3589 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3590 "<!ELEMENT root (#PCDATA|a)* >\n"
3591 "]>\n"
3592 "<root></root>";
3593 ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3594 "Recursive external parameter entity not faulted", NULL,
3596
3601 "Recursive external parameter not spotted");
3602}
3604
3605/* Test undefined parameter entity in external entity handler */
3607 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3608 "<doc></doc>\n";
3609
3616
3617 /* Now repeat without the external entity ref handler invoking
3618 * another copy of itself.
3619 */
3623 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3627}
3629
3630/* Test suspending the parse on receiving an XML declaration works */
3632 const char *text = long_character_data_text;
3633
3642 /* Attempt to start a new parse while suspended */
3645 fail("Attempt to parse while suspended not faulted");
3647 fail("Suspended parse not faulted with correct error");
3648}
3650
3651/* Test aborting the parse in an epilog works */
3653 const char *text = "<doc></doc>\n\r\n";
3654 XML_Char trigger_char = XCS('\r');
3655
3661 fail("Abort not triggered");
3664}
3666
3667/* Test a different code path for abort in the epilog */
3678
3679/* Test suspension from the epilog */
3692
3694 const char *text = "<doc/>";
3695 enum XML_Status rc;
3696
3700 if (rc == XML_STATUS_ERROR)
3702 else if (rc != XML_STATUS_SUSPENDED)
3703 fail("Suspend not triggered");
3705 if (rc == XML_STATUS_ERROR)
3707 else if (rc != XML_STATUS_OK)
3708 fail("Resume failed");
3709}
3711
3713 const char *text = "<doc></doc><";
3714
3716 "Incomplete epilog entry not faulted");
3717}
3719
3721 const char *text = "<doc></doc>\xe2\x82";
3722
3723 /* First check that no fault is raised if the parse is not finished */
3727 /* Now check that it is faulted once we finish */
3729 fail("Partial character in epilog not faulted");
3732}
3734
3735/* Test resuming a parse suspended in entity substitution */
3737 const char *text
3738 = "<!DOCTYPE doc [\n"
3739 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3740 "]>\n"
3741 "<doc>&foo;</doc>\n";
3742 const XML_Char *expected1 = XCS("Hi");
3743 const XML_Char *expected2 = XCS("HiHo");
3745
3750 // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3751 // we won't know exactly how much input we actually managed to give Expat.
3752 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3762}
3764
3766 const char *const text
3767 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3768 "<"
3769 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3770 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3771 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3772 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3773 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3774 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3775 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3776 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3777 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3778 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3779 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3780 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3781 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3782 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3783 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3784 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3785 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3786 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3787 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3788 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3789 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3790 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3791 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3792 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3793 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3794 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3795 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3796 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3797 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3798 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3799 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3800 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3801 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3802 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3803 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3804 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3805 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3806 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3807 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3808 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3809 "/>"
3810 "</b></a>";
3811 const size_t firstChunkSizeBytes = 54;
3812
3816
3823 (int)(strlen(text) - firstChunkSizeBytes),
3824 XML_TRUE)
3825 != XML_STATUS_OK)
3828}
3830
3831/* Test syntax error is caught at parse resumption */
3833 const char *text = "<!DOCTYPE doc [\n"
3834 "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3835 "]>\n"
3836 "<doc>&foo;</doc>\n";
3837
3843 fail("Syntax error in entity not faulted");
3846}
3848
3849/* Test suspending and resuming in a parameter entity substitution */
3851 const char *text = "<!DOCTYPE doc [\n"
3852 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3853 "%foo;\n"
3854 "]>\n"
3855 "<doc>Hello, world</doc>";
3856 const XML_Char *expected = XCS("Hello, world");
3858
3864 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3870 CharData_CheckXMLChars(&storage, expected);
3871}
3873
3874/* Test attempting to use parser after an error is faulted */
3876 const char *text = "<$doc><doc></doc>";
3877
3880 fail("Invalid tag name not faulted");
3884 fail("Restarting invalid parse not faulted");
3887}
3889
3890/* Test that angle brackets in an attribute default value are faulted */
3892 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3893 "<doc></doc>";
3894
3896 "Bad attribute default not faulted");
3897}
3899
3901 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3902 "<doc></doc>";
3903
3905 "Bad attribute default not faulted");
3906}
3908
3910 const char *text = "<doc a='value\r'/>";
3911
3915}
3917
3918/* Try parsing a general entity within a parameter entity in a
3919 * standalone internal DTD. Covers a corner case in the parser.
3920 */
3922 const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3923 "<!DOCTYPE doc [\n"
3924 " <!ELEMENT doc (#PCDATA)>\n"
3925 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
3926 " <!ENTITY ge 'AttDefaultValue'>\n"
3927 " %pe;\n"
3928 "]>\n"
3929 "<doc att2='any'/>";
3930
3935}
3937
3938/* Test that a reference to an unknown external entity is skipped */
3940 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3941 "<doc></doc>\n";
3942 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3943 "<!ENTITY % e2 '%e1;'>\n",
3944 NULL, NULL};
3945
3952}
3954
3955/* Test a different form of unknown external entity */
3957 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3958 "<doc />";
3960 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3961 "<!ENTITY % pe2 '%pe1;'>\n"
3962 "%pe2;\n",
3964
3971}
3973
3975 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3976 "<doc />";
3978 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3979 "<!ENTITY % pe2 '%pe1;'>\n"
3980 "%pe2;\n",
3981 NULL};
3982
3989}
3991
3992/* Test that a parameter entity value ending with a carriage return
3993 * has it translated internally into a newline.
3994 */
3996#define PARAM_ENTITY_NAME "pe"
3997#define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3998 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3999 "<doc/>";
4001 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
4002 "%" PARAM_ENTITY_NAME ";\n",
4003 NULL, NULL};
4004
4016 fail("Parameter entity CR->NEWLINE conversion failed");
4018 fail("Parameter entity not parsed");
4019}
4020#undef PARAM_ENTITY_NAME
4021#undef PARAM_ENTITY_CORE_VALUE
4023
4025 const char *text = "<!DOCTYPE doc [\n"
4026 " <!ENTITY entity '&#x110000;'>\n"
4027 "]>\n"
4028 "<doc>&entity;</doc>";
4029
4031 "Out of range character reference not faulted");
4032}
4034
4036 const char *text = "<!DOCTYPE doc [\n"
4037 " <!ENTITY entity '&#xg0;'>\n"
4038 "]>\n"
4039 "<doc>&entity;</doc>";
4040
4042 "Out of range character reference not faulted");
4043}
4045
4047 const char text[] =
4048 /* <!DOCTYPE doc [\n */
4049 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4050 /* U+0E04 = KHO KHWAI
4051 * U+0E08 = CHO CHAN */
4052 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
4053 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
4054 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
4055 /* ]>\n */
4056 "\0]\0>\0\n"
4057 /* <doc>&entity;</doc> */
4058 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
4059
4060 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4062 fail("Invalid start of entity name not faulted");
4065}
4067
4069 const char *text = "<!DOCTYPE doc [\n"
4070 " <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
4071 "]>\n"
4072 "<doc>&entity;</doc>";
4073
4075 "Out of range character reference not faulted");
4076}
4078
4079/* Test that processing instructions are picked up by a default handler */
4081 const char *text = "<?test processing instruction?>\n<doc/>";
4082 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4084
4091 CharData_CheckXMLChars(&storage, expected);
4092}
4094
4095/* Test that comments are picked up by a default handler */
4097 const char *text = "<!-- This is a comment -->\n<doc/>";
4098 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4100
4107 CharData_CheckXMLChars(&storage, expected);
4108}
4110
4111/* Test PIs that look almost but not quite like XML declarations */
4113 const char *text = "<?yml something like data?><doc/>";
4114 const XML_Char *expected = XCS("yml: something like data\n");
4116
4123 CharData_CheckXMLChars(&storage, expected);
4124}
4126
4128 const char *text = "<?xnl nothing like data?><doc/>";
4129 const XML_Char *expected = XCS("xnl: nothing like data\n");
4131
4138 CharData_CheckXMLChars(&storage, expected);
4139}
4141
4143 const char *text = "<?xmm everything like data?><doc/>";
4144 const XML_Char *expected = XCS("xmm: everything like data\n");
4146
4153 CharData_CheckXMLChars(&storage, expected);
4154}
4156
4158 const char text[] =
4159 /* <?{KHO KHWAI}{CHO CHAN}?>
4160 * where {KHO KHWAI} = U+0E04
4161 * and {CHO CHAN} = U+0E08
4162 */
4163 "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4164 /* <q/> */
4165 "<\0q\0/\0>\0";
4166#ifdef XML_UNICODE
4167 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4168#else
4169 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4170#endif
4172
4176 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4179 CharData_CheckXMLChars(&storage, expected);
4180}
4182
4184 const char text[] =
4185 /* <?{KHO KHWAI}{CHO CHAN}?>
4186 * where {KHO KHWAI} = U+0E04
4187 * and {CHO CHAN} = U+0E08
4188 */
4189 "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4190 /* <q/> */
4191 "\0<\0q\0/\0>";
4192#ifdef XML_UNICODE
4193 const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4194#else
4195 const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4196#endif
4198
4202 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4205 CharData_CheckXMLChars(&storage, expected);
4206}
4208
4209/* Test that comments can be picked up and translated */
4211 const char text[] =
4212 /* <!-- Comment A --> */
4213 "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4214 /* <doc/> */
4215 "\0<\0d\0o\0c\0/\0>";
4216 const XML_Char *expected = XCS(" Comment A ");
4218
4222 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4225 CharData_CheckXMLChars(&storage, expected);
4226}
4228
4230 const char text[] =
4231 /* <!-- Comment B --> */
4232 "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4233 /* <doc/> */
4234 "<\0d\0o\0c\0/\0>\0";
4235 const XML_Char *expected = XCS(" Comment B ");
4237
4241 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4244 CharData_CheckXMLChars(&storage, expected);
4245}
4247
4248/* Test that the unknown encoding handler with map entries that expect
4249 * conversion but no conversion function is faulted
4250 */
4252 const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4253 "<doc>\x81</doc>";
4254
4256 /* MiscEncodingHandler sets up an encoding with every top-bit-set
4257 * character introducing a two-byte sequence. For this, it
4258 * requires a convert function. The above function call doesn't
4259 * pass one through, so when BadEncodingHandler actually gets
4260 * called it should supply an invalid encoding.
4261 */
4263 "Encoding with missing convert() not faulted");
4264}
4266
4268 const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4269 "<doc>\x81</doc>";
4270
4272 /* BadEncodingHandler sets up an encoding with every top-bit-set
4273 * character introducing a two-byte sequence. For this, it
4274 * requires a convert function. The above function call passes
4275 * one that insists all possible sequences are invalid anyway.
4276 */
4278 "Encoding with failing convert() not faulted");
4279}
4281
4282/* Test unknown encoding conversions */
4284 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4285 /* Equivalent to <eoc>Hello, world</eoc> */
4286 "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4287
4289 run_character_check(text, XCS("Hello, world"));
4290}
4292
4293/* Test bad name character in unknown encoding */
4295 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4296 "<\xff\x64oc>Hello, world</\xff\x64oc>";
4297
4300 "Bad name start in unknown encoding not faulted");
4301}
4303
4304/* Test bad mid-name character in unknown encoding */
4306 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4307 "<d\xffoc>Hello, world</d\xffoc>";
4308
4311 "Bad name in unknown encoding not faulted");
4312}
4314
4315/* Test element name that is long enough to fill the conversion buffer
4316 * in an unknown encoding, finishing with an encoded character.
4317 */
4319 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4320 "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4321 "Hi"
4322 "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4323 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4325
4333 CharData_CheckXMLChars(&storage, expected);
4334}
4336
4337/* Test element name that is long enough to fill the conversion buffer
4338 * in an unknown encoding, finishing with a simple character.
4339 */
4341 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4342 "<abcdefghabcdefghabcdefghijklmnop>"
4343 "Hi"
4344 "</abcdefghabcdefghabcdefghijklmnop>";
4345 const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4347
4355 CharData_CheckXMLChars(&storage, expected);
4356}
4358
4360 const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4361 "<doc>Hello world</doc>";
4362
4365 "Invalid unknown encoding not faulted");
4366}
4368
4370 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4371 "<doc>Hello, world</doc>";
4372
4374 run_character_check(text, XCS("Hello, world"));
4375}
4377
4379 const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4380 "<doc>Hello, \x80 world</doc>";
4381
4384 "Invalid character not faulted");
4385}
4387
4389 const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4390 "<doc>Hello, world</doc>";
4391
4394 "Invalid unknown encoding not faulted");
4395}
4397
4399 const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4400 "<doc>Hello, world</doc>";
4401
4404 "Invalid unknown encoding not faulted");
4405}
4407
4409 const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4410 "<doc>Hello, \x82 world</doc>";
4411
4414 "Invalid unknown encoding not faulted");
4415}
4417
4419 const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4420 "<doc>Hello, world</doc>";
4421
4424 "Invalid unknown encoding not faulted");
4425}
4427
4429 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4430 "<doc attr='\xff\x30'/>";
4431
4434 "Invalid attribute valid not faulted");
4435}
4437
4438/* Test an external entity parser set to use latin-1 detects UTF-16
4439 * BOMs correctly.
4440 */
4441/* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
4443 const char *text = "<!DOCTYPE doc [\n"
4444 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4445 "]>\n"
4446 "<doc>&en;</doc>";
4448 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4449 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4450 * 0x4c = L and 0x20 is a space
4451 */
4452 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4453#ifdef XML_UNICODE
4454 const XML_Char *expected = XCS("\x00ff\x00feL ");
4455#else
4456 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4457 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4458#endif
4460
4462 test_data.storage = &storage;
4469 CharData_CheckXMLChars(&storage, expected);
4470}
4472
4474 const char *text = "<!DOCTYPE doc [\n"
4475 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4476 "]>\n"
4477 "<doc>&en;</doc>";
4479 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4480 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4481 * 0x4c = L and 0x20 is a space
4482 */
4483 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4484#ifdef XML_UNICODE
4485 const XML_Char *expected = XCS("\x00fe\x00ff L");
4486#else
4487 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4488 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4489#endif
4491
4493 test_data.storage = &storage;
4500 CharData_CheckXMLChars(&storage, expected);
4501}
4503
4504/* Parsing the full buffer rather than a byte at a time makes a
4505 * difference to the encoding scanning code, so repeat the above tests
4506 * without breaking them down by byte.
4507 */
4509 const char *text = "<!DOCTYPE doc [\n"
4510 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4511 "]>\n"
4512 "<doc>&en;</doc>";
4514 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4515 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4516 * 0x4c = L and 0x20 is a space
4517 */
4518 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4519#ifdef XML_UNICODE
4520 const XML_Char *expected = XCS("\x00ff\x00feL ");
4521#else
4522 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4523 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4524#endif
4526
4528 test_data.storage = &storage;
4535 CharData_CheckXMLChars(&storage, expected);
4536}
4538
4540 const char *text = "<!DOCTYPE doc [\n"
4541 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4542 "]>\n"
4543 "<doc>&en;</doc>";
4545 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4546 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4547 * 0x4c = L and 0x20 is a space
4548 */
4549 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4550#ifdef XML_UNICODE
4551 const XML_Char *expected = XCS("\x00fe\x00ff L");
4552#else
4553 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4554 const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4555#endif
4557
4559 test_data.storage = &storage;
4566 CharData_CheckXMLChars(&storage, expected);
4567}
4569
4570/* Test little-endian UTF-16 given an explicit big-endian encoding */
4572 const char *text = "<!DOCTYPE doc [\n"
4573 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4574 "]>\n"
4575 "<doc>&en;</doc>";
4576 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4577#ifdef XML_UNICODE
4578 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4579#else
4580 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4581 "\xe6\x94\x80" /* U+6500 */
4582 "\xe2\xbc\x80" /* U+2F00 */
4583 "\xe3\xb8\x80"); /* U+3E00 */
4584#endif
4586
4588 test_data.storage = &storage;
4595 CharData_CheckXMLChars(&storage, expected);
4596}
4598
4599/* Test big-endian UTF-16 given an explicit little-endian encoding */
4601 const char *text = "<!DOCTYPE doc [\n"
4602 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4603 "]>\n"
4604 "<doc>&en;</doc>";
4605 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4606#ifdef XML_UNICODE
4607 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4608#else
4609 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */
4610 "\xe6\x94\x80" /* U+6500 */
4611 "\xe2\xbc\x80" /* U+2F00 */
4612 "\xe3\xb8\x80"); /* U+3E00 */
4613#endif
4615
4617 test_data.storage = &storage;
4624 CharData_CheckXMLChars(&storage, expected);
4625}
4627
4628/* Test little-endian UTF-16 given no explicit encoding.
4629 * The existing default encoding (UTF-8) is assumed to hold without a
4630 * BOM to contradict it, so the entity value will in fact provoke an
4631 * error because 0x00 is not a valid XML character. We parse the
4632 * whole buffer in one go rather than feeding it in byte by byte to
4633 * exercise different code paths in the initial scanning routines.
4634 */
4636 const char *text = "<!DOCTYPE doc [\n"
4637 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4638 "]>\n"
4639 "<doc>&en;</doc>";
4641 = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4643
4647 "Invalid character should not have been accepted");
4648}
4650
4651/* Test not-quite-UTF-8 BOM (0xEF 0xBB 0x80; a real BOM is 0xEF 0xBB 0xBF) */
4653 const char *text = "<!DOCTYPE doc [\n"
4654 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4655 "]>\n"
4656 "<doc>&en;</doc>";
4658 = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4659 3, NULL, NULL};
4660#ifdef XML_UNICODE
4661 const XML_Char *expected = XCS("\xfec0");
4662#else
4663 const XML_Char *expected = XCS("\xef\xbb\x80");
4664#endif
4666
4668 test_data.storage = &storage;
4675 CharData_CheckXMLChars(&storage, expected);
4676}
4678
4679/* Test that UTF-8 in a CDATA section is correctly passed through */
4681 const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4682#ifdef XML_UNICODE
4683 const XML_Char *expected = XCS("one \x00e9 two");
4684#else
4685 const XML_Char *expected = XCS("one \xc3\xa9 two");
4686#endif
4687
4688 run_character_check(text, expected);
4689}
4691
4692/* Test that UTF-8 with a lone ']' inside a CDATA section is passed through */
4694 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4695#ifdef XML_UNICODE
4696 const XML_Char *expected = XCS("\x00e9]\x00e9two");
4697#else
4698 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4699#endif
4700
4701 run_character_check(text, expected);
4702}
4704
4706 struct test_case {
4707 bool goodName;
4708 bool goodNameStart;
4709 const char *tagName;
4710 };
4711
4712 // The idea with the tests below is this:
4713 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4714 // go to isNever and are hence not a concern.
4715 //
4716 // We start with a character that is a valid name character
4717 // (or even name-start character, see XML 1.0r4 spec) and then we flip
4718 // single bits at places where (1) the result leaves the UTF-8 encoding space
4719 // and (2) we stay in the same n-byte sequence family.
4720 //
4721 // The flipped bits are highlighted in angle brackets in comments,
4722 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4723 // the most significant bit to 1 to leave UTF-8 encoding space.
4724 struct test_case cases[] = {
4725 // 1-byte UTF-8: [0xxx xxxx]
4726 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
4727 {false, false, "\xBA"}, // [<1>011 1010]
4728 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
4729 {false, false, "\xB9"}, // [<1>011 1001]
4730
4731 // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4732 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
4733 // Arabic small waw U+06E5
4734 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4735 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4736 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4737 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
4738 // combining char U+0301
4739 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4740 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4741 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4742
4743 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4744 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
4745 // Devanagari Letter A U+0905
4746 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4747 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4748 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4749 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4750 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4751 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
4752 // combining char U+0901
4753 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4754 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4755 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4756 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4757 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4758 };
4759 const bool atNameStart[] = {true, false};
4760
4761 size_t i = 0;
4762 char doc[1024];
4763 size_t failCount = 0;
4764
4765 // we need all the bytes to be parsed, but we don't want the errors that can
4766 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4768 return;
4769 }
4770
4771 for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4772 size_t j = 0;
4773 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4774 const bool expectedSuccess
4775 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4776 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4777 cases[i].tagName);
4779
4781 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4782
4783 bool success = true;
4784 if ((status == XML_STATUS_OK) != expectedSuccess) {
4785 success = false;
4786 }
4787 if ((status == XML_STATUS_ERROR)
4789 success = false;
4790 }
4791
4792 if (! success) {
4793 fprintf(
4794 stderr,
4795 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4796 (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
4798 failCount++;
4799 }
4800
4802 }
4803 }
4804
4805 if (failCount > 0) {
4806 fail("UTF-8 regression detected");
4807 }
4808}
4810
4811/* Test trailing spaces in elements are accepted */
4827
4829 const char text[] =
4830 /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4831 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4832 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4833 */
4834 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4835 const XML_Char *expected = XCS("a");
4837
4841 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4844 CharData_CheckXMLChars(&storage, expected);
4845}
4847
4849 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4850 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4851 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4852 */
4853 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4854 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4855 const XML_Char *expected = XCS("1");
4857
4861 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4864 CharData_CheckXMLChars(&storage, expected);
4865}
4867
4869 const char *text = "<doc attr1='a' / attr2='b'>";
4870
4871 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4872}
4874
4876 /* <!DOCTYPE doc [
4877 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4878 * %{KHO KHWAI}{CHO CHAN};
4879 * ]>
4880 * <doc></doc>
4881 *
4882 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4883 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4884 */
4885 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4886 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4887 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4888 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4889 "\0%\x0e\x04\x0e\x08\0;\0\n"
4890 "\0]\0>\0\n"
4891 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4892#ifdef XML_UNICODE
4893 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4894#else
4895 const XML_Char *expected
4896 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4897#endif
4899
4903 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4906 CharData_CheckXMLChars(&storage, expected);
4907}
4909
4910/* Test that duff attribute description keywords are rejected */
4912 const char *text = "<!DOCTYPE doc [\n"
4913 " <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4914 "]>\n"
4915 "<doc />";
4916
4918 "Bad keyword !IMPLIED not faulted");
4919}
4921
4922/* Test that an invalid attribute description keyword consisting of
4923 * UTF-16 characters with their top bytes non-zero is correctly
4924 * faulted
4925 */
4927 /* <!DOCTYPE d [
4928 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4929 * ]><d/>
4930 *
4931 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4932 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4933 */
4934 const char text[]
4935 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4936 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4937 "\0#\x0e\x04\x0e\x08\0>\0\n"
4938 "\0]\0>\0<\0d\0/\0>";
4939
4940 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4942 fail("Invalid UTF16 attribute keyword not faulted");
4945}
4947
4948/* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this
4949 * using prefix-encoding (see above) to trigger specific code paths
4950 */
4952 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4953 "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4954
4957 "Invalid bytes in DOCTYPE not faulted");
4958}
4960
4962 const char *text = "<!DOCTYPE \xDB\x25"
4963 "doc><doc/>"; // [1101 1011] [<0>010 0101]
4965 "Invalid UTF-8 in DOCTYPE not faulted");
4966}
4968
4970 const char text[] =
4971 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4972 *
4973 * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4974 * (name character) but not a valid letter (name start character)
4975 */
4976 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4977 "\x06\xf2"
4978 "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4979
4980 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4982 fail("Invalid bytes in DOCTYPE not faulted");
4985}
4987
4989 const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4990 "<1+>&foo;</1+>";
4991
4993 "'+' in document name not faulted");
4994}
4996
4998 const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4999 "<1*>&foo;</1*>";
5000
5002 "'*' in document name not faulted");
5003}
5005
5007 const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
5008 "<1?>&foo;</1?>";
5009
5011 "'?' in document name not faulted");
5012}
5014
5016 const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
5017 "<!DOCTYPE doc SYSTEM 'foo'>"
5018 "<doc><e>&entity;</e></doc>";
5019 ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
5020 "Invalid character not faulted", XCS("prefix-conv"),
5022
5028 "Bad IGNORE section with unknown encoding not failed");
5029}
5031
5033 const char text[] =
5034 /* <e a='&#228; &#x00E4;'></e> */
5035 "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
5036 "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
5037#ifdef XML_UNICODE
5038 const XML_Char *expected = XCS("\x00e4 \x00e4");
5039#else
5040 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5041#endif
5043
5047 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5050 CharData_CheckXMLChars(&storage, expected);
5051}
5053
5055 const char text[] =
5056 /* <e a='&#228; &#x00E4;'></e> */
5057 "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
5058 "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
5059#ifdef XML_UNICODE
5060 const XML_Char *expected = XCS("\x00e4 \x00e4");
5061#else
5062 const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
5063#endif
5065
5069 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5072 CharData_CheckXMLChars(&storage, expected);
5073}
5075
5077 const char text[] =
5078 /* <!DOCTYPE d [ */
5079 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5080 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5081 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5082 "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5083 /* %e; */
5084 "\0%\0e\0;\0\n"
5085 /* ]> */
5086 "\0]\0>\0\n"
5087 /* <d>&j;</d> */
5088 "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5090 = {/* <!ENTITY j 'baz'> */
5091 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5092 const XML_Char *expected = XCS("baz");
5094
5096 test_data.storage = &storage;
5101 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5104 CharData_CheckXMLChars(&storage, expected);
5105}
5107
5109 const char text[] =
5110 /* <!DOCTYPE d [ */
5111 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5112 /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5113 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5114 "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5115 /* %e; */
5116 "%\0e\0;\0\n\0"
5117 /* ]> */
5118 "]\0>\0\n\0"
5119 /* <d>&j;</d> */
5120 "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5122 = {/* <!ENTITY j 'baz'> */
5123 "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5124 const XML_Char *expected = XCS("baz");
5126
5128 test_data.storage = &storage;
5133 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5136 CharData_CheckXMLChars(&storage, expected);
5137}
5139
5140/* Test that a doctype with neither an internal nor external subset is
5141 * faulted
5142 */
5144 const char *text = "<!DOCTYPE doc></doc>";
5146 "DOCTYPE without subset not rejected");
5147}
5149
5151 const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5153 "DOCTYPE without Public ID not rejected");
5154}
5156
5158 const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5160 "DOCTYPE without System ID not rejected");
5161}
5163
5165 const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5166 expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5167}
5169
5171 const char *text = "<!DOCTYPE doc [\n"
5172 " <!ENTITY foo PUBLIC>\n"
5173 "]>\n"
5174 "<doc/>";
5176 "ENTITY without Public ID is not rejected");
5177}
5179
5180/* Test unquoted value is faulted */
5182 const char *text = "<!DOCTYPE doc [\n"
5183 " <!ENTITY % foo bar>\n"
5184 "]>\n"
5185 "<doc/>";
5187 "ENTITY without Public ID is not rejected");
5188}
5190
5192 const char *text = "<!DOCTYPE doc [\n"
5193 " <!ENTITY % foo PUBLIC>\n"
5194 "]>\n"
5195 "<doc/>";
5197 "Parameter ENTITY without Public ID is not rejected");
5198}
5200
5202 const char *text = "<!DOCTYPE doc [\n"
5203 " <!ENTITY % foo SYSTEM>\n"
5204 "]>\n"
5205 "<doc/>";
5207 "Parameter ENTITY without Public ID is not rejected");
5208}
5210
5212 const char *text = "<!DOCTYPE doc [\n"
5213 " <!NOTATION n SYSTEM>\n"
5214 "]>\n"
5215 "<doc/>";
5217 "Notation without System ID is not rejected");
5218}
5220
5221/* Test for issue #11, wrongly suppressed default handler */
5223 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5224 " <!ENTITY foo 'bar'>\n"
5225 "]>\n"
5226 "<doc>&foo;</doc>";
5227 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5228 {XCS("'test.dtd'"), 10, XML_FALSE},
5229 {NULL, 0, XML_FALSE}};
5230 int i;
5231
5238 for (i = 0; test_data[i].expected != NULL; i++)
5239 if (! test_data[i].seen)
5240 fail("Default handler not run for public !DOCTYPE");
5241}
5243
5245 const char *text = "<abort/>";
5246
5250 fail("Expected to error on abort");
5251}
5253
5254/* Regression test for GH issue #612: unfinished m_declAttributeType
5255 * allocation in ->m_tempPool can corrupt following allocation.
5256 */
5258 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5259 "<!DOCTYPE foo [\n"
5260 "<!ELEMENT foo ANY>\n"
5261 "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5262 "%entp;\n"
5263 "]>\n"
5264 "<a></a>\n";
5265 const XML_Char *expected = XCS("COMMENT");
5267
5277 CharData_CheckXMLChars(&storage, expected);
5278}
5280
5282 const char *const text = "<!DOCTYPE a [\n"
5283 " <!ENTITY e1 '<!--e1-->'>\n"
5284 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5285 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5286 "]>\n"
5287 "<a><!--start-->&e3;<!--end--></a>";
5288 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5289 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5294
5298
5299 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5300 while (status == XML_STATUS_SUSPENDED) {
5302 }
5303 if (status != XML_STATUS_OK)
5305
5306 CharData_CheckXMLChars(&storage, expected);
5308}
5310
5311/* Regression test for quadratic parsing on large tokens */
5313 const struct {
5314 const char *pre;
5315 const char *post;
5316 } text[] = {
5317 {"<a>", "</a>"}, // assumed good, used as baseline
5318 {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5319 {"<c attr='", "'></c>"}, // big attribute, used to be O(N²)
5320 {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²)
5321 {"<e><", "/></e>"}, // big elem name, used to be O(N²)
5322 };
5323 const int num_cases = sizeof(text) / sizeof(text[0]);
5324 char aaaaaa[4096];
5325 const int fillsize = (int)sizeof(aaaaaa);
5326 const int fillcount = 100;
5327 const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
5328 const unsigned max_factor = 4;
5329 const unsigned max_scanned = max_factor * approx_bytes;
5330
5331 memset(aaaaaa, 'a', fillsize);
5332
5334 return; // heuristic is disabled; we would get O(n^2) and fail.
5335 }
5336
5337 for (int i = 0; i < num_cases; ++i) {
5340 enum XML_Status status;
5341 set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
5342
5343 // parse the start text
5344 g_bytesScanned = 0;
5346 (int)strlen(text[i].pre), XML_FALSE);
5347 if (status != XML_STATUS_OK) {
5349 }
5350
5351 // parse lots of 'a', failing the test early if it takes too long
5352 unsigned past_max_count = 0;
5353 for (int f = 0; f < fillcount; ++f) {
5355 if (status != XML_STATUS_OK) {
5357 }
5359 // We're not done, and have already passed the limit -- the test will
5360 // definitely fail. This block allows us to save time by failing early.
5361 const unsigned pushed
5362 = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
5363 fprintf(
5364 stderr,
5365 "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5369 // We are failing, but allow a few log prints first. If we don't reach
5370 // a count of five, the test will fail after the loop instead.
5372 }
5373 }
5374
5375 // parse the end text
5377 (int)strlen(text[i].post), XML_TRUE);
5378 if (status != XML_STATUS_OK) {
5380 }
5381
5382 assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
5384 fprintf(
5385 stderr,
5386 "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
5388 max_factor);
5389 fail("scanned too many bytes");
5390 }
5391
5393 }
5394}
5396
5398 const char *const pre = "<d>";
5399 const char *const start = "<x attr='";
5400 const char *const end = "'></x>";
5401 char eeeeee[100];
5402 const int fillsize = (int)sizeof(eeeeee);
5403 memset(eeeeee, 'e', fillsize);
5404
5405 for (int enabled = 0; enabled <= 1; enabled += 1) {
5406 set_subtest("deferral=%d", enabled);
5407
5411 // pre-grow the buffer to avoid reparsing due to almost-fullness
5413
5418
5419 enum XML_Status status;
5420 // parse the start text
5421 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5422 if (status != XML_STATUS_OK) {
5424 }
5425 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5426
5427 // ..and the start of the token
5428 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5429 if (status != XML_STATUS_OK) {
5431 }
5432 CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5433
5434 // try to parse lots of 'e', but the token isn't finished
5435 for (int c = 0; c < 100; ++c) {
5437 if (status != XML_STATUS_OK) {
5439 }
5440 }
5441 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5442
5443 // end the <x> token.
5444 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5445 if (status != XML_STATUS_OK) {
5447 }
5448
5449 if (enabled) {
5450 // In general, we may need to push more data to trigger a reparse attempt,
5451 // but in this test, the data is constructed to always require it.
5452 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5453 // 2x the token length should suffice; the +1 covers the start and end.
5454 for (int c = 0; c < 101; ++c) {
5456 if (status != XML_STATUS_OK) {
5458 }
5459 }
5460 }
5461 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5462
5464 }
5465}
5467
5472
5473static void
5474element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5475 UNUSED_P(name);
5476 struct element_decl_data *testdata = (struct element_decl_data *)userData;
5477 testdata->count += 1;
5478 XML_FreeContentModel(testdata->parser, model);
5479}
5480
5481static int
5483 const XML_Char *base, const XML_Char *systemId,
5484 const XML_Char *publicId) {
5485 UNUSED_P(base);
5486 UNUSED_P(systemId);
5487 UNUSED_P(publicId);
5488 const char *const pre = "<!ELEMENT document ANY>\n";
5489 const char *const start = "<!ELEMENT ";
5490 const char *const end = " ANY>\n";
5491 const char *const post = "<!ELEMENT xyz ANY>\n";
5492 const int enabled = *(int *)XML_GetUserData(p);
5493 char eeeeee[100];
5494 char spaces[100];
5495 const int fillsize = (int)sizeof(eeeeee);
5496 assert_true(fillsize == (int)sizeof(spaces));
5497 memset(eeeeee, 'e', fillsize);
5498 memset(spaces, ' ', fillsize);
5499
5502 // pre-grow the buffer to avoid reparsing due to almost-fullness
5504
5507 testdata.count = 0;
5510
5511 enum XML_Status status;
5512 // parse the initial text
5513 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5514 if (status != XML_STATUS_OK) {
5516 }
5517 assert_true(testdata.count == 1); // first element should be done
5518
5519 // ..and the start of the big token
5520 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5521 if (status != XML_STATUS_OK) {
5523 }
5524 assert_true(testdata.count == 1); // still just the first one
5525
5526 // try to parse lots of 'e', but the token isn't finished
5527 for (int c = 0; c < 100; ++c) {
5529 if (status != XML_STATUS_OK) {
5531 }
5532 }
5533 assert_true(testdata.count == 1); // *still* just the first one
5534
5535 // end the big token.
5536 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5537 if (status != XML_STATUS_OK) {
5539 }
5540
5541 if (enabled) {
5542 // In general, we may need to push more data to trigger a reparse attempt,
5543 // but in this test, the data is constructed to always require it.
5544 assert_true(testdata.count == 1); // or the test is incorrect
5545 // 2x the token length should suffice; the +1 covers the start and end.
5546 for (int c = 0; c < 101; ++c) {
5548 if (status != XML_STATUS_OK) {
5550 }
5551 }
5552 }
5553 assert_true(testdata.count == 2); // the big token should be done
5554
5555 // parse the final text
5556 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5557 if (status != XML_STATUS_OK) {
5559 }
5560 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5561
5563 return XML_STATUS_OK;
5564}
5565
5567 const char *const text
5568 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5569 for (int enabled = 0; enabled <= 1; ++enabled) {
5570 set_subtest("deferral=%d", enabled);
5571
5574 XML_SetUserData(parser, (void *)&enabled);
5576 // this handler creates a sub-parser and checks that its deferral behavior
5577 // is what we expected, based on the value of `enabled` (in userdata).
5580 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5582
5584 }
5585}
5587
5599
5601 const char *const pre = "<d><x attr='";
5602 const char *const end = "'></x>";
5603 char iiiiii[100];
5604 const int fillsize = (int)sizeof(iiiiii);
5605 memset(iiiiii, 'i', fillsize);
5606
5610
5615
5616 enum XML_Status status;
5617 // parse the start text
5618 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5619 if (status != XML_STATUS_OK) {
5621 }
5622 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5623
5624 // try to parse some 'i', but the token isn't finished
5626 if (status != XML_STATUS_OK) {
5628 }
5629 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5630
5631 // end the <x> token.
5632 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5633 if (status != XML_STATUS_OK) {
5635 }
5636 CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5637
5638 // now change the heuristic setting and add *no* data
5640 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5641 status = XML_Parse(parser, "", 0, XML_FALSE);
5642 if (status != XML_STATUS_OK) {
5644 }
5646
5648}
5650
5665
/* Sum of all sizes requested through counting_realloc() since the counter
 * was last reset by a test. */
static size_t g_totalAlloc = 0;
/* Largest single size requested through counting_realloc() since the counter
 * was last reset by a test. */
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records allocation statistics.
 *
 * ptr:  block to resize, or NULL for a fresh allocation.
 * size: requested size in bytes.
 *
 * The requested size is added to g_totalAlloc and, when it exceeds every
 * previous request, stored in g_biggestAlloc.  The counters are updated
 * before realloc() runs, so a failing request is still counted.
 *
 * Returns whatever realloc(ptr, size) returns.
 */
static void *
counting_realloc(void *ptr, size_t size) {
  g_totalAlloc += size;
  if (size > g_biggestAlloc) {
    // remember the high-water mark; without this the maximum stays at 0
    g_biggestAlloc = size;
  }
  return realloc(ptr, size);
}
5677
/* Allocates `size` bytes by calling counting_realloc() with a NULL pointer,
 * so fresh allocations are recorded in the same counters as resizes.
 *
 * Returns whatever counting_realloc(NULL, size) returns.
 */
static void *
counting_malloc(size_t size) {
  return counting_realloc(NULL, size);
}
5682
5684 if (g_chunkSize != 0) {
5685 // this test does not use SINGLE_BYTES, because it depends on very precise
5686 // buffer fills.
5687 return;
5688 }
5690 return; // this test is irrelevant when the deferral heuristic is disabled.
5691 }
5692
5693 const int document_length = 65536;
5694 char *const document = (char *)malloc(document_length);
5695
5699 free,
5700 };
5701
5702 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5703 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5704 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5705
5706 for (const int *leading = leading_list; *leading >= 0; leading++) {
5707 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5708 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5709 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5710 *fillsize);
5711 // start by checking that the test looks reasonably valid
5713
5714 // put 'x' everywhere; some will be overwritten by elements.
5716 // maybe add an initial tag
5717 if (*leading) {
5718 assert_true(*leading >= 3); // or the test case is invalid
5719 memcpy(document, "<a>", 3);
5720 }
5721 // add the large token
5722 document[*leading + 0] = '<';
5723 document[*leading + 1] = 'b';
5724 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5725 document[*leading + *bigtoken - 1] = '>';
5726
5727 // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5728 const int expected_elem_total = 1 + (*leading ? 1 : 0);
5729
5732
5737
5738 g_biggestAlloc = 0;
5739 g_totalAlloc = 0;
5740 int offset = 0;
5741 // fill data until the big token is covered (but not necessarily parsed)
5742 while (offset < *leading + *bigtoken) {
5744 const enum XML_Status status
5746 if (status != XML_STATUS_OK) {
5748 }
5749 offset += *fillsize;
5750 }
5751 // Now, check that we've had a buffer allocation that could fit the
5752 // context bytes and our big token. In order to detect a special case,
5753 // we need to know how many bytes of our big token were included in the
5754 // first push that contained _any_ bytes of the big token:
5757 // Special case: we aren't saving any context, and the whole big token
5758 // was covered by a single fill, so Expat may have parsed directly
5759 // from our input pointer, without allocating an internal buffer.
5760 } else if (*leading < XML_CONTEXT_BYTES) {
5762 } else {
5764 }
5765 // fill data until the big token is actually parsed
5766 while (storage.count < expected_elem_total) {
5767 const size_t alloc_before = g_totalAlloc;
5769 const enum XML_Status status
5771 if (status != XML_STATUS_OK) {
5773 }
5774 offset += *fillsize;
5775 // since all the bytes of the big token are already in the buffer,
5776 // the bufsize ceiling should make us finish its parsing without any
5777 // further buffer allocations. We assume that there will be no other
5778 // large allocations in this test.
5780 }
5781 // test-the-test: was our alloc even called?
5783 // test-the-test: there shouldn't be any extra start elements
5785
5787 }
5788 }
5789 }
5790 free(document);
5791}
5793
5795 const int KiB = 1024;
5796 const int MiB = 1024 * KiB;
5797 const int document_length = 16 * MiB;
5798 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5799
5800 if (g_chunkSize != 0) {
5801 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5802 }
5803
5804 char *const document = (char *)malloc(document_length);
5807 document[0] = '<';
5808 document[1] = 't';
5809 memset(&document[2], ' ', big - 2); // a very spacy token
5810 document[big - 1] = '>';
5811
5812 // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5813 // When reparse deferral is enabled, the final (negated) value is the expected
5814 // maximum number of bytes scanned in parse attempts.
5815 const int testcases[][30] = {
5816 {8 * MiB, -8 * MiB},
5817 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5818 // zero-size fills shouldn't trigger the bypass
5819 {4 * MiB, 0, 4 * MiB, -12 * MiB},
5820 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5821 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5822 // try to hit the buffer ceiling only once (at the end)
5823 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5824 // try to hit the same buffer ceiling multiple times
5825 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5826
5827 // try to hit every ceiling, by always landing 1K shy of the buffer size
5828 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5829 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5830
5831 // try to avoid every ceiling, by always landing 1B past the buffer size
5832 // the normal 2x heuristic threshold still forces parse attempts.
5833 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
5834 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
5835 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
5836 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
5837 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5838 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5839 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7
5840 -(10 * MiB + 682 * KiB + 7)},
5841 // try to avoid every ceiling again, except on our last fill.
5842 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
5843 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
5844 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
5845 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
5846 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5847 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5848 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
5849 -(10 * MiB + 682 * KiB + 6)},
5850
5851 // try to hit ceilings on the way multiple times
5852 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
5853 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
5854 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer
5855 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer
5856 // we'll make a parse attempt at every parse call
5857 -(45 * MiB + 12)},
5858 };
5859 const int testcount = sizeof(testcases) / sizeof(testcases[0]);
5860 for (int test_i = 0; test_i < testcount; test_i++) {
5861 const int *fillsize = testcases[test_i];
5862 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
5863 fillsize[2], fillsize[3]);
5866
5871
5872 g_bytesScanned = 0;
5873 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
5874 int offset = 0;
5875 while (*fillsize >= 0) {
5876 assert_true(offset + *fillsize <= document_length); // or test is invalid
5877 const enum XML_Status status
5879 if (status != XML_STATUS_OK) {
5881 }
5882 offset += *fillsize;
5883 fillsize++;
5884 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
5885 worstcase_bytes += offset; // we might've tried to parse all pending bytes
5886 }
5887 assert_true(storage.count == 1); // the big token should've been parsed
5888 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
5890 // heuristic is enabled; some XML_Parse calls may have deferred reparsing
5891 const unsigned max_bytes_scanned = -*fillsize;
5894 "bytes scanned in parse attempts: actual=%u limit=%u \n",
5896 fail("too many bytes scanned in parse attempts");
5897 }
5898 }
5900
5902 }
5903 free(document);
5904}
5906
5907void
5909 TCase *tc_basic = tcase_create("basic tests");
5910
5913
5933 /* Regression test for SF bug #491986. */
5935 /* Regression test for SF bug #514281. */
6026#if XML_CONTEXT_BYTES > 0
6028#endif
6159}
const char apr_size_t len
Definition ap_regex.h:187
void tcase_add_test__if_xml_ge(TCase *tc, tcase_test_function test)
Definition common.c:159
const char * long_cdata_text
Definition common.c:83
void tcase_add_test__ifdef_xml_dtd(TCase *tc, tcase_test_function test)
Definition common.c:149
void basic_teardown(void)
Definition common.c:169
int g_chunkSize
Definition common.c:144
enum XML_Status _XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, int isFinal)
Definition common.c:193
XML_Bool g_abortable
Definition common.c:141
const char * long_character_data_text
Definition common.c:59
const char * get_buffer_test_text
Definition common.c:112
XML_Bool g_resumable
Definition common.c:138
static void test_pi_xmm(void)
static void test_invalid_character_entity_3(void)
static void test_unknown_ascii_encoding_ok(void)
static void test_ext_entity_ref_parameter(void)
static void test_reparse_deferral_is_inherited(void)
static void test_unknown_encoding_bad_ignore(void)
static void test_illegal_utf8(void)
static void test_bad_doctype_utf8(void)
static void test_missing_encoding_conversion_fn(void)
static void test_suspend_parser_between_cdata_calls(void)
static void test_ext_entity_trailing_rsqb(void)
static void test_french_charref_hexidecimal(void)
static void test_entity_in_utf16_be_attr(void)
static void test_ext_entity_set_bom(void)
static void test_ignore_section_utf16_be(void)
static void test_ignore_section_utf16(void)
static void test_bad_attr_desc_keyword_utf16(void)
static void test_undefined_ext_entity_in_external_dtd(void)
static void test_utf8_auto_align(void)
static void test_byte_info_at_error(void)
static int XMLCALL external_bom_checker(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
#define UTF8_LEAD_3
static void test_standalone_parameter_entity(void)
static void test_nested_groups(void)
static void test_utf16_bad_surrogate_pair(void)
static void test_reject_lt_in_attribute_value(void)
static void test_really_long_lines(void)
#define COLLIDING_HASH_SALT
static void test_partial_char_in_epilog(void)
static void test_dtd_elements_nesting(void)
static void test_trailing_cr_in_att_value(void)
static void test_danish_latin1(void)
static void test_ext_entity_set_encoding(void)
static void test_unknown_encoding_long_name_1(void)
static void test_invalid_character_entity_4(void)
static void test_wfc_undeclared_entity_standalone(void)
static void test_utf16_attribute(void)
static void test_subordinate_xdecl_abort(void)
static void test_dtd_attr_handling(void)
static void test_empty_ns_without_namespaces(void)
static void test_invalid_character_entity_2(void)
static void test_set_reparse_deferral_on_null_parser(void)
static void test_utf16_pe(void)
static void test_bad_cdata_utf16(void)
static void test_hash_collision(void)
static void test_empty_element_abort(void)
static void test_ext_entity_utf16_be(void)
static void test_xmldecl_misplaced(void)
static void test_byte_info_at_cdata(void)
static void test_trailing_rsqb(void)
static void test_utf16_pi(void)
static void test_attr_whitespace_normalization(void)
static void test_memory_allocation(void)
static void test_bypass_heuristic_when_close_to_bufsize(void)
static void test_dtd_stop_processing(void)
static void test_ext_entity_not_standalone(void)
static void test_failing_encoding_conversion_fn(void)
static void test_good_cdata_utf16(void)
#define CDATA_TEXT
static void * counting_realloc(void *ptr, size_t size)
static void test_utf8_in_start_tags(void)
static void test_abort_epilog(void)
static void test_negative_len_parse(void)
static void test_suspend_resume_internal_entity(void)
static void test_cdata_default(void)
static void test_recursive_external_parameter_entity_2(void)
static void test_ext_entity_invalid_suspended_parse(void)
static void test_bad_doctype_plus(void)
static void test_external_entity_values(void)
static void test_utf16_second_attr(void)
static void test_unknown_encoding_bad_name_2(void)
static void test_utf16_le_comment(void)
static void test_skipped_external_entity(void)
#define UTF8_LEAD_2
static void test_pi_handled_in_default(void)
static void test_unknown_encoding_long_name_2(void)
static void test_reset_in_entity(void)
static void test_set_bad_reparse_option(void)
static void test_dtd_elements(void)
static void test_pi_yml(void)
static void test_french_utf8(void)
static size_t g_totalAlloc
static void test_bom_utf8(void)
static void test_big_tokens_scale_linearly(void)
static void test_dtd_default_handling(void)
static void test_external_bom_consumed(void)
static void element_decl_counter(void *userData, const XML_Char *name, XML_Content *model)
#define PRE_ERROR_STR
static void test_ext_entity_bad_encoding(void)
static void test_utf16_le_epilog_newline(void)
#define END_ELEMENT
static void test_group_choice(void)
static void test_xmldecl_missing_value(void)
static void test_good_cdata_ascii(void)
static void test_nobom_utf16_le(void)
static void test_siphash_spec(void)
static void test_foreign_dtd_not_standalone(void)
static void test_foreign_dtd_without_external_subset(void)
static void test_invalid_character_entity(void)
static void test_attr_after_solidus(void)
static void test_trailing_spaces_in_elements(void)
void make_basic_test_case(Suite *s)
static void test_utf8_false_rejection(void)
static void test_suspend_parser_between_char_data_calls(void)
static void test_bad_ignore_section(void)
static void test_unknown_encoding_invalid_high(void)
static void test_short_doctype_3(void)
static void test_standalone_internal_entity(void)
static void test_u0000_char(void)
Definition basic_tests.c:93
static void test_not_predefined_entities(void)
static void test_ext_entity_invalid_parse(void)
static void XMLCALL element_decl_check_model(void *userData, const XML_Char *name, XML_Content *model)
static void test_recursive_external_parameter_entity(void)
#define UTF8_LEAD_1
static void test_buffer_can_grow_to_max(void)
#define START_ELEMENT
static void test_unknown_encoding_invalid_topbit(void)
static void test_not_standalone_handler_reject(void)
static void test_empty_parse(void)
static void test_pool_integrity_with_unfinished_attr(void)
static void test_reject_unfinished_param_in_att_value(void)
static void test_suspend_resume_internal_entity_issue_629(void)
static void test_bad_doctype_star(void)
static void test_unknown_ascii_encoding_fail(void)
#define UTF8_FOLLOW
static void test_ext_entity_no_handler(void)
static void test_bad_attr_desc_keyword(void)
static void test_wfc_undeclared_entity_with_external_subset(void)
static void test_stop_parser_between_cdata_calls(void)
static void test_long_doctype(void)
static void test_nul_byte(void)
Definition basic_tests.c:81
static void test_ignore_section(void)
static void test_public_notation_no_sysid(void)
#define POST_ERROR_STR
static void test_bad_cdata(void)
static void test_unfinished_epilog(void)
#define UTF8_LEAD_4
static void test_bad_public_doctype(void)
static void test_bad_doctype_utf16(void)
static void test_line_number_after_error(void)
static void test_skipped_parameter_entity(void)
static void test_subordinate_reset(void)
static void test_explicit_encoding(void)
static void test_ext_entity_latin1_utf16le_bom(void)
static void test_xmldecl_missing_attr(void)
static void test_repeated_stop_parser_between_char_data_calls(void)
static void test_line_number_after_parse(void)
static void test_ext_entity_utf16_le(void)
static void test_long_ascii_attribute(void)
static void test_latin1_umlauts(void)
static void test_bad_notation(void)
static void test_invalid_tag_in_dtd(void)
static void test_helper_is_whitespace_normalized(void)
static void test_suspend_xdecl(void)
static void test_entity_public_utf16_be(void)
static void test_subordinate_suspend(void)
static void test_unknown_encoding_internal_entity(void)
static void test_predefined_entity_redefinition(void)
static void test_pi_xnl(void)
static void test_bad_doctype_query(void)
static void test_get_buffer_1(void)
static void test_good_cdata_utf16_le(void)
static void test_utf8_in_cdata_section_2(void)
static void test_short_doctype(void)
static void test_subordinate_xdecl_suspend(void)
static void XMLCALL check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name, const XML_Char **atts)
static void test_set_foreign_dtd(void)
static void test_bad_entity_4(void)
static void * counting_malloc(size_t size)
static void test_ext_entity_latin1_utf16be_bom(void)
static void test_multichar_cdata_utf16(void)
static void test_long_cdata_utf16(void)
static void test_get_buffer_2(void)
static void test_ext_entity_good_cdata(void)
static void test_resume_invalid_parse(void)
static void test_column_number_after_parse(void)
static void test_byte_info_at_end(void)
static void test_suspend_epilog(void)
static void test_user_parameters(void)
static void test_attributes(void)
static void test_unknown_encoding_invalid_length(void)
static void test_invalid_unknown_encoding(void)
static int external_inherited_parser(XML_Parser p, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
static void test_negative_len_parse_buffer(void)
static void test_predefined_entities(void)
#define PARAM_ENTITY_NAME
static void test_set_reparse_deferral_on_the_fly(void)
static size_t g_biggestAlloc
static void test_suspend_in_sole_empty_tag(void)
static void test_restart_on_error(void)
static void test_trailing_cr(void)
static void test_really_long_encoded_lines(void)
static void test_varying_buffer_fills(void)
static void test_resume_resuspended(void)
static void test_not_utf16(void)
static void test_default_doctype_handler(void)
static void test_bad_encoding(void)
static void test_utf16_be_pi(void)
static void test_ext_entity_latin1_utf16le_bom2(void)
#define A_TO_P_IN_UTF16
static void test_set_reparse_deferral(void)
static void test_set_base(void)
static void test_ext_entity_latin1_utf16be_bom2(void)
static void test_french_charref_decimal(void)
static void test_attribute_enum_value(void)
static void test_ext_entity_utf8_non_bom(void)
static void test_suspend_resume_parameter_entity(void)
static void test_not_standalone_handler_accept(void)
static void test_param_entity_with_trailing_cr(void)
static void test_french_latin1(void)
static void test_utf16_be_comment(void)
static void test_unknown_encoding_invalid_attr_value(void)
static void test_unknown_encoding_success(void)
static void test_siphash_self(void)
static void test_foreign_dtd_with_doctype(void)
static void test_getbuffer_allocates_on_zero_len(void)
static void test_invalid_foreign_dtd(void)
static void test_abort_epilog_2(void)
static void test_empty_foreign_dtd(void)
static void test_ext_entity_value_abort(void)
static void test_line_and_column_numbers_inside_handlers(void)
static void test_bad_doctype(void)
static void test_column_number_after_error(void)
#define PARAM_ENTITY_CORE_VALUE
static void test_ns_in_attribute_default_without_namespaces(void)
static void test_unknown_encoding_invalid_surrogate(void)
static void test_short_doctype_2(void)
static void test_wfc_undeclared_entity_no_external_subset(void)
static void test_bad_entity(void)
static int is_whitespace_normalized(const XML_Char *s, int is_cdata)
static void test_bad_entity_2(void)
static void test_end_element_events(void)
static void test_ext_entity_bad_encoding_2(void)
static void test_wfc_undeclared_entity_unread_external_subset(void)
static void test_nested_entity_suspend(void)
static void basic_setup(void)
Definition basic_tests.c:71
static void test_wfc_undeclared_entity_with_external_subset_standalone(void)
static void test_unknown_encoding_bad_name(void)
static void test_entity_public_utf16_le(void)
static void test_resume_entity_with_syntax_error(void)
static void test_default_current(void)
static void test_long_utf8_character(void)
static void test_ext_entity_utf16_unknown(void)
static void test_bad_entity_3(void)
static void test_bom_utf16_be(void)
static void test_xmldecl_invalid(void)
static enum XML_Status get_feature(enum XML_FeatureEnum feature_id, long *presult)
static void test_wfc_no_recursive_entity_refs(void)
static void test_skipped_unloaded_ext_entity(void)
static void test_unrecognised_encoding_internal_entity(void)
static void test_utf16(void)
static void test_entity_with_external_subset_unless_standalone(void)
static void test_bom_utf16_le(void)
static void test_utf8_in_cdata_section(void)
static void test_skipped_null_loaded_ext_entity(void)
static void test_comment_handled_in_default(void)
static void test_entity_in_utf16_le_attr(void)
static void test_ext_entity_trailing_cr(void)
static void test_long_latin1_attribute(void)
static void test_stop_parser_between_char_data_calls(void)
void CharData_Init(CharData *storage)
Definition chardata.c:61
int CharData_CheckXMLChars(CharData *storage, const XML_Char *expected)
Definition chardata.c:87
#define XCS(s)
Definition common.h:77
XML_Parser g_parser
Definition runtests.c:62
#define expect_failure(text, errorCode, errorMessage)
Definition common.h:108
#define run_character_check(text, expected)
Definition common.h:117
#define run_attribute_check(text, expected)
Definition common.h:123
#define xcstrcmp(s, t)
Definition common.h:75
#define run_ext_character_check(text, test_data, expected)
Definition common.h:136
#define xml_failure(parser)
Definition common.h:99
void XMLCALL dummy_element_decl_handler(void *userData, const XML_Char *name, XML_Content *model)
Definition dummy.c:121
void XMLCALL dummy_start_element(void *userData, const XML_Char *name, const XML_Char **atts)
Definition dummy.c:162
void XMLCALL dummy_end_element(void *userData, const XML_Char *name)
Definition dummy.c:171
void XMLCALL dummy_comment_handler(void *userData, const XML_Char *data)
Definition dummy.c:147
void XMLCALL dummy_end_doctype_handler(void *userData)
Definition dummy.c:85
void XMLCALL dummy_start_cdata_handler(void *userData)
Definition dummy.c:177
void XMLCALL dummy_attlist_decl_handler(void *userData, const XML_Char *elname, const XML_Char *attname, const XML_Char *att_type, const XML_Char *dflt, int isrequired)
Definition dummy.c:134
void XMLCALL dummy_start_doctype_handler(void *userData, const XML_Char *doctypeName, const XML_Char *sysid, const XML_Char *pubid, int has_internal_subset)
Definition dummy.c:73
void XMLCALL dummy_end_cdata_handler(void *userData)
Definition dummy.c:183
void XMLCALL dummy_cdata_handler(void *userData, const XML_Char *s, int len)
Definition dummy.c:189
void XMLCALL dummy_pi_handler(void *userData, const XML_Char *target, const XML_Char *data)
Definition dummy.c:154
void XMLCALL dummy_entity_decl_handler(void *userData, const XML_Char *entityName, int is_parameter_entity, const XML_Char *value, int value_length, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName)
Definition dummy.c:91
void XMLCALL dummy_skip_handler(void *userData, const XML_Char *entityName, int is_parameter_entity)
Definition dummy.c:255
void XMLCALL dummy_xdecl_handler(void *userData, const XML_Char *version, const XML_Char *encoding, int standalone)
Definition dummy.c:64
void XMLCALL dummy_default_handler(void *userData, const XML_Char *s, int len)
Definition dummy.c:230
void XMLCALL dummy_notation_decl_handler(void *userData, const XML_Char *notationName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition dummy.c:109
void init_dummy_handlers(void)
Definition dummy.c:54
unsigned long get_dummy_handler_flags(void)
Definition dummy.c:59
#define DUMMY_ELEMENT_DECL_HANDLER_FLAG
Definition dummy.h:54
#define DUMMY_SKIP_HANDLER_FLAG
Definition dummy.h:66
#define DUMMY_NOTATION_DECL_HANDLER_FLAG
Definition dummy.h:53
#define XML_FMT_INT_MOD
#define XML_FMT_STR
int XML_GetCurrentByteCount(XML_Parser parser)
Definition xmlparse.c:2335
const XML_LChar * XML_ErrorString(enum XML_Error code)
Definition xmlparse.c:2429
#define XML_FALSE
Definition expat.h:59
void XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status)
Definition xmlparse.c:2310
XML_Bool XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled)
Definition xmlparse.c:2641
void XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start)
Definition xmlparse.c:1781
@ XML_SUSPENDED
Definition expat.h:845
@ XML_INITIALIZED
Definition expat.h:845
#define XML_STATUS_ERROR
Definition expat.h:76
void XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end)
Definition xmlparse.c:1691
XML_FeatureEnum
Definition expat.h:1019
@ XML_FEATURE_END
Definition expat.h:1020
@ XML_FEATURE_CONTEXT_BYTES
Definition expat.h:1024
XML_Index XML_GetCurrentByteIndex(XML_Parser parser)
Definition xmlparse.c:2325
enum XML_Status XML_SetEncoding(XML_Parser parser, const XML_Char *encoding)
Definition xmlparse.c:1327
void XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler handler)
Definition xmlparse.c:1700
void XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler)
Definition xmlparse.c:1726
void XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl)
Definition xmlparse.c:1871
void XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler)
Definition xmlparse.c:1831
unsigned char XML_Bool
Definition expat.h:57
@ XML_CQUANT_PLUS
Definition expat.h:151
@ XML_CQUANT_NONE
Definition expat.h:148
@ XML_CQUANT_REP
Definition expat.h:150
void XML_UseParserAsHandlerArg(XML_Parser parser)
Definition xmlparse.c:1603
void XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler)
Definition xmlparse.c:1712
XML_Bool XML_ParserReset(XML_Parser parser, const XML_Char *encoding)
Definition xmlparse.c:1286
XML_Error
Definition expat.h:83
@ XML_ERROR_UNCLOSED_TOKEN
Definition expat.h:89
@ XML_ERROR_ABORTED
Definition expat.h:121
@ XML_ERROR_XML_DECL
Definition expat.h:116
@ XML_ERROR_NO_ELEMENTS
Definition expat.h:87
@ XML_ERROR_NOT_STANDALONE
Definition expat.h:106
@ XML_ERROR_RECURSIVE_ENTITY_REF
Definition expat.h:96
@ XML_ERROR_PARTIAL_CHAR
Definition expat.h:90
@ XML_ERROR_SUSPENDED
Definition expat.h:119
@ XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
Definition expat.h:110
@ XML_ERROR_EXTERNAL_ENTITY_HANDLING
Definition expat.h:105
@ XML_ERROR_INCORRECT_ENCODING
Definition expat.h:103
@ XML_ERROR_MISPLACED_XML_PI
Definition expat.h:101
@ XML_ERROR_NONE
Definition expat.h:84
@ XML_ERROR_TAG_MISMATCH
Definition expat.h:91
@ XML_ERROR_UNCLOSED_CDATA_SECTION
Definition expat.h:104
@ XML_ERROR_BAD_CHAR_REF
Definition expat.h:98
@ XML_ERROR_UNDEFINED_ENTITY
Definition expat.h:95
@ XML_ERROR_PUBLICID
Definition expat.h:118
@ XML_ERROR_INVALID_TOKEN
Definition expat.h:88
@ XML_ERROR_UNKNOWN_ENCODING
Definition expat.h:102
@ XML_ERROR_INVALID_ARGUMENT
Definition expat.h:129
@ XML_ERROR_SYNTAX
Definition expat.h:86
void XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler)
Definition xmlparse.c:1764
const XML_Char * XML_GetBase(XML_Parser parser)
Definition xmlparse.c:1661
void XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler)
Definition xmlparse.c:1883
void XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler handler)
Definition xmlparse.c:1706
const XML_Feature * XML_GetFeatureList(void)
Definition xmlparse.c:2567
#define XML_STATUS_SUSPENDED
enum XML_Status XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
Definition xmlparse.c:1926
void XML_SetUnknownEncodingHandler(XML_Parser parser, XML_UnknownEncodingHandler handler, void *encodingHandlerData)
Definition xmlparse.c:1862
enum XML_Error XML_GetErrorCode(XML_Parser parser)
Definition xmlparse.c:2318
@ XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
Definition expat.h:882
@ XML_PARAM_ENTITY_PARSING_ALWAYS
Definition expat.h:883
@ XML_PARAM_ENTITY_PARSING_NEVER
Definition expat.h:881
void XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end)
Definition xmlparse.c:1772
#define XML_GetUserData(parser)
Definition expat.h:683
XML_Parser XML_ParserCreate(const XML_Char *encoding)
Definition xmlparse.c:766
void XML_ParserFree(XML_Parser parser)
Definition xmlparse.c:1537
void XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler)
Definition xmlparse.c:1756
void XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end)
Definition xmlparse.c:1788
XML_Size XML_GetCurrentLineNumber(XML_Parser parser)
Definition xmlparse.c:2364
void XML_SetStartCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start)
Definition xmlparse.c:1742
void XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler)
Definition xmlparse.c:1801
XML_Size XML_GetCurrentColumnNumber(XML_Parser parser)
Definition xmlparse.c:2376
void XML_SetUserData(XML_Parser parser, void *userData)
Definition xmlparse.c:1637
#define XML_STATUS_OK
Definition expat.h:78
void * XML_MemRealloc(XML_Parser parser, void *ptr, size_t size)
Definition xmlparse.c:2401
@ XML_CTYPE_SEQ
Definition expat.h:144
@ XML_CTYPE_CHOICE
Definition expat.h:143
@ XML_CTYPE_NAME
Definition expat.h:142
void XML_FreeContentModel(XML_Parser parser, XML_Content *model)
Definition xmlparse.c:2388
enum XML_Status XML_SetBase(XML_Parser parser, const XML_Char *base)
Definition xmlparse.c:1647
enum XML_Error XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD)
Definition xmlparse.c:1609
void XML_SetSkippedEntityHandler(XML_Parser parser, XML_SkippedEntityHandler handler)
Definition xmlparse.c:1855
void XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler)
Definition xmlparse.c:1719
void * XML_MemMalloc(XML_Parser parser, size_t size)
Definition xmlparse.c:2394
enum XML_Status XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
Definition xmlparse.c:2037
void XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler handler)
Definition xmlparse.c:1838
void XML_SetEndCdataSectionHandler(XML_Parser parser, XML_EndCdataSectionHandler end)
Definition xmlparse.c:1749
void XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler xmldecl)
Definition xmlparse.c:1889
int XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt)
Definition xmlparse.c:1912
const char * XML_GetInputContext(XML_Parser parser, int *offset, int *size)
Definition xmlparse.c:2344
void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
Definition xmlparse.c:1845
void * XML_GetBuffer(XML_Parser parser, int len)
Definition xmlparse.c:2108
enum XML_Status XML_ResumeParser(XML_Parser parser)
Definition xmlparse.c:2270
XML_Parser XML_ParserCreate_MM(const XML_Char *encoding, const XML_Memory_Handling_Suite *memsuite, const XML_Char *namespaceSeparator)
Definition xmlparse.c:1058
#define XML_TRUE
Definition expat.h:58
int XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing parsing)
Definition xmlparse.c:1895
XML_Status
Definition expat.h:74
XML_Parser XML_ExternalEntityParserCreate(XML_Parser parser, const XML_Char *context, const XML_Char *encoding)
Definition xmlparse.c:1354
void XML_MemFree(XML_Parser parser, void *ptr)
Definition xmlparse.c:2408
void XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl)
Definition xmlparse.c:1877
#define XML_CONTEXT_BYTES
char XML_Char
#define XMLCALL
unsigned long XML_Size
long XML_Index
ap_conf_vector_t * base
apr_md5_ctx_t * context
Definition util_md5.h:58
int enabled
apr_file_t * f
apr_file_t apr_off_t start
const char apr_ssize_t int partial
Definition apr_escape.h:188
apr_redis_t * rc
Definition apr_redis.h:173
apr_text_header const char * text
Definition apr_xml.h:78
apr_xml_parser ** parser
Definition apr_xml.h:228
apr_size_t size
const char * input
Definition apr_cstr.h:93
const char * value
Definition apr_env.h:51
const char * key
apr_seek_where_t apr_off_t * offset
void * data
char * buffer
apr_array_header_t ** result
apr_vformatter_buff_t * c
Definition apr_lib.h:175
const char char ** end
const char * s
Definition apr_strings.h:95
int int status
int XMLCALL external_entity_param_checker(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:788
void XMLCALL entity_suspending_xdecl_handler(void *userData, const XML_Char *version, const XML_Char *encoding, int standalone)
Definition handlers.c:589
int XMLCALL external_entity_devaluer(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:1100
int XMLCALL external_entity_param(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:839
const void * g_handler_data
Definition handlers.c:63
int XMLCALL external_entity_loader2(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:1154
int XMLCALL external_entity_rsqb_catcher(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:737
void XMLCALL cr_cdata_handler(void *userData, const XML_Char *s, int len)
Definition handlers.c:1613
int XMLCALL MiscEncodingHandler(void *data, const XML_Char *encoding, XML_Encoding *info)
Definition handlers.c:349
void XMLCALL data_check_comment_handler(void *userData, const XML_Char *data)
Definition handlers.c:1800
void XMLCALL accumulate_characters(void *userData, const XML_Char *s, int len)
Definition handlers.c:1910
int XMLCALL external_entity_cr_catcher(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:693
int XMLCALL external_entity_valuer(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:953
int get_param_entity_match_flag(void)
Definition handlers.c:1771
void XMLCALL rsqb_handler(void *userData, const XML_Char *s, int len)
Definition handlers.c:1624
void XMLCALL element_decl_suspender(void *userData, const XML_Char *name, XML_Content *model)
Definition handlers.c:1837
int XMLCALL external_entity_loader(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:439
void XMLCALL record_element_start_handler(void *userData, const XML_Char *name, const XML_Char **atts)
Definition handlers.c:1705
int XMLCALL external_entity_load_ignore(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:880
int g_xdecl_count
Definition handlers.c:69
int XMLCALL external_entity_not_standalone(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:999
void XMLCALL param_check_skip_handler(void *userData, const XML_Char *entityName, int is_parameter_entity)
Definition handlers.c:1790
int XMLCALL external_entity_oneshot_loader(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:1129
void XMLCALL record_cdata_handler(void *userData, const XML_Char *s, int len)
Definition handlers.c:1684
int XMLCALL external_entity_value_aborter(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:1036
void XMLCALL record_element_end_handler(void *userData, const XML_Char *name)
Definition handlers.c:1712
void XMLCALL accumulate_attribute(void *userData, const XML_Char *name, const XML_Char **atts)
Definition handlers.c:1916
int g_comment_count
Definition handlers.c:65
void XMLCALL xml_decl_handler(void *userData, const XML_Char *version, const XML_Char *encoding, int standalone)
Definition handlers.c:1778
int XMLCALL external_entity_bad_cr_catcher(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:714
int XMLCALL external_entity_unfinished_attlist(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:1209
int XMLCALL external_entity_suspend_xmldecl(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:601
int XMLCALL UnrecognisedEncodingHandler(void *data, const XML_Char *encoding, XML_Encoding *info)
Definition handlers.c:303
int XMLCALL external_entity_public(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:1073
void XMLCALL accumulate_entity_decl(void *userData, const XML_Char *entityName, int is_parameter_entity, const XML_Char *value, int value_length, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName)
Definition handlers.c:1864
void XMLCALL accumulate_pi_characters(void *userData, const XML_Char *target, const XML_Char *data)
Definition handlers.c:1846
static int entity_match_flag
Definition handlers.c:1731
void XMLCALL accumulate_comment(void *userData, const XML_Char *data)
Definition handlers.c:1857
void XMLCALL selective_aborting_default_handler(void *userData, const XML_Char *s, int len)
Definition handlers.c:1812
void XMLCALL end_element_event_handler(void *userData, const XML_Char *name)
Definition handlers.c:81
void XMLCALL byte_character_handler(void *userData, const XML_Char *s, int len)
Definition handlers.c:1632
void XMLCALL end_element_event_handler2(void *userData, const XML_Char *name)
Definition handlers.c:97
int XMLCALL external_entity_faulter2(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:1181
int XMLCALL external_entity_good_cdata_ascii(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:760
void XMLCALL param_entity_match_handler(void *userData, const XML_Char *entityName, int is_parameter_entity, const XML_Char *value, int value_length, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName)
Definition handlers.c:1734
int XMLCALL UnknownEncodingHandler(void *data, const XML_Char *encoding, XML_Encoding *info)
Definition handlers.c:282
int XMLCALL external_entity_null_loader(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:494
void XMLCALL clearing_aborting_character_handler(void *userData, const XML_Char *s, int len)
Definition handlers.c:1572
int XMLCALL accept_not_standalone_handler(void *userData)
Definition handlers.c:1542
void XMLCALL suspending_comment_handler(void *userData, const XML_Char *data)
Definition handlers.c:1830
void XMLCALL record_cdata_nodefault_handler(void *userData, const XML_Char *s, int len)
Definition handlers.c:1691
int XMLCALL external_entity_load_ignore_utf16(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:901
void XMLCALL ext2_accumulate_characters(void *userData, const XML_Char *s, int len)
Definition handlers.c:1660
int XMLCALL reject_not_standalone_handler(void *userData)
Definition handlers.c:1536
void XMLCALL verify_attlist_decl_handler(void *userData, const XML_Char *element_name, const XML_Char *attr_name, const XML_Char *attr_type, const XML_Char *default_value, int is_required)
Definition handlers.c:1549
int XMLCALL external_entity_suspender(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:565
int XMLCALL external_entity_load_ignore_utf16_be(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:927
int XMLCALL external_entity_suspending_faulter(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:638
int g_skip_count
Definition handlers.c:67
void XMLCALL start_element_suspender(void *userData, const XML_Char *name, const XML_Char **atts)
Definition handlers.c:166
void param_entity_match_init(const XML_Char *name, const XML_Char *value)
Definition handlers.c:1764
void XMLCALL counting_start_element_handler(void *userData, const XML_Char *name, const XML_Char **atts)
Definition handlers.c:104
void XMLCALL checking_default_handler(void *userData, const XML_Char *s, int len)
Definition handlers.c:1938
int XMLCALL external_entity_faulter(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:466
void XMLCALL parser_stop_character_handler(void *userData, const XML_Char *s, int len)
Definition handlers.c:1582
void XMLCALL accumulate_and_suspend_comment_handler(void *userData, const XML_Char *data)
Definition handlers.c:1952
void XMLCALL record_default_handler(void *userData, const XML_Char *s, int len)
Definition handlers.c:1678
void XMLCALL start_element_event_handler(void *userData, const XML_Char *name, const XML_Char **atts)
Definition handlers.c:74
int XMLCALL external_entity_ref_param_checker(XML_Parser parameter, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:813
void XMLCALL suspending_end_handler(void *userData, const XML_Char *s)
Definition handlers.c:160
int XMLCALL external_entity_resetter(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)
Definition handlers.c:506
void XMLCALL start_element_event_handler2(void *userData, const XML_Char *name, const XML_Char **attr)
Definition handlers.c:88
void XMLCALL record_skip_handler(void *userData, const XML_Char *entityName, int is_parameter_entity)
Definition handlers.c:1697
#define assert_record_handler_called(storage, index, expected_name, expected_arg)
Definition handlers.h:513
#define ENTITY_MATCH_NOT_FOUND
Definition handlers.h:524
#define handler_record_get(storage, index)
Definition handlers.h:510
#define STRUCT_START_TAG
Definition handlers.h:73
#define STRUCT_END_TAG
Definition handlers.h:74
#define ENTITY_MATCH_FAIL
Definition handlers.h:523
const XML_Bool g_reparseDeferralEnabledDefault
Definition xmlparse.c:638
void _INTERNAL_trim_to_complete_utf8_characters(const char *from, const char **fromLimRef)
Definition xmltok.c:328
#define UNUSED_P(p)
Definition internal.h:137
apr_pool_t * p
Definition md_event.c:32
void tcase_add_test(TCase *tc, tcase_test_function test)
Definition minicheck.c:92
void suite_add_tcase(Suite *suite, TCase *tc)
Definition minicheck.c:74
void set_subtest(char const *fmt,...)
Definition minicheck.c:157
void tcase_add_checked_fixture(TCase *tc, tcase_setup_function setup, tcase_teardown_function teardown)
Definition minicheck.c:84
TCase * tcase_create(const char *name)
Definition minicheck.c:65
#define fail(msg)
Definition minicheck.h:87
#define END_TEST
Definition minicheck.h:81
#define assert_true(cond)
Definition minicheck.h:88
#define START_TEST(testname)
Definition minicheck.h:77
static const ap_slotmem_provider_t * storage
return NULL
Definition mod_so.c:359
int i
Definition mod_so.c:347
#define SIP_ULL(high, low)
Definition siphash.h:109
static struct siphash * sip24_update(struct siphash *H, const void *src, size_t len)
Definition siphash.h:206
static uint64_t siphash24(const void *src, size_t len, const struct sipkey *key)
Definition siphash.h:270
static int sip24_valid(void)
Definition siphash.h:287
static struct siphash * sip24_init(struct siphash *H, const struct sipkey *key)
Definition siphash.h:191
static uint64_t sip24_final(struct siphash *H)
Definition siphash.h:230
static struct sipkey * sip_tokey(struct sipkey *key, const void *src)
Definition siphash.h:147
char * name
long int value
Definition expat.h:1042
enum XML_FeatureEnum feature
Definition expat.h:1040
XML_Content * children
Definition expat.h:179
unsigned int numchildren
Definition expat.h:178
XML_Char * name
Definition expat.h:177
enum XML_Content_Quant quant
Definition expat.h:176
enum XML_Content_Type type
Definition expat.h:175
XML_Bool nested_callback_happened
const char * external
void StructData_Dispose(StructData *storage)
Definition structdata.c:148
void StructData_CheckItems(StructData *storage, const StructDataEntry *expected, int count)
Definition structdata.c:108
void StructData_Init(StructData *storage)
Definition structdata.c:72
AttrInfo * attributes
Definition handlers.h:92
Definition handlers.h:477
static char * testdata
Definition testud.c:27
typedef int(WSAAPI *apr_winapi_fpt_WSAPoll)(IN OUT LPWSAPOLLFD fdArray
INT info