undead.xml source code

1 // Written in the D programming language.
2 
3 /**
4 $(RED Warning: This module is considered out-dated and not up to Phobos'
5       current standards. It will be removed from Phobos in 2.101.0.
6       If you still need it, go to $(LINK https://github.com/DigitalMars/undeaD))
7  */
8 
9 /*
10 Classes and functions for creating and parsing XML
11 
12 The basic architecture of this module is that there are standalone functions,
13 classes for constructing an XML document from scratch (Tag, Element and
14 Document), and also classes for parsing a pre-existing XML file (ElementParser
15 and DocumentParser). The parsing classes <i>may</i> be used to build a
16 Document, but that is not their primary purpose. The handling capabilities of
17 DocumentParser and ElementParser are sufficiently customizable that you can
18 make them do pretty much whatever you want.
19 
20 Example: This example creates a DOM (Document Object Model) tree
21     from an XML file.
22 ------------------------------------------------------------------------------
23 import undead.xml;
24 import std.stdio;
25 import std.string;
26 import std.file;
27 
28 // books.xml is used in various samples throughout the Microsoft XML Core
29 // Services (MSXML) SDK.
30 //
31 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
32 
33 void main()
34 {
35     string s = cast(string) std.file.read("books.xml");
36 
37     // Check for well-formedness
38     check(s);
39 
40     // Make a DOM tree
41     auto doc = new Document(s);
42 
43     // Plain-print it
44     writeln(doc);
45 }
46 ------------------------------------------------------------------------------
47 
48 Example: This example does much the same thing, except that the file is
49     deconstructed and reconstructed by hand. This is more work, but the
50     techniques involved offer vastly more power.
51 ------------------------------------------------------------------------------
52 import undead.xml;
53 import std.stdio;
54 import std.string;
55 
56 struct Book
57 {
58     string id;
59     string author;
60     string title;
61     string genre;
62     string price;
63     string pubDate;
64     string description;
65 }
66 
67 void main()
68 {
69     string s = cast(string) std.file.read("books.xml");
70 
71     // Check for well-formedness
72     check(s);
73 
74     // Take it apart
75     Book[] books;
76 
77     auto xml = new DocumentParser(s);
78     xml.onStartTag["book"] = (ElementParser xml)
79     {
80         Book book;
81         book.id = xml.tag.attr["id"];
82 
83         xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
84         xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
85         xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
86         xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
87         xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
88         xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };
89 
90         xml.parse();
91 
92         books ~= book;
93     };
94     xml.parse();
95 
96     // Put it back together again;
97     auto doc = new Document(new Tag("catalog"));
98     foreach (book;books)
99     {
100         auto element = new Element("book");
101         element.tag.attr["id"] = book.id;
102 
103         element ~= new Element("author",      book.author);
104         element ~= new Element("title",       book.title);
105         element ~= new Element("genre",       book.genre);
106         element ~= new Element("price",       book.price);
107         element ~= new Element("publish-date",book.pubDate);
108         element ~= new Element("description", book.description);
109 
110         doc ~= element;
111     }
112 
113     // Pretty-print it
114     writeln(join(doc.pretty(3),"\n"));
115 }
116 -------------------------------------------------------------------------------
117 Copyright: Copyright Janice Caron 2008 - 2009.
118 License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
119 Authors:   Janice Caron
120 Source:    $(PHOBOSSRC undead.xml.d)
121 */
122 /*
123          Copyright Janice Caron 2008 - 2009.
124 Distributed under the Boost Software License, Version 1.0.
125    (See accompanying file LICENSE_1_0.txt or copy at
126          http://www.boost.org/LICENSE_1_0.txt)
127 */
128 module undead.xml;
129 
// Literal text that opens a CDATA section in XML markup.
enum string cdata = "<![CDATA[";
131 
/*
 * Tests whether a code point is a legal XML character (the Char production).
 *
 * Accepted values are tab (0x9), line feed (0xA), carriage return (0xD),
 * 0x20 through 0xD7FF, 0xE000 through 0xFFFD and 0x10000 through 0x10FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a legal XML character, false otherwise
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Anything past the last Unicode code point is rejected outright.
    if (c > 0x10FFFF)
        return false;

    // At or above 0xE000 everything is legal except U+FFFE and U+FFFF;
    // masking off the lowest bit lets one comparison catch both.
    if (c >= 0xE000)
        return (c & 0x1FFFFE) != 0xFFFE;

    // What is left above 0xD7FF is the surrogate range 0xD800 .. 0xDFFF.
    if (c > 0xD7FF)
        return false;

    if (c >= 0x20)
        return true;

    // Below 0x20 only tab, line feed and carriage return are legal.
    return c == 0x9 || c == 0xA || c == 0xD;
}

@safe @nogc nothrow pure unittest
{
    static immutable dchar[] legal = [
        cast(dchar) 0x9,
        cast(dchar) 0xA,
        cast(dchar) 0xD,
        cast(dchar) 0x20,
        'J',
        cast(dchar) 0xD7FF,
        cast(dchar) 0xE000,
        cast(dchar) 0xFFFD,
        cast(dchar) 0x10000,
        cast(dchar) 0x10FFFF,
    ];
    static immutable dchar[] illegal = [
        cast(dchar) 0x8,
        cast(dchar) 0xB,
        cast(dchar) 0xC,
        cast(dchar) 0xE,
        cast(dchar) 0x1F,
        cast(dchar) 0xD800,
        cast(dchar) 0xDFFF,
        cast(dchar) 0xFFFE,
        cast(dchar) 0xFFFF,
        cast(dchar) 0x110000,
    ];

    foreach (ch; legal)
        assert(isChar(ch));
    foreach (ch; illegal)
        assert(!isChar(ch));

    // Optional exhaustive comparison against the table-driven lookup.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
193 
/*
 * Tests whether a character counts as whitespace in XML.
 *
 * Exactly four characters qualify: space (U+0020), tab (U+0009),
 * line feed (U+000A) and carriage return (U+000D).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
    case '\u0020': // space
    case '\u0009': // tab
    case '\u000A': // line feed
    case '\u000D': // carriage return
        return true;
    default:
        return false;
    }
}
209 
/*
 * Tests whether a character is a digit according to the XML standard.
 *
 * The ASCII digits '0' through '9' are recognised directly; every other
 * character is looked up in DigitTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a digit
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // Fast path for ASCII; the table is consulted only when it does not match.
    return (0x0030 <= c && c <= 0x0039) || lookup(DigitTable, c);
}

@safe @nogc nothrow pure unittest
{
    // Optional exhaustive comparison against the table-driven lookup.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (codePoint; 0 .. dchar.max + 1)
        {
            assert(isDigit(codePoint) == lookup(DigitTable, codePoint));
        }
    }
}
234 
/*
 * Tests whether a character is a letter according to the XML standard.
 *
 * A letter is any character accepted by isIdeographic() or isBaseChar().
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a letter
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // The ideographic test is tried first; base characters are the fallback.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
247 
/*
 * Tests whether a character is ideographic according to the XML standard.
 *
 * Accepted values are U+3007, U+3021 through U+3029 and U+4E00 through
 * U+9FA5.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an ideographic character
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (0x3021 <= c && c <= 0x3029)
        || (0x4E00 <= c && c <= 0x9FA5);
}

@safe @nogc nothrow pure unittest
{
    // The single code point plus both ends of each accepted range.
    static immutable dchar[] accepted = [
        '\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029',
    ];
    foreach (ch; accepted)
        assert(isIdeographic(ch));

    // Optional exhaustive comparison against the table-driven lookup.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
282 
/*
 * Tests whether a character is a base character according to the XML
 * standard.
 *
 * The answer comes from looking the character up in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the BaseCharTable lookup for c
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(BaseCharTable, c);
    return listed;
}
296 
/*
 * Tests whether a character is a combining character according to the XML
 * standard.
 *
 * The answer comes from looking the character up in CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the CombiningCharTable lookup for c
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(CombiningCharTable, c);
    return listed;
}
310 
/*
 * Tests whether a character is an extender according to the XML standard.
 *
 * The answer comes from looking the character up in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the ExtenderTable lookup for c
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    immutable bool listed = lookup(ExtenderTable, c);
    return listed;
}
323 
/*
 * Escapes the five characters that have predefined XML entities.
 *
 * Ampersand, double quote, apostrophe, less-than and greater-than are
 * replaced by &amp;, &quot;, &apos;, &lt; and &gt; respectively; decode()
 * performs the reverse mapping. Every other character is copied through
 * unchanged.
 *
 * If nothing needs escaping, the original string is returned as is, without
 * making a copy.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string
 *
 * Example:
 * --------------
 * writeln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    auto output = appender!S();
    size_t copiedUpTo; // index just past the last character already handled

    foreach (pos, ch; s)
    {
        // Pick the entity for this character, if it has one.
        string entity;
        switch (ch)
        {
        case '&':  entity = "&amp;";  break;
        case '"':  entity = "&quot;"; break;
        case '\'': entity = "&apos;"; break;
        case '<':  entity = "&lt;";   break;
        case '>':  entity = "&gt;";   break;
        default:   break;
        }
        if (entity.length == 0)
            continue;

        // Flush the untouched run before this character, then the entity.
        output.put(s[copiedUpTo .. pos]);
        output.put(entity);
        copiedUpTo = pos + 1;
    }

    // The appender holds no data when no replacement was made, so the
    // caller gets the original string back.
    if (!output.data.ptr) return s;

    output.put(s[copiedUpTo .. $]);
    return output.data;
}

@safe pure unittest
{
    auto plain = "hello";
    assert(encode(plain) is plain);

    assert(encode("cat & dog") == "cat &amp; dog");
    assert(encode("\"hi\"") == "&quot;hi&quot;", encode("\"hi\""));
    assert(encode("don't") == "don&apos;t");
    assert(encode("a < b") == "a &lt; b");
    assert(encode("a > b") == "a &gt; b", encode("a > b"));
}
388 
/*
 * Selects how decode() treats its input.
 */
enum DecodeMode
{
    NONE,   // do not decode; the input is returned as is
    LOOSE,  // decode, keeping a malformed '&' sequence as literal text
    STRICT  // decode, throwing a DecodeException on a malformed '&' sequence
}
400 
/*
 * Unescapes the predefined XML entities and numeric character references
 * in a string.
 *
 * The named entities &amp;, &quot;, &apos;, &lt; and &gt; are replaced by
 * the characters they stand for, and numeric references in decimal or
 * hexadecimal form (for example &#42; or &#x2A;) are replaced by the
 * referenced character. encode() performs the reverse mapping for the
 * named entities.
 *
 * If the string does not contain an ampersand, the original string is
 * returned as is, without making a copy.
 *
 * An ampersand that does not start a recognised entity or a valid
 * character reference is handled according to mode: DecodeMode.LOOSE keeps
 * the '&' as literal text, DecodeMode.STRICT throws. DecodeMode.NONE skips
 * decoding entirely.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writeln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // Each named entity paired with the text it decodes to.
    static immutable string[2][5] namedEntities = [
        ["&amp;",  "&"],
        ["&quot;", "\""],
        ["&apos;", "'"],
        ["&lt;",   "<"],
        ["&gt;",   ">"],
    ];

    // Stays empty until the first '&' is met, so ampersand-free input is
    // returned without copying.
    string buffer;
    for (size_t i = 0; i < s.length; ++i)
    {
        immutable char c = s[i];
        if (c != '&')
        {
            if (buffer.length != 0) buffer ~= c;
            continue;
        }

        // First ampersand: start the output with everything before it.
        if (buffer.length == 0)
        {
            buffer = s[0 .. i].dup;
        }

        string rest = s[i .. $];

        if (startsWith(rest, "&#"))
        {
            try
            {
                dchar d;
                // presumably advances `rest` past the reference it parsed;
                // the index arithmetic below relies on that -- TODO confirm
                checkCharRef(rest, d);
                char[4] temp;
                import std.utf : encode;
                buffer ~= temp[0 .. encode(temp, d)];
                // Park i on the last consumed character; ++i moves past it.
                i = s.length - rest.length - 1;
            }
            catch (Err e)
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
            continue;
        }

        bool matched = false;
        foreach (entry; namedEntities)
        {
            if (startsWith(rest, entry[0]))
            {
                buffer ~= entry[1];
                // Skip the rest of the entity; ++i consumes its last char.
                i += entry[0].length - 1;
                matched = true;
                break;
            }
        }
        if (matched)
            continue;

        // A bare ampersand that starts nothing recognisable.
        if (mode == DecodeMode.STRICT)
            throw new DecodeException("Unescaped &");
        buffer ~= '&';
    }
    return (buffer.length == 0) ? s : buffer;
}

@safe pure unittest
{
    // Reports whether strict decoding of the input throws.
    bool rejectedStrictly(string input) pure
    {
        try { decode(input, DecodeMode.STRICT); }
        catch (DecodeException e) { return true; }
        return false;
    }

    // Input without any ampersand comes back as the very same string.
    auto plain = "hello";
    assert(decode(plain, DecodeMode.STRICT) is plain);

    // Well-formed input and its expected decoding.
    static immutable string[2][] wellFormed = [
        ["a &gt; b",       "a > b"],
        ["a &lt; b",       "a < b"],
        ["don&apos;t",     "don't"],
        ["&quot;hi&quot;", "\"hi\""],
        ["cat &amp; dog",  "cat & dog"],
        ["&#42;",          "*"],
        ["&#x2A;",         "*"],
    ];
    foreach (pair; wellFormed)
        assert(decode(pair[0], DecodeMode.STRICT) == pair[1]);

    // Malformed input: unchanged in LOOSE mode, rejected in STRICT mode.
    static immutable string[] malformed = [
        "cat & dog",
        "a &gt b",
        "&#;",
        "&#x;",
        "&#2G;",
        "&#x2G;",
    ];
    foreach (bad; malformed)
        assert(decode(bad, DecodeMode.LOOSE) == bad);
    foreach (bad; malformed)
        assert(rejectedStrictly(bad), bad);
}
525 
/*
 * An XML document: a root Element together with the text found before and
 * after it.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Document : Element
{
    // All text that occurs before the root element.
    // Defaults to <?xml version="1.0"?>
    string prolog = "<?xml version=\"1.0\"?>";

    // All text that occurs after the root element.
    // Defaults to the empty string.
    string epilog;

    /*
     * Builds a complete DOM (Document Object Model) tree by parsing XML
     * text.
     *
     * The input MUST be valid XML; the original notes state that this is
     * enforced by DocumentParser's in contract. The contract here only
     * rejects an empty string.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);
        // presumably rootTagText is a slice of s, so the pointer difference
        // is the offset at which the root tag begins -- TODO confirm
        prolog = s[0 .. rootTagText.ptr - s.ptr];
        parse(parser);
        // Whatever the parser has left after the root element.
        epilog = *parser.s;
    }

    /*
     * Builds a Document whose root element is described by a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    /*
     * Tests two Documents for equality: prolog, element content and epilog
     * must all match.
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 == d2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = toType!(const Document)(o);
        if (prolog != other.prolog)
            return false;
        if (!(cast(const) this).Element.opEquals(cast(const) other))
            return false;
        return epilog == other.epilog;
    }

    /*
     * Orders two Documents by prolog, then by element content, then by
     * epilog.
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 < d2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const other = toType!(const Document)(o);

        if (prolog != other.prolog)
        {
            if (prolog < other.prolog)
                return -1;
            return 1;
        }

        immutable int byElement = this.Element.opCmp(other);
        if (byElement != 0)
            return byElement;

        if (epilog != other.epilog)
        {
            if (epilog < other.epilog)
                return -1;
            return 1;
        }
        return 0;
    }

    /*
     * Returns the hash of a Document, combining prolog, epilog and the
     * hash of the element content.
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     */
    override size_t toHash() scope const @trusted
    {
        // const is cast away only to reach the base-class implementation.
        auto elementHash = (cast() this).Element.toHash();
        auto withEpilog = hash(epilog, elementHash);
        return hash(prolog, withEpilog);
    }

    /*
     * Returns the complete XML of the document: prolog, root element and
     * epilog, in that order.
     */
    override string toString() scope const @safe
    {
        const rootXml = super.toString();
        return prolog ~ rootXml ~ epilog;
    }
}

@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    auto source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    auto first = new Document(source);
    auto second = new Document(source);

    // Two parses of the same text are equal and usable as the same AA key.
    assert(first == second);
    assert(!(first < second));
    int[Document] table;
    table[first] = 1;
    assert(table[second] == 1);

    // Adding a child makes the second document compare greater.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
665 
/*
 * Class representing an XML element.
 *
 * An element owns a start tag and an ordered list of child items. Each
 * appended child is also recorded in a per-kind list (texts, cdatas,
 * comments, pis, elements).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; // The start tag of the element
    Item[] items; // The element's items
    Text[] texts; // The element's text items
    CData[] cdatas; // The element's CData items
    Comment[] comments; // The element's comments
    ProcessingInstruction[] pis; // The element's processing instructions
    Element[] elements; // The element's child elements

    /*
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior. No Text item is
     *          added when it is empty.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity")
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length != 0) opOpAssign!("~")(new Text(interior));
    }

    /*
     * Constructs an Element from a Tag.
     *
     * The tag is copied (name, attributes and tagString) rather than
     * shared. The copy starts out as TagType.EMPTY; appendItem() switches
     * it to TagType.START once a non-empty item is added.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (k,v;tag_.attr) tag.attr[k] = v;
        tag.tagString = tag_.tagString;
    }

    /*
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opOpAssign(string op)(Text item) @safe pure
        if (op == "~")
    {
        texts ~= item;
        appendItem(item);
    }

    /*
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opOpAssign(string op)(CData item) @safe pure
        if (op == "~")
    {
        cdatas ~= item;
        appendItem(item);
    }

    /*
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opOpAssign(string op)(Comment item) @safe pure
        if (op == "~")
    {
        comments ~= item;
        appendItem(item);
    }

    /*
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opOpAssign(string op)(ProcessingInstruction item) @safe pure
        if (op == "~")
    {
        pis ~= item;
        appendItem(item);
    }

    /*
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opOpAssign(string op)(Element item) @safe pure
        if (op == "~")
    {
        elements ~= item;
        appendItem(item);
    }

    // Records the item in document order and promotes an EMPTY tag to
    // START as soon as the element gains a non-empty child.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Fills this element from the parser: each text, CDATA, comment and
    // processing instruction becomes a child item, and each start tag
    // becomes a child Element that is parsed recursively before being
    // appended.
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { opOpAssign!("~")(new Text(s)); };
        xml.onCData = (string s) { opOpAssign!("~")(new CData(s)); };
        xml.onComment = (string s) { opOpAssign!("~")(new Comment(s)); };
        xml.onPI = (string s) { opOpAssign!("~")(new ProcessingInstruction(s)); };

        xml.onStartTag[null] = (ElementParser xml)
        {
            auto e = new Element(xml.tag);
            e.parse(xml);
            opOpAssign!("~")(e);
        };

        xml.parse();
    }

    /*
     * Compares two Elements for equality
     *
     * Two elements are equal when they have the same number of items and
     * each pair of items compares equal.
     *
     * NOTE(review): the tag is not compared here, although toHash() mixes
     * in tag.toHash(); elements that differ only in their tag therefore
     * compare equal but may hash differently. Confirm whether that is
     * intended before using Elements as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const element = toType!(const Element)(o);
        immutable len = items.length;
        if (len != element.items.length) return false;
        foreach (i; 0 .. len)
        {
            if (!items[i].opEquals(element.items[i])) return false;
        }
        return true;
    }

    /*
     * Compares two Elements
     *
     * Items are compared pairwise in order; the first unequal pair decides
     * the result. If one item list is a prefix of the other, the shorter
     * element sorts first. As in opEquals, the tag takes no part in the
     * comparison.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const element = toType!(const Element)(o);
        // size_t matches the type of items.length on every platform.
        for (size_t i = 0; ; ++i)
        {
            if (i == items.length && i == element.items.length) return 0;
            if (i == items.length) return -1;
            if (i == element.items.length) return 1;
            if (!items[i].opEquals(element.items[i]))
                return items[i].opCmp(element.items[i]);
        }
    }

    /*
     * Returns the hash of an Element
     *
     * The hash is the tag's hash plus the sum of the hashes of all items.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t hash = tag.toHash();
        foreach (item;items) hash += item.toHash();
        return hash;
    }

    const
    {
        /*
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "<title>Good &amp; Bad</title>",
         * will return "Good & Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if an item is not a Text, or if decode
         *      fails
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string buffer;
            foreach (item;items)
            {
                // Keep the item const: this method is const, and pretty()
                // performs the same downcast without stripping the
                // qualifier.
                const t = cast(const(Text)) item;
                if (t is null) throw new DecodeException(item.toString());
                buffer ~= decode(t.toString(),mode);
            }
            return buffer;
        }

        /*
         * Returns an indented string representation of this item
         *
         * An empty element is rendered as its empty tag and an element
         * holding a single Text item is rendered on one line. Otherwise
         * the start tag, every line of every child (shifted right by
         * indent) and the end tag each occupy their own array entry.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            if (items.length == 1)
            {
                auto t = cast(const(Text))(items[0]);
                if (t !is null)
                {
                    return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
                }
            }

            string[] a = [ tag.toStartString() ];
            foreach (item;items)
            {
                string[] b = item.pretty(indent);
                foreach (s;b)
                {
                    // Padding to (character count + indent) prepends
                    // exactly `indent` spaces.
                    a ~= rightJustify(s,count(s) + indent);
                }
            }
            a ~= tag.toEndString();
            return a;
        }

        /*
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writeln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string buffer = tag.toStartString();
            foreach (item;items) { buffer ~= item.toString(); }
            buffer ~= tag.toEndString();
            return buffer;
        }

        // An element is empty XML exactly when it has no items.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}
979 
/*
 * The kinds of tag a Tag object can represent.
 */
enum TagType
{
    START,  // used for start tags
    END,    // used for end tags
    EMPTY   // used for empty tags
}
989 
/*
 * Class representing an XML tag.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   // Type of tag
    string name;                    // Tag name
    string[string] attr;            // Associative array of attributes
    private string tagString;       // Source text of the tag; only set by the parsing constructor

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        // checkName takes its input by reference, so work on a copy of name
        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid attribute name:" ~ e.toString()); }
        }
    }

    /*
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException carrying the text consumed so far whenever an
     * XMLException is raised while parsing. A delimiter that cannot be found
     * (no terminator after the name, no '=' after an attribute name, no
     * closing quote) is reported the same way instead of producing an
     * out-of-range slice.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        // countUntil yields -1 when nothing matches. Turn that into the same
        // empty TagException the rest of this constructor throws, so the
        // catch block below rewraps it with the offending text.
        static size_t found(ptrdiff_t index) @safe pure
        {
            if (index < 0) throw new TagException("");
            return index;
        }

        // Drops leading ASCII whitespace. If only whitespace (or nothing)
        // is left, everything is consumed and the next required-character
        // check decides the outcome.
        static void skipWhite(ref string str) @safe pure
        {
            immutable ptrdiff_t n = str.byCodeUnit.countUntil!(a => !isWhite(a));
            str = (n < 0) ? str[$ .. $] : str[n .. $];
        }

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;

            // The name runs up to the first terminator or whitespace
            size_t i = found(s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"));
            name = s[0 .. i];
            s = s[i .. $];
            skipWhite(s);

            // Attributes: key, optional space, '=', optional space, quoted value
            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = found(s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"));
                string key = s[0 .. i];
                s = s[i .. $];

                skipWhite(s);
                reqc(s,'=');
                skipWhite(s);

                // The value ends at the next occurrence of its opening quote
                immutable char quote = requireOneOf(s,"'\"");
                i = found(s.byCodeUnit.countUntil(quote));
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                skipWhite(s);
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a legal tag
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');

            // Keep only the text that was actually consumed
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    const
    {
        /*
         * Compares two Tags for equality
         *
         * Two Tags are equal when their names, attribute tables and types
         * are all equal.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name
                && attr == tag.attr
                && type == tag.type;
        }

        /*
         * Compares two Tags
         *
         * Tags are ordered by name, then by attributes, then by type.
         * Attribute tables are ordered by their sorted key lists and, when
         * the keys match, by the values taken in key order. The result
         * therefore depends only on the contents of the two Tags.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            // Collects the keys of an attribute table in ascending order.
            static string[] sortedKeys(const string[string] table)
            {
                import std.algorithm.sorting : sort;

                string[] names;
                foreach (key, value; table) names ~= key;
                sort(names);
                return names;
            }

            const tag = toType!(const Tag)(o);

            if (name != tag.name) return name < tag.name ? -1 : 1;

            if (attr != tag.attr)
            {
                // Associative arrays have no built-in ordering (bug 10381),
                // so compare a canonical, sorted view of their contents.
                const mine = sortedKeys(attr);
                const theirs = sortedKeys(tag.attr);
                if (mine != theirs) return mine < theirs ? -1 : 1;

                foreach (key; mine)
                {
                    const lhs = attr[key];
                    const rhs = tag.attr[key];
                    if (lhs != rhs) return lhs < rhs ? -1 : 1;
                }
            }

            if (type != tag.type) return type < tag.type ? -1 : 1;
            return 0;
        }

        /*
         * Returns the hash of a Tag
         *
         * Only the name contributes to the hash.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            return .hashOf(name);
        }

        /*
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writefln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // Builds "<name key="value" ..." without the closing bracket.
            // Attribute values are passed through encode(); attributes appear
            // in the associative array's iteration order.
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            // Start-tag form: "<name ...>"
            string toStartString() @safe { return toNonEndString() ~ ">"; }

            // End-tag form: "</name>" (attributes are not written)
            string toEndString() @safe { return "</" ~ name ~ ">"; }

            // Empty-tag form: "<name ... />"
            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /*
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /*
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }

        /*
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}
1239 
/*
 * Class representing a comment
 */
class Comment : Item
{
    private string content;     // Comment body, without the "<!--" and "-->" delimiters

    /*
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /*
     * Compares two comments for equality
     *
     * Returns: true only if o is a Comment with the same body
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how this comment's body orders against
     * the other's. An Item that is not a Comment is not ordered relative to
     * this one, and 0 is returned for it.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;

        // Decide with plain statements: combining the null test and the
        // three-way result with && would collapse the result to a bool (0 or 1).
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always
}
1322 
// Regression test for https://issues.dlang.org/show_bug.cgi?id=16241:
// a body of "==" must be accepted, while "--" must still be rejected.
@safe unittest
{
    import std.exception : assertThrown;

    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    assertThrown!CommentException(new Comment("--"));
}
1331 
/*
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;     // Section body, without the surrounding delimiters

    /*
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /*
     * Compares two CDatas for equality
     *
     * Returns: true only if o is a CData with the same body
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how this section's body orders against
     * the other's. An Item that is not a CData is not ordered relative to
     * this one, and 0 is returned for it.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;

        // Decide with plain statements: combining the null test and the
        // three-way result with && would collapse the result to a bool (0 or 1).
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always
}
1411 
/*
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;     // Stored already encoded (see the constructor)

    /*
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /*
     * Compares two text sections for equality
     *
     * Returns: true only if o is a Text with the same (encoded) content
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how this section's stored content
     * orders against the other's. An Item that is not a Text is not ordered
     * relative to this one, and 0 is returned for it.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;

        // Decide with plain statements: combining the null test and the
        // three-way result with && would collapse the result to a bool (0 or 1).
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /*
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
1491 
/*
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;     // Instruction body, without the "<!" and ">" delimiters

    /*
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /*
     * Compares two XML instructions for equality
     *
     * Returns: true only if o is an XMLInstruction with the same body
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how this instruction's body orders
     * against the other's. An Item that is not an XMLInstruction is not
     * ordered relative to this one, and 0 is returned for it.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;

        // Decide with plain statements: combining the null test and the
        // three-way result with && would collapse the result to a bool (0 or 1).
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always
}
1571 
/*
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;     // Instruction body, without the "<?" and "?>" delimiters

    /*
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /*
     * Compares two processing instructions for equality
     *
     * Returns: true only if o is a ProcessingInstruction with the same body
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /*
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: -1, 0 or 1 according to how this instruction's body orders
     * against the other's. An Item that is not a ProcessingInstruction is not
     * ordered relative to this one, and 0 is returned for it.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;

        // Decide with plain statements: combining the null test and the
        // three-way result with && would collapse the result to a bool (0 or 1).
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /*
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /*
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } // Returns false always
}
1651 
/*
 * Abstract base class for XML items
 *
 * Subclasses supply equality, ordering, hashing, a string form and an
 * emptiness test; pretty() is provided here on top of toString().
 */
abstract class Item
{
    // Equality against another Item of the same type
    abstract override bool opEquals(scope const Object o) @safe const;

    // Ordering against another Item of the same type
    abstract override int opCmp(scope const Object o) @safe const;

    // Hash of this item
    abstract override size_t toHash() @safe scope const;

    // String representation of this item
    abstract override string toString() @safe scope const;

    /*
     * Returns an indented string representation of this item
     *
     * This base implementation strips surrounding whitespace from
     * toString() and returns it as a single line, or no lines at all when
     * nothing is left. It does not itself use indent.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string trimmed = strip(toString());
        if (trimmed.length == 0)
            return [];
        return [ trimmed ];
    }

    // True if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
1685 
/*
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    string xmlText;     // The document text; the inherited pointer s is aimed at this field

    /*
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML. The in contract asserts
     * that the text is non-empty and that check() accepts it; a
     * CheckException from check() is turned into an assertion failure whose
     * message is the exception text.
     *
     * NOTE: this validation lives in a contract, so it only runs in builds
     * that have contracts enabled.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length > 0);

        // Well-formedness check; report the reason if the input fails it
        try { check(xmlText_); }
        catch (CheckException failure)
        {
            assert(false, "\n" ~ failure.toString());
        }
    }
    do
    {
        xmlText = xmlText_;

        // s has to point at the text before super() runs, because the base
        // constructor dereferences it.
        s = &xmlText;
        super();

        // Original author's note: parses through the root tag, but not beyond
        parse();
    }
}
1737 
// A document whose root holds a single child element exposes exactly that
// child, both through elements and through items.
@system unittest
{
    auto tree = new Document("<root><child><grandchild/></child></root>");

    assert(tree.elements.length == 1);
    assert(tree.elements[0].tag.name == "child");
    assert(tree.items == tree.elements);
}
1745 
1746 /*
1747  * Class for parsing an XML element.
1748  *
1749  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1750  *
 * Note that you cannot construct instances of this class directly. You can
 * construct a DocumentParser (which is a subclass of ElementParser), but
 * otherwise, instances of ElementParser will be created for you by the
 * library, and passed your way via onStartTag handlers.
1755  *
1756  */
1757 class ElementParser
1758 {
1759     alias Handler = void delegate(string);
1760     alias ElementHandler = void delegate(in Element element);
1761     alias ParserHandler = void delegate(ElementParser parser);
1762 
1763     private
1764     {
1765         Tag tag_;
1766         string elementStart;
1767         string* s;
1768 
1769         Handler commentHandler = null;
1770         Handler cdataHandler = null;
1771         Handler xiHandler = null;
1772         Handler piHandler = null;
1773         Handler rawTextHandler = null;
1774         Handler textHandler = null;
1775 
1776         // Private constructor for start tags
1777         this(ElementParser parent) @safe @nogc pure nothrow
1778         {
1779             s = parent.s;
1780             this();
1781             tag_ = parent.tag_;
1782         }
1783 
1784         // Private constructor for empty tags
1785         this(Tag tag, string* t) @safe @nogc pure nothrow
1786         {
1787             s = t;
1788             this();
1789             tag_ = tag;
1790         }
1791     }
1792 
1793     /*
1794      * The Tag at the start of the element being parsed. You can read this to
1795      * determine the tag's name and attributes.
1796      */
1797     @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }
1798 
1799     /*
1800      * Register a handler which will be called whenever a start tag is
1801      * encountered which matches the specified name. You can also pass null as
1802      * the name, in which case the handler will be called for any unmatched
1803      * start tag.
1804      *
1805      * Example:
1806      * --------------
1807      * // Call this function whenever a <podcast> start tag is encountered
1808      * onStartTag["podcast"] = (ElementParser xml)
1809      * {
1810      *     // Your code here
1811      *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
1814      * };
1815      *
1816      * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1817      * // start tag is encountered
1818      * onStartTag["episode"] = &myEpisodeStartHandler;
1819      *
1820      * // call delegate dg for all other start tags
1821      * onStartTag[null] = dg;
1822      * --------------
1823      *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
1828      *
1829      * Note that your function will be called for both start tags and empty
1830      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1831      * and &lt;br/&gt;.
1832      */
1833     ParserHandler[string] onStartTag;
1834 
1835     /*
1836      * Register a handler which will be called whenever an end tag is
1837      * encountered which matches the specified name. You can also pass null as
1838      * the name, in which case the handler will be called for any unmatched
1839      * end tag.
1840      *
1841      * Example:
1842      * --------------
1843      * // Call this function whenever a </podcast> end tag is encountered
1844      * onEndTag["podcast"] = (in Element e)
1845      * {
1846      *     // Your code here
1847      *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
1850      * };
1851      *
1852      * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1853      * // end tag is encountered
1854      * onEndTag["episode"] = &myEpisodeEndHandler;
1855      *
1856      * // call delegate dg for all other end tags
1857      * onEndTag[null] = dg;
1858      * --------------
1859      *
1860      * Note that your function will be called for both start tags and empty
1861      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1862      * and &lt;br/&gt;.
1863      */
1864     ElementHandler[string] onEndTag;
1865 
1866     protected this() @safe @nogc pure nothrow
1867     {
1868         elementStart = *s;
1869     }
1870 
1871     /*
1872      * Register a handler which will be called whenever text is encountered.
1873      *
1874      * Example:
1875      * --------------
1876      * // Call this function whenever text is encountered
1877      * onText = (string s)
1878      * {
1879      *     // Your code here
1880      *
1881      *     // The passed parameter s will have been decoded by the time you see
1882      *     // it, and so may contain any character.
1883      *     //
1884      *     // This is a a closure, so code here may reference
1885      *     // variables which are outside of this scope
1886      * };
1887      * --------------
1888      */
1889     @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }
1890 
1891     /*
1892      * Register an alternative handler which will be called whenever text
1893      * is encountered. This differs from onText in that onText will decode
1894      * the text, whereas onTextRaw will not. This allows you to make design
1895      * choices, since onText will be more accurate, but slower, while
1896      * onTextRaw will be faster, but less accurate. Of course, you can
1897      * still call decode() within your handler, if you want, but you'd
1898      * probably want to use onTextRaw only in circumstances where you
1899      * know that decoding is unnecessary.
1900      *
1901      * Example:
1902      * --------------
1903      * // Call this function whenever text is encountered
1904      * onText = (string s)
1905      * {
1906      *     // Your code here
1907      *
1908      *     // The passed parameter s will NOT have been decoded.
1909      *     //
1910      *     // This is a a closure, so code here may reference
1911      *     // variables which are outside of this scope
1912      * };
1913      * --------------
1914      */
1915     @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }
1916 
1917     /*
1918      * Register a handler which will be called whenever a character data
1919      * segment is encountered.
1920      *
1921      * Example:
1922      * --------------
1923      * // Call this function whenever a CData section is encountered
1924      * onCData = (string s)
1925      * {
1926      *     // Your code here
1927      *
1928      *     // The passed parameter s does not include the opening <![CDATA[
1929      *     // nor closing ]]>
1930      *     //
1931      *     // This is a a closure, so code here may reference
1932      *     // variables which are outside of this scope
1933      * };
1934      * --------------
1935      */
1936     @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }
1937 
/*
 * Install the callback that is run for every comment met while parsing.
 *
 * Params:
 *      handler = delegate receiving the text of the comment
 *
 * Example:
 * --------------
 * // Run this code for each comment
 * onComment = (string s)
 * {
 *     // Your code here
 *
 *     // s holds only the comment text: the opening <!-- and the
 *     // closing --> are not part of it.
 *     //
 *     // This is a closure, so variables from the enclosing scope may be
 *     // referenced here.
 * };
 * --------------
 */
@property void onComment(Handler handler) @safe @nogc pure nothrow
{
    commentHandler = handler;
}
1958 
/*
 * Install the callback that is run for every processing instruction met
 * while parsing.
 *
 * Params:
 *      handler = delegate receiving the text of the instruction
 *
 * Example:
 * --------------
 * // Run this code for each processing instruction
 * onPI = (string s)
 * {
 *     // Your code here
 *
 *     // s holds only the instruction text: the opening <? and the
 *     // closing ?> are not part of it.
 *     //
 *     // This is a closure, so variables from the enclosing scope may be
 *     // referenced here.
 * };
 * --------------
 */
@property void onPI(Handler handler) @safe @nogc pure nothrow
{
    piHandler = handler;
}
1979 
/*
 * Install the callback that is run for every XML instruction met while
 * parsing.
 *
 * Params:
 *      handler = delegate receiving the text of the instruction
 *
 * Example:
 * --------------
 * // Run this code for each XML instruction
 * // (Note: XML instructions may only occur preceding the root tag of a
 * // document).
 * onXI = (string s)
 * {
 *     // Your code here
 *
 *     // s holds only the instruction text: the opening <! and the
 *     // closing > are not part of it.
 *     //
 *     // This is a closure, so variables from the enclosing scope may be
 *     // referenced here.
 * };
 * --------------
 */
@property void onXI(Handler handler) @safe @nogc pure nothrow
{
    xiHandler = handler;
}
2002 
/*
 * Parse an XML element.
 *
 * Input is consumed item by item until the end tag matching the current
 * element has been processed (or the input runs out). Each comment, CDATA
 * section, XML instruction, processing instruction, tag and run of text
 * triggers the handler registered for it, if there is one.
 *
 * Throws: various kinds of XMLException
 */
void parse()
{
    import std.algorithm.searching : startsWith;
    import std.string : indexOf;

    // Run the start-tag handler registered under `name`; when there is
    // none, fall back to the handler stored under the null key.
    void fireStart(string name, ElementParser child)
    {
        auto hook = name in onStartTag;
        if (hook is null) hook = null in onStartTag;
        if (hook !is null) (*hook)(child);
    }

    // Same lookup rule as fireStart, applied to the end-tag handlers.
    void fireEnd(string name, Element element)
    {
        auto hook = name in onEndTag;
        if (hook is null) hook = null in onEndTag;
        if (hook !is null) (*hook)(element);
    }

    // Consume one "opener payload closer" construct. The handler sees the
    // payload before the closer is removed from the input.
    void consumeMarkup(size_t openerLength, string closer, Handler hook)
    {
        chop(*s, openerLength);
        string payload = chop(*s, indexOf(*s, closer));
        if (hook.funcptr !is null) hook(payload);
        chop(*s, closer.length);
    }

    const Tag root = tag_;

    // Most recent start tag seen for each element name
    Tag[string] openTags;
    if (tag_ !is null) openTags[tag_.name] = tag_;

    while (s.length != 0)
    {
        if (startsWith(*s, "<!--"))
        {
            consumeMarkup(4, "-->", commentHandler);
        }
        else if (startsWith(*s, "<![CDATA["))
        {
            consumeMarkup(9, "]]>", cdataHandler);
        }
        else if (startsWith(*s, "<!"))
        {
            consumeMarkup(2, ">", xiHandler);
        }
        else if (startsWith(*s, "<?"))
        {
            consumeMarkup(2, "?>", piHandler);
        }
        else if (!startsWith(*s, "<"))
        {
            // Plain text up to the next markup. The raw handler, when
            // present, wins over the decoding one.
            string raw = chop(*s, indexOf(*s, "<"));
            if (rawTextHandler.funcptr !is null)
                rawTextHandler(raw);
            else if (textHandler.funcptr !is null)
                textHandler(decode(raw, DecodeMode.LOOSE));
        }
        else
        {
            tag_ = new Tag(*s, true);
            if (root is null)
                return; // Return to constructor of derived class

            if (tag_.isStart)
            {
                openTags[tag_.name] = tag_;

                auto child = new ElementParser(this);
                fireStart(tag_.name, child);
            }
            else if (tag_.isEnd)
            {
                const opening = openTags[tag_.name];

                if (opening.tagString.length == 0)
                    assert(0);

                // The element's content is whatever lies in the input
                // between the end of the start tag and the beginning of
                // this end tag.
                immutable(char)* contentBegin = opening.tagString.ptr
                    + opening.tagString.length;
                immutable(char)* contentEnd = &tag_.tagString[0];
                string content = decode(
                    contentBegin[0 .. (contentEnd - contentBegin)],
                    DecodeMode.LOOSE);

                auto element = new Element(opening);
                if (content.length != 0) element ~= new Text(content);

                fireEnd(tag_.name, element);

                if (tag_.name == root.name) return;
            }
            else if (tag_.isEmpty)
            {
                // An empty-element tag is reported as a start tag
                // immediately followed by an end tag.
                Tag pretendStart = new Tag(tag_.name);

                // FIX by hed010gy
                // https://issues.dlang.org/show_bug.cgi?id=2979
                foreach (attrName, attrValue; tag_.attr)
                    pretendStart.attr[attrName] = attrValue;
                // END FIX

                // Handle the pretend start tag
                string nothing;
                auto child = new ElementParser(pretendStart, &nothing);
                fireStart(pretendStart.name, child);

                // Handle the pretend end tag
                auto element = new Element(pretendStart);
                fireEnd(tag_.name, element);
            }
        }
    }
}
2140 
/*
 * Returns the portion of the element that has been consumed so far, i.e.
 * elementStart minus the still-unparsed remainder.
 */
override string toString() const @nogc @safe pure nothrow
{
    immutable remaining = s.length;
    assert(elementStart.length >= remaining);
    return elementStart[0 .. $ - remaining];
}
2149 
2150 }
2151 
2152 private
2153 {
/*
 * Mixed into every check* routine. Expects a variable `s` (the remaining
 * input) in the scope it is mixed into, remembers its value on entry and
 * provides fail() overloads that rewind `s` and throw an Err labelled
 * with `msg`.
 */
template Check(string msg)
{
    // Input as it was when the enclosing routine started
    string old = s;

    // Rewind and report that this production failed.
    void fail() @safe pure
    {
        s = old;
        throw new Err(old, msg);
    }

    // Rewind and report that this production failed because of `e`.
    void fail(Err e) @safe pure
    {
        s = old;
        throw new Err(old, msg, e);
    }

    // Report a specific message, positioned at the current (not yet
    // rewound) input, wrapped in this production's failure.
    void fail(string msg2) @safe pure
    {
        auto cause = new Err(s, msg2);
        fail(cause);
    }
}
2175 
// Misc: a comment, a processing instruction or white space.
void checkMisc(ref string s) @safe pure // rule 27
{
    import std.algorithm.searching : startsWith;

    mixin Check!"Misc";

    immutable bool isComment = s.startsWith("<!--");
    immutable bool isPI = !isComment && s.startsWith("<?");

    try
    {
        if (isComment)
            checkComment(s);
        else if (isPI)
            checkPI(s);
        else
            checkSpace(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2190 
// Document: prolog, one root element, then any number of Misc items.
void checkDocument(ref string s) @safe pure // rule 1
{
    mixin Check!"Document";

    try
    {
        checkProlog(s);
        checkElement(s);
        star!checkMisc(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2202 
// Fails at the first code point of s for which isChar() is false.
void checkChars(ref string s) @safe pure // rule 2
{
    // TO DO - Fix std.utf stride and decode functions, then use those
    // instead
    import std.format : format;

    mixin Check!"Chars";

    bool found = false;
    dchar offender;
    size_t offset;
    // The index must not be smaller than size_t because size_t is used
    // internally in aApply.d and it will be cast e.g to (int *) which fails
    // on BigEndian targets.
    foreach (size_t pos, dchar ch; s)
    {
        if (isChar(ch)) continue;
        found = true;
        offender = ch;
        offset = pos;
        break;
    }

    if (!found) return;

    // Position the error at the offending character
    s = s[offset .. $];
    fail(format("invalid character: U+%04X", offender));
}
2230 
// Consumes leading white space; fails when there is none to consume.
void checkSpace(ref string s) @safe pure // rule 3
{
    import std.algorithm.searching : countUntil;
    import std.ascii : isWhite;
    import std.utf : byCodeUnit;

    mixin Check!"Whitespace";

    immutable ptrdiff_t firstNonWhite =
        s.byCodeUnit.countUntil!(a => !isWhite(a));

    if (firstNonWhite > -1)
    {
        s = s[firstNonWhite .. $];
    }
    else if (s.length > 0 && isWhite(s[0]))
    {
        // Nothing but white space: everything is consumed
        s = s[$ .. $];
    }

    // Identity comparison: s is untouched when no white space was skipped
    if (s is old) fail();
}
2245 
/*
 * Consumes an XML Name from the front of s and returns it in name.
 *
 * The first character must be a letter, '_' or ':'. Following characters
 * may additionally be digits, '-', '.', combining characters or extenders.
 * Fails when s is empty or does not begin with a valid first character.
 */
void checkName(ref string s, out string name) @safe pure // rule 5
{
    mixin Check!("Name");

    if (s.length == 0) fail();

    // Length of the name in code units. Starts out as the whole input so
    // that a name which runs to the very end of s is returned in full
    // (previously it stayed 0 in that case, giving an empty name and
    // consuming nothing).
    size_t n = s.length;

    // 'i' must not be smaller than size_t because size_t is used internally in
    // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
    foreach (size_t i, dchar c; s)
    {
        if (c == '_' || c == ':' || isLetter(c)) continue;
        if (i == 0) fail();
        if (c == '-' || c == '.' || isDigit(c)
            || isCombiningChar(c) || isExtender(c)) continue;
        // First character that cannot be part of a name
        n = i;
        break;
    }
    name = s[0 .. n];
    s = s[n .. $];
}
2266 
/*
 * Consumes a quoted attribute value from the front of s.
 *
 * The value must be enclosed in matching single or double quotes, must not
 * contain '<', and every '&' must begin a well-formed reference.
 *
 * The previous implementation jumped straight to the closing quote, which
 * skipped the '<' and reference checks and raised a RangeError (rather
 * than a check failure) when the closing quote was missing.
 */
void checkAttValue(ref string s) @safe pure // rule 10
{
    mixin Check!("AttValue");

    if (s.length == 0) fail();
    immutable char quote = s[0];
    if (quote != '\u0022' && quote != '\u0027')
        fail("attribute value requires quotes");
    s = s[1 .. $];
    for (;;)
    {
        // Skip ordinary characters; stop at the closing quote, at '<' or
        // at the start of a reference (or at the end of the input).
        size_t n = 0;
        while (n < s.length && s[n] != quote && s[n] != '<' && s[n] != '&')
            ++n;
        s = s[n .. $];

        if (s.length == 0) fail("unterminated attribute value");
        if (s[0] == '<') fail("< found in attribute value");
        if (s[0] == quote) break;
        // Only '&' is left: it must introduce a valid reference
        try { checkReference(s); } catch (Err e) { fail(e); }
    }
    // Drop the closing quote
    s = s[1 .. $];
}
2289 
// Consumes character data up to the next '&' or '<' (or the end of the
// input); fails if the sequence "]]>" is met on the way.
void checkCharData(ref string s) @safe pure // rule 14
{
    import std.algorithm.searching : startsWith;

    mixin Check!"CharData";

    for (; s.length != 0; s = s[1 .. $])
    {
        immutable char first = s[0];
        if (first == '&' || first == '<') return;
        if (s.startsWith("]]>")) fail("]]> found within char data");
    }
}
2304 
// Consumes a comment. The first "--" after the opener must be the start
// of the closing "-->".
void checkComment(ref string s) @safe pure // rule 15
{
    import std.string : indexOf;

    mixin Check!"Comment";

    try
    {
        checkLiteral("<!--", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    immutable ptrdiff_t dashes = s.indexOf("--");
    if (dashes == -1) fail("unterminated comment");
    s = s[dashes .. $];

    try
    {
        checkLiteral("-->", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2317 
// Consumes a processing instruction: "<?" through the next "?>".
void checkPI(ref string s) @safe pure // rule 16
{
    mixin Check!"PI";

    try
    {
        checkLiteral("<?", s);
        checkEnd("?>", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2329 
// Consumes a CDATA section: the cdata opener through the next "]]>".
void checkCDSect(ref string s) @safe pure // rule 18
{
    mixin Check!"CDSect";

    try
    {
        checkLiteral(cdata, s);
        checkEnd("]]>", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2341 
// Prolog: optional XML declaration, any Misc items, then an optional
// DOCTYPE declaration followed by more Misc items.
void checkProlog(ref string s) @safe pure // rule 22
{
    mixin Check!"Prolog";

    try
    {
        /* The XML declaration is optional
         * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
         */
        opt!checkXMLDecl(s);

        star!checkMisc(s);
        opt!(seq!(checkDocTypeDecl, star!checkMisc))(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2358 
// XML declaration: "<?xml", version info, optional encoding and
// standalone declarations, optional white space, "?>".
void checkXMLDecl(ref string s) @safe pure // rule 23
{
    mixin Check!"XMLDecl";

    try
    {
        checkLiteral("<?xml", s);
        checkVersionInfo(s);
        opt!checkEncodingDecl(s);
        opt!checkSDDecl(s);
        opt!checkSpace(s);
        checkLiteral("?>", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2374 
// Version info: white space, "version", '=', quoted version number.
void checkVersionInfo(ref string s) @safe pure // rule 24
{
    mixin Check!"VersionInfo";

    try
    {
        checkSpace(s);
        checkLiteral("version", s);
        checkEq(s);
        quoted!checkVersionNum(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2388 
// An '=' sign, optionally surrounded by white space.
void checkEq(ref string s) @safe pure // rule 25
{
    mixin Check!"Eq";

    try
    {
        opt!checkSpace(s);
        checkLiteral("=", s);
        opt!checkSpace(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2401 
/*
 * Consumes a version number (the text between the quotes of the version
 * attribute in the XML declaration) from the front of s.
 *
 * Accepts one or more of the characters a-z, A-Z, 0-9, '_', '.', ':' and
 * '-'; fails if s does not begin with at least one of them.
 *
 * The previous implementation skipped ahead to the next double quote. That
 * broke declarations quoted with apostrophes and raised a RangeError
 * (rather than a check failure) when no double quote followed.
 */
void checkVersionNum(ref string s) @safe pure // rule 26
{
    import std.ascii : isAlphaNum;

    mixin Check!("VersionNum");

    size_t n = 0;
    while (n < s.length)
    {
        immutable char ch = s[n];
        immutable bool allowed = isAlphaNum(ch)
            || ch == '_' || ch == '.' || ch == ':' || ch == '-';
        if (!allowed) break;
        ++n;
    }

    // At least one version character is required
    if (n == 0) fail();
    s = s[n .. $];
}
2412 
// DOCTYPE declaration: "<!DOCTYPE" through the next ">". The content in
// between is not inspected.
void checkDocTypeDecl(ref string s) @safe pure // rule 28
{
    mixin Check!"DocTypeDecl";

    try
    {
        checkLiteral("<!DOCTYPE", s);
        //
        // TO DO -- ensure DOCTYPE is well formed
        // (But not yet. That's one of our "future directions")
        //
        checkEnd(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2428 
// Standalone declaration: white space, "standalone", '=', then yes or no
// in single or double quotes.
void checkSDDecl(ref string s) @safe pure // rule 32
{
    import std.algorithm.searching : startsWith;

    mixin Check!"SDDecl";

    try
    {
        checkSpace(s);
        checkLiteral("standalone", s);
        checkEq(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    // Number of code units taken up by the quoted value
    size_t consumed = 0;
    immutable bool isYes = s.startsWith("'yes'") || s.startsWith("\"yes\"");
    if (isYes)
    {
        consumed = 5;
    }
    else
    {
        immutable bool isNo = s.startsWith("'no'") || s.startsWith("\"no\"");
        if (!isNo)
            fail("standalone attribute value must be 'yes', \"yes\","
                ~ " 'no' or \"no\"");
        consumed = 4;
    }
    s = s[consumed .. $];
}
2450 
// Element: either a single empty-element tag, or a start tag, content and
// an end tag whose name equals the start tag's name.
void checkElement(ref string s) @safe pure // rule 39
{
    mixin Check!"Element";

    string startName, endName, tagKind;
    try
    {
        checkTag(s, tagKind, startName);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    // Anything other than a start tag is complete already
    if (tagKind != "STag") return;

    // Input position of the end tag, used to place a name-mismatch error
    string endTagAt;
    try
    {
        checkContent(s);
        endTagAt = s;
        checkETag(s, endName);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    if (startName == endName) return;

    s = endTagAt;
    fail("end tag name \"" ~ endName
        ~ "\" differs from start tag name \"" ~ startName ~ "\"");
}
2476 
// rules 40 and 44
//
// Consumes "<", a name, any attributes and the closing ">" or "/>".
// type is set to "STag" for a start tag and to "ETag" when the tag is
// closed with "/>"; name receives the tag's name.
void checkTag(ref string s, out string type, out string name) @safe pure
{
    mixin Check!"Tag";

    try
    {
        type = "STag";
        checkLiteral("<", s);
        checkName(s, name);
        star!(seq!(checkSpace, checkAttribute))(s);
        opt!checkSpace(s);

        immutable bool slashBeforeClose = s.length != 0 && s[0] == '/';
        if (slashBeforeClose)
        {
            type = "ETag";
            s = s[1 .. $];
        }
        checkLiteral(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2498 
// Attribute: a name, '=', and a quoted value. The name is not returned.
void checkAttribute(ref string s) @safe pure // rule 41
{
    mixin Check!"Attribute";

    try
    {
        string ignoredName;
        checkName(s, ignoredName);
        checkEq(s);
        checkAttValue(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2512 
// End tag: "</", a name (returned in name), optional white space, ">".
void checkETag(ref string s, out string name) @safe pure // rule 42
{
    mixin Check!"ETag";

    try
    {
        checkLiteral("</", s);
        checkName(s, name);
        opt!checkSpace(s);
        checkLiteral(">", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2526 
// Element content: any mix of references, comments, processing
// instructions, CDATA sections, child elements and character data, up to
// (but not including) the next end tag or the end of the input.
void checkContent(ref string s) @safe pure // rule 43
{
    import std.algorithm.searching : startsWith;

    mixin Check!"Content";

    try
    {
        while (s.length != 0)
        {
            // A failure is reported at the start of the item being checked
            old = s;

            // The end tag belongs to the enclosing element
            if (s.startsWith("</")) break;

            if (s.startsWith("&"))
                checkReference(s);
            else if (s.startsWith("<!--"))
                checkComment(s);
            else if (s.startsWith("<?"))
                checkPI(s);
            else if (s.startsWith(cdata))
                checkCDSect(s);
            else if (s.startsWith("<"))
                checkElement(s);
            else
                checkCharData(s);
        }
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2549 
/*
 * Consumes a numeric character reference ("&#" decimal digits ";" or
 * "&#x" hexadecimal digits ";") from the front of s and returns the
 * referenced code point in c.
 *
 * Fails when the reference has no digits, is not terminated by ';', or
 * names a value for which isChar() is false.
 *
 * The value is accumulated with saturation: once it exceeds U+10FFFF it is
 * no longer multiplied, so an over-long digit string can no longer wrap
 * around 32 bits and be mistaken for a legal character. In that case the
 * value shown in the error message is the saturated one, not the literal
 * number written in the document.
 */
void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
{
    import std.format : format;

    mixin Check!("CharRef");

    c = 0;
    try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
    uint radix = 10;
    if (s.length != 0 && s[0] == 'x')
    {
        s = s[1..$];
        radix = 16;
    }
    if (s.length == 0) fail("unterminated character reference");
    if (s[0] == ';')
        fail("character reference must have at least one digit");

    enum uint maxCodePoint = 0x10FFFF;
    uint value = 0;
    while (s.length != 0)
    {
        immutable char d = s[0];
        uint digit;
        if (d >= '0' && d <= '9')      digit = d - '0';
        else if (d >= 'a' && d <= 'f') digit = d - 'a' + 10;
        else if (d >= 'A' && d <= 'F') digit = d - 'A' + 10;
        else break;                    // not a digit in any radix
        if (digit >= radix) break;     // hex letter in a decimal reference

        // Saturate instead of overflowing; the digit is still consumed
        if (value <= maxCodePoint) value = value * radix + digit;
        s = s[1..$];
    }
    c = cast(dchar) value;
    if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
    if (s.length == 0 || s[0] != ';') fail("expected ;");
    else s = s[1..$];
}
2600 
// Reference: a character reference when s starts with "&#", otherwise an
// entity reference.
void checkReference(ref string s) @safe pure // rule 67
{
    import std.algorithm.searching : startsWith;

    mixin Check!"Reference";

    try
    {
        if (!s.startsWith("&#"))
        {
            checkEntityRef(s);
        }
        else
        {
            dchar ignored;
            checkCharRef(s, ignored);
        }
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2615 
// Entity reference: '&', a name, ';'. The name is not returned.
void checkEntityRef(ref string s) @safe pure // rule 68
{
    mixin Check!"EntityRef";

    try
    {
        string ignoredName;
        checkLiteral("&", s);
        checkName(s, ignoredName);
        checkLiteral(";", s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2629 
/*
 * Consumes an encoding name (the text between the quotes of the encoding
 * attribute in the XML declaration) from the front of s.
 *
 * The name must start with an ASCII letter and may continue with ASCII
 * letters, digits, '.', '_' and '-'. Fails if s does not begin with a
 * letter.
 *
 * The previous implementation skipped ahead to the next quote character,
 * which accepted arbitrary characters inside the name and raised a
 * RangeError (rather than a check failure) when the input ended before a
 * quote or a non-letter was found.
 */
void checkEncName(ref string s) @safe pure // rule 81
{
    import std.ascii : isAlpha, isAlphaNum;

    mixin Check!("EncName");

    if (s.length == 0 || !isAlpha(s[0])) fail();

    size_t n = 1;
    while (n < s.length)
    {
        immutable char ch = s[n];
        immutable bool allowed = isAlphaNum(ch)
            || ch == '.' || ch == '_' || ch == '-';
        if (!allowed) break;
        ++n;
    }
    s = s[n .. $];
}
2642 
// Encoding declaration: white space, "encoding", '=', quoted name.
void checkEncodingDecl(ref string s) @safe pure // rule 80
{
    mixin Check!"EncodingDecl";

    try
    {
        checkSpace(s);
        checkLiteral("encoding", s);
        checkEq(s);
        quoted!checkEncName(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2656 
2657     // Helper functions
2658 
// Consumes `literal` from the front of s; fails if s does not start
// with it.
void checkLiteral(string literal, ref string s) @safe pure
{
    import std.string : startsWith;

    mixin Check!"Literal";

    immutable bool present = s.startsWith(literal);
    if (!present)
        fail("Expected literal \"" ~ literal ~ "\"");

    s = s[literal.length .. $];
}
2668 
// Consumes everything up to and including the first occurrence of `end`;
// throws if `end` does not occur in s.
void checkEnd(string end, ref string s) @safe pure
{
    import std.string : indexOf;
    // Deliberately no mixin Check here.

    immutable ptrdiff_t at = s.indexOf(end);
    if (at == -1)
        throw new Err(s, "Unable to find terminating \"" ~ end ~ "\"");

    // Skip to the terminator, then consume the terminator itself
    s = s[at .. $];
    checkLiteral(end, s);
}
2679 
2680     // Metafunctions -- none of these use mixin Check
2681 
// Optional item: runs f and ignores an Err thrown by it.
void opt(alias f)(ref string s)
{
    try
    {
        f(s);
    }
    catch (Err)
    {
        // The item is optional, so its absence is not an error
    }
}
2686 
// One or more: f must succeed once, then repeats as long as it succeeds.
void plus(alias f)(ref string s)
{
    f(s);
    star!f(s);
}
2692 
// Zero or more: runs f repeatedly until the input is empty or f throws
// an Err, which is swallowed.
void star(alias f)(ref string s)
{
    for (;;)
    {
        if (s.length == 0) return;

        try
        {
            f(s);
        }
        catch (Err)
        {
            return;
        }
    }
}
2701 
// Runs f between a pair of matching quotes. Apostrophes are used when s
// starts with one; otherwise double quotes are required.
void quoted(alias f)(ref string s)
{
    import std.string : startsWith;

    immutable string quote = s.startsWith("'") ? "'" : "\"";

    checkLiteral(quote, s);
    f(s);
    checkLiteral(quote, s);
}
2719 
// Sequence: runs f, then g, on the same input.
void seq(alias f, alias g)(ref string s)
{
    f(s);   // first item
    g(s);   // second item, starting where the first one stopped
}
2725 }
2726 
/*
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s) @safe pure
{
    // The checkers advance s as they go, so keep the untouched document:
    // line and column numbers must be computed against the whole text.
    // (Previously the "junk" error below was completed against the
    // remaining junk only and therefore always reported line 1, column 1.)
    immutable string entire = s;

    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        e.complete(entire);
        throw e;
    }
}
2753 
// check() must reject a document whose end tag name differs from its start
// tag name, and the report must name both tags.
@system pure unittest
{
    import std.string : indexOf;

    string catalog = q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]";

    bool rejected = false;
    try
    {
        check(catalog);
    }
    catch (CheckException e)
    {
        rejected = true;
        string report = e.toString();
        auto found = report.indexOf("end tag name \"genres\" differs"
            ~ " from start tag name \"genre\"");
        assert(found != -1);
    }
    assert(rejected);
}
2802 
// check() must accept a small well-formed document that contains a comment
// between child elements.
@system unittest
{
    string s = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";

    CheckException failure = null;
    try
    {
        check(s);
    }
    catch (CheckException e)
    {
        failure = e;
    }

    if (failure !is null)
        assert(0, failure.toString());
}
2822 
// Attribute values may contain the quote character that is not used to
// delimit them; the start-tag handler must see the values unchanged.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                        xmlns:stream="http://etherx.'jabber'.org/streams"
                        xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                        xml:lang="en" version="1.0" attr='a"b"c'>
                        </stream:stream></r>`;

    auto parser = new DocumentParser(test_xml);
    bool handlerRan = false;

    parser.onStartTag["stream:stream"] = (ElementParser element)
    {
        auto attributes = element.tag.attr;
        assert(attributes["xmlns"] == "jabber:'client'");
        assert(attributes["from"] == "jid.pl");
        assert(attributes["attr"] == "a\"b\"c");
        handlerRan = true;
    };

    parser.parse();
    assert(handlerRan);
}
2842 
// Entity references must be decoded both in attribute values and in the
// text handed to end-tag handlers.
@system unittest
{
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto document = new DocumentParser(s);

    document.onStartTag["Test"] = (ElementParser element)
    {
        assert(element.tag.attr["thing"] == "What & Up");
    };

    document.onEndTag["Test"] = (in Element element)
    {
        assert(element.text() == "What & Up Second");
    };

    document.parse();
}
2861 
// A document consisting of one empty tag with an encoded attribute value
// must print back exactly as it was written.
@system unittest
{
    string source = `<tag attr="&quot;value&gt;" />`;
    auto document = new Document(source);
    string printed = document.toString();
    assert(printed == source);
}
2868 
/* Common ancestor of every exception type thrown by this module */
class XMLException : Exception
{
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2871 
2872 // Other exceptions
2873 
// Thrown during Comment constructor
class CommentException : XMLException
{
    // Only code inside this module may create instances
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2877 
// Thrown during CData constructor
class CDataException : XMLException
{
    // Only code inside this module may create instances
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2881 
// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Only code inside this module may create instances
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2885 
// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Only code inside this module may create instances
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2889 
// Thrown during Text constructor
class TextException : XMLException
{
    // Only code inside this module may create instances
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2893 
// Thrown during decode()
class DecodeException : XMLException
{
    // Only code inside this module may create instances
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2897 
// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Only code inside this module may create instances
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2901 
// Thrown when parsing for Tags
class TagException : XMLException
{
    // Only code inside this module may create instances
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2905 
/*
 * Thrown during check()
 *
 * Instances form a chain through err. toString() prints the chain with
 * the linked entries first, one line per entry.
 */
class CheckException : XMLException
{
    CheckException err; // Parent in hierarchy
    private string tail; // Input that was still unparsed at the failure
    /*
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; // Line number at which parse failure occurred
    size_t column = 0; // Column number at which parse failure occurred

    private this(string tail, string msg, Err err = null) @safe pure
    {
        super(null);
        this.err = err;
        this.msg = msg;
        this.tail = tail;
    }

    // Fills in line and column for this entry and for every entry linked
    // through err, given the complete text that was being checked.
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        // Everything that precedes the failure position
        string consumed = entire[0 .. $ - tail.length];

        // Offset of the first character of the line holding the failure
        ptrdiff_t lineStart = consumed.lastIndexOf('\n') + 1;

        line = consumed.count("\n") + 1;

        // Columns are counted in code points, not in UTF-8 code units
        dstring currentLine = toUTF32(consumed[lineStart .. $]);
        column = currentLine.length + 1;

        if (err !is null) err.complete(entire);
    }

    override string toString() const @safe pure
    {
        import std.format : format;

        string report;
        if (line != 0)
            report = format("Line %d, column %d: ", line, column);
        report ~= msg;
        report ~= '\n';

        if (err is null) return report;
        return err.toString() ~ report;
    }
}
2954 
2955 private alias Err = CheckException;
2956 
2957 // Private helper functions
2958 
2959 private
2960 {
2961     inout(T) toType(T)(inout return scope Object o)
2962     {
2963         T t = cast(T)(o);
2964         if (t is null)
2965         {
2966             throw new InvalidTypeException("Attempt to compare a "
2967                 ~ T.stringof ~ " with an instance of another type");
2968         }
2969         return t;
2970     }
2971 
2972     string chop(ref string s, size_t n) @safe pure nothrow
2973     {
2974         if (n == -1) n = s.length;
2975         string t = s[0 .. n];
2976         s = s[n..$];
2977         return t;
2978     }
2979 
2980     bool optc(ref string s, char c) @safe pure nothrow
2981     {
2982         immutable bool b = s.length != 0 && s[0] == c;
2983         if (b) s = s[1..$];
2984         return b;
2985     }
2986 
2987     void reqc(ref string s, char c) @safe pure
2988     {
2989         if (s.length == 0 || s[0] != c) throw new TagException("");
2990         s = s[1..$];
2991     }
2992 
2993     char requireOneOf(ref string s, string chars) @safe pure
2994     {
2995         import std.string : indexOf;
2996 
2997         if (s.length == 0 || indexOf(chars,s[0]) == -1)
2998             throw new TagException("");
2999         immutable char ch = s[0];
3000         s = s[1..$];
3001         return ch;
3002     }
3003 
3004     alias hash = .hashOf;
3005 
3006     // Definitions from the XML specification
3007     immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
3008         0x10000,0x10FFFF];
3009     immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
3010         0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
3011         0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
3012         0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
3013         0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
3014         0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
3015         0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
3016         0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
3017         0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
3018         0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
3019         0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
3020         0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
3021         0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
3022         0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
3023         0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
3024         0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
3025         0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
3026         0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
3027         0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
3028         0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
3029         0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
3030         0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
3031         0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
3032         0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
3033         0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
3034         0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
3035         0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
3036         0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
3037         0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
3038         0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
3039         0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
3040         0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
3041         0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
3042         0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
3043         0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
3044         0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
3045         0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
3046         0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
3047         0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
3048         0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
3049         0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
3050     immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
3051     immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
3052         0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
3053         0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
3054         0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
3055         0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
3056         0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
3057         0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
3058         0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
3059         0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
3060         0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
3061         0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
3062         0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
3063         0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
3064         0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
3065         0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
3066         0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
3067         0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
3068         0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
3069         0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
3070         0x3099,0x3099,0x309A,0x309A];
3071     immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
3072         0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
3073         0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
3074         0x0ED9,0x0F20,0x0F29];
3075     immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
3076         0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
3077         0x3035,0x309D,0x309E,0x30FC,0x30FE];
3078 
3079     bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
3080     {
3081         while (table.length != 0)
3082         {
3083             auto m = (table.length >> 1) & ~1;
3084             if (c < table[m])
3085             {
3086                 table = table[0 .. m];
3087             }
3088             else if (c > table[m+1])
3089             {
3090                 table = table[m+2..$];
3091             }
3092             else return true;
3093         }
3094         return false;
3095     }
3096 
3097     string startOf(string s) @safe nothrow pure
3098     {
3099         string r;
3100         foreach (char c;s)
3101         {
3102             r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
3103             if (r.length >= 40) { r ~= "___"; break; }
3104         }
3105         return r;
3106     }
3107 
3108     void exit(string s=null)
3109     {
3110         throw new XMLException(s);
3111     }
3112 }