1 // Written in the D programming language. 2 3 /** 4 $(RED Warning: This module is considered out-dated and not up to Phobos' 5 current standards. It will be removed from Phobos in 2.101.0. 6 If you still need it, go to $(LINK https://github.com/DigitalMars/undeaD)) 7 */ 8 9 /* 10 Classes and functions for creating and parsing XML 11 12 The basic architecture of this module is that there are standalone functions, 13 classes for constructing an XML document from scratch (Tag, Element and 14 Document), and also classes for parsing a pre-existing XML file (ElementParser 15 and DocumentParser). The parsing classes <i>may</i> be used to build a 16 Document, but that is not their primary purpose. The handling capabilities of 17 DocumentParser and ElementParser are sufficiently customizable that you can 18 make them do pretty much whatever you want. 19 20 Example: This example creates a DOM (Document Object Model) tree 21 from an XML file. 22 ------------------------------------------------------------------------------ 23 import undead.xml; 24 import std.stdio; 25 import std.string; 26 import std.file; 27 28 // books.xml is used in various samples throughout the Microsoft XML Core 29 // Services (MSXML) SDK. 30 // 31 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx 32 33 void main() 34 { 35 string s = cast(string) std.file.read("books.xml"); 36 37 // Check for well-formedness 38 check(s); 39 40 // Make a DOM tree 41 auto doc = new Document(s); 42 43 // Plain-print it 44 writeln(doc); 45 } 46 ------------------------------------------------------------------------------ 47 48 Example: This example does much the same thing, except that the file is 49 deconstructed and reconstructed by hand. This is more work, but the 50 techniques involved offer vastly more power. 
51 ------------------------------------------------------------------------------ 52 import undead.xml; 53 import std.stdio; 54 import std.string; 55 56 struct Book 57 { 58 string id; 59 string author; 60 string title; 61 string genre; 62 string price; 63 string pubDate; 64 string description; 65 } 66 67 void main() 68 { 69 string s = cast(string) std.file.read("books.xml"); 70 71 // Check for well-formedness 72 check(s); 73 74 // Take it apart 75 Book[] books; 76 77 auto xml = new DocumentParser(s); 78 xml.onStartTag["book"] = (ElementParser xml) 79 { 80 Book book; 81 book.id = xml.tag.attr["id"]; 82 83 xml.onEndTag["author"] = (in Element e) { book.author = e.text(); }; 84 xml.onEndTag["title"] = (in Element e) { book.title = e.text(); }; 85 xml.onEndTag["genre"] = (in Element e) { book.genre = e.text(); }; 86 xml.onEndTag["price"] = (in Element e) { book.price = e.text(); }; 87 xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text(); }; 88 xml.onEndTag["description"] = (in Element e) { book.description = e.text(); }; 89 90 xml.parse(); 91 92 books ~= book; 93 }; 94 xml.parse(); 95 96 // Put it back together again; 97 auto doc = new Document(new Tag("catalog")); 98 foreach (book;books) 99 { 100 auto element = new Element("book"); 101 element.tag.attr["id"] = book.id; 102 103 element ~= new Element("author", book.author); 104 element ~= new Element("title", book.title); 105 element ~= new Element("genre", book.genre); 106 element ~= new Element("price", book.price); 107 element ~= new Element("publish-date",book.pubDate); 108 element ~= new Element("description", book.description); 109 110 doc ~= element; 111 } 112 113 // Pretty-print it 114 writefln(join(doc.pretty(3),"\n")); 115 } 116 ------------------------------------------------------------------------------- 117 Copyright: Copyright Janice Caron 2008 - 2009. 118 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 
Authors:   Janice Caron
Source:    $(PHOBOSSRC undead.xml.d)
*/
/*
         Copyright Janice Caron 2008 - 2009.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
         http://www.boost.org/LICENSE_1_0.txt)
*/
module undead.xml;

// Literal that opens a CDATA section.
enum cdata = "<![CDATA[";

/*
 * Tests whether a code point is a legal character under the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true for tab, line feed, carriage return, U+0020..U+D7FF and
 *          U+E000..U+10FFFF except U+FFFE and U+FFFF; false otherwise.
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    if (c > 0xD7FF)
    {
        // Everything between U+D7FF and U+E000, and anything past U+10FFFF,
        // is rejected.
        if (c < 0xE000 || c > 0x10FFFF)
            return false;
        // Within this range only U+FFFE and U+FFFF fail the mask test.
        return (c & 0x1FFFFE) != 0xFFFE;
    }

    // Below U+0020 only tab, line feed and carriage return are accepted.
    if (c < 0x20)
        return c == 0x9 || c == 0xA || c == 0xD;

    return true;
}

@safe @nogc nothrow pure unittest
{
    assert(!isChar(cast(dchar) 0x8));
    assert( isChar(cast(dchar) 0x9));
    assert( isChar(cast(dchar) 0xA));
    assert(!isChar(cast(dchar) 0xB));
    assert(!isChar(cast(dchar) 0xC));
    assert( isChar(cast(dchar) 0xD));
    assert(!isChar(cast(dchar) 0xE));
    assert(!isChar(cast(dchar) 0x1F));
    assert( isChar(cast(dchar) 0x20));
    assert( isChar('J'));
    assert( isChar(cast(dchar) 0xD7FF));
    assert(!isChar(cast(dchar) 0xD800));
    assert(!isChar(cast(dchar) 0xDFFF));
    assert( isChar(cast(dchar) 0xE000));
    assert( isChar(cast(dchar) 0xFFFD));
    assert(!isChar(cast(dchar) 0xFFFE));
    assert(!isChar(cast(dchar) 0xFFFF));
    assert( isChar(cast(dchar) 0x10000));
    assert( isChar(cast(dchar) 0x10FFFF));
    assert(!isChar(cast(dchar) 0x110000));

    // Optional exhaustive cross-check against the lookup table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}

/*
 * Tests whether a code point is XML whitespace.
 *
 * XML recognises exactly four whitespace characters: space, tab,
 * carriage return and linefeed.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
        case '\u0020', '\u0009', '\u000A', '\u000D':
            return true;
        default:
            return false;
    }
}

/*
 * Tests whether a code point is a digit under the XML standard.
 *
 * ASCII '0'..'9' is answered directly; every other code point is looked up
 * in DigitTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    return (0x0030 <= c && c <= 0x0039) || lookup(DigitTable, c);
}

@safe @nogc nothrow pure unittest
{
    // Optional exhaustive cross-check against the lookup table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isDigit(c) == lookup(DigitTable, c));
    }
}

/*
 * Tests whether a code point is a letter under the XML standard, i.e. it is
 * either ideographic or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}

/*
 * Tests whether a code point is an ideographic character under the XML
 * standard: U+3007, U+3021..U+3029 or U+4E00..U+9FA5.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (0x3021 <= c && c <= 0x3029)
        || (0x4E00 <= c && c <= 0x9FA5);
}

@safe @nogc nothrow pure unittest
{
    assert(isIdeographic('\u4E00'));
    assert(isIdeographic('\u9FA5'));
    assert(isIdeographic('\u3007'));
    assert(isIdeographic('\u3021'));
    assert(isIdeographic('\u3029'));

    // Optional exhaustive cross-check against the lookup table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}

/*
 * Tests whether a code point is a base character under the XML standard,
 * by looking it up in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    return lookup(BaseCharTable, c);
}

/*
 * Tests whether a code point is a combining character under the XML
 * standard, by looking it up in CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    return lookup(CombiningCharTable, c);
}

/*
 * Tests whether a code point is an extender under the XML standard, by
 * looking it up in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    return lookup(ExtenderTable, c);
}

/*
 * Encodes a string by replacing all characters which need to be escaped with
 * appropriate predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the undead.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * If the string is not modified, the original will be returned.
*
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string
 *
 * Example:
 * --------------
 * writefln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    string r;          // entity text for the character being replaced
    size_t lastI;      // index just past the last replaced character
    auto result = appender!S();

    foreach (i, c; s)
    {
        // Only the five characters with predefined XML entities are escaped;
        // everything else is copied through untouched.
        switch (c)
        {
        case '&':  r = "&amp;";  break;
        case '"':  r = "&quot;"; break;
        case '\'': r = "&apos;"; break;
        case '<':  r = "&lt;";   break;
        case '>':  r = "&gt;";   break;
        default: continue;
        }
        // Flush the unescaped run preceding this character, then the entity.
        result.put(s[lastI .. i]);
        result.put(r);
        lastI = i + 1;
    }

    // Nothing was written, so nothing needed escaping: hand back the original
    // string itself (callers rely on `encode(s) is s` in that case).
    if (!result.data.ptr) return s;
    result.put(s[lastI .. $]);
    return result.data;
}

@safe pure unittest
{
    auto s = "hello";
    assert(encode(s) is s);
    assert(encode("a > b") == "a &gt; b", encode("a > b"));
    assert(encode("a < b") == "a &lt; b");
    assert(encode("don't") == "don&apos;t");
    assert(encode("\"hi\"") == "&quot;hi&quot;", encode("\"hi\""));
    assert(encode("cat & dog") == "cat &amp; dog");
}

/*
 * Mode to use for decoding.
 *
 * $(DDOC_ENUM_MEMBERS NONE) Do not decode
 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
 */
enum DecodeMode
{
    NONE, LOOSE, STRICT
}

/*
 * Decodes a string by unescaping all predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the undead.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
*
 * This function decodes the entities &amp;, &quot;, &apos;,
 * &lt; and &gt;,
 * as well as decimal and hexadecimal entities such as &#x20AC;
 *
 * If the string does not contain an ampersand, the original will be returned.
 *
 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
 * (decode, and throw a DecodeException in the event of an error).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writefln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // Stays empty until the first '&' is seen, so ampersand-free input is
    // returned as-is without copying.
    string buffer;
    foreach (ref i; 0 .. s.length)
    {
        char c = s[i];
        if (c != '&')
        {
            if (buffer.length != 0) buffer ~= c;
        }
        else
        {
            if (buffer.length == 0)
            {
                // First ampersand: seed the buffer with everything before it.
                buffer = s[0 .. i].dup;
            }
            if (startsWith(s[i..$],"&#"))
            {
                // Numeric character reference (decimal or hexadecimal).
                try
                {
                    dchar d;
                    string t = s[i..$];
                    checkCharRef(t, d);
                    char[4] temp;
                    import std.utf : encode;
                    buffer ~= temp[0 .. encode(temp, d)];
                    // t now holds what follows the reference; park i on the
                    // reference's last character (the loop then steps past it).
                    i = s.length - t.length - 1;
                }
                catch (Err e)
                {
                    if (mode == DecodeMode.STRICT)
                        throw new DecodeException("Unescaped &");
                    buffer ~= '&';
                }
            }
            // Predefined entities. Each skip is the entity length minus one,
            // because the foreach itself advances i by one more.
            else if (startsWith(s[i..$],"&amp;" )) { buffer ~= '&';  i += 4; }
            else if (startsWith(s[i..$],"&quot;")) { buffer ~= '"';  i += 5; }
            else if (startsWith(s[i..$],"&apos;")) { buffer ~= '\''; i += 5; }
            else if (startsWith(s[i..$],"&lt;"  )) { buffer ~= '<';  i += 3; }
            else if (startsWith(s[i..$],"&gt;"  )) { buffer ~= '>';  i += 3; }
            else
            {
                // A bare '&' that starts no known entity.
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
        }
    }
    return (buffer.length == 0) ? s : buffer;
}

@safe pure unittest
{
    void assertNot(string s) pure
    {
        bool b = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { b = true; }
        assert(b,s);
    }

    // Assert that things that should work, do
    auto s = "hello";
    assert(decode(s,                DecodeMode.STRICT) is s);
    assert(decode("a &gt; b",       DecodeMode.STRICT) == "a > b");
    assert(decode("a &lt; b",       DecodeMode.STRICT) == "a < b");
    assert(decode("don&apos;t",     DecodeMode.STRICT) == "don't");
    assert(decode("&quot;hi&quot;", DecodeMode.STRICT) == "\"hi\"");
    assert(decode("cat &amp; dog",  DecodeMode.STRICT) == "cat & dog");
    assert(decode("&#42;",          DecodeMode.STRICT) == "*");
    assert(decode("&#x2A;",         DecodeMode.STRICT) == "*");
    assert(decode("cat & dog",      DecodeMode.LOOSE) == "cat & dog");
    assert(decode("a &gt b",        DecodeMode.LOOSE) == "a &gt b");
    assert(decode("&#;",            DecodeMode.LOOSE) == "&#;");
    assert(decode("&#x;",           DecodeMode.LOOSE) == "&#x;");
    assert(decode("&#2G;",          DecodeMode.LOOSE) == "&#2G;");
    assert(decode("&#x2G;",         DecodeMode.LOOSE) == "&#x2G;");

    // Assert that things that shouldn't work, don't
    assertNot("cat & dog");
    assertNot("a &gt b");
    assertNot("&#;");
    assertNot("&#x;");
    assertNot("&#2G;");
    assertNot("&#x2G;");
}

/*
 * Class
representing an XML document.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /*
     * All text which occurs before the root element.
     * Defaults to <?xml version="1.0"?>
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /*
     * All text which occurs after the root element.
     * Defaults to the empty string.
     */
    string epilog;

    /*
     * Builds a Document, i.e. a complete DOM (Document Object Model) tree,
     * by parsing XML text.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);
        // The prolog is the part of s in front of the root tag's text
        // (assumes tagString is a slice of s -- the pointer arithmetic
        // depends on it).
        prolog = s[0 .. rootTagText.ptr - s.ptr];
        parse(parser);
        // The parser's string as it stands once parsing is finished.
        epilog = *parser.s;
    }

    /*
     * Builds a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /*
         * Tests two Documents for equality: prolog, element content and
         * epilog must all match.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(scope const Object o) const
        {
            const other = toType!(const Document)(o);
            if (prolog != other.prolog)
                return false;
            if (!(cast(const) this).Element.opEquals(cast(const) other))
                return false;
            return epilog == other.epilog;
        }

        /*
         * Orders two Documents by prolog, then element content, then epilog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(scope const Object o) scope const
        {
            const other = toType!(const Document)(o);

            if (prolog != other.prolog)
                return prolog < other.prolog ? -1 : 1;

            immutable elementOrder = this.Element.opCmp(other);
            if (elementOrder != 0)
                return elementOrder;

            if (epilog != other.epilog)
                return epilog < other.epilog ? -1 : 1;

            return 0;
        }

        /*
         * Returns the hash of a Document, combining prolog, epilog and the
         * Element hash.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override size_t toHash() scope const @trusted
        {
            immutable elementHash = (cast() this).Element.toHash();
            immutable withEpilog = hash(epilog, elementHash);
            return hash(prolog, withEpilog);
        }

        /*
         * Returns the string representation of a Document. (That is, the
         * complete XML of a document: prolog, root element, epilog).
         */
        override string toString() scope const @safe
        {
            return prolog ~ super.toString() ~ epilog;
        }
    }
}

@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    auto xml = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    auto a = new Document(xml);
    auto b = new Document(xml);
    assert(a == b);
    assert(!(a < b));
    int[Document] aa;
    aa[a] = 1;
    assert(aa[b] == 1);

    b ~= new Element("b");
    assert(a < b);
    assert(b > a);
}

/*
 * Class representing an XML element.
*
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag;                      // The start tag of the element
    Item[] items;                 // The element's items
    Text[] texts;                 // The element's text items
    CData[] cdatas;               // The element's CData items
    Comment[] comments;           // The element's comments
    ProcessingInstruction[] pis;  // The element's processing instructions
    Element[] elements;           // The element's child elements

    /*
     * Builds an Element from a name and, optionally, a string that becomes
     * its Text interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity")
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length > 0)
            opOpAssign!("~")(new Text(interior));
    }

    /*
     * Builds an Element from a Tag. The name, attributes and tagString are
     * copied into a fresh Tag, which starts out as TagType.EMPTY.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (key, value; tag_.attr)
            tag.attr[key] = value;
        tag.tagString = tag_.tagString;
    }

    /*
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opOpAssign(string op)(Text item) @safe pure
        if (op == "~")
    {
        texts ~= item;
        appendItem(item);
    }

    /*
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opOpAssign(string op)(CData item) @safe pure
        if (op == "~")
    {
        cdatas ~= item;
        appendItem(item);
    }

    /*
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opOpAssign(string op)(Comment item) @safe pure
        if (op == "~")
    {
        comments ~= item;
        appendItem(item);
    }

    /*
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opOpAssign(string op)(ProcessingInstruction item) @safe pure
        if (op == "~")
    {
        pis ~= item;
        appendItem(item);
    }

    /*
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opOpAssign(string op)(Element item) @safe pure
        if (op == "~")
    {
        elements ~= item;
        appendItem(item);
    }

    // Records the item in the ordered item list and switches an EMPTY tag to
    // START as soon as an item that is not empty XML is added.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Wires the parser's callbacks so that every text, CDATA, comment, PI and
    // child element it reports is appended to this element, then runs it.
    private void parse(ElementParser xml)
    {
        xml.onText = (string content) { opOpAssign!("~")(new Text(content)); };
        xml.onCData = (string content) { opOpAssign!("~")(new CData(content)); };
        xml.onComment = (string content) { opOpAssign!("~")(new Comment(content)); };
        xml.onPI = (string content) { opOpAssign!("~")(new ProcessingInstruction(content)); };

        // The null key registers this handler for child start tags; each child
        // is parsed recursively before being appended.
        xml.onStartTag[null] = (ElementParser sub)
        {
            auto child = new Element(sub.tag);
            child.parse(sub);
            opOpAssign!("~")(child);
        };

        xml.parse();
    }

    /*
     * Tests two Elements for equality by comparing their item lists
     * pairwise.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = toType!(const Element)(o);
        if (items.length != other.items.length)
            return false;
        foreach (i, mine; items)
        {
            if (!mine.opEquals(other.items[i]))
                return false;
        }
        return true;
    }

    /*
     * Orders two Elements by their first differing item; if one item list is
     * a prefix of the other, the shorter one sorts first.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const other = toType!(const Element)(o);
        for (uint i = 0; ; ++i)
        {
            immutable mineDone = (i == items.length);
            immutable theirsDone = (i == other.items.length);
            if (mineDone)
                return theirsDone ? 0 : -1;
            if (theirsDone)
                return 1;
            if (!items[i].opEquals(other.items[i]))
                return items[i].opCmp(other.items[i]);
        }
    }

    /*
     * Returns the hash of an Element: the tag's hash plus the hash of every
     * item.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t total = tag.toHash();
        foreach (item; items)
            total += item.toHash();
        return total;
    }

    const
    {
        /*
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "<title>Good &amp;
         * Bad</title>", will return "Good & Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if any item is not a
         *         Text.
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string decoded;
            foreach (item; items)
            {
                Text piece = cast(Text) item;
                if (piece is null)
                    throw new DecodeException(item.toString());
                decoded ~= decode(piece.toString(), mode);
            }
            return decoded;
        }

        /*
         * Returns an indented string representation of this item, one array
         * entry per output line.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            // A lone Text child is kept on the same line as its tags.
            if (items.length == 1)
            {
                auto only = cast(const(Text))(items[0]);
                if (only !is null)
                    return [tag.toStartString() ~ only.toString() ~ tag.toEndString()];
            }

            string[] lines = [ tag.toStartString() ];
            foreach (item; items)
            {
                foreach (line; item.pretty(indent))
                {
                    // Right-justifying to (length + indent) prefixes the line
                    // with `indent` spaces.
                    lines ~= rightJustify(line, count(line) + indent);
                }
            }
            lines ~= tag.toEndString();
            return lines;
        }

        /*
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writefln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string xmlText = tag.toStartString();
            foreach (item; items)
                xmlText ~= item.toString();
            xmlText ~= tag.toEndString();
            return xmlText;
        }

        // An element is empty XML exactly when it has no items.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}

/*
 * Tag types.
 *
 * $(DDOC_ENUM_MEMBERS START) Used for start tags
 * $(DDOC_ENUM_MEMBERS END) Used for end tags
 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
 *
 */
enum TagType { START, END, EMPTY }

/*
 * Class representing an XML tag.
*
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   // Type of tag
    string name;                    // Tag name
    string[string] attr;            // Associative array of attributes
    private string tagString;       // Source text of the tag (set when parsed)

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid attribute name:" ~ e.toString()); }
        }
    }

    /*
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException (carrying the text consumed so far) when the
     * input is not a well-formed tag, including input that ends before the
     * tag is complete.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        // countUntil reports "no match" as -1. Used directly as a slice bound
        // that raises a RangeError on truncated input instead of the
        // TagException this constructor otherwise reports, so turn it into
        // the same exception the rest of the parse uses.
        static size_t mustFind(ptrdiff_t pos) @safe pure
        {
            if (pos < 0) throw new TagException("");
            return pos;
        }

        // Drops leading whitespace. When only whitespace (or nothing) is
        // left, the string becomes empty and the next reqc() call is left to
        // reject the input.
        static void skipWhite(ref string text) @safe pure
        {
            immutable pos = text.byCodeUnit.countUntil!(a => !isWhite(a));
            text = text[(pos < 0 ? text.length : pos) .. $];
        }

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;
            size_t i = mustFind(
                s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"));
            name = s[0 .. i];
            s = s[i .. $];

            skipWhite(s);

            // One iteration per attribute: key, '=', quoted value.
            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = mustFind(
                    s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"));
                string key = s[0 .. i];
                s = s[i .. $];

                skipWhite(s);
                reqc(s,'=');
                skipWhite(s);

                // The value runs up to the matching closing quote character.
                immutable char quote = requireOneOf(s,"'\"");
                i = mustFind(s.byCodeUnit.countUntil(quote));
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                skipWhite(s);
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a valid tag.
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Trim tagString down to exactly the characters consumed.
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    const
    {
        /*
         * Compares two Tags for equality (name, attributes and type).
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return
                (name != tag.name) ? false : (
                (attr != tag.attr) ? false : (
                (type != tag.type) ? false : (
            true )));
        }

        /*
         * Compares two Tags
         *
         * Example:
         * --------------
         * Tag tag1,tag2
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);
            // Note that attr is an AA, so the comparison is nonsensical (bug 10381)
            return
                ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
                ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) :
                ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
            0 )));
        }

        /*
         * Returns the hash of a Tag (computed from the name only).
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            return .hashOf(name);
        }

        /*
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writefln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // "<name attr="value" ..." without the closing bracket; attribute
            // values are passed through encode().
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            string toStartString() @safe { return toNonEndString() ~ ">"; }

            string toEndString() @safe { return "</" ~ name ~ ">"; }

            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /*
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /*
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }

        /*
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}

/*
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /*
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /*
1272 * Compares two comments for equality 1273 * 1274 * Example: 1275 * -------------- 1276 * Comment item1,item2; 1277 * if (item1 == item2) { } 1278 * -------------- 1279 */ 1280 override bool opEquals(scope const Object o) const 1281 { 1282 const item = toType!(const Item)(o); 1283 const t = cast(const Comment) item; 1284 return t !is null && content == t.content; 1285 } 1286 1287 /* 1288 * Compares two comments 1289 * 1290 * You should rarely need to call this function. It exists so that Comments 1291 * can be used as associative array keys. 1292 * 1293 * Example: 1294 * -------------- 1295 * Comment item1,item2; 1296 * if (item1 < item2) { } 1297 * -------------- 1298 */ 1299 override int opCmp(scope const Object o) scope const 1300 { 1301 const item = toType!(const Item)(o); 1302 const t = cast(const Comment) item; 1303 return t !is null && (content != t.content 1304 ? (content < t.content ? -1 : 1 ) : 0 ); 1305 } 1306 1307 /* 1308 * Returns the hash of a Comment 1309 * 1310 * You should rarely need to call this function. It exists so that Comments 1311 * can be used as associative array keys. 
1312 */ 1313 override size_t toHash() scope const nothrow { return hash(content); } 1314 1315 /* 1316 * Returns a string representation of this comment 1317 */ 1318 override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; } 1319 1320 override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always 1321 } 1322 1323 // https://issues.dlang.org/show_bug.cgi?id=16241 1324 @safe unittest 1325 { 1326 import std.exception : assertThrown; 1327 auto c = new Comment("=="); 1328 assert(c.content == "=="); 1329 assertThrown!CommentException(new Comment("--")); 1330 } 1331 1332 /* 1333 * Class representing a Character Data section 1334 */ 1335 class CData : Item 1336 { 1337 private string content; 1338 1339 /* 1340 * Construct a character data section 1341 * 1342 * Params: 1343 * content = the body of the character data segment 1344 * 1345 * Throws: CDataException if the segment body is illegal (contains "]]>") 1346 * 1347 * Example: 1348 * -------------- 1349 * auto item = new CData("<b>hello</b>"); 1350 * // constructs <![CDATA[<b>hello</b>]]> 1351 * -------------- 1352 */ 1353 this(string content) @safe pure 1354 { 1355 import std.string : indexOf; 1356 if (content.indexOf("]]>") != -1) throw new CDataException(content); 1357 this.content = content; 1358 } 1359 1360 /* 1361 * Compares two CDatas for equality 1362 * 1363 * Example: 1364 * -------------- 1365 * CData item1,item2; 1366 * if (item1 == item2) { } 1367 * -------------- 1368 */ 1369 override bool opEquals(scope const Object o) const 1370 { 1371 const item = toType!(const Item)(o); 1372 const t = cast(const CData) item; 1373 return t !is null && content == t.content; 1374 } 1375 1376 /* 1377 * Compares two CDatas 1378 * 1379 * You should rarely need to call this function. It exists so that CDatas 1380 * can be used as associative array keys. 
1381 * 1382 * Example: 1383 * -------------- 1384 * CData item1,item2; 1385 * if (item1 < item2) { } 1386 * -------------- 1387 */ 1388 override int opCmp(scope const Object o) scope const 1389 { 1390 const item = toType!(const Item)(o); 1391 const t = cast(const CData) item; 1392 return t !is null && (content != t.content 1393 ? (content < t.content ? -1 : 1 ) : 0 ); 1394 } 1395 1396 /* 1397 * Returns the hash of a CData 1398 * 1399 * You should rarely need to call this function. It exists so that CDatas 1400 * can be used as associative array keys. 1401 */ 1402 override size_t toHash() scope const nothrow { return hash(content); } 1403 1404 /* 1405 * Returns a string representation of this CData section 1406 */ 1407 override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; } 1408 1409 override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always 1410 } 1411 1412 /* 1413 * Class representing a text (aka Parsed Character Data) section 1414 */ 1415 class Text : Item 1416 { 1417 private string content; 1418 1419 /* 1420 * Construct a text (aka PCData) section 1421 * 1422 * Params: 1423 * content = the text. 
This function encodes the text before 1424 * insertion, so it is safe to insert any text 1425 * 1426 * Example: 1427 * -------------- 1428 * auto Text = new CData("a < b"); 1429 * // constructs a < b 1430 * -------------- 1431 */ 1432 this(string content) @safe pure 1433 { 1434 this.content = encode(content); 1435 } 1436 1437 /* 1438 * Compares two text sections for equality 1439 * 1440 * Example: 1441 * -------------- 1442 * Text item1,item2; 1443 * if (item1 == item2) { } 1444 * -------------- 1445 */ 1446 override bool opEquals(scope const Object o) const 1447 { 1448 const item = toType!(const Item)(o); 1449 const t = cast(const Text) item; 1450 return t !is null && content == t.content; 1451 } 1452 1453 /* 1454 * Compares two text sections 1455 * 1456 * You should rarely need to call this function. It exists so that Texts 1457 * can be used as associative array keys. 1458 * 1459 * Example: 1460 * -------------- 1461 * Text item1,item2; 1462 * if (item1 < item2) { } 1463 * -------------- 1464 */ 1465 override int opCmp(scope const Object o) scope const 1466 { 1467 const item = toType!(const Item)(o); 1468 const t = cast(const Text) item; 1469 return t !is null 1470 && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 ); 1471 } 1472 1473 /* 1474 * Returns the hash of a text section 1475 * 1476 * You should rarely need to call this function. It exists so that Texts 1477 * can be used as associative array keys. 
1478 */ 1479 override size_t toHash() scope const nothrow { return hash(content); } 1480 1481 /* 1482 * Returns a string representation of this Text section 1483 */ 1484 override string toString() scope const @safe @nogc pure nothrow { return content; } 1485 1486 /* 1487 * Returns true if the content is the empty string 1488 */ 1489 override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; } 1490 } 1491 1492 /* 1493 * Class representing an XML Instruction section 1494 */ 1495 class XMLInstruction : Item 1496 { 1497 private string content; 1498 1499 /* 1500 * Construct an XML Instruction section 1501 * 1502 * Params: 1503 * content = the body of the instruction segment 1504 * 1505 * Throws: XIException if the segment body is illegal (contains ">") 1506 * 1507 * Example: 1508 * -------------- 1509 * auto item = new XMLInstruction("ATTLIST"); 1510 * // constructs <!ATTLIST> 1511 * -------------- 1512 */ 1513 this(string content) @safe pure 1514 { 1515 import std.string : indexOf; 1516 if (content.indexOf(">") != -1) throw new XIException(content); 1517 this.content = content; 1518 } 1519 1520 /* 1521 * Compares two XML instructions for equality 1522 * 1523 * Example: 1524 * -------------- 1525 * XMLInstruction item1,item2; 1526 * if (item1 == item2) { } 1527 * -------------- 1528 */ 1529 override bool opEquals(scope const Object o) const 1530 { 1531 const item = toType!(const Item)(o); 1532 const t = cast(const XMLInstruction) item; 1533 return t !is null && content == t.content; 1534 } 1535 1536 /* 1537 * Compares two XML instructions 1538 * 1539 * You should rarely need to call this function. It exists so that 1540 * XmlInstructions can be used as associative array keys. 
1541 * 1542 * Example: 1543 * -------------- 1544 * XMLInstruction item1,item2; 1545 * if (item1 < item2) { } 1546 * -------------- 1547 */ 1548 override int opCmp(scope const Object o) scope const 1549 { 1550 const item = toType!(const Item)(o); 1551 const t = cast(const XMLInstruction) item; 1552 return t !is null 1553 && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 ); 1554 } 1555 1556 /* 1557 * Returns the hash of an XMLInstruction 1558 * 1559 * You should rarely need to call this function. It exists so that 1560 * XmlInstructions can be used as associative array keys. 1561 */ 1562 override size_t toHash() scope const nothrow { return hash(content); } 1563 1564 /* 1565 * Returns a string representation of this XmlInstruction 1566 */ 1567 override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; } 1568 1569 override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } // Returns false always 1570 } 1571 1572 /* 1573 * Class representing a Processing Instruction section 1574 */ 1575 class ProcessingInstruction : Item 1576 { 1577 private string content; 1578 1579 /* 1580 * Construct a Processing Instruction section 1581 * 1582 * Params: 1583 * content = the body of the instruction segment 1584 * 1585 * Throws: PIException if the segment body is illegal (contains "?>") 1586 * 1587 * Example: 1588 * -------------- 1589 * auto item = new ProcessingInstruction("php"); 1590 * // constructs <?php?> 1591 * -------------- 1592 */ 1593 this(string content) @safe pure 1594 { 1595 import std.string : indexOf; 1596 if (content.indexOf("?>") != -1) throw new PIException(content); 1597 this.content = content; 1598 } 1599 1600 /* 1601 * Compares two processing instructions for equality 1602 * 1603 * Example: 1604 * -------------- 1605 * ProcessingInstruction item1,item2; 1606 * if (item1 == item2) { } 1607 * -------------- 1608 */ 1609 override bool opEquals(scope const Object o) const 1610 { 1611 const 
item = toType!(const Item)(o); 1612 const t = cast(const ProcessingInstruction) item; 1613 return t !is null && content == t.content; 1614 } 1615 1616 /* 1617 * Compares two processing instructions 1618 * 1619 * You should rarely need to call this function. It exists so that 1620 * ProcessingInstructions can be used as associative array keys. 1621 * 1622 * Example: 1623 * -------------- 1624 * ProcessingInstruction item1,item2; 1625 * if (item1 < item2) { } 1626 * -------------- 1627 */ 1628 override int opCmp(scope const Object o) scope const 1629 { 1630 const item = toType!(const Item)(o); 1631 const t = cast(const ProcessingInstruction) item; 1632 return t !is null 1633 && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 ); 1634 } 1635 1636 /* 1637 * Returns the hash of a ProcessingInstruction 1638 * 1639 * You should rarely need to call this function. It exists so that 1640 * ProcessingInstructions can be used as associative array keys. 1641 */ 1642 override size_t toHash() scope const nothrow { return hash(content); } 1643 1644 /* 1645 * Returns a string representation of this ProcessingInstruction 1646 */ 1647 override string toString() scope const @safe pure nothrow { return "<?" 
~ content ~ "?>"; } 1648 1649 override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } // Returns false always 1650 } 1651 1652 /* 1653 * Abstract base class for XML items 1654 */ 1655 abstract class Item 1656 { 1657 // Compares with another Item of same type for equality 1658 abstract override bool opEquals(scope const Object o) @safe const; 1659 1660 // Compares with another Item of same type 1661 abstract override int opCmp(scope const Object o) @safe const; 1662 1663 // Returns the hash of this item 1664 abstract override size_t toHash() @safe scope const; 1665 1666 // Returns a string representation of this item 1667 abstract override string toString() @safe scope const; 1668 1669 /* 1670 * Returns an indented string representation of this item 1671 * 1672 * Params: 1673 * indent = number of spaces by which to indent child elements 1674 */ 1675 string[] pretty(uint indent) @safe scope const 1676 { 1677 import std.string : strip; 1678 string s = strip(toString()); 1679 return s.length == 0 ? [] : [ s ]; 1680 } 1681 1682 // Returns true if the item represents empty XML text 1683 abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const; 1684 } 1685 1686 /* 1687 * Class for parsing an XML Document. 1688 * 1689 * This is a subclass of ElementParser. Most of the useful functions are 1690 * documented there. 1691 * 1692 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 1693 * 1694 * Bugs: 1695 * Currently only supports UTF documents. 1696 * 1697 * If there is an encoding attribute in the prolog, it is ignored. 1698 * 1699 */ 1700 class DocumentParser : ElementParser 1701 { 1702 string xmlText; 1703 1704 /* 1705 * Constructs a DocumentParser. 1706 * 1707 * The input to this function MUST be valid XML. 1708 * This is enforced by the function's in contract. 
1709 * 1710 * Params: 1711 * xmlText_ = the entire XML document as text 1712 * 1713 */ 1714 this(string xmlText_) 1715 in 1716 { 1717 assert(xmlText_.length != 0); 1718 try 1719 { 1720 // Confirm that the input is valid XML 1721 check(xmlText_); 1722 } 1723 catch (CheckException e) 1724 { 1725 // And if it's not, tell the user why not 1726 assert(false, "\n" ~ e.toString()); 1727 } 1728 } 1729 do 1730 { 1731 xmlText = xmlText_; 1732 s = &xmlText; 1733 super(); // Initialize everything 1734 parse(); // Parse through the root tag (but not beyond) 1735 } 1736 } 1737 1738 @system unittest 1739 { 1740 auto doc = new Document("<root><child><grandchild/></child></root>"); 1741 assert(doc.elements.length == 1); 1742 assert(doc.elements[0].tag.name == "child"); 1743 assert(doc.items == doc.elements); 1744 } 1745 1746 /* 1747 * Class for parsing an XML element. 1748 * 1749 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 1750 * 1751 * Note that you cannot construct instances of this class directly. You can 1752 * construct a DocumentParser (which is a subclass of ElementParser), but 1753 * otherwise, Instances of ElementParser will be created for you by the 1754 * library, and passed your way via onStartTag handlers. 
1755 * 1756 */ 1757 class ElementParser 1758 { 1759 alias Handler = void delegate(string); 1760 alias ElementHandler = void delegate(in Element element); 1761 alias ParserHandler = void delegate(ElementParser parser); 1762 1763 private 1764 { 1765 Tag tag_; 1766 string elementStart; 1767 string* s; 1768 1769 Handler commentHandler = null; 1770 Handler cdataHandler = null; 1771 Handler xiHandler = null; 1772 Handler piHandler = null; 1773 Handler rawTextHandler = null; 1774 Handler textHandler = null; 1775 1776 // Private constructor for start tags 1777 this(ElementParser parent) @safe @nogc pure nothrow 1778 { 1779 s = parent.s; 1780 this(); 1781 tag_ = parent.tag_; 1782 } 1783 1784 // Private constructor for empty tags 1785 this(Tag tag, string* t) @safe @nogc pure nothrow 1786 { 1787 s = t; 1788 this(); 1789 tag_ = tag; 1790 } 1791 } 1792 1793 /* 1794 * The Tag at the start of the element being parsed. You can read this to 1795 * determine the tag's name and attributes. 1796 */ 1797 @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; } 1798 1799 /* 1800 * Register a handler which will be called whenever a start tag is 1801 * encountered which matches the specified name. You can also pass null as 1802 * the name, in which case the handler will be called for any unmatched 1803 * start tag. 
1804 * 1805 * Example: 1806 * -------------- 1807 * // Call this function whenever a <podcast> start tag is encountered 1808 * onStartTag["podcast"] = (ElementParser xml) 1809 * { 1810 * // Your code here 1811 * // 1812 * // This is a a closure, so code here may reference 1813 * // variables which are outside of this scope 1814 * }; 1815 * 1816 * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode> 1817 * // start tag is encountered 1818 * onStartTag["episode"] = &myEpisodeStartHandler; 1819 * 1820 * // call delegate dg for all other start tags 1821 * onStartTag[null] = dg; 1822 * -------------- 1823 * 1824 * This library will supply your function with a new instance of 1825 * ElementHandler, which may be used to parse inside the element whose 1826 * start tag was just found, or to identify the tag attributes of the 1827 * element, etc. 1828 * 1829 * Note that your function will be called for both start tags and empty 1830 * tags. That is, we make no distinction between <br></br> 1831 * and <br/>. 1832 */ 1833 ParserHandler[string] onStartTag; 1834 1835 /* 1836 * Register a handler which will be called whenever an end tag is 1837 * encountered which matches the specified name. You can also pass null as 1838 * the name, in which case the handler will be called for any unmatched 1839 * end tag. 
1840 * 1841 * Example: 1842 * -------------- 1843 * // Call this function whenever a </podcast> end tag is encountered 1844 * onEndTag["podcast"] = (in Element e) 1845 * { 1846 * // Your code here 1847 * // 1848 * // This is a a closure, so code here may reference 1849 * // variables which are outside of this scope 1850 * }; 1851 * 1852 * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode> 1853 * // end tag is encountered 1854 * onEndTag["episode"] = &myEpisodeEndHandler; 1855 * 1856 * // call delegate dg for all other end tags 1857 * onEndTag[null] = dg; 1858 * -------------- 1859 * 1860 * Note that your function will be called for both start tags and empty 1861 * tags. That is, we make no distinction between <br></br> 1862 * and <br/>. 1863 */ 1864 ElementHandler[string] onEndTag; 1865 1866 protected this() @safe @nogc pure nothrow 1867 { 1868 elementStart = *s; 1869 } 1870 1871 /* 1872 * Register a handler which will be called whenever text is encountered. 1873 * 1874 * Example: 1875 * -------------- 1876 * // Call this function whenever text is encountered 1877 * onText = (string s) 1878 * { 1879 * // Your code here 1880 * 1881 * // The passed parameter s will have been decoded by the time you see 1882 * // it, and so may contain any character. 1883 * // 1884 * // This is a a closure, so code here may reference 1885 * // variables which are outside of this scope 1886 * }; 1887 * -------------- 1888 */ 1889 @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; } 1890 1891 /* 1892 * Register an alternative handler which will be called whenever text 1893 * is encountered. This differs from onText in that onText will decode 1894 * the text, whereas onTextRaw will not. This allows you to make design 1895 * choices, since onText will be more accurate, but slower, while 1896 * onTextRaw will be faster, but less accurate. 
Of course, you can 1897 * still call decode() within your handler, if you want, but you'd 1898 * probably want to use onTextRaw only in circumstances where you 1899 * know that decoding is unnecessary. 1900 * 1901 * Example: 1902 * -------------- 1903 * // Call this function whenever text is encountered 1904 * onText = (string s) 1905 * { 1906 * // Your code here 1907 * 1908 * // The passed parameter s will NOT have been decoded. 1909 * // 1910 * // This is a a closure, so code here may reference 1911 * // variables which are outside of this scope 1912 * }; 1913 * -------------- 1914 */ 1915 @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; } 1916 1917 /* 1918 * Register a handler which will be called whenever a character data 1919 * segment is encountered. 1920 * 1921 * Example: 1922 * -------------- 1923 * // Call this function whenever a CData section is encountered 1924 * onCData = (string s) 1925 * { 1926 * // Your code here 1927 * 1928 * // The passed parameter s does not include the opening <![CDATA[ 1929 * // nor closing ]]> 1930 * // 1931 * // This is a a closure, so code here may reference 1932 * // variables which are outside of this scope 1933 * }; 1934 * -------------- 1935 */ 1936 @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; } 1937 1938 /* 1939 * Register a handler which will be called whenever a comment is 1940 * encountered. 
1941 * 1942 * Example: 1943 * -------------- 1944 * // Call this function whenever a comment is encountered 1945 * onComment = (string s) 1946 * { 1947 * // Your code here 1948 * 1949 * // The passed parameter s does not include the opening <!-- nor 1950 * // closing --> 1951 * // 1952 * // This is a a closure, so code here may reference 1953 * // variables which are outside of this scope 1954 * }; 1955 * -------------- 1956 */ 1957 @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; } 1958 1959 /* 1960 * Register a handler which will be called whenever a processing 1961 * instruction is encountered. 1962 * 1963 * Example: 1964 * -------------- 1965 * // Call this function whenever a processing instruction is encountered 1966 * onPI = (string s) 1967 * { 1968 * // Your code here 1969 * 1970 * // The passed parameter s does not include the opening <? nor 1971 * // closing ?> 1972 * // 1973 * // This is a a closure, so code here may reference 1974 * // variables which are outside of this scope 1975 * }; 1976 * -------------- 1977 */ 1978 @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; } 1979 1980 /* 1981 * Register a handler which will be called whenever an XML instruction is 1982 * encountered. 1983 * 1984 * Example: 1985 * -------------- 1986 * // Call this function whenever an XML instruction is encountered 1987 * // (Note: XML instructions may only occur preceding the root tag of a 1988 * // document). 1989 * onPI = (string s) 1990 * { 1991 * // Your code here 1992 * 1993 * // The passed parameter s does not include the opening <! nor 1994 * // closing > 1995 * // 1996 * // This is a a closure, so code here may reference 1997 * // variables which are outside of this scope 1998 * }; 1999 * -------------- 2000 */ 2001 @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; } 2002 2003 /* 2004 * Parse an XML element. 
2005 * 2006 * Parsing will continue until the end of the current element. Any items 2007 * encountered for which a handler has been registered will invoke that 2008 * handler. 2009 * 2010 * Throws: various kinds of XMLException 2011 */ 2012 void parse() 2013 { 2014 import std.algorithm.searching : startsWith; 2015 import std.string : indexOf; 2016 2017 string t; 2018 const Tag root = tag_; 2019 Tag[string] startTags; 2020 if (tag_ !is null) startTags[tag_.name] = tag_; 2021 2022 while (s.length != 0) 2023 { 2024 if (startsWith(*s,"<!--")) 2025 { 2026 chop(*s,4); 2027 t = chop(*s,indexOf(*s,"-->")); 2028 if (commentHandler.funcptr !is null) commentHandler(t); 2029 chop(*s,3); 2030 } 2031 else if (startsWith(*s,"<![CDATA[")) 2032 { 2033 chop(*s,9); 2034 t = chop(*s,indexOf(*s,"]]>")); 2035 if (cdataHandler.funcptr !is null) cdataHandler(t); 2036 chop(*s,3); 2037 } 2038 else if (startsWith(*s,"<!")) 2039 { 2040 chop(*s,2); 2041 t = chop(*s,indexOf(*s,">")); 2042 if (xiHandler.funcptr !is null) xiHandler(t); 2043 chop(*s,1); 2044 } 2045 else if (startsWith(*s,"<?")) 2046 { 2047 chop(*s,2); 2048 t = chop(*s,indexOf(*s,"?>")); 2049 if (piHandler.funcptr !is null) piHandler(t); 2050 chop(*s,2); 2051 } 2052 else if (startsWith(*s,"<")) 2053 { 2054 tag_ = new Tag(*s,true); 2055 if (root is null) 2056 return; // Return to constructor of derived class 2057 2058 if (tag_.isStart) 2059 { 2060 startTags[tag_.name] = tag_; 2061 2062 auto parser = new ElementParser(this); 2063 2064 auto handler = tag_.name in onStartTag; 2065 if (handler !is null) (*handler)(parser); 2066 else 2067 { 2068 handler = null in onStartTag; 2069 if (handler !is null) (*handler)(parser); 2070 } 2071 } 2072 else if (tag_.isEnd) 2073 { 2074 const startTag = startTags[tag_.name]; 2075 string text; 2076 2077 if (startTag.tagString.length == 0) 2078 assert(0); 2079 2080 immutable(char)* p = startTag.tagString.ptr 2081 + startTag.tagString.length; 2082 immutable(char)* q = &tag_.tagString[0]; 2083 text = 
decode(p[0..(q-p)], DecodeMode.LOOSE); 2084 2085 auto element = new Element(startTag); 2086 if (text.length != 0) element ~= new Text(text); 2087 2088 auto handler = tag_.name in onEndTag; 2089 if (handler !is null) (*handler)(element); 2090 else 2091 { 2092 handler = null in onEndTag; 2093 if (handler !is null) (*handler)(element); 2094 } 2095 2096 if (tag_.name == root.name) return; 2097 } 2098 else if (tag_.isEmpty) 2099 { 2100 Tag startTag = new Tag(tag_.name); 2101 2102 // FIX by hed010gy 2103 // https://issues.dlang.org/show_bug.cgi?id=2979 2104 if (tag_.attr.length > 0) 2105 foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv; 2106 // END FIX 2107 2108 // Handle the pretend start tag 2109 string s2; 2110 auto parser = new ElementParser(startTag,&s2); 2111 auto handler1 = startTag.name in onStartTag; 2112 if (handler1 !is null) (*handler1)(parser); 2113 else 2114 { 2115 handler1 = null in onStartTag; 2116 if (handler1 !is null) (*handler1)(parser); 2117 } 2118 2119 // Handle the pretend end tag 2120 auto element = new Element(startTag); 2121 auto handler2 = tag_.name in onEndTag; 2122 if (handler2 !is null) (*handler2)(element); 2123 else 2124 { 2125 handler2 = null in onEndTag; 2126 if (handler2 !is null) (*handler2)(element); 2127 } 2128 } 2129 } 2130 else 2131 { 2132 t = chop(*s,indexOf(*s,"<")); 2133 if (rawTextHandler.funcptr !is null) 2134 rawTextHandler(t); 2135 else if (textHandler.funcptr !is null) 2136 textHandler(decode(t,DecodeMode.LOOSE)); 2137 } 2138 } 2139 } 2140 2141 /* 2142 * Returns that part of the element which has already been parsed 2143 */ 2144 override string toString() const @nogc @safe pure nothrow 2145 { 2146 assert(elementStart.length >= s.length); 2147 return elementStart[0 .. 
elementStart.length - s.length]; 2148 } 2149 2150 } 2151 2152 private 2153 { 2154 template Check(string msg) 2155 { 2156 string old = s; 2157 2158 void fail() @safe pure 2159 { 2160 s = old; 2161 throw new Err(s,msg); 2162 } 2163 2164 void fail(Err e) @safe pure 2165 { 2166 s = old; 2167 throw new Err(s,msg,e); 2168 } 2169 2170 void fail(string msg2) @safe pure 2171 { 2172 fail(new Err(s,msg2)); 2173 } 2174 } 2175 2176 void checkMisc(ref string s) @safe pure // rule 27 2177 { 2178 import std.algorithm.searching : startsWith; 2179 2180 mixin Check!("Misc"); 2181 2182 try 2183 { 2184 if (s.startsWith("<!--")) { checkComment(s); } 2185 else if (s.startsWith("<?")) { checkPI(s); } 2186 else { checkSpace(s); } 2187 } 2188 catch (Err e) { fail(e); } 2189 } 2190 2191 void checkDocument(ref string s) @safe pure // rule 1 2192 { 2193 mixin Check!("Document"); 2194 try 2195 { 2196 checkProlog(s); 2197 checkElement(s); 2198 star!(checkMisc)(s); 2199 } 2200 catch (Err e) { fail(e); } 2201 } 2202 2203 void checkChars(ref string s) @safe pure // rule 2 2204 { 2205 // TO DO - Fix std.utf stride and decode functions, then use those 2206 // instead 2207 import std.format : format; 2208 2209 mixin Check!("Chars"); 2210 2211 dchar c; 2212 ptrdiff_t n = -1; 2213 // 'i' must not be smaller than size_t because size_t is used internally in 2214 // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets. 
2215 foreach (size_t i, dchar d; s) 2216 { 2217 if (!isChar(d)) 2218 { 2219 c = d; 2220 n = i; 2221 break; 2222 } 2223 } 2224 if (n != -1) 2225 { 2226 s = s[n..$]; 2227 fail(format("invalid character: U+%04X",c)); 2228 } 2229 } 2230 2231 void checkSpace(ref string s) @safe pure // rule 3 2232 { 2233 import std.algorithm.searching : countUntil; 2234 import std.ascii : isWhite; 2235 import std.utf : byCodeUnit; 2236 2237 mixin Check!("Whitespace"); 2238 ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 2239 if (i == -1 && s.length > 0 && isWhite(s[0])) 2240 s = s[$ .. $]; 2241 else if (i > -1) 2242 s = s[i .. $]; 2243 if (s is old) fail(); 2244 } 2245 2246 void checkName(ref string s, out string name) @safe pure // rule 5 2247 { 2248 mixin Check!("Name"); 2249 2250 if (s.length == 0) fail(); 2251 ptrdiff_t n; 2252 // 'i' must not be smaller than size_t because size_t is used internally in 2253 // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets. 2254 foreach (size_t i, dchar c; s) 2255 { 2256 if (c == '_' || c == ':' || isLetter(c)) continue; 2257 if (i == 0) fail(); 2258 if (c == '-' || c == '.' || isDigit(c) 2259 || isCombiningChar(c) || isExtender(c)) continue; 2260 n = i; 2261 break; 2262 } 2263 name = s[0 .. n]; 2264 s = s[n..$]; 2265 } 2266 2267 void checkAttValue(ref string s) @safe pure // rule 10 2268 { 2269 import std.algorithm.searching : countUntil; 2270 import std.utf : byCodeUnit; 2271 2272 mixin Check!("AttValue"); 2273 2274 if (s.length == 0) fail(); 2275 char c = s[0]; 2276 if (c != '\u0022' && c != '\u0027') 2277 fail("attribute value requires quotes"); 2278 s = s[1..$]; 2279 for (;;) 2280 { 2281 s = s[s.byCodeUnit.countUntil(c) .. 
$]; 2282 if (s.length == 0) fail("unterminated attribute value"); 2283 if (s[0] == '<') fail("< found in attribute value"); 2284 if (s[0] == c) break; 2285 try { checkReference(s); } catch (Err e) { fail(e); } 2286 } 2287 s = s[1..$]; 2288 } 2289 2290 void checkCharData(ref string s) @safe pure // rule 14 2291 { 2292 import std.algorithm.searching : startsWith; 2293 2294 mixin Check!("CharData"); 2295 2296 while (s.length != 0) 2297 { 2298 if (s.startsWith("&")) break; 2299 if (s.startsWith("<")) break; 2300 if (s.startsWith("]]>")) fail("]]> found within char data"); 2301 s = s[1..$]; 2302 } 2303 } 2304 2305 void checkComment(ref string s) @safe pure // rule 15 2306 { 2307 import std.string : indexOf; 2308 2309 mixin Check!("Comment"); 2310 2311 try { checkLiteral("<!--",s); } catch (Err e) { fail(e); } 2312 ptrdiff_t n = s.indexOf("--"); 2313 if (n == -1) fail("unterminated comment"); 2314 s = s[n..$]; 2315 try { checkLiteral("-->",s); } catch (Err e) { fail(e); } 2316 } 2317 2318 void checkPI(ref string s) @safe pure // rule 16 2319 { 2320 mixin Check!("PI"); 2321 2322 try 2323 { 2324 checkLiteral("<?",s); 2325 checkEnd("?>",s); 2326 } 2327 catch (Err e) { fail(e); } 2328 } 2329 2330 void checkCDSect(ref string s) @safe pure // rule 18 2331 { 2332 mixin Check!("CDSect"); 2333 2334 try 2335 { 2336 checkLiteral(cdata,s); 2337 checkEnd("]]>",s); 2338 } 2339 catch (Err e) { fail(e); } 2340 } 2341 2342 void checkProlog(ref string s) @safe pure // rule 22 2343 { 2344 mixin Check!("Prolog"); 2345 2346 try 2347 { 2348 /* The XML declaration is optional 2349 * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog 2350 */ 2351 opt!(checkXMLDecl)(s); 2352 2353 star!(checkMisc)(s); 2354 opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s); 2355 } 2356 catch (Err e) { fail(e); } 2357 } 2358 2359 void checkXMLDecl(ref string s) @safe pure // rule 23 2360 { 2361 mixin Check!("XMLDecl"); 2362 2363 try 2364 { 2365 checkLiteral("<?xml",s); 2366 checkVersionInfo(s); 2367 
opt!(checkEncodingDecl)(s); 2368 opt!(checkSDDecl)(s); 2369 opt!(checkSpace)(s); 2370 checkLiteral("?>",s); 2371 } 2372 catch (Err e) { fail(e); } 2373 } 2374 2375 void checkVersionInfo(ref string s) @safe pure // rule 24 2376 { 2377 mixin Check!("VersionInfo"); 2378 2379 try 2380 { 2381 checkSpace(s); 2382 checkLiteral("version",s); 2383 checkEq(s); 2384 quoted!(checkVersionNum)(s); 2385 } 2386 catch (Err e) { fail(e); } 2387 } 2388 2389 void checkEq(ref string s) @safe pure // rule 25 2390 { 2391 mixin Check!("Eq"); 2392 2393 try 2394 { 2395 opt!(checkSpace)(s); 2396 checkLiteral("=",s); 2397 opt!(checkSpace)(s); 2398 } 2399 catch (Err e) { fail(e); } 2400 } 2401 2402 void checkVersionNum(ref string s) @safe pure // rule 26 2403 { 2404 import std.algorithm.searching : countUntil; 2405 import std.utf : byCodeUnit; 2406 2407 mixin Check!("VersionNum"); 2408 2409 s = s[s.byCodeUnit.countUntil('\"') .. $]; 2410 if (s is old) fail(); 2411 } 2412 2413 void checkDocTypeDecl(ref string s) @safe pure // rule 28 2414 { 2415 mixin Check!("DocTypeDecl"); 2416 2417 try 2418 { 2419 checkLiteral("<!DOCTYPE",s); 2420 // 2421 // TO DO -- ensure DOCTYPE is well formed 2422 // (But not yet. 
That's one of our "future directions")
            //
            checkEnd(">",s);
        }
        catch (Err e) { fail(e); }
    }

    /*
     * Rule 32 (SDDecl): whitespace, the literal "standalone", an equals sign
     * and then a quoted yes/no value.
     *
     * Params:
     *      s = remaining input; advanced past the declaration on success
     */
    void checkSDDecl(ref string s) @safe pure // rule 32
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("SDDecl");

        try
        {
            checkSpace(s);
            checkLiteral("standalone",s);
            checkEq(s);
        }
        catch (Err e) { fail(e); }

        // n is the length of the quoted value including both quote marks.
        int n = 0;
             if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
        else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
        else fail("standalone attribute value must be 'yes', \"yes\","~
            " 'no' or \"no\"");
        s = s[n..$];
    }

    /*
     * Rule 39 (element): a tag, and -- when that tag is a start tag -- its
     * content followed by an end tag whose name must equal the start tag's.
     *
     * Params:
     *      s = remaining input; advanced past the element on success
     */
    void checkElement(ref string s) @safe pure // rule 39
    {
        mixin Check!("Element");

        string sname,ename,t;
        try { checkTag(s,t,sname); } catch (Err e) { fail(e); }

        if (t == "STag")
        {
            try
            {
                checkContent(s);
                // Remember where the end tag starts (t is reused as a
                // position marker once the tag type has been tested).
                t = s;
                checkETag(s,ename);
            }
            catch (Err e) { fail(e); }

            if (sname != ename)
            {
                // Rewind to the start of the end tag before failing.
                s = t;
                fail("end tag name \"" ~ ename
                    ~ "\" differs from start tag name \""~sname~"\"");
            }
        }
    }

    /*
     * Rules 40 and 44: '<', a name, any number of whitespace-separated
     * attributes, optional whitespace, an optional '/', and '>'.
     *
     * Params:
     *      s    = remaining input; advanced past the tag on success
     *      type = set to "STag", or to "ETag" when a '/' precedes the
     *             closing '>' (checkElement only tests for "STag")
     *      name = receives the tag name parsed by checkName
     */
    // rules 40 and 44
    void checkTag(ref string s, out string type, out string name) @safe pure
    {
        mixin Check!("Tag");

        try
        {
            type = "STag";
            checkLiteral("<",s);
            checkName(s,name);
            star!(seq!(checkSpace,checkAttribute))(s);
            opt!(checkSpace)(s);
            if (s.length != 0 && s[0] == '/')
            {
                s = s[1..$];
                type = "ETag";
            }
            checkLiteral(">",s);
        }
        catch (Err e) { fail(e); }
    }

    /*
     * Rule 41 (Attribute): a name, an equals sign and an attribute value.
     * The parsed name is discarded.
     */
    void checkAttribute(ref string s) @safe pure // rule 41
    {
        mixin Check!("Attribute");

        try
        {
            string name;
            checkName(s,name);
            checkEq(s);
            checkAttValue(s);
        }
        catch (Err e) { fail(e); }
    }

    /*
     * Rule 42 (ETag): "</", a name, optional whitespace and ">".
     *
     * Params:
     *      s    = remaining input; advanced past the end tag on success
     *      name = receives the end tag's name
     */
    void checkETag(ref string s, out string name) @safe pure // rule 42
    {
        mixin Check!("ETag");

        try
        {
            checkLiteral("</",s);
            checkName(s,name);
            opt!(checkSpace)(s);
            checkLiteral(">",s);
        }
        catch (Err e) { fail(e); }
    }

    /*
     * Rule 43 (content): repeatedly dispatches on the leading characters of
     * the input to the matching checker, stopping at "</" or at end of input.
     */
    void checkContent(ref string s) @safe pure // rule 43
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("Content");

        try
        {
            while (s.length != 0)
            {
                // `old` is declared by mixin Check; updating it here
                // presumably makes a failure refer to the current item
                // rather than to the start of the content -- TODO confirm.
                old = s;
                     if (s.startsWith("&"))        { checkReference(s); }
                else if (s.startsWith("<!--"))     { checkComment(s); }
                else if (s.startsWith("<?"))       { checkPI(s); }
                else if (s.startsWith(cdata))      { checkCDSect(s); }
                else if (s.startsWith("</"))       { break; }
                else if (s.startsWith("<"))        { checkElement(s); }
                else                               { checkCharData(s); }
            }
        }
        catch (Err e) { fail(e); }
    }

    /*
     * Rule 66 (CharRef): "&#" followed by decimal digits, or "&#x" followed
     * by hexadecimal digits, terminated by ';'.
     *
     * Params:
     *      s = remaining input; advanced past the reference on success
     *      c = receives the referenced code point
     *
     * Values above U+10FFFF are rejected as soon as they are reached, so the
     * accumulator can never overflow and wrap round to a legal character.
     */
    void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
    {
        import std.format : format;

        mixin Check!("CharRef");

        c = 0;
        try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
        int radix = 10;
        if (s.length != 0 && s[0] == 'x')
        {
            s = s[1..$];
            radix = 16;
        }
        if (s.length == 0) fail("unterminated character reference");
        if (s[0] == ';')
            fail("character reference must have at least one digit");
        while (s.length != 0)
        {
            immutable char d = s[0];
            // The fall-through chain counts n up to the digit's value:
            // entering at 'F' increments fifteen times, at '0' not at all.
            int n = 0;
            switch (d)
            {
                case 'F','f': ++n;      goto case;
                case 'E','e': ++n;      goto case;
                case 'D','d': ++n;      goto case;
                case 'C','c': ++n;      goto case;
                case 'B','b': ++n;      goto case;
                case 'A','a': ++n;      goto case;
                case '9':     ++n;      goto case;
                case '8':     ++n;      goto case;
                case '7':     ++n;      goto case;
                case '6':     ++n;      goto case;
                case '5':     ++n;      goto case;
                case '4':     ++n;      goto case;
                case '3':     ++n;      goto case;
                case '2':     ++n;      goto case;
                case '1':     ++n;      goto case;
                case '0':     break;
                default: n = 100; break;
            }
            // Anything that is not a digit in the current radix ends the
            // number (this includes the terminating ';').
            if (n >= radix) break;
            c *= radix;
            c += n;
            // c was <= 0x10FFFF before this digit, so the multiply-add
            // above cannot overflow; stop now instead of letting a long
            // digit run wrap around.
            if (c > 0x10FFFF)
                fail(format("U+%04X is not a legal character",c));
            s = s[1..$];
        }
        if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
        if (s.length == 0 || s[0] != ';') fail("expected ;");
        else s = s[1..$];
    }

    /*
     * Rule 67 (Reference): a character reference when the input starts with
     * "&#", otherwise an entity reference.
     */
    void checkReference(ref string s) @safe pure // rule 67
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("Reference");

        try
        {
            dchar c;
            if (s.startsWith("&#")) checkCharRef(s,c);
            else checkEntityRef(s);
        }
        catch (Err e) { fail(e); }
    }

    /*
     * Rule 68 (EntityRef): '&', a name and ';'. The name is discarded.
     */
    void checkEntityRef(ref string s) @safe pure // rule 68
    {
        mixin Check!("EntityRef");

        try
        {
            string name;
            checkLiteral("&",s);
            checkName(s,name);
            checkLiteral(";",s);
        }
        catch (Err e) { fail(e); }
    }

    /*
     * Rule 81 (EncName): requires at least one leading ASCII letter, then
     * skips forward to the next single or double quote.
     *
     * If the input ends before a non-letter or a quote is found, the rest of
     * the input is consumed; the caller's closing-quote check then reports
     * the problem as an ordinary parse failure.
     */
    void checkEncName(ref string s) @safe pure // rule 81
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isAlpha;
        import std.utf : byCodeUnit;

        mixin Check!("EncName");

        // countUntil yields -1 when nothing matches; slicing with that
        // value would raise a range error instead of a CheckException.
        immutable ptrdiff_t alphaEnd =
            s.byCodeUnit.countUntil!(a => !isAlpha(a));
        s = s[(alphaEnd < 0 ? s.length : alphaEnd) .. $];
        // Nothing consumed means the name did not start with a letter.
        if (s is old) fail();
        immutable ptrdiff_t quotePos = s.byCodeUnit.countUntil('\"', '\'');
        s = s[(quotePos < 0 ? s.length : quotePos) .. $];
    }

    /*
     * Rule 80 (EncodingDecl): whitespace, the literal "encoding", an equals
     * sign and a quoted encoding name.
     */
    void checkEncodingDecl(ref string s) @safe pure // rule 80
    {
        mixin Check!("EncodingDecl");

        try
        {
            checkSpace(s);
            checkLiteral("encoding",s);
            checkEq(s);
            quoted!(checkEncName)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Helper functions

    /*
     * Requires the input to start with the given literal and consumes it.
     *
     * Params:
     *      literal = text that must appear at the front of s
     *      s       = remaining input; advanced past the literal on success
     */
    void checkLiteral(string literal,ref string s) @safe pure
    {
        import std.string : startsWith;

        mixin Check!("Literal");

        if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
        s = s[literal.length..$];
    }

    /*
     * Skips forward to the first occurrence of `end` and consumes it.
     *
     * Params:
     *      end = terminator to search for
     *      s   = remaining input; advanced past the terminator on success
     *
     * Throws: Err if `end` does not occur in s
     */
    void checkEnd(string end,ref string s) @safe pure
    {
        import std.string : indexOf;
        // Deliberately no mixin Check here.

        auto n = s.indexOf(end);
        if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
        s = s[n..$];
        checkLiteral(end,s);
    }

    // Metafunctions -- none of these use mixin Check

    // Applies f once and ignores an Err thrown by it.
    // NOTE(review): whether s is left untouched on failure depends on f.
    void opt(alias f)(ref string s)
    {
        try { f(s); } catch (Err e) {}
    }

    // Applies f once (propagating any failure), then as often as it succeeds.
    void plus(alias f)(ref string s)
    {
        f(s);
        star!(f)(s);
    }

    // Applies f until the input is empty or f throws Err.
    // NOTE(review): an f that succeeds without consuming input would loop
    // forever here -- confirm every f passed in consumes on success.
    void star(alias f)(ref string s)
    {
        while (s.length != 0)
        {
            try { f(s); }
            catch (Err e) { return; }
        }
    }

    // Applies f between a matching pair of single quotes when the input
    // starts with one, otherwise between a pair of double quotes.
    void quoted(alias f)(ref string s)
    {
        import std.string : startsWith;

        if (s.startsWith("'"))
        {
            checkLiteral("'",s);
            f(s);
            checkLiteral("'",s);
        }
        else
        {
            checkLiteral("\"",s);
            f(s);
            checkLiteral("\"",s);
        }
    }

    // Applies f and then g.
    void seq(alias f,alias g)(ref string s)
    {
        f(s);
        g(s);
    }
}

/*
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
*/
void check(string s) @safe pure
{
    // The check* routines take s by reference and consume it as they go.
    // Keep the complete document so that line and column numbers are
    // computed against the whole text; otherwise the "Junk found after
    // document" error is measured against the junk alone and always
    // reports line 1, column 1.
    string entire = s;
    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        e.complete(entire);
        throw e;
    }
}

// A mismatched end tag (<genre> closed by </genres>) must be reported.
@system pure unittest
{
    import std.string : indexOf;

    try
    {
        check(q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]");
        assert(false);
    }
    catch (CheckException e)
    {
        auto n = e.toString().indexOf("end tag name \"genres\" differs"~
                                      " from start tag name \"genre\"");
        assert(n != -1);
    }
}

// A comment between elements must not make a document fail the check.
@system unittest
{
    string s = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
    {
        check(s);
    }
    catch (CheckException e)
    {
        assert(0, e.toString());
    }
}

// Attribute values may contain the other kind of quote mark.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                           xmlns:stream="http://etherx.'jabber'.org/streams"
                           xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                           xml:lang="en" version="1.0" attr='a"b"c'>
                           </stream:stream></r>`;

    DocumentParser parser = new DocumentParser(test_xml);
    bool tested = false;
    parser.onStartTag["stream:stream"] = (ElementParser p) {
        assert(p.tag.attr["xmlns"] == "jabber:'client'");
        assert(p.tag.attr["from"] == "jid.pl");
        assert(p.tag.attr["attr"] == "a\"b\"c");
        tested = true;
    };
    parser.parse();
    assert(tested);
}

// Entities in attribute values and in text must be decoded. The input
// spells the ampersand as &amp;: a bare '&' is not well-formed XML and
// would leave nothing for the decoder to do.
@system unittest
{
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto xml = new DocumentParser(s);

    xml.onStartTag["Test"] = (ElementParser xml) {
        assert(xml.tag.attr["thing"] == "What & Up");
    };

    xml.onEndTag["Test"] = (in Element e) {
        assert(e.text() == "What & Up Second");
    };
    xml.parse();
}

// An attribute value containing a quote mark and '>' must survive a
// parse/print round trip. The value is written with &quot; and &gt;:
// written literally it would end the attribute after the first quote and
// the tag would no longer parse.
@system unittest
{
    string s = `<tag attr="&quot;value&gt;" />`;
    auto doc = new Document(s);
    assert(doc.toString() == s);
}

/* The base class for exceptions thrown by this module */
class XMLException : Exception { this(string msg) @safe pure { super(msg); } }

// Other exceptions

// Thrown during Comment constructor
class CommentException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown during CData constructor
class CDataException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown during XMLInstruction constructor
class XIException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown during Text constructor
class TextException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown during decode()
class DecodeException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

// Thrown when parsing for Tags
class TagException : XMLException
{ private this(string msg) @safe pure { super(msg); } }

/*
 * Thrown during check()
 */
class CheckException : XMLException
{
    CheckException err; // Parent in hierarchy
    private string tail; // Input remaining at the point of failure
    /*
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; // Line number at which parse failure occurred
    size_t column = 0; // Column number at which parse failure occurred

    /*
     * Params:
     *      tail = the input remaining at the point of failure
     *      msg  = production rule name or specific error message
     *      err  = linked exception stored in `err`, or null
     */
    private this(string tail,string msg,Err err=null) @safe pure
    {
        super(null);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    /*
     * Fills in line and column for this exception and for every exception
     * linked through `err`.
     *
     * Params:
     *      entire = the complete document; `tail` is taken to be its final
     *               tail.length code units
     */
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        // Everything that precedes the failure position.
        string head = entire[0..$-tail.length];
        // Index just past the last newline (0 when there is none, because
        // lastIndexOf then returns -1).
        ptrdiff_t n = head.lastIndexOf('\n') + 1;
        line = head.count("\n") + 1;
        // Columns are counted in code points, not in UTF-8 code units.
        dstring t = toUTF32(head[n..$]);
        column = t.length + 1;
        if (err !is null) err.complete(entire);
    }

    /*
     * Returns: one line per exception in the chain, the linked `err` chain
     * first. Each line is prefixed with its position once complete() has
     * set it.
     */
    override string toString() const @safe pure
    {
        import std.format : format;

        string s;
        if (line != 0) s = format("Line %d, column %d: ",line,column);
        s ~= msg;
        s ~= '\n';
        if (err !is null) s = err.toString() ~ s;
        return s;
    }
}

private alias Err = CheckException;

// Private helper
functions

private
{
    /*
     * Downcasts o to T.
     *
     * Throws: InvalidTypeException if o is not an instance of T
     */
    inout(T) toType(T)(inout return scope Object o)
    {
        T t = cast(T)(o);
        if (t is null)
        {
            throw new InvalidTypeException("Attempt to compare a "
                ~ T.stringof ~ " with an instance of another type");
        }
        return t;
    }

    /*
     * Removes the first n code units from s and returns them.
     *
     * Params:
     *      s = string to cut; left holding whatever follows the removed part
     *      n = number of code units to take; size_t.max (what -1 converts
     *          to) means "all of s"
     */
    string chop(ref string s, size_t n) @safe pure nothrow
    {
        if (n == -1) n = s.length;
        string t = s[0 .. n];
        s = s[n..$];
        return t;
    }

    // Consumes c from the front of s when it is there.
    // Returns: true if a character was consumed
    bool optc(ref string s, char c) @safe pure nothrow
    {
        immutable bool b = s.length != 0 && s[0] == c;
        if (b) s = s[1..$];
        return b;
    }

    // Consumes c from the front of s.
    // Throws: TagException (with an empty message) if s does not start with c
    void reqc(ref string s, char c) @safe pure
    {
        if (s.length == 0 || s[0] != c) throw new TagException("");
        s = s[1..$];
    }

    // Consumes and returns the first character of s, which must be one of
    // the characters in chars.
    // Throws: TagException (with an empty message) otherwise
    char requireOneOf(ref string s, string chars) @safe pure
    {
        import std.string : indexOf;

        if (s.length == 0 || indexOf(chars,s[0]) == -1)
            throw new TagException("");
        immutable char ch = s[0];
        s = s[1..$];
        return ch;
    }

    alias hash = .hashOf;

    // Definitions from the XML specification
    //
    // Each table is a flat list of inclusive (low, high) code point pairs in
    // ascending order, which is the layout lookup() below expects.
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
        0x3099,0x3099,0x309A,0x309A];
    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
        0x0ED9,0x0F20,0x0F29];
    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
        0x3035,0x309D,0x309E,0x30FC,0x30FE];

    /*
     * Binary search over a table of inclusive (low, high) pairs.
     *
     * Params:
     *      table = flat, ascending list of pairs: low0, high0, low1, high1...
     *      c     = value to look for
     *
     * Returns: true if c lies within one of the pairs
     */
    bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
    {
        while (table.length != 0)
        {
            // Middle index rounded down to an even number, so that it
            // always addresses the low half of a pair.
            auto m = (table.length >> 1) & ~1;
            if (c < table[m])
            {
                table = table[0 .. m];
            }
            else if (c > table[m+1])
            {
                table = table[m+2..$];
            }
            else return true;
        }
        return false;
    }

    /*
     * Returns a short, printable preview of s: every code unit outside the
     * printable ASCII range 0x20 .. 0x7E is replaced by '.', and after 40
     * code units the preview is cut off and "___" appended.
     */
    string startOf(string s) @safe nothrow pure
    {
        string r;
        foreach (char c;s)
        {
            // 0x7F (DEL) is a control character too, hence >= not >.
            r ~= (c < 0x20 || c >= 0x7F) ? '.' : c;
            if (r.length >= 40) { r ~= "___"; break; }
        }
        return r;
    }

    // Unconditionally throws an XMLException carrying s as its message.
    void exit(string s=null)
    {
        throw new XMLException(s);
    }
}