// FIXME: add classList. it is a live list and removes whitespace and duplicates when you use it.
// FIXME: xml namespace support???
// FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML
// FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility...

// FIXME: the scriptable list is quite arbitrary

// FIXME: https://developer.mozilla.org/en-US/docs/Web/CSS/:is

// xml entity references?!

/++
	This is an html DOM implementation, started with cloning
	what the browser offers in Javascript, but going well beyond
	it in convenience.

	If you can do it in Javascript, you can probably do it with
	this module, and much more.

	---
	import arsd.dom;
	import std.stdio;

	void main() {
		auto document = new Document("<html><p>paragraph</p></html>");
		writeln(document.querySelector("p"));
		document.root.innerHTML = "<p>hey</p>";
		writeln(document);
	}
	---

	BTW: this file optionally depends on `arsd.characterencodings`, to
	help it correctly read files from the internet. You should be able to
	get characterencodings.d from the same place you got this file.

	If you want it to stand alone, just always use the `Document.parseUtf8`
	function or the constructor that takes a string.

	Symbol_groups:

	core_functionality =

	These members provide core functionality. The members on these classes
	will provide most of your direct interaction.

	bonus_functionality =

	These provide additional functionality for special use cases.

	implementations =

	These provide implementations of other functionality.
+/
module arsd.dom;

// FIXME: support the css standard namespace thing in the selectors too

// When arsd.jsvar is not part of the build, `scriptable` is defined here as a
// plain string so code that refers to it still compiles.
version (with_arsd_jsvar) {
	import arsd.jsvar;
} else {
	enum scriptable = "arsd_jsvar_compatible";
}

// Compile-time filter for opDispatch: answers whether `name` is one of the
// attributes we allow to be accessed directly, without going through .attr.
// Returns true for a listed attribute name and false for anything else.
bool isConvenientAttribute(string name) {
	switch (name) {
		case "name", "id", "href", "value", "checked", "selected", "type", "src",
			"content", "pattern", "placeholder", "required", "alt", "rel",
			"method", "action", "enctype":
			return true;
		default:
			return false;
	}
}

// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too

// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
	we could do
	meh this sux

	auto xpath = XPath(element);

	// get the first p
	xpath.p[0].a["href"]
*/

/// The main document interface, including a html parser.
/// Group: core_functionality
class Document : FileResource {
	/// Convenience method for web scraping: fetches `url` with an HTTP GET and
	/// parses the response body into a new Document. Requires [arsd.http2] to be
	/// included in the build as well as [arsd.characterencodings].
	///
	/// With `strictMode` set, the body is parsed case-sensitively and strictly,
	/// using the charset reported by the response; otherwise it goes through
	/// [parseGarbage].
	static Document fromUrl()(string url, bool strictMode = false) {
		import arsd.http2;

		auto http = new HttpClient();
		auto response = http.navigateTo(Uri(url), HttpVerb.GET).waitForCompletion();
		auto markup = cast(string) response.content;

		auto result = new Document();
		if (!strictMode)
			result.parseGarbage(markup);
		else
			result.parse(markup, true, true, response.contentTypeCharset);

		return result;
	}

	/// Creates a document by handing `data` and the two flags to [parseUtf8].
116 this(string data, bool caseSensitive = false, bool strict = false) { 117 parseUtf8(data, caseSensitive, strict); 118 } 119 120 /** 121 Creates an empty document. It has *nothing* in it at all. 122 */ 123 this() { 124 125 } 126 127 /// This is just something I'm toying with. Right now, you use opIndex to put in css selectors. 128 /// It returns a struct that forwards calls to all elements it holds, and returns itself so you 129 /// can chain it. 130 /// 131 /// Example: document["p"].innerText("hello").addClass("modified"); 132 /// 133 /// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClas("modified"); } 134 /// 135 /// Note: always use function calls (not property syntax) and don't use toString in there for best results. 136 /// 137 /// You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe 138 /// you could put in some kind of custom filter function tho. 139 ElementCollection opIndex(string selector) { 140 auto e = ElementCollection(this.root); 141 return e[selector]; 142 } 143 144 string _contentType = "text/html; charset=utf-8"; 145 146 /// If you're using this for some other kind of XML, you can 147 /// set the content type here. 148 /// 149 /// Note: this has no impact on the function of this class. 150 /// It is only used if the document is sent via a protocol like HTTP. 151 /// 152 /// This may be called by parse() if it recognizes the data. Otherwise, 153 /// if you don't set it, it assumes text/html; charset=utf-8. 154 @property string contentType(string mimeType) { 155 _contentType = mimeType; 156 return _contentType; 157 } 158 159 /// implementing the FileResource interface, useful for sending via 160 /// http automatically. 161 override @property string contentType() const { 162 return _contentType; 163 } 164 165 /// implementing the FileResource interface; it calls toString. 
166 override immutable(ubyte)[] getData() const { 167 return cast(immutable(ubyte)[]) this.toString(); 168 } 169 170 /// Concatenates any consecutive text nodes 171 /* 172 void normalize() { 173 174 } 175 */ 176 177 /// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them. 178 /// Call this before calling parse(). 179 180 /// Note this will also preserve the prolog and doctype from the original file, if there was one. 181 void enableAddingSpecialTagsToDom() { 182 parseSawComment = (string) => true; 183 parseSawAspCode = (string) => true; 184 parseSawPhpCode = (string) => true; 185 parseSawQuestionInstruction = (string) => true; 186 parseSawBangInstruction = (string) => true; 187 } 188 189 /// If the parser sees a html comment, it will call this callback 190 /// <!-- comment --> will call parseSawComment(" comment ") 191 /// Return true if you want the node appended to the document. 192 bool delegate(string) parseSawComment; 193 194 /// If the parser sees <% asp code... %>, it will call this callback. 195 /// It will be passed "% asp code... %" or "%= asp code .. %" 196 /// Return true if you want the node appended to the document. 197 bool delegate(string) parseSawAspCode; 198 199 /// If the parser sees <?php php code... ?>, it will call this callback. 200 /// It will be passed "?php php code... ?" or "?= asp code .. ?" 201 /// Note: dom.d cannot identify the other php <? code ?> short format. 202 /// Return true if you want the node appended to the document. 203 bool delegate(string) parseSawPhpCode; 204 205 /// if it sees a <?xxx> that is not php or asp 206 /// it calls this function with the contents. 207 /// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo") 208 /// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>. 
209 /// Return true if you want the node appended to the document. 210 bool delegate(string) parseSawQuestionInstruction; 211 212 /// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment), 213 /// it calls this function with the contents. 214 /// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo") 215 /// Return true if you want the node appended to the document. 216 bool delegate(string) parseSawBangInstruction; 217 218 /// Given the kind of garbage you find on the Internet, try to make sense of it. 219 /// Equivalent to document.parse(data, false, false, null); 220 /// (Case-insensitive, non-strict, determine character encoding from the data.) 221 222 /// NOTE: this makes no attempt at added security. 223 /// 224 /// It is a template so it lazily imports characterencodings. 225 void parseGarbage()(string data) { 226 parse(data, false, false, null); 227 } 228 229 /// Parses well-formed UTF-8, case-sensitive, XML or XHTML 230 /// Will throw exceptions on things like unclosed tags. 231 void parseStrict(string data) { 232 parseStream(toUtf8Stream(data), true, true); 233 } 234 235 /// Parses well-formed UTF-8 in loose mode (by default). Tries to correct 236 /// tag soup, but does NOT try to correct bad character encodings. 237 /// 238 /// They will still throw an exception. 239 void parseUtf8(string data, bool caseSensitive = false, bool strict = false) { 240 parseStream(toUtf8Stream(data), caseSensitive, strict); 241 } 242 243 // this is a template so we get lazy import behavior 244 Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) { 245 import arsd.characterencodings; 246 247 // gotta determine the data encoding. If you know it, pass it in above to skip all this. 248 if (dataEncoding is null) { 249 dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata); 250 // it can't tell... probably a random 8 bit encoding. Let's check the document itself. 
251 // Now, XML and HTML can both list encoding in the document, but we can't really parse 252 // it here without changing a lot of code until we know the encoding. So I'm going to 253 // do some hackish string checking. 254 if (dataEncoding is null) { 255 auto dataAsBytes = cast(immutable(ubyte)[]) rawdata; 256 // first, look for an XML prolog 257 auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\""); 258 if (idx != -1) { 259 idx += "encoding=\"".length; 260 // we're probably past the prolog if it's this far in; we might be looking at 261 // content. Forget about it. 262 if (idx > 100) 263 idx = -1; 264 } 265 // if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5).. 266 if (idx == -1) { 267 idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset="); 268 if (idx != -1) { 269 idx += "charset=".length; 270 if (dataAsBytes[idx] == '"') 271 idx++; 272 } 273 } 274 275 // found something in either branch... 276 if (idx != -1) { 277 // read till a quote or about 12 chars, whichever comes first... 278 auto end = idx; 279 while (end < dataAsBytes.length && dataAsBytes[end] != '"' && end - idx < 12) 280 end++; 281 282 dataEncoding = cast(string) dataAsBytes[idx .. end]; 283 } 284 // otherwise, we just don't know. 285 } 286 } 287 288 if (dataEncoding is null) { 289 if (strict) 290 throw new MarkupException("I couldn't figure out the encoding of this document."); 291 else // if we really don't know by here, it means we already tried UTF-8, 292 // looked for utf 16 and 32 byte order marks, and looked for xml or meta 293 // tags... let's assume it's Windows-1252, since that's probably the most 294 // common aside from utf that wouldn't be labeled. 295 296 dataEncoding = "Windows 1252"; 297 } 298 299 // and now, go ahead and convert it. 
300 301 string data; 302 303 if (!strict) { 304 // if we're in non-strict mode, we need to check 305 // the document for mislabeling too; sometimes 306 // web documents will say they are utf-8, but aren't 307 // actually properly encoded. If it fails to validate, 308 // we'll assume it's actually Windows encoding - the most 309 // likely candidate for mislabeled garbage. 310 dataEncoding = dataEncoding.toLower(); 311 dataEncoding = dataEncoding.replace(" ", ""); 312 dataEncoding = dataEncoding.replace("-", ""); 313 dataEncoding = dataEncoding.replace("_", ""); 314 if (dataEncoding == "utf8") { 315 try { 316 validate(rawdata); 317 } catch (UTFException e) { 318 dataEncoding = "Windows 1252"; 319 } 320 } 321 } 322 323 if (dataEncoding != "UTF-8") { 324 if (strict) 325 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 326 else { 327 try { 328 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 329 } catch (Exception e) { 330 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252"); 331 } 332 } 333 } else 334 data = rawdata; 335 336 return toUtf8Stream(data); 337 } 338 339 private Utf8Stream toUtf8Stream(in string rawdata) { 340 string data = rawdata; 341 static if (is(Utf8Stream == string)) 342 return data; 343 else 344 return new Utf8Stream(data); 345 } 346 347 /** 348 Take XMLish data and try to make the DOM tree out of it. 349 350 The goal isn't to be perfect, but to just be good enough to 351 approximate Javascript's behavior. 352 353 If strict, it throws on something that doesn't make sense. 354 (Examples: mismatched tags. It doesn't validate!) 355 If not strict, it tries to recover anyway, and only throws 356 when something is REALLY unworkable. 357 358 If strict is false, it uses a magic list of tags that needn't 359 be closed. If you are writing a document specifically for this, 360 try to avoid such - use self closed tags at least. Easier to parse. 
361 362 The dataEncoding argument can be used to pass a specific 363 charset encoding for automatic conversion. If null (which is NOT 364 the default!), it tries to determine from the data itself, 365 using the xml prolog or meta tags, and assumes UTF-8 if unsure. 366 367 If this assumption is wrong, it can throw on non-ascii 368 characters! 369 370 371 Note that it previously assumed the data was encoded as UTF-8, which 372 is why the dataEncoding argument defaults to that. 373 374 So it shouldn't break backward compatibility. 375 376 But, if you want the best behavior on wild data - figuring it out from the document 377 instead of assuming - you'll probably want to change that argument to null. 378 379 This is a template so it lazily imports arsd.characterencodings, which is required 380 to fix up data encodings. 381 382 If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the 383 dependency. If it is data from the Internet though, a random website, the encoding 384 is often a lie. This function, if dataEncoding == null, can correct for that, or 385 you can try parseGarbage. In those cases, arsd.characterencodings is required to 386 compile. 387 */ 388 void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, 389 string dataEncoding = "UTF-8") { 390 auto data = handleDataEncoding(rawdata, dataEncoding, strict); 391 parseStream(data, caseSensitive, strict); 392 } 393 394 // note: this work best in strict mode, unless data is just a simple string wrapper 395 void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) { 396 // FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler 397 // of my big app. 398 399 assert(data !is null); 400 401 // go through character by character. 402 // if you see a <, consider it a tag. 
403 // name goes until the first non tagname character 404 // then see if it self closes or has an attribute 405 406 // if not in a tag, anything not a tag is a big text 407 // node child. It ends as soon as it sees a < 408 409 // Whitespace in text or attributes is preserved, but not between attributes 410 411 // & and friends are converted when I know them, left the same otherwise 412 413 // this it should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 414 //validate(data); // it *must* be UTF-8 for this to work correctly 415 416 sizediff_t pos = 0; 417 418 clear(); 419 420 loose = !caseSensitive; 421 422 bool sawImproperNesting = false; 423 bool paragraphHackfixRequired = false; 424 425 int getLineNumber(sizediff_t p) { 426 int line = 1; 427 foreach (c; data[0 .. p]) 428 if (c == '\n') 429 line++; 430 return line; 431 } 432 433 void parseError(string message) { 434 throw new MarkupException(format("char %d (line %d): %s", pos, 435 getLineNumber(pos), message)); 436 } 437 438 bool eatWhitespace() { 439 bool ateAny = false; 440 while (pos < data.length && data[pos].isSimpleWhite) { 441 pos++; 442 ateAny = true; 443 } 444 return ateAny; 445 } 446 447 string readTagName() { 448 // remember to include : for namespaces 449 // basically just keep going until >, /, or whitespace 450 auto start = pos; 451 while (data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) { 452 pos++; 453 if (pos == data.length) { 454 if (strict) 455 throw new Exception("tag name incomplete when file ended"); 456 else 457 break; 458 } 459 } 460 461 if (!caseSensitive) 462 return toLower(data[start .. pos]); 463 else 464 return data[start .. 
pos]; 465 } 466 467 string readAttributeName() { 468 // remember to include : for namespaces 469 // basically just keep going until >, /, or whitespace 470 auto start = pos; 471 while (data[pos] != '>' && data[pos] != '/' && data[pos] != '=' 472 && !data[pos].isSimpleWhite) { 473 if (data[pos] == '<') { 474 if (strict) 475 throw new MarkupException( 476 "The character < can never appear in an attribute name. Line " ~ to!string( 477 getLineNumber(pos))); 478 else 479 break; // e.g. <a href="something" <img src="poo" /></a>. The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 480 } 481 pos++; 482 if (pos == data.length) { 483 if (strict) 484 throw new Exception("unterminated attribute name"); 485 else 486 break; 487 } 488 } 489 490 if (!caseSensitive) 491 return toLower(data[start .. pos]); 492 else 493 return data[start .. pos]; 494 } 495 496 string readAttributeValue() { 497 if (pos >= data.length) { 498 if (strict) 499 throw new Exception("no attribute value before end of file"); 500 else 501 return null; 502 } 503 switch (data[pos]) { 504 case '\'': 505 case '"': 506 auto started = pos; 507 char end = data[pos]; 508 pos++; 509 auto start = pos; 510 while (pos < data.length && data[pos] != end) 511 pos++; 512 if (strict && pos == data.length) 513 throw new MarkupException( 514 "Unclosed attribute value, started on char " ~ to!string(started)); 515 string v = htmlEntitiesDecode(data[start .. 
pos], strict); 516 pos++; // skip over the end 517 return v; 518 default: 519 if (strict) 520 parseError("Attributes must be quoted"); 521 // read until whitespace or terminator (/> or >) 522 auto start = pos; 523 while (pos < data.length && data[pos] != '>' && // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 524 !(data[pos] == '/' 525 && pos + 1 < data.length && data[pos + 1] == '>') 526 && !data[pos].isSimpleWhite) 527 pos++; 528 529 string v = htmlEntitiesDecode(data[start .. pos], strict); 530 // don't skip the end - we'll need it later 531 return v; 532 } 533 } 534 535 TextNode readTextNode() { 536 auto start = pos; 537 while (pos < data.length && data[pos] != '<') { 538 pos++; 539 } 540 541 return TextNode.fromUndecodedString(this, data[start .. pos]); 542 } 543 544 // this is obsolete! 545 RawSource readCDataNode() { 546 auto start = pos; 547 while (pos < data.length && data[pos] != '<') { 548 pos++; 549 } 550 551 return new RawSource(this, data[start .. pos]); 552 } 553 554 struct Ele { 555 int type; // element or closing tag or nothing 556 /* 557 type == 0 means regular node, self-closed (element is valid) 558 type == 1 means closing tag (payload is the tag name, element may be valid) 559 type == 2 means you should ignore it completely 560 type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not 561 type == 4 means the document was totally empty 562 */ 563 Element element; // for type == 0 or type == 3 564 string payload; // for type == 1 565 } 566 // recursively read a tag 567 Ele readElement(string[] parentChain = null) { 568 // FIXME: this is the slowest function in this module, by far, even in strict mode. 569 // Loose mode should perform decently, but strict mode is the important one. 
570 if (!strict && parentChain is null) 571 parentChain = []; 572 573 static string[] recentAutoClosedTags; 574 575 if (pos >= data.length) { 576 if (strict) { 577 throw new MarkupException( 578 "Gone over the input (is there no root element or did it never close?), chain: " ~ to!string( 579 parentChain)); 580 } else { 581 if (parentChain.length) 582 return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended 583 else 584 return Ele(4); // signal emptiness upstream 585 } 586 } 587 588 if (data[pos] != '<') { 589 return Ele(0, readTextNode(), null); 590 } 591 592 enforce(data[pos] == '<'); 593 pos++; 594 if (pos == data.length) { 595 if (strict) 596 throw new MarkupException("Found trailing < at end of file"); 597 // if not strict, we'll just skip the switch 598 } else 599 switch (data[pos]) { 600 // I don't care about these, so I just want to skip them 601 case '!': // might be a comment, a doctype, or a special instruction 602 pos++; 603 604 // FIXME: we should store these in the tree too 605 // though I like having it stripped out tbh. 606 607 if (pos == data.length) { 608 if (strict) 609 throw new MarkupException("<! opened at end of file"); 610 } else if (data[pos] == '-' && (pos + 1 < data.length) && data[pos + 1] == '-') { 611 // comment 612 pos += 2; 613 614 // FIXME: technically, a comment is anything 615 // between -- and -- inside a <!> block. 616 // so in <!-- test -- lol> , the " lol" is NOT a comment 617 // and should probably be handled differently in here, but for now 618 // I'll just keep running until --> since that's the common way 619 620 auto commentStart = pos; 621 while (pos + 3 < data.length && data[pos .. 
pos + 3] != "-->") 622 pos++; 623 624 auto end = commentStart; 625 626 if (pos + 3 >= data.length) { 627 if (strict) 628 throw new MarkupException("unclosed comment"); 629 end = data.length; 630 pos = data.length; 631 } else { 632 end = pos; 633 assert(data[pos] == '-'); 634 pos++; 635 assert(data[pos] == '-'); 636 pos++; 637 assert(data[pos] == '>'); 638 pos++; 639 } 640 641 if (parseSawComment !is null) 642 if (parseSawComment(data[commentStart .. end])) { 643 return Ele(3, new HtmlComment(this, data[commentStart .. end]), null); 644 } 645 } else if (pos + 7 <= data.length && data[pos .. pos + 7] == "[CDATA[") { 646 pos += 7; 647 648 auto cdataStart = pos; 649 650 ptrdiff_t end = -1; 651 typeof(end) cdataEnd; 652 653 if (pos < data.length) { 654 // cdata isn't allowed to nest, so this should be generally ok, as long as it is found 655 end = data[pos .. $].indexOf("]]>"); 656 } 657 658 if (end == -1) { 659 if (strict) 660 throw new MarkupException("Unclosed CDATA section"); 661 end = pos; 662 cdataEnd = pos; 663 } else { 664 cdataEnd = pos + end; 665 pos = cdataEnd + 3; 666 } 667 668 return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null); 669 } else { 670 auto start = pos; 671 while (pos < data.length && data[pos] != '>') 672 pos++; 673 674 auto bangEnds = pos; 675 if (pos == data.length) { 676 if (strict) 677 throw new MarkupException("unclosed processing instruction (<!xxx>)"); 678 } else 679 pos++; // skipping the > 680 681 if (parseSawBangInstruction !is null) 682 if (parseSawBangInstruction(data[start .. bangEnds])) { 683 // FIXME: these should be able to modify the parser state, 684 // doing things like adding entities, somehow. 685 686 return Ele(3, new BangInstruction(this, data[start .. 
bangEnds]), null); 687 } 688 } 689 690 /* 691 if(pos < data.length && data[pos] == '>') 692 pos++; // skip the > 693 else 694 assert(!strict); 695 */ 696 break; 697 case '%': 698 case '?': 699 /* 700 Here's what we want to support: 701 702 <% asp code %> 703 <%= asp code %> 704 <?php php code ?> 705 <?= php code ?> 706 707 The contents don't really matter, just if it opens with 708 one of the above for, it ends on the two char terminator. 709 710 <?something> 711 this is NOT php code 712 because I've seen this in the wild: <?EM-dummyText> 713 714 This could be php with shorttags which would be cut off 715 prematurely because if(a >) - that > counts as the close 716 of the tag, but since dom.d can't tell the difference 717 between that and the <?EM> real world example, it will 718 not try to look for the ?> ending. 719 720 The difference between this and the asp/php stuff is that it 721 ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end 722 on >. 723 */ 724 725 char end = data[pos]; 726 auto started = pos; 727 bool isAsp = end == '%'; 728 int currentIndex = 0; 729 bool isPhp = false; 730 bool isEqualTag = false; 731 int phpCount = 0; 732 733 more: 734 pos++; // skip the start 735 if (pos == data.length) { 736 if (strict) 737 throw new MarkupException("Unclosed <" ~ end ~ " by end of file"); 738 } else { 739 currentIndex++; 740 if (currentIndex == 1 && data[pos] == '=') { 741 if (!isAsp) 742 isPhp = true; 743 isEqualTag = true; 744 goto more; 745 } 746 if (currentIndex == 1 && data[pos] == 'p') 747 phpCount++; 748 if (currentIndex == 2 && data[pos] == 'h') 749 phpCount++; 750 if (currentIndex == 3 && data[pos] == 'p' && phpCount == 2) 751 isPhp = true; 752 753 if (data[pos] == '>') { 754 if ((isAsp || isPhp) && data[pos - 1] != end) 755 goto more; 756 // otherwise we're done 757 } else 758 goto more; 759 } 760 761 //writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]); 762 auto code = data[started .. 
pos]; 763 764 assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length)); 765 if (pos < data.length) 766 pos++; // get past the > 767 768 if (isAsp && parseSawAspCode !is null) { 769 if (parseSawAspCode(code)) { 770 return Ele(3, new AspCode(this, code), null); 771 } 772 } else if (isPhp && parseSawPhpCode !is null) { 773 if (parseSawPhpCode(code)) { 774 return Ele(3, new PhpCode(this, code), null); 775 } 776 } else if (!isAsp && !isPhp && parseSawQuestionInstruction !is null) { 777 if (parseSawQuestionInstruction(code)) { 778 return Ele(3, new QuestionInstruction(this, code), null); 779 } 780 } 781 break; 782 case '/': // closing an element 783 pos++; // skip the start 784 auto p = pos; 785 while (pos < data.length && data[pos] != '>') 786 pos++; 787 //writefln("</%s>", data[p..pos]); 788 if (pos == data.length && data[pos - 1] != '>') { 789 if (strict) 790 throw new MarkupException("File ended before closing tag had a required >"); 791 else 792 data ~= ">"; // just hack it in 793 } 794 pos++; // skip the '>' 795 796 string tname = data[p .. pos - 1]; 797 if (!caseSensitive) 798 tname = tname.toLower(); 799 800 return Ele(1, null, tname); // closing tag reports itself here 801 case ' ': // assume it isn't a real element... 802 if (strict) { 803 parseError("bad markup - improperly placed <"); 804 assert(0); // parseError always throws 805 } else 806 return Ele(0, TextNode.fromUndecodedString(this, "<"), null); 807 default: 808 809 if (!strict) { 810 // what about something that kinda looks like a tag, but isn't? 811 auto nextTag = data[pos .. $].indexOf("<"); 812 auto closeTag = data[pos .. $].indexOf(">"); 813 if (closeTag != -1 && nextTag != -1) 814 if (nextTag < closeTag) { 815 // since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically 816 817 auto equal = data[pos .. 
$].indexOf("=\""); 818 if (equal != -1 && equal < closeTag) { 819 // this MIGHT be ok, soldier on 820 } else { 821 // definitely no good, this must be a (horribly distorted) text node 822 pos++; // skip the < we're on - don't want text node to end prematurely 823 auto node = readTextNode(); 824 node.contents = "<" ~ node.contents; // put this back 825 return Ele(0, node, null); 826 } 827 } 828 } 829 830 string tagName = readTagName(); 831 string[string] attributes; 832 833 Ele addTag(bool selfClosed) { 834 if (selfClosed) 835 pos++; 836 else { 837 if (!strict) 838 if (tagName.isInArray(selfClosedElements)) // these are de-facto self closed 839 selfClosed = true; 840 } 841 842 if (strict) 843 enforce(data[pos] == '>', 844 format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", 845 data[pos], data[pos - 100 .. pos + 100])); 846 else { 847 // if we got here, it's probably because a slash was in an 848 // unquoted attribute - don't trust the selfClosed value 849 if (!selfClosed) 850 selfClosed = tagName.isInArray(selfClosedElements); 851 852 while (pos < data.length && data[pos] != '>') 853 pos++; 854 855 if (pos >= data.length) { 856 // the tag never closed 857 assert(data.length != 0); 858 pos = data.length - 1; // rewinding so it hits the end at the bottom.. 859 } 860 } 861 862 auto whereThisTagStarted = pos; // for better error messages 863 864 pos++; 865 866 auto e = createElement(tagName); 867 e.attributes = attributes; 868 version (dom_node_indexes) { 869 if (e.dataset.nodeIndex.length == 0) 870 e.dataset.nodeIndex = to!string(&(e.attributes)); 871 } 872 e.selfClosed = selfClosed; 873 e.parseAttributes(); 874 875 // HACK to handle script and style as a raw data section as it is in HTML browsers 876 if (tagName == "script" || tagName == "style") { 877 if (!selfClosed) { 878 string closer = "</" ~ tagName ~ ">"; 879 ptrdiff_t ending; 880 if (pos >= data.length) 881 ending = -1; 882 else 883 ending = indexOf(data[pos .. 
$], closer); 884 885 ending = indexOf(data[pos .. $], closer, 0, (loose 886 ? CaseSensitive.no : CaseSensitive.yes)); 887 /* 888 if(loose && ending == -1 && pos < data.length) 889 ending = indexOf(data[pos..$], closer.toUpper()); 890 */ 891 if (ending == -1) { 892 if (strict) 893 throw new Exception("tag " ~ tagName ~ " never closed"); 894 else { 895 // let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit. 896 if (pos < data.length) { 897 e = new TextNode(this, data[pos .. $]); 898 pos = data.length; 899 } 900 } 901 } else { 902 ending += pos; 903 e.innerRawSource = data[pos .. ending]; 904 pos = ending + closer.length; 905 } 906 } 907 return Ele(0, e, null); 908 } 909 910 bool closed = selfClosed; 911 912 void considerHtmlParagraphHack(Element n) { 913 assert(!strict); 914 if (e.tagName == "p" && e.tagName == n.tagName) { 915 // html lets you write <p> para 1 <p> para 1 916 // but in the dom tree, they should be siblings, not children. 
917 paragraphHackfixRequired = true; 918 } 919 } 920 921 //writef("<%s>", tagName); 922 while (!closed) { 923 Ele n; 924 if (strict) 925 n = readElement(); 926 else 927 n = readElement(parentChain ~ tagName); 928 929 if (n.type == 4) 930 return n; // the document is empty 931 932 if (n.type == 3 && n.element !is null) { 933 // special node, append if possible 934 if (e !is null) 935 e.appendChild(n.element); 936 else 937 piecesBeforeRoot ~= n.element; 938 } else if (n.type == 0) { 939 if (!strict) 940 considerHtmlParagraphHack(n.element); 941 e.appendChild(n.element); 942 } else if (n.type == 1) { 943 bool found = false; 944 if (n.payload != tagName) { 945 if (strict) 946 parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, 947 tagName, getLineNumber(whereThisTagStarted))); 948 else { 949 sawImproperNesting = true; 950 // this is so we don't drop several levels of awful markup 951 if (n.element) { 952 if (!strict) 953 considerHtmlParagraphHack(n.element); 954 e.appendChild(n.element); 955 n.element = null; 956 } 957 958 // is the element open somewhere up the chain? 959 foreach (i, parent; parentChain) 960 if (parent == n.payload) { 961 recentAutoClosedTags ~= tagName; 962 // just rotating it so we don't inadvertently break stuff with vile crap 963 if (recentAutoClosedTags.length > 4) 964 recentAutoClosedTags = recentAutoClosedTags[1 .. $]; 965 966 n.element = e; 967 return n; 968 } 969 970 // if not, this is a text node; we can't fix it up... 
971 972 // If it's already in the tree somewhere, assume it is closed by algorithm 973 // and we shouldn't output it - odds are the user just flipped a couple tags 974 foreach (ele; e.tree) { 975 if (ele.tagName == n.payload) { 976 found = true; 977 break; 978 } 979 } 980 981 foreach (ele; recentAutoClosedTags) { 982 if (ele == n.payload) { 983 found = true; 984 break; 985 } 986 } 987 988 if (!found) // if not found in the tree though, it's probably just text 989 e.appendChild(TextNode.fromUndecodedString(this, 990 "</" ~ n.payload ~ ">")); 991 } 992 } else { 993 if (n.element) { 994 if (!strict) 995 considerHtmlParagraphHack(n.element); 996 e.appendChild(n.element); 997 } 998 } 999 1000 if (n.payload == tagName) // in strict mode, this is always true 1001 closed = true; 1002 } else { /*throw new Exception("wtf " ~ tagName);*/ } 1003 } 1004 //writef("</%s>\n", tagName); 1005 return Ele(0, e, null); 1006 } 1007 1008 // if a tag was opened but not closed by end of file, we can arrive here 1009 if (!strict && pos >= data.length) 1010 return addTag(false); 1011 //else if(strict) assert(0); // should be caught before 1012 1013 switch (data[pos]) { 1014 default: 1015 assert(0); 1016 case '/': // self closing tag 1017 return addTag(true); 1018 case '>': 1019 return addTag(false); 1020 case ' ': 1021 case '\t': 1022 case '\n': 1023 case '\r': 1024 // there might be attributes... 1025 moreAttributes: 1026 eatWhitespace(); 1027 1028 // same deal as above the switch.... 
1029 if (!strict && pos >= data.length) 1030 return addTag(false); 1031 1032 if (strict && pos >= data.length) 1033 throw new MarkupException("tag open, didn't find > before end of file"); 1034 1035 switch (data[pos]) { 1036 case '/': // self closing tag 1037 return addTag(true); 1038 case '>': // closed tag; open -- we now read the contents 1039 return addTag(false); 1040 default: // it is an attribute 1041 string attrName = readAttributeName(); 1042 string attrValue = attrName; 1043 1044 bool ateAny = eatWhitespace(); 1045 if (strict && ateAny) 1046 throw new MarkupException( 1047 "inappropriate whitespace after attribute name"); 1048 1049 if (pos >= data.length) { 1050 if (strict) 1051 assert(0, "this should have thrown in readAttributeName"); 1052 else { 1053 data ~= ">"; 1054 goto blankValue; 1055 } 1056 } 1057 if (data[pos] == '=') { 1058 pos++; 1059 1060 ateAny = eatWhitespace(); 1061 if (strict && ateAny) 1062 throw new MarkupException( 1063 "inappropriate whitespace after attribute equals"); 1064 1065 attrValue = readAttributeValue(); 1066 1067 eatWhitespace(); 1068 } 1069 1070 blankValue: 1071 1072 if (strict && attrName in attributes) 1073 throw new MarkupException("Repeated attribute: " ~ attrName); 1074 1075 if (attrName.strip().length) 1076 attributes[attrName] = attrValue; 1077 else if (strict) 1078 throw new MarkupException("wtf, zero length attribute name"); 1079 1080 if (!strict && pos < data.length && data[pos] == '<') { 1081 // this is the broken tag that doesn't have a > at the end 1082 data = data[0 .. pos] ~ ">" ~ data[pos .. $]; 1083 // let's insert one as a hack 1084 goto case '>'; 1085 } 1086 1087 goto moreAttributes; 1088 } 1089 } 1090 } 1091 1092 return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly. 1093 //assert(0); 1094 } 1095 1096 eatWhitespace(); 1097 Ele r; 1098 do { 1099 r = readElement(); // there SHOULD only be one element... 
1100 1101 if (r.type == 3 && r.element !is null) 1102 piecesBeforeRoot ~= r.element; 1103 1104 if (r.type == 4) 1105 break; // the document is completely empty... 1106 } 1107 while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1108 1109 root = r.element; 1110 1111 if (!strict) // in strict mode, we'll just ignore stuff after the xml 1112 while (r.type != 4) { 1113 r = readElement(); 1114 if (r.type != 4 && r.type != 2) { // if not empty and not ignored 1115 if (r.element !is null) 1116 piecesAfterRoot ~= r.element; 1117 } 1118 } 1119 1120 if (root is null) { 1121 if (strict) 1122 assert(0, "empty document should be impossible in strict mode"); 1123 else 1124 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1125 } 1126 1127 if (paragraphHackfixRequired) { 1128 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1129 1130 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1131 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1132 1133 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1134 // Kind of inefficient because we can't detect when we recurse back out of a node. 1135 Element[Element] insertLocations; 1136 auto iterator = root.tree; 1137 foreach (ele; iterator) { 1138 if (ele.parentNode is null) 1139 continue; 1140 1141 if (ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) { 1142 auto shouldBePreviousSibling = ele.parentNode; 1143 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 
1144 if (auto p = holder in insertLocations) { 1145 shouldBePreviousSibling = *p; 1146 assert(shouldBePreviousSibling.parentNode is holder); 1147 } 1148 ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree()); 1149 insertLocations[holder] = ele; 1150 iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up. 1151 } 1152 } 1153 } 1154 } 1155 1156 /* end massive parse function */ 1157 1158 /// Gets the <title> element's innerText, if one exists 1159 @property string title() { 1160 bool doesItMatch(Element e) { 1161 return (e.tagName == "title"); 1162 } 1163 1164 auto e = findFirst(&doesItMatch); 1165 if (e) 1166 return e.innerText(); 1167 return ""; 1168 } 1169 1170 /// Sets the title of the page, creating a <title> element if needed. 1171 @property void title(string t) { 1172 bool doesItMatch(Element e) { 1173 return (e.tagName == "title"); 1174 } 1175 1176 auto e = findFirst(&doesItMatch); 1177 1178 if (!e) { 1179 e = createElement("title"); 1180 auto heads = getElementsByTagName("head"); 1181 if (heads.length) 1182 heads[0].appendChild(e); 1183 } 1184 1185 if (e) 1186 e.innerText = t; 1187 } 1188 1189 // FIXME: would it work to alias root this; ???? might be a good idea 1190 /// These functions all forward to the root element. See the documentation in the Element class. 
Element getElementById(string id) {
    return root.getElementById(id);
}

/// ditto
final SomeElementType requireElementById(SomeElementType = Element)(string id,
        string file = __FILE__, size_t line = __LINE__)
        if (is(SomeElementType : Element))
out (ret) {
    assert(ret !is null);
}
body {
    return root.requireElementById!(SomeElementType)(id, file, line);
}

/// ditto
final SomeElementType requireSelector(SomeElementType = Element)(string selector,
        string file = __FILE__, size_t line = __LINE__)
        if (is(SomeElementType : Element))
out (ret) {
    assert(ret !is null);
}
body {
    // Uses Document.querySelector (not root's) so the root itself may match.
    auto e = cast(SomeElementType) querySelector(selector);
    if (e is null)
        throw new ElementNotFoundException(SomeElementType.stringof,
                selector, this.root, file, line);
    return e;
}

/// ditto
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(
        string selector, string file = __FILE__, size_t line = __LINE__)
        if (is(SomeElementType : Element)) {
    // Never throws for a missing element: the (possibly null) result is
    // handed to the MaybeNullElement wrapper.
    auto e = cast(SomeElementType) querySelector(selector);
    return MaybeNullElement!SomeElementType(e);
}

/// ditto
@scriptable Element querySelector(string selector) {
    // see comment below on Document.querySelectorAll
    auto s = Selector(selector); //, !loose);
    foreach (ref comp; s.components)
        if (comp.parts.length && comp.parts[0].separation == 0)
            comp.parts[0].separation = -1;

    // only the first match is wanted, so stop the lazy search right away
    foreach (e; s.getMatchingElementsLazy(this.root))
        return e;
    return null;
}

/// ditto
@scriptable Element[] querySelectorAll(string selector) {
    // In standards-compliant code, the document is slightly magical
    // in that it is a pseudoelement at top level. It should actually
    // match the root as one of its children.
    //
    // In versions of dom.d before Dec 29 2019, this worked because
    // querySelectorAll was willing to return itself. With that bug fix
    // (search "arbitrary id asduiwh" in this file for associated unittest)
    // this would have failed. Hence adding back the root if it matches the
    // selector itself.
    //
    // I'd love to do this better later.

    auto s = Selector(selector); //, !loose);
    foreach (ref comp; s.components)
        if (comp.parts.length && comp.parts[0].separation == 0)
            comp.parts[0].separation = -1;
    return s.getMatchingElements(this.root);
}

/// ditto
deprecated("use querySelectorAll instead") Element[] getElementsBySelector(string selector) {
    return root.getElementsBySelector(selector);
}

/// ditto
@scriptable Element[] getElementsByTagName(string tag) {
    return root.getElementsByTagName(tag);
}

/// ditto
@scriptable Element[] getElementsByClassName(string tag) {
    return root.getElementsByClassName(tag);
}

/**
    Returns the first element (searching from the root, depth first) whose
    tag name equals `tag`, or null if there is none. In loose mode the
    requested name is lowercased before comparing.

    FIXME: btw, this could just be a lazy range......
*/
Element getFirstElementByTagName(string tag) {
    if (loose)
        tag = tag.toLower();
    bool doesItMatch(Element e) {
        return e.tagName == tag;
    }

    return findFirst(&doesItMatch);
}

/// This returns the <body> element, if there is one. (It is different than Javascript, where it is called 'body', because body is a keyword in D.)
Element mainBody() {
    return getFirstElementByTagName("body");
}

/// this uses a weird thing... it's [name=] if no colon and
/// [property=] if colon
///
/// Returns the `content` of the matching `head meta` element, or null if
/// no such element exists.
string getMeta(string name) {
    string thing = name.indexOf(":") == -1 ? "name" : "property";
    // NOTE: name is concatenated into the selector text unquoted
    auto e = querySelector("head meta[" ~ thing ~ "=" ~ name ~ "]");
    if (e is null)
        return null;
    return e.content;
}

/// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags.
///
/// If no matching meta element exists yet, one is added to `head`
/// (which must exist, or requireSelector throws).
void setMeta(string name, string value) {
    string thing = name.indexOf(":") == -1 ? "name" : "property";
    auto e = querySelector("head meta[" ~ thing ~ "=" ~ name ~ "]");
    if (e is null) {
        e = requireSelector("head").addChild("meta");
        e.setAttribute(thing, name);
    }

    e.content = value;
}

/// Returns every `form` element in the document, cast to Form.
Form[] forms() {
    return cast(Form[]) getElementsByTagName("form");
}

/// Creates a new `form` element owned by this document (not yet in the tree).
Form createForm()
out (ret) {
    assert(ret !is null);
}
body {
    return cast(Form) createElement("form");
}

/// Creates an element through Element.make and sets this document as its
/// parentDocument. In loose mode the tag name is lowercased first.
Element createElement(string name) {
    if (loose)
        name = name.toLower();

    auto e = Element.make(name);
    e.parentDocument = this;

    return e;
}

/// Creates a new DocumentFragment belonging to this document.
Element createFragment() {
    return new DocumentFragment(this);
}

/// Creates a new TextNode belonging to this document.
Element createTextNode(string content) {
    return new TextNode(this, content);
}

/// Depth-first search from the root; returns the first element for which
/// `doesItMatch` returns true, or null if none does (or if the document
/// has no root, e.g. before parsing or after clear()).
Element findFirst(bool delegate(Element) doesItMatch) {
    // a fresh or cleared document has no root; nothing can match
    if (root is null)
        return null;

    Element result;

    bool goThroughElement(Element e) {
        if (doesItMatch(e)) {
            result = e;
            return true;
        }

        foreach (child; e.children) {
            if (goThroughElement(child))
                return true;
        }

        return false;
    }

    goThroughElement(root);

    return result;
}

/// Drops the root element and resets the loose flag to false.
void clear() {
    root = null;
    loose = false;
}

/// Overrides the prolog emitted by toString and marks it as user-set.
void setProlog(string d) {
    _prolog = d;
    prologWasSet = true;
}

///.
private string _prolog = "<!DOCTYPE html>\n";
private bool prologWasSet = false; // set to true if the user changed it

/// The text emitted before the root element when serializing.
@property string prolog() const {
    // if the user explicitly changed it, do what they want
    // or if we didn't keep/find stuff from the document itself,
    // we'll use the builtin one as a default.
    if (prologWasSet || piecesBeforeRoot.length == 0)
        return _prolog;

    // otherwise rebuild it from what preceded the root, one piece per line
    string p;
    foreach (e; piecesBeforeRoot)
        p ~= e.toString() ~ "\n";
    return p;
}

/// Serializes the prolog followed by the root element. A document without
/// a root (not parsed yet, or cleared) yields just the prolog.
override string toString() const {
    if (root is null)
        return prolog;
    return prolog ~ root.toString();
}

/++
    Writes it out with whitespace for easier eyeball debugging

    Do NOT use for anything other than eyeball debugging,
    because whitespace may be significant content in XML.
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0,
        string indentWith = "\t") const {
    string s = prolog;

    // with insertComments, the newline added here is wrapped in a comment
    if (insertComments)
        s ~= "<!--";
    s ~= "\n";
    if (insertComments)
        s ~= "-->";

    // a document without a root has nothing more to print from the tree
    if (root !is null)
        s ~= root.toPrettyString(insertComments, indentationLevel, indentWith);
    foreach (a; piecesAfterRoot)
        s ~= a.toPrettyString(insertComments, indentationLevel, indentWith);
    return s;
}

/// The root element of the document; null until something is parsed and after clear().
Element root;

/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
Element[] piecesBeforeRoot;

/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
Element[] piecesAfterRoot;

///.
bool loose;

// what follows are for mutation events that you can observe
void delegate(DomMutationEvent)[] eventObservers;

// calls every registered observer, in registration order, with the event
void dispatchMutationEvent(DomMutationEvent e) {
    foreach (o; eventObservers)
        o(e);
}
}

/// This represents almost everything in the DOM.
/// Group: core_functionality
class Element {
    /// Returns a collection of elements by selector.
    /// See: [Document.opIndex]
    ElementCollection opIndex(string selector) {
        auto e = ElementCollection(this);
        return e[selector];
    }

    /++
        Returns the child node with the particular index.

        Be aware that child nodes include text nodes, including
        whitespace-only nodes.

        Returns null if the index is out of range.
    +/
    Element opIndex(size_t index) {
        if (index >= children.length)
            return null;
        return this.children[index];
    }

    /// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done.
    final SomeElementType requireElementById(SomeElementType = Element)(string id,
            string file = __FILE__, size_t line = __LINE__)
            if (is(SomeElementType : Element))
    out (ret) {
        assert(ret !is null);
    }
    body {
        auto e = cast(SomeElementType) getElementById(id);
        if (e is null)
            throw new ElementNotFoundException(SomeElementType.stringof,
                    "id=" ~ id, this, file, line);
        return e;
    }

    /// ditto but with selectors instead of ids
    final SomeElementType requireSelector(SomeElementType = Element)(string selector,
            string file = __FILE__, size_t line = __LINE__)
            if (is(SomeElementType : Element))
    out (ret) {
        assert(ret !is null);
    }
    body {
        auto e = cast(SomeElementType) querySelector(selector);
        if (e is null)
            throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line);
        return e;
    }

    /++
        If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods.
    +/
    final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(
            string selector, string file = __FILE__, size_t line = __LINE__)
            if (is(SomeElementType : Element)) {
        auto e = cast(SomeElementType) querySelector(selector);
        return MaybeNullElement!SomeElementType(e);
    }

    /// get all the classes on this element
    ///
    /// The class attribute is split on any run of whitespace, so repeated
    /// spaces, tabs and newlines never produce empty entries.
    @property string[] classes() {
        return split(className);
    }

    /// Adds a string to the class attribute. The class attribute is used a lot in CSS.
    /// Returns this element so calls can be chained.
    @scriptable Element addClass(string c) {
        if (hasClass(c))
            return this; // don't add it twice

        string cn = getAttribute("class");
        if (cn.length == 0)
            setAttribute("class", c);
        else
            setAttribute("class", cn ~ " " ~ c);

        return this;
    }

    /// Removes a particular class name. Returns this element so calls can be chained.
    @scriptable Element removeClass(string c) {
        if (!hasClass(c))
            return this;

        // rebuild the attribute from every class except the removed one
        string n;
        foreach (name; classes) {
            if (c == name)
                continue; // cut it out
            if (n.length)
                n ~= " ";
            n ~= name;
        }

        className = n.strip();

        return this;
    }

    /// Returns whether the given class appears in this element.
    bool hasClass(string c) {
        string cn = className;

        // cheap rejection: if it isn't even a substring, it can't be a class
        if (cn.indexOf(c) == -1)
            return false;

        // a substring hit may be part of a longer name, so compare whole
        // whitespace-separated tokens
        foreach (cla; split(cn))
            if (cla == c)
                return true;
        return false;
    }

    /* *******************************
              DOM Mutation
    *********************************/
    /// convenience function to quickly add a tag with some text or
    /// other relevant info (for example, it's a src for an <img> element
    /// instead of inner text)
    Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
    in {
        assert(tagName !is null);
    }
    out (e) {
        //assert(e.parentNode is this);
        //assert(e.parentDocument is this.parentDocument);
    }
    body {
        auto e = Element.make(tagName, childInfo, childInfo2);
        // FIXME (maybe): if the thing is self closed, we might want to go ahead and
        // return the parent. That will break existing code though.
        return appendChild(e);
    }

    /// Another convenience function. Adds a child directly after the current one, returning
    /// the new child.
    ///
    /// Between this, addChild, and parentNode, you can build a tree as a single expression.
    Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
    in {
        assert(tagName !is null);
        assert(parentNode !is null);
    }
    out (e) {
        assert(e.parentNode is this.parentNode);
        assert(e.parentDocument is this.parentDocument);
    }
    body {
        auto e = Element.make(tagName, childInfo, childInfo2);
        return parentNode.insertAfter(this, e);
    }

    /// Inserts `e` directly after this element in its parent. This element
    /// must currently have a parent.
    Element addSibling(Element e)
    in {
        assert(parentNode !is null);
    }
    body {
        return parentNode.insertAfter(this, e);
    }

    /// Appends `e` as the last child; same as appendChild.
    Element addChild(Element e) {
        return this.appendChild(e);
    }

    /// Convenience function to append text intermixed with other children.
/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
/// or div.addChildren("Hello, ", user.name, "!");
///
/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
void addChildren(T...)(T t) {
    import std.conv : to;
    import std.traits : isSomeString;

    foreach (item; t) {
        // `item` is a value, so the checks must be made on typeof(item);
        // testing the value itself inside is() is always false.
        static if (is(typeof(item) : Element))
            appendChild(item);
        else static if (isSomeString!(typeof(item)))
            appendText(to!string(item));
        else
            static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
    }
}

/// Creates a `tagName` element containing `firstChild`, appends it to this
/// element and returns the new element. `info2` is forwarded to Element.make.
Element addChild(string tagName, Element firstChild, string info2 = null)
in {
    assert(firstChild !is null);
}
out (ret) {
    assert(ret !is null);
    assert(ret.parentNode is this);
    assert(firstChild.parentNode is ret);

    assert(ret.parentDocument is this.parentDocument);
    //assert(firstChild.parentDocument is this.parentDocument);
}
body {
    auto e = Element.make(tagName, "", info2);
    e.appendChild(firstChild);
    this.appendChild(e);
    return e;
}

/// Creates a `tagName` element, appends it to this element, then sets its
/// innerHTML from `innerHtml`. Returns the new element.
Element addChild(string tagName, in Html innerHtml, string info2 = null)
in {
}
out (ret) {
    assert(ret !is null);
    assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString); // e.parentNode ? e.parentNode.toString : "null");
    assert(ret.parentDocument is this.parentDocument);
}
body {
    auto e = Element.make(tagName, "", info2);
    // appended before the html is set, so the new element is already in the tree
    this.appendChild(e);
    e.innerHTML = innerHtml.source;
    return e;
}

/// Appends each of the given elements, in order, via appendChild.
void appendChildren(Element[] children) {
    foreach (ele; children)
        appendChild(ele);
}

/// Removes this element from its current parent and appends it to `newParent`.
/// Both the current parent and `newParent` must be non-null.
void reparent(Element newParent)
in {
    assert(newParent !is null);
    assert(parentNode !is null);
}
out {
    assert(this.parentNode is newParent);
    //assert(isInArray(this, newParent.children));
}
body {
    // detach from the old parent first, then attach at the end of the new one
    parentNode.removeChild(this);
    newParent.appendChild(this);
}

/**
    Strips this tag out of the document, putting its inner html
    as children of the parent.

    For example, given: `<p>hello <b>there</b></p>`, if you
    call `stripOut` on the `b` element, you'll be left with
    `<p>hello there</p>`.

    The idea here is to make it easy to get rid of garbage
    markup you aren't interested in.
*/
void stripOut()
in {
    assert(parentNode !is null);
}
out {
    assert(parentNode is null);
    assert(children.length == 0);
}
body {
    // orphan the children so they can be handed to the parent
    foreach (kid; children)
        kid.parentNode = null;

    if (children.length == 0)
        parentNode.removeChild(this);
    else
        parentNode.replaceChild(this, this.children);

    // they all live under the former parent now
    this.children.length = 0;
}

/// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check
/// if the element already isn't in a tree, it does nothing.
Element removeFromTree()
in {

}
out (var) {
    assert(this.parentNode is null);
    assert(var is this);
}
body {
    if (this.parentNode !is null)
        this.parentNode.removeChild(this);

    return this;
}

/++
    Wraps this element inside the given element.
    It's like `this.replaceWith(what); what.appendChild(this);`

    Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));`
    you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`.
+/
Element wrapIn(Element what)
in {
    assert(what !is null);
}
out (ret) {
    assert(this.parentNode is what);
    assert(ret is what);
}
body {
    // put the wrapper where this element was, then move this element inside it
    this.replaceWith(what);
    what.appendChild(this);

    return what;
}

/// Replaces this element with something else in the tree.
/// `e` is first removed from wherever it currently is; returns `e`.
Element replaceWith(Element e)
in {
    assert(this.parentNode !is null);
}
body {
    e.removeFromTree();
    this.parentNode.replaceChild(this, e);
    return e;
}

/**
    Splits the className into an array of each class given
*/
string[] classNames() const {
    return split(className(), " ");
}

/**
    Fetches the first consecutive text nodes concatenated together.


    `firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered.

    See_also: [directText], [innerText]
*/
string firstInnerText() const {
    string text;
    foreach (node; children) {
        if (node.nodeType == NodeType.Text)
            text ~= node.nodeValue();
        else
            break; // first non-text child ends the leading run
    }
    return text;
}

/**
    Returns the text directly under this element.


    Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues
    past child tags. So, `<example>some <b>bold</b> text</example>`
    will return `some  text` because it only gets the text, skipping non-text children.

    See_also: [firstInnerText], [innerText]
*/
@property string directText() {
    string text;
    foreach (node; children) {
        if (node.nodeType != NodeType.Text)
            continue;
        text ~= node.nodeValue();
    }

    return text;
}

/**
    Sets the direct text, without modifying other child nodes.


    Unlike [innerText], this does *not* remove existing elements in the element.

    It only replaces the first text node it sees.

    If there are no text nodes, it calls [appendText].

    So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`.
*/
@property void directText(string text) {
    foreach (node; children) {
        if (node.nodeType != NodeType.Text)
            continue;

        // first text child found: overwrite it and stop
        auto textNode = cast(TextNode) node;
        textNode.contents = text;
        return;
    }

    appendText(text);
}

// do nothing, this is primarily a virtual hook
// for links and forms
void setValue(string field, string value) {
}

// this is a thing so i can remove observer support if it gets slow
// I have not implemented all these yet
//
// Builds a DomMutationEvent targeting this element and hands it to the
// parent document; elements without a parent document send nothing.
private void sendObserverEvent(DomMutationOperations operation,
        string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
    if (parentDocument is null)
        return;

    DomMutationEvent event;
    event.target = this;
    event.operation = operation;
    event.relatedString = s1;
    event.relatedString2 = s2;
    event.related = r;
    event.related2 = r2;
    parentDocument.dispatchMutationEvent(event);
}

// putting all the members up front

// this ought to be private. don't use it directly.
Element[] children;

/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
string tagName;

/// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.
string[string] attributes;

/// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here.
/// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
private bool selfClosed;

/// Get the parent Document object that contains this element.
/// It may be null, so remember to check for that.
Document parentDocument;

/// The parent of this element. If the stored parent is a DocumentFragment,
/// the fragment's own stored parent is returned instead.
inout(Element) parentNode() inout {
    auto p = _parentNode;

    // fragments are skipped: report the fragment's own stored parent
    if (cast(DocumentFragment) p)
        return p._parentNode;

    return p;
}

//protected
// Stores the parent link only; the parent's children array is not touched here.
Element parentNode(Element e) {
    return _parentNode = e;
}

private Element _parentNode;

// the next few methods are for implementing interactive kind of things
private CssStyle _computedStyle;

// these are here for event handlers. Don't forget that this library never fires events.
// (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.)
EventHandler[][string] bubblingEventHandlers;
EventHandler[][string] capturingEventHandlers;
EventHandler[string] defaultEventHandlers;

// Registers `handler` for `event`. A leading "on" is stripped from the
// event name, so "onclick" and "click" land in the same list.
void addEventListener(string event, EventHandler handler, bool useCapture = false) {
    if (event.length > 2 && event[0 .. 2] == "on")
        event = event[2 .. $];

    if (useCapture)
        capturingEventHandlers[event] ~= handler;
    else
        bubblingEventHandlers[event] ~= handler;
}

// and now methods

/// Convenience function to try to do the right thing for HTML. This is the main
/// way I create elements.
///
/// The meaning of `childInfo` / `childInfo2` depends on the tag: for most tags
/// they are inner text and class name; for tags such as `img`, `link`, `input`,
/// `a`, `meta`, `source`, `option`, `button` and `audio` they fill in the
/// attributes (or children) shown in the switch below.
static Element make(string tagName, string childInfo = null, string childInfo2 = null) {
    bool selfClosed = tagName.isInArray(selfClosedElements);

    Element e;
    // want to create the right kind of object for the given tag...
    switch (tagName) {
    case "#text":
        // text nodes take the content directly and skip all the tag handling below
        e = new TextNode(null, childInfo);
        return e;
        // break;
    case "table":
        e = new Table(null);
        break;
    case "a":
        e = new Link(null);
        break;
    case "form":
        e = new Form(null);
        break;
    case "tr":
        e = new TableRow(null);
        break;
    case "td", "th":
        e = new TableCell(null, tagName);
        break;
    default:
        e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere
    }

    // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too
    e.tagName = tagName;
    e.selfClosed = selfClosed;

    if (childInfo !is null) switch (tagName) {
        /* html5 convenience tags */
    case "audio":
        if (childInfo.length)
            e.addChild("source", childInfo);
        if (childInfo2 !is null)
            e.appendText(childInfo2);
        break;
    case "source":
        e.src = childInfo;
        if (childInfo2 !is null)
            e.type = childInfo2;
        break;
        /* regular html 4 stuff */
    case "img":
        e.src = childInfo;
        if (childInfo2 !is null)
            e.alt = childInfo2;
        break;
    case "link":
        e.href = childInfo;
        if (childInfo2 !is null)
            e.rel = childInfo2;
        break;
    case "option":
        e.innerText = childInfo;
        if (childInfo2 !is null)
            e.value = childInfo2;
        break;
    case "input":
        e.type = "hidden";
        e.name = childInfo;
        if (childInfo2 !is null)
            e.value = childInfo2;
        break;
    case "button":
        e.innerText = childInfo;
        if (childInfo2 !is null)
            e.type = childInfo2;
        break;
    case "a":
        e.innerText = childInfo;
        if (childInfo2 !is null)
            e.href = childInfo2;
        break;
    case "script":
    case "style":
        e.innerRawSource = childInfo;
        break;
    case "meta":
        e.name = childInfo;
        if (childInfo2 !is null)
            e.content = childInfo2;
        break;
        /* generically, assume we were passed text and perhaps class */
    default:
        e.innerText = childInfo;
        if (childInfo2.length)
            e.className = childInfo2;
    }

    return e;
}

/// ditto, but the content is given as html source assigned to innerHTML
static Element make(string tagName, in Html innerHtml, string childInfo2 = null) {
    // FIXME: childInfo2 is ignored when info1 is null
    // (hence the empty-but-not-null slice passed as childInfo here)
    auto m = Element.make(tagName, "not null"[0 .. 0], childInfo2);
    m.innerHTML = innerHtml.source;
    return m;
}

/// ditto, but with an existing element appended as the only child
static Element make(string tagName, Element child, string childInfo2 = null) {
    auto m = Element.make(tagName, cast(string) null, childInfo2);
    m.appendChild(child);
    return m;
}

/// Generally, you don't want to call this yourself - use Element.make or document.createElement instead.
this(Document _parentDocument, string _tagName,
        string[string] _attributes = null, bool _selfClosed = false) {
    parentDocument = _parentDocument;
    tagName = _tagName;
    if (_attributes !is null)
        attributes = _attributes;
    selfClosed = _selfClosed;

    version (dom_node_indexes)
        this.dataset.nodeIndex = to!string(&(this.attributes));

    assert(_tagName.indexOf(" ") == -1); //, "<" ~ _tagName ~ "> is invalid");
}

/// Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
/// Note also that without a parent document, elements are always in strict, case-sensitive mode.
this(string _tagName, string[string] _attributes = null) {
    tagName = _tagName;
    if (_attributes !is null)
        attributes = _attributes;
    selfClosed = tagName.isInArray(selfClosedElements);

    // this is meant to reserve some memory. It makes a small, but consistent improvement.
    //children.length = 8;
    //children.length = 0;

    version (dom_node_indexes)
        this.dataset.nodeIndex = to!string(&(this.attributes));
}

// only sets the owning document; tag name and attributes are left at their defaults
private this(Document _parentDocument) {
    parentDocument = _parentDocument;

    version (dom_node_indexes)
        this.dataset.nodeIndex = to!string(&(this.attributes));
}

/* *******************************
       Navigating the DOM
*********************************/

/// Returns the first child of this element. If it has no children, returns null.
/// Remember, text nodes are children too.
@property Element firstChild() {
    return children.length ? children[0] : null;
}

/// Returns the last child of this element, or null if it has no children.
@property Element lastChild() {
    return children.length ? children[$ - 1] : null;
}

/// UNTESTED
/// the next element you would encounter if you were reading it in the source
///
/// That is: the first child if there is one, else the next sibling, else the
/// next sibling of the nearest ancestor that has one. Returns null at the end
/// of the tree.
Element nextInSource() {
    auto n = firstChild;
    if (n is null)
        n = nextSibling();
    if (n is null) {
        // climb one ancestor per iteration until one of them has a
        // following sibling (or we run out of ancestors)
        auto p = this.parentNode;
        while (p !is null && n is null) {
            n = p.nextSibling;
            p = p.parentNode;
        }
    }

    return n;
}

/// UNTESTED
/// the previous element you would encounter if you were reading it in the source
///
/// This is the inverse of nextInSource: the deepest last descendant of the
/// previous sibling if there is a previous sibling, otherwise the parent
/// (null for an element with neither).
Element previousInSource() {
    auto p = previousSibling;
    if (p is null)
        return parentNode;

    // the node read just before this one is the very last node inside
    // the previous sibling's subtree
    while (p.lastChild !is null)
        p = p.lastChild;
    return p;
}

/// Returns the nearest sibling before this one that matches `tagName`:
/// null matches any node, "*" matches any non-text node, anything else
/// must equal the sibling's tag name. Returns null if there is none or
/// this element has no parent.
@property Element previousSibling(string tagName = null) {
    if (this.parentNode is null)
        return null;
    Element ps = null;
    foreach (e; this.parentNode.childNodes) {
        if (e is this)
            break;
        if (tagName == "*") {
            // keep scanning: a later match is closer to this element
            if (e.nodeType != NodeType.Text)
                ps = e;
            continue;
        }
        if (tagName is null || e.tagName == tagName)
            ps = e;
    }

    return ps;
}

/// Returns the first sibling after this one that matches `tagName`:
/// null matches any node, "*" matches any non-text node, anything else
/// must equal the sibling's tag name. Returns null if there is none or
/// this element has no parent.
/// Returns the first sibling that follows this node in its parent, or null
/// if there is no parent or no matching sibling.
///
/// `tagName`: null accepts any node (text included), `"*"` accepts any
/// non-text node, anything else must equal the sibling's tagName.
@property Element nextSibling(string tagName = null) {
	if (this.parentNode is null)
		return null;
	Element ns = null;
	bool mightBe = false; // becomes true once we have walked past `this`
	foreach (e; this.parentNode.childNodes) {
		if (e is this) {
			mightBe = true;
			continue;
		}
		if (mightBe) {
			if (tagName == "*" && e.nodeType != NodeType.Text) {
				ns = e;
				break;
			}
			if (tagName is null || e.tagName == tagName) {
				ns = e;
				break;
			}
		}
	}

	return ns;
}

/// Gets the nearest node, going up the chain, with the given tagName
/// May return null or throw.
///
/// When `tagName` is null it defaults to "form", "table" or "a" for
/// `T == Form`, `Table` or `Link` respectively; for any other `T` the
/// immediate parent is used. With `T == Element` the result may be null;
/// with any other `T` an ElementNotFoundException is thrown when the node
/// found (if any) cannot be cast to `T`.
T getParent(T = Element)(string tagName = null) if (is(T : Element)) {
	if (tagName is null) {
		static if (is(T == Form))
			tagName = "form";
		else static if (is(T == Table))
			tagName = "table";
		else static if (is(T == Link))
			tagName = "a"; // was `tagName == "a";` - a comparison, not an assignment
	}

	auto par = this.parentNode;
	while (par !is null) {
		if (tagName is null || par.tagName == tagName)
			break;
		par = par.parentNode;
	}

	static if (!is(T == Element)) {
		auto t = cast(T) par;
		if (t is null)
			throw new ElementNotFoundException("", tagName ~ " parent not found", this);
	} else
		auto t = par;

	return t;
}

/// Returns the first element yielded by `tree` whose `id` equals the given
/// one, or null if none does.
Element getElementById(string id) {
	// FIXME: I use this function a lot, and it's kinda slow
	// not terribly slow, but not great.
	foreach (e; tree)
		if (e.id == id)
			return e;
	return null;
}

/// Note: you can give multiple selectors, separated by commas.
/// It will return the first match it finds.
@scriptable Element querySelector(string selector) {
	// FIXME: inefficient; it gets all results just to discard most of them
	auto list = getElementsBySelector(selector);
	if (list.length == 0)
		return null;
	return list[0];
}

/// a more standards-compliant alias for getElementsBySelector
@scriptable Element[] querySelectorAll(string selector) {
	return getElementsBySelector(selector);
}

/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable bool matches(string selector) {
	/+
	bool caseSensitiveTags = true;
	if(parentDocument && parentDocument.loose)
		caseSensitiveTags = false;
	+/

	Selector s = Selector(selector);
	return s.matchesElement(this);
}

/// Returns itself or the closest parent that matches the given selector, or null if none found
/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
@scriptable Element closest(string selector) {
	Element e = this;
	while (e !is null) {
		if (e.matches(selector))
			return e;
		e = e.parentNode;
	}
	return null;
}

/**
	Returns elements that match the given CSS selector

	* -- all, default if nothing else is there

	tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

	It is all additive

	OP

	space = descendant
	>     = direct descendant
	+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

	[foo]             Foo is present as an attribute
	[foo="warning"]   Matches any E element whose "foo" attribute value is exactly equal to "warning".
	E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
	E[lang|="en"]     Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".

	[item$=sdas] ends with
	[item^=sdsad] begins with

	Quotes are optional here.

	Pseudos:
		:first-child
		:last-child
		:link (same as a[href] for our purposes here)


	There can be commas separating the selector. A comma separated list result is OR'd onto the main.



	This ONLY cares about elements. text, etc, are ignored


	There should be two functions: given element, does it match the selector? and given a selector, give me all the elements
*/
Element[] getElementsBySelector(string selector) {
	// FIXME: this function could probably use some performance attention
	// ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.

	// tags are matched case-insensitively only when the owning document is in loose mode
	bool caseSensitiveTags = true;
	if (parentDocument && parentDocument.loose)
		caseSensitiveTags = false;

	Element[] ret;
	foreach (sel; parseSelectorString(selector, caseSensitiveTags))
		ret ~= sel.getElements(this);
	return ret;
}

/// Returns the elements matched by the selector `"." ~ cn`.
Element[] getElementsByClassName(string cn) {
	// is this correct?
	return getElementsBySelector("." ~ cn);
}

/// Returns every element yielded by `tree` whose tagName equals `tag`.
/// The requested tag is lower-cased first when the owning document is loose.
Element[] getElementsByTagName(string tag) {
	if (parentDocument && parentDocument.loose)
		tag = tag.toLower();
	Element[] ret;
	foreach (e; tree)
		if (e.tagName == tag)
			ret ~= e;
	return ret;
}

/* *******************************
          Attributes
*********************************/

/**
	Gets the given attribute value, or null if the
	attribute is not set.

	Note that the returned string is decoded, so it no longer contains any xml entities.
*/
@scriptable string getAttribute(string name) const {
	if (parentDocument && parentDocument.loose)
		name = name.toLower();
	auto e = name in attributes;
	if (e)
		return *e;
	else
		return null;
}

/**
	Sets an attribute. Returns this for easy chaining

	The name is lower-cased when the owning document is loose. For `href`
	and `src`, any leading `vbscript:` / `javascript:` scheme (matched case
	insensitively, after leading whitespace) is removed from the value
	before it is stored. Observers are notified after the store.
*/
@scriptable Element setAttribute(string name, string value) {
	if (parentDocument && parentDocument.loose)
		name = name.toLower();

	// I never use this shit legitimately and neither should you
	auto it = name.toLower();
	if (it == "href" || it == "src") {
		import std.string : stripLeft;

		// The scheme test and the slice must look at the same string: the
		// old code tested a stripped copy but sliced the unstripped value,
		// cutting at the wrong offset when there was leading whitespace.
		// Looping also removes stacked prefixes ("javascript:javascript:...").
		for (;;) {
			auto trimmed = stripLeft(value);
			auto v = trimmed.toLower();
			if (v.startsWith("vbscript:"))
				value = trimmed[9 .. $];
			else if (v.startsWith("javascript:"))
				value = trimmed[11 .. $];
			else
				break;
		}
	}

	attributes[name] = value;

	sendObserverEvent(DomMutationOperations.setAttribute, name, value);

	return this;
}

/**
	Returns if the attribute exists.
*/
@scriptable bool hasAttribute(string name) {
	if (parentDocument && parentDocument.loose)
		name = name.toLower();

	if (name in attributes)
		return true;
	else
		return false;
}

/**
	Removes the given attribute from the element.

	Returns this. The observer event is sent whether or not the attribute
	was present.
*/
@scriptable Element removeAttribute(string name)
out (ret) {
	assert(ret is this);
}
body {
	if (parentDocument && parentDocument.loose)
		name = name.toLower();
	if (name in attributes)
		attributes.remove(name);

	sendObserverEvent(DomMutationOperations.removeAttribute, name);
	return this;
}

/**
	Gets the class attribute's contents. Returns
	an empty string if it has no class.
*/
@property string className() const {
	auto c = getAttribute("class");
	if (c is null)
		return "";
	return c;
}

///.
/// Sets the `class` attribute to `c` and returns this element.
@property Element className(string c) {
	setAttribute("class", c);
	return this;
}

/**
	Provides easy access to common HTML attributes, object style.

	---
	auto a = Element.make("a");
	a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
	string where = a.href; // same as a.getAttribute("href");
	---

	Only names accepted by `isConvenientAttribute` are handled here. A null
	argument means "just read": the attribute is set only when `v` is not
	null, and the current value is returned either way.
*/
@property string opDispatch(string name)(string v = null)
		if (isConvenientAttribute(name)) {
	if (v !is null)
		setAttribute(name, v);
	return getAttribute(name);
}

/**
	Old access to attributes. Use [attrs] instead.

	DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
	so I want to remove it. A small whitelist of attributes is still allowed, but others are not.

	Instead, use element.attrs.attribute, element.attrs["attribute"],
	or element.getAttribute("attribute")/element.setAttribute("attribute").
*/
@property string opDispatch(string name)(string v = null)
		if (!isConvenientAttribute(name)) {
	// instantiating this overload is always a compile-time error
	static assert(0,
		"Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}

/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
	if(boolean)
		setAttribute(name, name);
	else
		removeAttribute(name);
}
*/

/**
	Returns the element's children.
*/
@property const(Element[]) childNodes() const {
	return children;
}

/// Mutable version of the same
@property Element[] childNodes() { // FIXME: the above should be inout
	return children;
}

/++
	HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
	Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");`
+/
@property DataSet dataset() {
	return DataSet(this);
}

/++
	Gives dot/opIndex access to attributes
	---
	ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
	---
+/
@property AttributeSet attrs() {
	return AttributeSet(this);
}

/++
	Provides both string and object style (like in Javascript) access to the style attribute.

	---
	element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute
	---
+/
@property ElementStyle style() {
	return ElementStyle(this);
}

/++
	This sets the style attribute with a string.

	Returns the same view the `style` getter gives.
+/
@property ElementStyle style(string s) {
	this.setAttribute("style", s);
	return this.style;
}

// Currently a no-op: the whole body is commented out.
private void parseAttributes(string[] whichOnes = null) {
	/+
	if(whichOnes is null)
		whichOnes = attributes.keys;
	foreach(attr; whichOnes) {
		switch(attr) {
			case "id":

			break;
			case "class":

			break;
			case "style":

			break;
			default:
				// we don't care about it
		}
	}
	+/
}

// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
/// Don't use this.
/// Lazily builds (and then caches in `_computedStyle`) a CssStyle from the
/// `style` attribute plus a few legacy presentational attributes.
@property CssStyle computedStyle() {
	// already built once: hand back the cached object
	if (_computedStyle !is null)
		return _computedStyle;

	auto css = this.getAttribute("style");

	/* legacy html presentation attributes are appended as css declarations */
	if (this.hasAttribute("width"))
		css ~= "; width: " ~ this.attrs.width;
	if (this.hasAttribute("height"))
		css ~= "; height: " ~ this.attrs.height;
	if (this.hasAttribute("bgcolor"))
		css ~= "; background-color: " ~ this.attrs.bgcolor;
	if (this.tagName == "body" && this.hasAttribute("text"))
		css ~= "; color: " ~ this.attrs.text;
	if (this.hasAttribute("color"))
		css ~= "; color: " ~ this.attrs.color;

	_computedStyle = new CssStyle(null, css); // gives at least something to work with
	return _computedStyle;
}

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version (browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// own offset plus the offsetLeft of every node along the offsetParent chain
		int total = offsetLeft;
		for (auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetLeft;
		return total;
	}

	///ditto
	int absoluteTop() {
		// own offset plus the offsetTop of every node along the offsetParent chain
		int total = offsetTop;
		for (auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetTop;
		return total;
	}
}

// Back to the regular dom functions

public:

/* *******************************
          DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
void removeAllChildren()
out {
	assert(this.children.length == 0);
}
body {
	// the child list is simply dropped; the former children are not touched here
	children = null;
}

/++
	Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

	See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild

	History:
		Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
+/
Element appendChild(Element e)
in {
	assert(e !is null);
}
out (ret) {
	assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString); // e.parentNode ? e.parentNode.toString : "null");
	assert(e.parentDocument is this.parentDocument);
	assert(e is ret);
}
body {
	if (e.parentNode !is null)
		e.parentNode.removeChild(e);

	// an element that receives content can no longer be written as self-closed
	selfClosed = false;
	e.parentNode = this;
	e.parentDocument = this.parentDocument;
	// a DocumentFragment contributes its children rather than itself
	if (auto frag = cast(DocumentFragment) e)
		children ~= frag.children;
	else
		children ~= e;

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

	return e;
}

/// Inserts the second element to this node, right before the first param
///
/// `where` must already be a child of this element and `what` must be
/// parentless (see the in contract). Returns `what`.
Element insertBefore(in Element where, Element what)
in {
	assert(where !is null);
	assert(where.parentNode is this);
	assert(what !is null);
	assert(what.parentNode is null);
}
out (ret) {
	assert(where.parentNode is this);
	assert(what.parentNode is this);

	assert(what.parentDocument is this.parentDocument);
	assert(ret is what);
}
body {
	foreach (i, e; children) {
		if (e is where) {
			// a DocumentFragment contributes its children rather than itself
			if (auto frag = cast(DocumentFragment) what)
				children = children[0 .. i] ~ frag.children ~ children[i .. $];
			else
				children = children[0 .. i] ~ what ~ children[i .. $];
			what.parentDocument = this.parentDocument;
			what.parentNode = this;
			return what;
		}
	}

	// reached only if `where` was not found among children: nothing was inserted
	return what;

	assert(0); // unreachable: follows an unconditional return
}

/++
	Inserts the given element `what` as a child of this element, immediately after the existing child `where`.

	`where` must already be a child of this element and `what` must be
	parentless (see the in contract). Returns `what`.
+/
Element insertAfter(in Element where, Element what)
in {
	assert(where !is null);
	assert(where.parentNode is this);
	assert(what !is null);
	assert(what.parentNode is null);
}
out (ret) {
	assert(where.parentNode is this);
	assert(what.parentNode is this);
	assert(what.parentDocument is this.parentDocument);
	assert(ret is what);
}
body {
	foreach (i, e; children) {
		if (e is where) {
			// a DocumentFragment contributes its children rather than itself
			if (auto frag = cast(DocumentFragment) what)
				children = children[0 .. i + 1] ~ what.children ~ children[i + 1 .. $];
			else
				children = children[0 .. i + 1] ~ what ~ children[i + 1 .. $];
			what.parentNode = this;
			what.parentDocument = this.parentDocument;
			return what;
		}
	}

	// reached only if `where` was not found among children: nothing was inserted
	return what;

	assert(0); // unreachable: follows an unconditional return
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
Element swapNode(Element child, Element replacement)
in {
	assert(child !is null);
	assert(replacement !is null);
	assert(child.parentNode is this);
}
out (ret) {
	assert(ret is child);
	assert(ret.parentNode is null);
	assert(replacement.parentNode is this);
	assert(replacement.parentDocument is this.parentDocument);
}
body {
	// `ref` so that assigning to `c` overwrites the slot in the children array
	foreach (ref c; this.children)
		if (c is child) {
			c.parentNode = null;
			c = replacement;
			c.parentNode = this;
			c.parentDocument = this.parentDocument;
			return child;
		}
	assert(0);
}

/++
	Appends the given text to the node, as a new text node.


	Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
	yields `<example>text <b>bold</b> hi</example>`.

	Returns this element.

	See_Also:
		[firstInnerText], [directText], [innerText], [appendChild]
+/
@scriptable Element appendText(string text) {
	Element e = new TextNode(parentDocument, text);
	appendChild(e);
	return this;
}

/++
	Returns child elements which are of a tag type (excludes text, comments, etc.).


	childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag.

	Params:
		tagName = filter results to only the child elements with the given tag name.
+/
@property Element[] childElements(string tagName = null) {
	Element[] ret;
	foreach (c; children)
		// 1 is the value the base Element.nodeType returns
		if (c.nodeType == 1 && (tagName is null || c.tagName == tagName))
			ret ~= c;
	return ret;
}

/++
	Appends the given html to the element, returning the elements appended


	This is similar to `element.innerHTML += "html string";` in Javascript.
+/
@scriptable Element[] appendHtml(string html) {
	// parse inside a throwaway <root> wrapper, then take over its children
	Document d = new Document("<root>" ~ html ~ "</root>");
	return stealChildren(d.root);
}

/// Inserts `child` into this element's children, immediately after the
/// existing child `where`. Note the argument order is the reverse of
/// `insertAfter`. Nothing happens if `where` is not found.
void insertChildAfter(Element child, Element where)
in {
	assert(child !is null);
	assert(where !is null);
	assert(where.parentNode is this);
	assert(!selfClosed);
	//assert(isInArray(where, children));
}
out {
	assert(child.parentNode is this);
	assert(where.parentNode is this);
	//assert(isInArray(where, children));
	//assert(isInArray(child, children));
}
body {
	foreach (ref i, c; children) {
		if (c is where) {
			i++; // move to the slot just past `where`
			if (auto frag = cast(DocumentFragment) child)
				children = children[0 .. i] ~ child.children ~ children[i .. $];
			else
				children = children[0 .. i] ~ child ~ children[i .. $];
			child.parentNode = this;
			child.parentDocument = this.parentDocument;
			break;
		}
	}
}

/++
	Reparents all the child elements of `e` to `this`, leaving `e` childless.

	Params:
		e = the element whose children you want to steal
		position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children.

	Returns: the stolen children.
+/
Element[] stealChildren(Element e, Element position = null)
in {
	assert(!selfClosed);
	assert(e !is null);
	//if(position !is null)
	//assert(isInArray(position, children));
}
out (ret) {
	assert(e.children.length == 0);
	// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
	version (none)
		debug foreach (child; ret) {
			assert(child.parentNode is this);
			assert(child.parentDocument is this.parentDocument);
		}
}
body {
	foreach (c; e.children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}
	if (position is null)
		children ~= e.children;
	else {
		// if `position` is not among our children, nothing is spliced in,
		// yet `e` is still emptied below
		foreach (i, child; children) {
			if (child is position) {
				children = children[0 .. i] ~ e.children ~ children[i .. $];
				break;
			}
		}
	}

	// take the slice before resetting e's length so the caller still gets the list
	auto ret = e.children[];
	e.children.length = 0;

	return ret;
}

/// Puts the current element first in our children list. The given element must not have a parent already.
Element prependChild(Element e)
in {
	assert(e.parentNode is null);
	assert(!selfClosed);
}
out {
	assert(e.parentNode is this);
	assert(e.parentDocument is this.parentDocument);
	assert(children[0] is e);
}
body {
	e.parentNode = this;
	e.parentDocument = this.parentDocument;

	// a DocumentFragment is represented by its children, anything else by itself
	auto asFragment = cast(DocumentFragment) e;
	children = (asFragment !is null) ? (e.children ~ children) : (e ~ children);

	return e;
}

/**
	Serializes every child, in order, into `where` and returns just the text
	that this call added, in a form that could be pasted into an XML file.
	Yields "" when the child list is null.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if (children is null)
		return "";

	// the appender may already hold data; remember where our part begins
	const begin = where.data.length;

	foreach (kid; children) {
		assert(kid !is null);
		kid.writeToAppender(where);
	}

	return where.data[begin .. $];
}

/**
	Parses `html` and makes the resulting nodes this element's children,
	replacing whatever was there. An empty string just clears the element.
	Returns this element.
*/
@property Element innerHTML(string html, bool strict = false) {
	// I often say innerHTML = ""; as a shortcut to clear it out,
	// so let's optimize that slightly.
	if (html.length == 0) {
		removeAllChildren();
		return this;
	}

	// there is content coming, so the tag cannot stay self-closed
	selfClosed = false;

	auto parsed = new Document();
	parsed.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	children = parsed.root.children;
	foreach (kid; children) {
		kid.parentNode = this;
		kid.parentDocument = this.parentDocument;
	}

	reparentTreeDocuments();

	// detach the adopted nodes from the temporary wrapper
	parsed.root.children = null;

	return this;
}

/// ditto
@property Element innerHTML(Html html) {
	return this.innerHTML = html.source;
}

// Points every node yielded by `tree` at this element's parentDocument.
private void reparentTreeDocuments() {
	foreach (node; this.tree)
		node.parentDocument = this.parentDocument;
}

/**
	Replaces this node with the given html string, which is parsed

	Note: this invalidates the this reference, since it is removed
	from the tree.

	Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
	auto parsed = new Document();
	parsed.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

	// adopt the parsed nodes as our own children first...
	children = parsed.root.children;
	foreach (kid; children) {
		kid.parentNode = this;
		kid.parentDocument = this.parentDocument;
	}

	reparentTreeDocuments();

	// ...then strip this element out
	stripOut();

	return parsed.root.children;
}

/++
	Returns all the html for this element, including the tag itself.

	This is equivalent to calling toString().
+/
@property string outerHTML() {
	return this.toString();
}

/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
@property void innerRawSource(string rawSource) {
	// drop the current children and install a single RawSource node
	children.length = 0;
	auto rs = new RawSource(parentDocument, rawSource);
	rs.parentNode = this;

	children ~= rs;
}

/// Replaces the child `find` with `replace`, which must be parentless.
/// Returns `replace`. Throws an Exception if `find` is not one of this
/// element's children.
Element replaceChild(Element find, Element replace)
in {
	assert(find !is null);
	assert(replace !is null);
	assert(replace.parentNode is null);
}
out (ret) {
	assert(ret is replace);
	assert(replace.parentNode is this);
	assert(replace.parentDocument is this.parentDocument);
	assert(find.parentNode is null);
}
body {
	// FIXME
	//if(auto frag = cast(DocumentFragment) replace)
	//return this.replaceChild(frag, replace.children);
	for (int i = 0; i < children.length; i++) {
		if (children[i] is find) {
			replace.parentNode = this;
			children[i].parentNode = null; // detach the old child before overwriting its slot
			children[i] = replace;
			replace.parentDocument = this.parentDocument;
			return replace;
		}
	}

	throw new Exception("no such child");
}

/**
	Replaces the given element with a whole group.

	An empty `replace` just removes `find`. Throws an Exception if `find`
	is not one of this element's children.
*/
void replaceChild(Element find, Element[] replace)
in {
	assert(find !is null);
	assert(replace !is null);
	assert(find.parentNode is this);
	debug foreach (r; replace)
		assert(r.parentNode is null);
}
out {
	assert(find.parentNode is null);
	assert(children.length >= replace.length);
	debug foreach (child; children)
		assert(child !is find);
	debug foreach (r; replace)
		assert(r.parentNode is this);
}
body {
	if (replace.length == 0) {
		removeChild(find);
		return;
	}
	assert(replace.length);
	for (int i = 0; i < children.length; i++) {
		if (children[i] is find) {
			children[i].parentNode = null; // this element should now be dead
			// the first replacement takes over find's slot...
			children[i] = replace[0];
			foreach (e; replace) {
				e.parentNode = this;
				e.parentDocument = this.parentDocument;
			}

			// ...and the rest go through the module-level insertAfter helper
			children = .insertAfter(children, i, replace[1 .. $]);

			return;
		}
	}

	throw new Exception("no such child");
}

/**
	Removes the given child from this list.

	Returns the removed element.

	Throws an Exception if `c` is not found among the children.
*/
Element removeChild(Element c)
in {
	assert(c !is null);
	assert(c.parentNode is this);
}
out {
	debug foreach (child; children)
		assert(child !is c);
	assert(c.parentNode is null);
}
body {
	foreach (i, e; children) {
		if (e is c) {
			children = children[0 .. i] ~ children[i + 1 .. $];
			c.parentNode = null;
			return c;
		}
	}

	throw new Exception("no such child");
}

/// This removes all the children from this element, returning the old list.
3081 Element[] removeChildren() 3082 out (ret) { 3083 assert(children.length == 0); 3084 debug foreach (r; ret) 3085 assert(r.parentNode is null); 3086 } 3087 body { 3088 Element[] oldChildren = children.dup; 3089 foreach (c; oldChildren) 3090 c.parentNode = null; 3091 3092 children.length = 0; 3093 3094 return oldChildren; 3095 } 3096 3097 /** 3098 Fetch the inside text, with all tags stripped out. 3099 3100 <p>cool <b>api</b> & code dude<p> 3101 innerText of that is "cool api & code dude". 3102 3103 This does not match what real innerText does! 3104 http://perfectionkills.com/the-poor-misunderstood-innerText/ 3105 3106 It is more like textContent. 3107 */ 3108 @scriptable @property string innerText() const { 3109 string s; 3110 foreach (child; children) { 3111 if (child.nodeType != NodeType.Text) 3112 s ~= child.innerText; 3113 else 3114 s ~= child.nodeValue(); 3115 } 3116 return s; 3117 } 3118 3119 /// 3120 alias textContent = innerText; 3121 3122 /** 3123 Sets the inside text, replacing all children. You don't 3124 have to worry about entity encoding. 3125 */ 3126 @scriptable @property void innerText(string text) { 3127 selfClosed = false; 3128 Element e = new TextNode(parentDocument, text); 3129 e.parentNode = this; 3130 children = [e]; 3131 } 3132 3133 /** 3134 Strips this node out of the document, replacing it with the given text 3135 */ 3136 @property void outerText(string text) { 3137 parentNode.replaceChild(this, new TextNode(parentDocument, text)); 3138 } 3139 3140 /** 3141 Same result as innerText; the tag with all inner tags stripped out 3142 */ 3143 @property string outerText() const { 3144 return innerText; 3145 } 3146 3147 /* ******************************* 3148 Miscellaneous 3149 *********************************/ 3150 3151 /// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it. 3152 @property Element cloned() /+ 3153 out(ret) { 3154 // FIXME: not sure why these fail... 
assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
assert(ret.tagName == this.tagName);
}
body {
+/ {
	return this.cloneNode(true);
}

/++
	Produces a copy of this node: same tag name, parent document, attributes
	(duplicated with `aadup`) and self-closed flag.

	Params:
		deepClone = when true every child is cloned recursively and appended to the
			copy; when false the copy has no children.
+/
Element cloneNode(bool deepClone) {
	auto copy = Element.make(this.tagName);
	copy.parentDocument = this.parentDocument;
	copy.attributes = this.attributes.aadup;
	copy.selfClosed = this.selfClosed;

	if (!deepClone)
		return copy;

	foreach (kid; children)
		copy.appendChild(kid.cloneNode(true));

	return copy;
}

/// W3C DOM interface. The base implementation always yields the empty string; it is
/// only really meaningful on [TextNode] instances, but the member exists on the base class.
string nodeValue() const {
	return "";
}

// should return int
/// W3C DOM node type; the base implementation reports 1.
@property int nodeType() const {
	return 1;
}

// Class invariant: a tag name never contains a space and, in debug builds, no
// child slot is null or refers back to this very node.
invariant() {
	assert(tagName.indexOf(" ") == -1);

	if (children !is null) {
		debug {
			foreach (child; children) {
				// assert(parentNode !is null);
				assert(child !is null);
				// assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName));
				assert(child !is this);
				//assert(child !is parentNode);
			}
		}
	}

	/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
	if(parentNode !is null) {
		// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
		auto lol = cast(TextNode) this;
		assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
	}
	+/
	//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
	// reason is so you can create these without needing a reference to the document
}

/**
	Serializes the whole element - tag, attributes and children - into a string
	which could be pasted into an XML file. Delegates to [writeToAppender].
*/
override string toString() const {
	return writeToAppender();
}

/++
	Builds the line break plus indentation emitted in front of a pretty-printed tag.

	Returns null when `indentWith` is null. Otherwise returns a newline followed by
	`indentationLevel` copies of `indentWith`; when `insertComments` is set the whole
	run is wrapped in `<!--` ... `-->`.
+/
protected string toPrettyStringIndent(bool insertComments,
	int indentationLevel, string indentWith) const {
	if (indentWith is null)
		return null;

	string pad;
	if (insertComments)
		pad ~= "<!--";
	pad ~= "\n";
	for (int level = 0; level < indentationLevel; level++)
		pad ~= indentWith;
	if (insertComments)
		pad ~= "-->";

	return pad;
}

/++
	Writes out with formatting. Be warned: formatting changes the contents. Use ONLY
	for eyeball debugging.

	Params:
		insertComments = wrap the inserted whitespace in comment markers
		indentationLevel = nesting depth of this element
		indentWith = text repeated once per level of indentation
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0,
	string indentWith = "\t") const {

	// first step is to concatenate any consecutive text nodes to simplify
	// the white space analysis. this changes the tree! but i'm allowed since
	// the comment always says it changes the comments
	//
	// actually i'm not allowed cuz it is const so i will cheat and lie
	/+
	TextNode lastTextChild = null;
	for(int a = 0; a < this.children.length; a++) {
		auto child = this.children[a];
		if(auto tn = cast(TextNode) child) {
			if(lastTextChild) {
				lastTextChild.contents ~= tn.contents;
				for(int b = a; b < this.children.length - 1; b++)
					this.children[b] = this.children[b + 1];
				this.children = this.children[0 .. $-1];
			} else {
				lastTextChild = tn;
			}
		} else {
			lastTextChild = null;
		}
	}
	+/

	// Work on a private child list in which each run of adjacent text nodes is
	// replaced by one freshly allocated TextNode; the real tree is left alone.
	const(Element)[] merged;
	TextNode pendingText = null;
	foreach (node; this.children) {
		auto textNode = cast(const(TextNode)) node;
		if (textNode is null) {
			// a non-text child ends the current run of text
			pendingText = null;
			merged ~= node;
			continue;
		}

		if (pendingText is null) {
			pendingText = new TextNode("");
			pendingText.parentNode = cast(Element) this;
			merged ~= pendingText;
		}
		pendingText.contents ~= textNode.contents;
	}

	string result = toPrettyStringIndent(insertComments, indentationLevel, indentWith);

	result ~= "<";
	result ~= tagName;

	// attributes are sorted for consistent output. might be more legible
	// but especially it keeps it the same for diff purposes.
	import std.algorithm : sort;

	foreach (attrName; sort(attributes.keys)) {
		auto attrValue = attributes[attrName];
		result ~= " ";
		result ~= attrName;
		result ~= "=\"";
		result ~= htmlEntitiesEncode(attrValue);
		result ~= "\"";
	}

	if (selfClosed) {
		result ~= " />";
		return result;
	}

	result ~= ">";

	// for simple `<collection><item>text</item><item>text</item></collection>`, let's
	// just keep them on the same line
	const keepOnOneLine = tagName.isInArray(inlineElements) || allAreInlineHtml(merged);
	if (keepOnOneLine) {
		foreach (node; merged)
			result ~= node.toString(); //toPrettyString(false, 0, null);
	} else {
		foreach (node; merged) {
			assert(node !is null);
			result ~= node.toPrettyString(insertComments, indentationLevel + 1, indentWith);
		}

		result ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
	}

	result ~= "</";
	result ~= tagName;
	result ~= ">";

	return result;
}

/+
/// Writes out the opening tag only, if applicable.
string writeTagOnly(Appender!string where = appender!string()) const {
+/

/// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time.
/// Note: the ordering of attributes in the string is officially undefined (this implementation
/// currently writes them sorted by name, but that may change).
/// Returns: the slice of the buffer holding the text written by this call.
string writeToAppender(Appender!string where = appender!string()) const {
	assert(tagName !is null);

	// rough guess: about half a kilobyte per child, plus this tag itself
	where.reserve((this.children.length + 1) * 512);

	// remember where our output begins so only our own part is returned
	const begin = where.data.length;

	where.put("<");
	where.put(tagName);

	import std.algorithm : sort;

	// I am sorting these for convenience with another project. order of AAs is undefined,
	// so I'm allowed to do it.... and it is still undefined, I might change it back later.
	foreach (attrName; sort(attributes.keys)) {
		auto attrValue = attributes[attrName];
		//assert(attrValue !is null);
		where.put(" ");
		where.put(attrName);
		where.put("=\"");
		htmlEntitiesEncode(attrValue, where);
		where.put("\"");
	}

	if (selfClosed) {
		where.put(" />");
	} else {
		where.put('>');

		innerHTML(where);

		where.put("</");
		where.put(tagName);
		where.put('>');
	}

	return where.data[begin .. $];
}

/**
	Returns an [ElementStream] constructed over this element - a lazy range of all
	its children, recursively.
*/
@property ElementStream tree() {
	return new ElementStream(this);
}

// I moved these from Form because they are generally useful.
// Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here.
/// Tags: HTML, HTML5
// FIXME: add overloads for other label types...
Element addField(string label, string name, string type = "text",
	FormFieldOptions fieldOptions = FormFieldOptions.none) {
	// Appends a <label> holding a <span> with the caption and the form control.
	// For checkboxes and radio buttons the caption goes after the control,
	// for every other type it goes before it. Returns the new <label>.
	const captionGoesLast = (type == "checkbox" || type == "radio");

	auto wrapper = this.addChild("label");

	if (!captionGoesLast)
		wrapper.addChild("span", label);

	// "textarea" is not a real input type; it gets its own tag with six rows
	Element input = (type == "textarea")
		? wrapper.addChild("textarea").setAttribute("name", name).setAttribute("rows", "6")
		: wrapper.addChild("input").setAttribute("name", name).setAttribute("type", type);

	if (captionGoesLast)
		wrapper.addChild("span", label);

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return wrapper;
}

// Same as above, but the caption is an already-built element which is appended
// to the <label> (always ahead of the control). Returns the new <label>.
Element addField(Element label, string name, string type = "text",
	FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto wrapper = this.addChild("label");
	wrapper.addChild(label);

	Element input = (type == "textarea")
		? wrapper.addChild("textarea").setAttribute("name", name).setAttribute("rows", "6")
		: wrapper.addChild("input").setAttribute("name", name).setAttribute("type", type);

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return wrapper;
}

// Convenience overload: a plain "text" field with explicit options.
Element addField(string label, string name, FormFieldOptions fieldOptions) {
	return addField(label, name, "text", fieldOptions);
}

// Appends a <label> holding a caption <span> and a <select> with one <option>
// per entry of `options` (each key/value pair is handed to addChild as
// ("option", value, key)). Returns the new <label>.
Element addField(string label, string name, string[string] options,
	FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto wrapper = this.addChild("label");
	wrapper.addChild("span", label);

	auto dropdown = wrapper.addChild("select").setAttribute("name", name);
	foreach (key, caption; options)
		dropdown.addChild("option", caption, key);

	// FIXME: implement requirements somehow
	// (fieldOptions is accepted but not applied to the <select> yet)

	return wrapper;
}

// Appends <div class="submit-holder"> containing a submit <input>; the button's
// value is only set when `label` is non-empty. Returns the holder <div>.
Element addSubmitButton(string label = null) {
	auto holder = this.addChild("div");
	holder.addClass("submit-holder");

	auto button = holder.addChild("input");
	button.type = "submit";
	if (label.length)
		button.value = label;

	return holder;
}

} // closes the enclosing class, whose header lies above this section

// FIXME: since Document loosens the input requirements, it should probably be the sub class...
/// Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header)
/// Group: core_functionality
class XmlDocument : Document {
	/// Sets the xml content type and prolog, then parses `data` with [parseStrict].
	this(string data) {
		contentType = "text/xml; charset=utf-8";
		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";

		parseStrict(data);
	}
}

import std.string;

/* domconvenience follows { */

/// finds comments that match the given txt. Case insensitive, strips whitespace.
/// Group: core_functionality
Element[] findComments(Document document, string txt) {
	return findComments(document.root, txt);
}

/// ditto
Element[] findComments(Element element, string txt) {
	// both sides are compared trimmed and lower-cased
	const wanted = txt.strip().toLower();

	Element[] found;
	foreach (comment; element.getElementsByTagName("#comment")) {
		if (comment.nodeValue().strip().toLower() == wanted)
			found ~= comment;
	}

	return found;
}

/// An option type that propagates null. See: [Element.optionSelector]
/// Group: implementations
struct MaybeNullElement(SomeElementType) {
	/// Wraps `ele`, which may be null.
	this(SomeElementType ele) {
		this.element = ele;
	}

	/// The wrapped element; null when there is nothing to forward to.
	SomeElementType element;

	/++
		Forwards to the element, with a null check inserted that propagates null.

		What comes back depends on the return type of the forwarded member:
		$(LIST
			* an [Element] (or subclass): a `MaybeNullElement` around the result, or
			  around null when the wrapped element is null, so calls keep chaining safely
			* `string`: the result, or a null string when the wrapped element is null
			* `void`: nothing; the call is skipped when the wrapped element is null
		)
		Members with any other return type are rejected at compile time.
	+/
	auto opDispatch(string method, T...)(T args) {
		alias type = typeof(__traits(getMember, element, method)(args));
		static if (is(type : Element)) {
			// Both paths must yield the same wrapper type. Returning the bare
			// element on the non-null path made the two return statements
			// disagree, so the null check was not carried to the next call
			// in a chain. `alias element this` still lets the wrapper be used
			// wherever the plain element is expected.
			if (element is null)
				return MaybeNullElement!type(null);
			return MaybeNullElement!type(__traits(getMember, element, method)(args));
		} else static if (is(type == string)) {
			if (element is null)
				return cast(string) null;
			return __traits(getMember, element, method)(args);
		} else static if (is(type == void)) {
			if (element is null)
				return;
			__traits(getMember, element, method)(args);
		} else {
			static assert(0, "MaybeNullElement can only forward members returning an Element, a string or void");
		}
	}

	/// Allows implicit casting to the wrapped element.
	alias element this;
}

/++
	A collection of elements which forwards methods to the children.
+/
/// Group: implementations
struct ElementCollection {
	/// A collection holding just `e`.
	this(Element e) {
		elements = [e];
	}

	/// A collection of everything under `e` matched by `selector` (via `querySelectorAll`).
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	/// A collection over an existing array; the array is used as-is, not copied.
	this(Element[] e) {
		elements = e;
	}

	/// The members of the collection.
	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// Runs `getElementsBySelector(selector)` on every member and gathers all
	/// results into a new collection.
	ElementCollection opIndex(string selector) {
		Element[] gathered;
		foreach (member; elements)
			gathered ~= member.getElementsBySelector(selector);
		return ElementCollection(gathered);
	}

	/// The i-th member.
	Element opIndex(int i) {
		return elements[i];
	}

	/// if you slice it, give the underlying array for easy forwarding of the
	/// collection to range expecting algorithms or looping over.
	Element[] opSlice() {
		return elements;
	}

	/// And input range primitives so we can foreach over this
	void popFront() {
		elements = elements[1 .. $];
	}

	/// ditto
	Element front() {
		return elements[0];
	}

	/// ditto
	bool empty() {
		return elements.length == 0;
	}

	/++
		Collects strings from the collection, concatenating them together
		Kinda like running reduce and ~= on it. The separator is appended after
		every member, the last one included.

		---
		document["p"].collect!"innerText";
		---
	+/
	string collect(string method)(string separator = "") {
		string joined;
		// the mixed-in code refers to the loop variable by the name `e`
		foreach (e; elements)
			joined ~= mixin("e." ~ method) ~ separator;
		return joined;
	}

	/// Forward method calls to each individual [Element|element] of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		// the mixed-in code refers to the loop variable by the name `e`
		foreach (e; elements)
			mixin("e." ~ name)(t);
		return this;
	}

	/++
		Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one.
		The clone is shallow (`cloneNode(false)`).
	+/
	ElementCollection wrapIn(Element what) {
		foreach (member; elements)
			member.wrapIn(what.cloneNode(false));

		return this;
	}

	/// Concatenates two ElementCollection together.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		auto combined = this.elements ~ rhs.elements;
		return ElementCollection(combined);
	}
}

/// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions.
/// The host type must provide `get(string)` and `set(string, string)`.
/// Group: implementations
mixin template JavascriptStyleDispatch() {
	/// `obj.name` reads through `get`; `obj.name = v` (or `obj.name(v)`) with a non-null `v` writes through `set`.
	string opDispatch(string name)(string v = null) if (name != "popFront") { // popFront will make this look like a range. Do not want.
		return (v is null) ? get(name) : set(name, v);
	}

	/// `obj["key"]` reads through `get`.
	string opIndex(string key) const {
		return get(key);
	}

	/// `obj["field"] = value` writes through `set`.
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	// NOTE(review): relies on a member named `fields` in the host type - confirm one exists
	string* opBinary(string op)(string key) if (op == "in") {
		return key in fields;
	}
}

/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
/// Group: implementations
struct DataSet {
	/// Binds the proxy to the element whose `data-*` attributes it exposes.
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Stores `value` in the attribute `data-` ~ unCamelCase(name) and returns `value`.
	string set(string name, string value) {
		_element.setAttribute("data-" ~ unCamelCase(name), value);
		return value;
	}

	/// Reads the attribute `data-` ~ unCamelCase(name).
	string get(string name) const {
		return _element.getAttribute("data-" ~ unCamelCase(name));
	}

	///
	mixin JavascriptStyleDispatch!();
}

/// Proxy object for attributes which will replace the main opDispatch eventually
/// Group: implementations
struct AttributeSet {
	/// Binds the proxy to the element whose attributes it exposes.
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Sets attribute `name` to `value` and returns `value`.
	string set(string name, string value) {
		_element.setAttribute(name, value);
		return value;
	}

	/// Reads attribute `name`.
	string get(string name) const {
		return _element.getAttribute(name);
	}

	///
	mixin JavascriptStyleDispatch!();
}

/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.

/// Group: implementations
struct ElementStyle {
	/// Binds the proxy to the element whose `style` attribute it edits.
	this(Element parent) {
		_element = parent;
	}

	Element _element;

	/// Reference to the element's raw `style` attribute text. When the element has
	/// no `style` attribute yet, an empty one is created first (even through a const
	/// reference) so there is something to refer to.
	@property ref inout(string) _attribute() inout {
		auto slot = "style" in _element.attributes;
		if (slot is null) {
			auto mutableElement = cast() _element; // const_cast
			mutableElement.attributes["style"] = ""; // we need something to reference
			slot = cast(inout)("style" in mutableElement.attributes);
		}

		assert(slot !is null);
		return *slot;
	}

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	/// Sets one css property and rewrites the whole `style` attribute from the parsed
	/// rules. `name` may be camelCased (`cssFloat` maps to `float`). A null or empty
	/// value removes the rule. An empty `name` is ignored. Returns `value`.
	string set(string name, string value) {
		if (name.length == 0)
			return value;

		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto parsed = rules();
		parsed[name] = value;

		_attribute = "";
		foreach (property, setting; parsed) {
			if (setting is null || setting.length == 0) /* css can't do empty rules anyway so we'll use that to remove */
				continue;
			if (_attribute.length)
				_attribute ~= " ";
			_attribute ~= property ~ ": " ~ setting ~ ";";
		}

		_element.setAttribute("style", _attribute); // this is to trigger the observer call

		return value;
	}

	/// Looks up one css property; `name` may be camelCased (`cssFloat` maps to `float`).
	/// Returns null when the property is not present.
	string get(string name) const {
		const key = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto parsed = rules();
		if (auto hit = key in parsed)
			return *hit;
		return null;
	}

	/// Parses the `style` attribute into a property => value table. The text is split
	/// on `;`; each piece is split at its first `:` and both halves are trimmed. A
	/// piece without a colon becomes a property with an empty value.
	string[string] rules() const {
		string[string] parsed;
		foreach (piece; _attribute.split(";")) {
			auto rule = piece.strip();
			if (rule.length == 0)
				continue;

			auto colon = rule.indexOf(":");
			if (colon == -1) {
				parsed[rule] = "";
				continue;
			}

			auto property = rule[0 .. colon].strip();
			auto setting = rule[colon + 1 .. $].strip();
			parsed[property] = setting;
		}

		return parsed;
	}

	mixin JavascriptStyleDispatch!();
}

/// Converts a camel cased propertyName to a css style dashed property-name.
/// Every ASCII capital `X` becomes `-x`; everything else is copied unchanged.
string unCamelCase(string a) {
	string ret;
	foreach (c; a) {
		if (c >= 'A' && c <= 'Z') {
			ret ~= "-";
			ret ~= cast(char)(c - 'A' + 'a'); // ASCII lower-case of c
		} else {
			ret ~= c;
		}
	}
	return ret;
}

/// Translates a css style property-name to a camel cased propertyName.
/// Dashes are dropped and the character following a dash is upper-cased.
string camelCase(string a) {
	import std.uni : toUpper;

	string ret;
	bool justSawDash = false;
	// Walk whole code points, not UTF-8 code units: upper-casing a single byte of
	// a multi-byte character produced an invalid string for non-ASCII input.
	foreach (dchar c; a) {
		if (c == '-') {
			justSawDash = true;
			continue;
		}

		if (justSawDash) {
			justSawDash = false;
			ret ~= toUpper(c);
		} else {
			ret ~= c;
		}
	}
	return ret;
}

// domconvenience ends }

// @safe:

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.

// Stripping them is useful for reading php as html.... but adding them
// is good for building php.

// I need to maintain compatibility with the way it is now too.

import std.string;
import std.exception;
import std.uri;
import std.array;
import std.range;

//import std.stdio;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.

/++
	This might belong in another module, but it represents a file with a mime type and some data.
Document implements this interface with type = text/html (see Document.contentType for more info)
	and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
+/
/// Group: bonus_functionality
interface FileResource {
	/// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	@property string contentType() const;
	/// the data
	immutable(ubyte)[] getData() const;
}

/// Node type codes; only the text node code (3) is listed here.
/// Group: bonus_functionality
enum NodeType {
	Text = 3
}

/// You can use this to do an easy null check or a dynamic cast+null check on any element.
/// Returns `e` cast to `T`, which is never null.
/// Throws: ElementNotFoundException when `e` is null or is not a `T`; `file` and `line`
/// (the caller's location by default) are passed along to the exception.
/// Group: core_functionality
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e)
		if (is(T : Element))
in {
}
out (ret) {
	assert(ret !is null);
}
body {
	if (auto converted = cast(T) e)
		return converted;
	throw new ElementNotFoundException(T.stringof, "passed value", e, file, line);
}

/// A container node with the tag name `#fragment` that serializes as just its children.
/// Group: core_functionality
class DocumentFragment : Element {
	/// Creates an empty fragment belonging to `_parentDocument` (which may be null).
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/++
		Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it.

		Since: March 29, 2018 (or git tagged v2.1.0)
	+/
	this(Html html) {
		this(null);

		this.innerHTML = html.source;
	}

	/// Writes only the children; the fragment itself has no tag of its own.
	override string writeToAppender(Appender!string where = appender!string()) const {
		return this.innerHTML(where);
	}

	/// Pretty-prints each child at the fragment's own indentation level and
	/// concatenates the results.
	override string toPrettyString(bool insertComments, int indentationLevel, string indentWith) const {
		string joined;
		foreach (kid; children)
			joined ~= kid.toPrettyString(insertComments, indentationLevel, indentWith);
		return joined;
	}

	/// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes
	/*
	override inout(Element) parentNode() inout {
		return children.length ? children[0].parentNode : null;
	}
	*/
	/// Records `p` as the fragment's parent and also hands it to every child as
	/// their parent. Returns `p`.
	override Element parentNode(Element p) {
		this._parentNode = p;
		foreach (kid; children)
			kid.parentNode = p;
		return p;
	}
}

/// Given text, encode all html entities on it - &, <, >, and ". This function also
/// encodes all 8 bit characters as entities, thus ensuring the resultant text will work
/// even if your charset isn't set right. You can suppress this by setting encodeNonAscii = false
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value will be usable for you, with just the data encoded.
///
/// Params:
///     data = the text to encode
///     output = buffer the encoded text is appended to
///     encodeNonAscii = when true, code points of 128 and above (and NUL) are written
///         as decimal numeric character references
/// Returns: only the encoded form of `data`: `data` itself when nothing needed
/// encoding, otherwise the part of `output` written by this call.
/// Group: core_functionality
string htmlEntitiesEncode(string data,
	Appender!string output = appender!string(), bool encodeNonAscii = true) {
	import std.conv : to;

	// if there's no entities, we can save a lot of time by not bothering with the
	// decoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.

	bool shortcut = true;
	foreach (char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if (c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if (shortcut) {
		output.put(data);
		return data;
	}

	auto start = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	foreach (dchar d; data) {
		switch (d) {
		case '&':
			output.put("&amp;");
			break;
		case '<':
			output.put("&lt;");
			break;
		case '>':
			output.put("&gt;");
			break;
		case '"':
			output.put("&quot;");
			break;
		// case '\'':
		// 	output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
		// FIXME: should I encode apostrophes too? as &#39;... I could also do space but if your html is so bad that it doesn't
		// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
		// idk about apostrophes though. Might be worth it, might not.
		default:
			if (!encodeNonAscii || (d < 128 && d > 0))
				output.put(d);
			else
				output.put("&#" ~ to!string(cast(int) d) ~ ";");
			break;
		}
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

	// data = data.replace("\u00a0", "&nbsp;");
}

/// An alias for htmlEntitiesEncode; it works for xml too
/// Group: core_functionality
string xmlEntitiesEncode(string data) {
	return htmlEntitiesEncode(data);
}

/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
/// Group: core_functionality
dchar parseEntity(in dchar[] entity) {
	switch (entity[1 ..
$ - 1]) { 3998 case "quot": 3999 return '"'; 4000 case "apos": 4001 return '\''; 4002 case "lt": 4003 return '<'; 4004 case "gt": 4005 return '>'; 4006 case "amp": 4007 return '&'; 4008 // the next are html rather than xml 4009 4010 // Retrieved from https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references 4011 // Only entities that resolve to U+0009 ~ U+1D56B are stated. 4012 case "Tab": 4013 return '\u0009'; 4014 case "NewLine": 4015 return '\u000A'; 4016 case "excl": 4017 return '\u0021'; 4018 case "QUOT": 4019 return '\u0022'; 4020 case "num": 4021 return '\u0023'; 4022 case "dollar": 4023 return '\u0024'; 4024 case "percnt": 4025 return '\u0025'; 4026 case "AMP": 4027 return '\u0026'; 4028 case "lpar": 4029 return '\u0028'; 4030 case "rpar": 4031 return '\u0029'; 4032 case "ast": 4033 case "midast": 4034 return '\u002A'; 4035 case "plus": 4036 return '\u002B'; 4037 case "comma": 4038 return '\u002C'; 4039 case "period": 4040 return '\u002E'; 4041 case "sol": 4042 return '\u002F'; 4043 case "colon": 4044 return '\u003A'; 4045 case "semi": 4046 return '\u003B'; 4047 case "LT": 4048 return '\u003C'; 4049 case "equals": 4050 return '\u003D'; 4051 case "GT": 4052 return '\u003E'; 4053 case "quest": 4054 return '\u003F'; 4055 case "commat": 4056 return '\u0040'; 4057 case "lsqb": 4058 case "lbrack": 4059 return '\u005B'; 4060 case "bsol": 4061 return '\u005C'; 4062 case "rsqb": 4063 case "rbrack": 4064 return '\u005D'; 4065 case "Hat": 4066 return '\u005E'; 4067 case "lowbar": 4068 case "UnderBar": 4069 return '\u005F'; 4070 case "grave": 4071 case "DiacriticalGrave": 4072 return '\u0060'; 4073 case "lcub": 4074 case "lbrace": 4075 return '\u007B'; 4076 case "verbar": 4077 case "vert": 4078 case "VerticalLine": 4079 return '\u007C'; 4080 case "rcub": 4081 case "rbrace": 4082 return '\u007D'; 4083 case "nbsp": 4084 case "NonBreakingSpace": 4085 return '\u00A0'; 4086 case "iexcl": 4087 return '\u00A1'; 4088 case "cent": 4089 return '\u00A2'; 4090 
case "pound": 4091 return '\u00A3'; 4092 case "curren": 4093 return '\u00A4'; 4094 case "yen": 4095 return '\u00A5'; 4096 case "brvbar": 4097 return '\u00A6'; 4098 case "sect": 4099 return '\u00A7'; 4100 case "Dot": 4101 case "die": 4102 case "DoubleDot": 4103 case "uml": 4104 return '\u00A8'; 4105 case "copy": 4106 case "COPY": 4107 return '\u00A9'; 4108 case "ordf": 4109 return '\u00AA'; 4110 case "laquo": 4111 return '\u00AB'; 4112 case "not": 4113 return '\u00AC'; 4114 case "shy": 4115 return '\u00AD'; 4116 case "reg": 4117 case "circledR": 4118 case "REG": 4119 return '\u00AE'; 4120 case "macr": 4121 case "strns": 4122 return '\u00AF'; 4123 case "deg": 4124 return '\u00B0'; 4125 case "plusmn": 4126 case "pm": 4127 case "PlusMinus": 4128 return '\u00B1'; 4129 case "sup2": 4130 return '\u00B2'; 4131 case "sup3": 4132 return '\u00B3'; 4133 case "acute": 4134 case "DiacriticalAcute": 4135 return '\u00B4'; 4136 case "micro": 4137 return '\u00B5'; 4138 case "para": 4139 return '\u00B6'; 4140 case "middot": 4141 case "centerdot": 4142 case "CenterDot": 4143 return '\u00B7'; 4144 case "cedil": 4145 case "Cedilla": 4146 return '\u00B8'; 4147 case "sup1": 4148 return '\u00B9'; 4149 case "ordm": 4150 return '\u00BA'; 4151 case "raquo": 4152 return '\u00BB'; 4153 case "frac14": 4154 return '\u00BC'; 4155 case "frac12": 4156 case "half": 4157 return '\u00BD'; 4158 case "frac34": 4159 return '\u00BE'; 4160 case "iquest": 4161 return '\u00BF'; 4162 case "Agrave": 4163 return '\u00C0'; 4164 case "Aacute": 4165 return '\u00C1'; 4166 case "Acirc": 4167 return '\u00C2'; 4168 case "Atilde": 4169 return '\u00C3'; 4170 case "Auml": 4171 return '\u00C4'; 4172 case "Aring": 4173 case "angst": 4174 return '\u00C5'; 4175 case "AElig": 4176 return '\u00C6'; 4177 case "Ccedil": 4178 return '\u00C7'; 4179 case "Egrave": 4180 return '\u00C8'; 4181 case "Eacute": 4182 return '\u00C9'; 4183 case "Ecirc": 4184 return '\u00CA'; 4185 case "Euml": 4186 return '\u00CB'; 4187 case "Igrave": 4188 
return '\u00CC'; 4189 case "Iacute": 4190 return '\u00CD'; 4191 case "Icirc": 4192 return '\u00CE'; 4193 case "Iuml": 4194 return '\u00CF'; 4195 case "ETH": 4196 return '\u00D0'; 4197 case "Ntilde": 4198 return '\u00D1'; 4199 case "Ograve": 4200 return '\u00D2'; 4201 case "Oacute": 4202 return '\u00D3'; 4203 case "Ocirc": 4204 return '\u00D4'; 4205 case "Otilde": 4206 return '\u00D5'; 4207 case "Ouml": 4208 return '\u00D6'; 4209 case "times": 4210 return '\u00D7'; 4211 case "Oslash": 4212 return '\u00D8'; 4213 case "Ugrave": 4214 return '\u00D9'; 4215 case "Uacute": 4216 return '\u00DA'; 4217 case "Ucirc": 4218 return '\u00DB'; 4219 case "Uuml": 4220 return '\u00DC'; 4221 case "Yacute": 4222 return '\u00DD'; 4223 case "THORN": 4224 return '\u00DE'; 4225 case "szlig": 4226 return '\u00DF'; 4227 case "agrave": 4228 return '\u00E0'; 4229 case "aacute": 4230 return '\u00E1'; 4231 case "acirc": 4232 return '\u00E2'; 4233 case "atilde": 4234 return '\u00E3'; 4235 case "auml": 4236 return '\u00E4'; 4237 case "aring": 4238 return '\u00E5'; 4239 case "aelig": 4240 return '\u00E6'; 4241 case "ccedil": 4242 return '\u00E7'; 4243 case "egrave": 4244 return '\u00E8'; 4245 case "eacute": 4246 return '\u00E9'; 4247 case "ecirc": 4248 return '\u00EA'; 4249 case "euml": 4250 return '\u00EB'; 4251 case "igrave": 4252 return '\u00EC'; 4253 case "iacute": 4254 return '\u00ED'; 4255 case "icirc": 4256 return '\u00EE'; 4257 case "iuml": 4258 return '\u00EF'; 4259 case "eth": 4260 return '\u00F0'; 4261 case "ntilde": 4262 return '\u00F1'; 4263 case "ograve": 4264 return '\u00F2'; 4265 case "oacute": 4266 return '\u00F3'; 4267 case "ocirc": 4268 return '\u00F4'; 4269 case "otilde": 4270 return '\u00F5'; 4271 case "ouml": 4272 return '\u00F6'; 4273 case "divide": 4274 case "div": 4275 return '\u00F7'; 4276 case "oslash": 4277 return '\u00F8'; 4278 case "ugrave": 4279 return '\u00F9'; 4280 case "uacute": 4281 return '\u00FA'; 4282 case "ucirc": 4283 return '\u00FB'; 4284 case "uuml": 4285 
return '\u00FC'; 4286 case "yacute": 4287 return '\u00FD'; 4288 case "thorn": 4289 return '\u00FE'; 4290 case "yuml": 4291 return '\u00FF'; 4292 case "Amacr": 4293 return '\u0100'; 4294 case "amacr": 4295 return '\u0101'; 4296 case "Abreve": 4297 return '\u0102'; 4298 case "abreve": 4299 return '\u0103'; 4300 case "Aogon": 4301 return '\u0104'; 4302 case "aogon": 4303 return '\u0105'; 4304 case "Cacute": 4305 return '\u0106'; 4306 case "cacute": 4307 return '\u0107'; 4308 case "Ccirc": 4309 return '\u0108'; 4310 case "ccirc": 4311 return '\u0109'; 4312 case "Cdot": 4313 return '\u010A'; 4314 case "cdot": 4315 return '\u010B'; 4316 case "Ccaron": 4317 return '\u010C'; 4318 case "ccaron": 4319 return '\u010D'; 4320 case "Dcaron": 4321 return '\u010E'; 4322 case "dcaron": 4323 return '\u010F'; 4324 case "Dstrok": 4325 return '\u0110'; 4326 case "dstrok": 4327 return '\u0111'; 4328 case "Emacr": 4329 return '\u0112'; 4330 case "emacr": 4331 return '\u0113'; 4332 case "Edot": 4333 return '\u0116'; 4334 case "edot": 4335 return '\u0117'; 4336 case "Eogon": 4337 return '\u0118'; 4338 case "eogon": 4339 return '\u0119'; 4340 case "Ecaron": 4341 return '\u011A'; 4342 case "ecaron": 4343 return '\u011B'; 4344 case "Gcirc": 4345 return '\u011C'; 4346 case "gcirc": 4347 return '\u011D'; 4348 case "Gbreve": 4349 return '\u011E'; 4350 case "gbreve": 4351 return '\u011F'; 4352 case "Gdot": 4353 return '\u0120'; 4354 case "gdot": 4355 return '\u0121'; 4356 case "Gcedil": 4357 return '\u0122'; 4358 case "Hcirc": 4359 return '\u0124'; 4360 case "hcirc": 4361 return '\u0125'; 4362 case "Hstrok": 4363 return '\u0126'; 4364 case "hstrok": 4365 return '\u0127'; 4366 case "Itilde": 4367 return '\u0128'; 4368 case "itilde": 4369 return '\u0129'; 4370 case "Imacr": 4371 return '\u012A'; 4372 case "imacr": 4373 return '\u012B'; 4374 case "Iogon": 4375 return '\u012E'; 4376 case "iogon": 4377 return '\u012F'; 4378 case "Idot": 4379 return '\u0130'; 4380 case "imath": 4381 case "inodot": 4382 
return '\u0131'; 4383 case "IJlig": 4384 return '\u0132'; 4385 case "ijlig": 4386 return '\u0133'; 4387 case "Jcirc": 4388 return '\u0134'; 4389 case "jcirc": 4390 return '\u0135'; 4391 case "Kcedil": 4392 return '\u0136'; 4393 case "kcedil": 4394 return '\u0137'; 4395 case "kgreen": 4396 return '\u0138'; 4397 case "Lacute": 4398 return '\u0139'; 4399 case "lacute": 4400 return '\u013A'; 4401 case "Lcedil": 4402 return '\u013B'; 4403 case "lcedil": 4404 return '\u013C'; 4405 case "Lcaron": 4406 return '\u013D'; 4407 case "lcaron": 4408 return '\u013E'; 4409 case "Lmidot": 4410 return '\u013F'; 4411 case "lmidot": 4412 return '\u0140'; 4413 case "Lstrok": 4414 return '\u0141'; 4415 case "lstrok": 4416 return '\u0142'; 4417 case "Nacute": 4418 return '\u0143'; 4419 case "nacute": 4420 return '\u0144'; 4421 case "Ncedil": 4422 return '\u0145'; 4423 case "ncedil": 4424 return '\u0146'; 4425 case "Ncaron": 4426 return '\u0147'; 4427 case "ncaron": 4428 return '\u0148'; 4429 case "napos": 4430 return '\u0149'; 4431 case "ENG": 4432 return '\u014A'; 4433 case "eng": 4434 return '\u014B'; 4435 case "Omacr": 4436 return '\u014C'; 4437 case "omacr": 4438 return '\u014D'; 4439 case "Odblac": 4440 return '\u0150'; 4441 case "odblac": 4442 return '\u0151'; 4443 case "OElig": 4444 return '\u0152'; 4445 case "oelig": 4446 return '\u0153'; 4447 case "Racute": 4448 return '\u0154'; 4449 case "racute": 4450 return '\u0155'; 4451 case "Rcedil": 4452 return '\u0156'; 4453 case "rcedil": 4454 return '\u0157'; 4455 case "Rcaron": 4456 return '\u0158'; 4457 case "rcaron": 4458 return '\u0159'; 4459 case "Sacute": 4460 return '\u015A'; 4461 case "sacute": 4462 return '\u015B'; 4463 case "Scirc": 4464 return '\u015C'; 4465 case "scirc": 4466 return '\u015D'; 4467 case "Scedil": 4468 return '\u015E'; 4469 case "scedil": 4470 return '\u015F'; 4471 case "Scaron": 4472 return '\u0160'; 4473 case "scaron": 4474 return '\u0161'; 4475 case "Tcedil": 4476 return '\u0162'; 4477 case "tcedil": 4478 
return '\u0163'; 4479 case "Tcaron": 4480 return '\u0164'; 4481 case "tcaron": 4482 return '\u0165'; 4483 case "Tstrok": 4484 return '\u0166'; 4485 case "tstrok": 4486 return '\u0167'; 4487 case "Utilde": 4488 return '\u0168'; 4489 case "utilde": 4490 return '\u0169'; 4491 case "Umacr": 4492 return '\u016A'; 4493 case "umacr": 4494 return '\u016B'; 4495 case "Ubreve": 4496 return '\u016C'; 4497 case "ubreve": 4498 return '\u016D'; 4499 case "Uring": 4500 return '\u016E'; 4501 case "uring": 4502 return '\u016F'; 4503 case "Udblac": 4504 return '\u0170'; 4505 case "udblac": 4506 return '\u0171'; 4507 case "Uogon": 4508 return '\u0172'; 4509 case "uogon": 4510 return '\u0173'; 4511 case "Wcirc": 4512 return '\u0174'; 4513 case "wcirc": 4514 return '\u0175'; 4515 case "Ycirc": 4516 return '\u0176'; 4517 case "ycirc": 4518 return '\u0177'; 4519 case "Yuml": 4520 return '\u0178'; 4521 case "Zacute": 4522 return '\u0179'; 4523 case "zacute": 4524 return '\u017A'; 4525 case "Zdot": 4526 return '\u017B'; 4527 case "zdot": 4528 return '\u017C'; 4529 case "Zcaron": 4530 return '\u017D'; 4531 case "zcaron": 4532 return '\u017E'; 4533 case "fnof": 4534 return '\u0192'; 4535 case "imped": 4536 return '\u01B5'; 4537 case "gacute": 4538 return '\u01F5'; 4539 case "jmath": 4540 return '\u0237'; 4541 case "circ": 4542 return '\u02C6'; 4543 case "caron": 4544 case "Hacek": 4545 return '\u02C7'; 4546 case "breve": 4547 case "Breve": 4548 return '\u02D8'; 4549 case "dot": 4550 case "DiacriticalDot": 4551 return '\u02D9'; 4552 case "ring": 4553 return '\u02DA'; 4554 case "ogon": 4555 return '\u02DB'; 4556 case "tilde": 4557 case "DiacriticalTilde": 4558 return '\u02DC'; 4559 case "dblac": 4560 case "DiacriticalDoubleAcute": 4561 return '\u02DD'; 4562 case "DownBreve": 4563 return '\u0311'; 4564 case "Alpha": 4565 return '\u0391'; 4566 case "Beta": 4567 return '\u0392'; 4568 case "Gamma": 4569 return '\u0393'; 4570 case "Delta": 4571 return '\u0394'; 4572 case "Epsilon": 4573 return 
'\u0395'; 4574 case "Zeta": 4575 return '\u0396'; 4576 case "Eta": 4577 return '\u0397'; 4578 case "Theta": 4579 return '\u0398'; 4580 case "Iota": 4581 return '\u0399'; 4582 case "Kappa": 4583 return '\u039A'; 4584 case "Lambda": 4585 return '\u039B'; 4586 case "Mu": 4587 return '\u039C'; 4588 case "Nu": 4589 return '\u039D'; 4590 case "Xi": 4591 return '\u039E'; 4592 case "Omicron": 4593 return '\u039F'; 4594 case "Pi": 4595 return '\u03A0'; 4596 case "Rho": 4597 return '\u03A1'; 4598 case "Sigma": 4599 return '\u03A3'; 4600 case "Tau": 4601 return '\u03A4'; 4602 case "Upsilon": 4603 return '\u03A5'; 4604 case "Phi": 4605 return '\u03A6'; 4606 case "Chi": 4607 return '\u03A7'; 4608 case "Psi": 4609 return '\u03A8'; 4610 case "Omega": 4611 case "ohm": 4612 return '\u03A9'; 4613 case "alpha": 4614 return '\u03B1'; 4615 case "beta": 4616 return '\u03B2'; 4617 case "gamma": 4618 return '\u03B3'; 4619 case "delta": 4620 return '\u03B4'; 4621 case "epsi": 4622 case "epsilon": 4623 return '\u03B5'; 4624 case "zeta": 4625 return '\u03B6'; 4626 case "eta": 4627 return '\u03B7'; 4628 case "theta": 4629 return '\u03B8'; 4630 case "iota": 4631 return '\u03B9'; 4632 case "kappa": 4633 return '\u03BA'; 4634 case "lambda": 4635 return '\u03BB'; 4636 case "mu": 4637 return '\u03BC'; 4638 case "nu": 4639 return '\u03BD'; 4640 case "xi": 4641 return '\u03BE'; 4642 case "omicron": 4643 return '\u03BF'; 4644 case "pi": 4645 return '\u03C0'; 4646 case "rho": 4647 return '\u03C1'; 4648 case "sigmav": 4649 case "varsigma": 4650 case "sigmaf": 4651 return '\u03C2'; 4652 case "sigma": 4653 return '\u03C3'; 4654 case "tau": 4655 return '\u03C4'; 4656 case "upsi": 4657 case "upsilon": 4658 return '\u03C5'; 4659 case "phi": 4660 return '\u03C6'; 4661 case "chi": 4662 return '\u03C7'; 4663 case "psi": 4664 return '\u03C8'; 4665 case "omega": 4666 return '\u03C9'; 4667 case "thetav": 4668 case "vartheta": 4669 case "thetasym": 4670 return '\u03D1'; 4671 case "Upsi": 4672 case "upsih": 4673 
return '\u03D2'; 4674 case "straightphi": 4675 case "phiv": 4676 case "varphi": 4677 return '\u03D5'; 4678 case "piv": 4679 case "varpi": 4680 return '\u03D6'; 4681 case "Gammad": 4682 return '\u03DC'; 4683 case "gammad": 4684 case "digamma": 4685 return '\u03DD'; 4686 case "kappav": 4687 case "varkappa": 4688 return '\u03F0'; 4689 case "rhov": 4690 case "varrho": 4691 return '\u03F1'; 4692 case "epsiv": 4693 case "varepsilon": 4694 case "straightepsilon": 4695 return '\u03F5'; 4696 case "bepsi": 4697 case "backepsilon": 4698 return '\u03F6'; 4699 case "IOcy": 4700 return '\u0401'; 4701 case "DJcy": 4702 return '\u0402'; 4703 case "GJcy": 4704 return '\u0403'; 4705 case "Jukcy": 4706 return '\u0404'; 4707 case "DScy": 4708 return '\u0405'; 4709 case "Iukcy": 4710 return '\u0406'; 4711 case "YIcy": 4712 return '\u0407'; 4713 case "Jsercy": 4714 return '\u0408'; 4715 case "LJcy": 4716 return '\u0409'; 4717 case "NJcy": 4718 return '\u040A'; 4719 case "TSHcy": 4720 return '\u040B'; 4721 case "KJcy": 4722 return '\u040C'; 4723 case "Ubrcy": 4724 return '\u040E'; 4725 case "DZcy": 4726 return '\u040F'; 4727 case "Acy": 4728 return '\u0410'; 4729 case "Bcy": 4730 return '\u0411'; 4731 case "Vcy": 4732 return '\u0412'; 4733 case "Gcy": 4734 return '\u0413'; 4735 case "Dcy": 4736 return '\u0414'; 4737 case "IEcy": 4738 return '\u0415'; 4739 case "ZHcy": 4740 return '\u0416'; 4741 case "Zcy": 4742 return '\u0417'; 4743 case "Icy": 4744 return '\u0418'; 4745 case "Jcy": 4746 return '\u0419'; 4747 case "Kcy": 4748 return '\u041A'; 4749 case "Lcy": 4750 return '\u041B'; 4751 case "Mcy": 4752 return '\u041C'; 4753 case "Ncy": 4754 return '\u041D'; 4755 case "Ocy": 4756 return '\u041E'; 4757 case "Pcy": 4758 return '\u041F'; 4759 case "Rcy": 4760 return '\u0420'; 4761 case "Scy": 4762 return '\u0421'; 4763 case "Tcy": 4764 return '\u0422'; 4765 case "Ucy": 4766 return '\u0423'; 4767 case "Fcy": 4768 return '\u0424'; 4769 case "KHcy": 4770 return '\u0425'; 4771 case "TScy": 4772 
return '\u0426'; 4773 case "CHcy": 4774 return '\u0427'; 4775 case "SHcy": 4776 return '\u0428'; 4777 case "SHCHcy": 4778 return '\u0429'; 4779 case "HARDcy": 4780 return '\u042A'; 4781 case "Ycy": 4782 return '\u042B'; 4783 case "SOFTcy": 4784 return '\u042C'; 4785 case "Ecy": 4786 return '\u042D'; 4787 case "YUcy": 4788 return '\u042E'; 4789 case "YAcy": 4790 return '\u042F'; 4791 case "acy": 4792 return '\u0430'; 4793 case "bcy": 4794 return '\u0431'; 4795 case "vcy": 4796 return '\u0432'; 4797 case "gcy": 4798 return '\u0433'; 4799 case "dcy": 4800 return '\u0434'; 4801 case "iecy": 4802 return '\u0435'; 4803 case "zhcy": 4804 return '\u0436'; 4805 case "zcy": 4806 return '\u0437'; 4807 case "icy": 4808 return '\u0438'; 4809 case "jcy": 4810 return '\u0439'; 4811 case "kcy": 4812 return '\u043A'; 4813 case "lcy": 4814 return '\u043B'; 4815 case "mcy": 4816 return '\u043C'; 4817 case "ncy": 4818 return '\u043D'; 4819 case "ocy": 4820 return '\u043E'; 4821 case "pcy": 4822 return '\u043F'; 4823 case "rcy": 4824 return '\u0440'; 4825 case "scy": 4826 return '\u0441'; 4827 case "tcy": 4828 return '\u0442'; 4829 case "ucy": 4830 return '\u0443'; 4831 case "fcy": 4832 return '\u0444'; 4833 case "khcy": 4834 return '\u0445'; 4835 case "tscy": 4836 return '\u0446'; 4837 case "chcy": 4838 return '\u0447'; 4839 case "shcy": 4840 return '\u0448'; 4841 case "shchcy": 4842 return '\u0449'; 4843 case "hardcy": 4844 return '\u044A'; 4845 case "ycy": 4846 return '\u044B'; 4847 case "softcy": 4848 return '\u044C'; 4849 case "ecy": 4850 return '\u044D'; 4851 case "yucy": 4852 return '\u044E'; 4853 case "yacy": 4854 return '\u044F'; 4855 case "iocy": 4856 return '\u0451'; 4857 case "djcy": 4858 return '\u0452'; 4859 case "gjcy": 4860 return '\u0453'; 4861 case "jukcy": 4862 return '\u0454'; 4863 case "dscy": 4864 return '\u0455'; 4865 case "iukcy": 4866 return '\u0456'; 4867 case "yicy": 4868 return '\u0457'; 4869 case "jsercy": 4870 return '\u0458'; 4871 case "ljcy": 4872 return 
'\u0459'; 4873 case "njcy": 4874 return '\u045A'; 4875 case "tshcy": 4876 return '\u045B'; 4877 case "kjcy": 4878 return '\u045C'; 4879 case "ubrcy": 4880 return '\u045E'; 4881 case "dzcy": 4882 return '\u045F'; 4883 case "ensp": 4884 return '\u2002'; 4885 case "emsp": 4886 return '\u2003'; 4887 case "emsp13": 4888 return '\u2004'; 4889 case "emsp14": 4890 return '\u2005'; 4891 case "numsp": 4892 return '\u2007'; 4893 case "puncsp": 4894 return '\u2008'; 4895 case "thinsp": 4896 case "ThinSpace": 4897 return '\u2009'; 4898 case "hairsp": 4899 case "VeryThinSpace": 4900 return '\u200A'; 4901 case "ZeroWidthSpace": 4902 case "NegativeVeryThinSpace": 4903 case "NegativeThinSpace": 4904 case "NegativeMediumSpace": 4905 case "NegativeThickSpace": 4906 return '\u200B'; 4907 case "zwnj": 4908 return '\u200C'; 4909 case "zwj": 4910 return '\u200D'; 4911 case "lrm": 4912 return '\u200E'; 4913 case "rlm": 4914 return '\u200F'; 4915 case "hyphen": 4916 case "dash": 4917 return '\u2010'; 4918 case "ndash": 4919 return '\u2013'; 4920 case "mdash": 4921 return '\u2014'; 4922 case "horbar": 4923 return '\u2015'; 4924 case "Verbar": 4925 case "Vert": 4926 return '\u2016'; 4927 case "lsquo": 4928 case "OpenCurlyQuote": 4929 return '\u2018'; 4930 case "rsquo": 4931 case "rsquor": 4932 case "CloseCurlyQuote": 4933 return '\u2019'; 4934 case "lsquor": 4935 case "sbquo": 4936 return '\u201A'; 4937 case "ldquo": 4938 case "OpenCurlyDoubleQuote": 4939 return '\u201C'; 4940 case "rdquo": 4941 case "rdquor": 4942 case "CloseCurlyDoubleQuote": 4943 return '\u201D'; 4944 case "ldquor": 4945 case "bdquo": 4946 return '\u201E'; 4947 case "dagger": 4948 return '\u2020'; 4949 case "Dagger": 4950 case "ddagger": 4951 return '\u2021'; 4952 case "bull": 4953 case "bullet": 4954 return '\u2022'; 4955 case "nldr": 4956 return '\u2025'; 4957 case "hellip": 4958 case "mldr": 4959 return '\u2026'; 4960 case "permil": 4961 return '\u2030'; 4962 case "pertenk": 4963 return '\u2031'; 4964 case "prime": 
4965 return '\u2032'; 4966 case "Prime": 4967 return '\u2033'; 4968 case "tprime": 4969 return '\u2034'; 4970 case "bprime": 4971 case "backprime": 4972 return '\u2035'; 4973 case "lsaquo": 4974 return '\u2039'; 4975 case "rsaquo": 4976 return '\u203A'; 4977 case "oline": 4978 case "OverBar": 4979 return '\u203E'; 4980 case "caret": 4981 return '\u2041'; 4982 case "hybull": 4983 return '\u2043'; 4984 case "frasl": 4985 return '\u2044'; 4986 case "bsemi": 4987 return '\u204F'; 4988 case "qprime": 4989 return '\u2057'; 4990 case "MediumSpace": 4991 return '\u205F'; 4992 case "NoBreak": 4993 return '\u2060'; 4994 case "ApplyFunction": 4995 case "af": 4996 return '\u2061'; 4997 case "InvisibleTimes": 4998 case "it": 4999 return '\u2062'; 5000 case "InvisibleComma": 5001 case "ic": 5002 return '\u2063'; 5003 case "euro": 5004 return '\u20AC'; 5005 case "tdot": 5006 case "TripleDot": 5007 return '\u20DB'; 5008 case "DotDot": 5009 return '\u20DC'; 5010 case "Copf": 5011 case "complexes": 5012 return '\u2102'; 5013 case "incare": 5014 return '\u2105'; 5015 case "gscr": 5016 return '\u210A'; 5017 case "hamilt": 5018 case "HilbertSpace": 5019 case "Hscr": 5020 return '\u210B'; 5021 case "Hfr": 5022 case "Poincareplane": 5023 return '\u210C'; 5024 case "quaternions": 5025 case "Hopf": 5026 return '\u210D'; 5027 case "planckh": 5028 return '\u210E'; 5029 case "planck": 5030 case "hbar": 5031 case "plankv": 5032 case "hslash": 5033 return '\u210F'; 5034 case "Iscr": 5035 case "imagline": 5036 return '\u2110'; 5037 case "image": 5038 case "Im": 5039 case "imagpart": 5040 case "Ifr": 5041 return '\u2111'; 5042 case "Lscr": 5043 case "lagran": 5044 case "Laplacetrf": 5045 return '\u2112'; 5046 case "ell": 5047 return '\u2113'; 5048 case "Nopf": 5049 case "naturals": 5050 return '\u2115'; 5051 case "numero": 5052 return '\u2116'; 5053 case "copysr": 5054 return '\u2117'; 5055 case "weierp": 5056 case "wp": 5057 return '\u2118'; 5058 case "Popf": 5059 case "primes": 5060 return 
'\u2119'; 5061 case "rationals": 5062 case "Qopf": 5063 return '\u211A'; 5064 case "Rscr": 5065 case "realine": 5066 return '\u211B'; 5067 case "real": 5068 case "Re": 5069 case "realpart": 5070 case "Rfr": 5071 return '\u211C'; 5072 case "reals": 5073 case "Ropf": 5074 return '\u211D'; 5075 case "rx": 5076 return '\u211E'; 5077 case "trade": 5078 case "TRADE": 5079 return '\u2122'; 5080 case "integers": 5081 case "Zopf": 5082 return '\u2124'; 5083 case "mho": 5084 return '\u2127'; 5085 case "Zfr": 5086 case "zeetrf": 5087 return '\u2128'; 5088 case "iiota": 5089 return '\u2129'; 5090 case "bernou": 5091 case "Bernoullis": 5092 case "Bscr": 5093 return '\u212C'; 5094 case "Cfr": 5095 case "Cayleys": 5096 return '\u212D'; 5097 case "escr": 5098 return '\u212F'; 5099 case "Escr": 5100 case "expectation": 5101 return '\u2130'; 5102 case "Fscr": 5103 case "Fouriertrf": 5104 return '\u2131'; 5105 case "phmmat": 5106 case "Mellintrf": 5107 case "Mscr": 5108 return '\u2133'; 5109 case "order": 5110 case "orderof": 5111 case "oscr": 5112 return '\u2134'; 5113 case "alefsym": 5114 case "aleph": 5115 return '\u2135'; 5116 case "beth": 5117 return '\u2136'; 5118 case "gimel": 5119 return '\u2137'; 5120 case "daleth": 5121 return '\u2138'; 5122 case "CapitalDifferentialD": 5123 case "DD": 5124 return '\u2145'; 5125 case "DifferentialD": 5126 case "dd": 5127 return '\u2146'; 5128 case "ExponentialE": 5129 case "exponentiale": 5130 case "ee": 5131 return '\u2147'; 5132 case "ImaginaryI": 5133 case "ii": 5134 return '\u2148'; 5135 case "frac13": 5136 return '\u2153'; 5137 case "frac23": 5138 return '\u2154'; 5139 case "frac15": 5140 return '\u2155'; 5141 case "frac25": 5142 return '\u2156'; 5143 case "frac35": 5144 return '\u2157'; 5145 case "frac45": 5146 return '\u2158'; 5147 case "frac16": 5148 return '\u2159'; 5149 case "frac56": 5150 return '\u215A'; 5151 case "frac18": 5152 return '\u215B'; 5153 case "frac38": 5154 return '\u215C'; 5155 case "frac58": 5156 return '\u215D'; 
5157 case "frac78": 5158 return '\u215E'; 5159 case "larr": 5160 case "leftarrow": 5161 case "LeftArrow": 5162 case "slarr": 5163 case "ShortLeftArrow": 5164 return '\u2190'; 5165 case "uarr": 5166 case "uparrow": 5167 case "UpArrow": 5168 case "ShortUpArrow": 5169 return '\u2191'; 5170 case "rarr": 5171 case "rightarrow": 5172 case "RightArrow": 5173 case "srarr": 5174 case "ShortRightArrow": 5175 return '\u2192'; 5176 case "darr": 5177 case "downarrow": 5178 case "DownArrow": 5179 case "ShortDownArrow": 5180 return '\u2193'; 5181 case "harr": 5182 case "leftrightarrow": 5183 case "LeftRightArrow": 5184 return '\u2194'; 5185 case "varr": 5186 case "updownarrow": 5187 case "UpDownArrow": 5188 return '\u2195'; 5189 case "nwarr": 5190 case "UpperLeftArrow": 5191 case "nwarrow": 5192 return '\u2196'; 5193 case "nearr": 5194 case "UpperRightArrow": 5195 case "nearrow": 5196 return '\u2197'; 5197 case "searr": 5198 case "searrow": 5199 case "LowerRightArrow": 5200 return '\u2198'; 5201 case "swarr": 5202 case "swarrow": 5203 case "LowerLeftArrow": 5204 return '\u2199'; 5205 case "nlarr": 5206 case "nleftarrow": 5207 return '\u219A'; 5208 case "nrarr": 5209 case "nrightarrow": 5210 return '\u219B'; 5211 case "rarrw": 5212 case "rightsquigarrow": 5213 return '\u219D'; 5214 case "Larr": 5215 case "twoheadleftarrow": 5216 return '\u219E'; 5217 case "Uarr": 5218 return '\u219F'; 5219 case "Rarr": 5220 case "twoheadrightarrow": 5221 return '\u21A0'; 5222 case "Darr": 5223 return '\u21A1'; 5224 case "larrtl": 5225 case "leftarrowtail": 5226 return '\u21A2'; 5227 case "rarrtl": 5228 case "rightarrowtail": 5229 return '\u21A3'; 5230 case "LeftTeeArrow": 5231 case "mapstoleft": 5232 return '\u21A4'; 5233 case "UpTeeArrow": 5234 case "mapstoup": 5235 return '\u21A5'; 5236 case "map": 5237 case "RightTeeArrow": 5238 case "mapsto": 5239 return '\u21A6'; 5240 case "DownTeeArrow": 5241 case "mapstodown": 5242 return '\u21A7'; 5243 case "larrhk": 5244 case "hookleftarrow": 5245 return 
'\u21A9'; 5246 case "rarrhk": 5247 case "hookrightarrow": 5248 return '\u21AA'; 5249 case "larrlp": 5250 case "looparrowleft": 5251 return '\u21AB'; 5252 case "rarrlp": 5253 case "looparrowright": 5254 return '\u21AC'; 5255 case "harrw": 5256 case "leftrightsquigarrow": 5257 return '\u21AD'; 5258 case "nharr": 5259 case "nleftrightarrow": 5260 return '\u21AE'; 5261 case "lsh": 5262 case "Lsh": 5263 return '\u21B0'; 5264 case "rsh": 5265 case "Rsh": 5266 return '\u21B1'; 5267 case "ldsh": 5268 return '\u21B2'; 5269 case "rdsh": 5270 return '\u21B3'; 5271 case "crarr": 5272 return '\u21B5'; 5273 case "cularr": 5274 case "curvearrowleft": 5275 return '\u21B6'; 5276 case "curarr": 5277 case "curvearrowright": 5278 return '\u21B7'; 5279 case "olarr": 5280 case "circlearrowleft": 5281 return '\u21BA'; 5282 case "orarr": 5283 case "circlearrowright": 5284 return '\u21BB'; 5285 case "lharu": 5286 case "LeftVector": 5287 case "leftharpoonup": 5288 return '\u21BC'; 5289 case "lhard": 5290 case "leftharpoondown": 5291 case "DownLeftVector": 5292 return '\u21BD'; 5293 case "uharr": 5294 case "upharpoonright": 5295 case "RightUpVector": 5296 return '\u21BE'; 5297 case "uharl": 5298 case "upharpoonleft": 5299 case "LeftUpVector": 5300 return '\u21BF'; 5301 case "rharu": 5302 case "RightVector": 5303 case "rightharpoonup": 5304 return '\u21C0'; 5305 case "rhard": 5306 case "rightharpoondown": 5307 case "DownRightVector": 5308 return '\u21C1'; 5309 case "dharr": 5310 case "RightDownVector": 5311 case "downharpoonright": 5312 return '\u21C2'; 5313 case "dharl": 5314 case "LeftDownVector": 5315 case "downharpoonleft": 5316 return '\u21C3'; 5317 case "rlarr": 5318 case "rightleftarrows": 5319 case "RightArrowLeftArrow": 5320 return '\u21C4'; 5321 case "udarr": 5322 case "UpArrowDownArrow": 5323 return '\u21C5'; 5324 case "lrarr": 5325 case "leftrightarrows": 5326 case "LeftArrowRightArrow": 5327 return '\u21C6'; 5328 case "llarr": 5329 case "leftleftarrows": 5330 return '\u21C7'; 
5331 case "uuarr": 5332 case "upuparrows": 5333 return '\u21C8'; 5334 case "rrarr": 5335 case "rightrightarrows": 5336 return '\u21C9'; 5337 case "ddarr": 5338 case "downdownarrows": 5339 return '\u21CA'; 5340 case "lrhar": 5341 case "ReverseEquilibrium": 5342 case "leftrightharpoons": 5343 return '\u21CB'; 5344 case "rlhar": 5345 case "rightleftharpoons": 5346 case "Equilibrium": 5347 return '\u21CC'; 5348 case "nlArr": 5349 case "nLeftarrow": 5350 return '\u21CD'; 5351 case "nhArr": 5352 case "nLeftrightarrow": 5353 return '\u21CE'; 5354 case "nrArr": 5355 case "nRightarrow": 5356 return '\u21CF'; 5357 case "lArr": 5358 case "Leftarrow": 5359 case "DoubleLeftArrow": 5360 return '\u21D0'; 5361 case "uArr": 5362 case "Uparrow": 5363 case "DoubleUpArrow": 5364 return '\u21D1'; 5365 case "rArr": 5366 case "Rightarrow": 5367 case "Implies": 5368 case "DoubleRightArrow": 5369 return '\u21D2'; 5370 case "dArr": 5371 case "Downarrow": 5372 case "DoubleDownArrow": 5373 return '\u21D3'; 5374 case "hArr": 5375 case "Leftrightarrow": 5376 case "DoubleLeftRightArrow": 5377 case "iff": 5378 return '\u21D4'; 5379 case "vArr": 5380 case "Updownarrow": 5381 case "DoubleUpDownArrow": 5382 return '\u21D5'; 5383 case "nwArr": 5384 return '\u21D6'; 5385 case "neArr": 5386 return '\u21D7'; 5387 case "seArr": 5388 return '\u21D8'; 5389 case "swArr": 5390 return '\u21D9'; 5391 case "lAarr": 5392 case "Lleftarrow": 5393 return '\u21DA'; 5394 case "rAarr": 5395 case "Rrightarrow": 5396 return '\u21DB'; 5397 case "zigrarr": 5398 return '\u21DD'; 5399 case "larrb": 5400 case "LeftArrowBar": 5401 return '\u21E4'; 5402 case "rarrb": 5403 case "RightArrowBar": 5404 return '\u21E5'; 5405 case "duarr": 5406 case "DownArrowUpArrow": 5407 return '\u21F5'; 5408 case "loarr": 5409 return '\u21FD'; 5410 case "roarr": 5411 return '\u21FE'; 5412 case "hoarr": 5413 return '\u21FF'; 5414 case "forall": 5415 case "ForAll": 5416 return '\u2200'; 5417 case "comp": 5418 case "complement": 5419 return 
'\u2201'; 5420 case "part": 5421 case "PartialD": 5422 return '\u2202'; 5423 case "exist": 5424 case "Exists": 5425 return '\u2203'; 5426 case "nexist": 5427 case "NotExists": 5428 case "nexists": 5429 return '\u2204'; 5430 case "empty": 5431 case "emptyset": 5432 case "emptyv": 5433 case "varnothing": 5434 return '\u2205'; 5435 case "nabla": 5436 case "Del": 5437 return '\u2207'; 5438 case "isin": 5439 case "isinv": 5440 case "Element": 5441 case "in": 5442 return '\u2208'; 5443 case "notin": 5444 case "NotElement": 5445 case "notinva": 5446 return '\u2209'; 5447 case "niv": 5448 case "ReverseElement": 5449 case "ni": 5450 case "SuchThat": 5451 return '\u220B'; 5452 case "notni": 5453 case "notniva": 5454 case "NotReverseElement": 5455 return '\u220C'; 5456 case "prod": 5457 case "Product": 5458 return '\u220F'; 5459 case "coprod": 5460 case "Coproduct": 5461 return '\u2210'; 5462 case "sum": 5463 case "Sum": 5464 return '\u2211'; 5465 case "minus": 5466 return '\u2212'; 5467 case "mnplus": 5468 case "mp": 5469 case "MinusPlus": 5470 return '\u2213'; 5471 case "plusdo": 5472 case "dotplus": 5473 return '\u2214'; 5474 case "setmn": 5475 case "setminus": 5476 case "Backslash": 5477 case "ssetmn": 5478 case "smallsetminus": 5479 return '\u2216'; 5480 case "lowast": 5481 return '\u2217'; 5482 case "compfn": 5483 case "SmallCircle": 5484 return '\u2218'; 5485 case "radic": 5486 case "Sqrt": 5487 return '\u221A'; 5488 case "prop": 5489 case "propto": 5490 case "Proportional": 5491 case "vprop": 5492 case "varpropto": 5493 return '\u221D'; 5494 case "infin": 5495 return '\u221E'; 5496 case "angrt": 5497 return '\u221F'; 5498 case "ang": 5499 case "angle": 5500 return '\u2220'; 5501 case "angmsd": 5502 case "measuredangle": 5503 return '\u2221'; 5504 case "angsph": 5505 return '\u2222'; 5506 case "mid": 5507 case "VerticalBar": 5508 case "smid": 5509 case "shortmid": 5510 return '\u2223'; 5511 case "nmid": 5512 case "NotVerticalBar": 5513 case "nsmid": 5514 case 
"nshortmid": 5515 return '\u2224'; 5516 case "par": 5517 case "parallel": 5518 case "DoubleVerticalBar": 5519 case "spar": 5520 case "shortparallel": 5521 return '\u2225'; 5522 case "npar": 5523 case "nparallel": 5524 case "NotDoubleVerticalBar": 5525 case "nspar": 5526 case "nshortparallel": 5527 return '\u2226'; 5528 case "and": 5529 case "wedge": 5530 return '\u2227'; 5531 case "or": 5532 case "vee": 5533 return '\u2228'; 5534 case "cap": 5535 return '\u2229'; 5536 case "cup": 5537 return '\u222A'; 5538 case "int": 5539 case "Integral": 5540 return '\u222B'; 5541 case "Int": 5542 return '\u222C'; 5543 case "tint": 5544 case "iiint": 5545 return '\u222D'; 5546 case "conint": 5547 case "oint": 5548 case "ContourIntegral": 5549 return '\u222E'; 5550 case "Conint": 5551 case "DoubleContourIntegral": 5552 return '\u222F'; 5553 case "Cconint": 5554 return '\u2230'; 5555 case "cwint": 5556 return '\u2231'; 5557 case "cwconint": 5558 case "ClockwiseContourIntegral": 5559 return '\u2232'; 5560 case "awconint": 5561 case "CounterClockwiseContourIntegral": 5562 return '\u2233'; 5563 case "there4": 5564 case "therefore": 5565 case "Therefore": 5566 return '\u2234'; 5567 case "becaus": 5568 case "because": 5569 case "Because": 5570 return '\u2235'; 5571 case "ratio": 5572 return '\u2236'; 5573 case "Colon": 5574 case "Proportion": 5575 return '\u2237'; 5576 case "minusd": 5577 case "dotminus": 5578 return '\u2238'; 5579 case "mDDot": 5580 return '\u223A'; 5581 case "homtht": 5582 return '\u223B'; 5583 case "sim": 5584 case "Tilde": 5585 case "thksim": 5586 case "thicksim": 5587 return '\u223C'; 5588 case "bsim": 5589 case "backsim": 5590 return '\u223D'; 5591 case "ac": 5592 case "mstpos": 5593 return '\u223E'; 5594 case "acd": 5595 return '\u223F'; 5596 case "wreath": 5597 case "VerticalTilde": 5598 case "wr": 5599 return '\u2240'; 5600 case "nsim": 5601 case "NotTilde": 5602 return '\u2241'; 5603 case "esim": 5604 case "EqualTilde": 5605 case "eqsim": 5606 return '\u2242'; 
5607 case "sime": 5608 case "TildeEqual": 5609 case "simeq": 5610 return '\u2243'; 5611 case "nsime": 5612 case "nsimeq": 5613 case "NotTildeEqual": 5614 return '\u2244'; 5615 case "cong": 5616 case "TildeFullEqual": 5617 return '\u2245'; 5618 case "simne": 5619 return '\u2246'; 5620 case "ncong": 5621 case "NotTildeFullEqual": 5622 return '\u2247'; 5623 case "asymp": 5624 case "ap": 5625 case "TildeTilde": 5626 case "approx": 5627 case "thkap": 5628 case "thickapprox": 5629 return '\u2248'; 5630 case "nap": 5631 case "NotTildeTilde": 5632 case "napprox": 5633 return '\u2249'; 5634 case "ape": 5635 case "approxeq": 5636 return '\u224A'; 5637 case "apid": 5638 return '\u224B'; 5639 case "bcong": 5640 case "backcong": 5641 return '\u224C'; 5642 case "asympeq": 5643 case "CupCap": 5644 return '\u224D'; 5645 case "bump": 5646 case "HumpDownHump": 5647 case "Bumpeq": 5648 return '\u224E'; 5649 case "bumpe": 5650 case "HumpEqual": 5651 case "bumpeq": 5652 return '\u224F'; 5653 case "esdot": 5654 case "DotEqual": 5655 case "doteq": 5656 return '\u2250'; 5657 case "eDot": 5658 case "doteqdot": 5659 return '\u2251'; 5660 case "efDot": 5661 case "fallingdotseq": 5662 return '\u2252'; 5663 case "erDot": 5664 case "risingdotseq": 5665 return '\u2253'; 5666 case "colone": 5667 case "coloneq": 5668 case "Assign": 5669 return '\u2254'; 5670 case "ecolon": 5671 case "eqcolon": 5672 return '\u2255'; 5673 case "ecir": 5674 case "eqcirc": 5675 return '\u2256'; 5676 case "cire": 5677 case "circeq": 5678 return '\u2257'; 5679 case "wedgeq": 5680 return '\u2259'; 5681 case "veeeq": 5682 return '\u225A'; 5683 case "trie": 5684 case "triangleq": 5685 return '\u225C'; 5686 case "equest": 5687 case "questeq": 5688 return '\u225F'; 5689 case "ne": 5690 case "NotEqual": 5691 return '\u2260'; 5692 case "equiv": 5693 case "Congruent": 5694 return '\u2261'; 5695 case "nequiv": 5696 case "NotCongruent": 5697 return '\u2262'; 5698 case "le": 5699 case "leq": 5700 return '\u2264'; 5701 case "ge": 
5702 case "GreaterEqual": 5703 case "geq": 5704 return '\u2265'; 5705 case "lE": 5706 case "LessFullEqual": 5707 case "leqq": 5708 return '\u2266'; 5709 case "gE": 5710 case "GreaterFullEqual": 5711 case "geqq": 5712 return '\u2267'; 5713 case "lnE": 5714 case "lneqq": 5715 return '\u2268'; 5716 case "gnE": 5717 case "gneqq": 5718 return '\u2269'; 5719 case "Lt": 5720 case "NestedLessLess": 5721 case "ll": 5722 return '\u226A'; 5723 case "Gt": 5724 case "NestedGreaterGreater": 5725 case "gg": 5726 return '\u226B'; 5727 case "twixt": 5728 case "between": 5729 return '\u226C'; 5730 case "NotCupCap": 5731 return '\u226D'; 5732 case "nlt": 5733 case "NotLess": 5734 case "nless": 5735 return '\u226E'; 5736 case "ngt": 5737 case "NotGreater": 5738 case "ngtr": 5739 return '\u226F'; 5740 case "nle": 5741 case "NotLessEqual": 5742 case "nleq": 5743 return '\u2270'; 5744 case "nge": 5745 case "NotGreaterEqual": 5746 case "ngeq": 5747 return '\u2271'; 5748 case "lsim": 5749 case "LessTilde": 5750 case "lesssim": 5751 return '\u2272'; 5752 case "gsim": 5753 case "gtrsim": 5754 case "GreaterTilde": 5755 return '\u2273'; 5756 case "nlsim": 5757 case "NotLessTilde": 5758 return '\u2274'; 5759 case "ngsim": 5760 case "NotGreaterTilde": 5761 return '\u2275'; 5762 case "lg": 5763 case "lessgtr": 5764 case "LessGreater": 5765 return '\u2276'; 5766 case "gl": 5767 case "gtrless": 5768 case "GreaterLess": 5769 return '\u2277'; 5770 case "ntlg": 5771 case "NotLessGreater": 5772 return '\u2278'; 5773 case "ntgl": 5774 case "NotGreaterLess": 5775 return '\u2279'; 5776 case "pr": 5777 case "Precedes": 5778 case "prec": 5779 return '\u227A'; 5780 case "sc": 5781 case "Succeeds": 5782 case "succ": 5783 return '\u227B'; 5784 case "prcue": 5785 case "PrecedesSlantEqual": 5786 case "preccurlyeq": 5787 return '\u227C'; 5788 case "sccue": 5789 case "SucceedsSlantEqual": 5790 case "succcurlyeq": 5791 return '\u227D'; 5792 case "prsim": 5793 case "precsim": 5794 case "PrecedesTilde": 5795 return 
'\u227E'; 5796 case "scsim": 5797 case "succsim": 5798 case "SucceedsTilde": 5799 return '\u227F'; 5800 case "npr": 5801 case "nprec": 5802 case "NotPrecedes": 5803 return '\u2280'; 5804 case "nsc": 5805 case "nsucc": 5806 case "NotSucceeds": 5807 return '\u2281'; 5808 case "sub": 5809 case "subset": 5810 return '\u2282'; 5811 case "sup": 5812 case "supset": 5813 case "Superset": 5814 return '\u2283'; 5815 case "nsub": 5816 return '\u2284'; 5817 case "nsup": 5818 return '\u2285'; 5819 case "sube": 5820 case "SubsetEqual": 5821 case "subseteq": 5822 return '\u2286'; 5823 case "supe": 5824 case "supseteq": 5825 case "SupersetEqual": 5826 return '\u2287'; 5827 case "nsube": 5828 case "nsubseteq": 5829 case "NotSubsetEqual": 5830 return '\u2288'; 5831 case "nsupe": 5832 case "nsupseteq": 5833 case "NotSupersetEqual": 5834 return '\u2289'; 5835 case "subne": 5836 case "subsetneq": 5837 return '\u228A'; 5838 case "supne": 5839 case "supsetneq": 5840 return '\u228B'; 5841 case "cupdot": 5842 return '\u228D'; 5843 case "uplus": 5844 case "UnionPlus": 5845 return '\u228E'; 5846 case "sqsub": 5847 case "SquareSubset": 5848 case "sqsubset": 5849 return '\u228F'; 5850 case "sqsup": 5851 case "SquareSuperset": 5852 case "sqsupset": 5853 return '\u2290'; 5854 case "sqsube": 5855 case "SquareSubsetEqual": 5856 case "sqsubseteq": 5857 return '\u2291'; 5858 case "sqsupe": 5859 case "SquareSupersetEqual": 5860 case "sqsupseteq": 5861 return '\u2292'; 5862 case "sqcap": 5863 case "SquareIntersection": 5864 return '\u2293'; 5865 case "sqcup": 5866 case "SquareUnion": 5867 return '\u2294'; 5868 case "oplus": 5869 case "CirclePlus": 5870 return '\u2295'; 5871 case "ominus": 5872 case "CircleMinus": 5873 return '\u2296'; 5874 case "otimes": 5875 case "CircleTimes": 5876 return '\u2297'; 5877 case "osol": 5878 return '\u2298'; 5879 case "odot": 5880 case "CircleDot": 5881 return '\u2299'; 5882 case "ocir": 5883 case "circledcirc": 5884 return '\u229A'; 5885 case "oast": 5886 case 
"circledast": 5887 return '\u229B'; 5888 case "odash": 5889 case "circleddash": 5890 return '\u229D'; 5891 case "plusb": 5892 case "boxplus": 5893 return '\u229E'; 5894 case "minusb": 5895 case "boxminus": 5896 return '\u229F'; 5897 case "timesb": 5898 case "boxtimes": 5899 return '\u22A0'; 5900 case "sdotb": 5901 case "dotsquare": 5902 return '\u22A1'; 5903 case "vdash": 5904 case "RightTee": 5905 return '\u22A2'; 5906 case "dashv": 5907 case "LeftTee": 5908 return '\u22A3'; 5909 case "top": 5910 case "DownTee": 5911 return '\u22A4'; 5912 case "bottom": 5913 case "bot": 5914 case "perp": 5915 case "UpTee": 5916 return '\u22A5'; 5917 case "models": 5918 return '\u22A7'; 5919 case "vDash": 5920 case "DoubleRightTee": 5921 return '\u22A8'; 5922 case "Vdash": 5923 return '\u22A9'; 5924 case "Vvdash": 5925 return '\u22AA'; 5926 case "VDash": 5927 return '\u22AB'; 5928 case "nvdash": 5929 return '\u22AC'; 5930 case "nvDash": 5931 return '\u22AD'; 5932 case "nVdash": 5933 return '\u22AE'; 5934 case "nVDash": 5935 return '\u22AF'; 5936 case "prurel": 5937 return '\u22B0'; 5938 case "vltri": 5939 case "vartriangleleft": 5940 case "LeftTriangle": 5941 return '\u22B2'; 5942 case "vrtri": 5943 case "vartriangleright": 5944 case "RightTriangle": 5945 return '\u22B3'; 5946 case "ltrie": 5947 case "trianglelefteq": 5948 case "LeftTriangleEqual": 5949 return '\u22B4'; 5950 case "rtrie": 5951 case "trianglerighteq": 5952 case "RightTriangleEqual": 5953 return '\u22B5'; 5954 case "origof": 5955 return '\u22B6'; 5956 case "imof": 5957 return '\u22B7'; 5958 case "mumap": 5959 case "multimap": 5960 return '\u22B8'; 5961 case "hercon": 5962 return '\u22B9'; 5963 case "intcal": 5964 case "intercal": 5965 return '\u22BA'; 5966 case "veebar": 5967 return '\u22BB'; 5968 case "barvee": 5969 return '\u22BD'; 5970 case "angrtvb": 5971 return '\u22BE'; 5972 case "lrtri": 5973 return '\u22BF'; 5974 case "xwedge": 5975 case "Wedge": 5976 case "bigwedge": 5977 return '\u22C0'; 5978 case "xvee": 
5979 case "Vee": 5980 case "bigvee": 5981 return '\u22C1'; 5982 case "xcap": 5983 case "Intersection": 5984 case "bigcap": 5985 return '\u22C2'; 5986 case "xcup": 5987 case "Union": 5988 case "bigcup": 5989 return '\u22C3'; 5990 case "diam": 5991 case "diamond": 5992 case "Diamond": 5993 return '\u22C4'; 5994 case "sdot": 5995 return '\u22C5'; 5996 case "sstarf": 5997 case "Star": 5998 return '\u22C6'; 5999 case "divonx": 6000 case "divideontimes": 6001 return '\u22C7'; 6002 case "bowtie": 6003 return '\u22C8'; 6004 case "ltimes": 6005 return '\u22C9'; 6006 case "rtimes": 6007 return '\u22CA'; 6008 case "lthree": 6009 case "leftthreetimes": 6010 return '\u22CB'; 6011 case "rthree": 6012 case "rightthreetimes": 6013 return '\u22CC'; 6014 case "bsime": 6015 case "backsimeq": 6016 return '\u22CD'; 6017 case "cuvee": 6018 case "curlyvee": 6019 return '\u22CE'; 6020 case "cuwed": 6021 case "curlywedge": 6022 return '\u22CF'; 6023 case "Sub": 6024 case "Subset": 6025 return '\u22D0'; 6026 case "Sup": 6027 case "Supset": 6028 return '\u22D1'; 6029 case "Cap": 6030 return '\u22D2'; 6031 case "Cup": 6032 return '\u22D3'; 6033 case "fork": 6034 case "pitchfork": 6035 return '\u22D4'; 6036 case "epar": 6037 return '\u22D5'; 6038 case "ltdot": 6039 case "lessdot": 6040 return '\u22D6'; 6041 case "gtdot": 6042 case "gtrdot": 6043 return '\u22D7'; 6044 case "Ll": 6045 return '\u22D8'; 6046 case "Gg": 6047 case "ggg": 6048 return '\u22D9'; 6049 case "leg": 6050 case "LessEqualGreater": 6051 case "lesseqgtr": 6052 return '\u22DA'; 6053 case "gel": 6054 case "gtreqless": 6055 case "GreaterEqualLess": 6056 return '\u22DB'; 6057 case "cuepr": 6058 case "curlyeqprec": 6059 return '\u22DE'; 6060 case "cuesc": 6061 case "curlyeqsucc": 6062 return '\u22DF'; 6063 case "nprcue": 6064 case "NotPrecedesSlantEqual": 6065 return '\u22E0'; 6066 case "nsccue": 6067 case "NotSucceedsSlantEqual": 6068 return '\u22E1'; 6069 case "nsqsube": 6070 case "NotSquareSubsetEqual": 6071 return '\u22E2'; 
6072 case "nsqsupe": 6073 case "NotSquareSupersetEqual": 6074 return '\u22E3'; 6075 case "lnsim": 6076 return '\u22E6'; 6077 case "gnsim": 6078 return '\u22E7'; 6079 case "prnsim": 6080 case "precnsim": 6081 return '\u22E8'; 6082 case "scnsim": 6083 case "succnsim": 6084 return '\u22E9'; 6085 case "nltri": 6086 case "ntriangleleft": 6087 case "NotLeftTriangle": 6088 return '\u22EA'; 6089 case "nrtri": 6090 case "ntriangleright": 6091 case "NotRightTriangle": 6092 return '\u22EB'; 6093 case "nltrie": 6094 case "ntrianglelefteq": 6095 case "NotLeftTriangleEqual": 6096 return '\u22EC'; 6097 case "nrtrie": 6098 case "ntrianglerighteq": 6099 case "NotRightTriangleEqual": 6100 return '\u22ED'; 6101 case "vellip": 6102 return '\u22EE'; 6103 case "ctdot": 6104 return '\u22EF'; 6105 case "utdot": 6106 return '\u22F0'; 6107 case "dtdot": 6108 return '\u22F1'; 6109 case "disin": 6110 return '\u22F2'; 6111 case "isinsv": 6112 return '\u22F3'; 6113 case "isins": 6114 return '\u22F4'; 6115 case "isindot": 6116 return '\u22F5'; 6117 case "notinvc": 6118 return '\u22F6'; 6119 case "notinvb": 6120 return '\u22F7'; 6121 case "isinE": 6122 return '\u22F9'; 6123 case "nisd": 6124 return '\u22FA'; 6125 case "xnis": 6126 return '\u22FB'; 6127 case "nis": 6128 return '\u22FC'; 6129 case "notnivc": 6130 return '\u22FD'; 6131 case "notnivb": 6132 return '\u22FE'; 6133 case "barwed": 6134 case "barwedge": 6135 return '\u2305'; 6136 case "Barwed": 6137 case "doublebarwedge": 6138 return '\u2306'; 6139 case "lceil": 6140 case "LeftCeiling": 6141 return '\u2308'; 6142 case "rceil": 6143 case "RightCeiling": 6144 return '\u2309'; 6145 case "lfloor": 6146 case "LeftFloor": 6147 return '\u230A'; 6148 case "rfloor": 6149 case "RightFloor": 6150 return '\u230B'; 6151 case "drcrop": 6152 return '\u230C'; 6153 case "dlcrop": 6154 return '\u230D'; 6155 case "urcrop": 6156 return '\u230E'; 6157 case "ulcrop": 6158 return '\u230F'; 6159 case "bnot": 6160 return '\u2310'; 6161 case "profline": 6162 
return '\u2312'; 6163 case "profsurf": 6164 return '\u2313'; 6165 case "telrec": 6166 return '\u2315'; 6167 case "target": 6168 return '\u2316'; 6169 case "ulcorn": 6170 case "ulcorner": 6171 return '\u231C'; 6172 case "urcorn": 6173 case "urcorner": 6174 return '\u231D'; 6175 case "dlcorn": 6176 case "llcorner": 6177 return '\u231E'; 6178 case "drcorn": 6179 case "lrcorner": 6180 return '\u231F'; 6181 case "frown": 6182 case "sfrown": 6183 return '\u2322'; 6184 case "smile": 6185 case "ssmile": 6186 return '\u2323'; 6187 case "cylcty": 6188 return '\u232D'; 6189 case "profalar": 6190 return '\u232E'; 6191 case "topbot": 6192 return '\u2336'; 6193 case "ovbar": 6194 return '\u233D'; 6195 case "solbar": 6196 return '\u233F'; 6197 case "angzarr": 6198 return '\u237C'; 6199 case "lmoust": 6200 case "lmoustache": 6201 return '\u23B0'; 6202 case "rmoust": 6203 case "rmoustache": 6204 return '\u23B1'; 6205 case "tbrk": 6206 case "OverBracket": 6207 return '\u23B4'; 6208 case "bbrk": 6209 case "UnderBracket": 6210 return '\u23B5'; 6211 case "bbrktbrk": 6212 return '\u23B6'; 6213 case "OverParenthesis": 6214 return '\u23DC'; 6215 case "UnderParenthesis": 6216 return '\u23DD'; 6217 case "OverBrace": 6218 return '\u23DE'; 6219 case "UnderBrace": 6220 return '\u23DF'; 6221 case "trpezium": 6222 return '\u23E2'; 6223 case "elinters": 6224 return '\u23E7'; 6225 case "blank": 6226 return '\u2423'; 6227 case "oS": 6228 case "circledS": 6229 return '\u24C8'; 6230 case "boxh": 6231 case "HorizontalLine": 6232 return '\u2500'; 6233 case "boxv": 6234 return '\u2502'; 6235 case "boxdr": 6236 return '\u250C'; 6237 case "boxdl": 6238 return '\u2510'; 6239 case "boxur": 6240 return '\u2514'; 6241 case "boxul": 6242 return '\u2518'; 6243 case "boxvr": 6244 return '\u251C'; 6245 case "boxvl": 6246 return '\u2524'; 6247 case "boxhd": 6248 return '\u252C'; 6249 case "boxhu": 6250 return '\u2534'; 6251 case "boxvh": 6252 return '\u253C'; 6253 case "boxH": 6254 return '\u2550'; 6255 case 
"boxV": 6256 return '\u2551'; 6257 case "boxdR": 6258 return '\u2552'; 6259 case "boxDr": 6260 return '\u2553'; 6261 case "boxDR": 6262 return '\u2554'; 6263 case "boxdL": 6264 return '\u2555'; 6265 case "boxDl": 6266 return '\u2556'; 6267 case "boxDL": 6268 return '\u2557'; 6269 case "boxuR": 6270 return '\u2558'; 6271 case "boxUr": 6272 return '\u2559'; 6273 case "boxUR": 6274 return '\u255A'; 6275 case "boxuL": 6276 return '\u255B'; 6277 case "boxUl": 6278 return '\u255C'; 6279 case "boxUL": 6280 return '\u255D'; 6281 case "boxvR": 6282 return '\u255E'; 6283 case "boxVr": 6284 return '\u255F'; 6285 case "boxVR": 6286 return '\u2560'; 6287 case "boxvL": 6288 return '\u2561'; 6289 case "boxVl": 6290 return '\u2562'; 6291 case "boxVL": 6292 return '\u2563'; 6293 case "boxHd": 6294 return '\u2564'; 6295 case "boxhD": 6296 return '\u2565'; 6297 case "boxHD": 6298 return '\u2566'; 6299 case "boxHu": 6300 return '\u2567'; 6301 case "boxhU": 6302 return '\u2568'; 6303 case "boxHU": 6304 return '\u2569'; 6305 case "boxvH": 6306 return '\u256A'; 6307 case "boxVh": 6308 return '\u256B'; 6309 case "boxVH": 6310 return '\u256C'; 6311 case "uhblk": 6312 return '\u2580'; 6313 case "lhblk": 6314 return '\u2584'; 6315 case "block": 6316 return '\u2588'; 6317 case "blk14": 6318 return '\u2591'; 6319 case "blk12": 6320 return '\u2592'; 6321 case "blk34": 6322 return '\u2593'; 6323 case "squ": 6324 case "square": 6325 case "Square": 6326 return '\u25A1'; 6327 case "squf": 6328 case "squarf": 6329 case "blacksquare": 6330 case "FilledVerySmallSquare": 6331 return '\u25AA'; 6332 case "EmptyVerySmallSquare": 6333 return '\u25AB'; 6334 case "rect": 6335 return '\u25AD'; 6336 case "marker": 6337 return '\u25AE'; 6338 case "fltns": 6339 return '\u25B1'; 6340 case "xutri": 6341 case "bigtriangleup": 6342 return '\u25B3'; 6343 case "utrif": 6344 case "blacktriangle": 6345 return '\u25B4'; 6346 case "utri": 6347 case "triangle": 6348 return '\u25B5'; 6349 case "rtrif": 6350 case 
"blacktriangleright": 6351 return '\u25B8'; 6352 case "rtri": 6353 case "triangleright": 6354 return '\u25B9'; 6355 case "xdtri": 6356 case "bigtriangledown": 6357 return '\u25BD'; 6358 case "dtrif": 6359 case "blacktriangledown": 6360 return '\u25BE'; 6361 case "dtri": 6362 case "triangledown": 6363 return '\u25BF'; 6364 case "ltrif": 6365 case "blacktriangleleft": 6366 return '\u25C2'; 6367 case "ltri": 6368 case "triangleleft": 6369 return '\u25C3'; 6370 case "loz": 6371 case "lozenge": 6372 return '\u25CA'; 6373 case "cir": 6374 return '\u25CB'; 6375 case "tridot": 6376 return '\u25EC'; 6377 case "xcirc": 6378 case "bigcirc": 6379 return '\u25EF'; 6380 case "ultri": 6381 return '\u25F8'; 6382 case "urtri": 6383 return '\u25F9'; 6384 case "lltri": 6385 return '\u25FA'; 6386 case "EmptySmallSquare": 6387 return '\u25FB'; 6388 case "FilledSmallSquare": 6389 return '\u25FC'; 6390 case "starf": 6391 case "bigstar": 6392 return '\u2605'; 6393 case "star": 6394 return '\u2606'; 6395 case "phone": 6396 return '\u260E'; 6397 case "female": 6398 return '\u2640'; 6399 case "male": 6400 return '\u2642'; 6401 case "spades": 6402 case "spadesuit": 6403 return '\u2660'; 6404 case "clubs": 6405 case "clubsuit": 6406 return '\u2663'; 6407 case "hearts": 6408 case "heartsuit": 6409 return '\u2665'; 6410 case "diams": 6411 case "diamondsuit": 6412 return '\u2666'; 6413 case "sung": 6414 return '\u266A'; 6415 case "flat": 6416 return '\u266D'; 6417 case "natur": 6418 case "natural": 6419 return '\u266E'; 6420 case "sharp": 6421 return '\u266F'; 6422 case "check": 6423 case "checkmark": 6424 return '\u2713'; 6425 case "cross": 6426 return '\u2717'; 6427 case "malt": 6428 case "maltese": 6429 return '\u2720'; 6430 case "sext": 6431 return '\u2736'; 6432 case "VerticalSeparator": 6433 return '\u2758'; 6434 case "lbbrk": 6435 return '\u2772'; 6436 case "rbbrk": 6437 return '\u2773'; 6438 case "bsolhsub": 6439 return '\u27C8'; 6440 case "suphsol": 6441 return '\u27C9'; 6442 case 
"lobrk": 6443 case "LeftDoubleBracket": 6444 return '\u27E6'; 6445 case "robrk": 6446 case "RightDoubleBracket": 6447 return '\u27E7'; 6448 case "lang": 6449 case "LeftAngleBracket": 6450 case "langle": 6451 return '\u27E8'; 6452 case "rang": 6453 case "RightAngleBracket": 6454 case "rangle": 6455 return '\u27E9'; 6456 case "Lang": 6457 return '\u27EA'; 6458 case "Rang": 6459 return '\u27EB'; 6460 case "loang": 6461 return '\u27EC'; 6462 case "roang": 6463 return '\u27ED'; 6464 case "xlarr": 6465 case "longleftarrow": 6466 case "LongLeftArrow": 6467 return '\u27F5'; 6468 case "xrarr": 6469 case "longrightarrow": 6470 case "LongRightArrow": 6471 return '\u27F6'; 6472 case "xharr": 6473 case "longleftrightarrow": 6474 case "LongLeftRightArrow": 6475 return '\u27F7'; 6476 case "xlArr": 6477 case "Longleftarrow": 6478 case "DoubleLongLeftArrow": 6479 return '\u27F8'; 6480 case "xrArr": 6481 case "Longrightarrow": 6482 case "DoubleLongRightArrow": 6483 return '\u27F9'; 6484 case "xhArr": 6485 case "Longleftrightarrow": 6486 case "DoubleLongLeftRightArrow": 6487 return '\u27FA'; 6488 case "xmap": 6489 case "longmapsto": 6490 return '\u27FC'; 6491 case "dzigrarr": 6492 return '\u27FF'; 6493 case "nvlArr": 6494 return '\u2902'; 6495 case "nvrArr": 6496 return '\u2903'; 6497 case "nvHarr": 6498 return '\u2904'; 6499 case "Map": 6500 return '\u2905'; 6501 case "lbarr": 6502 return '\u290C'; 6503 case "rbarr": 6504 case "bkarow": 6505 return '\u290D'; 6506 case "lBarr": 6507 return '\u290E'; 6508 case "rBarr": 6509 case "dbkarow": 6510 return '\u290F'; 6511 case "RBarr": 6512 case "drbkarow": 6513 return '\u2910'; 6514 case "DDotrahd": 6515 return '\u2911'; 6516 case "UpArrowBar": 6517 return '\u2912'; 6518 case "DownArrowBar": 6519 return '\u2913'; 6520 case "Rarrtl": 6521 return '\u2916'; 6522 case "latail": 6523 return '\u2919'; 6524 case "ratail": 6525 return '\u291A'; 6526 case "lAtail": 6527 return '\u291B'; 6528 case "rAtail": 6529 return '\u291C'; 6530 case "larrfs": 
6531 return '\u291D'; 6532 case "rarrfs": 6533 return '\u291E'; 6534 case "larrbfs": 6535 return '\u291F'; 6536 case "rarrbfs": 6537 return '\u2920'; 6538 case "nwarhk": 6539 return '\u2923'; 6540 case "nearhk": 6541 return '\u2924'; 6542 case "searhk": 6543 case "hksearow": 6544 return '\u2925'; 6545 case "swarhk": 6546 case "hkswarow": 6547 return '\u2926'; 6548 case "nwnear": 6549 return '\u2927'; 6550 case "nesear": 6551 case "toea": 6552 return '\u2928'; 6553 case "seswar": 6554 case "tosa": 6555 return '\u2929'; 6556 case "swnwar": 6557 return '\u292A'; 6558 case "rarrc": 6559 return '\u2933'; 6560 case "cudarrr": 6561 return '\u2935'; 6562 case "ldca": 6563 return '\u2936'; 6564 case "rdca": 6565 return '\u2937'; 6566 case "cudarrl": 6567 return '\u2938'; 6568 case "larrpl": 6569 return '\u2939'; 6570 case "curarrm": 6571 return '\u293C'; 6572 case "cularrp": 6573 return '\u293D'; 6574 case "rarrpl": 6575 return '\u2945'; 6576 case "harrcir": 6577 return '\u2948'; 6578 case "Uarrocir": 6579 return '\u2949'; 6580 case "lurdshar": 6581 return '\u294A'; 6582 case "ldrushar": 6583 return '\u294B'; 6584 case "LeftRightVector": 6585 return '\u294E'; 6586 case "RightUpDownVector": 6587 return '\u294F'; 6588 case "DownLeftRightVector": 6589 return '\u2950'; 6590 case "LeftUpDownVector": 6591 return '\u2951'; 6592 case "LeftVectorBar": 6593 return '\u2952'; 6594 case "RightVectorBar": 6595 return '\u2953'; 6596 case "RightUpVectorBar": 6597 return '\u2954'; 6598 case "RightDownVectorBar": 6599 return '\u2955'; 6600 case "DownLeftVectorBar": 6601 return '\u2956'; 6602 case "DownRightVectorBar": 6603 return '\u2957'; 6604 case "LeftUpVectorBar": 6605 return '\u2958'; 6606 case "LeftDownVectorBar": 6607 return '\u2959'; 6608 case "LeftTeeVector": 6609 return '\u295A'; 6610 case "RightTeeVector": 6611 return '\u295B'; 6612 case "RightUpTeeVector": 6613 return '\u295C'; 6614 case "RightDownTeeVector": 6615 return '\u295D'; 6616 case "DownLeftTeeVector": 6617 return 
'\u295E'; 6618 case "DownRightTeeVector": 6619 return '\u295F'; 6620 case "LeftUpTeeVector": 6621 return '\u2960'; 6622 case "LeftDownTeeVector": 6623 return '\u2961'; 6624 case "lHar": 6625 return '\u2962'; 6626 case "uHar": 6627 return '\u2963'; 6628 case "rHar": 6629 return '\u2964'; 6630 case "dHar": 6631 return '\u2965'; 6632 case "luruhar": 6633 return '\u2966'; 6634 case "ldrdhar": 6635 return '\u2967'; 6636 case "ruluhar": 6637 return '\u2968'; 6638 case "rdldhar": 6639 return '\u2969'; 6640 case "lharul": 6641 return '\u296A'; 6642 case "llhard": 6643 return '\u296B'; 6644 case "rharul": 6645 return '\u296C'; 6646 case "lrhard": 6647 return '\u296D'; 6648 case "udhar": 6649 case "UpEquilibrium": 6650 return '\u296E'; 6651 case "duhar": 6652 case "ReverseUpEquilibrium": 6653 return '\u296F'; 6654 case "RoundImplies": 6655 return '\u2970'; 6656 case "erarr": 6657 return '\u2971'; 6658 case "simrarr": 6659 return '\u2972'; 6660 case "larrsim": 6661 return '\u2973'; 6662 case "rarrsim": 6663 return '\u2974'; 6664 case "rarrap": 6665 return '\u2975'; 6666 case "ltlarr": 6667 return '\u2976'; 6668 case "gtrarr": 6669 return '\u2978'; 6670 case "subrarr": 6671 return '\u2979'; 6672 case "suplarr": 6673 return '\u297B'; 6674 case "lfisht": 6675 return '\u297C'; 6676 case "rfisht": 6677 return '\u297D'; 6678 case "ufisht": 6679 return '\u297E'; 6680 case "dfisht": 6681 return '\u297F'; 6682 case "lopar": 6683 return '\u2985'; 6684 case "ropar": 6685 return '\u2986'; 6686 case "lbrke": 6687 return '\u298B'; 6688 case "rbrke": 6689 return '\u298C'; 6690 case "lbrkslu": 6691 return '\u298D'; 6692 case "rbrksld": 6693 return '\u298E'; 6694 case "lbrksld": 6695 return '\u298F'; 6696 case "rbrkslu": 6697 return '\u2990'; 6698 case "langd": 6699 return '\u2991'; 6700 case "rangd": 6701 return '\u2992'; 6702 case "lparlt": 6703 return '\u2993'; 6704 case "rpargt": 6705 return '\u2994'; 6706 case "gtlPar": 6707 return '\u2995'; 6708 case "ltrPar": 6709 return '\u2996'; 6710 
case "vzigzag": 6711 return '\u299A'; 6712 case "vangrt": 6713 return '\u299C'; 6714 case "angrtvbd": 6715 return '\u299D'; 6716 case "ange": 6717 return '\u29A4'; 6718 case "range": 6719 return '\u29A5'; 6720 case "dwangle": 6721 return '\u29A6'; 6722 case "uwangle": 6723 return '\u29A7'; 6724 case "angmsdaa": 6725 return '\u29A8'; 6726 case "angmsdab": 6727 return '\u29A9'; 6728 case "angmsdac": 6729 return '\u29AA'; 6730 case "angmsdad": 6731 return '\u29AB'; 6732 case "angmsdae": 6733 return '\u29AC'; 6734 case "angmsdaf": 6735 return '\u29AD'; 6736 case "angmsdag": 6737 return '\u29AE'; 6738 case "angmsdah": 6739 return '\u29AF'; 6740 case "bemptyv": 6741 return '\u29B0'; 6742 case "demptyv": 6743 return '\u29B1'; 6744 case "cemptyv": 6745 return '\u29B2'; 6746 case "raemptyv": 6747 return '\u29B3'; 6748 case "laemptyv": 6749 return '\u29B4'; 6750 case "ohbar": 6751 return '\u29B5'; 6752 case "omid": 6753 return '\u29B6'; 6754 case "opar": 6755 return '\u29B7'; 6756 case "operp": 6757 return '\u29B9'; 6758 case "olcross": 6759 return '\u29BB'; 6760 case "odsold": 6761 return '\u29BC'; 6762 case "olcir": 6763 return '\u29BE'; 6764 case "ofcir": 6765 return '\u29BF'; 6766 case "olt": 6767 return '\u29C0'; 6768 case "ogt": 6769 return '\u29C1'; 6770 case "cirscir": 6771 return '\u29C2'; 6772 case "cirE": 6773 return '\u29C3'; 6774 case "solb": 6775 return '\u29C4'; 6776 case "bsolb": 6777 return '\u29C5'; 6778 case "boxbox": 6779 return '\u29C9'; 6780 case "trisb": 6781 return '\u29CD'; 6782 case "rtriltri": 6783 return '\u29CE'; 6784 case "LeftTriangleBar": 6785 return '\u29CF'; 6786 case "RightTriangleBar": 6787 return '\u29D0'; 6788 case "iinfin": 6789 return '\u29DC'; 6790 case "infintie": 6791 return '\u29DD'; 6792 case "nvinfin": 6793 return '\u29DE'; 6794 case "eparsl": 6795 return '\u29E3'; 6796 case "smeparsl": 6797 return '\u29E4'; 6798 case "eqvparsl": 6799 return '\u29E5'; 6800 case "lozf": 6801 case "blacklozenge": 6802 return '\u29EB'; 6803 case 
"RuleDelayed": 6804 return '\u29F4'; 6805 case "dsol": 6806 return '\u29F6'; 6807 case "xodot": 6808 case "bigodot": 6809 return '\u2A00'; 6810 case "xoplus": 6811 case "bigoplus": 6812 return '\u2A01'; 6813 case "xotime": 6814 case "bigotimes": 6815 return '\u2A02'; 6816 case "xuplus": 6817 case "biguplus": 6818 return '\u2A04'; 6819 case "xsqcup": 6820 case "bigsqcup": 6821 return '\u2A06'; 6822 case "qint": 6823 case "iiiint": 6824 return '\u2A0C'; 6825 case "fpartint": 6826 return '\u2A0D'; 6827 case "cirfnint": 6828 return '\u2A10'; 6829 case "awint": 6830 return '\u2A11'; 6831 case "rppolint": 6832 return '\u2A12'; 6833 case "scpolint": 6834 return '\u2A13'; 6835 case "npolint": 6836 return '\u2A14'; 6837 case "pointint": 6838 return '\u2A15'; 6839 case "quatint": 6840 return '\u2A16'; 6841 case "intlarhk": 6842 return '\u2A17'; 6843 case "pluscir": 6844 return '\u2A22'; 6845 case "plusacir": 6846 return '\u2A23'; 6847 case "simplus": 6848 return '\u2A24'; 6849 case "plusdu": 6850 return '\u2A25'; 6851 case "plussim": 6852 return '\u2A26'; 6853 case "plustwo": 6854 return '\u2A27'; 6855 case "mcomma": 6856 return '\u2A29'; 6857 case "minusdu": 6858 return '\u2A2A'; 6859 case "loplus": 6860 return '\u2A2D'; 6861 case "roplus": 6862 return '\u2A2E'; 6863 case "Cross": 6864 return '\u2A2F'; 6865 case "timesd": 6866 return '\u2A30'; 6867 case "timesbar": 6868 return '\u2A31'; 6869 case "smashp": 6870 return '\u2A33'; 6871 case "lotimes": 6872 return '\u2A34'; 6873 case "rotimes": 6874 return '\u2A35'; 6875 case "otimesas": 6876 return '\u2A36'; 6877 case "Otimes": 6878 return '\u2A37'; 6879 case "odiv": 6880 return '\u2A38'; 6881 case "triplus": 6882 return '\u2A39'; 6883 case "triminus": 6884 return '\u2A3A'; 6885 case "tritime": 6886 return '\u2A3B'; 6887 case "iprod": 6888 case "intprod": 6889 return '\u2A3C'; 6890 case "amalg": 6891 return '\u2A3F'; 6892 case "capdot": 6893 return '\u2A40'; 6894 case "ncup": 6895 return '\u2A42'; 6896 case "ncap": 6897 return 
'\u2A43'; 6898 case "capand": 6899 return '\u2A44'; 6900 case "cupor": 6901 return '\u2A45'; 6902 case "cupcap": 6903 return '\u2A46'; 6904 case "capcup": 6905 return '\u2A47'; 6906 case "cupbrcap": 6907 return '\u2A48'; 6908 case "capbrcup": 6909 return '\u2A49'; 6910 case "cupcup": 6911 return '\u2A4A'; 6912 case "capcap": 6913 return '\u2A4B'; 6914 case "ccups": 6915 return '\u2A4C'; 6916 case "ccaps": 6917 return '\u2A4D'; 6918 case "ccupssm": 6919 return '\u2A50'; 6920 case "And": 6921 return '\u2A53'; 6922 case "Or": 6923 return '\u2A54'; 6924 case "andand": 6925 return '\u2A55'; 6926 case "oror": 6927 return '\u2A56'; 6928 case "orslope": 6929 return '\u2A57'; 6930 case "andslope": 6931 return '\u2A58'; 6932 case "andv": 6933 return '\u2A5A'; 6934 case "orv": 6935 return '\u2A5B'; 6936 case "andd": 6937 return '\u2A5C'; 6938 case "ord": 6939 return '\u2A5D'; 6940 case "wedbar": 6941 return '\u2A5F'; 6942 case "sdote": 6943 return '\u2A66'; 6944 case "simdot": 6945 return '\u2A6A'; 6946 case "congdot": 6947 return '\u2A6D'; 6948 case "easter": 6949 return '\u2A6E'; 6950 case "apacir": 6951 return '\u2A6F'; 6952 case "apE": 6953 return '\u2A70'; 6954 case "eplus": 6955 return '\u2A71'; 6956 case "pluse": 6957 return '\u2A72'; 6958 case "Esim": 6959 return '\u2A73'; 6960 case "Colone": 6961 return '\u2A74'; 6962 case "Equal": 6963 return '\u2A75'; 6964 case "eDDot": 6965 case "ddotseq": 6966 return '\u2A77'; 6967 case "equivDD": 6968 return '\u2A78'; 6969 case "ltcir": 6970 return '\u2A79'; 6971 case "gtcir": 6972 return '\u2A7A'; 6973 case "ltquest": 6974 return '\u2A7B'; 6975 case "gtquest": 6976 return '\u2A7C'; 6977 case "les": 6978 case "LessSlantEqual": 6979 case "leqslant": 6980 return '\u2A7D'; 6981 case "ges": 6982 case "GreaterSlantEqual": 6983 case "geqslant": 6984 return '\u2A7E'; 6985 case "lesdot": 6986 return '\u2A7F'; 6987 case "gesdot": 6988 return '\u2A80'; 6989 case "lesdoto": 6990 return '\u2A81'; 6991 case "gesdoto": 6992 return '\u2A82'; 
6993 case "lesdotor": 6994 return '\u2A83'; 6995 case "gesdotol": 6996 return '\u2A84'; 6997 case "lap": 6998 case "lessapprox": 6999 return '\u2A85'; 7000 case "gap": 7001 case "gtrapprox": 7002 return '\u2A86'; 7003 case "lne": 7004 case "lneq": 7005 return '\u2A87'; 7006 case "gne": 7007 case "gneq": 7008 return '\u2A88'; 7009 case "lnap": 7010 case "lnapprox": 7011 return '\u2A89'; 7012 case "gnap": 7013 case "gnapprox": 7014 return '\u2A8A'; 7015 case "lEg": 7016 case "lesseqqgtr": 7017 return '\u2A8B'; 7018 case "gEl": 7019 case "gtreqqless": 7020 return '\u2A8C'; 7021 case "lsime": 7022 return '\u2A8D'; 7023 case "gsime": 7024 return '\u2A8E'; 7025 case "lsimg": 7026 return '\u2A8F'; 7027 case "gsiml": 7028 return '\u2A90'; 7029 case "lgE": 7030 return '\u2A91'; 7031 case "glE": 7032 return '\u2A92'; 7033 case "lesges": 7034 return '\u2A93'; 7035 case "gesles": 7036 return '\u2A94'; 7037 case "els": 7038 case "eqslantless": 7039 return '\u2A95'; 7040 case "egs": 7041 case "eqslantgtr": 7042 return '\u2A96'; 7043 case "elsdot": 7044 return '\u2A97'; 7045 case "egsdot": 7046 return '\u2A98'; 7047 case "el": 7048 return '\u2A99'; 7049 case "eg": 7050 return '\u2A9A'; 7051 case "siml": 7052 return '\u2A9D'; 7053 case "simg": 7054 return '\u2A9E'; 7055 case "simlE": 7056 return '\u2A9F'; 7057 case "simgE": 7058 return '\u2AA0'; 7059 case "LessLess": 7060 return '\u2AA1'; 7061 case "GreaterGreater": 7062 return '\u2AA2'; 7063 case "glj": 7064 return '\u2AA4'; 7065 case "gla": 7066 return '\u2AA5'; 7067 case "ltcc": 7068 return '\u2AA6'; 7069 case "gtcc": 7070 return '\u2AA7'; 7071 case "lescc": 7072 return '\u2AA8'; 7073 case "gescc": 7074 return '\u2AA9'; 7075 case "smt": 7076 return '\u2AAA'; 7077 case "lat": 7078 return '\u2AAB'; 7079 case "smte": 7080 return '\u2AAC'; 7081 case "late": 7082 return '\u2AAD'; 7083 case "bumpE": 7084 return '\u2AAE'; 7085 case "pre": 7086 case "preceq": 7087 case "PrecedesEqual": 7088 return '\u2AAF'; 7089 case "sce": 7090 case 
"succeq": 7091 case "SucceedsEqual": 7092 return '\u2AB0'; 7093 case "prE": 7094 return '\u2AB3'; 7095 case "scE": 7096 return '\u2AB4'; 7097 case "prnE": 7098 case "precneqq": 7099 return '\u2AB5'; 7100 case "scnE": 7101 case "succneqq": 7102 return '\u2AB6'; 7103 case "prap": 7104 case "precapprox": 7105 return '\u2AB7'; 7106 case "scap": 7107 case "succapprox": 7108 return '\u2AB8'; 7109 case "prnap": 7110 case "precnapprox": 7111 return '\u2AB9'; 7112 case "scnap": 7113 case "succnapprox": 7114 return '\u2ABA'; 7115 case "Pr": 7116 return '\u2ABB'; 7117 case "Sc": 7118 return '\u2ABC'; 7119 case "subdot": 7120 return '\u2ABD'; 7121 case "supdot": 7122 return '\u2ABE'; 7123 case "subplus": 7124 return '\u2ABF'; 7125 case "supplus": 7126 return '\u2AC0'; 7127 case "submult": 7128 return '\u2AC1'; 7129 case "supmult": 7130 return '\u2AC2'; 7131 case "subedot": 7132 return '\u2AC3'; 7133 case "supedot": 7134 return '\u2AC4'; 7135 case "subE": 7136 case "subseteqq": 7137 return '\u2AC5'; 7138 case "supE": 7139 case "supseteqq": 7140 return '\u2AC6'; 7141 case "subsim": 7142 return '\u2AC7'; 7143 case "supsim": 7144 return '\u2AC8'; 7145 case "subnE": 7146 case "subsetneqq": 7147 return '\u2ACB'; 7148 case "supnE": 7149 case "supsetneqq": 7150 return '\u2ACC'; 7151 case "csub": 7152 return '\u2ACF'; 7153 case "csup": 7154 return '\u2AD0'; 7155 case "csube": 7156 return '\u2AD1'; 7157 case "csupe": 7158 return '\u2AD2'; 7159 case "subsup": 7160 return '\u2AD3'; 7161 case "supsub": 7162 return '\u2AD4'; 7163 case "subsub": 7164 return '\u2AD5'; 7165 case "supsup": 7166 return '\u2AD6'; 7167 case "suphsub": 7168 return '\u2AD7'; 7169 case "supdsub": 7170 return '\u2AD8'; 7171 case "forkv": 7172 return '\u2AD9'; 7173 case "topfork": 7174 return '\u2ADA'; 7175 case "mlcp": 7176 return '\u2ADB'; 7177 case "Dashv": 7178 case "DoubleLeftTee": 7179 return '\u2AE4'; 7180 case "Vdashl": 7181 return '\u2AE6'; 7182 case "Barv": 7183 return '\u2AE7'; 7184 case "vBar": 7185 return 
'\u2AE8'; 7186 case "vBarv": 7187 return '\u2AE9'; 7188 case "Vbar": 7189 return '\u2AEB'; 7190 case "Not": 7191 return '\u2AEC'; 7192 case "bNot": 7193 return '\u2AED'; 7194 case "rnmid": 7195 return '\u2AEE'; 7196 case "cirmid": 7197 return '\u2AEF'; 7198 case "midcir": 7199 return '\u2AF0'; 7200 case "topcir": 7201 return '\u2AF1'; 7202 case "nhpar": 7203 return '\u2AF2'; 7204 case "parsim": 7205 return '\u2AF3'; 7206 case "parsl": 7207 return '\u2AFD'; 7208 case "fflig": 7209 return '\uFB00'; 7210 case "filig": 7211 return '\uFB01'; 7212 case "fllig": 7213 return '\uFB02'; 7214 case "ffilig": 7215 return '\uFB03'; 7216 case "ffllig": 7217 return '\uFB04'; 7218 case "Ascr": 7219 return '\U0001D49C'; 7220 case "Cscr": 7221 return '\U0001D49E'; 7222 case "Dscr": 7223 return '\U0001D49F'; 7224 case "Gscr": 7225 return '\U0001D4A2'; 7226 case "Jscr": 7227 return '\U0001D4A5'; 7228 case "Kscr": 7229 return '\U0001D4A6'; 7230 case "Nscr": 7231 return '\U0001D4A9'; 7232 case "Oscr": 7233 return '\U0001D4AA'; 7234 case "Pscr": 7235 return '\U0001D4AB'; 7236 case "Qscr": 7237 return '\U0001D4AC'; 7238 case "Sscr": 7239 return '\U0001D4AE'; 7240 case "Tscr": 7241 return '\U0001D4AF'; 7242 case "Uscr": 7243 return '\U0001D4B0'; 7244 case "Vscr": 7245 return '\U0001D4B1'; 7246 case "Wscr": 7247 return '\U0001D4B2'; 7248 case "Xscr": 7249 return '\U0001D4B3'; 7250 case "Yscr": 7251 return '\U0001D4B4'; 7252 case "Zscr": 7253 return '\U0001D4B5'; 7254 case "ascr": 7255 return '\U0001D4B6'; 7256 case "bscr": 7257 return '\U0001D4B7'; 7258 case "cscr": 7259 return '\U0001D4B8'; 7260 case "dscr": 7261 return '\U0001D4B9'; 7262 case "fscr": 7263 return '\U0001D4BB'; 7264 case "hscr": 7265 return '\U0001D4BD'; 7266 case "iscr": 7267 return '\U0001D4BE'; 7268 case "jscr": 7269 return '\U0001D4BF'; 7270 case "kscr": 7271 return '\U0001D4C0'; 7272 case "lscr": 7273 return '\U0001D4C1'; 7274 case "mscr": 7275 return '\U0001D4C2'; 7276 case "nscr": 7277 return '\U0001D4C3'; 7278 case 
"pscr": 7279 return '\U0001D4C5'; 7280 case "qscr": 7281 return '\U0001D4C6'; 7282 case "rscr": 7283 return '\U0001D4C7'; 7284 case "sscr": 7285 return '\U0001D4C8'; 7286 case "tscr": 7287 return '\U0001D4C9'; 7288 case "uscr": 7289 return '\U0001D4CA'; 7290 case "vscr": 7291 return '\U0001D4CB'; 7292 case "wscr": 7293 return '\U0001D4CC'; 7294 case "xscr": 7295 return '\U0001D4CD'; 7296 case "yscr": 7297 return '\U0001D4CE'; 7298 case "zscr": 7299 return '\U0001D4CF'; 7300 case "Afr": 7301 return '\U0001D504'; 7302 case "Bfr": 7303 return '\U0001D505'; 7304 case "Dfr": 7305 return '\U0001D507'; 7306 case "Efr": 7307 return '\U0001D508'; 7308 case "Ffr": 7309 return '\U0001D509'; 7310 case "Gfr": 7311 return '\U0001D50A'; 7312 case "Jfr": 7313 return '\U0001D50D'; 7314 case "Kfr": 7315 return '\U0001D50E'; 7316 case "Lfr": 7317 return '\U0001D50F'; 7318 case "Mfr": 7319 return '\U0001D510'; 7320 case "Nfr": 7321 return '\U0001D511'; 7322 case "Ofr": 7323 return '\U0001D512'; 7324 case "Pfr": 7325 return '\U0001D513'; 7326 case "Qfr": 7327 return '\U0001D514'; 7328 case "Sfr": 7329 return '\U0001D516'; 7330 case "Tfr": 7331 return '\U0001D517'; 7332 case "Ufr": 7333 return '\U0001D518'; 7334 case "Vfr": 7335 return '\U0001D519'; 7336 case "Wfr": 7337 return '\U0001D51A'; 7338 case "Xfr": 7339 return '\U0001D51B'; 7340 case "Yfr": 7341 return '\U0001D51C'; 7342 case "afr": 7343 return '\U0001D51E'; 7344 case "bfr": 7345 return '\U0001D51F'; 7346 case "cfr": 7347 return '\U0001D520'; 7348 case "dfr": 7349 return '\U0001D521'; 7350 case "efr": 7351 return '\U0001D522'; 7352 case "ffr": 7353 return '\U0001D523'; 7354 case "gfr": 7355 return '\U0001D524'; 7356 case "hfr": 7357 return '\U0001D525'; 7358 case "ifr": 7359 return '\U0001D526'; 7360 case "jfr": 7361 return '\U0001D527'; 7362 case "kfr": 7363 return '\U0001D528'; 7364 case "lfr": 7365 return '\U0001D529'; 7366 case "mfr": 7367 return '\U0001D52A'; 7368 case "nfr": 7369 return '\U0001D52B'; 7370 case "ofr": 
7371 return '\U0001D52C'; 7372 case "pfr": 7373 return '\U0001D52D'; 7374 case "qfr": 7375 return '\U0001D52E'; 7376 case "rfr": 7377 return '\U0001D52F'; 7378 case "sfr": 7379 return '\U0001D530'; 7380 case "tfr": 7381 return '\U0001D531'; 7382 case "ufr": 7383 return '\U0001D532'; 7384 case "vfr": 7385 return '\U0001D533'; 7386 case "wfr": 7387 return '\U0001D534'; 7388 case "xfr": 7389 return '\U0001D535'; 7390 case "yfr": 7391 return '\U0001D536'; 7392 case "zfr": 7393 return '\U0001D537'; 7394 case "Aopf": 7395 return '\U0001D538'; 7396 case "Bopf": 7397 return '\U0001D539'; 7398 case "Dopf": 7399 return '\U0001D53B'; 7400 case "Eopf": 7401 return '\U0001D53C'; 7402 case "Fopf": 7403 return '\U0001D53D'; 7404 case "Gopf": 7405 return '\U0001D53E'; 7406 case "Iopf": 7407 return '\U0001D540'; 7408 case "Jopf": 7409 return '\U0001D541'; 7410 case "Kopf": 7411 return '\U0001D542'; 7412 case "Lopf": 7413 return '\U0001D543'; 7414 case "Mopf": 7415 return '\U0001D544'; 7416 case "Oopf": 7417 return '\U0001D546'; 7418 case "Sopf": 7419 return '\U0001D54A'; 7420 case "Topf": 7421 return '\U0001D54B'; 7422 case "Uopf": 7423 return '\U0001D54C'; 7424 case "Vopf": 7425 return '\U0001D54D'; 7426 case "Wopf": 7427 return '\U0001D54E'; 7428 case "Xopf": 7429 return '\U0001D54F'; 7430 case "Yopf": 7431 return '\U0001D550'; 7432 case "aopf": 7433 return '\U0001D552'; 7434 case "bopf": 7435 return '\U0001D553'; 7436 case "copf": 7437 return '\U0001D554'; 7438 case "dopf": 7439 return '\U0001D555'; 7440 case "eopf": 7441 return '\U0001D556'; 7442 case "fopf": 7443 return '\U0001D557'; 7444 case "gopf": 7445 return '\U0001D558'; 7446 case "hopf": 7447 return '\U0001D559'; 7448 case "iopf": 7449 return '\U0001D55A'; 7450 case "jopf": 7451 return '\U0001D55B'; 7452 case "kopf": 7453 return '\U0001D55C'; 7454 case "lopf": 7455 return '\U0001D55D'; 7456 case "mopf": 7457 return '\U0001D55E'; 7458 case "nopf": 7459 return '\U0001D55F'; 7460 case "oopf": 7461 return '\U0001D560'; 7462 
/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
///
/// Input that contains no `&` at all is returned as the original slice, without copying.
///
/// Params:
///     data = the raw HTML text to decode
///     strict = if false (the default), malformed entities are recovered from on a best-effort
///              basis and their text is kept; if true, an `Exception` is thrown instead
///
/// Returns: the decoded text. A run that is closed by `;` is always handed to `parseEntity`,
///          so an unknown name comes back as whatever `parseEntity` yields for it.
///
/// Throws: `Exception` in strict mode when an entity is interrupted by another `&`, by a
///         space or by the end of the input, or when it runs for more than 31 characters
///         without reaching its `;`.
/// Group: core_functionality
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if (data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	// How many characters may follow the '&' before we stop believing this is an entity.
	// This used to be 8, which silently broke every longer name that parseEntity knows
	// (e.g. "blacktriangleright", "NotSquareSupersetEqual", "DoubleLongLeftRightArrow").
	// 31 is the length of the longest name in the HTML named character reference table.
	enum maxEntityBodyLength = 31;

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer; // scratch space for std.utf.encode

	bool tryingEntity = false;
	// room for the '&', the body, and one more character: either the terminator or the
	// character that finally makes us give up
	dchar[maxEntityBodyLength + 2] entityBeingTried;
	int entityBeingTriedLength = 0;
	int entityAttemptIndex = 0; // number of characters consumed since the '&'

	foreach (dchar ch; data) {
		if (tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried[entityBeingTriedLength++] = ch;

			// I saw some crappy html in the wild with an unterminated entity immediately
			// followed by another one (something like &0&#1111;); this tries to handle that.
			if (ch == '&') {
				if (strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(
							entityBeingTried[0 .. entityBeingTriedLength]));

				// if not strict, let's try to parse both.

				if (entityBeingTried[0 .. entityBeingTriedLength] == "&&")
					a ~= "&"; // double amp means keep the first one, still try to parse the next one
				else
					a ~= buffer[0 .. std.utf.encode(buffer,
							parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];

				// tryingEntity is still true; slot 0 already holds the '&' that starts
				// the new attempt, so only the counters need resetting
				entityBeingTriedLength = 1;
				entityAttemptIndex = 0; // restarting on this
			} else if (ch == ';') {
				tryingEntity = false;
				a ~= buffer[0 .. std.utf.encode(buffer,
						parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
			} else if (ch == ' ') {
				// e.g. you & i
				if (strict)
					throw new Exception("unterminated entity at " ~ to!string(
							entityBeingTried[0 .. entityBeingTriedLength]));
				else {
					tryingEntity = false;
					a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
				}
			} else {
				// too long to be any entity we know: stop trying and keep the text as-is
				if (entityAttemptIndex > maxEntityBodyLength) {
					if (strict)
						throw new Exception("unterminated entity at " ~ to!string(
								entityBeingTried[0 .. entityBeingTriedLength]));
					else {
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]);
					}
				}
			}
		} else {
			if (ch == '&') {
				tryingEntity = true;
				entityBeingTriedLength = 0;
				entityBeingTried[entityBeingTriedLength++] = ch;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
		}
	}

	if (tryingEntity) {
		if (strict)
			throw new Exception("unterminated entity at " ~ to!string(
					entityBeingTried[0 .. entityBeingTriedLength]));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]);
		// FIXME: what if we have "cool &"? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

/// Common base for the node classes in this module that are not ordinary tags.
/// Such a node cannot take children, and reports 100 as its node type.
/// Group: implementations
abstract class SpecialElement : Element {
	/// Forwards `_parentDocument` to the `Element` constructor.
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Always fails with an assertion: special nodes do not accept children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// Always 100, the value this module uses for every special node.
	@property override int nodeType() const {
		return 100;
	}
}
7653 string source; 7654 } 7655 7656 /// Group: implementations 7657 abstract class ServerSideCode : SpecialElement { 7658 this(Document _parentDocument, string type) { 7659 super(_parentDocument); 7660 tagName = "#" ~ type; 7661 } 7662 7663 ///. 7664 override string nodeValue() const { 7665 return this.source; 7666 } 7667 7668 ///. 7669 override string writeToAppender(Appender!string where = appender!string()) const { 7670 auto start = where.data.length; 7671 where.put("<"); 7672 where.put(source); 7673 where.put(">"); 7674 return where.data[start .. $]; 7675 } 7676 7677 override string toPrettyString(bool, int, string) const { 7678 return "<" ~ source ~ ">"; 7679 } 7680 7681 ///. 7682 string source; 7683 } 7684 7685 ///. 7686 /// Group: implementations 7687 class PhpCode : ServerSideCode { 7688 ///. 7689 this(Document _parentDocument, string s) { 7690 super(_parentDocument, "php"); 7691 source = s; 7692 } 7693 7694 override PhpCode cloneNode(bool deep) { 7695 return new PhpCode(parentDocument, source); 7696 } 7697 } 7698 7699 ///. 7700 /// Group: implementations 7701 class AspCode : ServerSideCode { 7702 ///. 7703 this(Document _parentDocument, string s) { 7704 super(_parentDocument, "asp"); 7705 source = s; 7706 } 7707 7708 override AspCode cloneNode(bool deep) { 7709 return new AspCode(parentDocument, source); 7710 } 7711 } 7712 7713 ///. 7714 /// Group: implementations 7715 class BangInstruction : SpecialElement { 7716 ///. 7717 this(Document _parentDocument, string s) { 7718 super(_parentDocument); 7719 source = s; 7720 tagName = "#bpi"; 7721 } 7722 7723 ///. 7724 override string nodeValue() const { 7725 return this.source; 7726 } 7727 7728 override BangInstruction cloneNode(bool deep) { 7729 return new BangInstruction(parentDocument, source); 7730 } 7731 7732 ///. 
override string writeToAppender(Appender!string where = appender!string()) const {
		// Appends "<!", the source and ">"; returns only the slice added here.
		immutable begin = where.data.length;
		where.put("<!");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	/// Same text as writeToAppender produces; formatting arguments are ignored.
	override string toPrettyString(bool, int, string) const {
		return "<!" ~ source ~ ">";
	}

	/// The text placed between "<!" and ">".
	string source;
}

/// A "<?...>"-style instruction node; named "#qpi". It is serialized as
/// "<" ~ source ~ ">", so `source` carries any question marks itself.
/// Group: implementations
class QuestionInstruction : SpecialElement {
	/// Stores `s` as the text placed between "<" and ">".
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		tagName = "#qpi";
		source = s;
	}

	/// Makes a new QuestionInstruction with the same document and source; `deep` is not used.
	override QuestionInstruction cloneNode(bool deep) {
		return new QuestionInstruction(parentDocument, source);
	}

	/// The stored source text.
	override string nodeValue() const {
		return this.source;
	}

	/// Appends "<", the source and ">" to `where`; returns just the appended slice.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable begin = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	/// Same text as writeToAppender produces; formatting arguments are ignored.
	override string toPrettyString(bool, int, string) const {
		return "<" ~ source ~ ">";
	}

	/// The text placed between the angle brackets.
	string source;
}

/// A comment node; named "#comment" and serialized as "<!--" ~ source ~ "-->".
/// Group: implementations
class HtmlComment : SpecialElement {
	/// Stores `s` as the comment text.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		tagName = "#comment";
		source = s;
	}

	/// Makes a new HtmlComment with the same document and text; `deep` is not used.
	override HtmlComment cloneNode(bool deep) {
		return new HtmlComment(parentDocument, source);
	}

	/// The stored comment text.
	override string nodeValue() const {
		return this.source;
	}

	/// Appends "<!--", the text and "-->" to `where`; returns just the appended slice.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable begin = where.data.length;
		where.put("<!--");
		where.put(source);
		where.put("-->");
		return where.data[begin .. $];
	}

	/// Same text as writeToAppender produces; formatting arguments are ignored.
	override string toPrettyString(bool, int, string) const {
		return "<!--" ~ source ~ "-->";
	}

	/// The text between the comment delimiters.
	string source;
}

/// A run of text; named "#text". `contents` holds the text and is passed
/// through htmlEntitiesEncode when the node is written out.
/// Group: implementations
class TextNode : Element {
public:
	/// Creates a text node holding `e` as its contents.
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		contents = e;
		tagName = "#text";
	}

	/// Creates a text node with a null parent document.
	this(string e) {
		this(null, e);
	}

	string opDispatch(string name)(string v = null) if (0) {
		return null;
	} // text nodes don't have attributes

	/// Builds a text node from markup text, running it through htmlEntitiesDecode.
	/// The strict flag passed is `!_parentDocument.loose`, or false when there is no document.
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		immutable strict = _parentDocument is null ? false : !_parentDocument.loose;
		auto node = new TextNode(_parentDocument, "");
		node.contents = htmlEntitiesDecode(html, strict);
		return node;
	}

	/// Makes a new TextNode with the same document and contents; `deep` is not used.
	override @property TextNode cloneNode(bool deep) {
		return new TextNode(parentDocument, contents);
	}

	/// The contents, as stored.
	override string nodeValue() const {
		return this.contents; //toString();
	}

	/// NodeType.Text
	@property override int nodeType() const {
		return NodeType.Text;
	}

	/// Writes the entity-encoded contents via htmlEntitiesEncode and returns
	/// its result; empty contents yield "" without calling the encoder.
	override string writeToAppender(Appender!string where = appender!string()) const {
		string s = contents.length ? htmlEntitiesEncode(contents, where) : "";

		assert(s !is null);
		return s;
	}

	override string toPrettyString(bool insertComments = false,
			int indentationLevel = 0, string indentWith = "\t") const {
		string s;

		string contents = this.contents;
		// we will first collapse the whitespace per html
		// sort of. note this can break stuff yo!!!!
if (this.parentNode is null || this.parentNode.tagName != "pre") {
			string n = "";
			bool lastWasWhitespace = indentationLevel > 0;
			foreach (char c; contents) {
				if (c.isSimpleWhite) {
					if (!lastWasWhitespace)
						n ~= ' ';
					lastWasWhitespace = true;
				} else {
					n ~= c;
					lastWasWhitespace = false;
				}
			}

			contents = n;
		}

		if (this.parentNode !is null && this.parentNode.tagName != "p") {
			contents = contents.strip;
		}

		auto e = htmlEntitiesEncode(contents);
		import std.algorithm.iteration : splitter;

		bool first = true;
		foreach (line; splitter(e, "\n")) {
			if (first) {
				s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
				first = false;
			} else {
				s ~= "\n";
				if (insertComments)
					s ~= "<!--";
				// Continuation lines honour the caller's indent string, like the
				// first line does (this used to be a hard-coded tab).
				foreach (i; 0 .. indentationLevel)
					s ~= indentWith;
				if (insertComments)
					s ~= "-->";
			}
			s ~= line.stripRight;
		}
		return s;
	}

	/// Always fails: text nodes never take children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	/// The text held by this node.
	string contents;
	// alias contents content; // I just mistype this a lot,
}

/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/

/// An `<a>` element with helpers for reading and editing the query string of its href.
/// Group: implementations
class Link : Element {

	/// Creates an empty `a` element owned by the given document.
	this(Document _parentDocument) {
		super(_parentDocument);
		this.tagName = "a";
	}

	/// Creates an `a` element with the given href attribute and inner text.
	this(string href, string text) {
		super("a");
		setAttribute("href", href);
		innerText = text;
	}
	/+
	/// Returns everything in the href EXCEPT the query string
	@property string targetSansQuery() {

	}

	///.
	@property string domainName() {

	}

	///.
	@property string path
	+/
	/// This gets a variable from the URL's query string.
	/// Returns null when the variable is not present.
	string getValue(string name) {
		auto vars = variablesHash();
		if (auto found = name in vars)
			return *found;
		return null;
	}

	// Parses the href's query string into a decoded name => value map.
	// Returns null when there is no href attribute. Names without "="
	// map to "".
	private string[string] variablesHash() {
		string href = getAttribute("href");
		if (href is null)
			return null;

		// The fragment is everything from the first '#', so drop it before
		// looking for '?'; a '?' inside the fragment is not a query.
		auto fragment = href.indexOf("#");
		if (fragment != -1)
			href = href[0 .. fragment];

		auto ques = href.indexOf("?");
		string str = "";
		if (ques != -1)
			str = href[ques + 1 .. $];

		string[] variables = str.split("&");

		string[string] hash;

		foreach (var; variables) {
			auto index = var.indexOf("=");
			if (index == -1) {
				// Decode bare names too; updateQueryString re-encodes every name,
				// so a raw key here would come back double-encoded.
				hash[decodeComponent(var)] = "";
			} else {
				hash[decodeComponent(var[0 .. index])] = decodeComponent(var[index + 1 .. $]);
			}
		}

		return hash;
	}

	/// Replaces the href's query string with the given variables, URI-encoding
	/// names and values. Variables with an empty value are written as a bare
	/// name. An existing "#fragment" is kept at the end of the new href.
	/*private*/
	void updateQueryString(string[string] vars) {
		string href = getAttribute("href");

		// Split the fragment off FIRST. It follows the query in a URL, so
		// cutting at '?' first would throw the fragment away with the query.
		string frag = "";
		auto fragment = href.indexOf("#");
		if (fragment != -1) {
			frag = href[fragment .. $];
			href = href[0 .. fragment];
		}

		auto question = href.indexOf("?");
		if (question != -1)
			href = href[0 .. question];

		string query = "?";
		bool first = true;
		foreach (name, value; vars) {
			if (!first)
				query ~= "&";
			else
				first = false;

			query ~= encodeComponent(name);
			if (value.length)
				query ~= "=" ~ encodeComponent(value);
		}

		// a lone "?" means there were no variables; leave it off entirely
		if (query != "?")
			href ~= query;

		href ~= frag;

		setAttribute("href", href);
	}

	/// Sets or adds the variable with the given name to the given value
	/// It automatically URI encodes the values and takes care of the ? and &.
	override void setValue(string name, string variable) {
		auto vars = variablesHash();
		vars[name] = variable;

		updateQueryString(vars);
	}

	/// Removes the given variable from the query string
	void removeValue(string name) {
		auto vars = variablesHash();
		vars.remove(name);

		updateQueryString(vars);
	}

	/*
	///.
	override string toString() {

	}

	///.
	override string getAttribute(string name) {
		if(name == "href") {

		} else
			return super.getAttribute(name);
	}
	*/
}

/// A `<form>` element with helpers for reading and writing field values.
/// Group: implementations
class Form : Element {

	/// Creates an empty `form` element owned by the given document.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "form";
	}

	// The addField overrides below add the field to the first "fieldset div"
	// inside the form when one exists, and to the form itself otherwise.

	override Element addField(string label, string name, string type = "text",
			FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto t = this.querySelector("fieldset div");
		if (t is null)
			return super.addField(label, name, type, fieldOptions);
		else
			return t.addField(label, name, type, fieldOptions);
	}

	override Element addField(string label, string name, FormFieldOptions fieldOptions) {
		auto type = "text";
		auto t = this.querySelector("fieldset div");
		if (t is null)
			return super.addField(label, name, type, fieldOptions);
		else
			return t.addField(label, name, type, fieldOptions);
	}

	override Element addField(string label, string name, string[string] options,
			FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto t = this.querySelector("fieldset div");
		if (t is null)
			return super.addField(label, name, options, fieldOptions);
		else
			return t.addField(label, name, options, fieldOptions);
	}

	// Same as setValue(field, value, true): a missing field is created.
	override void setValue(string field, string value) {
		setValue(field, value, true);
	}

	// FIXME: doesn't handle arrays; multiple fields can have
// the same name

	/// Sets the form field's value. For input boxes, this sets the value attribute. For
	/// textareas, it sets the innerText. For radio boxes and select boxes, it removes
	/// the checked/selected attribute from all, and adds it to the one matching the value.
	/// For checkboxes, if the value is non-null and not empty, it checks the box.
	/// Any other kind of named element just gets its value attribute set.
	///
	/// If you set a value that doesn't exist, it throws an exception if makeNew is false.
	/// Otherwise, it makes a new input with type=hidden to keep the value.
	void setValue(string field, string value, bool makeNew) {
		auto eles = getField(field);
		if (eles.length == 0) {
			if (makeNew) {
				addInput(field, value);
				return;
			} else
				throw new Exception("form field does not exist");
		}

		if (eles.length == 1) {
			auto e = eles[0];
			switch (e.tagName) {
			case "textarea":
				e.innerText = value;
				break;
			case "input":
				string type = e.getAttribute("type");
				if (type is null) {
					e.value = value;
					return;
				}
				switch (type) {
				case "checkbox":
				case "radio":
					// the string "false" unchecks, like an empty value does
					if (value.length && value != "false")
						e.setAttribute("checked", "checked");
					else
						e.removeAttribute("checked");
					break;
				default:
					e.value = value;
					return;
				}
				break;
			case "select":
				bool found = false;
				foreach (child; e.tree) {
					if (child.tagName != "option")
						continue;
					// an option without a value attribute is matched by its text
					string val = child.getAttribute("value");
					if (val is null)
						val = child.innerText;
					if (val == value) {
						child.setAttribute("selected", "selected");
						found = true;
					} else
						child.removeAttribute("selected");
				}

				if (!found) {
					// no existing option matches, so add one that does
					e.addChild("option", value).setAttribute("selected", "selected");
				}
				break;
			default:
				// Other named elements (e.g. <button name=...>) used to hit
				// assert(0) here; store the value attribute instead of crashing.
				e.value = value;
				break;
			}
		} else {
			// assume radio boxes
			foreach (e; eles) {
				string val = e.getAttribute("value");
				//if(val is null)
				//	throw new Exception("don't know what to do with radio boxes with null value");
				if (val == value)
					e.setAttribute("checked", "checked");
				else
					e.removeAttribute("checked");
			}
		}
	}

	/// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue,
	/// it makes no attempt to find and modify existing elements in the form to the new values.
	void addValueArray(string key, string[] arrayOfValues) {
		foreach (arr; arrayOfValues)
			addChild("input", key, arr);
	}

	/// Gets the value of the field; what would be given if it submitted right now. (so
	/// it handles select boxes and radio buttons too). For checkboxes, if a value isn't
	/// given, but it is checked, it returns "checked", since null and "" are indistinguishable
	///
	/// Returns "" when the field does not exist, when a checkbox or radio is
	/// not checked, or when no option of a select is selected.
	string getValue(string field) {
		auto eles = getField(field);
		if (eles.length == 0)
			return "";
		if (eles.length == 1) {
			auto e = eles[0];
			switch (e.tagName) {
			case "input":
				// A lone radio is handled like a checkbox, mirroring setValue;
				// it used to report its value even when unchecked.
				if (e.type == "checkbox" || e.type == "radio") {
					if (e.checked)
						return e.value.length ? e.value : "checked";
					return "";
				} else
					return e.value;
			case "textarea":
				return e.innerText;
			case "select":
				foreach (child; e.tree) {
					if (child.tagName != "option")
						continue;
					if (child.selected) {
						// same fallback setValue uses: no value attribute => option text
						string val = child.getAttribute("value");
						return val is null ? child.innerText : val;
					}
				}
				break;
			default:
				// Other named elements (e.g. <button name=...>) used to hit
				// assert(0), which also crashed getPostableData; report the
				// value attribute instead.
				return e.value;
			}
		} else {
			// assuming radio
			foreach (e; eles) {
				if (e.checked)
					return e.value;
			}
		}

		return "";
	}

	// FIXME: doesn't handle multiple elements with the same name (except radio buttons)
	/// Builds a "name=value&name=value" string from every element matching
	/// "[name]", URI-encoding names and values. Each distinct name is written
	/// once, with the value getValue reports for it.
	string getPostableData() {
		bool[string] namesDone;

		string ret;
		bool outputted = false;

		foreach (e; getElementsBySelector("[name]")) {
			if (e.name in namesDone)
				continue;

			if (outputted)
				ret ~= "&";
			else
				outputted = true;

			ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(
					getValue(e.name));

			namesDone[e.name] = true;
		}

		return ret;
	}

	/// Gets the actual elements with the given name
	Element[] getField(string name) {
		Element[] ret;
		foreach (e; tree) {
			if (e.name == name)
				ret ~= e;
		}
		return ret;
	}

	/// Grabs the <label> with the given for tag, if there is one.
	/// Returns null otherwise.
	Element getLabel(string forId) {
		foreach (e; tree)
			if (e.tagName == "label" && e.getAttribute("for") == forId)
				return e;
		return null;
	}

	/// Adds a new INPUT field to the end of the form with the given attributes.
	Element addInput(string name, string value, string type = "hidden") {
		auto e = new Element(parentDocument, "input", null, true);
		e.name = name;
		e.value = value;
		e.type = type;

		appendChild(e);

		return e;
	}

	/// Removes the given field from the form. It finds the element and knocks it right out.
	void removeField(string name) {
		foreach (e; getField(name))
			e.parentNode.removeChild(e);
	}

	/+
	/// Returns all form members.
	@property Element[] elements() {

	}

	///.
	string opDispatch(string name)(string v = null)
		// filter things that should actually be attributes on the form
		if( name != "method" && name != "action" && name != "enctype"
		 && name != "style"  && name != "name"   && name != "id"   && name != "class")
	{

	}
	+/
	/+
	void submit() {
		// take its elements and submit them through http
	}
	+/
}

import std.conv;

/// A `<table>` element with helpers for building rows and reading the cell grid.
/// Group: implementations
class Table : Element {

	/// Creates an empty `table` element owned by the given document.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "table";
	}

	/// Creates an element with the given type and content.
	/// An `Html` argument is assigned as innerHTML; anything else is converted
	/// with `to!string` and assigned as innerText. The cell is not added to the table.
	Element th(T)(T t) {
		Element e;
		if (parentDocument !is null)
			e = parentDocument.createElement("th");
		else
			e = Element.make("th");
		static if (is(T == Html))
			e.innerHTML = t;
		else
			e.innerText = to!string(t);
		return e;
	}

	/// ditto
	Element td(T)(T t) {
		Element e;
		if (parentDocument !is null)
			e = parentDocument.createElement("td");
		else
			e = Element.make("td");
		static if (is(T == Html))
			e.innerHTML = t;
		else
			e.innerText = to!string(t);
		return e;
	}

	/// Appends a row of `th` cells to the table's `thead`.
	Element appendHeaderRow(T...)(T t) {
		return appendRowInternal("th", "thead", t);
	}

	/// Appends a row of `td` cells to the table's `tfoot`.
	Element appendFooterRow(T...)(T t) {
		return appendRowInternal("td", "tfoot", t);
	}

	/// Appends a row of `td` cells to the table's `tbody`.
	Element appendRow(T...)(T t) {
		return appendRowInternal("td", "tbody", t);
	}

	void addColumnClasses(string[] classes...)
{
		// classes[i] is added to the cell in column i of every grid row;
		// empty class names are skipped.
		auto grid = getGrid();
		foreach (row; grid)
			foreach (i, cl; classes) {
				if (cl.length)
					// the grid can contain null cells, so check before dereferencing
					if (i < row.length && row[i] !is null)
						row[i].addClass(cl);
			}
	}

	// Builds a <tr> from the arguments and appends it to the first direct
	// child whose tag is findType, creating that child when none exists.
	// Each argument becomes one innerType cell, except that td/th elements
	// are used as-is and a string[] expands to one cell per string.
	private Element appendRowInternal(T...)(string innerType, string findType, T t) {
		Element row = Element.make("tr");

		foreach (e; t) {
			static if (is(typeof(e) : Element)) {
				if (e.tagName == "td" || e.tagName == "th")
					row.appendChild(e);
				else {
					Element a = Element.make(innerType);

					a.appendChild(e);

					row.appendChild(a);
				}
			} else static if (is(typeof(e) == Html)) {
				Element a = Element.make(innerType);
				a.innerHTML = e.source;
				row.appendChild(a);
			} else static if (is(typeof(e) == Element[])) {
				Element a = Element.make(innerType);
				foreach (ele; e)
					a.appendChild(ele);
				row.appendChild(a);
			} else static if (is(typeof(e) == string[])) {
				foreach (ele; e) {
					Element a = Element.make(innerType);
					a.innerText = to!string(ele);
					row.appendChild(a);
				}
			} else {
				Element a = Element.make(innerType);
				a.innerText = to!string(e);
				row.appendChild(a);
			}
		}

		foreach (e; children) {
			if (e.tagName == findType) {
				e.appendChild(row);
				return row;
			}
		}

		// the type was not found if we are here... let's add it so it is well-formed
		auto lol = this.addChild(findType);
		lol.appendChild(row);

		return row;
	}

	/// Returns the table's `caption` child, appending a new empty one when
	/// the table has none.
	Element captionElement() {
		Element cap;
		foreach (c; children) {
			if (c.tagName == "caption") {
				cap = c;
				break;
			}
		}

		if (cap is null) {
			cap = Element.make("caption");
			appendChild(cap);
		}

		return cap;
	}

	/// The caption's text. Reading it creates the caption element if missing.
	@property string caption() {
		return captionElement().innerText;
	}

	/// Sets the caption's text, creating the caption element if missing.
	@property void caption(string text) {
		captionElement().innerText = text;
	}

	/// Gets the logical layout of the table as a rectangular grid of
	/// cells. It considers rowspan and colspan. A cell with a large
	/// span is represented in the grid by being referenced several times.
	/// The tablePortition parameter can get just a <thead>, <tbody>, or
	/// <tfoot> portion if you pass one.
	///
	/// Note: the rectangular grid might include null cells.
	///
	/// This is kinda expensive so you should call once when you want the grid,
	/// then do lookups on the returned array.
	TableCell[][] getGrid(Element tablePortition = null)
	in {
		if (tablePortition is null)
			assert(tablePortition is null);
		else {
			assert(tablePortition !is null);
			assert(tablePortition.parentNode is this);
			assert(tablePortition.tagName == "tbody"
					|| tablePortition.tagName == "tfoot" || tablePortition.tagName == "thead");
		}
	}
	body {
		if (tablePortition is null)
			tablePortition = this;

		TableCell[][] ret;

		// FIXME: will also return rows of sub tables!
		auto rows = tablePortition.getElementsByTagName("tr");
		ret.length = rows.length;

		int maxLength = 0;

		// Stores cell at ret[row][position], padding the row with nulls as
		// needed. A position of -1 means "first null slot, or the end of the
		// row". Returns the column actually used.
		int insertCell(int row, int position, TableCell cell) {
			if (row >= ret.length)
				return position; // not supposed to happen - a rowspan is prolly too big.

			if (position == -1) {
				position++;
				foreach (item; ret[row]) {
					if (item is null)
						break;
					position++;
				}
			}

			if (position < ret[row].length)
				ret[row][position] = cell;
			else
				foreach (i; ret[row].length .. position + 1) {
					if (i == position)
						ret[row] ~= cell;
					else
						ret[row] ~= null;
				}
			return position;
		}

		foreach (i, rowElement; rows) {
			auto row = cast(TableRow) rowElement;
			assert(row !is null);
			assert(i < ret.length);

			foreach (cellElement; rowElement.childNodes) {
				auto cell = cast(TableCell) cellElement;
				if (cell is null)
					continue;

				// FIXME: colspan == 0 or rowspan == 0
				// is supposed to mean fill in the rest of
				// the table, not skip it
				foreach (int j; 0 .. cell.colspan) {
					// In the cell's own row, take the first free slot (-1). Rows
					// below must reuse the column that was actually chosen; a
					// counter of this row's own cells is wrong when slots are
					// already taken by rowspans from rows above.
					int column = -1;
					foreach (int k; 0 .. cell.rowspan) {
						immutable placed = insertCell(k + cast(int) i, column, cell);
						if (k == 0)
							column = placed;
					}
				}
			}

			if (ret[i].length > maxLength)
				maxLength = cast(int) ret[i].length;
		}

		// want to ensure it's rectangular
		foreach (ref r; ret) {
			foreach (i; r.length .. maxLength)
				r ~= null;
		}

		return ret;
	}
}

/// Represents a table row element - a <tr>
/// Group: implementations
class TableRow : Element {
	/// Creates an empty `tr` element owned by the given document.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "tr";
	}

	// FIXME: the standard says there should be a lot more in here,
	// but meh, I never use it and it's a pain to implement.
}

/// Represents anything that can be a table cell - <td> or <th> html.
/// Group: implementations
class TableCell : Element {
	///.
this(Document _parentDocument, string _tagName) {
		super(_parentDocument, _tagName);
	}

	/// The rowspan attribute as an int; 1 when the attribute is missing or empty.
	@property int rowspan() const {
		int ret = 1;
		auto it = getAttribute("rowspan");
		if (it.length)
			ret = to!int(it);
		return ret;
	}

	/// The colspan attribute as an int; 1 when the attribute is missing or empty.
	@property int colspan() const {
		int ret = 1;
		auto it = getAttribute("colspan");
		if (it.length)
			ret = to!int(it);
		return ret;
	}

	/// Sets the rowspan attribute and returns the value given.
	@property int rowspan(int i) {
		setAttribute("rowspan", to!string(i));
		return i;
	}

	/// Sets the colspan attribute and returns the value given.
	@property int colspan(int i) {
		setAttribute("colspan", to!string(i));
		return i;
	}

}

/// Exception type carrying only a message and source location.
/// Group: implementations
class MarkupException : Exception {

	/// Forwards message, file and line to Exception.
	this(string message, string file = __FILE__, size_t line = __LINE__) {
		super(message, file, line);
	}
}

/// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree.
/// Group: implementations
class ElementNotFoundException : Exception {

	/// type == kind of element you were looking for and search == a selector describing the search.
	this(string type, string search, Element searchContext, string file = __FILE__,
			size_t line = __LINE__) {
		this.searchContext = searchContext;
		super("Element of type '" ~ type ~ "' matching {" ~ search ~ "} not found.", file, line);
	}

	/// The element passed to the constructor as the search context.
	Element searchContext;
}

/// The html struct is used to differentiate between regular text nodes and html in certain functions
///
/// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");`
/// Group: core_functionality
struct Html {
	/// This string holds the actual html. Use it to retrieve the contents.
	string source;
}

// for the observers
enum DomMutationOperations {
	setAttribute,
	removeAttribute,
	appendChild, // tagname, attributes[], innerHTML
	insertBefore,
	truncateChildren,
	removeChild,
	appendHtml,
	replaceHtml,
	appendText,
	replaceText,
	replaceTextOnly
}

// and for observers too
struct DomMutationEvent {
	DomMutationOperations operation;
	Element target;
	Element related; // what this means differs with the operation
	Element related2;
	string relatedString;
	string relatedString2;
}

private immutable static string[] selfClosedElements = [
	// html 4
	"img", "hr", "input", "br", "col", "link", "meta",
	// html 5
	"source"
];

private immutable static string[] inlineElements = [
	"span", "strong", "em", "b", "i", "a"
];

static import std.conv;

/// Converts a string of hexadecimal digits to an int. Digits may be
/// lower or upper case ("ff" and "FF" both give 255); an empty string
/// gives 0.
///
/// Throws: Exception on any character that is not a hex digit. No prefix
/// such as "0x" or "#" is accepted.
int intFromHex(string hex) {
	int place = 1; // weight of the current digit: 1, 16, 256, ...
	int value = 0;
	// walk from the least significant (last) digit to the first
	foreach_reverse (char q; hex) {
		int v;
		if (q >= '0' && q <= '9')
			v = q - '0';
		else if (q >= 'a' && q <= 'f')
			v = q - 'a' + 10;
		else if (q >= 'A' && q <= 'F')
			v = q - 'A' + 10; // upper case used to be rejected
		else
			throw new Exception("Illegal hex character: " ~ q);

		value += v * place;

		place *= 16;
	}

	return value;
}

// CSS selector handling

// EXTENSIONS
// dd - dt means get the dt directly before that dd (opposite of +)                  NOT IMPLEMENTED
// dd -- dt means rewind siblings until you hit a dt, go as far as you need to       NOT IMPLEMENTED
// dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl")
// dt << dl  means go as far up as needed to find a dl (you have an element and want its containers)      NOT IMPLEMENTED
// :first  means to stop at the first hit, don't do more (so p + p == p ~ p:first

// CSS4 draft currently
// says you can change the subject (the element actually returned) by putting a ! at the end of it.
// That might be useful to implement, though I do have parent selectors too.

/// The punctuation tokens the selector lexer recognizes. Anything else
/// is white space or a name.
static immutable string[] selectorTokens = [
	// It is important that the 2 character possibilities go first here for accurate lexing
	"~=", "*=", "|=", "^=", "$=", "!=", // "::" should be there too for full standard
	"::", ">>", "<<", // my any-parent extension (reciprocal of whitespace)
	// " - ", // previous-sibling extension (whitespace required to disambiguate tag-names)
	".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(",
	")"
]; // other is white space or a name.

/// Returns the index in `selectorTokens` of the token that starts at
/// `str[position]`, or -1 when none does. The first matching entry wins,
/// which is why the two-character tokens are listed first.
sizediff_t idToken(string str, sizediff_t position) {
	sizediff_t tid = -1;
	char c = str[position];
	foreach (a, token; selectorTokens)

		if (c == token[0]) {
			if (token.length > 1) {
				if (position + 1 >= str.length || str[position + 1] != token[1])
					continue; // not this token
			}
			tid = a;
			break;
		}
	return tid;
}

/// Splits a selector string into tokens: entries of `selectorTokens`,
/// names, and the contents of quoted strings (without the quotes).
///
/// Characters below 32 and /* ... */ comments between tokens are skipped;
/// a space is NOT skipped and comes out as its own " " token. A backslash
/// in a name escapes the following character. In a quoted string, a
/// backslash keeps the following character from ending the string.
///
/// A quoted string with no closing quote fails with assert(0).
// look, ma, no phobos!
// new lexer by ketmar
string[] lexSelector(string selstr) {

	// index into selectorTokens of the token starting at stpos, or -1
	static sizediff_t idToken(string str, size_t stpos) {
		char c = str[stpos];
		foreach (sizediff_t tidx, immutable token; selectorTokens) {
			if (c == token[0]) {
				if (token.length > 1) {
					assert(token.length == 2, token); // we don't have 3-char tokens yet
					if (str.length - stpos < 2 || str[stpos + 1] != token[1])
						continue;
				}
				return tidx;
			}
		}
		return -1;
	}

	// skip spaces and comments
	static string removeLeadingBlanks(string str) {
		size_t curpos = 0;
		while (curpos < str.length) {
			immutable char ch = str[curpos];
			// this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares!
			if (ch == '/' && str.length - curpos > 1 && str[curpos + 1] == '*') {
				// comment
				curpos += 2;
				while (curpos < str.length) {
					if (str[curpos] == '*' && str.length - curpos > 1 && str[curpos + 1] == '/') {
						curpos += 2;
						break;
					}
					++curpos;
				}
			} else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below.
				++curpos;

				// FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While
				// "foo bar" is clear, and can only have one meaning, consider ".foo .bar".
				// That is not the same as ".foo.bar". If the space is stripped, important
				// information is lost, despite the tokens being separatable anyway.
				//
				// The parser really needs to be aware of the presence of a space.
			} else {
				break;
			}
		}
		return str[curpos .. $];
	}

	static bool isBlankAt()(string str, size_t pos) {
		// we should consider unicode spaces too, but... unicode sux anyway.
		return (pos < str.length && // in string
				(str[pos] <= 32 || // space
					(str.length - pos > 1 && str[pos] == '/' && str[pos + 1] == '*'))); // comment
	}

	string[] tokens;
	// lexx it!
	while ((selstr = removeLeadingBlanks(selstr)).length > 0) {
		if (selstr[0] == '\"' || selstr[0] == '\'') {
			auto end = selstr[0];
			auto pos = 1;
			bool escaping;
			// Scan to the closing quote. A quote right after a backslash does
			// not end the string. (The condition used to contain `!escaping`,
			// which stopped the scan directly after any backslash.)
			while (pos < selstr.length && (escaping || selstr[pos] != end)) {
				if (escaping)
					escaping = false;
				else if (selstr[pos] == '\\')
					escaping = true;
				pos++;
			}

			// pos ran off the end: there was no closing quote
			if (pos >= selstr.length)
				assert(0, selstr);

			// FIXME: do better unescaping
			tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`);
			// A string that closes at the very end of the selector is fine; the
			// slice below is then simply empty. (This used to assert as well.)
			selstr = selstr[pos + 1 .. $];
			continue;
		}

		// no tokens starts with escape
		immutable tid = idToken(selstr, 0);
		if (tid >= 0) {
			// special token
			tokens ~= selectorTokens[tid]; // it's funnier this way
			selstr = selstr[selectorTokens[tid].length .. $];
			continue;
		}
		// from start to space or special token
		size_t escapePos = size_t.max;
		size_t curpos = 0; // i can has chizburger^w escape at the start
		while (curpos < selstr.length) {
			if (selstr[curpos] == '\\') {
				// this is escape, just skip it and next char
				if (escapePos == size_t.max)
					escapePos = curpos;
				curpos = (selstr.length - curpos >= 2 ? curpos + 2 : selstr.length);
			} else {
				if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0)
					break;
				++curpos;
			}
		}
		// identifier
		if (escapePos != size_t.max) {
			// i hate it when it happens
			// rebuild the name without the backslashes
			string id = selstr[0 .. escapePos];
			while (escapePos < curpos) {
				if (curpos - escapePos < 2)
					break;
				id ~= selstr[escapePos + 1]; // escaped char
				escapePos += 2;
				immutable stp = escapePos;
				while (escapePos < curpos && selstr[escapePos] != '\\')
					++escapePos;
				if (escapePos > stp)
					id ~= selstr[stp .. escapePos];
			}
			if (id.length > 0)
				tokens ~= id;
		} else {
			tokens ~= selstr[0 .. curpos];
		}
		selstr = selstr[curpos .. $];
	}
	return tokens;
}

// NOTE(review): several expectations below contain no " " tokens even
// though the input has spaces, while lexSelector emits " " as a token.
// They look like they predate that behaviour; confirm before enabling
// version unittest_domd_lexer.
version (unittest_domd_lexer) unittest {
	assert(lexSelector(r" test\=me /*d*/") == [r"test=me"]);
	assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]);
	assert(lexSelector(r" < <") == ["<", "<"]);
	assert(lexSelector(r" <<") == ["<<"]);
	assert(lexSelector(r" <</") == ["<<", "/"]);
	assert(lexSelector(r" <</*") == ["<<"]);
	assert(lexSelector(r" <\</*") == ["<", "<"]);
	assert(lexSelector(r"heh\") == ["heh"]);
	assert(lexSelector(r"alice \") == ["alice"]);
	assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]);
}

///.
struct SelectorPart {
	string tagNameFilter; ///.
	string[] attributesPresent; /// [attr]
	string[2][] attributesEqual; /// [attr=value]
	string[2][] attributesStartsWith; /// [attr^=value]
	string[2][] attributesEndsWith; /// [attr$=value]
	// split it on space, then match to these
	string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
	// split it on dash, then match to these
	string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
	string[2][] attributesInclude; /// [attr*=value]
	string[2][] attributesNotEqual; /// [attr!=value] -- extension by me

	string[] hasSelectors; /// :has(this)
	string[] notSelectors; /// :not(this)

	ParsedNth[] nthOfType; /// .
	ParsedNth[] nthLastOfType; /// .
	ParsedNth[] nthChild; /// .

	bool firstChild; ///.
	bool lastChild; ///.

	bool firstOfType; /// .
	bool lastOfType; /// .

	bool emptyElement; ///.
	bool whitespaceOnly; ///
	bool oddChild; ///.
	bool evenChild; ///.

	bool rootElement; ///.

	int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf

	bool isCleanSlateExceptSeparation() {
		auto cp = this;
		cp.separation = -1;
		return cp is SelectorPart.init;
	}

	///.
8949 string toString() { 8950 string ret; 8951 switch (separation) { 8952 default: 8953 assert(0); 8954 case -1: 8955 break; 8956 case 0: 8957 ret ~= " "; 8958 break; 8959 case 1: 8960 ret ~= " > "; 8961 break; 8962 case 2: 8963 ret ~= " + "; 8964 break; 8965 case 3: 8966 ret ~= " ~ "; 8967 break; 8968 case 4: 8969 ret ~= " < "; 8970 break; 8971 } 8972 ret ~= tagNameFilter; 8973 foreach (a; attributesPresent) 8974 ret ~= "[" ~ a ~ "]"; 8975 foreach (a; attributesEqual) 8976 ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]"; 8977 foreach (a; attributesEndsWith) 8978 ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]"; 8979 foreach (a; attributesStartsWith) 8980 ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]"; 8981 foreach (a; attributesNotEqual) 8982 ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]"; 8983 foreach (a; attributesInclude) 8984 ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]"; 8985 foreach (a; attributesIncludesSeparatedByDashes) 8986 ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]"; 8987 foreach (a; attributesIncludesSeparatedBySpaces) 8988 ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]"; 8989 8990 foreach (a; notSelectors) 8991 ret ~= ":not(" ~ a ~ ")"; 8992 foreach (a; hasSelectors) 8993 ret ~= ":has(" ~ a ~ ")"; 8994 8995 foreach (a; nthChild) 8996 ret ~= ":nth-child(" ~ a.toString ~ ")"; 8997 foreach (a; nthOfType) 8998 ret ~= ":nth-of-type(" ~ a.toString ~ ")"; 8999 foreach (a; nthLastOfType) 9000 ret ~= ":nth-last-of-type(" ~ a.toString ~ ")"; 9001 9002 if (firstChild) 9003 ret ~= ":first-child"; 9004 if (lastChild) 9005 ret ~= ":last-child"; 9006 if (firstOfType) 9007 ret ~= ":first-of-type"; 9008 if (lastOfType) 9009 ret ~= ":last-of-type"; 9010 if (emptyElement) 9011 ret ~= ":empty"; 9012 if (whitespaceOnly) 9013 ret ~= ":whitespace-only"; 9014 if (oddChild) 9015 ret ~= ":odd-child"; 9016 if (evenChild) 9017 ret ~= ":even-child"; 9018 if (rootElement) 9019 ret ~= ":root"; 9020 9021 return ret; 9022 } 9023 9024 // USEFUL 9025 ///. 
9026 bool matchElement(Element e) { 9027 // FIXME: this can be called a lot of times, and really add up in times according to the profiler. 9028 // Each individual call is reasonably fast already, but it adds up. 9029 if (e is null) 9030 return false; 9031 if (e.nodeType != 1) 9032 return false; 9033 9034 if (tagNameFilter != "" && tagNameFilter != "*") 9035 if (e.tagName != tagNameFilter) 9036 return false; 9037 if (firstChild) { 9038 if (e.parentNode is null) 9039 return false; 9040 if (e.parentNode.childElements[0]!is e) 9041 return false; 9042 } 9043 if (lastChild) { 9044 if (e.parentNode is null) 9045 return false; 9046 auto ce = e.parentNode.childElements; 9047 if (ce[$ - 1]!is e) 9048 return false; 9049 } 9050 if (firstOfType) { 9051 if (e.parentNode is null) 9052 return false; 9053 auto ce = e.parentNode.childElements; 9054 foreach (c; ce) { 9055 if (c.tagName == e.tagName) { 9056 if (c is e) 9057 return true; 9058 else 9059 return false; 9060 } 9061 } 9062 } 9063 if (lastOfType) { 9064 if (e.parentNode is null) 9065 return false; 9066 auto ce = e.parentNode.childElements; 9067 foreach_reverse (c; ce) { 9068 if (c.tagName == e.tagName) { 9069 if (c is e) 9070 return true; 9071 else 9072 return false; 9073 } 9074 } 9075 } 9076 if (emptyElement) { 9077 if (e.children.length) 9078 return false; 9079 } 9080 if (whitespaceOnly) { 9081 if (e.innerText.strip.length) 9082 return false; 9083 } 9084 if (rootElement) { 9085 if (e.parentNode !is null) 9086 return false; 9087 } 9088 if (oddChild || evenChild) { 9089 if (e.parentNode is null) 9090 return false; 9091 foreach (i, child; e.parentNode.childElements) { 9092 if (child is e) { 9093 if (oddChild && !(i & 1)) 9094 return false; 9095 if (evenChild && (i & 1)) 9096 return false; 9097 break; 9098 } 9099 } 9100 } 9101 9102 bool matchWithSeparator(string attr, string value, string separator) { 9103 foreach (s; attr.split(separator)) 9104 if (s == value) 9105 return true; 9106 return false; 9107 } 9108 9109 foreach (a; 
attributesPresent) 9110 if (a !in e.attributes) 9111 return false; 9112 foreach (a; attributesEqual) 9113 if (a[0]!in e.attributes || e.attributes[a[0]] != a[1]) 9114 return false; 9115 foreach (a; attributesNotEqual) // FIXME: maybe it should say null counts... this just bit me. 9116 // I did [attr][attr!=value] to work around. 9117 // 9118 // if it's null, it's not equal, right? 9119 //if(a[0] !in e.attributes || e.attributes[a[0]] == a[1]) 9120 if (e.getAttribute(a[0]) == a[1]) 9121 return false; 9122 foreach (a; attributesInclude) 9123 if (a[0]!in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1)) 9124 return false; 9125 foreach (a; attributesStartsWith) 9126 if (a[0]!in e.attributes || !e.attributes[a[0]].startsWith(a[1])) 9127 return false; 9128 foreach (a; attributesEndsWith) 9129 if (a[0]!in e.attributes || !e.attributes[a[0]].endsWith(a[1])) 9130 return false; 9131 foreach (a; attributesIncludesSeparatedBySpaces) 9132 if (a[0]!in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " ")) 9133 return false; 9134 foreach (a; attributesIncludesSeparatedByDashes) 9135 if (a[0]!in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-")) 9136 return false; 9137 foreach (a; hasSelectors) { 9138 if (e.querySelector(a) is null) 9139 return false; 9140 } 9141 foreach (a; notSelectors) { 9142 auto sel = Selector(a); 9143 if (sel.matchesElement(e)) 9144 return false; 9145 } 9146 9147 foreach (a; nthChild) { 9148 if (e.parentNode is null) 9149 return false; 9150 9151 auto among = e.parentNode.childElements; 9152 9153 if (!a.solvesFor(among, e)) 9154 return false; 9155 } 9156 foreach (a; nthOfType) { 9157 if (e.parentNode is null) 9158 return false; 9159 9160 auto among = e.parentNode.childElements(e.tagName); 9161 9162 if (!a.solvesFor(among, e)) 9163 return false; 9164 } 9165 foreach (a; nthLastOfType) { 9166 if (e.parentNode is null) 9167 return false; 9168 9169 auto among = retro(e.parentNode.childElements(e.tagName)); 9170 9171 if 
(!a.solvesFor(among, e)) 9172 return false; 9173 } 9174 9175 return true; 9176 } 9177 } 9178 9179 struct ParsedNth { 9180 int multiplier; 9181 int adder; 9182 9183 string of; 9184 9185 this(string text) { 9186 auto original = text; 9187 consumeWhitespace(text); 9188 if (text.startsWith("odd")) { 9189 multiplier = 2; 9190 adder = 1; 9191 9192 text = text[3 .. $]; 9193 } else if (text.startsWith("even")) { 9194 multiplier = 2; 9195 adder = 1; 9196 9197 text = text[4 .. $]; 9198 } else { 9199 int n = (text.length && text[0] == 'n') ? 1 : parseNumber(text); 9200 consumeWhitespace(text); 9201 if (text.length && text[0] == 'n') { 9202 multiplier = n; 9203 text = text[1 .. $]; 9204 consumeWhitespace(text); 9205 if (text.length) { 9206 if (text[0] == '+') { 9207 text = text[1 .. $]; 9208 adder = parseNumber(text); 9209 } else if (text[0] == '-') { 9210 text = text[1 .. $]; 9211 adder = -parseNumber(text); 9212 } else if (text[0] == 'o') { 9213 // continue, this is handled below 9214 } else 9215 throw new Exception("invalid css string at " ~ text ~ " in " ~ original); 9216 } 9217 } else { 9218 adder = n; 9219 } 9220 } 9221 9222 consumeWhitespace(text); 9223 if (text.startsWith("of")) { 9224 text = text[2 .. $]; 9225 consumeWhitespace(text); 9226 of = text[0 .. $]; 9227 } 9228 } 9229 9230 string toString() { 9231 return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, 9232 of.length ? 
" of " : "", of); 9233 } 9234 9235 bool solvesFor(R)(R elements, Element e) { 9236 int idx = 1; 9237 bool found = false; 9238 foreach (ele; elements) { 9239 if (of.length) { 9240 auto sel = Selector(of); 9241 if (!sel.matchesElement(ele)) 9242 continue; 9243 } 9244 if (ele is e) { 9245 found = true; 9246 break; 9247 } 9248 idx++; 9249 } 9250 if (!found) 9251 return false; 9252 9253 // multiplier* n + adder = idx 9254 // if there is a solution for integral n, it matches 9255 9256 idx -= adder; 9257 if (multiplier) { 9258 if (idx % multiplier == 0) 9259 return true; 9260 } else { 9261 return idx == 0; 9262 } 9263 return false; 9264 } 9265 9266 private void consumeWhitespace(ref string text) { 9267 while (text.length && text[0] == ' ') 9268 text = text[1 .. $]; 9269 } 9270 9271 private int parseNumber(ref string text) { 9272 consumeWhitespace(text); 9273 if (text.length == 0) 9274 return 0; 9275 bool negative = text[0] == '-'; 9276 if (text[0] == '+') 9277 text = text[1 .. $]; 9278 if (negative) 9279 text = text[1 .. $]; 9280 int i = 0; 9281 while (i < text.length && (text[i] >= '0' && text[i] <= '9')) 9282 i++; 9283 if (i == 0) 9284 return 0; 9285 int cool = to!int(text[0 .. i]); 9286 text = text[i .. $]; 9287 return negative ? -cool : cool; 9288 } 9289 } 9290 9291 // USEFUL 9292 ///. 9293 Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) { 9294 Element[] ret; 9295 if (!parts.length) { 9296 return [start]; // the null selector only matches the start point; it 9297 // is what terminates the recursion 9298 } 9299 9300 auto part = parts[0]; 9301 //writeln("checking ", part, " against ", start, " with ", part.separation); 9302 switch (part.separation) { 9303 default: 9304 assert(0); 9305 case -1: 9306 case 0: // tree 9307 foreach (e; start.tree) { 9308 if (part.separation == 0 && start is e) 9309 continue; // space doesn't match itself! 9310 if (part.matchElement(e)) { 9311 ret ~= getElementsBySelectorParts(e, parts[1 .. 
$]); 9312 } 9313 } 9314 break; 9315 case 1: // children 9316 foreach (e; start.childNodes) { 9317 if (part.matchElement(e)) { 9318 ret ~= getElementsBySelectorParts(e, parts[1 .. $]); 9319 } 9320 } 9321 break; 9322 case 2: // next-sibling 9323 auto e = start.nextSibling("*"); 9324 if (part.matchElement(e)) 9325 ret ~= getElementsBySelectorParts(e, parts[1 .. $]); 9326 break; 9327 case 3: // younger sibling 9328 auto tmp = start.parentNode; 9329 if (tmp !is null) { 9330 sizediff_t pos = -1; 9331 auto children = tmp.childElements; 9332 foreach (i, child; children) { 9333 if (child is start) { 9334 pos = i; 9335 break; 9336 } 9337 } 9338 assert(pos != -1); 9339 foreach (e; children[pos + 1 .. $]) { 9340 if (part.matchElement(e)) 9341 ret ~= getElementsBySelectorParts(e, parts[1 .. $]); 9342 } 9343 } 9344 break; 9345 case 4: // immediate parent node, an extension of mine to walk back up the tree 9346 auto e = start.parentNode; 9347 if (part.matchElement(e)) { 9348 ret ~= getElementsBySelectorParts(e, parts[1 .. $]); 9349 } 9350 /* 9351 Example of usefulness: 9352 9353 Consider you have an HTML table. If you want to get all rows that have a th, you can do: 9354 9355 table th < tr 9356 9357 Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes 9358 */ 9359 break; 9360 case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator) 9361 /* 9362 Like with the < operator, this is best used to find some parent of a particular known element. 9363 9364 Say you have an anchor inside a 9365 */ 9366 } 9367 9368 return ret; 9369 } 9370 9371 /++ 9372 Represents a parsed CSS selector. You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing. 
9373 9374 See_Also: 9375 $(LIST 9376 * [Element.querySelector] 9377 * [Element.querySelectorAll] 9378 * [Element.matches] 9379 * [Element.closest] 9380 * [Document.querySelector] 9381 * [Document.querySelectorAll] 9382 ) 9383 +/ 9384 /// Group: core_functionality 9385 struct Selector { 9386 SelectorComponent[] components; 9387 string original; 9388 /++ 9389 Parses the selector string and constructs the usable structure. 9390 +/ 9391 this(string cssSelector) { 9392 components = parseSelectorString(cssSelector); 9393 original = cssSelector; 9394 } 9395 9396 /++ 9397 Returns true if the given element matches this selector, 9398 considered relative to an arbitrary element. 9399 9400 You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this 9401 with [std.algorithm.iteration.filter]: 9402 9403 --- 9404 Selector sel = Selector("foo > bar"); 9405 auto lazySelectorRange = element.tree.filter!(e => sel.matchElement(e))(document.root); 9406 --- 9407 +/ 9408 bool matchesElement(Element e, Element relativeTo = null) { 9409 foreach (component; components) 9410 if (component.matchElement(e, relativeTo)) 9411 return true; 9412 9413 return false; 9414 } 9415 9416 /++ 9417 Reciprocal of [Element.querySelectorAll] 9418 +/ 9419 Element[] getMatchingElements(Element start) { 9420 Element[] ret; 9421 foreach (component; components) 9422 ret ~= getElementsBySelectorParts(start, component.parts); 9423 return removeDuplicates(ret); 9424 } 9425 9426 /++ 9427 Like [getMatchingElements], but returns a lazy range. Be careful 9428 about mutating the dom as you iterate through this. 
9429 +/ 9430 auto getMatchingElementsLazy(Element start, Element relativeTo = null) { 9431 import std.algorithm.iteration; 9432 9433 return start.tree.filter!(a => this.matchesElement(a, relativeTo)); 9434 } 9435 9436 /// Returns the string this was built from 9437 string toString() { 9438 return original; 9439 } 9440 9441 /++ 9442 Returns a string from the parsed result 9443 9444 9445 (may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing) 9446 +/ 9447 string parsedToString() { 9448 string ret; 9449 9450 foreach (idx, component; components) { 9451 if (idx) 9452 ret ~= ", "; 9453 ret ~= component.toString(); 9454 } 9455 9456 return ret; 9457 } 9458 } 9459 9460 ///. 9461 struct SelectorComponent { 9462 ///. 9463 SelectorPart[] parts; 9464 9465 ///. 9466 string toString() { 9467 string ret; 9468 foreach (part; parts) 9469 ret ~= part.toString(); 9470 return ret; 9471 } 9472 9473 // USEFUL 9474 ///. 9475 Element[] getElements(Element start) { 9476 return removeDuplicates(getElementsBySelectorParts(start, parts)); 9477 } 9478 9479 // USEFUL (but not implemented) 9480 /// If relativeTo == null, it assumes the root of the parent document. 9481 bool matchElement(Element e, Element relativeTo = null) { 9482 if (e is null) 9483 return false; 9484 Element where = e; 9485 int lastSeparation = -1; 9486 9487 auto lparts = parts; 9488 9489 if (parts.length && parts[0].separation > 0) { 9490 // if it starts with a non-trivial separator, inject 9491 // a "*" matcher to act as a root. for cases like document.querySelector("> body") 9492 // which implies html 9493 9494 // there is probably a MUCH better way to do this. 
9495 auto dummy = SelectorPart.init; 9496 dummy.tagNameFilter = "*"; 9497 dummy.separation = 0; 9498 lparts = dummy ~ lparts; 9499 } 9500 9501 foreach (part; retro(lparts)) { 9502 9503 // writeln("matching ", where, " with ", part, " via ", lastSeparation); 9504 // writeln(parts); 9505 9506 if (lastSeparation == -1) { 9507 if (!part.matchElement(where)) 9508 return false; 9509 } else if (lastSeparation == 0) { // generic parent 9510 // need to go up the whole chain 9511 where = where.parentNode; 9512 9513 while (where !is null) { 9514 if (part.matchElement(where)) 9515 break; 9516 9517 if (where is relativeTo) 9518 return false; 9519 9520 where = where.parentNode; 9521 } 9522 9523 if (where is null) 9524 return false; 9525 } else if (lastSeparation == 1) { // the > operator 9526 where = where.parentNode; 9527 9528 if (!part.matchElement(where)) 9529 return false; 9530 } else if (lastSeparation == 2) { // the + operator 9531 where = where.previousSibling("*"); 9532 9533 if (!part.matchElement(where)) 9534 return false; 9535 } else if (lastSeparation == 3) { // the ~ operator 9536 where = where.previousSibling("*"); 9537 while (where !is null) { 9538 if (part.matchElement(where)) 9539 break; 9540 9541 if (where is relativeTo) 9542 return false; 9543 9544 where = where.previousSibling("*"); 9545 } 9546 9547 if (where is null) 9548 return false; 9549 } else if (lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore 9550 // FIXME 9551 } 9552 9553 lastSeparation = part.separation; 9554 9555 if (where is relativeTo) 9556 return false; // at end of line, if we aren't done by now, the match fails 9557 } 9558 return true; // if we got here, it is a success 9559 } 9560 9561 // the string should NOT have commas. Use parseSelectorString for that instead 9562 ///. 9563 static SelectorComponent fromString(string selector) { 9564 return parseSelector(lexSelector(selector)); 9565 } 9566 } 9567 9568 ///. 
/// Lexes `selector` and parses each comma-separated run of tokens into its own
/// [SelectorComponent]; empty runs are skipped.
SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	SelectorComponent[] ret;
	auto tokens = lexSelector(selector); // this will parse commas too
	// and now do comma-separated slices (i haz phobosophobia!)
	while (tokens.length > 0) {
		size_t end = 0;
		while (end < tokens.length && tokens[end] != ",")
			++end;
		if (end > 0)
			ret ~= parseSelector(tokens[0 .. end], caseSensitiveTags);
		if (tokens.length - end < 2)
			break;
		tokens = tokens[end + 1 .. $];
	}
	return ret;
}

/++
	Builds one [SelectorComponent] from already-lexed tokens (which should not
	contain commas) with a small state machine.

	Params:
		tokens = output of the lexer for a single comma-free selector
		caseSensitiveTags = when false, tag names are lower-cased before being stored

	Throws: Exception("parse error") when a functional pseudo-class such as
	`:not` or `:nth-child` is not followed by `(`.
+/
SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	SelectorComponent s;

	SelectorPart current;
	void commit() {
		// might as well skip null items
		if (!current.isCleanSlateExceptSeparation()) {
			s.parts ~= current;
			current = current.init; // start right over
		}
	}

	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue,

		SkippingFunctionalSelector,
	}

	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	int parensCount;
	foreach (idx, token; tokens) {
		// Returns the text between the parentheses that follow the current token.
		string readFunctionalSelector() {
			string contents;
			// the pseudo-class may be the very last token; report that as a parse
			// error instead of indexing past the end of the array
			if (idx + 1 >= tokens.length || tokens[idx + 1] != "(")
				throw new Exception("parse error");
			int pc = 1;
			foreach (t; tokens[idx + 2 .. $]) {
				if (t == "(")
					pc++;
				if (t == ")")
					pc--;
				if (pc == 0)
					break;
				contents ~= t;
			}

			return contents;
		}

		sizediff_t tid = -1;
		foreach (i, item; selectorTokens)
			if (token == item) {
				tid = i;
				break;
			}
		final switch (state) {
		case State.Starting: // fresh, might be reading an operator or a tagname
			if (tid == -1) {
				if (!caseSensitiveTags)
					token = token.toLower();

				if (current.isCleanSlateExceptSeparation()) {
					current.tagNameFilter = token;
					// default thing, see comment under "*" below
					if (current.separation == -1)
						current.separation = 0;
				} else {
					// if it was already set, we must see two thingies
					// separated by whitespace...
					commit();
					current.separation = 0; // tree
					current.tagNameFilter = token;
				}
			} else {
				// Selector operators
				switch (token) {
				case "*":
					current.tagNameFilter = "*";
					// the idea here is if we haven't actually set a separation
					// yet (e.g. the > operator), it should assume the generic
					// whitespace (descendant) mode to avoid matching self with -1
					if (current.separation == -1)
						current.separation = 0;
					break;
				case " ":
					// If some other separation has already been set,
					// this is irrelevant whitespace, so we should skip it.
					// this happens in the case of "foo > bar" for example.
					if (current.isCleanSlateExceptSeparation() && current.separation > 0)
						continue;
					commit();
					current.separation = 0; // tree
					break;
				case ">>":
					commit();
					current.separation = 0; // alternate syntax for tree from html5 css
					break;
				case ">":
					commit();
					current.separation = 1; // child
					break;
				case "+":
					commit();
					current.separation = 2; // sibling directly after
					break;
				case "~":
					commit();
					current.separation = 3; // any sibling after
					break;
				case "<":
					commit();
					current.separation = 4; // immediate parent of
					break;
				case "[":
					state = State.ReadingAttributeSelector;
					if (current.separation == -1)
						current.separation = 0;
					break;
				case ".":
					state = State.ReadingClass;
					if (current.separation == -1)
						current.separation = 0;
					break;
				case "#":
					state = State.ReadingId;
					if (current.separation == -1)
						current.separation = 0;
					break;
				case ":":
				case "::":
					state = State.ReadingPseudoClass;
					if (current.separation == -1)
						current.separation = 0;
					break;

				default:
					assert(0, token);
				}
			}
			break;
		case State.ReadingClass:
			current.attributesIncludesSeparatedBySpaces ~= ["class", token];
			state = State.Starting;
			break;
		case State.ReadingId:
			current.attributesEqual ~= ["id", token];
			state = State.Starting;
			break;
		case State.ReadingPseudoClass:
			switch (token) {
			case "first-of-type":
				current.firstOfType = true;
				break;
			case "last-of-type":
				current.lastOfType = true;
				break;
			case "only-of-type":
				current.firstOfType = true;
				current.lastOfType = true;
				break;
			case "first-child":
				current.firstChild = true;
				break;
			case "last-child":
				current.lastChild = true;
				break;
			case "only-child":
				current.firstChild = true;
				current.lastChild = true;
				break;
			case "empty":
				// one with no children
				current.emptyElement = true;
				break;
			case "whitespace-only":
				current.whitespaceOnly = true;
				break;
			case "link":
				current.attributesPresent ~= "href";
				break;
			case "root":
				current.rootElement = true;
				break;
			case "nth-child":
				current.nthChild ~= ParsedNth(readFunctionalSelector());
				state = State.SkippingFunctionalSelector;
				continue;
			case "nth-of-type":
				current.nthOfType ~= ParsedNth(readFunctionalSelector());
				state = State.SkippingFunctionalSelector;
				continue;
			case "nth-last-of-type":
				current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
				state = State.SkippingFunctionalSelector;
				continue;
			case "not":
				state = State.SkippingFunctionalSelector;
				current.notSelectors ~= readFunctionalSelector();
				continue; // now the rest of the parser skips past the parens we just handled
			case "has":
				state = State.SkippingFunctionalSelector;
				current.hasSelectors ~= readFunctionalSelector();
				continue; // now the rest of the parser skips past the parens we just handled
				// back to standards though not quite right lol
			case "disabled":
				current.attributesPresent ~= "disabled";
				break;
			case "checked":
				current.attributesPresent ~= "checked";
				break;

			case "visited", "active", "hover", "target", "focus",
			"selected":
				current.attributesPresent ~= "nothing";
				// FIXME
				/*
				// defined in the standard, but I don't implement it
				case "not":
				*/
				/+
				// extensions not implemented
				//case "text": // takes the text in the element and wraps it in an element, returning it
				+/
				goto case;
			case "before", "after":
				current.attributesPresent ~= "FIXME";

				break;
				// My extensions
			case "odd-child":
				current.oddChild = true;
				break;
			case "even-child":
				current.evenChild = true;
				break;
			default:
				//if(token.indexOf("lang") == -1)
				//assert(0, token);
				break;
			}
			state = State.Starting;
			break;
		case State.SkippingFunctionalSelector:
			// the argument text was already captured by readFunctionalSelector;
			// just step over its tokens until the parentheses balance again
			if (token == "(") {
				parensCount++;
			} else if (token == ")") {
				parensCount--;
			}

			if (parensCount == 0)
				state = State.Starting;
			break;
		case State.ReadingAttributeSelector:
			attributeName = token;
			attributeComparison = null;
			attributeValue = null;
			state = State.ReadingAttributeComparison;
			break;
		case State.ReadingAttributeComparison:
			// FIXME: these things really should be quotable in the proper lexer...
			if (token != "]") {
				if (token.indexOf("=") == -1) {
					// not a comparison; consider it
					// part of the attribute
					// NOTE(review): the token goes into attributeValue, which is overwritten
					// once a comparison operator is read and is unused for a bare [attr],
					// so it is effectively dropped. Confirm whether attributeName was meant.
					attributeValue ~= token;
				} else {
					attributeComparison = token;
					state = State.ReadingAttributeValue;
				}
				break;
			}
			goto case;
		case State.ExpectingAttributeCloser:
			if (token != "]") {
				// not the closer; consider it part of comparison
				if (attributeComparison == "")
					attributeName ~= token;
				else
					attributeValue ~= token;
				break;
			}

			// Selector operators
			switch (attributeComparison) {
			default:
				assert(0);
			case "":
				current.attributesPresent ~= attributeName;
				break;
			case "=":
				current.attributesEqual ~= [attributeName, attributeValue];
				break;
			case "|=":
				current.attributesIncludesSeparatedByDashes ~= [
					attributeName, attributeValue
				];
				break;
			case "~=":
				current.attributesIncludesSeparatedBySpaces ~= [
					attributeName, attributeValue
				];
				break;
			case "$=":
				current.attributesEndsWith ~= [attributeName, attributeValue];
				break;
			case "^=":
				current.attributesStartsWith ~= [attributeName, attributeValue];
				break;
			case "*=":
				current.attributesInclude ~= [attributeName, attributeValue];
				break;
			case "!=":
				current.attributesNotEqual ~= [attributeName, attributeValue];
				break;
			}

			state = State.Starting;
			break;
		case State.ReadingAttributeValue:
			attributeValue = token;
			state = State.ExpectingAttributeCloser;
			break;
		}
	}

	commit();

	return s;
}

/// Returns the elements of `input` in their original order with repeated references dropped.
Element[] removeDuplicates(Element[] input) {
	Element[] ret;

	bool[Element] already;
	foreach (e; input) {
		if (e in already)
			continue;
		already[e] = true;
		ret ~= e;
	}

	return ret;
}

// done with CSS selector handling

// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/// Parses the `name: value;` declarations in `content`; `rule` is the selector text they came from (empty for an inline style).
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if (content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach (part; content.split(";")) {
			part = part.strip();
			if (part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if (idx == -1)
				continue;
			//throw new Exception("Bad css rule (no colon): " ~ part);

			Property p;

			p.name = part[0 .. idx].strip();
			p.value = part[idx + 1 .. $].replace("! important", "!important")
				.replace("!important", "").strip(); // FIXME don't drop important
			p.givenExplicitly = true;
			p.specificity = originatingSpecificity;

			properties ~= p;
		}

		foreach (property; properties)
			expandShortForm(property, originatingSpecificity);
	}

	/// Currently returns a default-initialized [Specificity] for every rule (see the FIXME inside).
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if (rule.length == 0) { // inline
			// s.important = 2;
		} else {
			// FIXME
		}

		return s;
	}

	string originatingRule; ///.
	Specificity originatingSpecificity; ///.

	///.
	union Specificity {
		uint score; ///.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	///.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	///.
	Property[] properties;

	/// `style.fooBar` reads and `style.fooBar = "x"` writes the property named by `unCamelCase("fooBar")`.
	string opDispatch(string nameGiven)(string value = null)
			if (nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if (value is null)
			return getValue(name);

		// setValue takes a Specificity, not an integer, so build one explicitly
		Specificity inlineSpecificity;
		inlineSpecificity.score = 0x02000000; /* inline specificity */
		return setValue(name, value, inlineSpecificity);
	}

	/// takes dash style name
	string getValue(string name) {
		foreach (property; properties)
			if (property.name == name)
				return property.value;
		return null;
	}

	/++
		Takes dash style name. Stores `value` under `name` unless an existing entry
		has a higher specificity score, then expands shorthand names into their
		longhands.

		Params:
			name = dash style property name
			value = new value; a trailing `!important` is stripped and recorded in the specificity
			newSpecificity = specificity of the declaration being applied
			explicit = false when the value was inferred from a shorthand

		Returns: the value with `!important` removed, whether or not it was stored.
	+/
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if (value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		// Index-based on purpose: expandShortForm() re-enters setValue() and may append to
		// `properties`, which would leave a `ref` loop variable pointing at a stale copy.
		foreach (i; 0 .. properties.length) {
			if (properties[i].name != name)
				continue;

			if (newSpecificity.score < properties[i].specificity.score) {
				//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", property.specificity.score);
				return value; // do nothing - the specificity is too low
			}

			// store the new state first so the shorthand is expanded from the new
			// value, and remember which specificity won for later comparisons
			properties[i].givenExplicitly = explicit;
			properties[i].value = value;
			properties[i].specificity = newSpecificity;
			expandShortForm(properties[i], newSpecificity);
			return value;
		}

		// it's not here...

		Property p;
		p.givenExplicitly = explicit; // inferred longhands must stay hidden from toString()
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity;

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	// Expands a 1 to 4 value box shorthand into its -top/-right/-bottom/-left longhands.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		auto parts = value.split(); // any run of whitespace separates the values
		switch (parts.length) {
		case 1:
			setValue(name ~ "-left", parts[0], specificity, false);
			setValue(name ~ "-right", parts[0], specificity, false);
			setValue(name ~ "-top", parts[0], specificity, false);
			setValue(name ~ "-bottom", parts[0], specificity, false);
			break;
		case 2:
			setValue(name ~ "-left", parts[1], specificity, false);
			setValue(name ~ "-right", parts[1], specificity, false);
			setValue(name ~ "-top", parts[0], specificity, false);
			setValue(name ~ "-bottom", parts[0], specificity, false);
			break;
		case 3:
			// top, left/right, bottom
			setValue(name ~ "-top", parts[0], specificity, false);
			setValue(name ~ "-right", parts[1], specificity, false);
			setValue(name ~ "-bottom", parts[2], specificity, false);
			setValue(name ~ "-left", parts[1], specificity, false);

			break;
		case 4:
			setValue(name ~ "-top", parts[0], specificity, false);
			setValue(name ~ "-right", parts[1], specificity, false);
			setValue(name ~ "-bottom", parts[2], specificity, false);
			setValue(name ~ "-left", parts[3], specificity, false);
			break;
		default:
			// empty or over-long value: leave the longhands alone instead of aborting
			break;
		}
	}

	/// Fills in the per-side properties implied by `margin`, `padding`, `border` and `outline`.
	void expandShortForm(Property p, Specificity specificity) {
		switch (p.name) {
		case "margin":
		case "padding":
			expandQuadShort(p.name, p.value, specificity);
			break;
		case "border":
		case "outline":
			setValue(p.name ~ "-left", p.value, specificity, false);
			setValue(p.name ~ "-right", p.value, specificity, false);
			setValue(p.name ~ "-top", p.value, specificity, false);
			setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

		case "border-top":
		case "border-bottom":
		case "border-left":
		case "border-right":
		case "outline-top":
		case "outline-bottom":
		case "outline-left":
		case "outline-right":

		default: {
			}
		}
	}

	/// Serializes the explicitly given properties, wrapped in `rule { ... }` when a rule is known.
	override string toString() {
		string ret;
		if (originatingRule.length)
			ret = originatingRule ~ " {";

		foreach (property; properties) {
			if (!property.givenExplicitly)
				continue; // skip the inferred shit

			if (originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if (originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}

// Wraps `url` in url("...") for use in a css value; the text is inserted as-is, without escaping.
string cssUrl(string url) {
	return "url(\"" ~ url ~ "\")";
}

/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	/// The rules read from the source, in source order.
	CssStyle[] rules;

	/// Reads `source` as CSS text and appends one `CssStyle` to `rules` per
	/// `selector { declarations }` block.
	///
	/// `/* ... */` comments are dropped. An `@` rule is skipped up to its
	/// terminating `;` or up to the first `}` after its opening `{`.
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		enum Lex {
			rule, // reading a selector or a declaration body
			slash, // saw '/', a comment may be starting
			comment, // inside a comment
			commentStar, // inside a comment, previous character was '*'
			atRule, // skipping an @-rule up to ';' or '{'
			atBlock // skipping an @-rule block up to '}'
		}

		auto state = Lex.rule;
		string currentRule;
		string currentValue;

		// text is appended to the selector until '{', then to the body
		string* currentThing = &currentRule;

		foreach (c; source) {
			final switch (state) {
				case Lex.rule:
					switch (c) {
						case '@':
							state = Lex.atRule;
							break;
						case '/':
							state = Lex.slash;
							break;
						case '{':
							currentThing = &currentValue;
							break;
						case '}':
							if (currentThing is &currentValue)
								rules ~= new CssStyle(currentRule, currentValue);
							// otherwise it is a '}' with no matching '{'
							// (the original notes sveit.com as a way to
							// reproduce); either way start over
							currentRule = "";
							currentValue = "";
							currentThing = &currentRule;
							break;
						default:
							(*currentThing) ~= c;
							break;
					}
					break;
				case Lex.slash:
					if (c == '*')
						state = Lex.comment;
					else {
						// it was a plain slash; put it back with this character
						state = Lex.rule;
						(*currentThing) ~= "/" ~ c;
					}
					break;
				case Lex.comment:
					if (c == '*')
						state = Lex.commentStar;
					break;
				case Lex.commentStar:
					if (c == '/')
						state = Lex.rule;
					else if (c != '*')
						state = Lex.comment;
					// on another '*' stay here, so that "**/" still closes
					// the comment
					break;
				case Lex.atRule:
					if (c == '{')
						state = Lex.atBlock;
					if (c == ';')
						state = Lex.rule; // just skipping import
					break;
				case Lex.atBlock:
					if (c == '}')
						state = Lex.rule; // skipping font face probably
					break;
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach (rule; rules) {
			immutable selector = rule.originatingRule;
			if (selector.length == 0)
				continue; // this shouldn't happen here in a stylesheet

			foreach (element; document.querySelectorAll(selector)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto computed = element.computedStyle;

				foreach (declaration; rule.properties)
					computed.setValue(declaration.name, declaration.value, declaration.specificity);
			}
		}
	}
}

/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Puts `t` on top, growing the storage first if it is full: doubling
	/// while under 4096 slots, then adding 4096 slots at a time.
	void push(T t) {
		if (internalLength >= arr.length) {
			immutable grown = arr.length < 4096 ? arr.length * 2 : arr.length + 4096;
			auto bigger = new T[grown];
			bigger[0 .. arr.length] = arr[];
			arr = bigger;
		}

		arr[internalLength++] = t;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		return arr[--internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	/// True when nothing is on the stack.
	@property bool empty() {
		return internalLength == 0;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	/// The element the walk is currently positioned on.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1;
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
		handle its children

	*/

	/// Moves to the next element: the next unvisited child of the current
	/// one if there is one, otherwise the next unvisited child of the
	/// nearest ancestor on the stack. Marks the range empty once the stack
	/// runs out.
	void popFront() {
		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)
		while (!isEmpty) {
			current.childPosition++;

			if (current.childPosition < current.element.children.length) {
				// descend: remember where we were, then step into the child
				stack.push(current);
				current.element = current.element.children[current.childPosition];
				current.childPosition = -1;
				return;
			}

			if (stack.empty()) {
				isEmpty = true;
				return;
			}

			// out of children here; back up a level and try its next child
			current = stack.pop();
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if (stack.empty) { // should never happen
			isEmpty = true;
			return;
		}

		current = stack.pop();
		current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
	}

	/// True once the walk has finished.
	@property bool empty() {
		return isEmpty;
	}

private:

	// one level of the walk: an element and the index of the child most
	// recently stepped into (-1 before the first)
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	Stack!(Current) stack;

	bool isEmpty;
}

// unbelievable.
// Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.

/// Returns the offset of the first occurrence of `needle` in `haystack`,
/// or -1 when the search comes back empty.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	static import std.algorithm;

	auto remainder = std.algorithm.find(haystack, needle);
	if (remainder.length)
		return haystack.length - remainder.length;
	return -1;
}

// Returns a new array holding arr[0 .. position + 1], then `what`, then
// the rest of `arr`.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);

	immutable head = position + 1;
	auto ret = new T[arr.length + what.length];

	ret[0 .. head] = arr[0 .. head];
	ret[head .. head + what.length] = what[];
	ret[head + what.length .. $] = arr[head .. $];

	return ret;
}

// Linear search: true when some element of `arr` equals `item`.
package bool isInArray(T)(T item, T[] arr) {
	foreach (candidate; arr) {
		if (item == candidate)
			return true;
	}
	return false;
}

// Copies every key/value pair of `arr` into a fresh associative array.
private string[string] aadup(in string[string] arr) {
	string[string] copy;
	foreach (key, value; arr)
		copy[key] = value;
	return copy;
}

// dom event support, if you want to use it

/// used for DOM events
alias EventHandler = void delegate(Element handlerAttachedTo, Event event);

/// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it.
class Event {
	/// Creates an event called `eventName` aimed at `target`.
	this(string eventName, Element target) {
		this.eventName = eventName;
		this.srcElement = target;
	}

	/// Prevents the default event handler (if there is one) from being called
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Stops the event propagation immediately.
	void stopPropagation() {
		propagationStopped = true;
	}

	bool defaultPrevented;
	bool propagationStopped;
	string eventName;

	Element srcElement;
	alias srcElement target;

	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	bool isBubbling;

	/// this sends it only to the target. If you want propagation, use dispatch() instead.
	void send() {
		auto e = srcElement;
		if (e is null)
			return;

		if (auto handlers = eventName in e.bubblingEventHandlers) {
			foreach (handler; *handlers)
				handler(e, this);
		}

		if (defaultPrevented)
			return;

		if (auto fallback = eventName in e.defaultEventHandlers)
			(*fallback)(e, this);
	}

	/// this dispatches the element using the capture -> target -> bubble process
	void dispatch() {
		if (srcElement is null)
			return;

		// chain[0] is the target, the last entry is its outermost ancestor
		Element[] chain;
		for (Element node = srcElement; node !is null; node = node.parentNode)
			chain ~= node;

		// first capture (outermost first), then bubble (target first)
		isBubbling = false;

		foreach_reverse (e; chain) {
			if (auto handlers = eventName in e.capturingEventHandlers) {
				foreach (handler; *handlers)
					handler(e, this);
			}

			// the default on capture should really be to always do nothing

			if (propagationStopped)
				break;
		}

		isBubbling = true;

		if (!propagationStopped) {
			foreach (e; chain) {
				if (auto handlers = eventName in e.bubblingEventHandlers) {
					foreach (handler; *handlers)
						handler(e, this);
				}

				if (propagationStopped)
					break;
			}
		}

		if (!defaultPrevented) {
			foreach (e; chain) {
				if (auto fallback = eventName in e.defaultEventHandlers)
					(*fallback)(e, this);
			}
		}
	}
}

/// Validation and display options for a form field.
struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones

	// convenience methods to quickly get some options

	/// Options with nothing set.
	@property static FormFieldOptions none() {
		return FormFieldOptions.init;
	}

	/// Options that only mark the field as required.
	static FormFieldOptions required() {
		auto options = FormFieldOptions.init;
		options.isRequired = true;
		return options;
	}

	/// Options carrying a validation `pattern`, optionally marked required.
	static FormFieldOptions regex(string pattern, bool required = false) {
		auto options = FormFieldOptions.init;
		options.pattern = pattern;
		options.isRequired = required;
		return options;
	}

	/// Reads the `required`, `pattern` and `placeholder` attributes of `e`.
	static FormFieldOptions fromElement(Element e) {
		auto options = FormFieldOptions.init;

		options.isRequired = e.hasAttribute("required");
		if (e.hasAttribute("pattern"))
			options.pattern = e.pattern;
		if (e.hasAttribute("placeholder"))
			options.placeholder = e.placeholder;

		return options;
	}

	/// Writes the options that are set onto `e` as attributes and returns `e`.
	Element applyToElement(Element e) {
		if (isRequired)
			e.required = "required";
		if (pattern.length)
			e.pattern = pattern;
		if (placeholder.length)
			e.placeholder = placeholder;
		return e;
	}
}

// this needs
// to look just like a string, but can expand as needed
version (no_dom_stream)
	alias string Utf8Stream;
else
	/// A string-like buffer that can pull in more text on demand through a
	/// pair of delegates (or overridden `getMore` / `hasMore`).
	class Utf8Stream {
	protected:
		// these two should be overridden in subclasses to actually do the stream magic

		/// Returns the next piece of text, or null when no helper delegate is set.
		string getMore() {
			if (getMoreHelper !is null)
				return getMoreHelper();
			return null;
		}

		/// Reports whether more text can be fetched; false when no helper delegate is set.
		bool hasMore() {
			if (hasMoreHelper !is null)
				return hasMoreHelper();
			return false;
		}
		// the rest should be ok

	public:
		/// Wraps a complete, already available string.
		this(string d) {
			this.data = d;
		}

		/// Wraps a stream fed by the two delegates and fetches the first
		/// piece right away if one is available.
		this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
			this.getMoreHelper = getMoreHelper;
			this.hasMoreHelper = hasMoreHelper;

			if (hasMore())
				this.data ~= getMore();
		}

		/// Length of the text buffered so far; fetches one more piece first
		/// when the highest index read so far is at the end of the buffer.
		@property final size_t length() {
			// the parser checks length primarily directly before accessing the next character
			// so this is the place we'll hook to append more if possible and needed.
			if (lastIdx + 1 >= data.length && hasMore()) {
				data ~= getMore();
			}
			return data.length;
		}

		/// Reads one code unit and records `idx` as the high-water mark.
		final char opIndex(size_t idx) {
			if (idx > lastIdx)
				lastIdx = idx;
			return data[idx];
		}

		/// Slices the buffered text and records `end` as the high-water mark.
		final string opSlice(size_t start, size_t end) {
			if (end > lastIdx)
				lastIdx = end;
			return data[start .. end];
		}

		/// `$` inside an index or slice is `length`, so it may fetch more text.
		final size_t opDollar() {
			return length();
		}

		/// Appends `s` to this stream's buffer and returns this same stream.
		final Utf8Stream opBinary(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// Appends `s` to this stream's buffer.
		final Utf8Stream opOpAssign(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// Replaces the buffered text with `rhs`.
		final Utf8Stream opAssign(string rhs) {
			this.data = rhs;
			return this;
		}

	private:
		string data;

		// highest index handed to opIndex / opSlice so far
		size_t lastIdx;

		bool delegate() hasMoreHelper;
		string delegate() getMoreHelper;

		/+
		// used to maybe clear some old stuff
		// you might have to remove elements parsed with it too since they can hold slices into the
		// old stuff, preventing gc
		void dropFront(int bytes) {
			posAdjustment += bytes;
			data = data[bytes .. $];
		}

		int posAdjustment;
		+/
	}

/// Fills `form` from `obj` by handing a `form.setValue` callback to
/// `fillData` from arsd.database.
void fillForm(T)(Form form, T obj, string name) {
	import arsd.database;

	fillData((k, v) => form.setValue(k, v), obj, name);
}

/+
/+
	Syntax:

	Tag: tagname#id.class
	Tree: Tag(Children, comma, separated...)
	Children: Tree or Variable
	Variable: $varname with optional |funcname following.
10692 10693 If a variable has a tree after it, it breaks the variable down: 10694 * if array, foreach it does the tree 10695 * if struct, it breaks down the member variables 10696 10697 stolen from georgy on irc, see: https://github.com/georgy7/stringplate 10698 +/ 10699 struct Stringplate { 10700 /++ 10701 10702 +/ 10703 this(string s) { 10704 10705 } 10706 10707 /++ 10708 10709 +/ 10710 Element expand(T...)(T vars) { 10711 return null; 10712 } 10713 } 10714 /// 10715 unittest { 10716 auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))"); 10717 assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`); 10718 } 10719 +/ 10720 10721 bool allAreInlineHtml(const(Element)[] children) { 10722 foreach (child; children) { 10723 if (child.nodeType == NodeType.Text && child.nodeValue.strip.length) { 10724 // cool 10725 } else if (child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children)) { 10726 // cool 10727 } else { 10728 // prolly block 10729 return false; 10730 } 10731 } 10732 return true; 10733 } 10734 10735 private bool isSimpleWhite(dchar c) { 10736 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 10737 } 10738 10739 unittest { 10740 // Test for issue #120 10741 string s = `<html> 10742 <body> 10743 <P>AN 10744 <P>bubbles</P> 10745 <P>giggles</P> 10746 </body> 10747 </html>`; 10748 auto doc = new Document(); 10749 doc.parseUtf8(s, false, false); 10750 auto s2 = doc.toString(); 10751 assert(s2.indexOf("bubbles") < s2.indexOf("giggles"), "paragraph order incorrect:\n" ~ s2); 10752 } 10753 10754 unittest { 10755 // test for suncarpet email dec 24 2019 10756 // arbitrary id asduiwh 10757 auto document = new Document("<html> 10758 <head> 10759 <meta charset=\"utf-8\"></meta> 10760 <title>Element.querySelector Test</title> 10761 </head> 10762 <body> 10763 <div id=\"foo\"> 10764 <div>Foo</div> 10765 <div>Bar</div> 10766 </div> 10767 </body> 10768 </html>"); 10769 10770 auto doc = 
document; 10771 10772 assert(doc.querySelectorAll("div div").length == 2); 10773 assert(doc.querySelector("div").querySelectorAll("div").length == 2); 10774 assert(doc.querySelectorAll("> html").length == 0); 10775 assert(doc.querySelector("head").querySelectorAll("> title").length == 1); 10776 assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1); 10777 10778 assert(doc.root.matches("html")); 10779 assert(!doc.root.matches("nothtml")); 10780 assert(doc.querySelector("#foo > div").matches("div")); 10781 assert(doc.querySelector("body > #foo").matches("#foo")); 10782 10783 assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root! 10784 assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does 10785 assert(doc.querySelectorAll(" > body").length == 1); // should mean the same thing 10786 assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this 10787 assert(doc.root.querySelectorAll(" > html").length == 0); // but not this 10788 } 10789 10790 unittest { 10791 // based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example 10792 auto document = new Document(`<article> 10793 <div id="div-01">Here is div-01 10794 <div id="div-02">Here is div-02 10795 <div id="div-03">Here is div-03</div> 10796 </div> 10797 </div> 10798 </article>`, true, true); 10799 10800 auto el = document.getElementById("div-03"); 10801 assert(el.closest("#div-02").id == "div-02"); 10802 assert(el.closest("div div").id == "div-03"); 10803 assert(el.closest("article > div").id == "div-01"); 10804 assert(el.closest(":not(div)").tagName == "article"); 10805 10806 assert(el.closest("p") is null); 10807 assert(el.closest("p, div") is el); 10808 } 10809 10810 /* 10811 Copyright: Adam D. Ruppe, 2010 - 2020 10812 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. 10813 Authors: Adam D. 
Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others 10814 10815 Copyright Adam D. Ruppe 2010-2020. 10816 Distributed under the Boost Software License, Version 1.0. 10817 (See accompanying file LICENSE_1_0.txt or copy at 10818 http://www.boost.org/LICENSE_1_0.txt) 10819 */