// FIXME: add classList. it is a live list and removes whitespace and duplicates when you use it.
// FIXME: xml namespace support???
// FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML
// FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility...

// FIXME: the scriptable list is quite arbitrary


// xml entity references?!

/++
	This is an html DOM implementation, started with cloning
	what the browser offers in Javascript, but going well beyond
	it in convenience.

	If you can do it in Javascript, you can probably do it with
	this module, and much more.

	---
	import arsd.dom;
	import std.stdio;

	void main() {
		auto document = new Document("<html><p>paragraph</p></html>");
		writeln(document.querySelector("p"));
		document.root.innerHTML = "<p>hey</p>";
		writeln(document);
	}
	---

	BTW: this file optionally depends on `arsd.characterencodings`, to
	help it correctly read files from the internet. You should be able to
	get characterencodings.d from the same place you got this file.

	If you want it to stand alone, just always use the `Document.parseUtf8`
	function or the constructor that takes a string.

	Symbol_groups:

	core_functionality =

	These members provide core functionality. The members on these classes
	will provide most of your direct interaction.

	bonus_functionality =

	These provide additional functionality for special use cases.

	implementations =

	These provide implementations of other functionality.
+/
module arsd.dom;

// FIXME: support the css standard namespace thing in the selectors too

version(with_arsd_jsvar)
	import arsd.jsvar;
else {
	enum scriptable = "arsd_jsvar_compatible";
}

// Compile-time filter for opDispatch: answers whether `name` is one of the
// attributes we allow to be reached directly, without going through .attr.
// Returns true for the names listed below and false for everything else.
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
			"checked", "selected", "type",
			"src", "content", "pattern",
			"placeholder", "required", "alt",
			"rel",
			"method", "action", "enctype":
			return true;
		default:
			return false;
	}
}


// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too

// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
	we could do
	meh this sux

	auto xpath = XPath(element);

	// get the first p
	xpath.p[0].a["href"]
*/


/// The main document interface, including a html parser.
/// Group: core_functionality
class Document : FileResource {
	/// Convenience method for web scraping: performs an HTTP GET on `url`,
	/// waits for the response and parses its body into a new Document.
	///
	/// With `strictMode` the body goes through `parse` as case-sensitive,
	/// strict markup, using the charset reported by the response
	/// (`contentTypeCharset`). Otherwise it goes through `parseGarbage`.
	///
	/// Requires [arsd.http2] to be included in the build as well as
	/// [arsd.characterencodings].
	static Document fromUrl()(string url, bool strictMode = false) {
		import arsd.http2;

		auto httpClient = new HttpClient();
		auto request = httpClient.navigateTo(Uri(url), HttpVerb.GET);
		auto response = request.waitForCompletion();

		auto result = new Document();

		// loose mode: let the parser work the encoding out from the data itself
		if(!strictMode) {
			result.parseGarbage(cast(string) response.content);
			return result;
		}

		result.parse(cast(string) response.content, true, true, response.contentTypeCharset);
		return result;
	}

	///.
this(string data, bool caseSensitive = false, bool strict = false) {
	// hands the string straight to parseUtf8 with the same flags
	this.parseUtf8(data, caseSensitive, strict);
}

/++
	Creates an empty document. It has *nothing* in it at all.
+/
this() {
}

/// This is just something I'm toying with. Right now, you use opIndex to put in css selectors.
/// It returns a struct that forwards calls to all elements it holds, and returns itself so you
/// can chain it.
///
/// Example: document["p"].innerText("hello").addClass("modified");
///
/// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }
///
/// Note: always use function calls (not property syntax) and don't use toString in there for best results.
///
/// You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe
/// you could put in some kind of custom filter function tho.
ElementCollection opIndex(string selector) {
	// start from a collection holding just the root, then narrow it by the selector
	return ElementCollection(this.root)[selector];
}

string _contentType = "text/html; charset=utf-8";

/// Setter: stores `mimeType` as this document's content type and returns
/// the stored value. Use it if this document is some other kind of XML.
///
/// Note: this has no impact on the function of this class.
/// It is only used if the document is sent via a protocol like HTTP.
///
/// This may be called by parse() if it recognizes the data. Otherwise,
/// if you don't set it, it assumes text/html; charset=utf-8.
@property string contentType(string mimeType) {
	return _contentType = mimeType;
}

/// implementing the FileResource interface, useful for sending via
/// http automatically. Always returns null.
@property string filename() const {
	return null;
}

/// implementing the FileResource interface, useful for sending via
/// http automatically.
override @property string contentType() const {
	return this._contentType;
}

/// implementing the FileResource interface; it returns the bytes of toString().
override immutable(ubyte)[] getData() const {
	auto serialized = this.toString();
	return cast(immutable(ubyte)[]) serialized;
}


// (not implemented) Concatenates any consecutive text nodes
/*
void normalize() {

}
*/

/// Points every parseSaw* callback at a delegate that always returns true, so comments,
/// asp/php code, <? instructions and <! instructions are added to the dom tree when the parser sees them.
/// Note: this overwrites anything else you set, and setting a callback afterwards overwrites this.
/// Call this before calling parse().
///
/// Note this will also preserve the prolog and doctype from the original file, if there was one.
void enableAddingSpecialTagsToDom() {
	bool delegate(string) alwaysKeep = delegate bool(string) { return true; };

	parseSawComment = alwaysKeep;
	parseSawAspCode = alwaysKeep;
	parseSawPhpCode = alwaysKeep;
	parseSawQuestionInstruction = alwaysKeep;
	parseSawBangInstruction = alwaysKeep;
}

/// If the parser sees a html comment, it will call this callback
/// <!-- comment --> will call parseSawComment(" comment ")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawComment;

/// If the parser sees <% asp code... %>, it will call this callback.
/// It will be passed "% asp code... %" or "%= asp code .. %"
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawAspCode;

/// If the parser sees <?php php code... ?>, it will call this callback.
/// It will be passed "?php php code... ?" or "?= php code .. ?"
/// Note: dom.d cannot identify the other php <? code ?> short format.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawPhpCode;

/// if it sees a <?xxx> that is not php or asp
/// it calls this function with the contents.
/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawQuestionInstruction;

/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
/// it calls this function with the contents.
/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawBangInstruction;

/// Given the kind of garbage you find on the Internet, try to make sense of it.
/// Equivalent to document.parse(data, false, false, null);
/// (Case-insensitive, non-strict, determine character encoding from the data.)
///
/// NOTE: this makes no attempt at added security.
///
/// It is a template so it lazily imports characterencodings.
void parseGarbage()(string data) {
	parse(data, false, false, null);
}

/// Parses well-formed UTF-8, case-sensitive, XML or XHTML
/// Will throw exceptions on things like unclosed tags.
void parseStrict(string data) {
	parseStream(toUtf8Stream(data), true, true);
}

/// Parses well-formed UTF-8 in loose mode (by default). Tries to correct
/// tag soup, but does NOT try to correct bad character encodings.
///
/// Bad character encodings will still throw an exception.
void parseUtf8(string data, bool caseSensitive = false, bool strict = false) {
	parseStream(toUtf8Stream(data), caseSensitive, strict);
}

// Works out which character encoding `rawdata` is in, converts it to UTF-8
// and wraps the result in a Utf8Stream.
//
// Params:
//	rawdata = the undecoded document bytes
//	dataEncoding = the encoding name if the caller knows it; null means
//		"detect it": first via tryToDetermineEncoding, then by looking
//		for an XML prolog `encoding="..."` or a `charset=` declaration
//	strict = if true, an undeterminable encoding throws MarkupException and
//		conversion errors propagate; if false, it falls back to Windows 1252
//
// this is a template so we get lazy import behavior
Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) {
	import arsd.characterencodings;

	// Bytes that cannot be part of an encoding name: either quote style,
	// the end of the tag, a parameter separator, or whitespace.
	static bool endsEncodingName(ubyte b) {
		return b == '"' || b == '\'' || b == '>' || b == ';' || b == '/'
			|| b == ' ' || b == '\t' || b == '\r' || b == '\n';
	}

	// gotta determine the data encoding. If you know it, pass it in above to skip all this.
	if(dataEncoding is null) {
		dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
		// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
		// Now, XML and HTML can both list encoding in the document, but we can't really parse
		// it here without changing a lot of code until we know the encoding. So I'm going to
		// do some hackish string checking.
		if(dataEncoding is null) {
			auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;
			// first, look for an XML prolog
			auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
			if(idx != -1) {
				idx += "encoding=\"".length;
				// we're probably past the prolog if it's this far in; we might be looking at
				// content. Forget about it.
				if(idx > 100)
					idx = -1;
			}
			// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
			if(idx == -1) {
				idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
				if(idx != -1) {
					idx += "charset=".length;
					// the data may end right after "charset=", so check the bounds
					// before peeking; the value may use either quote style or none
					if(idx < dataAsBytes.length && (dataAsBytes[idx] == '"' || dataAsBytes[idx] == '\''))
						idx++;
				}
			}

			// found something in either branch...
			if(idx != -1) {
				// read till something that can't be in an encoding name or about 12 chars,
				// whichever comes first...
				auto end = idx;
				while(end < dataAsBytes.length && end - idx < 12 && !endsEncodingName(dataAsBytes[end]))
					end++;

				// an empty name is no better than not finding one at all
				if(end > idx)
					dataEncoding = cast(string) dataAsBytes[idx .. end];
			}
			// otherwise, we just don't know.
		}
	}

	if(dataEncoding is null) {
		if(strict)
			throw new MarkupException("I couldn't figure out the encoding of this document.");
		else
			// if we really don't know by here, it means we already tried UTF-8,
			// looked for utf 16 and 32 byte order marks, and looked for xml or meta
			// tags... let's assume it's Windows-1252, since that's probably the most
			// common aside from utf that wouldn't be labeled.
			dataEncoding = "Windows 1252";
	}

	// and now, go ahead and convert it.

	string data;

	if(!strict) {
		// if we're in non-strict mode, we need to check
		// the document for mislabeling too; sometimes
		// web documents will say they are utf-8, but aren't
		// actually properly encoded. If it fails to validate,
		// we'll assume it's actually Windows encoding - the most
		// likely candidate for mislabeled garbage.
		dataEncoding = dataEncoding.toLower();
		dataEncoding = dataEncoding.replace(" ", "");
		dataEncoding = dataEncoding.replace("-", "");
		dataEncoding = dataEncoding.replace("_", "");
		if(dataEncoding == "utf8") {
			try {
				validate(rawdata);
			} catch(UTFException e) {
				dataEncoding = "Windows 1252";
			}
		}
	}

	// NOTE(review): after the normalization above the name is never spelled
	// "UTF-8" in non-strict mode, so this shortcut only fires in strict mode;
	// presumably convertToUtf8 passes UTF-8 data through unchanged - confirm.
	if(dataEncoding != "UTF-8") {
		if(strict)
			data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
		else {
			try {
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
			} catch(Exception e) {
				// unknown or unusable encoding name: last resort for garbage input
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
			}
		}
	} else
		data = rawdata;

	return toUtf8Stream(data);
}

// Wraps already-UTF-8 text in a Utf8Stream; if Utf8Stream is just an alias
// for string, the text is returned as-is.
private
Utf8Stream toUtf8Stream(in string rawdata) {
	string data = rawdata;
	static if(is(Utf8Stream == string))
		return data;
	else
		return new Utf8Stream(data);
}

/++
	List of elements that can be assumed to be self-closed
	in this document. The default for a Document are a hard-coded
	list of ones appropriate for HTML. For [XmlDocument], it defaults
	to empty. You can modify this after construction but before parsing.

	History:
		Added February 8, 2021 (included in dub release 9.2)
+/
string[] selfClosedElements = htmlSelfClosedElements;

/++
	List of elements that are considered inline for pretty printing.
	The default for a Document are hard-coded to something appropriate
	for HTML.
For [XmlDocument], it defaults to empty. You can modify
	this after construction but before parsing.

	History:
		Added June 21, 2021 (included in dub release 10.1)
+/
string[] inlineElements = htmlInlineElements;

/**
	Take XMLish data and try to make the DOM tree out of it.

	The goal isn't to be perfect, but to just be good enough to
	approximate Javascript's behavior.

	If strict, it throws on something that doesn't make sense.
	(Examples: mismatched tags. It doesn't validate!)
	If not strict, it tries to recover anyway, and only throws
	when something is REALLY unworkable.

	If strict is false, it uses a magic list of tags that needn't
	be closed. If you are writing a document specifically for this,
	try to avoid such - use self closed tags at least. Easier to parse.

	The dataEncoding argument can be used to pass a specific
	charset encoding for automatic conversion. If null (which is NOT
	the default!), it tries to determine from the data itself,
	using the xml prolog or meta tags. If it still cannot tell,
	strict mode throws a MarkupException and non-strict mode
	assumes Windows-1252.

	If the encoding it ends up using is wrong, it can throw on
	non-ascii characters!


	Note that it previously assumed the data was encoded as UTF-8, which
	is why the dataEncoding argument defaults to that.

	So it shouldn't break backward compatibility.

	But, if you want the best behavior on wild data - figuring it out from the document
	instead of assuming - you'll probably want to change that argument to null.

	This is a template so it lazily imports arsd.characterencodings, which is required
	to fix up data encodings.

	If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the
	dependency. If it is data from the Internet though, a random website, the encoding
	is often a lie. This function, if dataEncoding == null, can correct for that, or
	you can try parseGarbage.
In those cases, arsd.characterencodings is required to 416 compile. 417 */ 418 void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") { 419 auto data = handleDataEncoding(rawdata, dataEncoding, strict); 420 parseStream(data, caseSensitive, strict); 421 } 422 423 // note: this work best in strict mode, unless data is just a simple string wrapper 424 void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) { 425 // FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler 426 // of my big app. 427 428 assert(data !is null); 429 430 // go through character by character. 431 // if you see a <, consider it a tag. 432 // name goes until the first non tagname character 433 // then see if it self closes or has an attribute 434 435 // if not in a tag, anything not a tag is a big text 436 // node child. It ends as soon as it sees a < 437 438 // Whitespace in text or attributes is preserved, but not between attributes 439 440 // & and friends are converted when I know them, left the same otherwise 441 442 443 // this it should already be done correctly.. 
so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 444 //validate(data); // it *must* be UTF-8 for this to work correctly 445 446 sizediff_t pos = 0; 447 448 clear(); 449 450 loose = !caseSensitive; 451 452 bool sawImproperNesting = false; 453 bool paragraphHackfixRequired = false; 454 455 int getLineNumber(sizediff_t p) { 456 int line = 1; 457 foreach(c; data[0..p]) 458 if(c == '\n') 459 line++; 460 return line; 461 } 462 463 void parseError(string message) { 464 throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message)); 465 } 466 467 bool eatWhitespace() { 468 bool ateAny = false; 469 while(pos < data.length && data[pos].isSimpleWhite) { 470 pos++; 471 ateAny = true; 472 } 473 return ateAny; 474 } 475 476 string readTagName() { 477 // remember to include : for namespaces 478 // basically just keep going until >, /, or whitespace 479 auto start = pos; 480 while(data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) 481 { 482 pos++; 483 if(pos == data.length) { 484 if(strict) 485 throw new Exception("tag name incomplete when file ended"); 486 else 487 break; 488 } 489 } 490 491 if(!caseSensitive) 492 return toLower(data[start..pos]); 493 else 494 return data[start..pos]; 495 } 496 497 string readAttributeName() { 498 // remember to include : for namespaces 499 // basically just keep going until >, /, or whitespace 500 auto start = pos; 501 while(data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && !data[pos].isSimpleWhite) 502 { 503 if(data[pos] == '<') { 504 if(strict) 505 throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos))); 506 else 507 break; // e.g. <a href="something" <img src="poo" /></a>. 
The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 508 } 509 pos++; 510 if(pos == data.length) { 511 if(strict) 512 throw new Exception("unterminated attribute name"); 513 else 514 break; 515 } 516 } 517 518 if(!caseSensitive) 519 return toLower(data[start..pos]); 520 else 521 return data[start..pos]; 522 } 523 524 string readAttributeValue() { 525 if(pos >= data.length) { 526 if(strict) 527 throw new Exception("no attribute value before end of file"); 528 else 529 return null; 530 } 531 switch(data[pos]) { 532 case '\'': 533 case '"': 534 auto started = pos; 535 char end = data[pos]; 536 pos++; 537 auto start = pos; 538 while(pos < data.length && data[pos] != end) 539 pos++; 540 if(strict && pos == data.length) 541 throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started)); 542 string v = htmlEntitiesDecode(data[start..pos], strict); 543 pos++; // skip over the end 544 return v; 545 default: 546 if(strict) 547 parseError("Attributes must be quoted"); 548 // read until whitespace or terminator (/> or >) 549 auto start = pos; 550 while( 551 pos < data.length && 552 data[pos] != '>' && 553 // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 554 !(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') && 555 !data[pos].isSimpleWhite) 556 pos++; 557 558 string v = htmlEntitiesDecode(data[start..pos], strict); 559 // don't skip the end - we'll need it later 560 return v; 561 } 562 } 563 564 TextNode readTextNode() { 565 auto start = pos; 566 while(pos < data.length && data[pos] != '<') { 567 pos++; 568 } 569 570 return TextNode.fromUndecodedString(this, data[start..pos]); 571 } 572 573 // this is obsolete! 
574 RawSource readCDataNode() { 575 auto start = pos; 576 while(pos < data.length && data[pos] != '<') { 577 pos++; 578 } 579 580 return new RawSource(this, data[start..pos]); 581 } 582 583 584 struct Ele { 585 int type; // element or closing tag or nothing 586 /* 587 type == 0 means regular node, self-closed (element is valid) 588 type == 1 means closing tag (payload is the tag name, element may be valid) 589 type == 2 means you should ignore it completely 590 type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not 591 type == 4 means the document was totally empty 592 */ 593 Element element; // for type == 0 or type == 3 594 string payload; // for type == 1 595 } 596 // recursively read a tag 597 Ele readElement(string[] parentChain = null) { 598 // FIXME: this is the slowest function in this module, by far, even in strict mode. 599 // Loose mode should perform decently, but strict mode is the important one. 
600 if(!strict && parentChain is null) 601 parentChain = []; 602 603 static string[] recentAutoClosedTags; 604 605 if(pos >= data.length) 606 { 607 if(strict) { 608 throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain)); 609 } else { 610 if(parentChain.length) 611 return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended 612 else 613 return Ele(4); // signal emptiness upstream 614 } 615 } 616 617 if(data[pos] != '<') { 618 return Ele(0, readTextNode(), null); 619 } 620 621 enforce(data[pos] == '<'); 622 pos++; 623 if(pos == data.length) { 624 if(strict) 625 throw new MarkupException("Found trailing < at end of file"); 626 // if not strict, we'll just skip the switch 627 } else 628 switch(data[pos]) { 629 // I don't care about these, so I just want to skip them 630 case '!': // might be a comment, a doctype, or a special instruction 631 pos++; 632 633 // FIXME: we should store these in the tree too 634 // though I like having it stripped out tbh. 635 636 if(pos == data.length) { 637 if(strict) 638 throw new MarkupException("<! opened at end of file"); 639 } else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') { 640 // comment 641 pos += 2; 642 643 // FIXME: technically, a comment is anything 644 // between -- and -- inside a <!> block. 
645 // so in <!-- test -- lol> , the " lol" is NOT a comment 646 // and should probably be handled differently in here, but for now 647 // I'll just keep running until --> since that's the common way 648 649 auto commentStart = pos; 650 while(pos+3 < data.length && data[pos..pos+3] != "-->") 651 pos++; 652 653 auto end = commentStart; 654 655 if(pos + 3 >= data.length) { 656 if(strict) 657 throw new MarkupException("unclosed comment"); 658 end = data.length; 659 pos = data.length; 660 } else { 661 end = pos; 662 assert(data[pos] == '-'); 663 pos++; 664 assert(data[pos] == '-'); 665 pos++; 666 assert(data[pos] == '>'); 667 pos++; 668 } 669 670 if(parseSawComment !is null) 671 if(parseSawComment(data[commentStart .. end])) { 672 return Ele(3, new HtmlComment(this, data[commentStart .. end]), null); 673 } 674 } else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") { 675 pos += 7; 676 677 auto cdataStart = pos; 678 679 ptrdiff_t end = -1; 680 typeof(end) cdataEnd; 681 682 if(pos < data.length) { 683 // cdata isn't allowed to nest, so this should be generally ok, as long as it is found 684 end = data[pos .. $].indexOf("]]>"); 685 } 686 687 if(end == -1) { 688 if(strict) 689 throw new MarkupException("Unclosed CDATA section"); 690 end = pos; 691 cdataEnd = pos; 692 } else { 693 cdataEnd = pos + end; 694 pos = cdataEnd + 3; 695 } 696 697 return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null); 698 } else { 699 auto start = pos; 700 while(pos < data.length && data[pos] != '>') 701 pos++; 702 703 auto bangEnds = pos; 704 if(pos == data.length) { 705 if(strict) 706 throw new MarkupException("unclosed processing instruction (<!xxx>)"); 707 } else pos++; // skipping the > 708 709 if(parseSawBangInstruction !is null) 710 if(parseSawBangInstruction(data[start .. bangEnds])) { 711 // FIXME: these should be able to modify the parser state, 712 // doing things like adding entities, somehow. 713 714 return Ele(3, new BangInstruction(this, data[start .. 
bangEnds]), null); 715 } 716 } 717 718 /* 719 if(pos < data.length && data[pos] == '>') 720 pos++; // skip the > 721 else 722 assert(!strict); 723 */ 724 break; 725 case '%': 726 case '?': 727 /* 728 Here's what we want to support: 729 730 <% asp code %> 731 <%= asp code %> 732 <?php php code ?> 733 <?= php code ?> 734 735 The contents don't really matter, just if it opens with 736 one of the above for, it ends on the two char terminator. 737 738 <?something> 739 this is NOT php code 740 because I've seen this in the wild: <?EM-dummyText> 741 742 This could be php with shorttags which would be cut off 743 prematurely because if(a >) - that > counts as the close 744 of the tag, but since dom.d can't tell the difference 745 between that and the <?EM> real world example, it will 746 not try to look for the ?> ending. 747 748 The difference between this and the asp/php stuff is that it 749 ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end 750 on >. 751 */ 752 753 char end = data[pos]; 754 auto started = pos; 755 bool isAsp = end == '%'; 756 int currentIndex = 0; 757 bool isPhp = false; 758 bool isEqualTag = false; 759 int phpCount = 0; 760 761 more: 762 pos++; // skip the start 763 if(pos == data.length) { 764 if(strict) 765 throw new MarkupException("Unclosed <"~end~" by end of file"); 766 } else { 767 currentIndex++; 768 if(currentIndex == 1 && data[pos] == '=') { 769 if(!isAsp) 770 isPhp = true; 771 isEqualTag = true; 772 goto more; 773 } 774 if(currentIndex == 1 && data[pos] == 'p') 775 phpCount++; 776 if(currentIndex == 2 && data[pos] == 'h') 777 phpCount++; 778 if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2) 779 isPhp = true; 780 781 if(data[pos] == '>') { 782 if((isAsp || isPhp) && data[pos - 1] != end) 783 goto more; 784 // otherwise we're done 785 } else 786 goto more; 787 } 788 789 //writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]); 790 auto code = data[started .. 
pos]; 791 792 793 assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length)); 794 if(pos < data.length) 795 pos++; // get past the > 796 797 if(isAsp && parseSawAspCode !is null) { 798 if(parseSawAspCode(code)) { 799 return Ele(3, new AspCode(this, code), null); 800 } 801 } else if(isPhp && parseSawPhpCode !is null) { 802 if(parseSawPhpCode(code)) { 803 return Ele(3, new PhpCode(this, code), null); 804 } 805 } else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) { 806 if(parseSawQuestionInstruction(code)) { 807 return Ele(3, new QuestionInstruction(this, code), null); 808 } 809 } 810 break; 811 case '/': // closing an element 812 pos++; // skip the start 813 auto p = pos; 814 while(pos < data.length && data[pos] != '>') 815 pos++; 816 //writefln("</%s>", data[p..pos]); 817 if(pos == data.length && data[pos-1] != '>') { 818 if(strict) 819 throw new MarkupException("File ended before closing tag had a required >"); 820 else 821 data ~= ">"; // just hack it in 822 } 823 pos++; // skip the '>' 824 825 string tname = data[p..pos-1]; 826 if(!caseSensitive) 827 tname = tname.toLower(); 828 829 return Ele(1, null, tname); // closing tag reports itself here 830 case ' ': // assume it isn't a real element... 831 if(strict) { 832 parseError("bad markup - improperly placed <"); 833 assert(0); // parseError always throws 834 } else 835 return Ele(0, TextNode.fromUndecodedString(this, "<"), null); 836 default: 837 838 if(!strict) { 839 // what about something that kinda looks like a tag, but isn't? 840 auto nextTag = data[pos .. $].indexOf("<"); 841 auto closeTag = data[pos .. $].indexOf(">"); 842 if(closeTag != -1 && nextTag != -1) 843 if(nextTag < closeTag) { 844 // since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically 845 846 auto equal = data[pos .. 
$].indexOf("=\""); 847 if(equal != -1 && equal < closeTag) { 848 // this MIGHT be ok, soldier on 849 } else { 850 // definitely no good, this must be a (horribly distorted) text node 851 pos++; // skip the < we're on - don't want text node to end prematurely 852 auto node = readTextNode(); 853 node.contents = "<" ~ node.contents; // put this back 854 return Ele(0, node, null); 855 } 856 } 857 } 858 859 string tagName = readTagName(); 860 string[string] attributes; 861 862 Ele addTag(bool selfClosed) { 863 if(selfClosed) 864 pos++; 865 else { 866 if(!strict) 867 if(tagName.isInArray(selfClosedElements)) 868 // these are de-facto self closed 869 selfClosed = true; 870 } 871 872 import std.algorithm.comparison; 873 874 if(strict) { 875 enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[max(0, pos - 100) .. min(data.length, pos + 100)])); 876 } else { 877 // if we got here, it's probably because a slash was in an 878 // unquoted attribute - don't trust the selfClosed value 879 if(!selfClosed) 880 selfClosed = tagName.isInArray(selfClosedElements); 881 882 while(pos < data.length && data[pos] != '>') 883 pos++; 884 885 if(pos >= data.length) { 886 // the tag never closed 887 assert(data.length != 0); 888 pos = data.length - 1; // rewinding so it hits the end at the bottom.. 
889 } 890 } 891 892 auto whereThisTagStarted = pos; // for better error messages 893 894 pos++; 895 896 auto e = createElement(tagName); 897 e.attributes = attributes; 898 version(dom_node_indexes) { 899 if(e.dataset.nodeIndex.length == 0) 900 e.dataset.nodeIndex = to!string(&(e.attributes)); 901 } 902 e.selfClosed = selfClosed; 903 e.parseAttributes(); 904 905 906 // HACK to handle script and style as a raw data section as it is in HTML browsers 907 if(tagName == "script" || tagName == "style") { 908 if(!selfClosed) { 909 string closer = "</" ~ tagName ~ ">"; 910 ptrdiff_t ending; 911 if(pos >= data.length) 912 ending = -1; 913 else 914 ending = indexOf(data[pos..$], closer); 915 916 ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes)); 917 /* 918 if(loose && ending == -1 && pos < data.length) 919 ending = indexOf(data[pos..$], closer.toUpper()); 920 */ 921 if(ending == -1) { 922 if(strict) 923 throw new Exception("tag " ~ tagName ~ " never closed"); 924 else { 925 // let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit. 926 if(pos < data.length) { 927 e = new TextNode(this, data[pos .. $]); 928 pos = data.length; 929 } 930 } 931 } else { 932 ending += pos; 933 e.innerRawSource = data[pos..ending]; 934 pos = ending + closer.length; 935 } 936 } 937 return Ele(0, e, null); 938 } 939 940 bool closed = selfClosed; 941 942 void considerHtmlParagraphHack(Element n) { 943 assert(!strict); 944 if(e.tagName == "p" && e.tagName == n.tagName) { 945 // html lets you write <p> para 1 <p> para 1 946 // but in the dom tree, they should be siblings, not children. 
947 paragraphHackfixRequired = true; 948 } 949 } 950 951 //writef("<%s>", tagName); 952 while(!closed) { 953 Ele n; 954 if(strict) 955 n = readElement(); 956 else 957 n = readElement(parentChain ~ tagName); 958 959 if(n.type == 4) return n; // the document is empty 960 961 if(n.type == 3 && n.element !is null) { 962 // special node, append if possible 963 if(e !is null) 964 e.appendChild(n.element); 965 else 966 piecesBeforeRoot ~= n.element; 967 } else if(n.type == 0) { 968 if(!strict) 969 considerHtmlParagraphHack(n.element); 970 e.appendChild(n.element); 971 } else if(n.type == 1) { 972 bool found = false; 973 if(n.payload != tagName) { 974 if(strict) 975 parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted))); 976 else { 977 sawImproperNesting = true; 978 // this is so we don't drop several levels of awful markup 979 if(n.element) { 980 if(!strict) 981 considerHtmlParagraphHack(n.element); 982 e.appendChild(n.element); 983 n.element = null; 984 } 985 986 // is the element open somewhere up the chain? 987 foreach(i, parent; parentChain) 988 if(parent == n.payload) { 989 recentAutoClosedTags ~= tagName; 990 // just rotating it so we don't inadvertently break stuff with vile crap 991 if(recentAutoClosedTags.length > 4) 992 recentAutoClosedTags = recentAutoClosedTags[1 .. $]; 993 994 n.element = e; 995 return n; 996 } 997 998 // if not, this is a text node; we can't fix it up... 
999 1000 // If it's already in the tree somewhere, assume it is closed by algorithm 1001 // and we shouldn't output it - odds are the user just flipped a couple tags 1002 foreach(ele; e.tree) { 1003 if(ele.tagName == n.payload) { 1004 found = true; 1005 break; 1006 } 1007 } 1008 1009 foreach(ele; recentAutoClosedTags) { 1010 if(ele == n.payload) { 1011 found = true; 1012 break; 1013 } 1014 } 1015 1016 if(!found) // if not found in the tree though, it's probably just text 1017 e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">")); 1018 } 1019 } else { 1020 if(n.element) { 1021 if(!strict) 1022 considerHtmlParagraphHack(n.element); 1023 e.appendChild(n.element); 1024 } 1025 } 1026 1027 if(n.payload == tagName) // in strict mode, this is always true 1028 closed = true; 1029 } else { /*throw new Exception("wtf " ~ tagName);*/ } 1030 } 1031 //writef("</%s>\n", tagName); 1032 return Ele(0, e, null); 1033 } 1034 1035 // if a tag was opened but not closed by end of file, we can arrive here 1036 if(!strict && pos >= data.length) 1037 return addTag(false); 1038 //else if(strict) assert(0); // should be caught before 1039 1040 switch(data[pos]) { 1041 default: assert(0); 1042 case '/': // self closing tag 1043 return addTag(true); 1044 case '>': 1045 return addTag(false); 1046 case ' ': 1047 case '\t': 1048 case '\n': 1049 case '\r': 1050 // there might be attributes... 1051 moreAttributes: 1052 eatWhitespace(); 1053 1054 // same deal as above the switch.... 
1055 if(!strict && pos >= data.length) 1056 return addTag(false); 1057 1058 if(strict && pos >= data.length) 1059 throw new MarkupException("tag open, didn't find > before end of file"); 1060 1061 switch(data[pos]) { 1062 case '/': // self closing tag 1063 return addTag(true); 1064 case '>': // closed tag; open -- we now read the contents 1065 return addTag(false); 1066 default: // it is an attribute 1067 string attrName = readAttributeName(); 1068 string attrValue = attrName; 1069 1070 bool ateAny = eatWhitespace(); 1071 if(strict && ateAny) 1072 throw new MarkupException("inappropriate whitespace after attribute name"); 1073 1074 if(pos >= data.length) { 1075 if(strict) 1076 assert(0, "this should have thrown in readAttributeName"); 1077 else { 1078 data ~= ">"; 1079 goto blankValue; 1080 } 1081 } 1082 if(data[pos] == '=') { 1083 pos++; 1084 1085 ateAny = eatWhitespace(); 1086 // the spec actually allows this! 1087 //if(strict && ateAny) 1088 //throw new MarkupException("inappropriate whitespace after attribute equals"); 1089 1090 attrValue = readAttributeValue(); 1091 1092 eatWhitespace(); 1093 } 1094 1095 blankValue: 1096 1097 if(strict && attrName in attributes) 1098 throw new MarkupException("Repeated attribute: " ~ attrName); 1099 1100 if(attrName.strip().length) 1101 attributes[attrName] = attrValue; 1102 else if(strict) throw new MarkupException("wtf, zero length attribute name"); 1103 1104 if(!strict && pos < data.length && data[pos] == '<') { 1105 // this is the broken tag that doesn't have a > at the end 1106 data = data[0 .. pos] ~ ">" ~ data[pos.. $]; 1107 // let's insert one as a hack 1108 goto case '>'; 1109 } 1110 1111 goto moreAttributes; 1112 } 1113 } 1114 } 1115 1116 return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly. 1117 //assert(0); 1118 } 1119 1120 eatWhitespace(); 1121 Ele r; 1122 do { 1123 r = readElement(); // there SHOULD only be one element... 
1124 1125 if(r.type == 3 && r.element !is null) 1126 piecesBeforeRoot ~= r.element; 1127 1128 if(r.type == 4) 1129 break; // the document is completely empty... 1130 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1131 1132 root = r.element; 1133 1134 if(!strict) // in strict mode, we'll just ignore stuff after the xml 1135 while(r.type != 4) { 1136 r = readElement(); 1137 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 1138 if(r.element !is null) 1139 piecesAfterRoot ~= r.element; 1140 } 1141 } 1142 1143 if(root is null) 1144 { 1145 if(strict) 1146 assert(0, "empty document should be impossible in strict mode"); 1147 else 1148 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1149 } 1150 1151 if(paragraphHackfixRequired) { 1152 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1153 1154 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1155 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1156 1157 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1158 // Kind of inefficient because we can't detect when we recurse back out of a node. 1159 Element[Element] insertLocations; 1160 auto iterator = root.tree; 1161 foreach(ele; iterator) { 1162 if(ele.parentNode is null) 1163 continue; 1164 1165 if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) { 1166 auto shouldBePreviousSibling = ele.parentNode; 1167 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 
1168 if (auto p = holder in insertLocations) { 1169 shouldBePreviousSibling = *p; 1170 assert(shouldBePreviousSibling.parentNode is holder); 1171 } 1172 ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree()); 1173 insertLocations[holder] = ele; 1174 iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up. 1175 } 1176 } 1177 } 1178 } 1179 1180 /* end massive parse function */ 1181 1182 /// Gets the <title> element's innerText, if one exists 1183 @property string title() { 1184 bool doesItMatch(Element e) { 1185 return (e.tagName == "title"); 1186 } 1187 1188 auto e = findFirst(&doesItMatch); 1189 if(e) 1190 return e.innerText(); 1191 return ""; 1192 } 1193 1194 /// Sets the title of the page, creating a <title> element if needed. 1195 @property void title(string t) { 1196 bool doesItMatch(Element e) { 1197 return (e.tagName == "title"); 1198 } 1199 1200 auto e = findFirst(&doesItMatch); 1201 1202 if(!e) { 1203 e = createElement("title"); 1204 auto heads = getElementsByTagName("head"); 1205 if(heads.length) 1206 heads[0].appendChild(e); 1207 } 1208 1209 if(e) 1210 e.innerText = t; 1211 } 1212 1213 // FIXME: would it work to alias root this; ???? might be a good idea 1214 /// These functions all forward to the root element. See the documentation in the Element class. 
Element getElementById(string id) {
	return root.getElementById(id);
}

/// Forwards to `root.requireElementById`, which throws rather than
/// returning null when no element has the given id.
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
if(is(SomeElementType : Element))
out(ret) { assert(ret !is null); }
do {
	return root.requireElementById!(SomeElementType)(id, file, line);
}

/// Like [querySelector], but throws `ElementNotFoundException` when nothing
/// matches or when the match cannot be cast to `SomeElementType`.
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
if(is(SomeElementType : Element))
out(ret) { assert(ret !is null); }
do {
	if(auto found = cast(SomeElementType) querySelector(selector))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line);
}

/// Like [querySelector], but wraps the (possibly null) result in a
/// `MaybeNullElement` instead of returning it directly.
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
if(is(SomeElementType : Element))
{
	auto found = cast(SomeElementType) querySelector(selector);
	return MaybeNullElement!SomeElementType(found);
}

/// Returns the first element matching the selector, or null if there is none.
@scriptable
Element querySelector(string selector) {
	// see the comment in Document.querySelectorAll for why the leading
	// separation of each component is rewritten from 0 to -1
	auto parsed = Selector(selector);//, !loose);
	foreach(ref component; parsed.components) {
		if(component.parts.length == 0)
			continue;
		if(component.parts[0].separation == 0)
			component.parts[0].separation = -1;
	}

	// only the first match is wanted, so bail out as soon as we see one
	foreach(match; parsed.getMatchingElementsLazy(this.root))
		return match;
	return null;
}

/// Returns every element matching the selector.
@scriptable
Element[] querySelectorAll(string selector) {
	// In standards-compliant code, the document is slightly magical
	// in that it is a pseudoelement at top level. It should actually
	// match the root as one of its children.
	//
	// In versions of dom.d before Dec 29 2019, this worked because
	// querySelectorAll was willing to return itself. With that bug fix
	// (search "arbitrary id asduiwh" in this file for associated unittest)
	// this would have failed. Hence adding back the root if it matches the
	// selector itself.
	//
	// I'd love to do this better later.

	auto parsed = Selector(selector);//, !loose);
	foreach(ref component; parsed.components) {
		if(component.parts.length == 0)
			continue;
		if(component.parts[0].separation == 0)
			component.parts[0].separation = -1;
	}
	return parsed.getMatchingElements(this.root);
}

/// Forwards to `root.getElementsBySelector`.
deprecated("use querySelectorAll instead")
Element[] getElementsBySelector(string selector) {
	return root.getElementsBySelector(selector);
}

/// Forwards to `root.getElementsByTagName`.
@scriptable
Element[] getElementsByTagName(string tag) {
	return root.getElementsByTagName(tag);
}

/// Forwards to `root.getElementsByClassName`.
@scriptable
Element[] getElementsByClassName(string tag) {
	return root.getElementsByClassName(tag);
}

/** FIXME: btw, this could just be a lazy range...... */
/// Returns the first element (via [findFirst]) whose tagName equals `tag`.
/// When the document is `loose`, `tag` is lower-cased before comparing.
Element getFirstElementByTagName(string tag) {
	if(loose)
		tag = tag.toLower();

	bool tagMatches(Element candidate) {
		return candidate.tagName == tag;
	}

	return findFirst(&tagMatches);
}

/// This returns the <body> element, if there is one. (It is different than Javascript, where it is called 'body', because body is a keyword in D.)
Element mainBody() {
	return getFirstElementByTagName("body");
}

/// Looks up a `<meta>` tag under `<head>` and returns its `content`.
/// This uses a weird thing... it matches on [name=] if `name` has no colon
/// and on [property=] if it has one. Returns null if no such tag exists.
string getMeta(string name) {
	string attributeToMatch = name.indexOf(":") != -1 ? "property" : "name";
	auto meta = querySelector("head meta["~attributeToMatch~"="~name~"]");
	if(meta is null)
		return null;
	return meta.content;
}

/// Sets a meta tag in the document header.
/// It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags.
/// The tag is matched on [name=] if `name` has no colon and on [property=] if it has one;
/// if no such tag exists, one is added under `<head>` (which must exist).
void setMeta(string name, string value) {
	string attributeToMatch = name.indexOf(":") != -1 ? "property" : "name";
	auto meta = querySelector("head meta["~attributeToMatch~"="~name~"]");
	if(meta is null) {
		// no existing tag: add one to <head>; requireSelector throws if there is no head
		meta = requireSelector("head").addChild("meta");
		meta.setAttribute(attributeToMatch, name);
	}

	meta.content = value;
}

/// Returns the result of `getElementsByTagName("form")` cast to `Form[]`.
Form[] forms() {
	return cast(Form[]) getElementsByTagName("form");
}

/// Creates a new `form` element through [createElement].
Form createForm()
out(ret) {
	assert(ret !is null);
}
do {
	return cast(Form) createElement("form");
}

/// Creates an element with this document as its parentDocument, using the
/// document's `selfClosedElements` list. In `loose` mode the name is lower-cased first.
Element createElement(string name) {
	if(loose)
		name = name.toLower();

	auto created = Element.make(name, null, null, selfClosedElements);
	created.parentDocument = this;
	return created;

	// return new Element(this, name, null, selfClosed);
}

/// Creates a new, empty `DocumentFragment` belonging to this document.
Element createFragment() {
	return new DocumentFragment(this);
}

/// Creates a new `TextNode` belonging to this document with the given content.
Element createTextNode(string content) {
	return new TextNode(this, content);
}


/// Depth-first search from the root (parent before children, children in order).
/// Returns the first element for which `doesItMatch` returns true, or null
/// if there is no root or nothing matches.
Element findFirst(bool delegate(Element) doesItMatch) {
	Element result;

	// returns true once a match has been stored in `result`, which stops the walk
	bool visit(Element node) {
		if(doesItMatch(node)) {
			result = node;
			return true;
		}

		foreach(kid; node.children)
			if(visit(kid))
				return true;

		return false;
	}

	if(root !is null)
		visit(root);

	return result;
}

/// Drops the root element and turns `loose` mode off.
void clear() {
	root = null;
	loose = false;
}

/// Overrides the prolog text and records that the user set it explicitly.
void setProlog(string d) {
	_prolog = d;
	prologWasSet = true;
}

///.
private string _prolog = "<!DOCTYPE html>\n";
private bool prologWasSet = false; // set to true if the user changed it

/// The text emitted before the root element when the document is serialized.
@property string prolog() const {
	// if the user explicitly changed it, do what they want
	// or if we didn't keep/find stuff from the document itself,
	// we'll use the builtin one as a default.
	if(prologWasSet || piecesBeforeRoot.length == 0)
		return _prolog;

	// otherwise rebuild it from the nodes seen before the root, one per line
	string p;
	foreach(e; piecesBeforeRoot)
		p ~= e.toString() ~ "\n";
	return p;
}

/// Serializes the document: the prolog followed by the root element.
/// If there is no root (for example after `clear()`), only the prolog is returned.
override string toString() const {
	if(root is null)
		return prolog; // clear() leaves root null; don't dereference it
	return prolog ~ root.toString();
}

/++
	Writes it out with whitespace for easier eyeball debugging

	Do NOT use for anything other than eyeball debugging,
	because whitespace may be significant content in XML.

	If there is no root (for example after `clear()`), the root is simply skipped.
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
	import std.string;
	string s = prolog.strip;

	/*
	if(insertComments) s ~= "<!--";
	s ~= "\n";
	if(insertComments) s ~= "-->";
	*/

	if(root !is null) // clear() leaves root null; don't dereference it
		s ~= root.toPrettyString(insertComments, indentationLevel, indentWith);
	foreach(a; piecesAfterRoot)
		s ~= a.toPrettyString(insertComments, indentationLevel, indentWith);
	return s;
}

/// The root element of the document. It is null after `clear()`.
Element root;

/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
Element[] piecesBeforeRoot;

/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
Element[] piecesAfterRoot;

///.
bool loose;



// what follows are for mutation events that you can observe

/// Callbacks invoked, in order, by [dispatchMutationEvent].
void delegate(DomMutationEvent)[] eventObservers;

/// Passes the event to every registered observer.
void dispatchMutationEvent(DomMutationEvent e) {
	foreach(observer; eventObservers)
		observer(e);
}
}

/// This represents almost everything in the DOM.
/// Group: core_functionality
class Element {
/// Returns a collection of elements by selector.
/// See: [Document.opIndex]
ElementCollection opIndex(string selector) {
	auto collection = ElementCollection(this);
	return collection[selector];
}

/++
	Returns the child node with the particular index, or null
	if the index is out of range.

	Be aware that child nodes include text nodes, including
	whitespace-only nodes.
+/
Element opIndex(size_t index) {
	return index < children.length ? this.children[index] : null;
}

/// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done.
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
if(is(SomeElementType : Element))
out(ret) {
	assert(ret !is null);
}
do {
	// a failed lookup and a failed dynamic cast both end up as null here
	if(auto found = cast(SomeElementType) getElementById(id))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line);
}

/// Same idea as requireElementById, but looks the element up with
/// querySelector on a selector instead of by id.
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
if(is(SomeElementType : Element))
out(ret) {
	assert(ret !is null);
}
do {
	if(auto found = cast(SomeElementType) querySelector(selector))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line);
}


/++
	If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods.
+/
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
if(is(SomeElementType : Element))
{
	auto found = cast(SomeElementType) querySelector(selector);
	return MaybeNullElement!SomeElementType(found);
}



/// get all the classes on this element (the className split on single spaces)
@property string[] classes() {
	string current = className;
	return split(current, " ");
}

/// Adds a string to the class attribute. The class attribute is used a lot in CSS.
@scriptable
Element addClass(string c) {
	if(hasClass(c))
		return this; // don't add it twice

	string cn = getAttribute("class");
	if(cn.length == 0)
		setAttribute("class", c);
	else
		setAttribute("class", cn ~ " " ~ c);

	return this;
}

/// Removes a particular class name. The remaining class names are
/// written back separated by single spaces. Returns `this`.
@scriptable
Element removeClass(string c) {
	if(!hasClass(c))
		return this;

	string cn = className;
	string n;
	// split() with no separator tokenizes on any whitespace, so tab- or
	// newline-separated class lists are handled the same as space-separated ones
	foreach(name; cn.split()) {
		if(c == name)
			continue; // cut it out
		if(n.length)
			n ~= " ";
		n ~= name;
	}

	className = n.strip();

	return this;
}

/// Returns whether the given class appears in this element.
/// The class attribute is treated as a whitespace-separated token list,
/// so only whole tokens match (not substrings of other class names).
bool hasClass(string c) {
	string cn = className;

	// cheap rejection before allocating the token array
	if(cn.indexOf(c) == -1)
		return false;

	// split on any whitespace, not just ' ', since class="a\tb" is valid html
	foreach(cla; cn.split())
		if(cla == c)
			return true;
	return false;
}


/* *******************************
	  DOM Mutation
*********************************/
/// convenience function to quickly add a tag with some text or
/// other relevant info (for example, it's a src for an <img> element
/// instead of inner text). The element is built by `Element.make` and
/// the result of `appendChild` is returned.
Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
in {
	assert(tagName !is null);
}
out(e) {
	//assert(e.parentNode is this);
	//assert(e.parentDocument is this.parentDocument);
}
do {
	auto e = Element.make(tagName, childInfo, childInfo2);
	// FIXME (maybe): if the thing is self closed, we might want to go ahead and
	// return the parent. That will break existing code though.
	return appendChild(e);
}

/// Another convenience function. Adds a child directly after the current one, returning
/// the new child.
///
/// Between this, addChild, and parentNode, you can build a tree as a single expression.
Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
in {
	assert(tagName !is null);
	assert(parentNode !is null);
}
out(e) {
	assert(e.parentNode is this.parentNode);
	assert(e.parentDocument is this.parentDocument);
}
do {
	auto e = Element.make(tagName, childInfo, childInfo2);
	return parentNode.insertAfter(this, e);
}

/// Inserts an existing element directly after this one in the parent.
Element addSibling(Element e) {
	return parentNode.insertAfter(this, e);
}

/// Appends an existing element; same as `appendChild`.
Element addChild(Element e) {
	return this.appendChild(e);
}

/// Convenience function to append text intermixed with other children.
/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
/// or div.addChildren("Hello, ", user.name, "!");
///
/// Arguments that are Elements are appended as children; string arguments
/// are appended as text. Anything else is rejected at compile time.
///
/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
void addChildren(T...)(T t) {
	import std.conv : to;
	import std.traits : isSomeString;

	foreach(item; t) {
		// `item` is a value, so the checks must be done on typeof(item);
		// testing `is(item : Element)` is always false and rejected every call
		static if(is(typeof(item) : Element))
			appendChild(item);
		else static if(isSomeString!(typeof(item)))
			appendText(to!string(item));
		else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
	}
}

/// Creates a new `tagName` element containing `firstChild`, appends it to
/// this element, and returns the new element. `info2` is passed to
/// `Element.make` as its second info string.
Element addChild(string tagName, Element firstChild, string info2 = null)
in {
	assert(firstChild !is null);
}
out(ret) {
	assert(ret !is null);
	assert(ret.parentNode is this);
	assert(firstChild.parentNode is ret);

	assert(ret.parentDocument is this.parentDocument);
	//assert(firstChild.parentDocument is this.parentDocument);
}
do {
	auto e = Element.make(tagName, "", info2);
	e.appendChild(firstChild);
	this.appendChild(e);
	return e;
}

/// Creates a new `tagName` element, appends it to this element, then sets
/// its innerHTML from `innerHtml.source`. Returns the new element.
Element addChild(string tagName, in Html innerHtml, string info2 = null)
in {
}
out(ret) {
	assert(ret !is null);
	assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null");
	assert(ret.parentDocument is this.parentDocument);
}
do {
	auto e = Element.make(tagName, "", info2);
	// appended before innerHTML is assigned, so the new element is already attached
	this.appendChild(e);
	e.innerHTML = innerHtml.source;
	return e;
}


/// Appends each element of the array, in order, with `appendChild`.
void appendChildren(Element[] children) {
	foreach(ele; children)
		appendChild(ele);
}

///.
void reparent(Element newParent)
in {
	assert(newParent !is null);
	assert(parentNode !is null);
}
out {
	assert(this.parentNode is newParent);
	//assert(isInArray(this, newParent.children));
}
do {
	// detach from the current parent first, then attach to the new one
	parentNode.removeChild(this);
	newParent.appendChild(this);
}

/**
	Strips this tag out of the document, putting its inner html
	as children of the parent.

	For example, given: `<p>hello <b>there</b></p>`, if you
	call `stripOut` on the `b` element, you'll be left with
	`<p>hello there</p>`.

	The idea here is to make it easy to get rid of garbage
	markup you aren't interested in.
*/
void stripOut()
in {
	assert(parentNode !is null);
}
out {
	assert(parentNode is null);
	assert(children.length == 0);
}
do {
	// remove the parent link from every child before handing them over
	foreach(kid; children)
		kid.parentNode = null;

	if(children.length == 0)
		parentNode.removeChild(this);
	else
		parentNode.replaceChild(this, this.children);

	this.children.length = 0; // we reparented them all above
}

/// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check
/// if the element already isn't in a tree, it does nothing.
Element removeFromTree()
in {

}
out(var) {
	assert(this.parentNode is null);
	assert(var is this);
}
do {
	if(this.parentNode !is null)
		this.parentNode.removeChild(this);

	return this;
}

/++
	Wraps this element inside the given element.
	It's like `this.replaceWith(what); what.appendChild(this);`

	Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));`
	you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`.
+/
Element wrapIn(Element what)
in {
	assert(what !is null);
}
out(ret) {
	assert(this.parentNode is what);
	assert(ret is what);
}
do {
	// put `what` where this element was, then move this element inside it
	this.replaceWith(what);
	what.appendChild(this);

	return what;
}

/// Replaces this element with something else in the tree.
/// `e` is first removed from wherever it currently is. Returns `e`.
Element replaceWith(Element e)
in {
	assert(this.parentNode !is null);
}
do {
	e.removeFromTree();
	this.parentNode.replaceChild(this, e);
	return e;
}

/**
	Splits the className into an array of each class given
	(split on single spaces).
*/
string[] classNames() const {
	return className().split(" ");
}

/**
	Fetches the first consecutive text nodes concatenated together.


	`firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered.

	See_also: [directText], [innerText]
*/
string firstInnerText() const {
	string collected;
	foreach(kid; children) {
		// the first non-text child ends the run
		if(kid.nodeType != NodeType.Text)
			break;

		collected ~= kid.nodeValue();
	}
	return collected;
}


/**
	Returns the text directly under this element.


	Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues
	past child tags. So, `<example>some <b>bold</b> text</example>`
	will return `some  text` because it only gets the text, skipping non-text children.

	See_also: [firstInnerText], [innerText]
*/
@property string directText() {
	string collected;
	foreach(kid; children) {
		if(kid.nodeType != NodeType.Text)
			continue; // skip non-text children
		collected ~= kid.nodeValue();
	}

	return collected;
}

/**
	Sets the direct text, without modifying other child nodes.


	Unlike [innerText], this does *not* remove existing elements in the element.

	It only replaces the first text node it sees.

	If there are no text nodes, it calls [appendText].

	So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`.
*/
@property void directText(string text) {
	foreach(kid; children) {
		if(kid.nodeType != NodeType.Text)
			continue;

		// first text node found: overwrite it and stop
		auto textNode = cast(TextNode) kid;
		textNode.contents = text;
		return;
	}

	appendText(text);
}

// do nothing, this is primarily a virtual hook
// for links and forms
void setValue(string field, string value) { }


// this is a thing so i can remove observer support if it gets slow
// I have not implemented all these yet
//
// Builds a DomMutationEvent targeting this element and hands it to the
// parent document's dispatchMutationEvent. Does nothing without a parent document.
private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
	if(parentDocument is null)
		return;

	DomMutationEvent event;
	event.operation = operation;
	event.target = this;
	event.relatedString = s1;
	event.relatedString2 = s2;
	event.related = r;
	event.related2 = r2;
	parentDocument.dispatchMutationEvent(event);
}

// putting all the members up front

// this ought to be private. don't use it directly.
Element[] children;

/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
string tagName;

/// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.
string[string] attributes;

/// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here.
/// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
private bool selfClosed;

/// Get the parent Document object that contains this element.
/// It may be null, so remember to check for that.
Document parentDocument;

///.
1934 inout(Element) parentNode() inout { 1935 auto p = _parentNode; 1936 1937 if(cast(DocumentFragment) p) 1938 return p._parentNode; 1939 1940 return p; 1941 } 1942 1943 //protected 1944 Element parentNode(Element e) { 1945 return _parentNode = e; 1946 } 1947 1948 private Element _parentNode; 1949 1950 // the next few methods are for implementing interactive kind of things 1951 private CssStyle _computedStyle; 1952 1953 // these are here for event handlers. Don't forget that this library never fires events. 1954 // (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.) 1955 EventHandler[][string] bubblingEventHandlers; 1956 EventHandler[][string] capturingEventHandlers; 1957 EventHandler[string] defaultEventHandlers; 1958 1959 void addEventListener(string event, EventHandler handler, bool useCapture = false) { 1960 if(event.length > 2 && event[0..2] == "on") 1961 event = event[2 .. $]; 1962 1963 if(useCapture) 1964 capturingEventHandlers[event] ~= handler; 1965 else 1966 bubblingEventHandlers[event] ~= handler; 1967 } 1968 1969 1970 // and now methods 1971 1972 /++ 1973 Convenience function to try to do the right thing for HTML. This is the main way I create elements. 1974 1975 History: 1976 On February 8, 2021, the `selfClosedElements` parameter was added. Previously, it used a private 1977 immutable global list for HTML. It still defaults to the same list, but you can change it now via 1978 the parameter. 1979 +/ 1980 static Element make(string tagName, string childInfo = null, string childInfo2 = null, const string[] selfClosedElements = htmlSelfClosedElements) { 1981 bool selfClosed = tagName.isInArray(selfClosedElements); 1982 1983 Element e; 1984 // want to create the right kind of object for the given tag... 
1985 switch(tagName) { 1986 case "#text": 1987 e = new TextNode(null, childInfo); 1988 return e; 1989 // break; 1990 case "table": 1991 e = new Table(null); 1992 break; 1993 case "a": 1994 e = new Link(null); 1995 break; 1996 case "form": 1997 e = new Form(null); 1998 break; 1999 case "tr": 2000 e = new TableRow(null); 2001 break; 2002 case "td", "th": 2003 e = new TableCell(null, tagName); 2004 break; 2005 default: 2006 e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere 2007 } 2008 2009 // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too 2010 e.tagName = tagName; 2011 e.selfClosed = selfClosed; 2012 2013 if(childInfo !is null) 2014 switch(tagName) { 2015 /* html5 convenience tags */ 2016 case "audio": 2017 if(childInfo.length) 2018 e.addChild("source", childInfo); 2019 if(childInfo2 !is null) 2020 e.appendText(childInfo2); 2021 break; 2022 case "source": 2023 e.src = childInfo; 2024 if(childInfo2 !is null) 2025 e.type = childInfo2; 2026 break; 2027 /* regular html 4 stuff */ 2028 case "img": 2029 e.src = childInfo; 2030 if(childInfo2 !is null) 2031 e.alt = childInfo2; 2032 break; 2033 case "link": 2034 e.href = childInfo; 2035 if(childInfo2 !is null) 2036 e.rel = childInfo2; 2037 break; 2038 case "option": 2039 e.innerText = childInfo; 2040 if(childInfo2 !is null) 2041 e.value = childInfo2; 2042 break; 2043 case "input": 2044 e.type = "hidden"; 2045 e.name = childInfo; 2046 if(childInfo2 !is null) 2047 e.value = childInfo2; 2048 break; 2049 case "button": 2050 e.innerText = childInfo; 2051 if(childInfo2 !is null) 2052 e.type = childInfo2; 2053 break; 2054 case "a": 2055 e.innerText = childInfo; 2056 if(childInfo2 !is null) 2057 e.href = childInfo2; 2058 break; 2059 case "script": 2060 case "style": 2061 e.innerRawSource = childInfo; 2062 break; 2063 case "meta": 2064 e.name = childInfo; 2065 if(childInfo2 !is null) 2066 e.content = childInfo2; 2067 break; 2068 /* 
generically, assume we were passed text and perhaps class */ 2069 default: 2070 e.innerText = childInfo; 2071 if(childInfo2.length) 2072 e.className = childInfo2; 2073 } 2074 2075 return e; 2076 } 2077 2078 static Element make(string tagName, in Html innerHtml, string childInfo2 = null) { 2079 // FIXME: childInfo2 is ignored when info1 is null 2080 auto m = Element.make(tagName, "not null"[0..0], childInfo2); 2081 m.innerHTML = innerHtml.source; 2082 return m; 2083 } 2084 2085 static Element make(string tagName, Element child, string childInfo2 = null) { 2086 auto m = Element.make(tagName, cast(string) null, childInfo2); 2087 m.appendChild(child); 2088 return m; 2089 } 2090 2091 2092 /// Generally, you don't want to call this yourself - use Element.make or document.createElement instead. 2093 this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) { 2094 parentDocument = _parentDocument; 2095 tagName = _tagName; 2096 if(_attributes !is null) 2097 attributes = _attributes; 2098 selfClosed = _selfClosed; 2099 2100 version(dom_node_indexes) 2101 this.dataset.nodeIndex = to!string(&(this.attributes)); 2102 2103 assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid"); 2104 } 2105 2106 /++ 2107 Convenience constructor when you don't care about the parentDocument. Note this might break things on the document. 2108 Note also that without a parent document, elements are always in strict, case-sensitive mode. 2109 2110 History: 2111 On February 8, 2021, the `selfClosedElements` parameter was added. It defaults to the same behavior as 2112 before: using the hard-coded list of HTML elements, but it can now be overridden. If you use 2113 [Document.createElement], it will use the list set for the current document. Otherwise, you can pass 2114 something here if you like. 
+/
this(string _tagName, string[string] _attributes = null, const string[] selfClosedElements = htmlSelfClosedElements) {
	tagName = _tagName;
	if(_attributes !is null)
		attributes = _attributes;
	// self-closed status comes from membership in the supplied list, not from an explicit flag
	selfClosed = tagName.isInArray(selfClosedElements);

	// this is meant to reserve some memory. It makes a small, but consistent improvement.
	//children.length = 8;
	//children.length = 0;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}

/// Bare constructor: only records the owning document. Tag name, attributes and
/// the self-closed flag are left for the caller to fill in.
private this(Document _parentDocument) {
	parentDocument = _parentDocument;

	version(dom_node_indexes)
		this.dataset.nodeIndex = to!string(&(this.attributes));
}


/* *******************************
       Navigating the DOM
*********************************/

/// Returns the first child of this element. If it has no children, returns null.
/// Remember, text nodes are children too.
@property Element firstChild() {
	return children.length ? children[0] : null;
}

/// Returns the last child of this element, or null if it has no children.
/// Text nodes count as children here as well.
@property Element lastChild() {
	return children.length ? children[$ - 1] : null;
}

/// UNTESTED
/// the next element you would encounter if you were reading it in the source
///
/// That is: the first child if there is one, otherwise the next sibling, otherwise
/// the next sibling of the nearest ancestor that has one. Returns null when this
/// node is the last one in its tree.
Element nextInSource() {
	Element n = firstChild;
	if(n is null)
		n = nextSibling();

	// No child and no sibling: climb the ancestor chain until one of them has a
	// following sibling. The walk must advance `p` on every step; the previous
	// version did not and looped forever as soon as the parent had no next sibling.
	for(auto p = this.parentNode; p !is null && n is null; p = p.parentNode)
		n = p.nextSibling;

	return n;
}

/// UNTESTED
/// ditto
///
/// Mirror image of [nextInSource]: the node read immediately before this one.
/// That is the deepest last descendant of the previous sibling, or the parent
/// when there is no previous sibling. Returns null for a node with neither.
Element previousInSource() {
	auto p = previousSibling;

	// first child (or parentless): the node read just before us is the parent itself
	if(p is null)
		return parentNode;

	// otherwise it is the last thing inside the previous sibling's subtree
	while(p.lastChild !is null)
		p = p.lastChild;

	return p;
}

/// The closest preceding sibling that is not a text node, or null if there is none.
@property Element previousElementSibling() {
	return previousSibling("*");
}

///.
/// Returns the closest sibling before this element in the parent's child list.
///
/// Params:
///	tagName = null accepts any node (text included); `"*"` accepts any node whose
///		`nodeType` is not `NodeType.Text`; anything else must equal the sibling's `tagName`.
///
/// Returns: the matching sibling, or null if there is no parent or no match.
@property Element previousSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ps = null;
	// scan forward and remember the latest match seen before reaching ourselves
	foreach(e; this.parentNode.childNodes) {
		if(e is this)
			break;
		if(tagName == "*" && e.nodeType != NodeType.Text) {
			ps = e;
		} else if(tagName is null || e.tagName == tagName)
			ps = e;
	}

	return ps;
}

/// The closest following sibling that is not a text node, or null if there is none.
@property Element nextElementSibling() {
	return nextSibling("*");
}

/// Returns the closest sibling after this element in the parent's child list.
/// `tagName` filters exactly as it does for [previousSibling].
///
/// Returns: the matching sibling, or null if there is no parent or no match.
@property Element nextSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ns = null;
	bool mightBe = false; // becomes true once the scan has passed `this`
	foreach(e; this.parentNode.childNodes) {
		if(e is this) {
			mightBe = true;
			continue;
		}
		if(mightBe) {
			if(tagName == "*" && e.nodeType != NodeType.Text) {
				ns = e;
				break;
			}
			if(tagName is null || e.tagName == tagName) {
				ns = e;
				break;
			}
		}
	}

	return ns;
}


/// Gets the nearest node, going up the chain, with the given tagName
/// May return null or throw.
///
/// When `tagName` is null and `T` is `Form`, `Table` or `Link`, the tag defaults to
/// `"form"`, `"table"` or `"a"` respectively. With a null tag name and any other
/// `T`, the immediate parent is the candidate.
///
/// Returns: the ancestor. May be null only when `T` is plain `Element`.
/// Throws: `ElementNotFoundException` when `T` is a subclass and no ancestor was
///	found, or the one found cannot be cast to `T`.
T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
	if(tagName is null) {
		static if(is(T == Form))
			tagName = "form";
		else static if(is(T == Table))
			tagName = "table";
		else static if(is(T == Link))
			tagName = "a"; // was `tagName == "a";`, a comparison that never set the default
	}

	auto par = this.parentNode;
	while(par !is null) {
		if(tagName is null || par.tagName == tagName)
			break;
		par = par.parentNode;
	}

	// `static if` opens no scope, so `t` is declared for the final return either way
	static if(!is(T == Element)) {
		auto t = cast(T) par;
		if(t is null)
			throw new ElementNotFoundException("", tagName ~ " parent not found", this);
	} else
		auto t = par;

	return t;
}

/// Returns the first element yielded by `tree` whose `id` equals the argument,
/// or null if there is none.
Element getElementById(string id) {
	// FIXME: I use this function a lot, and it's kinda slow
	// not terribly slow, but not great.
	foreach(e; tree)
		if(e.id == id)
			return e;
	return null;
}

/++
	Returns a child element that matches the given `selector`.

	Note: you can give multiple selectors, separated by commas.
	It will return the first match it finds.

	Returns null when nothing matches.
+/
@scriptable
Element querySelector(string selector) {
	// parse once, then test every node that `tree` yields
	Selector s = Selector(selector);
	foreach(ele; tree)
		if(s.matchesElement(ele))
			return ele;
	return null;
}

/// a more standards-compliant alias for getElementsBySelector
@scriptable
Element[] querySelectorAll(string selector) {
	return getElementsBySelector(selector);
}

/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable
bool matches(string selector) {
	/+
		bool caseSensitiveTags = true;
		if(parentDocument && parentDocument.loose)
			caseSensitiveTags = false;
	+/

	Selector s = Selector(selector);
	return s.matchesElement(this);
}

/// Returns itself or the closest parent that matches the given selector, or null if none found
/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
@scriptable
Element closest(string selector) {
	Element e = this;
	while(e !is null) {
		if(e.matches(selector))
			return e;
		e = e.parentNode;
	}
	return null;
}

/**
	Returns elements that match the given CSS selector

	* -- all, default if nothing else is there

	tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

	It is all additive

	OP

	space = descendant
	>     = direct descendant
	+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

	[foo]        Foo is present as an attribute
	[foo="warning"]   Matches any E element whose "foo" attribute value is exactly equal to "warning".
2340 E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning" 2341 E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en". 2342 2343 [item$=sdas] ends with 2344 [item^-sdsad] begins with 2345 2346 Quotes are optional here. 2347 2348 Pseudos: 2349 :first-child 2350 :last-child 2351 :link (same as a[href] for our purposes here) 2352 2353 2354 There can be commas separating the selector. A comma separated list result is OR'd onto the main. 2355 2356 2357 2358 This ONLY cares about elements. text, etc, are ignored 2359 2360 2361 There should be two functions: given element, does it match the selector? and given a selector, give me all the elements 2362 */ 2363 Element[] getElementsBySelector(string selector) { 2364 // FIXME: this function could probably use some performance attention 2365 // ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app. 2366 2367 2368 bool caseSensitiveTags = true; 2369 if(parentDocument && parentDocument.loose) 2370 caseSensitiveTags = false; 2371 2372 Element[] ret; 2373 foreach(sel; parseSelectorString(selector, caseSensitiveTags)) 2374 ret ~= sel.getElements(this); 2375 return ret; 2376 } 2377 2378 /// . 2379 Element[] getElementsByClassName(string cn) { 2380 // is this correct? 2381 return getElementsBySelector("." ~ cn); 2382 } 2383 2384 ///. 2385 Element[] getElementsByTagName(string tag) { 2386 if(parentDocument && parentDocument.loose) 2387 tag = tag.toLower(); 2388 Element[] ret; 2389 foreach(e; tree) 2390 if(e.tagName == tag) 2391 ret ~= e; 2392 return ret; 2393 } 2394 2395 2396 /* ******************************* 2397 Attributes 2398 *********************************/ 2399 2400 /** 2401 Gets the given attribute value, or null if the 2402 attribute is not set. 
2403 2404 Note that the returned string is decoded, so it no longer contains any xml entities. 2405 */ 2406 @scriptable 2407 string getAttribute(string name) const { 2408 if(parentDocument && parentDocument.loose) 2409 name = name.toLower(); 2410 auto e = name in attributes; 2411 if(e) 2412 return *e; 2413 else 2414 return null; 2415 } 2416 2417 /** 2418 Sets an attribute. Returns this for easy chaining 2419 */ 2420 @scriptable 2421 Element setAttribute(string name, string value) { 2422 if(parentDocument && parentDocument.loose) 2423 name = name.toLower(); 2424 2425 // I never use this shit legitimately and neither should you 2426 auto it = name.toLower(); 2427 if(it == "href" || it == "src") { 2428 auto v = value.strip().toLower(); 2429 if(v.startsWith("vbscript:")) 2430 value = value[9..$]; 2431 if(v.startsWith("javascript:")) 2432 value = value[11..$]; 2433 } 2434 2435 attributes[name] = value; 2436 2437 sendObserverEvent(DomMutationOperations.setAttribute, name, value); 2438 2439 return this; 2440 } 2441 2442 /** 2443 Returns if the attribute exists. 2444 */ 2445 @scriptable 2446 bool hasAttribute(string name) { 2447 if(parentDocument && parentDocument.loose) 2448 name = name.toLower(); 2449 2450 if(name in attributes) 2451 return true; 2452 else 2453 return false; 2454 } 2455 2456 /** 2457 Removes the given attribute from the element. 2458 */ 2459 @scriptable 2460 Element removeAttribute(string name) 2461 out(ret) { 2462 assert(ret is this); 2463 } 2464 do { 2465 if(parentDocument && parentDocument.loose) 2466 name = name.toLower(); 2467 if(name in attributes) 2468 attributes.remove(name); 2469 2470 sendObserverEvent(DomMutationOperations.removeAttribute, name); 2471 return this; 2472 } 2473 2474 /** 2475 Gets the class attribute's contents. Returns 2476 an empty string if it has no class. 2477 */ 2478 @property string className() const { 2479 auto c = getAttribute("class"); 2480 if(c is null) 2481 return ""; 2482 return c; 2483 } 2484 2485 ///. 
/// Setter: stores `c` as the `class` attribute and returns this for chaining.
@property Element className(string c) {
	this.setAttribute("class", c);
	return this;
}

/**
	Provides easy access to common HTML attributes, object style.

	Only names accepted by `isConvenientAttribute` are available this way.
	Passing a non-null value sets the attribute first; the current value is
	returned in either case.

	---
	auto element = Element.make("a");
	element.href = "cool.html"; // this is the same as element.setAttribute("href", "cool.html");
	string where = element.href; // same as element.getAttribute("href");
	---

*/
@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
	// pure read
	if(v is null)
		return getAttribute(name);

	// write, then report what is now stored
	setAttribute(name, v);
	return getAttribute(name);
}

/**
	Old access to attributes. Use [attrs] instead.

	DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
	so I want to remove it. A small whitelist of attributes is still allowed, but others are not.

	Instead, use element.attrs.attribute, element.attrs["attribute"],
	or element.getAttribute("attribute")/element.setAttribute("attribute").
*/
@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
	// any instantiation is a compile-time error that names the offending attribute
	static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}

/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
	if(boolean)
		setAttribute(name, name);
	else
		removeAttribute(name);
}
*/

/**
	Returns the element's children (read-only view for const elements).
*/
@property const(Element[]) childNodes() const {
	return this.children;
}

/// Mutable version of the same
@property Element[] childNodes() { // FIXME: the above should be inout
	return this.children;
}

/++
	HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
2544 Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");` 2545 +/ 2546 @property DataSet dataset() { 2547 return DataSet(this); 2548 } 2549 2550 /++ 2551 Gives dot/opIndex access to attributes 2552 --- 2553 ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo") 2554 --- 2555 +/ 2556 @property AttributeSet attrs() { 2557 return AttributeSet(this); 2558 } 2559 2560 /++ 2561 Provides both string and object style (like in Javascript) access to the style attribute. 2562 2563 --- 2564 element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute 2565 --- 2566 +/ 2567 @property ElementStyle style() { 2568 return ElementStyle(this); 2569 } 2570 2571 /++ 2572 This sets the style attribute with a string. 2573 +/ 2574 @property ElementStyle style(string s) { 2575 this.setAttribute("style", s); 2576 return this.style; 2577 } 2578 2579 private void parseAttributes(string[] whichOnes = null) { 2580 /+ 2581 if(whichOnes is null) 2582 whichOnes = attributes.keys; 2583 foreach(attr; whichOnes) { 2584 switch(attr) { 2585 case "id": 2586 2587 break; 2588 case "class": 2589 2590 break; 2591 case "style": 2592 2593 break; 2594 default: 2595 // we don't care about it 2596 } 2597 } 2598 +/ 2599 } 2600 2601 2602 // if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there. 2603 /// Don't use this. 
///
/// Builds a `CssStyle` the first time it is asked for and caches it in
/// `_computedStyle`. The source text is the `style` attribute followed by
/// declarations derived from the legacy presentational attributes.
@property CssStyle computedStyle() {
	// already built once: hand back the cached object
	if(_computedStyle !is null)
		return _computedStyle;

	string css = this.getAttribute("style");

	/* we'll treat shitty old html attributes as css here */
	if(this.hasAttribute("width"))
		css ~= "; width: " ~ this.attrs.width;
	if(this.hasAttribute("height"))
		css ~= "; height: " ~ this.attrs.height;
	if(this.hasAttribute("bgcolor"))
		css ~= "; background-color: " ~ this.attrs.bgcolor;
	if(this.tagName == "body" && this.hasAttribute("text"))
		css ~= "; color: " ~ this.attrs.text;
	if(this.hasAttribute("color"))
		css ~= "; color: " ~ this.attrs.color;
	/* done */

	_computedStyle = new CssStyle(null, css); // gives at least something to work with
	return _computedStyle;
}

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	/// Own `offsetLeft` plus that of every element up the `offsetParent` chain.
	int absoluteLeft() {
		int total = offsetLeft;
		for(auto anchor = offsetParent; anchor; anchor = anchor.offsetParent)
			total += anchor.offsetLeft;
		return total;
	}

	///ditto
	/// Own `offsetTop` plus that of every element up the `offsetParent` chain.
	int absoluteTop() {
		int total = offsetTop;
		for(auto anchor = offsetParent; anchor; anchor = anchor.offsetParent)
			total += anchor.offsetTop;
		return total;
	}
}

// Back to the regular dom functions

public:


/* *******************************
          DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
///
/// Implemented by setting `children` to null. The former children's own
/// `parentNode` links are not touched here (compare `removeChildren`).
void removeAllChildren()
	out {
		assert(this.children.length == 0);
	}
do {
	children = null;
}

/// Inserts `e` into this element's parent, right after this element, and returns `e`.
/// Dereferences `parentNode`, so this element must currently have a parent.
///
/// History: added June 13, 2020
Element appendSibling(Element e) {
	parentNode.insertAfter(this, e);
	return e;
}

/// Inserts `e` into this element's parent, right before this element, and returns `e`.
/// Dereferences `parentNode`, so this element must currently have a parent.
///
/// History: added June 13, 2020
Element prependSibling(Element e) {
	parentNode.insertBefore(this, e);
	return e;
}


/++
	Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

	Appending also clears `selfClosed`, gives `e` this element's `parentDocument`,
	and sends an `appendChild` observer event. When `e` is a `DocumentFragment`,
	the fragment's children are appended rather than the fragment itself.

	Returns: `e`.

	See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild

	History:
		Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
+/
Element appendChild(Element e)
	in {
		assert(e !is null);
	}
	out (ret) {
		assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null");
		assert(e.parentDocument is this.parentDocument);
		assert(e is ret);
	}
do {
	// detach from the previous parent first so the node never sits in two child lists
	if(e.parentNode !is null)
		e.parentNode.removeChild(e);

	selfClosed = false;
	e.parentNode = this;
	e.parentDocument = this.parentDocument;
	if(auto frag = cast(DocumentFragment) e)
		children ~= frag.children;
	else
		children ~= e;

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

	return e;
}

/// Inserts the second element to this node, right before the first param
///
/// `where` must already be a child of this element and `what` must be parentless
/// (both asserted by the in-contract). A `DocumentFragment` contributes its
/// children instead of itself. Returns `what`.
///
/// NOTE(review): if `where` is not found in `children` the function returns `what`
/// without inserting it; the trailing `assert(0)` is unreachable.
Element insertBefore(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);

		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
do {
	foreach(i, e; children) {
		if(e is where) {
			if(auto frag = cast(DocumentFragment) what)
				children = children[0..i] ~ frag.children ~ children[i..$];
			else
				children = children[0..i] ~ what ~ children[i..$];
			what.parentDocument = this.parentDocument;
			what.parentNode = this;
			return what;
		}
	}

	return what;

	assert(0);
}

/++
	Inserts the given element `what` as a child of this element, immediately after the existing child `where`.

	`where` must already be a child of this element and `what` must be parentless
	(both asserted by the in-contract). A `DocumentFragment` contributes its
	children instead of itself. Returns `what`.

	NOTE(review): if `where` is not found in `children` the function returns `what`
	without inserting it; the trailing `assert(0)` is unreachable.
+/
Element insertAfter(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);
		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
do {
	foreach(i, e; children) {
		if(e is where) {
			// split just past `where` and splice the new content into the gap
			if(auto frag = cast(DocumentFragment) what)
				children = children[0 .. i + 1] ~ what.children ~ children[i + 1 .. $];
			else
				children = children[0 .. i + 1] ~ what ~ children[i + 1 .. $];
			what.parentNode = this;
			what.parentDocument = this.parentDocument;
			return what;
		}
	}

	return what;

	assert(0);
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
///
/// The replacement takes the old child's slot in `children` and is given this
/// element as parent along with this element's `parentDocument`. Hits `assert(0)`
/// if `child` is not found in `children`.
Element swapNode(Element child, Element replacement)
	in {
		assert(child !is null);
		assert(replacement !is null);
		assert(child.parentNode is this);
	}
	out(ret) {
		assert(ret is child);
		assert(ret.parentNode is null);
		assert(replacement.parentNode is this);
		assert(replacement.parentDocument is this.parentDocument);
	}
do {
	foreach(ref c; this.children)
		if(c is child) {
			// `c` is a reference to the slot: orphan the old node, then overwrite the slot
			c.parentNode = null;
			c = replacement;
			c.parentNode = this;
			c.parentDocument = this.parentDocument;
			return child;
		}
	assert(0);
}


/++
	Appends the given text to the node, as a new text node at the end of its children.


	Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
	yields `<example>text <b>bold</b> hi</example>`.
2826 2827 See_Also: 2828 [firstInnerText], [directText], [innerText], [appendChild] 2829 +/ 2830 @scriptable 2831 Element appendText(string text) { 2832 Element e = new TextNode(parentDocument, text); 2833 appendChild(e); 2834 return this; 2835 } 2836 2837 /++ 2838 Returns child elements which are of a tag type (excludes text, comments, etc.). 2839 2840 2841 childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag. 2842 2843 Params: 2844 tagName = filter results to only the child elements with the given tag name. 2845 +/ 2846 @property Element[] childElements(string tagName = null) { 2847 Element[] ret; 2848 foreach(c; children) 2849 if(c.nodeType == 1 && (tagName is null || c.tagName == tagName)) 2850 ret ~= c; 2851 return ret; 2852 } 2853 2854 /++ 2855 Appends the given html to the element, returning the elements appended 2856 2857 2858 This is similar to `element.innerHTML += "html string";` in Javascript. 2859 +/ 2860 @scriptable 2861 Element[] appendHtml(string html) { 2862 Document d = new Document("<root>" ~ html ~ "</root>"); 2863 return stealChildren(d.root); 2864 } 2865 2866 2867 ///. 
/// Inserts `child` into this element's children right after the existing child
/// `where`. A `DocumentFragment` contributes its children instead of itself.
/// Nothing is inserted if `where` is not found in `children`.
void insertChildAfter(Element child, Element where)
	in {
		assert(child !is null);
		assert(where !is null);
		assert(where.parentNode is this);
		assert(!selfClosed);
		//assert(isInArray(where, children));
	}
	out {
		assert(child.parentNode is this);
		assert(where.parentNode is this);
		//assert(isInArray(where, children));
		//assert(isInArray(child, children));
	}
do {
	foreach(ref i, c; children) {
		if(c is where) {
			i++; // split point is just past `where`; we break right after, so bumping the index is harmless
			if(auto frag = cast(DocumentFragment) child)
				children = children[0..i] ~ child.children ~ children[i..$];
			else
				children = children[0..i] ~ child ~ children[i..$];
			child.parentNode = this;
			child.parentDocument = this.parentDocument;
			break;
		}
	}
}

/++
	Reparents all the child elements of `e` to `this`, leaving `e` childless.

	Params:
		e = the element whose children you want to steal
		position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children. If it is given but is not one of our children, the stolen children are appended at the end as well.

	Returns: the stolen children, in their original order.
+/
Element[] stealChildren(Element e, Element position = null)
	in {
		assert(!selfClosed);
		assert(e !is null);
		//if(position !is null)
			//assert(isInArray(position, children));
	}
	out (ret) {
		assert(e.children.length == 0);
		// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
		version(none)
		debug foreach(child; ret) {
			assert(child.parentNode is this);
			assert(child.parentDocument is this.parentDocument);
		}
	}
do {
	foreach(c; e.children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}

	bool inserted = false;
	if(position !is null) {
		foreach(i, child; children) {
			if(child is position) {
				children = children[0..i] ~
					e.children ~
					children[i..$];
				inserted = true;
				break;
			}
		}
	}

	// No position, or a position that is not among our children: append. Before,
	// an unknown position emptied `e` and reparented its nodes without putting
	// them into any child list, silently dropping them from both trees.
	if(!inserted)
		children ~= e.children;

	// keep our own slice of the stolen nodes before `e` lets go of them
	auto ret = e.children[];
	e.children.length = 0;

	return ret;
}

/// Puts the given element first in our children list. The given element must not have a parent already.
///
/// A `DocumentFragment` contributes its children instead of itself. Returns `e`.
Element prependChild(Element e)
	in {
		assert(e.parentNode is null);
		assert(!selfClosed);
	}
	out {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		assert(children[0] is e);
	}
do {
	e.parentNode = this;
	e.parentDocument = this.parentDocument;
	if(auto frag = cast(DocumentFragment) e)
		children = e.children ~ children;
	else
		children = e ~ children;
	return e;
}


/**
	Returns a string containing all child elements, formatted such that it could be pasted into
	an XML file.

	Params:
		where = appender each child is written into; only the part written by this call is returned

	Returns: the children's markup, or an empty string when `children` is null.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	// the appender may already hold data from the caller; remember where ours starts
	auto start = where.data.length;

	foreach(child; children) {
		assert(child !is null);

		child.writeToAppender(where);
	}

	return where.data[start .. $];
}

/**
	Takes some html and replaces the element's children with the tree made from the string.
2987 */ 2988 @property Element innerHTML(string html, bool strict = false) { 2989 if(html.length) 2990 selfClosed = false; 2991 2992 if(html.length == 0) { 2993 // I often say innerHTML = ""; as a shortcut to clear it out, 2994 // so let's optimize that slightly. 2995 removeAllChildren(); 2996 return this; 2997 } 2998 2999 auto doc = new Document(); 3000 doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document 3001 3002 children = doc.root.children; 3003 foreach(c; children) { 3004 c.parentNode = this; 3005 c.parentDocument = this.parentDocument; 3006 } 3007 3008 reparentTreeDocuments(); 3009 3010 doc.root.children = null; 3011 3012 return this; 3013 } 3014 3015 /// ditto 3016 @property Element innerHTML(Html html) { 3017 return this.innerHTML = html.source; 3018 } 3019 3020 private void reparentTreeDocuments() { 3021 foreach(c; this.tree) 3022 c.parentDocument = this.parentDocument; 3023 } 3024 3025 /** 3026 Replaces this node with the given html string, which is parsed 3027 3028 Note: this invalidates the this reference, since it is removed 3029 from the tree. 3030 3031 Returns the new children that replace this. 3032 */ 3033 @property Element[] outerHTML(string html) { 3034 auto doc = new Document(); 3035 doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness 3036 3037 children = doc.root.children; 3038 foreach(c; children) { 3039 c.parentNode = this; 3040 c.parentDocument = this.parentDocument; 3041 } 3042 3043 3044 reparentTreeDocuments(); 3045 3046 3047 stripOut(); 3048 3049 return doc.root.children; 3050 } 3051 3052 /++ 3053 Returns all the html for this element, including the tag itself. 3054 3055 This is equivalent to calling toString(). 3056 +/ 3057 @property string outerHTML() { 3058 return this.toString(); 3059 } 3060 3061 /// This sets the inner content of the element *without* trying to parse it. 
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
///
/// The existing children are dropped and a single `RawSource` node holding the text takes their place.
@property void innerRawSource(string rawSource) {
	children.length = 0;

	auto raw = new RawSource(parentDocument, rawSource);
	raw.parentNode = this;
	children ~= raw;
}

/// Puts `replace` into the slot currently held by the child `find`.
/// `replace` must be parentless; `find` is left parentless afterwards.
///
/// Returns: `replace`.
/// Throws: `Exception` if `find` is not one of this element's children.
Element replaceChild(Element find, Element replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(replace.parentNode is null);
	}
	out(ret) {
		assert(ret is replace);
		assert(replace.parentNode is this);
		assert(replace.parentDocument is this.parentDocument);
		assert(find.parentNode is null);
	}
do {
	// FIXME
	//if(auto frag = cast(DocumentFragment) replace)
		//return this.replaceChild(frag, replace.children);
	foreach(ref slot; children) {
		if(slot !is find)
			continue;

		replace.parentNode = this;
		slot.parentNode = null; // the old occupant is orphaned
		slot = replace;
		replace.parentDocument = this.parentDocument;
		return replace;
	}

	throw new Exception("no such child");
}

/**
	Replaces the given element with a whole group.

	`find` must be a child of this element and every element of `replace` must be
	parentless (asserted by the in-contract). An empty `replace` simply removes
	`find`.

	Throws: `Exception` if `find` is not found in `children`.
*/
void replaceChild(Element find, Element[] replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(find.parentNode is this);
		debug foreach(r; replace)
			assert(r.parentNode is null);
	}
	out {
		assert(find.parentNode is null);
		assert(children.length >= replace.length);
		debug foreach(child; children)
			assert(child !is find);
		debug foreach(r; replace)
			assert(r.parentNode is this);
	}
do {
	if(replace.length == 0) {
		removeChild(find);
		return;
	}
	assert(replace.length);
	for(int i = 0; i < children.length; i++) {
		if(children[i] is find) {
			children[i].parentNode = null; // this element should now be dead
			// the first replacement takes over find's slot...
			children[i] = replace[0];
			foreach(e; replace) {
				e.parentNode = this;
				e.parentDocument = this.parentDocument;
			}

			// ...and the rest go in through the module-level insertAfter helper
			children = .insertAfter(children, i, replace[1..$]);

			return;
		}
	}

	throw new Exception("no such child");
}


/**
	Removes the given child from this list.

	The child's `parentNode` is set to null.

	Returns the removed element.

	Throws: `Exception` if `c` is not found in `children`.
*/
Element removeChild(Element c)
	in {
		assert(c !is null);
		assert(c.parentNode is this);
	}
	out {
		debug foreach(child; children)
			assert(child !is c);
		assert(c.parentNode is null);
	}
do {
	foreach(i, e; children) {
		if(e is c) {
			children = children[0..i] ~ children [i+1..$];
			c.parentNode = null;
			return c;
		}
	}

	throw new Exception("no such child");
}

/// This removes all the children from this element, returning the old list.
///
/// Each former child has its `parentNode` set to null. The returned array is a
/// copy of the old child list.
Element[] removeChildren()
	out (ret) {
		assert(children.length == 0);
		debug foreach(r; ret)
			assert(r.parentNode is null);
	}
do {
	Element[] oldChildren = children.dup;
	foreach(c; oldChildren)
		c.parentNode = null;

	children.length = 0;

	return oldChildren;
}

/**
	Fetch the inside text, with all tags stripped out.

	<p>cool <b>api</b> & code dude<p>
	innerText of that is "cool api & code dude".

	This does not match what real innerText does!
	http://perfectionkills.com/the-poor-misunderstood-innerText/

	It is more like textContent.
*/
@scriptable
@property string innerText() const {
	string s;
	foreach(child; children) {
		// text nodes contribute their value; everything else is asked for its own innerText
		if(child.nodeType != NodeType.Text)
			s ~= child.innerText;
		else
			s ~= child.nodeValue();
	}
	return s;
}

///
alias textContent = innerText;

/**
	Sets the inside text, replacing all children. You don't
	have to worry about entity encoding.

	Also clears `selfClosed`. The new content is a single text node.
*/
@scriptable
@property void innerText(string text) {
	selfClosed = false;
	Element e = new TextNode(parentDocument, text);
	e.parentNode = this;
	children = [e];
}

/**
	Strips this node out of the document, replacing it with the given text

	Dereferences `parentNode`, so this element must currently have a parent.
*/
@property void outerText(string text) {
	parentNode.replaceChild(this, new TextNode(parentDocument, text));
}

/**
	Same result as innerText; the tag with all inner tags stripped out
*/
@property string outerText() const {
	return innerText;
}


/* *******************************
          Miscellaneous
*********************************/

/// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it.
@property Element cloned()
/+
	out(ret) {
		// FIXME: not sure why these fail...
		assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
		assert(ret.tagName == this.tagName);
	}
do {
+/
{
	return this.cloneNode(true);
}

/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
///
/// The clone is built with `Element.make(tagName)`, shares this element's
/// `parentDocument`, gets a copy of the attributes (`aadup`) and the same
/// `selfClosed` flag.
Element cloneNode(bool deepClone) {
	auto e = Element.make(this.tagName);
	e.parentDocument = this.parentDocument;
	e.attributes = this.attributes.aadup;
	e.selfClosed = this.selfClosed;

	if(deepClone) {
		foreach(child; children) {
			e.appendChild(child.cloneNode(true));
		}
	}


	return e;
}

/// W3C DOM interface. Only really meaningful on [TextNode] instances, but the interface is present on the base class.
/// This base implementation returns an empty string.
string nodeValue() const {
	return "";
}

// should return int
/// This base implementation always returns 1.
@property int nodeType() const {
	return 1;
}


// Class invariant: the tag name never contains a space and, in debug builds, no
// child slot is null or refers to the element itself.
invariant () {
	assert(tagName.indexOf(" ") == -1);

	if(children !is null)
	debug foreach(child; children) {
	//	assert(parentNode !is null);
		assert(child !is null);
	//	assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName));
		assert(child !is this);
		//assert(child !is parentNode);
	}

	/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
	if(parentNode !is null) {
		// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
		auto lol = cast(TextNode) this;
		assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
	}
	+/
	//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
	// reason is so you can create these without needing a reference to the document
}

/**
	Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
	an XML file.
*/
override string toString() const {
	return writeToAppender();
}

/// Builds the line break plus indentation that goes in front of a tag in pretty output.
///
/// Params:
///	insertComments = wrap the inserted whitespace in `<!--` and `-->`
///	indentationLevel = how many copies of `indentWith` follow the newline
///	indentWith = the indentation unit; when null the result is null (no formatting)
protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const {
	if(indentWith is null)
		return null;
	string s;

	if(insertComments) s ~= "<!--";
	s ~= "\n";
	foreach(indent; 0 .. indentationLevel)
		s ~= indentWith;
	if(insertComments) s ~= "-->";

	return s;
}

/++
	Writes out with formatting. Be warned: formatting changes the contents. Use ONLY
	for eyeball debugging.

	Params:
		insertComments = wrap the added whitespace in html comments, see [toPrettyStringIndent]
		indentationLevel = nesting depth of this element; children are written one level deeper
		indentWith = indentation unit repeated once per level
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {

	// first step is to concatenate any consecutive text nodes to simplify
	// the white space analysis. this changes the tree! but i'm allowed since
	// the doc comment already warns that formatting changes the contents
	//
	// actually i'm not allowed cuz it is const so i will cheat and lie
	/+
	TextNode lastTextChild = null;
	for(int a = 0; a < this.children.length; a++) {
		auto child = this.children[a];
		if(auto tn = cast(TextNode) child) {
			if(lastTextChild) {
				lastTextChild.contents ~= tn.contents;
				for(int b = a; b < this.children.length - 1; b++)
					this.children[b] = this.children[b + 1];
				this.children = this.children[0 .. $-1];
			} else {
				lastTextChild = tn;
			}
		} else {
			lastTextChild = null;
		}
	}
	+/

	auto inlineElements = (parentDocument is null ? null : parentDocument.inlineElements);

	// local list that shadows the member: runs of adjacent text nodes are merged
	// into fresh TextNode objects here, so the real tree is left alone
	const(Element)[] children;

	TextNode lastTextChild = null;
	for(int a = 0; a < this.children.length; a++) {
		auto child = this.children[a];
		if(auto tn = cast(const(TextNode)) child) {
			if(lastTextChild !is null) {
				lastTextChild.contents ~= tn.contents;
			} else {
				lastTextChild = new TextNode("");
				// const is cast away only to give the temporary node a parent link
				lastTextChild.parentNode = cast(Element) this;
				lastTextChild.contents ~= tn.contents;
				children ~= lastTextChild;
			}
		} else {
			lastTextChild = null;
			children ~= child;
		}
	}

	string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith);

	s ~= "<";
	s ~= tagName;

	// i sort these for consistent output. might be more legible
	// but especially it keeps it the same for diff purposes.
	import std.algorithm : sort;
	auto keys = sort(attributes.keys);
	foreach(n; keys) {
		auto v = attributes[n];
		s ~= " ";
		s ~= n;
		s ~= "=\"";
		s ~= htmlEntitiesEncode(v);
		s ~= "\"";
	}

	// self-closed tags have no children and no closing tag to write
	if(selfClosed){
		s ~= " />";
		return s;
	}

	s ~= ">";

	// for simple `<collection><item>text</item><item>text</item></collection>`, let's
	// just keep them on the same line
	if(tagName.isInArray(inlineElements) || allAreInlineHtml(children, inlineElements)) {
		foreach(child; children) {
			s ~= child.toString();//toPrettyString(false, 0, null);
		}
	} else {
		foreach(child; children) {
			assert(child !is null);

			s ~= child.toPrettyString(insertComments, indentationLevel + 1, indentWith);
		}

		s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
	}

	s ~= "</";
	s ~= tagName;
	s ~= ">";

	return s;
}

/+
/// Writes out the opening tag only, if applicable.
3439 string writeTagOnly(Appender!string where = appender!string()) const { 3440 +/ 3441 3442 /// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time. 3443 /// Note: the ordering of attributes in the string is undefined. 3444 /// Returns the string it creates. 3445 string writeToAppender(Appender!string where = appender!string()) const { 3446 assert(tagName !is null); 3447 3448 where.reserve((this.children.length + 1) * 512); 3449 3450 auto start = where.data.length; 3451 3452 where.put("<"); 3453 where.put(tagName); 3454 3455 import std.algorithm : sort; 3456 auto keys = sort(attributes.keys); 3457 foreach(n; keys) { 3458 auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later. 3459 //assert(v !is null); 3460 where.put(" "); 3461 where.put(n); 3462 where.put("=\""); 3463 htmlEntitiesEncode(v, where); 3464 where.put("\""); 3465 } 3466 3467 if(selfClosed){ 3468 where.put(" />"); 3469 return where.data[start .. $]; 3470 } 3471 3472 where.put('>'); 3473 3474 innerHTML(where); 3475 3476 where.put("</"); 3477 where.put(tagName); 3478 where.put('>'); 3479 3480 return where.data[start .. $]; 3481 } 3482 3483 /** 3484 Returns a lazy range of all its children, recursively. 3485 */ 3486 @property ElementStream tree() { 3487 return new ElementStream(this); 3488 } 3489 3490 // I moved these from Form because they are generally useful. 3491 // Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here. 3492 /// Tags: HTML, HTML5 3493 // FIXME: add overloads for other label types... 
// ---- Element: form-building helpers ----

	// Appends a <label> to this element containing a caption <span> and a form
	// control named `name`, and returns that <label> (not the control).
	// A "textarea" type produces a <textarea rows="6">; every other type
	// produces an <input> with that type attribute. For "checkbox" and "radio"
	// the caption span is added after the control, otherwise before it.
	Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		const captionFollowsControl = (type == "checkbox" || type == "radio");
		auto wrapper = this.addChild("label");

		if(!captionFollowsControl)
			wrapper.addChild("span", label);

		Element control = (type == "textarea")
			? wrapper.addChild("textarea").setAttribute("name", name).setAttribute("rows", "6")
			: wrapper.addChild("input").setAttribute("name", name).setAttribute("type", type);

		if(captionFollowsControl)
			wrapper.addChild("span", label);

		// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
		fieldOptions.applyToElement(control);
		return wrapper;
	}

	/// Same as the string-label overload, except the caption is the given element,
	/// which is always added to the <label> before the control.
	Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto wrapper = this.addChild("label");
		wrapper.addChild(label);

		Element control = (type == "textarea")
			? wrapper.addChild("textarea").setAttribute("name", name).setAttribute("rows", "6")
			: wrapper.addChild("input").setAttribute("name", name).setAttribute("type", type);

		// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
		fieldOptions.applyToElement(control);
		return wrapper;
	}

	/// Shorthand for a "text" field with the given options.
	Element addField(string label, string name, FormFieldOptions fieldOptions) {
		return this.addField(label, name, "text", fieldOptions);
	}

	/// Appends a <label> holding a caption <span> and a <select> named `name`, and returns the <label>.
	/// One <option> child is added per entry of `options`, passing the entry's value
	/// and then its key to addChild. `fieldOptions` is accepted but not applied yet.
	Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto wrapper = this.addChild("label");
		wrapper.addChild("span", label);
		auto dropdown = wrapper.addChild("select").setAttribute("name", name);

		foreach(key, entry; options)
			dropdown.addChild("option", entry, key);

		// FIXME: implement requirements somehow

		return wrapper;
	}

	/// Appends a <div class="submit-holder"> containing a submit <input> and returns the <div>.
	/// The input's value is only set when `label` is non-empty.
	Element addSubmitButton(string label = null) {
		auto holder = this.addChild("div");
		holder.addClass("submit-holder");

		auto button = holder.addChild("input");
		button.type = "submit";
		if(label.length != 0)
			button.value = label;

		return holder;
	}

}

// FIXME: since Document loosens the input requirements, it should probably be the sub class...
/// Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header)
/// Group: core_functionality
class XmlDocument : Document {
	/// Clears the HTML-specific self-closed and inline element lists, switches the
	/// content type and prolog to XML, then parses `data` with parseStrict.
	this(string data) {
		selfClosedElements = null;
		inlineElements = null;
		contentType = "text/xml; charset=utf-8";
		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";

		parseStrict(data);
	}
}




import std.string;

/* domconvenience follows { */

/// finds comments that match the given txt. Case insensitive, strips whitespace.
/// Group: core_functionality
Element[] findComments(Document document, string txt) {
	auto searchRoot = document.root;
	return findComments(searchRoot, txt);
}

/// ditto
Element[] findComments(Element element, string txt) {
	// normalize the needle once; each candidate is normalized the same way below
	const wanted = txt.strip().toLower();
	Element[] matches;

	foreach(node; element.getElementsByTagName("#comment")) {
		if(node.nodeValue().strip().toLower() != wanted)
			continue;
		matches ~= node;
	}

	return matches;
}

/// An option type that propagates null. See: [Element.optionSelector]
/// Group: implementations
struct MaybeNullElement(SomeElementType) {
	/// The wrapped element; null means there is nothing to forward to.
	SomeElementType element;

	/// Wraps `ele`, which is allowed to be null.
	this(SomeElementType ele) {
		element = ele;
	}

	/// Forwards to the element, with a null check inserted that propagates null.
	///
	/// Only members returning an Element (or subclass), a string, or void can be
	/// forwarded; any other return type is rejected at compile time.
	auto opDispatch(string method, T...)(T args) {
		alias Result = typeof(__traits(getMember, element, method)(args));
		static if(is(Result : Element)) {
			// NOTE(review): the two returns below have different static types
			// (wrapper vs. bare element) - confirm the inferred return type is the intended one.
			if(element is null)
				return MaybeNullElement!Result(null);
			return __traits(getMember, element, method)(args);
		} else static if(is(Result == string)) {
			return element is null ? cast(string) null : __traits(getMember, element, method)(args);
		} else static if(is(Result == void)) {
			if(element !is null)
				__traits(getMember, element, method)(args);
		} else {
			static assert(0);
		}
	}

	/// Allows implicit casting to the wrapped element.
	alias element this;
}

/++
	A collection of elements which forwards methods to the children.
+/
/// Group: implementations
struct ElementCollection {
	/// The members of the collection, in order.
	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// A collection holding just `e`.
	this(Element e) {
		elements = [e];
	}

	/// A collection holding whatever `e.querySelectorAll(selector)` returns.
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	/// A collection over the given array (the array is not copied).
	this(Element[] e) {
		elements = e;
	}

	/// Runs getElementsBySelector on every member and gathers all the results into a new collection.
	ElementCollection opIndex(string selector) {
		Element[] gathered;
		foreach(member; elements)
			gathered ~= member.getElementsBySelector(selector);
		return ElementCollection(gathered);
	}

	/// The member at position `i`.
	Element opIndex(int i) {
		return elements[i];
	}

	/// if you slice it, give the underlying array for easy forwarding of the
	/// collection to range expecting algorithms or looping over.
	Element[] opSlice() {
		return elements;
	}

	/// And input range primitives so we can foreach over this
	void popFront() {
		elements = elements[1 .. $];
	}

	/// ditto
	Element front() {
		return elements[0];
	}

	/// ditto
	bool empty() {
		return elements.length == 0;
	}

	/++
		Collects strings from the collection, concatenating them together
		Kinda like running reduce and ~= on it.

		`separator` is appended after every member, including the last one.

		---
		document["p"].collect!"innerText";
		---
	+/
	string collect(string method)(string separator = "") {
		string joined;
		foreach(e; elements) {
			// `method` is spliced in as source code after "e."
			joined ~= mixin("e." ~ method);
			joined ~= separator;
		}
		return joined;
	}

	/// Forward method calls to each individual [Element|element] of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		foreach(e; elements)
			mixin("e." ~ name)(t);
		return this;
	}

	/++
		Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one.

		The clone is made with `cloneNode(false)`.
	+/
	ElementCollection wrapIn(Element what) {
		foreach(member; elements) {
			auto wrapper = what.cloneNode(false);
			member.wrapIn(wrapper);
		}

		return this;
	}

	/// Concatenates two ElementCollection together.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		auto combined = this.elements ~ rhs.elements;
		return ElementCollection(combined);
	}
}


/// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions.
///
/// The type mixing this in supplies `get(name)` and `set(name, value)`.
/// Group: implementations
mixin template JavascriptStyleDispatch() {
	/// `obj.name` reads through get; `obj.name = v` (a non-null `v`) writes through set.
	string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
		if(v is null)
			return get(name);
		return set(name, v);
	}

	/// `obj[key]` reads through get.
	string opIndex(string key) const {
		return get(key);
	}

	/// `obj[field] = value` writes through set.
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	// NOTE(review): D lowers `key in obj` to opBinaryRight!"in", not opBinary, and no
	// `fields` member is declared in this template - presumably why; confirm before fixing.
	string* opBinary(string op)(string key)  if(op == "in") {
		return key in fields;
	}
}

/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
/// Group: implementations
struct DataSet {
	/// Wraps `e`; all reads and writes go to its `data-*` attributes.
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Sets the attribute `"data-" ~ unCamelCase(name)` to `value` and returns `value`.
	string set(string name, string value) {
		_element.setAttribute("data-" ~ unCamelCase(name), value);
		return value;
	}

	/// Returns the attribute `"data-" ~ unCamelCase(name)` as reported by getAttribute.
	string get(string name) const {
		return _element.getAttribute("data-" ~ unCamelCase(name));
	}

	///
	mixin JavascriptStyleDispatch!();
}

/// Proxy object for attributes which will replace the main opDispatch eventually
/// Group: implementations
struct AttributeSet {
	/// Wraps `e`; all reads and writes go to its attributes.
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Sets attribute `name` to `value` and returns `value`.
	string set(string name, string value) {
		_element.setAttribute(name, value);
		return value;
	}

	/// Returns attribute `name` as reported by getAttribute.
	string get(string name) const {
		return _element.getAttribute(name);
	}

	///
	mixin JavascriptStyleDispatch!();
}



/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.

/// Group: implementations
struct ElementStyle {
	this(Element parent) {
		_element = parent;
	}

	Element _element;

	// Reference to the element's raw "style" attribute string. If the element
	// has no style attribute yet, an empty one is created first (casting away
	// const if needed) so there is always something to refer to.
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}

		assert(s !is null);
		return *s;
	}

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	/++
		Sets one css property and rewrites the style attribute.

		`name` may be camelCased ("backgroundColor") or the special "cssFloat";
		it is converted to the css spelling first. An empty `name` is ignored.
		A null or empty `value` removes the property.

		Existing declarations keep the order they already had in the attribute
		(a new property is added at the end), because declaration order is
		significant in css. If a property appears several times, the last
		value wins, as it does in [rules].

		Returns: `value`
	+/
	string set(string name, string value) {
		if(name.length == 0)
			return value;
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);

		// `order` remembers where each property first appeared; `values` holds
		// the most recent value seen for it
		string[] order;
		string[string] values;
		void record(string key, string val) {
			if(key !in values)
				order ~= key;
			values[key] = val;
		}

		// same splitting rules as rules(), but keeping the order
		foreach(rule; _attribute.split(";")) {
			rule = rule.strip();
			if(rule.length == 0)
				continue;
			auto idx = rule.indexOf(":");
			if(idx == -1)
				record(rule, "");
			else
				record(rule[0 .. idx].strip(), rule[idx + 1 .. $].strip());
		}

		record(name, value);

		string rebuilt = "";
		foreach(key; order) {
			auto val = values[key];
			if(val.length == 0) /* css can't do empty rules anyway so we'll use that to remove */
				continue;
			if(rebuilt.length)
				rebuilt ~= " ";
			rebuilt ~= key ~ ": " ~ val ~ ";";
		}

		_attribute = rebuilt;
		_element.setAttribute("style", _attribute); // this is to trigger the observer call

		return value;
	}

	/// Returns the value of one css property (camelCase names and "cssFloat" accepted), or null if it is not set.
	string get(string name) const {
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);
		auto r = rules();
		if(auto found = name in r)
			return *found;
		return null;
	}

	/// Parses the style attribute into a property-name => value map.
	///
	/// Declarations are split on ';' and then on the first ':'; both halves are
	/// stripped. A declaration without a ':' is stored with an empty value.
	string[string] rules() const {
		string[string] ret;
		foreach(rule;  _attribute.split(";")) {
			rule = rule.strip();
			if(rule.length == 0)
				continue;
			auto idx = rule.indexOf(":");
			if(idx == -1)
				ret[rule] = "";
			else {
				auto name = rule[0 .. idx].strip();
				auto value = rule[idx + 1 .. $].strip();

				ret[name] = value;
			}
		}

		return ret;
	}

	mixin JavascriptStyleDispatch!();
}

/// Converts a camel cased propertyName to a css style dashed property-name
///
/// Every ASCII capital letter becomes a dash followed by its lower case form;
/// everything else is copied through unchanged.
string unCamelCase(string a) {
	string ret;
	foreach(c; a)
		if((c >= 'A' && c <= 'Z')) {
			ret ~= '-';
			ret ~= cast(char) (c + ('a' - 'A'));
		} else
			ret ~= c;
	return ret;
}

/// Translates a css style property-name to a camel cased propertyName
///
/// Dashes are dropped and an ASCII lower case letter following a dash is
/// upper cased. Anything else following a dash is copied through as is.
string camelCase(string a) {
	string ret;
	bool justSawDash = false;
	foreach(c; a)
		if(c == '-') {
			justSawDash = true;
		} else if(justSawDash) {
			justSawDash = false;
			// ASCII-only mapping on the code unit: never builds a one-byte
			// string out of part of a multi-byte character
			ret ~= (c >= 'a' && c <= 'z') ? cast(char) (c - ('a' - 'A')) : c;
		} else
			ret ~= c;
	return ret;
}









// domconvenience ends }











// @safe:

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.

// Stripping them is useful for reading php as html.... but adding them
// is good for building php.

// I need to maintain compatibility with the way it is now too.

import std.string;
import std.exception;
import std.uri;
import std.array;
import std.range;

//import std.stdio;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.
/++
	This might belong in another module, but it represents a file with a mime type and some data.
	Document implements this interface with type = text/html (see Document.contentType for more info)
	and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
+/
/// Group: bonus_functionality
interface FileResource {
	/// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	@property string contentType() const;
	/// the data
	immutable(ubyte)[] getData() const;
	/++
		filename, return null if none

		History:
			Added December 25, 2020
	+/
	@property string filename() const;
}




/// Node type codes compared against [Element.nodeType]. Only the text node code is defined here.
/// Group: bonus_functionality
enum NodeType { Text = 3 }


/// You can use this to do an easy null check or a dynamic cast+null check on any element.
///
/// Returns `e` cast to `T`. Throws [ElementNotFoundException] (tagged with the
/// caller's file and line) if `e` is null or is not a `T`.
/// Group: core_functionality
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
do {
	auto ret = cast(T) e;
	if(ret is null)
		throw new ElementNotFoundException(T.stringof, "passed value", e, file, line);
	return ret;
}


/// An element with the tag name "#fragment" that writes out only its children.
/// Group: core_functionality
class DocumentFragment : Element {
	/// Creates an empty fragment belonging to `_parentDocument`.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/++
		Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it.

		Since: March 29, 2018 (or git tagged v2.1.0)
	+/
	this(Html html) {
		this(null);

		this.innerHTML = html.source;
	}

	/// Writes only the children (via innerHTML); the fragment itself produces no tag.
	override string writeToAppender(Appender!string where = appender!string()) const {
		return this.innerHTML(where);
	}

	/// Concatenates the pretty strings of the children, written at the fragment's own indentation level.
	override string toPrettyString(bool insertComments, int indentationLevel, string indentWith) const {
		string s;
		foreach(child; children)
			s ~= child.toPrettyString(insertComments, indentationLevel, indentWith);
		return s;
	}

	/// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes
	/*
	override inout(Element) parentNode() inout {
		return children.length ? children[0].parentNode : null;
	}
	*/
	/// Setting the fragment's parent also sets that same parent on each of its children.
	override Element parentNode(Element p) {
		this._parentNode = p;
		foreach(child; children)
			child.parentNode = p;
		return p;
	}
}

/// Given text, encode all html entities on it - `&`, `<`, `>`, and `"` (written out as
/// `&amp;`, `&lt;`, `&gt;`, and `&quot;`). This function also
/// encodes all 8 bit characters as entities, thus ensuring the resultant text will work
/// even if your charset isn't set right. You can suppress with by setting encodeNonAscii = false
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value will be usable for you, with just the data encoded.
/// Group: core_functionality
string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) {
	import std.conv : to;

	// if there's no entities, we can save a lot of time by not bothering with the
	// decoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.

	bool shortcut = true;
	foreach(char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if(shortcut) {
		// nothing to escape: the input is appended and returned as is
		output.put(data);
		return data;
	}

	// only what this call appends is returned, not what the buffer already held
	auto start = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	foreach(dchar d; data) {
		if(d == '&')
			output.put("&amp;");
		else if (d == '<')
			output.put("&lt;");
		else if (d == '>')
			output.put("&gt;");
		else if (d == '\"')
			output.put("&quot;");
//		else if (d == '\'')
//			output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
			// FIXME: should I encode apostrophes too? as &#39;... I could also do space but if your html is so bad that it doesn't
			// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
			// idk about apostrophes though. Might be worth it, might not.
		else if (!encodeNonAscii || (d < 128 && d > 0))
			output.put(d);
		else
			// numeric character reference, e.g. U+00E9 becomes "&#233;"
			output.put("&#" ~ to!string(cast(int) d) ~ ";");
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

//	data = data.replace("\u00a0", "&nbsp;");
}

/// An alias for htmlEntitiesEncode; it works for xml too
/// Group: core_functionality
string xmlEntitiesEncode(string data) {
	return htmlEntitiesEncode(data);
}

/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
4128 /// Group: core_functionality 4129 dchar parseEntity(in dchar[] entity) { 4130 switch(entity[1..$-1]) { 4131 case "quot": 4132 return '"'; 4133 case "apos": 4134 return '\''; 4135 case "lt": 4136 return '<'; 4137 case "gt": 4138 return '>'; 4139 case "amp": 4140 return '&'; 4141 // the next are html rather than xml 4142 4143 // Retrieved from https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references 4144 // Only entities that resolve to U+0009 ~ U+1D56B are stated. 4145 case "Tab": return '\u0009'; 4146 case "NewLine": return '\u000A'; 4147 case "excl": return '\u0021'; 4148 case "QUOT": return '\u0022'; 4149 case "num": return '\u0023'; 4150 case "dollar": return '\u0024'; 4151 case "percnt": return '\u0025'; 4152 case "AMP": return '\u0026'; 4153 case "lpar": return '\u0028'; 4154 case "rpar": return '\u0029'; 4155 case "ast": case "midast": return '\u002A'; 4156 case "plus": return '\u002B'; 4157 case "comma": return '\u002C'; 4158 case "period": return '\u002E'; 4159 case "sol": return '\u002F'; 4160 case "colon": return '\u003A'; 4161 case "semi": return '\u003B'; 4162 case "LT": return '\u003C'; 4163 case "equals": return '\u003D'; 4164 case "GT": return '\u003E'; 4165 case "quest": return '\u003F'; 4166 case "commat": return '\u0040'; 4167 case "lsqb": case "lbrack": return '\u005B'; 4168 case "bsol": return '\u005C'; 4169 case "rsqb": case "rbrack": return '\u005D'; 4170 case "Hat": return '\u005E'; 4171 case "lowbar": case "UnderBar": return '\u005F'; 4172 case "grave": case "DiacriticalGrave": return '\u0060'; 4173 case "lcub": case "lbrace": return '\u007B'; 4174 case "verbar": case "vert": case "VerticalLine": return '\u007C'; 4175 case "rcub": case "rbrace": return '\u007D'; 4176 case "nbsp": case "NonBreakingSpace": return '\u00A0'; 4177 case "iexcl": return '\u00A1'; 4178 case "cent": return '\u00A2'; 4179 case "pound": return '\u00A3'; 4180 case "curren": return '\u00A4'; 4181 case "yen": return '\u00A5'; 4182 case 
"brvbar": return '\u00A6'; 4183 case "sect": return '\u00A7'; 4184 case "Dot": case "die": case "DoubleDot": case "uml": return '\u00A8'; 4185 case "copy": case "COPY": return '\u00A9'; 4186 case "ordf": return '\u00AA'; 4187 case "laquo": return '\u00AB'; 4188 case "not": return '\u00AC'; 4189 case "shy": return '\u00AD'; 4190 case "reg": case "circledR": case "REG": return '\u00AE'; 4191 case "macr": case "strns": return '\u00AF'; 4192 case "deg": return '\u00B0'; 4193 case "plusmn": case "pm": case "PlusMinus": return '\u00B1'; 4194 case "sup2": return '\u00B2'; 4195 case "sup3": return '\u00B3'; 4196 case "acute": case "DiacriticalAcute": return '\u00B4'; 4197 case "micro": return '\u00B5'; 4198 case "para": return '\u00B6'; 4199 case "middot": case "centerdot": case "CenterDot": return '\u00B7'; 4200 case "cedil": case "Cedilla": return '\u00B8'; 4201 case "sup1": return '\u00B9'; 4202 case "ordm": return '\u00BA'; 4203 case "raquo": return '\u00BB'; 4204 case "frac14": return '\u00BC'; 4205 case "frac12": case "half": return '\u00BD'; 4206 case "frac34": return '\u00BE'; 4207 case "iquest": return '\u00BF'; 4208 case "Agrave": return '\u00C0'; 4209 case "Aacute": return '\u00C1'; 4210 case "Acirc": return '\u00C2'; 4211 case "Atilde": return '\u00C3'; 4212 case "Auml": return '\u00C4'; 4213 case "Aring": case "angst": return '\u00C5'; 4214 case "AElig": return '\u00C6'; 4215 case "Ccedil": return '\u00C7'; 4216 case "Egrave": return '\u00C8'; 4217 case "Eacute": return '\u00C9'; 4218 case "Ecirc": return '\u00CA'; 4219 case "Euml": return '\u00CB'; 4220 case "Igrave": return '\u00CC'; 4221 case "Iacute": return '\u00CD'; 4222 case "Icirc": return '\u00CE'; 4223 case "Iuml": return '\u00CF'; 4224 case "ETH": return '\u00D0'; 4225 case "Ntilde": return '\u00D1'; 4226 case "Ograve": return '\u00D2'; 4227 case "Oacute": return '\u00D3'; 4228 case "Ocirc": return '\u00D4'; 4229 case "Otilde": return '\u00D5'; 4230 case "Ouml": return '\u00D6'; 4231 case "times": 
return '\u00D7'; 4232 case "Oslash": return '\u00D8'; 4233 case "Ugrave": return '\u00D9'; 4234 case "Uacute": return '\u00DA'; 4235 case "Ucirc": return '\u00DB'; 4236 case "Uuml": return '\u00DC'; 4237 case "Yacute": return '\u00DD'; 4238 case "THORN": return '\u00DE'; 4239 case "szlig": return '\u00DF'; 4240 case "agrave": return '\u00E0'; 4241 case "aacute": return '\u00E1'; 4242 case "acirc": return '\u00E2'; 4243 case "atilde": return '\u00E3'; 4244 case "auml": return '\u00E4'; 4245 case "aring": return '\u00E5'; 4246 case "aelig": return '\u00E6'; 4247 case "ccedil": return '\u00E7'; 4248 case "egrave": return '\u00E8'; 4249 case "eacute": return '\u00E9'; 4250 case "ecirc": return '\u00EA'; 4251 case "euml": return '\u00EB'; 4252 case "igrave": return '\u00EC'; 4253 case "iacute": return '\u00ED'; 4254 case "icirc": return '\u00EE'; 4255 case "iuml": return '\u00EF'; 4256 case "eth": return '\u00F0'; 4257 case "ntilde": return '\u00F1'; 4258 case "ograve": return '\u00F2'; 4259 case "oacute": return '\u00F3'; 4260 case "ocirc": return '\u00F4'; 4261 case "otilde": return '\u00F5'; 4262 case "ouml": return '\u00F6'; 4263 case "divide": case "div": return '\u00F7'; 4264 case "oslash": return '\u00F8'; 4265 case "ugrave": return '\u00F9'; 4266 case "uacute": return '\u00FA'; 4267 case "ucirc": return '\u00FB'; 4268 case "uuml": return '\u00FC'; 4269 case "yacute": return '\u00FD'; 4270 case "thorn": return '\u00FE'; 4271 case "yuml": return '\u00FF'; 4272 case "Amacr": return '\u0100'; 4273 case "amacr": return '\u0101'; 4274 case "Abreve": return '\u0102'; 4275 case "abreve": return '\u0103'; 4276 case "Aogon": return '\u0104'; 4277 case "aogon": return '\u0105'; 4278 case "Cacute": return '\u0106'; 4279 case "cacute": return '\u0107'; 4280 case "Ccirc": return '\u0108'; 4281 case "ccirc": return '\u0109'; 4282 case "Cdot": return '\u010A'; 4283 case "cdot": return '\u010B'; 4284 case "Ccaron": return '\u010C'; 4285 case "ccaron": return '\u010D'; 4286 case 
"Dcaron": return '\u010E'; 4287 case "dcaron": return '\u010F'; 4288 case "Dstrok": return '\u0110'; 4289 case "dstrok": return '\u0111'; 4290 case "Emacr": return '\u0112'; 4291 case "emacr": return '\u0113'; 4292 case "Edot": return '\u0116'; 4293 case "edot": return '\u0117'; 4294 case "Eogon": return '\u0118'; 4295 case "eogon": return '\u0119'; 4296 case "Ecaron": return '\u011A'; 4297 case "ecaron": return '\u011B'; 4298 case "Gcirc": return '\u011C'; 4299 case "gcirc": return '\u011D'; 4300 case "Gbreve": return '\u011E'; 4301 case "gbreve": return '\u011F'; 4302 case "Gdot": return '\u0120'; 4303 case "gdot": return '\u0121'; 4304 case "Gcedil": return '\u0122'; 4305 case "Hcirc": return '\u0124'; 4306 case "hcirc": return '\u0125'; 4307 case "Hstrok": return '\u0126'; 4308 case "hstrok": return '\u0127'; 4309 case "Itilde": return '\u0128'; 4310 case "itilde": return '\u0129'; 4311 case "Imacr": return '\u012A'; 4312 case "imacr": return '\u012B'; 4313 case "Iogon": return '\u012E'; 4314 case "iogon": return '\u012F'; 4315 case "Idot": return '\u0130'; 4316 case "imath": case "inodot": return '\u0131'; 4317 case "IJlig": return '\u0132'; 4318 case "ijlig": return '\u0133'; 4319 case "Jcirc": return '\u0134'; 4320 case "jcirc": return '\u0135'; 4321 case "Kcedil": return '\u0136'; 4322 case "kcedil": return '\u0137'; 4323 case "kgreen": return '\u0138'; 4324 case "Lacute": return '\u0139'; 4325 case "lacute": return '\u013A'; 4326 case "Lcedil": return '\u013B'; 4327 case "lcedil": return '\u013C'; 4328 case "Lcaron": return '\u013D'; 4329 case "lcaron": return '\u013E'; 4330 case "Lmidot": return '\u013F'; 4331 case "lmidot": return '\u0140'; 4332 case "Lstrok": return '\u0141'; 4333 case "lstrok": return '\u0142'; 4334 case "Nacute": return '\u0143'; 4335 case "nacute": return '\u0144'; 4336 case "Ncedil": return '\u0145'; 4337 case "ncedil": return '\u0146'; 4338 case "Ncaron": return '\u0147'; 4339 case "ncaron": return '\u0148'; 4340 case "napos": 
return '\u0149'; 4341 case "ENG": return '\u014A'; 4342 case "eng": return '\u014B'; 4343 case "Omacr": return '\u014C'; 4344 case "omacr": return '\u014D'; 4345 case "Odblac": return '\u0150'; 4346 case "odblac": return '\u0151'; 4347 case "OElig": return '\u0152'; 4348 case "oelig": return '\u0153'; 4349 case "Racute": return '\u0154'; 4350 case "racute": return '\u0155'; 4351 case "Rcedil": return '\u0156'; 4352 case "rcedil": return '\u0157'; 4353 case "Rcaron": return '\u0158'; 4354 case "rcaron": return '\u0159'; 4355 case "Sacute": return '\u015A'; 4356 case "sacute": return '\u015B'; 4357 case "Scirc": return '\u015C'; 4358 case "scirc": return '\u015D'; 4359 case "Scedil": return '\u015E'; 4360 case "scedil": return '\u015F'; 4361 case "Scaron": return '\u0160'; 4362 case "scaron": return '\u0161'; 4363 case "Tcedil": return '\u0162'; 4364 case "tcedil": return '\u0163'; 4365 case "Tcaron": return '\u0164'; 4366 case "tcaron": return '\u0165'; 4367 case "Tstrok": return '\u0166'; 4368 case "tstrok": return '\u0167'; 4369 case "Utilde": return '\u0168'; 4370 case "utilde": return '\u0169'; 4371 case "Umacr": return '\u016A'; 4372 case "umacr": return '\u016B'; 4373 case "Ubreve": return '\u016C'; 4374 case "ubreve": return '\u016D'; 4375 case "Uring": return '\u016E'; 4376 case "uring": return '\u016F'; 4377 case "Udblac": return '\u0170'; 4378 case "udblac": return '\u0171'; 4379 case "Uogon": return '\u0172'; 4380 case "uogon": return '\u0173'; 4381 case "Wcirc": return '\u0174'; 4382 case "wcirc": return '\u0175'; 4383 case "Ycirc": return '\u0176'; 4384 case "ycirc": return '\u0177'; 4385 case "Yuml": return '\u0178'; 4386 case "Zacute": return '\u0179'; 4387 case "zacute": return '\u017A'; 4388 case "Zdot": return '\u017B'; 4389 case "zdot": return '\u017C'; 4390 case "Zcaron": return '\u017D'; 4391 case "zcaron": return '\u017E'; 4392 case "fnof": return '\u0192'; 4393 case "imped": return '\u01B5'; 4394 case "gacute": return '\u01F5'; 4395 case 
"jmath": return '\u0237'; 4396 case "circ": return '\u02C6'; 4397 case "caron": case "Hacek": return '\u02C7'; 4398 case "breve": case "Breve": return '\u02D8'; 4399 case "dot": case "DiacriticalDot": return '\u02D9'; 4400 case "ring": return '\u02DA'; 4401 case "ogon": return '\u02DB'; 4402 case "tilde": case "DiacriticalTilde": return '\u02DC'; 4403 case "dblac": case "DiacriticalDoubleAcute": return '\u02DD'; 4404 case "DownBreve": return '\u0311'; 4405 case "Alpha": return '\u0391'; 4406 case "Beta": return '\u0392'; 4407 case "Gamma": return '\u0393'; 4408 case "Delta": return '\u0394'; 4409 case "Epsilon": return '\u0395'; 4410 case "Zeta": return '\u0396'; 4411 case "Eta": return '\u0397'; 4412 case "Theta": return '\u0398'; 4413 case "Iota": return '\u0399'; 4414 case "Kappa": return '\u039A'; 4415 case "Lambda": return '\u039B'; 4416 case "Mu": return '\u039C'; 4417 case "Nu": return '\u039D'; 4418 case "Xi": return '\u039E'; 4419 case "Omicron": return '\u039F'; 4420 case "Pi": return '\u03A0'; 4421 case "Rho": return '\u03A1'; 4422 case "Sigma": return '\u03A3'; 4423 case "Tau": return '\u03A4'; 4424 case "Upsilon": return '\u03A5'; 4425 case "Phi": return '\u03A6'; 4426 case "Chi": return '\u03A7'; 4427 case "Psi": return '\u03A8'; 4428 case "Omega": case "ohm": return '\u03A9'; 4429 case "alpha": return '\u03B1'; 4430 case "beta": return '\u03B2'; 4431 case "gamma": return '\u03B3'; 4432 case "delta": return '\u03B4'; 4433 case "epsi": case "epsilon": return '\u03B5'; 4434 case "zeta": return '\u03B6'; 4435 case "eta": return '\u03B7'; 4436 case "theta": return '\u03B8'; 4437 case "iota": return '\u03B9'; 4438 case "kappa": return '\u03BA'; 4439 case "lambda": return '\u03BB'; 4440 case "mu": return '\u03BC'; 4441 case "nu": return '\u03BD'; 4442 case "xi": return '\u03BE'; 4443 case "omicron": return '\u03BF'; 4444 case "pi": return '\u03C0'; 4445 case "rho": return '\u03C1'; 4446 case "sigmav": case "varsigma": case "sigmaf": return '\u03C2'; 4447 
case "sigma": return '\u03C3'; 4448 case "tau": return '\u03C4'; 4449 case "upsi": case "upsilon": return '\u03C5'; 4450 case "phi": return '\u03C6'; 4451 case "chi": return '\u03C7'; 4452 case "psi": return '\u03C8'; 4453 case "omega": return '\u03C9'; 4454 case "thetav": case "vartheta": case "thetasym": return '\u03D1'; 4455 case "Upsi": case "upsih": return '\u03D2'; 4456 case "straightphi": case "phiv": case "varphi": return '\u03D5'; 4457 case "piv": case "varpi": return '\u03D6'; 4458 case "Gammad": return '\u03DC'; 4459 case "gammad": case "digamma": return '\u03DD'; 4460 case "kappav": case "varkappa": return '\u03F0'; 4461 case "rhov": case "varrho": return '\u03F1'; 4462 case "epsiv": case "varepsilon": case "straightepsilon": return '\u03F5'; 4463 case "bepsi": case "backepsilon": return '\u03F6'; 4464 case "IOcy": return '\u0401'; 4465 case "DJcy": return '\u0402'; 4466 case "GJcy": return '\u0403'; 4467 case "Jukcy": return '\u0404'; 4468 case "DScy": return '\u0405'; 4469 case "Iukcy": return '\u0406'; 4470 case "YIcy": return '\u0407'; 4471 case "Jsercy": return '\u0408'; 4472 case "LJcy": return '\u0409'; 4473 case "NJcy": return '\u040A'; 4474 case "TSHcy": return '\u040B'; 4475 case "KJcy": return '\u040C'; 4476 case "Ubrcy": return '\u040E'; 4477 case "DZcy": return '\u040F'; 4478 case "Acy": return '\u0410'; 4479 case "Bcy": return '\u0411'; 4480 case "Vcy": return '\u0412'; 4481 case "Gcy": return '\u0413'; 4482 case "Dcy": return '\u0414'; 4483 case "IEcy": return '\u0415'; 4484 case "ZHcy": return '\u0416'; 4485 case "Zcy": return '\u0417'; 4486 case "Icy": return '\u0418'; 4487 case "Jcy": return '\u0419'; 4488 case "Kcy": return '\u041A'; 4489 case "Lcy": return '\u041B'; 4490 case "Mcy": return '\u041C'; 4491 case "Ncy": return '\u041D'; 4492 case "Ocy": return '\u041E'; 4493 case "Pcy": return '\u041F'; 4494 case "Rcy": return '\u0420'; 4495 case "Scy": return '\u0421'; 4496 case "Tcy": return '\u0422'; 4497 case "Ucy": return '\u0423'; 
4498 case "Fcy": return '\u0424'; 4499 case "KHcy": return '\u0425'; 4500 case "TScy": return '\u0426'; 4501 case "CHcy": return '\u0427'; 4502 case "SHcy": return '\u0428'; 4503 case "SHCHcy": return '\u0429'; 4504 case "HARDcy": return '\u042A'; 4505 case "Ycy": return '\u042B'; 4506 case "SOFTcy": return '\u042C'; 4507 case "Ecy": return '\u042D'; 4508 case "YUcy": return '\u042E'; 4509 case "YAcy": return '\u042F'; 4510 case "acy": return '\u0430'; 4511 case "bcy": return '\u0431'; 4512 case "vcy": return '\u0432'; 4513 case "gcy": return '\u0433'; 4514 case "dcy": return '\u0434'; 4515 case "iecy": return '\u0435'; 4516 case "zhcy": return '\u0436'; 4517 case "zcy": return '\u0437'; 4518 case "icy": return '\u0438'; 4519 case "jcy": return '\u0439'; 4520 case "kcy": return '\u043A'; 4521 case "lcy": return '\u043B'; 4522 case "mcy": return '\u043C'; 4523 case "ncy": return '\u043D'; 4524 case "ocy": return '\u043E'; 4525 case "pcy": return '\u043F'; 4526 case "rcy": return '\u0440'; 4527 case "scy": return '\u0441'; 4528 case "tcy": return '\u0442'; 4529 case "ucy": return '\u0443'; 4530 case "fcy": return '\u0444'; 4531 case "khcy": return '\u0445'; 4532 case "tscy": return '\u0446'; 4533 case "chcy": return '\u0447'; 4534 case "shcy": return '\u0448'; 4535 case "shchcy": return '\u0449'; 4536 case "hardcy": return '\u044A'; 4537 case "ycy": return '\u044B'; 4538 case "softcy": return '\u044C'; 4539 case "ecy": return '\u044D'; 4540 case "yucy": return '\u044E'; 4541 case "yacy": return '\u044F'; 4542 case "iocy": return '\u0451'; 4543 case "djcy": return '\u0452'; 4544 case "gjcy": return '\u0453'; 4545 case "jukcy": return '\u0454'; 4546 case "dscy": return '\u0455'; 4547 case "iukcy": return '\u0456'; 4548 case "yicy": return '\u0457'; 4549 case "jsercy": return '\u0458'; 4550 case "ljcy": return '\u0459'; 4551 case "njcy": return '\u045A'; 4552 case "tshcy": return '\u045B'; 4553 case "kjcy": return '\u045C'; 4554 case "ubrcy": return '\u045E'; 4555 case 
"dzcy": return '\u045F'; 4556 case "ensp": return '\u2002'; 4557 case "emsp": return '\u2003'; 4558 case "emsp13": return '\u2004'; 4559 case "emsp14": return '\u2005'; 4560 case "numsp": return '\u2007'; 4561 case "puncsp": return '\u2008'; 4562 case "thinsp": case "ThinSpace": return '\u2009'; 4563 case "hairsp": case "VeryThinSpace": return '\u200A'; 4564 case "ZeroWidthSpace": case "NegativeVeryThinSpace": case "NegativeThinSpace": case "NegativeMediumSpace": case "NegativeThickSpace": return '\u200B'; 4565 case "zwnj": return '\u200C'; 4566 case "zwj": return '\u200D'; 4567 case "lrm": return '\u200E'; 4568 case "rlm": return '\u200F'; 4569 case "hyphen": case "dash": return '\u2010'; 4570 case "ndash": return '\u2013'; 4571 case "mdash": return '\u2014'; 4572 case "horbar": return '\u2015'; 4573 case "Verbar": case "Vert": return '\u2016'; 4574 case "lsquo": case "OpenCurlyQuote": return '\u2018'; 4575 case "rsquo": case "rsquor": case "CloseCurlyQuote": return '\u2019'; 4576 case "lsquor": case "sbquo": return '\u201A'; 4577 case "ldquo": case "OpenCurlyDoubleQuote": return '\u201C'; 4578 case "rdquo": case "rdquor": case "CloseCurlyDoubleQuote": return '\u201D'; 4579 case "ldquor": case "bdquo": return '\u201E'; 4580 case "dagger": return '\u2020'; 4581 case "Dagger": case "ddagger": return '\u2021'; 4582 case "bull": case "bullet": return '\u2022'; 4583 case "nldr": return '\u2025'; 4584 case "hellip": case "mldr": return '\u2026'; 4585 case "permil": return '\u2030'; 4586 case "pertenk": return '\u2031'; 4587 case "prime": return '\u2032'; 4588 case "Prime": return '\u2033'; 4589 case "tprime": return '\u2034'; 4590 case "bprime": case "backprime": return '\u2035'; 4591 case "lsaquo": return '\u2039'; 4592 case "rsaquo": return '\u203A'; 4593 case "oline": case "OverBar": return '\u203E'; 4594 case "caret": return '\u2041'; 4595 case "hybull": return '\u2043'; 4596 case "frasl": return '\u2044'; 4597 case "bsemi": return '\u204F'; 4598 case "qprime": 
return '\u2057'; 4599 case "MediumSpace": return '\u205F'; 4600 case "NoBreak": return '\u2060'; 4601 case "ApplyFunction": case "af": return '\u2061'; 4602 case "InvisibleTimes": case "it": return '\u2062'; 4603 case "InvisibleComma": case "ic": return '\u2063'; 4604 case "euro": return '\u20AC'; 4605 case "tdot": case "TripleDot": return '\u20DB'; 4606 case "DotDot": return '\u20DC'; 4607 case "Copf": case "complexes": return '\u2102'; 4608 case "incare": return '\u2105'; 4609 case "gscr": return '\u210A'; 4610 case "hamilt": case "HilbertSpace": case "Hscr": return '\u210B'; 4611 case "Hfr": case "Poincareplane": return '\u210C'; 4612 case "quaternions": case "Hopf": return '\u210D'; 4613 case "planckh": return '\u210E'; 4614 case "planck": case "hbar": case "plankv": case "hslash": return '\u210F'; 4615 case "Iscr": case "imagline": return '\u2110'; 4616 case "image": case "Im": case "imagpart": case "Ifr": return '\u2111'; 4617 case "Lscr": case "lagran": case "Laplacetrf": return '\u2112'; 4618 case "ell": return '\u2113'; 4619 case "Nopf": case "naturals": return '\u2115'; 4620 case "numero": return '\u2116'; 4621 case "copysr": return '\u2117'; 4622 case "weierp": case "wp": return '\u2118'; 4623 case "Popf": case "primes": return '\u2119'; 4624 case "rationals": case "Qopf": return '\u211A'; 4625 case "Rscr": case "realine": return '\u211B'; 4626 case "real": case "Re": case "realpart": case "Rfr": return '\u211C'; 4627 case "reals": case "Ropf": return '\u211D'; 4628 case "rx": return '\u211E'; 4629 case "trade": case "TRADE": return '\u2122'; 4630 case "integers": case "Zopf": return '\u2124'; 4631 case "mho": return '\u2127'; 4632 case "Zfr": case "zeetrf": return '\u2128'; 4633 case "iiota": return '\u2129'; 4634 case "bernou": case "Bernoullis": case "Bscr": return '\u212C'; 4635 case "Cfr": case "Cayleys": return '\u212D'; 4636 case "escr": return '\u212F'; 4637 case "Escr": case "expectation": return '\u2130'; 4638 case "Fscr": case "Fouriertrf": 
return '\u2131'; 4639 case "phmmat": case "Mellintrf": case "Mscr": return '\u2133'; 4640 case "order": case "orderof": case "oscr": return '\u2134'; 4641 case "alefsym": case "aleph": return '\u2135'; 4642 case "beth": return '\u2136'; 4643 case "gimel": return '\u2137'; 4644 case "daleth": return '\u2138'; 4645 case "CapitalDifferentialD": case "DD": return '\u2145'; 4646 case "DifferentialD": case "dd": return '\u2146'; 4647 case "ExponentialE": case "exponentiale": case "ee": return '\u2147'; 4648 case "ImaginaryI": case "ii": return '\u2148'; 4649 case "frac13": return '\u2153'; 4650 case "frac23": return '\u2154'; 4651 case "frac15": return '\u2155'; 4652 case "frac25": return '\u2156'; 4653 case "frac35": return '\u2157'; 4654 case "frac45": return '\u2158'; 4655 case "frac16": return '\u2159'; 4656 case "frac56": return '\u215A'; 4657 case "frac18": return '\u215B'; 4658 case "frac38": return '\u215C'; 4659 case "frac58": return '\u215D'; 4660 case "frac78": return '\u215E'; 4661 case "larr": case "leftarrow": case "LeftArrow": case "slarr": case "ShortLeftArrow": return '\u2190'; 4662 case "uarr": case "uparrow": case "UpArrow": case "ShortUpArrow": return '\u2191'; 4663 case "rarr": case "rightarrow": case "RightArrow": case "srarr": case "ShortRightArrow": return '\u2192'; 4664 case "darr": case "downarrow": case "DownArrow": case "ShortDownArrow": return '\u2193'; 4665 case "harr": case "leftrightarrow": case "LeftRightArrow": return '\u2194'; 4666 case "varr": case "updownarrow": case "UpDownArrow": return '\u2195'; 4667 case "nwarr": case "UpperLeftArrow": case "nwarrow": return '\u2196'; 4668 case "nearr": case "UpperRightArrow": case "nearrow": return '\u2197'; 4669 case "searr": case "searrow": case "LowerRightArrow": return '\u2198'; 4670 case "swarr": case "swarrow": case "LowerLeftArrow": return '\u2199'; 4671 case "nlarr": case "nleftarrow": return '\u219A'; 4672 case "nrarr": case "nrightarrow": return '\u219B'; 4673 case "rarrw": case 
"rightsquigarrow": return '\u219D'; 4674 case "Larr": case "twoheadleftarrow": return '\u219E'; 4675 case "Uarr": return '\u219F'; 4676 case "Rarr": case "twoheadrightarrow": return '\u21A0'; 4677 case "Darr": return '\u21A1'; 4678 case "larrtl": case "leftarrowtail": return '\u21A2'; 4679 case "rarrtl": case "rightarrowtail": return '\u21A3'; 4680 case "LeftTeeArrow": case "mapstoleft": return '\u21A4'; 4681 case "UpTeeArrow": case "mapstoup": return '\u21A5'; 4682 case "map": case "RightTeeArrow": case "mapsto": return '\u21A6'; 4683 case "DownTeeArrow": case "mapstodown": return '\u21A7'; 4684 case "larrhk": case "hookleftarrow": return '\u21A9'; 4685 case "rarrhk": case "hookrightarrow": return '\u21AA'; 4686 case "larrlp": case "looparrowleft": return '\u21AB'; 4687 case "rarrlp": case "looparrowright": return '\u21AC'; 4688 case "harrw": case "leftrightsquigarrow": return '\u21AD'; 4689 case "nharr": case "nleftrightarrow": return '\u21AE'; 4690 case "lsh": case "Lsh": return '\u21B0'; 4691 case "rsh": case "Rsh": return '\u21B1'; 4692 case "ldsh": return '\u21B2'; 4693 case "rdsh": return '\u21B3'; 4694 case "crarr": return '\u21B5'; 4695 case "cularr": case "curvearrowleft": return '\u21B6'; 4696 case "curarr": case "curvearrowright": return '\u21B7'; 4697 case "olarr": case "circlearrowleft": return '\u21BA'; 4698 case "orarr": case "circlearrowright": return '\u21BB'; 4699 case "lharu": case "LeftVector": case "leftharpoonup": return '\u21BC'; 4700 case "lhard": case "leftharpoondown": case "DownLeftVector": return '\u21BD'; 4701 case "uharr": case "upharpoonright": case "RightUpVector": return '\u21BE'; 4702 case "uharl": case "upharpoonleft": case "LeftUpVector": return '\u21BF'; 4703 case "rharu": case "RightVector": case "rightharpoonup": return '\u21C0'; 4704 case "rhard": case "rightharpoondown": case "DownRightVector": return '\u21C1'; 4705 case "dharr": case "RightDownVector": case "downharpoonright": return '\u21C2'; 4706 case "dharl": case 
"LeftDownVector": case "downharpoonleft": return '\u21C3'; 4707 case "rlarr": case "rightleftarrows": case "RightArrowLeftArrow": return '\u21C4'; 4708 case "udarr": case "UpArrowDownArrow": return '\u21C5'; 4709 case "lrarr": case "leftrightarrows": case "LeftArrowRightArrow": return '\u21C6'; 4710 case "llarr": case "leftleftarrows": return '\u21C7'; 4711 case "uuarr": case "upuparrows": return '\u21C8'; 4712 case "rrarr": case "rightrightarrows": return '\u21C9'; 4713 case "ddarr": case "downdownarrows": return '\u21CA'; 4714 case "lrhar": case "ReverseEquilibrium": case "leftrightharpoons": return '\u21CB'; 4715 case "rlhar": case "rightleftharpoons": case "Equilibrium": return '\u21CC'; 4716 case "nlArr": case "nLeftarrow": return '\u21CD'; 4717 case "nhArr": case "nLeftrightarrow": return '\u21CE'; 4718 case "nrArr": case "nRightarrow": return '\u21CF'; 4719 case "lArr": case "Leftarrow": case "DoubleLeftArrow": return '\u21D0'; 4720 case "uArr": case "Uparrow": case "DoubleUpArrow": return '\u21D1'; 4721 case "rArr": case "Rightarrow": case "Implies": case "DoubleRightArrow": return '\u21D2'; 4722 case "dArr": case "Downarrow": case "DoubleDownArrow": return '\u21D3'; 4723 case "hArr": case "Leftrightarrow": case "DoubleLeftRightArrow": case "iff": return '\u21D4'; 4724 case "vArr": case "Updownarrow": case "DoubleUpDownArrow": return '\u21D5'; 4725 case "nwArr": return '\u21D6'; 4726 case "neArr": return '\u21D7'; 4727 case "seArr": return '\u21D8'; 4728 case "swArr": return '\u21D9'; 4729 case "lAarr": case "Lleftarrow": return '\u21DA'; 4730 case "rAarr": case "Rrightarrow": return '\u21DB'; 4731 case "zigrarr": return '\u21DD'; 4732 case "larrb": case "LeftArrowBar": return '\u21E4'; 4733 case "rarrb": case "RightArrowBar": return '\u21E5'; 4734 case "duarr": case "DownArrowUpArrow": return '\u21F5'; 4735 case "loarr": return '\u21FD'; 4736 case "roarr": return '\u21FE'; 4737 case "hoarr": return '\u21FF'; 4738 case "forall": case "ForAll": return 
'\u2200'; 4739 case "comp": case "complement": return '\u2201'; 4740 case "part": case "PartialD": return '\u2202'; 4741 case "exist": case "Exists": return '\u2203'; 4742 case "nexist": case "NotExists": case "nexists": return '\u2204'; 4743 case "empty": case "emptyset": case "emptyv": case "varnothing": return '\u2205'; 4744 case "nabla": case "Del": return '\u2207'; 4745 case "isin": case "isinv": case "Element": case "in": return '\u2208'; 4746 case "notin": case "NotElement": case "notinva": return '\u2209'; 4747 case "niv": case "ReverseElement": case "ni": case "SuchThat": return '\u220B'; 4748 case "notni": case "notniva": case "NotReverseElement": return '\u220C'; 4749 case "prod": case "Product": return '\u220F'; 4750 case "coprod": case "Coproduct": return '\u2210'; 4751 case "sum": case "Sum": return '\u2211'; 4752 case "minus": return '\u2212'; 4753 case "mnplus": case "mp": case "MinusPlus": return '\u2213'; 4754 case "plusdo": case "dotplus": return '\u2214'; 4755 case "setmn": case "setminus": case "Backslash": case "ssetmn": case "smallsetminus": return '\u2216'; 4756 case "lowast": return '\u2217'; 4757 case "compfn": case "SmallCircle": return '\u2218'; 4758 case "radic": case "Sqrt": return '\u221A'; 4759 case "prop": case "propto": case "Proportional": case "vprop": case "varpropto": return '\u221D'; 4760 case "infin": return '\u221E'; 4761 case "angrt": return '\u221F'; 4762 case "ang": case "angle": return '\u2220'; 4763 case "angmsd": case "measuredangle": return '\u2221'; 4764 case "angsph": return '\u2222'; 4765 case "mid": case "VerticalBar": case "smid": case "shortmid": return '\u2223'; 4766 case "nmid": case "NotVerticalBar": case "nsmid": case "nshortmid": return '\u2224'; 4767 case "par": case "parallel": case "DoubleVerticalBar": case "spar": case "shortparallel": return '\u2225'; 4768 case "npar": case "nparallel": case "NotDoubleVerticalBar": case "nspar": case "nshortparallel": return '\u2226'; 4769 case "and": case "wedge": 
return '\u2227'; 4770 case "or": case "vee": return '\u2228'; 4771 case "cap": return '\u2229'; 4772 case "cup": return '\u222A'; 4773 case "int": case "Integral": return '\u222B'; 4774 case "Int": return '\u222C'; 4775 case "tint": case "iiint": return '\u222D'; 4776 case "conint": case "oint": case "ContourIntegral": return '\u222E'; 4777 case "Conint": case "DoubleContourIntegral": return '\u222F'; 4778 case "Cconint": return '\u2230'; 4779 case "cwint": return '\u2231'; 4780 case "cwconint": case "ClockwiseContourIntegral": return '\u2232'; 4781 case "awconint": case "CounterClockwiseContourIntegral": return '\u2233'; 4782 case "there4": case "therefore": case "Therefore": return '\u2234'; 4783 case "becaus": case "because": case "Because": return '\u2235'; 4784 case "ratio": return '\u2236'; 4785 case "Colon": case "Proportion": return '\u2237'; 4786 case "minusd": case "dotminus": return '\u2238'; 4787 case "mDDot": return '\u223A'; 4788 case "homtht": return '\u223B'; 4789 case "sim": case "Tilde": case "thksim": case "thicksim": return '\u223C'; 4790 case "bsim": case "backsim": return '\u223D'; 4791 case "ac": case "mstpos": return '\u223E'; 4792 case "acd": return '\u223F'; 4793 case "wreath": case "VerticalTilde": case "wr": return '\u2240'; 4794 case "nsim": case "NotTilde": return '\u2241'; 4795 case "esim": case "EqualTilde": case "eqsim": return '\u2242'; 4796 case "sime": case "TildeEqual": case "simeq": return '\u2243'; 4797 case "nsime": case "nsimeq": case "NotTildeEqual": return '\u2244'; 4798 case "cong": case "TildeFullEqual": return '\u2245'; 4799 case "simne": return '\u2246'; 4800 case "ncong": case "NotTildeFullEqual": return '\u2247'; 4801 case "asymp": case "ap": case "TildeTilde": case "approx": case "thkap": case "thickapprox": return '\u2248'; 4802 case "nap": case "NotTildeTilde": case "napprox": return '\u2249'; 4803 case "ape": case "approxeq": return '\u224A'; 4804 case "apid": return '\u224B'; 4805 case "bcong": case "backcong": 
return '\u224C'; 4806 case "asympeq": case "CupCap": return '\u224D'; 4807 case "bump": case "HumpDownHump": case "Bumpeq": return '\u224E'; 4808 case "bumpe": case "HumpEqual": case "bumpeq": return '\u224F'; 4809 case "esdot": case "DotEqual": case "doteq": return '\u2250'; 4810 case "eDot": case "doteqdot": return '\u2251'; 4811 case "efDot": case "fallingdotseq": return '\u2252'; 4812 case "erDot": case "risingdotseq": return '\u2253'; 4813 case "colone": case "coloneq": case "Assign": return '\u2254'; 4814 case "ecolon": case "eqcolon": return '\u2255'; 4815 case "ecir": case "eqcirc": return '\u2256'; 4816 case "cire": case "circeq": return '\u2257'; 4817 case "wedgeq": return '\u2259'; 4818 case "veeeq": return '\u225A'; 4819 case "trie": case "triangleq": return '\u225C'; 4820 case "equest": case "questeq": return '\u225F'; 4821 case "ne": case "NotEqual": return '\u2260'; 4822 case "equiv": case "Congruent": return '\u2261'; 4823 case "nequiv": case "NotCongruent": return '\u2262'; 4824 case "le": case "leq": return '\u2264'; 4825 case "ge": case "GreaterEqual": case "geq": return '\u2265'; 4826 case "lE": case "LessFullEqual": case "leqq": return '\u2266'; 4827 case "gE": case "GreaterFullEqual": case "geqq": return '\u2267'; 4828 case "lnE": case "lneqq": return '\u2268'; 4829 case "gnE": case "gneqq": return '\u2269'; 4830 case "Lt": case "NestedLessLess": case "ll": return '\u226A'; 4831 case "Gt": case "NestedGreaterGreater": case "gg": return '\u226B'; 4832 case "twixt": case "between": return '\u226C'; 4833 case "NotCupCap": return '\u226D'; 4834 case "nlt": case "NotLess": case "nless": return '\u226E'; 4835 case "ngt": case "NotGreater": case "ngtr": return '\u226F'; 4836 case "nle": case "NotLessEqual": case "nleq": return '\u2270'; 4837 case "nge": case "NotGreaterEqual": case "ngeq": return '\u2271'; 4838 case "lsim": case "LessTilde": case "lesssim": return '\u2272'; 4839 case "gsim": case "gtrsim": case "GreaterTilde": return '\u2273'; 4840 
case "nlsim": case "NotLessTilde": return '\u2274'; 4841 case "ngsim": case "NotGreaterTilde": return '\u2275'; 4842 case "lg": case "lessgtr": case "LessGreater": return '\u2276'; 4843 case "gl": case "gtrless": case "GreaterLess": return '\u2277'; 4844 case "ntlg": case "NotLessGreater": return '\u2278'; 4845 case "ntgl": case "NotGreaterLess": return '\u2279'; 4846 case "pr": case "Precedes": case "prec": return '\u227A'; 4847 case "sc": case "Succeeds": case "succ": return '\u227B'; 4848 case "prcue": case "PrecedesSlantEqual": case "preccurlyeq": return '\u227C'; 4849 case "sccue": case "SucceedsSlantEqual": case "succcurlyeq": return '\u227D'; 4850 case "prsim": case "precsim": case "PrecedesTilde": return '\u227E'; 4851 case "scsim": case "succsim": case "SucceedsTilde": return '\u227F'; 4852 case "npr": case "nprec": case "NotPrecedes": return '\u2280'; 4853 case "nsc": case "nsucc": case "NotSucceeds": return '\u2281'; 4854 case "sub": case "subset": return '\u2282'; 4855 case "sup": case "supset": case "Superset": return '\u2283'; 4856 case "nsub": return '\u2284'; 4857 case "nsup": return '\u2285'; 4858 case "sube": case "SubsetEqual": case "subseteq": return '\u2286'; 4859 case "supe": case "supseteq": case "SupersetEqual": return '\u2287'; 4860 case "nsube": case "nsubseteq": case "NotSubsetEqual": return '\u2288'; 4861 case "nsupe": case "nsupseteq": case "NotSupersetEqual": return '\u2289'; 4862 case "subne": case "subsetneq": return '\u228A'; 4863 case "supne": case "supsetneq": return '\u228B'; 4864 case "cupdot": return '\u228D'; 4865 case "uplus": case "UnionPlus": return '\u228E'; 4866 case "sqsub": case "SquareSubset": case "sqsubset": return '\u228F'; 4867 case "sqsup": case "SquareSuperset": case "sqsupset": return '\u2290'; 4868 case "sqsube": case "SquareSubsetEqual": case "sqsubseteq": return '\u2291'; 4869 case "sqsupe": case "SquareSupersetEqual": case "sqsupseteq": return '\u2292'; 4870 case "sqcap": case "SquareIntersection": return 
'\u2293'; 4871 case "sqcup": case "SquareUnion": return '\u2294'; 4872 case "oplus": case "CirclePlus": return '\u2295'; 4873 case "ominus": case "CircleMinus": return '\u2296'; 4874 case "otimes": case "CircleTimes": return '\u2297'; 4875 case "osol": return '\u2298'; 4876 case "odot": case "CircleDot": return '\u2299'; 4877 case "ocir": case "circledcirc": return '\u229A'; 4878 case "oast": case "circledast": return '\u229B'; 4879 case "odash": case "circleddash": return '\u229D'; 4880 case "plusb": case "boxplus": return '\u229E'; 4881 case "minusb": case "boxminus": return '\u229F'; 4882 case "timesb": case "boxtimes": return '\u22A0'; 4883 case "sdotb": case "dotsquare": return '\u22A1'; 4884 case "vdash": case "RightTee": return '\u22A2'; 4885 case "dashv": case "LeftTee": return '\u22A3'; 4886 case "top": case "DownTee": return '\u22A4'; 4887 case "bottom": case "bot": case "perp": case "UpTee": return '\u22A5'; 4888 case "models": return '\u22A7'; 4889 case "vDash": case "DoubleRightTee": return '\u22A8'; 4890 case "Vdash": return '\u22A9'; 4891 case "Vvdash": return '\u22AA'; 4892 case "VDash": return '\u22AB'; 4893 case "nvdash": return '\u22AC'; 4894 case "nvDash": return '\u22AD'; 4895 case "nVdash": return '\u22AE'; 4896 case "nVDash": return '\u22AF'; 4897 case "prurel": return '\u22B0'; 4898 case "vltri": case "vartriangleleft": case "LeftTriangle": return '\u22B2'; 4899 case "vrtri": case "vartriangleright": case "RightTriangle": return '\u22B3'; 4900 case "ltrie": case "trianglelefteq": case "LeftTriangleEqual": return '\u22B4'; 4901 case "rtrie": case "trianglerighteq": case "RightTriangleEqual": return '\u22B5'; 4902 case "origof": return '\u22B6'; 4903 case "imof": return '\u22B7'; 4904 case "mumap": case "multimap": return '\u22B8'; 4905 case "hercon": return '\u22B9'; 4906 case "intcal": case "intercal": return '\u22BA'; 4907 case "veebar": return '\u22BB'; 4908 case "barvee": return '\u22BD'; 4909 case "angrtvb": return '\u22BE'; 4910 case 
"lrtri": return '\u22BF'; 4911 case "xwedge": case "Wedge": case "bigwedge": return '\u22C0'; 4912 case "xvee": case "Vee": case "bigvee": return '\u22C1'; 4913 case "xcap": case "Intersection": case "bigcap": return '\u22C2'; 4914 case "xcup": case "Union": case "bigcup": return '\u22C3'; 4915 case "diam": case "diamond": case "Diamond": return '\u22C4'; 4916 case "sdot": return '\u22C5'; 4917 case "sstarf": case "Star": return '\u22C6'; 4918 case "divonx": case "divideontimes": return '\u22C7'; 4919 case "bowtie": return '\u22C8'; 4920 case "ltimes": return '\u22C9'; 4921 case "rtimes": return '\u22CA'; 4922 case "lthree": case "leftthreetimes": return '\u22CB'; 4923 case "rthree": case "rightthreetimes": return '\u22CC'; 4924 case "bsime": case "backsimeq": return '\u22CD'; 4925 case "cuvee": case "curlyvee": return '\u22CE'; 4926 case "cuwed": case "curlywedge": return '\u22CF'; 4927 case "Sub": case "Subset": return '\u22D0'; 4928 case "Sup": case "Supset": return '\u22D1'; 4929 case "Cap": return '\u22D2'; 4930 case "Cup": return '\u22D3'; 4931 case "fork": case "pitchfork": return '\u22D4'; 4932 case "epar": return '\u22D5'; 4933 case "ltdot": case "lessdot": return '\u22D6'; 4934 case "gtdot": case "gtrdot": return '\u22D7'; 4935 case "Ll": return '\u22D8'; 4936 case "Gg": case "ggg": return '\u22D9'; 4937 case "leg": case "LessEqualGreater": case "lesseqgtr": return '\u22DA'; 4938 case "gel": case "gtreqless": case "GreaterEqualLess": return '\u22DB'; 4939 case "cuepr": case "curlyeqprec": return '\u22DE'; 4940 case "cuesc": case "curlyeqsucc": return '\u22DF'; 4941 case "nprcue": case "NotPrecedesSlantEqual": return '\u22E0'; 4942 case "nsccue": case "NotSucceedsSlantEqual": return '\u22E1'; 4943 case "nsqsube": case "NotSquareSubsetEqual": return '\u22E2'; 4944 case "nsqsupe": case "NotSquareSupersetEqual": return '\u22E3'; 4945 case "lnsim": return '\u22E6'; 4946 case "gnsim": return '\u22E7'; 4947 case "prnsim": case "precnsim": return '\u22E8'; 4948 
case "scnsim": case "succnsim": return '\u22E9'; 4949 case "nltri": case "ntriangleleft": case "NotLeftTriangle": return '\u22EA'; 4950 case "nrtri": case "ntriangleright": case "NotRightTriangle": return '\u22EB'; 4951 case "nltrie": case "ntrianglelefteq": case "NotLeftTriangleEqual": return '\u22EC'; 4952 case "nrtrie": case "ntrianglerighteq": case "NotRightTriangleEqual": return '\u22ED'; 4953 case "vellip": return '\u22EE'; 4954 case "ctdot": return '\u22EF'; 4955 case "utdot": return '\u22F0'; 4956 case "dtdot": return '\u22F1'; 4957 case "disin": return '\u22F2'; 4958 case "isinsv": return '\u22F3'; 4959 case "isins": return '\u22F4'; 4960 case "isindot": return '\u22F5'; 4961 case "notinvc": return '\u22F6'; 4962 case "notinvb": return '\u22F7'; 4963 case "isinE": return '\u22F9'; 4964 case "nisd": return '\u22FA'; 4965 case "xnis": return '\u22FB'; 4966 case "nis": return '\u22FC'; 4967 case "notnivc": return '\u22FD'; 4968 case "notnivb": return '\u22FE'; 4969 case "barwed": case "barwedge": return '\u2305'; 4970 case "Barwed": case "doublebarwedge": return '\u2306'; 4971 case "lceil": case "LeftCeiling": return '\u2308'; 4972 case "rceil": case "RightCeiling": return '\u2309'; 4973 case "lfloor": case "LeftFloor": return '\u230A'; 4974 case "rfloor": case "RightFloor": return '\u230B'; 4975 case "drcrop": return '\u230C'; 4976 case "dlcrop": return '\u230D'; 4977 case "urcrop": return '\u230E'; 4978 case "ulcrop": return '\u230F'; 4979 case "bnot": return '\u2310'; 4980 case "profline": return '\u2312'; 4981 case "profsurf": return '\u2313'; 4982 case "telrec": return '\u2315'; 4983 case "target": return '\u2316'; 4984 case "ulcorn": case "ulcorner": return '\u231C'; 4985 case "urcorn": case "urcorner": return '\u231D'; 4986 case "dlcorn": case "llcorner": return '\u231E'; 4987 case "drcorn": case "lrcorner": return '\u231F'; 4988 case "frown": case "sfrown": return '\u2322'; 4989 case "smile": case "ssmile": return '\u2323'; 4990 case "cylcty": return 
'\u232D'; 4991 case "profalar": return '\u232E'; 4992 case "topbot": return '\u2336'; 4993 case "ovbar": return '\u233D'; 4994 case "solbar": return '\u233F'; 4995 case "angzarr": return '\u237C'; 4996 case "lmoust": case "lmoustache": return '\u23B0'; 4997 case "rmoust": case "rmoustache": return '\u23B1'; 4998 case "tbrk": case "OverBracket": return '\u23B4'; 4999 case "bbrk": case "UnderBracket": return '\u23B5'; 5000 case "bbrktbrk": return '\u23B6'; 5001 case "OverParenthesis": return '\u23DC'; 5002 case "UnderParenthesis": return '\u23DD'; 5003 case "OverBrace": return '\u23DE'; 5004 case "UnderBrace": return '\u23DF'; 5005 case "trpezium": return '\u23E2'; 5006 case "elinters": return '\u23E7'; 5007 case "blank": return '\u2423'; 5008 case "oS": case "circledS": return '\u24C8'; 5009 case "boxh": case "HorizontalLine": return '\u2500'; 5010 case "boxv": return '\u2502'; 5011 case "boxdr": return '\u250C'; 5012 case "boxdl": return '\u2510'; 5013 case "boxur": return '\u2514'; 5014 case "boxul": return '\u2518'; 5015 case "boxvr": return '\u251C'; 5016 case "boxvl": return '\u2524'; 5017 case "boxhd": return '\u252C'; 5018 case "boxhu": return '\u2534'; 5019 case "boxvh": return '\u253C'; 5020 case "boxH": return '\u2550'; 5021 case "boxV": return '\u2551'; 5022 case "boxdR": return '\u2552'; 5023 case "boxDr": return '\u2553'; 5024 case "boxDR": return '\u2554'; 5025 case "boxdL": return '\u2555'; 5026 case "boxDl": return '\u2556'; 5027 case "boxDL": return '\u2557'; 5028 case "boxuR": return '\u2558'; 5029 case "boxUr": return '\u2559'; 5030 case "boxUR": return '\u255A'; 5031 case "boxuL": return '\u255B'; 5032 case "boxUl": return '\u255C'; 5033 case "boxUL": return '\u255D'; 5034 case "boxvR": return '\u255E'; 5035 case "boxVr": return '\u255F'; 5036 case "boxVR": return '\u2560'; 5037 case "boxvL": return '\u2561'; 5038 case "boxVl": return '\u2562'; 5039 case "boxVL": return '\u2563'; 5040 case "boxHd": return '\u2564'; 5041 case "boxhD": return 
'\u2565'; 5042 case "boxHD": return '\u2566'; 5043 case "boxHu": return '\u2567'; 5044 case "boxhU": return '\u2568'; 5045 case "boxHU": return '\u2569'; 5046 case "boxvH": return '\u256A'; 5047 case "boxVh": return '\u256B'; 5048 case "boxVH": return '\u256C'; 5049 case "uhblk": return '\u2580'; 5050 case "lhblk": return '\u2584'; 5051 case "block": return '\u2588'; 5052 case "blk14": return '\u2591'; 5053 case "blk12": return '\u2592'; 5054 case "blk34": return '\u2593'; 5055 case "squ": case "square": case "Square": return '\u25A1'; 5056 case "squf": case "squarf": case "blacksquare": case "FilledVerySmallSquare": return '\u25AA'; 5057 case "EmptyVerySmallSquare": return '\u25AB'; 5058 case "rect": return '\u25AD'; 5059 case "marker": return '\u25AE'; 5060 case "fltns": return '\u25B1'; 5061 case "xutri": case "bigtriangleup": return '\u25B3'; 5062 case "utrif": case "blacktriangle": return '\u25B4'; 5063 case "utri": case "triangle": return '\u25B5'; 5064 case "rtrif": case "blacktriangleright": return '\u25B8'; 5065 case "rtri": case "triangleright": return '\u25B9'; 5066 case "xdtri": case "bigtriangledown": return '\u25BD'; 5067 case "dtrif": case "blacktriangledown": return '\u25BE'; 5068 case "dtri": case "triangledown": return '\u25BF'; 5069 case "ltrif": case "blacktriangleleft": return '\u25C2'; 5070 case "ltri": case "triangleleft": return '\u25C3'; 5071 case "loz": case "lozenge": return '\u25CA'; 5072 case "cir": return '\u25CB'; 5073 case "tridot": return '\u25EC'; 5074 case "xcirc": case "bigcirc": return '\u25EF'; 5075 case "ultri": return '\u25F8'; 5076 case "urtri": return '\u25F9'; 5077 case "lltri": return '\u25FA'; 5078 case "EmptySmallSquare": return '\u25FB'; 5079 case "FilledSmallSquare": return '\u25FC'; 5080 case "starf": case "bigstar": return '\u2605'; 5081 case "star": return '\u2606'; 5082 case "phone": return '\u260E'; 5083 case "female": return '\u2640'; 5084 case "male": return '\u2642'; 5085 case "spades": case "spadesuit": 
return '\u2660'; 5086 case "clubs": case "clubsuit": return '\u2663'; 5087 case "hearts": case "heartsuit": return '\u2665'; 5088 case "diams": case "diamondsuit": return '\u2666'; 5089 case "sung": return '\u266A'; 5090 case "flat": return '\u266D'; 5091 case "natur": case "natural": return '\u266E'; 5092 case "sharp": return '\u266F'; 5093 case "check": case "checkmark": return '\u2713'; 5094 case "cross": return '\u2717'; 5095 case "malt": case "maltese": return '\u2720'; 5096 case "sext": return '\u2736'; 5097 case "VerticalSeparator": return '\u2758'; 5098 case "lbbrk": return '\u2772'; 5099 case "rbbrk": return '\u2773'; 5100 case "bsolhsub": return '\u27C8'; 5101 case "suphsol": return '\u27C9'; 5102 case "lobrk": case "LeftDoubleBracket": return '\u27E6'; 5103 case "robrk": case "RightDoubleBracket": return '\u27E7'; 5104 case "lang": case "LeftAngleBracket": case "langle": return '\u27E8'; 5105 case "rang": case "RightAngleBracket": case "rangle": return '\u27E9'; 5106 case "Lang": return '\u27EA'; 5107 case "Rang": return '\u27EB'; 5108 case "loang": return '\u27EC'; 5109 case "roang": return '\u27ED'; 5110 case "xlarr": case "longleftarrow": case "LongLeftArrow": return '\u27F5'; 5111 case "xrarr": case "longrightarrow": case "LongRightArrow": return '\u27F6'; 5112 case "xharr": case "longleftrightarrow": case "LongLeftRightArrow": return '\u27F7'; 5113 case "xlArr": case "Longleftarrow": case "DoubleLongLeftArrow": return '\u27F8'; 5114 case "xrArr": case "Longrightarrow": case "DoubleLongRightArrow": return '\u27F9'; 5115 case "xhArr": case "Longleftrightarrow": case "DoubleLongLeftRightArrow": return '\u27FA'; 5116 case "xmap": case "longmapsto": return '\u27FC'; 5117 case "dzigrarr": return '\u27FF'; 5118 case "nvlArr": return '\u2902'; 5119 case "nvrArr": return '\u2903'; 5120 case "nvHarr": return '\u2904'; 5121 case "Map": return '\u2905'; 5122 case "lbarr": return '\u290C'; 5123 case "rbarr": case "bkarow": return '\u290D'; 5124 case "lBarr": 
return '\u290E'; 5125 case "rBarr": case "dbkarow": return '\u290F'; 5126 case "RBarr": case "drbkarow": return '\u2910'; 5127 case "DDotrahd": return '\u2911'; 5128 case "UpArrowBar": return '\u2912'; 5129 case "DownArrowBar": return '\u2913'; 5130 case "Rarrtl": return '\u2916'; 5131 case "latail": return '\u2919'; 5132 case "ratail": return '\u291A'; 5133 case "lAtail": return '\u291B'; 5134 case "rAtail": return '\u291C'; 5135 case "larrfs": return '\u291D'; 5136 case "rarrfs": return '\u291E'; 5137 case "larrbfs": return '\u291F'; 5138 case "rarrbfs": return '\u2920'; 5139 case "nwarhk": return '\u2923'; 5140 case "nearhk": return '\u2924'; 5141 case "searhk": case "hksearow": return '\u2925'; 5142 case "swarhk": case "hkswarow": return '\u2926'; 5143 case "nwnear": return '\u2927'; 5144 case "nesear": case "toea": return '\u2928'; 5145 case "seswar": case "tosa": return '\u2929'; 5146 case "swnwar": return '\u292A'; 5147 case "rarrc": return '\u2933'; 5148 case "cudarrr": return '\u2935'; 5149 case "ldca": return '\u2936'; 5150 case "rdca": return '\u2937'; 5151 case "cudarrl": return '\u2938'; 5152 case "larrpl": return '\u2939'; 5153 case "curarrm": return '\u293C'; 5154 case "cularrp": return '\u293D'; 5155 case "rarrpl": return '\u2945'; 5156 case "harrcir": return '\u2948'; 5157 case "Uarrocir": return '\u2949'; 5158 case "lurdshar": return '\u294A'; 5159 case "ldrushar": return '\u294B'; 5160 case "LeftRightVector": return '\u294E'; 5161 case "RightUpDownVector": return '\u294F'; 5162 case "DownLeftRightVector": return '\u2950'; 5163 case "LeftUpDownVector": return '\u2951'; 5164 case "LeftVectorBar": return '\u2952'; 5165 case "RightVectorBar": return '\u2953'; 5166 case "RightUpVectorBar": return '\u2954'; 5167 case "RightDownVectorBar": return '\u2955'; 5168 case "DownLeftVectorBar": return '\u2956'; 5169 case "DownRightVectorBar": return '\u2957'; 5170 case "LeftUpVectorBar": return '\u2958'; 5171 case "LeftDownVectorBar": return '\u2959'; 5172 case 
"LeftTeeVector": return '\u295A'; 5173 case "RightTeeVector": return '\u295B'; 5174 case "RightUpTeeVector": return '\u295C'; 5175 case "RightDownTeeVector": return '\u295D'; 5176 case "DownLeftTeeVector": return '\u295E'; 5177 case "DownRightTeeVector": return '\u295F'; 5178 case "LeftUpTeeVector": return '\u2960'; 5179 case "LeftDownTeeVector": return '\u2961'; 5180 case "lHar": return '\u2962'; 5181 case "uHar": return '\u2963'; 5182 case "rHar": return '\u2964'; 5183 case "dHar": return '\u2965'; 5184 case "luruhar": return '\u2966'; 5185 case "ldrdhar": return '\u2967'; 5186 case "ruluhar": return '\u2968'; 5187 case "rdldhar": return '\u2969'; 5188 case "lharul": return '\u296A'; 5189 case "llhard": return '\u296B'; 5190 case "rharul": return '\u296C'; 5191 case "lrhard": return '\u296D'; 5192 case "udhar": case "UpEquilibrium": return '\u296E'; 5193 case "duhar": case "ReverseUpEquilibrium": return '\u296F'; 5194 case "RoundImplies": return '\u2970'; 5195 case "erarr": return '\u2971'; 5196 case "simrarr": return '\u2972'; 5197 case "larrsim": return '\u2973'; 5198 case "rarrsim": return '\u2974'; 5199 case "rarrap": return '\u2975'; 5200 case "ltlarr": return '\u2976'; 5201 case "gtrarr": return '\u2978'; 5202 case "subrarr": return '\u2979'; 5203 case "suplarr": return '\u297B'; 5204 case "lfisht": return '\u297C'; 5205 case "rfisht": return '\u297D'; 5206 case "ufisht": return '\u297E'; 5207 case "dfisht": return '\u297F'; 5208 case "lopar": return '\u2985'; 5209 case "ropar": return '\u2986'; 5210 case "lbrke": return '\u298B'; 5211 case "rbrke": return '\u298C'; 5212 case "lbrkslu": return '\u298D'; 5213 case "rbrksld": return '\u298E'; 5214 case "lbrksld": return '\u298F'; 5215 case "rbrkslu": return '\u2990'; 5216 case "langd": return '\u2991'; 5217 case "rangd": return '\u2992'; 5218 case "lparlt": return '\u2993'; 5219 case "rpargt": return '\u2994'; 5220 case "gtlPar": return '\u2995'; 5221 case "ltrPar": return '\u2996'; 5222 case "vzigzag": 
return '\u299A'; 5223 case "vangrt": return '\u299C'; 5224 case "angrtvbd": return '\u299D'; 5225 case "ange": return '\u29A4'; 5226 case "range": return '\u29A5'; 5227 case "dwangle": return '\u29A6'; 5228 case "uwangle": return '\u29A7'; 5229 case "angmsdaa": return '\u29A8'; 5230 case "angmsdab": return '\u29A9'; 5231 case "angmsdac": return '\u29AA'; 5232 case "angmsdad": return '\u29AB'; 5233 case "angmsdae": return '\u29AC'; 5234 case "angmsdaf": return '\u29AD'; 5235 case "angmsdag": return '\u29AE'; 5236 case "angmsdah": return '\u29AF'; 5237 case "bemptyv": return '\u29B0'; 5238 case "demptyv": return '\u29B1'; 5239 case "cemptyv": return '\u29B2'; 5240 case "raemptyv": return '\u29B3'; 5241 case "laemptyv": return '\u29B4'; 5242 case "ohbar": return '\u29B5'; 5243 case "omid": return '\u29B6'; 5244 case "opar": return '\u29B7'; 5245 case "operp": return '\u29B9'; 5246 case "olcross": return '\u29BB'; 5247 case "odsold": return '\u29BC'; 5248 case "olcir": return '\u29BE'; 5249 case "ofcir": return '\u29BF'; 5250 case "olt": return '\u29C0'; 5251 case "ogt": return '\u29C1'; 5252 case "cirscir": return '\u29C2'; 5253 case "cirE": return '\u29C3'; 5254 case "solb": return '\u29C4'; 5255 case "bsolb": return '\u29C5'; 5256 case "boxbox": return '\u29C9'; 5257 case "trisb": return '\u29CD'; 5258 case "rtriltri": return '\u29CE'; 5259 case "LeftTriangleBar": return '\u29CF'; 5260 case "RightTriangleBar": return '\u29D0'; 5261 case "iinfin": return '\u29DC'; 5262 case "infintie": return '\u29DD'; 5263 case "nvinfin": return '\u29DE'; 5264 case "eparsl": return '\u29E3'; 5265 case "smeparsl": return '\u29E4'; 5266 case "eqvparsl": return '\u29E5'; 5267 case "lozf": case "blacklozenge": return '\u29EB'; 5268 case "RuleDelayed": return '\u29F4'; 5269 case "dsol": return '\u29F6'; 5270 case "xodot": case "bigodot": return '\u2A00'; 5271 case "xoplus": case "bigoplus": return '\u2A01'; 5272 case "xotime": case "bigotimes": return '\u2A02'; 5273 case "xuplus": case 
"biguplus": return '\u2A04'; 5274 case "xsqcup": case "bigsqcup": return '\u2A06'; 5275 case "qint": case "iiiint": return '\u2A0C'; 5276 case "fpartint": return '\u2A0D'; 5277 case "cirfnint": return '\u2A10'; 5278 case "awint": return '\u2A11'; 5279 case "rppolint": return '\u2A12'; 5280 case "scpolint": return '\u2A13'; 5281 case "npolint": return '\u2A14'; 5282 case "pointint": return '\u2A15'; 5283 case "quatint": return '\u2A16'; 5284 case "intlarhk": return '\u2A17'; 5285 case "pluscir": return '\u2A22'; 5286 case "plusacir": return '\u2A23'; 5287 case "simplus": return '\u2A24'; 5288 case "plusdu": return '\u2A25'; 5289 case "plussim": return '\u2A26'; 5290 case "plustwo": return '\u2A27'; 5291 case "mcomma": return '\u2A29'; 5292 case "minusdu": return '\u2A2A'; 5293 case "loplus": return '\u2A2D'; 5294 case "roplus": return '\u2A2E'; 5295 case "Cross": return '\u2A2F'; 5296 case "timesd": return '\u2A30'; 5297 case "timesbar": return '\u2A31'; 5298 case "smashp": return '\u2A33'; 5299 case "lotimes": return '\u2A34'; 5300 case "rotimes": return '\u2A35'; 5301 case "otimesas": return '\u2A36'; 5302 case "Otimes": return '\u2A37'; 5303 case "odiv": return '\u2A38'; 5304 case "triplus": return '\u2A39'; 5305 case "triminus": return '\u2A3A'; 5306 case "tritime": return '\u2A3B'; 5307 case "iprod": case "intprod": return '\u2A3C'; 5308 case "amalg": return '\u2A3F'; 5309 case "capdot": return '\u2A40'; 5310 case "ncup": return '\u2A42'; 5311 case "ncap": return '\u2A43'; 5312 case "capand": return '\u2A44'; 5313 case "cupor": return '\u2A45'; 5314 case "cupcap": return '\u2A46'; 5315 case "capcup": return '\u2A47'; 5316 case "cupbrcap": return '\u2A48'; 5317 case "capbrcup": return '\u2A49'; 5318 case "cupcup": return '\u2A4A'; 5319 case "capcap": return '\u2A4B'; 5320 case "ccups": return '\u2A4C'; 5321 case "ccaps": return '\u2A4D'; 5322 case "ccupssm": return '\u2A50'; 5323 case "And": return '\u2A53'; 5324 case "Or": return '\u2A54'; 5325 case "andand": 
return '\u2A55'; 5326 case "oror": return '\u2A56'; 5327 case "orslope": return '\u2A57'; 5328 case "andslope": return '\u2A58'; 5329 case "andv": return '\u2A5A'; 5330 case "orv": return '\u2A5B'; 5331 case "andd": return '\u2A5C'; 5332 case "ord": return '\u2A5D'; 5333 case "wedbar": return '\u2A5F'; 5334 case "sdote": return '\u2A66'; 5335 case "simdot": return '\u2A6A'; 5336 case "congdot": return '\u2A6D'; 5337 case "easter": return '\u2A6E'; 5338 case "apacir": return '\u2A6F'; 5339 case "apE": return '\u2A70'; 5340 case "eplus": return '\u2A71'; 5341 case "pluse": return '\u2A72'; 5342 case "Esim": return '\u2A73'; 5343 case "Colone": return '\u2A74'; 5344 case "Equal": return '\u2A75'; 5345 case "eDDot": case "ddotseq": return '\u2A77'; 5346 case "equivDD": return '\u2A78'; 5347 case "ltcir": return '\u2A79'; 5348 case "gtcir": return '\u2A7A'; 5349 case "ltquest": return '\u2A7B'; 5350 case "gtquest": return '\u2A7C'; 5351 case "les": case "LessSlantEqual": case "leqslant": return '\u2A7D'; 5352 case "ges": case "GreaterSlantEqual": case "geqslant": return '\u2A7E'; 5353 case "lesdot": return '\u2A7F'; 5354 case "gesdot": return '\u2A80'; 5355 case "lesdoto": return '\u2A81'; 5356 case "gesdoto": return '\u2A82'; 5357 case "lesdotor": return '\u2A83'; 5358 case "gesdotol": return '\u2A84'; 5359 case "lap": case "lessapprox": return '\u2A85'; 5360 case "gap": case "gtrapprox": return '\u2A86'; 5361 case "lne": case "lneq": return '\u2A87'; 5362 case "gne": case "gneq": return '\u2A88'; 5363 case "lnap": case "lnapprox": return '\u2A89'; 5364 case "gnap": case "gnapprox": return '\u2A8A'; 5365 case "lEg": case "lesseqqgtr": return '\u2A8B'; 5366 case "gEl": case "gtreqqless": return '\u2A8C'; 5367 case "lsime": return '\u2A8D'; 5368 case "gsime": return '\u2A8E'; 5369 case "lsimg": return '\u2A8F'; 5370 case "gsiml": return '\u2A90'; 5371 case "lgE": return '\u2A91'; 5372 case "glE": return '\u2A92'; 5373 case "lesges": return '\u2A93'; 5374 case "gesles": 
return '\u2A94'; 5375 case "els": case "eqslantless": return '\u2A95'; 5376 case "egs": case "eqslantgtr": return '\u2A96'; 5377 case "elsdot": return '\u2A97'; 5378 case "egsdot": return '\u2A98'; 5379 case "el": return '\u2A99'; 5380 case "eg": return '\u2A9A'; 5381 case "siml": return '\u2A9D'; 5382 case "simg": return '\u2A9E'; 5383 case "simlE": return '\u2A9F'; 5384 case "simgE": return '\u2AA0'; 5385 case "LessLess": return '\u2AA1'; 5386 case "GreaterGreater": return '\u2AA2'; 5387 case "glj": return '\u2AA4'; 5388 case "gla": return '\u2AA5'; 5389 case "ltcc": return '\u2AA6'; 5390 case "gtcc": return '\u2AA7'; 5391 case "lescc": return '\u2AA8'; 5392 case "gescc": return '\u2AA9'; 5393 case "smt": return '\u2AAA'; 5394 case "lat": return '\u2AAB'; 5395 case "smte": return '\u2AAC'; 5396 case "late": return '\u2AAD'; 5397 case "bumpE": return '\u2AAE'; 5398 case "pre": case "preceq": case "PrecedesEqual": return '\u2AAF'; 5399 case "sce": case "succeq": case "SucceedsEqual": return '\u2AB0'; 5400 case "prE": return '\u2AB3'; 5401 case "scE": return '\u2AB4'; 5402 case "prnE": case "precneqq": return '\u2AB5'; 5403 case "scnE": case "succneqq": return '\u2AB6'; 5404 case "prap": case "precapprox": return '\u2AB7'; 5405 case "scap": case "succapprox": return '\u2AB8'; 5406 case "prnap": case "precnapprox": return '\u2AB9'; 5407 case "scnap": case "succnapprox": return '\u2ABA'; 5408 case "Pr": return '\u2ABB'; 5409 case "Sc": return '\u2ABC'; 5410 case "subdot": return '\u2ABD'; 5411 case "supdot": return '\u2ABE'; 5412 case "subplus": return '\u2ABF'; 5413 case "supplus": return '\u2AC0'; 5414 case "submult": return '\u2AC1'; 5415 case "supmult": return '\u2AC2'; 5416 case "subedot": return '\u2AC3'; 5417 case "supedot": return '\u2AC4'; 5418 case "subE": case "subseteqq": return '\u2AC5'; 5419 case "supE": case "supseteqq": return '\u2AC6'; 5420 case "subsim": return '\u2AC7'; 5421 case "supsim": return '\u2AC8'; 5422 case "subnE": case "subsetneqq": 
return '\u2ACB'; 5423 case "supnE": case "supsetneqq": return '\u2ACC'; 5424 case "csub": return '\u2ACF'; 5425 case "csup": return '\u2AD0'; 5426 case "csube": return '\u2AD1'; 5427 case "csupe": return '\u2AD2'; 5428 case "subsup": return '\u2AD3'; 5429 case "supsub": return '\u2AD4'; 5430 case "subsub": return '\u2AD5'; 5431 case "supsup": return '\u2AD6'; 5432 case "suphsub": return '\u2AD7'; 5433 case "supdsub": return '\u2AD8'; 5434 case "forkv": return '\u2AD9'; 5435 case "topfork": return '\u2ADA'; 5436 case "mlcp": return '\u2ADB'; 5437 case "Dashv": case "DoubleLeftTee": return '\u2AE4'; 5438 case "Vdashl": return '\u2AE6'; 5439 case "Barv": return '\u2AE7'; 5440 case "vBar": return '\u2AE8'; 5441 case "vBarv": return '\u2AE9'; 5442 case "Vbar": return '\u2AEB'; 5443 case "Not": return '\u2AEC'; 5444 case "bNot": return '\u2AED'; 5445 case "rnmid": return '\u2AEE'; 5446 case "cirmid": return '\u2AEF'; 5447 case "midcir": return '\u2AF0'; 5448 case "topcir": return '\u2AF1'; 5449 case "nhpar": return '\u2AF2'; 5450 case "parsim": return '\u2AF3'; 5451 case "parsl": return '\u2AFD'; 5452 case "fflig": return '\uFB00'; 5453 case "filig": return '\uFB01'; 5454 case "fllig": return '\uFB02'; 5455 case "ffilig": return '\uFB03'; 5456 case "ffllig": return '\uFB04'; 5457 case "Ascr": return '\U0001D49C'; 5458 case "Cscr": return '\U0001D49E'; 5459 case "Dscr": return '\U0001D49F'; 5460 case "Gscr": return '\U0001D4A2'; 5461 case "Jscr": return '\U0001D4A5'; 5462 case "Kscr": return '\U0001D4A6'; 5463 case "Nscr": return '\U0001D4A9'; 5464 case "Oscr": return '\U0001D4AA'; 5465 case "Pscr": return '\U0001D4AB'; 5466 case "Qscr": return '\U0001D4AC'; 5467 case "Sscr": return '\U0001D4AE'; 5468 case "Tscr": return '\U0001D4AF'; 5469 case "Uscr": return '\U0001D4B0'; 5470 case "Vscr": return '\U0001D4B1'; 5471 case "Wscr": return '\U0001D4B2'; 5472 case "Xscr": return '\U0001D4B3'; 5473 case "Yscr": return '\U0001D4B4'; 5474 case "Zscr": return '\U0001D4B5'; 5475 
case "ascr": return '\U0001D4B6'; 5476 case "bscr": return '\U0001D4B7'; 5477 case "cscr": return '\U0001D4B8'; 5478 case "dscr": return '\U0001D4B9'; 5479 case "fscr": return '\U0001D4BB'; 5480 case "hscr": return '\U0001D4BD'; 5481 case "iscr": return '\U0001D4BE'; 5482 case "jscr": return '\U0001D4BF'; 5483 case "kscr": return '\U0001D4C0'; 5484 case "lscr": return '\U0001D4C1'; 5485 case "mscr": return '\U0001D4C2'; 5486 case "nscr": return '\U0001D4C3'; 5487 case "pscr": return '\U0001D4C5'; 5488 case "qscr": return '\U0001D4C6'; 5489 case "rscr": return '\U0001D4C7'; 5490 case "sscr": return '\U0001D4C8'; 5491 case "tscr": return '\U0001D4C9'; 5492 case "uscr": return '\U0001D4CA'; 5493 case "vscr": return '\U0001D4CB'; 5494 case "wscr": return '\U0001D4CC'; 5495 case "xscr": return '\U0001D4CD'; 5496 case "yscr": return '\U0001D4CE'; 5497 case "zscr": return '\U0001D4CF'; 5498 case "Afr": return '\U0001D504'; 5499 case "Bfr": return '\U0001D505'; 5500 case "Dfr": return '\U0001D507'; 5501 case "Efr": return '\U0001D508'; 5502 case "Ffr": return '\U0001D509'; 5503 case "Gfr": return '\U0001D50A'; 5504 case "Jfr": return '\U0001D50D'; 5505 case "Kfr": return '\U0001D50E'; 5506 case "Lfr": return '\U0001D50F'; 5507 case "Mfr": return '\U0001D510'; 5508 case "Nfr": return '\U0001D511'; 5509 case "Ofr": return '\U0001D512'; 5510 case "Pfr": return '\U0001D513'; 5511 case "Qfr": return '\U0001D514'; 5512 case "Sfr": return '\U0001D516'; 5513 case "Tfr": return '\U0001D517'; 5514 case "Ufr": return '\U0001D518'; 5515 case "Vfr": return '\U0001D519'; 5516 case "Wfr": return '\U0001D51A'; 5517 case "Xfr": return '\U0001D51B'; 5518 case "Yfr": return '\U0001D51C'; 5519 case "afr": return '\U0001D51E'; 5520 case "bfr": return '\U0001D51F'; 5521 case "cfr": return '\U0001D520'; 5522 case "dfr": return '\U0001D521'; 5523 case "efr": return '\U0001D522'; 5524 case "ffr": return '\U0001D523'; 5525 case "gfr": return '\U0001D524'; 5526 case "hfr": return '\U0001D525'; 5527 
case "ifr": return '\U0001D526'; 5528 case "jfr": return '\U0001D527'; 5529 case "kfr": return '\U0001D528'; 5530 case "lfr": return '\U0001D529'; 5531 case "mfr": return '\U0001D52A'; 5532 case "nfr": return '\U0001D52B'; 5533 case "ofr": return '\U0001D52C'; 5534 case "pfr": return '\U0001D52D'; 5535 case "qfr": return '\U0001D52E'; 5536 case "rfr": return '\U0001D52F'; 5537 case "sfr": return '\U0001D530'; 5538 case "tfr": return '\U0001D531'; 5539 case "ufr": return '\U0001D532'; 5540 case "vfr": return '\U0001D533'; 5541 case "wfr": return '\U0001D534'; 5542 case "xfr": return '\U0001D535'; 5543 case "yfr": return '\U0001D536'; 5544 case "zfr": return '\U0001D537'; 5545 case "Aopf": return '\U0001D538'; 5546 case "Bopf": return '\U0001D539'; 5547 case "Dopf": return '\U0001D53B'; 5548 case "Eopf": return '\U0001D53C'; 5549 case "Fopf": return '\U0001D53D'; 5550 case "Gopf": return '\U0001D53E'; 5551 case "Iopf": return '\U0001D540'; 5552 case "Jopf": return '\U0001D541'; 5553 case "Kopf": return '\U0001D542'; 5554 case "Lopf": return '\U0001D543'; 5555 case "Mopf": return '\U0001D544'; 5556 case "Oopf": return '\U0001D546'; 5557 case "Sopf": return '\U0001D54A'; 5558 case "Topf": return '\U0001D54B'; 5559 case "Uopf": return '\U0001D54C'; 5560 case "Vopf": return '\U0001D54D'; 5561 case "Wopf": return '\U0001D54E'; 5562 case "Xopf": return '\U0001D54F'; 5563 case "Yopf": return '\U0001D550'; 5564 case "aopf": return '\U0001D552'; 5565 case "bopf": return '\U0001D553'; 5566 case "copf": return '\U0001D554'; 5567 case "dopf": return '\U0001D555'; 5568 case "eopf": return '\U0001D556'; 5569 case "fopf": return '\U0001D557'; 5570 case "gopf": return '\U0001D558'; 5571 case "hopf": return '\U0001D559'; 5572 case "iopf": return '\U0001D55A'; 5573 case "jopf": return '\U0001D55B'; 5574 case "kopf": return '\U0001D55C'; 5575 case "lopf": return '\U0001D55D'; 5576 case "mopf": return '\U0001D55E'; 5577 case "nopf": return '\U0001D55F'; 5578 case "oopf": return 
'\U0001D560';
		case "popf": return '\U0001D561';
		case "qopf": return '\U0001D562';
		case "ropf": return '\U0001D563';
		case "sopf": return '\U0001D564';
		case "topf": return '\U0001D565';
		case "uopf": return '\U0001D566';
		case "vopf": return '\U0001D567';
		case "wopf": return '\U0001D568';
		case "xopf": return '\U0001D569';
		case "yopf": return '\U0001D56A';
		case "zopf": return '\U0001D56B';

		// and handling numeric entities
		default:
			if(entity[1] == '#') {
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					// dealing with broken html entities
					while(decimal.length && (decimal[0] < '0' || decimal[0] > '9'))
						decimal = decimal[1 .. $];

					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff

					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '\ufffd'; // replacement character diamond thing
	}

	assert(0);
}

import std.utf;
import std.stdio;

/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
///
/// Each candidate entity (the text from `&` up to and including its terminator) is handed to
/// `parseEntity` and the returned code point is appended UTF-8 encoded. A candidate may be as
/// long as the longest HTML name (`&CounterClockwiseContourIntegral;`) as long as it still looks
/// like a reference, i.e. consists of ASCII letters and digits with an optional `#` directly after
/// the `&`. Anything else is given up on nine characters after the `&`.
///
/// Params:
///     data = the raw HTML text
///     strict = if true, an unterminated entity throws instead of being passed through
///
/// Returns: the decoded text. If `data` contains no `&` the original slice is returned as-is.
///
/// Throws: `Exception` in strict mode when an entity is interrupted by another `&`, by a space,
/// by the end of the input, or runs on too long without a `;`.
/// Group: core_functionality
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	// '&' + the 31 letters of "CounterClockwiseContourIntegral" + ';'
	enum maxEntityLength = 33;
	// a candidate that does not look like a reference is abandoned this many characters after the '&'
	enum garbageCutoff = 9;

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer;

	bool tryingEntity = false;
	bool looksLikeEntity = true; // only letters, digits and a leading '#' seen since the '&'
	dchar[maxEntityLength] entityBeingTried;
	int entityBeingTriedLength = 0;
	int entityAttemptIndex = 0;

	foreach(dchar ch; data) {
		if(!tryingEntity) {
			if(ch == '&') {
				tryingEntity = true;
				looksLikeEntity = true;
				entityBeingTried[0] = ch;
				entityBeingTriedLength = 1;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
			continue;
		}

		entityAttemptIndex++;
		entityBeingTried[entityBeingTriedLength++] = ch;
		auto candidate = entityBeingTried[0 .. entityBeingTriedLength];

		if(ch == '&') {
			// broken html in the wild sometimes starts a new entity before finishing the last one, e.g. "&0&#1111;"
			if(strict)
				throw new Exception("unterminated entity; & inside another at " ~ to!string(candidate));

			// if not strict, let's try to parse both.

			if(candidate == "&&")
				a ~= "&"; // double amp means keep the first one, still try to parse the next one
			else if(entityAttemptIndex <= garbageCutoff)
				a ~= buffer[0 .. std.utf.encode(buffer, parseEntity(candidate))];
			else
				// a long run that never got its ';' - keep the text (minus the new '&') instead of guessing
				a ~= to!(char[])(candidate[0 .. $ - 1]);

			// tryingEntity is still true; entityBeingTried[0] already holds an '&'
			looksLikeEntity = true;
			entityBeingTriedLength = 1;
			entityAttemptIndex = 0;
		} else if(ch == ';') {
			tryingEntity = false;
			a ~= buffer[0 .. std.utf.encode(buffer, parseEntity(candidate))];
		} else if(ch == ' ') {
			// e.g. you & i
			if(strict)
				throw new Exception("unterminated entity at " ~ to!string(candidate));

			tryingEntity = false;
			a ~= to!(char[])(candidate);
		} else {
			const isNameChar =
				(ch >= 'a' && ch <= 'z') ||
				(ch >= 'A' && ch <= 'Z') ||
				(ch >= '0' && ch <= '9') ||
				(ch == '#' && entityAttemptIndex == 1);
			if(!isNameChar)
				looksLikeEntity = false;

			// Past the cutoff we only keep going while this can still become a real reference.
			// The length check also guarantees the fixed-size buffer is never overrun.
			if(entityAttemptIndex >= garbageCutoff && (!looksLikeEntity || entityBeingTriedLength >= maxEntityLength)) {
				if(strict)
					throw new Exception("unterminated entity at " ~ to!string(candidate));

				tryingEntity = false;
				a ~= to!(char[])(candidate);
			}
		}
	}

	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]);
		// FIXME: what if the input ends in an entity that only lacks its ';'? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

/// Base class for the non-element nodes defined below it. Appending a child is rejected with an
/// assertion failure, and `nodeType` reports 100.
/// Group: implementations
abstract class SpecialElement : Element {
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Always fails: special nodes cannot have children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// Returns 100 for every special node.
	@property override int nodeType() const {
		return 100;
	}
}

/// A node (tag name `#raw`) whose `source` is written to the output without any changes.
/// Group: implementations
class RawSource : SpecialElement {
	/// Creates the node holding `s` as its source.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	/// Returns `this.toString()`.
	override string nodeValue() const {
		return this.toString();
	}

	/// Puts `source` into `where` unmodified and returns it.
	override string writeToAppender(Appender!string where = appender!string()) const {
		where.put(source);
		return source;
	}

	override string toPrettyString(bool, int, string) const {
		return source;
	}


	override RawSource cloneNode(bool deep) {
		return new RawSource(parentDocument, source);
	}

	///.
string source;
}

/// Shared base of the server-side code nodes. The tag name is `#` followed by the `type`
/// given to the constructor, and the node is written out as `<`, `source`, `>`.
/// Group: implementations
abstract class ServerSideCode : SpecialElement {
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		tagName = "#" ~ type;
	}

	/// Returns `source`.
	override string nodeValue() const {
		return source;
	}

	/// Appends `<`, `source` and `>` to `where` and returns the slice that was just added.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable offset = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[offset .. $];
	}

	override string toPrettyString(bool, int, string) const {
		string result;
		result ~= "<";
		result ~= source;
		result ~= ">";
		return result;
	}

	/// The text placed between the angle brackets.
	string source;
}

/// Server-side code node with the tag name `#php`.
/// Group: implementations
class PhpCode : ServerSideCode {
	/// Creates the node with `s` as its source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "php");
		this.source = s;
	}

	override PhpCode cloneNode(bool deep) {
		auto copy = new PhpCode(parentDocument, source);
		return copy;
	}
}

/// Server-side code node with the tag name `#asp`.
/// Group: implementations
class AspCode : ServerSideCode {
	/// Creates the node with `s` as its source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "asp");
		this.source = s;
	}

	override AspCode cloneNode(bool deep) {
		auto copy = new AspCode(parentDocument, source);
		return copy;
	}
}

/// A node with the tag name `#bpi`.
/// Group: implementations
class BangInstruction : SpecialElement {
	/// Creates the node with `s` as its source.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		this.source = s;
		tagName = "#bpi";
	}

	/// Returns `source`.
	override string nodeValue() const {
		return source;
	}

	override BangInstruction cloneNode(bool deep) {
		auto copy = new BangInstruction(parentDocument, source);
		return copy;
	}

	///.
override string writeToAppender(Appender!string where = appender!string()) const {
		immutable offset = where.data.length;
		where.put("<!");
		where.put(source);
		where.put(">");
		return where.data[offset .. $];
	}

	override string toPrettyString(bool, int, string) const {
		return "<!" ~ source ~ ">";
	}

	/// The text written between `<!` and `>`.
	string source;
}

/// A node with the tag name `#qpi`, written out as `<`, `source`, `>`.
/// Group: implementations
class QuestionInstruction : SpecialElement {
	/// Creates the node with `s` as its source.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		this.source = s;
		tagName = "#qpi";
	}

	override QuestionInstruction cloneNode(bool deep) {
		auto copy = new QuestionInstruction(parentDocument, source);
		return copy;
	}

	/// Returns `source`.
	override string nodeValue() const {
		return source;
	}

	/// Appends `<`, `source` and `>` to `where` and returns the slice that was just added.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable offset = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[offset .. $];
	}

	override string toPrettyString(bool, int, string) const {
		return "<" ~ source ~ ">";
	}


	/// The text written between the angle brackets.
	string source;
}

/// A node with the tag name `#comment`.
/// Group: implementations
class HtmlComment : SpecialElement {
	/// Creates the node with `s` as its source.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		this.source = s;
		tagName = "#comment";
	}

	override HtmlComment cloneNode(bool deep) {
		auto copy = new HtmlComment(parentDocument, source);
		return copy;
	}

	/// Returns `source`.
	override string nodeValue() const {
		return source;
	}

	///.
override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<!--");
		where.put(source);
		where.put("-->");
		return where.data[start .. $];
	}

	override string toPrettyString(bool, int, string) const {
		string s;
		s ~= "<!--";
		s ~= source;
		s ~= "-->";
		return s;
	}


	///.
	string source;
}




/// A node (tag name `#text`) holding decoded character data in `contents`.
/// The text is entity-encoded again when the node is written out.
/// Group: implementations
class TextNode : Element {
  public:
	/// Creates a text node holding `e`, which is stored as-is (no entity decoding).
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		contents = e;
		tagName = "#text";
	}

	/// Same as the two-argument constructor with a null parent document.
	this(string e) {
		this(null, e);
	}

	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

	/// Creates a text node from text that still contains HTML entities.
	/// The entities are decoded with `htmlEntitiesDecode`, in strict mode only when a
	/// parent document is given and that document is not `loose`.
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		auto e = new TextNode(_parentDocument, "");
		e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose);
		return e;
	}

	/// Returns a new text node with the same parent document and contents; `deep` is not used.
	override @property TextNode cloneNode(bool deep) {
		auto n = new TextNode(parentDocument, contents);
		return n;
	}

	/// Returns `contents`.
	override string nodeValue() const {
		return this.contents; //toString();
	}

	/// Returns `NodeType.Text`.
	@property override int nodeType() const {
		return NodeType.Text;
	}

	/// Entity-encodes `contents` into `where` via `htmlEntitiesEncode` and returns its result.
	/// Nothing is written, and an empty (non-null) string is returned, when `contents` is empty.
	override string writeToAppender(Appender!string where = appender!string()) const {
		string s;
		if(contents.length)
			s = htmlEntitiesEncode(contents, where);
		else
			s = "";

		assert(s !is null);
		return s;
	}

	/// Returns the entity-encoded text laid out for pretty printing.
	///
	/// Unless the parent is a `pre` element, runs of simple whitespace are collapsed to a single
	/// space first. The text is stripped when there is a parent that is not a `p` element. The
	/// first line is prefixed with `toPrettyStringIndent`; each following line starts on a new
	/// line indented with `indentationLevel` copies of `indentWith`, and that indentation is
	/// wrapped in `<!--` `-->` when `insertComments` is set. Trailing whitespace is removed from
	/// every line.
	override string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
		string s;

		string contents = this.contents;
		// we will first collapse the whitespace per html
		// sort of. note this can break stuff yo!!!!
		if(this.parentNode is null || this.parentNode.tagName != "pre") {
			string n = "";
			// when indented, leading whitespace is dropped entirely
			bool lastWasWhitespace = indentationLevel > 0;
			foreach(char c; contents) {
				if(c.isSimpleWhite) {
					if(!lastWasWhitespace)
						n ~= ' ';
					lastWasWhitespace = true;
				} else {
					n ~= c;
					lastWasWhitespace = false;
				}
			}

			contents = n;
		}

		if(this.parentNode !is null && this.parentNode.tagName != "p") {
			contents = contents.strip;
		}

		auto e = htmlEntitiesEncode(contents);
		import std.algorithm.iteration : splitter;
		bool first = true;
		foreach(line; splitter(e, "\n")) {
			if(first) {
				s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
				first = false;
			} else {
				s ~= "\n";
				if(insertComments)
					s ~= "<!--";
				// use the caller's indent string here too, so every line is indented the same way
				foreach(i; 0 .. indentationLevel)
					s ~= indentWith;
				if(insertComments)
					s ~= "-->";
			}
			s ~= line.stripRight;
		}
		return s;
	}

	/// Always fails: text nodes cannot have children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	/// The decoded text of this node.
	string contents;
	// alias contents content; // I just mistype this a lot,
}

/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/

///.
/// Group: implementations
class Link : Element {

	///.
// Members of the Link class, whose header precedes this point in the file.
	this(Document _parentDocument) {
		super(_parentDocument);
		this.tagName = "a";
	}


	/// Creates a detached `a` element with the given href attribute and inner text.
	this(string href, string text) {
		super("a");
		setAttribute("href", href);
		innerText = text;
	}
	/+
	/// Returns everything in the href EXCEPT the query string
	@property string targetSansQuery() {

	}

	///.
	@property string domainName() {

	}

	///.
	@property string path
	+/
	/// This gets a variable from the URL's query string.
	/// Returns: the decoded value, or null when the variable is absent.
	string getValue(string name) {
		auto vars = variablesHash();
		if(auto found = name in vars)
			return *found;
		return null;
	}

	// Parses the href's query string into a name => value map (both URI-decoded).
	// Returns null when there is no href attribute at all.
	private string[string] variablesHash() {
		string href = getAttribute("href");
		if(href is null)
			return null;

		// The fragment follows the query in a URL, so cut it off first. That also
		// keeps a '?' that appears inside the fragment from being read as a query.
		auto fragment = href.indexOf("#");
		if(fragment != -1)
			href = href[0 .. fragment];

		string[string] hash;

		auto ques = href.indexOf("?");
		if(ques == -1)
			return hash;

		foreach(var; href[ques + 1 .. $].split("&")) {
			if(var.length == 0)
				continue; // "a=1&&b=2" or a trailing '&' must not create an empty name

			auto index = var.indexOf("=");
			if(index == -1)
				hash[decodeComponent(var)] = ""; // a bare name is decoded like any other key
			else
				hash[decodeComponent(var[0 .. index])] = decodeComponent(var[index + 1 .. $]);
		}

		return hash;
	}

	/// Replaces the href's query string with `vars` (names and values are URI-encoded).
	/// An existing `#fragment` is kept at the end; an empty `vars` removes the query entirely.
	/*private*/ void updateQueryString(string[string] vars) {
		string href = getAttribute("href");

		// Split the fragment off BEFORE removing the old query: the fragment sits
		// after the query, so truncating at '?' first would silently discard it.
		string frag = "";
		auto fragment = href.indexOf("#");
		if(fragment != -1) {
			frag = href[fragment .. $];
			href = href[0 .. fragment];
		}

		auto question = href.indexOf("?");
		if(question != -1)
			href = href[0 .. question];

		string query = "?";
		bool first = true;
		foreach(name, value; vars) {
			if(!first)
				query ~= "&";
			else
				first = false;

			query ~= encodeComponent(name);
			if(value.length)
				query ~= "=" ~ encodeComponent(value);
		}

		if(query != "?")
			href ~= query;

		href ~= frag;

		setAttribute("href", href);
	}

	/// Sets or adds the variable with the given name to the given value
	/// It automatically URI encodes the values and takes care of the ? and &.
	override void setValue(string name, string variable) {
		auto vars = variablesHash();
		vars[name] = variable;

		updateQueryString(vars);
	}

	/// Removes the given variable from the query string
	void removeValue(string name) {
		auto vars = variablesHash();
		vars.remove(name);

		updateQueryString(vars);
	}

	/*
	///.
	override string toString() {

	}

	///.
	override string getAttribute(string name) {
		if(name == "href") {

		} else
			return super.getAttribute(name);
	}
	*/
}

///.
/// Group: implementations
class Form : Element {

	///.
// Members of the Form class, whose header precedes this point in the file.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "form";
	}

	/// Adds the field inside the first `fieldset div` if the form has one, otherwise directly on the form.
	override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto t = this.querySelector("fieldset div");
		if(t is null)
			return super.addField(label, name, type, fieldOptions);
		else
			return t.addField(label, name, type, fieldOptions);
	}

	/// ditto, with the type fixed to "text"
	override Element addField(string label, string name, FormFieldOptions fieldOptions) {
		auto type = "text";
		auto t = this.querySelector("fieldset div");
		if(t is null)
			return super.addField(label, name, type, fieldOptions);
		else
			return t.addField(label, name, type, fieldOptions);
	}

	/// ditto, for a field built from an options map
	override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
		auto t = this.querySelector("fieldset div");
		if(t is null)
			return super.addField(label, name, options, fieldOptions);
		else
			return t.addField(label, name, options, fieldOptions);
	}

	/// Same as the three-argument overload with `makeNew` set to true.
	override void setValue(string field, string value) {
		setValue(field, value, true);
	}

	// FIXME: doesn't handle arrays; multiple fields can have the same name

	/// Sets the form field's value. For input boxes, this sets the value attribute. For
	/// textareas, it sets the innerText. For radio boxes and select boxes, it removes
	/// the checked/selected attribute from all, and adds it to the one matching the value.
	/// For checkboxes, if the value is non-null and not empty, it checks the box.
	/// Any other named element simply gets its value attribute set.

	/// If you set a value that doesn't exist, it throws an exception if makeNew is false.
	/// Otherwise, it makes a new input with type=hidden to keep the value.
	void setValue(string field, string value, bool makeNew) {
		auto eles = getField(field);
		if(eles.length == 0) {
			if(makeNew) {
				addInput(field, value);
				return;
			} else
				throw new Exception("form field does not exist");
		}

		if(eles.length == 1) {
			auto e = eles[0];
			switch(e.tagName) {
				case "textarea":
					e.innerText = value;
				break;
				case "input":
					string type = e.getAttribute("type");
					if(type is null) {
						e.value = value;
						return;
					}
					switch(type) {
						case "checkbox":
						case "radio":
							// "false" is treated like an empty value: it unchecks the box
							if(value.length && value != "false")
								e.setAttribute("checked", "checked");
							else
								e.removeAttribute("checked");
						break;
						default:
							e.value = value;
							return;
					}
				break;
				case "select":
					bool found = false;
					foreach(child; e.tree) {
						if(child.tagName != "option")
							continue;
						// an option without a value attribute submits its text
						string val = child.getAttribute("value");
						if(val is null)
							val = child.innerText;
						if(val == value) {
							child.setAttribute("selected", "selected");
							found = true;
						} else
							child.removeAttribute("selected");
					}

					if(!found) {
						e.addChild("option", value)
						.setAttribute("selected", "selected");
					}
				break;
				default:
					// Other named elements (button, output, ...) used to trip an
					// assert(0) here; fall back to the plain value attribute instead.
					e.value = value;
				break;
			}
		} else {
			// assume radio boxes
			foreach(e; eles) {
				string val = e.getAttribute("value");
				//if(val is null)
				//	throw new Exception("don't know what to do with radio boxes with null value");
				if(val == value)
					e.setAttribute("checked", "checked");
				else
					e.removeAttribute("checked");
			}
		}
	}

	/// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue,
	/// it makes no attempt to find and modify existing elements in the form to the new values.
	void addValueArray(string key, string[] arrayOfValues) {
		// NOTE(review): this relies on addChild("input", a, b) treating `a` as the name and
		// `b` as the value; addChild is defined elsewhere in the file - confirm that is the case.
		foreach(arr; arrayOfValues)
			addChild("input", key, arr);
	}

	/// Gets the value of the field; what would be given if it submitted right now. (so
	/// it handles select boxes and radio buttons too). For checkboxes, if a value isn't
	/// given, but it is checked, it returns "checked", since null and "" are indistinguishable
	///
	/// Returns "" when the field does not exist or nothing is selected/checked.
	string getValue(string field) {
		auto eles = getField(field);
		if(eles.length == 0)
			return "";
		if(eles.length == 1) {
			auto e = eles[0];
			switch(e.tagName) {
				case "input":
					if(e.type == "checkbox") {
						if(e.checked)
							return e.value.length ? e.value : "checked";
						return "";
					} else if(e.type == "radio") {
						// a lone radio button follows the same rule as a radio group
						// below: it only contributes its value while checked
						if(e.checked)
							return e.value;
						return "";
					} else
						return e.value;
				case "textarea":
					return e.innerText;
				case "select":
					foreach(child; e.tree) {
						if(child.tagName != "option")
							continue;
						if(child.selected) {
							// mirror setValue: without a value attribute the option's text is its value
							string val = child.getAttribute("value");
							if(val is null)
								val = child.innerText;
							return val;
						}
					}
				break;
				default:
					// previously assert(0); getPostableData reaches this for every
					// element carrying a name attribute, not just form controls
					return e.value;
			}
		} else {
			// assuming radio
			foreach(e; eles) {
				if(e.checked)
					return e.value;
			}
		}

		return "";
	}

	// FIXME: doesn't handle multiple elements with the same name (except radio buttons)
	/// Builds a `name=value&name=value` string from every descendant that has a name
	/// attribute, URI-encoding both sides. Each distinct name is emitted once, using getValue.
	string getPostableData() {
		bool[string] namesDone;

		string ret;
		bool outputted = false;

		foreach(e; getElementsBySelector("[name]")) {
			if(e.name in namesDone)
				continue;

			if(outputted)
				ret ~= "&";
			else
				outputted = true;

			ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name));

			namesDone[e.name] = true;
		}

		return ret;
	}

	/// Gets the actual elements with the given name
	Element[] getField(string name) {
		Element[] ret;
		foreach(e; tree) {
			if(e.name == name)
				ret ~= e;
		}
		return ret;
	}

	/// Grabs the <label> with the given for tag, if there is one.
	Element getLabel(string forId) {
		foreach(e; tree)
			if(e.tagName == "label" && e.getAttribute("for") == forId)
				return e;
		return null;
	}

	/// Adds a new INPUT field to the end of the form with the given attributes.
	Element addInput(string name, string value, string type = "hidden") {
		auto e = new Element(parentDocument, "input", null, true);
		e.name = name;
		e.value = value;
		e.type = type;

		appendChild(e);

		return e;
	}

	/// Removes the given field from the form. It finds the element and knocks it right out.
	void removeField(string name) {
		foreach(e; getField(name))
			e.parentNode.removeChild(e);
	}

	/+
	/// Returns all form members.
	@property Element[] elements() {

	}

	///.
string opDispatch(string name)(string v = null)
		// filter things that should actually be attributes on the form
		if( name != "method" && name != "action" && name != "enctype"
		 && name != "style" && name != "name" && name != "id" && name != "class")
	{

	}
	+/
/+
	void submit() {
		// take its elements and submit them through http
	}
+/
}

import std.conv;

/// Helper subclass for `table` elements: row building, caption access and a logical cell grid.
/// Group: implementations
class Table : Element {

	///.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "table";
	}

	/// Creates an element with the given type and content.
	/// An `Html` argument is assigned as innerHTML; anything else is converted with `to!string` and set as text.
	Element th(T)(T t) {
		return makeCell("th", t);
	}

	/// ditto
	Element td(T)(T t) {
		return makeCell("td", t);
	}

	// Shared body of th() and td(): the cell is created through the parent document when there is one.
	private Element makeCell(T)(string cellTag, T t) {
		Element e;
		if(parentDocument !is null)
			e = parentDocument.createElement(cellTag);
		else
			e = Element.make(cellTag);
		static if(is(T == Html))
			e.innerHTML = t;
		else
			e.innerText = to!string(t);
		return e;
	}

	/// Appends a row of `th` cells to the table's `thead` (created if missing). Returns the new row.
	Element appendHeaderRow(T...)(T t) {
		return appendRowInternal("th", "thead", t);
	}

	/// Appends a row of `td` cells to the table's `tfoot` (created if missing). Returns the new row.
	Element appendFooterRow(T...)(T t) {
		return appendRowInternal("td", "tfoot", t);
	}

	/// Appends a row of `td` cells to the table's `tbody` (created if missing). Returns the new row.
	Element appendRow(T...)(T t) {
		return appendRowInternal("td", "tbody", t);
	}

	/// Adds `classes[i]` to the cell in column `i` of every grid row.
	/// Empty class names and columns beyond a row's length are skipped.
	void addColumnClasses(string[] classes...) {
		auto grid = getGrid();
		foreach(row; grid)
			foreach(i, cl; classes) {
				if(cl.length)
				if(i < row.length)
					row[i].addClass(cl); // NOTE(review): getGrid documents that cells may be null - confirm addClass is safe here
			}
	}

	// Builds a <tr> from the arguments and appends it to the first direct child whose
	// tag is `findType`, creating that section when absent.
	//
	// Per argument: an Element that is already a td/th is used as-is, any other Element
	// is wrapped in an `innerType` cell; Html becomes the cell's innerHTML; an Element[]
	// becomes ONE cell holding all of them; a string[] becomes one cell PER string;
	// everything else is stringified into a single cell.
	private Element appendRowInternal(T...)(string innerType, string findType, T t) {
		Element row = Element.make("tr");

		foreach(e; t) {
			static if(is(typeof(e) : Element)) {
				if(e.tagName == "td" || e.tagName == "th")
					row.appendChild(e);
				else {
					Element a = Element.make(innerType);

					a.appendChild(e);

					row.appendChild(a);
				}
			} else static if(is(typeof(e) == Html)) {
				Element a = Element.make(innerType);
				a.innerHTML = e.source;
				row.appendChild(a);
			} else static if(is(typeof(e) == Element[])) {
				Element a = Element.make(innerType);
				foreach(ele; e)
					a.appendChild(ele);
				row.appendChild(a);
			} else static if(is(typeof(e) == string[])) {
				foreach(ele; e) {
					Element a = Element.make(innerType);
					a.innerText = to!string(ele);
					row.appendChild(a);
				}
			} else {
				Element a = Element.make(innerType);
				a.innerText = to!string(e);
				row.appendChild(a);
			}
		}

		foreach(e; children) {
			if(e.tagName == findType) {
				e.appendChild(row);
				return row;
			}
		}

		// the type was not found if we are here... let's add it so it is well-formed
		auto lol = this.addChild(findType);
		lol.appendChild(row);

		return row;
	}

	/// Returns the table's `caption` child, appending a new empty one if there is none.
	Element captionElement() {
		Element cap;
		foreach(c; children) {
			if(c.tagName == "caption") {
				cap = c;
				break;
			}
		}

		if(cap is null) {
			cap = Element.make("caption");
			appendChild(cap);
		}

		return cap;
	}

	/// The caption text. Note that reading it creates the caption element when missing.
	@property string caption() {
		return captionElement().innerText;
	}

	/// Sets the caption text, creating the caption element when missing.
	@property void caption(string text) {
		captionElement().innerText = text;
	}

	/// Gets the logical layout of the table as a rectangular grid of
	/// cells. It considers rowspan and colspan. A cell with a large
	/// span is represented in the grid by being referenced several times.
	/// The tablePortition parameter can get just a <thead>, <tbody>, or
	/// <tfoot> portion if you pass one.
	///
	/// Note: the rectangular grid might include null cells.
	///
	/// This is kinda expensive so you should call once when you want the grid,
	/// then do lookups on the returned array.
	TableCell[][] getGrid(Element tablePortition = null)
		in {
			// a null portion means "the whole table"; otherwise it must be one of our sections
			if(tablePortition !is null) {
				assert(tablePortition.parentNode is this);
				assert(
					tablePortition.tagName == "tbody"
					||
					tablePortition.tagName == "tfoot"
					||
					tablePortition.tagName == "thead"
				);
			}
		}
	do {
		if(tablePortition is null)
			tablePortition = this;

		TableCell[][] ret;

		// FIXME: will also return rows of sub tables!
		auto rows = tablePortition.getElementsByTagName("tr");
		ret.length = rows.length;

		int maxLength = 0;

		// Stores `cell` in ret[row]. A position of -1 means "first free (null or
		// past-the-end) slot"; otherwise the row is padded with nulls up to
		// `position`. Returns the column actually used.
		int insertCell(int row, int position, TableCell cell) {
			if(row >= ret.length)
				return position; // not supposed to happen - a rowspan is prolly too big.

			if(position == -1) {
				position++;
				foreach(item; ret[row]) {
					if(item is null)
						break;
					position++;
				}
			}

			if(position < ret[row].length)
				ret[row][position] = cell;
			else
				foreach(i; ret[row].length .. position + 1) {
					if(i == position)
						ret[row] ~= cell;
					else
						ret[row] ~= null;
				}
			return position;
		}

		foreach(i, rowElement; rows) {
			auto row = cast(TableRow) rowElement;
			assert(row !is null);
			assert(i < ret.length);

			foreach(cellElement; rowElement.childNodes) {
				auto cell = cast(TableCell) cellElement;
				if(cell is null)
					continue;

				// FIXME: colspan == 0 or rowspan == 0
				// is supposed to mean fill in the rest of
				// the table, not skip it
				foreach(int j; 0 .. cell.colspan) {
					// In the cell's own row, take the first free slot. The rows it
					// spans into must reuse THAT column: a plain per-row counter is
					// wrong as soon as earlier rowspans already occupy slots in this
					// row, because the cell then lands further right than the counter.
					int column = -1;
					foreach(int k; 0 .. cell.rowspan) {
						auto placedAt = insertCell(k + cast(int) i, column, cell);
						if(k == 0)
							column = placedAt;
					}
				}
			}

			if(ret[i].length > maxLength)
				maxLength = cast(int) ret[i].length;
		}

		// want to ensure it's rectangular
		foreach(ref r; ret) {
			foreach(i; r.length .. maxLength)
				r ~= null;
		}

		return ret;
	}
}

/// Represents a table row element - a <tr>
/// Group: implementations
class TableRow : Element {
	///.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "tr";
	}

	// FIXME: the standard says there should be a lot more in here,
	// but meh, I never use it and it's a pain to implement.
}

/// Represents anything that can be a table cell - <td> or <th> html.
/// Group: implementations
class TableCell : Element {
	///.
// Members of the TableCell class, whose header precedes this point in the file.
	this(Document _parentDocument, string _tagName) {
		super(_parentDocument, _tagName);
	}

	/// Number of rows this cell covers, read from the `rowspan` attribute.
	/// A missing, empty, non-numeric or negative attribute counts as 1.
	@property int rowspan() const {
		return parseSpan(getAttribute("rowspan"));
	}

	/// Number of columns this cell covers, read from the `colspan` attribute.
	/// A missing, empty, non-numeric or negative attribute counts as 1.
	@property int colspan() const {
		return parseSpan(getAttribute("colspan"));
	}

	/// Sets the `rowspan` attribute and returns the value that was passed in.
	@property int rowspan(int i) {
		setAttribute("rowspan", to!string(i));
		return i;
	}

	/// Sets the `colspan` attribute and returns the value that was passed in.
	@property int colspan(int i) {
		setAttribute("colspan", to!string(i));
		return i;
	}

	// Lenient span parsing. Attribute text comes straight from markup, so values such
	// as " 2 ", "abc" or "3px" must not throw out of a simple property read; anything
	// that is not a clean non-negative integer falls back to the default span of 1.
	private static int parseSpan(string raw) {
		import std.string : strip;

		raw = raw.strip;
		if(raw.length == 0)
			return 1;

		try {
			immutable parsed = to!int(raw);
			return parsed < 0 ? 1 : parsed;
		} catch(ConvException) {
			return 1;
		}
	}

}


/// Exception type for markup problems; it carries only a message and the throw location.
/// Group: implementations
class MarkupException : Exception {

	///.
	this(string message, string file = __FILE__, size_t line = __LINE__) {
		super(message, file, line);
	}
}

/// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree.
/// Group: implementations
class ElementNotFoundException : Exception {

	/// type == kind of element you were looking for and search == a selector describing the search.
	this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) {
		this.searchContext = searchContext;
		super("Element of type '"~type~"' matching {"~search~"} not found.", file, line);
	}

	/// The element the failed search was started from, as given to the constructor.
	Element searchContext;
}

/// The html struct is used to differentiate between regular text nodes and html in certain functions
///
/// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");`
/// Group: core_functionality
struct Html {
	/// This string holds the actual html. Use it to retrieve the contents.
// Last member of struct Html, whose header precedes this point in the file.
	string source;
}

// for the observers
enum DomMutationOperations {
	setAttribute,
	removeAttribute,
	appendChild, // tagname, attributes[], innerHTML
	insertBefore,
	truncateChildren,
	removeChild,
	appendHtml,
	replaceHtml,
	appendText,
	replaceText,
	replaceTextOnly
}

// and for observers too
struct DomMutationEvent {
	DomMutationOperations operation;
	Element target;
	Element related; // what this means differs with the operation
	Element related2;
	string relatedString;
	string relatedString2;
}


private immutable static string[] htmlSelfClosedElements = [
	// html 4
	"img", "hr", "input", "br", "col", "link", "meta",
	// html 5
	"source" ];

private immutable static string[] htmlInlineElements = [
	"span", "strong", "em", "b", "i", "a"
];


static import std.conv;

/// Converts a string of hexadecimal digits (no prefix, no sign) to an int.
///
/// Both lower and upper case digits are accepted. An empty string yields 0.
/// Values too large for an int wrap around silently.
///
/// Throws: Exception if any character is not a hex digit.
int intFromHex(string hex) {
	int value = 0;
	foreach(char q; hex) {
		int v;
		if(q >= '0' && q <= '9')
			v = q - '0';
		else if(q >= 'a' && q <= 'f')
			v = q - 'a' + 10;
		else if(q >= 'A' && q <= 'F')
			v = q - 'A' + 10; // upper case used to be rejected as illegal
		else
			throw new Exception("Illegal hex character: " ~ q);

		// most significant digit first: shift what we have and add the new digit
		value = value * 16 + v;
	}

	return value;
}


// CSS selector handling

// EXTENSIONS
// dd - dt means get the dt directly before that dd (opposite of +)                  NOT IMPLEMENTED
// dd -- dt means rewind siblings until you hit a dt, go as far as you need to       NOT IMPLEMENTED
// dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl")
// dt << dl  means go as far up as needed to find a dl (you have an element and want its containers)      NOT IMPLEMENTED
// :first  means to stop at the first hit, don't do more (so p + p == p ~ p:first



//
// CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it.
// That might be useful to implement, though I do have parent selectors too.

/// The punctuation tokens the selector lexer recognises. Anything else is white space or a name.
static immutable string[] selectorTokens = [
	// It is important that the 2 character possibilities go first here for accurate lexing
	"~=", "*=", "|=", "^=", "$=", "!=",
	"::", ">>",
	"<<", // my any-parent extension (reciprocal of whitespace)
	// " - ", // previous-sibling extension (whitespace required to disambiguate tag-names)
	".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")"
]; // other is white space or a name.

/// Returns the index into selectorTokens of the token starting at `str[position]`,
/// or -1 if no token starts there. `position` must be a valid index into `str`.
sizediff_t idToken(string str, sizediff_t position) {
	sizediff_t tid = -1;
	char c = str[position];
	foreach(a, token; selectorTokens)

		if(c == token[0]) {
			if(token.length > 1) {
				// two-character token: the following character has to match as well
				if(position + 1 >= str.length || str[position+1] != token[1])
					continue; // not this token
			}
			tid = a;
			break;
		}
	return tid;
}

/// Splits a CSS selector into tokens.
///
/// Punctuation from selectorTokens (including a single space) becomes its own token,
/// quoted strings become one token without the quotes, and everything else is an
/// identifier, with backslash escapes resolved. Control characters and `/* */`
/// comments between tokens are dropped.
///
/// Throws: Exception when a quoted string is never closed.
// look, ma, no phobos!
// new lexer by ketmar
string[] lexSelector (string selstr) {

	// same lookup as the module-level idToken, kept local to the lexer
	static sizediff_t idToken (string str, size_t stpos) {
		char c = str[stpos];
		foreach (sizediff_t tidx, immutable token; selectorTokens) {
			if (c == token[0]) {
				if (token.length > 1) {
					assert(token.length == 2, token); // we don't have 3-char tokens yet
					if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue;
				}
				return tidx;
			}
		}
		return -1;
	}

	// skip spaces and comments
	static string removeLeadingBlanks (string str) {
		size_t curpos = 0;
		while (curpos < str.length) {
			immutable char ch = str[curpos];
			// this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares!
			if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') {
				// comment
				curpos += 2;
				while (curpos < str.length) {
					if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') {
						curpos += 2;
						break;
					}
					++curpos;
				}
			} else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below.
				++curpos;

				// FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While
				// "foo bar" is clear, and can only have one meaning, consider ".foo .bar".
				// That is not the same as ".foo.bar". If the space is stripped, important
				// information is lost, despite the tokens being separatable anyway.
				//
				// The parser really needs to be aware of the presence of a space.
			} else {
				break;
			}
		}
		return str[curpos..$];
	}

	static bool isBlankAt() (string str, size_t pos) {
		// we should consider unicode spaces too, but... unicode sux anyway.
		return
			(pos < str.length && // in string
			 (str[pos] <= 32 || // space
			  (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment
	}

	string[] tokens;
	// lexx it!
	while ((selstr = removeLeadingBlanks(selstr)).length > 0) {
		if(selstr[0] == '\"' || selstr[0] == '\'') {
			auto end = selstr[0];
			size_t pos = 1;
			bool escaping;
			// Scan to the closing quote. A character that follows a backslash is
			// consumed unconditionally, so an escaped quote does not end the string.
			// (The loop used to stop as soon as it saw a backslash.)
			while(pos < selstr.length && (escaping || selstr[pos] != end)) {
				if(escaping)
					escaping = false;
				else if(selstr[pos] == '\\')
					escaping = true;
				pos++;
			}

			if(pos >= selstr.length)
				throw new Exception("unterminated string in selector: " ~ selstr);

			// FIXME: do better unescaping
			tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`);
			// the closing quote may be the very last character; that is fine
			selstr = selstr[pos + 1 .. $];
			continue;
		}


		// no tokens starts with escape
		immutable tid = idToken(selstr, 0);
		if (tid >= 0) {
			// special token
			tokens ~= selectorTokens[tid]; // it's funnier this way
			selstr = selstr[selectorTokens[tid].length..$];
			continue;
		}
		// from start to space or special token
		size_t escapePos = size_t.max;
		size_t curpos = 0; // i can has chizburger^w escape at the start
		while (curpos < selstr.length) {
			if (selstr[curpos] == '\\') {
				// this is escape, just skip it and next char
				if (escapePos == size_t.max) escapePos = curpos;
				curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length);
			} else {
				if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break;
				++curpos;
			}
		}
		// identifier
		if (escapePos != size_t.max) {
			// i hate it when it happens
			// copy the identifier piecewise, dropping each backslash and keeping the escaped char
			string id = selstr[0..escapePos];
			while (escapePos < curpos) {
				if (curpos-escapePos < 2) break; // lone trailing backslash: nothing to keep
				id ~= selstr[escapePos+1]; // escaped char
				escapePos += 2;
				immutable stp = escapePos;
				while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos;
				if (escapePos > stp) id ~= selstr[stp..escapePos];
			}
			if (id.length > 0) tokens ~= id;
		} else {
			tokens ~= selstr[0..curpos];
		}
		selstr = selstr[curpos..$];
	}
	return tokens;
}
// NOTE(review): several expectations below assume that spaces are skipped. Since
// removeLeadingBlanks keeps ' ' and " " is in selectorTokens, they look stale
// (which may be why they sit behind a version flag) - confirm before enabling.
version(unittest_domd_lexer) unittest {
	assert(lexSelector(r" test\=me /*d*/") == [r"test=me"]);
	assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]);
	assert(lexSelector(r" < <") == ["<", "<"]);
	assert(lexSelector(r" <<") == ["<<"]);
	assert(lexSelector(r" <</") == ["<<", "/"]);
	assert(lexSelector(r" <</*") == ["<<"]);
	assert(lexSelector(r" <\</*") == ["<", "<"]);
	assert(lexSelector(r"heh\") == ["heh"]);
	assert(lexSelector(r"alice \") == ["alice"]);
	assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]);
}

/// One compound selector plus the combinator (`separation`) that links it to the previous part.
struct SelectorPart {
	string tagNameFilter; ///.
	string[] attributesPresent; /// [attr]
	string[2][] attributesEqual; /// [attr=value]
	string[2][] attributesStartsWith; /// [attr^=value]
	string[2][] attributesEndsWith; /// [attr$=value]
	// split it on space, then match to these
	string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
	// split it on dash, then match to these
	string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
	string[2][] attributesInclude; /// [attr*=value]
	string[2][] attributesNotEqual; /// [attr!=value] -- extension by me

	string[] hasSelectors; /// :has(this)
	string[] notSelectors; /// :not(this)

	string[] isSelectors; /// :is(this)
	string[] whereSelectors; /// :where(this)

	ParsedNth[] nthOfType; /// .
	ParsedNth[] nthLastOfType; /// .
	ParsedNth[] nthChild; /// .

	bool firstChild; ///.
	bool lastChild; ///.

	bool firstOfType; /// .
	bool lastOfType; /// .

	bool emptyElement; ///.
	bool whitespaceOnly; ///
	bool oddChild; ///.
	bool evenChild; ///.

	bool scopeElement; /// the css :scope thing; matches just the `this` element. NOT IMPLEMENTED

	bool rootElement; ///.
// Remaining members of struct SelectorPart, whose header precedes this point in the file.

	int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf

	/// True when every field except `separation` still has its initial value.
	bool isCleanSlateExceptSeparation() {
		auto cp = this;
		cp.separation = -1;
		return cp is SelectorPart.init;
	}

	/// Renders this part back into selector syntax, starting with its combinator.
	string toString() {
		string ret;
		switch(separation) {
			default: assert(0);
			case -1: break;
			case 0: ret ~= " "; break;
			case 1: ret ~= " > "; break;
			case 2: ret ~= " + "; break;
			case 3: ret ~= " ~ "; break;
			case 4: ret ~= " < "; break;
		}
		ret ~= tagNameFilter;
		foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]";
		foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]";
		foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]";

		foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")";
		foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")";

		foreach(a; isSelectors) ret ~= ":is(" ~ a ~ ")";
		foreach(a; whereSelectors) ret ~= ":where(" ~ a ~ ")";

		foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")";
		foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")";
		foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")";

		if(firstChild) ret ~= ":first-child";
		if(lastChild) ret ~= ":last-child";
		if(firstOfType) ret ~= ":first-of-type";
		if(lastOfType) ret ~= ":last-of-type";
		if(emptyElement) ret ~= ":empty";
		if(whitespaceOnly) ret ~= ":whitespace-only";
		if(oddChild) ret ~= ":odd-child";
		if(evenChild) ret ~= ":even-child";
		if(rootElement) ret ~= ":root";
		if(scopeElement) ret ~= ":scope";

		return ret;
	}

	// USEFUL
	/// Tests whether `e` satisfies every filter stored in this part. The combinator
	/// (`separation`) is not looked at here.
	///
	/// Returns false for null and for anything whose nodeType is not 1.
	bool matchElement(Element e) {
		// FIXME: this can be called a lot of times, and really add up in times according to the profiler.
		// Each individual call is reasonably fast already, but it adds up.
		if(e is null) return false;
		if(e.nodeType != 1) return false;

		if(tagNameFilter != "" && tagNameFilter != "*")
			if(e.tagName != tagNameFilter)
				return false;
		if(firstChild) {
			if(e.parentNode is null)
				return false;
			if(e.parentNode.childElements[0] !is e)
				return false;
		}
		if(lastChild) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			if(ce[$-1] !is e)
				return false;
		}
		if(firstOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach(c; ce) {
				if(c.tagName == e.tagName) {
					if(c !is e)
						return false;
					// It is the first of its type - but do NOT return true yet: the
					// attribute, :not, :nth-* etc. filters below still have to pass.
					break;
				}
			}
		}
		if(lastOfType) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			foreach_reverse(c; ce) {
				if(c.tagName == e.tagName) {
					if(c !is e)
						return false;
					// as above: keep going so the remaining filters are applied
					break;
				}
			}
		}
		/+
		if(scopeElement) {
			if(e !is this_)
				return false;
		}
		+/
		if(emptyElement) {
			if(e.children.length)
				return false;
		}
		if(whitespaceOnly) {
			if(e.innerText.strip.length)
				return false;
		}
		if(rootElement) {
			if(e.parentNode !is null)
				return false;
		}
		if(oddChild || evenChild) {
			if(e.parentNode is null)
				return false;
			// note: the index tested here is zero-based
			foreach(i, child; e.parentNode.childElements) {
				if(child is e) {
					if(oddChild && !(i&1))
						return false;
					if(evenChild && (i&1))
						return false;
					break;
				}
			}
		}

		// true when `value` is one of the pieces of `attr` split on `separator`
		bool matchWithSeparator(string attr, string value, string separator) {
			foreach(s; attr.split(separator))
				if(s == value)
					return true;
			return false;
		}

		foreach(a; attributesPresent)
			if(a !in e.attributes)
				return false;
		foreach(a; attributesEqual)
			if(a[0] !in e.attributes || e.attributes[a[0]] != a[1])
				return false;
		foreach(a; attributesNotEqual)
			// FIXME: maybe it should say null counts... this just bit me.
			// I did [attr][attr!=value] to work around.
			//
			// if it's null, it's not equal, right?
			//if(a[0] !in e.attributes || e.attributes[a[0]] == a[1])
			if(e.getAttribute(a[0]) == a[1])
				return false;
		foreach(a; attributesInclude)
			if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1))
				return false;
		foreach(a; attributesStartsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1]))
				return false;
		foreach(a; attributesEndsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1]))
				return false;
		foreach(a; attributesIncludesSeparatedBySpaces)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " "))
				return false;
		foreach(a; attributesIncludesSeparatedByDashes)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-"))
				return false;
		foreach(a; hasSelectors) {
			if(e.querySelector(a) is null)
				return false;
		}
		foreach(a; notSelectors) {
			auto sel = Selector(a);
			if(sel.matchesElement(e))
				return false;
		}
		foreach(a; isSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}
		foreach(a; whereSelectors) {
			auto sel = Selector(a);
			if(!sel.matchesElement(e))
				return false;
		}

		foreach(a; nthChild) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements;

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthOfType) {
			if(e.parentNode is null)
				return false;

			auto among = e.parentNode.childElements(e.tagName);

			if(!a.solvesFor(among, e))
				return false;
		}
		foreach(a; nthLastOfType) {
			if(e.parentNode is null)
				return false;

			// counting from the end is just counting over the reversed sibling list
			auto among = retro(e.parentNode.childElements(e.tagName));

			if(!a.solvesFor(among, e))
				return false;
		}

		return true;
	}
}

/// The parsed argument of an `:nth-*()` pseudo-class: `multiplier`*n + `adder`,
/// optionally followed by `of <selector>`.
struct ParsedNth {
	int multiplier;
	int adder;

	string of;

	/// Parses `odd`, `even`, a plain integer, or the `An+B` form (e.g. "2n+1", "-n+3", "n").
	/// Throws: Exception on unexpected text after the `n`.
	this(string text) {
		auto original = text;
		consumeWhitespace(text);
		if(text.startsWith("odd")) {
			multiplier = 2;
			adder = 1;

			text = text[3 .. $];
		} else if(text.startsWith("even")) {
			// even positions are 2n+0; this used to be parsed exactly like "odd"
			multiplier = 2;
			adder = 0;

			text = text[4 .. $];
		} else {
			int n;
			if(text.length && text[0] == 'n') {
				n = 1; // bare "n" means 1n
			} else if(text.length > 1 && (text[0] == '-' || text[0] == '+') && text[1] == 'n') {
				// "-n" / "+n": a sign with no digits is a coefficient of -1 / +1.
				// parseNumber would eat the sign and report 0 here.
				n = text[0] == '-' ? -1 : 1;
				text = text[1 .. $];
			} else {
				n = parseNumber(text);
			}
			consumeWhitespace(text);
			if(text.length && text[0] == 'n') {
				multiplier = n;
				text = text[1 .. $];
				consumeWhitespace(text);
				if(text.length) {
					if(text[0] == '+') {
						text = text[1 .. $];
						adder = parseNumber(text);
					} else if(text[0] == '-') {
						text = text[1 .. $];
						adder = -parseNumber(text);
					} else if(text[0] == 'o') {
						// continue, this is handled below
					} else
						throw new Exception("invalid css string at " ~ text ~ " in " ~ original);
				}
			} else {
				// no 'n' at all: a plain index
				adder = n;
			}
		}

		consumeWhitespace(text);
		if(text.startsWith("of")) {
			text = text[2 .. $];
			consumeWhitespace(text);
			of = text[0 .. $];
		}
	}

	/// Renders the expression back as `An+B` with an optional ` of selector` suffix.
	string toString() {
		return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of);
	}

	/// Tests whether `e` sits at a position in `elements` that the expression selects.
	///
	/// Positions are 1-based and, when an `of` selector is present, count only the
	/// elements matching it. Returns false when `e` is not found among them.
	bool solvesFor(R)(R elements, Element e) {
		int idx = 1;
		bool found = false;
		foreach(ele; elements) {
			if(of.length) {
				auto sel = Selector(of);
				if(!sel.matchesElement(ele))
					continue;
			}
			if(ele is e) {
				found = true;
				break;
			}
			idx++;
		}
		if(!found) return false;

		// multiplier* n + adder = idx
		// if there is a solution for integral n >= 0, it matches

		idx -= adder;
		if(multiplier == 0)
			return idx == 0;

		// The divisibility test alone also accepts negative n: "2n+5" would match
		// position 1 and "-n+3" would match every position. CSS only counts n >= 0.
		return idx % multiplier == 0 && idx / multiplier >= 0;
	}

	private void consumeWhitespace(ref string text) {
		while(text.length && text[0] == ' ')
			text = text[1 .. $];
	}

	// Reads an optionally signed decimal integer from the front of `text` and
	// advances past it. Returns 0 when no digits follow (a sign is still consumed).
	private int parseNumber(ref string text) {
		consumeWhitespace(text);
		if(text.length == 0) return 0;
		bool negative = text[0] == '-';
		if(text[0] == '+')
			text = text[1 .. $];
		if(negative) text = text[1 .. $];
		int i = 0;
		while(i < text.length && (text[i] >= '0' && text[i] <= '9'))
			i++;
		if(i == 0)
			return 0;
		int cool = to!int(text[0 .. i]);
		text = text[i .. $];
		return negative ? -cool : cool;
	}
}

// USEFUL
///.
Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) {
	Element[] ret;
	if(!parts.length) {
		return [start]; // the null selector only matches the start point; it
				// is what terminates the recursion
	}

	auto part = parts[0];
	//writeln("checking ", part, " against ", start, " with ", part.separation);
	switch(part.separation) {
		default: assert(0);
		case -1:
		case 0: // tree
			foreach(e; start.tree) {
				if(part.separation == 0 && start is e)
					continue; // space doesn't match itself!
7396 if(part.matchElement(e)) { 7397 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7398 } 7399 } 7400 break; 7401 case 1: // children 7402 foreach(e; start.childNodes) { 7403 if(part.matchElement(e)) { 7404 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7405 } 7406 } 7407 break; 7408 case 2: // next-sibling 7409 auto e = start.nextSibling("*"); 7410 if(part.matchElement(e)) 7411 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7412 break; 7413 case 3: // younger sibling 7414 auto tmp = start.parentNode; 7415 if(tmp !is null) { 7416 sizediff_t pos = -1; 7417 auto children = tmp.childElements; 7418 foreach(i, child; children) { 7419 if(child is start) { 7420 pos = i; 7421 break; 7422 } 7423 } 7424 assert(pos != -1); 7425 foreach(e; children[pos+1..$]) { 7426 if(part.matchElement(e)) 7427 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7428 } 7429 } 7430 break; 7431 case 4: // immediate parent node, an extension of mine to walk back up the tree 7432 auto e = start.parentNode; 7433 if(part.matchElement(e)) { 7434 ret ~= getElementsBySelectorParts(e, parts[1..$]); 7435 } 7436 /* 7437 Example of usefulness: 7438 7439 Consider you have an HTML table. If you want to get all rows that have a th, you can do: 7440 7441 table th < tr 7442 7443 Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes 7444 */ 7445 break; 7446 case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator) 7447 /* 7448 Like with the < operator, this is best used to find some parent of a particular known element. 7449 7450 Say you have an anchor inside a 7451 */ 7452 } 7453 7454 return ret; 7455 } 7456 7457 /++ 7458 Represents a parsed CSS selector. You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing. 
7459 7460 See_Also: 7461 $(LIST 7462 * [Element.querySelector] 7463 * [Element.querySelectorAll] 7464 * [Element.matches] 7465 * [Element.closest] 7466 * [Document.querySelector] 7467 * [Document.querySelectorAll] 7468 ) 7469 +/ 7470 /// Group: core_functionality 7471 struct Selector { 7472 SelectorComponent[] components; 7473 string original; 7474 /++ 7475 Parses the selector string and constructs the usable structure. 7476 +/ 7477 this(string cssSelector) { 7478 components = parseSelectorString(cssSelector); 7479 original = cssSelector; 7480 } 7481 7482 /++ 7483 Returns true if the given element matches this selector, 7484 considered relative to an arbitrary element. 7485 7486 You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this 7487 with [std.algorithm.iteration.filter]: 7488 7489 --- 7490 Selector sel = Selector("foo > bar"); 7491 auto lazySelectorRange = element.tree.filter!(e => sel.matchElement(e))(document.root); 7492 --- 7493 +/ 7494 bool matchesElement(Element e, Element relativeTo = null) { 7495 foreach(component; components) 7496 if(component.matchElement(e, relativeTo)) 7497 return true; 7498 7499 return false; 7500 } 7501 7502 /++ 7503 Reciprocal of [Element.querySelectorAll] 7504 +/ 7505 Element[] getMatchingElements(Element start) { 7506 Element[] ret; 7507 foreach(component; components) 7508 ret ~= getElementsBySelectorParts(start, component.parts); 7509 return removeDuplicates(ret); 7510 } 7511 7512 /++ 7513 Like [getMatchingElements], but returns a lazy range. Be careful 7514 about mutating the dom as you iterate through this. 
7515 +/ 7516 auto getMatchingElementsLazy(Element start, Element relativeTo = null) { 7517 import std.algorithm.iteration; 7518 return start.tree.filter!(a => this.matchesElement(a, relativeTo)); 7519 } 7520 7521 7522 /// Returns the string this was built from 7523 string toString() { 7524 return original; 7525 } 7526 7527 /++ 7528 Returns a string from the parsed result 7529 7530 7531 (may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing) 7532 +/ 7533 string parsedToString() { 7534 string ret; 7535 7536 foreach(idx, component; components) { 7537 if(idx) ret ~= ", "; 7538 ret ~= component.toString(); 7539 } 7540 7541 return ret; 7542 } 7543 } 7544 7545 ///. 7546 struct SelectorComponent { 7547 ///. 7548 SelectorPart[] parts; 7549 7550 ///. 7551 string toString() { 7552 string ret; 7553 foreach(part; parts) 7554 ret ~= part.toString(); 7555 return ret; 7556 } 7557 7558 // USEFUL 7559 ///. 7560 Element[] getElements(Element start) { 7561 return removeDuplicates(getElementsBySelectorParts(start, parts)); 7562 } 7563 7564 // USEFUL (but not implemented) 7565 /// If relativeTo == null, it assumes the root of the parent document. 7566 bool matchElement(Element e, Element relativeTo = null) { 7567 if(e is null) return false; 7568 Element where = e; 7569 int lastSeparation = -1; 7570 7571 auto lparts = parts; 7572 7573 if(parts.length && parts[0].separation > 0) { 7574 // if it starts with a non-trivial separator, inject 7575 // a "*" matcher to act as a root. for cases like document.querySelector("> body") 7576 // which implies html 7577 7578 // there is probably a MUCH better way to do this. 
7579 auto dummy = SelectorPart.init; 7580 dummy.tagNameFilter = "*"; 7581 dummy.separation = 0; 7582 lparts = dummy ~ lparts; 7583 } 7584 7585 foreach(part; retro(lparts)) { 7586 7587 // writeln("matching ", where, " with ", part, " via ", lastSeparation); 7588 // writeln(parts); 7589 7590 if(lastSeparation == -1) { 7591 if(!part.matchElement(where)) 7592 return false; 7593 } else if(lastSeparation == 0) { // generic parent 7594 // need to go up the whole chain 7595 where = where.parentNode; 7596 7597 while(where !is null) { 7598 if(part.matchElement(where)) 7599 break; 7600 7601 if(where is relativeTo) 7602 return false; 7603 7604 where = where.parentNode; 7605 } 7606 7607 if(where is null) 7608 return false; 7609 } else if(lastSeparation == 1) { // the > operator 7610 where = where.parentNode; 7611 7612 if(!part.matchElement(where)) 7613 return false; 7614 } else if(lastSeparation == 2) { // the + operator 7615 //writeln("WHERE", where, " ", part); 7616 where = where.previousSibling("*"); 7617 7618 if(!part.matchElement(where)) 7619 return false; 7620 } else if(lastSeparation == 3) { // the ~ operator 7621 where = where.previousSibling("*"); 7622 while(where !is null) { 7623 if(part.matchElement(where)) 7624 break; 7625 7626 if(where is relativeTo) 7627 return false; 7628 7629 where = where.previousSibling("*"); 7630 } 7631 7632 if(where is null) 7633 return false; 7634 } else if(lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore 7635 // FIXME 7636 } 7637 7638 lastSeparation = part.separation; 7639 7640 if(where is relativeTo) 7641 return false; // at end of line, if we aren't done by now, the match fails 7642 } 7643 return true; // if we got here, it is a success 7644 } 7645 7646 // the string should NOT have commas. Use parseSelectorString for that instead 7647 ///. 7648 static SelectorComponent fromString(string selector) { 7649 return parseSelector(lexSelector(selector)); 7650 } 7651 } 7652 7653 ///. 
SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	// Lexes `selector` and parses each top-level comma-separated alternative
	// into its own SelectorComponent. Commas nested inside parentheses
	// (e.g. in :not(a, b)) do not split.
	SelectorComponent[] ret;
	auto tokens = lexSelector(selector); // this will parse commas too
	// and now do comma-separated slices (i haz phobosophobia!)
	int parensCount = 0;
	while (tokens.length > 0) {
		size_t end = 0;
		while (end < tokens.length && (parensCount > 0 || tokens[end] != ",")) {
			if(tokens[end] == "(") parensCount++;
			if(tokens[end] == ")") parensCount--;
			++end;
		}
		if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags);
		// stop when at most the separating comma is left
		if (tokens.length-end < 2) break;
		tokens = tokens[end+1..$];
	}
	return ret;
}

/++
	Turns the tokens of one comma-free selector into a [SelectorComponent].

	This is a small state machine: in `State.Starting` a token is either a tag
	name or an operator; the other states consume the operand of `.`, `#`,
	`:` / `::` and `[...]`.

	Params:
		tokens = output of the selector lexer for a single alternative
		caseSensitiveTags = when false, tag name tokens are lower-cased

	Throws: Exception("parse error") when a functional pseudo-class such as
	`:not` or `:nth-child` is not followed by `(`.
+/
SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	SelectorComponent s;

	SelectorPart current;
	// moves the part under construction into the result, unless it is still blank
	void commit() {
		// might as well skip null items
		if(!current.isCleanSlateExceptSeparation()) {
			s.parts ~= current;
			current = current.init; // start right over
		}
	}
	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue,

		SkippingFunctionalSelector,
	}
	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	int parensCount;
	foreach(idx, token; tokens) {
		// returns the concatenated tokens between the parentheses following
		// the current token, honouring nested parentheses
		string readFunctionalSelector() {
			string contents;
			// the name must be followed by "("; check the bound first so a
			// trailing ":not" is a parse error rather than a range violation
			if(idx + 1 >= tokens.length || tokens[idx + 1] != "(")
				throw new Exception("parse error");
			int pc = 1;
			foreach(t; tokens[idx + 2 .. $]) {
				if(t == "(")
					pc++;
				if(t == ")")
					pc--;
				if(pc == 0)
					break;
				contents ~= t;
			}

			return contents;
		}

		// index of the token in the operator table, or -1 for a plain word
		sizediff_t tid = -1;
		foreach(i, item; selectorTokens)
			if(token == item) {
				tid = i;
				break;
			}
		final switch(state) {
			case State.Starting: // fresh, might be reading an operator or a tagname
				if(tid == -1) {
					if(!caseSensitiveTags)
						token = token.toLower();

					if(current.isCleanSlateExceptSeparation()) {
						current.tagNameFilter = token;
						// default thing, see comment under "*" below
						if(current.separation == -1) current.separation = 0;
					} else {
						// if it was already set, we must see two thingies
						// separated by whitespace...
						commit();
						current.separation = 0; // tree
						current.tagNameFilter = token;
					}
				} else {
					// Selector operators
					switch(token) {
						case "*":
							current.tagNameFilter = "*";
							// the idea here is if we haven't actually set a separation
							// yet (e.g. the > operator), it should assume the generic
							// whitespace (descendant) mode to avoid matching self with -1
							if(current.separation == -1) current.separation = 0;
						break;
						case " ":
							// If some other separation has already been set,
							// this is irrelevant whitespace, so we should skip it.
							// this happens in the case of "foo > bar" for example.
							if(current.isCleanSlateExceptSeparation() && current.separation > 0)
								continue;
							commit();
							current.separation = 0; // tree
						break;
						case ">>":
							commit();
							current.separation = 0; // alternate syntax for tree from html5 css
						break;
						case ">":
							commit();
							current.separation = 1; // child
						break;
						case "+":
							commit();
							current.separation = 2; // sibling directly after
						break;
						case "~":
							commit();
							current.separation = 3; // any sibling after
						break;
						case "<":
							commit();
							current.separation = 4; // immediate parent of
						break;
						case "[":
							state = State.ReadingAttributeSelector;
							if(current.separation == -1) current.separation = 0;
						break;
						case ".":
							state = State.ReadingClass;
							if(current.separation == -1) current.separation = 0;
						break;
						case "#":
							state = State.ReadingId;
							if(current.separation == -1) current.separation = 0;
						break;
						case ":":
						case "::":
							state = State.ReadingPseudoClass;
							if(current.separation == -1) current.separation = 0;
						break;

						default:
							assert(0, token);
					}
				}
			break;
			case State.ReadingClass:
				current.attributesIncludesSeparatedBySpaces ~= ["class", token];
				state = State.Starting;
			break;
			case State.ReadingId:
				current.attributesEqual ~= ["id", token];
				state = State.Starting;
			break;
			case State.ReadingPseudoClass:
				switch(token) {
					case "first-of-type":
						current.firstOfType = true;
					break;
					case "last-of-type":
						current.lastOfType = true;
					break;
					case "only-of-type":
						current.firstOfType = true;
						current.lastOfType = true;
					break;
					case "first-child":
						current.firstChild = true;
					break;
					case "last-child":
						current.lastChild = true;
					break;
					case "only-child":
						current.firstChild = true;
						current.lastChild = true;
					break;
					case "scope":
						current.scopeElement = true;
					break;
					case "empty":
						// one with no children
						current.emptyElement = true;
					break;
					case "whitespace-only":
						current.whitespaceOnly = true;
					break;
					case "link":
						current.attributesPresent ~= "href";
					break;
					case "root":
						current.rootElement = true;
					break;
					case "nth-child":
						current.nthChild ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-of-type":
						current.nthOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "nth-last-of-type":
						current.nthLastOfType ~= ParsedNth(readFunctionalSelector());
						state = State.SkippingFunctionalSelector;
					continue;
					case "is":
						state = State.SkippingFunctionalSelector;
						current.isSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "where":
						state = State.SkippingFunctionalSelector;
						current.whereSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "not":
						state = State.SkippingFunctionalSelector;
						current.notSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					case "has":
						state = State.SkippingFunctionalSelector;
						current.hasSelectors ~= readFunctionalSelector();
					continue; // now the rest of the parser skips past the parens we just handled
					// back to standards though not quite right lol
					case "disabled":
						current.attributesPresent ~= "disabled";
					break;
					case "checked":
						current.attributesPresent ~= "checked";
					break;

					case "visited", "active", "hover", "target", "focus", "selected":
						// these are recorded as required attributes named
						// "nothing" (and, via the fallthrough, "FIXME")
						current.attributesPresent ~= "nothing";
						// FIXME
					/+
					// extensions not implemented
					//case "text": // takes the text in the element and wraps it in an element, returning it
					+/
						goto case;
					case "before", "after":
						current.attributesPresent ~= "FIXME";

					break;
					// My extensions
					case "odd-child":
						current.oddChild = true;
					break;
					case "even-child":
						current.evenChild = true;
					break;
					default:
						//if(token.indexOf("lang") == -1)
						//assert(0, token);
					break;
				}
				state = State.Starting;
			break;
			case State.SkippingFunctionalSelector:
				// the argument was already captured by readFunctionalSelector;
				// just step over its tokens until the parentheses balance
				if(token == "(") {
					parensCount++;
				} else if(token == ")") {
					parensCount--;
				}

				if(parensCount == 0)
					state = State.Starting;
			break;
			case State.ReadingAttributeSelector:
				attributeName = token;
				attributeComparison = null;
				attributeValue = null;
				state = State.ReadingAttributeComparison;
			break;
			case State.ReadingAttributeComparison:
				// FIXME: these things really should be quotable in the proper lexer...
				if(token != "]") {
					if(token.indexOf("=") == -1) {
						// not a comparison; consider it
						// part of the attribute
						attributeValue ~= token;
					} else {
						attributeComparison = token;
						state = State.ReadingAttributeValue;
					}
					break;
				}
				goto case;
			case State.ExpectingAttributeCloser:
				if(token != "]") {
					// not the closer; consider it part of comparison
					if(attributeComparison == "")
						attributeName ~= token;
					else
						attributeValue ~= token;
					break;
				}

				// Selector operators
				switch(attributeComparison) {
					default: assert(0);
					case "":
						current.attributesPresent ~= attributeName;
					break;
					case "=":
						current.attributesEqual ~= [attributeName, attributeValue];
					break;
					case "|=":
						current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
					break;
					case "~=":
						current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
					break;
					case "$=":
						current.attributesEndsWith ~= [attributeName, attributeValue];
					break;
					case "^=":
						current.attributesStartsWith ~= [attributeName, attributeValue];
					break;
					case "*=":
						current.attributesInclude ~= [attributeName, attributeValue];
					break;
					case "!=":
						current.attributesNotEqual ~= [attributeName, attributeValue];
					break;
				}

				state = State.Starting;
			break;
			case State.ReadingAttributeValue:
				attributeValue = token;
				state = State.ExpectingAttributeCloser;
			break;
		}
	}

	commit();

	return s;
}

///.
Element[] removeDuplicates(Element[] input) {
	// Returns the elements of input with repeats dropped, keeping the first
	// occurrence of each and the original order.
	Element[] ret;

	bool[Element] already;
	foreach(e; input) {
		if(e in already) continue;
		already[e] = true;
		ret ~= e;
	}

	return ret;
}

// done with CSS selector handling


// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/++
		Parses `content` as a `;`-separated list of `name: value` declarations
		belonging to `rule` (empty for an inline style), then expands the
		shorthand properties.

		Declarations without a colon are skipped. `!important` is currently
		stripped from the values.
	+/
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if(content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach(part; content.split(";")) {
			part = part.strip();
			if(part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if(idx == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			Property p;

			p.name = part[0 .. idx].strip();
			p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important
			p.givenExplicitly = true;
			p.specificity = originatingSpecificity;

			properties ~= p;
		}

		foreach(property; properties)
			expandShortForm(property, originatingSpecificity);
	}

	/// Computes the specificity of a rule. Not implemented yet: the result is always zero-initialized.
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if(rule.length == 0) { // inline
			// s.important = 2;
		} else {
			// FIXME
		}

		return s;
	}

	string originatingRule; ///.
	Specificity originatingSpecificity; ///.

	/// Specificity as a single comparable `score` overlaid on its four byte-sized components.
	union Specificity {
		uint score; ///.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	/// A single declaration held by the style.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	///.
	Property[] properties;

	/// Property-style access: `style.marginLeft` reads, `style.marginLeft("1px")` writes. The camelCase name is converted with `unCamelCase`.
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);
		else
			return setValue(name, value, 0x02000000 /* inline specificity */);
	}

	/// takes dash style name
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/++
		Sets a property, taking the dash style name.

		An existing value is only replaced when `newSpecificity.score` is at
		least the stored one. A `!important` marker in `value` is removed and
		recorded in the specificity instead.

		Params:
			explicit = false when the value was inferred from a shorthand;
				such properties are left out of [toString]

		Returns: `value` without any `!important` marker, whether or not it was stored.
	+/
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		foreach(ref property; properties)
			if(property.name == name) {
				if(newSpecificity.score >= property.specificity.score) {
					// Store everything BEFORE expanding: the expansion must see
					// the new value, and it may append to `properties`, after
					// which this ref could point into a stale copy of the array.
					property.givenExplicitly = explicit;
					property.value = value;
					property.specificity = newSpecificity;
					expandShortForm(property, newSpecificity);
					return value;
				} else {
					if(name == "display")
					{}//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", property.specificity.score);
					return value; // do nothing - the specificity is too low
				}
			}

		// it's not here...

		Property p;
		p.givenExplicitly = explicit; // inferred longhands must stay marked as inferred
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity;

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	// Distributes a 1 to 4 value box shorthand over name-top/-right/-bottom/-left
	// following the usual CSS order. Anything else is ignored.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		// split on any run of whitespace so doubled spaces don't create empty parts
		auto parts = value.split();
		switch(parts.length) {
			case 1:
				setValue(name ~"-left", parts[0], specificity, false);
				setValue(name ~"-right", parts[0], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 2:
				setValue(name ~"-left", parts[1], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 3:
				// top | left and right | bottom
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[1], specificity, false);

			break;
			case 4:
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[3], specificity, false);
			break;
			default:
				// empty or over-long value: this comes from document content,
				// so ignore it rather than asserting
			break;
		}
	}

	/// Copies a shorthand (`margin`, `padding`, `border`, `outline`) onto its four per-side properties; other names are left alone.
	void expandShortForm(Property p, Specificity specificity) {
		switch(p.name) {
			case "margin":
			case "padding":
				expandQuadShort(p.name, p.value, specificity);
			break;
			case "border":
			case "outline":
				setValue(p.name ~ "-left", p.value, specificity, false);
				setValue(p.name ~ "-right", p.value, specificity, false);
				setValue(p.name ~ "-top", p.value, specificity, false);
				setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

			case "border-top":
			case "border-bottom":
			case "border-left":
			case "border-right":
			case "outline-top":
			case "outline-bottom":
			case "outline-left":
			case "outline-right":

			default: {}
		}
	}

	/// Serializes the explicitly given properties, wrapped in `rule { ... }` when there is an originating rule.
	override string toString() {
		string ret;
		if(originatingRule.length)
			ret = originatingRule ~ " {";

		foreach(property; properties) {
			if(!property.givenExplicitly)
				continue; // skip the inferred ones

			if(originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if(originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}

/// Wraps `url` in a css `url("...")` token. The text is inserted as-is, without escaping.
string cssUrl(string url) {
	return "url(\"" ~ url ~ "\")";
}

/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	///.
	CssStyle[] rules;

	/++
		Splits `source` into `selector { declarations }` rules with a small
		character-level state machine. Comments are dropped and `@` rules
		are skipped.
	+/
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		int state;
		string currentRule;
		string currentValue;

		// points at whichever of the two buffers is currently being filled
		string* currentThing = &currentRule;
		foreach(c; source) {
			handle: switch(state) {
				default: assert(0);
				case 0: // starting - we assume we're reading a rule
					switch(c) {
						case '@':
							state = 4;
						break;
						case '/':
							state = 1;
						break;
						case '{':
							currentThing = &currentValue;
						break;
						case '}':
							if(currentThing is &currentValue) {
								rules ~= new CssStyle(currentRule, currentValue);

								currentRule = "";
								currentValue = "";

								currentThing = &currentRule;
							} else {
								// idk what is going on here.
								// check sveit.com to reproduce
								currentRule = "";
								currentValue = "";
							}
						break;
						default:
							(*currentThing) ~= c;
					}
				break;
				case 1: // expecting *
					if(c == '*')
						state = 2;
					else {
						state = 0;
						(*currentThing) ~= "/" ~ c;
					}
				break;
				case 2: // inside comment
					if(c == '*')
						state = 3;
				break;
				case 3: // expecting / to end comment
					if(c == '/')
						state = 0;
					else if(c != '*')
						state = 2; // it's just a comment so no need to append
					// another '*' keeps us here, so "**/" still ends the comment
				break;
				case 4:
					if(c == '{')
						state = 5;
					if(c == ';')
						state = 0; // just skipping import
				break;
				case 5:
					if(c == '}')
						state = 0; // skipping font face probably
				break;
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			if(rule.originatingRule.length == 0)
				continue; // this shouldn't happen here in a stylesheet
			foreach(element; document.querySelectorAll(rule.originatingRule)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto current = element.computedStyle;

				foreach(item; rule.properties)
					current.setValue(item.name, item.value, item.specificity);
			}
		}
	}
}


/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Appends an item, moving to a larger heap buffer when the current one is full.
	void push(T t) {
		if(internalLength >= arr.length) {
			auto oldarr = arr;
			// double while small, then grow linearly
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. oldarr.length] = oldarr[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	///.
	@property bool empty() {
		return internalLength ? false : true;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	///.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1;
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
		handle its children

	*/

	/// Advances to the next node: the first unvisited child if there is one, otherwise back up the saved positions until one has children left.
	void popFront() {
	    more:
	    	if(isEmpty) return;

		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)

		current.childPosition++;
		if(current.childPosition >= current.element.children.length) {
			if(stack.empty())
				isEmpty = true;
			else {
				current = stack.pop();
				goto more;
			}
		} else {
			stack.push(current);
			current.element = current.element.children[current.childPosition];
			current.childPosition = -1;
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(stack.empty) // should never happen
			isEmpty = true;
		else {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		}
	}

	///.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// an element plus the index of the child most recently descended into (-1 = none yet)
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	Stack!(Current) stack;

	bool isEmpty;
}



// unbelievable.
// Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
/// Returns the offset of the first occurrence of `needle` in `haystack`,
/// or -1 when `std.algorithm.find` comes back empty.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	static import std.algorithm;
	auto found = std.algorithm.find(haystack, needle);
	if(found.length == 0)
		return -1;
	// find returns the tail starting at the match, so the difference is the offset
	return haystack.length - found.length;
}

// Returns a newly allocated array holding arr[0 .. position + 1], then `what`,
// then the rest of `arr`. Neither input is modified.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);
	// concatenation always allocates a fresh array, so the result never aliases the inputs
	return arr[0 .. position + 1] ~ what ~ arr[position + 1 .. $];
}

// Linear search: true if any element of `arr` compares equal to `item`.
package bool isInArray(T)(T item, T[] arr) {
	foreach(i; arr)
		if(item == i)
			return true;
	return false;
}

// Copies every key/value pair of `arr` into a new associative array.
private string[string] aadup(in string[string] arr) {
	string[string] ret;
	foreach(k, v; arr)
		ret[k] = v;
	return ret;
}

// dom event support, if you want to use it

/// used for DOM events
alias EventHandler = void delegate(Element handlerAttachedTo, Event event);

/// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it.
class Event {
	/// Creates an event called `eventName` aimed at `target` (stored as [srcElement]).
	this(string eventName, Element target) {
		this.eventName = eventName;
		this.srcElement = target;
	}

	/// Prevents the default event handler (if there is one) from being called
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Stops the event propagation immediately.
	void stopPropagation() {
		propagationStopped = true;
	}

	/// set by [preventDefault]; checked before default handlers are run
	bool defaultPrevented;
	/// set by [stopPropagation]; checked after each element's handlers have run
	bool propagationStopped;
	/// the key used to look up handlers on each element
	string eventName;

	/// the element the event was created for
	Element srcElement;
	alias srcElement target;

	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	/// false while [dispatch] runs the capturing handlers, true while it runs the bubbling ones
	bool isBubbling;

	/// this sends it only to the target. If you want propagation, use dispatch() instead.
	void send() {
		if(srcElement is null)
			return;

		auto e = srcElement;

		// `in` yields a pointer to the handler list (null if absent), so one lookup suffices
		if(auto handlers = eventName in e.bubblingEventHandlers)
			foreach(handler; *handlers)
				handler(e, this);

		if(!defaultPrevented)
			if(auto handler = eventName in e.defaultEventHandlers)
				(*handler)(e, this);
	}

	/// this dispatches the event using the capture -> target -> bubble process
	void dispatch() {
		if(srcElement is null)
			return;

		// first capture, then bubble

		// chain[0] is the target; each following entry is the parentNode of the one before
		Element[] chain;
		Element curr = srcElement;
		while(curr) {
			chain ~= curr;
			curr = curr.parentNode;
		}

		isBubbling = false;

		// capture phase: outermost ancestor first, target last
		foreach(e; chain.retro()) {
			if(auto handlers = eventName in e.capturingEventHandlers)
				foreach(handler; *handlers)
					handler(e, this);

			// the default on capture should really be to always do nothing

			//if(!defaultPrevented)
			//	if(eventName in e.defaultEventHandlers)
			//		e.defaultEventHandlers[eventName](e.element, this);

			if(propagationStopped)
				break;
		}

		// bubble phase: target first, outermost ancestor last
		isBubbling = true;
		if(!propagationStopped)
			foreach(e; chain) {
				if(auto handlers = eventName in e.bubblingEventHandlers)
					foreach(handler; *handlers)
						handler(e, this);

				if(propagationStopped)
					break;
			}

		// default handlers run for the whole chain unless prevented;
		// note that a stopped propagation does not skip them
		if(!defaultPrevented)
			foreach(e; chain) {
				if(auto handler = eventName in e.defaultEventHandlers)
					(*handler)(e, this);
			}
	}
}

/// Validation and display options for a form field, convertible to and from
/// the `required`, `pattern` and `placeholder` attributes of an element.
struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones


	// convenience methods to quickly get some options

	/// All options left at their defaults.
	@property static FormFieldOptions none() {
		FormFieldOptions f;
		return f;
	}

	/// Only [isRequired] set.
	static FormFieldOptions required() {
		FormFieldOptions f;
		f.isRequired = true;
		return f;
	}

	/// A validation pattern, optionally also marking the field as required.
	static FormFieldOptions regex(string pattern, bool required = false) {
		FormFieldOptions f;
		f.pattern = pattern;
		f.isRequired = required;
		return f;
	}

	/// Reads the options back from the attributes present on `e`.
	static FormFieldOptions fromElement(Element e) {
		FormFieldOptions f;
		if(e.hasAttribute("required"))
			f.isRequired = true;
		if(e.hasAttribute("pattern"))
			f.pattern = e.pattern;
		if(e.hasAttribute("placeholder"))
			f.placeholder = e.placeholder;
		return f;
	}

	/// Writes the options that are set onto `e` and returns `e`.
	/// Options that are unset (false / empty) leave the element untouched.
	Element applyToElement(Element e) {
		if(this.isRequired)
			e.required = "required";
		if(this.pattern.length)
			e.pattern = this.pattern;
		if(this.placeholder.length)
			e.placeholder = this.placeholder;
		return e;
	}
}

// this needs to look just like a string, but can expand as needed
version(no_dom_stream)
alias string Utf8Stream;
else
class Utf8Stream {
	protected:
		// these two should be overridden in subclasses to actually do the stream magic

		// Returns the next piece of data, or null when no helper delegate was supplied.
		string getMore() {
			if(getMoreHelper !is null)
				return getMoreHelper();
			return null;
		}

		// Reports whether getMore can deliver more; false when no helper delegate was supplied.
		bool hasMore() {
			if(hasMoreHelper !is null)
				return hasMoreHelper();
			return false;
		}
		// the rest should be ok

	public:
		/// Wraps a complete, already loaded string.
		this(string d) {
			this.data = d;
		}

		/// Wraps a data source described by two delegates and loads its first
		/// piece right away, if the source reports having one.
		this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
			this.getMoreHelper = getMoreHelper;
			this.hasMoreHelper = hasMoreHelper;

			if(hasMore())
				this.data ~= getMore();
		}

		/// Number of bytes buffered so far. May first pull one more piece from
		/// the source, so the value can grow between calls.
		@property final size_t length() {
			// the parser checks length primarily directly before accessing the next character
			// so this is the place we'll hook to append more if possible and needed.
			if(lastIdx + 1 >= data.length && hasMore()) {
				data ~= getMore();
			}
			return data.length;
		}

		/// Returns the byte at `idx` and records the highest index touched.
		final char opIndex(size_t idx) {
			if(idx > lastIdx)
				lastIdx = idx;
			return data[idx];
		}

		/// Returns data[start .. end] and records `end` as touched.
		final string opSlice(size_t start, size_t end) {
			if(end > lastIdx)
				lastIdx = end;
			return data[start .. end];
		}

		/// `$` inside an index or slice; same as [length], including its refill behavior.
		final size_t opDollar() {
			return length();
		}

		/// Appends `s` to the buffer. Note that this modifies and returns this
		/// same object rather than creating a new stream.
		final Utf8Stream opBinary(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// Appends `s` to the buffer in place.
		final Utf8Stream opOpAssign(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// Replaces the whole buffer with `rhs`. lastIdx is left as it was.
		final Utf8Stream opAssign(string rhs) {
			this.data = rhs;
			return this;
		}
	private:
		// everything received so far
		string data;

		// highest index (or slice end) handed out so far; drives the refill check in length()
		size_t lastIdx;

		bool delegate() hasMoreHelper;
		string delegate() getMoreHelper;


		/+
		// used to maybe clear some old stuff
		// you might have to remove elements parsed with it too since they can hold slices into the
		// old stuff, preventing gc
		void dropFront(int bytes) {
			posAdjustment += bytes;
			data = data[bytes .. $];
		}

		int posAdjustment;
		+/
}

/// Hands `obj` to `fillData` from arsd.database, which reports values back
/// through a callback that stores each one with `form.setValue(key, value)`.
void fillForm(T)(Form form, T obj, string name) {
	import arsd.database;
	fillData((k, v) => form.setValue(k, v), obj, name);
}


/+
/+
	Syntax:

	Tag: tagname#id.class
	Tree: Tag(Children, comma, separated...)
	Children: Tree or Variable
	Variable: $varname with optional |funcname following.

	If a variable has a tree after it, it breaks the variable down:
		* if array, foreach it does the tree
		* if struct, it breaks down the member variables

	stolen from georgy on irc, see: https://github.com/georgy7/stringplate
+/
struct Stringplate {
	/++

	+/
	this(string s) {

	}

	/++

	+/
	Element expand(T...)(T vars) {
		return null;
	}
}
///
unittest {
	auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))");
	assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`);
}
+/

/// Returns true when every entry of `children` is either a text node with
/// non-whitespace content, or an element whose tag name is listed in
/// `inlineElements` and whose own children pass the same check recursively.
/// A whitespace-only text node fails the first test and is then judged by its
/// tag name like any other node. An empty `children` yields true.
bool allAreInlineHtml(const(Element)[] children, const string[] inlineElements) {
	foreach(child; children) {
		if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) {
			// cool
		} else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children, inlineElements)) {
			// cool
		} else {
			// prolly block
			return false;
		}
	}
	return true;
}

// True for space, carriage return, line feed and tab only.
private bool isSimpleWhite(dchar c) {
	return c == ' ' || c == '\r' || c == '\n' || c == '\t';
}

unittest {
	// Test for issue #120
	string s = `<html>
	<body>
		<P>AN
		<P>bubbles</P>
		<P>giggles</P>
	</body>
</html>`;
	auto doc = new Document();
	doc.parseUtf8(s, false, false);
	auto s2 = doc.toString();

	auto bubblesAt = s2.indexOf("bubbles");
	auto gigglesAt = s2.indexOf("giggles");

	// indexOf gives -1 for a missing string, which would make the ordering
	// check below pass (or fail) for the wrong reason, so check presence first
	assert(
			bubblesAt != -1 && gigglesAt != -1,
			"paragraph text missing:\n" ~ s2);
	assert(
			bubblesAt < gigglesAt,
			"paragraph order incorrect:\n" ~ s2);
}

unittest {
	// test for suncarpet email dec 24 2019
	// arbitrary id asduiwh
	auto document = new Document("<html>
        <head>
                <meta charset=\"utf-8\"></meta>
                <title>Element.querySelector Test</title>
        </head>
        <body>
                <div id=\"foo\">
                        <div>Foo</div>
                        <div>Bar</div>
                </div>
        </body>
</html>");

	auto doc = document;

	assert(doc.querySelectorAll("div div").length == 2);
	assert(doc.querySelector("div").querySelectorAll("div").length == 2);
	assert(doc.querySelectorAll("> html").length == 0);
	assert(doc.querySelector("head").querySelectorAll("> title").length == 1);
	assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1);


	assert(doc.root.matches("html"));
	assert(!doc.root.matches("nothtml"));
	assert(doc.querySelector("#foo > div").matches("div"));
	assert(doc.querySelector("body > #foo").matches("#foo"));

	assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root!
	assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does
	assert(doc.querySelectorAll(" > body").length == 1); //  should mean the same thing
	assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this
	assert(doc.root.querySelectorAll(" > html").length == 0); // but not this

	// also confirming the querySelector works via the mdn definition
	auto foo = doc.requireSelector("#foo");
	assert(foo.querySelector("#foo > div") !is null);
	assert(foo.querySelector("body #foo > div") !is null);

	// this is SUPPOSED to work according to the spec but never has in dom.d since it limits the scope.
	// the new css :scope thing is designed to bring this in. and meh idk if i even care.
	//assert(foo.querySelectorAll("#foo > div").length == 2);
}

unittest {
	// based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example
	auto document = new Document(`<article>
  <div id="div-01">Here is div-01
    <div id="div-02">Here is div-02
      <div id="div-03">Here is div-03</div>
    </div>
  </div>
</article>`, true, true);

	auto el = document.getElementById("div-03");
	assert(el.closest("#div-02").id == "div-02");
	assert(el.closest("div div").id == "div-03");
	assert(el.closest("article > div").id == "div-01");
	assert(el.closest(":not(div)").tagName == "article");

	assert(el.closest("p") is null);
	assert(el.closest("p, div") is el);
}

unittest {
	// https://developer.mozilla.org/en-US/docs/Web/CSS/:is
	auto document = new Document(`<test>
		<div class="foo"><p>cool</p><span>bar</span></div>
		<main><p>two</p></main>
	</test>`);

	assert(document.querySelectorAll(":is(.foo, main) p").length == 2);
	assert(document.querySelector("div:where(.foo)") !is null);
}

unittest {
immutable string html = q{
<root>
<div class="roundedbox">
 <table>
  <caption class="boxheader">Recent Reviews</caption>
  <tr>
   <th>Game</th>
   <th>User</th>
   <th>Rating</th>
   <th>Created</th>
  </tr>

  <tr>
   <td>June 13, 2020 15:10</td>
   <td><a href="/reviews/8833">[Show]</a></td>
  </tr>

  <tr>
   <td>June 13, 2020 15:02</td>
   <td><a href="/reviews/8832">[Show]</a></td>
  </tr>

  <tr>
   <td>June 13, 2020 14:41</td>
   <td><a href="/reviews/8831">[Show]</a></td>
  </tr>
 </table>
</div>
</root>
};

	auto doc = new Document(cast(string)html);
	// this should select the second table row, but...
	auto rd = doc.root.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`);
	assert(rd !is null);
	assert(rd.href == "/reviews/8832");

	rd = doc.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`);
	assert(rd !is null);
	assert(rd.href == "/reviews/8832");
}

unittest {
	try {
		auto doc = new XmlDocument("<testxmlns:foo=\"/\"></test>");
		assert(0);
	} catch(Exception e) {
		// good; it should throw an exception, not an error.
	}
}

/*
Copyright: Adam D. Ruppe, 2010 - 2021
License:   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others

        Copyright Adam D. Ruppe 2010-2021.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
        http://www.boost.org/LICENSE_1_0.txt)
*/
