/*
 * Copyright Lodovico Giaretta 2016 - .
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at
 * http://www.boost.org/LICENSE_1_0.txt)
 */

/++
 + Authors:
 + Lodovico Giaretta
 + László Szerémi
 +
 + License:
 + <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
 +
 + Copyright:
 + Copyright Lodovico Giaretta 2016 --
 +/

module newxml.cursor;

import newxml.interfaces;
import newxml.faststrings;

import newxml.validation;

import std.meta : staticIndexOf;
import std.range.primitives;
import std.typecons;

/++
 + Exception thrown by the cursor (and by its attribute range) when the document
 + cannot be processed and `processBadDocument` is not enabled.
 +/
public class CursorException : XMLException
{
    @nogc @safe pure nothrow this(string msg, string file = __FILE__, size_t line = __LINE__,
            Throwable nextInChain = null)
    {
        super(msg, file, line, nextInChain);
    }

    @nogc @safe pure nothrow this(string msg, Throwable nextInChain, string file = __FILE__,
            size_t line = __LINE__)
    {
        super(msg, file, line, nextInChain);
    }
}

@safe:

/++
 + A single attribute: a qualified name (optionally `prefix:localName`) and a value.
 +
 + The position of the first colon is cached whenever the name is assigned, so
 + `prefix` and `localName` are plain slices of the qualified name.
 +/
package struct Attribute(StringType)
{
    /// The attribute value.
    StringType value;
    private StringType _name;
    /// Index of the prefix separator inside `_name`; 0 means "no prefix".
    private size_t colon;

    /// Builds an attribute from its qualified name and its value.
    this(StringType qualifiedName, StringType value)
    {
        this.value = value;
        name = qualifiedName;
    }

    /// The qualified name, exactly as it was assigned.
    @property auto name() inout
    {
        return _name;
    }

    /// Assigns the qualified name and recomputes the cached colon position.
    @property void name(StringType _name)
    {
        this._name = _name;
        // a colon at index 0 (or no colon at all) is treated as "no prefix"
        auto i = _name.fastIndexOf(':');
        colon = i > 0 ? i : 0;
    }

    /// The part of the name before the colon; empty if there is no prefix.
    @property auto prefix() inout
    {
        return name[0..colon];
    }

    /// The part of the name after the colon, or the whole name if there is no prefix.
    @property StringType localName()
    {
        if (colon)
            return name[colon+1..$];
        else
            return name;
    }

    /// Renders the attribute as `name = "value"`.
    StringType toString()
    {
        return name ~ " = \"" ~ value ~ "\"";
    }
}

/++
 + An implementation of the $(LINK2 ../interfaces/isCursor, `isCursor`) trait.
 +
 + This is the only provided cursor that builds on top of a parser (and not on top of another cursor), so it is part
 + of virtually every parsing chain. All documented methods are implementations of the specifications dictated by
 + $(LINK2 ../interfaces/isCursor, `isCursor`).
 + Parameters:
 +     P = The parser.
 +     conflateCDATA = If set to `Yes` (the default), `kind` reports CDATA sections as text nodes.
 +     processBadDocument = If set to `Yes` (default is `No`), then it'll ignore errors as long as it can still
 +         process the document. Otherwise it'll throw an appropriate exception if an error is encountered.
 +/
struct Cursor(P, Flag!"conflateCDATA" conflateCDATA = Yes.conflateCDATA,
        Flag!"processBadDocument" processBadDocument = No.processBadDocument)
    if (isLowLevelParser!P)
{
    /++
     + Lazy range over the attributes found in a slice of a tag's content.
     +
     + Each attribute is parsed on the first call to `front` and cached in `attr`
     + until `popFront` clears it. In `processBadDocument` mode a syntax error sets
     + `error`, after which `empty` returns `true`.
     +/
    struct AttributesRange
    {
        private StringType content;
        private Attribute!StringType attr;
        private Cursor* cursor;
        private bool error;

        private this(StringType str, ref Cursor cur) @system nothrow
        {
            content = str;
            cursor = &cur;
        }

        /+ Reports a malformed attribute: throws in strict mode; otherwise flags the
           range as failed and yields an empty attribute. +/
        private Attribute!StringType malformed(string msg) @safe
        {
            static if (processBadDocument == No.processBadDocument)
            {
                throw new CursorException(msg);
            }
            else
            {
                error = true;
                return attr.init;
            }
        }

        /// Whether no attribute is left (or a previous error stopped the range).
        /// Skips leading whitespace as a side effect.
        bool empty() @safe
        {
            if (error)
                return true;

            auto i = content.fastIndexOfNeither(" \r\n\t");
            if (i >= 0)
            {
                content = content[i..$];
                return false;
            }
            return true;
        }

        /// Parses (once) and returns the current attribute, with its value unescaped.
        auto front() @safe
        {
            // attr.init doubles as the "nothing parsed yet" marker
            if (attr == attr.init)
            {
                auto i = content.fastIndexOfNeither(" \r\n\t");
                assert(i >= 0, "No more attributes...");
                content = content[i..$];

                auto sep = fastIndexOf(content[0..$], '=');
                if (sep == -1)
                    return malformed("Invalid attribute syntax!"); // attribute without value

                auto name = content[0..sep];

                // whitespace is allowed between the name and '=', but not inside the name
                auto delta = fastIndexOfAny(name, " \r\n\t");
                if (delta >= 0)
                {
                    auto j = name[delta..$].fastIndexOfNeither(" \r\n\t");
                    if (j != -1)
                        return malformed("Invalid attribute syntax!"); // name contains spaces
                    name = name[0..delta];
                }
                if (!isValidXMLName(name))
                {
                    static if (processBadDocument == No.processBadDocument)
                        throw new CursorException("Invalid attribute name!");
                    else
                        error = true; // the attribute is still returned; the range stops afterwards
                }
                attr.name = name;

                size_t attEnd;
                size_t quote;
                // locate the first non-blank character after '=': it must be the opening quote
                delta = (sep + 1 < content.length) ? fastIndexOfNeither(content[sep + 1..$], " \r\n\t") : -1;
                if (delta < 0)
                    return malformed("Invalid attribute syntax!"); // attribute without value

                quote = sep + 1 + delta;
                if (content[quote] != '"' && content[quote] != '\'')
                    return malformed("Invalid attribute syntax!"); // value is not quoted

                // the closing quote is the next occurrence of the opening one
                delta = fastIndexOf(content[(quote + 1)..$], content[quote]);
                if (delta == -1)
                    return malformed("Invalid attribute syntax!"); // quotes never closed
                attEnd = quote + 1 + delta;

                static if (processBadDocument == No.processBadDocument)
                    attr.value = xmlUnescape(content[(quote + 1)..attEnd], cursor.parser.chrEntities);
                else
                    attr.value = xmlUnescape!(No.strict)(content[(quote + 1)..attEnd], cursor.parser.chrEntities);
                content = content[attEnd+1..$];
            }
            return attr;
        }

        /// Consumes the current attribute (parsing it first if `front` was never called).
        auto popFront() @safe
        {
            front();
            attr = attr.init;
        }
    }

    /++ The type of characters in the input, as returned by the underlying low level parser. +/
    alias CharacterType = P.CharacterType;

    /++ The type of sequences of CharacterType, as returned by this parser +/
    alias StringType = CharacterType[];

    private P parser;
    private ElementType!P currentNode;
    private bool starting, _documentEnd = true, nextFailed, _xmlDeclNotFound;
    // Per-node caches, reset by advanceInput: `colon` holds colon.max until the
    // prefix separator has been searched; `nameEnd` is 0 until the name has been located.
    private ptrdiff_t colon;
    private size_t nameBegin, nameEnd;
    public StringType encoding;
    public StringType docType;
    ///Loads system entities if needed.
    ///If not used, then it can protect against certain system entity attacks at the
    ///cost of having this feature disabled.
    public @safe StringType delegate(StringType path) sysEntityLoader;

    /++ Generic constructor; forwards its arguments to the parser constructor +/
    this(Args...)(Args args)
    {
        parser = P(args);
    }

    static if (isSaveableLowLevelParser!P)
    {
        /// Returns a copy of this cursor whose parser is an independent `save` of the current one.
        public auto save()
        {
            auto result = this;
            result.parser = parser.save;
            return result;
        }
    }

    ///Returns true if XML declaration was not found.
    public @property bool xmlDeclNotFound() @nogc @safe pure nothrow
    {
        return _xmlDeclNotFound;
    }

    /+
    /**
     * Preprocesses the document, mainly the declaration (sets document version and encoding) and the Document type.
     * NOTE: Does not want to process anything beyond the first processing instruction (`<?xml [...] ?>`) for unknown
     * reasons, and I cannot get the debugger to find the reason.
     */
    public void preprocess() {
        import std.array;
        int i;
        do
        {
            i++;
            switch (currentNode.kind) {
                case XMLKind.document:
                    //enter();
                    break;
                case XMLKind.processingInstruction:
                    auto attrl = attributes().array;
                    foreach (attr ; attrl) {
                        //Attribute!StringType attr = attrl.front;
                        switch (attr.name) {
                            case "version":
                                if (attr.value == "1.0")
                                {
                                    parser.xmlVersion = XMLVersion.XML1_0;
                                }
                                else if (attr.value == "1.1")
                                {
                                    parser.xmlVersion = XMLVersion.XML1_1;
                                }
                                break;
                            case "encoding":
                                encoding = attr.value;
                                break;
                            default:
                                break;  //Check whether other types of attributes are allowed here.
                        }
                    }
                    //exit();
                    /+if (!enter())
                        goto exitloop;+/
                    break;
                case XMLKind.dtdStart:
                    docType = content();
                    if (!enter())
                        goto exitloop;
                    break;
                case XMLKind.dtdEmpty:
                    docType = content();
                    goto exitloop;
                case XMLKind.entityDecl:
                    StringType entName = name();
                    //Check for external entities.
                    parser.chrEntities[entName] = content();
                    break;
                case XMLKind.attlistDecl, XMLKind.elementDecl, XMLKind.notationDecl, XMLKind.declaration:
                    break;
                default:
                    goto exitloop;
            }

        }
        while (next);
    exitloop:
        exit();
    }+/

    /+ Moves to the next parser node and invalidates the per-node caches.
       Returns false (and marks the document as ended) when the parser is exhausted. +/
    private bool advanceInput()
    {
        colon = colon.max;
        nameEnd = 0;
        parser.popFront();
        if (!parser.empty)
        {
            currentNode = parser.front;
            return true;
        }
        _documentEnd = true;
        return false;
    }

    static if (needSource!P)
    {
        /++
         + The type of input accepted by this parser,
         + i.e., the one accepted by the underlying low level parser.
         +/
        alias InputType = P.InputType;

        /++
         + Initializes this cursor (and the underlying low level parser) with the given input.
         +/
        void setSource(InputType input)
        {
            parser.setSource(input);
            initialize();
        }
    }

    /+ Resets the cursor state and positions it on the xml declaration,
       synthesizing a default one when the document does not start with it. +/
    private void initialize()
    {
        // reset private fields
        nextFailed = false;
        _xmlDeclNotFound = false;
        colon = colon.max;
        nameEnd = 0;

        if (!parser.empty)
        {
            if (parser.front.kind == XMLKind.processingInstruction &&
                parser.front.content.length >= 3 &&
                fastEqual(parser.front.content[0..3], "xml"))
            {
                currentNode = parser.front;
            }
            else
            {
                // document without xml declaration???
                // It turns out XML declaration is not mandatory, just assume UTF-8 and XML version 1.0 if it's missing!
                currentNode.kind = XMLKind.processingInstruction;
                currentNode.content = "xml version = \"1.0\" encoding = \"UTF-8\"";
                _xmlDeclNotFound = true;
            }
            starting = true;
            _documentEnd = false;
        }
        else
            _documentEnd = true;
    }

    /++ Returns whether the cursor is at the end of the document. +/
    bool documentEnd()
    {
        return _documentEnd;
    }

    /++
     + Returns whether the cursor is at the beginning of the document
     + (i.e. whether no `enter`/`next`/`exit` has been performed successfully and thus
     + the cursor points to the xml declaration)
     +/
    bool atBeginning()
    {
        return starting;
    }

    /++
     + Advances to the first child of the current node and returns `true`.
     + If it returns `false`, the cursor is either on the same node (it wasn't
     + an element start) or it is at the close tag of the element it was called on
     + (it was a pair open/close tag without any content)
     +/
    bool enter()
    {
        if (starting)
        {
            starting = false;
            // a real xml declaration has to be skipped; a synthesized one has not,
            // because the parser is still on the first real node
            if (currentNode.content is parser.front.content)
                return advanceInput();
            else
            {
                nameEnd = 0;
                nameBegin = 0;
            }

            currentNode = parser.front;
            return true;
        }
        else if (currentNode.kind == XMLKind.elementStart)
        {
            return advanceInput() && currentNode.kind != XMLKind.elementEnd;
        }
        else if (currentNode.kind == XMLKind.dtdStart)
        {
            return advanceInput() && currentNode.kind != XMLKind.dtdEnd;
        }
        else
            return false;
    }

    /++ Advances to the end of the parent of the current node. +/
    void exit()
    {
        // if next already failed, the cursor is already on the parent's end
        if (!nextFailed)
            while (next()) {}

        nextFailed = false;
    }

    /++
     + Advances to the _next sibling of the current node.
     + Returns whether it succeeded. If it fails, either the
     + document has ended or the only meaningful operation is `exit`.
     +/
    bool next()
    {
        if (parser.empty || starting || nextFailed)
            return false;
        else if (currentNode.kind == XMLKind.dtdStart)
        {
            /+while (advanceInput && currentNode.kind != XMLKind.dtdEnd)
            {

            }+/
        }
        else if (currentNode.kind == XMLKind.elementStart)
        {
            // skip the whole subtree: count nested start/end tags until balanced
            int count = 1;
            static if (processBadDocument == No.processBadDocument)
                StringType currName = name;
            while (count > 0 && !parser.empty)
            {
                if (!advanceInput)
                    return false;
                if (currentNode.kind == XMLKind.elementStart)
                    count++;
                else if (currentNode.kind == XMLKind.elementEnd)
                    count--;
            }
            static if (processBadDocument == No.processBadDocument)
            {
                // the balancing end tag must carry the name of the start tag
                if (count != 0 || currName != name)
                    throw new CursorException("Document is malformed!");
            }
        }
        if (!advanceInput || currentNode.kind == XMLKind.elementEnd || currentNode.kind == XMLKind.dtdEnd)
        {
            nextFailed = true;
            return false;
        }
        return true;
    }

    /++ Returns the _kind of the current node. +/
    XMLKind kind() const
    {
        if (starting)
            return XMLKind.document;

        static if (conflateCDATA == Yes.conflateCDATA)
            if (currentNode.kind == XMLKind.cdata)
                return XMLKind.text;

        return currentNode.kind;
    }

    /++
     + If the current node is an element or a doctype, returns its complete _name;
     + if it is a processing instruction, return its target;
     + otherwise, returns an empty string;
     +/
    StringType name()
    {
        switch (currentNode.kind)
        {
            case XMLKind.document:
            case XMLKind.text:
            case XMLKind.cdata:
            case XMLKind.comment:
            case XMLKind.declaration:
            case XMLKind.conditional:
            case XMLKind.dtdStart:
            case XMLKind.dtdEmpty:
            case XMLKind.dtdEnd:
                return [];
            default:
                if (!nameEnd)
                {
                    // nameBegin is always recomputed: a value left over from a previous
                    // node could lie past the end of a blank or shorter content.
                    immutable ptrdiff_t first = fastIndexOfNeither(currentNode.content, " \r\n\t");
                    nameBegin = first >= 0 ? first : 0;

                    immutable ptrdiff_t blank = fastIndexOfAny(currentNode.content[nameBegin..$], " \r\n\t");
                    nameEnd = blank >= 0 ? blank + nameBegin : currentNode.content.length;
                }
                return currentNode.content[nameBegin..nameEnd];
        }
    }

    /++
     + If the current node is an element, returns its local name (without namespace prefix);
     + otherwise, returns the same result as `name`.
     +/
    StringType localName()
    {
        auto name = name();
        if (currentNode.kind == XMLKind.elementStart || currentNode.kind == XMLKind.elementEnd)
        {
            if (colon == colon.max)
                colon = fastIndexOf(name, ':');
            // without a colon, colon is -1 and the whole name is returned
            return name[(colon+1)..$];
        }
        return name;
    }

    /++
     + If the current node is an element, returns its namespace _prefix
     + (empty if the name has none); otherwise, returns an empty string.
     +/
    StringType prefix()
    {
        if (currentNode.kind == XMLKind.elementStart || currentNode.kind == XMLKind.elementEnd)
        {
            auto name = name;
            if (colon == colon.max)
                colon = fastIndexOf(name, ':');

            if (colon >= 0)
                return name[0..colon];
            else
                return [];
        }
        return [];
    }

    /++
     + If the current node is an element start, an empty element or a processing instruction
     + (which includes the xml declaration the cursor starts on), returns its _attributes as a
     + range of `Attribute` (qualified name and value); otherwise, returns an empty range.
     +/
    auto attributes() @trusted
    {
        auto kind = currentNode.kind;
        if (kind == XMLKind.elementStart || kind == XMLKind.elementEmpty || kind == XMLKind.processingInstruction)
        {
            name; // makes sure nameEnd is computed for this node
            return AttributesRange(currentNode.content[nameEnd..$], this);
        }
        else
            return AttributesRange();
    }

    /++
     + Return the text content of a cdata section, a comment or a text node;
     + for an entity declaration, returns the quoted replacement text;
     + in all other cases, returns the entire node without the name
     +/
    StringType content()
    {
        // nameEnd is computed lazily by name(); refresh it so that the result
        // does not depend on whether name() was already called for this node
        name();

        if (currentNode.kind == XMLKind.entityDecl)
        {
            auto rest = currentNode.content[nameEnd..$];
            immutable ptrdiff_t b = fastIndexOfAny(rest, "\"\'");
            // only look for the closing quote once an opening one has been found
            immutable ptrdiff_t e = b >= 0 ? fastLastIndexOf(rest, rest[b]) : -1;
            if (b > 0 && e > 0)
            {
                if (b + 1 <= e)
                    return rest[b + 1..e];
                else
                    return null;
            }
            else
            {
                static if (processBadDocument == No.processBadDocument)
                    throw new CursorException("Entity content not found!");
                else
                    return null;
            }
        }
        /* else if (currentNode.kind == XMLKind.dtdStart || currentNode.kind == XMLKind.dtdEmpty)
        {
            sizediff_t b = fastLastIndexOfAny(currentNode.content[nameEnd..$], " \r\n\t");
            if (b == -1)
                return null;
            sizediff_t e = fastIndexOfAny(currentNode.content[nameEnd + b..$], " \r\n\t");
            if (e == -1)
                return currentNode.content[nameEnd + b + 1..$];
            else
                return currentNode.content[nameEnd + b + 1..nameEnd + e];
        } */
        else
            return currentNode.content[nameEnd..$];
    }

    /++ Returns the entire text of the current node. +/
    StringType wholeContent() const
    {
        return currentNode.content;
    }
}

/++
 + Instantiates a specialized `Cursor` on top of the given underlying `parser`.
 +
 + Parameters:
 +     conflateCDATA = forwarded to `Cursor`.
 +     processBadDocument = forwarded to `Cursor`; defaults to `No`.
 +/
template cursor(Flag!"conflateCDATA" conflateCDATA = Yes.conflateCDATA,
        Flag!"processBadDocument" processBadDocument = No.processBadDocument)
{
    auto cursor(T)(auto ref T parser)
        if (isLowLevelParser!T)
    {
        auto result = Cursor!(T, conflateCDATA, processBadDocument)();
        result.parser = parser;
        // a parser that has no input yet is initialized later, by setSource
        if (!result.parser.empty)
        {
            result.initialize;
        }
        return result;
    }
}

unittest
{
    import newxml.lexers;
    import newxml.parser;
    import std.string : lineSplitter, strip;
    import std.algorithm : map;
    import std.array : array;
    import std.conv : to;

    wstring xml = q"{
    <?xml encoding = "utf-8" ?>
    <!DOCTYPE mydoc https://myUri.org/bla [
        <!ELEMENT myelem ANY>
        <!ENTITY myent "replacement text">
        <!ATTLIST myelem foo cdata #REQUIRED >
        <!NOTATION PUBLIC 'h'>
        <!FOODECL asdffdsa >
    ]>
    <aaa xmlns:myns="something">
        <myns:bbb myns:att='>'>
            <!-- lol -->
            Lots of Text!
            On multiple lines!
        </myns:bbb>
        <![CDATA[ Ciaone! ]]>
        <ccc/>
    </aaa>
    }";

    auto cursor = xml.lexer.parser.cursor;

    assert(cursor.atBeginning);

    // <?xml encoding = "utf-8" ?>
    assert(cursor.kind() == XMLKind.document);
    assert(cursor.name() == "xml");
    assert(cursor.prefix() == "");
    assert(cursor.localName() == "xml");
    assert(cursor.attributes().array == [Attribute!wstring("encoding", "utf-8")]);
    assert(cursor.content() == " encoding = \"utf-8\" ");

    assert(cursor.enter());
    assert(!cursor.atBeginning);

    // <!DOCTYPE mydoc https://myUri.org/bla [
    assert(cursor.kind == XMLKind.dtdStart);
    assert(cursor.wholeContent == " mydoc https://myUri.org/bla ");

    assert(cursor.enter);
    // <!ELEMENT myelem ANY>
    assert(cursor.kind == XMLKind.elementDecl);
    assert(cursor.wholeContent == " myelem ANY");

    assert(cursor.next);
    // <!ENTITY myent "replacement text">
    assert(cursor.kind == XMLKind.entityDecl);
    assert(cursor.wholeContent == " myent \"replacement text\"");
    assert(cursor.name == "myent");
    assert(cursor.content == "replacement text", to!string(cursor.content));

    assert(cursor.next);
    // <!ATTLIST myelem foo cdata #REQUIRED >
    assert(cursor.kind == XMLKind.attlistDecl);
    assert(cursor.wholeContent == " myelem foo cdata #REQUIRED ");

    assert(cursor.next);
    // <!NOTATION PUBLIC 'h'>
    assert(cursor.kind == XMLKind.notationDecl);
    assert(cursor.wholeContent == " PUBLIC 'h'");

    assert(cursor.next);
    // <!FOODECL asdffdsa >
    assert(cursor.kind == XMLKind.declaration);
    assert(cursor.wholeContent == "FOODECL asdffdsa ");

    assert(!cursor.next);

    //assert(cursor.parser._chrEntities["myent"] == "replacement text");
    cursor.exit;

    // ]>
    assert(cursor.kind == XMLKind.dtdEnd);
    assert(!cursor.wholeContent);
    assert(cursor.next);

    // <aaa xmlns:myns="something">
    assert(cursor.kind() == XMLKind.elementStart);
    assert(cursor.name() == "aaa");
    assert(cursor.prefix() == "");
    assert(cursor.localName() == "aaa");
    assert(cursor.attributes().array == [Attribute!wstring("xmlns:myns", "something")]);
    assert(cursor.content() == " xmlns:myns=\"something\"");

    assert(cursor.enter());
    // <myns:bbb myns:att='>'>
    assert(cursor.kind() == XMLKind.elementStart);
    assert(cursor.name() == "myns:bbb");
    assert(cursor.prefix() == "myns");
    assert(cursor.localName() == "bbb");
    assert(cursor.attributes().array == [Attribute!wstring("myns:att", ">")]);
    assert(cursor.content() == " myns:att='>'");

    assert(cursor.enter());
    cursor.exit();

    // </myns:bbb>
    assert(cursor.kind() == XMLKind.elementEnd);
    assert(cursor.name() == "myns:bbb");
    assert(cursor.prefix() == "myns");
    assert(cursor.localName() == "bbb");
    assert(cursor.attributes().empty);
    assert(cursor.content() == []);

    assert(cursor.next());
    // <![CDATA[ Ciaone! ]]>
    assert(cursor.kind() == XMLKind.text);
    assert(cursor.name() == "");
    assert(cursor.prefix() == "");
    assert(cursor.localName() == "");
    assert(cursor.attributes().empty);
    assert(cursor.content() == " Ciaone! ");

    assert(cursor.next());
    // <ccc/>
    assert(cursor.kind() == XMLKind.elementEmpty);
    assert(cursor.name() == "ccc");
    assert(cursor.prefix() == "");
    assert(cursor.localName() == "ccc");
    assert(cursor.attributes().empty);
    assert(cursor.content() == []);

    assert(!cursor.next());
    cursor.exit();

    // </aaa>
    assert(cursor.kind() == XMLKind.elementEnd);
    assert(cursor.name() == "aaa");
    assert(cursor.prefix() == "");
    assert(cursor.localName() == "aaa");
    assert(cursor.attributes().empty);
    assert(cursor.content() == []);

    assert(!cursor.next());
    cursor.exit();

    assert(cursor.documentEnd);
    assert(!cursor.atBeginning);
}

/++
 + Returns an input range of the children of the node currently pointed by `cursor`.
 +
 + Advancing the range returned by this function also advances `cursor`. It is thus
 + not recommended to interleave usage of this function with raw usage of `cursor`.
 + When the range is destroyed, `exit` is called on `cursor`.
 +/
auto children(T)(ref T cursor) @trusted
    if (isCursor!T)
{
    struct XMLRange
    {
        T* cursor;
        bool endReached;

        bool empty() const { return endReached; }
        void popFront() { endReached = !cursor.next(); }
        ref T front() { return *cursor; }

        ~this() { cursor.exit; }
    }
    auto workaround() @system
    {
        // `enter` returns true when a first child exists, so the range starts
        // exhausted exactly when entering fails
        return XMLRange(&cursor, !cursor.enter);
    }
    return workaround();
}

unittest
{
    import newxml.lexers;
    import newxml.parser;
    import std.string : lineSplitter, strip;
    import std.algorithm : map, equal;
    import std.array : array;
    import std.exception : assertThrown;
    import std.stdio;

    string xml = q{
    <?xml encoding = "utf-8" ?>
    <aaa xmlns:myns="something">
        <myns:bbb myns:att='>'>
            <!-- lol -->
            Lots of Text!
            On multiple lines!
        </myns:bbb>
        <![CDATA[ Ciaone! ]]>
        <ccc/>
    </aaa>
    };
    // NOTE(review): the offending character of this attribute was mis-encoded (shown as
    // U+FFFD) in the reviewed copy of this file; an out-of-range character reference is
    // assumed here -- confirm against the original source.
    string xml_bad = q{
        <?xml encoding = "utf-8" ?>
        <AAA>
            <BBB attr = "this should fail &#xFFFFFFFFFF;" />
        </AAA>
    };

    auto lexer = RangeLexer!(string)();

    auto cursor = lexer.parser.cursor!(Yes.conflateCDATA)();
    assert(cursor.documentEnd);
    cursor.setSource(xml);

    // <?xml encoding = "utf-8" ?>
    assert(cursor.kind() == XMLKind.document);
    assert(cursor.name() == "xml");
    assert(cursor.prefix() == "");
    assert(cursor.localName() == "xml");
    auto attrs = cursor.attributes;
    assert(attrs.front == Attribute!string("encoding", "utf-8"));
    attrs.popFront;
    assert(attrs.empty);
    assert(cursor.content() == " encoding = \"utf-8\" ");

    {
        auto range1 = cursor.children;
        // <aaa xmlns:myns="something">
        assert(range1.front.kind() == XMLKind.elementStart);
        assert(range1.front.name() == "aaa");
        assert(range1.front.prefix() == "");
        assert(range1.front.localName() == "aaa");
        attrs = range1.front.attributes;
        assert(attrs.front == Attribute!string("xmlns:myns", "something"));
        attrs.popFront;
        assert(attrs.empty);
        assert(range1.front.content() == " xmlns:myns=\"something\"");

        {
            auto range2 = range1.front.children();
            // <myns:bbb myns:att='>'>
            assert(range2.front.kind() == XMLKind.elementStart);
            assert(range2.front.name() == "myns:bbb");
            assert(range2.front.prefix() == "myns");
            assert(range2.front.localName() == "bbb");
            attrs = range2.front.attributes;
            assert(attrs.front == Attribute!string("myns:att", ">"));
            attrs.popFront;
            assert(attrs.empty);
            assert(range2.front.content() == " myns:att='>'");

            {
                auto range3 = range2.front.children();
                // <!-- lol -->
                assert(range3.front.kind() == XMLKind.comment);
                assert(range3.front.name() == "");
                assert(range3.front.prefix() == "");
                assert(range3.front.localName() == "");
                assert(range3.front.attributes.empty);
                assert(range3.front.content() == " lol ");

                range3.popFront;
                assert(!range3.empty);
                // Lots of Text!
                // On multiple lines!
                assert(range3.front.kind() == XMLKind.text);
                assert(range3.front.name() == "");
                assert(range3.front.prefix() == "");
                assert(range3.front.localName() == "");
                assert(range3.front.attributes().empty);
                // split and strip so the unittest does not depend on the newline policy or indentation of this file
                static immutable linesArr = ["Lots of Text!", "On multiple lines!", ""];
                assert(range3.front.content().lineSplitter.equal!((a, b) => a.strip == b)(linesArr));

                range3.popFront;
                assert(range3.empty);
            }

            range2.popFront;
            assert(!range2.empty);
            // <![CDATA[ Ciaone! ]]>
            assert(range2.front.kind() == XMLKind.text);
            assert(range2.front.name() == "");
            assert(range2.front.prefix() == "");
            assert(range2.front.localName() == "");
            assert(range2.front.attributes().empty);
            assert(range2.front.content() == " Ciaone! ");

            range2.popFront;
            assert(!range2.empty());
            // <ccc/>
            assert(range2.front.kind() == XMLKind.elementEmpty);
            assert(range2.front.name() == "ccc");
            assert(range2.front.prefix() == "");
            assert(range2.front.localName() == "ccc");
            assert(range2.front.attributes().empty);
            assert(range2.front.content() == []);

            range2.popFront;
            assert(range2.empty());
        }

        range1.popFront;
        assert(range1.empty);
    }

    assert(cursor.documentEnd());
    {
        cursor.setSource(xml_bad);
        auto range1 = cursor.children();
        assert(range1.front.name == "AAA");
        auto range2 = range1.front.children();
        assert(range2.front.name == "BBB");
        auto range3 = range2.front.attributes();
        assertThrown!XMLException(range3.front());
    }
}

import std.traits : isArray;

/++
 + A cursor that wraps another cursor, copying all output strings.
 +
 + The cursor specification ($(LINK2 ../interfaces/isCursor, `newxml.interfaces.isCursor`))
 + clearly states that a cursor (as the underlying parser and lexer) is free to reuse
 + its internal buffers and thus invalidate every output. This wrapper returns freshly
 + allocated strings, thus allowing references to its outputs to outlive calls to advancing
 + methods.
 +
 + If `intern` is `Yes`, every distinct string is copied only once and the same copy
 + is returned for every later occurrence.
 +
 + This type should not be instantiated directly, but using the helper function
 + `copyingCursor`.
 +/
struct CopyingCursor(CursorType, Flag!"intern" intern = No.intern)
    if (isCursor!CursorType && isArray!(CursorType.StringType))
{
    alias StringType = CursorType.StringType;

    CursorType cursor;
    alias cursor this;

    static if (intern == Yes.intern)
    {
        import std.typecons : Rebindable;

        /// Copies handed out so far, indexed by their own content.
        Rebindable!(immutable StringType)[const StringType] interned;
    }

    /+ Returns an owned copy of `str` (or the already interned copy, when interning). +/
    private auto copy(StringType str) @system
    {
        static if (intern == Yes.intern)
        {
            auto match = str in interned;
            if (match)
                return *match;
        }

        import std.range.primitives : ElementEncodingType;

        alias ElemType = ElementEncodingType!StringType;
        // dup yields mutable elements; the cast restores the qualifiers of the original type
        ElemType[] cp = cast(ElemType[]) str.dup;

        static if (intern == Yes.intern)
        {
            // The key must be the owned copy: `str` points into buffers that the
            // wrapped cursor is allowed to reuse, which would corrupt the table.
            interned[cp] = cp;
        }

        return cp;
    }

    /// Copy of the wrapped cursor's `name`.
    auto name() @trusted
    {
        return copy(cursor.name);
    }

    /// Copy of the wrapped cursor's `localName`.
    auto localName() @trusted
    {
        return copy(cursor.localName);
    }

    /// Copy of the wrapped cursor's `prefix`.
    auto prefix() @trusted
    {
        return copy(cursor.prefix);
    }

    /// Copy of the wrapped cursor's `content`.
    auto content() @trusted
    {
        return copy(cursor.content);
    }

    /// Copy of the wrapped cursor's `wholeContent`.
    auto wholeContent() @trusted
    {
        return copy(cursor.wholeContent);
    }

    /// The attributes of the wrapped cursor, with every name and value copied.
    auto attributes() @trusted
    {
        struct CopyRange
        {
            typeof(cursor.attributes()) attrs;
            alias attrs this; // empty and popFront are forwarded to the wrapped range

            private CopyingCursor* parent;

            auto front()
            {
                auto attr = attrs.front;
                return Attribute!StringType(
                    parent.copy(attr.name),
                    parent.copy(attr.value),
                );
            }
        }
        return CopyRange(cursor.attributes, &this);
    }
}

/++
 + Instantiates a suitable `CopyingCursor` on top of the given `cursor`.
 +/
auto copyingCursor(Flag!"intern" intern = No.intern, CursorType)(auto ref CursorType cursor)
{
    auto res = CopyingCursor!(CursorType, intern)();
    res.cursor = cursor;
    return res;
}

unittest
{
    import newxml.lexers;
    import newxml.parser;

    wstring xml = q{
    <?xml encoding = "utf-8" ?>
    <aaa>
        <bbb>
            <aaa>
            </aaa>
        </bbb>
        Hello, world!
    </aaa>
    };

    auto cursor =
         xml
        .lexer
        .parser
        .cursor!(Yes.conflateCDATA)
        .copyingCursor!(Yes.intern)();

    assert(cursor.enter);
    auto a1 = cursor.name;
    assert(cursor.enter);
    auto b1 = cursor.name;
    assert(cursor.enter);
    auto a2 = cursor.name;
    assert(!cursor.enter);
    auto a3 = cursor.name;
    cursor.exit;
    auto b2 = cursor.name;
    cursor.exit;
    auto a4 = cursor.name;

    assert(a1 is a2);
    assert(a2 is a3);
    assert(a3 is a4);
    assert(b1 is b2);
}