1 /*
2 *             Copyright Lodovico Giaretta 2016 - .
3 *  Distributed under the Boost Software License, Version 1.0.
4 *      (See accompanying file LICENSE_1_0.txt or copy at
5 *            http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 /++
9 +   Authors:
10 +   Lodovico Giaretta
11 +   László Szerémi
12 +
13 +   License:
14 +   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
15 +
16 +   Copyright:
17 +   Copyright Lodovico Giaretta 2016 --
18 +/
19 
20 module newxml.cursor;
21 
22 import newxml.interfaces;
23 import newxml.faststrings;
24 
25 import newxml.validation;
26 
27 import std.meta : staticIndexOf;
28 import std.range.primitives;
29 import std.typecons;
30 
31 
/++
+   Exception type thrown by the cursor layer of this module, e.g. on invalid
+   attribute syntax or on a malformed element structure.
+/
public class CursorException : XMLException
{
    /++
    +   Builds the exception from a message and an optional source location / chained throwable.
    +   All arguments are forwarded unchanged to the `XMLException` constructor.
    +/
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable nextInChain = null)
        @safe pure nothrow @nogc
    {
        super(msg, file, line, nextInChain);
    }

    /++
    +   Same as the other constructor, but with the chained throwable placed right after the
    +   message, so that it can be given without spelling out the file and line.
    +/
    this(string msg, Throwable nextInChain, string file = __FILE__, size_t line = __LINE__)
        @safe pure nothrow @nogc
    {
        super(msg, file, line, nextInChain);
    }
}
43 @safe:
/++
+   A single attribute: its qualified name and its value.
+
+   The position of the first `':'` of the qualified name is cached whenever the name is
+   assigned, so that `prefix` and `localName` are plain slices of the name.
+/
package struct Attribute(StringType)
{
    /// The attribute value.
    StringType value;
    /// The qualified name, exactly as assigned.
    private StringType _name;
    /// Index of the prefix separator inside `_name`; 0 means "no prefix".
    private size_t colon;

    /// Creates an attribute from its qualified name and its value.
    this(StringType qualifiedName, StringType value)
    {
        name = qualifiedName;
        this.value = value;
    }

    /// The qualified name.
    @property auto name() inout
    {
        return _name;
    }

    /// Assigns the qualified name and recomputes the cached separator position.
    @property void name(StringType _name)
    {
        this._name = _name;
        immutable pos = _name.fastIndexOf(':');
        // a separator that is missing or sits at index 0 is treated as "no prefix"
        colon = (pos > 0) ? pos : 0;
    }

    /// The part of the name before the separator; empty when there is no prefix.
    @property auto prefix() inout
    {
        return name[0..colon];
    }

    /// The part of the name after the separator, or the whole name when there is no prefix.
    @property StringType localName()
    {
        return colon ? name[colon+1..$] : name;
    }

    /// Renders the attribute as `name = "value"`.
    StringType toString() {
        return name ~ " = \"" ~ value ~ "\"";
    }
}
84 
85 /++
86 +   An implementation of the $(LINK2 ../interfaces/isCursor, `isCursor`) trait.
87 +
88 +   This is the only provided cursor that builds on top of a parser (and not on top of another cursor), so it is part 
89 +   of virtually every parsing chain. All documented methods are implementations of the specifications dictated by
90 +   $(LINK2 ../interfaces/isCursor, `isCursor`).
+   Parameters:
+       P = The underlying low level parser type.
+       conflateCDATA = If set to `Yes` (the default), `kind` reports CDATA sections as `XMLKind.text`.
+       processBadDocument = If set to `Yes` (default is `No`), errors are ignored as long as the document can
+   still be processed. Otherwise an appropriate exception is thrown when an error is encountered.
96 +/
97 struct Cursor(P, Flag!"conflateCDATA" conflateCDATA = Yes.conflateCDATA,
98     Flag!"processBadDocument" processBadDocument = No.processBadDocument)
99     if (isLowLevelParser!P)
100 {
101     struct AttributesRange
102     {
103         private StringType content;
104         private Attribute!StringType attr;
105         private Cursor* cursor;
106         private bool error;
107 
108         private this(StringType str, ref Cursor cur) @system nothrow
109         {
110             content = str;
111             cursor = &cur;
112         }
113 
114         bool empty() @safe
115         {
116             if (error)
117                 return true;
118 
119             auto i = content.fastIndexOfNeither(" \r\n\t");
120             if (i >= 0)
121             {
122                 content = content[i..$];
123                 return false;
124             }
125             return true;
126         }
127 
128         auto front() @safe
129         {
130             if (attr == attr.init)
131             {
132                 auto i = content.fastIndexOfNeither(" \r\n\t");
133                 assert(i >= 0, "No more attributes...");
134                 content = content[i..$];
135 
136                 auto sep = fastIndexOf(content[0..$], '=');
137                 if (sep == -1)
138                 {
139                     // attribute without value???
140                     static if (processBadDocument == No.processBadDocument) 
141                     {
142                         throw new CursorException("Invalid attribute syntax!");
143                     }
144                     else 
145                     {
146                         error = true;
147                         return attr.init;
148                     }
149                 }
150 
151                 auto name = content[0..sep];
152                 
153                 
154                 auto delta = fastIndexOfAny(name, " \r\n\t");
155                 if (delta >= 0)
156                 {
157                     auto j = name[delta..$].fastIndexOfNeither(" \r\n\t");
158                     if (j != -1)
159                     {
160                         // attribute name contains spaces???
161                         static if (processBadDocument == No.processBadDocument) 
162                         {
163                             throw new CursorException("Invalid attribute syntax!");
164                         } 
165                         else 
166                         {
167                             error = true;
168                             return attr.init;
169                         }
170                     }
171                     name = name[0..delta];
172                 }
173                 if (!isValidXMLName(name)) 
174                 {
175                     static if (processBadDocument == No.processBadDocument)
176                     {
177                         throw new CursorException("Invalid attribute name!");
178                     }
179                     else 
180                     {
181                         error = true;
182                     }
183                 }
184                 attr.name = name;
185 
186                 size_t attEnd;
187                 size_t quote;
188                 delta = (sep + 1 < content.length) ? fastIndexOfNeither(content[sep + 1..$], " \r\n\t") : -1;
189                 if (delta >= 0)
190                 {
191                     quote = sep + 1 + delta;
192                     if (content[quote] == '"' || content[quote] == '\'')
193                     {
194                         delta = fastIndexOf(content[(quote + 1)..$], content[quote]);
195                         if (delta == -1)
196                         {
197                             // attribute quotes never closed???
198                             static if (processBadDocument == No.processBadDocument) 
199                             {
200                                 throw new CursorException("Invalid attribute syntax!");
201                             } 
202                             else 
203                             {
204                                 error = true;
205                                 return attr.init;
206                             }
207                         }
208                         attEnd = quote + 1 + delta;
209                     }
210                     else
211                     {
212                         static if (processBadDocument == No.processBadDocument) 
213                         {
214                             throw new CursorException("Invalid attribute syntax!");
215                         } 
216                         else 
217                         {
218                             error = true;
219                             return attr.init;
220                         }
221                     }
222                 }
223                 else
224                 {
225                     // attribute without value???
226                     static if (processBadDocument == No.processBadDocument) 
227                     {
228                         throw new CursorException("Invalid attribute syntax!");
229                     } 
230                     else 
231                     {
232                         error = true;
233                         return attr.init;
234                     }
235                 }
236                 //attr.value = content[(quote + 1)..attEnd];
237                 static if (processBadDocument == No.processBadDocument) 
238                     attr.value = xmlUnescape(content[(quote + 1)..attEnd], cursor.parser.chrEntities);
239                 else
240                     attr.value = xmlUnescape!No.strict(content[(quote + 1)..attEnd], cursor.parser.chrEntities);
241                 content = content[attEnd+1..$];
242             }
243             return attr;
244         }
245 
246         auto popFront() @safe
247         {
248             front();
249             attr = attr.init;
250         }
251     }
252     /++ The type of characters in the input, as returned by the underlying low level parser. +/
253     alias CharacterType = P.CharacterType;
254 
255     /++ The type of sequences of CharacterType, as returned by this parser +/
256     alias StringType = CharacterType[];
257 
258     private P parser;
259     private ElementType!P currentNode;
260     private bool starting, _documentEnd = true, nextFailed, _xmlDeclNotFound;
261     private ptrdiff_t colon;
262     private size_t nameBegin, nameEnd;
263     public StringType encoding;
264     public StringType docType;
265     ///Loads system entities if needed.
266     ///If not used, then it can protect against certain system entity attacks at the
267     ///cost of having this feature disabled.
268     public @safe StringType delegate(StringType path) sysEntityLoader;
269 
    /++
    +   Generic constructor; forwards its arguments to the parser constructor.
    +
    +   Only the underlying parser is constructed here: `initialize` is not called,
    +   so the remaining fields keep their default values.
    +/
    this(Args...)(Args args)
    {
        parser = P(args);
    }
275 
276     static if (isSaveableLowLevelParser!P)
277     {
278         public auto save()
279         {
280             auto result = this;
281             result.parser = parser.save;
282             return result;
283         }
284     }
    /// Returns `true` if `initialize` found no XML declaration at the start of the document
    /// and substituted a default one.
    public @property bool xmlDeclNotFound() @nogc @safe pure nothrow
    {   
        return _xmlDeclNotFound;
    }
290     /+
291     /** 
292      * Preprocesses the document, mainly the declaration (sets document version and encoding) and the Document type.
293      * NOTE: Does not want to process anything beyond the first processing instruction (`<?xml [...] ?>`) for unknown
294      * reasons, and I cannot get the debugger to find the reason.
295      */
296     public void preprocess() {
297         import std.array;
298         int i;
299         do
300         {
301             i++;
302             switch (currentNode.kind) {
303                 case XMLKind.document:
304                     //enter();
305                     break;
306                 case XMLKind.processingInstruction:
307                     auto attrl = attributes().array;
308                     foreach (attr ; attrl) {
309                         //Attribute!StringType attr = attrl.front;
310                         switch (attr.name) {
311                             case "version":
312                                 if (attr.value == "1.0")
313                                 {
314                                     parser.xmlVersion = XMLVersion.XML1_0;
315                                 }
316                                 else if (attr.value == "1.1")
317                                 {
318                                     parser.xmlVersion = XMLVersion.XML1_1;
319                                 }
320                                 break;
321                             case "encoding":
322                                 encoding = attr.value;
323                                 break;
324                             default:
325                                 break;  //Check whether other types of attributes are allowed here.
326                         }
327                     }
328                     //exit();
329                     /+if (!enter())
330                         goto exitloop;+/
331                     break;
332                 case XMLKind.dtdStart: 
333                     docType = content();
334                     if (!enter())
335                         goto exitloop;
336                     break;
337                 case XMLKind.dtdEmpty:
338                     docType = content();
339                     goto exitloop;
340                 case XMLKind.entityDecl:
341                     StringType entName = name();
342                     //Check for external entities.
343                     parser.chrEntities[entName] = content();
344                     break;
345                 case XMLKind.attlistDecl, XMLKind.elementDecl, XMLKind.notationDecl, XMLKind.declaration:
346                     break;
347                 default:
348                     goto exitloop;   
349             }
350 
351         }
352         while (next);
353         exitloop:
354         exit();
355     }+/
356 
357     private bool advanceInput()
358     {
359         colon = colon.max;
360         nameEnd = 0;
361         parser.popFront();
362         if (!parser.empty)
363         {
364             currentNode = parser.front;
365             return true;
366         }
367         _documentEnd = true;
368         return false;
369     }
370 
371 
    // Only available when the underlying parser needs to be given its source explicitly.
    static if (needSource!P)
    {
        /++
        +   The type of input accepted by this parser,
        +   i.e., the one accepted by the underlying low level parser.
        +/
        alias InputType = P.InputType;

        /++
        +   Initializes this cursor (and the underlying low level parser) with the given input.
        +   The input is handed to the parser first, then the cursor state is reset
        +   through `initialize`.
        +/
        void setSource(InputType input)
        {
            parser.setSource(input);
            initialize();
        }
    }
389 
390     private void initialize()
391     {
392         // reset private fields
393         nextFailed = false;
394         _xmlDeclNotFound = false;
395         colon = colon.max;
396         nameEnd = 0;
397 
398         if (!parser.empty)
399         {
400             if (parser.front.kind == XMLKind.processingInstruction &&
401                 parser.front.content.length >= 3 &&
402                 fastEqual(parser.front.content[0..3], "xml"))
403             {
404                 currentNode = parser.front;
405             }
406             else
407             {
408                 // document without xml declaration???
409                 // It turns out XML declaration is not mandatory, just assume UTF-8 and XML version 1.0 if it's missing!
410                 currentNode.kind = XMLKind.processingInstruction;
411                 currentNode.content = "xml version = \"1.0\" encoding = \"UTF-8\"";
412                 _xmlDeclNotFound = true;
413             }
414             starting = true;
415             _documentEnd = false;
416         }
417         else
418             _documentEnd = true;
419     }
420 
    /++
    +   Returns whether the cursor is at the end of the document, i.e. whether the
    +   underlying parser ran out of nodes (or no input has been set up yet).
    +/
    bool documentEnd()
    {
        return _documentEnd;
    }
426 
    /++
    +   Returns whether the cursor is at the beginning of the document
    +   (i.e. whether no `enter` has been performed successfully yet and thus
    +   the cursor still points to the xml declaration).
    +/
    bool atBeginning()
    {
        return starting;
    }
436 
437     /++
438     +   Advances to the first child of the current node and returns `true`.
439     +   If it returns `false`, the cursor is either on the same node (it wasn't
440     +   an element start) or it is at the close tag of the element it was called on
441     +   (it was a pair open/close tag without any content)
442     +/
443     bool enter()
444     {
445         if (starting)
446         {
447             starting = false;
448             if (currentNode.content is parser.front.content)
449                 return advanceInput();
450             else
451             {
452                 nameEnd = 0;
453                 nameBegin = 0;
454             }
455 
456             currentNode = parser.front;
457             return true;
458         }
459         else if (currentNode.kind == XMLKind.elementStart)
460         {
461             return advanceInput() && currentNode.kind != XMLKind.elementEnd;
462         }
463         else if (currentNode.kind == XMLKind.dtdStart)
464         {
465             return advanceInput() && currentNode.kind != XMLKind.dtdEnd;
466         }
467         else
468             return false;
469     }
470 
471     /++ Advances to the end of the parent of the current node. +/
472     void exit()
473     {
474         if (!nextFailed)
475             while (next()) {}
476 
477         nextFailed = false;
478     }
479 
    /++
    +   Advances to the _next sibling of the current node.
    +   Returns whether it succeeded. If it fails, either the
    +   document has ended or the only meaningful operation is `exit`.
    +
    +   When the current node is an element start, its whole subtree is skipped first.
    +   Throws: `CursorException` if, with `processBadDocument` set to `No`, the end tag
    +   reached after skipping does not carry the name of the start tag.
    +/
    bool next()
    {
        // nothing to advance to: no input left, still on the document node,
        // or a previous call already hit the end of the sibling list
        if (parser.empty || starting || nextFailed)
            return false;
        else if (currentNode.kind == XMLKind.dtdStart)
        {
            // NOTE: skipping the doctype body is disabled, so `next` on a doctype start
            // simply moves on to the node that follows it.
            /+while (advanceInput && currentNode.kind != XMLKind.dtdEnd) 
            {
                
            }+/
        }
        else if (currentNode.kind == XMLKind.elementStart)
        {
            // skip the subtree: count nested start/end tags until the matching end tag
            int count = 1;
            static if (processBadDocument == No.processBadDocument)
                StringType currName = name;
            while (count > 0 && !parser.empty)
            {
                if (!advanceInput)
                    return false;
                if (currentNode.kind == XMLKind.elementStart)
                    count++;
                else if (currentNode.kind == XMLKind.elementEnd)
                    count--;
            }
            static if (processBadDocument == No.processBadDocument)
            {
                // the end tag we stopped on must close the element we started from
                if (count != 0 || currName != name)
                    throw new CursorException("Document is malformed!");
            }
        }
        // step onto the following node; an end node means there are no more siblings
        if (!advanceInput || currentNode.kind == XMLKind.elementEnd || currentNode.kind == XMLKind.dtdEnd)
        {
            nextFailed = true;
            return false;
        }
        return true;
    }
523 
524     /++ Returns the _kind of the current node. +/
525     XMLKind kind() const
526     {
527         if (starting)
528             return XMLKind.document;
529 
530         static if (conflateCDATA == Yes.conflateCDATA)
531             if (currentNode.kind == XMLKind.cdata)
532                 return XMLKind.text;
533 
534         return currentNode.kind;
535     }
536 
537     /++
538     +   If the current node is an element or a doctype, returns its complete _name;
539     +   it it is a processing instruction, return its target;
540     +   otherwise, returns an empty string;
541     +/
542     StringType name()
543     {
544         switch (currentNode.kind)
545         {
546             case XMLKind.document:
547             case XMLKind.text:
548             case XMLKind.cdata:
549             case XMLKind.comment:
550             case XMLKind.declaration:
551             case XMLKind.conditional:
552             case XMLKind.dtdStart:
553             case XMLKind.dtdEmpty:
554             case XMLKind.dtdEnd:
555                 return [];
556             default:
557                 if (!nameEnd)
558                 {
559                     ptrdiff_t i, j;
560                     if ((j = fastIndexOfNeither(currentNode.content, " \r\n\t")) >= 0)
561                         nameBegin = j;
562                     if ((i = fastIndexOfAny(currentNode.content[nameBegin..$], " \r\n\t")) >= 0)
563                         nameEnd = i + nameBegin;
564                     else
565                         nameEnd = currentNode.content.length;
566                 }
567                 return currentNode.content[nameBegin..nameEnd];
568         }
569     }
570 
571     /++
572     +   If the current node is an element, returns its local name (without namespace prefix);
573     +   otherwise, returns the same result as `name`.
574     +/
575     StringType localName()
576     {
577         auto name = name();
578         if (currentNode.kind == XMLKind.elementStart || currentNode.kind == XMLKind.elementEnd)
579         {
580             if (colon == colon.max)
581                 colon = fastIndexOf(name, ':');
582             return name[(colon+1)..$];
583         }
584         return name;
585     }
586 
587     /++
588     +   If the current node is an element, returns its namespace _prefix;
589     +   otherwise, the result in unspecified;
590     +/
591     StringType prefix()
592     {
593         if (currentNode.kind == XMLKind.elementStart || currentNode.kind == XMLKind.elementEnd)
594         {
595             auto name = name;
596             if (colon == colon.max)
597                 colon = fastIndexOf(name, ':');
598 
599             if (colon >= 0)
600                 return name[0..colon];
601             else
602                 return [];
603         }
604         return [];
605     }
606 
607     /++
608     +   If the current node is an element, return its _attributes as a range of triplets
609     +   (`prefix`, `name`, `value`); if the current node is the document node, return the _attributes
610     +   of the xml declaration (encoding, version, ...); otherwise, returns an empty array.
611     +/
612     auto attributes() @trusted
613     {
614         
615 
616         auto kind = currentNode.kind;
617         if (kind == XMLKind.elementStart || kind == XMLKind.elementEmpty || kind == XMLKind.processingInstruction)
618         {
619             name;
620             return AttributesRange(currentNode.content[nameEnd..$], this);
621         }
622         else
623             return AttributesRange();
624     }
625 
626     /++
627     +   Return the text content of a cdata section, a comment or a text node;
628     +   in all other cases, returns the entire node without the name
629     +/
630     StringType content()
631     {
632         if (currentNode.kind == XMLKind.entityDecl) 
633         {
634             sizediff_t b = fastIndexOfAny(currentNode.content[nameEnd..$], "\"\'");
635             sizediff_t e = fastLastIndexOf(currentNode.content[nameEnd..$], currentNode.content[b + nameEnd]);
636             if (b > 0 && e > 0)
637             {
638                 if (b + 1 <= e)
639                     return currentNode.content[nameEnd + b + 1..nameEnd + e];
640                 else
641                     return null;
642             }
643             else
644             {
645                 static if (processBadDocument == No.processBadDocument)
646                     throw new CursorException("Entity content not found!");
647                 else
648                     return null;
649             }
650         }
651         /* else if (currentNode.kind == XMLKind.dtdStart || currentNode.kind == XMLKind.dtdEmpty)
652         {
653             sizediff_t b = fastLastIndexOfAny(currentNode.content[nameEnd..$], " \r\n\t");
654             if (b == -1)
655                 return null;
656             sizediff_t e = fastIndexOfAny(currentNode.content[nameEnd + b..$], " \r\n\t");
657             if (e == -1)
658                 return currentNode.content[nameEnd + b + 1..$];
659             else
660                 return currentNode.content[nameEnd + b + 1..nameEnd + e];
661         } */
662         else
663             return currentNode.content[nameEnd..$];
664     }
665 
    /++
    +   Returns the entire text of the current node, name included,
    +   exactly as stored in the node handed over by the parser.
    +/
    StringType wholeContent() const
    {
        return currentNode.content;
    }
671 }
672 
/++
+   Instantiates a `Cursor` specialized for the type of the given `parser` and
+   for the requested `conflateCDATA` setting, and hands the parser over to it.
+   The cursor is initialized right away when the parser already has input.
+/
template cursor(Flag!"conflateCDATA" conflateCDATA = Yes.conflateCDATA)
{
    auto cursor(T)(auto ref T parser)
        if(isLowLevelParser!T)
    {
        auto result = Cursor!(T, conflateCDATA)();
        result.parser = parser;
        // an empty parser leaves the cursor in its default "document end" state
        if (!result.parser.empty)
            result.initialize;
        return result;
    }
}
696 
// Walks a complete document (xml declaration, doctype with internal subset, nested
// elements, comment, text, CDATA) with raw `enter`/`next`/`exit` calls and checks what
// the cursor reports at every step. The indentation of the assertions mirrors the
// nesting depth of the cursor inside the document.
unittest
{
    import newxml.lexers;
    import newxml.parser;
    import std.string : lineSplitter, strip;
    import std.algorithm : map;
    import std.array : array;
    import std.conv : to;

    wstring xml = q"{
    <?xml encoding = "utf-8" ?>
    <!DOCTYPE mydoc https://myUri.org/bla [
        <!ELEMENT myelem ANY>
        <!ENTITY   myent    "replacement text">
        <!ATTLIST myelem foo cdata #REQUIRED >
        <!NOTATION PUBLIC 'h'>
        <!FOODECL asdffdsa >
    ]>
    <aaa xmlns:myns="something">
        <myns:bbb myns:att='>'>
            <!-- lol -->
            Lots of Text!
            On multiple lines!
        </myns:bbb>
        <![CDATA[ Ciaone! ]]>
        <ccc/>
    </aaa>
    }";

    auto cursor = xml.lexer.parser.cursor;

    assert(cursor.atBeginning);

    // <?xml encoding = "utf-8" ?>
    assert(cursor.kind() == XMLKind.document);
    assert(cursor.name() == "xml");
    assert(cursor.prefix() == "");
    assert(cursor.localName() == "xml");
    assert(cursor.attributes().array == [Attribute!wstring("encoding", "utf-8")]);
    assert(cursor.content() == " encoding = \"utf-8\" ");

    assert(cursor.enter());
        assert(!cursor.atBeginning);

        // <!DOCTYPE mydoc https://myUri.org/bla [
        assert(cursor.kind == XMLKind.dtdStart);
        assert(cursor.wholeContent == " mydoc https://myUri.org/bla ");

        assert(cursor.enter);
            // <!ELEMENT myelem ANY>
            assert(cursor.kind == XMLKind.elementDecl);
            assert(cursor.wholeContent == " myelem ANY");

            assert(cursor.next);
            // <!ENTITY   myent    "replacement text">
            assert(cursor.kind == XMLKind.entityDecl);
            assert(cursor.wholeContent == "   myent    \"replacement text\"");
            assert(cursor.name == "myent");
            assert(cursor.content == "replacement text", to!string(cursor.content));

            assert(cursor.next);
            // <!ATTLIST myelem foo cdata #REQUIRED >
            assert(cursor.kind == XMLKind.attlistDecl);
            assert(cursor.wholeContent == " myelem foo cdata #REQUIRED ");

            assert(cursor.next);
            // <!NOTATION PUBLIC 'h'>
            assert(cursor.kind == XMLKind.notationDecl);
            assert(cursor.wholeContent == " PUBLIC 'h'");

            assert(cursor.next);
            // <!FOODECL asdffdsa >
            assert(cursor.kind == XMLKind.declaration);
            assert(cursor.wholeContent == "FOODECL asdffdsa ");

            assert(!cursor.next);

            //assert(cursor.parser._chrEntities["myent"] == "replacement text");
        cursor.exit;

        // ]>
        assert(cursor.kind == XMLKind.dtdEnd);
        assert(!cursor.wholeContent);
        assert(cursor.next);

        // <aaa xmlns:myns="something">
        assert(cursor.kind() == XMLKind.elementStart);
        assert(cursor.name() == "aaa");
        assert(cursor.prefix() == "");
        assert(cursor.localName() == "aaa");
        assert(cursor.attributes().array == [Attribute!wstring("xmlns:myns", "something")]);
        assert(cursor.content() == " xmlns:myns=\"something\"");

        assert(cursor.enter());
            // <myns:bbb myns:att='>'>
            assert(cursor.kind() == XMLKind.elementStart);
            assert(cursor.name() == "myns:bbb");
            assert(cursor.prefix() == "myns");
            assert(cursor.localName() == "bbb");
            assert(cursor.attributes().array == [Attribute!wstring("myns:att", ">")]);
            assert(cursor.content() == " myns:att='>'");

            assert(cursor.enter());
            cursor.exit();

            // </myns:bbb>
            assert(cursor.kind() == XMLKind.elementEnd);
            assert(cursor.name() == "myns:bbb");
            assert(cursor.prefix() == "myns");
            assert(cursor.localName() == "bbb");
            assert(cursor.attributes().empty);
            assert(cursor.content() == []);

            assert(cursor.next());
            // <![CDATA[ Ciaone! ]]>
            assert(cursor.kind() == XMLKind.text);
            assert(cursor.name() == "");
            assert(cursor.prefix() == "");
            assert(cursor.localName() == "");
            assert(cursor.attributes().empty);
            assert(cursor.content() == " Ciaone! ");

            assert(cursor.next());
            // <ccc/>
            assert(cursor.kind() == XMLKind.elementEmpty);
            assert(cursor.name() == "ccc");
            assert(cursor.prefix() == "");
            assert(cursor.localName() == "ccc");
            assert(cursor.attributes().empty);
            assert(cursor.content() == []);

            assert(!cursor.next());
        cursor.exit();

        // </aaa>
        assert(cursor.kind() == XMLKind.elementEnd);
        assert(cursor.name() == "aaa");
        assert(cursor.prefix() == "");
        assert(cursor.localName() == "aaa");
        assert(cursor.attributes().empty);
        assert(cursor.content() == []);

        assert(!cursor.next());
    cursor.exit();

    assert(cursor.documentEnd);
    assert(!cursor.atBeginning);
}
845 
/++
+   Returns an input range of the children of the node currently pointed by `cursor`.
+
+   The cursor is entered when the range is created and exited when the range is
+   destroyed. Advancing the range returned by this function also advances `cursor`.
+   It is thus not recommended to interleave usage of this function with raw usage
+   of `cursor`.
+
+   The range is empty from the start when `cursor.enter` fails, i.e. when the
+   current node has no children.
+/
auto children(T)(ref T cursor) @trusted
    if (isCursor!T)
{
    struct XMLRange
    {
        T* cursor;
        bool endReached;

        bool empty() const { return endReached; }
        void popFront() { endReached = !cursor.next(); }
        ref T front() { return *cursor; }

        // a default-initialized range has no cursor to exit
        ~this() { if (cursor !is null) cursor.exit; }
    }
    auto workaround() @system {
        // `enter` returns true when the cursor now sits on a first child, so the end
        // has been reached exactly when it fails
        return XMLRange(&cursor, !cursor.enter);
    }
    return workaround();
}
871 
// Exercises `children` on a small namespaced document: one nested range is
// opened per level, and kind/name/prefix/localName/attributes/content are
// checked for every node visited. A second document with an out-of-range
// character reference checks that reading the attribute throws.
unittest
{
    import newxml.lexers;
    import newxml.parser;
    import std.string : lineSplitter, strip;
    import std.algorithm : map, equal;
    import std.array : array;
    import std.exception : assertThrown;
    import std.stdio;

    // Well-formed input: declaration, namespaced element with an attribute,
    // comment, multi-line text, CDATA section and an empty element.
    string xml = q{
    <?xml encoding = "utf-8" ?>
    <aaa xmlns:myns="something">
        <myns:bbb myns:att='>'>
            <!-- lol -->
            Lots of Text!
            On multiple lines!
        </myns:bbb>
        <![CDATA[ Ciaone! ]]>
        <ccc/>
    </aaa>
    };
    // Input whose attribute value holds a character reference that is expected
    // to be rejected (see the assertThrown at the end of this test).
    string xml_bad = q{
        <?xml encoding = "utf-8" ?>
        <AAA>
            <BBB attr = "this should fail &#xFFFFFFFFFF;" />
        </AAA>
    };

    //import std.experimental.allocator.mallocator;//import stdx.allocator.mallocator;

    //auto handler = () { assert(0, "Some problem here..."); };
    auto lexer = RangeLexer!(string)();
    
    // Yes.conflateCDATA: the CDATA section below is asserted to show up as XMLKind.text.
    auto cursor = lexer.parser.cursor!(Yes.conflateCDATA)();
    // Before a source is set the cursor reports documentEnd.
    assert(cursor.documentEnd);
    cursor.setSource(xml);

    // <?xml encoding = "utf-8" ?>
    assert(cursor.kind() == XMLKind.document);
    assert(cursor.name() == "xml");
    assert(cursor.prefix() == "");
    assert(cursor.localName() == "xml");
    auto attrs = cursor.attributes;
    assert(attrs.front == Attribute!string("encoding", "utf-8"));
    attrs.popFront;
    assert(attrs.empty);
    assert(cursor.content() == " encoding = \"utf-8\" ");

    {
        // Each nested scope owns one range; leaving the scope runs the range's
        // destructor, which calls cursor.exit.
        auto range1 = cursor.children;
        // <aaa xmlns:myns="something">
        assert(range1.front.kind() == XMLKind.elementStart);
        assert(range1.front.name() == "aaa");
        assert(range1.front.prefix() == "");
        assert(range1.front.localName() == "aaa");
        attrs = range1.front.attributes;
        assert(attrs.front == Attribute!string("xmlns:myns", "something"));
        attrs.popFront;
        assert(attrs.empty);
        assert(range1.front.content() == " xmlns:myns=\"something\"");

        {
            auto range2 = range1.front.children();
            // <myns:bbb myns:att='>'>
            assert(range2.front.kind() == XMLKind.elementStart);
            assert(range2.front.name() == "myns:bbb");
            assert(range2.front.prefix() == "myns");
            assert(range2.front.localName() == "bbb");
            attrs = range2.front.attributes;
            assert(attrs.front == Attribute!string("myns:att", ">"));
            attrs.popFront;
            assert(attrs.empty);
            assert(range2.front.content() == " myns:att='>'");

            {
                auto range3 = range2.front.children();
                // <!-- lol -->
                assert(range3.front.kind() == XMLKind.comment);
                assert(range3.front.name() == "");
                assert(range3.front.prefix() == "");
                assert(range3.front.localName() == "");
                assert(range3.front.attributes.empty);
                assert(range3.front.content() == " lol ");

                range3.popFront;
                assert(!range3.empty);
                // Lots of Text!
                // On multiple lines!
                assert(range3.front.kind() == XMLKind.text);
                assert(range3.front.name() == "");
                assert(range3.front.prefix() == "");
                assert(range3.front.localName() == "");
                assert(range3.front.attributes().empty);
                // split into lines so the unittest does not depend on the newline policy of this file;
                // the expected lines do include the indentation used inside the `xml` literal above
                static immutable linesArr = ["Lots of Text!", "            On multiple lines!", "        "];
                assert(range3.front.content().lineSplitter.equal(linesArr));

                range3.popFront;
                assert(range3.empty);
            }

            range2.popFront;
            assert(!range2.empty);
            // <![CDATA[ Ciaone! ]]>
            assert(range2.front.kind() == XMLKind.text);
            assert(range2.front.name() == "");
            assert(range2.front.prefix() == "");
            assert(range2.front.localName() == "");
            assert(range2.front.attributes().empty);
            assert(range2.front.content() == " Ciaone! ");

            range2.popFront;
            assert(!range2.empty());
            // <ccc/>
            assert(range2.front.kind() == XMLKind.elementEmpty);
            assert(range2.front.name() == "ccc");
            assert(range2.front.prefix() == "");
            assert(range2.front.localName() == "ccc");
            assert(range2.front.attributes().empty);
            assert(range2.front.content() == []);

            range2.popFront;
            assert(range2.empty());
        }

        range1.popFront;
        assert(range1.empty);
    }

    // All ranges have been destroyed; the cursor must be at the end of the document.
    assert(cursor.documentEnd());
    {
        // Re-use the same cursor on the malformed document.
        cursor.setSource(xml_bad);
        auto range1 = cursor.children();
        assert(range1.front.name == "AAA");
        auto range2 = range1.front.children();
        assert(range2.front.name == "BBB");
        auto range3 = range2.front.attributes();
        // Reading the attribute is what must raise the XMLException.
        assertThrown!XMLException(range3.front());
    
    }
}
1014 
1015 import std.traits : isArray;
1016 
1017 /++
1018 +   A cursor that wraps another cursor, copying all output strings.
1019 +
1020 +   The cursor specification ($(LINK2 ../interfaces/isCursor, `newxml.interfaces.isCursor`))
1021 +   clearly states that a cursor (as the underlying parser and lexer) is free to reuse
1022 +   its internal buffers and thus invalidate every output. This wrapper returns freshly
1023 +   allocated strings, thus allowing references to its outputs to outlive calls to advancing
1024 +   methods.
1025 +
1026 +   This type should not be instantiated directly, but using the helper function
1027 +   `copyingCursor`.
1028 +/
1029 struct CopyingCursor(CursorType, Flag!"intern" intern = No.intern)
1030     if (isCursor!CursorType && isArray!(CursorType.StringType))
1031 {
1032     alias StringType = CursorType.StringType;
1033 
1034     //mixin UsesAllocator!Alloc;
1035 
1036     CursorType cursor;
1037     alias cursor this;
1038 
1039     static if (intern == Yes.intern)
1040     {
1041         import std.typecons: Rebindable;
1042 
1043         Rebindable!(immutable StringType)[const StringType] interned;
1044     }
1045 
1046     private auto copy(StringType str) @system
1047     {
1048         static if (intern == Yes.intern)
1049         {
1050             auto match = str in interned;
1051             if (match)
1052                 return *match;
1053         }
1054 
1055         import std.traits : Unqual;
1056         import std.experimental.allocator;//import stdx.allocator;
1057         import std.range.primitives : ElementEncodingType;
1058         import core.stdc.string : memcpy;
1059 
1060         alias ElemType = ElementEncodingType!StringType;
1061         ElemType[] cp;//auto cp = cast(ElemType[]) allocator.makeArray!(Unqual!ElemType)(str.length);
1062         cp.length = str.length;
1063         memcpy(cast(void*)cp.ptr, cast(void*)str.ptr, str.length * ElemType.sizeof);
1064 
1065         static if (intern == Yes.intern)
1066         {
1067             interned[str] = cp;
1068         }
1069 
1070         return cp;
1071     }
1072 
1073     auto name() @trusted
1074     {
1075         return copy(cursor.name);
1076     }
1077     auto localName() @trusted
1078     {
1079         return copy(cursor.localName);
1080     }
1081     auto prefix() @trusted
1082     {
1083         return copy(cursor.prefix);
1084     }
1085     auto content() @trusted
1086     {
1087         return copy(cursor.content);
1088     }
1089     auto wholeContent() @trusted
1090     {
1091         return copy(cursor.wholeContent);
1092     }
1093 
1094     auto attributes() @trusted
1095     {
1096         struct CopyRange
1097         {
1098             typeof(cursor.attributes()) attrs;
1099             alias attrs this;
1100 
1101             private CopyingCursor* parent;
1102 
1103             auto front()
1104             {
1105                 auto attr = attrs.front;
1106                 return Attribute!StringType(
1107                         parent.copy(attr.name),
1108                         parent.copy(attr.value),
1109                     );
1110             }
1111         }
1112         return CopyRange(cursor.attributes, &this);
1113     }
1114 }
1115 
1116 /++
1117 +   Instantiates a suitable `CopyingCursor` on top of the given `cursor` and allocator.
1118 +/
1119 auto copyingCursor(Flag!"intern" intern = No.intern, CursorType)(auto ref CursorType cursor)
1120 {
1121     auto res = CopyingCursor!(CursorType, intern)();
1122     res.cursor = cursor;
1123     return res;
1124 }
1125 
// Checks interning in `copyingCursor!(Yes.intern)`: names read at different
// points of the walk must be the very same slice (`is`), not merely equal.
unittest
{
    import newxml.lexers;
    import newxml.parser;
    

    wstring xml = q{
    <?xml encoding = "utf-8" ?>
    <aaa>
        <bbb>
            <aaa>
            </aaa>
        </bbb>
        Hello, world!
    </aaa>
    };

    auto cursor =
         xml
        .lexer
        .parser
        .cursor!(Yes.conflateCDATA)
        .copyingCursor!(Yes.intern)();

    // Descend three levels, recording the name seen after each successful enter.
    assert(cursor.enter);
    auto a1 = cursor.name;
    assert(cursor.enter);
    auto b1 = cursor.name;
    assert(cursor.enter);
    auto a2 = cursor.name;
    // The fourth enter is asserted to fail; the name is read again afterwards.
    assert(!cursor.enter);
    auto a3 = cursor.name;
    // Climb back up, reading the name after each exit.
    cursor.exit;
    auto b2 = cursor.name;
    cursor.exit;
    auto a4 = cursor.name;

    // Identity, not equality: the interned copy must be shared.
    assert(a1 is a2);
    assert(a2 is a3);
    assert(a3 is a4);
    assert(b1 is b2);
}