1 /*
2 *             Copyright László Szerémi 2022 - .
3 *  Distributed under the Boost Software License, Version 1.0.
4 *      (See accompanying file LICENSE_1_0.txt or copy at
5 *            http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 /++
9 +   Authors:
10 +   Lodovico Giaretta
11 +   László Szerémi
12 +
13 +   License:
14 +   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
15 +
16 +   Copyright:
17 +   Copyright Lodovico Giaretta 2016 --
18 +   Copyright László Szerémi 2022 --
19 +/
20 
21 module newxml.domparser;
22 
23 import newxml.interfaces;
24 import newxml.cursor;
25 
26 import dom = newxml.dom;
27 import newxml.domimpl;
28 import newxml.domstring;
29 
/++
+   Built on top of Cursor, the DOM builder adds to it the ability to
+   build the DOM tree of the document; as the cursor advances, nodes can be
+   selectively added to the tree, allowing one to build a small representation
+   containing only the needed parts of the document.
+
+   This type should not be instantiated directly. Instead, the helper function
+   `domBuilder` should be used.
+/
struct DOMBuilder(T)
    if (isCursor!T)
{
    import std.traits : ReturnType;

    /++
    +   The underlying Cursor methods are exposed, so that one can query the properties
    +   of the current node before deciding whether to build it or not.
    +/
    T cursor;
    alias cursor this;

    /// String type used by the underlying cursor.
    alias StringType = T.StringType;

    /// Type of the document produced by `DOMImplementation.createDocument`.
    alias DocumentType = ReturnType!(DOMImplementation.createDocument);
    /// Type of a generic node of the tree (the type of `DocumentType.firstChild`).
    alias NodeType = typeof(DocumentType.firstChild);

    // Node that receives the nodes created by `build` and `enter`.
    private NodeType currentNode;
    // Document being assembled; also the factory for every created node.
    private DocumentType document;
    // Implementation used to create `document`.
    private DOMImplementation domImpl;
    // True when the node the cursor currently points to was already appended by `build`.
    private bool already_built;

    /++
    +   Creates a builder that uses `impl` to create the document.
    +
    +   Params:
    +       impl = the DOM implementation used by `initialize` to create the document
    +       args = arguments forwarded to the constructor of the underlying cursor
    +/
    this(Args...)(DOMImplementation impl, auto ref Args args)
    {
        cursor = typeof(cursor)(args);
        domImpl = impl;
    }

    // Creates a fresh document, copies the "version" and "standalone" attributes of
    // the XML declaration into it (when the cursor is on a node of kind
    // `XMLKind.document`) and makes the document the current insertion point.
    private void initialize()
    {
        document = domImpl.createDocument(null, null, null);

        if (cursor.kind == XMLKind.document)
            foreach (attr; cursor.attributes)
                switch (attr.name)
                {
                    case "version":
                        document.xmlVersion = new DOMString(attr.value);
                        break;
                    case "standalone":
                        document.xmlStandalone = attr.value == "yes";
                        break;
                    default:
                        // Any other declaration attribute is ignored.
                        break;
                }

        currentNode = document;
    }

    /++
    +   Initializes this builder and the underlying components.
    +
    +   The input is handed to the cursor and a new, empty document is started.
    +/
    void setSource(T.InputType input)
    {
        cursor.setSource(input);
        initialize();
    }

    /++
    +   Same as `cursor.enter`. When entering a node, that node is automatically
    +   built into the DOM, so that its children can then be safely built if needed.
    +
    +   Returns: `true` if the cursor entered the node; `false` if the current node
    +   is not an element start or the cursor could not enter it.
    +/
    bool enter()
    {
        // At the very beginning there is no node to create: just enter the document.
        if (cursor.atBeginning)
            return cursor.enter;

        if (cursor.kind != XMLKind.elementStart)
            return false;

        if (!already_built)
        {
            // The element must be created before entering, because afterwards
            // the cursor no longer points to it.
            auto elem = createCurrent;

            if (cursor.enter)
            {
                currentNode.appendChild(elem);
                currentNode = elem;
                return true;
            }
        }
        else if (cursor.enter)
        {
            // The element was already appended by `build`; it is expected to be
            // the last child of the current node, so descend into it.
            already_built = false;
            currentNode = currentNode.lastChild;
            return true;
        }
        return false;
    }

    /++
    +   Same as `cursor.exit`. The insertion point moves to the parent of the
    +   current node (when there is a current node).
    +/
    void exit()
    {
        if (currentNode)
            currentNode = currentNode.parentNode;
        already_built = false;
        cursor.exit;
    }

    /++
    +   Same as `cursor.next`.
    +
    +   Returns: the result of `cursor.next`.
    +/
    bool next()
    {
        already_built = false;
        return cursor.next;
    }

    /++
    +   Adds the current node to the DOM. This operation does not advance the input.
    +   Calling it more than once does not change the result.
    +
    +   Nothing is appended when the cursor is at the beginning of the document or
    +   when the current node kind has no DOM counterpart.
    +/
    void build()
    {
        if (already_built || cursor.atBeginning)
            return;

        // Create the node exactly once and append that very node; creating a
        // second one would only duplicate the allocation and the attribute copies.
        auto cur = createCurrent;
        if (cur)
            currentNode.appendChild(cur);

        already_built = true;
    }

    /++
    +   Recursively adds the current node and all its children to the DOM tree.
    +   Behaves as `cursor.next`: it advances the input to the next sibling, returning
    +   `true` if and only if there exists such next sibling.
    +/
    bool buildRecursive()
    {
        if (enter)
        {
            // Build every child (and its subtree) until there are no more siblings.
            while (buildRecursive) {}
            exit;
        }
        else
            build;

        return next;
    }

    // Creates (without appending it) the DOM node matching the node the cursor
    // currently points to; returns null for kinds that have no case below.
    private NodeType createCurrent()
    // TODO: namespace handling
    {
        switch (cursor.kind)
        {
            // XMLKind.elementEnd is needed for empty tags: <tag></tag>
            case XMLKind.elementEnd:
            case XMLKind.elementStart:
            case XMLKind.elementEmpty:
                auto elem = document.createElement(new DOMString(cursor.name));
                foreach (attr; cursor.attributes)
                {
                    elem.setAttribute(new DOMString(attr.name), new DOMString(attr.value));
                }
                return elem;
            case XMLKind.text:
                return document.createTextNode(new DOMString(cursor.content));
            case XMLKind.cdata:
                return document.createCDATASection(new DOMString(cursor.content));
            case XMLKind.processingInstruction:
                return document.createProcessingInstruction(new DOMString(cursor.name), new DOMString(cursor.content));
            case XMLKind.comment:
                return document.createComment(new DOMString(cursor.content));
            default:
                return null;
        }
    }

    /++
    +   Returns the Document being built by this builder.
    +/
    auto getDocument() { return document; }
}
216 
/++
+   Creates a `DOMBuilder` wrapping the given `cursor`, using `domimpl` to create
+   the document. The returned builder is already initialized.
+
+   Params:
+       cursor = the cursor the builder reads the nodes from
+       domimpl = the DOM implementation used to create the document
+
+   Returns: a ready-to-use `DOMBuilder!CursorType`.
+/
auto domBuilder(CursorType)(auto ref CursorType cursor, DOMImplementation domimpl)
    if (isCursor!CursorType)
{
    alias Builder = DOMBuilder!CursorType;

    auto builder = Builder(domimpl);
    // Replace the default-constructed cursor with the caller's one, then
    // start a fresh document for it.
    builder.cursor = cursor;
    builder.initialize();
    return builder;
}
229 
// Builds the whole DOM of a small document and checks that elements and
// attributes (including namespace-prefixed ones) end up in the tree.
unittest
{
    import newxml.lexers;
    import newxml.parser;
    import newxml.cursor;
    import domimpl = newxml.domimpl;

    alias DOMImplType = domimpl.DOMImplementation;

    string xml = q{
    <?xml encoding = "utf-8" ?>
    <aaa xmlns:myns="something">
        <myns:bbb myns:att='>'>
            <!-- lol -->
            Lots of Text!
            On multiple lines!
        </myns:bbb>
        <![CDATA[ Ciaone! ]]>
        <ccc/>
    </aaa>
    };

    auto builder =
         xml
        .lexer
        .parser
        .cursor
        .domBuilder(new DOMImplType());

    builder.setSource(xml);
    builder.buildRecursive;
    auto doc = builder.getDocument;

    // Exactly one <ccc/> element was built.
    assert(doc.getElementsByTagName(new DOMString("ccc")).length == 1);
    // The namespace declaration is stored as a plain attribute of the root element.
    assert(doc.documentElement.getAttribute(new DOMString("xmlns:myns")) == "something");
}
268 
// An element written as an explicit start/end pair with no content
// (<tag></tag>) must yield exactly one child of the document.
unittest
{
    import newxml.lexers;
    import newxml.parser;
    import newxml.cursor;
    import domimpl = newxml.domimpl;

    alias DOMImplType = domimpl.DOMImplementation;

    auto source = `<?xml version="1.0" encoding="UTF-8"?><tag></tag>`;

    // Same pipeline as a plain cursor, but going through a copying cursor.
    auto treeBuilder = source
        .lexer()
        .parser()
        .cursor()
        .copyingCursor()
        .domBuilder(new DOMImplType());

    treeBuilder.setSource(source);
    treeBuilder.buildRecursive();

    auto result = treeBuilder.getDocument();
    assert(result.childNodes.length == 1);
}