/*
 *             Copyright László Szerémi 2022 - .
 *  Distributed under the Boost Software License, Version 1.0.
 *      (See accompanying file LICENSE_1_0.txt or copy at
 *            http://www.boost.org/LICENSE_1_0.txt)
 */

/++
+   Authors:
+       Lodovico Giaretta
+       László Szerémi
+
+   License:
+       <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
+
+   Copyright:
+       Copyright Lodovico Giaretta 2016 --
+       Copyright László Szerémi 2022 --
+/

module newxml.domparser;

import newxml.interfaces;
import newxml.cursor;

import dom = newxml.dom;
import newxml.domimpl;
import newxml.domstring;

/++
+   Built on top of Cursor, the DOM builder adds to it the ability to
+   build the DOM tree of the document; as the cursor advances, nodes can be
+   selectively added to the tree, allowing one to build a small representation
+   containing only the needed parts of the document.
+
+   This type should not be instantiated directly. Instead, the helper function
+   `domBuilder` should be used.
+/
struct DOMBuilder(T)
        if (isCursor!T)
{
    import std.traits : ReturnType;

    /++
    +   The underlying Cursor methods are exposed, so that one can query the properties
    +   of the current node before deciding whether to build it or not.
    +/
    T cursor;
    alias cursor this;

    /// String type used by the underlying cursor.
    alias StringType = T.StringType;

    /// Type of the document produced by `DOMImplementation.createDocument`.
    alias DocumentType = ReturnType!(DOMImplementation.createDocument);
    /// Type of the nodes stored in the document tree.
    alias NodeType = typeof(DocumentType.firstChild);

    // Node under which newly built nodes are appended.
    private NodeType currentNode;
    // Document being populated; (re)created by `initialize`.
    private DocumentType document;
    // Factory used to create the document.
    private DOMImplementation domImpl;
    // True once the node at the current cursor position has been appended by `build`.
    private bool already_built;

    /++
    +   Creates a builder that uses `impl` to create its document; any further
    +   arguments are forwarded to the constructor of the underlying cursor.
    +/
    this(Args...)(DOMImplementation impl, auto ref Args args)
    {
        cursor = typeof(cursor)(args);
        domImpl = impl;
    }

    /++
    +   Creates a fresh, empty document and makes it the current node. If the cursor
    +   is positioned on the XML declaration, its `version` and `standalone`
    +   attributes are copied to the document; other attributes are ignored.
    +/
    private void initialize()
    {
        document = domImpl.createDocument(null, null, null);

        if (cursor.kind == XMLKind.document)
            foreach (attr; cursor.attributes)
                switch (attr.name)
                {
                case "version":
                    document.xmlVersion = new DOMString(attr.value);
                    break;
                case "standalone":
                    document.xmlStandalone = attr.value == "yes";
                    break;
                default:
                    break;
                }

        currentNode = document;
    }

    /++
    +   Initializes this builder and the underlying components.
    +
    +   The input is handed to the cursor, then a new empty document is created,
    +   replacing any document built so far.
    +/
    void setSource(T.InputType input)
    {
        cursor.setSource(input);
        initialize();
    }

    /++
    +   Same as `cursor.enter`. When entering a node, that node is automatically
    +   built into the DOM, so that its children can then be safely built if needed.
    +
    +   Returns: `true` if the cursor entered the node, `false` otherwise (in which
    +   case the DOM tree is left untouched).
    +/
    bool enter()
    {
        // At the very beginning there is no node to build yet: just enter the document.
        if (cursor.atBeginning)
            return cursor.enter;

        if (cursor.kind != XMLKind.elementStart)
            return false;

        if (!already_built)
        {
            // The element must be created before entering, while the cursor still
            // points at it; it is attached only if entering succeeds.
            auto elem = createCurrent;

            if (cursor.enter)
            {
                currentNode.appendChild(elem);
                currentNode = elem;
                return true;
            }
        }
        else if (cursor.enter)
        {
            // `build` already appended this element to `currentNode`,
            // so it is the last child: descend into it.
            already_built = false;
            currentNode = currentNode.lastChild;
            return true;
        }
        return false;
    }

    /++
    +   Same as `cursor.exit`; the current DOM node also moves up to its parent.
    +/
    void exit()
    {
        if (currentNode)
            currentNode = currentNode.parentNode;
        already_built = false;
        cursor.exit;
    }

    /++
    +   Same as `cursor.next`.
    +/
    bool next()
    {
        already_built = false;
        return cursor.next;
    }

    /++
    +   Adds the current node to the DOM. This operation does not advance the input.
    +   Calling it more than once does not change the result.
    +
    +   Nothing happens while the cursor is at the beginning of the document. Node
    +   kinds for which no DOM node can be created are skipped.
    +/
    void build()
    {
        if (already_built || cursor.atBeginning)
            return;

        // Create the node exactly once and append that very instance; creating it
        // a second time just for the append would waste an allocation (and, for
        // elements, a full pass over the attributes).
        auto cur = createCurrent;
        if (cur)
            currentNode.appendChild(cur);

        already_built = true;
    }

    /++
    +   Recursively adds the current node and all its children to the DOM tree.
    +   Behaves as `cursor.next`: it advances the input to the next sibling, returning
    +   `true` if and only if there exists such next sibling.
    +/
    bool buildRecursive()
    {
        if (enter)
        {
            // Build every child subtree until there are no more siblings.
            while (buildRecursive) {}
            exit;
        }
        else
            build;

        return next;
    }

    /++
    +   Creates a detached DOM node matching the node the cursor currently points at.
    +
    +   Returns: the new node, or `null` if the current node kind has no DOM
    +   counterpart handled here.
    +/
    private NodeType createCurrent()
    // TODO: namespace handling
    {
        switch (cursor.kind)
        {
            // XMLKind.elementEnd is needed for empty tags: <tag></tag>
        case XMLKind.elementEnd:
        case XMLKind.elementStart:
        case XMLKind.elementEmpty:
            auto elem = document.createElement(new DOMString(cursor.name));
            foreach (attr; cursor.attributes)
            {
                elem.setAttribute(new DOMString(attr.name), new DOMString(attr.value));
            }
            return elem;
        case XMLKind.text:
            return document.createTextNode(new DOMString(cursor.content));
        case XMLKind.cdata:
            return document.createCDATASection(new DOMString(cursor.content));
        case XMLKind.processingInstruction:
            return document.createProcessingInstruction(new DOMString(cursor.name), new DOMString(cursor.content));
        case XMLKind.comment:
            return document.createComment(new DOMString(cursor.content));
        default:
            return null;
        }
    }

    /++
    +   Returns the Document being built by this builder.
    +/
    auto getDocument() { return document; }
}

/++
+   Instantiates a suitable `DOMBuilder` on top of the given `cursor` and `DOMImplementation`.
+/
auto domBuilder(CursorType)(auto ref CursorType cursor, DOMImplementation domimpl)
        if (isCursor!CursorType)
{
    // The constructor stores `domimpl`; the cursor is then replaced by the given one.
    auto res = DOMBuilder!(CursorType)(domimpl);
    res.cursor = cursor;
    res.initialize;
    return res;
}

unittest
{
    import std.stdio;

    import newxml.lexers;
    import newxml.parser;
    import newxml.cursor;
    import domimpl = newxml.domimpl;

    alias DOMImplType = domimpl.DOMImplementation;

    string xml = q{
    <?xml encoding = "utf-8" ?>
    <aaa xmlns:myns="something">
        <myns:bbb myns:att='>'>
            <!-- lol -->
            Lots of Text!
            On multiple lines!
        </myns:bbb>
        <![CDATA[ Ciaone! ]]>
        <ccc/>
    </aaa>
    };

    auto builder =
         xml
        .lexer
        .parser
        .cursor
        .domBuilder(new DOMImplType());

    builder.setSource(xml);
    builder.buildRecursive;
    auto doc = builder.getDocument;

    assert(doc.getElementsByTagName(new DOMString("ccc")).length == 1);
    assert(doc.documentElement.getAttribute(new DOMString("xmlns:myns")) == "something");
}

unittest
{
    import newxml.lexers;
    import newxml.parser;
    import newxml.cursor;
    import domimpl = newxml.domimpl;

    alias DOMImplType = domimpl.DOMImplementation;

    auto xml = `<?xml version="1.0" encoding="UTF-8"?><tag></tag>`;
    auto builder =
         xml
        .lexer
        .parser
        .cursor
        .copyingCursor
        .domBuilder(new DOMImplType());

    builder.setSource(xml);
    builder.buildRecursive;
    auto doc = builder.getDocument;

    assert(doc.childNodes.length == 1);
}