/*
 * Copyright László Szerémi 2022 - .
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at
 * http://www.boost.org/LICENSE_1_0.txt)
 */

/++
+ This module implements a simple SAX parser.
+
+ Authors:
+ Lodovico Giaretta
+ László Szerémi
+
+ License:
+ <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
+
+ Copyright:
+ Copyright Lodovico Giaretta 2016, László Szerémi 2022 --
+/

module newxml.sax;

import newxml.interfaces;
import newxml.cursor;
@safe:
/++
+ A SAX parser built on top of a cursor.
+
+ Each delegate field, when it is not `null`, is invoked whenever a node of the matching kind is encountered
+ and receives the data needed to process that node. Delegates left at `null` are simply skipped.
+/
struct SAXParser(T)
    if (isCursor!T)
{
    public T cursor;
    alias StringType = T.StringType;
    alias AttrRange = T.AttributesRange;
    ///Called when a Document declaration is reached.
    public void delegate(StringType[StringType] attributes) onDocument;
    ///Called on a non-empty element start. Provides access to the attributes.
    public void delegate(StringType name, StringType[StringType] attributes) onElementStart;
    ///Called on an empty element. Provides access to the attributes.
    public void delegate(StringType name, StringType[StringType] attributes) onElementEmpty;
    ///Called on a non-empty element ending.
    public void delegate(StringType name) onElementEnd;
    ///Called when a text chunk is encountered.
    public void delegate(StringType content) onText;
    ///Called when a comment is encountered.
    public void delegate(StringType content) onComment;
    ///Called when a processing instruction is encountered.
    public void delegate(StringType name, StringType content) onProcessingInstruction;
    ///Called when a CDataSection node is encountered.
    public void delegate(StringType content) onCDataSection;

    /++
    + Initializes this parser (and the underlying low level one) with the given input.
    +/
    void setSource(T.InputType input)
    {
        cursor.setSource(input);
    }

    static if (isSaveableCursor!T)
    {
        /++
        + Returns a copy of this parser whose cursor is the result of `cursor.save`.
        + The delegate fields are copied as they are.
        +/
        auto save()
        {
            auto result = this;
            result.cursor = cursor.save;
            return result;
        }
    }

    /++
    + Processes the entire document: for every node reported by the cursor, the delegate
    + matching its `XMLKind` is called if it has been set (is not `null`).
    + Node kinds without a matching delegate field are ignored.
    +/
    void processDocument()
    {
        while (!cursor.documentEnd)
        {
            switch (cursor.kind)
            {
                case XMLKind.document:
                    if (onDocument !is null)
                        onDocument(createAArray(cursor.attributes));
                    break;
                case XMLKind.elementStart:
                    if (onElementStart !is null)
                        onElementStart(cursor.name, createAArray(cursor.attributes));
                    break;
                case XMLKind.elementEnd:
                    if (onElementEnd !is null)
                        onElementEnd(cursor.name);
                    break;
                case XMLKind.elementEmpty:
                    if (onElementEmpty !is null)
                        onElementEmpty(cursor.name, createAArray(cursor.attributes));
                    break;
                case XMLKind.text:
                    if (onText !is null)
                        onText(cursor.content);
                    break;
                case XMLKind.comment:
                    if (onComment !is null)
                        onComment(cursor.content);
                    break;
                case XMLKind.processingInstruction:
                    if (onProcessingInstruction !is null)
                        onProcessingInstruction(cursor.name, cursor.content);
                    break;
                case XMLKind.cdata:
                    if (onCDataSection !is null)
                        onCDataSection(cursor.content);
                    break;

                default:
                    break;
            }

            // Advance the cursor: try `enter` first, then `next`, and only when both
            // report failure call `exit`. Short-circuit evaluation keeps the calls in
            // exactly that order and skips `next` whenever `enter` succeeded.
            // NOTE(review): presumably this yields a depth-first walk (children, then
            // siblings, then back to the parent) -- confirm against the cursor interface.
            if (!cursor.enter() && !cursor.next())
                cursor.exit();
        }
    }

    /++
    + Copies the attributes of `source` into an associative array keyed by attribute name.
    + If a name occurs more than once, the value seen last overwrites the earlier ones.
    +/
    protected StringType[StringType] createAArray(AttrRange source)
    {
        StringType[StringType] result;
        foreach (key; source)
        {
            result[key.name] = key.value;
        }
        return result;
    }
}

/++
+ Instantiates a suitable SAX parser from the given `cursor`.
+ The event delegates of the returned parser are all unset; assign the ones of interest
+ before calling `processDocument`.
+/
auto saxParser(CursorType)(auto ref CursorType cursor)
    if (isCursor!CursorType)
{
    auto res = SAXParser!(CursorType)();
    res.cursor = cursor;
    return res;
}

unittest
{
    import newxml.parser;
    import newxml.lexers;
    import std.conv : to;

    dstring xml = q{
    <?xml encoding = "utf-8" ?>
    <aaa xmlns:myns="something">
        <myns:bbb myns:att='>'>
            <!-- lol -->
            Lots of Text!
            On multiple lines!
        </myns:bbb>
        <![CDATA[ Ciaone! ]]>
        <ccc/>
    </aaa>
    };

    struct MyHandler
    {
        int max_nesting;
        int current_nesting;
        int total_invocations;

        void onElementStart(dstring name, dstring[dstring] attributes)
        {
            total_invocations++;
            current_nesting++;
            if (current_nesting > max_nesting)
                max_nesting = current_nesting;
        }
        void onElementEnd(dstring name)
        {
            total_invocations++;
            current_nesting--;
        }
        void onElementEmpty(dstring name, dstring[dstring] attributes) { total_invocations++; }
        void onProcessingInstruction(dstring name, dstring content) { total_invocations++; }
        void onText(dstring content) { total_invocations++; }
        void onDocument(dstring[dstring] attribute)
        {
            assert(attribute["encoding"] == "utf-8");
            total_invocations++;
        }
        void onComment(dstring content)
        {
            assert(content == " lol ");
            total_invocations++;
        }
    }


    MyHandler handler;
    auto parser =
        xml
        .lexer
        .parser
        .cursor
        .saxParser;

    parser.setSource(xml);
    parser.onDocument = &handler.onDocument;
    parser.onElementStart = &handler.onElementStart;
    parser.onElementEnd = &handler.onElementEnd;
    parser.onElementEmpty = &handler.onElementEmpty;
    parser.onText = &handler.onText;
    parser.onComment = &handler.onComment;
    parser.onProcessingInstruction = &handler.onProcessingInstruction;
    parser.processDocument();

    assert(handler.max_nesting == 2, to!string(handler.max_nesting));
    assert(handler.current_nesting == 0, to!string(handler.current_nesting));
    assert(handler.total_invocations == 9, to!string(handler.total_invocations));
}