1 /*
2 *             Copyright László Szerémi 2022 - .
3 *  Distributed under the Boost Software License, Version 1.0.
4 *      (See accompanying file LICENSE_1_0.txt or copy at
5 *            http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 /++
9 +   This module implements a simple SAX parser.
10 +
11 +   Authors:
12 +   Lodovico Giaretta
13 +   László Szerémi
14 +
15 +   License:
16 +   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
17 +
18 +   Copyright:
19 +   Copyright Lodovico Giaretta 2016, László Szerémi 2022 --
20 +/
21 
22 module newxml.sax;
23 
24 import newxml.interfaces;
25 import newxml.cursor;
26 @safe:
27 /++
28 +   A SAX parser built on top of a cursor.
29 +
30 +   Delegates are called when certain events are encountered, then it passes the necessary data to process the
31 +   element. 
32 +/
/++
+   A SAX parser built on top of a cursor.
+
+   The parser walks the document through the underlying cursor and, for every
+   node it visits, invokes the delegate registered for that node kind (if any),
+   passing it the data read from the cursor (name, content and/or attributes).
+   Delegates left at `null` are simply skipped.
+/
struct SAXParser(T)
    if (isCursor!T)
{
    ///The underlying cursor used to traverse the document.
    public T cursor;
    ///String type produced by the underlying cursor.
    alias StringType = T.StringType;
    ///Attribute range type produced by the underlying cursor.
    alias AttrRange = T.AttributesRange;
    ///Called when a Document declaration is reached.
    public void delegate(StringType[StringType] attributes) onDocument;
    ///Called on a non-empty element start. Provides access to the attributes.
    public void delegate(StringType name, StringType[StringType] attributes) onElementStart;
    ///Called on an empty element. Provides access to the attributes.
    public void delegate(StringType name, StringType[StringType] attributes) onElementEmpty;
    ///Called on a non-empty element ending.
    public void delegate(StringType name) onElementEnd;
    ///Called when a text chunk is encountered.
    public void delegate(StringType content) onText;
    ///Called when a comment is encountered.
    public void delegate(StringType content) onComment;
    ///Called when a processing instruction is encountered.
    public void delegate(StringType name, StringType content) onProcessingInstruction;
    ///Called when a CDataSection node is encountered.
    public void delegate(StringType content) onCDataSection;

    /++
    +   Initializes this parser (and the underlying low level one) with the given input.
    +
    +   Params:
    +       input = the input forwarded unchanged to `cursor.setSource`.
    +/
    void setSource(T.InputType input)
    {
        cursor.setSource(input);
    }

    static if (isSaveableCursor!T)
    {
        /++
        +   Returns a copy of this parser whose cursor is `cursor.save`.
        +   The registered delegates are copied along with the rest of the struct.
        +   Only available when the cursor type satisfies `isSaveableCursor`.
        +/
        auto save()
        {
            auto result = this;
            result.cursor = cursor.save;
            return result;
        }
    }

    /++
    +   Processes the document until `cursor.documentEnd` is true.
    +
    +   For every node visited, the delegate matching the node's `XMLKind` is
    +   called if it is not `null`:
    +   `document` → `onDocument`, `elementStart` → `onElementStart`,
    +   `elementEnd` → `onElementEnd`, `elementEmpty` → `onElementEmpty`,
    +   `text` → `onText`, `comment` → `onComment`,
    +   `processingInstruction` → `onProcessingInstruction`,
    +   `cdata` → `onCDataSection`.
    +   Nodes of any other kind are skipped without a callback.
    +
    +   Attributes are only collected into an associative array when the
    +   corresponding delegate is actually set.
    +/
    void processDocument()
    {
        while (!cursor.documentEnd)
        {
            switch (cursor.kind)
            {
                case XMLKind.document:
                    if (onDocument !is null)
                        onDocument(createAArray(cursor.attributes));
                    break;
                case XMLKind.elementStart:
                    if (onElementStart !is null)
                        onElementStart(cursor.name, createAArray(cursor.attributes));
                    break;
                case XMLKind.elementEnd:
                    if (onElementEnd !is null)
                        onElementEnd(cursor.name);
                    break;
                case XMLKind.elementEmpty:
                    if (onElementEmpty !is null)
                        onElementEmpty(cursor.name, createAArray(cursor.attributes));
                    break;
                case XMLKind.text:
                    if (onText !is null)
                        onText(cursor.content);
                    break;
                case XMLKind.comment:
                    if (onComment !is null)
                        onComment(cursor.content);
                    break;
                case XMLKind.processingInstruction:
                    if (onProcessingInstruction !is null)
                        onProcessingInstruction(cursor.name, cursor.content);
                    break;
                case XMLKind.cdata:
                    if (onCDataSection !is null)
                        onCDataSection(cursor.content);
                    break;

                // Any other node kind has no associated callback.
                default: break;
            }

            // Advance the cursor: first try `enter`; only if that returns false
            // try `next`; only if that also returns false call `exit`.
            // Short-circuit evaluation keeps the original call order.
            if (!cursor.enter && !cursor.next)
                cursor.exit;
        }
    }

    /++
    +   Copies the attributes of the current node into an associative array
    +   keyed by attribute name.
    +
    +   Params:
    +       source = the attribute range to read from.
    +
    +   Returns: an associative array mapping each attribute's `name` to its
    +   `value`. If the same name occurs more than once, the last value wins.
    +/
    protected StringType[StringType] createAArray(AttrRange source) {
        StringType[StringType] result;
        foreach (attr; source) {
            result[attr.name] = attr.value;
        }
        return result;
    }
}
137 
/++
+   Convenience factory: wraps the given `cursor` in a `SAXParser`.
+
+   The returned parser has all of its callbacks unset; assign the `onXXX`
+   delegates of interest before calling `processDocument`.
+
+   Params:
+       cursor = the cursor to be stored in the parser's `cursor` field.
+
+   Returns: a `SAXParser!CursorType` using `cursor`.
+/
auto saxParser(CursorType)(auto ref CursorType cursor)
    if (isCursor!CursorType)
{
    SAXParser!CursorType sax;
    sax.cursor = cursor;
    return sax;
}
148 
// Runs the SAX parser over a small document and checks element nesting depth
// and the total number of callbacks fired.
unittest
{
    import newxml.parser;
    import newxml.lexers;
    import std.conv : to;

    dstring xml = q{
    <?xml encoding = "utf-8" ?>
    <aaa xmlns:myns="something">
        <myns:bbb myns:att='>'>
            <!-- lol -->
            Lots of Text!
            On multiple lines!
        </myns:bbb>
        <![CDATA[ Ciaone! ]]>
        <ccc/>
    </aaa>
    };

    // Records how many callbacks fired and how deeply elements were nested.
    struct CountingHandler
    {
        int deepest;
        int depth;
        int calls;

        void onDocument(dstring[dstring] attribute)
        {
            assert(attribute["encoding"] == "utf-8");
            ++calls;
        }

        void onElementStart(dstring name, dstring[dstring] attributes)
        {
            ++calls;
            ++depth;
            if (deepest < depth)
                deepest = depth;
        }

        void onElementEnd(dstring name)
        {
            ++calls;
            --depth;
        }

        void onElementEmpty(dstring name, dstring[dstring] attributes)
        {
            ++calls;
        }

        void onText(dstring content)
        {
            ++calls;
        }

        void onComment(dstring content)
        {
            assert(content == " lol ");
            ++calls;
        }

        void onProcessingInstruction(dstring name, dstring content)
        {
            ++calls;
        }
    }

    CountingHandler counter;
    auto sax = xml.lexer.parser.cursor.saxParser;

    sax.setSource(xml);

    // onCDataSection is deliberately left unset, so it stays null.
    sax.onDocument = &counter.onDocument;
    sax.onElementStart = &counter.onElementStart;
    sax.onElementEnd = &counter.onElementEnd;
    sax.onElementEmpty = &counter.onElementEmpty;
    sax.onText = &counter.onText;
    sax.onComment = &counter.onComment;
    sax.onProcessingInstruction = &counter.onProcessingInstruction;

    sax.processDocument();

    assert(counter.deepest == 2, to!string(counter.deepest));
    assert(counter.depth == 0, to!string(counter.depth));
    assert(counter.calls == 9, to!string(counter.calls));
}