1 /*
2 *             Copyright Lodovico Giaretta 2016 - .
3 *  Distributed under the Boost Software License, Version 1.0.
4 *      (See accompanying file LICENSE_1_0.txt or copy at
5 *            http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 /++
9 +   This module contains some templates to check whether a type exposes the correct
10 +   interface to be an xml lexer, parser or cursor; it also contains some simple
+   types used in various parts of the library.
12 +
13 +   Authors:
14 +   Lodovico Giaretta
15 +   László Szerémi
16 +
17 +   License:
18 +   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
19 +
20 +   Copyright:
21 +   Copyright Lodovico Giaretta 2016 --
22 +/
23 
24 module newxml.interfaces;
25 
26 import std.range.primitives;
27 import std.traits;
28 
29 // LEVEL 1: LEXERS
30 
31 /++
32 +   Checks whether its argument fulfills all requirements to be used as an XML lexer.
33 +
34 +   An XML lexer is the first component in the parsing chain. It masks from the parser
35 +   the shape of the input and the type of the characters in it. The slices returned by
36 +   the lexer are ephemeral: every reference to them may or may not be invalidated when a
37 +   new slice is requested by the parser. It is thus responsibility of the user to copy the
38 +   output if necessary.
39 +
40 +   Params:
41 +       L = the type to be tested
42 +
43 +   Returns:
44 +   `true` if L satisfies the XML lexer specification here stated; `false` otherwise
45 +
46 +   Specification:
47 +   A lexer shall support at least these methods and aliases:
48 +   $(UL
49 +       $(LI `alias CharacterType`: the type of a single source character; most
50 +             methods will deal with slices of this type;)
51 +       $(LI `alias InputType`: the type of the input which is used to feed this
52 +             lexer;)
53 +       $(LI `void setSource(InputType)`: sets the input source for this lexer;
54 +             the lexer may perform other initialization work and even consume
55 +             part of the input during this operation; after (partial or complete)
56 +             usage, a lexer may be reinitialized and used with another input
57 +             by calling this function;)
58 +       $(LI `bool empty()`: returns `true` if the entire input has been consumed;
59 +            `false` otherwise;)
+       $(LI `void start()`: instructs the lexer that a new token starts at the
+             current position; the next calls to `get` will retrieve the input
+             from the current position; this call may invalidate any reference
+             to any slice previously returned from `get`;)
64 +       $(LI `CharacterType[] get()`: returns the contents of the input going from
65 +             the last call to `start` till the current position;)
66 +       $(LI `bool testAndAdvance(CharacterType)`: tests whether the input character
67 +             at the current position matches the one passed as parameter; if
68 +             it is the case, this method returns `true` and advances the input
69 +             past the said character; otherwise, it returns `false` and no action
70 +             is performed;)
71 +       $(LI `void advanceUntil(CharacterType, bool)`: advances the input until
72 +             the given character is found; if the second parameter is true, the
73 +             input is then advanced past the found character;)
74 +       $(LI `void advanceUntilAny(CharacterType[], bool)`: advances the input
75 +             until any of the given characters is found; if the second parameter
76 +             is true, the input is then advanced past the found character;)
77 +       $(LI `void dropWhile(CharacterType[])`: advances the input until a character
78 +             different from the given ones is found; the characters advanced by
79 +             this method may or may not be included in the output of a subsequent
80 +             `get`; for this reason, this method should only be called immediately
81 +             before `start`, to skip unneeded characters between two tokens.)
82 +   )
83 +
84 +   Examples:
85 +   ---
86 +   /* extract a word surrounded by whitespaces */
87 +   auto getWord(L)(ref L lexer)
88 +       if (isLexer!L)
89 +   {
90 +       // drop leading whitespaces
91 +       lexer.dropWhile(" \n\r\t");
92 +
93 +       // start building the word
94 +       lexer.start;
95 +
96 +       // keep advancing until you find the trailing whitespaces
97 +       lexer.advanceUntilAny(" \n\r\t", false);
98 +
99 +       // return what you found
100 +       return lexer.get;
101 +   }
102 +
103 +   /* extract a key/value pair from a string like " key : value " */
+   auto getKeyValuePair(L)(ref L lexer)
105 +       if (isLexer!L)
106 +   {
107 +       // drop leading whitespaces
108 +       lexer.dropWhile(" \n\r\t");
109 +
110 +       // here starts the key, which ends with either a whitespace or a colon
111 +       lexer.start;
112 +       lexer.advanceUntilAny(" \n\r\t:", false);
113 +       auto key = lexer.get;
114 +
115 +       // skip any spaces after the key
116 +       lexer.dropWhile(" \n\r\t");
117 +       // now there must be a colon
118 +       assert(lexer.testAndAdvance(':'));
119 +       // skip all space after the colon
120 +       lexer.dropWhile(" \n\r\t");
121 +
122 +       // here starts the value, which ends at the first whitespace
123 +       lexer.start;
124 +       lexer.advanceUntilAny(" \n\r\t", false);
125 +       auto value = lexer.get;
126 +
127 +       // return the pair
128 +       return tuple(key, value);
129 +   }
130 +   ---
131 +/
enum bool isLexer(L) = is(typeof(
(inout int = 0)
{
    // The type must expose a `CharacterType` alias; it is the element type
    // of the slices handed back by `get`.
    alias Char = L.CharacterType;

    L lx;
    char ch;
    bool flag;
    string str;
    Char[] slice;

    // Every statement below must compile for `L` to qualify as a lexer.
    // Note that single characters are probed as `char` and character sets
    // as `string`, independently of `CharacterType`.
    // `InputType`/`setSource` are not probed here; `needSource` covers them.
    flag = lx.empty;
    lx.start();
    slice = lx.get();
    flag = lx.testAndAdvance(ch);
    lx.advanceUntil(ch, flag);
    lx.advanceUntilAny(str, flag);
    lx.dropWhile(str);
}));
154 
155 /++
156 +   Checks whether its argument is a saveable lexer.
157 +
158 +   A saveable lexer is a lexer enhanced with a `save` method analogous to the `save`
159 +   method of `ForwardRange`s.
160 +
161 +   Params:
162 +       L = the type to be tested
163 +
164 +   Returns:
165 +   `true` if L is a lexer (as specified by `isLexer`) and also supports the `save`
166 +   method as specified here; `false` otherwise
167 +
168 +   Specification:
169 +   The type shall support at least:
170 +   $(UL
171 +       $(LI all methods and aliases specified by `isLexer`)
172 +       $(LI `L save()`: returns an independent copy of the current lexer; the
173 +             copy must start at the position the original lexer was when this method
174 +             was called; the two copies shall be independent, in that advancing one
175 +             does not advance the other.)
176 +   )
177 +/
enum bool isSaveableLexer(L) = isLexer!L && is(typeof(
(inout int = 0)
{
    // On top of the plain lexer interface, `save()` must yield a value
    // that can initialize another `L`.
    L original;
    L duplicate = original.save();
}));
187 
188 // LEVEL 2: PARSERS
189 
190 /++
191 +   Enumeration of XML events/nodes, used by various components.
192 +/
enum XMLKind
{
    /// The XML declaration (`<?xml` ... `?>`) that opens the whole document
    document,

    /// Start of a document type declaration: `<!DOCTYPE ... [`
    dtdStart,
    /// End of a document type declaration: `] >`
    dtdEnd,
    /// A document type declaration that has no internal subset
    dtdEmpty,

    /// A start tag, enclosed in `<` and `>`
    elementStart,

    /// An end tag, enclosed in `</` and `>`
    elementEnd,

    /// An empty-element tag, enclosed in `<` and `/>`
    elementEmpty,

    /// A text node; it has no delimiters of its own
    text,

    /// A CDATA section, enclosed in `<![CDATA[` and `]]>`
    cdata,

    /// A comment, enclosed in `<!--` and `-->`
    comment,

    /// A processing instruction, enclosed in `<?` and `?>`
    processingInstruction,

    /// An attribute-list declaration, enclosed in `<!ATTLIST` and `>`
    attlistDecl,
    /// An element declaration, enclosed in `<!ELEMENT` and `>`
    elementDecl,
    /// An entity declaration, enclosed in `<!ENTITY` and `>`
    entityDecl,
    /// A notation declaration, enclosed in `<!NOTATION` and `>`
    notationDecl,
    /// Any other, unrecognized declaration, enclosed in `<!` and `>`
    declaration,

    /// A conditional section, enclosed in `<![` `[` and `]]>`
    conditional,
}
240 
241 /++
242 +   Checks whether its argument fulfills all requirements to be used as XML parser.
243 +
244 +   An XML parser is the second component in the parsing chain. It is usually built
245 +   on top of a lexer and used to feed a cursor.
246 +   The slices contained in the tokens returned by the parser are ephemeral: every
247 +   reference to them may or may not be invalidated by subsequent calls to `popFront`.
248 +   If the caller needs them, it has to copy them somewhere else.
249 +
250 +   Params:
251 +       P = the type to be tested
252 +
253 +   Returns:
254 +   `true` if P satisfies the XML parser specification here stated; `false` otherwise
255 +
256 +   Specification:
257 +   The parser shall at least:
258 +   $(UL
259 +       $(LI have `alias CharacterType`: the type of a single source character;)
260 +       $(LI have `alias InputType`: the type of the input which is used to feed this
261 +            parser;)
262 +       $(LI be an `InputRange`, whose elements shall support at least the following fields:
263 +            $(UL
264 +               $(LI `XMLKind kind`: the kind of this node;)
265 +               $(LI `P.CharacterType[] content`: the contents of this node, excluding
266 +                     the delimiters specified in the documentation of `XMLKind`;)
267 +            ))
268 +       $(LI have `void setSource(InputType)`: sets the input source for this parser
269 +            and eventual underlying components; the parser may perform other
270 +            initialization work and even consume part of the input during this
271 +            operation; after (partial or complete) usage, a parser may be reinitialized
272 +            and used with another input by calling this function;)
273 +   )
274 +/
template isLowLevelParser(P)
{
    static if (isInputRange!P)
    {
        // The range elements must carry a `kind` of type `XMLKind` and a
        // `content` typed exactly as a slice of the parser's `CharacterType`.
        enum bool isLowLevelParser =
            is(typeof(ElementType!P.kind) == XMLKind)
            && is(typeof(ElementType!P.content) == P.CharacterType[]);
    }
    else
    {
        // Anything that is not an input range cannot be a parser.
        enum bool isLowLevelParser = false;
    }
}
280 
281 /++
282 +   Checks whether its argument is a saveable parser.
283 +
284 +   A saveable parser is a parser enhanced with a `save` method analogous to the `save`
285 +   method of `ForwardRange`s.
286 +
287 +   Params:
288 +       P = the type to be tested
289 +
290 +   Returns:
291 +   `true` if P is a parser (as specified by `isLowLevelParser`) and also supports the
292 +   `save` method as specified here; `false` otherwise
293 +
294 +   Specification:
295 +   The type shall support at least:
296 +   $(UL
297 +       $(LI all methods and aliases specified by `isLowLevelParser`)
298 +       $(LI `P save()`: returns an independent copy of the current parser; the
299 +             copy must start at the position the original parser was when this method
300 +             was called; the two copies shall be independent, in that advancing one
301 +             does not advance the other.)
302 +   )
303 +/
// A saveable parser is a low level parser that also satisfies `isForwardRange`.
enum bool isSaveableLowLevelParser(P) = isLowLevelParser!P && isForwardRange!P;
308 
309 // LEVEL 3: CURSORS
310 
311 /++
312 +   Checks whether its argument fulfills all requirements to be used as XML cursor.
313 +
+   The cursor is the heart of the XML parsing chain. Every higher level component
315 +   (SAX, DOM, validations) builds on top of this concept.
316 +   A cursor is a logical pointer inside a stream of XML nodes. It can be queried
317 +   for properties of the current node (kind, name, attributes, ...) and it can be
318 +   advanced in the stream. It cannot move backwards. Any reference to the outputs
319 +   of a cursor may or may not be invalidated by advancing operations.
320 +
321 +   Params:
322 +       CursorType = the type to be tested
323 +
324 +   Returns:
325 +   `true` if CursorType satisfies the XML cursor specification here stated;
326 +   `false` otherwise
327 +
328 +   Specification:
329 +   A cursor shall support at least these methods and aliases:
330 +   $(UL
331 +       $(LI `alias StringType`: the type of an output string; most methods will
332 +             return instances of this type;)
333 +       $(LI `alias InputType`: the type of the input which is used to feed this
334 +             cursor;)
335 +       $(LI `void setSource(InputType)`: sets the input source for this cursor and
336 +             eventual underlying components; the cursor may perform other initialization
337 +             work and even consume part of the input during this operation; after
338 +            (partial or complete) usage, a cursor may be reinitialized and used with
339 +             another input by calling this function;)
340 +       $(LI `bool atBeginning()`: returns true if the cursor has never been advanced;
341 +             it is thus pointing to the node of type `XMLKind.document` representing
342 +             the XML declaration of the document;)
343 +       $(LI `bool documentEnd()`: returns `true` if the input has been completely
344 +             consumed; if it is the case, any advancing operation will perform no action)
345 +       $(LI  the following methods can be used to query the current node properties:
346 +             $(UL
347 +               $(LI `XMLKind kind()`: returns the `XMLKind` of the current node;)
348 +               $(LI `StringType name()`: returns the qualified name of the current
349 +                     element or the target of the current processing instruction;
350 +                     the empty string in all other cases;)
351 +               $(LI `StringType localName()`: returns the local name of the
352 +                     current element, if it has a prefix; the empty string in all
353 +                     other cases;)
354 +               $(LI `StringType prefix()`: returns the prefix of the current element,
355 +                     if it has any; the empty string in all other cases;)
356 +               $(LI `auto attributes()`: returns a range of all attributes defined
357 +                     on the current element; if the current node is a processing
358 +                     instruction, its data section is parsed as if it was the attributes
359 +                     list of an element (which is quite common); for all other node
360 +                     kinds, an empty range is returned.
361 +                     The type returned by this range `front` method shall at least support
362 +                     the following fields:
363 +                     $(UL
364 +                       $(LI `StringType name`: the qualified name of the attribute;)
365 +                       $(LI `StringType prefix`: the prefix of the attribute, if it
366 +                             has any; the empty string otherwise;)
367 +                       $(LI `StringType localName`: the local name of the attribute,
368 +                             if it has any prefix; the empty string otherwise;)
369 +                       $(LI `StringType value`: the value of the attribute;)
370 +                     ))
371 +               $(LI `StringType content()`: returns the text content of the current
372 +                     comment, text node or cdata section or the data of the current
373 +                     processing instruction; the empty string in all other cases;)
374 +               $(LI `StringType wholeContent()`: returns the entire content of the node;)
375 +             ))
376 +       $(LI  the following methods can be used to advance the cursor in the stream
377 +             of XML nodes:
378 +             $(UL
379 +               $(LI `bool enter()`: tries to advance the cursor to the first child
380 +                     of the current node; returns `true` if the operation succeeded;
381 +                     otherwise, if the cursor was positioned on the start tag of an
382 +                     element, it is now positioned on its closing tag; otherwise,
383 +                     the cursor did not advance;)
384 +               $(LI `bool next()`: tries to advance the cursor to the next sibling
+                     of the current node; returns `true` if the operation succeeded;
386 +                     otherwise (i.e. the cursor was positioned on the last child
387 +                     of an element) it is now positioned on the closing tag of the
388 +                     parent element;)
389 +               $(LI `void exit()`: advances the cursor to the closing tag of the
390 +                     element containing the current node;)
391 +             ))
392 +   )
393 +
394 +   Examples:
395 +   ---
396 +   /* recursively prints the kind of each node */
397 +   void recursivePrint(CursorType)(ref CursorType cursor)
398 +       if (isCursor!CursorType)
399 +   {
400 +       do
401 +       {
402 +           // print the kind of the current node
403 +           writeln(cursor.kind);
404 +           // if the node has children
405 +           if (cursor.enter)
406 +           {
407 +               // recursively print them
408 +               recursivePrint(cursor);
409 +               // back to the current level
410 +               cursor.exit;
411 +           }
412 +       }
413 +       // iterate on every sibling
+       while (cursor.next);
415 +   }
416 +   ---
417 +/
enum bool isCursor(CursorType) = is(typeof(
(inout int = 0)
{
    // The alias is otherwise unused: declaring it simply requires that
    // `CursorType.StringType` exists.
    alias Str = CursorType.StringType;

    CursorType cur;
    bool ok;

    // Position queries and advancing operations.
    ok = cur.atBeginning;
    ok = cur.documentEnd;
    ok = cur.next;
    ok = cur.enter;
    cur.exit;

    // Properties of the current node. The type of `name` is inferred, and
    // every other textual property must be assignable to it.
    XMLKind nodeKind = cur.kind;
    auto text = cur.name;
    text = cur.localName;
    text = cur.prefix;
    text = cur.content;
    text = cur.wholeContent;

    // Attributes: each field of `front` must be assignable to the same
    // variable that received `name`.
    auto attrRange = cur.attributes;
    text = attrRange.front.prefix;
    text = attrRange.front.localName;
    text = attrRange.front.name;
    text = attrRange.front.value;
}
));
447 
448 /++
449 +   Checks whether its argument is a saveable cursor.
450 +
451 +   A saveable cursor is a cursor enhanced with a `save` method analogous to the `save`
452 +   method of `ForwardRange`s.
453 +
454 +   Params:
455 +       CursorType = the type to be tested
456 +
457 +   Returns:
458 +   `true` if CursorType is a cursor (as specified by `isCursor`) and also supports the
459 +   `save` method as specified here; `false` otherwise
460 +
461 +   Specification:
462 +   The type shall support at least:
463 +   $(UL
464 +       $(LI all methods and aliases specified by `isCursor`)
465 +       $(LI `CursorType save()`: returns an independent copy of the current cursor; the
466 +             copy must start at the position the original cursor was when this method
467 +             was called; the two copies shall be independent, in that advancing one
468 +             does not advance the other.)
469 +   )
470 +/
enum bool isSaveableCursor(CursorType) = isCursor!CursorType && is(typeof(
(inout int = 0)
{
    // On top of the plain cursor interface, `save()` must yield a value
    // that can initialize another `CursorType`.
    CursorType original;
    CursorType duplicate = original.save();
}));
480 
481 // WRITERS
482 /++ 
483  + Tests whether the given type is a writer.
484  +/
enum bool isWriter(WriterType) = is(typeof(
(inout int = 0)
{
    // The writer must expose a `StringType` alias; every textual argument
    // below is passed as a value of that type.
    alias Str = WriterType.StringType;

    WriterType sink;
    Str text;

    // Each of these calls must compile for `WriterType` to qualify.
    sink.writeXMLDeclaration(10, text, true);
    sink.writeComment(text);
    sink.writeText(text);
    sink.writeCDATA(text);
    sink.writeProcessingInstruction(text, text);
    sink.startElement(text);
    sink.closeElement(text);
    sink.writeAttribute(text, text);
}));
505 
506 // COMMON
507 
/++
+   Checks whether a component can be fed an input through `setSource`.
+
+   Params:
+       T = the type to be tested
+
+   Returns:
+   `true` if `T` declares an `InputType` alias and `setSource` can be called on an
+   instance of `T` with a value of that type; `false` otherwise
++/
enum bool needSource(T) = is(typeof(
(inout int = 0)
{
    alias Source = T.InputType;

    T target;
    Source src;

    target.setSource(src);
}));
521 
522 /++
523 +   Generic XML exception; thrown whenever a component experiences an error, unless
524 +   the user provided a custom error handler.
525 +/
class XMLException : Exception
{
    /++
    +   Builds the exception from a message and a source location, optionally
    +   chaining another throwable; all arguments are forwarded to `Exception`.
    +/
    this(string msg, string file = __FILE__, size_t line = __LINE__,
            Throwable nextInChain = null) @nogc @safe pure nothrow
    {
        super(msg, file, line, nextInChain);
    }

    /++
    +   Same as the other constructor, but takes the chained throwable right
    +   after the message so the source location can be left to its defaults.
    +/
    this(string msg, Throwable nextInChain, string file = __FILE__,
            size_t line = __LINE__) @nogc @safe pure nothrow
    {
        super(msg, file, line, nextInChain);
    }
}
539 
540 /** 
541  * Defines the document's XML version.
542  */
enum XMLVersion
{
    /// XML version 1.0
    XML1_0,
    /// XML version 1.1
    XML1_1,
}