/*
 * Copyright Lodovico Giaretta 2016 - .
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at
 * http://www.boost.org/LICENSE_1_0.txt)
 */

/++
+   This module contains some templates to check whether a type exposes the correct
+   interface to be an xml lexer, parser or cursor; it also contains some simple
+   types used in various parts of the library.
+
+   Authors:
+   Lodovico Giaretta
+   László Szerémi
+
+   License:
+   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
+
+   Copyright:
+   Copyright Lodovico Giaretta 2016 --
+/

module newxml.interfaces;

import std.range.primitives;
import std.traits;

// LEVEL 1: LEXERS

/++
+   Checks whether its argument fulfills all requirements to be used as an XML lexer.
+
+   An XML lexer is the first component in the parsing chain. It masks from the parser
+   the shape of the input and the type of the characters in it. The slices returned by
+   the lexer are ephemeral: every reference to them may or may not be invalidated when a
+   new slice is requested by the parser. It is thus the responsibility of the user to
+   copy the output if necessary.
+
+   Params:
+       L = the type to be tested
+
+   Returns:
+   `true` if L satisfies the XML lexer specification here stated; `false` otherwise
+
+   Specification:
+   A lexer shall support at least these methods and aliases:
+   $(UL
+       $(LI `alias CharacterType`: the type of a single source character; most
+             methods will deal with slices of this type;)
+       $(LI `alias InputType`: the type of the input which is used to feed this
+             lexer;)
+       $(LI `void setSource(InputType)`: sets the input source for this lexer;
+             the lexer may perform other initialization work and even consume
+             part of the input during this operation; after (partial or complete)
+             usage, a lexer may be reinitialized and used with another input
+             by calling this function;)
+       $(LI `bool empty()`: returns `true` if the entire input has been consumed;
+            `false` otherwise;)
+       $(LI `void start()`: instructs the lexer that a new token starts at the
+             current positions; the next calls to `get` will retrieve the input
+             from the current position; this call may invalidate any reference
+             to any slice previously returned from `get`;)
+       $(LI `CharacterType[] get()`: returns the contents of the input going from
+             the last call to `start` till the current position;)
+       $(LI `bool testAndAdvance(CharacterType)`: tests whether the input character
+             at the current position matches the one passed as parameter; if
+             it is the case, this method returns `true` and advances the input
+             past the said character; otherwise, it returns `false` and no action
+             is performed;)
+       $(LI `void advanceUntil(CharacterType, bool)`: advances the input until
+             the given character is found; if the second parameter is true, the
+             input is then advanced past the found character;)
+       $(LI `void advanceUntilAny(CharacterType[], bool)`: advances the input
+             until any of the given characters is found; if the second parameter
+             is true, the input is then advanced past the found character;)
+       $(LI `void dropWhile(CharacterType[])`: advances the input until a character
+             different from the given ones is found; the characters advanced by
+             this method may or may not be included in the output of a subsequent
+             `get`; for this reason, this method should only be called immediately
+             before `start`, to skip unneeded characters between two tokens.)
+   )
+
+   Notes:
+   The compile-time check below calls `testAndAdvance` and `advanceUntil` with a
+   `char`, and `advanceUntilAny` and `dropWhile` with a `string`, whatever
+   `CharacterType` is; a conforming lexer must therefore accept those argument types.
+   `InputType` and `setSource` are not verified by this template; `needSource`
+   checks for them.
+
+   Examples:
+   ---
+   /* extract a word surrounded by whitespaces */
+   auto getWord(L)(ref L lexer)
+       if (isLexer!L)
+   {
+       // drop leading whitespaces
+       lexer.dropWhile(" \n\r\t");
+
+       // start building the word
+       lexer.start;
+
+       // keep advancing until you find the trailing whitespaces
+       lexer.advanceUntilAny(" \n\r\t", false);
+
+       // return what you found
+       return lexer.get;
+   }
+
+   /* extract a key/value pair from a string like " key : value " */
+   auto getKeyValuePair(L)(ref L lexer)
+       if (isLexer!L)
+   {
+       // drop leading whitespaces
+       lexer.dropWhile(" \n\r\t");
+
+       // here starts the key, which ends with either a whitespace or a colon
+       lexer.start;
+       lexer.advanceUntilAny(" \n\r\t:", false);
+       auto key = lexer.get;
+
+       // skip any spaces after the key
+       lexer.dropWhile(" \n\r\t");
+       // now there must be a colon
+       assert(lexer.testAndAdvance(':'));
+       // skip all space after the colon
+       lexer.dropWhile(" \n\r\t");
+
+       // here starts the value, which ends at the first whitespace
+       lexer.start;
+       lexer.advanceUntilAny(" \n\r\t", false);
+       auto value = lexer.get;
+
+       // return the pair
+       return tuple(key, value);
+   }
+   ---
+/
template isLexer(L)
{
    // The lambda is never executed: `is(typeof(...))` only asks whether it compiles.
    enum bool isLexer = is(typeof(
    (inout int = 0)
    {
        // Fails to compile (and thus yields `false`) if L has no CharacterType.
        alias C = L.CharacterType;

        L lexer;
        char c;
        bool b;
        string s;
        C[] cs;

        b = lexer.empty;
        lexer.start();
        cs = lexer.get();
        b = lexer.testAndAdvance(c);
        lexer.advanceUntil(c, b);
        lexer.advanceUntilAny(s, b);
        lexer.dropWhile(s);
    }));
}

/++
+   Checks whether its argument is a saveable lexer.
+
+   A saveable lexer is a lexer enhanced with a `save` method analogous to the `save`
+   method of `ForwardRange`s.
+
+   Params:
+       L = the type to be tested
+
+   Returns:
+   `true` if L is a lexer (as specified by `isLexer`) and also supports the `save`
+   method as specified here; `false` otherwise
+
+   Specification:
+   The type shall support at least:
+   $(UL
+       $(LI all methods and aliases specified by `isLexer`)
+       $(LI `L save()`: returns an independent copy of the current lexer; the
+             copy must start at the position the original lexer was when this method
+             was called; the two copies shall be independent, in that advancing one
+             does not advance the other.)
+   )
+/
template isSaveableLexer(L)
{
    // Only the signature is checked here: the result of `save()` must be usable
    // to initialize another `L`. Independence of the copies cannot be verified
    // at compile time.
    enum bool isSaveableLexer = isLexer!L && is(typeof(
    (inout int = 0)
    {
        L lexer1;
        L lexer2 = lexer1.save();
    }));
}

// LEVEL 2: PARSERS

/++
+   Enumeration of XML events/nodes, used by various components.
+/
enum XMLKind
{
    /++ The `<?xml` `?>` declaration at the beginning of the entire document +/
    document,

    /++ The beginning of a document type declaration `<!DOCTYPE ... [` +/
    dtdStart,
    /++ The end of a document type declaration `] >` +/
    dtdEnd,
    /++ A document type declaration without an internal subset +/
    dtdEmpty,

    /++ A start tag, delimited by `<` and `>` +/
    elementStart,

    /++ An end tag, delimited by `</` and `>` +/
    elementEnd,

    /++ An empty tag, delimited by `<` and `/>` +/
    elementEmpty,

    /++ A text element, without any specific delimiter +/
    text,

    /++ A cdata section, delimited by `<![CDATA[` and `]]>` +/
    cdata,

    /++ A comment, delimited by `<!--` and `-->` +/
    comment,

    /++ A processing instruction, delimited by `<?` and `?>` +/
    processingInstruction,

    /++ An attlist declaration, delimited by `<!ATTLIST` and `>` +/
    attlistDecl,
    /++ An element declaration, delimited by `<!ELEMENT` and `>` +/
    elementDecl,
    /++ An entity declaration, delimited by `<!ENTITY` and `>` +/
    entityDecl,
    /++ A notation declaration, delimited by `<!NOTATION` and `>` +/
    notationDecl,
    /++ Any unrecognized kind of declaration, delimited by `<!` and `>` +/
    declaration,

    /++ A conditional section, delimited by `<![` `[` and `]]>` +/
    conditional,
}

/++
+   Checks whether its argument fulfills all requirements to be used as XML parser.
+
+   An XML parser is the second component in the parsing chain. It is usually built
+   on top of a lexer and used to feed a cursor.
+   The slices contained in the tokens returned by the parser are ephemeral: every
+   reference to them may or may not be invalidated by subsequent calls to `popFront`.
+   If the caller needs them, it has to copy them somewhere else.
+
+   Params:
+       P = the type to be tested
+
+   Returns:
+   `true` if P satisfies the XML parser specification here stated; `false` otherwise
+
+   Specification:
+   The parser shall at least:
+   $(UL
+       $(LI have `alias CharacterType`: the type of a single source character;)
+       $(LI have `alias InputType`: the type of the input which is used to feed this
+            parser;)
+       $(LI be an `InputRange`, whose elements shall support at least the following fields:
+            $(UL
+               $(LI `XMLKind kind`: the kind of this node;)
+               $(LI `P.CharacterType[] content`: the contents of this node, excluding
+                     the delimiters specified in the documentation of `XMLKind`;)
+            ))
+       $(LI have `void setSource(InputType)`: sets the input source for this parser
+            and eventual underlying components; the parser may perform other
+            initialization work and even consume part of the input during this
+            operation; after (partial or complete) usage, a parser may be reinitialized
+            and used with another input by calling this function;)
+   )
+
+   Notes:
+   This template verifies the `InputRange` interface and the types of the `kind`
+   and `content` fields of the range elements. `InputType` and `setSource` are not
+   verified here; `needSource` checks for them.
+/
template isLowLevelParser(P)
{
    enum bool isLowLevelParser = isInputRange!P && is(typeof(ElementType!P.kind) == XMLKind)
                                 && is(typeof(ElementType!P.content) == P.CharacterType[]);
}

/++
+   Checks whether its argument is a saveable parser.
+
+   A saveable parser is a parser enhanced with a `save` method analogous to the `save`
+   method of `ForwardRange`s.
+
+   Params:
+       P = the type to be tested
+
+   Returns:
+   `true` if P is a parser (as specified by `isLowLevelParser`) and also supports the
+   `save` method as specified here; `false` otherwise
+
+   Specification:
+   The type shall support at least:
+   $(UL
+       $(LI all methods and aliases specified by `isLowLevelParser`)
+       $(LI `P save()`: returns an independent copy of the current parser; the
+             copy must start at the position the original parser was when this method
+             was called; the two copies shall be independent, in that advancing one
+             does not advance the other.)
+   )
+/
template isSaveableLowLevelParser(P)
{
    // The `save` requirement is delegated to the standard forward range check.
    enum bool isSaveableLowLevelParser = isLowLevelParser!P && isForwardRange!P;
}

// LEVEL 3: CURSORS

/++
+   Checks whether its argument fulfills all requirements to be used as XML cursor.
+
+   The cursor is the heart of the XML parsing chain. Every higher level component
+   (SAX, DOM, validations) builds on top of this concept.
+   A cursor is a logical pointer inside a stream of XML nodes. It can be queried
+   for properties of the current node (kind, name, attributes, ...) and it can be
+   advanced in the stream. It cannot move backwards. Any reference to the outputs
+   of a cursor may or may not be invalidated by advancing operations.
+
+   Params:
+       CursorType = the type to be tested
+
+   Returns:
+   `true` if CursorType satisfies the XML cursor specification here stated;
+   `false` otherwise
+
+   Specification:
+   A cursor shall support at least these methods and aliases:
+   $(UL
+       $(LI `alias StringType`: the type of an output string; most methods will
+             return instances of this type;)
+       $(LI `alias InputType`: the type of the input which is used to feed this
+             cursor;)
+       $(LI `void setSource(InputType)`: sets the input source for this cursor and
+             eventual underlying components; the cursor may perform other initialization
+             work and even consume part of the input during this operation; after
+             (partial or complete) usage, a cursor may be reinitialized and used with
+             another input by calling this function;)
+       $(LI `bool atBeginning()`: returns true if the cursor has never been advanced;
+             it is thus pointing to the node of type `XMLKind.document` representing
+             the XML declaration of the document;)
+       $(LI `bool documentEnd()`: returns `true` if the input has been completely
+             consumed; if it is the case, any advancing operation will perform no action)
+       $(LI the following methods can be used to query the current node properties:
+            $(UL
+               $(LI `XMLKind kind()`: returns the `XMLKind` of the current node;)
+               $(LI `StringType name()`: returns the qualified name of the current
+                     element or the target of the current processing instruction;
+                     the empty string in all other cases;)
+               $(LI `StringType localName()`: returns the local name of the
+                     current element, if it has a prefix; the empty string in all
+                     other cases;)
+               $(LI `StringType prefix()`: returns the prefix of the current element,
+                     if it has any; the empty string in all other cases;)
+               $(LI `auto attributes()`: returns a range of all attributes defined
+                     on the current element; if the current node is a processing
+                     instruction, its data section is parsed as if it was the attributes
+                     list of an element (which is quite common); for all other node
+                     kinds, an empty range is returned.
+                     The type returned by this range `front` method shall at least support
+                     the following fields:
+                     $(UL
+                       $(LI `StringType name`: the qualified name of the attribute;)
+                       $(LI `StringType prefix`: the prefix of the attribute, if it
+                             has any; the empty string otherwise;)
+                       $(LI `StringType localName`: the local name of the attribute,
+                             if it has any prefix; the empty string otherwise;)
+                       $(LI `StringType value`: the value of the attribute;)
+                     ))
+               $(LI `StringType content()`: returns the text content of the current
+                     comment, text node or cdata section or the data of the current
+                     processing instruction; the empty string in all other cases;)
+               $(LI `StringType wholeContent()`: returns the entire content of the node;)
+            ))
+       $(LI the following methods can be used to advance the cursor in the stream
+            of XML nodes:
+            $(UL
+               $(LI `bool enter()`: tries to advance the cursor to the first child
+                     of the current node; returns `true` if the operation succeeded;
+                     otherwise, if the cursor was positioned on the start tag of an
+                     element, it is now positioned on its closing tag; otherwise,
+                     the cursor did not advance;)
+               $(LI `bool next()`: tries to advance the cursor to the next sibling
+                     of the current node; returns `true` if the operation succeeded;
+                     otherwise (i.e. the cursor was positioned on the last child
+                     of an element) it is now positioned on the closing tag of the
+                     parent element;)
+               $(LI `void exit()`: advances the cursor to the closing tag of the
+                     element containing the current node;)
+            ))
+   )
+
+   Notes:
+   `InputType` and `setSource` are not verified by this template; `needSource`
+   checks for them.
+
+   Examples:
+   ---
+   /* recursively prints the kind of each node */
+   void recursivePrint(CursorType)(ref CursorType cursor)
+       if (isCursor!CursorType)
+   {
+       do
+       {
+           // print the kind of the current node
+           writeln(cursor.kind);
+           // if the node has children
+           if (cursor.enter)
+           {
+               // recursively print them
+               recursivePrint(cursor);
+               // back to the current level
+               cursor.exit;
+           }
+       }
+       // iterate on every sibling
+       while (cursor.next);
+   }
+   ---
+/
template isCursor(CursorType)
{
    // The lambda is never executed: `is(typeof(...))` only asks whether it compiles.
    enum bool isCursor = is(typeof(
    (inout int = 0)
    {
        // Unused on purpose: only ensures that CursorType.StringType exists.
        alias S = CursorType.StringType;

        CursorType cursor;
        bool b;

        b = cursor.atBeginning;
        b = cursor.documentEnd;
        b = cursor.next;
        b = cursor.enter;
        cursor.exit;
        XMLKind kind = cursor.kind;
        // `s` takes the type of `name`; every later assignment therefore requires
        // the other string-returning members to be assignable to that same type.
        auto s = cursor.name;
        s = cursor.localName;
        s = cursor.prefix;
        s = cursor.content;
        s = cursor.wholeContent;
        auto attrs = cursor.attributes;
        s = attrs.front.prefix;
        s = attrs.front.localName;
        s = attrs.front.name;
        s = attrs.front.value;
    }
    ));
}

/++
+   Checks whether its argument is a saveable cursor.
+
+   A saveable cursor is a cursor enhanced with a `save` method analogous to the `save`
+   method of `ForwardRange`s.
+
+   Params:
+       CursorType = the type to be tested
+
+   Returns:
+   `true` if CursorType is a cursor (as specified by `isCursor`) and also supports the
+   `save` method as specified here; `false` otherwise
+
+   Specification:
+   The type shall support at least:
+   $(UL
+       $(LI all methods and aliases specified by `isCursor`)
+       $(LI `CursorType save()`: returns an independent copy of the current cursor; the
+             copy must start at the position the original cursor was when this method
+             was called; the two copies shall be independent, in that advancing one
+             does not advance the other.)
+   )
+/
template isSaveableCursor(CursorType)
{
    // Only the signature is checked here: the result of `save()` must be usable
    // to initialize another `CursorType`.
    enum bool isSaveableCursor = isCursor!CursorType && is(typeof(
    (inout int = 0)
    {
        CursorType cursor1;
        CursorType cursor2 = cursor1.save();
    }));
}

// WRITERS
/++
+   Tests whether the given type is a writer.
+
+   Params:
+       WriterType = the type to be tested
+
+   Returns:
+   `true` if WriterType exposes an `alias StringType` and all the following calls
+   compile, where `s` is a value of type `StringType`; `false` otherwise:
+   $(UL
+       $(LI `writeXMLDeclaration(10, s, true)`)
+       $(LI `writeComment(s)`)
+       $(LI `writeText(s)`)
+       $(LI `writeCDATA(s)`)
+       $(LI `writeProcessingInstruction(s, s)`)
+       $(LI `startElement(s)`)
+       $(LI `closeElement(s)`)
+       $(LI `writeAttribute(s, s)`)
+   )
+/
template isWriter(WriterType)
{
    enum bool isWriter = is(typeof(
    (inout int = 0)
    {
        alias StringType = WriterType.StringType;

        WriterType writer;
        StringType s;

        writer.writeXMLDeclaration(10, s, true);
        writer.writeComment(s);
        writer.writeText(s);
        writer.writeCDATA(s);
        writer.writeProcessingInstruction(s, s);
        writer.startElement(s);
        writer.closeElement(s);
        writer.writeAttribute(s, s);
    }));
}

// COMMON

/++
+   Checks whether a component must be fed with an input source.
+
+   Params:
+       T = the type to be tested
+
+   Returns:
+   `true` if T exposes an `alias InputType` and a `setSource` method callable with
+   a value of that type; `false` otherwise
+/
template needSource(T)
{
    enum bool needSource = is(typeof(
    (inout int = 0)
    {
        alias InputType = T.InputType;

        T component;
        InputType input;

        component.setSource(input);
    }));
}

/++
+   Generic XML exception; thrown whenever a component experiences an error, unless
+   the user provided a custom error handler.
+/
class XMLException : Exception
{
    /++
    +   Creates the exception, forwarding every argument to `Exception`.
    +
    +   Params:
    +       msg = the error message
    +       file = the source file reported for the error; defaults to the caller's file
    +       line = the source line reported for the error; defaults to the caller's line
    +       nextInChain = an optional chained `Throwable`
    +/
    @nogc @safe pure nothrow this(string msg, string file = __FILE__, size_t line = __LINE__,
            Throwable nextInChain = null)
    {
        super(msg, file, line, nextInChain);
    }

    /++
    +   Same as the other constructor, with the chained `Throwable` in second position
    +   so that it can be given without spelling out `file` and `line`.
    +
    +   Params:
    +       msg = the error message
    +       nextInChain = the chained `Throwable`
    +       file = the source file reported for the error; defaults to the caller's file
    +       line = the source line reported for the error; defaults to the caller's line
    +/
    @nogc @safe pure nothrow this(string msg, Throwable nextInChain, string file = __FILE__, size_t line = __LINE__)
    {
        super(msg, file, line, nextInChain);
    }
}

/**
 * Defines the document's XML version.
 */
enum XMLVersion {
    /// XML version 1.0
    XML1_0,
    /// XML version 1.1
    XML1_1,
}