module newxml.domstring;

import std.range;
import std.string;
import std.utf;

import newxml.faststrings;
import newxml.interfaces;

version (newxml_force_utf8) {
    alias XMLCh = immutable(char);
} else version (newxml_force_utf32) {
    alias XMLCh = immutable(dchar);
} else {
    alias XMLCh = immutable(wchar);
}
/**
 * Proper DOMString implementation, with some added range capabilities.
 * Authors:
 *      László Szerémi
 * Contains UTF-16 strings by default, but can be configured to either UTF-8 or UTF-32 with version labels.
 *
 * The range view is described by two cursors (`frontPos`, `backPos`) into the character buffer.
 * `length`, `opIndex` and `opSlice` always address the whole buffer, regardless of how far the
 * cursors have been moved by `popFront`/`popBack`.
 */
public class DOMString : RandomAccessFinite!XMLCh {
    ///Stores the character data.
    private XMLCh[] buffer;
    ///Front and rear positions. The range view covers `buffer[frontPos..backPos]`.
    private size_t frontPos, backPos;
    /**`foreach` iteration uses opApply, since one delegate call per loop
     * iteration is faster than three virtual function calls.
     * Iterates over the whole buffer; the range cursors are neither read nor modified.
     * TO DO: Use metaprogramming to make it able to be used in all sorts of context.
     */
    int opApply(scope int delegate(XMLCh) deleg) {
        for (size_t i ; i < buffer.length ; i++) {
            int result = deleg(buffer[i]);
            if (result) return result;
        }
        return 0;
    }

    /// Ditto
    int opApply(scope int delegate(size_t, XMLCh) deleg) {
        for (size_t i ; i < buffer.length ; i++) {
            int result = deleg(i, buffer[i]);
            if (result) return result;
        }
        return 0;
    }
@safe:
    /**
     * Default constructor for DOMString. The resulting DOMString object refers to no string at all
     * Difference from C++ implementation: Does not compare with 0
     */
    this() @nogc nothrow pure {

    }
    ///Copy constructor. The copy's range cursors span the whole copied string.
    this(const(DOMString) other) nothrow pure {
        buffer = other.buffer.dup;
        backPos = buffer.length;
    }
    /**
     * Constructor to build a DOMString from a null-terminated XML character array.
     * (XMLCh is a UTF-16 character by default, can be configured with version labels)
     */
    this(XMLCh* other) @nogc @trusted nothrow pure {
        buffer = fromStringz(other);
        backPos = buffer.length;
    }
    /**
     * Constructor to build a DOMString from a character array of given length.
     * Params:
     *      other = The character array to be imported into the DOMString
     *      length = The length of the character array to be imported
     */
    this(XMLCh* other, size_t length) @nogc @system nothrow pure {
        buffer = other[0..length];
        backPos = buffer.length;
    }
    version (newxml_force_utf8) {

    } else {
        /**
         * Constructor to build a DOMString from a null-terminated 8 bit character array.
         * Params:
         *      other = The character array to be imported into the DOMString
         */
        this(const(char)* other) @trusted nothrow pure {
            version (newxml_force_utf32) {
                buffer = toUTF32(fromStringz(other));
            } else {
                buffer = toUTF16(fromStringz(other));
            }
            backPos = buffer.length;
        }
    }
    ///Creates DOMString objects from standard D strings, transcoding them to the configured XMLCh type.
    this(T)(T[] other) nothrow pure {
        version (newxml_force_utf8) {
            buffer = toUTF8(other);
        } else version (newxml_force_utf32) {
            buffer = toUTF32(other);
        } else {
            buffer = toUTF16(other);
        }
        backPos = buffer.length;
    }
    /**
     * Append a null-terminated XMLCh * (Unicode) string to this string.
     * Params:
     *      other = The object to be appended
     */
    void appendData(XMLCh* other) @trusted nothrow pure {
        buffer ~= fromStringz(other);
        backPos = buffer.length;
    }
    /**
     * Append a single Unicode character to this string.
     * Params:
     *      ch = The single character to be appended
     */
    void appendData(XMLCh ch) nothrow pure {
        buffer ~= ch;
        backPos = buffer.length;
    }
    /**
     * Appends the content of another DOMString to this string.
     * Params:
     *      other = The object to be appended
     */
    void appendData(DOMString other) nothrow pure {
        buffer ~= other.buffer;
        backPos = buffer.length;
    }
    /**
     * Appends a D string to this string.
     * Params:
     *      other = The D string (string/wstring/dstring) as an array
     */
    void appendData(T)(T[] other) nothrow pure {
        version (newxml_force_utf8) {
            buffer ~= toUTF8(other);
        } else version (newxml_force_utf32) {
            buffer ~= toUTF32(other);
        } else {
            buffer ~= toUTF16(other);
        }
        backPos = buffer.length;
    }
    /**
     * Returns the character at the specified position.
     * Params:
     *      index = The position at which the character is being requested
     * Returns: Returns the character at the specified position.
     */
    XMLCh charAt(size_t index) @nogc nothrow pure {
        return buffer[index];
    }
    /**
     * Makes a clone of the DOMString.
     * Returns: A new DOMString created from this one through the copy constructor.
     */
    DOMString clone() nothrow pure const {
        return new DOMString(this);
    }
    /**
     * Lexically compares this string with another one, code unit by code unit.
     * If one string is a prefix of the other, the shorter one is considered smaller.
     * Params:
     *      other = The DOMString to be compared with.
     * Returns: -1, 0, or 1 if this string is lexically less than, equal to, or greater than `other`.
     */
    int compareString(DOMString other) {
        const size_t common = buffer.length < other.buffer.length ? buffer.length : other.buffer.length;
        for (size_t i ; i < common ; i++) {
            if (buffer[i] != other.buffer[i])
                return buffer[i] < other.buffer[i] ? -1 : 1;
        }
        if (buffer.length == other.buffer.length) return 0;
        return buffer.length < other.buffer.length ? -1 : 1;
    }
    /**
     * Deletes a range of characters from this DOMString.
     * Params:
     *      offset = The position from the beginning from which the data must be deleted
     *      count = The count of characters from the offset that must be deleted
     * Throws: XMLException if the requested range does not fit into the string.
     */
    void deleteData(size_t offset, size_t count) pure {
        // Written without `offset + count` so that huge arguments cannot wrap around and pass the check.
        if (offset > buffer.length || count > buffer.length - offset)
            throw new XMLException("offset + count larger than buffer length!");
        buffer = buffer[0..offset] ~ buffer[offset+count..$];
        backPos = buffer.length;
    }
    /**
     * Compare a DOMString with a null-terminated raw XMLCh character string.
     * Params:
     *      other = The character string to be compared with.
     * Returns: True if the strings are the same, false otherwise.
     */
    bool equals(XMLCh* other) @trusted pure const {
        auto str = fromStringz(other);
        if (str.length != buffer.length) return false;
        return fastEqual(buffer, str);
    }
    /**
     * Tells if a DOMString contains the same character data as another.
     * Params:
     *      other = The DOMString to be compared with.
     * Returns: True if the two DOMStrings are same, false otherwise (including when `other` is null).
     */
    bool equals(DOMString other) pure const {
        if (other is null) return false;
        if (buffer.length != other.length) return false;
        return fastEqual(buffer, other.buffer);
    }
    /**
     * Compares the content of a D string against a DOMString.
     * Params:
     *      other = The D string to be compared with.
     * Returns: True if their textual data are the same, false otherwise.
     */
    bool equals(T)(T other) pure const {
        XMLCh[] o;
        version (newxml_force_utf8)
            o = toUTF8(other);
        else version (newxml_force_utf32)
            o = toUTF32(other);
        else
            o = toUTF16(other);
        if (buffer.length != o.length) return false;
        return fastEqual(buffer, o);
    }
    /**
     * Inserts a string within the existing DOMString at an arbitrary position.
     * Params:
     *      offset = The offset from the beginning at which the insertion needs to be done in this object
     *      data = The DOMString containing the data that needs to be inserted
     */
    void insertData(size_t offset, DOMString data) pure nothrow {
        buffer = buffer[0..offset] ~ data.buffer ~ buffer[offset..$];
        // Keep the back cursor in sync with the grown buffer, like every other mutator does.
        backPos = buffer.length;
    }
    /**
     * Inserts a string of type XMLCh within the existing DOMString at an arbitrary position
     * Params:
     *      offset = The offset from the beginning at which the insertion needs to be done in this object
     *      other = The character array containing the data that needs to be inserted
     */
    void insertData(size_t offset, XMLCh[] other) pure nothrow {
        buffer = buffer[0..offset] ~ other ~ buffer[offset..$];
        // Keep the back cursor in sync with the grown buffer, like every other mutator does.
        backPos = buffer.length;
    }
    /**
     * Compares the string against various other types or itself using the `==` and `!=` operators.
     * Params:
     *      other = The instance of the type to be tested against.
     * Returns: True if they have the same textual data, false otherwise.
     */
    bool opEquals(R)(R other) pure const {
        return equals(other);
    }
    /**
     * Casts the DOMString to one of the D string types by transcoding it.
     * Only `string`, `wstring` and `dstring` are supported as target types.
     */
    T opCast(T)() const {
        static if (is(T == string)) {
            return transcodeToUTF8;
        } else static if (is(T == wstring)) {
            return transcodeToUTF16;
        } else static if (is(T == dstring)) {
            return transcodeToUTF32;
        } else static assert(0, "Cast target `" ~ T.stringof ~ "` not supported for `DOMString.opCast(T)()`");
    }
    /**
     * Implements easy array appending with operator overloading (`+=` and `~=`).
     * Any other compound assignment operator is accepted but does nothing.
     * Params:
     *      rhs = The data to be appended to the string.
     */
    auto opOpAssign(string op, R)(R rhs) {
        static if(op == "+" || op == "~"){
            appendData(rhs);
        }
    }
    ///Dumps the DOMString on the console.
    void print() const {
        import std.stdio;
        write(buffer);
    }
    ///Dumps the DOMString on the console with a line feed at the end.
    void println() const {
        import std.stdio;
        writeln(buffer);
    }
    ///Returns a handle to the raw buffer in the DOMString.
    XMLCh* rawBuffer() @system @nogc nothrow pure const {
        return buffer.ptr;
    }
    alias ptr = rawBuffer;
    ///Returns the underlying array (string).
    XMLCh[] getDString() @nogc nothrow pure const {
        return buffer;
    }
    /**
     * Preallocate storage in the string to hold a given number of characters. A DOMString will grow its buffer on
     * demand, as characters are added, but it can be more efficient to allocate once in advance, if the size is known.
     * Params:
     *      size = The number of characters to reserve.
     */
    void reserve(size_t size) nothrow pure {
        buffer.reserve(size);
    }
    /**
     * Returns a sub-string of the DOMString starting at a specified position.
     * Params:
     *      offset = The offset from the beginning from which the sub-string is being requested.
     *      count = The count of characters in the requested sub-string
     * Returns: The sub-string of the DOMString being requested
     */
    DOMString substringData(size_t offset, size_t count) nothrow pure const {
        return new DOMString(buffer[offset..offset + count]);
    }
    /**
     * Returns a copy of the string, transcoded to UTF-8 and null-terminated.
     * Returns: A pointer to a buffer of char elements, which represents the original string as UTF-8.
     * Note: This function is using the `toStringz` function, and rules of that apply here too
     * (including those about the lifetime of the returned buffer).
     */
    immutable(char)* transcode() @trusted pure nothrow const {
        return toStringz(toUTF8(buffer));
    }
    /**
     * Transcodes the string as a UTF-8 string
     * Returns: The content of this string as UTF-8 data.
     */
    string transcodeToUTF8() pure nothrow const {
        return toUTF8(buffer);
    }
    /**
     * Transcodes the string as a UTF-16 string
     * Returns: The content of this string as UTF-16 data.
     */
    wstring transcodeToUTF16() pure nothrow const {
        return toUTF16(buffer);
    }
    /**
     * Transcodes the string as a UTF-32 string
     * Returns: The content of this string as UTF-32 data.
     */
    dstring transcodeToUTF32() pure nothrow const {
        return toUTF32(buffer);
    }
    ///Templated transcoder. `T` must be `string`, `wstring` or `dstring`.
    T transcodeTo(T)() pure nothrow const {
        static if (is(T == string))
            return transcodeToUTF8;
        else static if (is(T == wstring))
            return transcodeToUTF16;
        else static if (is(T == dstring))
            return transcodeToUTF32;
        else static assert(0, "Template parameter `" ~ T.stringof ~
                "` not supported for function `DOMString.transcodeTo(T)()`");
    }
    //range stuff begins here
    ///Returns the front element of the range.
    @property XMLCh front() @nogc nothrow pure {
        return buffer[frontPos];
    }
    /**Returns the front element without consuming it, as $(REF moveFront, std, range, primitives)
     * does. The elements are immutable, so "moving" one out is the same as reading it.
     */
    XMLCh moveFront() {
        return buffer[frontPos];
    }

    ///Moves the front pointer up by one. Does nothing if the range is already empty.
    void popFront() {
        if (frontPos < backPos)
            frontPos++;
    }

    ///Returns true if all content of the string have been consumed.
    @property bool empty() {
        return frontPos >= backPos;
    }
    ///Returns the back element of the range.
    @property XMLCh back() {
        return buffer[backPos - 1];
    }

    /**Returns the back element without consuming it, as $(REF moveBack, std, range, primitives)
     * does. The elements are immutable, so "moving" one out is the same as reading it.
     */
    XMLCh moveBack() {
        return buffer[backPos - 1];
    }
    ///Moves the back pointer down by one. Does nothing if the range is already empty.
    void popBack() {
        if (backPos > frontPos)
            backPos--;
    }
    ///Returns a copy of the DOMString that keeps the current front and back positions.
    @property RandomAccessFinite!XMLCh save() {
        DOMString saved = new DOMString(this);
        // The copy constructor resets the cursors to the whole string; restore the current view.
        saved.frontPos = frontPos;
        saved.backPos = backPos;
        return saved;
    }
    ///Allows the characters to be accessed in an array-like fashion. The index is relative to the whole buffer.
    XMLCh opIndex(size_t index) @nogc nothrow pure const {
        return buffer[index];
    }
    /**
     * Returns a slice of the string.
     * Params:
     *      from = The beginning point.
     *      to = The ending point + 1.
     * Returns: The content of the slice as a DOMString.
     */
    DOMString opSlice(size_t from, size_t to) nothrow pure const {
        return new DOMString(buffer[from..to]);
    }
    /**Moves the front pointer to the given position and returns the character found there.
     * NOTE(review): this repositions the range, which differs from the non-mutating `moveAt` of
     * std.range.primitives; kept as is because it is the documented behavior of this class.
     */
    XMLCh moveAt(size_t pos) @nogc nothrow pure {
        frontPos = pos;
        return buffer[frontPos];
    }
    ///Returns the length of the whole string, independently of the range cursors.
    @property size_t length() @nogc nothrow pure const {
        return buffer.length;
    }

    ///
    alias opDollar = length;
}
unittest {
    DOMString test0 = new DOMString("Hello World!"), test1 = new DOMString("Hello World!"w),
            test2 = new DOMString("Hello World!"d);
    assert(test0 == "Hello World!"w);
    assert(test1 == "Hello World!"w);
    assert(test2 == "Hello World!"w);
    assert(test1 == test2);
    assert(test0.length == 12);
    assert(test1.length == 12);
    assert(test2.length == 12);
    assert(test0[3..5].getDString == "lo");

    DOMString test3 = new DOMString("test");
    test3.insertData(2, "te");
    assert(test3 == "tetest");
    // The back cursor must follow the insertion.
    assert(test3.back == 't');
    test3.deleteData(2, 2);
    assert(test3 == "test");
    foreach (size_t i, XMLCh c; test3) {
        assert(c == "test"[i]);
    }

    // The range primitives must visit every character, including the last one.
    DOMString test4 = new DOMString("abc");
    XMLCh[] visited;
    while (!test4.empty) {
        visited ~= test4.front;
        test4.popFront();
    }
    assert(visited == "abc");

    // `save` must keep the current position.
    DOMString test5 = new DOMString("abc");
    test5.popFront();
    auto test6 = test5.save;
    assert(test6.front == 'b');

    // Lexical comparison.
    DOMString lower = new DOMString("abc"), higher = new DOMString("abd"), same = new DOMString("abc");
    assert(lower.compareString(higher) == -1);
    assert(higher.compareString(lower) == 1);
    assert(lower.compareString(same) == 0);
}