/++
JSON Parsing API

Copyright: Tamedia Digital, 2016-2017

Authors: Ilya Yaroshenko

License: MIT

Macros:
SUBMODULE = $(LINK2 fghj_$1.html, fghj.$1)
SUBREF = $(LINK2 fghj_$1.html#.$2, $(TT $2))$(NBSP)
T2=$(TR $(TDNW $(LREF $1)) $(TD $+))
T4=$(TR $(TDNW $(LREF $1)) $(TD $2) $(TD $3) $(TD $4))
+/
module fghj.jsonparser;

import fghj.fghj;
import fghj.outputarray;
import std.experimental.allocator.gc_allocator;
import std.meta;
import std.range.primitives;
import std.traits;
import std.typecons;
import mir.serde: SerdeException;

version(LDC)
{
    import ldc.attributes: optStrategy;
    enum minsize = optStrategy("minsize");

    static if (__traits(targetHasFeature, "sse4.2"))
    {
        import core.simd;
        import ldc.simd;
        import ldc.gccbuiltins_x86;
        version = SSE42;
    }
}
else
{
    enum minsize;
}

version(X86_64)
    version = X86_Any;
else
version(X86)
    version = X86_Any;

private alias FGHJGCAllocator = typeof(GCAllocator.instance);

/++
Parses json value
Params:
    chunks = input range composed of elements type of `const(ubyte)[]`.
        `chunks` can use the same buffer for each chunk.
    initLength = initial output buffer length. Minimum value is 32.
        (The current implementation does not read this argument; it is kept
        so existing callers continue to compile.)
Returns:
    FGHJ value
Throws:
    `FghjSerdeException` carrying the parser's error message and the
    zero-based location reported by the parser.
+/
Fghj parseJson(
    Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
    Flag!"spaces" spaces = Yes.spaces,
    Chunks)
    (Chunks chunks, size_t initLength = 32)
    if(is(ElementType!Chunks : const(ubyte)[]))
{
    enum assumeValid = false;
    auto parser = jsonParser!(includingNewLine, spaces, assumeValid)(FGHJGCAllocator.instance, chunks);
    return parseJson(parser);
}

///
unittest
{
    import std.range: chunks;
    auto text = cast(const ubyte[])`true `;
    auto ch = text.chunks(3);
    assert(ch.parseJson(32).data == [1]);
}


/++
Parses json value
Params:
    str = input string
    allocator = (optional) memory allocator
Returns:
    FGHJ value
Throws:
    `FghjSerdeException` carrying the parser's error message and the
    zero-based location reported by the parser.
+/
Fghj parseJson(
    Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
    Flag!"spaces" spaces = Yes.spaces,
    Flag!"assumeValid" assumeValid = No.assumeValid,
    Allocator,
    )
    (in char[] str, auto ref Allocator allocator)
{
    auto parser = jsonParser!(includingNewLine, spaces, assumeValid)(allocator, str);
    return parseJson(parser);
}


///
@system unittest {
    import std.experimental.allocator.mallocator: Mallocator;
    import std.experimental.allocator.showcase: StackFront;

    StackFront!(1024, Mallocator) allocator;
    auto json = parseJson(`{"ak": {"sub": "subval"} }`, allocator);
    assert(json["ak", "sub"] == "subval");
}

/// Faulty location
pure unittest
{
    import fghj;
    try
    {
        auto data = `[1, 2, ]`.parseJson;
    }
    catch(FghjSerdeException e)
    {
        import std.conv;
        /// zero based index
        assert(e.location == 7);
        return;
    }
    assert(0);
}

/// ditto
Fghj parseJson(
    Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
    Flag!"spaces" spaces = Yes.spaces,
    Flag!"assumeValid" assumeValid = No.assumeValid,
    )
    (in char[] str)
{
    auto parser = jsonParser!(includingNewLine, spaces, assumeValid)(FGHJGCAllocator.instance, str);
    return parseJson(parser);
}

///
unittest
{
    assert(`{"ak": {"sub": "subval"} }`.parseJson["ak", "sub"] == "subval");
}


// Runs the parser once and converts a non-zero error code into an exception.
private Fghj parseJson(Parser)(ref Parser parser) {
    size_t location;
    if (parser.parse(location))
        throw new FghjSerdeException(parser.lastError, location);
    return Fghj(parser.result);
}


deprecated("please remove the initBufferLength argument (latest)")
auto parseJsonByLine(
    Flag!"spaces" spaces = Yes.spaces,
    Input)
    (Input input, sizediff_t initBufferLength)
{
    return .parseJsonByLine!(spaces, No.throwOnInvalidLines, Input)(input);
}

/++
Parses JSON value in each line from a Range of buffers.
Params:
    spaces = adds support for spaces between json tokens. Default value is Yes.
    throwOnInvalidLines = throws an $(LREF SerdeException) on invalid lines if Yes and ignore invalid lines if No. Default value is No.
    input = input range composed of elements type of `const(ubyte)[]` or string / const(char)[].
        `chunks` can use the same buffer for each chunk.
Returns:
    Input range composed of FGHJ values. Each value uses the same internal buffer.
+/
auto parseJsonByLine(
    Flag!"spaces" spaces = Yes.spaces,
    Flag!"throwOnInvalidLines" throwOnInvalidLines = No.throwOnInvalidLines,
    Input)
    (Input input)
{
    alias Parser = JsonParser!(false, cast(bool)spaces, false, FGHJGCAllocator, Input);
    struct ByLineValue
    {
        Parser parser;
        // `_nextEmpty` is set while the last parsed value is still exposed
        // through `front`; the following `popFront` then sets `_empty`.
        private bool _empty, _nextEmpty;

        void popFront()
        {
            for(;;)
            {
                assert(!empty);
                if(_nextEmpty)
                {
                    _empty = true;
                    return;
                }
                // parser.oa.shift = 0;
                parser.dataLength = 0;
                auto error = parser.parse;
                if(!error)
                {
                    auto t = parser.skipSpaces_;
                    if(t != '\n' && t != 0)
                    {
                        error = FghjErrorCode.unexpectedValue;
                        parser._lastError = "expected new line or end of input";
                    }
                    else
                    if(t == 0)
                    {
                        _nextEmpty = true;
                        return;
                    }
                    else
                    {
                        parser.skipNewLine;
                        _nextEmpty = !parser.skipSpaces_;
                        return;
                    }
                }
                static if (throwOnInvalidLines)
                    throw new SerdeException(parser.lastError);
                else
                {
                    // `skipLine` returns false when the input ran out before a
                    // '\n' was found. Nothing is left to parse in that case, so
                    // the range must end here; retrying `parse` on exhausted
                    // input would fail again and never terminate.
                    if (!parser.skipLine())
                    {
                        _empty = true;
                        return;
                    }
                }
            }
        }

        auto front() @property
        {
            assert(!empty);
            return Fghj(parser.result);
        }

        bool empty()
        {
            return _empty;
        }
    }
    ByLineValue ret;
    if(input.empty)
    {
        ret._empty = ret._nextEmpty = true;
    }
    else
    {
        ret = ByLineValue(Parser(FGHJGCAllocator.instance, input));
        ret.popFront;
    }
    return ret;
}

// An invalid last line must end the range instead of looping forever.
unittest
{
    size_t count;
    foreach (value; "{}\nbad\n".parseJsonByLine)
        count++;
    assert(count == 1);
}

version(LDC)
{
    public import ldc.intrinsics: _expect = llvm_expect;
}
else
{
    // Fallback for compilers without `llvm_expect`: the hint is dropped and
    // the value is returned unchanged.
    T _expect(T)(T val, T expected_val) if (__traits(isIntegral, T))
    {
        return val;
    }
}

/// Result codes returned by `JsonParser.parse`; `success` is zero.
enum FghjErrorCode
{
    success,
    unexpectedEnd,
    unexpectedValue,
}

// Per-byte classification bits, tested by the `is*` helpers below:
//   1 - isPlainJsonCharacter
//   2 - isJsonWhitespace
//   4 - isJsonLineWhitespace
//   8 - isJsonNumber
private __gshared immutable ubyte[256] parseFlags = [
 // 0 1 2 3 4 5 6 7   8 9 A B C D E F
    0,0,0,0,0,0,0,0,  0,6,2,0,0,6,0,0, // 0
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, // 1
    7,1,0,1,1,1,1,1,  1,1,1,9,1,9,9,1, // 2
    9,9,9,9,9,9,9,9,  9,9,1,1,1,1,1,1, // 3

    1,1,1,1,1,9,1,1,  1,1,1,1,1,1,1,1, // 4
    1,1,1,1,1,1,1,1,  1,1,1,1,0,1,1,1, // 5
    1,1,1,1,1,9,1,1,  1,1,1,1,1,1,1,1, // 6
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1, // 7

    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,

    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
];

// Hexadecimal digit value for each byte, or -1 when the byte is not a hex digit.
private __gshared immutable byte[256] uniFlags = [
 //  0  1  2  3  4  5  6  7    8  9  A  B  C  D  E  F
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 0
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 1
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 2
     0, 1, 2, 3, 4, 5, 6, 7,   8, 9,-1,-1,-1,-1,-1,-1, // 3

    -1,10,11,12,13,14,15,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 4
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 5
    -1,10,11,12,13,14,15,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 6
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 7

    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,

    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
];


/// True when bit 1 of `parseFlags[c]` is set.
pragma(inline, true)
bool isPlainJsonCharacter()(size_t c)
{
    return (parseFlags[c] & 1) != 0;
}

/// True when bit 2 of `parseFlags[c]` is set.
pragma(inline, true)
bool isJsonWhitespace()(size_t c)
{
    return (parseFlags[c] & 2) != 0;
}

/// True when bit 4 of `parseFlags[c]` is set (the table excludes '\n' here).
pragma(inline, true)
bool isJsonLineWhitespace()(size_t c)
{
    return (parseFlags[c] & 4) != 0;
}

/// True when bit 8 of `parseFlags[c]` is set.
pragma(inline, true)
bool isJsonNumber()(size_t c)
{
    return (parseFlags[c] & 8) != 0;
}

// Casts a function pointer or delegate to the same type with `pure` added.
package auto assumePure(T)(T t)
    if (isFunctionPointer!T || isDelegate!T)
{
    enum attrs = functionAttributes!T | FunctionAttribute.pure_;
    return cast(SetFunctionAttributes!(T, functionLinkage!T, attrs)) t;
}

// Calls `fn` through a pointer that has been cast to `pure`.
package auto callPure(alias fn,T...)(T args)
{
    auto fp = assumePure(&fn);
    return (*fp)(args);
}

/+
Fast piecewise stack.

The first `Node.length` values are stored in the inline `buffer`; every
further group of `Node.length` values is stored in a malloc'ed buffer, and
the descriptor of the previous group is saved in a malloc'ed `Node`.
+/
private struct Stack
{
    import core.stdc.stdlib: cmalloc = malloc, cfree = free;
    @disable this(this);

    struct Node
    {
        enum length = 32; // 2 power
        Node* prev;
        size_t* buff;
    }

    size_t[Node.length] buffer = void;
    size_t length = 0;
    Node node;

pure:

    void push()(size_t value)
    {
        version(LDC)
            pragma(inline, true);
        immutable local = length++ & (Node.length - 1);
        if (local)
        {
            node.buff[local] = value;
        }
        else
        if (length == 1)
        {
            node = Node(null, buffer.ptr);
            buffer[0] = value;
        }
        else
        {
            auto prevNode = cast(Node*) callPure!cmalloc(Node.sizeof);
            *prevNode = node;
            node.prev = prevNode;
            node.buff = cast(size_t*) callPure!cmalloc(Node.length * size_t.sizeof);
            node.buff[0] = value;
        }
    }

    size_t top()()
    {
        version(LDC)
            pragma(inline, true);
        assert(length);
        immutable local = (length - 1) & (Node.length - 1);
        return node.buff[local];
    }

    size_t pop()()
    {
        version(LDC)
            pragma(inline, true);
        assert(length);
        immutable local = --length & (Node.length - 1);
        immutable ret = node.buff[local];
        if (local == 0)
        {
            if (node.buff != buffer.ptr)
            {
                // Release both allocations made by `push` for this group:
                // the value buffer and the saved descriptor of the previous one.
                auto prevNode = node.prev;
                callPure!cfree(node.buff);
                node = *prevNode;
                callPure!cfree(prevNode);
            }
        }
        return ret;
    }

    // Releases every heap allocation still owned by the stack.
    pragma(inline, false)
    void free()()
    {
        if (node.buff is null)
            return;
        while(node.buff !is buffer.ptr)
        {
            auto prevNode = node.prev;
            callPure!cfree(node.buff);
            node = *prevNode;
            callPure!cfree(prevNode);
        }
    }
}

unittest
{
    Stack stack;
    assert(stack.length == 0);
    foreach(i; 1 .. 100)
    {
        stack.push(i);
        assert(stack.length == i);
        assert(stack.top() == i);
    }
    foreach_reverse(i; 1 .. 100)
    {
        assert(stack.length == i);
        assert(stack.pop() == i);
    }
    assert(stack.length == 0);
}

///
auto jsonParser(bool includingNewLine, bool hasSpaces, bool assumeValid, Allocator, Input = const(ubyte)[])(auto ref Allocator allocator, Input input) {
    return JsonParser!(includingNewLine, hasSpaces, assumeValid, Allocator, Input)(allocator, input);
}

///
struct JsonParser(bool includingNewLine, bool hasSpaces, bool assumeValid, Allocator, Input = const(ubyte)[])
{

    ubyte[] data;
    Allocator* allocator;
    Input input;
    static if (chunked)
        ubyte[] front;
    else
        alias front = input;
    size_t dataLength;

    string _lastError;

    // Any input that does not convert to `const(char)[]` is treated as a range of chunks.
    enum bool chunked = !is(Input : const(char)[]);

    this(ref Allocator allocator, Input input)

    {
        this.input = input;
        this.allocator = &allocator;
    }

    // Makes `front` non-empty if possible; returns false when no input is left.
    bool prepareInput_()()
    {
        static if (chunked)
        {
            if (front.length == 0)
            {
                // `parse` may already have popped the last chunk, so an
                // exhausted range is a normal end-of-input condition here.
                if (input.empty)
                    return false;
                input.popFront;
                if (input.empty)
                    return false;
                front = cast(typeof(front)) input.front;
            }
        }
        return front.length != 0;
    }

    void skipNewLine()()
    {
        assert(front.length);
        assert(front[0] == '\n');
        front = front[1 .. $];
    }

    // Skips whitespace (when `hasSpaces`) and returns the next character
    // without consuming it, or 0 when no input is left.
    char skipSpaces_()()
    {
        static if (hasSpaces)
        for(;;)
        {
            if (prepareInput_ == false)
                return 0;
            static if (includingNewLine)
                alias isWhite = isJsonWhitespace;
            else
                alias isWhite = isJsonLineWhitespace;
            if (isWhite(front[0]))
            {
                front = front[1 .. $];
                continue;
            }
            return front[0];
        }
        else
        {
            if (prepareInput_ == false)
                return 0;
            return front[0];
        }
    }

    // Consumes input up to and including the next '\n'.
    // Returns false when the input ends before a '\n' is found.
    bool skipLine()()
    {
        for(;;)
        {
            if (_expect(!prepareInput_, false))
                return false;
            auto c = front[0];
            front = front[1 .. $];
            if (c == '\n')
                return true;
        }
    }

    auto result()()
    {
        return data[0 .. dataLength];
    }

    string lastError()() @property
    {
        return _lastError;
    }


    FghjErrorCode parse()
    {
        size_t location;
        return parse(location);
    }

    /++
    Parses one JSON value from the input into `data`.
    Params:
        location = on failure, receives the position accumulated by the parser
            (consumed chunk lengths plus the offset inside the current `front`).
    Returns:
        `FghjErrorCode.success` (zero) on success; otherwise an error code,
        with a description available through `lastError`.
    +/
    pragma(inline, false)
    FghjErrorCode parse(out size_t location)
    {
        version(SSE42)
        {
            enum byte16 str2E = [
                '\u0001', '\u001F',
                '\"', '\"',
                '\\', '\\',
                '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0'];
            enum byte16 num2E = ['+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'e', 'E', '\0'];
            byte16 str2 = str2E;
            byte16 num2 = num2E;
        }

        const(ubyte)* strPtr;
        const(ubyte)* strEnd;
        ubyte* dataPtr;
        ubyte* stringAndNumberShift = void;
        static if (chunked)
        {
            // Fetches the next chunk and grows `data` so that it can hold six
            // output bytes per input byte of the new chunk.
            bool prepareInput()()
            {
                pragma(inline, false);
                if(strPtr)
                {
                    location += front.length;
                    input.popFront;
                    if (input.empty)
                    {
                        return false;
                    }
                }
                front = cast(typeof(front)) input.front;
                if (front.length == 0)
                    return false;
                strPtr = front.ptr;
                strEnd = front.ptr + front.length;
                const dataAddLength = front.length * 6;
                const dataLength = dataPtr - data.ptr;
                const dataRequiredLength = dataLength + dataAddLength;
                if (data.length < dataRequiredLength)
                {
                    const valueLength = stringAndNumberShift - dataPtr;
                    import std.algorithm.comparison: max;
                    const len = max(data.length * 2, dataRequiredLength);
                    allocator.reallocate(*cast(void[]*)&data, len);
                    // `data` may have moved: rebase the output pointers.
                    dataPtr = data.ptr + dataLength;
                    stringAndNumberShift = dataPtr + valueLength;
                }
                return true;
            }
            strPtr = front.ptr;
            strEnd = front.ptr + front.length;
        }
        else
        {
            strPtr = cast(const(ubyte)*) input.ptr;
            strEnd = cast(const(ubyte)*) input.ptr + input.length;
            enum bool prepareInput = false;
        }

        auto rl = (strEnd - strPtr) * 6;
        if (data.ptr !is null && data.length < rl)
        {
            allocator.deallocate(data);
            data = null;
        }
        if (data.ptr is null)
        {
            data = cast(ubyte[])allocator.allocate(rl);
        }
        dataPtr = data.ptr;

        // Skips whitespace; returns false when the input ended.
        bool skipSpaces()()
        {
            version(LDC)
                pragma(inline, true);
            static if (includingNewLine)
                alias isWhite = isJsonWhitespace;
            else
                alias isWhite = isJsonLineWhitespace;
            F:
            {
                if (_expect(strEnd != strPtr, true))
                {
                L:
                    static if (hasSpaces)
                    {
                        if (isWhite(strPtr[0]))
                        {
                            strPtr++;
                            goto F;
                        }
                    }
                    return true;
                }
                else
                {
                    if (prepareInput)
                        goto L;
                    return false;
                }
            }

        }

        // Reads four hex digits into `d`.
        // Returns 0 on success, 1 on end of input, -1 on a non-hex character.
        @minsize
        int readUnicode()(ref dchar d)
        {
            version(LDC)
                pragma(inline, true);
            uint e = 0;
            size_t i = 4;
            do
            {
                if (strEnd == strPtr && !prepareInput)
                    return 1;
                int c = uniFlags[*strPtr++];
                assert(c < 16);
                if (c == -1)
                    return -1;
                assert(c >= 0);
                e <<= 4;
                e ^= c;
            }
            while(--i);
            d = e;
            return 0;
        }

        // Each entry is (offset of the structure's length field << 1) | isObject.
        Stack stack;

        typeof(return) retCode;
        bool currIsKey = void;
        size_t stackValue = void;
        goto value;

    /////////// RETURN
    ret:
        front = front[cast(typeof(front.ptr)) strPtr - front.ptr .. $];
        dataLength = dataPtr - data.ptr;
        assert(stack.length == 0);
    ret_final:
        return retCode;
    ///////////

    key:
        if (!skipSpaces)
            goto object_key_unexpectedEnd;
    key_start:
        if (*strPtr != '"')
            goto object_key_start_unexpectedValue;
        currIsKey = true;
        stringAndNumberShift = dataPtr;
        // reserve 1 byte for the length
        dataPtr += 1;
        goto string;
    next:
        if (stack.length == 0)
            goto ret;
        {
            if (!skipSpaces)
                goto next_unexpectedEnd;
            stackValue = stack.top;
            const isObject = stackValue & 1;
            auto v = *strPtr++;
            if (isObject)
            {
                if (v == ',')
                    goto key;
                if (v != '}')
                    goto next_unexpectedValue;
            }
            else
            {
                if (v == ',')
                    goto value;
                if (v != ']')
                    goto next_unexpectedValue;
            }
        }
    structure_end: {
        stackValue = stack.pop();
        const structureShift = stackValue >> 1;
        const structureLengthPtr = data.ptr + structureShift;
        const size_t structureLength = dataPtr - structureLengthPtr - 4;
        if (structureLength > uint.max)
            goto object_or_array_is_to_large;
        version(X86_Any)
            *cast(uint*) structureLengthPtr = cast(uint) structureLength;
        else
            *cast(ubyte[4]*) structureLengthPtr = cast(ubyte[4]) cast(uint[1]) [cast(uint) structureLength];
        goto next;
    }
    value:
        if (!skipSpaces)
            goto value_unexpectedEnd;
    value_start:
        switch(*strPtr)
        {
            stringValue:
            case '"':
                currIsKey = false;
                *dataPtr++ = Fghj.Kind.string;
                stringAndNumberShift = dataPtr;
                // reserve 4 byte for the length
                dataPtr += 4;
                goto string;
            case '-':
            case '0':
            ..
            case '9': {
                *dataPtr++ = Fghj.Kind.number;
                stringAndNumberShift = dataPtr;
                // reserve 1 byte for the length
                dataPtr++;
                // write the first character
                *dataPtr++ = *strPtr++;
                for(;;)
                {
                    if (strEnd == strPtr && !prepareInput)
                        goto number_found;
                    version(SSE42)
                    {
                        while (strEnd >= strPtr + 16)
                        {
                            byte16 str1 = loadUnaligned!byte16(cast(byte*)strPtr);
                            size_t ecx = __builtin_ia32_pcmpistri128(num2, str1, 0x10);
                            storeUnaligned!byte16(str1, cast(byte*)dataPtr);
                            strPtr += ecx;
                            dataPtr += ecx;
                            if(ecx != 16)
                                goto number_found;
                        }
                    }
                    else
                    {
                        while(strEnd >= strPtr + 4)
                        {
                            char c0 = strPtr[0]; dataPtr += 4;     if (!isJsonNumber(c0)) goto number_found0;
                            char c1 = strPtr[1]; dataPtr[-4] = c0; if (!isJsonNumber(c1)) goto number_found1;
                            char c2 = strPtr[2]; dataPtr[-3] = c1; if (!isJsonNumber(c2)) goto number_found2;
                            char c3 = strPtr[3]; dataPtr[-2] = c2; if (!isJsonNumber(c3)) goto number_found3;
                            strPtr += 4;         dataPtr[-1] = c3;
                        }
                    }
                    while(strEnd > strPtr)
                    {
                        char c0 = strPtr[0]; if (!isJsonNumber(c0)) goto number_found; dataPtr[0] = c0;
                        strPtr += 1;
                        dataPtr += 1;
                    }
                }
            version(SSE42){} else
            {
                number_found3: dataPtr++; strPtr++;
                number_found2: dataPtr++; strPtr++;
                number_found1: dataPtr++; strPtr++;
                number_found0: dataPtr -= 4;
            }
            number_found:

                auto numberLength = dataPtr - stringAndNumberShift - 1;
                if (numberLength > ubyte.max)
                    goto number_length_unexpectedValue;
                *stringAndNumberShift = cast(ubyte) numberLength;
                goto next;
            }
            case '{':
                strPtr++;
                *dataPtr++ = Fghj.Kind.object;
                stack.push(((dataPtr - data.ptr) << 1) ^ 1);
                dataPtr += 4;
                if (!skipSpaces)
                    goto object_first_value_start_unexpectedEnd;
                if (*strPtr != '}')
                    goto key_start;
                strPtr++;
                goto structure_end;
            case '[':
                strPtr++;
                *dataPtr++ = Fghj.Kind.array;
                stack.push(((dataPtr - data.ptr) << 1) ^ 0);
                dataPtr += 4;
                if (!skipSpaces)
                    goto array_first_value_start_unexpectedEnd;
                if (*strPtr != ']')
                    goto value_start;
                strPtr++;
                goto structure_end;
            foreach (name; AliasSeq!("false", "null", "true"))
            {
            case name[0]:
                    if (_expect(strEnd - strPtr >= name.length, true))
                    {
                        static if (!assumeValid)
                        {
                            version(X86_Any)
                            {
                                // Compare the last four characters of the keyword with one
                                // 32-bit load; for "false" the load starts one byte later.
                                enum uint referenceValue =
                                        (uint(name[$ - 4]) << 0x00) ^
                                        (uint(name[$ - 3]) << 0x08) ^
                                        (uint(name[$ - 2]) << 0x10) ^
                                        (uint(name[$ - 1]) << 0x18);
                                if (*cast(uint*)(strPtr + bool(name.length == 5)) != referenceValue)
                                {
                                    static if (name == "true")
                                        goto true_unexpectedValue;
                                    else
                                    static if (name == "false")
                                        goto false_unexpectedValue;
                                    else
                                        goto null_unexpectedValue;
                                }
                            }
                            else
                            {
                                char[name.length - 1] c = void;
                                import std.range: iota;
                                foreach (i; aliasSeqOf!(iota(1, name.length)))
                                    c[i - 1] = strPtr[i];
                                foreach (i; aliasSeqOf!(iota(1, name.length)))
                                {
                                    if (c[i - 1] != name[i])
                                    {

                                        static if (name == "true")
                                            goto true_unexpectedValue;
                                        else
                                        static if (name == "false")
                                            goto false_unexpectedValue;
                                        else
                                            goto null_unexpectedValue;
                                    }
                                }
                            }
                        }
                        static if (name == "null")
                            *dataPtr++ = Fghj.Kind.null_;
                        else
                        static if (name == "false")
                            *dataPtr++ = Fghj.Kind.false_;
                        else
                            *dataPtr++ = Fghj.Kind.true_;
                        strPtr += name.length;
                        goto next;
                    }
                    else
                    {
                        // Fewer bytes than the keyword remain in the current buffer:
                        // match one character at a time, refilling as needed.
                        strPtr += 1;
                        foreach (i; 1 .. name.length)
                        {
                            if (strEnd == strPtr && !prepareInput)
                            {
                                static if (name == "true")
                                    goto true_unexpectedEnd;
                                else
                                static if (name == "false")
                                    goto false_unexpectedEnd;
                                else
                                    goto null_unexpectedEnd;
                            }
                            static if (!assumeValid)
                            {
                                if (_expect(strPtr[0] != name[i], false))
                                {
                                    static if (name == "true")
                                        goto true_unexpectedValue;
                                    else
                                    static if (name == "false")
                                        goto false_unexpectedValue;
                                    else
                                        goto null_unexpectedValue;
                                }
                            }
                            strPtr++;
                        }
                        static if (name == "null")
                            *dataPtr++ = Fghj.Kind.null_;
                        else
                        static if (name == "false")
                            *dataPtr++ = Fghj.Kind.false_;
                        else
                            *dataPtr++ = Fghj.Kind.true_;
                        goto next;
                    }
            }
            default: goto value_unexpectedStart;
        }

    string:
        debug assert(*strPtr == '"', "Internal FGHJ logic error. Please report an issue.");
        strPtr += 1;

    StringLoop: {
        for(;;)
        {
            if (strEnd == strPtr && !prepareInput)
                goto string_unexpectedEnd;
            version(SSE42)
            {
                while (strEnd >= strPtr + 16)
                {
                    byte16 str1 = loadUnaligned!byte16(cast(byte*)strPtr);
                    size_t ecx = __builtin_ia32_pcmpistri128(str2, str1, 0x04);
                    storeUnaligned!byte16(str1, cast(byte*)dataPtr);
                    strPtr += ecx;
                    dataPtr += ecx;
                    if(ecx != 16)
                        goto string_found;
                }
            }
            else
            {
                while(strEnd >= strPtr + 4)
                {
                    char c0 = strPtr[0]; dataPtr += 4;     if (!isPlainJsonCharacter(c0)) goto string_found0;
                    char c1 = strPtr[1]; dataPtr[-4] = c0; if (!isPlainJsonCharacter(c1)) goto string_found1;
                    char c2 = strPtr[2]; dataPtr[-3] = c1; if (!isPlainJsonCharacter(c2)) goto string_found2;
                    char c3 = strPtr[3]; dataPtr[-2] = c2; if (!isPlainJsonCharacter(c3)) goto string_found3;
                    strPtr += 4;         dataPtr[-1] = c3;
                }
            }
            while(strEnd > strPtr)
            {
                char c0 = strPtr[0]; if (!isPlainJsonCharacter(c0)) goto string_found; dataPtr[0] = c0;
                strPtr += 1;
                dataPtr += 1;
            }
        }
        version(SSE42) {} else
        {
            string_found3: dataPtr++; strPtr++;
            string_found2: dataPtr++; strPtr++;
            string_found1: dataPtr++; strPtr++;
            string_found0: dataPtr -= 4;
        }
        string_found:

        uint c = strPtr[0];
        if (c == '\"')
        {
            strPtr += 1;
            if (currIsKey)
            {
                auto stringLength = dataPtr - stringAndNumberShift - 1;
                if (stringLength > ubyte.max)
                    goto key_is_to_large;
                *cast(ubyte*)stringAndNumberShift = cast(ubyte) stringLength;
                if (!skipSpaces)
                    goto failed_to_read_after_key;
                if (*strPtr != ':')
                    goto unexpected_character_after_key;
                strPtr++;
                goto value;
            }
            else
            {
                auto stringLength = dataPtr - stringAndNumberShift - 4;
                if (stringLength > uint.max)
                    goto string_length_is_too_large;
                version(X86_Any)
                    *cast(uint*)stringAndNumberShift = cast(uint) stringLength;
                else
                    *cast(ubyte[4]*)stringAndNumberShift = cast(ubyte[4]) cast(uint[1]) [cast(uint) stringLength];
                goto next;
            }
        }
        if (c == '\\')
        {
            strPtr += 1;
            if (strEnd == strPtr && !prepareInput)
                goto string_unexpectedEnd;
            c = *strPtr++;
            switch(c)
            {
                case '/' :
                case '\"':
                case '\\':
                    *dataPtr++ = cast(ubyte) c;
                    goto StringLoop;
                case 'b' : *dataPtr++ = '\b'; goto StringLoop;
                case 'f' : *dataPtr++ = '\f'; goto StringLoop;
                case 'n' : *dataPtr++ = '\n'; goto StringLoop;
                case 'r' : *dataPtr++ = '\r'; goto StringLoop;
                case 't' : *dataPtr++ = '\t'; goto StringLoop;
                case 'u' :
                    dchar d = void;
                    if (auto r = (readUnicode(d)))
                    {
                        if (r == 1)
                            goto string_unexpectedEnd;
                        goto string_unexpectedValue;
                    }
                    if (_expect(0xD800 <= d && d <= 0xDFFF, false))
                    {
                        // A lead surrogate must be followed by `\u` and a trail surrogate.
                        if (d >= 0xDC00)
                            goto string_unexpectedValue;
                        if (strEnd == strPtr && !prepareInput)
                            goto string_unexpectedEnd;
                        if (*strPtr++ != '\\')
                            goto string_unexpectedValue;
                        if (strEnd == strPtr && !prepareInput)
                            goto string_unexpectedEnd;
                        if (*strPtr++ != 'u')
                            goto string_unexpectedValue;
                        d = (d & 0x3FF) << 10;
                        dchar trailing;
                        if (auto r = (readUnicode(trailing)))
                        {
                            if (r == 1)
                                goto string_unexpectedEnd;
                            goto string_unexpectedValue;
                        }
                        if (!(0xDC00 <= trailing && trailing <= 0xDFFF))
                            goto invalid_trail_surrogate;
                        {
                            d |= trailing & 0x3FF;
                            d += 0x10000;
                        }
                    }
                    if (!(d < 0xD800 || (d > 0xDFFF && d <= 0x10FFFF)))
                        goto invalid_utf_value;
                    encodeUTF8(d, dataPtr);
                    goto StringLoop;
                default: goto string_unexpectedValue;
            }
        }
        goto string_unexpectedValue;
    }

    ret_error:
        location += strPtr - cast(const(ubyte)*)front.ptr;
        dataLength = dataPtr - data.ptr;
        stack.free();
        goto ret_final;
    unexpectedEnd:
        retCode = FghjErrorCode.unexpectedEnd;
        goto ret_error;
    unexpectedValue:
        retCode = FghjErrorCode.unexpectedValue;
        goto ret_error;
    object_key_unexpectedEnd:
        _lastError = "unexpected end of object key";
        goto unexpectedEnd;
    object_key_start_unexpectedValue:
        _lastError = "expected '\"' when start parsing object key";
        goto unexpectedValue;
    key_is_to_large:
        _lastError = "key length is limited to 255 characters";
        goto unexpectedValue;
    object_or_array_is_to_large:
        _lastError = "object or array serialized size is limited to 2^32-1";
        goto unexpectedValue;
    next_unexpectedEnd:
        stackValue = stack.top;
        _lastError = (stackValue & 1) ? "unexpected end when parsing object" : "unexpected end when parsing array";
        goto unexpectedEnd;
    next_unexpectedValue:
        stackValue = stack.top;
        _lastError = (stackValue & 1) ? "expected ',' or `}` when parsing object" : "expected ',' or `]` when parsing array";
        goto unexpectedValue;
    value_unexpectedStart:
        _lastError = "unexpected character when start parsing JSON value";
        // An unexpected character is a value error, not an end-of-input error.
        goto unexpectedValue;
    value_unexpectedEnd:
        _lastError = "unexpected end when start parsing JSON value";
        goto unexpectedEnd;
    number_length_unexpectedValue:
        _lastError = "number length is limited to 255 characters";
        goto unexpectedValue;
    object_first_value_start_unexpectedEnd:
        _lastError = "unexpected end of input data after '{'";
        goto unexpectedEnd;
    array_first_value_start_unexpectedEnd:
        _lastError = "unexpected end of input data after '['";
        goto unexpectedEnd;
    false_unexpectedEnd:
        _lastError = "unexpected end when parsing 'false'";
        goto unexpectedEnd;
    false_unexpectedValue:
        _lastError = "unexpected character when parsing 'false'";
        goto unexpectedValue;
    null_unexpectedEnd:
        _lastError = "unexpected end when parsing 'null'";
        goto unexpectedEnd;
    null_unexpectedValue:
        _lastError = "unexpected character when parsing 'null'";
        goto unexpectedValue;
    true_unexpectedEnd:
        _lastError = "unexpected end when parsing 'true'";
        goto unexpectedEnd;
    true_unexpectedValue:
        _lastError = "unexpected character when parsing 'true'";
        goto unexpectedValue;
    string_unexpectedEnd:
        _lastError = "unexpected end when parsing string";
        goto unexpectedEnd;
    string_unexpectedValue:
        _lastError = "unexpected character when parsing string";
        goto unexpectedValue;
    failed_to_read_after_key:
        _lastError = "unexpected end after object key";
        goto unexpectedEnd;
    unexpected_character_after_key:
        _lastError = "unexpected character after key";
        goto unexpectedValue;
    string_length_is_too_large:
        _lastError = "string size is limited to 2^32-1";
        goto unexpectedValue;
    invalid_trail_surrogate:
        _lastError = "invalid UTF-16 trail surrogate";
        goto unexpectedValue;
    invalid_utf_value:
        _lastError = "invalid UTF value";
        goto unexpectedValue;
    }
}

unittest
{
    import mir.conv;
    auto fghj_data = parseJson(` [ true, 123 , [ false, 123.0 , "123211" ], "3e23e" ] `);
    auto str = fghj_data.to!string;
    auto str2 = `[true,123,[false,123.0,"123211"],"3e23e"]`;
    assert( str == str2);
}

/// Writes the UTF-8 encoding of `c` at `ptr` and advances `ptr` past it.
pragma(inline, true)
void encodeUTF8()(dchar c, ref ubyte* ptr)
{
    if (c < 0x80)
    {
        ptr[0] = cast(ubyte) (c);
        ptr += 1;
    }
    else
    if (c < 0x800)
    {
        ptr[0] = cast(ubyte) (0xC0 | (c >> 6));
        ptr[1] = cast(ubyte) (0x80 | (c & 0x3F));
        ptr += 2;
    }
    else
    if (c < 0x10000)
    {
        ptr[0] = cast(ubyte) (0xE0 | (c >> 12));
        ptr[1] = cast(ubyte) (0x80 | ((c >> 6) & 0x3F));
        ptr[2] = cast(ubyte) (0x80 | (c & 0x3F));
        ptr += 3;
    }
    else
    {
    //    assert(c < 0x200000);
        ptr[0] = cast(ubyte) (0xF0 | (c >> 18));
        ptr[1] = cast(ubyte) (0x80 | ((c >> 12) & 0x3F));
        ptr[2] = cast(ubyte) (0x80 | ((c >> 6) & 0x3F));
        ptr[3] = cast(ubyte) (0x80 | (c & 0x3F));
        ptr += 4;
    }
}

unittest
{
    auto fghj = "[\"\u007F\"]".parseJson;
}

unittest
{
    auto f = `"\uD801\uDC37"`.parseJson;
    assert(f == "\"\U00010437\"".parseJson);
}

unittest
{
    import std.string;
    import std.range;
    static immutable str = `"1234567890qwertyuiopfghjghjklzxcvbnm"`;
    auto data = Fghj(str[1..$-1]);
    assert(data == parseJson(str));
    foreach(i; 1 .. str.length)
    {
        auto s = parseJson(str.representation.chunks(i));
        assert(data == s);
    }
}

unittest
{
    import std.string;
    import std.range;
    static immutable str = `"\t\r\f\b\"\\\/\t\r\f\b\"\\\/\t\r\f\b\"\\\/\t\r\f\b\"\\\/"`;
    auto data = Fghj("\t\r\f\b\"\\/\t\r\f\b\"\\/\t\r\f\b\"\\/\t\r\f\b\"\\/");
    assert(data == parseJson(str));
    foreach(i; 1 .. str.length)
        assert(data == parseJson(str.representation.chunks(i)));
}

unittest
{
    import std.string;
    import std.range;
    static immutable str = `"\u0026"`;
    auto data = Fghj("&");
    assert(data == parseJson(str));
}

version(unittest) immutable string test_data =
q{{
  "coordinates": [
    {
      "x": 0.29811521136061625,
      "y": 0.47980763779335556,
      "z": 0.1704431616620138,
      "name": "tqxvsg 2780",
      "opts": {
        "1": [
          1,
          true
        ]
      }
    }
  ],
  "info": "some info"
}
};