1 /++
2 JSON Parsing API
3 
4 Copyright: Tamedia Digital, 2016-2017
5 
6 Authors: Ilya Yaroshenko
7 
8 License: MIT
9 
10 Macros:
11 SUBMODULE = $(LINK2 fghj_$1.html, fghj.$1)
12 SUBREF = $(LINK2 fghj_$1.html#.$2, $(TT $2))$(NBSP)
13 T2=$(TR $(TDNW $(LREF $1)) $(TD $+))
14 T4=$(TR $(TDNW $(LREF $1)) $(TD $2) $(TD $3) $(TD $4))
15 +/
16 module fghj.jsonparser;
17 
18 import fghj.fghj;
19 import fghj.outputarray;
20 import std.experimental.allocator.gc_allocator;
21 import std.meta;
22 import std.range.primitives;
23 import std.traits;
24 import std.typecons;
25 import mir.serde: SerdeException;
26 
// Compiler-specific setup. Under LDC `minsize` is an optimisation-strategy
// attribute; on other compilers it is an empty enum so `@minsize` still compiles.
version(LDC)
{
    import ldc.attributes: optStrategy;
    enum minsize = optStrategy("minsize");

    // Turn on the `version(SSE42)` code paths only when the target supports SSE4.2.
    static if (__traits(targetHasFeature, "sse4.2"))
    {
        import core.simd;
        import ldc.simd;
        import ldc.gccbuiltins_x86;
        version = SSE42;
    }
}
else
{
    enum minsize;
}

// `X86_Any` is set for both 64-bit and 32-bit x86 targets.
version(X86_64)
    version = X86_Any;
else
version(X86)
    version = X86_Any;

// Type of the global GC allocator instance; used by the entry points that take no allocator.
private alias FGHJGCAllocator = typeof(GCAllocator.instance);
52 
/++
Parses a JSON value from a range of byte chunks.

Params:
    chunks = input range whose elements convert to `const(ubyte)[]`.
        `chunks` can use the same buffer for each chunk.
    initLength = initial output buffer length in bytes. Values below 32 are
        raised to 32. The buffer still grows on demand while parsing.
Returns:
    FGHJ value
Throws:
    `FghjSerdeException` if the parser reports an error.
+/
Fghj parseJson(
    Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
    Flag!"spaces" spaces = Yes.spaces,
    Chunks)
    (Chunks chunks, size_t initLength = 32)
    if(is(ElementType!Chunks : const(ubyte)[]))
{
    enum assumeValid = false;
    enum size_t minInitLength = 32;
    auto parser = jsonParser!(includingNewLine, spaces, assumeValid)(FGHJGCAllocator.instance, chunks);
    // `initLength` used to be accepted and then ignored. Pre-size the output
    // buffer with it; the parser reallocates the buffer whenever a chunk
    // needs more room, so this only saves early reallocations.
    parser.data = cast(ubyte[]) FGHJGCAllocator.instance.allocate(
        initLength < minInitLength ? minInitLength : initLength);
    return parseJson(parser);
}
73 
///
unittest
{
    import std.range: chunks;
    // Three-byte chunks split the literal across a chunk boundary.
    const(ubyte)[] bytes = cast(const(ubyte)[]) `true `;
    auto parsed = parseJson(bytes.chunks(3), 32);
    assert(parsed.data == [1]);
}
82 
83 
/++
Parses a JSON value from a string, taking output memory from a caller-supplied allocator.

Params:
    str = input string
    allocator = memory allocator used for the output buffer
Returns:
    FGHJ value
Throws:
    `FghjSerdeException` if the parser reports an error.
+/
Fghj parseJson(
    Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
    Flag!"spaces" spaces = Yes.spaces,
    Flag!"assumeValid" assumeValid = No.assumeValid,
    Allocator,
    )
    (in char[] str, auto ref Allocator allocator)
{
    // The parser only lives for the duration of this call.
    auto textParser = jsonParser!(includingNewLine, spaces, assumeValid)(allocator, str);
    return parseJson(textParser);
}
103 
104 
///
@system unittest
{
    import std.experimental.allocator.mallocator: Mallocator;
    import std.experimental.allocator.showcase: StackFront;

    // Parse with a custom allocator instead of the GC.
    StackFront!(1024, Mallocator) allocator;
    string text = `{"ak": {"sub": "subval"} }`;
    auto json = parseJson(text, allocator);
    assert(json["ak", "sub"] == "subval");
}
114 
/// Faulty location
pure unittest
{
    import fghj;
    try
    {
        auto data = `[1, 2, ]`.parseJson;
    }
    catch(FghjSerdeException e)
    {
        // zero based index
        assert(e.location == 7);
        return;
    }
    // Reaching this point means the malformed input was accepted.
    assert(0, "expected FghjSerdeException for malformed input");
}
132 
/// ditto
Fghj parseJson(
    Flag!"includingNewLine" includingNewLine = Yes.includingNewLine,
    Flag!"spaces" spaces = Yes.spaces,
    Flag!"assumeValid" assumeValid = No.assumeValid,
    )
    (in char[] str)
{
    // Same as the allocator overload, with the global GC allocator.
    auto gcParser = jsonParser!(includingNewLine, spaces, assumeValid)(FGHJGCAllocator.instance, str);
    return parseJson(gcParser);
}
144 
///
unittest
{
    auto json = parseJson(`{"ak": {"sub": "subval"} }`);
    assert(json["ak", "sub"] == "subval");
}
150 
151 
// Runs `parser` to completion and wraps its output buffer in an `Fghj`.
// Throws `FghjSerdeException` carrying the parser's message and the reported
// location when `parse` returns anything other than `FghjErrorCode.success`.
private Fghj parseJson(Parser)(ref Parser parser)
{
    size_t errorOffset;
    const status = parser.parse(errorOffset);
    if (status != FghjErrorCode.success)
        throw new FghjSerdeException(parser.lastError, errorOffset);
    return Fghj(parser.result);
}
158 
159 
// Deprecated overload kept for source compatibility. `initBufferLength` is
// not used; the call forwards to the current overload with
// `No.throwOnInvalidLines`.
deprecated("please remove the initBufferLength argument (latest)")
auto parseJsonByLine(
    Flag!"spaces" spaces = Yes.spaces,
    Input)
    (Input input, sizediff_t initBufferLength)
{
    return .parseJsonByLine!(spaces,  No.throwOnInvalidLines, Input)(input);
}
168 
/++
Parses a JSON value in each line from a range of buffers.
Params:
    spaces = adds support for spaces between JSON tokens. Default value is Yes.
    throwOnInvalidLines = throws a `SerdeException` on invalid lines if Yes and ignores invalid lines if No. Default value is No.
    input = input range composed of elements type of `const(ubyte)[]` or string / const(char)[].
        `input` can use the same buffer for each chunk.
Returns:
    Input range composed of FGHJ values. Each value uses the same internal buffer.
+/
auto parseJsonByLine(
    Flag!"spaces" spaces = Yes.spaces,
    Flag!"throwOnInvalidLines" throwOnInvalidLines = No.throwOnInvalidLines,
    Input)
    (Input input)
{
    alias Parser = JsonParser!(false, cast(bool)spaces, false, FGHJGCAllocator, Input);
    struct ByLineValue
    {
        Parser parser;
        // `_nextEmpty` is set once the value currently in `front` is known to
        // be the last one; the following `popFront` then sets `_empty`.
        private bool _empty, _nextEmpty;

        // Parses the next valid line into the parser's buffer.
        void popFront()
        {
            for(;;)
            {
                assert(!empty);
                if(_nextEmpty)
                {
                    _empty = true;
                    return;
                }
                // Start writing at the beginning of the shared output buffer.
                parser.dataLength = 0;
                auto error = parser.parse;
                if(!error)
                {
                    auto t = parser.skipSpaces_;
                    if(t != '\n' && t != 0)
                    {
                        // Something other than a line break follows the value.
                        error = FghjErrorCode.unexpectedValue;
                        parser._lastError = "expected new line or end of input";
                    }
                    else
                    if(t == 0)
                    {
                        // Input ended right after this value.
                        _nextEmpty = true;
                        return;
                    }
                    else
                    {
                        parser.skipNewLine;
                        _nextEmpty = !parser.skipSpaces_;
                        return;
                    }
                }
                static if (throwOnInvalidLines)
                    throw new SerdeException(parser.lastError);
                else
                {
                    // Drop the rest of the invalid line. `skipLine` returns
                    // false when the input ran out before a line break; there
                    // is nothing left to parse then, so stop instead of
                    // re-parsing exhausted input.
                    if (!parser.skipLine())
                    {
                        _empty = true;
                        return;
                    }
                }
            }
        }

        // The value parsed by the last `popFront`; it refers to the parser's buffer.
        auto front() @property
        {
            assert(!empty);
            return Fghj(parser.result);
        }

        bool empty()
        {
            return _empty;
        }
    }
    ByLineValue ret;
    if(input.empty)
    {
        ret._empty = ret._nextEmpty = true;
    }
    else
    {
        ret = ByLineValue(Parser(FGHJGCAllocator.instance, input));
        // Prime the range with the first valid line.
        ret.popFront;
    }
    return ret;
}
255 
// `_expect(val, expected_val)` wraps a value with the value it is expected to
// have. Under LDC it is an alias of the `llvm_expect` intrinsic.
version(LDC)
{
    public import ldc.intrinsics: _expect = llvm_expect;
}
else
{
    // Fallback for other compilers: returns `val` unchanged and ignores `expected_val`.
    T _expect(T)(T val, T expected_val) if (__traits(isIntegral, T))
    {
        return val;
    }
}
267 
/// Status codes returned by `JsonParser.parse`. Callers in this module treat any non-zero value as an error.
enum FghjErrorCode
{
    /// No error (value 0).
    success,
    /// The input ended where more data was required.
    unexpectedEnd,
    /// The input contained something other than what was expected at that position.
    unexpectedValue,
}
274 
// Per-byte classification table, indexed by the byte value. Bit meanings, as
// tested by the predicate functions in this module:
//   1 - plain character (`isPlainJsonCharacter`); `"`, `\` and bytes below 0x20 have it clear
//   2 - whitespace including line feed (`isJsonWhitespace`)
//   4 - whitespace excluding line feed (`isJsonLineWhitespace`)
//   8 - number character: digits, `+`, `-`, `.`, `e`, `E` (`isJsonNumber`)
private __gshared immutable ubyte[256] parseFlags = [
 // 0 1 2 3 4 5 6 7   8 9 A B C D E F
    0,0,0,0,0,0,0,0,  0,6,2,0,0,6,0,0, // 0
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, // 1
    7,1,0,1,1,1,1,1,  1,1,1,9,1,9,9,1, // 2
    9,9,9,9,9,9,9,9,  9,9,1,1,1,1,1,1, // 3

    1,1,1,1,1,9,1,1,  1,1,1,1,1,1,1,1, // 4
    1,1,1,1,1,1,1,1,  1,1,1,1,0,1,1,1, // 5
    1,1,1,1,1,9,1,1,  1,1,1,1,1,1,1,1, // 6
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1, // 7

    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,

    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
];
297 
// Hexadecimal digit values, indexed by the byte value: '0'-'9' map to 0-9,
// 'A'-'F' and 'a'-'f' map to 10-15, every other byte maps to -1.
// Read by the parser's `readUnicode` helper.
private __gshared immutable byte[256] uniFlags = [
 //  0  1  2  3  4  5  6  7    8  9  A  B  C  D  E  F
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 0
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 1
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 2
     0, 1, 2, 3, 4, 5, 6, 7,   8, 9,-1,-1,-1,-1,-1,-1, // 3

    -1,10,11,12,13,14,15,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 4
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 5
    -1,10,11,12,13,14,15,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 6
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1, // 7

    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,

    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,  -1,-1,-1,-1,-1,-1,-1,-1,
];
320 
321 
/// Returns: `true` when `parseFlags[c]` has bit 1 set. `c` must be below 256.
pragma(inline, true)
bool isPlainJsonCharacter()(size_t c)
{
    enum ubyte plainBit = 1;
    return (parseFlags[c] & plainBit) == plainBit;
}
327 
/// Returns: `true` when `parseFlags[c]` has bit 2 set. `c` must be below 256.
pragma(inline, true)
bool isJsonWhitespace()(size_t c)
{
    enum ubyte whitespaceBit = 2;
    return (parseFlags[c] & whitespaceBit) == whitespaceBit;
}
333 
/// Returns: `true` when `parseFlags[c]` has bit 4 set. `c` must be below 256.
pragma(inline, true)
bool isJsonLineWhitespace()(size_t c)
{
    enum ubyte lineWhitespaceBit = 4;
    return (parseFlags[c] & lineWhitespaceBit) == lineWhitespaceBit;
}
339 
/// Returns: `true` when `parseFlags[c]` has bit 8 set. `c` must be below 256.
pragma(inline, true)
bool isJsonNumber()(size_t c)
{
    enum ubyte numberBit = 8;
    return (parseFlags[c] & numberBit) == numberBit;
}
345 
// Casts a function pointer or delegate to the same type with `pure` added.
// The cast is unchecked: the caller vouches that calling `t` from pure code is acceptable.
package auto assumePure(T)(T t)
    if (isFunctionPointer!T || isDelegate!T)
{
    alias PureT = SetFunctionAttributes!(
        T,
        functionLinkage!T,
        functionAttributes!T | FunctionAttribute.pure_);
    return cast(PureT) t;
}
352 
// Calls `fn(args)` through a pointer that has been cast to `pure` by `assumePure`.
package auto callPure(alias fn,T...)(T args)
{
    return (*assumePure(&fn))(args);
}
358 
/+
Fast piecewise stack of `size_t` values.

The first `Node.length` values are stored inline in `buffer`. Every further
group of `Node.length` values lives in a chunk obtained from C `malloc`; the
state of the previous chunk is saved in a malloc'ed `Node` reachable through
`node.prev`. Copying is disabled; `node.buff` may point into the struct's own
`buffer`.
+/
private struct Stack
{
    import core.stdc.stdlib: cmalloc = malloc, cfree = free;
    @disable this(this);

    struct Node
    {
        enum length = 32; // 2 power
        Node* prev;
        size_t* buff;
    }

    size_t[Node.length] buffer = void;
    size_t length = 0;
    Node node;

pure:

    // Appends `value`. Starts a new heap chunk whenever the current one is full.
    void push()(size_t value)
    {
        version(LDC)
            pragma(inline, true);
        immutable local = length++ & (Node.length - 1);
        if (local)
        {
            node.buff[local] = value;
        }
        else
        if (length == 1)
        {
            // Very first element: use the inline buffer.
            node = Node(null, buffer.ptr);
            buffer[0] = value;
        }
        else
        {
            // Save the current node on the heap and switch to a fresh chunk.
            auto prevNode = cast(Node*) callPure!cmalloc(Node.sizeof);
            *prevNode = node;
            node.prev = prevNode;
            node.buff = cast(size_t*) callPure!cmalloc(Node.length * size_t.sizeof);
            node.buff[0] = value;
        }
    }

    // Returns the most recently pushed value without removing it.
    size_t top()()
    {
        version(LDC)
            pragma(inline, true);
        assert(length);
        immutable local = (length - 1) & (Node.length - 1);
        return node.buff[local];
    }

    // Removes and returns the most recently pushed value.
    size_t pop()()
    {
        version(LDC)
            pragma(inline, true);
        assert(length);
        immutable local = --length & (Node.length - 1);
        immutable ret = node.buff[local];
        // The first slot of a heap chunk was just removed: the chunk is empty.
        if (local == 0 && node.buff != buffer.ptr)
            dropChunk();
        return ret;
    }

    // Releases every heap chunk. `length` is left unchanged.
    // NOTE(review): the declaration is marked `pragma(inline, false)` while the
    // body requests `pragma(inline, true)` under LDC; kept as in the original.
    pragma(inline, false)
    void free()()
    {
        version(LDC)
            pragma(inline, true);
        if (node.buff is null)
            return;
        while(node.buff !is buffer.ptr)
            dropChunk();
    }

    // Frees the current heap chunk and restores the node saved before it.
    // The saved `Node` is itself malloc'ed by `push`, so it is freed as well;
    // previously only the chunk buffer was released and the `Node` leaked.
    private void dropChunk()()
    {
        auto saved = node.prev;
        callPure!cfree(node.buff);
        node = *saved;
        callPure!cfree(saved);
    }
}
446 
// Pushes 99 values (crossing several chunk boundaries) and pops them back in reverse order.
unittest
{
    enum size_t count = 99;
    Stack stack;
    assert(stack.length == 0);
    for (size_t value = 1; value <= count; ++value)
    {
        stack.push(value);
        assert(stack.length == value);
        assert(stack.top() == value);
    }
    for (size_t expected = count; expected >= 1; --expected)
    {
        assert(stack.length == expected);
        assert(stack.pop() == expected);
    }
    assert(stack.length == 0);
}
464 
/++
Constructs a `JsonParser` with the given compile-time options, deducing
`Allocator` and `Input` from the arguments.

NOTE(review): the `JsonParser` constructor stores the address of `allocator`.
When an rvalue is passed, `auto ref` makes `allocator` a by-value parameter of
this function, so the returned parser would hold a pointer to it after this
function returns. Callers in this module pass lvalues; confirm for other callers.
+/
auto jsonParser(bool includingNewLine, bool hasSpaces, bool assumeValid, Allocator, Input = const(ubyte)[])(auto ref Allocator allocator, Input input) {
    return JsonParser!(includingNewLine, hasSpaces, assumeValid, Allocator, Input)(allocator, input);
}
469 
470 ///
471 struct JsonParser(bool includingNewLine, bool hasSpaces, bool assumeValid, Allocator, Input = const(ubyte)[])
472 {
473 
474     ubyte[] data;
475     Allocator* allocator;
476     Input input;
477     static if (chunked)
478         ubyte[] front;
479     else
480         alias front = input;
481     size_t dataLength;
482 
483     string _lastError;
484 
485     enum bool chunked = !is(Input : const(char)[]);
486 
    // Stores `input` and the address of `allocator`. No memory is allocated here.
    // The allocator is kept by pointer, so it must outlive the parser.
    this(ref Allocator allocator, Input input)

    {
        this.input = input;
        this.allocator = &allocator;
    }
493 
494     bool prepareInput_()()
495     {
496         static if (chunked)
497         {
498             if (front.length == 0)
499             {
500                 assert(!input.empty);
501                 input.popFront;
502                 if (input.empty)
503                     return false;
504                 front = cast(typeof(front)) input.front;
505             }
506         }
507         return front.length != 0;
508     }
509 
    // Consumes one line feed. The caller must have checked that `front`
    // starts with '\n'; this is only asserted.
    void skipNewLine()()
    {
        assert(front.length);
        assert(front[0] == '\n');
        front = front[1 .. $];
    }
516 
517     char skipSpaces_()()
518     {
519         static if (hasSpaces)
520         for(;;)
521         {
522             if (prepareInput_ == false)
523                 return 0;
524             static if (includingNewLine)
525                 alias isWhite = isJsonWhitespace;
526             else
527                 alias isWhite = isJsonLineWhitespace;
528             if (isWhite(front[0]))
529             {
530                 front = front[1 .. $];
531                 continue;
532             }
533             return front[0];
534         }
535         else
536         {
537             if (prepareInput_ == false)
538                 return 0;
539             return front[0];
540         }
541     }
542 
543     bool skipLine()()
544     {
545         for(;;)
546         {
547             if (_expect(!prepareInput_, false))
548                 return false;
549             auto c = front[0];
550             front = front[1 .. $];
551             if (c == '\n')
552                 return true;
553         }
554     }
555 
    // The first `dataLength` bytes of the output buffer. This is a slice of
    // `data`, not a copy.
    auto result()()
    {
        return data[0 .. dataLength];
    }
560 
    // The message stored in `_lastError`.
    string lastError()() @property
    {
        return _lastError;
    }
565 
566 
567     FghjErrorCode parse()
568     {
569         size_t location;
570         return parse(location);
571     }
572 
573     pragma(inline, false)
574     FghjErrorCode parse(out size_t location)
575     {
576         version(SSE42)
577         {
578             enum byte16 str2E = [
579                 '\u0001', '\u001F',
580                 '\"', '\"',
581                 '\\', '\\',
582                 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0'];
583             enum byte16 num2E = ['+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'e', 'E', '\0'];
584             byte16 str2 = str2E;
585             byte16 num2 = num2E;
586         }
587 
588         const(ubyte)* strPtr;
589         const(ubyte)* strEnd;
590         ubyte* dataPtr;
591         ubyte* stringAndNumberShift = void;
592         static if (chunked)
593         {
594             bool prepareInput()()
595             {
596                 pragma(inline, false);
597                 if(strPtr)
598                 {
599                     location += front.length;
600                     input.popFront;
601                     if (input.empty)
602                     {
603                         return false;
604                     }
605                 }
606                 front = cast(typeof(front)) input.front;
607                 if (front.length == 0)
608                     return false;
609                 strPtr = front.ptr;
610                 strEnd = front.ptr + front.length;
611                 const dataAddLength = front.length * 6;
612                 const dataLength = dataPtr - data.ptr;
613                 const dataRequiredLength = dataLength + dataAddLength;
614                 if (data.length < dataRequiredLength)
615                 {
616                     const valueLength = stringAndNumberShift - dataPtr;
617                     import std.algorithm.comparison: max;
618                     const len = max(data.length * 2, dataRequiredLength);
619                     allocator.reallocate(*cast(void[]*)&data, len);
620                     dataPtr = data.ptr + dataLength;
621                     stringAndNumberShift = dataPtr + valueLength;
622                 }
623                 return true;
624             }
625             strPtr = front.ptr;
626             strEnd = front.ptr + front.length;
627         }
628         else
629         {
630             strPtr = cast(const(ubyte)*) input.ptr;
631             strEnd = cast(const(ubyte)*) input.ptr + input.length;
632             enum bool prepareInput = false;
633         }
634 
635         auto rl = (strEnd - strPtr) * 6;
636         if (data.ptr !is null && data.length < rl)
637         {
638             allocator.deallocate(data);
639             data = null;
640         }
641         if (data.ptr is null)
642         {
643             data = cast(ubyte[])allocator.allocate(rl);
644         }
645         dataPtr = data.ptr;
646 
647         bool skipSpaces()()
648         {
649             version(LDC)
650                 pragma(inline, true);
651             static if (includingNewLine)
652                 alias isWhite = isJsonWhitespace;
653             else
654                 alias isWhite = isJsonLineWhitespace;
655             F:
656             {
657                 if (_expect(strEnd != strPtr, true))
658                 {
659                 L:
660                     static if (hasSpaces)
661                     {
662                         if (isWhite(strPtr[0]))
663                         {
664                             strPtr++;
665                             goto F;
666                         }
667                     }
668                     return true;
669                 }
670                 else
671                 {
672                     if (prepareInput)
673                         goto L;
674                     return false;
675                 }
676             }
677 
678         }
679 
680         @minsize
681         int readUnicode()(ref dchar d)
682         {
683             version(LDC)
684                 pragma(inline, true);
685             uint e = 0;
686             size_t i = 4;
687             do
688             {
689                 if (strEnd == strPtr && !prepareInput)
690                     return 1;
691                 int c = uniFlags[*strPtr++];
692                 assert(c < 16);
693                 if (c == -1)
694                     return -1;
695                 assert(c >= 0);
696                 e <<= 4;
697                 e ^= c;
698             }
699             while(--i);
700             d = e;
701             return 0;
702         }
703 
704         Stack stack;
705 
706         typeof(return) retCode;
707         bool currIsKey = void;
708         size_t stackValue = void;
709         goto value;
710 
711 /////////// RETURN
712     ret:
713         front = front[cast(typeof(front.ptr)) strPtr - front.ptr .. $];
714         dataLength = dataPtr - data.ptr;
715         assert(stack.length == 0);
716     ret_final:
717         return retCode;
718 ///////////
719 
720     key:
721         if (!skipSpaces)
722             goto object_key_unexpectedEnd;
723     key_start:
724         if (*strPtr != '"')
725             goto object_key_start_unexpectedValue;
726         currIsKey = true;
727         stringAndNumberShift = dataPtr;
728         // reserve 1 byte for the length
729         dataPtr += 1;
730         goto string;
731     next:
732         if (stack.length == 0)
733             goto ret;
734         {
735             if (!skipSpaces)
736                 goto next_unexpectedEnd;
737             stackValue = stack.top;
738             const isObject = stackValue & 1;
739             auto v = *strPtr++;
740             if (isObject)
741             {
742                 if (v == ',')
743                     goto key;
744                 if (v != '}')
745                     goto next_unexpectedValue;
746             }
747             else
748             {
749                 if (v == ',')
750                     goto value;
751                 if (v != ']')
752                     goto next_unexpectedValue;
753             }
754         }
755     structure_end: {
756         stackValue = stack.pop();
757         const structureShift = stackValue >> 1;
758         const structureLengthPtr = data.ptr + structureShift;
759         const size_t structureLength = dataPtr - structureLengthPtr - 4;
760         if (structureLength > uint.max)
761             goto object_or_array_is_to_large;
762         version(X86_Any)
763             *cast(uint*) structureLengthPtr = cast(uint) structureLength;
764         else
765             *cast(ubyte[4]*) structureLengthPtr = cast(ubyte[4]) cast(uint[1]) [cast(uint) structureLength];
766         goto next;
767     }
768     value:
769         if (!skipSpaces)
770             goto value_unexpectedEnd;
771     value_start:
772         switch(*strPtr)
773         {
774             stringValue:
775             case '"':
776                 currIsKey = false;
777                 *dataPtr++ = Fghj.Kind..string;
778                 stringAndNumberShift = dataPtr;
779                 // reserve 4 byte for the length
780                 dataPtr += 4;
781                 goto string;
782             case '-':
783             case '0':
784             ..
785             case '9': {
786                 *dataPtr++ = Fghj.Kind.number;
787                 stringAndNumberShift = dataPtr;
788                 // reserve 1 byte for the length
789                 dataPtr++; // write the first character
790                 *dataPtr++ = *strPtr++;
791                 for(;;)
792                 {
793                     if (strEnd == strPtr && !prepareInput)
794                         goto number_found;
795                     version(SSE42)
796                     {
797                         while (strEnd >= strPtr + 16)
798                         {
799                             pragma(msg, "EEEE");
800                             byte16 str1 = loadUnaligned!byte16(cast(byte*)strPtr);
801                             size_t ecx = __builtin_ia32_pcmpistri128(num2, str1, 0x10);
802                             storeUnaligned!byte16(str1, cast(byte*)dataPtr);
803                             strPtr += ecx;
804                             dataPtr += ecx;
805                             if(ecx != 16)
806                                 goto number_found;
807                         }
808                     }
809                     else
810                     {
811                         while(strEnd >= strPtr + 4)
812                         {
813                             char c0 = strPtr[0]; dataPtr += 4;     if (!isJsonNumber(c0)) goto number_found0;
814                             char c1 = strPtr[1]; dataPtr[-4] = c0; if (!isJsonNumber(c1)) goto number_found1;
815                             char c2 = strPtr[2]; dataPtr[-3] = c1; if (!isJsonNumber(c2)) goto number_found2;
816                             char c3 = strPtr[3]; dataPtr[-2] = c2; if (!isJsonNumber(c3)) goto number_found3;
817                             strPtr += 4;         dataPtr[-1] = c3;
818                         }
819                     }
820                     while(strEnd > strPtr)
821                     {
822                         char c0 = strPtr[0]; if (!isJsonNumber(c0)) goto number_found; dataPtr[0] = c0;
823                         strPtr += 1;
824                         dataPtr += 1;
825                     }
826                 }
827             version(SSE42){} else
828             {
829                 number_found3: dataPtr++; strPtr++;
830                 number_found2: dataPtr++; strPtr++;
831                 number_found1: dataPtr++; strPtr++;
832                 number_found0: dataPtr -= 4;
833             }
834             number_found:
835 
836                 auto numberLength = dataPtr - stringAndNumberShift - 1;
837                 if (numberLength > ubyte.max)
838                     goto number_length_unexpectedValue;
839                 *stringAndNumberShift = cast(ubyte) numberLength;
840                 goto next;
841             }
842             case '{':
843                 strPtr++;
844                 *dataPtr++ = Fghj.Kind.object;
845                 stack.push(((dataPtr - data.ptr) << 1) ^ 1);
846                 dataPtr += 4;
847                 if (!skipSpaces)
848                     goto object_first_value_start_unexpectedEnd;
849                 if (*strPtr != '}')
850                     goto key_start;
851                 strPtr++;
852                 goto structure_end;
853             case '[':
854                 strPtr++;
855                 *dataPtr++ = Fghj.Kind.array;
856                 stack.push(((dataPtr - data.ptr) << 1) ^ 0);
857                 dataPtr += 4;
858                 if (!skipSpaces)
859                     goto array_first_value_start_unexpectedEnd;
860                 if (*strPtr != ']')
861                     goto value_start;
862                 strPtr++;
863                 goto structure_end;
864             foreach (name; AliasSeq!("false", "null", "true"))
865             {
866             case name[0]:
867                     if (_expect(strEnd - strPtr >= name.length, true))
868                     {
869                         static if (!assumeValid)
870                         {
871                             version(X86_Any)
872                             {
873                                 enum uint referenceValue =
874                                         (uint(name[$ - 4]) << 0x00) ^
875                                         (uint(name[$ - 3]) << 0x08) ^
876                                         (uint(name[$ - 2]) << 0x10) ^
877                                         (uint(name[$ - 1]) << 0x18);
878                                 if (*cast(uint*)(strPtr + bool(name.length == 5)) != referenceValue)
879                                 {
880                                     static if (name == "true")
881                                         goto true_unexpectedValue;
882                                     else
883                                     static if (name == "false")
884                                         goto false_unexpectedValue;
885                                     else
886                                         goto null_unexpectedValue;
887                                 }
888                             }
889                             else
890                             {
891                                 char[name.length - 1] c = void;
892                                 import std.range: iota;
893                                 foreach (i; aliasSeqOf!(iota(1, name.length)))
894                                     c[i - 1] = strPtr[i];
895                                 foreach (i; aliasSeqOf!(iota(1, name.length)))
896                                 {
897                                     if (c[i - 1] != name[i])
898                                     {
899 
900                                         static if (name == "true")
901                                             goto true_unexpectedValue;
902                                         else
903                                         static if (name == "false")
904                                             goto false_unexpectedValue;
905                                         else
906                                             goto null_unexpectedValue;
907                                     }
908                                 }
909                             }
910                         }
911                         static if (name == "null")
912                             *dataPtr++ = Fghj.Kind.null_;
913                         else
914                         static if (name == "false")
915                             *dataPtr++ = Fghj.Kind.false_;
916                         else
917                             *dataPtr++ = Fghj.Kind.true_;
918                         strPtr += name.length;
919                         goto next;
920                     }
921                     else
922                     {
923                         strPtr += 1;
924                         foreach (i; 1 .. name.length)
925                         {
926                             if (strEnd == strPtr && !prepareInput)
927                             {
928                                 static if (name == "true")
929                                     goto true_unexpectedEnd;
930                                 else
931                                 static if (name == "false")
932                                     goto false_unexpectedEnd;
933                                 else
934                                     goto null_unexpectedEnd;
935                             }
936                             static if (!assumeValid)
937                             {
938                                 if (_expect(strPtr[0] != name[i], false))
939                                 {
940                                     static if (name == "true")
941                                         goto true_unexpectedValue;
942                                     else
943                                     static if (name == "false")
944                                         goto false_unexpectedValue;
945                                     else
946                                         goto null_unexpectedValue;
947                                 }
948                             }
949                             strPtr++;
950                         }
951                         static if (name == "null")
952                             *dataPtr++ = Fghj.Kind.null_;
953                         else
954                         static if (name == "false")
955                             *dataPtr++ = Fghj.Kind.false_;
956                         else
957                             *dataPtr++ = Fghj.Kind.true_;
958                         goto next;
959                     }
960             }
961             default: goto value_unexpectedStart;
962         }
963 
964     string:
965         debug assert(*strPtr == '"', "Internal FGHJ logic error. Please report an issue.");
966         strPtr += 1;
967 
968     StringLoop: {
969         for(;;)
970         {
971             if (strEnd == strPtr && !prepareInput)
972                 goto string_unexpectedEnd;
973             version(SSE42)
974             {
975                 while (strEnd >= strPtr + 16)
976                 {
977                     byte16 str1 = loadUnaligned!byte16(cast(byte*)strPtr);
978                     size_t ecx = __builtin_ia32_pcmpistri128(str2, str1, 0x04);
979                     storeUnaligned!byte16(str1, cast(byte*)dataPtr);
980                     strPtr += ecx;
981                     dataPtr += ecx;
982                     if(ecx != 16)
983                         goto string_found;
984                 }
985             }
986             else
987             {
988                 while(strEnd >= strPtr + 4)
989                 {
990                     char c0 = strPtr[0]; dataPtr += 4;     if (!isPlainJsonCharacter(c0)) goto string_found0;
991                     char c1 = strPtr[1]; dataPtr[-4] = c0; if (!isPlainJsonCharacter(c1)) goto string_found1;
992                     char c2 = strPtr[2]; dataPtr[-3] = c1; if (!isPlainJsonCharacter(c2)) goto string_found2;
993                     char c3 = strPtr[3]; dataPtr[-2] = c2; if (!isPlainJsonCharacter(c3)) goto string_found3;
994                     strPtr += 4;         dataPtr[-1] = c3;
995                 }
996             }
997             while(strEnd > strPtr)
998             {
999                 char c0 = strPtr[0]; if (!isPlainJsonCharacter(c0)) goto string_found; dataPtr[0] = c0;
1000                 strPtr += 1;
1001                 dataPtr += 1;
1002             }
1003         }
1004         version(SSE42) {} else
1005         {
1006             string_found3: dataPtr++; strPtr++;
1007             string_found2: dataPtr++; strPtr++;
1008             string_found1: dataPtr++; strPtr++;
1009             string_found0: dataPtr -= 4;
1010         }
1011         string_found:
1012 
1013         uint c = strPtr[0];
1014         if (c == '\"')
1015         {
1016             strPtr += 1;
1017             if (currIsKey)
1018             {
1019                 auto stringLength = dataPtr - stringAndNumberShift - 1;
1020                 if (stringLength > ubyte.max)
1021                     goto key_is_to_large;
1022                 *cast(ubyte*)stringAndNumberShift = cast(ubyte) stringLength;
1023                 if (!skipSpaces)
1024                     goto failed_to_read_after_key;
1025                 if (*strPtr != ':')
1026                     goto unexpected_character_after_key;
1027                 strPtr++;
1028                 goto value;
1029             }
1030             else
1031             {
1032                 auto stringLength = dataPtr - stringAndNumberShift - 4;
1033                 if (stringLength > uint.max)
1034                     goto string_length_is_too_large;
1035                 version(X86_Any)
1036                     *cast(uint*)stringAndNumberShift = cast(uint) stringLength;
1037                 else
1038                     *cast(ubyte[4]*)stringAndNumberShift = cast(ubyte[4]) cast(uint[1]) [cast(uint) stringLength];
1039                 goto next;
1040             }
1041         }
1042         if (c == '\\')
1043         {
1044             strPtr += 1;
1045             if (strEnd == strPtr && !prepareInput)
1046                 goto string_unexpectedEnd;
1047             c = *strPtr++;
1048             switch(c)
1049             {
1050                 case '/' :
1051                 case '\"':
1052                 case '\\':
1053                     *dataPtr++ = cast(ubyte) c;
1054                     goto StringLoop;
1055                 case 'b' : *dataPtr++ = '\b'; goto StringLoop;
1056                 case 'f' : *dataPtr++ = '\f'; goto StringLoop;
1057                 case 'n' : *dataPtr++ = '\n'; goto StringLoop;
1058                 case 'r' : *dataPtr++ = '\r'; goto StringLoop;
1059                 case 't' : *dataPtr++ = '\t'; goto StringLoop;
1060                 case 'u' :
1061                     uint wur = void;
1062                     dchar d = void;
1063                     if (auto r = (readUnicode(d)))
1064                     {
1065                         if (r == 1)
1066                             goto string_unexpectedEnd;
1067                         goto string_unexpectedValue;
1068                     }
1069                     if (_expect(0xD800 <= d && d <= 0xDFFF, false))
1070                     {
1071                         if (d >= 0xDC00)
1072                             goto string_unexpectedValue;
1073                         if (strEnd == strPtr && !prepareInput)
1074                             goto string_unexpectedEnd;
1075                         if (*strPtr++ != '\\')
1076                             goto string_unexpectedValue;
1077                         if (strEnd == strPtr && !prepareInput)
1078                             goto string_unexpectedEnd;
1079                         if (*strPtr++ != 'u')
1080                             goto string_unexpectedValue;
1081                         d = (d & 0x3FF) << 10;
1082                         dchar trailing;
1083                         if (auto r = (readUnicode(trailing)))
1084                         {
1085                             if (r == 1)
1086                                 goto string_unexpectedEnd;
1087                             goto string_unexpectedValue;
1088                         }
1089                         if (!(0xDC00 <= trailing && trailing <= 0xDFFF))
1090                             goto invalid_trail_surrogate;
1091                         {
1092                             d |= trailing & 0x3FF;
1093                             d += 0x10000;
1094                         }
1095                     }
1096                     if (!(d < 0xD800 || (d > 0xDFFF && d <= 0x10FFFF)))
1097                         goto invalid_utf_value;
1098                     encodeUTF8(d, dataPtr);
1099                     goto StringLoop;
1100                 default: goto string_unexpectedValue;
1101             }
1102         }
1103         goto string_unexpectedValue;
1104     }
1105 
1106     ret_error:
1107         location += strPtr - cast(const(ubyte)*)front.ptr;
1108         dataLength = dataPtr - data.ptr;
1109         stack.free();
1110         goto ret_final;
1111     unexpectedEnd:
1112         retCode = FghjErrorCode.unexpectedEnd;
1113         goto ret_error;
1114     unexpectedValue:
1115         retCode = FghjErrorCode.unexpectedValue;
1116         goto ret_error;
1117     object_key_unexpectedEnd:
1118         _lastError = "unexpected end of object key";
1119         goto unexpectedEnd;
1120     object_key_start_unexpectedValue:
1121         _lastError = "expected '\"' when start parsing object key";
1122         goto unexpectedValue;
1123     key_is_to_large:
1124         _lastError = "key length is limited to 255 characters";
1125         goto unexpectedValue;
1126     object_or_array_is_to_large:
1127         _lastError = "object or array serialized size is limited to 2^32-1";
1128         goto unexpectedValue;
1129     next_unexpectedEnd:
1130         stackValue = stack.top;
1131         _lastError = (stackValue & 1) ? "unexpected end when parsing object" : "unexpected end when parsing array";
1132         goto unexpectedEnd;
1133     next_unexpectedValue:
1134         stackValue = stack.top;
1135         _lastError = (stackValue & 1) ? "expected ',' or `}` when parsing object" : "expected ',' or `]` when parsing array";
1136         goto unexpectedValue;
1137     value_unexpectedStart:
1138         _lastError = "unexpected character when start parsing JSON value";
1139         goto unexpectedEnd;
1140     value_unexpectedEnd:
1141         _lastError = "unexpected end when start parsing JSON value";
1142         goto unexpectedEnd;
1143     number_length_unexpectedValue:
1144         _lastError = "number length is limited to 255 characters";
1145         goto unexpectedValue;
1146     object_first_value_start_unexpectedEnd:
1147         _lastError = "unexpected end of input data after '{'";
1148         goto unexpectedEnd;
1149     array_first_value_start_unexpectedEnd:
1150         _lastError = "unexpected end of input data after '['";
1151         goto unexpectedEnd;
1152     false_unexpectedEnd:
1153         _lastError = "unexpected end when parsing 'false'";
1154         goto unexpectedEnd;
1155     false_unexpectedValue:
1156         _lastError = "unexpected character when parsing 'false'";
1157         goto unexpectedValue;
1158     null_unexpectedEnd:
1159         _lastError = "unexpected end when parsing 'null'";
1160         goto unexpectedEnd;
1161     null_unexpectedValue:
1162         _lastError = "unexpected character when parsing 'null'";
1163         goto unexpectedValue;
1164     true_unexpectedEnd:
1165         _lastError = "unexpected end when parsing 'true'";
1166         goto unexpectedEnd;
1167     true_unexpectedValue:
1168         _lastError = "unexpected character when parsing 'true'";
1169         goto unexpectedValue;
1170     string_unexpectedEnd:
1171         _lastError = "unexpected end when parsing string";
1172         goto unexpectedEnd;
1173     string_unexpectedValue:
1174         _lastError = "unexpected character when parsing string";
1175         goto unexpectedValue;
1176     failed_to_read_after_key:
1177         _lastError = "unexpected end after object key";
1178         goto unexpectedEnd;
1179     unexpected_character_after_key:
1180         _lastError = "unexpected character after key";
1181         goto unexpectedValue;
1182     string_length_is_too_large:
1183         _lastError = "string size is limited to 2^32-1";
1184         goto unexpectedValue;
1185     invalid_trail_surrogate:
1186         _lastError = "invalid UTF-16 trail surrogate";
1187         goto unexpectedValue;
1188     invalid_utf_value:
1189         _lastError = "invalid UTF value";
1190         goto unexpectedValue;
1191     }
1192 }
1193 
unittest
{
    import mir.conv;

    // Input with extra spaces around every token and a nested array;
    // converting the parsed value back to text must give the compact form.
    auto expected = `[true,123,[false,123.0,"123211"],"3e23e"]`;
    auto parsed = parseJson(` [ true, 123 , [ false, 123.0 , "123211" ], "3e23e" ] `);
    auto rendered = parsed.to!string;
    assert(rendered == expected);
}
1202 
/++
Encodes `c` as UTF-8 at `ptr` and advances `ptr` past the bytes written.

The sequence length depends only on the magnitude of `c`:
one byte below 0x80, two below 0x800, three below 0x10000, four otherwise.
The value is not validated here and no bounds are checked:
up to four bytes must be writable at `ptr`.

Params:
    c = code point to encode
    ptr = output position; incremented by the number of bytes written (1 to 4)
+/
pragma(inline, true)
void encodeUTF8()(dchar c, ref ubyte* ptr)
{
    const uint cp = c;
    ubyte* dst = ptr;
    size_t written;

    if (cp >= 0x10000)
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        dst[0] = cast(ubyte) (0xF0 | (cp >> 18));
        dst[1] = cast(ubyte) (0x80 | ((cp >> 12) & 0x3F));
        dst[2] = cast(ubyte) (0x80 | ((cp >> 6) & 0x3F));
        dst[3] = cast(ubyte) (0x80 | (cp & 0x3F));
        written = 4;
    }
    else if (cp >= 0x800)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        dst[0] = cast(ubyte) (0xE0 | (cp >> 12));
        dst[1] = cast(ubyte) (0x80 | ((cp >> 6) & 0x3F));
        dst[2] = cast(ubyte) (0x80 | (cp & 0x3F));
        written = 3;
    }
    else if (cp >= 0x80)
    {
        // 110xxxxx 10xxxxxx
        dst[0] = cast(ubyte) (0xC0 | (cp >> 6));
        dst[1] = cast(ubyte) (0x80 | (cp & 0x3F));
        written = 2;
    }
    else
    {
        // 0xxxxxxx: stored as a single byte
        dst[0] = cast(ubyte) cp;
        written = 1;
    }

    ptr = dst + written;
}
1236 
unittest
{
    // The D compiler decodes "\u007F" in this literal, so the parser is given
    // a raw 0x7F byte inside the JSON string rather than a JSON escape.
    // The parsed value is not inspected; the test only runs the parser on it.
    auto parsed = parseJson("[\"\u007F\"]");
}
1241 
unittest
{
    // The escape pair \uD801\uDC37 (written as JSON escapes in the raw string)
    // must parse to the same value as the literal code point U+10437.
    auto fromEscapes = parseJson(`"\uD801\uDC37"`);
    assert(fromEscapes == parseJson("\"\U00010437\""));
}
1247 
unittest
{
    import std.range : chunks;
    import std.string : representation;

    // A JSON string with no escapes; the expected value is its text without the quotes.
    static immutable json = `"1234567890qwertyuiopfghjghjklzxcvbnm"`;
    auto expected = Fghj(json[1 .. $ - 1]);
    assert(expected == parseJson(json));

    // Feed the same bytes in chunks of every size from 1 up to (but not including)
    // the full length, so the string is cut at different offsets.
    for (size_t chunkLen = 1; chunkLen < json.length; ++chunkLen)
    {
        auto parsed = parseJson(json.representation.chunks(chunkLen));
        assert(expected == parsed);
    }
}
1261 
unittest
{
    import std.range : chunks;
    import std.string : representation;

    // The JSON escapes \t \r \f \b \" \\ \/ repeated four times (raw string, so the
    // parser sees the backslashes); the expected value is the unescaped D string.
    static immutable escaped = `"\t\r\f\b\"\\\/\t\r\f\b\"\\\/\t\r\f\b\"\\\/\t\r\f\b\"\\\/"`;
    auto expected = Fghj("\t\r\f\b\"\\/\t\r\f\b\"\\/\t\r\f\b\"\\/\t\r\f\b\"\\/");
    assert(expected == parseJson(escaped));

    // Repeat with the input split into chunks of every size below the full length.
    for (size_t chunkLen = 1; chunkLen < escaped.length; ++chunkLen)
    {
        assert(expected == parseJson(escaped.representation.chunks(chunkLen)));
    }
}
1272 
unittest
{
    // A \u escape for an ASCII code point (U+0026) must decode to the single character '&'.
    static immutable json = `"\u0026"`;
    auto expected = Fghj("&");
    assert(expected == parseJson(json));
}
1281 
// Sample JSON document, declared only when compiling with unittests enabled.
// It contains nested objects and arrays, floating point and integer numbers,
// a boolean, and strings.
// NOTE: this is a D token string (q{ ... }): the string's value is the text between
// the outer braces, including the line breaks, so the layout below is significant.
version(unittest) immutable string test_data =
q{{
  "coordinates": [
    {
      "x": 0.29811521136061625,
      "y": 0.47980763779335556,
      "z": 0.1704431616620138,
      "name": "tqxvsg 2780",
      "opts": {
        "1": [
          1,
          true
        ]
      }
    }
  ],
  "info": "some info"
}
};