basic_parser_impl.hpp 90 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916
  1. //
  2. // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
  3. // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
  4. //
  5. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  6. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. // Official repository: https://github.com/boostorg/json
  9. //
  10. #ifndef BOOST_JSON_BASIC_PARSER_IMPL_HPP
  11. #define BOOST_JSON_BASIC_PARSER_IMPL_HPP
  12. #include <boost/json/detail/config.hpp>
  13. #include <boost/json/detail/literals.hpp>
  14. #include <boost/json/basic_parser.hpp>
  15. #include <boost/json/error.hpp>
  16. #include <boost/json/detail/buffer.hpp>
  17. #include <boost/json/detail/charconv/from_chars.hpp>
  18. #include <boost/json/detail/sse2.hpp>
  19. #include <boost/mp11/algorithm.hpp>
  20. #include <boost/mp11/integral.hpp>
  21. #include <cmath>
  22. #include <limits>
  23. #include <cstring>
  24. #ifdef _MSC_VER
  25. #pragma warning(push)
  26. #pragma warning(disable: 4702) // unreachable code
  27. #pragma warning(disable: 4127) // conditional expression is constant
  28. #endif
  29. /* This file must be manually included to get the
  30. function template definitions for basic_parser.
  31. */
  32. /* Reference:
  33. https://www.json.org/
  34. RFC 7159: The JavaScript Object Notation (JSON) Data Interchange Format
  35. https://tools.ietf.org/html/rfc7159
  36. https://ampl.com/netlib/fp/dtoa.c
  37. */
  38. #ifndef BOOST_JSON_DOCS
  39. namespace boost {
  40. namespace json {
  41. namespace detail {
  // Table lookup for powers of ten.
  //
  // Returns the double written as the literal 1e<exp> for every
  // exp in [-308, 308]. Larger exponents yield +infinity and
  // smaller exponents yield 0.0.
  inline
  double
  pow10(int exp) noexcept
  {
      // One entry per decimal exponent from -308 up to +308,
      // i.e. 617 entries; the entry for 1e<k> lives at index k + 308.
      static double const tab[617] = {
                          1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301,
          1e-300, 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291,
          1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281,
          1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272, 1e-271,
          1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262, 1e-261,
          1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254, 1e-253, 1e-252, 1e-251,
          1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244, 1e-243, 1e-242, 1e-241,
          1e-240, 1e-239, 1e-238, 1e-237, 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231,
          1e-230, 1e-229, 1e-228, 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221,
          1e-220, 1e-219, 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211,
          1e-210, 1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201,
          1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191,
          1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181,
          1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172, 1e-171,
          1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163, 1e-162, 1e-161,
          1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154, 1e-153, 1e-152, 1e-151,
          1e-150, 1e-149, 1e-148, 1e-147, 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141,
          1e-140, 1e-139, 1e-138, 1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131,
          1e-130, 1e-129, 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121,
          1e-120, 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111,
          1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101,
          1e-100, 1e-099, 1e-098, 1e-097, 1e-096, 1e-095, 1e-094, 1e-093, 1e-092, 1e-091,
          1e-090, 1e-089, 1e-088, 1e-087, 1e-086, 1e-085, 1e-084, 1e-083, 1e-082, 1e-081,
          1e-080, 1e-079, 1e-078, 1e-077, 1e-076, 1e-075, 1e-074, 1e-073, 1e-072, 1e-071,
          1e-070, 1e-069, 1e-068, 1e-067, 1e-066, 1e-065, 1e-064, 1e-063, 1e-062, 1e-061,
          1e-060, 1e-059, 1e-058, 1e-057, 1e-056, 1e-055, 1e-054, 1e-053, 1e-052, 1e-051,
          1e-050, 1e-049, 1e-048, 1e-047, 1e-046, 1e-045, 1e-044, 1e-043, 1e-042, 1e-041,
          1e-040, 1e-039, 1e-038, 1e-037, 1e-036, 1e-035, 1e-034, 1e-033, 1e-032, 1e-031,
          1e-030, 1e-029, 1e-028, 1e-027, 1e-026, 1e-025, 1e-024, 1e-023, 1e-022, 1e-021,
          1e-020, 1e-019, 1e-018, 1e-017, 1e-016, 1e-015, 1e-014, 1e-013, 1e-012, 1e-011,
          1e-010, 1e-009, 1e-008, 1e-007, 1e-006, 1e-005, 1e-004, 1e-003, 1e-002, 1e-001,
          1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009,
          1e+010, 1e+011, 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019,
          1e+020, 1e+021, 1e+022, 1e+023, 1e+024, 1e+025, 1e+026, 1e+027, 1e+028, 1e+029,
          1e+030, 1e+031, 1e+032, 1e+033, 1e+034, 1e+035, 1e+036, 1e+037, 1e+038, 1e+039,
          1e+040, 1e+041, 1e+042, 1e+043, 1e+044, 1e+045, 1e+046, 1e+047, 1e+048, 1e+049,
          1e+050, 1e+051, 1e+052, 1e+053, 1e+054, 1e+055, 1e+056, 1e+057, 1e+058, 1e+059,
          1e+060, 1e+061, 1e+062, 1e+063, 1e+064, 1e+065, 1e+066, 1e+067, 1e+068, 1e+069,
          1e+070, 1e+071, 1e+072, 1e+073, 1e+074, 1e+075, 1e+076, 1e+077, 1e+078, 1e+079,
          1e+080, 1e+081, 1e+082, 1e+083, 1e+084, 1e+085, 1e+086, 1e+087, 1e+088, 1e+089,
          1e+090, 1e+091, 1e+092, 1e+093, 1e+094, 1e+095, 1e+096, 1e+097, 1e+098, 1e+099,
          1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109,
          1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, 1e+119,
          1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127, 1e+128, 1e+129,
          1e+130, 1e+131, 1e+132, 1e+133, 1e+134, 1e+135, 1e+136, 1e+137, 1e+138, 1e+139,
          1e+140, 1e+141, 1e+142, 1e+143, 1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149,
          1e+150, 1e+151, 1e+152, 1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159,
          1e+160, 1e+161, 1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169,
          1e+170, 1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179,
          1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188, 1e+189,
          1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197, 1e+198, 1e+199,
          1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206, 1e+207, 1e+208, 1e+209,
          1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215, 1e+216, 1e+217, 1e+218, 1e+219,
          1e+220, 1e+221, 1e+222, 1e+223, 1e+224, 1e+225, 1e+226, 1e+227, 1e+228, 1e+229,
          1e+230, 1e+231, 1e+232, 1e+233, 1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239,
          1e+240, 1e+241, 1e+242, 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249,
          1e+250, 1e+251, 1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259,
          1e+260, 1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269,
          1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278, 1e+279,
          1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287, 1e+288, 1e+289,
          1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296, 1e+297, 1e+298, 1e+299,
          1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305, 1e+306, 1e+307, 1e+308 };

      // above the table: report overflow as infinity
      if( exp > 308 )
          return std::numeric_limits<double>::infinity();

      // below the table: due to the way pow10 is used by
      // dec_to_float, we can afford to return 0.0 here
      if( exp < -308 )
          return 0.0;

      // shift the exponent so that -308 maps to the first entry
      exp += 308;
      BOOST_ASSERT(exp >= 0 && exp < 617);
      return tab[exp];
  }
  126. inline
  127. double
  128. dec_to_float(
  129. std::uint64_t m,
  130. std::int32_t e,
  131. bool neg) noexcept
  132. {
  133. // convert to double explicitly to silence warnings
  134. double x = static_cast<double>(m);
  135. if(neg)
  136. x = -x;
  137. if(e < -305)
  138. {
  139. x *= 1e-305 ;
  140. e += 305;
  141. }
  142. if(e >= -22 && e < 0)
  143. return x / pow10(-e);
  144. return x * pow10(e);
  145. }
  // True when the character code, taken as an unsigned
  // value, is below 32 (0x20).
  inline
  bool
  is_control(char c) noexcept
  {
      unsigned char const code = static_cast<unsigned char>(c);
      return code < 0x20;
  }
  // Numeric value (0..15) of a hexadecimal digit character,
  // accepting both upper and lower case letters.
  // Returns -1 for any other character.
  inline
  int
  hex_digit(unsigned char c) noexcept
  {
      // by Peter Dimov
      if( c >= '0' && c <= '9' )
          return c - '0';

      // clearing bit 0x20 maps 'a'..'f' onto 'A'..'F'
      unsigned char const upper =
          static_cast<unsigned char>(c & ~0x20);
      if( upper < 'A' || upper > 'F' )
          return -1;
      return 10 + upper - 'A';
  }
  164. } // detail
  165. //----------------------------------------------------------
  166. template< class Handler >
  167. template< bool StackEmpty_, char First_ >
  168. struct basic_parser<Handler>::
  169. parse_number_helper
  170. {
  171. basic_parser* parser;
  172. char const* p;
  173. template< std::size_t N >
  174. char const*
  175. operator()( mp11::mp_size_t<N> ) const
  176. {
  177. return parser->parse_number(
  178. p,
  179. std::integral_constant<bool, StackEmpty_>(),
  180. std::integral_constant<char, First_>(),
  181. std::integral_constant<
  182. number_precision, static_cast<number_precision>(N)>() );
  183. }
  184. };
  185. //----------------------------------------------------------
  186. template<class Handler>
  187. void
  188. basic_parser<Handler>::
  189. reserve()
  190. {
  191. if(BOOST_JSON_LIKELY(
  192. ! st_.empty()))
  193. return;
  194. // Reserve the largest stack we need,
  195. // to avoid reallocation during suspend.
  196. st_.reserve(
  197. sizeof(state) + // document parsing state
  198. (sizeof(state) +
  199. sizeof(std::size_t)) * depth() + // array and object state + size
  200. sizeof(state) + // value parsing state
  201. sizeof(std::size_t) + // string size
  202. sizeof(state)); // comment state
  203. }
  204. //----------------------------------------------------------
  205. //
  206. // The sentinel value is returned by parse functions
  207. // to indicate that the parser failed, or suspended.
  208. // this is used as it is distinct from all valid values
  209. // for data in write
  210. template<class Handler>
  211. const char*
  212. basic_parser<Handler>::
  213. sentinel()
  214. {
  215. // the "+1" ensures that the returned pointer is unique even if
  216. // the given input buffer borders on this object
  217. return reinterpret_cast<
  218. const char*>(this) + 1;
  219. }
  220. template<class Handler>
  221. bool
  222. basic_parser<Handler>::
  223. incomplete(
  224. const detail::const_stream_wrapper& cs)
  225. {
  226. return cs.begin() == sentinel();
  227. }
  228. //----------------------------------------------------------
  //
  // These functions are declared with the BOOST_NOINLINE
  // attribute to avoid polluting the parser's hot path.
  // They return the sentinel value to indicate suspension
  // or failure.
  234. template<class Handler>
  235. const char*
  236. basic_parser<Handler>::
  237. suspend_or_fail(state st)
  238. {
  239. if(BOOST_JSON_LIKELY(
  240. ! ec_ && more_))
  241. {
  242. // suspend
  243. reserve();
  244. st_.push_unchecked(st);
  245. }
  246. return sentinel();
  247. }
  248. template<class Handler>
  249. const char*
  250. basic_parser<Handler>::
  251. suspend_or_fail(
  252. state st,
  253. std::size_t n)
  254. {
  255. if(BOOST_JSON_LIKELY(
  256. ! ec_ && more_))
  257. {
  258. // suspend
  259. reserve();
  260. st_.push_unchecked(n);
  261. st_.push_unchecked(st);
  262. }
  263. return sentinel();
  264. }
  265. template<class Handler>
  266. const char*
  267. basic_parser<Handler>::
  268. fail(const char* p) noexcept
  269. {
  270. BOOST_ASSERT( p != sentinel() );
  271. end_ = p;
  272. return sentinel();
  273. }
  274. template<class Handler>
  275. const char*
  276. basic_parser<Handler>::
  277. fail(
  278. const char* p,
  279. error ev,
  280. source_location const* loc) noexcept
  281. {
  282. BOOST_ASSERT( p != sentinel() );
  283. end_ = p;
  284. ec_.assign(ev, loc);
  285. return sentinel();
  286. }
  287. template<class Handler>
  288. const char*
  289. basic_parser<Handler>::
  290. maybe_suspend(
  291. const char* p,
  292. state st)
  293. {
  294. if( p != sentinel() )
  295. end_ = p;
  296. if(BOOST_JSON_LIKELY(more_))
  297. {
  298. // suspend
  299. reserve();
  300. st_.push_unchecked(st);
  301. }
  302. return sentinel();
  303. }
  304. template<class Handler>
  305. const char*
  306. basic_parser<Handler>::
  307. maybe_suspend(
  308. const char* p,
  309. state st,
  310. std::size_t n)
  311. {
  312. BOOST_ASSERT( p != sentinel() );
  313. end_ = p;
  314. if(BOOST_JSON_LIKELY(more_))
  315. {
  316. // suspend
  317. reserve();
  318. st_.push_unchecked(n);
  319. st_.push_unchecked(st);
  320. }
  321. return sentinel();
  322. }
  323. template<class Handler>
  324. const char*
  325. basic_parser<Handler>::
  326. maybe_suspend(
  327. const char* p,
  328. state st,
  329. const number& num)
  330. {
  331. BOOST_ASSERT( p != sentinel() );
  332. end_ = p;
  333. if(BOOST_JSON_LIKELY(more_))
  334. {
  335. // suspend
  336. num_ = num;
  337. reserve();
  338. st_.push_unchecked(st);;
  339. }
  340. return sentinel();
  341. }
  342. template<class Handler>
  343. const char*
  344. basic_parser<Handler>::
  345. suspend(
  346. const char* p,
  347. state st)
  348. {
  349. BOOST_ASSERT( p != sentinel() );
  350. end_ = p;
  351. // suspend
  352. reserve();
  353. st_.push_unchecked(st);
  354. return sentinel();
  355. }
  356. template<class Handler>
  357. const char*
  358. basic_parser<Handler>::
  359. suspend(
  360. const char* p,
  361. state st,
  362. const number& num)
  363. {
  364. BOOST_ASSERT( p != sentinel() );
  365. end_ = p;
  366. // suspend
  367. num_ = num;
  368. reserve();
  369. st_.push_unchecked(st);
  370. return sentinel();
  371. }
  372. template<class Handler>
  373. template<
  374. bool StackEmpty_/*,
  375. bool Terminal_*/>
  376. const char*
  377. basic_parser<Handler>::
  378. parse_comment(const char* p,
  379. std::integral_constant<bool, StackEmpty_> stack_empty,
  380. /*std::integral_constant<bool, Terminal_>*/ bool terminal)
  381. {
  382. detail::const_stream_wrapper cs(p, end_);
  383. const char* start = cs.begin();
  384. std::size_t remain;
  385. if(! stack_empty && ! st_.empty())
  386. {
  387. state st;
  388. st_.pop(st);
  389. switch(st)
  390. {
  391. default: BOOST_JSON_UNREACHABLE();
  392. case state::com1: goto do_com1;
  393. case state::com2: goto do_com2;
  394. case state::com3: goto do_com3;
  395. case state::com4: goto do_com4;
  396. }
  397. }
  398. BOOST_ASSERT(*cs == '/');
  399. ++cs;
  400. do_com1:
  401. if(BOOST_JSON_UNLIKELY(! cs))
  402. return maybe_suspend(cs.begin(), state::com1);
  403. switch(*cs)
  404. {
  405. default:
  406. {
  407. BOOST_STATIC_CONSTEXPR source_location loc
  408. = BOOST_CURRENT_LOCATION;
  409. return fail(cs.begin(), error::syntax, &loc);
  410. }
  411. case '/':
  412. ++cs;
  413. do_com2:
  414. // KRYSTIAN TODO: this is a mess, we have to fix this
  415. remain = cs.remain();
  416. cs = remain ? static_cast<const char*>(
  417. std::memchr(cs.begin(), '\n', remain)) : sentinel();
  418. if(! cs.begin())
  419. cs = sentinel();
  420. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  421. {
  422. // if the doc does not terminate
  423. // with a newline, treat it as the
  424. // end of the comment
  425. if(terminal && ! more_)
  426. {
  427. if(BOOST_JSON_UNLIKELY(! h_.on_comment(
  428. {start, cs.remain(start)}, ec_)))
  429. return fail(cs.end());
  430. return cs.end();
  431. }
  432. if(BOOST_JSON_UNLIKELY(! h_.on_comment_part(
  433. {start, cs.remain(start)}, ec_)))
  434. return fail(cs.end());
  435. if(terminal)
  436. return suspend(cs.end(), state::com2);
  437. return maybe_suspend(cs.end(), state::com2);
  438. }
  439. break;
  440. case '*':
  441. do
  442. {
  443. ++cs;
  444. do_com3:
  445. // KRYSTIAN TODO: this is a mess, we have to fix this
  446. remain = cs.remain();
  447. cs = remain ? static_cast<const char*>(
  448. std::memchr(cs.begin(), '*', remain)) : sentinel();
  449. if(! cs.begin())
  450. cs = sentinel();
  451. // stopped inside a c comment
  452. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  453. {
  454. if(BOOST_JSON_UNLIKELY(! h_.on_comment_part(
  455. {start, cs.remain(start)}, ec_)))
  456. return fail(cs.end());
  457. return maybe_suspend(cs.end(), state::com3);
  458. }
  459. // found a asterisk, check if the next char is a slash
  460. ++cs;
  461. do_com4:
  462. if(BOOST_JSON_UNLIKELY(! cs))
  463. {
  464. if(BOOST_JSON_UNLIKELY(! h_.on_comment_part(
  465. {start, cs.used(start)}, ec_)))
  466. return fail(cs.begin());
  467. return maybe_suspend(cs.begin(), state::com4);
  468. }
  469. }
  470. while(*cs != '/');
  471. }
  472. ++cs;
  473. if(BOOST_JSON_UNLIKELY(! h_.on_comment(
  474. {start, cs.used(start)}, ec_)))
  475. return fail(cs.begin());
  476. return cs.begin();
  477. }
  478. template<class Handler>
  479. template<bool StackEmpty_>
  480. const char*
  481. basic_parser<Handler>::
  482. parse_document(const char* p,
  483. std::integral_constant<bool, StackEmpty_> stack_empty)
  484. {
  485. detail::const_stream_wrapper cs(p, end_);
  486. if(! stack_empty && ! st_.empty())
  487. {
  488. state st;
  489. st_.peek(st);
  490. switch(st)
  491. {
  492. default: goto do_doc2;
  493. case state::doc1:
  494. st_.pop(st);
  495. goto do_doc1;
  496. case state::doc3:
  497. st_.pop(st);
  498. goto do_doc3;
  499. case state::com1: case state::com2:
  500. case state::com3: case state::com4:
  501. goto do_doc4;
  502. }
  503. }
  504. do_doc1:
  505. cs = detail::count_whitespace(cs.begin(), cs.end());
  506. if(BOOST_JSON_UNLIKELY(! cs))
  507. return maybe_suspend(cs.begin(), state::doc1);
  508. do_doc2:
  509. switch(+opt_.allow_comments |
  510. (opt_.allow_trailing_commas << 1) |
  511. (opt_.allow_invalid_utf8 << 2))
  512. {
  513. // no extensions
  514. default:
  515. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::false_type(), std::false_type(), opt_.allow_invalid_utf16);
  516. break;
  517. // comments
  518. case 1:
  519. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::false_type(), std::false_type(), opt_.allow_invalid_utf16);
  520. break;
  521. // trailing
  522. case 2:
  523. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::true_type(), std::false_type(), opt_.allow_invalid_utf16);
  524. break;
  525. // comments & trailing
  526. case 3:
  527. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::true_type(), std::false_type(), opt_.allow_invalid_utf16);
  528. break;
  529. // skip validation
  530. case 4:
  531. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::false_type(), std::true_type(), opt_.allow_invalid_utf16);
  532. break;
  533. // comments & skip validation
  534. case 5:
  535. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::false_type(), std::true_type(), opt_.allow_invalid_utf16);
  536. break;
  537. // trailing & skip validation
  538. case 6:
  539. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::true_type(), std::true_type(), opt_.allow_invalid_utf16);
  540. break;
  541. // comments & trailing & skip validation
  542. case 7:
  543. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::true_type(), std::true_type(), opt_.allow_invalid_utf16);
  544. break;
  545. }
  546. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  547. // the appropriate state has already been pushed into stack
  548. return sentinel();
  549. do_doc3:
  550. cs = detail::count_whitespace(cs.begin(), cs.end());
  551. if(BOOST_JSON_UNLIKELY(! cs))
  552. {
  553. if(more_)
  554. return suspend(cs.begin(), state::doc3);
  555. }
  556. else if(opt_.allow_comments && *cs == '/')
  557. {
  558. do_doc4:
  559. cs = parse_comment(cs.begin(), stack_empty, std::true_type());
  560. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  561. return sentinel();
  562. goto do_doc3;
  563. }
  564. return cs.begin();
  565. }
  566. template<class Handler>
  567. template<
  568. bool StackEmpty_,
  569. bool AllowComments_/*,
  570. bool AllowTrailing_,
  571. bool AllowBadUTF8_*/>
  572. const char*
  573. basic_parser<Handler>::
  574. parse_value(const char* p,
  575. std::integral_constant<bool, StackEmpty_> stack_empty,
  576. std::integral_constant<bool, AllowComments_> allow_comments,
  577. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  578. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  579. bool allow_bad_utf16)
  580. {
  581. if(stack_empty || st_.empty())
  582. {
  583. loop:
  584. switch(*p)
  585. {
  586. case '0':
  587. return mp11::mp_with_index<3>(
  588. static_cast<unsigned char>(opt_.numbers),
  589. parse_number_helper<true, '0'>{ this, p });
  590. case '-':
  591. return mp11::mp_with_index<3>(
  592. static_cast<unsigned char>(opt_.numbers),
  593. parse_number_helper<true, '-'>{ this, p });
  594. case '1': case '2': case '3':
  595. case '4': case '5': case '6':
  596. case '7': case '8': case '9':
  597. return mp11::mp_with_index<3>(
  598. static_cast<unsigned char>(opt_.numbers),
  599. parse_number_helper<true, '+'>{ this, p });
  600. case 'n':
  601. return parse_literal( p, detail::literals_c<detail::literals::null>() );
  602. case 't':
  603. return parse_literal( p, detail::literals_c<detail::literals::true_>() );
  604. case 'f':
  605. return parse_literal( p, detail::literals_c<detail::literals::false_>() );
  606. case 'I':
  607. if( !opt_.allow_infinity_and_nan )
  608. {
  609. BOOST_STATIC_CONSTEXPR source_location loc
  610. = BOOST_CURRENT_LOCATION;
  611. return fail(p, error::syntax, &loc);
  612. }
  613. return parse_literal( p, detail::literals_c<detail::literals::infinity>() );
  614. case 'N':
  615. if( !opt_.allow_infinity_and_nan )
  616. {
  617. BOOST_STATIC_CONSTEXPR source_location loc
  618. = BOOST_CURRENT_LOCATION;
  619. return fail(p, error::syntax, &loc);
  620. }
  621. return parse_literal(p, detail::literals_c<detail::literals::nan>() );
  622. case '"':
  623. return parse_string(p, std::true_type(), std::false_type(), allow_bad_utf8, allow_bad_utf16);
  624. case '[':
  625. return parse_array(p, std::true_type(), allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  626. case '{':
  627. return parse_object(p, std::true_type(), allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  628. case '/':
  629. if(! allow_comments)
  630. {
  631. BOOST_STATIC_CONSTEXPR source_location loc
  632. = BOOST_CURRENT_LOCATION;
  633. return fail(p, error::syntax, &loc);
  634. }
  635. p = parse_comment(p, stack_empty, std::false_type());
  636. // KRYSTIAN NOTE: incomplete takes const_stream, we either
  637. // can add an overload, change the existing one to take a pointer,
  638. // or just leave it as is
  639. if(BOOST_JSON_UNLIKELY(p == sentinel()))
  640. return maybe_suspend(p, state::val2);
  641. BOOST_FALLTHROUGH;
  642. case ' ':
  643. case '\t':
  644. case '\n':
  645. case '\r':
  646. p = detail::count_whitespace(p, end_);
  647. if(BOOST_JSON_UNLIKELY(p == end_))
  648. return maybe_suspend(p, state::val1);
  649. goto loop;
  650. default:
  651. {
  652. BOOST_STATIC_CONSTEXPR source_location loc
  653. = BOOST_CURRENT_LOCATION;
  654. return fail(p, error::syntax, &loc);
  655. }
  656. }
  657. }
  658. return resume_value(p, allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  659. }
  660. template<class Handler>
  661. template<
  662. bool AllowComments_/*,
  663. bool AllowTrailing_,
  664. bool AllowBadUTF8_*/>
  665. const char*
  666. basic_parser<Handler>::
  667. resume_value(const char* p,
  668. std::integral_constant<bool, AllowComments_> allow_comments,
  669. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  670. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  671. bool allow_bad_utf16)
  672. {
  673. state st;
  674. st_.peek(st);
  675. switch(st)
  676. {
  677. default: BOOST_JSON_UNREACHABLE();
  678. case state::lit1:
  679. return parse_literal(p, detail::literals_c<detail::literals::resume>() );
  680. case state::str1: case state::str2:
  681. case state::str8:
  682. return parse_string(p, std::false_type(), std::false_type(), allow_bad_utf8, allow_bad_utf16);
  683. case state::arr1: case state::arr2:
  684. case state::arr3: case state::arr4:
  685. case state::arr5: case state::arr6:
  686. return parse_array(p, std::false_type(), allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  687. case state::obj1: case state::obj2:
  688. case state::obj3: case state::obj4:
  689. case state::obj5: case state::obj6:
  690. case state::obj7: case state::obj8:
  691. case state::obj9: case state::obj10:
  692. case state::obj11:
  693. return parse_object(p, std::false_type(), allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  694. case state::num1: case state::num2:
  695. case state::num3: case state::num4:
  696. case state::num5: case state::num6:
  697. case state::num7: case state::num8:
  698. case state::exp1: case state::exp2:
  699. case state::exp3:
  700. return mp11::mp_with_index<3>(
  701. static_cast<unsigned char>(opt_.numbers),
  702. parse_number_helper<false, 0>{ this, p });
  703. // KRYSTIAN NOTE: these are special cases
  704. case state::val1:
  705. {
  706. st_.pop(st);
  707. BOOST_ASSERT(st_.empty());
  708. p = detail::count_whitespace(p, end_);
  709. if(BOOST_JSON_UNLIKELY(p == end_))
  710. return maybe_suspend(p, state::val1);
  711. return parse_value(p, std::true_type(), allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  712. }
  713. case state::val2:
  714. {
  715. st_.pop(st);
  716. p = parse_comment(p, std::false_type(), std::false_type());
  717. if(BOOST_JSON_UNLIKELY(p == sentinel()))
  718. return maybe_suspend(p, state::val2);
  719. if(BOOST_JSON_UNLIKELY( p == end_ ))
  720. return maybe_suspend(p, state::val3);
  721. BOOST_ASSERT(st_.empty());
  722. return parse_value(p, std::true_type(), std::true_type(), allow_trailing, allow_bad_utf8, allow_bad_utf16);
  723. }
  724. case state::val3:
  725. {
  726. st_.pop(st);
  727. return parse_value(p, std::true_type(), std::true_type(), allow_trailing, allow_bad_utf8, allow_bad_utf16);
  728. }
  729. }
  730. }
  731. template<class Handler>
  732. template<class Literal>
  733. const char*
  734. basic_parser<Handler>::
  735. parse_literal(const char* p, Literal)
  736. {
  737. using L = detail::literals;
  738. std::size_t cur_lit;
  739. std::size_t offset;
  740. detail::const_stream_wrapper cs(p, end_);
  741. BOOST_IF_CONSTEXPR( Literal::value != L::resume )
  742. {
  743. constexpr std::size_t index = literal_index(Literal::value);
  744. constexpr char const* literal = detail::literal_strings[index];
  745. constexpr std::size_t sz = detail::literal_sizes[index];
  746. if(BOOST_JSON_LIKELY( cs.remain() >= sz ))
  747. {
  748. int const cmp = std::memcmp(cs.begin(), literal, sz);
  749. if( cmp != 0 )
  750. {
  751. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  752. return fail(cs.begin(), error::syntax, &loc);
  753. }
  754. BOOST_IF_CONSTEXPR( Literal::value == L::null )
  755. {
  756. if(BOOST_JSON_UNLIKELY(
  757. ! h_.on_null(ec_)))
  758. return fail(cs.begin());
  759. }
  760. else BOOST_IF_CONSTEXPR( Literal::value == L::true_ )
  761. {
  762. if(BOOST_JSON_UNLIKELY(
  763. ! h_.on_bool(true, ec_)))
  764. return fail(cs.begin());
  765. }
  766. else BOOST_IF_CONSTEXPR( Literal::value == L::false_ )
  767. {
  768. if(BOOST_JSON_UNLIKELY(
  769. ! h_.on_bool(false, ec_)))
  770. return fail(cs.begin());
  771. }
  772. else BOOST_IF_CONSTEXPR( Literal::value == L::infinity )
  773. {
  774. if(BOOST_JSON_UNLIKELY(
  775. ! h_.on_double(
  776. std::numeric_limits<double>::infinity(),
  777. string_view(literal, sz),
  778. ec_)))
  779. return fail(cs.begin());
  780. }
  781. else BOOST_IF_CONSTEXPR( Literal::value == L::neg_infinity )
  782. {
  783. if(BOOST_JSON_UNLIKELY(
  784. ! h_.on_double(
  785. -std::numeric_limits<double>::infinity(),
  786. string_view(literal, sz),
  787. ec_)))
  788. return fail(cs.begin());
  789. }
  790. else BOOST_IF_CONSTEXPR( Literal::value == L::nan )
  791. {
  792. if(BOOST_JSON_UNLIKELY(
  793. ! h_.on_double(
  794. std::numeric_limits<double>::quiet_NaN(),
  795. string_view(literal, sz),
  796. ec_)))
  797. return fail(cs.begin());
  798. }
  799. else
  800. {
  801. BOOST_JSON_UNREACHABLE();
  802. }
  803. cs += sz;
  804. return cs.begin();
  805. }
  806. offset = 0;
  807. cur_lit = index;
  808. }
  809. else
  810. {
  811. state st;
  812. st_.pop(st);
  813. BOOST_ASSERT( st == state::lit1 );
  814. cur_lit = cur_lit_;
  815. offset = lit_offset_;
  816. }
  817. std::size_t const lit_size = detail::literal_sizes[cur_lit];
  818. std::size_t const size = (std::min)( lit_size - offset, cs.remain() );
  819. int cmp = 0;
  820. if(BOOST_JSON_LIKELY( cs.begin() ))
  821. cmp = std::memcmp(
  822. cs.begin(), detail::literal_strings[cur_lit] + offset, size );
  823. if( cmp != 0 )
  824. {
  825. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  826. return fail(cs.begin(), error::syntax, &loc);
  827. }
  828. if(BOOST_JSON_UNLIKELY( offset + size < lit_size ))
  829. {
  830. BOOST_ASSERT( cur_lit < 256 );
  831. cur_lit_ = static_cast<unsigned char>( cur_lit );
  832. BOOST_ASSERT( offset + size < 256 );
  833. lit_offset_ = static_cast<unsigned char>( offset + size );
  834. return maybe_suspend(cs.begin() + size, state::lit1);
  835. }
  836. switch( static_cast<L>(cur_lit) )
  837. {
  838. case L::null:
  839. if(BOOST_JSON_UNLIKELY(
  840. ! h_.on_null(ec_)))
  841. return fail(cs.begin());
  842. break;
  843. case L::true_:
  844. if(BOOST_JSON_UNLIKELY(
  845. ! h_.on_bool(true, ec_)))
  846. return fail(cs.begin());
  847. break;
  848. case L::false_:
  849. if(BOOST_JSON_UNLIKELY(
  850. ! h_.on_bool(false, ec_)))
  851. return fail(cs.begin());
  852. break;
  853. case L::infinity:
  854. if(BOOST_JSON_UNLIKELY(
  855. ! h_.on_double(
  856. std::numeric_limits<double>::infinity(),
  857. string_view(
  858. detail::literal_strings[ literal_index(L::infinity) ],
  859. detail::literal_sizes[ literal_index(L::infinity) ]),
  860. ec_)))
  861. return fail(cs.begin());
  862. break;
  863. case L::neg_infinity:
  864. if(BOOST_JSON_UNLIKELY(
  865. ! h_.on_double(
  866. -std::numeric_limits<double>::infinity(),
  867. string_view(
  868. detail::literal_strings[ literal_index(L::neg_infinity) ],
  869. detail::literal_sizes[ literal_index(L::neg_infinity) ]),
  870. ec_)))
  871. return fail(cs.begin());
  872. break;
  873. case L::nan:
  874. if(BOOST_JSON_UNLIKELY(
  875. ! h_.on_double(
  876. std::numeric_limits<double>::quiet_NaN(),
  877. string_view(
  878. detail::literal_strings[ literal_index(L::nan) ],
  879. detail::literal_sizes[ literal_index(L::nan) ]),
  880. ec_)))
  881. return fail(cs.begin());
  882. break;
  883. default: BOOST_JSON_UNREACHABLE();
  884. }
  885. cs += size;
  886. return cs.begin();
  887. }
  888. //----------------------------------------------------------
  // Parse a JSON string, or an object key when `is_key` is set.
  //
  // On a fresh call `p` must point at the opening double quote. When
  // `stack_empty` is false and the state stack is not empty, the saved
  // state and running `total` are popped and parsing continues at the
  // matching label (do_str1, do_str2 or do_str8).
  //
  // The input is consumed in runs accepted by detail::count_valid. Each
  // run is reported through on_key_part/on_string_part, the last one
  // through on_key/on_string. Escapes are delegated to parse_escaped.
  // A multi-byte sequence cut off by the end of the buffer is stashed
  // in seq_ and completed in do_str8.
  //
  // allow_bad_utf8  - selects count_valid<true> and disables the seq_ path
  // allow_bad_utf16 - forwarded to parse_escaped
  //
  // Returns the position after the closing quote, or the result of
  // fail() / maybe_suspend() / suspend_or_fail().
  889. template<class Handler>
  890. template<bool StackEmpty_, bool IsKey_>
  891. const char*
  892. basic_parser<Handler>::
  893. parse_string(const char* p,
  894. std::integral_constant<bool, StackEmpty_> stack_empty,
  895. std::integral_constant<bool, IsKey_> is_key,
  896. bool allow_bad_utf8,
  897. bool allow_bad_utf16)
  898. {
  899. detail::const_stream_wrapper cs(p, end_);
  // total: bytes accounted for so far; start/size: the current run
  900. std::size_t total;
  901. char const* start;
  902. std::size_t size;
  // resuming: restore the state and total saved by the earlier call
  903. if(! stack_empty && ! st_.empty())
  904. {
  905. state st;
  906. st_.pop(st);
  907. st_.pop(total);
  908. switch(st)
  909. {
  910. default: BOOST_JSON_UNREACHABLE();
  911. case state::str2: goto do_str2;
  912. case state::str8: goto do_str8;
  913. case state::str1: break;
  914. }
  915. }
  916. else
  917. {
  // fresh call: step over the opening quote
  918. BOOST_ASSERT(*cs == '\x22'); // '"'
  919. ++cs;
  920. total = 0;
  921. }
  922. do_str1:
  // advance over the longest run count_valid accepts; size is what
  // cs.used(start) reports for that run
  923. start = cs.begin();
  924. cs = allow_bad_utf8?
  925. detail::count_valid<true>(cs.begin(), cs.end()):
  926. detail::count_valid<false>(cs.begin(), cs.end());
  927. size = cs.used(start);
  // The limit checks compare against the remaining allowance
  // (max - total) instead of computing total + size first.
  928. if(is_key)
  929. {
  930. BOOST_ASSERT(total <= Handler::max_key_size);
  931. if(BOOST_JSON_UNLIKELY(size >
  932. Handler::max_key_size - total))
  933. {
  934. BOOST_STATIC_CONSTEXPR source_location loc
  935. = BOOST_CURRENT_LOCATION;
  936. return fail(cs.begin(), error::key_too_large, &loc);
  937. }
  938. }
  939. else
  940. {
  941. BOOST_ASSERT(total <= Handler::max_string_size);
  942. if(BOOST_JSON_UNLIKELY(size >
  943. Handler::max_string_size - total))
  944. {
  945. BOOST_STATIC_CONSTEXPR source_location loc
  946. = BOOST_CURRENT_LOCATION;
  947. return fail(cs.begin(), error::string_too_large, &loc);
  948. }
  949. }
  950. total += size;
  // input exhausted before the closing quote was seen
  951. if(BOOST_JSON_UNLIKELY(! cs))
  952. {
  953. // call handler if the string isn't empty
  954. if(BOOST_JSON_LIKELY(size))
  955. {
  956. {
  957. bool r = is_key?
  958. h_.on_key_part( {start, size}, total, ec_ ):
  959. h_.on_string_part( {start, size}, total, ec_ );
  960. if(BOOST_JSON_UNLIKELY(!r))
  961. {
  962. return fail(cs.begin());
  963. }
  964. }
  965. }
  966. return maybe_suspend(cs.begin(), state::str1, total);
  967. }
  968. // at this point all valid characters have been skipped, so
  969. // if there are any more characters, they are either escaped, or incomplete
  970. // utf8, or invalid utf8
  971. if(BOOST_JSON_UNLIKELY(*cs != '\x22')) // '"'
  972. {
  973. // sequence is invalid or incomplete
  974. if((*cs & 0x80) && !allow_bad_utf8)
  975. {
  // stash the bytes that are left; if seq_ already holds a complete
  // sequence, count_valid stopped on it for being invalid
  976. seq_.save(cs.begin(), cs.remain());
  977. if(BOOST_JSON_UNLIKELY(seq_.complete()))
  978. {
  979. BOOST_STATIC_CONSTEXPR source_location loc
  980. = BOOST_CURRENT_LOCATION;
  981. return fail(cs.begin(), error::syntax, &loc);
  982. }
  // report the run in front of the partial sequence, then wait for
  // the rest of the sequence (completed in do_str8)
  983. if(BOOST_JSON_LIKELY(size))
  984. {
  985. bool const r = is_key?
  986. h_.on_key_part( {start, size}, total, ec_ ):
  987. h_.on_string_part( {start, size}, total, ec_ );
  988. if(BOOST_JSON_UNLIKELY( !r ))
  989. return fail( cs.begin() );
  990. }
  991. return maybe_suspend(cs.end(), state::str8, total);
  992. }
  993. else if(BOOST_JSON_LIKELY(*cs == '\\'))
  994. {
  995. // flush unescaped run from input
  996. if(BOOST_JSON_LIKELY(size))
  997. {
  998. bool const r = is_key?
  999. h_.on_key_part( {start, size}, total, ec_ ):
  1000. h_.on_string_part( {start, size}, total, ec_ );
  1001. if(BOOST_JSON_UNLIKELY( !r ))
  1002. return fail( cs.begin() );
  1003. }
  1004. do_str2:
  // parse_escaped receives total by reference and updates it itself
  1005. cs = parse_escaped(cs.begin(), total, stack_empty, is_key, allow_bad_utf16);
  1006. if(BOOST_JSON_UNLIKELY( incomplete(cs) ))
  1007. return suspend_or_fail(state::str2, total);
  1008. goto do_str1;
  1009. }
  1010. // illegal control
  1011. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1012. return fail(cs.begin(), error::syntax, &loc);
  1013. }
  // closing quote reached: report the final run with the complete
  // total, then step over the quote
  1014. {
  1015. bool r = is_key?
  1016. h_.on_key( {start, size}, total, ec_ ):
  1017. h_.on_string( {start, size}, total, ec_ );
  1018. if(BOOST_JSON_UNLIKELY(!r))
  1019. {
  1020. return fail(cs.begin());
  1021. }
  1022. }
  1023. ++cs;
  1024. return cs.begin();
  1025. do_str8:
  // complete the sequence stashed in seq_ with bytes from the new
  // buffer; `needed` is read before append() so the stream can be
  // advanced by that amount afterwards
  1026. uint8_t needed = seq_.needed();
  1027. if(BOOST_JSON_UNLIKELY( !seq_.append(cs.begin(), cs.remain()) ))
  1028. return maybe_suspend(cs.end(), state::str8, total);
  1029. if(BOOST_JSON_UNLIKELY( !seq_.valid() ))
  1030. {
  1031. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1032. return fail(cs.begin(), error::syntax, &loc);
  1033. }
  // NOTE(review): total is passed on without adding seq_.length(), and
  // no size-limit check is made for these bytes -- confirm whether the
  // handlers expect the running total to include them
  1034. {
  1035. bool const r = is_key?
  1036. h_.on_key_part( {seq_.data(), seq_.length()}, total, ec_ ):
  1037. h_.on_string_part( {seq_.data(), seq_.length()}, total, ec_ );
  1038. if(BOOST_JSON_UNLIKELY( !r ))
  1039. return fail( cs.begin() );
  1040. }
  1041. cs += needed;
  1042. goto do_str1;
  1043. }
  // Parse a run of backslash escapes inside a string or key.
  //
  // On a fresh call `p` points at a backslash. The unescaped bytes are
  // collected in a local buffer and handed to on_key_part (when
  // `is_key` is true) or on_string_part. `total` is increased by the
  // size of every buffer that is handed over. Parsing continues as long
  // as the next character is another backslash.
  //
  // When `stack_empty` is false and the state stack is not empty, the
  // saved state is popped and parsing continues at the matching label.
  //
  // With `allow_bad_utf16` set, malformed surrogate escapes are replaced
  // by U+FFFD instead of producing an error.
  //
  // Returns the position after the last escape handled, or the result
  // of fail() / maybe_suspend().
  1044. template<class Handler>
  1045. template<bool StackEmpty_>
  1046. const char*
  1047. basic_parser<Handler>::
  1048. parse_escaped(
  1049. const char* p,
  1050. std::size_t& total,
  1051. std::integral_constant<bool, StackEmpty_> stack_empty,
  1052. bool is_key,
  1053. bool allow_bad_utf16)
  1054. {
  1055. constexpr unsigned urc = 0xFFFD; // Unicode replacement character
  // error code and size limit depend on whether a key or a string
  // is being parsed
  1056. auto const ev_too_large = is_key?
  1057. error::key_too_large : error::string_too_large;
  1058. auto const max_size = is_key?
  1059. Handler::max_key_size : Handler::max_string_size;
  1060. int digit;
  1061. //---------------------------------------------------------------
  1062. //
  1063. // To handle escapes, a local temporary buffer accumulates
  1064. // the unescaped result. The algorithm attempts to fill the
  1065. // buffer to capacity before invoking the handler.
  1066. // In some cases the temporary buffer needs to be flushed
  1067. // before it is full:
  1068. // * When the closing double quote is seen
  1069. // * When there is no more input (and more is expected later)
  1070. // A goal of the algorithm is to call the handler as few times
  1071. // as possible. Thus, when the first escape is encountered,
  1072. // the algorithm attempts to fill the temporary buffer first.
  1073. //
  1074. detail::buffer<BOOST_JSON_STACK_BUFFER_SIZE> temp;
  1075. // Unescaped JSON is never larger than its escaped version.
  1076. // To efficiently process only what will fit in the temporary buffer,
  1077. // the size of the input stream is temporarily "clipped" to the size
  1078. // of the temporary buffer.
  1079. // handle escaped character
  1080. detail::clipped_const_stream cs(p, end_);
  1081. cs.clip(temp.max_size());
  // resuming: jump to the point where the earlier call ran out of input
  1082. if(! stack_empty && ! st_.empty())
  1083. {
  1084. state st;
  1085. st_.pop(st);
  1086. switch(st)
  1087. {
  1088. default: BOOST_JSON_UNREACHABLE();
  1089. case state::str3: goto do_str3;
  1090. case state::str4: goto do_str4;
  1091. case state::str5: goto do_str5;
  1092. case state::str6: goto do_str6;
  1093. case state::str7: goto do_str7;
  1094. case state::sur1: goto do_sur1;
  1095. case state::sur2: goto do_sur2;
  1096. case state::sur3: goto do_sur3;
  1097. case state::sur4: goto do_sur4;
  1098. case state::sur5: goto do_sur5;
  1099. case state::sur6: goto do_sur6;
  1100. }
  1101. }
  // one iteration per escape sequence
  1102. while(true)
  1103. {
  1104. BOOST_ASSERT( temp.capacity() );
  1105. BOOST_ASSERT(*cs == '\\');
  1106. ++cs;
  1107. do_str3:
  // The clipped window is used up: hand over what has been collected,
  // open a new window, and suspend only if the real input is
  // exhausted as well.
  1108. if(BOOST_JSON_UNLIKELY(! cs))
  1109. {
  1110. if(BOOST_JSON_LIKELY(! temp.empty()))
  1111. {
  1112. BOOST_ASSERT(total <= max_size);
  1113. if(BOOST_JSON_UNLIKELY(
  1114. temp.size() > max_size - total))
  1115. {
  1116. BOOST_STATIC_CONSTEXPR source_location loc
  1117. = BOOST_CURRENT_LOCATION;
  1118. return fail(cs.begin(), ev_too_large, &loc);
  1119. }
  1120. total += temp.size();
  1121. {
  1122. bool r = is_key
  1123. ? h_.on_key_part(temp.get(), total, ec_)
  1124. : h_.on_string_part(temp.get(), total, ec_);
  1125. if(BOOST_JSON_UNLIKELY(!r))
  1126. {
  1127. return fail(cs.begin());
  1128. }
  1129. }
  1130. temp.clear();
  1131. }
  1132. cs.clip(temp.max_size());
  1133. if(BOOST_JSON_UNLIKELY(! cs))
  1134. return maybe_suspend(cs.begin(), state::str3);
  1135. }
  // dispatch on the character that follows the backslash
  1136. switch(*cs)
  1137. {
  1138. default:
  1139. {
  1140. BOOST_STATIC_CONSTEXPR source_location loc
  1141. = BOOST_CURRENT_LOCATION;
  1142. return fail(cs.begin(), error::syntax, &loc);
  1143. }
  1144. case '\x22': // '"'
  1145. temp.push_back('\x22');
  1146. ++cs;
  1147. break;
  1148. case '\\':
  1149. temp.push_back('\\');
  1150. ++cs;
  1151. break;
  1152. case '/':
  1153. temp.push_back('/');
  1154. ++cs;
  1155. break;
  1156. case 'b':
  1157. temp.push_back('\x08');
  1158. ++cs;
  1159. break;
  1160. case 'f':
  1161. temp.push_back('\x0c');
  1162. ++cs;
  1163. break;
  1164. case 'n':
  1165. temp.push_back('\x0a');
  1166. ++cs;
  1167. break;
  1168. case 'r':
  1169. temp.push_back('\x0d');
  1170. ++cs;
  1171. break;
  1172. case 't':
  1173. temp.push_back('\x09');
  1174. ++cs;
  1175. break;
  1176. case 'u':
  1177. // utf16 escape
  1178. //
  1179. // fast path only when the buffer
  1180. // is large enough for 2 surrogates
  // (cs is at the 'u'; "uXXXX\uXXXX" is 11 characters, hence > 10)
  1181. if(BOOST_JSON_LIKELY(cs.remain() > 10))
  1182. {
  1183. // KRYSTIAN TODO: this could be done
  1184. // with fewer instructions
  // load the four characters after the 'u' as one word; d1 is taken
  // from the low byte and becomes the most significant hex digit
  1185. digit = detail::load_little_endian<4>(
  1186. cs.begin() + 1);
  1187. int d4 = detail::hex_digit(static_cast<
  1188. unsigned char>(digit >> 24));
  1189. int d3 = detail::hex_digit(static_cast<
  1190. unsigned char>(digit >> 16));
  1191. int d2 = detail::hex_digit(static_cast<
  1192. unsigned char>(digit >> 8));
  1193. int d1 = detail::hex_digit(static_cast<
  1194. unsigned char>(digit));
  // hex_digit yields -1 for a non-hex character, so OR-ing the four
  // results detects any bad digit
  1195. if(BOOST_JSON_UNLIKELY(
  1196. (d1 | d2 | d3 | d4) == -1))
  1197. {
  // NOTE(review): cs is advanced once per valid digit among the
  // first three, starting from the 'u' -- the reported position is
  // therefore only approximate; confirm this is intended
  1198. if(d1 != -1)
  1199. ++cs;
  1200. if(d2 != -1)
  1201. ++cs;
  1202. if(d3 != -1)
  1203. ++cs;
  1204. BOOST_STATIC_CONSTEXPR source_location loc
  1205. = BOOST_CURRENT_LOCATION;
  1206. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1207. }
  1208. // 32 bit unicode scalar value
  1209. unsigned u1 =
  1210. (d1 << 12) + (d2 << 8) +
  1211. (d3 << 4) + d4;
  1212. // valid unicode scalar values are
  1213. // [0, D7FF] and [E000, 10FFFF]
  1214. // values within this range are valid utf-8
  1215. // code points and invalid leading surrogates.
  // not a surrogate: skip "uXXXX" and emit the code point directly
  1216. if(BOOST_JSON_LIKELY(
  1217. u1 < 0xd800 || u1 > 0xdfff))
  1218. {
  1219. cs += 5;
  1220. temp.append_utf8(u1);
  1221. break;
  1222. }
  // [DC00, DFFF]: a trailing surrogate where a leading one is required
  1223. if(BOOST_JSON_UNLIKELY(u1 > 0xdbff))
  1224. {
  1225. // If it's an illegal leading surrogate and
  1226. // the parser does not allow it, return an error.
  1227. if(!allow_bad_utf16)
  1228. {
  1229. BOOST_STATIC_CONSTEXPR source_location loc
  1230. = BOOST_CURRENT_LOCATION;
  1231. return fail(cs.begin(), error::illegal_leading_surrogate,
  1232. &loc);
  1233. }
  1234. // Otherwise, append the Unicode replacement character
  1235. else
  1236. {
  1237. cs += 5;
  1238. temp.append_utf8(urc);
  1239. break;
  1240. }
  1241. }
  // u1 is a leading surrogate: a "\uXXXX" trailing surrogate must follow
  1242. cs += 5;
  1243. // KRYSTIAN TODO: this can be a two byte load
  1244. // and a single comparison. We lose error information,
  1245. // but it's faster.
  1246. if(BOOST_JSON_UNLIKELY(*cs != '\\'))
  1247. {
  1248. // If the next character is not a backslash and
  1249. // the parser does not allow it, return a syntax error.
  1250. if(!allow_bad_utf16)
  1251. {
  1252. BOOST_STATIC_CONSTEXPR source_location loc
  1253. = BOOST_CURRENT_LOCATION;
  1254. return fail(cs.begin(), error::syntax, &loc);
  1255. }
  1256. // Otherwise, append the Unicode replacement character since
  1257. // the first code point is a valid leading surrogate
  1258. else
  1259. {
  1260. temp.append_utf8(urc);
  1261. break;
  1262. }
  1263. }
  1264. ++cs;
  1265. if(BOOST_JSON_UNLIKELY(*cs != 'u'))
  1266. {
  1267. if (!allow_bad_utf16)
  1268. {
  1269. BOOST_STATIC_CONSTEXPR source_location loc
  1270. = BOOST_CURRENT_LOCATION;
  1271. return fail(cs.begin(), error::syntax, &loc);
  1272. }
  1273. // Otherwise, append the Unicode replacement character since
  1274. // the first code point is a valid leading surrogate
  1275. else
  1276. {
  // the backslash is already consumed, so the character at cs is
  // handled as an ordinary escape by re-entering at do_str3
  1277. temp.append_utf8(urc);
  1278. goto do_str3;
  1279. }
  1280. }
  1281. ++cs;
  // second group of four hex digits, decoded like the first
  1282. digit = detail::load_little_endian<4>(cs.begin());
  1283. d4 = detail::hex_digit(static_cast<
  1284. unsigned char>(digit >> 24));
  1285. d3 = detail::hex_digit(static_cast<
  1286. unsigned char>(digit >> 16));
  1287. d2 = detail::hex_digit(static_cast<
  1288. unsigned char>(digit >> 8));
  1289. d1 = detail::hex_digit(static_cast<
  1290. unsigned char>(digit));
  1291. if(BOOST_JSON_UNLIKELY(
  1292. (d1 | d2 | d3 | d4) == -1))
  1293. {
  1294. if(d1 != -1)
  1295. ++cs;
  1296. if(d2 != -1)
  1297. ++cs;
  1298. if(d3 != -1)
  1299. ++cs;
  1300. BOOST_STATIC_CONSTEXPR source_location loc
  1301. = BOOST_CURRENT_LOCATION;
  1302. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1303. }
  1304. unsigned u2 =
  1305. (d1 << 12) + (d2 << 8) +
  1306. (d3 << 4) + d4;
  1307. // Check if the second code point is a valid trailing surrogate.
  1308. // Valid trailing surrogates are [DC00, DFFF]
  1309. if(BOOST_JSON_UNLIKELY(
  1310. u2 < 0xdc00 || u2 > 0xdfff))
  1311. {
  1312. // If not valid and the parser does not allow it, return an error.
  1313. if(!allow_bad_utf16)
  1314. {
  1315. BOOST_STATIC_CONSTEXPR source_location loc
  1316. = BOOST_CURRENT_LOCATION;
  1317. return fail(cs.begin(), error::illegal_trailing_surrogate,
  1318. &loc);
  1319. }
  1320. // Append the replacement character for the
  1321. // first leading surrogate.
  1322. cs += 4;
  1323. temp.append_utf8(urc);
  1324. // Check if the second code point is a
  1325. // valid unicode scalar value (invalid leading
  1326. // or trailing surrogate)
  1327. if (u2 < 0xd800 || u2 > 0xdbff)
  1328. {
  1329. temp.append_utf8(u2);
  1330. break;
  1331. }
  1332. // If it is a valid leading surrogate
  1333. else
  1334. {
  // carry it over as the new leading surrogate and continue on the
  // character-at-a-time path
  1335. u1_ = u2;
  1336. goto do_sur1;
  1337. }
  1338. }
  1339. cs += 4;
  1340. // Calculate the Unicode code point from the surrogate pair and
  1341. // append the UTF-8 representation.
  1342. unsigned cp =
  1343. ((u1 - 0xd800) << 10) +
  1344. ((u2 - 0xdc00)) +
  1345. 0x10000;
  1346. // utf-16 surrogate pair
  1347. temp.append_utf8(cp);
  1348. break;
  1349. }
  // Slow path: fewer than 11 characters are left in the window. Each
  // character is read individually, with a suspension point before it,
  // and the partial values are kept in the members u1_/u2_.
  1350. // flush
  // the buffer is handed over first; the BOOST_ASSERT(temp.empty())
  // checks further down depend on it
  1351. if(BOOST_JSON_LIKELY(! temp.empty()))
  1352. {
  1353. BOOST_ASSERT(total <= max_size);
  1354. if(BOOST_JSON_UNLIKELY(
  1355. temp.size() > max_size - total))
  1356. {
  1357. BOOST_STATIC_CONSTEXPR source_location loc
  1358. = BOOST_CURRENT_LOCATION;
  1359. return fail(cs.begin(), ev_too_large, &loc);
  1360. }
  1361. total += temp.size();
  1362. {
  1363. bool r = is_key
  1364. ? h_.on_key_part(temp.get(), total, ec_)
  1365. : h_.on_string_part(temp.get(), total, ec_);
  1366. if(BOOST_JSON_UNLIKELY(!r))
  1367. {
  1368. return fail(cs.begin());
  1369. }
  1370. }
  1371. temp.clear();
  1372. cs.clip(temp.max_size());
  1373. }
  // step over the 'u'
  1374. ++cs;
  1375. // utf-16 escape
  // do_str4..do_str7: the four hex digits of the first code unit,
  // most significant first
  1376. do_str4:
  1377. if(BOOST_JSON_UNLIKELY(! cs))
  1378. return maybe_suspend(cs.begin(), state::str4);
  1379. digit = detail::hex_digit(*cs);
  1380. if(BOOST_JSON_UNLIKELY(digit == -1))
  1381. {
  1382. BOOST_STATIC_CONSTEXPR source_location loc
  1383. = BOOST_CURRENT_LOCATION;
  1384. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1385. }
  1386. ++cs;
  1387. u1_ = digit << 12;
  1388. do_str5:
  1389. if(BOOST_JSON_UNLIKELY(! cs))
  1390. return maybe_suspend(cs.begin(), state::str5);
  1391. digit = detail::hex_digit(*cs);
  1392. if(BOOST_JSON_UNLIKELY(digit == -1))
  1393. {
  1394. BOOST_STATIC_CONSTEXPR source_location loc
  1395. = BOOST_CURRENT_LOCATION;
  1396. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1397. }
  1398. ++cs;
  1399. u1_ += digit << 8;
  1400. do_str6:
  1401. if(BOOST_JSON_UNLIKELY(! cs))
  1402. return maybe_suspend(cs.begin(), state::str6);
  1403. digit = detail::hex_digit(*cs);
  1404. if(BOOST_JSON_UNLIKELY(digit == -1))
  1405. {
  1406. BOOST_STATIC_CONSTEXPR source_location loc
  1407. = BOOST_CURRENT_LOCATION;
  1408. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1409. }
  1410. ++cs;
  1411. u1_ += digit << 4;
  1412. do_str7:
  1413. if(BOOST_JSON_UNLIKELY(! cs))
  1414. return maybe_suspend(cs.begin(), state::str7);
  1415. digit = detail::hex_digit(*cs);
  1416. if(BOOST_JSON_UNLIKELY(digit == -1))
  1417. {
  1418. BOOST_STATIC_CONSTEXPR source_location loc
  1419. = BOOST_CURRENT_LOCATION;
  1420. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1421. }
  1422. ++cs;
  1423. u1_ += digit;
  // not a surrogate: emit the code point directly
  1424. if(BOOST_JSON_LIKELY(
  1425. u1_ < 0xd800 || u1_ > 0xdfff))
  1426. {
  1427. BOOST_ASSERT(temp.empty());
  1428. // utf-8 codepoint
  1429. temp.append_utf8(u1_);
  1430. break;
  1431. }
  // [DC00, DFFF]: a trailing surrogate where a leading one is required
  1432. if(BOOST_JSON_UNLIKELY(u1_ > 0xdbff))
  1433. {
  1434. // If it's an illegal leading surrogate and
  1435. // the parser does not allow it, return an error.
  1436. if(!allow_bad_utf16)
  1437. {
  1438. BOOST_STATIC_CONSTEXPR source_location loc
  1439. = BOOST_CURRENT_LOCATION;
  1440. return fail(cs.begin(), error::illegal_leading_surrogate, &loc);
  1441. }
  1442. // Otherwise, append the Unicode replacement character
  1443. else
  1444. {
  1445. BOOST_ASSERT(temp.empty());
  1446. temp.append_utf8(urc);
  1447. break;
  1448. }
  1449. }
  // do_sur1/do_sur2: expect the "\u" that introduces the trailing
  // surrogate
  1450. do_sur1:
  1451. if(BOOST_JSON_UNLIKELY(! cs))
  1452. return maybe_suspend(cs.begin(), state::sur1);
  1453. if(BOOST_JSON_UNLIKELY(*cs != '\\'))
  1454. {
  1455. // If the next character is not a backslash and
  1456. // the parser does not allow it, return a syntax error.
  1457. if(!allow_bad_utf16)
  1458. {
  1459. BOOST_STATIC_CONSTEXPR source_location loc
  1460. = BOOST_CURRENT_LOCATION;
  1461. return fail(cs.begin(), error::syntax, &loc);
  1462. }
  1463. // Otherwise, append the Unicode replacement character since
  1464. // the first code point is a valid leading surrogate
  1465. else
  1466. {
  1467. temp.append_utf8(urc);
  1468. break;
  1469. }
  1470. }
  1471. ++cs;
  1472. do_sur2:
  1473. if(BOOST_JSON_UNLIKELY(! cs))
  1474. return maybe_suspend(cs.begin(), state::sur2);
  1475. if(BOOST_JSON_UNLIKELY(*cs != 'u'))
  1476. {
  1477. if (!allow_bad_utf16)
  1478. {
  1479. BOOST_STATIC_CONSTEXPR source_location loc
  1480. = BOOST_CURRENT_LOCATION;
  1481. return fail(cs.begin(), error::syntax, &loc);
  1482. }
  1483. // Otherwise, append the Unicode replacement character since
  1484. // the first code point is a valid leading surrogate
  1485. else
  1486. {
  // the backslash is already consumed, so the character at cs is
  // handled as an ordinary escape by re-entering at do_str3
  1487. temp.append_utf8(urc);
  1488. goto do_str3;
  1489. }
  1490. }
  1491. ++cs;
  // do_sur3..do_sur6: the four hex digits of the second code unit,
  // most significant first
  1492. do_sur3:
  1493. if(BOOST_JSON_UNLIKELY(! cs))
  1494. return maybe_suspend(cs.begin(), state::sur3);
  1495. digit = detail::hex_digit(*cs);
  1496. if(BOOST_JSON_UNLIKELY(digit == -1))
  1497. {
  1498. BOOST_STATIC_CONSTEXPR source_location loc
  1499. = BOOST_CURRENT_LOCATION;
  1500. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1501. }
  1502. ++cs;
  1503. u2_ = digit << 12;
  1504. do_sur4:
  1505. if(BOOST_JSON_UNLIKELY(! cs))
  1506. return maybe_suspend(cs.begin(), state::sur4);
  1507. digit = detail::hex_digit(*cs);
  1508. if(BOOST_JSON_UNLIKELY(digit == -1))
  1509. {
  1510. BOOST_STATIC_CONSTEXPR source_location loc
  1511. = BOOST_CURRENT_LOCATION;
  1512. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1513. }
  1514. ++cs;
  1515. u2_ += digit << 8;
  1516. do_sur5:
  1517. if(BOOST_JSON_UNLIKELY(! cs))
  1518. return maybe_suspend(cs.begin(), state::sur5);
  1519. digit = detail::hex_digit(*cs);
  1520. if(BOOST_JSON_UNLIKELY(digit == -1))
  1521. {
  1522. BOOST_STATIC_CONSTEXPR source_location loc
  1523. = BOOST_CURRENT_LOCATION;
  1524. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1525. }
  1526. ++cs;
  1527. u2_ += digit << 4;
  1528. do_sur6:
  1529. if(BOOST_JSON_UNLIKELY(! cs))
  1530. return maybe_suspend(cs.begin(), state::sur6);
  1531. digit = detail::hex_digit(*cs);
  1532. if(BOOST_JSON_UNLIKELY(digit == -1))
  1533. {
  1534. BOOST_STATIC_CONSTEXPR source_location loc
  1535. = BOOST_CURRENT_LOCATION;
  1536. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1537. }
  1538. ++cs;
  1539. u2_ += digit;
  1540. // Check if the second code point is a valid trailing surrogate.
  1541. // Valid trailing surrogates are [DC00, DFFF]
  1542. if(BOOST_JSON_UNLIKELY(
  1543. u2_ < 0xdc00 || u2_ > 0xdfff))
  1544. {
  1545. // If not valid and the parser does not allow it, return an error.
  1546. if(!allow_bad_utf16)
  1547. {
  1548. BOOST_STATIC_CONSTEXPR source_location loc
  1549. = BOOST_CURRENT_LOCATION;
  1550. return fail(cs.begin(), error::illegal_trailing_surrogate, &loc);
  1551. }
  1552. // Append the replacement character for the
  1553. // first leading surrogate.
  1554. temp.append_utf8(urc);
  1555. // Check if the second code point is a
  1556. // valid unicode scalar value (invalid leading
  1557. // or trailing surrogate)
  1558. if (u2_ < 0xd800 || u2_ > 0xdbff)
  1559. {
  1560. temp.append_utf8(u2_);
  1561. break;
  1562. }
  1563. // If it is a valid leading surrogate
  1564. else
  1565. {
  // the second code unit becomes the new leading surrogate
  1566. u1_ = u2_;
  1567. goto do_sur1;
  1568. }
  1569. }
  1570. // Calculate the Unicode code point from the surrogate pair and
  1571. // append the UTF-8 representation.
  1572. unsigned cp =
  1573. ((u1_ - 0xd800) << 10) +
  1574. ((u2_ - 0xdc00)) +
  1575. 0x10000;
  1576. // utf-16 surrogate pair
  1577. temp.append_utf8(cp);
  1578. }
  1579. // flush
  // leave the loop unless another escape follows immediately
  1580. if(BOOST_JSON_UNLIKELY( !cs ) || *cs != '\\')
  1581. break;
  1582. }
  // hand over whatever is still buffered before returning
  1583. if(BOOST_JSON_LIKELY( temp.size() ))
  1584. {
  1585. BOOST_ASSERT(total <= max_size);
  1586. if(BOOST_JSON_UNLIKELY( temp.size() > max_size - total ))
  1587. {
  1588. BOOST_STATIC_CONSTEXPR source_location loc
  1589. = BOOST_CURRENT_LOCATION;
  1590. return fail(cs.begin(), ev_too_large, &loc);
  1591. }
  1592. total += temp.size();
  1593. bool const r = is_key
  1594. ? h_.on_key_part(temp.get(), total, ec_)
  1595. : h_.on_string_part(temp.get(), total, ec_);
  1596. if(BOOST_JSON_UNLIKELY( !r ))
  1597. return fail( cs.begin() );
  1598. }
  1599. return cs.begin();
  1600. }
  1601. //----------------------------------------------------------
  // Parses a JSON object whose opening '{' is at `p` (asserted below), or
  // resumes a previously suspended object parse when `stack_empty` is false
  // and the state stack st_ is not empty.
  //
  // The handler is notified through on_object_begin() and on_object_end().
  // Keys are read by parse_string() with its key tag set to std::true_type,
  // member values are read by parse_value().
  //
  // Parameters:
  //   p               - current read position; input ends at end_.
  //   stack_empty     - compile-time flag; when true the resume path is skipped.
  //   allow_comments  - compile-time flag; when true a '/' found between
  //                     tokens is handed to parse_comment().
  //   allow_trailing  - when true, '}' is accepted directly after a ','.
  //   allow_bad_utf8,
  //   allow_bad_utf16 - forwarded unchanged to parse_string()/parse_value().
  //
  // Returns the position just past the closing '}' on success. Every other
  // exit returns the result of fail(), maybe_suspend() or suspend_or_fail();
  // those helpers are defined outside this block.
  1602. template<class Handler>
  1603. template<
  1604. bool StackEmpty_,
  1605. bool AllowComments_/*,
  1606. bool AllowTrailing_,
  1607. bool AllowBadUTF8_*/>
  1608. const char*
  1609. basic_parser<Handler>::
  1610. parse_object(const char* p,
  1611. std::integral_constant<bool, StackEmpty_> stack_empty,
  1612. std::integral_constant<bool, AllowComments_> allow_comments,
  1613. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  1614. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  1615. bool allow_bad_utf16)
  1616. {
  1617. detail::const_stream_wrapper cs(p, end_);
  // number of members seen so far; restored from the stack when resuming
  1618. std::size_t size;
  1619. if(! stack_empty && ! st_.empty())
  1620. {
  1621. // resume
  // restore the saved state and member count, then jump back to the
  // label that corresponds to the saved state
  1622. state st;
  1623. st_.pop(st);
  1624. st_.pop(size);
  1625. switch(st)
  1626. {
  1627. default: BOOST_JSON_UNREACHABLE();
  1628. case state::obj1: goto do_obj1;
  1629. case state::obj2: goto do_obj2;
  1630. case state::obj3: goto do_obj3;
  1631. case state::obj4: goto do_obj4;
  1632. case state::obj5: goto do_obj5;
  1633. case state::obj6: goto do_obj6;
  1634. case state::obj7: goto do_obj7;
  1635. case state::obj8: goto do_obj8;
  1636. case state::obj9: goto do_obj9;
  1637. case state::obj10: goto do_obj10;
  1638. case state::obj11: goto do_obj11;
  1639. }
  1640. }
  1641. BOOST_ASSERT(*cs == '{');
  1642. size = 0;
  // depth_ is the remaining nesting budget: zero means this object would
  // exceed it. It is decremented here and incremented again on success.
  1643. if(BOOST_JSON_UNLIKELY(! depth_))
  1644. {
  1645. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1646. return fail(cs.begin(), error::too_deep, &loc);
  1647. }
  1648. --depth_;
  1649. if(BOOST_JSON_UNLIKELY(
  1650. ! h_.on_object_begin(ec_)))
  1651. return fail(cs.begin());
  1652. ++cs;
  1653. // object:
  1654. // '{' *ws '}'
  1655. // '{' *ws string *ws ':' *ws value *ws *[ ',' *ws string *ws ':' *ws value *ws ] '}'
  // obj1: whitespace after '{'; expects '}' (empty object), a key, or a comment
  1656. do_obj1:
  1657. cs = detail::count_whitespace(cs.begin(), cs.end());
  1658. if(BOOST_JSON_UNLIKELY(! cs))
  1659. return maybe_suspend(cs.begin(), state::obj1, size);
  1660. if(BOOST_JSON_LIKELY(*cs != '}'))
  1661. {
  // '\x22' is the double-quote character that opens a key
  1662. if(BOOST_JSON_UNLIKELY(*cs != '\x22'))
  1663. {
  1664. if(allow_comments && *cs == '/')
  1665. {
  1666. do_obj2:
  1667. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1668. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1669. return suspend_or_fail(state::obj2, size);
  1670. goto do_obj1;
  1671. }
  1672. BOOST_STATIC_CONSTEXPR source_location loc
  1673. = BOOST_CURRENT_LOCATION;
  1674. return fail(cs.begin(), error::syntax, &loc);
  1675. }
  // one iteration per member: the member is counted before it is parsed
  // and the count is checked against Handler::max_object_size
  1676. loop:
  1677. if(BOOST_JSON_UNLIKELY(++size >
  1678. Handler::max_object_size))
  1679. {
  1680. BOOST_STATIC_CONSTEXPR source_location loc
  1681. = BOOST_CURRENT_LOCATION;
  1682. return fail(cs.begin(), error::object_too_large, &loc);
  1683. }
  // obj3: the key (std::true_type selects key handling in parse_string)
  1684. do_obj3:
  1685. cs = parse_string(cs.begin(), stack_empty, std::true_type(), allow_bad_utf8, allow_bad_utf16);
  1686. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1687. return suspend_or_fail(state::obj3, size);
  // obj4: whitespace after the key, then the ':' separator
  1688. do_obj4:
  1689. cs = detail::count_whitespace(cs.begin(), cs.end());
  1690. if(BOOST_JSON_UNLIKELY(! cs))
  1691. return maybe_suspend(cs.begin(), state::obj4, size);
  1692. if(BOOST_JSON_UNLIKELY(*cs != ':'))
  1693. {
  1694. if(allow_comments && *cs == '/')
  1695. {
  1696. do_obj5:
  1697. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1698. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1699. return suspend_or_fail(state::obj5, size);
  1700. goto do_obj4;
  1701. }
  1702. BOOST_STATIC_CONSTEXPR source_location loc
  1703. = BOOST_CURRENT_LOCATION;
  1704. return fail(cs.begin(), error::syntax, &loc);
  1705. }
  1706. ++cs;
  // obj6: whitespace between ':' and the member value
  1707. do_obj6:
  1708. cs = detail::count_whitespace(cs.begin(), cs.end());
  1709. if(BOOST_JSON_UNLIKELY(! cs))
  1710. return maybe_suspend(cs.begin(), state::obj6, size);
  // obj7: the member value
  1711. do_obj7:
  1712. cs = parse_value(cs.begin(), stack_empty, allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  1713. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1714. return suspend_or_fail(state::obj7, size);
  // obj8: whitespace after the value; ',' continues, '}' ends the object
  1715. do_obj8:
  1716. cs = detail::count_whitespace(cs.begin(), cs.end());
  1717. if(BOOST_JSON_UNLIKELY(! cs))
  1718. return maybe_suspend(cs.begin(), state::obj8, size);
  1719. if(BOOST_JSON_LIKELY(*cs == ','))
  1720. {
  1721. ++cs;
  // obj9: whitespace after ','; expects the next key, or '}' when
  // allow_trailing is set
  1722. do_obj9:
  1723. cs = detail::count_whitespace(cs.begin(), cs.end());
  1724. if(BOOST_JSON_UNLIKELY(! cs))
  1725. return maybe_suspend(cs.begin(), state::obj9, size);
  1726. // loop for next element
  1727. if(BOOST_JSON_LIKELY(*cs == '\x22'))
  1728. goto loop;
  // anything other than an allowed trailing '}' must be a comment,
  // otherwise it is a syntax error
  1729. if(! allow_trailing || *cs != '}')
  1730. {
  1731. if(allow_comments && *cs == '/')
  1732. {
  1733. do_obj10:
  1734. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1735. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1736. return suspend_or_fail(state::obj10, size);
  1737. goto do_obj9;
  1738. }
  1739. BOOST_STATIC_CONSTEXPR source_location loc
  1740. = BOOST_CURRENT_LOCATION;
  1741. return fail(cs.begin(), error::syntax, &loc);
  1742. }
  1743. }
  1744. else if(BOOST_JSON_UNLIKELY(*cs != '}'))
  1745. {
  1746. if(allow_comments && *cs == '/')
  1747. {
  1748. do_obj11:
  1749. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1750. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1751. return suspend_or_fail(state::obj11, size);
  1752. goto do_obj8;
  1753. }
  1754. BOOST_STATIC_CONSTEXPR source_location loc
  1755. = BOOST_CURRENT_LOCATION;
  1756. return fail(cs.begin(), error::syntax, &loc);
  1757. }
  1758. // got closing brace, fall through
  1759. }
  // cs still points at '}' here; the handler receives the member count, and
  // the depth budget is given back only after on_object_end() succeeds
  1760. if(BOOST_JSON_UNLIKELY(
  1761. ! h_.on_object_end(size, ec_)))
  1762. return fail(cs.begin());
  1763. ++depth_;
  1764. ++cs;
  1765. return cs.begin();
  1766. }
  1767. //----------------------------------------------------------
  // Parses a JSON array whose opening '[' is at `p` (asserted below), or
  // resumes a previously suspended array parse when `stack_empty` is false
  // and the state stack st_ is not empty.
  //
  // The handler is notified through on_array_begin() and on_array_end();
  // each element is read by parse_value().
  //
  // Parameters:
  //   p               - current read position; input ends at end_.
  //   stack_empty     - compile-time flag; when true the resume path is skipped.
  //   allow_comments  - compile-time flag; when true a '/' found between
  //                     tokens is handed to parse_comment().
  //   allow_trailing  - when true, ']' is accepted directly after a ','.
  //   allow_bad_utf8,
  //   allow_bad_utf16 - forwarded unchanged to parse_value().
  //
  // Returns the position just past the closing ']' on success. Every other
  // exit returns the result of fail(), maybe_suspend() or suspend_or_fail();
  // those helpers are defined outside this block.
  1768. template<class Handler>
  1769. template<
  1770. bool StackEmpty_,
  1771. bool AllowComments_/*,
  1772. bool AllowTrailing_,
  1773. bool AllowBadUTF8_*/>
  1774. const char*
  1775. basic_parser<Handler>::
  1776. parse_array(const char* p,
  1777. std::integral_constant<bool, StackEmpty_> stack_empty,
  1778. std::integral_constant<bool, AllowComments_> allow_comments,
  1779. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  1780. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  1781. bool allow_bad_utf16)
  1782. {
  1783. detail::const_stream_wrapper cs(p, end_);
  // number of elements seen so far; restored from the stack when resuming
  1784. std::size_t size;
  1785. if(! stack_empty && ! st_.empty())
  1786. {
  1787. // resume
  // restore the saved state and element count, then jump back to the
  // label that corresponds to the saved state
  1788. state st;
  1789. st_.pop(st);
  1790. st_.pop(size);
  1791. switch(st)
  1792. {
  1793. default: BOOST_JSON_UNREACHABLE();
  1794. case state::arr1: goto do_arr1;
  1795. case state::arr2: goto do_arr2;
  1796. case state::arr3: goto do_arr3;
  1797. case state::arr4: goto do_arr4;
  1798. case state::arr5: goto do_arr5;
  1799. case state::arr6: goto do_arr6;
  1800. }
  1801. }
  1802. BOOST_ASSERT(*cs == '[');
  1803. size = 0;
  // depth_ is the remaining nesting budget: zero means this array would
  // exceed it. It is decremented here and incremented again on success.
  1804. if(BOOST_JSON_UNLIKELY(! depth_))
  1805. {
  1806. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1807. return fail(cs.begin(), error::too_deep, &loc);
  1808. }
  1809. --depth_;
  1810. if(BOOST_JSON_UNLIKELY(
  1811. ! h_.on_array_begin(ec_)))
  1812. return fail(cs.begin());
  1813. ++cs;
  1814. // array:
  1815. // '[' *ws ']'
  1816. // '[' *ws value *ws *[ ',' *ws value *ws ] ']'
  // arr1: whitespace after '['; ']' here means an empty array
  1817. do_arr1:
  1818. cs = detail::count_whitespace(cs.begin(), cs.end());
  1819. if(BOOST_JSON_UNLIKELY(! cs))
  1820. return maybe_suspend(cs.begin(), state::arr1, size);
  1821. if(BOOST_JSON_LIKELY(*cs != ']'))
  1822. {
  // one iteration per element; a comment in element position is consumed
  // first and control returns to arr1 without counting an element
  1823. loop:
  1824. if(allow_comments && *cs == '/')
  1825. {
  1826. do_arr2:
  1827. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1828. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1829. return suspend_or_fail(state::arr2, size);
  1830. goto do_arr1;
  1831. }
  // the element is counted before it is parsed and the count is checked
  // against Handler::max_array_size
  1832. if(BOOST_JSON_UNLIKELY(++size >
  1833. Handler::max_array_size))
  1834. {
  1835. BOOST_STATIC_CONSTEXPR source_location loc
  1836. = BOOST_CURRENT_LOCATION;
  1837. return fail(cs.begin(), error::array_too_large, &loc);
  1838. }
  1839. do_arr3:
  1840. // array is not empty, value required
  1841. cs = parse_value(cs.begin(), stack_empty, allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  1842. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1843. return suspend_or_fail(state::arr3, size);
  // arr4: whitespace after the element; ',' continues, ']' ends the array
  1844. do_arr4:
  1845. cs = detail::count_whitespace(cs.begin(), cs.end());
  1846. if(BOOST_JSON_UNLIKELY(! cs))
  1847. return maybe_suspend(cs.begin(), state::arr4, size);
  1848. if(BOOST_JSON_LIKELY(*cs == ','))
  1849. {
  1850. ++cs;
  // arr5: whitespace after ','
  1851. do_arr5:
  1852. cs = detail::count_whitespace(cs.begin(), cs.end());
  1853. if(BOOST_JSON_UNLIKELY(! cs))
  1854. return maybe_suspend(cs.begin(), state::arr5, size);
  1855. // loop for next element
  // only a ']' with allow_trailing set skips the jump and falls
  // through to the closing-bracket handling below
  1856. if(! allow_trailing || *cs != ']')
  1857. goto loop;
  1858. }
  1859. else if(BOOST_JSON_UNLIKELY(*cs != ']'))
  1860. {
  1861. if(allow_comments && *cs == '/')
  1862. {
  1863. do_arr6:
  1864. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1865. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1866. return suspend_or_fail(state::arr6, size);
  1867. goto do_arr4;
  1868. }
  1869. BOOST_STATIC_CONSTEXPR source_location loc
  1870. = BOOST_CURRENT_LOCATION;
  1871. return fail(cs.begin(), error::syntax, &loc);
  1872. }
  1873. // got closing bracket; fall through
  1874. }
  // cs still points at ']' here; the handler receives the element count, and
  // the depth budget is given back only after on_array_end() succeeds
  1875. if(BOOST_JSON_UNLIKELY(
  1876. ! h_.on_array_end(size, ec_)))
  1877. return fail(cs.begin());
  1878. ++depth_;
  1879. ++cs;
  1880. return cs.begin();
  1881. }
  1882. //----------------------------------------------------------
  // Parses a JSON number starting at `p`, or resumes a suspended number
  // parse when `stack_empty` is false and the state stack st_ is not empty.
  //
  // Parameters:
  //   p           - current read position; input ends at end_.
  //   stack_empty - compile-time flag; when true the resume path is skipped.
  //   first       - compile-time character tag: '-' sets `negative`, '0' sets
  //                 `zero_first`, '+' sets `nonzero_first` (see the
  //                 comparisons below). NOTE(review): presumably the caller
  //                 passes '+' when the first character is a digit 1-9 -
  //                 confirm at the call site.
  //   mode        - compile-time number_precision: `precise` enables
  //                 `precise_parsing`, `none` enables `no_parsing`.
  //
  // Result reporting:
  //   finish_int / finish_signed - h_.on_int64() or h_.on_uint64()
  //   finish_dub                 - h_.on_double()
  // Whenever the input runs out mid-number, the characters consumed in this
  // call are reported through h_.on_number_part() (and appended to num_buf_
  // in precise mode) before suspend()/maybe_suspend() is called.
  //
  // Returns cs.begin() after the last consumed character on success;
  // otherwise the result of fail(), suspend(), maybe_suspend() or
  // parse_literal(), all of which are defined outside this block.
  1883. template<class Handler>
  1884. template<bool StackEmpty_, char First_, number_precision Numbers_>
  1885. const char*
  1886. basic_parser<Handler>::
  1887. parse_number(const char* p,
  1888. std::integral_constant<bool, StackEmpty_> stack_empty,
  1889. std::integral_constant<char, First_> first,
  1890. std::integral_constant<number_precision, Numbers_> mode)
  1891. {
  1892. constexpr bool precise_parsing = mode == number_precision::precise;
  1893. constexpr bool no_parsing = mode == number_precision::none;
  1894. // only one of these will be true if we are not resuming
  1895. // if negative then !zero_first && !nonzero_first
  1896. // if zero_first then !nonzero_first && !negative
  1897. // if nonzero_first then !zero_first && !negative
  1898. bool const negative = first == '-';
  1899. bool const zero_first = first == '0';
  1900. bool const nonzero_first = first == '+';
  1901. detail::const_stream_wrapper cs(p, end_);
  1902. number num;
  // start of the text consumed by this call; used for the string views
  // passed to the handler
  1903. const char* begin = cs.begin();
  1904. if(stack_empty || st_.empty())
  1905. {
  // fresh number: reset the accumulator and the precise-mode buffer
  1906. num.bias = 0;
  1907. num.exp = 0;
  1908. num.frac = false;
  1909. num_buf_.clear();
  1910. //----------------------------------
  1911. //
  1912. // '-'
  1913. // leading minus sign
  1914. //
  1915. BOOST_ASSERT(cs);
  1916. if(negative)
  1917. ++cs;
  1918. num.neg = negative;
  1919. num.frac = false;
  1920. num.exp = 0;
  1921. num.bias = 0;
  1922. // fast path
  // NOTE(review): the fast path below dereferences cs without
  // end-of-input checks; the 33-character guarantee is presumably what
  // makes that safe - confirm against detail::count_digits.
  1923. if( cs.remain() >= 16 + 1 + 16 ) // digits . digits
  1924. {
  1925. int n1;
  1926. if( nonzero_first ||
  1927. (negative && *cs != '0') )
  1928. {
  1929. n1 = detail::count_digits( cs.begin() );
  1930. BOOST_ASSERT(n1 >= 0 && n1 <= 16);
  // '-' followed by a non-digit: try the negative-infinity literal
  // when the options allow it
  1931. if( negative && n1 == 0 && opt_.allow_infinity_and_nan )
  1932. {
  1933. return parse_literal(
  1934. p - 1,
  1935. detail::literals_c<detail::literals::neg_infinity>());
  1936. }
  1937. if( ! nonzero_first && n1 == 0 )
  1938. {
  1939. // digit required
  1940. BOOST_STATIC_CONSTEXPR source_location loc
  1941. = BOOST_CURRENT_LOCATION;
  1942. return fail(cs.begin(), error::syntax, &loc);
  1943. }
  1944. BOOST_IF_CONSTEXPR( !no_parsing )
  1945. num.mant = detail::parse_unsigned( 0, cs.begin(), n1 );
  1946. else
  1947. num.mant = 0;
  1948. cs += n1;
  1949. // integer or floating-point with
  1950. // >= 16 leading digits
  1951. if( n1 == 16 )
  1952. {
  1953. goto do_num2;
  1954. }
  1955. }
  1956. else
  1957. {
  1958. // 0. floating-point or 0e integer
  1959. num.mant = 0;
  1960. n1 = 0;
  1961. ++cs;
  1962. }
  1963. {
  1964. const char c = *cs;
  1965. if(c != '.')
  1966. {
  // (c | 32) maps 'E' to 'e', so both exponent markers match
  1967. if((c | 32) == 'e')
  1968. {
  1969. ++cs;
  1970. goto do_exp1;
  1971. }
  // ~x + 1 is the two's-complement negation of the mantissa
  1972. BOOST_IF_CONSTEXPR( negative && !no_parsing )
  1973. num.mant = ~num.mant + 1;
  1974. goto finish_signed;
  1975. }
  1976. }
  1977. // floating-point number
  1978. ++cs;
  1979. int n2 = detail::count_digits( cs.begin() );
  1980. BOOST_ASSERT(n2 >= 0 && n2 <= 16);
  1981. if( n2 == 0 )
  1982. {
  1983. // digit required
  1984. BOOST_STATIC_CONSTEXPR source_location loc
  1985. = BOOST_CURRENT_LOCATION;
  1986. return fail(cs.begin(), error::syntax, &loc);
  1987. }
  1988. // floating-point mantissa overflow
  1989. if( n1 + n2 >= 19 )
  1990. {
  1991. goto do_num7;
  1992. }
  1993. BOOST_IF_CONSTEXPR( !no_parsing )
  1994. num.mant = detail::parse_unsigned( num.mant, cs.begin(), n2 );
  1995. BOOST_ASSERT(num.bias == 0);
  // each fraction digit folded into the mantissa lowers the decimal
  // exponent by one
  1996. num.bias -= n2;
  1997. cs += n2;
  1998. char ch = *cs;
  1999. if( (ch | 32) == 'e' )
  2000. {
  2001. ++cs;
  2002. goto do_exp1;
  2003. }
  2004. else if( ch >= '0' && ch <= '9' )
  2005. {
  2006. goto do_num8;
  2007. }
  2008. goto finish_dub;
  2009. }
  2010. }
  2011. else
  2012. {
  // resume: reload the number state from the num_ member and jump to the
  // label that corresponds to the popped state
  2013. num = num_;
  2014. state st;
  2015. st_.pop(st);
  2016. switch(st)
  2017. {
  2018. default: BOOST_JSON_UNREACHABLE();
  2019. case state::num1: goto do_num1;
  2020. case state::num2: goto do_num2;
  2021. case state::num3: goto do_num3;
  2022. case state::num4: goto do_num4;
  2023. case state::num5: goto do_num5;
  2024. case state::num6: goto do_num6;
  2025. case state::num7: goto do_num7;
  2026. case state::num8: goto do_num8;
  2027. case state::exp1: goto do_exp1;
  2028. case state::exp2: goto do_exp2;
  2029. case state::exp3: goto do_exp3;
  2030. }
  2031. }
  2032. //----------------------------------
  2033. //
  2034. // DIGIT
  2035. // first digit
  2036. //
  2037. do_num1:
  2038. if(zero_first || nonzero_first ||
  2039. BOOST_JSON_LIKELY(cs))
  2040. {
  2041. char const c = *cs;
  2042. if(zero_first)
  2043. {
  2044. ++cs;
  2045. num.mant = 0;
  2046. goto do_num6;
  2047. }
  2048. else if(nonzero_first || BOOST_JSON_LIKELY(
  2049. c >= '1' && c <= '9'))
  2050. {
  2051. ++cs;
  2052. num.mant = c - '0';
  2053. }
  2054. else if(BOOST_JSON_UNLIKELY(
  2055. c == '0'))
  2056. {
  2057. ++cs;
  2058. num.mant = 0;
  2059. goto do_num6;
  2060. }
  // a minus sign followed by a non-digit: hand over to the literal parser
  // in its resume mode, pre-positioned at offset 1 of the negative
  // infinity literal
  2061. else if( (negative || num.neg) && opt_.allow_infinity_and_nan )
  2062. {
  2063. st_.push(state::lit1);
  2064. cur_lit_ = literal_index(detail::literals::neg_infinity);
  2065. lit_offset_ = 1;
  2066. return parse_literal(
  2067. cs.begin(), detail::literals_c<detail::literals::resume>() );
  2068. }
  2069. else
  2070. {
  2071. BOOST_STATIC_CONSTEXPR source_location loc
  2072. = BOOST_CURRENT_LOCATION;
  2073. return fail(cs.begin(), error::syntax, &loc);
  2074. }
  2075. }
  2076. else
  2077. {
  2078. if(BOOST_JSON_UNLIKELY(
  2079. ! h_.on_number_part(
  2080. {begin, cs.used(begin)}, ec_)))
  2081. return fail(cs.begin());
  2082. BOOST_IF_CONSTEXPR( precise_parsing )
  2083. num_buf_.append( begin, cs.used(begin) );
  2084. return maybe_suspend(
  2085. cs.begin(), state::num1, num);
  2086. }
  2087. //----------------------------------
  2088. //
  2089. // 1*DIGIT
  2090. // significant digits left of decimal
  2091. //
  2092. do_num2:
  // two copies of the same loop: the negative one stops accumulating at
  // the INT64_MIN magnitude, the other at UINT64_MAX
  2093. if(negative || (!stack_empty && num.neg))
  2094. {
  2095. for(;;)
  2096. {
  2097. if(BOOST_JSON_UNLIKELY(! cs))
  2098. {
  2099. if(BOOST_JSON_UNLIKELY(more_))
  2100. {
  2101. if(BOOST_JSON_UNLIKELY(
  2102. ! h_.on_number_part(
  2103. {begin, cs.used(begin)}, ec_)))
  2104. return fail(cs.begin());
  2105. BOOST_IF_CONSTEXPR( precise_parsing )
  2106. num_buf_.append( begin, cs.used(begin) );
  2107. return suspend(cs.begin(), state::num2, num);
  2108. }
  2109. goto finish_int;
  2110. }
  2111. char const c = *cs;
  2112. if(BOOST_JSON_LIKELY(
  2113. c >= '0' && c <= '9'))
  2114. {
  2115. ++cs;
  2116. // 9223372036854775808 INT64_MIN
  2117. if( num.mant > 922337203685477580 || (
  2118. num.mant == 922337203685477580 && c > '8'))
  2119. break;
  2120. BOOST_IF_CONSTEXPR( !no_parsing )
  2121. num.mant = 10 * num.mant + ( c - '0' );
  2122. continue;
  2123. }
  2124. goto do_num6; // [.eE]
  2125. }
  2126. }
  2127. else
  2128. {
  2129. for(;;)
  2130. {
  2131. if(BOOST_JSON_UNLIKELY(! cs))
  2132. {
  2133. if(BOOST_JSON_UNLIKELY(more_))
  2134. {
  2135. if(BOOST_JSON_UNLIKELY(
  2136. ! h_.on_number_part(
  2137. {begin, cs.used(begin)}, ec_)))
  2138. return fail(cs.begin());
  2139. BOOST_IF_CONSTEXPR( precise_parsing )
  2140. num_buf_.append( begin, cs.used(begin) );
  2141. return suspend(cs.begin(), state::num2, num);
  2142. }
  2143. goto finish_int;
  2144. }
  2145. char const c = *cs;
  2146. if(BOOST_JSON_LIKELY(
  2147. c >= '0' && c <= '9'))
  2148. {
  2149. ++cs;
  2150. // 18446744073709551615 UINT64_MAX
  2151. if( num.mant > 1844674407370955161 || (
  2152. num.mant == 1844674407370955161 && c > '5'))
  2153. break;
  2154. BOOST_IF_CONSTEXPR( !no_parsing )
  2155. num.mant = 10 * num.mant + ( c - '0' );
  2156. }
  2157. else
  2158. {
  2159. goto do_num6; // [.eE]
  2160. }
  2161. }
  2162. }
  // reached only via `break`: the digit that would have overflowed the
  // mantissa was consumed but not accumulated, so it is counted in the
  // decimal bias instead
  2163. ++num.bias;
  2164. //----------------------------------
  2165. //
  2166. // 1*DIGIT
  2167. // non-significant digits left of decimal
  2168. //
  2169. do_num3:
  2170. for(;;)
  2171. {
  2172. if(BOOST_JSON_UNLIKELY(! cs))
  2173. {
  2174. if(BOOST_JSON_UNLIKELY(more_))
  2175. {
  2176. if(BOOST_JSON_UNLIKELY(
  2177. ! h_.on_number_part(
  2178. {begin, cs.used(begin)}, ec_)))
  2179. return fail(cs.begin());
  2180. BOOST_IF_CONSTEXPR( precise_parsing )
  2181. num_buf_.append( begin, cs.used(begin) );
  2182. return suspend(cs.begin(), state::num3, num);
  2183. }
  2184. goto finish_dub;
  2185. }
  2186. char const c = *cs;
  2187. if(BOOST_JSON_UNLIKELY(
  2188. c >= '0' && c <= '9'))
  2189. {
  // guard the increment below so that bias never reaches INT_MAX
  2190. if(BOOST_JSON_UNLIKELY( num.bias + 1 == INT_MAX ))
  2191. {
  2192. BOOST_STATIC_CONSTEXPR source_location loc
  2193. = BOOST_CURRENT_LOCATION;
  2194. return fail(cs.begin(), error::exponent_overflow, &loc);
  2195. }
  2196. ++cs;
  2197. ++num.bias;
  2198. }
  2199. else if(BOOST_JSON_LIKELY(
  2200. c == '.'))
  2201. {
  2202. ++cs;
  2203. break;
  2204. }
  2205. else if((c | 32) == 'e')
  2206. {
  2207. ++cs;
  2208. goto do_exp1;
  2209. }
  2210. else
  2211. {
  2212. goto finish_dub;
  2213. }
  2214. }
  2215. //----------------------------------
  2216. //
  2217. // DIGIT
  2218. // first non-significant digit
  2219. // to the right of decimal
  2220. //
  2221. do_num4:
  2222. {
  2223. if(BOOST_JSON_UNLIKELY(! cs))
  2224. {
  2225. if(BOOST_JSON_UNLIKELY(
  2226. ! h_.on_number_part(
  2227. {begin, cs.used(begin)}, ec_)))
  2228. return fail(cs.begin());
  2229. BOOST_IF_CONSTEXPR( precise_parsing )
  2230. num_buf_.append( begin, cs.used(begin) );
  2231. return maybe_suspend(
  2232. cs.begin(), state::num4, num);
  2233. }
  2234. char const c = *cs;
  2235. if(BOOST_JSON_LIKELY(
  2236. //static_cast<unsigned char>(c - '0') < 10))
  2237. c >= '0' && c <= '9'))
  2238. {
  2239. ++cs;
  2240. }
  2241. else
  2242. {
  2243. // digit required
  2244. BOOST_STATIC_CONSTEXPR source_location loc
  2245. = BOOST_CURRENT_LOCATION;
  2246. return fail(cs.begin(), error::syntax, &loc);
  2247. }
  2248. }
  2249. //----------------------------------
  2250. //
  2251. // 1*DIGIT
  2252. // non-significant digits
  2253. // to the right of decimal
  2254. //
  2255. do_num5:
  // these digits are consumed without changing num.mant or num.bias
  2256. for(;;)
  2257. {
  2258. if(BOOST_JSON_UNLIKELY(! cs))
  2259. {
  2260. if(BOOST_JSON_UNLIKELY(more_))
  2261. {
  2262. if(BOOST_JSON_UNLIKELY(
  2263. ! h_.on_number_part(
  2264. {begin, cs.used(begin)}, ec_)))
  2265. return fail(cs.begin());
  2266. BOOST_IF_CONSTEXPR( precise_parsing )
  2267. num_buf_.append( begin, cs.used(begin) );
  2268. return suspend(cs.begin(), state::num5, num);
  2269. }
  2270. goto finish_dub;
  2271. }
  2272. char const c = *cs;
  2273. if(BOOST_JSON_LIKELY(
  2274. c >= '0' && c <= '9'))
  2275. {
  2276. ++cs;
  2277. }
  2278. else if((c | 32) == 'e')
  2279. {
  2280. ++cs;
  2281. goto do_exp1;
  2282. }
  2283. else
  2284. {
  2285. goto finish_dub;
  2286. }
  2287. }
  2288. //----------------------------------
  2289. //
  2290. // [.eE]
  2291. //
  2292. do_num6:
  2293. {
  2294. if(BOOST_JSON_UNLIKELY(! cs))
  2295. {
  2296. if(BOOST_JSON_UNLIKELY(more_))
  2297. {
  2298. if(BOOST_JSON_UNLIKELY(
  2299. ! h_.on_number_part(
  2300. {begin, cs.used(begin)}, ec_)))
  2301. return fail(cs.begin());
  2302. BOOST_IF_CONSTEXPR( precise_parsing )
  2303. num_buf_.append( begin, cs.used(begin) );
  2304. return suspend(cs.begin(), state::num6, num);
  2305. }
  2306. goto finish_int;
  2307. }
  2308. char const c = *cs;
  2309. if(BOOST_JSON_LIKELY(
  2310. c == '.'))
  2311. {
  2312. ++cs;
  2313. }
  2314. else if((c | 32) == 'e')
  2315. {
  2316. ++cs;
  2317. goto do_exp1;
  2318. }
  2319. else
  2320. {
  2321. goto finish_int;
  2322. }
  2323. }
  2324. //----------------------------------
  2325. //
  2326. // DIGIT
  2327. // first significant digit
  2328. // to the right of decimal
  2329. //
  2330. do_num7:
  // this state only checks that a digit is present; the digit itself is
  // consumed by the do_num8 loop that follows
  2331. {
  2332. if(BOOST_JSON_UNLIKELY(! cs))
  2333. {
  2334. if(BOOST_JSON_UNLIKELY(more_))
  2335. {
  2336. if(BOOST_JSON_UNLIKELY(
  2337. ! h_.on_number_part(
  2338. {begin, cs.used(begin)}, ec_)))
  2339. return fail(cs.begin());
  2340. BOOST_IF_CONSTEXPR( precise_parsing )
  2341. num_buf_.append( begin, cs.used(begin) );
  2342. return suspend(cs.begin(), state::num7, num);
  2343. }
  2344. // digit required
  2345. BOOST_STATIC_CONSTEXPR source_location loc
  2346. = BOOST_CURRENT_LOCATION;
  2347. return fail(cs.begin(), error::syntax, &loc);
  2348. }
  2349. char const c = *cs;
  2350. if(BOOST_JSON_UNLIKELY(
  2351. c < '0' || c > '9'))
  2352. {
  2353. // digit required
  2354. BOOST_STATIC_CONSTEXPR source_location loc
  2355. = BOOST_CURRENT_LOCATION;
  2356. return fail(cs.begin(), error::syntax, &loc);
  2357. }
  2358. }
  2359. //----------------------------------
  2360. //
  2361. // 1*DIGIT
  2362. // significant digits
  2363. // to the right of decimal
  2364. //
  2365. do_num8:
  2366. for(;;)
  2367. {
  2368. if(BOOST_JSON_UNLIKELY(! cs))
  2369. {
  2370. if(BOOST_JSON_UNLIKELY(more_))
  2371. {
  2372. if(BOOST_JSON_UNLIKELY(
  2373. ! h_.on_number_part(
  2374. {begin, cs.used(begin)}, ec_)))
  2375. return fail(cs.begin());
  2376. BOOST_IF_CONSTEXPR( precise_parsing )
  2377. num_buf_.append( begin, cs.used(begin) );
  2378. return suspend(cs.begin(), state::num8, num);
  2379. }
  2380. goto finish_dub;
  2381. }
  2382. char const c = *cs;
  2383. if(BOOST_JSON_LIKELY(
  2384. c >= '0' && c <= '9'))
  2385. {
  2386. ++cs;
  // digits are accumulated only while the mantissa is at most 2^53-1
  // (and parsing is enabled); after that the rest of the fraction is
  // skipped by do_num5
  2387. if(!no_parsing && BOOST_JSON_LIKELY(
  2388. num.mant <= 9007199254740991)) // 2^53-1
  2389. {
  2390. if(BOOST_JSON_UNLIKELY( num.bias - 1 == INT_MIN ))
  2391. {
  2392. BOOST_STATIC_CONSTEXPR source_location loc
  2393. = BOOST_CURRENT_LOCATION;
  2394. return fail(cs.begin(), error::exponent_overflow, &loc);
  2395. }
  2396. --num.bias;
  2397. num.mant = 10 * num.mant + ( c - '0' );
  2398. }
  2399. else
  2400. {
  2401. goto do_num5;
  2402. }
  2403. }
  2404. else if((c | 32) == 'e')
  2405. {
  2406. ++cs;
  2407. goto do_exp1;
  2408. }
  2409. else
  2410. {
  2411. goto finish_dub;
  2412. }
  2413. }
  2414. //----------------------------------
  2415. //
  2416. // *[+-]
  2417. //
  2418. do_exp1:
  2419. if(BOOST_JSON_UNLIKELY(! cs))
  2420. {
  2421. if(BOOST_JSON_UNLIKELY(
  2422. ! h_.on_number_part(
  2423. {begin, cs.used(begin)}, ec_)))
  2424. return fail(cs.begin());
  2425. BOOST_IF_CONSTEXPR( precise_parsing )
  2426. num_buf_.append( begin, cs.used(begin) );
  2427. return maybe_suspend(
  2428. cs.begin(), state::exp1, num);
  2429. }
  2430. if(*cs == '+')
  2431. {
  2432. ++cs;
  2433. }
  2434. else if(*cs == '-')
  2435. {
  2436. ++cs;
  // num.frac records a negative exponent sign; finish_dub negates
  // num.exp when it is set
  2437. num.frac = true;
  2438. }
  2439. //----------------------------------
  2440. //
  2441. // DIGIT
  2442. // first digit of the exponent
  2443. //
  2444. do_exp2:
  2445. {
  2446. if(BOOST_JSON_UNLIKELY(! cs))
  2447. {
  2448. if(BOOST_JSON_UNLIKELY(more_))
  2449. {
  2450. if(BOOST_JSON_UNLIKELY(
  2451. ! h_.on_number_part(
  2452. {begin, cs.used(begin)}, ec_)))
  2453. return fail(cs.begin());
  2454. BOOST_IF_CONSTEXPR( precise_parsing )
  2455. num_buf_.append( begin, cs.used(begin) );
  2456. return suspend(cs.begin(), state::exp2, num);
  2457. }
  2458. // digit required
  2459. BOOST_STATIC_CONSTEXPR source_location loc
  2460. = BOOST_CURRENT_LOCATION;
  2461. return fail(cs.begin(), error::syntax, &loc);
  2462. }
  2463. char const c = *cs;
  2464. if(BOOST_JSON_UNLIKELY(
  2465. c < '0' || c > '9'))
  2466. {
  2467. // digit required
  2468. BOOST_STATIC_CONSTEXPR source_location loc
  2469. = BOOST_CURRENT_LOCATION;
  2470. return fail(cs.begin(), error::syntax, &loc);
  2471. }
  2472. ++cs;
  2473. num.exp = c - '0';
  2474. }
  2475. //----------------------------------
  2476. //
  2477. // 1*DIGIT
  2478. // subsequent digits in the exponent
  2479. //
  2480. do_exp3:
  2481. for(;;)
  2482. {
  // when the input is exhausted and no more is expected (more_ is false),
  // control falls through to the range checks below the else-branch
  2483. if(BOOST_JSON_UNLIKELY(! cs))
  2484. {
  2485. if(BOOST_JSON_UNLIKELY(more_))
  2486. {
  2487. if(BOOST_JSON_UNLIKELY(
  2488. ! h_.on_number_part(
  2489. {begin, cs.used(begin)}, ec_)))
  2490. return fail(cs.begin());
  2491. BOOST_IF_CONSTEXPR( precise_parsing )
  2492. num_buf_.append( begin, cs.used(begin) );
  2493. return suspend(cs.begin(), state::exp3, num);
  2494. }
  2495. }
  2496. else
  2497. {
  2498. char const c = *cs;
  2499. if(BOOST_JSON_LIKELY( c >= '0' && c <= '9' ))
  2500. {
  // the exponent saturates at INT_MAX instead of overflowing
  2501. if(BOOST_JSON_UNLIKELY(
  2502. // 2147483647 INT_MAX
  2503. num.exp > 214748364 ||
  2504. (num.exp == 214748364 && c > '7')
  2505. ))
  2506. num.exp = INT_MAX;
  2507. else BOOST_IF_CONSTEXPR( !no_parsing )
  2508. num.exp = 10 * num.exp + ( c - '0' );
  2509. ++cs;
  2510. continue;
  2511. }
  2512. }
  // end of the exponent digits: make sure that combining bias and exp in
  // finish_dub cannot leave the int range
  2513. BOOST_ASSERT(num.exp >= 0);
  2514. if ( num.frac )
  2515. {
  2516. if(BOOST_JSON_UNLIKELY( num.bias < (INT_MIN + num.exp) ))
  2517. {
  2518. // if exponent overflowed, bias is a very large negative
  2519. // number, and mantissa isn't zero, then we cannot parse the
  2520. // number correctly
  2521. if(BOOST_JSON_UNLIKELY(
  2522. (num.exp == INT_MAX) &&
  2523. (num.bias < 0) &&
  2524. (num.exp + num.bias < 308) &&
  2525. num.mant ))
  2526. {
  2527. BOOST_STATIC_CONSTEXPR source_location loc
  2528. = BOOST_CURRENT_LOCATION;
  2529. return fail(cs.begin(), error::exponent_overflow, &loc);
  2530. }
  2531. num.bias = 0;
  2532. num.exp = INT_MAX;
  2533. }
  2534. }
  2535. else if (BOOST_JSON_UNLIKELY( num.bias > (INT_MAX - num.exp) ))
  2536. {
  2537. // if exponent overflowed, bias is a very large positive number,
  2538. // and mantissa isn't zero, then we cannot parse the
  2539. // number correctly
  2540. if(BOOST_JSON_UNLIKELY(
  2541. (num.exp == INT_MAX) &&
  2542. (num.bias > 0) &&
  2543. (num.exp - num.bias < 308) &&
  2544. num.mant ))
  2545. {
  2546. BOOST_STATIC_CONSTEXPR source_location loc
  2547. = BOOST_CURRENT_LOCATION;
  2548. return fail(cs.begin(), error::exponent_overflow, &loc);
  2549. }
  2550. num.bias = 0;
  2551. num.exp = INT_MAX;
  2552. }
  2553. goto finish_dub;
  2554. }
  // integer results: a negative number is reported as int64 with the
  // mantissa negated; otherwise int64 when the mantissa fits in INT64_MAX
  // and uint64 when it does not
  2555. finish_int:
  2556. if(negative || (!stack_empty && num.neg))
  2557. {
  2558. if(BOOST_JSON_UNLIKELY(
  2559. ! h_.on_int64(static_cast<
  2560. int64_t>(~num.mant + 1), {begin, cs.used(begin)}, ec_)))
  2561. return fail(cs.begin());
  2562. return cs.begin();
  2563. }
  2564. if(num.mant <= INT64_MAX)
  2565. {
  // also the target of the fast path, which has already applied the sign
  2566. finish_signed:
  2567. if(BOOST_JSON_UNLIKELY(
  2568. ! h_.on_int64(static_cast<
  2569. int64_t>(num.mant), {begin, cs.used(begin)}, ec_)))
  2570. return fail(cs.begin());
  2571. return cs.begin();
  2572. }
  2573. if(BOOST_JSON_UNLIKELY(
  2574. ! h_.on_uint64(num.mant, {begin, cs.used(begin)}, ec_)))
  2575. return fail(cs.begin());
  2576. return cs.begin();
  // floating-point result: precise mode converts the number's text with
  // from_chars, `none` mode reports 0, and the default mode converts
  // mantissa and decimal exponent with dec_to_float
  2577. finish_dub:
  2578. double d;
  2579. std::size_t const size = cs.used(begin);
  2580. BOOST_ASSERT( !num_buf_.size() || precise_parsing );
  2581. BOOST_IF_CONSTEXPR( precise_parsing )
  2582. {
  2583. char const* data = begin;
  2584. std::size_t full_size = size;
  2585. // if we previously suspended or if the current input ends with the
  2586. // number, we need to copy the current part of the number to the
  2587. // temporary buffer
  2588. if(BOOST_JSON_UNLIKELY( num_buf_.size() ))
  2589. {
  2590. data = num_buf_.append( begin, size );
  2591. full_size = num_buf_.size();
  2592. }
  2593. auto const err = detail::charconv::from_chars(
  2594. data, data + full_size, d );
  2595. BOOST_ASSERT( err.ec != std::errc::invalid_argument );
  2596. BOOST_ASSERT( err.ptr == data + full_size );
  2597. (void)err;
  2598. }
  2599. else BOOST_IF_CONSTEXPR( no_parsing )
  2600. d = 0;
  2601. else
  2602. d = detail::dec_to_float(
  2603. num.mant,
  2604. num.bias + (num.frac ?
  2605. -num.exp : num.exp),
  2606. num.neg);
  // the handler receives only the text consumed by this call, not the
  // buffered earlier parts
  2607. if(BOOST_JSON_UNLIKELY(
  2608. ! h_.on_double(d, {begin, size}, ec_)))
  2609. return fail(cs.begin());
  2610. return cs.begin();
  2611. }
  2612. //----------------------------------------------------------
  2613. template<class Handler>
  2614. template<class... Args>
  2615. basic_parser<Handler>::
  2616. basic_parser(
  2617. parse_options const& opt,
  2618. Args&&... args)
  2619. : h_(std::forward<Args>(args)...)
  2620. , opt_(opt)
  2621. {
  2622. }
  2623. //----------------------------------------------------------
  2624. template<class Handler>
  2625. void
  2626. basic_parser<Handler>::
  2627. reset() noexcept
  2628. {
  2629. ec_ = {};
  2630. st_.clear();
  2631. more_ = true;
  2632. done_ = false;
  2633. clean_ = true;
  2634. num_buf_.clear();
  2635. }
  2636. template<class Handler>
  2637. void
  2638. basic_parser<Handler>::
  2639. fail(system::error_code ec) noexcept
  2640. {
  2641. if(! ec)
  2642. {
  2643. // assign an arbitrary
  2644. // error code to prevent UB
  2645. BOOST_JSON_FAIL(ec_, error::incomplete);
  2646. }
  2647. else
  2648. {
  2649. ec_ = ec;
  2650. }
  2651. done_ = false;
  2652. }
  2653. //----------------------------------------------------------
  2654. template<class Handler>
  2655. std::size_t
  2656. basic_parser<Handler>::
  2657. write_some(
  2658. bool more,
  2659. char const* data,
  2660. std::size_t size,
  2661. system::error_code& ec)
  2662. {
  2663. // see if we exited via exception
  2664. // on the last call to write_some
  2665. if(! clean_)
  2666. {
  2667. // prevent UB
  2668. if(! ec_)
  2669. {
  2670. BOOST_JSON_FAIL(ec_, error::exception);
  2671. }
  2672. }
  2673. if(ec_)
  2674. {
  2675. // error is sticky
  2676. ec = ec_;
  2677. return 0;
  2678. }
  2679. clean_ = false;
  2680. more_ = more;
  2681. end_ = data + size;
  2682. const char* p;
  2683. if(BOOST_JSON_LIKELY(st_.empty()))
  2684. {
  2685. // first time
  2686. depth_ = opt_.max_depth;
  2687. if(BOOST_JSON_UNLIKELY(
  2688. ! h_.on_document_begin(ec_)))
  2689. {
  2690. ec = ec_;
  2691. return 0;
  2692. }
  2693. p = parse_document(data, std::true_type());
  2694. }
  2695. else
  2696. {
  2697. p = parse_document(data, std::false_type());
  2698. }
  2699. if(BOOST_JSON_LIKELY(p != sentinel()))
  2700. {
  2701. BOOST_ASSERT(! ec_);
  2702. if(! done_)
  2703. {
  2704. done_ = true;
  2705. h_.on_document_end(ec_);
  2706. }
  2707. }
  2708. else
  2709. {
  2710. if(! ec_)
  2711. {
  2712. if(! more_)
  2713. {
  2714. BOOST_JSON_FAIL(ec_, error::incomplete);
  2715. }
  2716. else if(! st_.empty())
  2717. {
  2718. // consume as much trailing whitespace in
  2719. // the JSON document as possible, but still
  2720. // consider the parse complete
  2721. state st;
  2722. st_.peek(st);
  2723. if( st == state::doc3 &&
  2724. ! done_)
  2725. {
  2726. done_ = true;
  2727. h_.on_document_end(ec_);
  2728. }
  2729. }
  2730. }
  2731. p = end_;
  2732. }
  2733. ec = ec_;
  2734. clean_ = true;
  2735. return p - data;
  2736. }
  2737. template<class Handler>
  2738. std::size_t
  2739. basic_parser<Handler>::
  2740. write_some(
  2741. bool more,
  2742. char const* data,
  2743. std::size_t size,
  2744. std::error_code& ec)
  2745. {
  2746. system::error_code jec;
  2747. std::size_t const result = write_some(more, data, size, jec);
  2748. ec = jec;
  2749. return result;
  2750. }
  2751. #endif
  2752. } // namespace json
  2753. } // namespace boost
  2754. #ifdef _MSC_VER
  2755. #pragma warning(pop)
  2756. #endif
  2757. #endif