This function scans a string according to Sect. 7 of RFC 7159. While scanning, bytes are escaped and copied into buffer token_buffer. Then the function returns successfully, token_buffer is not null-terminated (as it may contain \0 bytes), and token_buffer.size() is the number of bytes in the string.
7379 case std::char_traits<char>::eof():
7381 error_message =
"invalid string: missing closing quote";
7382 return token_type::parse_error;
7388 return token_type::value_string;
7432 const int codepoint1 = get_codepoint();
7433 int codepoint = codepoint1;
7437 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
7438 return token_type::parse_error;
7442 if (0xD800 <= codepoint1
and codepoint1 <= 0xDBFF)
7447 const int codepoint2 = get_codepoint();
7451 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
7452 return token_type::parse_error;
7459 codepoint =
static_cast<int>(
7461 (
static_cast<unsigned int>(codepoint1) << 10u)
7463 +
static_cast<unsigned int>(codepoint2)
7471 error_message =
"invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF";
7472 return token_type::parse_error;
7477 error_message =
"invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF";
7478 return token_type::parse_error;
7485 error_message =
"invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
7486 return token_type::parse_error;
7491 assert(0x00 <= codepoint
and codepoint <= 0x10FFFF);
7494 if (codepoint < 0x80)
7499 else if (codepoint <= 0x7FF)
7502 add(static_cast<int>(0xC0u | (static_cast<unsigned int>(codepoint) >> 6u)));
7503 add(static_cast<int>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
7505 else if (codepoint <= 0xFFFF)
7508 add(static_cast<int>(0xE0u | (static_cast<unsigned int>(codepoint) >> 12u)));
7509 add(static_cast<int>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
7510 add(static_cast<int>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
7515 add(static_cast<int>(0xF0u | (static_cast<unsigned int>(codepoint) >> 18u)));
7516 add(static_cast<int>(0x80u | ((static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu)));
7517 add(static_cast<int>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
7518 add(static_cast<int>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
7526 error_message =
"invalid string: forbidden character after backslash";
7527 return token_type::parse_error;
7536 error_message =
"invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
7537 return token_type::parse_error;
7542 error_message =
"invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
7543 return token_type::parse_error;
7548 error_message =
"invalid string: control character U+0002 (STX) must be escaped to \\u0002";
7549 return token_type::parse_error;
7554 error_message =
"invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
7555 return token_type::parse_error;
7560 error_message =
"invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
7561 return token_type::parse_error;
7566 error_message =
"invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
7567 return token_type::parse_error;
7572 error_message =
"invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
7573 return token_type::parse_error;
7578 error_message =
"invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
7579 return token_type::parse_error;
7584 error_message =
"invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
7585 return token_type::parse_error;
7590 error_message =
"invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
7591 return token_type::parse_error;
7596 error_message =
"invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
7597 return token_type::parse_error;
7602 error_message =
"invalid string: control character U+000B (VT) must be escaped to \\u000B";
7603 return token_type::parse_error;
7608 error_message =
"invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
7609 return token_type::parse_error;
7614 error_message =
"invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
7615 return token_type::parse_error;
7620 error_message =
"invalid string: control character U+000E (SO) must be escaped to \\u000E";
7621 return token_type::parse_error;
7626 error_message =
"invalid string: control character U+000F (SI) must be escaped to \\u000F";
7627 return token_type::parse_error;
7632 error_message =
"invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
7633 return token_type::parse_error;
7638 error_message =
"invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
7639 return token_type::parse_error;
7644 error_message =
"invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
7645 return token_type::parse_error;
7650 error_message =
"invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
7651 return token_type::parse_error;
7656 error_message =
"invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
7657 return token_type::parse_error;
7662 error_message =
"invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
7663 return token_type::parse_error;
7668 error_message =
"invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
7669 return token_type::parse_error;
7674 error_message =
"invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
7675 return token_type::parse_error;
7680 error_message =
"invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
7681 return token_type::parse_error;
7686 error_message =
"invalid string: control character U+0019 (EM) must be escaped to \\u0019";
7687 return token_type::parse_error;
7692 error_message =
"invalid string: control character U+001A (SUB) must be escaped to \\u001A";
7693 return token_type::parse_error;
7698 error_message =
"invalid string: control character U+001B (ESC) must be escaped to \\u001B";
7699 return token_type::parse_error;
7704 error_message =
"invalid string: control character U+001C (FS) must be escaped to \\u001C";
7705 return token_type::parse_error;
7710 error_message =
"invalid string: control character U+001D (GS) must be escaped to \\u001D";
7711 return token_type::parse_error;
7716 error_message =
"invalid string: control character U+001E (RS) must be escaped to \\u001E";
7717 return token_type::parse_error;
7722 error_message =
"invalid string: control character U+001F (US) must be escaped to \\u001F";
7723 return token_type::parse_error;
7860 return token_type::parse_error;
7870 return token_type::parse_error;
7894 return token_type::parse_error;
7904 return token_type::parse_error;
7914 return token_type::parse_error;
7926 return token_type::parse_error;
7936 return token_type::parse_error;
7944 error_message =
"invalid string: ill-formed UTF-8 byte";
7945 return token_type::parse_error;
void reset() noexcept
reset token_buffer; current character is beginning of token
return match has_match and(match.match_pdg==11 or match.match_pdg==-11)
#define JSON_HEDLEY_LIKELY(expr)
std::char_traits< char >::int_type current
the current character
#define JSON_HEDLEY_UNLIKELY(expr)