1
\$\begingroup\$

Code: https://github.com/Loki-Astari/Puzzle/tree/master/JSON-1

From Challenge: https://codingchallenges.fyi/challenges/challenge-json-parser

json1.cpp

#include <cctype>
#include <fstream>
#include <iostream>
#include <string>

enum class Token {EndOfStream, Invalid, OpenCurlyBrace, CloseCurlBrace, OpenSquareBrace, CloseSquareBrace, Colon, Comma, String, Number, True, False, Null};

class JsonLexer
{
    std::istream&   file;
    std::string     token;

    Token extractTrue();
    Token extractFalse();
    Token extractNull();
    Token extractString();
    Token extractNumber(char n);

    public:
        JsonLexer(std::istream& file)
            : file(file)
        {}
        Token nextToken();
};

class JsonParser
{
    JsonLexer&      lexer;

    bool parseValue(Token next);
    bool parseObject(Token next);
    bool parseArray(Token next);

    public:
        JsonParser(JsonLexer& lexer)
            : lexer(lexer)
        {}

    bool parse();
};

bool checkJson(std::string const& fileName, std::istream& file)
{
    JsonLexer       lexer(file);
    JsonParser      parser(lexer);

    bool valid      = parser.parse();
    std::cout << fileName << ":\t\t" << ((valid ? "Valid" : "In Valid")) << "\n";
    return valid;
}

int main(int argc, char* argv[])
{
    bool result = true;
    if (argc == 1) {
        result = checkJson("", std::cin);
    }
    else {
        for (int loop = 1; loop < argc; ++loop) {
            std::ifstream   file(argv[loop]);
            if (!file) {
                std::cerr << "Invalid File: " << argv[loop] << "\n";
            }
            if (!checkJson(argv[loop], file)) {
                result = false;
            }
        }
    }
    return result ? 0 : 1;
}

bool JsonParser::parse()
{
    Token   next = lexer.nextToken();
    bool result = parseValue(next);
    if (result) {
        next = lexer.nextToken();
        // There should be no more tokens on the input stream.
        // If there are then this is an error.
        result = (next == Token::EndOfStream);
    }
    return result;
};

bool JsonParser::parseValue(Token next)
{
    switch (next)
    {
        case Token::OpenCurlyBrace:         return parseObject(next);
        case Token::OpenSquareBrace:        return parseArray(next);
        case Token::String:                 return true;
        case Token::Number:                 return true;
        case Token::True:                   return true;
        case Token::False:                  return true;
        case Token::Null:                   return true;
        default:
            // Anything else is an error for a value.
            return false;
    }
}

bool JsonParser::parseArray(Token next)
{
    if (next != Token::OpenSquareBrace) {
        return false;
    }
    next = lexer.nextToken();
    if (next == Token::CloseSquareBrace) {
        return true;
    }
    while (true)
    {
        if (!parseValue(next)) {
            return false;
        }
        next = lexer.nextToken();
        if (next == Token::CloseSquareBrace) {
            return true;
        }
        if (next != Token::Comma) {
            return false;
        }
        next = lexer.nextToken();
    }
}

bool JsonParser::parseObject(Token next)
{
    if (next != Token::OpenCurlyBrace) {
        return false;
    }
    next = lexer.nextToken();
    if (next == Token::CloseCurlBrace) {
        return true;
    }
    while (true)
    {
        if (next != Token::String) {
            return false;
        }
        next = lexer.nextToken();
        if (next != Token::Colon) {
            return false;
        }
        next = lexer.nextToken();
        if (!parseValue(next)) {
            return false;
        }
        next = lexer.nextToken();
        if (next == Token::CloseCurlBrace) {
            return true;
        }
        if (next != Token::Comma) {
            return false;
        }
        next = lexer.nextToken();
    }
}

Token JsonLexer::nextToken()
{
    char    n;
    if (file >> n) {
        // White space automatically skipped.
        switch (n) {
            case '{':       return Token::OpenCurlyBrace;
            case '}':       return Token::CloseCurlBrace;
            case '[':       return Token::OpenSquareBrace;
            case ']':       return Token::CloseSquareBrace;
            case ':':       return Token::Colon;
            case ',':       return Token::Comma;
            case 't':       return extractTrue();
            case 'f':       return extractFalse();
            case 'n':       return extractNull();
            case '"':       return extractString();
            default:        return extractNumber(n);
        }
    }
    else {
        // Failing to read from the stream is an invalid token.
        return Token::EndOfStream;
    }
}

Token JsonLexer::extractTrue()
{
    static char buffer[3];
    if (!file.read(buffer, 3) || file.gcount() != 3 || strncmp(buffer, "rue", 3) != 0) {
        return Token::Invalid;
    }
    return Token::True;
}

Token JsonLexer::extractFalse()
{
    static char buffer[4];
    if (!file.read(buffer, 4) || file.gcount() != 4 || strncmp(buffer, "alse", 4) != 0) {
        return Token::Invalid;
    }
    return Token::False;
}

Token JsonLexer::extractNull()
{
    static char buffer[3];
    if (!file.read(buffer, 3) || file.gcount() != 3 || strncmp(buffer, "ull", 3) != 0) {
        return Token::Invalid;
    }
    return Token::Null;
}

Token JsonLexer::extractString()
{
    token.clear();
    while (true) {
        int n = file.get();
        if (n == std::char_traits<char>::eof()) {
            return Token::Invalid;
        }
        if (n == '"') {
            return Token::String;
        }
        if (n != '\\') {
            token += n;
            continue;
        }
        n = file.get();
        if (n == std::char_traits<char>::eof()) {
            return Token::Invalid;
        }
        switch (n) {
            case '"':
            case '\\':
            case '/':
                token += n;
                break;
            case 'b':       token += '\b';break;
            case 'f':       token += '\f';break;
            case 'n':       token += '\n';break;
            case 'r':       token += '\r';break;
            case 't':       token += '\t';break;
            case 'u':
            {
                int UTF8 = 0;
                for (int loop = 0; loop < 4; ++loop) {
                    n = file.get();
                    if (n == std::char_traits<char>::eof()) {
                        return Token::Invalid;
                    }
                    if (n >= '0' && n <= '9') {
                        UTF8 = UTF8 * 16 + (n - '0');
                    }
                    else if (n >= 'a' && n <= 'f') {
                        UTF8 = UTF8 * 16 + (n - 'a' + 10);
                    }
                    else if (n >= 'A' && n <= 'F') {
                        UTF8 = UTF8 * 16 + (n - 'A' + 10);
                    }
                    else {
                        return Token::Invalid;
                    }
                }
                if (UTF8 <= 0x7f) {
                    // U+0000   U+007F  0yyyzzzz    
                    token += static_cast<char>(UTF8);
                }
                else if (UTF8 <= 0x7FF) {
                    // U+0080   U+07FF  110xxxyy    10yyzzzz    
                    int z = (UTF8 >> 0) & 0xF;
                    int y = (UTF8 >> 4) & 0xF;
                    int x = (UTF8 >> 8) & 0x7;
                    token += static_cast<char>(0xC0 | (x << 2) | (y >> 2));
                    token += static_cast<char>(0x80 | ((y & 0x3) << 4) | z);
                }
                else if (UTF8 <= 0xFFFF) {
                    // U+0800   U+FFFF  1110wwww    10xxxxyy    10yyzzzz    
                    int z = (UTF8 >>  0) & 0xF;
                    int y = (UTF8 >>  4) & 0xF;
                    int x = (UTF8 >>  8) & 0xF;
                    int w = (UTF8 >> 12) & 0xF;
                    token += static_cast<char>(0xE0 | w);
                    token += static_cast<char>(0x80 | (x << 2) | (y >> 2));
                    token += static_cast<char>(0x80 | ((y & 0x3) << 4) | z);
                }
                else if (UTF8 <= 0x10FFFF) {
                    // U+010000 U+10FFFF    11110uvv    10vvwwww    10xxxxyy    10yyzzzz
                    int z = (UTF8 >>  0) & 0xF;
                    int y = (UTF8 >>  4) & 0xF;
                    int x = (UTF8 >>  8) & 0xF;
                    int w = (UTF8 >> 12) & 0xF;
                    int v = (UTF8 >> 16) & 0xF;
                    int u = (UTF8 >> 20) & 0x1;
                    token += static_cast<char>(0xF0 | (u << 2) | (v >> 2));
                    token += static_cast<char>(0x80 | ((v & 0x3) << 4) | w);
                    token += static_cast<char>(0x80 | (x << 2) | (y >> 2));
                    token += static_cast<char>(0x80 | ((y & 0x3) << 4) | z);
                }
                else {
                    return Token::Invalid;
                }
                break;
            }
            default:
                return Token::Invalid;
        }
    }
}

Token JsonLexer::extractNumber(char c)
{
    token.clear();

    // Optional neg sign.
    if (c == '-') {
        token += c;
        int n = file.get();
        if (n == std::char_traits<char>::eof()) {
            return Token::Invalid;
        }
        c = n;
    }

    if (!std::isdigit(c)) {
        return Token::Invalid;
    }
    else if (c == '0') {
        // Leading zero must not be followed by numbers;
        token += c;
        int n = file.get();
        if (n == std::char_traits<char>::eof()) {
            return Token::Number;
        }
        c = n;
    }
    else {
        // Any other digit then suck up all the digits.
        while (std::isdigit(c)) {
            token += c;
            int n = file.get();
            if (n == std::char_traits<char>::eof()) {
                return Token::Number;
            }
            c = n;
        }
    }

    // Fraction
    if (c == '.') {
        token += c;
        int n = file.get();
        if (n == std::char_traits<char>::eof()) {
            return Token::Invalid;
        }
        c = n;

        if (!std::isdigit(c)) {
            return Token::Invalid;
        }
        while (std::isdigit(c)) {
            token += c;
            int n = file.get();
            if (n == std::char_traits<char>::eof()) {
                return Token::Number;
            }
            c = n;
        }
    }

    // Exponent
    if (c == 'e' || c == 'E') {
        token += c;
        int n = file.get();
        if (n == std::char_traits<char>::eof()) {
            return Token::Invalid;
        }
        c = n;

        // Optional sign
        if (c == '-' || c == '+') {
            token += c;
            int n = file.get();
            if (n == std::char_traits<char>::eof()) {
                return Token::Invalid;
            }
            c = n;
        }

        if (!std::isdigit(c)) {
            return Token::Invalid;
        }
        while (std::isdigit(c)) {
            token += c;
            int n = file.get();
            if (n == std::char_traits<char>::eof()) {
                return Token::Number;
            }
            c = n;
        }
    }
    // unused character.
    // put it back on the stream.
    file.unget();
    return Token::Number;
}
\$\endgroup\$

1 Answer 1

2
\$\begingroup\$

Aesthetics

Purely aesthetic, but the following is way too long a line.

enum class Token {EndOfStream, Invalid, OpenCurlyBrace, CloseCurlBrace, OpenSquareBrace, CloseSquareBrace, Colon, Comma, String, Number, True, False, Null};

Revised:

enum class Token {
    EndOfStream, Invalid, OpenCurlyBrace, CloseCurlBrace, 
    OpenSquareBrace, CloseSquareBrace, Colon, Comma, String, 
    Number, True, False, Null
};

You have several places where newlines between control structures would also aid readability.

Control flow

Is there a reason to not have all of the cases that return true; not simply fall through to case Token::Null?

bool JsonParser::parseValue(Token next)
{
    switch (next)
    {
        case Token::OpenCurlyBrace:         return parseObject(next);
        case Token::OpenSquareBrace:        return parseArray(next);
        case Token::String:                 return true;
        case Token::Number:                 return true;
        case Token::True:                   return true;
        case Token::False:                  return true;
        case Token::Null:                   return true;
        default:
            // Anything else is an error for a value.
            return false;
    }
}
\$\endgroup\$

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.