1
\$\begingroup\$

I'm working on a homework writing a toy interpreter. All the expressions in this language are prefix expressions. The language has a lot of lexical tokens and I wrote code as follows:

Value *expression() {
    std::string token;
    cin >> token;

    if (token == "make")
        return make();
    else if (token == "thing")
        return thing();
    else if (token == "print")
        return print();
    else if (token == "read")
        return read();
    else if (token == "add")
        return new Number(*expression() + *expression());
    else if (token == "sub")
        return new Number(*expression() - *expression());
    else if (token == "mul")
        return new Number(*expression() * *expression());
    else if (token == "div")
        return new Number(*expression() / *expression());
    else if (token == "mod")
        return new Number(int(*expression()) % int(*expression()));
    else if (token == "true")
        return new Bool(true);
    else if (token == "false")
        return new Bool(false);
    else if (token[0] == '"')       // String
        return new Word(token.substr(1));
    else if (token[0] == ':')
        return thing(token.substr(1));
    else                            // Number
        return new Number(std::stod(token));
}

int main() {
    while (true) {
        try {
            auto res = expression();
        } catch (const std::exception &e) {
            std::cerr << "Caught exception: " << e.what() << std::endl;
        }
    }
    return 0;
}

But I think it's kind of ugly to have such a long (and will be longer) if-else chain, and maybe it will cost much time to compare along this chain. I don't know whether there will be some optimizations done by the compiler.

I'm wondering if I have some better way to organize my code, or is this if-else chain method already good enough?

The complete compilable code is:

#include <exception>
#include <iostream>
#include <string>
#include <unordered_map>
using std::cin;
using std::cout;
using std::endl;

class Value {
public:
    virtual operator double() = 0;
    virtual std::string toString() = 0;
};

class Number : public Value {
    double value;

public:
    Number(double d) : value(d) {}
    operator double() { return value; }
    std::string toString() { return std::to_string(value); }
};

class Word : public Value {
    std::string value;

public:
    Word(const std::string &str) : value(str) {}
    Word(std::string &&str) : value(str) {}
    operator double() { return std::stod(value); }
    std::string get() { return value; }
    std::string toString() { return value; }
};

class Bool : public Value {
    bool value;

public:
    Bool(bool b) : value(b) {}
    Bool(const std::string &str) {
        if (str == "true")
            value = true;
        else if (str == "false")
            value = false;
        else
            throw std::invalid_argument{str + " is not a valid Bool value"};
    }
    operator double() { throw std::invalid_argument{"Bool is not convertable to Number。"}; }
    std::string toString() { return value ? "true" : "false"; }
};

template <typename T, typename U> T downcastOrDie(U *ptr) {
    auto res = dynamic_cast<T>(ptr);
    if (!res) throw std::invalid_argument{"dynamic cast failed"};
    return res;
}

std::unordered_map<std::string, Value *> symbolTbl;

void program();
Value *expression();
Value *make();
Value *thing();
Value *thing(const std::string &name);
Value *print();
Value *read();


Value *expression() {
    std::string token;
    cin >> token;

    if (token == "make")
        return make();
    else if (token == "thing")
        return thing();
    else if (token == "print")
        return print();
    else if (token == "read")
        return read();
    else if (token == "add")
        return new Number(*expression() + *expression());
    else if (token == "sub")
        return new Number(*expression() - *expression());
    else if (token == "mul")
        return new Number(*expression() * *expression());
    else if (token == "div")
        return new Number(*expression() / *expression());
    else if (token == "mod")
        return new Number(int(*expression()) % int(*expression()));
    else if (token == "true")
        return new Bool(true);
    else if (token == "false")
        return new Bool(false);
    else if (token[0] == '"') // String
        return new Word(token.substr(1));
    else if (token[0] == ':')
        return thing(token.substr(1));
    else // Number
        return new Number(std::stod(token));
}

Value *make() {
    auto name = expression();
    auto value = expression();
    symbolTbl[downcastOrDie<Word *>(name)->get()] = value;
    return value;
}

Value *thing() { return thing(downcastOrDie<Word *>(expression())->get()); }

Value *thing(const std::string &name) { return symbolTbl[name]; }

Value *print() {
    auto val = expression();
    cout << val->toString() << endl;
    return val;
}

bool isDouble(const std::string &str) {
    bool hasDecimalPoint = false;
    if (str.length() == 0) return false;

    auto it = str.begin();
    if (*it == '-') ++it;

    for (; it != str.end(); ++it) {
        if (*it == '.') {
            if (hasDecimalPoint)
                return false;
            else
                hasDecimalPoint = true;
        } else if (!std::isdigit(*it))
            return false;
    }
    return true;
}

Value *read() {
    std::string input;
    cin >> input;

    if (isDouble(input)) {
        return new Number(std::stod(input));
    } else {
        return new Word(input);
    }
}

int main() {
    while (true) {
        try {
            auto res = expression();
        } catch (const std::exception &e) {
            std::cerr << "Caught exception: " << e.what() << std::endl;
        }
    }
    return 0;
}
\$\endgroup\$
4
  • \$\begingroup\$ @CrisLuengo Could you please give me some more information? Should I make a map from string to something like std::function? Is it a good practice considering factors such as maintainability, readability, and efficiency? \$\endgroup\$ Commented Aug 29, 2023 at 2:09
  • \$\begingroup\$ That is one way. Or you can return an integer (enum) that you switch on. Or you can return a label that you goto (don’t do that one though). Though looking at your list, which contains different levels of operants and expressions, it seems to me you need to re-read the chapter on parsing in your programming book. Actually we don’t write parsers by hand, that process was already automated back in the 1970’s (with yacc). \$\endgroup\$ Commented Aug 29, 2023 at 2:23
  • \$\begingroup\$ @CrisLuengo Not all languages can be parsed by yacc parsers. Recursive decent parsers can be easily written by hand in many programming languages, and it's a nice learning experience. Parser generators are indeed the way to go for serious projects, but you then have to learn how to write your language's grammar in a format that the parser generator understands, and sometimes fight with proper integration of the generated code into your own project. \$\endgroup\$ Commented Aug 29, 2023 at 9:15
  • \$\begingroup\$ Use flex and bison to do this. \$\endgroup\$ Commented Sep 6, 2023 at 23:36

1 Answer 1

5
\$\begingroup\$

Use a look-up table

As already mentioned by Cris Luengo in the comments, you can avoid the long if-else sequence by using a look-up table. You want to map tokens to functions that will do the processing related to that token. It can look like:

using action_type = std::function<Value*(std::string_view)>;

static std::unordered_map<std::string, action_type> actions = {
    {"make",  [](auto){ return make(); }},
    {"thing", [](auto){ return thing(); }},
    …
    {"add",   [](auto){ return new Number(*expression() + *expression()); }},
    …
    {":",     [](auto token){ return thing(token.substr(1)); }},
};

And then expression() can be simplified to:

Value *expression() {
    std::string token;
    cin >> token;

    if (auto action = actions.find(token); action != actions.end()) {
        return action(token);
    } else /* Number */ {
        return new Number(std::stod(token));
    }
}

It will be faster too, since a std::unordered_map() uses a hash table to look up the token in \$O(1)\$ time, whereas your code has to check the token against all possible values.

Avoid manual memory management

Avoid calling new and delete manually in your code. It's easy to forget to call delete and to introduce a memory leak that way. For example, in just this piece of code:

*expression() + *expression()

You leaked two memory allocations. Instead, use std::unique_ptr<> to store your pointers in; it will automatically delete the storage when the pointer is going out of scope:

std::unique_ptr<Value> make();

using action_type = std::function<std::unique_ptr<Value>(std::string_view)>;

static std::unordered_map<std::string, action_type> actions = {
    {"make",  [](auto){ return make(); }},
    …
    {"add",   [](auto){
        return std::make_unique<Number>(*expression() + *expression());
    }},
    …
};

std::unique_ptr<Value> expression() {
    …
    return action(token);
    …
}

Now the memory leak in the addition expression is gone.

Consider using std::variant to store values

You are using inheritance to be able to pass around Values of different types. This is fine, but one drawback is that you have to allocate each Value in some way, because you can only pass it around using pointers. That can result in your code making lots of memory allocations. A different way to do this is to use std::variant, which will allow you to pass Values by value (no pun intended). The code would look like:

class Number /* no inheritance */ {
    double value;
public:
    /* same members as before */
    …
};
…
using Value = std::variant<Number, Word, Bool>;

Value make();

using action_type = std::function<Value(std::string_view)>;

static std::unordered_map<std::string, action_type> actions = {
    {"make",  [](auto){ return make(); }},
    …
    {"add",   [](auto){ return expression() + expression(); }},
    …
}

Value expression() {
    …
    return action(token);
    …
}

Notice how convenient it is to no longer have to deal with Values being passed around as pointer! Of course, now you still have to deal with how to add two Values together. But how does addition work now? If you could only add Numbers together, it could look like this:

Value operator+(const Value& lhs_value, const Value& rhs_value) {
    double lhs = std::get<Number>(lhs_value);
    double rhs = std::get<Number>(rhs_value);
    return Number(lhs + rhs);
}

Note that std::get<Number>(lhs_value) will throw an exception if lhs was a Word or a Bool. Alternatively, you can use std::visit():

Value operator+(const Value& lhs_value, const Value& rhs_value) {
    return std::visit([](auto& lhs, auto& rhs) {
        return lhs + rhs;
    }, lhs_value, rhs_value);
}

This basically unpacks the Values, so the lambda is called with the concrete types (Number, Word and/or Bool). It then relies on operator+() existing for those types.

Be careful with string to double conversion

All the standard library functions that deal with conversion from strings to numbers, like std::stod(), will ignore any non-number characters that occur after the number. So std::stod("123four") will return 123, and will not throw an exception. Consider using std::from_chars() instead, as it reports how far it read, and then check that it read the whole string.

I/O can fail at any time

Be aware that any input/output operations can fail at any time, even std::cin and std::cout: maybe the user closed the terminal, or pressed ctrl-D (on UNIX). If you don't handle this, your program will go into an infinite loop reading empty strings. In read() for example, I recommend you add:

if (!std::cin.good()) {
    throw std::runtime_error("Error reading input");
}

And of course, in main() you should know when exceptions are fatal, and break out of the while(true) loop when it is.

\$\endgroup\$
2
  • 1
    \$\begingroup\$ Thank you for the review! I've tried to accept the advices, and my new code is here. Sorry for being unclear, the language is a weak typed language where Words can be implicitly converted into Numbers or Bools when needed. So I use Value class with a std::variant as a member, so that I can create implicit conversion more easily. I'm not sure whether there are more possible improvements, but I'll post a follow up question when I add more features. \$\endgroup\$ Commented Sep 1, 2023 at 9:46
  • 1
    \$\begingroup\$ Great, we're looking forward to the follow-up question :) \$\endgroup\$ Commented Sep 1, 2023 at 9:58

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.