working AST

This commit is contained in:
zanostro 2025-12-10 18:02:06 +01:00
parent 7c6379c62d
commit 9e9039af05
13 changed files with 962 additions and 36 deletions

View file

@ -0,0 +1,55 @@
#ifndef LEXER_H
#define LEXER_H
#include <string>
#include <stdexcept>
#include <cstddef>
/// Exception that carries a source position (row and column) in addition to
/// the message held by std::runtime_error.
class SyntaxError : public std::runtime_error {
public:
    int row;  ///< Row value handed to the constructor.
    int col;  ///< Column value handed to the constructor.

    /// @param msg   Text forwarded to std::runtime_error (readable via what()).
    /// @param row_  Copied into `row`.
    /// @param col_  Copied into `col`.
    SyntaxError(const std::string& msg, int row_, int col_)
        : std::runtime_error{msg}, row{row_}, col{col_} {}
};
/// Scanner interface over an input string.
///
/// Only declarations are visible in this header. The grouping notes below are
/// inferred from the member names and must be confirmed against the
/// implementation file.
class Lexer {
public:
    // Publicly readable position counters.
    // NOTE(review): presumably the current row/column in the input — confirm.
    int row;
    int col;

    /// Constructs a lexer from `input` (taken by value). Explicit, so a
    /// std::string does not convert to a Lexer implicitly.
    explicit Lexer(std::string input);

    // --- span helpers (presumably work together with start_ — TODO confirm) ---
    Lexer& mark();
    std::string extract(int ofs);
    std::string extract();

    // --- lookahead (const: do not modify the lexer) ---
    char peek(int ahead) const;
    char peek() const;

    // --- consumption ---
    char advance();
    bool advanceIf(char ch);
    void advance(char ch);

    // --- higher-level readers ---
    bool skipWhitespace();
    std::string readTo(char delimiter);
    std::string readAlphanumeric();
    std::string readDigits(int radix);

private:
    std::string input_;
    std::size_t pos_;
    std::size_t start_;

    static int digitValue(char c, int radix);
};
#endif // LEXER_H

View file

@ -1,16 +1,45 @@
// mnemonic.h
#ifndef MNEMONIC_H
#define MNEMONIC_H
#include <cstdint>
#include <string>
#include <vector>
#include <variant>
using std::string;
#include "opcode.h"
/// Operand alternative carrying no data.
struct Empty {};

/// Operand alternative holding a register number.
struct Register {
    int num;
};

/// Operand alternative holding an integer value.
struct Immediate {
    int value;
};

/// Operand alternative naming a symbol, with three flags that all default to
/// false.
struct SymbolRef {
    std::string name;
    bool indexed = false;
    bool immediate = false;
    bool indirect = false;
};

/// A single operand; a default-constructed Operand holds Empty (the first
/// alternative).
using Operand = std::variant<Empty, Register, Immediate, SymbolRef>;
/// An instruction mnemonic: opcode byte, instruction type, extended flag and
/// a list of operands.
class Mnemonic {
public:
    /// @param opcode    Stored and returned by opcode().
    /// @param type      Stored and returned by type().
    /// @param extended  Stored and returned by extended().
    /// The operand list starts out empty.
    Mnemonic(std::uint8_t opcode, InstructionType type, bool extended)
        : _opcode(opcode), _extended(extended), _type(type) {}

    std::uint8_t opcode() const { return _opcode; }
    bool extended() const { return _extended; }
    InstructionType type() const { return _type; }

    /// Mutable access to the operand list (callers append operands here).
    std::vector<Operand>& operands() { return _operands; }
    /// Read-only access to the operand list.
    const std::vector<Operand>& operands() const { return _operands; }

    /// Declared exactly once: repeating the same member declaration inside a
    /// class is ill-formed. Defined outside this header.
    std::string toString() const;

private:
    // Declaration order matches the constructor's initializer order.
    std::uint8_t _opcode;
    bool _extended;
    InstructionType _type;
    std::vector<Operand> _operands;
};

#endif // MNEMONIC_H

View file

@ -3,23 +3,22 @@
#include <string>
#include <memory>
#include <vector>
#include <variant>
#include <cstdint>
#include "mnemonic.h"
using std::string;
class Node {
public:
virtual ~Node() = default;
string getLabel() const;
string getComment() const;
std::shared_ptr<Mnemonic> getMnemonic() const;
string toString() const;
string getLabel() const { return _label; }
string getComment() const { return _comment; }
std::shared_ptr<Mnemonic> getMnemonic() const { return _mnemonic; }
virtual string toString() const;
protected:
string _label;
@ -27,5 +26,73 @@ protected:
string _comment;
};
/// Node built from a label, a mnemonic and a comment.
class InstructionNode : public Node {
public:
    /// Moves the three arguments into the members inherited from Node.
    /// They are assigned in the body because base-class members cannot be
    /// named in a derived-class member-initializer list.
    InstructionNode(string label,
                    std::shared_ptr<Mnemonic> mnemonic,
                    string comment) {
        _label = std::move(label);
        _mnemonic = std::move(mnemonic);
        _comment = std::move(comment);
    }

    /// Overrides Node::toString(); defined outside this header.
    string toString() const override;
};
/// Node that only carries comment text.
class CommentNode : public Node {
public:
    /// Moves `text` into the inherited `_comment` member. Explicit, so a
    /// string does not convert to a CommentNode implicitly.
    explicit CommentNode(string text)
    {
        _comment = std::move(text);
    }

    /// Overrides Node::toString(); defined outside this header.
    string toString() const override;
};
/// Kinds of directive a DirectiveNode can represent. Enumerator order (and
/// therefore the underlying values 0..9) is significant if values are ever
/// cast to integers.
enum class DirectiveKind {
    START,
    END,
    BASE,
    NOBASE,
    EQU,
    ORG,
    LTORG,
    EXTDEF,
    EXTREF,
    CSECT
};

/// Argument attached to a directive: nothing, an int, a single string, or a
/// list of strings. Default-constructed value holds std::monostate.
using DirectiveArg = std::variant<std::monostate,
                                  int,
                                  std::string,
                                  std::vector<std::string>>;
class DirectiveNode : public Node {
public:
DirectiveNode(string label, DirectiveKind kind, DirectiveArg arg, string comment)
: _kind(kind), _arg(std::move(arg)) {
_label = std::move(label);
_comment = std::move(comment);
}
DirectiveKind kind() const { return _kind; }
const DirectiveArg& arg() const { return _arg; }
string toString() const override;
private:
DirectiveKind _kind;
DirectiveArg _arg;
};
/// Kinds of data declaration a DataNode can represent.
enum class DataKind {
    WORD,
    BYTE,
    RESW,
    RESB
};

/// Payload of a data declaration: nothing, an int, or a byte sequence.
/// Default-constructed value holds std::monostate.
using DataValue = std::variant<std::monostate,
                               int,
                               std::vector<std::uint8_t>>;
class DataNode : public Node {
public:
DataNode(string label, DataKind kind, DataValue value, string comment)
: _kind(kind), _value(std::move(value)) {
_label = std::move(label);
_comment = std::move(comment);
}
DataKind kind() const { return _kind; }
const DataValue& value() const { return _value; }
string toString() const override;
private:
DataKind _kind;
DataValue _value;
};
#endif // NODE_H

View file

@ -3,6 +3,10 @@
#include "utils.h"
#include <unordered_map>
#include <string_view>
#include <optional>
// ==============================
// Opcode definitions (SIC/XE)
// ==============================
@ -87,6 +91,8 @@
#define LDVS 0x68
#define LDVT 0x04
// Header-level lookup state. Declared `inline` (C++17) so that every
// translation unit including this header shares ONE instance. With `static`
// each translation unit would get its own private copy, so a table filled in
// one .cpp file would still be empty when read from another.
// The string_view keys do not own their characters; whatever inserts entries
// must use storage that outlives the map (e.g. string literals).
inline std::unordered_map<std::string_view, uint8_t> mnemonicToOpcode;
inline bool opcodeTablesInitialized = false;
enum class InstructionType {
@ -110,6 +116,10 @@ struct InstructionInfo {
// Instruction tables; only declared here, defined in another translation unit.
extern InstructionInfo instructions[];
extern InstructionInfo instructionsEXEX[];

// Lookup helpers (function declarations have external linkage by default, so
// no `extern` keyword is needed).
// NOTE(review): presumably findOpcodeByMnemonic yields an empty optional for
// an unknown name — confirm against the definition.
std::optional<uint8_t> findOpcodeByMnemonic(std::string_view name);
const InstructionInfo& getInstructionInfo(uint8_t opcode);

// Initialize the instruction table
void loadInstructionSet();

View file

@ -0,0 +1,52 @@
// parser.h
#ifndef PARSER_H
#define PARSER_H
#include <string>
#include <vector>
#include <memory>
#include <unordered_map>
#include <cstdint>
#include "lexer.h"
#include "code.h"
#include "opcode.h"
#include "mnemonic.h"
/// Parser interface: turns an input string into a Code object.
///
/// Only declarations are visible in this header. The grouping comments below
/// are inferred from member names and should be confirmed against the
/// implementation file.
class Parser {
public:
    Parser() = default;

    /// Entry point: parses `input` and returns the resulting Code.
    Code parse(const std::string& input);

private:
    // --- small syntactic elements ---
    std::string parseLabel();
    std::shared_ptr<Mnemonic> parseMnemonic();
    std::string parseSymbol();
    int parseRegister();
    void parseComma();
    bool parseIndexed();
    // NOTE(review): lo/hi look like an accepted value range — confirm whether
    // the bounds are inclusive.
    int parseNumber(int lo, int hi);
    std::vector<std::uint8_t> parseData();

    // --- operands of an already-created mnemonic ---
    void parseOperands(Mnemonic& m);

    // --- directive classification and parsing ---
    bool isDirective(const std::string& name);
    bool isDataDirective(const std::string& name);
    std::shared_ptr<Node> parseDirective(const std::string& label, const std::string& directive);
    std::shared_ptr<Node> parseDataDirective(const std::string& label, const std::string& directive);

    // --- line / program level ---
    std::shared_ptr<Node> parseInstruction();
    Code parseCode();

    // --- mnemonic table helpers ---
    std::shared_ptr<Mnemonic> makeMnemonic(const std::string& name, bool extended);
    static void initMnemonicMap();

private:
    // Starts out wrapping an empty string.
    Lexer lexer_{""};

    // Class-wide state shared by all Parser instances (C++17 inline statics).
    static inline std::unordered_map<std::string, std::uint8_t> s_nameToOpcode{};
    static inline bool s_mnemonicMapInitialized = false;
};
#endif // PARSER_H