working AST
This commit is contained in:
parent
7c6379c62d
commit
9e9039af05
13 changed files with 962 additions and 36 deletions
|
|
@ -1,7 +1,7 @@
|
|||
cmake_minimum_required(VERSION 3.10)
|
||||
project(simulator_SIC_XE VERSION 1.0 LANGUAGES CXX)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
# Put all build outputs under target/bin
|
||||
|
|
@ -13,6 +13,10 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_DIR})
|
|||
# Collect all .cpp sources under src/
|
||||
file(GLOB_RECURSE SOURCES "${PROJECT_SOURCE_DIR}/src/*.cpp")
|
||||
|
||||
|
||||
set(MAIN_SRC "${PROJECT_SOURCE_DIR}/src/main.cpp")
|
||||
list(REMOVE_ITEM SOURCES ${MAIN_SRC})
|
||||
|
||||
if(NOT SOURCES)
|
||||
message(WARNING "No source files found in ${PROJECT_SOURCE_DIR}/src — the build will create an empty library")
|
||||
endif()
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
#include "../../include/instructions.h"
|
||||
#include "../../include/opcode.h"
|
||||
#include "../../include/constants.h"
|
||||
#include "../../../include/loader.h"
|
||||
#include "../../include/loader.h"
|
||||
|
||||
#include <QIntValidator>
|
||||
#include <QLineEdit>
|
||||
|
|
|
|||
55
simulator_SIC_XE/include/lexer.h
Normal file
55
simulator_SIC_XE/include/lexer.h
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
#ifndef LEXER_H
|
||||
#define LEXER_H
|
||||
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
#include <cstddef>
|
||||
|
||||
/// Error raised when the assembler source text is malformed.
///
/// Besides the message (available through what()), the exception carries the
/// row/column the thrower passed in, exposed as public data members.
class SyntaxError : public std::runtime_error {
public:
    int row;  ///< row supplied by the thrower
    int col;  ///< column supplied by the thrower

    /// @param msg   human-readable description, forwarded to std::runtime_error
    /// @param row_  row to store in `row`
    /// @param col_  column to store in `col`
    SyntaxError(const std::string& msg, int row_, int col_)
        : std::runtime_error{msg},
          row{row_},
          col{col_}
    {
    }
};
|
||||
|
||||
|
||||
/// Character-level scanner over an in-memory copy of the source text.
///
/// The lexer keeps a read cursor, a "mark" used by extract(), and a
/// row/column position that advance() keeps up to date.
class Lexer {
public:
    int row;  ///< current row (the constructor starts it at 1)
    int col;  ///< current column (the constructor starts it at 1)

    /// Takes ownership of the text to scan.
    explicit Lexer(std::string input);

    // --- marking and slicing -------------------------------------------

    /// Remembers the current cursor as the start of the next extract().
    Lexer& mark();

    /// Text from the mark up to the cursor shifted by `ofs` characters.
    std::string extract(int ofs);
    /// Text from the mark up to the cursor.
    std::string extract();

    // --- look-ahead ------------------------------------------------------

    /// Character `ahead` positions past the cursor, or '\0' past the end.
    char peek(int ahead) const;
    /// Character under the cursor, or '\0' at the end of input.
    char peek() const;

    // --- consuming -------------------------------------------------------

    /// Consumes and returns one character ('\0' at the end of input).
    char advance();

    /// Consumes the next character only when it equals `ch`.
    bool advanceIf(char ch);
    /// Consumes `ch` or throws SyntaxError when something else is next.
    void advance(char ch);

    /// Skips blanks and tabs; true when the cursor then sits at '\n' or end.
    bool skipWhitespace();

    /// Reads up to `delimiter` and returns the text before it.
    std::string readTo(char delimiter);

    /// Reads a run of letters, digits and underscores.
    std::string readAlphanumeric();

    /// Reads a run of digits valid in the given radix.
    std::string readDigits(int radix);

private:
    std::string input_;   // text being scanned
    std::size_t pos_;     // read cursor into input_
    std::size_t start_;   // position recorded by mark()

    /// Numeric value of `c` in `radix`, or -1 when it is not a valid digit.
    static int digitValue(char c, int radix);
};
|
||||
|
||||
#endif // LEXER_H
|
||||
|
|
@ -1,16 +1,45 @@
|
|||
// mnemonic.h
|
||||
#ifndef MNEMONIC_H
|
||||
#define MNEMONIC_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <variant>
|
||||
|
||||
using std::string;
|
||||
#include "opcode.h"
|
||||
|
||||
// ---- Operand alternatives carried by a Mnemonic ----

/// Placeholder alternative: no operand.
struct Empty {};

/// Register operand, identified by its number.
struct Register {
    int num;
};

/// Numeric operand.
struct Immediate {
    int value;
};

/// Reference to a named symbol plus its addressing flags.
struct SymbolRef {
    std::string name;
    bool indexed = false;    // ",X" suffix was present
    bool immediate = false;  // '#' prefix was present
    bool indirect = false;   // '@' prefix was present
};

/// One operand: exactly one of the alternatives above.
using Operand = std::variant<Empty, Register, Immediate, SymbolRef>;
|
||||
|
||||
class Mnemonic {
|
||||
public:
|
||||
string toString() const;
|
||||
Mnemonic(std::uint8_t opcode, InstructionType type, bool extended)
|
||||
: _opcode(opcode), _extended(extended), _type(type) {}
|
||||
|
||||
std::uint8_t opcode() const { return _opcode; }
|
||||
bool extended() const { return _extended; }
|
||||
InstructionType type() const { return _type; }
|
||||
|
||||
std::vector<Operand>& operands() { return _operands; }
|
||||
const std::vector<Operand>& operands() const { return _operands; }
|
||||
|
||||
std::string toString() const;
|
||||
|
||||
private:
|
||||
std::uint8_t _opcode;
|
||||
bool _extended;
|
||||
InstructionType _type;
|
||||
std::vector<Operand> _operands;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
#endif // MNEMONIC_H
|
||||
#endif // MNEMONIC_H
|
||||
|
|
|
|||
|
|
@ -3,23 +3,22 @@
|
|||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include <vector>
|
||||
#include <variant>
|
||||
#include <cstdint>
|
||||
#include "mnemonic.h"
|
||||
|
||||
using std::string;
|
||||
|
||||
class Node {
|
||||
public:
|
||||
virtual ~Node() = default;
|
||||
|
||||
string getLabel() const;
|
||||
|
||||
string getComment() const;
|
||||
|
||||
std::shared_ptr<Mnemonic> getMnemonic() const;
|
||||
|
||||
string toString() const;
|
||||
|
||||
string getLabel() const { return _label; }
|
||||
string getComment() const { return _comment; }
|
||||
std::shared_ptr<Mnemonic> getMnemonic() const { return _mnemonic; }
|
||||
|
||||
virtual string toString() const;
|
||||
|
||||
protected:
|
||||
string _label;
|
||||
|
|
@ -27,5 +26,73 @@ protected:
|
|||
string _comment;
|
||||
};
|
||||
|
||||
class InstructionNode : public Node {
|
||||
public:
|
||||
InstructionNode(string label,
|
||||
std::shared_ptr<Mnemonic> mnemonic,
|
||||
string comment) {
|
||||
_label = std::move(label);
|
||||
_mnemonic = std::move(mnemonic);
|
||||
_comment = std::move(comment);
|
||||
}
|
||||
|
||||
#endif // NODE_H
|
||||
string toString() const override;
|
||||
};
|
||||
|
||||
class CommentNode : public Node {
|
||||
public:
|
||||
explicit CommentNode(string text) {
|
||||
_comment = std::move(text);
|
||||
}
|
||||
|
||||
string toString() const override;
|
||||
};
|
||||
|
||||
/// Assembler directives that DirectiveNode can represent.
enum class DirectiveKind {
    START,
    END,
    BASE,
    NOBASE,
    EQU,
    ORG,
    LTORG,
    EXTDEF,
    EXTREF,
    CSECT
};
|
||||
|
||||
using DirectiveArg = std::variant<std::monostate, int, std::string, std::vector<std::string>>;
|
||||
|
||||
class DirectiveNode : public Node {
|
||||
public:
|
||||
DirectiveNode(string label, DirectiveKind kind, DirectiveArg arg, string comment)
|
||||
: _kind(kind), _arg(std::move(arg)) {
|
||||
_label = std::move(label);
|
||||
_comment = std::move(comment);
|
||||
}
|
||||
|
||||
DirectiveKind kind() const { return _kind; }
|
||||
const DirectiveArg& arg() const { return _arg; }
|
||||
|
||||
string toString() const override;
|
||||
|
||||
private:
|
||||
DirectiveKind _kind;
|
||||
DirectiveArg _arg;
|
||||
};
|
||||
|
||||
enum class DataKind { WORD, BYTE, RESW, RESB };
|
||||
|
||||
using DataValue = std::variant<std::monostate, int, std::vector<uint8_t>>;
|
||||
|
||||
class DataNode : public Node {
|
||||
public:
|
||||
DataNode(string label, DataKind kind, DataValue value, string comment)
|
||||
: _kind(kind), _value(std::move(value)) {
|
||||
_label = std::move(label);
|
||||
_comment = std::move(comment);
|
||||
}
|
||||
|
||||
DataKind kind() const { return _kind; }
|
||||
const DataValue& value() const { return _value; }
|
||||
|
||||
string toString() const override;
|
||||
|
||||
private:
|
||||
DataKind _kind;
|
||||
DataValue _value;
|
||||
};
|
||||
|
||||
#endif // NODE_H
|
||||
|
|
|
|||
|
|
@ -3,6 +3,10 @@
|
|||
|
||||
#include "utils.h"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <string_view>
|
||||
#include <optional>
|
||||
|
||||
// ==============================
|
||||
// Opcode definitions (SIC/XE)
|
||||
// ==============================
|
||||
|
|
@ -87,6 +91,8 @@
|
|||
#define LDVS 0x68
|
||||
#define LDVT 0x04
|
||||
|
||||
// Mnemonic name -> opcode lookup table, filled by loadInstructionSet().
// Declared `inline` rather than `static`: a `static` variable defined in a
// header gives every translation unit that includes it a private copy, so a
// table filled in one .cpp file would look empty from another.
inline std::unordered_map<std::string_view, uint8_t> mnemonicToOpcode;

// Set to true once loadInstructionSet() has filled the table above.
inline bool opcodeTablesInitialized = false;
|
||||
|
||||
|
||||
enum class InstructionType {
|
||||
|
|
@ -110,6 +116,10 @@ struct InstructionInfo {
|
|||
extern InstructionInfo instructions[];
|
||||
extern InstructionInfo instructionsEXEX[];
|
||||
|
||||
extern std::optional<uint8_t> findOpcodeByMnemonic(std::string_view name);
|
||||
extern const InstructionInfo& getInstructionInfo(uint8_t opcode);
|
||||
|
||||
|
||||
// Initialize the instruction table
|
||||
void loadInstructionSet();
|
||||
|
||||
|
|
|
|||
52
simulator_SIC_XE/include/parser.h
Normal file
52
simulator_SIC_XE/include/parser.h
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
// parser.h
|
||||
#ifndef PARSER_H
|
||||
#define PARSER_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <cstdint>
|
||||
|
||||
#include "lexer.h"
|
||||
#include "code.h"
|
||||
#include "opcode.h"
|
||||
#include "mnemonic.h"
|
||||
|
||||
class Parser {
|
||||
public:
|
||||
Parser() = default;
|
||||
|
||||
Code parse(const std::string& input);
|
||||
|
||||
private:
|
||||
std::string parseLabel();
|
||||
std::shared_ptr<Mnemonic> parseMnemonic();
|
||||
std::string parseSymbol();
|
||||
int parseRegister();
|
||||
void parseComma();
|
||||
bool parseIndexed();
|
||||
int parseNumber(int lo, int hi);
|
||||
std::vector<std::uint8_t> parseData();
|
||||
|
||||
void parseOperands(Mnemonic& m);
|
||||
|
||||
bool isDirective(const std::string& name);
|
||||
bool isDataDirective(const std::string& name);
|
||||
std::shared_ptr<Node> parseDirective(const std::string& label, const std::string& directive);
|
||||
std::shared_ptr<Node> parseDataDirective(const std::string& label, const std::string& directive);
|
||||
|
||||
std::shared_ptr<Node> parseInstruction();
|
||||
Code parseCode();
|
||||
|
||||
std::shared_ptr<Mnemonic> makeMnemonic(const std::string& name, bool extended);
|
||||
static void initMnemonicMap();
|
||||
|
||||
private:
|
||||
Lexer lexer_{""};
|
||||
|
||||
static inline std::unordered_map<std::string, std::uint8_t> s_nameToOpcode{};
|
||||
static inline bool s_mnemonicMapInitialized = false;
|
||||
};
|
||||
|
||||
#endif // PARSER_H
|
||||
138
simulator_SIC_XE/src/lexer.cpp
Normal file
138
simulator_SIC_XE/src/lexer.cpp
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
#include "lexer.h"
|
||||
#include <cctype>
|
||||
#include <algorithm>
|
||||
|
||||
/// Creates a lexer positioned at the first character of `input`
/// (row 1, column 1, mark at offset 0).
///
/// The initializers are listed in the order the members are declared in the
/// class (row, col, input_, pos_, start_): members are always initialized in
/// declaration order, and a list written in another order is misleading and
/// triggers -Wreorder.
Lexer::Lexer(std::string input)
    : row(1),
      col(1),
      input_(std::move(input)),
      pos_(0),
      start_(0)
{
}
|
||||
|
||||
/// Records the current cursor as the start of the next extract().
/// Returns *this so the call can be chained.
Lexer& Lexer::mark()
{
    this->start_ = this->pos_;
    return *this;
}
|
||||
|
||||
std::string Lexer::extract(int ofs) {
|
||||
std::size_t end = pos_ + static_cast<std::size_t>(ofs);
|
||||
if (end > input_.size()) {
|
||||
end = input_.size();
|
||||
}
|
||||
if (end < start_) {
|
||||
end = start_;
|
||||
}
|
||||
return input_.substr(start_, end - start_);
|
||||
}
|
||||
|
||||
std::string Lexer::extract() {
|
||||
return extract(0);
|
||||
}
|
||||
|
||||
char Lexer::peek(int ahead) const {
|
||||
std::size_t idx = pos_ + static_cast<std::size_t>(ahead);
|
||||
if (idx < input_.size()) {
|
||||
return input_[idx];
|
||||
}
|
||||
return '\0'; // sentinel for "no more chars"
|
||||
}
|
||||
|
||||
char Lexer::peek() const {
|
||||
return peek(0);
|
||||
}
|
||||
|
||||
char Lexer::advance() {
|
||||
char ch = peek();
|
||||
if (ch == '\0') {
|
||||
return '\0'; // don't move past end
|
||||
}
|
||||
|
||||
++pos_;
|
||||
|
||||
// update logical location
|
||||
if (ch == '\n') {
|
||||
++row;
|
||||
col = 1;
|
||||
} else if (ch == '\t') {
|
||||
col = ((col - 1) / 4) * 4 + 5;
|
||||
} else {
|
||||
++col;
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
bool Lexer::advanceIf(char ch) {
|
||||
if (peek() != ch) {
|
||||
return false;
|
||||
}
|
||||
advance();
|
||||
return true;
|
||||
}
|
||||
|
||||
void Lexer::advance(char ch) {
|
||||
if (!advanceIf(ch)) {
|
||||
throw SyntaxError(std::string("'") + ch + "' expected", row, col);
|
||||
}
|
||||
}
|
||||
|
||||
bool Lexer::skipWhitespace() {
|
||||
while (true) {
|
||||
char p = peek();
|
||||
if (p == ' ' || p == '\t') {
|
||||
advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
char p = peek();
|
||||
return (p == '\n' || p == '\0');
|
||||
}
|
||||
|
||||
std::string Lexer::readTo(char delimiter) {
|
||||
mark();
|
||||
while (peek() > 0 && peek() != delimiter) {
|
||||
advance();
|
||||
}
|
||||
if (peek() == delimiter) {
|
||||
advance(); // consume delimiter
|
||||
}
|
||||
// exclude delimiter itself (like Java's extract(-1))
|
||||
return extract(-1);
|
||||
}
|
||||
|
||||
std::string Lexer::readAlphanumeric() {
|
||||
mark();
|
||||
while (true) {
|
||||
char c = peek();
|
||||
if (std::isalnum(static_cast<unsigned char>(c)) || c == '_') {
|
||||
advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return extract();
|
||||
}
|
||||
|
||||
/// Numeric value of the digit `c` in the given radix.
///
/// Digits are '0'-'9' followed by the letters (either case) for 10..35.
/// @return the value, or -1 when `radix` is outside 2..36 or `c` is not a
///         digit of that radix.
int Lexer::digitValue(char c, int radix)
{
    if (radix < 2 || radix > 36) {
        return -1;
    }

    int value;
    if ('0' <= c && c <= '9') {
        value = c - '0';
    } else if ('A' <= c && c <= 'Z') {
        value = 10 + (c - 'A');
    } else if ('a' <= c && c <= 'z') {
        value = 10 + (c - 'a');
    } else {
        return -1;
    }
    return value < radix ? value : -1;
}
|
||||
|
||||
std::string Lexer::readDigits(int radix) {
|
||||
mark();
|
||||
while (digitValue(peek(), radix) != -1) {
|
||||
advance();
|
||||
}
|
||||
return extract();
|
||||
}
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
#include "mnemonic.h"
|
||||
|
||||
string Mnemonic::toString() const
|
||||
{
|
||||
return string();
|
||||
}
|
||||
|
|
@ -1,22 +1,120 @@
|
|||
#include "node.h"
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
|
||||
string Node::getLabel() const
|
||||
{
|
||||
return _label;
|
||||
/// Generic rendering: "<label> <mnemonic> .<comment>", where each part is
/// left out when it is empty / null.
string Node::toString() const
{
    string text;
    if (!_label.empty()) {
        text += _label;
        text += " ";
    }
    if (_mnemonic) {
        text += _mnemonic->toString();
        text += " ";
    }
    if (!_comment.empty()) {
        text += ".";
        text += _comment;
    }
    return text;
}
|
||||
|
||||
string Node::getComment() const
|
||||
{
|
||||
return _comment;
|
||||
std::string Mnemonic::toString() const {
|
||||
std::ostringstream oss;
|
||||
oss << "[OP:" << std::hex << (int)_opcode << "]";
|
||||
if (_extended) oss << "+";
|
||||
// Print operands
|
||||
for (size_t i = 0; i < _operands.size(); ++i) {
|
||||
if (i > 0) oss << ",";
|
||||
std::visit([&](auto&& arg) {
|
||||
using T = std::decay_t<decltype(arg)>;
|
||||
if constexpr (std::is_same_v<T, Empty>) {
|
||||
// nothing
|
||||
} else if constexpr (std::is_same_v<T, Register>) {
|
||||
oss << "R" << arg.num;
|
||||
} else if constexpr (std::is_same_v<T, Immediate>) {
|
||||
oss << "#" << arg.value;
|
||||
} else if constexpr (std::is_same_v<T, SymbolRef>) {
|
||||
oss << arg.name;
|
||||
if (arg.indexed) oss << ",X";
|
||||
}
|
||||
}, _operands[i]);
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::shared_ptr<Mnemonic> Node::getMnemonic() const
|
||||
{
|
||||
return _mnemonic;
|
||||
/// Renders "<label> <mnemonic> .<comment>"; the label, mnemonic and comment
/// parts are each omitted when empty / null.
string InstructionNode::toString() const
{
    string text;
    if (!_label.empty()) {
        text += _label;
        text += " ";
    }
    if (_mnemonic) {
        text += _mnemonic->toString();
    }
    if (!_comment.empty()) {
        text += " .";
        text += _comment;
    }
    return text;
}
|
||||
|
||||
string Node::toString() const
|
||||
{
|
||||
return (_label.length() > 0 ? _label + " " : "") + (_mnemonic ? _mnemonic->toString() + " ": "") + "." + _comment;
|
||||
/// Renders the comment as '.' followed by the stored text.
string CommentNode::toString() const
{
    string text(".");
    text += _comment;
    return text;
}
|
||||
|
||||
/// Renders "<label> <DIRECTIVE> <argument> .<comment>".
///
/// Argument formats:
///   none            -> nothing
///   int             -> hexadecimal digits
///   string          -> the text as is
///   list of strings -> comma-separated names
///
/// Every non-empty argument is preceded by one space. The list form used to
/// be appended directly to the directive name, producing e.g. "EXTDEFA,B".
string DirectiveNode::toString() const {
    std::ostringstream oss;
    if (!_label.empty()) oss << _label << " ";

    switch (_kind) {
        case DirectiveKind::START:  oss << "START";  break;
        case DirectiveKind::END:    oss << "END";    break;
        case DirectiveKind::BASE:   oss << "BASE";   break;
        case DirectiveKind::NOBASE: oss << "NOBASE"; break;
        case DirectiveKind::EQU:    oss << "EQU";    break;
        case DirectiveKind::ORG:    oss << "ORG";    break;
        case DirectiveKind::LTORG:  oss << "LTORG";  break;
        case DirectiveKind::EXTDEF: oss << "EXTDEF"; break;
        case DirectiveKind::EXTREF: oss << "EXTREF"; break;
        case DirectiveKind::CSECT:  oss << "CSECT";  break;
    }

    std::visit([&oss](const auto& arg) {
        using T = std::decay_t<decltype(arg)>;
        if constexpr (std::is_same_v<T, std::monostate>) {
            // no argument
        } else if constexpr (std::is_same_v<T, int>) {
            oss << " " << std::hex << arg;
        } else if constexpr (std::is_same_v<T, std::string>) {
            oss << " " << arg;
        } else if constexpr (std::is_same_v<T, std::vector<std::string>>) {
            // a space before the first name, commas between the rest;
            // an empty list prints nothing at all
            for (std::size_t i = 0; i < arg.size(); ++i) {
                oss << (i == 0 ? " " : ",") << arg[i];
            }
        }
    }, _arg);

    if (!_comment.empty()) oss << " ." << _comment;
    return oss.str();
}
|
||||
|
||||
/// Renders "<label> <DIRECTIVE> <value> .<comment>".
///
/// Value formats:
///   none  -> nothing
///   int   -> decimal number
///   bytes -> C'...' when every byte is printable ASCII (32..126) and the
///            list is not empty, otherwise X'..' with two hex digits per byte
///
/// The printable check is a plain loop: this file does not include
/// <algorithm>, so std::all_of was only available through whatever other
/// headers happened to pull it in.
string DataNode::toString() const {
    std::ostringstream oss;
    if (!_label.empty()) oss << _label << " ";

    switch (_kind) {
        case DataKind::WORD: oss << "WORD"; break;
        case DataKind::BYTE: oss << "BYTE"; break;
        case DataKind::RESW: oss << "RESW"; break;
        case DataKind::RESB: oss << "RESB"; break;
    }

    std::visit([&oss](const auto& arg) {
        using T = std::decay_t<decltype(arg)>;
        if constexpr (std::is_same_v<T, std::monostate>) {
            // no value
        } else if constexpr (std::is_same_v<T, int>) {
            oss << " " << arg;
        } else if constexpr (std::is_same_v<T, std::vector<uint8_t>>) {
            bool isPrintable = !arg.empty();
            for (const uint8_t b : arg) {
                if (b < 32 || b > 126) {
                    isPrintable = false;
                    break;
                }
            }

            if (isPrintable) {
                oss << " C'";
                for (const uint8_t b : arg) oss << static_cast<char>(b);
                oss << "'";
            } else {
                oss << " X'";
                for (const uint8_t b : arg) {
                    oss << std::hex << std::setw(2) << std::setfill('0')
                        << static_cast<int>(b);
                }
                oss << "'";
            }
        }
    }, _value);

    if (!_comment.empty()) oss << " ." << _comment;
    return oss.str();
}
|
||||
|
||||
|
|
@ -95,8 +95,36 @@ void loadInstructionSet()
|
|||
if (instructions[i].name == nullptr) instructions[i] = {"INVALID", InstructionType::INVALID, nullptr};
|
||||
if (instructionsEXEX[i].name == nullptr) instructionsEXEX[i] = {"INVALID", InstructionType::INVALID, nullptr};
|
||||
}
|
||||
|
||||
// Initialize mnemonicToOpcode map
|
||||
for (int i = 0; i < 0xff; ++i) {
|
||||
if (instructions[i].type != InstructionType::INVALID) {
|
||||
mnemonicToOpcode.emplace(instructions[i].name, static_cast<uint8_t>(i));
|
||||
}
|
||||
if (instructionsEXEX[i].type != InstructionType::INVALID) {
|
||||
mnemonicToOpcode.emplace(instructionsEXEX[i].name, static_cast<uint8_t>(i));
|
||||
}
|
||||
}
|
||||
opcodeTablesInitialized = true;
|
||||
}
|
||||
|
||||
std::optional<uint8_t> findOpcodeByMnemonic(std::string_view name)
|
||||
{
|
||||
auto it = mnemonicToOpcode.find(name);
|
||||
if (it == mnemonicToOpcode.end())
|
||||
return std::nullopt;
|
||||
return it->second;
|
||||
}
|
||||
|
||||
/// Returns the table entry for `opcode`: the one from `instructions` when
/// that entry is valid, otherwise the one from `instructionsEXEX` (which may
/// itself be marked INVALID).
const InstructionInfo& getInstructionInfo(uint8_t opcode)
{
    const bool inPrimary = instructions[opcode].type != InstructionType::INVALID;
    const InstructionInfo* table = inPrimary ? instructions : instructionsEXEX;
    return table[opcode];
}
|
||||
|
||||
|
||||
|
||||
AddressingMode getAddressingMode(int ni)
|
||||
{
|
||||
switch (ni) {
|
||||
|
|
|
|||
449
simulator_SIC_XE/src/parser.cpp
Normal file
449
simulator_SIC_XE/src/parser.cpp
Normal file
|
|
@ -0,0 +1,449 @@
|
|||
// parser.cpp
|
||||
#include "parser.h"
|
||||
#include <cctype>
|
||||
#include <limits>
|
||||
#include <string_view>
|
||||
|
||||
void Parser::initMnemonicMap() {
|
||||
if (s_mnemonicMapInitialized) return;
|
||||
|
||||
loadInstructionSet();
|
||||
|
||||
for (int op = 0; op < 0xFF; ++op) {
|
||||
const auto& info = instructions[op];
|
||||
if (info.name && info.type != InstructionType::INVALID) {
|
||||
s_nameToOpcode.emplace(info.name, static_cast<std::uint8_t>(op));
|
||||
}
|
||||
const auto& ex = instructionsEXEX[op];
|
||||
if (ex.name && ex.type != InstructionType::INVALID) {
|
||||
s_nameToOpcode.emplace(ex.name, static_cast<std::uint8_t>(op));
|
||||
}
|
||||
}
|
||||
|
||||
s_mnemonicMapInitialized = true;
|
||||
}
|
||||
|
||||
std::shared_ptr<Mnemonic> Parser::makeMnemonic(const std::string& name, bool extended) {
|
||||
initMnemonicMap();
|
||||
|
||||
auto it = s_nameToOpcode.find(name);
|
||||
if (it == s_nameToOpcode.end()) {
|
||||
throw SyntaxError("Invalid mnemonic '" + name + "'", lexer_.row, lexer_.col);
|
||||
}
|
||||
|
||||
std::uint8_t opcode = it->second;
|
||||
const InstructionInfo* info = nullptr;
|
||||
|
||||
if (instructions[opcode].type != InstructionType::INVALID) {
|
||||
info = &instructions[opcode];
|
||||
} else if (instructionsEXEX[opcode].type != InstructionType::INVALID) {
|
||||
info = &instructionsEXEX[opcode];
|
||||
}
|
||||
|
||||
if (!info) {
|
||||
throw SyntaxError("Invalid mnemonic '" + name + "'", lexer_.row, lexer_.col);
|
||||
}
|
||||
|
||||
if (extended && info->type != InstructionType::TYPE3_4) {
|
||||
throw SyntaxError(
|
||||
"Extended format not allowed for mnemonic '" + name + "'",
|
||||
lexer_.row,
|
||||
lexer_.col
|
||||
);
|
||||
}
|
||||
|
||||
return std::make_shared<Mnemonic>(opcode, info->type, extended);
|
||||
}
|
||||
|
||||
std::string Parser::parseLabel() {
|
||||
if (lexer_.col == 1 && std::isalpha(static_cast<unsigned char>(lexer_.peek()))) {
|
||||
return std::string(lexer_.readAlphanumeric());
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
std::shared_ptr<Mnemonic> Parser::parseMnemonic() {
|
||||
bool isExtended = lexer_.advanceIf('+');
|
||||
std::string name(lexer_.readAlphanumeric());
|
||||
if (name.empty()) {
|
||||
throw SyntaxError("Mnemonic expected", lexer_.row, lexer_.col);
|
||||
}
|
||||
return makeMnemonic(name, isExtended);
|
||||
}
|
||||
|
||||
std::string Parser::parseSymbol() {
|
||||
return std::string(lexer_.readAlphanumeric());
|
||||
}
|
||||
|
||||
int Parser::parseRegister() {
|
||||
char ch = lexer_.advance();
|
||||
constexpr std::string_view regs = "AXLBSTF";
|
||||
auto pos = regs.find(ch);
|
||||
if (pos == std::string_view::npos) {
|
||||
throw SyntaxError(std::string("Invalid register '") + ch + "'", lexer_.row, lexer_.col);
|
||||
}
|
||||
return static_cast<int>(pos);
|
||||
}
|
||||
|
||||
/// Consumes a mandatory ',' together with the blanks/tabs around it.
/// @throws SyntaxError (from the lexer) when no comma is found.
void Parser::parseComma()
{
    lexer_.skipWhitespace();  // blanks before the comma
    lexer_.advance(',');
    lexer_.skipWhitespace();  // blanks after the comma
}
|
||||
|
||||
/// Checks for an optional ",X" indexing suffix.
/// @return true when the suffix was present and consumed.
/// @throws SyntaxError (from the lexer) when a comma is not followed by 'X'.
bool Parser::parseIndexed()
{
    lexer_.skipWhitespace();
    const bool hasComma = lexer_.advanceIf(',');
    if (hasComma) {
        lexer_.skipWhitespace();
        lexer_.advance('X');
    }
    return hasComma;
}
|
||||
|
||||
/// Numeric value of the digit `c` in the given radix ('0'-'9', then letters
/// of either case for 10..35).
/// @return the value, or -1 when `radix` is outside 2..36 or `c` is not a
///         valid digit of that radix.
static int digitValue(char c, int radix)
{
    const bool radixOk = (radix >= 2 && radix <= 36);
    if (!radixOk) {
        return -1;
    }

    int digit = radix;  // sentinel: "not a digit of this radix"
    if (c >= '0' && c <= '9') {
        digit = c - '0';
    } else if (c >= 'A' && c <= 'Z') {
        digit = 10 + (c - 'A');
    } else if (c >= 'a' && c <= 'z') {
        digit = 10 + (c - 'a');
    }
    return (digit < radix) ? digit : -1;
}
|
||||
|
||||
int Parser::parseNumber(int lo, int hi) {
|
||||
auto parseDigits = [&](int radix) -> int {
|
||||
std::string digits(lexer_.readDigits(radix));
|
||||
if (digits.empty()) {
|
||||
throw SyntaxError("Invalid number", lexer_.row, lexer_.col);
|
||||
}
|
||||
|
||||
long long value = 0;
|
||||
for (char c : digits) {
|
||||
int d = digitValue(c, radix);
|
||||
if (d < 0) throw SyntaxError("Invalid number", lexer_.row, lexer_.col);
|
||||
value = value * radix + d;
|
||||
if (value > std::numeric_limits<int>::max()) {
|
||||
throw SyntaxError("Invalid number", lexer_.row, lexer_.col);
|
||||
}
|
||||
}
|
||||
return static_cast<int>(value);
|
||||
};
|
||||
|
||||
int num = 0;
|
||||
|
||||
if (lexer_.peek() == '0') {
|
||||
int radix = -1;
|
||||
switch (lexer_.peek(1)) {
|
||||
case 'b': radix = 2; break;
|
||||
case 'o': radix = 8; break;
|
||||
case 'x': radix = 16; break;
|
||||
default: break;
|
||||
}
|
||||
if (radix != -1) {
|
||||
lexer_.advance();
|
||||
lexer_.advance();
|
||||
num = parseDigits(radix);
|
||||
} else {
|
||||
num = parseDigits(10);
|
||||
}
|
||||
} else if (std::isdigit(static_cast<unsigned char>(lexer_.peek()))) {
|
||||
num = parseDigits(10);
|
||||
} else {
|
||||
throw SyntaxError("Number expected", lexer_.row, lexer_.col);
|
||||
}
|
||||
|
||||
if (std::isalnum(static_cast<unsigned char>(lexer_.peek()))) {
|
||||
throw SyntaxError(
|
||||
std::string("invalid digit '") + lexer_.peek() + "'",
|
||||
lexer_.row,
|
||||
lexer_.col
|
||||
);
|
||||
}
|
||||
|
||||
if (num < lo || num > hi) {
|
||||
throw SyntaxError(
|
||||
"Number '" + std::to_string(num) + "' out of range [" +
|
||||
std::to_string(lo) + ".." + std::to_string(hi) + "]",
|
||||
lexer_.row,
|
||||
lexer_.col
|
||||
);
|
||||
}
|
||||
|
||||
return num;
|
||||
}
|
||||
|
||||
std::vector<std::uint8_t> Parser::parseData() {
|
||||
if (lexer_.advanceIf('C')) {
|
||||
lexer_.advance('\'');
|
||||
std::string s(lexer_.readTo('\''));
|
||||
std::vector<std::uint8_t> data;
|
||||
data.reserve(s.size());
|
||||
for (unsigned char c : s) {
|
||||
data.push_back(static_cast<std::uint8_t>(c));
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
if (lexer_.advanceIf('X')) {
|
||||
lexer_.advance('\'');
|
||||
std::string s(lexer_.readTo('\''));
|
||||
if (s.size() % 2 != 0) {
|
||||
throw SyntaxError("Invalid hex literal length", lexer_.row, lexer_.col);
|
||||
}
|
||||
|
||||
std::vector<std::uint8_t> data;
|
||||
data.reserve(s.size() / 2);
|
||||
|
||||
auto hexVal = [](char c) -> int {
|
||||
if (c >= '0' && c <= '9') return c - '0';
|
||||
if (c >= 'A' && c <= 'F') return c - 'A' + 10;
|
||||
if (c >= 'a' && c <= 'f') return c - 'a' + 10;
|
||||
return -1;
|
||||
};
|
||||
|
||||
for (std::size_t i = 0; i < s.size(); i += 2) {
|
||||
int hi = hexVal(s[i]);
|
||||
int lo = hexVal(s[i + 1]);
|
||||
if (hi < 0 || lo < 0) {
|
||||
throw SyntaxError("Invalid hex digit in literal", lexer_.row, lexer_.col);
|
||||
}
|
||||
data.push_back(static_cast<std::uint8_t>((hi << 4) | lo));
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
if (std::isdigit(static_cast<unsigned char>(lexer_.peek()))) {
|
||||
constexpr int MAX_WORD = 0xFFFFFF;
|
||||
int num = parseNumber(0, MAX_WORD);
|
||||
return {
|
||||
static_cast<std::uint8_t>((num >> 16) & 0xFF),
|
||||
static_cast<std::uint8_t>((num >> 8) & 0xFF),
|
||||
static_cast<std::uint8_t>(num & 0xFF)
|
||||
};
|
||||
}
|
||||
|
||||
throw SyntaxError(
|
||||
std::string("Invalid storage specifier '") + lexer_.peek() + "'",
|
||||
lexer_.row,
|
||||
lexer_.col
|
||||
);
|
||||
}
|
||||
|
||||
/// Parses the operands that belong to `m` and appends them to m.operands().
///
///   TYPE1   : no operands.
///   TYPE2   : nothing, "r1", "r1,r2" or "r1,n" (n in 0..0xFFFF).
///   TYPE3_4 : nothing, or an optional '#' / '@' prefix followed by a number
///             (0..0x7FFFFF) or a symbol with an optional ",X" suffix.
///   other   : nothing is parsed.
///
/// @throws SyntaxError for operands that fit none of these shapes.
void Parser::parseOperands(Mnemonic& m)
{
    const InstructionType kind = m.type();
    const char first = lexer_.peek();
    const auto atLineEnd = [](char ch) { return ch == '\n' || ch == '\0'; };

    if (kind == InstructionType::TYPE1) {
        return;
    }

    if (kind == InstructionType::TYPE2) {
        if (atLineEnd(first)) {
            return;
        }

        const int r1 = parseRegister();
        m.operands().emplace_back(Register{r1});
        lexer_.skipWhitespace();

        if (lexer_.peek() != ',') {
            return;  // single-register form
        }

        parseComma();
        const auto second = static_cast<unsigned char>(lexer_.peek());
        if (std::isalpha(second)) {
            const int r2 = parseRegister();
            m.operands().emplace_back(Register{r2});
        } else if (std::isdigit(second)) {
            const int n = parseNumber(0, 0xFFFF);
            m.operands().emplace_back(Immediate{n});
        } else {
            throw SyntaxError("Invalid second operand", lexer_.row, lexer_.col);
        }
        return;
    }

    if (kind != InstructionType::TYPE3_4) {
        return;
    }

    lexer_.skipWhitespace();
    if (atLineEnd(lexer_.peek())) {
        return;  // no operand (e.g. RSUB)
    }

    // At most one addressing prefix is consumed: '#' wins over '@'.
    const bool immediate = lexer_.advanceIf('#');
    const bool indirect = !immediate && lexer_.advanceIf('@');

    const auto lead = static_cast<unsigned char>(lexer_.peek());
    if (std::isdigit(lead)) {
        // Numeric operands are stored as Immediate whether or not a prefix
        // was given (plain numeric addressing is treated as immediate).
        const int num = parseNumber(0, 0x7FFFFF);
        m.operands().emplace_back(Immediate{num});
    } else if (std::isalpha(lead)) {
        std::string symbol = parseSymbol();
        const bool indexed = parseIndexed();
        m.operands().emplace_back(SymbolRef{symbol, indexed, immediate, indirect});
    } else {
        throw SyntaxError("Invalid operand", lexer_.row, lexer_.col);
    }
}
|
||||
|
||||
/// True when `name` is one of the assembler directives handled by
/// parseDirective(). The comparison is case-sensitive.
bool Parser::isDirective(const std::string& name)
{
    static constexpr const char* kDirectives[] = {
        "START", "END", "BASE", "NOBASE", "EQU",
        "ORG", "LTORG", "EXTDEF", "EXTREF", "CSECT",
    };

    for (const char* directive : kDirectives) {
        if (name == directive) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
/// True when `name` is one of the storage directives handled by
/// parseDataDirective(). The comparison is case-sensitive.
bool Parser::isDataDirective(const std::string& name)
{
    static constexpr const char* kDataDirectives[] = {"WORD", "BYTE", "RESW", "RESB"};

    for (const char* directive : kDataDirectives) {
        if (name == directive) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
std::shared_ptr<Node> Parser::parseDirective(const std::string& label, const std::string& directive) {
|
||||
lexer_.skipWhitespace();
|
||||
|
||||
DirectiveArg argValue;
|
||||
char c = lexer_.peek();
|
||||
|
||||
// Parse argument based on first character
|
||||
if (std::isalpha(c)) {
|
||||
std::string arg = std::string(lexer_.readAlphanumeric());
|
||||
argValue = arg;
|
||||
} else if (std::isdigit(c) || c == '0') {
|
||||
int num = parseNumber(0, 0xFFFFFF);
|
||||
argValue = num;
|
||||
} else {
|
||||
// No argument
|
||||
argValue = std::monostate{};
|
||||
}
|
||||
|
||||
lexer_.skipWhitespace();
|
||||
std::string comment = std::string(lexer_.readTo('\n'));
|
||||
|
||||
DirectiveKind kind;
|
||||
if (directive == "START") kind = DirectiveKind::START;
|
||||
else if (directive == "END") kind = DirectiveKind::END;
|
||||
else if (directive == "BASE") kind = DirectiveKind::BASE;
|
||||
else if (directive == "NOBASE") kind = DirectiveKind::NOBASE;
|
||||
else if (directive == "EQU") kind = DirectiveKind::EQU;
|
||||
else if (directive == "ORG") kind = DirectiveKind::ORG;
|
||||
else if (directive == "LTORG") kind = DirectiveKind::LTORG;
|
||||
else if (directive == "EXTDEF") kind = DirectiveKind::EXTDEF;
|
||||
else if (directive == "EXTREF") kind = DirectiveKind::EXTREF;
|
||||
else if (directive == "CSECT") kind = DirectiveKind::CSECT;
|
||||
else throw SyntaxError("Unknown directive", lexer_.row, lexer_.col);
|
||||
|
||||
return std::make_shared<DirectiveNode>(label, kind, argValue, comment);
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> Parser::parseDataDirective(const std::string& label, const std::string& directive) {
|
||||
lexer_.skipWhitespace();
|
||||
|
||||
DataKind kind;
|
||||
if (directive == "WORD") kind = DataKind::WORD;
|
||||
else if (directive == "BYTE") kind = DataKind::BYTE;
|
||||
else if (directive == "RESW") kind = DataKind::RESW;
|
||||
else if (directive == "RESB") kind = DataKind::RESB;
|
||||
else throw SyntaxError("Unknown data directive", lexer_.row, lexer_.col);
|
||||
|
||||
DataValue value;
|
||||
if (kind == DataKind::WORD || kind == DataKind::RESW || kind == DataKind::RESB) {
|
||||
int num = parseNumber(0, 0xFFFFFF);
|
||||
value = num;
|
||||
} else { // BYTE
|
||||
auto bytes = parseData();
|
||||
value = bytes;
|
||||
}
|
||||
|
||||
lexer_.skipWhitespace();
|
||||
std::string comment = std::string(lexer_.readTo('\n'));
|
||||
|
||||
return std::make_shared<DataNode>(label, kind, value, comment);
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> Parser::parseInstruction() {
|
||||
if (lexer_.col == 1 && lexer_.peek() == '.') {
|
||||
return std::make_shared<CommentNode>(
|
||||
std::string(lexer_.readTo('\n'))
|
||||
);
|
||||
}
|
||||
|
||||
std::string label = parseLabel();
|
||||
|
||||
if (lexer_.skipWhitespace() && label.empty()) {
|
||||
lexer_.advance();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
lexer_.skipWhitespace();
|
||||
|
||||
// Check for extended format prefix
|
||||
bool isExtended = lexer_.peek() == '+';
|
||||
if (isExtended) {
|
||||
lexer_.advance();
|
||||
}
|
||||
|
||||
std::string name = std::string(lexer_.readAlphanumeric());
|
||||
|
||||
if (name.empty()) {
|
||||
throw SyntaxError("Mnemonic or directive expected", lexer_.row, lexer_.col);
|
||||
}
|
||||
|
||||
// Check if it's a directive or data directive
|
||||
if (isDirective(name)) {
|
||||
return parseDirective(label, name);
|
||||
}
|
||||
|
||||
if (isDataDirective(name)) {
|
||||
return parseDataDirective(label, name);
|
||||
}
|
||||
|
||||
// It's an instruction - create mnemonic
|
||||
auto mnemonic = makeMnemonic(name, isExtended);
|
||||
lexer_.skipWhitespace();
|
||||
|
||||
parseOperands(*mnemonic);
|
||||
lexer_.skipWhitespace();
|
||||
|
||||
std::string comment(lexer_.readTo('\n'));
|
||||
|
||||
return std::make_shared<InstructionNode>(
|
||||
std::move(label),
|
||||
std::move(mnemonic),
|
||||
std::move(comment)
|
||||
);
|
||||
}
|
||||
|
||||
/// Parses the whole input line by line and collects the resulting nodes.
///
/// End of input is detected with `!= '\0'` (the lexer's sentinel). The
/// earlier `> 0` test treated every byte >= 0x80 as end of input on
/// platforms where `char` is signed, silently dropping the rest of the file.
///
/// @throws SyntaxError propagated from parseInstruction().
Code Parser::parseCode() {
    Code code;

    while (lexer_.peek() != '\0') {
        // Make sure parsing starts in column 1: drop whatever is left of the
        // current line. Only consuming a '\n' resets the column, and every
        // advance() consumes a character, so this loop always terminates.
        while (lexer_.peek() != '\0' && lexer_.col > 1) {
            lexer_.advance();
        }

        if (auto node = parseInstruction()) {
            code.addLine(node);
        }
    }

    return code;
}
|
||||
|
||||
/// Entry point: resets the lexer to scan `input` from the beginning and
/// parses the whole text.
/// @throws SyntaxError propagated from the parsing helpers.
Code Parser::parse(const std::string& input)
{
    lexer_ = Lexer{input};
    return parseCode();
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue