working AST

2025-12-10 18:02:06 +01:00 · 2025-12-10 18:02:06 +01:00 · 9e9039af05
commit 9e9039af05
parent 7c6379c62d
13 changed files with 962 additions and 36 deletions
--- a/simulator_SIC_XE/src/lexer.cpp
+++ b/simulator_SIC_XE/src/lexer.cpp
@ -0,0 +1,138 @@
+#include "lexer.h"
+#include <cctype>
+#include <algorithm>
+
+// Takes ownership of the text to scan and places the cursor on its first
+// character: offset 0, empty marked span, row 1, column 1.
+Lexer::Lexer(std::string input)
+    : input_{std::move(input)}, pos_{0}, start_{0}, row{1}, col{1} {}
+
+// Records the current offset as the start of the span that extract() will
+// later return. Returns *this so the call can be chained.
+Lexer& Lexer::mark() {
+    this->start_ = this->pos_;
+    return *this;
+}
+
+// Returns the text from the marked start up to the current position shifted
+// by `ofs` characters (a negative `ofs` trims characters from the end).
+// The end offset is clamped to [start_, input_.size()], so an offset that
+// overshoots in either direction yields a shorter (possibly empty) string
+// instead of an out-of-range access.
+std::string Lexer::extract(int ofs) {
+    std::size_t end = pos_;
+    if (ofs < 0) {
+        // Step back explicitly. Casting a negative offset to size_t wraps
+        // around, and when |ofs| > pos_ the wrapped value used to be clamped
+        // to the END of the input rather than to the marked start.
+        const auto back = static_cast<std::size_t>(-static_cast<long long>(ofs));
+        end = (back > pos_) ? 0 : pos_ - back;
+    } else {
+        end = pos_ + static_cast<std::size_t>(ofs);
+    }
+    if (end > input_.size()) {
+        end = input_.size();
+    }
+    if (end < start_) {
+        end = start_;
+    }
+    return input_.substr(start_, end - start_);
+}
+
+// Convenience overload: the marked span up to exactly the current position.
+std::string Lexer::extract() {
+    constexpr int kNoOffset = 0;
+    return extract(kNoOffset);
+}
+
+// Looks at the character `ahead` positions past the cursor without consuming
+// anything. Yields the '\0' sentinel when that index is not inside the input.
+char Lexer::peek(int ahead) const {
+    const std::size_t target = pos_ + static_cast<std::size_t>(ahead);
+    return target < input_.size() ? input_[target] : '\0';
+}
+
+// Character under the cursor, or '\0' once the input is exhausted.
+char Lexer::peek() const {
+    constexpr int kHere = 0;
+    return peek(kHere);
+}
+
+// Consumes and returns the character under the cursor, keeping row/col in
+// sync. At end of input nothing moves and '\0' is returned.
+char Lexer::advance() {
+    const char current = peek();
+    if (current != '\0') {
+        ++pos_;
+        switch (current) {
+        case '\n':
+            // New line: next character is in column 1 of the following row.
+            ++row;
+            col = 1;
+            break;
+        case '\t':
+            // Jump to the next tab stop (stops every 4 columns: 5, 9, 13, ...).
+            col = ((col - 1) / 4) * 4 + 5;
+            break;
+        default:
+            ++col;
+            break;
+        }
+    }
+    return current;
+}
+
+// Consumes the next character only when it equals `ch`.
+// Returns whether the character matched.
+bool Lexer::advanceIf(char ch) {
+    const bool matches = (peek() == ch);
+    if (matches) {
+        advance();
+    }
+    return matches;
+}
+
+// Consumes `ch`, which must be the next character.
+// Throws SyntaxError (with the current row/col) when something else follows.
+void Lexer::advance(char ch) {
+    if (advanceIf(ch)) {
+        return;
+    }
+    throw SyntaxError(std::string("'") + ch + "' expected", row, col);
+}
+
+// Skips blanks and tabs (never newlines).
+// Returns true when the cursor then sits at the end of the line or of the
+// input, i.e. nothing else is left on this line.
+bool Lexer::skipWhitespace() {
+    for (char next = peek(); next == ' ' || next == '\t'; next = peek()) {
+        advance();
+    }
+    const char stop = peek();
+    return stop == '\n' || stop == '\0';
+}
+
+// Reads up to and including `delimiter` and returns the text before it.
+// When the input ends before the delimiter is found, everything up to the
+// end of the input is returned.
+std::string Lexer::readTo(char delimiter) {
+    mark();
+    // Test against the '\0' sentinel instead of `> 0`: plain char may be
+    // signed, so bytes >= 0x80 (e.g. UTF-8 text in a comment) would otherwise
+    // end the scan in the middle of the line.
+    while (peek() != '\0' && peek() != delimiter) {
+        advance();
+    }
+    // Trim one character only when the delimiter was really consumed.
+    // Trimming unconditionally dropped the last character of a final line
+    // that has no trailing delimiter.
+    if (delimiter != '\0' && advanceIf(delimiter)) {
+        return extract(-1);
+    }
+    return extract();
+}
+
+// Reads the longest run of letters, digits and underscores starting at the
+// cursor and returns it (empty when the next character is none of these).
+std::string Lexer::readAlphanumeric() {
+    const auto isWordChar = [](char ch) {
+        // The cast keeps std::isalnum defined for negative char values.
+        return ch == '_' || std::isalnum(static_cast<unsigned char>(ch)) != 0;
+    };
+
+    mark();
+    while (isWordChar(peek())) {
+        advance();
+    }
+    return extract();
+}
+
+// Numeric value of `c` as a digit in base `radix` (letters count from 10,
+// case-insensitively). Returns -1 when the radix is outside 2..36 or `c` is
+// not a valid digit in that base.
+int Lexer::digitValue(char c, int radix) {
+    if (radix < 2 || radix > 36) {
+        return -1;
+    }
+
+    int value;
+    if ('0' <= c && c <= '9') {
+        value = c - '0';
+    } else if ('A' <= c && c <= 'Z') {
+        value = 10 + (c - 'A');
+    } else if ('a' <= c && c <= 'z') {
+        value = 10 + (c - 'a');
+    } else {
+        return -1;
+    }
+    return value < radix ? value : -1;
+}
+
+// Reads the longest run of characters that are valid digits in `radix` and
+// returns it (empty when the next character is not such a digit).
+std::string Lexer::readDigits(int radix) {
+    mark();
+    for (;;) {
+        if (digitValue(peek(), radix) == -1) {
+            break;
+        }
+        advance();
+    }
+    return extract();
+}
--- a/simulator_SIC_XE/src/mnemonic.cpp
+++ b/simulator_SIC_XE/src/mnemonic.cpp
@ -1,6 +0,0 @@
-#include "mnemonic.h"
-
-string Mnemonic::toString() const
-{
-    return string();
-}
--- a/simulator_SIC_XE/src/node.cpp
+++ b/simulator_SIC_XE/src/node.cpp
@ -1,22 +1,120 @@
 #include "node.h"
+#include <sstream>
+#include <iomanip>

-string Node::getLabel() const
-{
-    return _label;
+// Renders the generic node as "<label> <mnemonic> .<comment>"; each part is
+// emitted only when present.
+string Node::toString() const {
+    string text;
+    if (!_label.empty()) {
+        text += _label;
+        text += " ";
+    }
+    if (_mnemonic) {
+        text += _mnemonic->toString();
+        text += " ";
+    }
+    if (!_comment.empty()) {
+        text += ".";
+        text += _comment;
+    }
+    return text;
 }

-string Node::getComment() const
-{
-    return _comment;
+// Debug rendering of a mnemonic: "[OP:<opcode in hex>]", a '+' when the
+// extended format is requested, then the operands separated by commas.
+// Registers print as R<n>, immediates as #<value> (decimal), symbol
+// references by name with ",X" appended when indexed.
+std::string Mnemonic::toString() const {
+    std::ostringstream oss;
+    // std::hex is sticky: switch back to decimal right after the opcode so
+    // operand values below are not silently printed in hexadecimal.
+    oss << "[OP:" << std::hex << static_cast<int>(_opcode) << std::dec << "]";
+    if (_extended) oss << "+";
+    for (size_t i = 0; i < _operands.size(); ++i) {
+        if (i > 0) oss << ",";
+        std::visit([&](auto&& arg) {
+            using T = std::decay_t<decltype(arg)>;
+            if constexpr (std::is_same_v<T, Empty>) {
+                // Empty operand slot: nothing to print.
+            } else if constexpr (std::is_same_v<T, Register>) {
+                oss << "R" << arg.num;
+            } else if constexpr (std::is_same_v<T, Immediate>) {
+                oss << "#" << arg.value;
+            } else if constexpr (std::is_same_v<T, SymbolRef>) {
+                oss << arg.name;
+                if (arg.indexed) oss << ",X";
+            }
+        }, _operands[i]);
+    }
+    return oss.str();
 }

-std::shared_ptr<Mnemonic> Node::getMnemonic() const
-{
-    return _mnemonic;
+// Renders an instruction line as "<label> <mnemonic> .<comment>"; label and
+// comment are emitted only when non-empty, the mnemonic only when set.
+string InstructionNode::toString() const {
+    string text;
+    if (!_label.empty()) {
+        text += _label;
+        text += " ";
+    }
+    if (_mnemonic) {
+        text += _mnemonic->toString();
+    }
+    if (!_comment.empty()) {
+        text += " .";
+        text += _comment;
+    }
+    return text;
 }

-string Node::toString() const
-{
-    return  (_label.length() > 0 ? _label + " " : "") + (_mnemonic ? _mnemonic->toString() + " ": "") + "." + _comment;
+// Renders a comment-only line: the '.' marker followed by the stored text.
+string CommentNode::toString() const {
+    string text(".");
+    text += _comment;
+    return text;
+}
+
+// Renders an assembler directive as "<label> <DIRECTIVE> <argument> .<comment>".
+// An integer argument is printed in hexadecimal, a string argument verbatim,
+// and a list of symbols as a comma-separated list.
+string DirectiveNode::toString() const {
+    std::ostringstream oss;
+    if (!_label.empty()) oss << _label << " ";
+    switch (_kind) {
+        case DirectiveKind::START:  oss << "START"; break;
+        case DirectiveKind::END:    oss << "END"; break;
+        case DirectiveKind::BASE:   oss << "BASE"; break;
+        case DirectiveKind::NOBASE: oss << "NOBASE"; break;
+        case DirectiveKind::EQU:    oss << "EQU"; break;
+        case DirectiveKind::ORG:    oss << "ORG"; break;
+        case DirectiveKind::LTORG:  oss << "LTORG"; break;
+        case DirectiveKind::EXTDEF: oss << "EXTDEF"; break;
+        case DirectiveKind::EXTREF: oss << "EXTREF"; break;
+        case DirectiveKind::CSECT:  oss << "CSECT"; break;
+    }
+    std::visit([&](auto&& arg) {
+        using T = std::decay_t<decltype(arg)>;
+        if constexpr (std::is_same_v<T, std::monostate>) {
+            // Directive without an argument.
+        } else if constexpr (std::is_same_v<T, int>) {
+            // Restore decimal afterwards; std::hex would otherwise stay set.
+            oss << " " << std::hex << arg << std::dec;
+        } else if constexpr (std::is_same_v<T, std::string>) {
+            oss << " " << arg;
+        } else if constexpr (std::is_same_v<T, std::vector<std::string>>) {
+            // The first symbol needs a blank after the directive name; the
+            // list used to be glued to it (e.g. "EXTDEFA,B").
+            for (size_t i = 0; i < arg.size(); ++i) {
+                oss << (i == 0 ? " " : ",") << arg[i];
+            }
+        }
+    }, _arg);
+    if (!_comment.empty()) oss << " ." << _comment;
+    return oss.str();
+}
+
+// Renders a storage directive as "<label> <KIND> <value> .<comment>".
+// An integer value is printed in decimal. A byte sequence is printed as
+// C'...' when it is non-empty and consists only of printable ASCII
+// (32..126), otherwise as X'..' with two lowercase hex digits per byte.
+string DataNode::toString() const {
+    std::ostringstream oss;
+    if (!_label.empty()) {
+        oss << _label << " ";
+    }
+
+    switch (_kind) {
+        case DataKind::WORD: oss << "WORD"; break;
+        case DataKind::BYTE: oss << "BYTE"; break;
+        case DataKind::RESW: oss << "RESW"; break;
+        case DataKind::RESB: oss << "RESB"; break;
+    }
+
+    std::visit([&oss](const auto& payload) {
+        using T = std::decay_t<decltype(payload)>;
+        if constexpr (std::is_same_v<T, int>) {
+            oss << " " << payload;
+        } else if constexpr (std::is_same_v<T, std::vector<uint8_t>>) {
+            const bool printable =
+                !payload.empty() &&
+                std::all_of(payload.begin(), payload.end(),
+                            [](uint8_t byte) { return byte >= 32 && byte <= 126; });
+
+            if (printable) {
+                oss << " C'";
+                for (const uint8_t byte : payload) {
+                    oss << static_cast<char>(byte);
+                }
+            } else {
+                // Base and fill character persist on the stream; only the
+                // field width has to be set again for every byte.
+                oss << " X'" << std::hex << std::setfill('0');
+                for (const uint8_t byte : payload) {
+                    oss << std::setw(2) << static_cast<int>(byte);
+                }
+            }
+            oss << "'";
+        }
+        // std::monostate: no value to print.
+    }, _value);
+
+    if (!_comment.empty()) {
+        oss << " ." << _comment;
+    }
+    return oss.str();
 }
    
--- a/simulator_SIC_XE/src/opcode.cpp
+++ b/simulator_SIC_XE/src/opcode.cpp
@ -95,8 +95,36 @@ void loadInstructionSet()
        if (instructions[i].name == nullptr) instructions[i] = {"INVALID", InstructionType::INVALID, nullptr};
        if (instructionsEXEX[i].name == nullptr) instructionsEXEX[i] = {"INVALID", InstructionType::INVALID, nullptr};
    }
+
+    // Initialize mnemonicToOpcode map
+    for (int i = 0; i < 0xff; ++i) {
+        if (instructions[i].type != InstructionType::INVALID) {
+            mnemonicToOpcode.emplace(instructions[i].name, static_cast<uint8_t>(i));
+        }
+        if (instructionsEXEX[i].type != InstructionType::INVALID) {
+            mnemonicToOpcode.emplace(instructionsEXEX[i].name, static_cast<uint8_t>(i));
+        }
+    }
+    opcodeTablesInitialized = true;
 }

+// Looks `name` up in the mnemonicToOpcode table.
+// Returns the opcode registered for it, or std::nullopt when the name is
+// not in the table.
+std::optional<uint8_t> findOpcodeByMnemonic(std::string_view name)
+{
+    if (const auto entry = mnemonicToOpcode.find(name); entry != mnemonicToOpcode.end()) {
+        return entry->second;
+    }
+    return std::nullopt;
+}
+
+// Returns the table entry for `opcode`: the one from `instructions` unless
+// that slot is marked INVALID, in which case the `instructionsEXEX` slot is
+// returned (which may itself be INVALID).
+const InstructionInfo& getInstructionInfo(uint8_t opcode)
+{
+    const InstructionInfo& primary = instructions[opcode];
+    if (primary.type == InstructionType::INVALID) {
+        return instructionsEXEX[opcode];
+    }
+    return primary;
+}
+
+
+
 AddressingMode getAddressingMode(int ni)
 {
    switch (ni) {
--- a/simulator_SIC_XE/src/parser.cpp
+++ b/simulator_SIC_XE/src/parser.cpp
@ -0,0 +1,449 @@
+// parser.cpp
+#include "parser.h"
+#include <cctype>
+#include <limits>
+#include <string_view>
+
+// Fills s_nameToOpcode on first use: loads the instruction tables and
+// registers the name of every valid entry of `instructions` and
+// `instructionsEXEX` under its table index. Later calls return immediately.
+void Parser::initMnemonicMap() {
+    if (!s_mnemonicMapInitialized) {
+        loadInstructionSet();
+
+        // NOTE(review): the bound 0xFF means index 0xFF itself is never
+        // visited - confirm this matches the size of the tables.
+        for (int op = 0; op < 0xFF; ++op) {
+            const auto code = static_cast<std::uint8_t>(op);
+
+            const auto& base = instructions[op];
+            if (base.name && base.type != InstructionType::INVALID) {
+                s_nameToOpcode.emplace(base.name, code);
+            }
+
+            const auto& ext = instructionsEXEX[op];
+            if (ext.name && ext.type != InstructionType::INVALID) {
+                s_nameToOpcode.emplace(ext.name, code);
+            }
+        }
+
+        s_mnemonicMapInitialized = true;
+    }
+}
+
+// Creates the Mnemonic for the instruction called `name`.
+//   name     - mnemonic text as written in the source (without a '+' prefix)
+//   extended - true when the '+' (format 4) prefix was present
+// Throws SyntaxError when the name is unknown, or when the extended format
+// is requested for an instruction that is not TYPE3_4.
+std::shared_ptr<Mnemonic> Parser::makeMnemonic(const std::string& name, bool extended) {
+    initMnemonicMap();
+
+    const auto it = s_nameToOpcode.find(name);
+    if (it == s_nameToOpcode.end()) {
+        throw SyntaxError("Invalid mnemonic '" + name + "'", lexer_.row, lexer_.col);
+    }
+    const std::uint8_t opcode = it->second;
+
+    // Both tables are indexed by the same opcode value, so a slot can be
+    // valid in each of them under different names. Select the entry whose
+    // name is the one that was looked up; preferring `instructions`
+    // unconditionally returned the wrong entry (and type) for a mnemonic
+    // that only exists in `instructionsEXEX`.
+    const auto describes = [&name](const InstructionInfo& entry) {
+        return entry.type != InstructionType::INVALID && entry.name && name == entry.name;
+    };
+
+    const InstructionInfo* info = nullptr;
+    if (describes(instructions[opcode])) {
+        info = &instructions[opcode];
+    } else if (describes(instructionsEXEX[opcode])) {
+        info = &instructionsEXEX[opcode];
+    }
+
+    if (!info) {
+        throw SyntaxError("Invalid mnemonic '" + name + "'", lexer_.row, lexer_.col);
+    }
+
+    if (extended && info->type != InstructionType::TYPE3_4) {
+        throw SyntaxError(
+            "Extended format not allowed for mnemonic '" + name + "'",
+            lexer_.row,
+            lexer_.col
+        );
+    }
+
+    return std::make_shared<Mnemonic>(opcode, info->type, extended);
+}
+
+// Reads a label: an identifier that starts with a letter in column 1.
+// Returns an empty string (consuming nothing) when the line has no label.
+std::string Parser::parseLabel() {
+    const bool atLineStart = (lexer_.col == 1);
+    if (!atLineStart || !std::isalpha(static_cast<unsigned char>(lexer_.peek()))) {
+        return {};
+    }
+    return std::string(lexer_.readAlphanumeric());
+}
+
+// Reads an optional '+' prefix followed by a mnemonic name and builds the
+// corresponding Mnemonic. Throws SyntaxError when no name follows.
+std::shared_ptr<Mnemonic> Parser::parseMnemonic() {
+    const bool extended = lexer_.advanceIf('+');
+    std::string name(lexer_.readAlphanumeric());
+    if (!name.empty()) {
+        return makeMnemonic(name, extended);
+    }
+    throw SyntaxError("Mnemonic expected", lexer_.row, lexer_.col);
+}
+
+// Reads a symbol name (letters, digits, underscores) at the cursor.
+std::string Parser::parseSymbol() {
+    std::string symbol(lexer_.readAlphanumeric());
+    return symbol;
+}
+
+// Consumes one character and maps it to a register number: the position of
+// the letter in "AXLBSTF" (A=0 ... F=6).
+// Throws SyntaxError for any other character.
+int Parser::parseRegister() {
+    constexpr std::string_view kRegisterOrder = "AXLBSTF";
+
+    const char letter = lexer_.advance();
+    const auto index = kRegisterOrder.find(letter);
+    if (index != std::string_view::npos) {
+        return static_cast<int>(index);
+    }
+    throw SyntaxError(std::string("Invalid register '") + letter + "'", lexer_.row, lexer_.col);
+}
+
+// Consumes a mandatory ',' together with any blanks/tabs around it.
+// The lexer throws SyntaxError when the comma is missing.
+void Parser::parseComma() {
+    constexpr char kSeparator = ',';
+
+    lexer_.skipWhitespace();
+    lexer_.advance(kSeparator);
+    lexer_.skipWhitespace();
+}
+
+// Recognises an optional ",X" suffix (blanks allowed around the comma).
+// Returns true when it was present. A comma that is not followed by 'X'
+// makes the lexer throw SyntaxError.
+bool Parser::parseIndexed() {
+    lexer_.skipWhitespace();
+    const bool hasComma = lexer_.advanceIf(',');
+    if (hasComma) {
+        lexer_.skipWhitespace();
+        lexer_.advance('X');
+    }
+    return hasComma;
+}
+
+// Value of `c` as a digit in base `radix`; letters stand for 10..35 in
+// either case. Returns -1 for a radix outside 2..36 or when `c` is not a
+// digit of that base.
+static int digitValue(char c, int radix) {
+    if (radix < 2 || radix > 36) {
+        return -1;
+    }
+
+    int digit = radix;  // anything >= radix means "not a digit"
+    if ('0' <= c && c <= '9') {
+        digit = c - '0';
+    } else if ('A' <= c && c <= 'Z') {
+        digit = 10 + (c - 'A');
+    } else if ('a' <= c && c <= 'z') {
+        digit = 10 + (c - 'a');
+    }
+    return digit < radix ? digit : -1;
+}
+
+// Reads an unsigned integer literal and returns its value.
+// A leading "0b", "0o" or "0x" selects base 2, 8 or 16; anything else is
+// decimal.
+//   lo, hi - inclusive range the value must fall into
+// Throws SyntaxError when no digit starts the literal, when a prefix is not
+// followed by digits, when the value does not fit into int, when a letter
+// or digit directly follows the literal, or when the value is out of range.
+int Parser::parseNumber(int lo, int hi) {
+    const char first = lexer_.peek();
+    if (!std::isdigit(static_cast<unsigned char>(first))) {
+        throw SyntaxError("Number expected", lexer_.row, lexer_.col);
+    }
+
+    // Only a literal that starts with '0' can carry a radix prefix.
+    int radix = 10;
+    if (first == '0') {
+        switch (lexer_.peek(1)) {
+        case 'b': radix = 2;  break;
+        case 'o': radix = 8;  break;
+        case 'x': radix = 16; break;
+        default:  break;
+        }
+        if (radix != 10) {
+            lexer_.advance();  // '0'
+            lexer_.advance();  // radix letter
+        }
+    }
+
+    const std::string digits(lexer_.readDigits(radix));
+    if (digits.empty()) {
+        throw SyntaxError("Invalid number", lexer_.row, lexer_.col);
+    }
+
+    // Accumulate in a wider type and reject as soon as int would overflow.
+    long long value = 0;
+    for (const char ch : digits) {
+        const int d = digitValue(ch, radix);
+        if (d < 0) {
+            throw SyntaxError("Invalid number", lexer_.row, lexer_.col);
+        }
+        value = value * radix + d;
+        if (value > std::numeric_limits<int>::max()) {
+            throw SyntaxError("Invalid number", lexer_.row, lexer_.col);
+        }
+    }
+    const int num = static_cast<int>(value);
+
+    // A letter or digit glued to the literal (e.g. "12a", "0b102") is an error.
+    if (std::isalnum(static_cast<unsigned char>(lexer_.peek()))) {
+        throw SyntaxError(
+            std::string("invalid digit '") + lexer_.peek() + "'",
+            lexer_.row,
+            lexer_.col
+        );
+    }
+
+    if (num < lo || num > hi) {
+        throw SyntaxError(
+            "Number '" + std::to_string(num) + "' out of range [" +
+            std::to_string(lo) + ".." + std::to_string(hi) + "]",
+            lexer_.row,
+            lexer_.col
+        );
+    }
+
+    return num;
+}
+
+// Reads the operand of a BYTE directive and returns its bytes:
+//   C'text'   - the characters between the quotes, one byte each
+//   X'hex'    - pairs of hex digits, one byte per pair
+//   <number>  - a value in 0..0xFFFFFF as three bytes, most significant first
+// Throws SyntaxError for an odd number of hex digits, a non-hex digit, or an
+// operand that starts with none of the three forms.
+std::vector<std::uint8_t> Parser::parseData() {
+    if (lexer_.advanceIf('C')) {
+        lexer_.advance('\'');
+        const std::string text(lexer_.readTo('\''));
+        // char -> uint8_t conversion happens element by element.
+        return std::vector<std::uint8_t>(text.begin(), text.end());
+    }
+
+    if (lexer_.advanceIf('X')) {
+        lexer_.advance('\'');
+        const std::string hex(lexer_.readTo('\''));
+        if (hex.size() % 2 != 0) {
+            throw SyntaxError("Invalid hex literal length", lexer_.row, lexer_.col);
+        }
+
+        std::vector<std::uint8_t> bytes;
+        bytes.reserve(hex.size() / 2);
+        for (std::size_t i = 0; i + 1 < hex.size(); i += 2) {
+            const int high = digitValue(hex[i], 16);
+            const int low = digitValue(hex[i + 1], 16);
+            if (high < 0 || low < 0) {
+                throw SyntaxError("Invalid hex digit in literal", lexer_.row, lexer_.col);
+            }
+            bytes.push_back(static_cast<std::uint8_t>((high << 4) | low));
+        }
+        return bytes;
+    }
+
+    if (std::isdigit(static_cast<unsigned char>(lexer_.peek()))) {
+        constexpr int MAX_WORD = 0xFFFFFF;
+        const int word = parseNumber(0, MAX_WORD);
+        std::vector<std::uint8_t> bytes;
+        bytes.reserve(3);
+        for (int shift = 16; shift >= 0; shift -= 8) {
+            bytes.push_back(static_cast<std::uint8_t>((word >> shift) & 0xFF));
+        }
+        return bytes;
+    }
+
+    throw SyntaxError(
+        std::string("Invalid storage specifier '") + lexer_.peek() + "'",
+        lexer_.row,
+        lexer_.col
+    );
+}
+
+// Parses the operands that belong to `m` (according to its instruction
+// type) and appends them to m.operands().
+//   TYPE1   - no operands
+//   TYPE2   - r1, optionally followed by ",r2" or ",n" (n in 0..0xFFFF)
+//   TYPE3_4 - nothing, or an optional '#'/'@' prefix followed by a number
+//             (0..0x7FFFFF) or by a symbol with an optional ",X" suffix
+// Throws SyntaxError for malformed operands.
+void Parser::parseOperands(Mnemonic& m) {
+    const auto endsLine = [](char ch) { return ch == '\n' || ch == '\0'; };
+    const auto isLetter = [](char ch) { return std::isalpha(static_cast<unsigned char>(ch)) != 0; };
+    const auto isDigit = [](char ch) { return std::isdigit(static_cast<unsigned char>(ch)) != 0; };
+
+    switch (m.type()) {
+    case InstructionType::TYPE2: {
+        if (endsLine(lexer_.peek())) {
+            return;
+        }
+
+        const int first = parseRegister();
+        m.operands().emplace_back(Register{first});
+        lexer_.skipWhitespace();
+
+        if (lexer_.peek() != ',') {
+            return;
+        }
+        parseComma();
+
+        const char next = lexer_.peek();
+        if (isLetter(next)) {
+            const int second = parseRegister();
+            m.operands().emplace_back(Register{second});
+        } else if (isDigit(next)) {
+            const int count = parseNumber(0, 0xFFFF);
+            m.operands().emplace_back(Immediate{count});
+        } else {
+            throw SyntaxError("Invalid second operand", lexer_.row, lexer_.col);
+        }
+        return;
+    }
+
+    case InstructionType::TYPE3_4: {
+        lexer_.skipWhitespace();
+        if (endsLine(lexer_.peek())) {
+            // Instruction without operand (e.g. RSUB).
+            return;
+        }
+
+        // '#' and '@' are mutually exclusive; '@' is only tried when '#'
+        // was not present.
+        const bool immediate = lexer_.advanceIf('#');
+        const bool indirect = !immediate && lexer_.advanceIf('@');
+
+        const char next = lexer_.peek();
+        if (isDigit(next)) {
+            // A numeric operand is stored as Immediate whether or not a
+            // prefix was given (direct numeric addressing is treated the
+            // same way).
+            const int value = parseNumber(0, 0x7FFFFF);
+            m.operands().emplace_back(Immediate{value});
+        } else if (isLetter(next)) {
+            std::string symbol = parseSymbol();
+            const bool indexed = parseIndexed();
+            m.operands().emplace_back(SymbolRef{symbol, indexed, immediate, indirect});
+        } else {
+            throw SyntaxError("Invalid operand", lexer_.row, lexer_.col);
+        }
+        return;
+    }
+
+    default:
+        // TYPE1 (and any other type) takes no operands.
+        return;
+    }
+}
+
+// True when `name` is one of the assembler directives handled by
+// parseDirective (exact, case-sensitive match).
+bool Parser::isDirective(const std::string& name) {
+    static constexpr const char* kDirectives[] = {
+        "START", "END", "BASE", "NOBASE", "EQU",
+        "ORG", "LTORG", "EXTDEF", "EXTREF", "CSECT",
+    };
+    for (const char* candidate : kDirectives) {
+        if (name == candidate) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// True when `name` is one of the storage directives handled by
+// parseDataDirective (exact, case-sensitive match).
+bool Parser::isDataDirective(const std::string& name) {
+    static constexpr const char* kDataDirectives[] = {"WORD", "BYTE", "RESW", "RESB"};
+    for (const char* candidate : kDataDirectives) {
+        if (name == candidate) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Parses the rest of a directive line, after the directive name was read.
+//   label     - label found at the start of the line (may be empty)
+//   directive - directive name, one of those accepted by isDirective()
+// The argument is a symbol (first character is a letter), a number in
+// 0..0xFFFFFF (first character is a digit), or absent. Whatever follows on
+// the line is stored as the comment, without its leading '.' marker.
+// Throws SyntaxError for an unknown directive name or a malformed number.
+// NOTE(review): only a single symbol is read, so for "EXTDEF A,B" the text
+// ",B" ends up in the comment - confirm whether symbol lists are needed.
+std::shared_ptr<Node> Parser::parseDirective(const std::string& label, const std::string& directive) {
+    // Resolve the kind first so an unknown name fails before input is consumed.
+    DirectiveKind kind;
+    if (directive == "START") kind = DirectiveKind::START;
+    else if (directive == "END") kind = DirectiveKind::END;
+    else if (directive == "BASE") kind = DirectiveKind::BASE;
+    else if (directive == "NOBASE") kind = DirectiveKind::NOBASE;
+    else if (directive == "EQU") kind = DirectiveKind::EQU;
+    else if (directive == "ORG") kind = DirectiveKind::ORG;
+    else if (directive == "LTORG") kind = DirectiveKind::LTORG;
+    else if (directive == "EXTDEF") kind = DirectiveKind::EXTDEF;
+    else if (directive == "EXTREF") kind = DirectiveKind::EXTREF;
+    else if (directive == "CSECT") kind = DirectiveKind::CSECT;
+    else throw SyntaxError("Unknown directive", lexer_.row, lexer_.col);
+
+    lexer_.skipWhitespace();
+
+    DirectiveArg argValue;
+    // <cctype> classifiers are undefined for negative values, so classify the
+    // character as unsigned char (as the rest of this file does).
+    const auto c = static_cast<unsigned char>(lexer_.peek());
+    if (std::isalpha(c)) {
+        std::string arg = std::string(lexer_.readAlphanumeric());
+        argValue = arg;
+    } else if (std::isdigit(c)) {
+        int num = parseNumber(0, 0xFFFFFF);
+        argValue = num;
+    } else {
+        // No argument
+        argValue = std::monostate{};
+    }
+
+    lexer_.skipWhitespace();
+    // The node's toString() prepends the '.' marker itself; keeping it in the
+    // stored text would print the comment as "..text".
+    lexer_.advanceIf('.');
+    std::string comment = std::string(lexer_.readTo('\n'));
+
+    return std::make_shared<DirectiveNode>(label, kind, argValue, comment);
+}
+
+// Parses the rest of a storage-directive line, after the directive name was
+// read.
+//   label     - label found at the start of the line (may be empty)
+//   directive - "WORD", "BYTE", "RESW" or "RESB"
+// WORD/RESW/RESB take a number in 0..0xFFFFFF; BYTE takes a C'..'/X'..'
+// literal or a number (see parseData). Whatever follows on the line is
+// stored as the comment, without its leading '.' marker.
+// Throws SyntaxError for an unknown directive name or a malformed operand.
+std::shared_ptr<Node> Parser::parseDataDirective(const std::string& label, const std::string& directive) {
+    lexer_.skipWhitespace();
+
+    DataKind kind;
+    if (directive == "WORD") kind = DataKind::WORD;
+    else if (directive == "BYTE") kind = DataKind::BYTE;
+    else if (directive == "RESW") kind = DataKind::RESW;
+    else if (directive == "RESB") kind = DataKind::RESB;
+    else throw SyntaxError("Unknown data directive", lexer_.row, lexer_.col);
+
+    DataValue value;
+    if (kind == DataKind::BYTE) {
+        auto bytes = parseData();
+        value = bytes;
+    } else {
+        // WORD, RESW and RESB all take a plain number.
+        int num = parseNumber(0, 0xFFFFFF);
+        value = num;
+    }
+
+    lexer_.skipWhitespace();
+    // The node's toString() prepends the '.' marker itself; keeping it in the
+    // stored text would print the comment as "..text".
+    lexer_.advanceIf('.');
+    std::string comment = std::string(lexer_.readTo('\n'));
+
+    return std::make_shared<DataNode>(label, kind, value, comment);
+}
+
+// Parses one source line and returns the node that represents it:
+//   - a CommentNode for a line that starts with '.' in column 1,
+//   - nullptr for a blank line without a label,
+//   - a DirectiveNode / DataNode for assembler and storage directives,
+//   - an InstructionNode otherwise.
+// Comment text is stored without its leading '.' marker, because the nodes'
+// toString() adds the marker again when printing.
+// Throws SyntaxError when no mnemonic/directive is found or when any part of
+// the line is malformed.
+std::shared_ptr<Node> Parser::parseInstruction() {
+    if (lexer_.col == 1 && lexer_.peek() == '.') {
+        lexer_.advance();  // drop the '.' marker, keep only the text
+        return std::make_shared<CommentNode>(
+            std::string(lexer_.readTo('\n'))
+        );
+    }
+
+    std::string label = parseLabel();
+
+    if (lexer_.skipWhitespace() && label.empty()) {
+        // Blank line: consume the newline (no-op at end of input).
+        lexer_.advance();
+        return nullptr;
+    }
+
+    lexer_.skipWhitespace();
+
+    // '+' selects the extended (format 4) form of an instruction.
+    const bool isExtended = lexer_.advanceIf('+');
+
+    std::string name = std::string(lexer_.readAlphanumeric());
+    if (name.empty()) {
+        throw SyntaxError("Mnemonic or directive expected", lexer_.row, lexer_.col);
+    }
+
+    if (isDirective(name)) {
+        return parseDirective(label, name);
+    }
+
+    if (isDataDirective(name)) {
+        return parseDataDirective(label, name);
+    }
+
+    // Anything else must be a machine instruction.
+    auto mnemonic = makeMnemonic(name, isExtended);
+    lexer_.skipWhitespace();
+
+    parseOperands(*mnemonic);
+    lexer_.skipWhitespace();
+
+    // Trailing comment: skip the '.' marker so it is not printed twice.
+    lexer_.advanceIf('.');
+    std::string comment(lexer_.readTo('\n'));
+
+    return std::make_shared<InstructionNode>(
+        std::move(label),
+        std::move(mnemonic),
+        std::move(comment)
+    );
+}
+
+// Parses the whole input line by line and collects the resulting nodes.
+// Lines that produce no node (blank lines) are skipped.
+// SyntaxError thrown while parsing a line propagates to the caller.
+Code Parser::parseCode() {
+    Code code;
+
+    // Compare against the '\0' end-of-input sentinel instead of `> 0`: plain
+    // char may be signed, and a byte >= 0x80 would otherwise end parsing
+    // silently with the rest of the input unread.
+    while (lexer_.peek() != '\0') {
+        // Resynchronise to the start of the next line if the previous
+        // statement stopped in the middle of one. Consuming character by
+        // character always makes progress.
+        while (lexer_.peek() != '\0' && lexer_.col > 1) {
+            lexer_.advance();
+        }
+
+        if (auto node = parseInstruction()) {
+            code.addLine(node);
+        }
+    }
+
+    return code;
+}
+
+// Parses `input` from scratch: replaces the lexer with a fresh one over the
+// given text and returns the parsed program.
+Code Parser::parse(const std::string& input) {
+    Lexer fresh(input);
+    lexer_ = std::move(fresh);
+    return parseCode();
+}