diff --git a/.gitignore b/.gitignore index 6f03a8e..ebebf77 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,5 @@ __pycache__/ autotester sictools.jar simulator_SIC_XE/CMakeLists.txt.user + +/build/ diff --git a/simulator_SIC_XE/CMakeLists.txt b/simulator_SIC_XE/CMakeLists.txt index c6c37e1..cde4cd2 100644 --- a/simulator_SIC_XE/CMakeLists.txt +++ b/simulator_SIC_XE/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.10) project(simulator_SIC_XE VERSION 1.0 LANGUAGES CXX) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) # Put all build outputs under target/bin @@ -13,6 +13,11 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_DIR}) # Collect all .cpp sources under src/ file(GLOB_RECURSE SOURCES "${PROJECT_SOURCE_DIR}/src/*.cpp") + +set(MAIN_SRC "${PROJECT_SOURCE_DIR}/src/main.cpp") +set(ASSEMBLER_SRC "${PROJECT_SOURCE_DIR}/src/assembler.cpp") +list(REMOVE_ITEM SOURCES ${MAIN_SRC} ${ASSEMBLER_SRC}) + if(NOT SOURCES) message(WARNING "No source files found in ${PROJECT_SOURCE_DIR}/src — the build will create an empty library") endif() @@ -28,15 +33,9 @@ if(EXISTS "${PROJECT_SOURCE_DIR}/src/main.cpp") target_link_libraries(simulator_exec PRIVATE simulator_lib) endif() - -if(TARGET simulator_exec) - add_custom_target(run - DEPENDS simulator_exec - COMMAND ${CMAKE_COMMAND} -E echo "Running simulator_exec..." 
- COMMAND $ - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - COMMENT "Builds and runs simulator_exec" - ) +if(EXISTS "${PROJECT_SOURCE_DIR}/src/assembler.cpp") + add_executable(assembler "${PROJECT_SOURCE_DIR}/src/assembler.cpp") + target_link_libraries(assembler PRIVATE simulator_lib) endif() message(STATUS "Project: ${PROJECT_NAME}") diff --git a/simulator_SIC_XE/README.md b/simulator_SIC_XE/README.md index aac6331..ae46366 100644 --- a/simulator_SIC_XE/README.md +++ b/simulator_SIC_XE/README.md @@ -1,30 +1,65 @@ # SIC/XE Simulator -A complete SIC/XE architecture simulator with instruction execution, device I/O, and memory management. +A complete SIC/XE architecture simulator with instruction execution, device I/O, memory management, and assembler. ## Quick Start -The easiest way to build and run the simulator: +### Building the Project + +```bash +make +``` + +This will build: +- `target/bin/simulator_exec` - The main simulator +- `target/bin/assembler` - The SIC/XE assembler +- `target/bin/simulator_qt` - Qt GUI version (if Qt is available) + +### Using the Assembler + +Assemble a SIC/XE assembly file to object code: + +```bash +./target/bin/assembler +``` + +**Example:** +```bash +./target/bin/assembler res/test_format4.asm +``` + +This will: +- Parse and assemble the input file +- Generate modification records (M records) for format 4 instructions +- Create `.obj` with the object code +- Display the object code and symbol table + +**Sample Output:** +``` +H TESTF4 0003E8 00001B +T 0003E8 1B 031003F70F1003FA4B1003FD4F2C090000000000000100004F2BFD +M 0003E9 05 +M 0003ED 05 +M 0003F1 05 +E 0003E8 +``` + +### Running the Simulator ```bash make run ``` -This single command will: -- Configure the build system (if needed) -- Compile all source files -- Link the executable -- Run the simulator +This will build and run the simulator with the default program. 
## Build Commands | Command | Description | |--------------|----------------------------------------------------| -| `make` | Build the project | +| `make` | Build all executables | | `make build` | Build the project | -| `make run` | Build run the simulator | +| `make run` | Build and run the simulator | | `make clean` | Clean build artifacts | -| `make run` | Clean build artifacts, build and run the simulator | ## Project Structure diff --git a/simulator_SIC_XE/devices/FA.dev b/simulator_SIC_XE/devices/FA.dev new file mode 100644 index 0000000..cb6e5ed --- /dev/null +++ b/simulator_SIC_XE/devices/FA.dev @@ -0,0 +1,2 @@ +5 +0 diff --git a/simulator_SIC_XE/gui/qt/mainwindow.cpp b/simulator_SIC_XE/gui/qt/mainwindow.cpp index b53aa72..da6fb69 100644 --- a/simulator_SIC_XE/gui/qt/mainwindow.cpp +++ b/simulator_SIC_XE/gui/qt/mainwindow.cpp @@ -5,7 +5,9 @@ #include "../../include/instructions.h" #include "../../include/opcode.h" #include "../../include/constants.h" -#include "../../../include/loader.h" +#include "../../include/loader.h" +#include "../../include/parser.h" +#include "../../include/code.h" #include #include @@ -20,6 +22,9 @@ #include #include #include +#include +#include +#include class Loader; @@ -102,6 +107,7 @@ MainWindow::MainWindow(QWidget *parent) : // Connect menu actions connect(ui->actionLoad_Object_File, &QAction::triggered, this, &MainWindow::loadObjectFile); + connect(ui->actionLoad_Asm_file, &QAction::triggered, this, &MainWindow::loadAsmFile); connect(ui->actionAbout, &QAction::triggered, this, &MainWindow::showAboutDialog); connect(ui->actionFrequency, &QAction::triggered, this, &MainWindow::showFrequencyDialog); @@ -981,6 +987,100 @@ void MainWindow::loadObjectFile() } } +void MainWindow::loadAsmFile() +{ + QString fileName = QFileDialog::getOpenFileName(this, + tr("Load Assembly File"), + QString(), + tr("Assembly Files (*.asm);;All Files (*)")); + + if (fileName.isEmpty()) { + return; + } + + try { + // Stop execution if running + 
m_controller->stop(); + + // Reset machine state + m_machine->reset(); + + // Read assembly file + std::ifstream file(fileName.toStdString()); + if (!file.is_open()) { + throw std::runtime_error("Could not open file: " + fileName.toStdString()); + } + + std::string source((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); + file.close(); + + // Parse and assemble + Parser parser; + Code code = parser.parse(source); + code.assemble(); + + // Generate object code + std::string objCode = code.emitText(); + + // Create resources directory if it doesn't exist + QDir dir; + if (!dir.exists("resources")) { + dir.mkpath("resources"); + } + + // Save object file to resources directory + QFileInfo fileInfo(fileName); + QString objFileName = "resources/" + fileInfo.completeBaseName() + ".obj"; + + std::ofstream objFile(objFileName.toStdString()); + if (!objFile.is_open()) { + throw std::runtime_error("Could not create object file: " + objFileName.toStdString()); + } + objFile << objCode; + objFile.close(); + + // Generate and save log file + QString logFileName = "resources/" + fileInfo.completeBaseName() + ".log"; + std::ofstream logFile(logFileName.toStdString()); + if (!logFile.is_open()) { + throw std::runtime_error("Could not create log file: " + logFileName.toStdString()); + } + + logFile << "=== SIC/XE Assembler Log ===\n\n"; + logFile << "Source file: " << fileName.toStdString() << "\n"; + logFile << "Object file: " << objFileName.toStdString() << "\n\n"; + + logFile << "=== Symbols ===\n"; + logFile << code.dumpSymbols() << "\n\n"; + + logFile << "=== Code ===\n"; + logFile << code.dumpCode() << "\n\n"; + + logFile << "=== Object Code ===\n"; + logFile << objCode << "\n"; + + logFile.close(); + + // Load the generated object file + Loader loader(m_machine, objFileName.toStdString()); + loader.load(); + + // Update displays + updateRegisterDisplays(); + updateMemoryDisplay(); + updateDisassemblyDisplay(); + + QMessageBox::information(this, tr("Success"), 
+ tr("Assembly successful!\nObject file: %1\nLog file: %2") + .arg(objFileName).arg(logFileName)); + + } catch (const std::exception &e) { + QMessageBox::critical(this, tr("Error"), + tr("Failed to assemble file: %1").arg(e.what())); + } +} + void MainWindow::showAboutDialog() { QMessageBox::about(this, tr("About SIC/XE Simulator"), diff --git a/simulator_SIC_XE/gui/qt/mainwindow.h b/simulator_SIC_XE/gui/qt/mainwindow.h index 2dba671..cc8f042 100644 --- a/simulator_SIC_XE/gui/qt/mainwindow.h +++ b/simulator_SIC_XE/gui/qt/mainwindow.h @@ -51,6 +51,7 @@ private slots: void onDisassemblyGoToStart(); void onDisassemblyGoToEnd(); void loadObjectFile(); + void loadAsmFile(); void showAboutDialog(); void showFrequencyDialog(); diff --git a/simulator_SIC_XE/gui/qt/mainwindow.ui b/simulator_SIC_XE/gui/qt/mainwindow.ui index e9084bd..21085fa 100644 --- a/simulator_SIC_XE/gui/qt/mainwindow.ui +++ b/simulator_SIC_XE/gui/qt/mainwindow.ui @@ -892,6 +892,7 @@ File + @@ -924,6 +925,11 @@ About + + + Load Asm file + + diff --git a/simulator_SIC_XE/include/code.h b/simulator_SIC_XE/include/code.h new file mode 100644 index 0000000..519d5f1 --- /dev/null +++ b/simulator_SIC_XE/include/code.h @@ -0,0 +1,68 @@ +#ifndef CODE_H +#define CODE_H + +#include +#include +#include +#include + +#include "node.h" + +class Code { + +public: + Code() = default; + + void addLine(const std::shared_ptr& line); + + const std::vector>& getLines() const; + + const string toString() const; + + // Two-pass assembler methods + void assemble(); + std::vector emitCode(); + std::string emitText(); + std::string dumpSymbols() const; + std::string dumpCode() const; + +private: + std::vector> _lines; + + // Assembler state + std::unordered_map _symbolTable; + std::vector _locationCounters; // Location counter per line + int _startAddress = 0; + int _programLength = 0; + std::string _programName; + int _baseRegister = -1; // -1 means not set + + struct ModificationRecord { + int address; + int halfBytes; + }; + 
mutable std::vector _modificationRecords; + + // Pass 1: build symbol table and assign addresses + void firstPass(); + + // Pass 2: generate code + void secondPass(); + + // Helper methods + int getInstructionLength(const std::shared_ptr& node, int locationCounter) const; + std::vector generateInstruction(const InstructionNode* inst, int address); + std::vector generateData(const DataNode* data); + + // Addressing mode selection + struct AddressingResult { + int nixbpe; // ni, x, b, p, e bits + int displacement; // 12-bit or 20-bit + bool success; + }; + AddressingResult selectAddressingMode(int targetAddress, int pc, bool indexed, bool immediate, bool indirect, bool extended) const; +}; + + + +#endif // CODE_H \ No newline at end of file diff --git a/simulator_SIC_XE/include/lexer.h b/simulator_SIC_XE/include/lexer.h new file mode 100644 index 0000000..f9bdd70 --- /dev/null +++ b/simulator_SIC_XE/include/lexer.h @@ -0,0 +1,55 @@ +#ifndef LEXER_H +#define LEXER_H + +#include +#include +#include + +class SyntaxError : public std::runtime_error { +public: + int row; + int col; + + SyntaxError(const std::string& msg, int row_, int col_) + : std::runtime_error(msg), row(row_), col(col_) {} +}; + + +class Lexer { +public: + int row; + int col; + + explicit Lexer(std::string input); + + Lexer& mark(); + + std::string extract(int ofs); + std::string extract(); + + char peek(int ahead) const; + char peek() const; + + char advance(); + + bool advanceIf(char ch); + void advance(char ch); + + + bool skipWhitespace(); + + std::string readTo(char delimiter); + + std::string readAlphanumeric(); + + std::string readDigits(int radix); + +private: + std::string input_; + std::size_t pos_; + std::size_t start_; + + static int digitValue(char c, int radix); +}; + +#endif // LEXER_H diff --git a/simulator_SIC_XE/include/loader.h b/simulator_SIC_XE/include/loader.h index 41899c2..34c31a6 100644 --- a/simulator_SIC_XE/include/loader.h +++ b/simulator_SIC_XE/include/loader.h @@ -27,6 
+27,7 @@ public: enum class RecordType { HEADER, TEXT, + MODIFICATION, END, UNKNOWN }; @@ -40,6 +41,11 @@ public: int start_address; std::vector data; }; + struct ModificationRecord { + int address; // Address to be modified + int length; // Length in nibbles + bool add; // true for +, false for - + }; struct EndRecord { int execution_start_address; }; @@ -54,10 +60,13 @@ private : shared_ptr _machine; string _filename; shared_ptr _file_reader; + int _relocation_address; HeaderMetadata readHeader(); TextRecord readTextRecord(); + ModificationRecord readModificationRecord(); EndRecord readEndRecord(); bool load_into_memory(int start_address, const std::vector& data); + void applyModification(const ModificationRecord& mod); }; diff --git a/simulator_SIC_XE/include/mnemonic.h b/simulator_SIC_XE/include/mnemonic.h new file mode 100644 index 0000000..3cdddab --- /dev/null +++ b/simulator_SIC_XE/include/mnemonic.h @@ -0,0 +1,45 @@ +// mnemonic.h +#ifndef MNEMONIC_H +#define MNEMONIC_H + +#include +#include +#include +#include + +#include "opcode.h" + +struct Empty {}; +struct Register { int num; }; +struct Immediate { int value; }; +struct SymbolRef { + std::string name; + bool indexed = false; + bool immediate = false; + bool indirect = false; +}; + +using Operand = std::variant; + +class Mnemonic { +public: + Mnemonic(std::uint8_t opcode, InstructionType type, bool extended) + : _opcode(opcode), _extended(extended), _type(type) {} + + std::uint8_t opcode() const { return _opcode; } + bool extended() const { return _extended; } + InstructionType type() const { return _type; } + + std::vector& operands() { return _operands; } + const std::vector& operands() const { return _operands; } + + std::string toString() const; + +private: + std::uint8_t _opcode; + bool _extended; + InstructionType _type; + std::vector _operands; +}; + +#endif // MNEMONIC_H diff --git a/simulator_SIC_XE/include/node.h b/simulator_SIC_XE/include/node.h new file mode 100644 index 0000000..2ad0d86 
--- /dev/null +++ b/simulator_SIC_XE/include/node.h @@ -0,0 +1,98 @@ +#ifndef NODE_H +#define NODE_H + +#include +#include +#include +#include +#include +#include "mnemonic.h" + +using std::string; + +class Node { +public: + virtual ~Node() = default; + + string getLabel() const { return _label; } + string getComment() const { return _comment; } + std::shared_ptr getMnemonic() const { return _mnemonic; } + + virtual string toString() const; + +protected: + string _label; + std::shared_ptr _mnemonic; + string _comment; +}; + +class InstructionNode : public Node { +public: + InstructionNode(string label, + std::shared_ptr mnemonic, + string comment) { + _label = std::move(label); + _mnemonic = std::move(mnemonic); + _comment = std::move(comment); + } + + string toString() const override; +}; + +class CommentNode : public Node { +public: + explicit CommentNode(string text) { + _comment = std::move(text); + } + + string toString() const override; +}; + +enum class DirectiveKind { + START, END, BASE, NOBASE, EQU, ORG, LTORG, + EXTDEF, EXTREF, CSECT +}; + +using DirectiveArg = std::variant>; + +class DirectiveNode : public Node { +public: + DirectiveNode(string label, DirectiveKind kind, DirectiveArg arg, string comment) + : _kind(kind), _arg(std::move(arg)) { + _label = std::move(label); + _comment = std::move(comment); + } + + DirectiveKind kind() const { return _kind; } + const DirectiveArg& arg() const { return _arg; } + + string toString() const override; + +private: + DirectiveKind _kind; + DirectiveArg _arg; +}; + +enum class DataKind { WORD, BYTE, RESW, RESB }; + +using DataValue = std::variant>; + +class DataNode : public Node { +public: + DataNode(string label, DataKind kind, DataValue value, string comment) + : _kind(kind), _value(std::move(value)) { + _label = std::move(label); + _comment = std::move(comment); + } + + DataKind kind() const { return _kind; } + const DataValue& value() const { return _value; } + + string toString() const override; + +private: + 
DataKind _kind; + DataValue _value; +}; + +#endif // NODE_H diff --git a/simulator_SIC_XE/include/opcode.h b/simulator_SIC_XE/include/opcode.h index a467488..05cb693 100644 --- a/simulator_SIC_XE/include/opcode.h +++ b/simulator_SIC_XE/include/opcode.h @@ -3,6 +3,11 @@ #include "utils.h" +#include +#include +#include +#include + // ============================== // Opcode definitions (SIC/XE) // ============================== @@ -87,6 +92,8 @@ #define LDVS 0x68 #define LDVT 0x04 +static std::unordered_map mnemonicToOpcode; +static bool opcodeTablesInitialized = false; enum class InstructionType { @@ -110,6 +117,10 @@ struct InstructionInfo { extern InstructionInfo instructions[]; extern InstructionInfo instructionsEXEX[]; +extern std::optional findOpcodeByMnemonic(std::string_view name); +extern const InstructionInfo& getInstructionInfo(uint8_t opcode); + + // Initialize the instruction table void loadInstructionSet(); diff --git a/simulator_SIC_XE/include/parser.h b/simulator_SIC_XE/include/parser.h new file mode 100644 index 0000000..8ae126a --- /dev/null +++ b/simulator_SIC_XE/include/parser.h @@ -0,0 +1,52 @@ +// parser.h +#ifndef PARSER_H +#define PARSER_H + +#include +#include +#include +#include +#include + +#include "lexer.h" +#include "code.h" +#include "opcode.h" +#include "mnemonic.h" + +class Parser { +public: + Parser() = default; + + Code parse(const std::string& input); + +private: + std::string parseLabel(); + std::shared_ptr parseMnemonic(); + std::string parseSymbol(); + int parseRegister(); + void parseComma(); + bool parseIndexed(); + int parseNumber(int lo, int hi); + std::vector parseData(); + + void parseOperands(Mnemonic& m); + + bool isDirective(const std::string& name); + bool isDataDirective(const std::string& name); + std::shared_ptr parseDirective(const std::string& label, const std::string& directive); + std::shared_ptr parseDataDirective(const std::string& label, const std::string& directive); + + std::shared_ptr parseInstruction(); + 
Code parseCode(); + + std::shared_ptr makeMnemonic(const std::string& name, bool extended); + static void initMnemonicMap(); + +private: + Lexer lexer_{""}; + + static inline std::unordered_map s_nameToOpcode{}; + static inline bool s_mnemonicMapInitialized = false; +}; + +#endif // PARSER_H diff --git a/simulator_SIC_XE/res/rec.asm b/simulator_SIC_XE/res/rec.asm new file mode 100644 index 0000000..212bf23 --- /dev/null +++ b/simulator_SIC_XE/res/rec.asm @@ -0,0 +1,223 @@ +prog START 0 + +.------------------------------------------- +. MAIN LOOP +. +. Psevdo: +. sp = 0 +. while true: +. n = readFA() +. if n == 0: halt +. acc = 1 +. fact() ; rekurzivno: acc = n! +. printStdout(acc) +.------------------------------------------- + CLEAR A + STA sp + +loop JSUB readFA + COMP #0 + JEQ halt + + STA n + LDA #1 + STA acc + + JSUB fact + LDA acc + JSUB printStdout + + J loop + +halt J halt + +.------------------------------------------- +. readFA +. +. Psevdo: +. B = 0 +. while true: +. ch = RD(FA) +. if ch == CR or ch == LF: break +. digit = ch - '0' +. B = B * 10 + digit +. return B +.------------------------------------------- +readFA CLEAR B + LDS #10 + +rd_loopFA RD #0xFA + COMP #0x0D . CR? + JEQ rd_doneCR_FA + COMP #0x0A . LF? + JEQ rd_doneFA + + SUB #0x30 + MULR S,B . B = B * 10 + ADDR A,B . B = B + digit + J rd_loopFA + +rd_doneCR_FA RD #0xFA . pogoltni LF po CR +rd_doneFA CLEAR A + RMO B,A + RSUB + +.------------------------------------------- +. fact +. +. Psevdo (globalni n, acc, sklad L): +. fact(): +. push(L) +. if n <= 1: +. pop(L); return +. acc = acc * n +. n = n - 1 +. fact() +. pop(L); return +.------------------------------------------- +fact . push L + LDA sp + ADD #3 + STA sp + LDX sp + STL stackL,X + + LDA n + COMP #1 + JGT fact_rec + + . base case: n <= 1 + LDX sp + LDL stackL,X + LDA sp + SUB #3 + STA sp + RSUB + +fact_rec . recursive case: acc *= n; n--; fact() + + LDB acc + LDS n + MULR S,B + STB acc + + LDA n + SUB #1 + STA n + + JSUB fact + + . 
pop L in return to caller + LDX sp + LDL stackL,X + LDA sp + SUB #3 + STA sp + RSUB + +.------------------------------------------- +. printStdout +. +. Psevdo: +. if A == 0: +. print "0\n" +. return +. ps_val = A +. ps_len = 0 +. while ps_val > 0: +. q = ps_val / 10 +. r = ps_val % 10 +. buf[ps_len] = '0' + r +. ps_len++ +. ps_val = q +. for i = ps_len-1 .. 0: +. print buf[i] +. print "\r\n" +.------------------------------------------- +printStdout COMP #0 + JEQ ps_zero + + STA ps_val + LDA #0 + STA ps_len + LDS #10 + LDT #0x30 . '0' + +ps_div LDA ps_val + COMP #0 + JEQ ps_divdone + + RMO A,B + DIVR S,B . kvocient v B + RMO B,X . X = kvocient + + MULR S,B + SUBR B,A . A = ostanek + ADDR T,A . A = '0' + ostanek + STA psdigit + + LDA ps_len + STA ps_idx + LDA #psbuf + ADD ps_idx + STA ps_ptr + LDA psdigit + STCH @ps_ptr + + LDA ps_len + ADD #1 + STA ps_len + + RMO X,A + STA ps_val + J ps_div + +ps_divdone LDA ps_len + SUB #1 + STA ps_idx + +ps_print LDA ps_idx + COMP #0 + JLT ps_end + + LDA #psbuf + ADD ps_idx + STA ps_ptr + LDCH @ps_ptr + WD #1 + + LDA ps_idx + SUB #1 + STA ps_idx + J ps_print + +ps_end LDA #0x0D . CR + WD #1 + LDA #0x0A . LF + WD #1 + RSUB + +ps_zero LDA #0x30 . "0" + WD #1 + LDA #0x0D + WD #1 + LDA #0x0A + WD #1 + RSUB + +.data +. rekurzija faktoriala +sp WORD 0 . stack pointer +n WORD 0 +acc WORD 0 . akumulator za faktorial +stackL RESB 60 + +. 
printStdout +ps_val WORD 0 +ps_len WORD 0 +ps_idx WORD 0 +psdigit WORD 0 +ps_ptr WORD 0 +psbuf RESB 12 + + END prog diff --git a/simulator_SIC_XE/res/simple.asm b/simulator_SIC_XE/res/simple.asm new file mode 100644 index 0000000..20beca6 --- /dev/null +++ b/simulator_SIC_XE/res/simple.asm @@ -0,0 +1,32 @@ +SIMPLE START 0 + + +LDA NUM1 + +ADD NUM2 + +STA RESULT + + LDX NUM1 + LDL NUM2 + + LDA #0 + ADDR X,A + ADDR L,A + + +LDA RESULT + ADD #48 + RMO A,S + SHIFTL S,16 + SHIFTR S,16 + RMO S,A + STCH RESULT + LDCH RESULT + WD OUTPUT + +HALT J HALT + +OUTPUT BYTE 1 + +NUM1 WORD 1 +NUM2 WORD 2 +RESULT RESW 1 + + END SIMPLE diff --git a/simulator_SIC_XE/res/test_format4.asm b/simulator_SIC_XE/res/test_format4.asm new file mode 100644 index 0000000..6de136d --- /dev/null +++ b/simulator_SIC_XE/res/test_format4.asm @@ -0,0 +1,11 @@ +TESTF4 START 1000 + +LDA BUFFER + +STA OUTPUT + +JSUB FUNC + RSUB + +BUFFER RESW 1 +OUTPUT RESW 1 +FUNC LDA #0 + RSUB + END TESTF4 diff --git a/simulator_SIC_XE/src/assembler.cpp b/simulator_SIC_XE/src/assembler.cpp new file mode 100644 index 0000000..2ceece9 --- /dev/null +++ b/simulator_SIC_XE/src/assembler.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include "code.h" +#include "parser.h" +#include "opcode.h" + +using std::cout; +using std::endl; +using std::cerr; + +int main(int argc, char* argv[]) { + if (argc != 2) { + cerr << "Usage: " << argv[0] << " " << endl; + return 1; + } + + std::string inputFile = argv[1]; + + // Load instruction set + loadInstructionSet(); + + try { + // Read assembly file + cout << "Assembling: " << inputFile << endl; + std::ifstream file(inputFile); + if (!file.is_open()) { + throw std::runtime_error("Failed to open file: " + inputFile); + } + + std::string input; + std::string line; + while (std::getline(file, line)) { + input += line + "\n"; + } + file.close(); + + // Parse + Parser parser; + Code code = parser.parse(input); + + // Assemble + code.assemble(); + + // Generate object code + std::string 
objectCode = code.emitText(); + + // Determine output filename + std::string outputFile = inputFile; + size_t lastDot = outputFile.find_last_of('.'); + if (lastDot != std::string::npos) { + outputFile = outputFile.substr(0, lastDot); + } + outputFile += ".obj"; + + // Write to file + std::ofstream out(outputFile); + if (!out.is_open()) { + throw std::runtime_error("Failed to create output file: " + outputFile); + } + out << objectCode; + out.close(); + + // Display results + cout << "\n=== Object Code ===" << endl; + cout << objectCode; + cout << "\n=== Symbol Table ===" << endl; + cout << code.dumpSymbols(); + cout << "\nOutput written to: " << outputFile << endl; + + } catch (const std::exception& e) { + cerr << "ERROR: " << e.what() << endl; + return 1; + } + + return 0; +} diff --git a/simulator_SIC_XE/src/code.cpp b/simulator_SIC_XE/src/code.cpp new file mode 100644 index 0000000..2e87d12 --- /dev/null +++ b/simulator_SIC_XE/src/code.cpp @@ -0,0 +1,551 @@ +#include "code.h" +#include "opcode.h" +#include "constants.h" +#include +#include +#include +#include + +void Code::addLine(const std::shared_ptr &line) +{ + _lines.emplace_back(line); +} + +const std::vector> &Code::getLines() const +{ + return _lines; +} + +const string Code::toString() const +{ + string result; + for (const auto& line : _lines) { + result += line->toString() + "\n"; + } + return result; +} + +// ============================================================ +// TWO-PASS ASSEMBLER IMPLEMENTATION +// ============================================================ + +void Code::assemble() { + firstPass(); + secondPass(); +} + +void Code::firstPass() { + _symbolTable.clear(); + _locationCounters.clear(); + _locationCounters.resize(_lines.size(), 0); + + int locationCounter = 0; + bool startFound = false; + + for (size_t i = 0; i < _lines.size(); ++i) { + auto& line = _lines[i]; + _locationCounters[i] = locationCounter; + + // Handle label + std::string label = line->getLabel(); + if 
(!label.empty()) { + if (_symbolTable.find(label) != _symbolTable.end()) { + throw std::runtime_error("Duplicate symbol: " + label); + } + _symbolTable[label] = locationCounter; + } + + // Check for directives + if (auto* directive = dynamic_cast(line.get())) { + switch (directive->kind()) { + case DirectiveKind::START: { + if (std::holds_alternative(directive->arg())) { + _startAddress = std::get(directive->arg()); + locationCounter = _startAddress; + _locationCounters[i] = locationCounter; + if (!label.empty()) { + _symbolTable[label] = locationCounter; + _programName = label; + } + startFound = true; + } + break; + } + case DirectiveKind::END: + _programLength = locationCounter - _startAddress; + break; + + case DirectiveKind::BASE: { + // BASE sets base register for addressing + if (std::holds_alternative(directive->arg())) { + // Will resolve in second pass + } + break; + } + case DirectiveKind::NOBASE: + _baseRegister = -1; + break; + + case DirectiveKind::EQU: { + // EQU defines symbol value + if (!label.empty() && std::holds_alternative(directive->arg())) { + _symbolTable[label] = std::get(directive->arg()); + } + break; + } + case DirectiveKind::ORG: { + // ORG changes location counter + if (std::holds_alternative(directive->arg())) { + locationCounter = std::get(directive->arg()); + } + break; + } + default: + break; + } + continue; + } + + // Handle data directives + if (auto* data = dynamic_cast(line.get())) { + int length = 0; + switch (data->kind()) { + case DataKind::WORD: + length = 3; // 24-bit word + break; + case DataKind::BYTE: { + if (std::holds_alternative>(data->value())) { + length = std::get>(data->value()).size(); + } + break; + } + case DataKind::RESW: { + if (std::holds_alternative(data->value())) { + length = std::get(data->value()) * 3; + } + break; + } + case DataKind::RESB: { + if (std::holds_alternative(data->value())) { + length = std::get(data->value()); + } + break; + } + } + locationCounter += length; + continue; + } + + // 
Handle instructions + if (auto* inst = dynamic_cast(line.get())) { + int length = getInstructionLength(line, locationCounter); + locationCounter += length; + } + } + + if (!startFound) { + _startAddress = 0; + } + _programLength = locationCounter - _startAddress; +} + +int Code::getInstructionLength(const std::shared_ptr& node, int locationCounter) const { + auto* inst = dynamic_cast(node.get()); + if (!inst || !inst->getMnemonic()) { + return 0; + } + + auto mnemonic = inst->getMnemonic(); + InstructionType type = mnemonic->type(); + + switch (type) { + case InstructionType::TYPE1: + return 1; + case InstructionType::TYPE2: + return 2; + case InstructionType::TYPE3_4: + return mnemonic->extended() ? 4 : 3; + default: + return 0; + } +} + +void Code::secondPass() { + // Generate code for all instructions and data + // This will be used by emitCode() and emitText() +} + +std::vector Code::emitCode() { + std::vector code; + code.resize(_programLength, 0); + + for (size_t i = 0; i < _lines.size(); ++i) { + auto& line = _lines[i]; + int address = _locationCounters[i]; + int offset = address - _startAddress; + + if (offset < 0 || offset >= _programLength) { + continue; + } + + // Generate instruction + if (auto* inst = dynamic_cast(line.get())) { + auto bytes = generateInstruction(inst, address); + for (size_t j = 0; j < bytes.size() && (offset + j) < code.size(); ++j) { + code[offset + j] = bytes[j]; + } + } + + // Generate data + if (auto* data = dynamic_cast(line.get())) { + auto bytes = generateData(data); + for (size_t j = 0; j < bytes.size() && (offset + j) < code.size(); ++j) { + code[offset + j] = bytes[j]; + } + } + } + + return code; +} + +std::vector Code::generateInstruction(const InstructionNode* inst, int address) { + std::vector bytes; + + if (!inst || !inst->getMnemonic()) { + return bytes; + } + + auto mnemonic = inst->getMnemonic(); + uint8_t opcode = mnemonic->opcode(); + InstructionType type = mnemonic->type(); + bool extended = mnemonic->extended(); 
+ const auto& operands = mnemonic->operands(); + + switch (type) { + case InstructionType::TYPE1: { + bytes.push_back(opcode); + break; + } + + case InstructionType::TYPE2: { + bytes.push_back(opcode); + uint8_t r1 = 0, r2 = 0; + if (operands.size() >= 1 && std::holds_alternative(operands[0])) { + r1 = std::get(operands[0]).num & 0xF; + } + if (operands.size() >= 2 && std::holds_alternative(operands[1])) { + r2 = std::get(operands[1]).num & 0xF; + } + bytes.push_back((r1 << 4) | r2); + break; + } + + case InstructionType::TYPE3_4: { + // Format 3 or 4 instruction + int ni = 0, x = 0, b = 0, p = 0, e = 0; + int targetAddress = 0; + bool immediate = false, indirect = false, indexed = false; + + // Parse operand + if (!operands.empty()) { + if (std::holds_alternative(operands[0])) { + immediate = true; + targetAddress = std::get(operands[0]).value; + ni = 0x01; // n=0, i=1 + } else if (std::holds_alternative(operands[0])) { + auto& sym = std::get(operands[0]); + immediate = sym.immediate; + indirect = sym.indirect; + indexed = sym.indexed; + + // Look up symbol + auto it = _symbolTable.find(sym.name); + if (it != _symbolTable.end()) { + targetAddress = it->second; + } + + // Set ni bits + if (immediate) { + ni = 0x01; // n=0, i=1 + } else if (indirect) { + ni = 0x02; // n=1, i=0 + } else { + ni = 0x03; // n=1, i=1 (simple/direct) + } + } + } else { + // No operand (like RSUB) + ni = 0x03; + } + + if (indexed) { + x = 1; + } + + if (extended) { + e = 1; + } + + // Calculate PC for addressing + int pc = address + (extended ? 
4 : 3); + + // Select addressing mode + auto result = selectAddressingMode(targetAddress, pc, indexed, immediate, indirect, extended); + + if (result.success) { + b = (result.nixbpe >> 2) & 1; + p = (result.nixbpe >> 1) & 1; + e = result.nixbpe & 1; + } + + int displacement = result.displacement; + + // Build instruction bytes + uint8_t byte1 = (opcode & 0xFC) | ni; + uint8_t byte2 = (x << 7) | (b << 6) | (p << 5) | (e << 4); + + bytes.push_back(byte1); + + if (extended) { + // Format 4: 20-bit address + byte2 |= (displacement >> 16) & 0x0F; + bytes.push_back(byte2); + bytes.push_back((displacement >> 8) & 0xFF); + bytes.push_back(displacement & 0xFF); + + // Format 4 instructions with symbol references (not immediate values) need M records + bool needsRelocation = false; + if (!operands.empty() && std::holds_alternative(operands[0])) { + auto& sym = std::get(operands[0]); + // If it's not an immediate mode with a constant, it needs relocation + if (!sym.immediate || _symbolTable.find(sym.name) != _symbolTable.end()) { + needsRelocation = true; + } + } + + // Record modification if needed + if (needsRelocation) { + ModificationRecord mod; + mod.address = address + 1; // Skip the opcode+ni byte, start at xbpe+addr + mod.halfBytes = 5; // 5 half-bytes (20 bits) for format 4 address field + _modificationRecords.push_back(mod); + } + } else { + // Format 3: 12-bit displacement + byte2 |= (displacement >> 8) & 0x0F; + bytes.push_back(byte2); + bytes.push_back(displacement & 0xFF); + } + break; + } + + default: + break; + } + + return bytes; +} + +Code::AddressingResult Code::selectAddressingMode(int targetAddress, int pc, bool indexed, bool immediate, bool indirect, bool extended) const { + AddressingResult result; + result.success = false; + result.nixbpe = 0; + result.displacement = 0; + + // Immediate mode - use target address directly + if (immediate) { + if (extended) { + result.nixbpe = 0x01; // e=1, b=0, p=0 + result.displacement = targetAddress & 0xFFFFF; // 20 
bits + } else { + result.nixbpe = 0x00; // e=0, b=0, p=0 + result.displacement = targetAddress & 0xFFF; // 12 bits + } + result.success = true; + return result; + } + + // Extended format - use absolute address + if (extended) { + result.nixbpe = 0x01; // e=1, b=0, p=0 + result.displacement = targetAddress & 0xFFFFF; + result.success = true; + return result; + } + + // Try PC-relative (-2048 to +2047) + int pcDisp = targetAddress - pc; + if (pcDisp >= -2048 && pcDisp <= 2047) { + result.nixbpe = 0x02; // p=1, b=0, e=0 + result.displacement = pcDisp & 0xFFF; + result.success = true; + return result; + } + + // Try base-relative (0 to 4095) + if (_baseRegister >= 0) { + int baseDisp = targetAddress - _baseRegister; + if (baseDisp >= 0 && baseDisp <= 4095) { + result.nixbpe = 0x04; // b=1, p=0, e=0 + result.displacement = baseDisp & 0xFFF; + result.success = true; + return result; + } + } + + // Try direct (0 to 4095) + if (targetAddress >= 0 && targetAddress <= 4095) { + result.nixbpe = 0x00; // b=0, p=0, e=0 + result.displacement = targetAddress & 0xFFF; + result.success = true; + return result; + } + + // Try SIC format (0 to 32767, 15 bits) + if (targetAddress >= 0 && targetAddress <= 32767) { + result.nixbpe = 0x00; + result.displacement = targetAddress & 0x7FFF; + result.success = true; + return result; + } + + // Could not find suitable addressing mode + result.success = false; + return result; +} + +std::vector Code::generateData(const DataNode* data) { + std::vector bytes; + + if (!data) { + return bytes; + } + + switch (data->kind()) { + case DataKind::WORD: { + if (std::holds_alternative(data->value())) { + int value = std::get(data->value()) & 0xFFFFFF; + // SIC/XE stores words in big-endian (MSB first) + bytes.push_back((value >> 16) & 0xFF); + bytes.push_back((value >> 8) & 0xFF); + bytes.push_back(value & 0xFF); + } + break; + } + + case DataKind::BYTE: { + if (std::holds_alternative>(data->value())) { + bytes = std::get>(data->value()); + } + break; + 
} + + case DataKind::RESW: + case DataKind::RESB: + // Reserved space - emit zeros (handled by initialized array) + break; + } + + return bytes; +} + +std::string Code::emitText() { + std::ostringstream oss; + + // H record: program name, start address, length + oss << "H "; + std::string name = _programName.empty() ? "PROG" : _programName; + name.resize(6, ' '); + oss << name << " "; + oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _startAddress << " "; + oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _programLength; + oss << "\n"; + + // Clear and rebuild modification records + _modificationRecords.clear(); + + // T records: text (code/data) + std::vector code = emitCode(); + int textStart = 0; + + while (textStart < code.size()) { + int textLength = std::min(30, (int)code.size() - textStart); + + oss << "T "; + oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << (_startAddress + textStart) << " "; + oss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase << textLength << " "; + + for (int i = 0; i < textLength; ++i) { + oss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase << (int)code[textStart + i]; + } + oss << "\n"; + + textStart += textLength; + } + + // M records: modifications for format 4 instructions + for (const auto& mod : _modificationRecords) { + oss << "M "; + oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << mod.address << " "; + oss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase << mod.halfBytes; + oss << "\n"; + } + + // E record: execution start address + oss << "E "; + oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _startAddress; + oss << "\n"; + + return oss.str(); +} + +std::string Code::dumpSymbols() const { + std::ostringstream oss; + oss << "=== Symbol Table ===\n"; + oss << std::left << std::setw(20) << "Symbol" << "Address\n"; + oss << std::string(30, '-') << 
"\n"; + + for (const auto& [symbol, address] : _symbolTable) { + oss << std::left << std::setw(20) << symbol; + oss << std::hex << std::uppercase << std::setw(6) << std::setfill('0') << address << "\n"; + } + + return oss.str(); +} + +std::string Code::dumpCode() const { + std::ostringstream oss; + oss << "=== Code Listing ===\n"; + oss << std::hex << std::uppercase << std::setfill('0'); + + std::vector code = const_cast(this)->emitCode(); + + for (size_t i = 0; i < _lines.size(); ++i) { + auto& line = _lines[i]; + int address = _locationCounters[i]; + int offset = address - _startAddress; + + // Print address + oss << std::setw(6) << address << " "; + + // Print generated bytes + int length = getInstructionLength(line, address); + if (auto* data = dynamic_cast(line.get())) { + auto bytes = const_cast(this)->generateData(data); + length = bytes.size(); + } + + for (int j = 0; j < length && (offset + j) < code.size(); ++j) { + oss << std::setw(2) << (int)code[offset + j]; + } + + // Pad for alignment + for (int j = length; j < 12; ++j) { + oss << " "; + } + + oss << " " << line->toString() << "\n"; + } + + return oss.str(); +} diff --git a/simulator_SIC_XE/src/lexer.cpp b/simulator_SIC_XE/src/lexer.cpp new file mode 100644 index 0000000..7ac9344 --- /dev/null +++ b/simulator_SIC_XE/src/lexer.cpp @@ -0,0 +1,138 @@ +#include "lexer.h" +#include +#include + +Lexer::Lexer(std::string input) + : input_(std::move(input)), + pos_(0), + start_(0), + row(1), + col(1) +{ +} + +Lexer& Lexer::mark() { + start_ = pos_; + return *this; +} + +std::string Lexer::extract(int ofs) { + std::size_t end = pos_ + static_cast(ofs); + if (end > input_.size()) { + end = input_.size(); + } + if (end < start_) { + end = start_; + } + return input_.substr(start_, end - start_); +} + +std::string Lexer::extract() { + return extract(0); +} + +char Lexer::peek(int ahead) const { + std::size_t idx = pos_ + static_cast(ahead); + if (idx < input_.size()) { + return input_[idx]; + } + return '\0'; // 
sentinel for "no more chars" +} + +char Lexer::peek() const { + return peek(0); +} + +char Lexer::advance() { + char ch = peek(); + if (ch == '\0') { + return '\0'; // don't move past end + } + + ++pos_; + + // update logical location + if (ch == '\n') { + ++row; + col = 1; + } else if (ch == '\t') { + col = ((col - 1) / 4) * 4 + 5; + } else { + ++col; + } + return ch; +} + +bool Lexer::advanceIf(char ch) { + if (peek() != ch) { + return false; + } + advance(); + return true; +} + +void Lexer::advance(char ch) { + if (!advanceIf(ch)) { + throw SyntaxError(std::string("'") + ch + "' expected", row, col); + } +} + +bool Lexer::skipWhitespace() { + while (true) { + char p = peek(); + if (p == ' ' || p == '\t') { + advance(); + } else { + break; + } + } + char p = peek(); + return (p == '\n' || p == '\0'); +} + +std::string Lexer::readTo(char delimiter) { + mark(); + while (peek() > 0 && peek() != delimiter) { + advance(); + } + if (peek() == delimiter) { + advance(); // consume delimiter + } + // exclude delimiter itself (like Java's extract(-1)) + return extract(-1); +} + +std::string Lexer::readAlphanumeric() { + mark(); + while (true) { + char c = peek(); + if (std::isalnum(static_cast(c)) || c == '_') { + advance(); + } else { + break; + } + } + return extract(); +} + +int Lexer::digitValue(char c, int radix) { + if (radix < 2 || radix > 36) return -1; + int v = -1; + if (c >= '0' && c <= '9') { + v = c - '0'; + } else if (c >= 'A' && c <= 'Z') { + v = c - 'A' + 10; + } else if (c >= 'a' && c <= 'z') { + v = c - 'a' + 10; + } + if (v >= 0 && v < radix) return v; + return -1; +} + +std::string Lexer::readDigits(int radix) { + mark(); + while (digitValue(peek(), radix) != -1) { + advance(); + } + return extract(); +} diff --git a/simulator_SIC_XE/src/loader.cpp b/simulator_SIC_XE/src/loader.cpp index 8d3aa17..1d00ac5 100644 --- a/simulator_SIC_XE/src/loader.cpp +++ b/simulator_SIC_XE/src/loader.cpp @@ -17,6 +17,7 @@ Loader::~Loader() void Loader::load() { 
HeaderMetadata header = readHeader(); + _relocation_address = header.start_address; while(true) { RecordType type = parseRecordType(static_cast(_file_reader->readByte())); @@ -28,6 +29,11 @@ void Loader::load() } break; } + case RecordType::MODIFICATION: { + ModificationRecord modRecord = readModificationRecord(); + applyModification(modRecord); + break; + } case RecordType::END: { EndRecord endRecord = readEndRecord(); _machine->setPC(endRecord.execution_start_address); @@ -45,6 +51,7 @@ Loader::RecordType Loader::parseRecordType(char c) switch (c) { case 'H': return RecordType::HEADER; case 'T': return RecordType::TEXT; + case 'M': return RecordType::MODIFICATION; case 'E': return RecordType::END; default: return RecordType::UNKNOWN; // fallback; adjust as needed } @@ -105,6 +112,29 @@ Loader::TextRecord Loader::readTextRecord() return record; } +Loader::ModificationRecord Loader::readModificationRecord() +{ + ModificationRecord record; + if(FILE_CONTAINS_WHITE_SPACES) _file_reader->readByte(); + + record.address = std::stoi(_file_reader->readString(6), nullptr, 16); + if(FILE_CONTAINS_WHITE_SPACES) _file_reader->readByte(); + + record.length = std::stoi(_file_reader->readString(2), nullptr, 16); + + record.add = true; + std::string rest = _file_reader->readLine(); + // Remove whitespace + rest.erase(std::remove_if(rest.begin(), rest.end(), ::isspace), rest.end()); + if (!rest.empty()) { + if (rest[0] == '-') { + record.add = false; + } + } + + return record; +} + Loader::EndRecord Loader::readEndRecord() { EndRecord record; @@ -132,3 +162,54 @@ bool Loader::load_into_memory(int start_address, const std::vector &dat } return true; } + +void Loader::applyModification(const ModificationRecord& mod) +{ + // M record specifies address and length in half-bytes (nibbles) + // We need to modify the value at that address by adding or subtracting + // the relocation address + + int address = mod.address; + int halfBytes = mod.length; + + // Calculate how many full bytes 
we need to read + // halfBytes can be odd or even + int numBytes = (halfBytes + 1) / 2; + + if (address < 0 || address + numBytes > MEMORY_SIZE) { + throw std::runtime_error("Modification address out of bounds"); + } + + // Read the current value from memory + int currentValue = 0; + for (int i = 0; i < numBytes; ++i) { + currentValue = (currentValue << 8) | _machine->getByte(address + i); + } + + // If odd number of half-bytes, we only modify the relevant nibbles + // For simplicity, we'll work with the full bytes and mask appropriately + int mask = 0; + for (int i = 0; i < halfBytes; ++i) { + mask = (mask << 4) | 0xF; + } + + // Extract the value to modify + int shift = (numBytes * 2 - halfBytes) * 4; + int valueToModify = (currentValue >> shift) & mask; + + // Apply modification + int newValue = mod.add ? (valueToModify + _relocation_address) + : (valueToModify - _relocation_address); + + // Mask to keep only the relevant bits + newValue &= mask; + + // Reconstruct the full value + int preservedBits = currentValue & ~(mask << shift); + int finalValue = preservedBits | (newValue << shift); + + // Write back to memory (big-endian) + for (int i = 0; i < numBytes; ++i) { + _machine->setByte(address + i, (finalValue >> ((numBytes - 1 - i) * 8)) & 0xFF); + } +} diff --git a/simulator_SIC_XE/src/node.cpp b/simulator_SIC_XE/src/node.cpp new file mode 100644 index 0000000..28c31fe --- /dev/null +++ b/simulator_SIC_XE/src/node.cpp @@ -0,0 +1,121 @@ +#include "node.h" +#include +#include +#include + +string Node::toString() const { + std::ostringstream oss; + if (!_label.empty()) oss << _label << " "; + if (_mnemonic) oss << _mnemonic->toString() << " "; + if (!_comment.empty()) oss << "." 
<< _comment; + return oss.str(); +} + +std::string Mnemonic::toString() const { + std::ostringstream oss; + oss << "[OP:" << std::hex << (int)_opcode << "]"; + if (_extended) oss << "+"; + // Print operands + for (size_t i = 0; i < _operands.size(); ++i) { + if (i > 0) oss << ","; + std::visit([&](auto&& arg) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + // nothing + } else if constexpr (std::is_same_v) { + oss << "R" << arg.num; + } else if constexpr (std::is_same_v) { + oss << "#" << arg.value; + } else if constexpr (std::is_same_v) { + oss << arg.name; + if (arg.indexed) oss << ",X"; + } + }, _operands[i]); + } + return oss.str(); +} + +string InstructionNode::toString() const { + std::ostringstream oss; + if (!_label.empty()) oss << _label << " "; + if (_mnemonic) oss << _mnemonic->toString(); + if (!_comment.empty()) oss << " ." << _comment; + return oss.str(); +} + +string CommentNode::toString() const { + return "." + _comment; +} + +string DirectiveNode::toString() const { + std::ostringstream oss; + if (!_label.empty()) oss << _label << " "; + switch (_kind) { + case DirectiveKind::START: oss << "START"; break; + case DirectiveKind::END: oss << "END"; break; + case DirectiveKind::BASE: oss << "BASE"; break; + case DirectiveKind::NOBASE: oss << "NOBASE"; break; + case DirectiveKind::EQU: oss << "EQU"; break; + case DirectiveKind::ORG: oss << "ORG"; break; + case DirectiveKind::LTORG: oss << "LTORG"; break; + case DirectiveKind::EXTDEF: oss << "EXTDEF"; break; + case DirectiveKind::EXTREF: oss << "EXTREF"; break; + case DirectiveKind::CSECT: oss << "CSECT"; break; + } + std::visit([&](auto&& arg) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + // no arg + } else if constexpr (std::is_same_v) { + oss << " " << std::hex << arg; + } else if constexpr (std::is_same_v) { + oss << " " << arg; + } else if constexpr (std::is_same_v>) { + for (size_t i = 0; i < arg.size(); ++i) { + if (i > 0) oss << ","; + oss << arg[i]; + } + } + }, 
_arg); + if (!_comment.empty()) oss << " ." << _comment; + return oss.str(); +} + +string DataNode::toString() const { + std::ostringstream oss; + if (!_label.empty()) oss << _label << " "; + switch (_kind) { + case DataKind::WORD: oss << "WORD"; break; + case DataKind::BYTE: oss << "BYTE"; break; + case DataKind::RESW: oss << "RESW"; break; + case DataKind::RESB: oss << "RESB"; break; + } + std::visit([&](auto&& arg) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + // no value + } else if constexpr (std::is_same_v) { + oss << " " << arg; + } else if constexpr (std::is_same_v>) { + // Try to display as string if all printable ASCII + bool isPrintable = !arg.empty() && std::all_of(arg.begin(), arg.end(), + [](uint8_t b) { return b >= 32 && b <= 126; }); + + if (isPrintable) { + oss << " C'"; + for (uint8_t b : arg) oss << static_cast(b); + oss << "'"; + } else { + // Display as hex + oss << " X'"; + for (uint8_t b : arg) { + oss << std::hex << std::setw(2) << std::setfill('0') << (int)b; + } + oss << "'"; + } + } + }, _value); + if (!_comment.empty()) oss << " ." 
<< _comment; + return oss.str(); +} + \ No newline at end of file diff --git a/simulator_SIC_XE/src/opcode.cpp b/simulator_SIC_XE/src/opcode.cpp index 0b63ce8..fcda0c4 100644 --- a/simulator_SIC_XE/src/opcode.cpp +++ b/simulator_SIC_XE/src/opcode.cpp @@ -95,8 +95,36 @@ void loadInstructionSet() if (instructions[i].name == nullptr) instructions[i] = {"INVALID", InstructionType::INVALID, nullptr}; if (instructionsEXEX[i].name == nullptr) instructionsEXEX[i] = {"INVALID", InstructionType::INVALID, nullptr}; } + + // Initialize mnemonicToOpcode map + for (int i = 0; i < 0xff; ++i) { + if (instructions[i].type != InstructionType::INVALID) { + mnemonicToOpcode.emplace(instructions[i].name, static_cast(i)); + } + if (instructionsEXEX[i].type != InstructionType::INVALID) { + mnemonicToOpcode.emplace(instructionsEXEX[i].name, static_cast(i)); + } + } + opcodeTablesInitialized = true; } +std::optional findOpcodeByMnemonic(std::string_view name) +{ + auto it = mnemonicToOpcode.find(name); + if (it == mnemonicToOpcode.end()) + return std::nullopt; + return it->second; +} + +const InstructionInfo& getInstructionInfo(uint8_t opcode) +{ + if (instructions[opcode].type != InstructionType::INVALID) + return instructions[opcode]; + return instructionsEXEX[opcode]; +} + + + AddressingMode getAddressingMode(int ni) { switch (ni) { diff --git a/simulator_SIC_XE/src/parser.cpp b/simulator_SIC_XE/src/parser.cpp new file mode 100644 index 0000000..3decfe9 --- /dev/null +++ b/simulator_SIC_XE/src/parser.cpp @@ -0,0 +1,465 @@ +// parser.cpp +#include "parser.h" +#include +#include +#include + +void Parser::initMnemonicMap() { + if (s_mnemonicMapInitialized) return; + + loadInstructionSet(); + + for (int op = 0; op < 0xFF; ++op) { + const auto& info = instructions[op]; + if (info.name && info.type != InstructionType::INVALID) { + s_nameToOpcode.emplace(info.name, static_cast(op)); + } + const auto& ex = instructionsEXEX[op]; + if (ex.name && ex.type != InstructionType::INVALID) { + 
s_nameToOpcode.emplace(ex.name, static_cast(op)); + } + } + + s_mnemonicMapInitialized = true; +} + +std::shared_ptr Parser::makeMnemonic(const std::string& name, bool extended) { + initMnemonicMap(); + + auto it = s_nameToOpcode.find(name); + if (it == s_nameToOpcode.end()) { + throw SyntaxError("Invalid mnemonic '" + name + "'", lexer_.row, lexer_.col); + } + + std::uint8_t opcode = it->second; + const InstructionInfo* info = nullptr; + + if (instructions[opcode].type != InstructionType::INVALID) { + info = &instructions[opcode]; + } else if (instructionsEXEX[opcode].type != InstructionType::INVALID) { + info = &instructionsEXEX[opcode]; + } + + if (!info) { + throw SyntaxError("Invalid mnemonic '" + name + "'", lexer_.row, lexer_.col); + } + + if (extended && info->type != InstructionType::TYPE3_4) { + throw SyntaxError( + "Extended format not allowed for mnemonic '" + name + "'", + lexer_.row, + lexer_.col + ); + } + + return std::make_shared(opcode, info->type, extended); +} + +std::string Parser::parseLabel() { + if (lexer_.col == 1 && std::isalpha(static_cast(lexer_.peek()))) { + return std::string(lexer_.readAlphanumeric()); + } + return {}; +} + +std::shared_ptr Parser::parseMnemonic() { + bool isExtended = lexer_.advanceIf('+'); + std::string name(lexer_.readAlphanumeric()); + if (name.empty()) { + throw SyntaxError("Mnemonic expected", lexer_.row, lexer_.col); + } + return makeMnemonic(name, isExtended); +} + +std::string Parser::parseSymbol() { + return std::string(lexer_.readAlphanumeric()); +} + +int Parser::parseRegister() { + char ch = lexer_.advance(); + constexpr std::string_view regs = "AXLBSTF"; + auto pos = regs.find(ch); + if (pos == std::string_view::npos) { + throw SyntaxError(std::string("Invalid register '") + ch + "'", lexer_.row, lexer_.col); + } + return static_cast(pos); +} + +void Parser::parseComma() { + lexer_.skipWhitespace(); + lexer_.advance(','); + lexer_.skipWhitespace(); +} + +bool Parser::parseIndexed() { + 
// Returns the numeric value of digit `c` in the given radix, or -1 when the
// radix is outside 2..36 or `c` is not a valid digit for it. Letters are
// accepted in either case ('a'/'A' = 10 ... 'z'/'Z' = 35).
static int digitValue(char c, int radix) {
    const bool radixSupported = (radix >= 2) && (radix <= 36);
    if (!radixSupported) {
        return -1;
    }

    int value;
    if ('0' <= c && c <= '9') {
        value = c - '0';
    } else if ('a' <= c && c <= 'z') {
        value = 10 + (c - 'a');
    } else if ('A' <= c && c <= 'Z') {
        value = 10 + (c - 'A');
    } else {
        return -1;
    }

    // A digit is only valid when it is smaller than the radix.
    return value < radix ? value : -1;
}
+ std::to_string(hi) + "]", + lexer_.row, + lexer_.col + ); + } + + return num; +} + +std::vector Parser::parseData() { + if (lexer_.advanceIf('C')) { + lexer_.advance('\''); + std::string s(lexer_.readTo('\'')); + std::vector data; + data.reserve(s.size()); + for (unsigned char c : s) { + data.push_back(static_cast(c)); + } + return data; + } + + if (lexer_.advanceIf('X')) { + lexer_.advance('\''); + std::string s(lexer_.readTo('\'')); + if (s.size() % 2 != 0) { + throw SyntaxError("Invalid hex literal length", lexer_.row, lexer_.col); + } + + std::vector data; + data.reserve(s.size() / 2); + + auto hexVal = [](char c) -> int { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + return -1; + }; + + for (std::size_t i = 0; i < s.size(); i += 2) { + int hi = hexVal(s[i]); + int lo = hexVal(s[i + 1]); + if (hi < 0 || lo < 0) { + throw SyntaxError("Invalid hex digit in literal", lexer_.row, lexer_.col); + } + data.push_back(static_cast((hi << 4) | lo)); + } + return data; + } + + if (std::isdigit(static_cast(lexer_.peek()))) { + constexpr int MAX_WORD = 0xFFFFFF; + int num = parseNumber(0, MAX_WORD); + return { + static_cast((num >> 16) & 0xFF), + static_cast((num >> 8) & 0xFF), + static_cast(num & 0xFF) + }; + } + + throw SyntaxError( + std::string("Invalid storage specifier '") + lexer_.peek() + "'", + lexer_.row, + lexer_.col + ); +} + +void Parser::parseOperands(Mnemonic& m) { + InstructionType t = m.type(); + char c = lexer_.peek(); + + if (t == InstructionType::TYPE1) { + // TYPE1 has no operands + return; + } + + if (t == InstructionType::TYPE2) { + // TYPE2: r1 or r1,r2 or r1,n + if (c == '\n' || c == '\0') return; + + int r1 = parseRegister(); + m.operands().emplace_back(Register{r1}); + lexer_.skipWhitespace(); + + if (lexer_.peek() == ',') { + parseComma(); + char c2 = lexer_.peek(); + if (std::isalpha(static_cast(c2))) { + int r2 = parseRegister(); + 
m.operands().emplace_back(Register{r2}); + } else if (std::isdigit(static_cast(c2))) { + int n = parseNumber(0, 0xFFFF); + m.operands().emplace_back(Immediate{n}); + } else { + throw SyntaxError("Invalid second operand", lexer_.row, lexer_.col); + } + } + + return; + } + + if (t == InstructionType::TYPE3_4) { + lexer_.skipWhitespace(); + char c0 = lexer_.peek(); + if (c0 == '\n' || c0 == '\0') { + // No operand (e.g., RSUB) + return; + } + + bool immediate = false; + bool indirect = false; + + if (lexer_.advanceIf('#')) { + immediate = true; + } else if (lexer_.advanceIf('@')) { + indirect = true; + } + + char c1 = lexer_.peek(); + if (std::isdigit(static_cast(c1))) { + int num = parseNumber(0, 0x7FFFFF); + if (immediate) { + m.operands().emplace_back(Immediate{num}); + } else { + // Direct numeric addressing (rare, treat as immediate) + m.operands().emplace_back(Immediate{num}); + } + } else if (std::isalpha(static_cast(c1))) { + std::string symbol = parseSymbol(); + bool indexed = parseIndexed(); + m.operands().emplace_back(SymbolRef{symbol, indexed, immediate, indirect}); + } else { + throw SyntaxError("Invalid operand", lexer_.row, lexer_.col); + } + + return; + } +} + +bool Parser::isDirective(const std::string& name) { + return name == "START" || name == "END" || name == "BASE" || name == "NOBASE" || + name == "EQU" || name == "ORG" || name == "LTORG" || + name == "EXTDEF" || name == "EXTREF" || name == "CSECT"; +} + +bool Parser::isDataDirective(const std::string& name) { + return name == "WORD" || name == "BYTE" || name == "RESW" || name == "RESB"; +} + +std::shared_ptr Parser::parseDirective(const std::string& label, const std::string& directive) { + lexer_.skipWhitespace(); + + DirectiveArg argValue; + char c = lexer_.peek(); + + // Parse argument based on first character + if (std::isalpha(c)) { + std::string arg = std::string(lexer_.readAlphanumeric()); + argValue = arg; + } else if (std::isdigit(c) || c == '0') { + int num = parseNumber(0, 0xFFFFFF); + 
argValue = num; + } else { + // No argument + argValue = std::monostate{}; + } + + lexer_.skipWhitespace(); + std::string comment = std::string(lexer_.readTo('\n')); + + DirectiveKind kind; + if (directive == "START") kind = DirectiveKind::START; + else if (directive == "END") kind = DirectiveKind::END; + else if (directive == "BASE") kind = DirectiveKind::BASE; + else if (directive == "NOBASE") kind = DirectiveKind::NOBASE; + else if (directive == "EQU") kind = DirectiveKind::EQU; + else if (directive == "ORG") kind = DirectiveKind::ORG; + else if (directive == "LTORG") kind = DirectiveKind::LTORG; + else if (directive == "EXTDEF") kind = DirectiveKind::EXTDEF; + else if (directive == "EXTREF") kind = DirectiveKind::EXTREF; + else if (directive == "CSECT") kind = DirectiveKind::CSECT; + else throw SyntaxError("Unknown directive", lexer_.row, lexer_.col); + + return std::make_shared(label, kind, argValue, comment); +} + +std::shared_ptr Parser::parseDataDirective(const std::string& label, const std::string& directive) { + lexer_.skipWhitespace(); + + DataKind kind; + if (directive == "WORD") kind = DataKind::WORD; + else if (directive == "BYTE") kind = DataKind::BYTE; + else if (directive == "RESW") kind = DataKind::RESW; + else if (directive == "RESB") kind = DataKind::RESB; + else throw SyntaxError("Unknown data directive", lexer_.row, lexer_.col); + + DataValue value; + if (kind == DataKind::WORD || kind == DataKind::RESW || kind == DataKind::RESB) { + int num = parseNumber(0, 0xFFFFFF); + value = num; + } else { // BYTE + auto bytes = parseData(); + value = bytes; + } + + lexer_.skipWhitespace(); + std::string comment = std::string(lexer_.readTo('\n')); + + return std::make_shared(label, kind, value, comment); +} + +std::shared_ptr Parser::parseInstruction() { + if (lexer_.col == 1 && lexer_.peek() == '.') { + return std::make_shared( + std::string(lexer_.readTo('\n')) + ); + } + + std::string label = parseLabel(); + + if (lexer_.skipWhitespace() && 
label.empty()) { + lexer_.advance(); + return nullptr; + } + + lexer_.skipWhitespace(); + + // Check for comment after label - create a label-only instruction node + if (lexer_.peek() == '.') { + std::string comment = std::string(lexer_.readTo('\n')); + // Return an instruction node with just the label (null mnemonic) + auto node = std::make_shared( + std::move(label), + nullptr, + std::move(comment) + ); + return node; + } + + // Check for extended format prefix + bool isExtended = lexer_.peek() == '+'; + if (isExtended) { + lexer_.advance(); + } + + std::string name = std::string(lexer_.readAlphanumeric()); + + if (name.empty()) { + throw SyntaxError( + "Mnemonic or directive expected (label='" + label + "')", + lexer_.row, + lexer_.col + ); + } + + // Check if it's a directive or data directive + if (isDirective(name)) { + return parseDirective(label, name); + } + + if (isDataDirective(name)) { + return parseDataDirective(label, name); + } + + // It's an instruction - create mnemonic + auto mnemonic = makeMnemonic(name, isExtended); + lexer_.skipWhitespace(); + + parseOperands(*mnemonic); + lexer_.skipWhitespace(); + + std::string comment(lexer_.readTo('\n')); + + return std::make_shared( + std::move(label), + std::move(mnemonic), + std::move(comment) + ); +} + +Code Parser::parseCode() { + Code code; + + while (lexer_.peek() > 0) { + while (lexer_.peek() > 0 && lexer_.col > 1) { + lexer_.readTo('\n'); + } + + if (auto node = parseInstruction()) { + code.addLine(node); + } + } + + return code; +} + +Code Parser::parse(const std::string& input) { + lexer_ = Lexer(input); + return parseCode(); +}