// Implementation of Code: a container of parsed assembly lines plus a
// two-pass assembler that produces raw bytes and a textual object program
// (H/T/M/E records) for them.
//
// NOTE(review): in this copy of the file the angle-bracket parts appear to be
// missing (header names after #include, template arguments of
// std::shared_ptr / std::vector / dynamic_cast / std::get /
// std::holds_alternative). They are left exactly as found; restore them from
// version control before building.
#include "code.h"
#include "opcode.h"
#include "constants.h"
#include
#include
#include
#include

// Appends one parsed source line to the program.
// Location counters are only recomputed by firstPass(), so lines added after
// assembling are not covered by _locationCounters until it runs again.
void Code::addLine(const std::shared_ptr &line) {
    _lines.emplace_back(line);
}

// Returns the stored lines in insertion order.
const std::vector> &Code::getLines() const {
    return _lines;
}

// Concatenates toString() of every line, each followed by a newline.
const string Code::toString() const {
    string result;
    for (const auto& line : _lines) {
        result += line->toString() + "\n";
    }
    return result;
}

// ============================================================
// TWO-PASS ASSEMBLER IMPLEMENTATION
// ============================================================

// Runs both assembler passes in order.
void Code::assemble() {
    firstPass();
    secondPass();
}

// First pass: assigns a location counter to every line, builds the symbol
// table, and derives the start address, program name and program length.
//
// Throws std::runtime_error("Duplicate symbol: <label>") when a label is
// already present in the symbol table.
void Code::firstPass() {
    _symbolTable.clear();
    _locationCounters.clear();
    _locationCounters.resize(_lines.size(), 0);

    int locationCounter = 0;
    bool startFound = false;

    for (size_t i = 0; i < _lines.size(); ++i) {
        auto& line = _lines[i];
        // Address of this line is the counter value *before* the line is sized.
        _locationCounters[i] = locationCounter;

        // Handle label
        std::string label = line->getLabel();
        if (!label.empty()) {
            if (_symbolTable.find(label) != _symbolTable.end()) {
                throw std::runtime_error("Duplicate symbol: " + label);
            }
            // Provisional value; START and EQU below may overwrite it.
            _symbolTable[label] = locationCounter;
        }

        // Check for directives
        if (auto* directive = dynamic_cast(line.get())) {
            switch (directive->kind()) {
                case DirectiveKind::START: {
                    if (std::holds_alternative(directive->arg())) {
                        _startAddress = std::get(directive->arg());
                        locationCounter = _startAddress;
                        // Re-record this line's address now that the counter moved.
                        _locationCounters[i] = locationCounter;
                        if (!label.empty()) {
                            _symbolTable[label] = locationCounter;
                            _programName = label;
                        }
                        startFound = true;
                    }
                    break;
                }
                case DirectiveKind::END:
                    // This value is recomputed unconditionally after the loop.
                    _programLength = locationCounter - _startAddress;
                    break;
                case DirectiveKind::BASE: {
                    // BASE sets base register for addressing
                    if (std::holds_alternative(directive->arg())) {
                        // Will resolve in second pass
                        // NOTE(review): secondPass() is empty in this file and
                        // nothing visible here assigns _baseRegister a
                        // non-negative value, so base-relative addressing in
                        // selectAddressingMode() looks unreachable — confirm.
                    }
                    break;
                }
                case DirectiveKind::NOBASE:
                    _baseRegister = -1;
                    break;
                case DirectiveKind::EQU: {
                    // EQU defines symbol value
                    if (!label.empty() && std::holds_alternative(directive->arg())) {
                        _symbolTable[label] = std::get(directive->arg());
                    }
                    break;
                }
                case DirectiveKind::ORG: {
                    // ORG changes location counter
                    // NOTE(review): the final program length is computed from
                    // the last counter value, so an ORG that moves the counter
                    // backwards shrinks (or makes negative) _programLength.
                    if (std::holds_alternative(directive->arg())) {
                        locationCounter = std::get(directive->arg());
                    }
                    break;
                }
                default:
                    break;
            }
            // Directives occupy no bytes.
            continue;
        }

        // Handle data directives
        if (auto* data = dynamic_cast(line.get())) {
            int length = 0;
            switch (data->kind()) {
                case DataKind::WORD:
                    length = 3; // 24-bit word
                    break;
                case DataKind::BYTE: {
                    // One location per stored byte.
                    if (std::holds_alternative>(data->value())) {
                        length = std::get>(data->value()).size();
                    }
                    break;
                }
                case DataKind::RESW: {
                    // Operand counts words; each word is 3 bytes.
                    if (std::holds_alternative(data->value())) {
                        length = std::get(data->value()) * 3;
                    }
                    break;
                }
                case DataKind::RESB: {
                    // Operand counts bytes.
                    if (std::holds_alternative(data->value())) {
                        length = std::get(data->value());
                    }
                    break;
                }
            }
            locationCounter += length;
            continue;
        }

        // Handle instructions
        if (auto* inst = dynamic_cast(line.get())) {
            int length = getInstructionLength(line, locationCounter);
            locationCounter += length;
        }
    }

    // Without a (valid) START directive the program is placed at address 0.
    if (!startFound) {
        _startAddress = 0;
    }
    _programLength = locationCounter - _startAddress;
}

// Returns the encoded size in bytes of the instruction held by `node`:
// 1 for TYPE1, 2 for TYPE2, 3 or 4 for TYPE3_4 depending on extended().
// Returns 0 when `node` is not an instruction, has no mnemonic, or has an
// unrecognised type. `locationCounter` is not used by the current body.
int Code::getInstructionLength(const std::shared_ptr& node, int locationCounter) const {
    auto* inst = dynamic_cast(node.get());
    if (!inst || !inst->getMnemonic()) {
        return 0;
    }

    auto mnemonic = inst->getMnemonic();
    InstructionType type = mnemonic->type();
    switch (type) {
        case InstructionType::TYPE1:
            return 1;
        case InstructionType::TYPE2:
            return 2;
        case InstructionType::TYPE3_4:
            return mnemonic->extended() ? 4 : 3;
        default:
            return 0;
    }
}

// Second pass placeholder; byte generation currently happens lazily in
// emitCode().
void Code::secondPass() {
    // Generate code for all instructions and data
    // This will be used by emitCode() and emitText()
}

// Builds the program image: a zero-filled buffer of _programLength bytes into
// which every instruction and data line is encoded at (address - _startAddress).
// Lines whose offset falls outside the buffer are skipped.
//
// Reads _locationCounters[i] for every line, so firstPass() must have run
// since the last addLine().
// NOTE(review): generateInstruction() appends to _modificationRecords, so each
// call to emitCode() adds the format-4 records again; only emitText() clears
// them first.
std::vector Code::emitCode() {
    std::vector code;
    // Reserved areas (RESW/RESB) stay as the zeros written here.
    code.resize(_programLength, 0);

    for (size_t i = 0; i < _lines.size(); ++i) {
        auto& line = _lines[i];
        int address = _locationCounters[i];
        int offset = address - _startAddress;
        if (offset < 0 || offset >= _programLength) {
            continue;
        }

        // Generate instruction
        if (auto* inst = dynamic_cast(line.get())) {
            auto bytes = generateInstruction(inst, address);
            // Second condition clips output that would run past the image.
            for (size_t j = 0; j < bytes.size() && (offset + j) < code.size(); ++j) {
                code[offset + j] = bytes[j];
            }
        }

        // Generate data
        if (auto* data = dynamic_cast(line.get())) {
            auto bytes = generateData(data);
            for (size_t j = 0; j < bytes.size() && (offset + j) < code.size(); ++j) {
                code[offset + j] = bytes[j];
            }
        }
    }
    return code;
}

// Encodes one instruction located at `address` and returns its bytes
// (empty when `inst` is null or has no mnemonic).
//
//   TYPE1   : [opcode]
//   TYPE2   : [opcode][r1 << 4 | r2]
//   TYPE3_4 : [opcode(6 bits) | n i][x b p e | high address bits][...]
//             3 bytes with a 12-bit displacement, or 4 bytes with a 20-bit
//             address when the mnemonic is extended.
//
// Side effect: for an extended instruction whose operand is a symbol that is
// either not immediate or is present in the symbol table, a ModificationRecord
// is appended to _modificationRecords.
std::vector Code::generateInstruction(const InstructionNode* inst, int address) {
    std::vector bytes;
    if (!inst || !inst->getMnemonic()) {
        return bytes;
    }

    auto mnemonic = inst->getMnemonic();
    uint8_t opcode = mnemonic->opcode();
    InstructionType type = mnemonic->type();
    bool extended = mnemonic->extended();
    const auto& operands = mnemonic->operands();

    switch (type) {
        case InstructionType::TYPE1: {
            bytes.push_back(opcode);
            break;
        }
        case InstructionType::TYPE2: {
            bytes.push_back(opcode);
            // Missing or non-register operands encode as register number 0.
            uint8_t r1 = 0, r2 = 0;
            if (operands.size() >= 1 && std::holds_alternative(operands[0])) {
                r1 = std::get(operands[0]).num & 0xF;
            }
            if (operands.size() >= 2 && std::holds_alternative(operands[1])) {
                r2 = std::get(operands[1]).num & 0xF;
            }
            bytes.push_back((r1 << 4) | r2);
            break;
        }
        case InstructionType::TYPE3_4: {
            // Format 3 or 4 instruction
            int ni = 0, x = 0, b = 0, p = 0, e = 0;
            int targetAddress = 0;
            bool immediate = false, indirect = false, indexed = false;

            // Parse operand
            // An operand that matches neither alternative below leaves ni at 0.
            if (!operands.empty()) {
                if (std::holds_alternative(operands[0])) {
                    // Literal value operand: always encoded as immediate.
                    immediate = true;
                    targetAddress = std::get(operands[0]).value;
                    ni = 0x01; // n=0, i=1
                } else if (std::holds_alternative(operands[0])) {
                    auto& sym = std::get(operands[0]);
                    immediate = sym.immediate;
                    indirect = sym.indirect;
                    indexed = sym.indexed;

                    // Look up symbol
                    // NOTE(review): an undefined symbol is not reported; the
                    // target address silently stays 0.
                    auto it = _symbolTable.find(sym.name);
                    if (it != _symbolTable.end()) {
                        targetAddress = it->second;
                    }

                    // Set ni bits
                    if (immediate) {
                        ni = 0x01; // n=0, i=1
                    } else if (indirect) {
                        ni = 0x02; // n=1, i=0
                    } else {
                        ni = 0x03; // n=1, i=1 (simple/direct)
                    }
                }
            } else {
                // No operand (like RSUB)
                ni = 0x03;
            }

            if (indexed) {
                x = 1;
            }
            if (extended) {
                e = 1;
            }

            // Calculate PC for addressing
            // (address of the next instruction, i.e. this one plus its length)
            int pc = address + (extended ? 4 : 3);

            // Select addressing mode
            auto result = selectAddressingMode(targetAddress, pc, indexed, immediate, indirect, extended);
            if (result.success) {
                // Low three bits of nixbpe are b (0x04), p (0x02), e (0x01).
                b = (result.nixbpe >> 2) & 1;
                p = (result.nixbpe >> 1) & 1;
                e = result.nixbpe & 1;
            }
            // NOTE(review): when result.success is false nothing is reported;
            // the instruction is emitted with b=p=0 and displacement 0.
            int displacement = result.displacement;

            // Build instruction bytes
            uint8_t byte1 = (opcode & 0xFC) | ni;
            uint8_t byte2 = (x << 7) | (b << 6) | (p << 5) | (e << 4);
            bytes.push_back(byte1);

            if (extended) {
                // Format 4: 20-bit address
                byte2 |= (displacement >> 16) & 0x0F;
                bytes.push_back(byte2);
                bytes.push_back((displacement >> 8) & 0xFF);
                bytes.push_back(displacement & 0xFF);

                // Format 4 instructions with symbol references (not immediate values) need M records
                bool needsRelocation = false;
                if (!operands.empty() && std::holds_alternative(operands[0])) {
                    auto& sym = std::get(operands[0]);
                    // If it's not an immediate mode with a constant, it needs relocation
                    // NOTE(review): a symbol defined through EQU is also in the
                    // symbol table and therefore gets a record as well —
                    // confirm that is intended for absolute values.
                    if (!sym.immediate || _symbolTable.find(sym.name) != _symbolTable.end()) {
                        needsRelocation = true;
                    }
                }

                // Record modification if needed
                if (needsRelocation) {
                    ModificationRecord mod;
                    mod.address = address + 1; // Skip the opcode+ni byte, start at xbpe+addr
                    mod.halfBytes = 5; // 5 half-bytes (20 bits) for format 4 address field
                    _modificationRecords.push_back(mod);
                }
            } else {
                // Format 3: 12-bit displacement
                byte2 |= (displacement >> 8) & 0x0F;
                bytes.push_back(byte2);
                bytes.push_back(displacement & 0xFF);
            }
            break;
        }
        default:
            break;
    }
    return bytes;
}

// Chooses how `targetAddress` is encoded and returns the b/p/e flag bits
// (nixbpe: b=0x04, p=0x02, e=0x01) together with the displacement/address
// field. Candidates are tried in this order:
//   1. immediate  -> value itself, masked to 20 bits (extended) or 12 bits
//   2. extended   -> absolute 20-bit address
//   3. PC-relative, when targetAddress - pc is within [-2048, 2047]
//   4. base-relative, when _baseRegister >= 0 and the offset is in [0, 4095]
//   5. direct 12-bit address in [0, 4095]
//   6. 15-bit address in [0, 32767]
// result.success is false when none applies. `indexed` and `indirect` are
// accepted but not consulted by the current body.
Code::AddressingResult Code::selectAddressingMode(int targetAddress, int pc, bool indexed, bool immediate, bool indirect, bool extended) const {
    AddressingResult result;
    result.success = false;
    result.nixbpe = 0;
    result.displacement = 0;

    // Immediate mode - use target address directly
    // NOTE(review): the value is masked without a range check, so an immediate
    // that does not fit the field is truncated silently.
    if (immediate) {
        if (extended) {
            result.nixbpe = 0x01; // e=1, b=0, p=0
            result.displacement = targetAddress & 0xFFFFF; // 20 bits
        } else {
            result.nixbpe = 0x00; // e=0, b=0, p=0
            result.displacement = targetAddress & 0xFFF; // 12 bits
        }
        result.success = true;
        return result;
    }

    // Extended format - use absolute address
    if (extended) {
        result.nixbpe = 0x01; // e=1, b=0, p=0
        result.displacement = targetAddress & 0xFFFFF;
        result.success = true;
        return result;
    }

    // Try PC-relative (-2048 to +2047)
    int pcDisp = targetAddress - pc;
    if (pcDisp >= -2048 && pcDisp <= 2047) {
        result.nixbpe = 0x02; // p=1, b=0, e=0
        // Masking keeps the 12-bit two's-complement form of negative offsets.
        result.displacement = pcDisp & 0xFFF;
        result.success = true;
        return result;
    }

    // Try base-relative (0 to 4095)
    if (_baseRegister >= 0) {
        int baseDisp = targetAddress - _baseRegister;
        if (baseDisp >= 0 && baseDisp <= 4095) {
            result.nixbpe = 0x04; // b=1, p=0, e=0
            result.displacement = baseDisp & 0xFFF;
            result.success = true;
            return result;
        }
    }

    // Try direct (0 to 4095)
    if (targetAddress >= 0 && targetAddress <= 4095) {
        result.nixbpe = 0x00; // b=0, p=0, e=0
        result.displacement = targetAddress & 0xFFF;
        result.success = true;
        return result;
    }

    // Try SIC format (0 to 32767, 15 bits)
    // NOTE(review): generateInstruction() stores only the low 12 bits of the
    // displacement for non-extended instructions and keeps its own n/i bits,
    // so addresses 4096..32767 accepted here lose their top three bits.
    if (targetAddress >= 0 && targetAddress <= 32767) {
        result.nixbpe = 0x00;
        result.displacement = targetAddress & 0x7FFF;
        result.success = true;
        return result;
    }

    // Could not find suitable addressing mode
    result.success = false;
    return result;
}

// Returns the bytes stored for a data line:
//   WORD       -> 3 bytes, most significant first (value masked to 24 bits)
//   BYTE       -> the stored byte sequence, unchanged
//   RESW/RESB  -> nothing (the image is already zero-filled)
// Returns an empty vector for a null pointer or when the stored value is not
// of the alternative expected for its kind.
std::vector Code::generateData(const DataNode* data) {
    std::vector bytes;
    if (!data) {
        return bytes;
    }

    switch (data->kind()) {
        case DataKind::WORD: {
            if (std::holds_alternative(data->value())) {
                int value = std::get(data->value()) & 0xFFFFFF;
                // SIC/XE stores words in big-endian (MSB first)
                bytes.push_back((value >> 16) & 0xFF);
                bytes.push_back((value >> 8) & 0xFF);
                bytes.push_back(value & 0xFF);
            }
            break;
        }
        case DataKind::BYTE: {
            if (std::holds_alternative>(data->value())) {
                bytes = std::get>(data->value());
            }
            break;
        }
        case DataKind::RESW:
        case DataKind::RESB:
            // Reserved space - emit zeros (handled by initialized array)
            break;
    }
    return bytes;
}

// Produces the textual object program:
//   H <name, 6 chars> <start, 6 hex> <length, 6 hex>
//   T <address, 6 hex> <byte count, 2 hex> <bytes as hex>   (max 30 bytes each)
//   M <address, 6 hex> <half-bytes, 2 hex>                  (one per record)
//   E <start, 6 hex>
// Fields are separated by single spaces and all numbers are uppercase hex.
// Clears _modificationRecords and rebuilds them through emitCode().
std::string Code::emitText() {
    std::ostringstream oss;

    // H record: program name, start address, length
    oss << "H ";
    std::string name = _programName.empty() ? "PROG" : _programName;
    // Exactly six characters: longer names are cut, shorter ones space-padded.
    name.resize(6, ' ');
    oss << name << " ";
    oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _startAddress << " ";
    oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _programLength;
    oss << "\n";

    // Clear and rebuild modification records
    _modificationRecords.clear();

    // T records: text (code/data)
    // The image is cut into consecutive chunks; reserved (zero) areas are
    // emitted like any other bytes.
    std::vector code = emitCode();
    int textStart = 0;
    while (textStart < code.size()) {
        int textLength = std::min(30, (int)code.size() - textStart);
        oss << "T ";
        oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << (_startAddress + textStart) << " ";
        oss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase << textLength << " ";
        for (int i = 0; i < textLength; ++i) {
            // Cast so the byte prints as a number rather than a character.
            oss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase << (int)code[textStart + i];
        }
        oss << "\n";
        textStart += textLength;
    }

    // M records: modifications for format 4 instructions
    for (const auto& mod : _modificationRecords) {
        oss << "M ";
        oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << mod.address << " ";
        oss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase << mod.halfBytes;
        oss << "\n";
    }

    // E record: execution start address
    oss << "E ";
    oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _startAddress;
    oss << "\n";

    return oss.str();
}
std::string Code::dumpSymbols() const { std::ostringstream oss; oss << "=== Symbol Table ===\n"; oss << std::left << std::setw(20) << "Symbol" << "Address\n"; oss << std::string(30, '-') << "\n"; for (const auto& [symbol, address] : _symbolTable) { oss << std::left << std::setw(20) << symbol; oss << std::hex << std::uppercase << std::setw(6) << std::setfill('0') << address << "\n"; } return oss.str(); } std::string Code::dumpCode() const { std::ostringstream oss; oss << "=== Code Listing ===\n"; oss << std::hex << std::uppercase << std::setfill('0'); std::vector code = const_cast(this)->emitCode(); for (size_t i = 0; i < _lines.size(); ++i) { auto& line = _lines[i]; int address = _locationCounters[i]; int offset = address - _startAddress; // Print address oss << std::setw(6) << address << " "; // Print generated bytes int length = getInstructionLength(line, address); if (auto* data = dynamic_cast(line.get())) { auto bytes = const_cast(this)->generateData(data); length = bytes.size(); } for (int j = 0; j < length && (offset + j) < code.size(); ++j) { oss << std::setw(2) << (int)code[offset + j]; } // Pad for alignment for (int j = length; j < 12; ++j) { oss << " "; } oss << " " << line->toString() << "\n"; } return oss.str(); }