spo/simulator_SIC_XE/src/code.cpp
2025-12-21 17:17:52 +01:00

551 lines
18 KiB
C++

#include "code.h"
#include "opcode.h"
#include "constants.h"
#include <sstream>
#include <iomanip>
#include <stdexcept>
#include <cstring>
// Appends one parsed source line to the end of the program.
// The shared_ptr is copied, so the caller keeps its own reference.
void Code::addLine(const std::shared_ptr<Node> &line)
{
    _lines.push_back(line);
}
// Read-only view of the stored program lines.
const std::vector<std::shared_ptr<Node>> &Code::getLines() const
{
    const auto &stored = _lines;
    return stored;
}
// Renders the whole program as text: the toString() of every line, each
// followed by a newline, in storage order.
const string Code::toString() const
{
    string text;
    for (const auto& node : _lines) {
        text += node->toString();
        text += "\n";
    }
    return text;
}
// ============================================================
// TWO-PASS ASSEMBLER IMPLEMENTATION
// ============================================================
void Code::assemble() {
firstPass();
secondPass();
}
// Pass 1: assigns an address to every line and builds the symbol table.
//
// Walks the lines in order while tracking a location counter:
//   - stores the counter seen at each line in _locationCounters,
//   - binds every non-empty label to the current counter and throws
//     std::runtime_error if the label is already defined,
//   - applies the START / END / NOBASE / EQU / ORG directives,
//   - advances the counter by the size of each data item or instruction.
// On exit _startAddress and _programLength describe the assembled range.
void Code::firstPass() {
    _symbolTable.clear();
    _locationCounters.assign(_lines.size(), 0);

    int locctr = 0;
    bool sawStart = false;

    for (size_t idx = 0; idx < _lines.size(); ++idx) {
        auto& entry = _lines[idx];
        _locationCounters[idx] = locctr;

        // A label names the address of the line it is attached to.
        const std::string label = entry->getLabel();
        if (!label.empty()) {
            if (_symbolTable.count(label) != 0) {
                throw std::runtime_error("Duplicate symbol: " + label);
            }
            _symbolTable[label] = locctr;
        }

        // Assembler directives: they never occupy space themselves.
        if (auto* dir = dynamic_cast<DirectiveNode*>(entry.get())) {
            const auto what = dir->kind();
            if (what == DirectiveKind::START) {
                if (std::holds_alternative<int>(dir->arg())) {
                    _startAddress = std::get<int>(dir->arg());
                    locctr = _startAddress;
                    // The START line itself sits at the start address.
                    _locationCounters[idx] = locctr;
                    if (!label.empty()) {
                        // Rebind the label (it was entered with the old
                        // counter above) and use it as the program name.
                        _symbolTable[label] = locctr;
                        _programName = label;
                    }
                    sawStart = true;
                }
            } else if (what == DirectiveKind::END) {
                _programLength = locctr - _startAddress;
            } else if (what == DirectiveKind::BASE) {
                // Nothing to do in this pass: a symbolic BASE operand may
                // refer to a label that has not been seen yet.
            } else if (what == DirectiveKind::NOBASE) {
                _baseRegister = -1;
            } else if (what == DirectiveKind::EQU) {
                // EQU overrides the address binding made above with the
                // literal value, when one is given.
                if (!label.empty() && std::holds_alternative<int>(dir->arg())) {
                    _symbolTable[label] = std::get<int>(dir->arg());
                }
            } else if (what == DirectiveKind::ORG) {
                if (std::holds_alternative<int>(dir->arg())) {
                    locctr = std::get<int>(dir->arg());
                }
            }
            continue;
        }

        // Storage directives: advance by the number of bytes they occupy.
        if (auto* data = dynamic_cast<DataNode*>(entry.get())) {
            int size = 0;
            const auto what = data->kind();
            if (what == DataKind::WORD) {
                size = 3; // one 24-bit word
            } else if (what == DataKind::BYTE) {
                if (std::holds_alternative<std::vector<uint8_t>>(data->value())) {
                    size = static_cast<int>(std::get<std::vector<uint8_t>>(data->value()).size());
                }
            } else if (what == DataKind::RESW) {
                if (std::holds_alternative<int>(data->value())) {
                    size = std::get<int>(data->value()) * 3;
                }
            } else if (what == DataKind::RESB) {
                if (std::holds_alternative<int>(data->value())) {
                    size = std::get<int>(data->value());
                }
            }
            locctr += size;
            continue;
        }

        // Machine instructions: 1 to 4 bytes depending on the format.
        if (dynamic_cast<InstructionNode*>(entry.get()) != nullptr) {
            locctr += getInstructionLength(entry, locctr);
        }
    }

    // Without a usable START the program is assembled from address 0.
    if (!sawStart) {
        _startAddress = 0;
    }
    _programLength = locctr - _startAddress;
}
int Code::getInstructionLength(const std::shared_ptr<Node>& node, int locationCounter) const {
auto* inst = dynamic_cast<InstructionNode*>(node.get());
if (!inst || !inst->getMnemonic()) {
return 0;
}
auto mnemonic = inst->getMnemonic();
InstructionType type = mnemonic->type();
switch (type) {
case InstructionType::TYPE1:
return 1;
case InstructionType::TYPE2:
return 2;
case InstructionType::TYPE3_4:
return mnemonic->extended() ? 4 : 3;
default:
return 0;
}
}
// Pass 2 placeholder: intentionally does nothing.
// Object bytes are not cached here; emitCode() generates them on demand,
// and emitText() obtains them through emitCode().
void Code::secondPass() {
}
std::vector<uint8_t> Code::emitCode() {
std::vector<uint8_t> code;
code.resize(_programLength, 0);
for (size_t i = 0; i < _lines.size(); ++i) {
auto& line = _lines[i];
int address = _locationCounters[i];
int offset = address - _startAddress;
if (offset < 0 || offset >= _programLength) {
continue;
}
// Generate instruction
if (auto* inst = dynamic_cast<InstructionNode*>(line.get())) {
auto bytes = generateInstruction(inst, address);
for (size_t j = 0; j < bytes.size() && (offset + j) < code.size(); ++j) {
code[offset + j] = bytes[j];
}
}
// Generate data
if (auto* data = dynamic_cast<DataNode*>(line.get())) {
auto bytes = generateData(data);
for (size_t j = 0; j < bytes.size() && (offset + j) < code.size(); ++j) {
code[offset + j] = bytes[j];
}
}
}
return code;
}
// Encodes the machine instruction located at `address` into object bytes.
//
// Returns 1 byte for format 1, 2 bytes for format 2, 3 bytes for format 3
// and 4 bytes for format 4 (extended). Returns an empty vector when `inst`
// is null, has no mnemonic, or has an unrecognised instruction type.
//
// Format 3/4 layout:  [opcode(6) n i] [x b p e | high bits] [low bits ...].
// A symbol that is not in _symbolTable is assembled as address 0.
//
// Side effect: a format 4 instruction whose operand is a symbol reference
// appends a ModificationRecord (address + 1, 5 half-bytes) to
// _modificationRecords.
std::vector<uint8_t> Code::generateInstruction(const InstructionNode* inst, int address) {
    std::vector<uint8_t> bytes;
    if (!inst || !inst->getMnemonic()) {
        return bytes;
    }
    auto mnemonic = inst->getMnemonic();
    uint8_t opcode = mnemonic->opcode();
    InstructionType type = mnemonic->type();
    bool extended = mnemonic->extended();
    const auto& operands = mnemonic->operands();

    switch (type) {
        case InstructionType::TYPE1: {
            bytes.push_back(opcode);
            break;
        }
        case InstructionType::TYPE2: {
            // opcode followed by two 4-bit register numbers
            bytes.push_back(opcode);
            uint8_t r1 = 0, r2 = 0;
            if (operands.size() >= 1 && std::holds_alternative<Register>(operands[0])) {
                r1 = std::get<Register>(operands[0]).num & 0xF;
            }
            if (operands.size() >= 2 && std::holds_alternative<Register>(operands[1])) {
                r2 = std::get<Register>(operands[1]).num & 0xF;
            }
            bytes.push_back(static_cast<uint8_t>((r1 << 4) | r2));
            break;
        }
        case InstructionType::TYPE3_4: {
            int ni = 0, x = 0, b = 0, p = 0, e = 0;
            int targetAddress = 0;
            bool immediate = false, indirect = false, indexed = false;

            // Decode the operand into a target value and the n/i bits.
            if (!operands.empty()) {
                if (std::holds_alternative<Immediate>(operands[0])) {
                    immediate = true;
                    targetAddress = std::get<Immediate>(operands[0]).value;
                    ni = 0x01; // n=0, i=1
                } else if (std::holds_alternative<SymbolRef>(operands[0])) {
                    const auto& sym = std::get<SymbolRef>(operands[0]);
                    immediate = sym.immediate;
                    indirect = sym.indirect;
                    indexed = sym.indexed;
                    // Look up symbol; an unknown name leaves the target at 0.
                    auto it = _symbolTable.find(sym.name);
                    if (it != _symbolTable.end()) {
                        targetAddress = it->second;
                    }
                    if (immediate) {
                        ni = 0x01; // n=0, i=1
                    } else if (indirect) {
                        ni = 0x02; // n=1, i=0
                    } else {
                        ni = 0x03; // n=1, i=1 (simple/direct)
                    }
                }
            } else {
                // No operand (like RSUB)
                ni = 0x03;
            }
            if (indexed) {
                x = 1;
            }
            if (extended) {
                e = 1;
            }

            // PC-relative displacements are measured from the next instruction.
            int pc = address + (extended ? 4 : 3);
            auto result = selectAddressingMode(targetAddress, pc, indexed, immediate, indirect, extended);
            if (result.success) {
                b = (result.nixbpe >> 2) & 1;
                p = (result.nixbpe >> 1) & 1;
                e = result.nixbpe & 1;
            }
            const int displacement = result.displacement;

            // selectAddressingMode() falls back to the standard-SIC form (a
            // 15-bit address) when no other mode reaches the target; that is
            // the only non-extended case in which the returned field is wider
            // than 12 bits. It must be encoded with n=0, i=0 so that the b/p/e
            // bit positions carry address bits. Keeping n=i=1 and masking to
            // 12 bits would silently encode a different address.
            // NOTE(review): an indirect (@) operand that only fits the 15-bit
            // form cannot be encoded this way and is still truncated to 12
            // bits below - confirm whether that should be reported as an error.
            const bool sicFormat = !extended && !immediate && !indirect &&
                                   result.success && displacement > 0xFFF;
            if (sicFormat) {
                ni = 0x00; // n=0, i=0
            }

            uint8_t byte1 = (opcode & 0xFC) | ni;
            uint8_t byte2 = (x << 7) | (b << 6) | (p << 5) | (e << 4);
            bytes.push_back(byte1);

            if (extended) {
                // Format 4: 20-bit address
                byte2 |= (displacement >> 16) & 0x0F;
                bytes.push_back(byte2);
                bytes.push_back((displacement >> 8) & 0xFF);
                bytes.push_back(displacement & 0xFF);

                // Format 4 instructions with symbol references (not immediate
                // constants) carry an absolute address and need an M record.
                bool needsRelocation = false;
                if (!operands.empty() && std::holds_alternative<SymbolRef>(operands[0])) {
                    const auto& sym = std::get<SymbolRef>(operands[0]);
                    if (!sym.immediate || _symbolTable.find(sym.name) != _symbolTable.end()) {
                        needsRelocation = true;
                    }
                }
                if (needsRelocation) {
                    ModificationRecord mod;
                    mod.address = address + 1; // skip the opcode+ni byte
                    mod.halfBytes = 5;         // 20-bit address field
                    _modificationRecords.push_back(mod);
                }
            } else if (sicFormat) {
                // Standard SIC: x bit followed by a 15-bit address
                bytes.push_back(static_cast<uint8_t>((x << 7) | ((displacement >> 8) & 0x7F)));
                bytes.push_back(static_cast<uint8_t>(displacement & 0xFF));
            } else {
                // Format 3: 12-bit displacement
                byte2 |= (displacement >> 8) & 0x0F;
                bytes.push_back(byte2);
                bytes.push_back(displacement & 0xFF);
            }
            break;
        }
        default:
            break;
    }
    return bytes;
}
// Chooses how a format 3/4 operand is encoded and computes its address field.
//
// The candidates are tried in this order and the first match wins:
//   1. extended (format 4): 20-bit absolute value, e=1
//   2. immediate (format 3): 12-bit value, no b/p/e bits
//   3. PC-relative: target - pc in [-2048, 2047], p=1
//   4. base-relative: target - _baseRegister in [0, 4095], b=1
//      (only when _baseRegister is non-negative)
//   5. direct: target in [0, 4095]
//   6. standard SIC: target in [0, 32767], 15-bit field
// In the result, nixbpe carries b (bit 2), p (bit 1) and e (bit 0). When no
// candidate fits, success is false and the other fields are 0.
// `indexed` and `indirect` do not influence the choice.
Code::AddressingResult Code::selectAddressingMode(int targetAddress, int pc, bool indexed, bool immediate, bool indirect, bool extended) const {
    // Builds a successful result from the bpe flags and the address field.
    const auto resolved = [](int bpe, int field) {
        AddressingResult r;
        r.success = true;
        r.nixbpe = bpe;
        r.displacement = field;
        return r;
    };

    // Format 4 always stores the value itself, immediate or not.
    if (extended) {
        return resolved(0x01, targetAddress & 0xFFFFF);
    }
    // Format 3 immediate: the operand value goes straight into the field.
    if (immediate) {
        return resolved(0x00, targetAddress & 0xFFF);
    }

    const int fromPc = targetAddress - pc;
    if (-2048 <= fromPc && fromPc <= 2047) {
        return resolved(0x02, fromPc & 0xFFF);
    }

    if (_baseRegister >= 0) {
        const int fromBase = targetAddress - _baseRegister;
        if (0 <= fromBase && fromBase <= 4095) {
            return resolved(0x04, fromBase & 0xFFF);
        }
    }

    if (0 <= targetAddress && targetAddress <= 4095) {
        return resolved(0x00, targetAddress & 0xFFF);
    }

    if (0 <= targetAddress && targetAddress <= 32767) {
        return resolved(0x00, targetAddress & 0x7FFF);
    }

    // Nothing reaches the target.
    AddressingResult unresolved;
    unresolved.success = false;
    unresolved.nixbpe = 0;
    unresolved.displacement = 0;
    return unresolved;
}
// Returns the object bytes for a storage directive.
//   WORD       -> the low 24 bits of the integer value, most significant
//                 byte first
//   BYTE       -> a copy of the stored byte sequence
//   RESW/RESB  -> nothing (the caller's image is already zero-filled)
// A null pointer, or a value of an unexpected type, also yields no bytes.
std::vector<uint8_t> Code::generateData(const DataNode* data) {
    if (data == nullptr) {
        return {};
    }

    const auto what = data->kind();
    if (what == DataKind::WORD) {
        if (!std::holds_alternative<int>(data->value())) {
            return {};
        }
        const int word = std::get<int>(data->value()) & 0xFFFFFF;
        std::vector<uint8_t> encoded;
        for (int shift = 16; shift >= 0; shift -= 8) {
            encoded.push_back((word >> shift) & 0xFF);
        }
        return encoded;
    }
    if (what == DataKind::BYTE) {
        if (!std::holds_alternative<std::vector<uint8_t>>(data->value())) {
            return {};
        }
        std::vector<uint8_t> raw = std::get<std::vector<uint8_t>>(data->value());
        return raw;
    }
    return {};
}
// Produces the textual object program:
//   H  name (padded/truncated to 6 chars), start address, program length
//   T  start address, byte count, bytes   (at most 30 bytes per record)
//   M  address, length in half-bytes      (one per modification record)
//   E  start address
// All numbers are zero-padded uppercase hex. The program name defaults to
// "PROG" when none was set. _modificationRecords is cleared and then
// refilled as a side effect of calling emitCode().
std::string Code::emitText() {
    std::ostringstream out;
    // Fill, base and case are sticky; only the width must be repeated because
    // it resets after every insertion.
    out << std::setfill('0') << std::hex << std::uppercase;

    // H record
    std::string header = _programName.empty() ? "PROG" : _programName;
    header.resize(6, ' ');
    out << "H " << header << " ";
    out << std::setw(6) << _startAddress << " ";
    out << std::setw(6) << _programLength << "\n";

    // T records
    _modificationRecords.clear();
    const std::vector<uint8_t> image = emitCode();
    const int total = (int)image.size();
    int pos = 0;
    while (pos < total) {
        const int chunk = std::min(30, total - pos);
        out << "T " << std::setw(6) << (_startAddress + pos) << " ";
        out << std::setw(2) << chunk << " ";
        for (int k = 0; k < chunk; ++k) {
            out << std::setw(2) << (int)image[pos + k];
        }
        out << "\n";
        pos += chunk;
    }

    // M records
    for (const auto& mod : _modificationRecords) {
        out << "M " << std::setw(6) << mod.address << " ";
        out << std::setw(2) << mod.halfBytes << "\n";
    }

    // E record
    out << "E " << std::setw(6) << _startAddress << "\n";
    return out.str();
}
// Formats the symbol table as a two-column listing: the symbol name
// left-aligned in a 20-character column, then its value as 6-digit
// zero-padded uppercase hex. Rows follow the iteration order of
// _symbolTable.
std::string Code::dumpSymbols() const {
    std::ostringstream oss;
    oss << "=== Symbol Table ===\n";
    oss << std::left << std::setw(20) << "Symbol" << "Address\n";
    oss << std::string(30, '-') << "\n";
    for (const auto& [symbol, address] : _symbolTable) {
        // Alignment and fill are sticky stream state, so both are set for
        // each column. Left alignment with a '0' fill would pad the address
        // on the right (0x1000 -> "100000"), and a leftover '0' fill would
        // pad the next symbol name with zeros.
        oss << std::left << std::setfill(' ') << std::setw(20) << symbol;
        oss << std::right << std::setfill('0') << std::hex << std::uppercase
            << std::setw(6) << address << "\n";
    }
    return oss.str();
}
std::string Code::dumpCode() const {
std::ostringstream oss;
oss << "=== Code Listing ===\n";
oss << std::hex << std::uppercase << std::setfill('0');
std::vector<uint8_t> code = const_cast<Code*>(this)->emitCode();
for (size_t i = 0; i < _lines.size(); ++i) {
auto& line = _lines[i];
int address = _locationCounters[i];
int offset = address - _startAddress;
// Print address
oss << std::setw(6) << address << " ";
// Print generated bytes
int length = getInstructionLength(line, address);
if (auto* data = dynamic_cast<DataNode*>(line.get())) {
auto bytes = const_cast<Code*>(this)->generateData(data);
length = bytes.size();
}
for (int j = 0; j < length && (offset + j) < code.size(); ++j) {
oss << std::setw(2) << (int)code[offset + j];
}
// Pad for alignment
for (int j = length; j < 12; ++j) {
oss << " ";
}
oss << " " << line->toString() << "\n";
}
return oss.str();
}