551 lines
18 KiB
C++
551 lines
18 KiB
C++
#include "code.h"
|
|
#include "opcode.h"
|
|
#include "constants.h"
|
|
#include <sstream>
|
|
#include <iomanip>
|
|
#include <stdexcept>
|
|
#include <cstring>
|
|
|
|
void Code::addLine(const std::shared_ptr<Node> &line)
|
|
{
|
|
_lines.emplace_back(line);
|
|
}
|
|
|
|
const std::vector<std::shared_ptr<Node>> &Code::getLines() const
|
|
{
|
|
return _lines;
|
|
}
|
|
|
|
// Renders the whole program as text: the toString() of every stored line,
// each followed by a newline character.
const string Code::toString() const
{
    string text;
    for (auto it = _lines.begin(); it != _lines.end(); ++it) {
        text.append((*it)->toString());
        text.append("\n");
    }
    return text;
}
|
|
|
|
// ============================================================
|
|
// TWO-PASS ASSEMBLER IMPLEMENTATION
|
|
// ============================================================
|
|
|
|
void Code::assemble() {
|
|
firstPass();
|
|
secondPass();
|
|
}
|
|
|
|
void Code::firstPass() {
|
|
_symbolTable.clear();
|
|
_locationCounters.clear();
|
|
_locationCounters.resize(_lines.size(), 0);
|
|
|
|
int locationCounter = 0;
|
|
bool startFound = false;
|
|
|
|
for (size_t i = 0; i < _lines.size(); ++i) {
|
|
auto& line = _lines[i];
|
|
_locationCounters[i] = locationCounter;
|
|
|
|
// Handle label
|
|
std::string label = line->getLabel();
|
|
if (!label.empty()) {
|
|
if (_symbolTable.find(label) != _symbolTable.end()) {
|
|
throw std::runtime_error("Duplicate symbol: " + label);
|
|
}
|
|
_symbolTable[label] = locationCounter;
|
|
}
|
|
|
|
// Check for directives
|
|
if (auto* directive = dynamic_cast<DirectiveNode*>(line.get())) {
|
|
switch (directive->kind()) {
|
|
case DirectiveKind::START: {
|
|
if (std::holds_alternative<int>(directive->arg())) {
|
|
_startAddress = std::get<int>(directive->arg());
|
|
locationCounter = _startAddress;
|
|
_locationCounters[i] = locationCounter;
|
|
if (!label.empty()) {
|
|
_symbolTable[label] = locationCounter;
|
|
_programName = label;
|
|
}
|
|
startFound = true;
|
|
}
|
|
break;
|
|
}
|
|
case DirectiveKind::END:
|
|
_programLength = locationCounter - _startAddress;
|
|
break;
|
|
|
|
case DirectiveKind::BASE: {
|
|
// BASE sets base register for addressing
|
|
if (std::holds_alternative<std::string>(directive->arg())) {
|
|
// Will resolve in second pass
|
|
}
|
|
break;
|
|
}
|
|
case DirectiveKind::NOBASE:
|
|
_baseRegister = -1;
|
|
break;
|
|
|
|
case DirectiveKind::EQU: {
|
|
// EQU defines symbol value
|
|
if (!label.empty() && std::holds_alternative<int>(directive->arg())) {
|
|
_symbolTable[label] = std::get<int>(directive->arg());
|
|
}
|
|
break;
|
|
}
|
|
case DirectiveKind::ORG: {
|
|
// ORG changes location counter
|
|
if (std::holds_alternative<int>(directive->arg())) {
|
|
locationCounter = std::get<int>(directive->arg());
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// Handle data directives
|
|
if (auto* data = dynamic_cast<DataNode*>(line.get())) {
|
|
int length = 0;
|
|
switch (data->kind()) {
|
|
case DataKind::WORD:
|
|
length = 3; // 24-bit word
|
|
break;
|
|
case DataKind::BYTE: {
|
|
if (std::holds_alternative<std::vector<uint8_t>>(data->value())) {
|
|
length = std::get<std::vector<uint8_t>>(data->value()).size();
|
|
}
|
|
break;
|
|
}
|
|
case DataKind::RESW: {
|
|
if (std::holds_alternative<int>(data->value())) {
|
|
length = std::get<int>(data->value()) * 3;
|
|
}
|
|
break;
|
|
}
|
|
case DataKind::RESB: {
|
|
if (std::holds_alternative<int>(data->value())) {
|
|
length = std::get<int>(data->value());
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
locationCounter += length;
|
|
continue;
|
|
}
|
|
|
|
// Handle instructions
|
|
if (auto* inst = dynamic_cast<InstructionNode*>(line.get())) {
|
|
int length = getInstructionLength(line, locationCounter);
|
|
locationCounter += length;
|
|
}
|
|
}
|
|
|
|
if (!startFound) {
|
|
_startAddress = 0;
|
|
}
|
|
_programLength = locationCounter - _startAddress;
|
|
}
|
|
|
|
int Code::getInstructionLength(const std::shared_ptr<Node>& node, int locationCounter) const {
|
|
auto* inst = dynamic_cast<InstructionNode*>(node.get());
|
|
if (!inst || !inst->getMnemonic()) {
|
|
return 0;
|
|
}
|
|
|
|
auto mnemonic = inst->getMnemonic();
|
|
InstructionType type = mnemonic->type();
|
|
|
|
switch (type) {
|
|
case InstructionType::TYPE1:
|
|
return 1;
|
|
case InstructionType::TYPE2:
|
|
return 2;
|
|
case InstructionType::TYPE3_4:
|
|
return mnemonic->extended() ? 4 : 3;
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// Pass 2 placeholder: currently performs no work.
// Intended to generate the code for all instructions and data, for use by
// emitCode() and emitText().
void Code::secondPass() {
}
|
|
|
|
std::vector<uint8_t> Code::emitCode() {
|
|
std::vector<uint8_t> code;
|
|
code.resize(_programLength, 0);
|
|
|
|
for (size_t i = 0; i < _lines.size(); ++i) {
|
|
auto& line = _lines[i];
|
|
int address = _locationCounters[i];
|
|
int offset = address - _startAddress;
|
|
|
|
if (offset < 0 || offset >= _programLength) {
|
|
continue;
|
|
}
|
|
|
|
// Generate instruction
|
|
if (auto* inst = dynamic_cast<InstructionNode*>(line.get())) {
|
|
auto bytes = generateInstruction(inst, address);
|
|
for (size_t j = 0; j < bytes.size() && (offset + j) < code.size(); ++j) {
|
|
code[offset + j] = bytes[j];
|
|
}
|
|
}
|
|
|
|
// Generate data
|
|
if (auto* data = dynamic_cast<DataNode*>(line.get())) {
|
|
auto bytes = generateData(data);
|
|
for (size_t j = 0; j < bytes.size() && (offset + j) < code.size(); ++j) {
|
|
code[offset + j] = bytes[j];
|
|
}
|
|
}
|
|
}
|
|
|
|
return code;
|
|
}
|
|
|
|
std::vector<uint8_t> Code::generateInstruction(const InstructionNode* inst, int address) {
|
|
std::vector<uint8_t> bytes;
|
|
|
|
if (!inst || !inst->getMnemonic()) {
|
|
return bytes;
|
|
}
|
|
|
|
auto mnemonic = inst->getMnemonic();
|
|
uint8_t opcode = mnemonic->opcode();
|
|
InstructionType type = mnemonic->type();
|
|
bool extended = mnemonic->extended();
|
|
const auto& operands = mnemonic->operands();
|
|
|
|
switch (type) {
|
|
case InstructionType::TYPE1: {
|
|
bytes.push_back(opcode);
|
|
break;
|
|
}
|
|
|
|
case InstructionType::TYPE2: {
|
|
bytes.push_back(opcode);
|
|
uint8_t r1 = 0, r2 = 0;
|
|
if (operands.size() >= 1 && std::holds_alternative<Register>(operands[0])) {
|
|
r1 = std::get<Register>(operands[0]).num & 0xF;
|
|
}
|
|
if (operands.size() >= 2 && std::holds_alternative<Register>(operands[1])) {
|
|
r2 = std::get<Register>(operands[1]).num & 0xF;
|
|
}
|
|
bytes.push_back((r1 << 4) | r2);
|
|
break;
|
|
}
|
|
|
|
case InstructionType::TYPE3_4: {
|
|
// Format 3 or 4 instruction
|
|
int ni = 0, x = 0, b = 0, p = 0, e = 0;
|
|
int targetAddress = 0;
|
|
bool immediate = false, indirect = false, indexed = false;
|
|
|
|
// Parse operand
|
|
if (!operands.empty()) {
|
|
if (std::holds_alternative<Immediate>(operands[0])) {
|
|
immediate = true;
|
|
targetAddress = std::get<Immediate>(operands[0]).value;
|
|
ni = 0x01; // n=0, i=1
|
|
} else if (std::holds_alternative<SymbolRef>(operands[0])) {
|
|
auto& sym = std::get<SymbolRef>(operands[0]);
|
|
immediate = sym.immediate;
|
|
indirect = sym.indirect;
|
|
indexed = sym.indexed;
|
|
|
|
// Look up symbol
|
|
auto it = _symbolTable.find(sym.name);
|
|
if (it != _symbolTable.end()) {
|
|
targetAddress = it->second;
|
|
}
|
|
|
|
// Set ni bits
|
|
if (immediate) {
|
|
ni = 0x01; // n=0, i=1
|
|
} else if (indirect) {
|
|
ni = 0x02; // n=1, i=0
|
|
} else {
|
|
ni = 0x03; // n=1, i=1 (simple/direct)
|
|
}
|
|
}
|
|
} else {
|
|
// No operand (like RSUB)
|
|
ni = 0x03;
|
|
}
|
|
|
|
if (indexed) {
|
|
x = 1;
|
|
}
|
|
|
|
if (extended) {
|
|
e = 1;
|
|
}
|
|
|
|
// Calculate PC for addressing
|
|
int pc = address + (extended ? 4 : 3);
|
|
|
|
// Select addressing mode
|
|
auto result = selectAddressingMode(targetAddress, pc, indexed, immediate, indirect, extended);
|
|
|
|
if (result.success) {
|
|
b = (result.nixbpe >> 2) & 1;
|
|
p = (result.nixbpe >> 1) & 1;
|
|
e = result.nixbpe & 1;
|
|
}
|
|
|
|
int displacement = result.displacement;
|
|
|
|
// Build instruction bytes
|
|
uint8_t byte1 = (opcode & 0xFC) | ni;
|
|
uint8_t byte2 = (x << 7) | (b << 6) | (p << 5) | (e << 4);
|
|
|
|
bytes.push_back(byte1);
|
|
|
|
if (extended) {
|
|
// Format 4: 20-bit address
|
|
byte2 |= (displacement >> 16) & 0x0F;
|
|
bytes.push_back(byte2);
|
|
bytes.push_back((displacement >> 8) & 0xFF);
|
|
bytes.push_back(displacement & 0xFF);
|
|
|
|
// Format 4 instructions with symbol references (not immediate values) need M records
|
|
bool needsRelocation = false;
|
|
if (!operands.empty() && std::holds_alternative<SymbolRef>(operands[0])) {
|
|
auto& sym = std::get<SymbolRef>(operands[0]);
|
|
// If it's not an immediate mode with a constant, it needs relocation
|
|
if (!sym.immediate || _symbolTable.find(sym.name) != _symbolTable.end()) {
|
|
needsRelocation = true;
|
|
}
|
|
}
|
|
|
|
// Record modification if needed
|
|
if (needsRelocation) {
|
|
ModificationRecord mod;
|
|
mod.address = address + 1; // Skip the opcode+ni byte, start at xbpe+addr
|
|
mod.halfBytes = 5; // 5 half-bytes (20 bits) for format 4 address field
|
|
_modificationRecords.push_back(mod);
|
|
}
|
|
} else {
|
|
// Format 3: 12-bit displacement
|
|
byte2 |= (displacement >> 8) & 0x0F;
|
|
bytes.push_back(byte2);
|
|
bytes.push_back(displacement & 0xFF);
|
|
}
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return bytes;
|
|
}
|
|
|
|
// Chooses how `targetAddress` is encoded in a format 3/4 instruction.
//
// The result's `nixbpe` uses bit 2 for b, bit 1 for p and bit 0 for e;
// `displacement` is the already-masked address field. Candidates are tried
// in this order and the first that fits wins:
//   1. extended                   -> e=1, 20-bit absolute value
//   2. immediate                  -> no flags, 12-bit value
//   3. PC-relative                -> p=1, target - pc in [-2048, 2047]
//   4. base-relative              -> b=1, target - _baseRegister in [0, 4095]
//                                    (only when _baseRegister >= 0)
//   5. direct                     -> no flags, target in [0, 4095]
//   6. 15-bit SIC form            -> no flags, target in [0, 32767]
// When nothing fits, `success` is false and the other fields are 0.
// `indexed` and `indirect` do not influence the choice.
Code::AddressingResult Code::selectAddressingMode(int targetAddress, int pc, bool indexed, bool immediate, bool indirect, bool extended) const {
    (void)indexed;
    (void)indirect;

    AddressingResult outcome;
    outcome.success = true;
    outcome.nixbpe = 0;
    outcome.displacement = 0;

    if (extended) {
        // Same encoding whether or not the operand is immediate.
        outcome.nixbpe = 0x01;
        outcome.displacement = targetAddress & 0xFFFFF;
    } else if (immediate) {
        outcome.nixbpe = 0x00;
        outcome.displacement = targetAddress & 0xFFF;
    } else if (targetAddress - pc >= -2048 && targetAddress - pc <= 2047) {
        outcome.nixbpe = 0x02;
        outcome.displacement = (targetAddress - pc) & 0xFFF;
    } else if (_baseRegister >= 0 &&
               targetAddress - _baseRegister >= 0 &&
               targetAddress - _baseRegister <= 4095) {
        outcome.nixbpe = 0x04;
        outcome.displacement = (targetAddress - _baseRegister) & 0xFFF;
    } else if (targetAddress >= 0 && targetAddress <= 4095) {
        outcome.nixbpe = 0x00;
        outcome.displacement = targetAddress & 0xFFF;
    } else if (targetAddress >= 0 && targetAddress <= 32767) {
        outcome.nixbpe = 0x00;
        outcome.displacement = targetAddress & 0x7FFF;
    } else {
        // No encoding can reach the target.
        outcome.success = false;
    }

    return outcome;
}
|
|
|
|
std::vector<uint8_t> Code::generateData(const DataNode* data) {
|
|
std::vector<uint8_t> bytes;
|
|
|
|
if (!data) {
|
|
return bytes;
|
|
}
|
|
|
|
switch (data->kind()) {
|
|
case DataKind::WORD: {
|
|
if (std::holds_alternative<int>(data->value())) {
|
|
int value = std::get<int>(data->value()) & 0xFFFFFF;
|
|
// SIC/XE stores words in big-endian (MSB first)
|
|
bytes.push_back((value >> 16) & 0xFF);
|
|
bytes.push_back((value >> 8) & 0xFF);
|
|
bytes.push_back(value & 0xFF);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case DataKind::BYTE: {
|
|
if (std::holds_alternative<std::vector<uint8_t>>(data->value())) {
|
|
bytes = std::get<std::vector<uint8_t>>(data->value());
|
|
}
|
|
break;
|
|
}
|
|
|
|
case DataKind::RESW:
|
|
case DataKind::RESB:
|
|
// Reserved space - emit zeros (handled by initialized array)
|
|
break;
|
|
}
|
|
|
|
return bytes;
|
|
}
|
|
|
|
std::string Code::emitText() {
|
|
std::ostringstream oss;
|
|
|
|
// H record: program name, start address, length
|
|
oss << "H ";
|
|
std::string name = _programName.empty() ? "PROG" : _programName;
|
|
name.resize(6, ' ');
|
|
oss << name << " ";
|
|
oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _startAddress << " ";
|
|
oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _programLength;
|
|
oss << "\n";
|
|
|
|
// Clear and rebuild modification records
|
|
_modificationRecords.clear();
|
|
|
|
// T records: text (code/data)
|
|
std::vector<uint8_t> code = emitCode();
|
|
int textStart = 0;
|
|
|
|
while (textStart < code.size()) {
|
|
int textLength = std::min(30, (int)code.size() - textStart);
|
|
|
|
oss << "T ";
|
|
oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << (_startAddress + textStart) << " ";
|
|
oss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase << textLength << " ";
|
|
|
|
for (int i = 0; i < textLength; ++i) {
|
|
oss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase << (int)code[textStart + i];
|
|
}
|
|
oss << "\n";
|
|
|
|
textStart += textLength;
|
|
}
|
|
|
|
// M records: modifications for format 4 instructions
|
|
for (const auto& mod : _modificationRecords) {
|
|
oss << "M ";
|
|
oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << mod.address << " ";
|
|
oss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase << mod.halfBytes;
|
|
oss << "\n";
|
|
}
|
|
|
|
// E record: execution start address
|
|
oss << "E ";
|
|
oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _startAddress;
|
|
oss << "\n";
|
|
|
|
return oss.str();
|
|
}
|
|
|
|
std::string Code::dumpSymbols() const {
|
|
std::ostringstream oss;
|
|
oss << "=== Symbol Table ===\n";
|
|
oss << std::left << std::setw(20) << "Symbol" << "Address\n";
|
|
oss << std::string(30, '-') << "\n";
|
|
|
|
for (const auto& [symbol, address] : _symbolTable) {
|
|
oss << std::left << std::setw(20) << symbol;
|
|
oss << std::hex << std::uppercase << std::setw(6) << std::setfill('0') << address << "\n";
|
|
}
|
|
|
|
return oss.str();
|
|
}
|
|
|
|
std::string Code::dumpCode() const {
|
|
std::ostringstream oss;
|
|
oss << "=== Code Listing ===\n";
|
|
oss << std::hex << std::uppercase << std::setfill('0');
|
|
|
|
std::vector<uint8_t> code = const_cast<Code*>(this)->emitCode();
|
|
|
|
for (size_t i = 0; i < _lines.size(); ++i) {
|
|
auto& line = _lines[i];
|
|
int address = _locationCounters[i];
|
|
int offset = address - _startAddress;
|
|
|
|
// Print address
|
|
oss << std::setw(6) << address << " ";
|
|
|
|
// Print generated bytes
|
|
int length = getInstructionLength(line, address);
|
|
if (auto* data = dynamic_cast<DataNode*>(line.get())) {
|
|
auto bytes = const_cast<Code*>(this)->generateData(data);
|
|
length = bytes.size();
|
|
}
|
|
|
|
for (int j = 0; j < length && (offset + j) < code.size(); ++j) {
|
|
oss << std::setw(2) << (int)code[offset + j];
|
|
}
|
|
|
|
// Pad for alignment
|
|
for (int j = length; j < 12; ++j) {
|
|
oss << " ";
|
|
}
|
|
|
|
oss << " " << line->toString() << "\n";
|
|
}
|
|
|
|
return oss.str();
|
|
}
|