assembling first version
This commit is contained in:
parent
9e9039af05
commit
d3e08abd30
7 changed files with 896 additions and 1 deletions
|
|
@ -1,4 +1,10 @@
|
|||
#include "code.h"
|
||||
#include "opcode.h"
|
||||
#include "constants.h"
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
|
||||
void Code::addLine(const std::shared_ptr<Node> &line)
|
||||
{
|
||||
|
|
@ -18,3 +24,510 @@ const string Code::toString() const
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// TWO-PASS ASSEMBLER IMPLEMENTATION
|
||||
// ============================================================
|
||||
|
||||
void Code::assemble() {
|
||||
firstPass();
|
||||
secondPass();
|
||||
}
|
||||
|
||||
void Code::firstPass() {
|
||||
_symbolTable.clear();
|
||||
_locationCounters.clear();
|
||||
_locationCounters.resize(_lines.size(), 0);
|
||||
|
||||
int locationCounter = 0;
|
||||
bool startFound = false;
|
||||
|
||||
for (size_t i = 0; i < _lines.size(); ++i) {
|
||||
auto& line = _lines[i];
|
||||
_locationCounters[i] = locationCounter;
|
||||
|
||||
// Handle label
|
||||
std::string label = line->getLabel();
|
||||
if (!label.empty()) {
|
||||
if (_symbolTable.find(label) != _symbolTable.end()) {
|
||||
throw std::runtime_error("Duplicate symbol: " + label);
|
||||
}
|
||||
_symbolTable[label] = locationCounter;
|
||||
}
|
||||
|
||||
// Check for directives
|
||||
if (auto* directive = dynamic_cast<DirectiveNode*>(line.get())) {
|
||||
switch (directive->kind()) {
|
||||
case DirectiveKind::START: {
|
||||
if (std::holds_alternative<int>(directive->arg())) {
|
||||
_startAddress = std::get<int>(directive->arg());
|
||||
locationCounter = _startAddress;
|
||||
_locationCounters[i] = locationCounter;
|
||||
if (!label.empty()) {
|
||||
_symbolTable[label] = locationCounter;
|
||||
_programName = label;
|
||||
}
|
||||
startFound = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DirectiveKind::END:
|
||||
_programLength = locationCounter - _startAddress;
|
||||
break;
|
||||
|
||||
case DirectiveKind::BASE: {
|
||||
// BASE sets base register for addressing
|
||||
if (std::holds_alternative<std::string>(directive->arg())) {
|
||||
// Will resolve in second pass
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DirectiveKind::NOBASE:
|
||||
_baseRegister = -1;
|
||||
break;
|
||||
|
||||
case DirectiveKind::EQU: {
|
||||
// EQU defines symbol value
|
||||
if (!label.empty() && std::holds_alternative<int>(directive->arg())) {
|
||||
_symbolTable[label] = std::get<int>(directive->arg());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DirectiveKind::ORG: {
|
||||
// ORG changes location counter
|
||||
if (std::holds_alternative<int>(directive->arg())) {
|
||||
locationCounter = std::get<int>(directive->arg());
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle data directives
|
||||
if (auto* data = dynamic_cast<DataNode*>(line.get())) {
|
||||
int length = 0;
|
||||
switch (data->kind()) {
|
||||
case DataKind::WORD:
|
||||
length = 3; // 24-bit word
|
||||
break;
|
||||
case DataKind::BYTE: {
|
||||
if (std::holds_alternative<std::vector<uint8_t>>(data->value())) {
|
||||
length = std::get<std::vector<uint8_t>>(data->value()).size();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataKind::RESW: {
|
||||
if (std::holds_alternative<int>(data->value())) {
|
||||
length = std::get<int>(data->value()) * 3;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DataKind::RESB: {
|
||||
if (std::holds_alternative<int>(data->value())) {
|
||||
length = std::get<int>(data->value());
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
locationCounter += length;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle instructions
|
||||
if (auto* inst = dynamic_cast<InstructionNode*>(line.get())) {
|
||||
int length = getInstructionLength(line, locationCounter);
|
||||
locationCounter += length;
|
||||
}
|
||||
}
|
||||
|
||||
if (!startFound) {
|
||||
_startAddress = 0;
|
||||
}
|
||||
_programLength = locationCounter - _startAddress;
|
||||
}
|
||||
|
||||
int Code::getInstructionLength(const std::shared_ptr<Node>& node, int locationCounter) const {
|
||||
auto* inst = dynamic_cast<InstructionNode*>(node.get());
|
||||
if (!inst || !inst->getMnemonic()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto mnemonic = inst->getMnemonic();
|
||||
InstructionType type = mnemonic->type();
|
||||
|
||||
switch (type) {
|
||||
case InstructionType::TYPE1:
|
||||
return 1;
|
||||
case InstructionType::TYPE2:
|
||||
return 2;
|
||||
case InstructionType::TYPE3_4:
|
||||
return mnemonic->extended() ? 4 : 3;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Pass 2 placeholder: intentionally empty for now. Byte generation is
// performed on demand by emitCode() (which emitText() also calls).
void Code::secondPass() {
}
|
||||
|
||||
std::vector<uint8_t> Code::emitCode() {
|
||||
std::vector<uint8_t> code;
|
||||
code.resize(_programLength, 0);
|
||||
|
||||
for (size_t i = 0; i < _lines.size(); ++i) {
|
||||
auto& line = _lines[i];
|
||||
int address = _locationCounters[i];
|
||||
int offset = address - _startAddress;
|
||||
|
||||
if (offset < 0 || offset >= _programLength) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Generate instruction
|
||||
if (auto* inst = dynamic_cast<InstructionNode*>(line.get())) {
|
||||
auto bytes = generateInstruction(inst, address);
|
||||
for (size_t j = 0; j < bytes.size() && (offset + j) < code.size(); ++j) {
|
||||
code[offset + j] = bytes[j];
|
||||
}
|
||||
}
|
||||
|
||||
// Generate data
|
||||
if (auto* data = dynamic_cast<DataNode*>(line.get())) {
|
||||
auto bytes = generateData(data);
|
||||
for (size_t j = 0; j < bytes.size() && (offset + j) < code.size(); ++j) {
|
||||
code[offset + j] = bytes[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> Code::generateInstruction(const InstructionNode* inst, int address) {
|
||||
std::vector<uint8_t> bytes;
|
||||
|
||||
if (!inst || !inst->getMnemonic()) {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
auto mnemonic = inst->getMnemonic();
|
||||
uint8_t opcode = mnemonic->opcode();
|
||||
InstructionType type = mnemonic->type();
|
||||
bool extended = mnemonic->extended();
|
||||
const auto& operands = mnemonic->operands();
|
||||
|
||||
switch (type) {
|
||||
case InstructionType::TYPE1: {
|
||||
bytes.push_back(opcode);
|
||||
break;
|
||||
}
|
||||
|
||||
case InstructionType::TYPE2: {
|
||||
bytes.push_back(opcode);
|
||||
uint8_t r1 = 0, r2 = 0;
|
||||
if (operands.size() >= 1 && std::holds_alternative<Register>(operands[0])) {
|
||||
r1 = std::get<Register>(operands[0]).num & 0xF;
|
||||
}
|
||||
if (operands.size() >= 2 && std::holds_alternative<Register>(operands[1])) {
|
||||
r2 = std::get<Register>(operands[1]).num & 0xF;
|
||||
}
|
||||
bytes.push_back((r1 << 4) | r2);
|
||||
break;
|
||||
}
|
||||
|
||||
case InstructionType::TYPE3_4: {
|
||||
// Format 3 or 4 instruction
|
||||
int ni = 0, x = 0, b = 0, p = 0, e = 0;
|
||||
int targetAddress = 0;
|
||||
bool immediate = false, indirect = false, indexed = false;
|
||||
|
||||
// Parse operand
|
||||
if (!operands.empty()) {
|
||||
if (std::holds_alternative<Immediate>(operands[0])) {
|
||||
immediate = true;
|
||||
targetAddress = std::get<Immediate>(operands[0]).value;
|
||||
ni = 0x01; // n=0, i=1
|
||||
} else if (std::holds_alternative<SymbolRef>(operands[0])) {
|
||||
auto& sym = std::get<SymbolRef>(operands[0]);
|
||||
immediate = sym.immediate;
|
||||
indirect = sym.indirect;
|
||||
indexed = sym.indexed;
|
||||
|
||||
// Look up symbol
|
||||
auto it = _symbolTable.find(sym.name);
|
||||
if (it != _symbolTable.end()) {
|
||||
targetAddress = it->second;
|
||||
}
|
||||
|
||||
// Set ni bits
|
||||
if (immediate) {
|
||||
ni = 0x01; // n=0, i=1
|
||||
} else if (indirect) {
|
||||
ni = 0x02; // n=1, i=0
|
||||
} else {
|
||||
ni = 0x03; // n=1, i=1 (simple/direct)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No operand (like RSUB)
|
||||
ni = 0x03;
|
||||
}
|
||||
|
||||
if (indexed) {
|
||||
x = 1;
|
||||
}
|
||||
|
||||
if (extended) {
|
||||
e = 1;
|
||||
}
|
||||
|
||||
// Calculate PC for addressing
|
||||
int pc = address + (extended ? 4 : 3);
|
||||
|
||||
// Select addressing mode
|
||||
auto result = selectAddressingMode(targetAddress, pc, indexed, immediate, indirect, extended);
|
||||
|
||||
if (result.success) {
|
||||
b = (result.nixbpe >> 2) & 1;
|
||||
p = (result.nixbpe >> 1) & 1;
|
||||
e = result.nixbpe & 1;
|
||||
}
|
||||
|
||||
int displacement = result.displacement;
|
||||
|
||||
// Build instruction bytes
|
||||
uint8_t byte1 = (opcode & 0xFC) | ni;
|
||||
uint8_t byte2 = (x << 7) | (b << 6) | (p << 5) | (e << 4);
|
||||
|
||||
bytes.push_back(byte1);
|
||||
|
||||
if (extended) {
|
||||
// Format 4: 20-bit address
|
||||
byte2 |= (displacement >> 16) & 0x0F;
|
||||
bytes.push_back(byte2);
|
||||
bytes.push_back((displacement >> 8) & 0xFF);
|
||||
bytes.push_back(displacement & 0xFF);
|
||||
} else {
|
||||
// Format 3: 12-bit displacement
|
||||
byte2 |= (displacement >> 8) & 0x0F;
|
||||
bytes.push_back(byte2);
|
||||
bytes.push_back(displacement & 0xFF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
// Chooses how a format 3/4 operand is encoded and returns the b/p/e flag
// bits (bit 2 = b, bit 1 = p, bit 0 = e) together with the displacement or
// address field. Candidates are tried in this order:
//
//   1. extended              -> e=1, 20-bit absolute address
//   2. immediate (format 3)  -> no flags, value masked to 12 bits
//   3. PC-relative           -> p=1, when target - pc is in [-2048, 2047]
//   4. base-relative         -> b=1, when _baseRegister >= 0 and
//                               target - _baseRegister is in [0, 4095]
//   5. direct                -> no flags, when target is in [0, 4095]
//   6. 15-bit absolute       -> no flags, when target is in [0, 32767]
//
// If none applies, `success` is false and the other fields are 0.
// `indexed` and `indirect` do not influence the selection.
Code::AddressingResult Code::selectAddressingMode(int targetAddress, int pc, bool indexed, bool immediate, bool indirect, bool extended) const {
    // Builds a successful result carrying the given flags and field value.
    const auto resolved = [](int flags, int field) {
        AddressingResult chosen;
        chosen.success = true;
        chosen.nixbpe = flags;
        chosen.displacement = field;
        return chosen;
    };

    // Format 4 always carries the absolute address, immediate or not.
    if (extended) {
        return resolved(0x01, targetAddress & 0xFFFFF);
    }

    // Format 3 immediate: the value itself goes into the 12-bit field.
    if (immediate) {
        return resolved(0x00, targetAddress & 0xFFF);
    }

    const int pcOffset = targetAddress - pc;
    if (-2048 <= pcOffset && pcOffset <= 2047) {
        return resolved(0x02, pcOffset & 0xFFF);
    }

    if (_baseRegister >= 0) {
        const int baseOffset = targetAddress - _baseRegister;
        if (0 <= baseOffset && baseOffset <= 4095) {
            return resolved(0x04, baseOffset & 0xFFF);
        }
    }

    if (targetAddress >= 0) {
        if (targetAddress <= 4095) {
            return resolved(0x00, targetAddress & 0xFFF);
        }
        if (targetAddress <= 32767) {
            return resolved(0x00, targetAddress & 0x7FFF);
        }
    }

    // Nothing fits.
    AddressingResult unresolved;
    unresolved.success = false;
    unresolved.nixbpe = 0;
    unresolved.displacement = 0;
    return unresolved;
}
|
||||
|
||||
std::vector<uint8_t> Code::generateData(const DataNode* data) {
|
||||
std::vector<uint8_t> bytes;
|
||||
|
||||
if (!data) {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
switch (data->kind()) {
|
||||
case DataKind::WORD: {
|
||||
if (std::holds_alternative<int>(data->value())) {
|
||||
int value = std::get<int>(data->value()) & 0xFFFFFF;
|
||||
// SIC/XE stores words in big-endian (MSB first)
|
||||
bytes.push_back((value >> 16) & 0xFF);
|
||||
bytes.push_back((value >> 8) & 0xFF);
|
||||
bytes.push_back(value & 0xFF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case DataKind::BYTE: {
|
||||
if (std::holds_alternative<std::vector<uint8_t>>(data->value())) {
|
||||
bytes = std::get<std::vector<uint8_t>>(data->value());
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case DataKind::RESW:
|
||||
case DataKind::RESB:
|
||||
// Reserved space - emit zeros (handled by initialized array)
|
||||
break;
|
||||
}
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
std::string Code::emitText() {
|
||||
std::ostringstream oss;
|
||||
|
||||
// H record: program name, start address, length
|
||||
oss << "H ";
|
||||
std::string name = _programName.empty() ? "PROG" : _programName;
|
||||
name.resize(6, ' ');
|
||||
oss << name << " ";
|
||||
oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _startAddress << " ";
|
||||
oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _programLength;
|
||||
oss << "\n";
|
||||
|
||||
// T records: text (code/data)
|
||||
std::vector<uint8_t> code = emitCode();
|
||||
int textStart = 0;
|
||||
|
||||
while (textStart < code.size()) {
|
||||
int textLength = std::min(30, (int)code.size() - textStart);
|
||||
|
||||
// Skip all-zero sections for RESW/RESB
|
||||
bool allZeros = true;
|
||||
for (int i = 0; i < textLength; ++i) {
|
||||
if (code[textStart + i] != 0) {
|
||||
allZeros = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!allZeros) {
|
||||
oss << "T ";
|
||||
oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << (_startAddress + textStart) << " ";
|
||||
oss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase << textLength << " ";
|
||||
|
||||
for (int i = 0; i < textLength; ++i) {
|
||||
oss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase << (int)code[textStart + i];
|
||||
}
|
||||
oss << "\n";
|
||||
}
|
||||
|
||||
textStart += textLength;
|
||||
}
|
||||
|
||||
// E record: execution start address
|
||||
oss << "E ";
|
||||
oss << std::setfill('0') << std::setw(6) << std::hex << std::uppercase << _startAddress;
|
||||
oss << "\n";
|
||||
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::string Code::dumpSymbols() const {
|
||||
std::ostringstream oss;
|
||||
oss << "=== Symbol Table ===\n";
|
||||
oss << std::left << std::setw(20) << "Symbol" << "Address\n";
|
||||
oss << std::string(30, '-') << "\n";
|
||||
|
||||
for (const auto& [symbol, address] : _symbolTable) {
|
||||
oss << std::left << std::setw(20) << symbol;
|
||||
oss << std::hex << std::uppercase << std::setw(6) << std::setfill('0') << address << "\n";
|
||||
}
|
||||
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::string Code::dumpCode() const {
|
||||
std::ostringstream oss;
|
||||
oss << "=== Code Listing ===\n";
|
||||
oss << std::hex << std::uppercase << std::setfill('0');
|
||||
|
||||
std::vector<uint8_t> code = const_cast<Code*>(this)->emitCode();
|
||||
|
||||
for (size_t i = 0; i < _lines.size(); ++i) {
|
||||
auto& line = _lines[i];
|
||||
int address = _locationCounters[i];
|
||||
int offset = address - _startAddress;
|
||||
|
||||
// Print address
|
||||
oss << std::setw(6) << address << " ";
|
||||
|
||||
// Print generated bytes
|
||||
int length = getInstructionLength(line, address);
|
||||
if (auto* data = dynamic_cast<DataNode*>(line.get())) {
|
||||
auto bytes = const_cast<Code*>(this)->generateData(data);
|
||||
length = bytes.size();
|
||||
}
|
||||
|
||||
for (int j = 0; j < length && (offset + j) < code.size(); ++j) {
|
||||
oss << std::setw(2) << (int)code[offset + j];
|
||||
}
|
||||
|
||||
// Pad for alignment
|
||||
for (int j = length; j < 12; ++j) {
|
||||
oss << " ";
|
||||
}
|
||||
|
||||
oss << " " << line->toString() << "\n";
|
||||
}
|
||||
|
||||
return oss.str();
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue