Popravil parser in nekaj manjših sintaktičnih napak; pass1 in pass2 delujeta

This commit is contained in:
Timon 2026-01-04 16:20:54 +01:00
parent 195ca3c9fa
commit 8ea00ddb32
42 changed files with 622 additions and 297 deletions

View file

@ -5,8 +5,8 @@
int main(int argc, char const *argv[]){ int main(int argc, char const *argv[]){
machine m; machine m;
cpu procesor(&m); cpu procesor(&m);
m.loadObj("files/arithr.obj"); //m.loadObj("files/arithr.obj");
//m.loadObj("files/cat.obj"); m.loadObj("files/cat.obj");
procesor.setSpeed(100); procesor.setSpeed(100);
procesor.start(); procesor.start();
/*stuff /*stuff

View file

@ -1,10 +1,11 @@
from zbirnik.opcodes import OPCODES
class EmitContext: class EmitContext:
REGISTERS = { REGISTERS = {
'A': 0, 'X': 1, 'L': 2, 'A': 0, 'X': 1, 'L': 2,
'B': 3, 'S': 4, 'T': 5, 'F': 6 'B': 3, 'S': 4, 'T': 5, 'F': 6
} }
def __init__(self, opcodes, symtab): def __init__(self, symtab):
self.opcodes = opcodes self.opcodes = OPCODES
self.symtab = symtab self.symtab = symtab
self.base = None self.base = None

View file

@ -23,7 +23,7 @@ class Code:
# START # START
if node.__class__.__name__ == "directive" and node.name == "START": if node.__class__.__name__ == "directive" and node.name == "START":
self.start_address = node.value self.start_address = node.operand
locctr = self.start_address locctr = self.start_address
self.name = node.label self.name = node.label
node.address = locctr node.address = locctr
@ -40,8 +40,8 @@ class Code:
# END # END
if node.__class__.__name__ == "directive" and node.name == "END": if node.__class__.__name__ == "directive" and node.name == "END":
if node.value is not None: if node.operand is not None:
self.entry_point = node.value self.entry_point = node.operand
break break
locctr += node.size() locctr += node.size()

View file

@ -2,10 +2,11 @@ from zbirnik.parser import parser
from zbirnik.code import Code from zbirnik.code import Code
from zbirnik.EmitCtx import EmitContext from zbirnik.EmitCtx import EmitContext
from zbirnik.mnemoniki.mnemoniki_tabela import MNEMONICS from zbirnik.mnemoniki.mnemoniki_tabela import MNEMONICS
from zbirnik.opcodes import OPCODES from zbirnik.parserctx import ParserContext
import os
print("Vnesite ime programa (.asm), ki je v istem direktoriju kakor main.py: ") ime = input("Vnesite ime programa (.asm): ")
ime = input() ime = os.path.join(os.path.dirname(__file__), ime)
if not ime.endswith(".asm"): if not ime.endswith(".asm"):
raise ValueError("Ime programa ni v pravi obliki, mora biti: ime.asm") raise ValueError("Ime programa ni v pravi obliki, mora biti: ime.asm")
@ -14,19 +15,28 @@ code = Code()
with open(ime) as f: with open(ime) as f:
for line in f: for line in f:
ctx = parser.parse(line) print("LINE:", line.rstrip())
rez = parser.parse(line)
ctx = ParserContext(rez)
#print("CTX:", ctx)
if ctx is None: if ctx is None:
print(" -> komentar / prazna vrstica")
continue continue
print(" label =", ctx.label)
print(" mnemonic=", ctx.mnemonic)
print(" operands=", ctx.operands)
print("-" * 40)
mnemonic = MNEMONICS[ctx.mnemonic] mnemonic = MNEMONICS[ctx.mnemonic]
node = mnemonic.parse(ctx) node = mnemonic.parse(ctx)
code.add(node) code.add(node)
code.pass1() code.pass1()
ctx_emit = EmitContext(opcodes=OPCODES,symtab=code.symtab) ctx_emit = EmitContext(symtab=code.symtab)
binary = code.pass2(ctx_emit) binary = code.pass2(ctx_emit)
print(binary.hex()) print(binary.hex())

View file

@ -1,8 +1,8 @@
from zbirnik.ukazi.directive import directive from zbirnik.ukazi.directive import directive
from mnemonic import Mnemonic from zbirnik.mnemoniki.mnemonic import Mnemonic
from zbirnik.parserctx import ParserContext from zbirnik.parserctx import ParserContext
class MnemonicD(Mnemonic): class MnemonicD(Mnemonic):
def parse(self, parser: ParserContext): def parse(self, parser: ParserContext):
return directive(direktiva=self.name, label=parser.label, return directive(name=self.name, label=parser.label,
operand=None) operand=None)

View file

@ -1,8 +1,8 @@
from zbirnik.ukazi.directive import directive from zbirnik.ukazi.directive import directive
from mnemonic import Mnemonic from zbirnik.mnemoniki.mnemonic import Mnemonic
from zbirnik.parserctx import ParserContext from zbirnik.parserctx import ParserContext
class MnemonicDn(Mnemonic): class MnemonicDn(Mnemonic):
def parse(self, parser: ParserContext): def parse(self, parser: ParserContext):
return directive(direktiva=self.name, label=parser.label, return directive(name=self.name, label=parser.label,
operand=parser.read_num_sym()) operand=parser.read_num_sym())

View file

@ -1,4 +1,4 @@
from mnemonic import Mnemonic from zbirnik.mnemoniki.mnemonic import Mnemonic
from zbirnik.ukazi.f1 import f1 from zbirnik.ukazi.f1 import f1
class MnemonicF1(Mnemonic): class MnemonicF1(Mnemonic):

View file

@ -3,7 +3,7 @@ from zbirnik.ukazi.f2 import f2
from zbirnik.parserctx import ParserContext from zbirnik.parserctx import ParserContext
class mnemonicF2(Mnemonic): class MemonicF2(Mnemonic):
def parse(self, parser: ParserContext): def parse(self, parser: ParserContext):
return f2(r1=parser.read_reg(), r2=parser.read_reg(), return f2(r1=parser.read_reg(), r2=parser.read_reg(),
mnemonic=self.name, label=parser.label) mnemonic=self.name, label=parser.label)

View file

@ -4,4 +4,4 @@ from zbirnik.parserctx import ParserContext
class MnemonicSd(Mnemonic): class MnemonicSd(Mnemonic):
def parse(self, parser: ParserContext): def parse(self, parser: ParserContext):
return storage(label=parser.label, name=self.name, val=parser.read_num_sym()) return storage(label=parser.label, name=self.name, value=parser.read_num_sym())

View file

@ -5,4 +5,4 @@ from zbirnik.parserctx import ParserContext
class MnemonicSn(Mnemonic): class MnemonicSn(Mnemonic):
def parse(self, parser: ParserContext): def parse(self, parser: ParserContext):
return storage(label=parser.label, name=self.name, val=parser.read_num_sym()) return storage(label=parser.label, name=self.name, value=parser.read_num_sym())

View file

@ -1,70 +1,76 @@
from zbirnik.mnemoniki import ( from zbirnik.mnemoniki.mnemonicD import MnemonicD
mnemonicD, mnemonicDn, from zbirnik.mnemoniki.mnemonicDn import MnemonicDn
mnemonicF1, mnemonicF2n, mnemonicF2r, mnemonicF2rn, mnemonicF2rr, from zbirnik.mnemoniki.mnemonicF1 import MnemonicF1
mnemonicF3, mnemonicF3m, mnemonicF4m, from zbirnik.mnemoniki.mnemonicF2n import MnemonicF2n
mnemonicSd, mnemonicSn from zbirnik.mnemoniki.mnemonicF2r import MnemonicF2r
) from zbirnik.mnemoniki.mnemonicF2rn import MnemonicF2rn
from zbirnik.mnemoniki.mnemonicF2rr import MnemonicF2rr
from zbirnik.mnemoniki.mnemonicF3 import MnemonicF3
from zbirnik.mnemoniki.mnemonicF3m import MnemonicF3m
from zbirnik.mnemoniki.mnemonicF4m import MnemonicF4m
from zbirnik.mnemoniki.mnemonicSd import MnemonicSd
from zbirnik.mnemoniki.mnemonicSn import MnemonicSn
# Centralna tabela mnemonikov # Centralna tabela Mnemonikov
MNEMONICS = { MNEMONICS = {
# Direktive (brez operandov) # Direktive (brez operandov)
'NOBASE': mnemonicD('NOBASE'), 'NOBASE': MnemonicD('NOBASE'),
'LTORG': mnemonicD('LTORG'), 'LTORG': MnemonicD('LTORG'),
# Direktive (en operand) # Direktive (en operand)
'START': mnemonicDn('START'), 'START': MnemonicDn('START'),
'END': mnemonicDn('END'), 'END': MnemonicDn('END'),
'BASE': mnemonicDn('BASE'), 'BASE': MnemonicDn('BASE'),
# Format 1 (brez operandov) # Format 1 (brez operandov)
'FIX': mnemonicF1('FIX', opcode=0xC4), 'FIX': MnemonicF1('FIX', opcode=0xC4),
'FLOAT': mnemonicF1('FLOAT', opcode=0xC0), 'FLOAT': MnemonicF1('FLOAT', opcode=0xC0),
'HIO': mnemonicF1('HIO', opcode=0xF4), 'HIO': MnemonicF1('HIO', opcode=0xF4),
'NORM': mnemonicF1('NORM', opcode=0xC8), 'NORM': MnemonicF1('NORM', opcode=0xC8),
'SIO': mnemonicF1('SIO', opcode=0xF0), 'SIO': MnemonicF1('SIO', opcode=0xF0),
'TIO': mnemonicF1('TIO', opcode=0xF8), 'TIO': MnemonicF1('TIO', opcode=0xF8),
# Format 2 # Format 2
# F2 – en številčni operand # F2 – en številčni operand
'SVC': mnemonicF2n('SVC', opcode=0xB0), 'SVC': MnemonicF2n('SVC', opcode=0xB0),
# F2 – en register # F2 – en register
'CLEAR': mnemonicF2r('CLEAR', opcode=0xB4), 'CLEAR': MnemonicF2r('CLEAR', opcode=0xB4),
'TIXR': mnemonicF2r('TIXR', opcode=0xB8), 'TIXR': MnemonicF2r('TIXR', opcode=0xB8),
# F2 – register + število # F2 – register + število
'SHIFTL': mnemonicF2rn('SHIFTL', opcode=0xA4), 'SHIFTL': MnemonicF2rn('SHIFTL', opcode=0xA4),
'SHIFTR': mnemonicF2rn('SHIFTR', opcode=0xA8), 'SHIFTR': MnemonicF2rn('SHIFTR', opcode=0xA8),
# F2 – dva registra # F2 – dva registra
'ADDR': mnemonicF2rr('ADDR', opcode=0x90), 'ADDR': MnemonicF2rr('ADDR', opcode=0x90),
'SUBR': mnemonicF2rr('SUBR', opcode=0x94), 'SUBR': MnemonicF2rr('SUBR', opcode=0x94),
'MULR': mnemonicF2rr('MULR', opcode=0x98), 'MULR': MnemonicF2rr('MULR', opcode=0x98),
'DIVR': mnemonicF2rr('DIVR', opcode=0x9C), 'DIVR': MnemonicF2rr('DIVR', opcode=0x9C),
'COMPR': mnemonicF2rr('COMPR', opcode=0xA0), 'COMPR': MnemonicF2rr('COMPR', opcode=0xA0),
# Format 3 # Format 3
'RSUB': mnemonicF3('RSUB', opcode=0x4C), 'RSUB': MnemonicF3('RSUB', opcode=0x4C),
'LDA': mnemonicF3m('LDA', opcode=0x00), 'LDA': MnemonicF3m('LDA', opcode=0x00),
'LDX': mnemonicF3m('LDX', opcode=0x04), 'LDX': MnemonicF3m('LDX', opcode=0x04),
'LDL': mnemonicF3m('LDL', opcode=0x08), 'LDL': MnemonicF3m('LDL', opcode=0x08),
'STA': mnemonicF3m('STA', opcode=0x0C), 'STA': MnemonicF3m('STA', opcode=0x0C),
'STX': mnemonicF3m('STX', opcode=0x10), 'STX': MnemonicF3m('STX', opcode=0x10),
'STL': mnemonicF3m('STL', opcode=0x14), 'STL': MnemonicF3m('STL', opcode=0x14),
'ADD': mnemonicF3m('ADD', opcode=0x18), 'ADD': MnemonicF3m('ADD', opcode=0x18),
'SUB': mnemonicF3m('SUB', opcode=0x1C), 'SUB': MnemonicF3m('SUB', opcode=0x1C),
'MUL': mnemonicF3m('MUL', opcode=0x20), 'MUL': MnemonicF3m('MUL', opcode=0x20),
'DIV': mnemonicF3m('DIV', opcode=0x24), 'DIV': MnemonicF3m('DIV', opcode=0x24),
'COMP': mnemonicF3m('COMP', opcode=0x28), 'COMP': MnemonicF3m('COMP', opcode=0x28),
'J': mnemonicF3m('J', opcode=0x3C), 'J': MnemonicF3m('J', opcode=0x3C),
'JEQ': mnemonicF3m('JEQ', opcode=0x30), 'JEQ': MnemonicF3m('JEQ', opcode=0x30),
'JGT': mnemonicF3m('JGT', opcode=0x34), 'JGT': MnemonicF3m('JGT', opcode=0x34),
'JLT': mnemonicF3m('JLT', opcode=0x38), 'JLT': MnemonicF3m('JLT', opcode=0x38),
'JSUB': mnemonicF3m('JSUB', opcode=0x48), 'JSUB': MnemonicF3m('JSUB', opcode=0x48),
'TD': mnemonicF3m('TD', opcode=0xE0), 'TD': MnemonicF3m('TD', opcode=0xE0),
'RD': mnemonicF3m('RD', opcode=0xD8), 'RD': MnemonicF3m('RD', opcode=0xD8),
'WD': mnemonicF3m('WD', opcode=0xDC), 'WD': MnemonicF3m('WD', opcode=0xDC),
# Format 4 (razširjeni) # Format 4 (razširjeni)
'+LDA': mnemonicF4m('+LDA', opcode=0x00), '+LDA': MnemonicF4m('+LDA', opcode=0x00),
'+JSUB': mnemonicF4m('+JSUB', opcode=0x48), '+JSUB': MnemonicF4m('+JSUB', opcode=0x48),
# Pomnilniške direktive # Pomnilniške direktive
# podatki # podatki
'BYTE': mnemonicSd('BYTE'), 'BYTE': MnemonicSd('BYTE'),
'WORD': mnemonicSd('WORD'), 'WORD': MnemonicSd('WORD'),
# rezervacija # rezervacija
'RESB': mnemonicSn('RESB'), 'RESB': MnemonicSn('RESB'),
'RESW': mnemonicSn('RESW'), 'RESW': MnemonicSn('RESW'),
} }

View file

@ -3,54 +3,79 @@ Created by PLY version 3.11 (http://www.dabeaz.com/ply)
Grammar Grammar
Rule 0 S' -> start Rule 0 S' -> start
Rule 1 start -> LABEL command Rule 1 start -> LABEL statement
Rule 2 start -> command Rule 2 start -> statement
Rule 3 command -> MNEMONIC Rule 3 statement -> instruction
Rule 4 command -> MNEMONIC args Rule 4 statement -> directive
Rule 5 args -> operand Rule 5 instruction -> MNEMONIC
Rule 6 args -> operand COMMA operand Rule 6 instruction -> MNEMONIC args
Rule 7 operand -> REGISTER Rule 7 directive -> DIRECTIVE
Rule 8 operand -> AT address Rule 8 directive -> DIRECTIVE directive_args
Rule 9 operand -> HASH address Rule 9 directive_args -> directive_arg
Rule 10 operand -> address Rule 10 directive_args -> directive_arg COMMA directive_arg
Rule 11 address -> NUMBER Rule 11 directive_arg -> NUMBER
Rule 12 address -> SYMBOL Rule 12 directive_arg -> SYMBOL
Rule 13 directive_arg -> STRING
Rule 14 directive_arg -> operand
Rule 15 args -> operand
Rule 16 args -> operand COMMA operand
Rule 17 operand -> REGISTER
Rule 18 operand -> AT address
Rule 19 operand -> HASH address
Rule 20 operand -> PLUS address
Rule 21 operand -> address
Rule 22 address -> NUMBER
Rule 23 address -> SYMBOL
Terminals, with rules where they appear Terminals, with rules where they appear
AT : 8 AT : 18
COMMA : 6 COMMA : 10 16
HASH : 9 DIRECTIVE : 7 8
HASH : 19
LABEL : 1 LABEL : 1
MNEMONIC : 3 4 MNEMONIC : 5 6
NUMBER : 11 NUMBER : 11 22
REGISTER : 7 PLUS : 20
SYMBOL : 12 REGISTER : 17
STRING : 13
SYMBOL : 12 23
error : error :
Nonterminals, with rules where they appear Nonterminals, with rules where they appear
address : 8 9 10 address : 18 19 20 21
args : 4 args : 6
command : 1 2 directive : 4
operand : 5 6 6 directive_arg : 9 10 10
directive_args : 8
instruction : 3
operand : 14 15 16 16
start : 0 start : 0
statement : 1 2
Parsing method: LALR Parsing method: LALR
state 0 state 0
(0) S' -> . start (0) S' -> . start
(1) start -> . LABEL command (1) start -> . LABEL statement
(2) start -> . command (2) start -> . statement
(3) command -> . MNEMONIC (3) statement -> . instruction
(4) command -> . MNEMONIC args (4) statement -> . directive
(5) instruction -> . MNEMONIC
(6) instruction -> . MNEMONIC args
(7) directive -> . DIRECTIVE
(8) directive -> . DIRECTIVE directive_args
LABEL shift and go to state 2 LABEL shift and go to state 2
MNEMONIC shift and go to state 4 MNEMONIC shift and go to state 6
DIRECTIVE shift and go to state 7
start shift and go to state 1 start shift and go to state 1
command shift and go to state 3 statement shift and go to state 3
instruction shift and go to state 4
directive shift and go to state 5
state 1 state 1
@ -60,160 +85,337 @@ state 1
state 2 state 2
(1) start -> LABEL . command (1) start -> LABEL . statement
(3) command -> . MNEMONIC (3) statement -> . instruction
(4) command -> . MNEMONIC args (4) statement -> . directive
(5) instruction -> . MNEMONIC
(6) instruction -> . MNEMONIC args
(7) directive -> . DIRECTIVE
(8) directive -> . DIRECTIVE directive_args
MNEMONIC shift and go to state 4 MNEMONIC shift and go to state 6
DIRECTIVE shift and go to state 7
command shift and go to state 5 statement shift and go to state 8
instruction shift and go to state 4
directive shift and go to state 5
state 3 state 3
(2) start -> command . (2) start -> statement .
$end reduce using rule 2 (start -> command .) $end reduce using rule 2 (start -> statement .)
state 4 state 4
(3) command -> MNEMONIC . (3) statement -> instruction .
(4) command -> MNEMONIC . args
(5) args -> . operand
(6) args -> . operand COMMA operand
(7) operand -> . REGISTER
(8) operand -> . AT address
(9) operand -> . HASH address
(10) operand -> . address
(11) address -> . NUMBER
(12) address -> . SYMBOL
$end reduce using rule 3 (command -> MNEMONIC .) $end reduce using rule 3 (statement -> instruction .)
REGISTER shift and go to state 8
AT shift and go to state 9
HASH shift and go to state 11
NUMBER shift and go to state 12
SYMBOL shift and go to state 13
args shift and go to state 6
operand shift and go to state 7
address shift and go to state 10
state 5 state 5
(1) start -> LABEL command . (4) statement -> directive .
$end reduce using rule 1 (start -> LABEL command .) $end reduce using rule 4 (statement -> directive .)
state 6 state 6
(4) command -> MNEMONIC args . (5) instruction -> MNEMONIC .
(6) instruction -> MNEMONIC . args
(15) args -> . operand
(16) args -> . operand COMMA operand
(17) operand -> . REGISTER
(18) operand -> . AT address
(19) operand -> . HASH address
(20) operand -> . PLUS address
(21) operand -> . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
$end reduce using rule 4 (command -> MNEMONIC args .) $end reduce using rule 5 (instruction -> MNEMONIC .)
REGISTER shift and go to state 11
AT shift and go to state 12
HASH shift and go to state 14
PLUS shift and go to state 15
NUMBER shift and go to state 16
SYMBOL shift and go to state 17
args shift and go to state 9
operand shift and go to state 10
address shift and go to state 13
state 7 state 7
(5) args -> operand . (7) directive -> DIRECTIVE .
(6) args -> operand . COMMA operand (8) directive -> DIRECTIVE . directive_args
(9) directive_args -> . directive_arg
(10) directive_args -> . directive_arg COMMA directive_arg
(11) directive_arg -> . NUMBER
(12) directive_arg -> . SYMBOL
(13) directive_arg -> . STRING
(14) directive_arg -> . operand
(17) operand -> . REGISTER
(18) operand -> . AT address
(19) operand -> . HASH address
(20) operand -> . PLUS address
(21) operand -> . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
$end reduce using rule 5 (args -> operand .) $end reduce using rule 7 (directive -> DIRECTIVE .)
COMMA shift and go to state 14 NUMBER shift and go to state 20
SYMBOL shift and go to state 21
STRING shift and go to state 22
REGISTER shift and go to state 11
AT shift and go to state 12
HASH shift and go to state 14
PLUS shift and go to state 15
directive_args shift and go to state 18
directive_arg shift and go to state 19
operand shift and go to state 23
address shift and go to state 13
state 8 state 8
(7) operand -> REGISTER . (1) start -> LABEL statement .
COMMA reduce using rule 7 (operand -> REGISTER .) $end reduce using rule 1 (start -> LABEL statement .)
$end reduce using rule 7 (operand -> REGISTER .)
state 9 state 9
(8) operand -> AT . address (6) instruction -> MNEMONIC args .
(11) address -> . NUMBER
(12) address -> . SYMBOL
NUMBER shift and go to state 12 $end reduce using rule 6 (instruction -> MNEMONIC args .)
SYMBOL shift and go to state 13
address shift and go to state 15
state 10 state 10
(10) operand -> address . (15) args -> operand .
(16) args -> operand . COMMA operand
COMMA reduce using rule 10 (operand -> address .) $end reduce using rule 15 (args -> operand .)
$end reduce using rule 10 (operand -> address .) COMMA shift and go to state 24
state 11 state 11
(9) operand -> HASH . address (17) operand -> REGISTER .
(11) address -> . NUMBER
(12) address -> . SYMBOL
NUMBER shift and go to state 12 COMMA reduce using rule 17 (operand -> REGISTER .)
SYMBOL shift and go to state 13 $end reduce using rule 17 (operand -> REGISTER .)
address shift and go to state 16
state 12 state 12
(11) address -> NUMBER . (18) operand -> AT . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
COMMA reduce using rule 11 (address -> NUMBER .) NUMBER shift and go to state 16
$end reduce using rule 11 (address -> NUMBER .) SYMBOL shift and go to state 17
address shift and go to state 25
state 13 state 13
(12) address -> SYMBOL . (21) operand -> address .
COMMA reduce using rule 12 (address -> SYMBOL .) COMMA reduce using rule 21 (operand -> address .)
$end reduce using rule 12 (address -> SYMBOL .) $end reduce using rule 21 (operand -> address .)
state 14 state 14
(6) args -> operand COMMA . operand (19) operand -> HASH . address
(7) operand -> . REGISTER (22) address -> . NUMBER
(8) operand -> . AT address (23) address -> . SYMBOL
(9) operand -> . HASH address
(10) operand -> . address
(11) address -> . NUMBER
(12) address -> . SYMBOL
REGISTER shift and go to state 8 NUMBER shift and go to state 16
AT shift and go to state 9 SYMBOL shift and go to state 17
HASH shift and go to state 11
NUMBER shift and go to state 12
SYMBOL shift and go to state 13
operand shift and go to state 17 address shift and go to state 26
address shift and go to state 10
state 15 state 15
(8) operand -> AT address . (20) operand -> PLUS . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
COMMA reduce using rule 8 (operand -> AT address .) NUMBER shift and go to state 16
$end reduce using rule 8 (operand -> AT address .) SYMBOL shift and go to state 17
address shift and go to state 27
state 16 state 16
(9) operand -> HASH address . (22) address -> NUMBER .
COMMA reduce using rule 9 (operand -> HASH address .) COMMA reduce using rule 22 (address -> NUMBER .)
$end reduce using rule 9 (operand -> HASH address .) $end reduce using rule 22 (address -> NUMBER .)
state 17 state 17
(6) args -> operand COMMA operand . (23) address -> SYMBOL .
$end reduce using rule 6 (args -> operand COMMA operand .) COMMA reduce using rule 23 (address -> SYMBOL .)
$end reduce using rule 23 (address -> SYMBOL .)
state 18
(8) directive -> DIRECTIVE directive_args .
$end reduce using rule 8 (directive -> DIRECTIVE directive_args .)
state 19
(9) directive_args -> directive_arg .
(10) directive_args -> directive_arg . COMMA directive_arg
$end reduce using rule 9 (directive_args -> directive_arg .)
COMMA shift and go to state 28
state 20
(11) directive_arg -> NUMBER .
(22) address -> NUMBER .
! reduce/reduce conflict for COMMA resolved using rule 11 (directive_arg -> NUMBER .)
! reduce/reduce conflict for $end resolved using rule 11 (directive_arg -> NUMBER .)
COMMA reduce using rule 11 (directive_arg -> NUMBER .)
$end reduce using rule 11 (directive_arg -> NUMBER .)
! COMMA [ reduce using rule 22 (address -> NUMBER .) ]
! $end [ reduce using rule 22 (address -> NUMBER .) ]
state 21
(12) directive_arg -> SYMBOL .
(23) address -> SYMBOL .
! reduce/reduce conflict for COMMA resolved using rule 12 (directive_arg -> SYMBOL .)
! reduce/reduce conflict for $end resolved using rule 12 (directive_arg -> SYMBOL .)
COMMA reduce using rule 12 (directive_arg -> SYMBOL .)
$end reduce using rule 12 (directive_arg -> SYMBOL .)
! COMMA [ reduce using rule 23 (address -> SYMBOL .) ]
! $end [ reduce using rule 23 (address -> SYMBOL .) ]
state 22
(13) directive_arg -> STRING .
COMMA reduce using rule 13 (directive_arg -> STRING .)
$end reduce using rule 13 (directive_arg -> STRING .)
state 23
(14) directive_arg -> operand .
COMMA reduce using rule 14 (directive_arg -> operand .)
$end reduce using rule 14 (directive_arg -> operand .)
state 24
(16) args -> operand COMMA . operand
(17) operand -> . REGISTER
(18) operand -> . AT address
(19) operand -> . HASH address
(20) operand -> . PLUS address
(21) operand -> . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
REGISTER shift and go to state 11
AT shift and go to state 12
HASH shift and go to state 14
PLUS shift and go to state 15
NUMBER shift and go to state 16
SYMBOL shift and go to state 17
operand shift and go to state 29
address shift and go to state 13
state 25
(18) operand -> AT address .
COMMA reduce using rule 18 (operand -> AT address .)
$end reduce using rule 18 (operand -> AT address .)
state 26
(19) operand -> HASH address .
COMMA reduce using rule 19 (operand -> HASH address .)
$end reduce using rule 19 (operand -> HASH address .)
state 27
(20) operand -> PLUS address .
COMMA reduce using rule 20 (operand -> PLUS address .)
$end reduce using rule 20 (operand -> PLUS address .)
state 28
(10) directive_args -> directive_arg COMMA . directive_arg
(11) directive_arg -> . NUMBER
(12) directive_arg -> . SYMBOL
(13) directive_arg -> . STRING
(14) directive_arg -> . operand
(17) operand -> . REGISTER
(18) operand -> . AT address
(19) operand -> . HASH address
(20) operand -> . PLUS address
(21) operand -> . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
NUMBER shift and go to state 20
SYMBOL shift and go to state 21
STRING shift and go to state 22
REGISTER shift and go to state 11
AT shift and go to state 12
HASH shift and go to state 14
PLUS shift and go to state 15
directive_arg shift and go to state 30
operand shift and go to state 23
address shift and go to state 13
state 29
(16) args -> operand COMMA operand .
$end reduce using rule 16 (args -> operand COMMA operand .)
state 30
(10) directive_args -> directive_arg COMMA directive_arg .
$end reduce using rule 10 (directive_args -> directive_arg COMMA directive_arg .)
WARNING:
WARNING: Conflicts:
WARNING:
WARNING: reduce/reduce conflict in state 20 resolved using rule (directive_arg -> NUMBER)
WARNING: rejected rule (address -> NUMBER) in state 20
WARNING: reduce/reduce conflict in state 21 resolved using rule (directive_arg -> SYMBOL)
WARNING: rejected rule (address -> SYMBOL) in state 21

View file

@ -1,125 +1,191 @@
import ply.lex import ply.lex
import ply.yacc import ply.yacc
from zbirnik.parserctx import ParserContext #import sys
# -------------------- global at_line_start, seen_mnemonic_or_directive
# Lexer # Lexer
# --------------------
tokens = ( tokens = (
'AT', 'AT',
'COMMA', 'COMMA',
'HASH', 'HASH',
'PLUS',
'LABEL', 'LABEL',
'REGISTER', 'REGISTER',
'MNEMONIC', 'MNEMONIC',
'DIRECTIVE',
'SYMBOL', 'SYMBOL',
'NUMBER', 'NUMBER',
'COMMENT', 'STRING',
) )
t_AT = r'@' t_AT = r'@'
t_COMMA = r',' t_COMMA = r','
t_HASH = r'\#' t_HASH = r'\#'
t_LABEL = r'^[a-z_0-9]+' t_PLUS = r'\+'
t_REGISTER = r'\b[ABFLSTX]\b' t_REGISTER = r'\b[ABFLSTX]\b'
t_MNEMONIC = r'\b[A-Z]+\b'
t_SYMBOL = r'[a-z_0-9]+' # Assembler directives
# Names that t_IDENTIFIER classifies as DIRECTIVE tokens instead of MNEMONIC.
# NOTE(review): EQU, ORG, USE, EXTDEF, EXTREF and CSECT are recognised here
# but have no entry in the MNEMONICS table shown in this commit — confirm
# they are handled before the table lookup.
directives = {
    'START', 'END', 'BYTE', 'WORD', 'RESB', 'RESW',
    'BASE', 'NOBASE', 'EQU', 'ORG', 'LTORG', 'USE',
    'EXTDEF', 'EXTREF', 'CSECT'
}
# Lexer state shared by the token functions through `global`:
# at_line_start is True until the first token or whitespace of a line is
# seen; t_IDENTIFIER uses it to decide whether an identifier is a LABEL.
at_line_start = True
# Set once a MNEMONIC or DIRECTIVE has been emitted on the current line;
# identifiers lexed after that point become SYMBOL.
seen_mnemonic_or_directive = False
def t_WHITESPACE(t):
    r'[ \t]+'
    # NOTE: the docstring above is the PLY token regex and must stay first.
    # Consumes a run of spaces/tabs without producing a token. Leading
    # whitespace clears the line-start flag, so the next identifier on the
    # line is not classified as a label.
    global at_line_start
    if not at_line_start:
        return None
    at_line_start = False
    return None  # nothing returned -> no token is emitted
def t_STRING(t):
    r'[CX]\'[^\']*\''
    # NOTE: the docstring above is the PLY token regex and must stay first.
    # Matches a quoted literal prefixed with C or X, e.g. C'EOF' or X'F1'
    # (presumably SIC character / hex constants — confirm against the
    # storage directive handling). The token value is the raw matched text.
    global at_line_start
    # Any real token ends the line-start state used for label detection.
    at_line_start = False
    return t
def t_IDENTIFIER(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    # NOTE: the docstring above is the PLY token regex and must stay first.
    # Re-types an identifier by its position on the line:
    #   * first thing on the line (no leading whitespace)  -> LABEL
    #   * anything after a mnemonic/directive was seen      -> SYMBOL
    #   * otherwise: a known directive name                 -> DIRECTIVE
    #                an all-uppercase word                  -> MNEMONIC
    #                anything else                          -> SYMBOL
    global at_line_start, seen_mnemonic_or_directive

    if at_line_start:
        at_line_start = False
        t.type = 'LABEL'
        return t

    if seen_mnemonic_or_directive:
        t.type = 'SYMBOL'
        return t

    word = t.value
    if word in directives:
        kind = 'DIRECTIVE'
    elif word.isupper():
        kind = 'MNEMONIC'
    else:
        kind = 'SYMBOL'

    t.type = kind
    # Only an operation token switches the rest of the line to SYMBOL mode.
    if kind != 'SYMBOL':
        seen_mnemonic_or_directive = True
    return t
def t_NUMBER(t): def t_NUMBER(t):
r'-?\d+' r'-?(?:0[xX][0-9a-fA-F]+|\d+)'
t.value = int(t.value) global at_line_start
at_line_start = False
t.value = int(t.value, 0) # Handles hex (0x) and decimal
return t return t
def t_COMMENT(t): t_ignore_COMMENT = r'\..*'
r'\..*'
t.value = t.value[1:].strip()
return t
t_ignore = ' \t\n' def t_newline(t):
r'\n+'
global at_line_start, seen_mnemonic_or_directive
at_line_start = True
seen_mnemonic_or_directive = False
t.lexer.lineno += len(t.value)
def t_error(t): def t_error(t):
print(f"Illegal character {t.value[0]!r}") print(f'Illegal character {t.value[0]!r}')
t.lexer.skip(1) t.lexer.skip(1)
lexer = ply.lex.lex() lexer = ply.lex.lex()
# --------------------
# Parser # Parser
# --------------------
def p_start(p): def p_start(p):
r'''start : LABEL command '''start : LABEL statement
| command | statement'''
| COMMENT if len(p) == 3:
''' # Has label
# komentar p[0] = (p[1], p[2][0], *p[2][1:])
if len(p) == 2 and isinstance(p[1], str):
p[0] = None
return
# brez labela
if len(p) == 2:
label = None
mnemonic, operands = p[1]
else: else:
label = p[1] # No label
mnemonic, operands = p[2] p[0] = (None, p[1][0], *p[1][1:])
p[0] = ParserContext(label, mnemonic, operands) def p_statement(p):
'''statement : instruction
| directive'''
p[0] = p[1]
def p_instruction(p):
def p_command(p): '''instruction : MNEMONIC
r'''command : MNEMONIC | MNEMONIC args'''
| MNEMONIC args
'''
if len(p) == 2: if len(p) == 2:
p[0] = (p[1], []) p[0] = [p[1]]
else: else:
p[0] = (p[1], list(p[2])) p[0] = [p[1]] + list(p[2])
def p_directive(p):
    '''directive : DIRECTIVE
                 | DIRECTIVE directive_args'''
    # NOTE: the docstring above is the PLY grammar rule and must stay first.
    # Produces a flat list: [directive_name, arg1, arg2, ...].
    # len(p) == 2 means the directive was written without arguments.
    collected = p[2] if len(p) > 2 else ()
    p[0] = [p[1], *collected]
def p_args(p): def p_directive_args(p):
r'''args : operand '''directive_args : directive_arg
| operand COMMA operand | directive_arg COMMA directive_arg'''
'''
if len(p) == 2: if len(p) == 2:
p[0] = [p[1]] p[0] = [p[1]]
else: else:
p[0] = [p[1], p[3]] p[0] = [p[1], p[3]]
def p_directive_arg(p):
    '''directive_arg : STRING
                     | operand'''
    # NOTE: the docstring above is the PLY grammar rule and must stay first.
    # A directive argument is either a quoted STRING literal or any ordinary
    # operand. NUMBER and SYMBOL are deliberately NOT listed here: they are
    # already reachable through operand -> address -> NUMBER | SYMBOL, and
    # listing them twice made the grammar ambiguous (the two reduce/reduce
    # conflicts PLY reported for directive_arg vs. address). Both paths
    # yield the same value, so the accepted input and results are unchanged.
    p[0] = p[1]
def p_args(p):
    '''args : operand
            | operand COMMA operand'''
    # NOTE: the docstring above is the PLY grammar rule and must stay first.
    # Produces the instruction's operand list: one element for a single
    # operand, two when a comma-separated pair was given (p[2] is the COMMA).
    first = p[1]
    if len(p) == 2:
        p[0] = [first]
        return
    p[0] = [first, p[3]]
def p_operand(p): def p_operand(p):
r'''operand : REGISTER '''operand : REGISTER
| AT address | AT address
| HASH address | HASH address
| address | PLUS address
''' | address'''
if len(p) == 2: if len(p) == 2:
p[0] = p[1] p[0] = p[1]
else: else:
# @X ali #5 → zadrži znak # Combine prefix with address
p[0] = (p[1], p[2]) p[0] = (p[1], p[2])
def p_address(p): def p_address(p):
r'''address : NUMBER '''address : NUMBER
| SYMBOL | SYMBOL'''
'''
p[0] = p[1] p[0] = p[1]
def p_error(p): def p_error(p):
if p: if p:
raise SyntaxError(f"Syntax error at token {p}") print(f'Syntax error at token {p.type}={p.value!r}')
else: else:
raise SyntaxError("Syntax error at EOF") print('Syntax error at EOF')
parser = ply.yacc.yacc() parser = ply.yacc.yacc()
if __name__ == '__main__': #if __name__ == '__main__':
import sys # for line in sys.stdin:
parser = ply.yacc.yacc() # line = line.rstrip('\n\r')
for line in sys.stdin: # if line and not line.lstrip().startswith('.'):
print(parser.parse(line)) # at_line_start = True
# seen_mnemonic_or_directive = False
# result = parser.parse(line, lexer=lexer)
# print(result)

View file

@ -1,12 +1,6 @@
class ParserContext: class ParserContext:
def __init__(self, parsed): def __init__(self, parsed):
""" # Handle COMMENT special case (if you still need it)
parsed je rezultat PLY parserja, npr.:
('loop', ('LDA', (('#', 5),)))
(None, ('FIX',))
('COMMENT', 'to je komentar')
"""
if isinstance(parsed, tuple) and parsed[0] == 'COMMENT': if isinstance(parsed, tuple) and parsed[0] == 'COMMENT':
self.label = None self.label = None
self.mnemonic = 'COMMENT' self.mnemonic = 'COMMENT'
@ -14,11 +8,16 @@ class ParserContext:
self.comment = parsed[1] self.comment = parsed[1]
return return
self.label, command = parsed # New parser format: (label, mnemonic, *operands)
self.mnemonic = command[0] if not isinstance(parsed, tuple) or len(parsed) < 2:
self.operands = list(command[1]) if len(command) > 1 else [] raise ValueError(f"Invalid parsed format: {parsed}")
self.label = parsed[0] # Can be None
self.mnemonic = parsed[1]
# All remaining elements are operands
self.operands = list(parsed[2:]) if len(parsed) > 2 else []
self.comment = None self.comment = None
def has_operand(self) -> bool: def has_operand(self) -> bool:
return len(self.operands) > 0 return len(self.operands) > 0
@ -28,35 +27,59 @@ class ParserContext:
return self.operands.pop(0) return self.operands.pop(0)
def read_reg(self) -> str: def read_reg(self) -> str:
"""Read a register operand (e.g., 'A', 'X', 'L')"""
op = self.next_op() op = self.next_op()
return op[0] # If it's a plain string, return it
if isinstance(op, str):
return op
# If it's a tuple (shouldn't happen for registers), take first element
if isinstance(op, tuple):
return op[0]
return str(op)
def read_num_sym(self): def read_num_sym(self):
"""Read a number or symbol operand"""
op = self.next_op() op = self.next_op()
if (len(op) == 1): # If it's already a simple value (number or string), return it
if isinstance(op, (int, str)):
return op
# If it's a tuple with prefix (like ('@', 'buffer')), return the value
if isinstance(op, tuple) and len(op) == 2:
return op[1]
# If it's a single-element tuple, unwrap it
if isinstance(op, tuple) and len(op) == 1:
return op[0] return op[0]
return op[1] return op
def read_addressed_operand(self):
    """Consume the next operand and return it with its addressing information.

    Returns:
        A tuple ``(addr_mode, value, indexed)`` where ``addr_mode`` is an
        ``AddrMode`` member, ``value`` is the operand without its prefix,
        and ``indexed`` is True when the following operand was ``'X'``
        (that operand is consumed as well).
    """
    # Function-scope import, using the package-qualified module path.
    # NOTE(review): assumes AddrMode defines SIMPLE, IMMEDIATE, INDIRECT and
    # EXTENDED - confirm against zbirnik.adressing.
    from zbirnik.adressing import AddrMode

    indexed = False
    addr_mode = AddrMode.SIMPLE

    # Sibling readers fetch operands through next_op(); use the same accessor.
    op = self.next_op()

    # A 2-tuple is a (prefix, value) pair produced for prefixed operands.
    if isinstance(op, tuple) and len(op) == 2:
        prefix, value = op
        if prefix == '#':
            addr_mode = AddrMode.IMMEDIATE
        elif prefix == '@':
            addr_mode = AddrMode.INDIRECT
        elif prefix == '+':
            # Extended format (SIC/XE)
            addr_mode = AddrMode.EXTENDED
        else:
            # Unknown prefix: keep SIMPLE mode and hand back the whole tuple.
            value = op
    else:
        # Simple operand (no prefix)
        value = op

    # Indexed addressing: the X register follows as a separate plain operand.
    if self.operands and self.operands[0] == 'X':
        self.operands.pop(0)
        indexed = True

    return addr_mode, value, indexed

View file

@ -6,9 +6,9 @@ _tabversion = '3.10'
_lr_method = 'LALR' _lr_method = 'LALR'
_lr_signature = 'AT COMMA HASH LABEL MNEMONIC NUMBER REGISTER SYMBOLstart : LABEL command\n | commandcommand : MNEMONIC\n | MNEMONIC argsargs : operand\n | operand COMMA operandoperand : REGISTER\n | AT address\n | HASH address\n | addressaddress : NUMBER\n | SYMBOL' _lr_signature = 'AT COMMA DIRECTIVE HASH LABEL MNEMONIC NUMBER PLUS REGISTER STRING SYMBOLstart : LABEL statement\n | statementstatement : instruction\n | directiveinstruction : MNEMONIC\n | MNEMONIC argsdirective : DIRECTIVE\n | DIRECTIVE directive_argsdirective_args : directive_arg\n | directive_arg COMMA directive_argdirective_arg : NUMBER\n | SYMBOL\n | STRING\n | operandargs : operand\n | operand COMMA operandoperand : REGISTER\n | AT address\n | HASH address\n | PLUS address\n | addressaddress : NUMBER\n | SYMBOL'
_lr_action_items = {'LABEL':([0,],[2,]),'MNEMONIC':([0,2,],[4,4,]),'$end':([1,3,4,5,6,7,8,10,12,13,15,16,17,],[0,-2,-3,-1,-4,-5,-7,-10,-11,-12,-8,-9,-6,]),'REGISTER':([4,14,],[8,8,]),'AT':([4,14,],[9,9,]),'HASH':([4,14,],[11,11,]),'NUMBER':([4,9,11,14,],[12,12,12,12,]),'SYMBOL':([4,9,11,14,],[13,13,13,13,]),'COMMA':([7,8,10,12,13,15,16,],[14,-7,-10,-11,-12,-8,-9,]),} _lr_action_items = {'LABEL':([0,],[2,]),'MNEMONIC':([0,2,],[6,6,]),'DIRECTIVE':([0,2,],[7,7,]),'$end':([1,3,4,5,6,7,8,9,10,11,13,16,17,18,19,20,21,22,23,25,26,27,29,30,],[0,-2,-3,-4,-5,-7,-1,-6,-15,-17,-21,-22,-23,-8,-9,-11,-12,-13,-14,-18,-19,-20,-16,-10,]),'REGISTER':([6,7,24,28,],[11,11,11,11,]),'AT':([6,7,24,28,],[12,12,12,12,]),'HASH':([6,7,24,28,],[14,14,14,14,]),'PLUS':([6,7,24,28,],[15,15,15,15,]),'NUMBER':([6,7,12,14,15,24,28,],[16,20,16,16,16,16,20,]),'SYMBOL':([6,7,12,14,15,24,28,],[17,21,17,17,17,17,21,]),'STRING':([7,28,],[22,22,]),'COMMA':([10,11,13,16,17,19,20,21,22,23,25,26,27,],[24,-17,-21,-22,-23,28,-11,-12,-13,-14,-18,-19,-20,]),}
_lr_action = {} _lr_action = {}
for _k, _v in _lr_action_items.items(): for _k, _v in _lr_action_items.items():
@ -17,7 +17,7 @@ for _k, _v in _lr_action_items.items():
_lr_action[_x][_k] = _y _lr_action[_x][_k] = _y
del _lr_action_items del _lr_action_items
_lr_goto_items = {'start':([0,],[1,]),'command':([0,2,],[3,5,]),'args':([4,],[6,]),'operand':([4,14,],[7,17,]),'address':([4,9,11,14,],[10,15,16,10,]),} _lr_goto_items = {'start':([0,],[1,]),'statement':([0,2,],[3,8,]),'instruction':([0,2,],[4,4,]),'directive':([0,2,],[5,5,]),'args':([6,],[9,]),'operand':([6,7,24,28,],[10,23,29,23,]),'address':([6,7,12,14,15,24,28,],[13,13,25,26,27,13,13,]),'directive_args':([7,],[18,]),'directive_arg':([7,28,],[19,30,]),}
_lr_goto = {} _lr_goto = {}
for _k, _v in _lr_goto_items.items(): for _k, _v in _lr_goto_items.items():
@ -27,16 +27,27 @@ for _k, _v in _lr_goto_items.items():
del _lr_goto_items del _lr_goto_items
_lr_productions = [ _lr_productions = [
("S' -> start","S'",1,None,None,None), ("S' -> start","S'",1,None,None,None),
('start -> LABEL command','start',2,'p_start','parser.py',41), ('start -> LABEL statement','start',2,'p_start','parser.py',92),
('start -> command','start',1,'p_start','parser.py',42), ('start -> statement','start',1,'p_start','parser.py',93),
('command -> MNEMONIC','command',1,'p_command','parser.py',48), ('statement -> instruction','statement',1,'p_statement','parser.py',102),
('command -> MNEMONIC args','command',2,'p_command','parser.py',49), ('statement -> directive','statement',1,'p_statement','parser.py',103),
('args -> operand','args',1,'p_args','parser.py',53), ('instruction -> MNEMONIC','instruction',1,'p_instruction','parser.py',107),
('args -> operand COMMA operand','args',3,'p_args','parser.py',54), ('instruction -> MNEMONIC args','instruction',2,'p_instruction','parser.py',108),
('operand -> REGISTER','operand',1,'p_operand','parser.py',60), ('directive -> DIRECTIVE','directive',1,'p_directive','parser.py',115),
('operand -> AT address','operand',2,'p_operand','parser.py',61), ('directive -> DIRECTIVE directive_args','directive',2,'p_directive','parser.py',116),
('operand -> HASH address','operand',2,'p_operand','parser.py',62), ('directive_args -> directive_arg','directive_args',1,'p_directive_args','parser.py',123),
('operand -> address','operand',1,'p_operand','parser.py',63), ('directive_args -> directive_arg COMMA directive_arg','directive_args',3,'p_directive_args','parser.py',124),
('address -> NUMBER','address',1,'p_address','parser.py',67), ('directive_arg -> NUMBER','directive_arg',1,'p_directive_arg','parser.py',131),
('address -> SYMBOL','address',1,'p_address','parser.py',68), ('directive_arg -> SYMBOL','directive_arg',1,'p_directive_arg','parser.py',132),
('directive_arg -> STRING','directive_arg',1,'p_directive_arg','parser.py',133),
('directive_arg -> operand','directive_arg',1,'p_directive_arg','parser.py',134),
('args -> operand','args',1,'p_args','parser.py',138),
('args -> operand COMMA operand','args',3,'p_args','parser.py',139),
('operand -> REGISTER','operand',1,'p_operand','parser.py',146),
('operand -> AT address','operand',2,'p_operand','parser.py',147),
('operand -> HASH address','operand',2,'p_operand','parser.py',148),
('operand -> PLUS address','operand',2,'p_operand','parser.py',149),
('operand -> address','operand',1,'p_operand','parser.py',150),
('address -> NUMBER','address',1,'p_address','parser.py',158),
('address -> SYMBOL','address',1,'p_address','parser.py',159),
] ]

View file

@ -0,0 +1,6 @@
TEST START 0
LDA NUM
STA RES
NUM WORD 5
RES RESW 1
END TEST

View file

@ -1,8 +1,8 @@
from zbirnik.ukazi.node import Node from zbirnik.ukazi.node import Node
class directive(Node): class directive(Node):
def __init__(self, name : str, operand : str | int | None, label : str | None = None):
    """Create a directive node.

    Args:
        name: Directive name; ``emit`` compares it against e.g. ``"BASE"``.
        operand: The directive's operand (symbol name, number, or None).
        label: Optional label, forwarded to ``Node.__init__``.
    """
    # Stored as ``name`` because emit() reads self.name.
    self.name = name
    super().__init__(label)
    self.operand = operand
@ -12,10 +12,10 @@ class directive(Node):
def emit(self, ctx): def emit(self, ctx):
# BASE directive # BASE directive
if self.name == "BASE": if self.name == "BASE":
if isinstance(self.value, str): if isinstance(self.operand, str):
ctx.base = ctx.symtab[self.value] ctx.base = ctx.symtab[self.operand]
elif isinstance(self.value, int): elif isinstance(self.operand, int):
ctx.base = self.value ctx.base = self.operand
else: else:
raise ValueError("Invalid BASE operand") raise ValueError("Invalid BASE operand")
return None return None