Popravil parser in nekaj manjših sintaktičnih napak; pass1 in pass2 delujeta

This commit is contained in:
Timon 2026-01-04 16:20:54 +01:00
parent 195ca3c9fa
commit 8ea00ddb32
42 changed files with 622 additions and 297 deletions

View file

@ -5,8 +5,8 @@
int main(int argc, char const *argv[]){
machine m;
cpu procesor(&m);
m.loadObj("files/arithr.obj");
//m.loadObj("files/cat.obj");
//m.loadObj("files/arithr.obj");
m.loadObj("files/cat.obj");
procesor.setSpeed(100);
procesor.start();
/*stuff

View file

@ -1,10 +1,11 @@
from zbirnik.opcodes import OPCODES
class EmitContext:
REGISTERS = {
'A': 0, 'X': 1, 'L': 2,
'B': 3, 'S': 4, 'T': 5, 'F': 6
}
def __init__(self, opcodes, symtab):
self.opcodes = opcodes
def __init__(self, symtab):
self.opcodes = OPCODES
self.symtab = symtab
self.base = None

View file

@ -23,7 +23,7 @@ class Code:
# START
if node.__class__.__name__ == "directive" and node.name == "START":
self.start_address = node.value
self.start_address = node.operand
locctr = self.start_address
self.name = node.label
node.address = locctr
@ -40,8 +40,8 @@ class Code:
# END
if node.__class__.__name__ == "directive" and node.name == "END":
if node.value is not None:
self.entry_point = node.value
if node.operand is not None:
self.entry_point = node.operand
break
locctr += node.size()

View file

@ -2,10 +2,11 @@ from zbirnik.parser import parser
from zbirnik.code import Code
from zbirnik.EmitCtx import EmitContext
from zbirnik.mnemoniki.mnemoniki_tabela import MNEMONICS
from zbirnik.opcodes import OPCODES
from zbirnik.parserctx import ParserContext
import os
print("Vnesite ime programa (.asm), ki je v istem direktoriju kakor main.py: ")
ime = input()
ime = input("Vnesite ime programa (.asm): ")
ime = os.path.join(os.path.dirname(__file__), ime)
if not ime.endswith(".asm"):
raise ValueError("Ime programa ni v pravi obliki, mora biti: ime.asm")
@ -14,19 +15,28 @@ code = Code()
# Read the assembly source line by line, turn every statement into a node
# via its mnemonic handler, and collect the nodes in `code`.
with open(ime) as f:
    for line in f:
        print("LINE:", line.rstrip())
        # Parse each line exactly once.
        rez = parser.parse(line)
        # The skip test must look at the raw parse result: a constructed
        # ParserContext object is never None, so testing it afterwards can
        # never skip anything and a None result would be wrapped instead.
        # A None result is treated as a comment / blank line (see message).
        if rez is None:
            print(" -> komentar / prazna vrstica")
            continue
        ctx = ParserContext(rez)
        print(" label =", ctx.label)
        print(" mnemonic=", ctx.mnemonic)
        print(" operands=", ctx.operands)
        print("-" * 40)
        # Look up the handler for this mnemonic and let it consume the operands.
        mnemonic = MNEMONICS[ctx.mnemonic]
        node = mnemonic.parse(ctx)
        code.add(node)
code.pass1()
ctx_emit = EmitContext(opcodes=OPCODES,symtab=code.symtab)
ctx_emit = EmitContext(symtab=code.symtab)
binary = code.pass2(ctx_emit)
print(binary.hex())

View file

@ -1,8 +1,8 @@
from zbirnik.ukazi.directive import directive
from mnemonic import Mnemonic
from zbirnik.mnemoniki.mnemonic import Mnemonic
from zbirnik.parserctx import ParserContext
class MnemonicD(Mnemonic):
def parse(self, parser: ParserContext):
return directive(direktiva=self.name, label=parser.label,
return directive(name=self.name, label=parser.label,
operand=None)

View file

@ -1,8 +1,8 @@
from zbirnik.ukazi.directive import directive
from mnemonic import Mnemonic
from zbirnik.mnemoniki.mnemonic import Mnemonic
from zbirnik.parserctx import ParserContext
class MnemonicDn(Mnemonic):
def parse(self, parser: ParserContext):
return directive(direktiva=self.name, label=parser.label,
return directive(name=self.name, label=parser.label,
operand=parser.read_num_sym())

View file

@ -1,4 +1,4 @@
from mnemonic import Mnemonic
from zbirnik.mnemoniki.mnemonic import Mnemonic
from zbirnik.ukazi.f1 import f1
class MnemonicF1(Mnemonic):

View file

@ -3,7 +3,7 @@ from zbirnik.ukazi.f2 import f2
from zbirnik.parserctx import ParserContext
class mnemonicF2(Mnemonic):
class MemonicF2(Mnemonic):
def parse(self, parser: ParserContext):
return f2(r1=parser.read_reg(), r2=parser.read_reg(),
mnemonic=self.name, label=parser.label)

View file

@ -4,4 +4,4 @@ from zbirnik.parserctx import ParserContext
class MnemonicSd(Mnemonic):
def parse(self, parser: ParserContext):
return storage(label=parser.label, name=self.name, val=parser.read_num_sym())
return storage(label=parser.label, name=self.name, value=parser.read_num_sym())

View file

@ -5,4 +5,4 @@ from zbirnik.parserctx import ParserContext
class MnemonicSn(Mnemonic):
def parse(self, parser: ParserContext):
return storage(label=parser.label, name=self.name, val=parser.read_num_sym())
return storage(label=parser.label, name=self.name, value=parser.read_num_sym())

View file

@ -1,70 +1,76 @@
from zbirnik.mnemoniki import (
mnemonicD, mnemonicDn,
mnemonicF1, mnemonicF2n, mnemonicF2r, mnemonicF2rn, mnemonicF2rr,
mnemonicF3, mnemonicF3m, mnemonicF4m,
mnemonicSd, mnemonicSn
)
from zbirnik.mnemoniki.mnemonicD import MnemonicD
from zbirnik.mnemoniki.mnemonicDn import MnemonicDn
from zbirnik.mnemoniki.mnemonicF1 import MnemonicF1
from zbirnik.mnemoniki.mnemonicF2n import MnemonicF2n
from zbirnik.mnemoniki.mnemonicF2r import MnemonicF2r
from zbirnik.mnemoniki.mnemonicF2rn import MnemonicF2rn
from zbirnik.mnemoniki.mnemonicF2rr import MnemonicF2rr
from zbirnik.mnemoniki.mnemonicF3 import MnemonicF3
from zbirnik.mnemoniki.mnemonicF3m import MnemonicF3m
from zbirnik.mnemoniki.mnemonicF4m import MnemonicF4m
from zbirnik.mnemoniki.mnemonicSd import MnemonicSd
from zbirnik.mnemoniki.mnemonicSn import MnemonicSn
# Centralna tabela mnemonikov
# Centralna tabela Mnemonikov
MNEMONICS = {
# Direktive (brez operandov)
'NOBASE': mnemonicD('NOBASE'),
'LTORG': mnemonicD('LTORG'),
'NOBASE': MnemonicD('NOBASE'),
'LTORG': MnemonicD('LTORG'),
# Direktive (en operand)
'START': mnemonicDn('START'),
'END': mnemonicDn('END'),
'BASE': mnemonicDn('BASE'),
'START': MnemonicDn('START'),
'END': MnemonicDn('END'),
'BASE': MnemonicDn('BASE'),
# Format 1 (brez operandov)
'FIX': mnemonicF1('FIX', opcode=0xC4),
'FLOAT': mnemonicF1('FLOAT', opcode=0xC0),
'HIO': mnemonicF1('HIO', opcode=0xF4),
'NORM': mnemonicF1('NORM', opcode=0xC8),
'SIO': mnemonicF1('SIO', opcode=0xF0),
'TIO': mnemonicF1('TIO', opcode=0xF8),
'FIX': MnemonicF1('FIX', opcode=0xC4),
'FLOAT': MnemonicF1('FLOAT', opcode=0xC0),
'HIO': MnemonicF1('HIO', opcode=0xF4),
'NORM': MnemonicF1('NORM', opcode=0xC8),
'SIO': MnemonicF1('SIO', opcode=0xF0),
'TIO': MnemonicF1('TIO', opcode=0xF8),
# Format 2
# F2 – en številčni operand
'SVC': mnemonicF2n('SVC', opcode=0xB0),
'SVC': MnemonicF2n('SVC', opcode=0xB0),
# F2 – en register
'CLEAR': mnemonicF2r('CLEAR', opcode=0xB4),
'TIXR': mnemonicF2r('TIXR', opcode=0xB8),
'CLEAR': MnemonicF2r('CLEAR', opcode=0xB4),
'TIXR': MnemonicF2r('TIXR', opcode=0xB8),
# F2 – register + število
'SHIFTL': mnemonicF2rn('SHIFTL', opcode=0xA4),
'SHIFTR': mnemonicF2rn('SHIFTR', opcode=0xA8),
'SHIFTL': MnemonicF2rn('SHIFTL', opcode=0xA4),
'SHIFTR': MnemonicF2rn('SHIFTR', opcode=0xA8),
# F2 – dva registra
'ADDR': mnemonicF2rr('ADDR', opcode=0x90),
'SUBR': mnemonicF2rr('SUBR', opcode=0x94),
'MULR': mnemonicF2rr('MULR', opcode=0x98),
'DIVR': mnemonicF2rr('DIVR', opcode=0x9C),
'COMPR': mnemonicF2rr('COMPR', opcode=0xA0),
'ADDR': MnemonicF2rr('ADDR', opcode=0x90),
'SUBR': MnemonicF2rr('SUBR', opcode=0x94),
'MULR': MnemonicF2rr('MULR', opcode=0x98),
'DIVR': MnemonicF2rr('DIVR', opcode=0x9C),
'COMPR': MnemonicF2rr('COMPR', opcode=0xA0),
# Format 3
'RSUB': mnemonicF3('RSUB', opcode=0x4C),
'LDA': mnemonicF3m('LDA', opcode=0x00),
'LDX': mnemonicF3m('LDX', opcode=0x04),
'LDL': mnemonicF3m('LDL', opcode=0x08),
'STA': mnemonicF3m('STA', opcode=0x0C),
'STX': mnemonicF3m('STX', opcode=0x10),
'STL': mnemonicF3m('STL', opcode=0x14),
'ADD': mnemonicF3m('ADD', opcode=0x18),
'SUB': mnemonicF3m('SUB', opcode=0x1C),
'MUL': mnemonicF3m('MUL', opcode=0x20),
'DIV': mnemonicF3m('DIV', opcode=0x24),
'COMP': mnemonicF3m('COMP', opcode=0x28),
'J': mnemonicF3m('J', opcode=0x3C),
'JEQ': mnemonicF3m('JEQ', opcode=0x30),
'JGT': mnemonicF3m('JGT', opcode=0x34),
'JLT': mnemonicF3m('JLT', opcode=0x38),
'JSUB': mnemonicF3m('JSUB', opcode=0x48),
'TD': mnemonicF3m('TD', opcode=0xE0),
'RD': mnemonicF3m('RD', opcode=0xD8),
'WD': mnemonicF3m('WD', opcode=0xDC),
'RSUB': MnemonicF3('RSUB', opcode=0x4C),
'LDA': MnemonicF3m('LDA', opcode=0x00),
'LDX': MnemonicF3m('LDX', opcode=0x04),
'LDL': MnemonicF3m('LDL', opcode=0x08),
'STA': MnemonicF3m('STA', opcode=0x0C),
'STX': MnemonicF3m('STX', opcode=0x10),
'STL': MnemonicF3m('STL', opcode=0x14),
'ADD': MnemonicF3m('ADD', opcode=0x18),
'SUB': MnemonicF3m('SUB', opcode=0x1C),
'MUL': MnemonicF3m('MUL', opcode=0x20),
'DIV': MnemonicF3m('DIV', opcode=0x24),
'COMP': MnemonicF3m('COMP', opcode=0x28),
'J': MnemonicF3m('J', opcode=0x3C),
'JEQ': MnemonicF3m('JEQ', opcode=0x30),
'JGT': MnemonicF3m('JGT', opcode=0x34),
'JLT': MnemonicF3m('JLT', opcode=0x38),
'JSUB': MnemonicF3m('JSUB', opcode=0x48),
'TD': MnemonicF3m('TD', opcode=0xE0),
'RD': MnemonicF3m('RD', opcode=0xD8),
'WD': MnemonicF3m('WD', opcode=0xDC),
# Format 4 (razširjeni)
'+LDA': mnemonicF4m('+LDA', opcode=0x00),
'+JSUB': mnemonicF4m('+JSUB', opcode=0x48),
'+LDA': MnemonicF4m('+LDA', opcode=0x00),
'+JSUB': MnemonicF4m('+JSUB', opcode=0x48),
# Pomnilniške direktive
# podatki
'BYTE': mnemonicSd('BYTE'),
'WORD': mnemonicSd('WORD'),
'BYTE': MnemonicSd('BYTE'),
'WORD': MnemonicSd('WORD'),
# rezervacija
'RESB': mnemonicSn('RESB'),
'RESW': mnemonicSn('RESW'),
'RESB': MnemonicSn('RESB'),
'RESW': MnemonicSn('RESW'),
}

View file

@ -3,54 +3,79 @@ Created by PLY version 3.11 (http://www.dabeaz.com/ply)
Grammar
Rule 0 S' -> start
Rule 1 start -> LABEL command
Rule 2 start -> command
Rule 3 command -> MNEMONIC
Rule 4 command -> MNEMONIC args
Rule 5 args -> operand
Rule 6 args -> operand COMMA operand
Rule 7 operand -> REGISTER
Rule 8 operand -> AT address
Rule 9 operand -> HASH address
Rule 10 operand -> address
Rule 11 address -> NUMBER
Rule 12 address -> SYMBOL
Rule 1 start -> LABEL statement
Rule 2 start -> statement
Rule 3 statement -> instruction
Rule 4 statement -> directive
Rule 5 instruction -> MNEMONIC
Rule 6 instruction -> MNEMONIC args
Rule 7 directive -> DIRECTIVE
Rule 8 directive -> DIRECTIVE directive_args
Rule 9 directive_args -> directive_arg
Rule 10 directive_args -> directive_arg COMMA directive_arg
Rule 11 directive_arg -> NUMBER
Rule 12 directive_arg -> SYMBOL
Rule 13 directive_arg -> STRING
Rule 14 directive_arg -> operand
Rule 15 args -> operand
Rule 16 args -> operand COMMA operand
Rule 17 operand -> REGISTER
Rule 18 operand -> AT address
Rule 19 operand -> HASH address
Rule 20 operand -> PLUS address
Rule 21 operand -> address
Rule 22 address -> NUMBER
Rule 23 address -> SYMBOL
Terminals, with rules where they appear
AT : 8
COMMA : 6
HASH : 9
AT : 18
COMMA : 10 16
DIRECTIVE : 7 8
HASH : 19
LABEL : 1
MNEMONIC : 3 4
NUMBER : 11
REGISTER : 7
SYMBOL : 12
MNEMONIC : 5 6
NUMBER : 11 22
PLUS : 20
REGISTER : 17
STRING : 13
SYMBOL : 12 23
error :
Nonterminals, with rules where they appear
address : 8 9 10
args : 4
command : 1 2
operand : 5 6 6
address : 18 19 20 21
args : 6
directive : 4
directive_arg : 9 10 10
directive_args : 8
instruction : 3
operand : 14 15 16 16
start : 0
statement : 1 2
Parsing method: LALR
state 0
(0) S' -> . start
(1) start -> . LABEL command
(2) start -> . command
(3) command -> . MNEMONIC
(4) command -> . MNEMONIC args
(1) start -> . LABEL statement
(2) start -> . statement
(3) statement -> . instruction
(4) statement -> . directive
(5) instruction -> . MNEMONIC
(6) instruction -> . MNEMONIC args
(7) directive -> . DIRECTIVE
(8) directive -> . DIRECTIVE directive_args
LABEL shift and go to state 2
MNEMONIC shift and go to state 4
MNEMONIC shift and go to state 6
DIRECTIVE shift and go to state 7
start shift and go to state 1
command shift and go to state 3
statement shift and go to state 3
instruction shift and go to state 4
directive shift and go to state 5
state 1
@ -60,160 +85,337 @@ state 1
state 2
(1) start -> LABEL . command
(3) command -> . MNEMONIC
(4) command -> . MNEMONIC args
(1) start -> LABEL . statement
(3) statement -> . instruction
(4) statement -> . directive
(5) instruction -> . MNEMONIC
(6) instruction -> . MNEMONIC args
(7) directive -> . DIRECTIVE
(8) directive -> . DIRECTIVE directive_args
MNEMONIC shift and go to state 4
MNEMONIC shift and go to state 6
DIRECTIVE shift and go to state 7
command shift and go to state 5
statement shift and go to state 8
instruction shift and go to state 4
directive shift and go to state 5
state 3
(2) start -> command .
(2) start -> statement .
$end reduce using rule 2 (start -> command .)
$end reduce using rule 2 (start -> statement .)
state 4
(3) command -> MNEMONIC .
(4) command -> MNEMONIC . args
(5) args -> . operand
(6) args -> . operand COMMA operand
(7) operand -> . REGISTER
(8) operand -> . AT address
(9) operand -> . HASH address
(10) operand -> . address
(11) address -> . NUMBER
(12) address -> . SYMBOL
(3) statement -> instruction .
$end reduce using rule 3 (command -> MNEMONIC .)
REGISTER shift and go to state 8
AT shift and go to state 9
HASH shift and go to state 11
NUMBER shift and go to state 12
SYMBOL shift and go to state 13
$end reduce using rule 3 (statement -> instruction .)
args shift and go to state 6
operand shift and go to state 7
address shift and go to state 10
state 5
(1) start -> LABEL command .
(4) statement -> directive .
$end reduce using rule 1 (start -> LABEL command .)
$end reduce using rule 4 (statement -> directive .)
state 6
(4) command -> MNEMONIC args .
(5) instruction -> MNEMONIC .
(6) instruction -> MNEMONIC . args
(15) args -> . operand
(16) args -> . operand COMMA operand
(17) operand -> . REGISTER
(18) operand -> . AT address
(19) operand -> . HASH address
(20) operand -> . PLUS address
(21) operand -> . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
$end reduce using rule 4 (command -> MNEMONIC args .)
$end reduce using rule 5 (instruction -> MNEMONIC .)
REGISTER shift and go to state 11
AT shift and go to state 12
HASH shift and go to state 14
PLUS shift and go to state 15
NUMBER shift and go to state 16
SYMBOL shift and go to state 17
args shift and go to state 9
operand shift and go to state 10
address shift and go to state 13
state 7
(5) args -> operand .
(6) args -> operand . COMMA operand
(7) directive -> DIRECTIVE .
(8) directive -> DIRECTIVE . directive_args
(9) directive_args -> . directive_arg
(10) directive_args -> . directive_arg COMMA directive_arg
(11) directive_arg -> . NUMBER
(12) directive_arg -> . SYMBOL
(13) directive_arg -> . STRING
(14) directive_arg -> . operand
(17) operand -> . REGISTER
(18) operand -> . AT address
(19) operand -> . HASH address
(20) operand -> . PLUS address
(21) operand -> . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
$end reduce using rule 5 (args -> operand .)
COMMA shift and go to state 14
$end reduce using rule 7 (directive -> DIRECTIVE .)
NUMBER shift and go to state 20
SYMBOL shift and go to state 21
STRING shift and go to state 22
REGISTER shift and go to state 11
AT shift and go to state 12
HASH shift and go to state 14
PLUS shift and go to state 15
directive_args shift and go to state 18
directive_arg shift and go to state 19
operand shift and go to state 23
address shift and go to state 13
state 8
(7) operand -> REGISTER .
(1) start -> LABEL statement .
COMMA reduce using rule 7 (operand -> REGISTER .)
$end reduce using rule 7 (operand -> REGISTER .)
$end reduce using rule 1 (start -> LABEL statement .)
state 9
(8) operand -> AT . address
(11) address -> . NUMBER
(12) address -> . SYMBOL
(6) instruction -> MNEMONIC args .
NUMBER shift and go to state 12
SYMBOL shift and go to state 13
$end reduce using rule 6 (instruction -> MNEMONIC args .)
address shift and go to state 15
state 10
(10) operand -> address .
(15) args -> operand .
(16) args -> operand . COMMA operand
COMMA reduce using rule 10 (operand -> address .)
$end reduce using rule 10 (operand -> address .)
$end reduce using rule 15 (args -> operand .)
COMMA shift and go to state 24
state 11
(9) operand -> HASH . address
(11) address -> . NUMBER
(12) address -> . SYMBOL
(17) operand -> REGISTER .
NUMBER shift and go to state 12
SYMBOL shift and go to state 13
COMMA reduce using rule 17 (operand -> REGISTER .)
$end reduce using rule 17 (operand -> REGISTER .)
address shift and go to state 16
state 12
(11) address -> NUMBER .
(18) operand -> AT . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
COMMA reduce using rule 11 (address -> NUMBER .)
$end reduce using rule 11 (address -> NUMBER .)
NUMBER shift and go to state 16
SYMBOL shift and go to state 17
address shift and go to state 25
state 13
(12) address -> SYMBOL .
(21) operand -> address .
COMMA reduce using rule 12 (address -> SYMBOL .)
$end reduce using rule 12 (address -> SYMBOL .)
COMMA reduce using rule 21 (operand -> address .)
$end reduce using rule 21 (operand -> address .)
state 14
(6) args -> operand COMMA . operand
(7) operand -> . REGISTER
(8) operand -> . AT address
(9) operand -> . HASH address
(10) operand -> . address
(11) address -> . NUMBER
(12) address -> . SYMBOL
(19) operand -> HASH . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
REGISTER shift and go to state 8
AT shift and go to state 9
HASH shift and go to state 11
NUMBER shift and go to state 12
SYMBOL shift and go to state 13
NUMBER shift and go to state 16
SYMBOL shift and go to state 17
operand shift and go to state 17
address shift and go to state 10
address shift and go to state 26
state 15
(8) operand -> AT address .
(20) operand -> PLUS . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
COMMA reduce using rule 8 (operand -> AT address .)
$end reduce using rule 8 (operand -> AT address .)
NUMBER shift and go to state 16
SYMBOL shift and go to state 17
address shift and go to state 27
state 16
(9) operand -> HASH address .
(22) address -> NUMBER .
COMMA reduce using rule 9 (operand -> HASH address .)
$end reduce using rule 9 (operand -> HASH address .)
COMMA reduce using rule 22 (address -> NUMBER .)
$end reduce using rule 22 (address -> NUMBER .)
state 17
(6) args -> operand COMMA operand .
(23) address -> SYMBOL .
$end reduce using rule 6 (args -> operand COMMA operand .)
COMMA reduce using rule 23 (address -> SYMBOL .)
$end reduce using rule 23 (address -> SYMBOL .)
state 18
(8) directive -> DIRECTIVE directive_args .
$end reduce using rule 8 (directive -> DIRECTIVE directive_args .)
state 19
(9) directive_args -> directive_arg .
(10) directive_args -> directive_arg . COMMA directive_arg
$end reduce using rule 9 (directive_args -> directive_arg .)
COMMA shift and go to state 28
state 20
(11) directive_arg -> NUMBER .
(22) address -> NUMBER .
! reduce/reduce conflict for COMMA resolved using rule 11 (directive_arg -> NUMBER .)
! reduce/reduce conflict for $end resolved using rule 11 (directive_arg -> NUMBER .)
COMMA reduce using rule 11 (directive_arg -> NUMBER .)
$end reduce using rule 11 (directive_arg -> NUMBER .)
! COMMA [ reduce using rule 22 (address -> NUMBER .) ]
! $end [ reduce using rule 22 (address -> NUMBER .) ]
state 21
(12) directive_arg -> SYMBOL .
(23) address -> SYMBOL .
! reduce/reduce conflict for COMMA resolved using rule 12 (directive_arg -> SYMBOL .)
! reduce/reduce conflict for $end resolved using rule 12 (directive_arg -> SYMBOL .)
COMMA reduce using rule 12 (directive_arg -> SYMBOL .)
$end reduce using rule 12 (directive_arg -> SYMBOL .)
! COMMA [ reduce using rule 23 (address -> SYMBOL .) ]
! $end [ reduce using rule 23 (address -> SYMBOL .) ]
state 22
(13) directive_arg -> STRING .
COMMA reduce using rule 13 (directive_arg -> STRING .)
$end reduce using rule 13 (directive_arg -> STRING .)
state 23
(14) directive_arg -> operand .
COMMA reduce using rule 14 (directive_arg -> operand .)
$end reduce using rule 14 (directive_arg -> operand .)
state 24
(16) args -> operand COMMA . operand
(17) operand -> . REGISTER
(18) operand -> . AT address
(19) operand -> . HASH address
(20) operand -> . PLUS address
(21) operand -> . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
REGISTER shift and go to state 11
AT shift and go to state 12
HASH shift and go to state 14
PLUS shift and go to state 15
NUMBER shift and go to state 16
SYMBOL shift and go to state 17
operand shift and go to state 29
address shift and go to state 13
state 25
(18) operand -> AT address .
COMMA reduce using rule 18 (operand -> AT address .)
$end reduce using rule 18 (operand -> AT address .)
state 26
(19) operand -> HASH address .
COMMA reduce using rule 19 (operand -> HASH address .)
$end reduce using rule 19 (operand -> HASH address .)
state 27
(20) operand -> PLUS address .
COMMA reduce using rule 20 (operand -> PLUS address .)
$end reduce using rule 20 (operand -> PLUS address .)
state 28
(10) directive_args -> directive_arg COMMA . directive_arg
(11) directive_arg -> . NUMBER
(12) directive_arg -> . SYMBOL
(13) directive_arg -> . STRING
(14) directive_arg -> . operand
(17) operand -> . REGISTER
(18) operand -> . AT address
(19) operand -> . HASH address
(20) operand -> . PLUS address
(21) operand -> . address
(22) address -> . NUMBER
(23) address -> . SYMBOL
NUMBER shift and go to state 20
SYMBOL shift and go to state 21
STRING shift and go to state 22
REGISTER shift and go to state 11
AT shift and go to state 12
HASH shift and go to state 14
PLUS shift and go to state 15
directive_arg shift and go to state 30
operand shift and go to state 23
address shift and go to state 13
state 29
(16) args -> operand COMMA operand .
$end reduce using rule 16 (args -> operand COMMA operand .)
state 30
(10) directive_args -> directive_arg COMMA directive_arg .
$end reduce using rule 10 (directive_args -> directive_arg COMMA directive_arg .)
WARNING:
WARNING: Conflicts:
WARNING:
WARNING: reduce/reduce conflict in state 20 resolved using rule (directive_arg -> NUMBER)
WARNING: rejected rule (address -> NUMBER) in state 20
WARNING: reduce/reduce conflict in state 21 resolved using rule (directive_arg -> SYMBOL)
WARNING: rejected rule (address -> SYMBOL) in state 21

View file

@ -1,125 +1,191 @@
import ply.lex
import ply.yacc
from zbirnik.parserctx import ParserContext
#import sys
# --------------------
global at_line_start, seen_mnemonic_or_directive
# Lexer
# --------------------
tokens = (
'AT',
'COMMA',
'HASH',
'PLUS',
'LABEL',
'REGISTER',
'MNEMONIC',
'DIRECTIVE',
'SYMBOL',
'NUMBER',
'COMMENT',
'STRING',
)
t_AT = r'@'
t_COMMA = r','
t_HASH = r'\#'
t_LABEL = r'^[a-z_0-9]+'
t_PLUS = r'\+'
t_REGISTER = r'\b[ABFLSTX]\b'
t_MNEMONIC = r'\b[A-Z]+\b'
t_SYMBOL = r'[a-z_0-9]+'
# Assembler directives
directives = {
'START', 'END', 'BYTE', 'WORD', 'RESB', 'RESW',
'BASE', 'NOBASE', 'EQU', 'ORG', 'LTORG', 'USE',
'EXTDEF', 'EXTREF', 'CSECT'
}
# Track if we're at the start of a line
at_line_start = True
seen_mnemonic_or_directive = False
# Lexer rule for runs of blanks/tabs. No token is produced (the function
# returns None); its only effect is to clear the line-start flag so that a
# following identifier is no longer classified as a label.
def t_WHITESPACE(t):
    r'[ \t]+'
    global at_line_start
    # Once whitespace has been seen we are past the label column.
    at_line_start = False
# Lexer rule for C'...' / X'...' literals. The docstring below is the token
# regex and must stay the first statement of the function.
def t_STRING(t):
    r'[CX]\'[^\']*\''
    global at_line_start
    # Seeing any token means we are no longer at the start of the line.
    at_line_start = False
    return t
# Lexer rule for every identifier-shaped word. The token type is decided
# from lexer state rather than from the text alone:
#   * first thing on the line            -> LABEL
#   * after the operation was seen       -> SYMBOL
#   * otherwise a known directive name   -> DIRECTIVE
#   * otherwise an all-uppercase word    -> MNEMONIC
#   * anything else                      -> SYMBOL
def t_IDENTIFIER(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    global at_line_start, seen_mnemonic_or_directive

    # Label column: nothing (not even whitespace) preceded this word.
    if at_line_start:
        at_line_start = False
        t.type = 'LABEL'
        return t

    # The operation field is already filled, so this must be an operand.
    if seen_mnemonic_or_directive:
        t.type = 'SYMBOL'
        return t

    # First word of the operation field.
    is_directive = t.value in directives
    if is_directive or t.value.isupper():
        t.type = 'DIRECTIVE' if is_directive else 'MNEMONIC'
        seen_mnemonic_or_directive = True
    else:
        t.type = 'SYMBOL'
    return t
def t_NUMBER(t):
r'-?\d+'
t.value = int(t.value)
r'-?(?:0[xX][0-9a-fA-F]+|\d+)'
global at_line_start
at_line_start = False
t.value = int(t.value, 0) # Handles hex (0x) and decimal
return t
def t_COMMENT(t):
r'\..*'
t.value = t.value[1:].strip()
return t
t_ignore_COMMENT = r'\..*'
t_ignore = ' \t\n'
# Lexer rule for line breaks: produces no token, re-arms the per-line state
# used by the identifier rule, and advances the lexer's line counter by the
# number of characters matched.
def t_newline(t):
    r'\n+'
    global at_line_start, seen_mnemonic_or_directive
    # A fresh line starts in the label column with no operation seen yet.
    at_line_start, seen_mnemonic_or_directive = True, False
    consumed = len(t.value)
    t.lexer.lineno += consumed
def t_error(t):
print(f"Illegal character {t.value[0]!r}")
print(f'Illegal character {t.value[0]!r}')
t.lexer.skip(1)
lexer = ply.lex.lex()
# --------------------
# Parser
# --------------------
def p_start(p):
r'''start : LABEL command
| command
| COMMENT
'''
# komentar
if len(p) == 2 and isinstance(p[1], str):
p[0] = None
return
# brez labela
if len(p) == 2:
label = None
mnemonic, operands = p[1]
'''start : LABEL statement
| statement'''
if len(p) == 3:
# Has label
p[0] = (p[1], p[2][0], *p[2][1:])
else:
label = p[1]
mnemonic, operands = p[2]
# No label
p[0] = (None, p[1][0], *p[1][1:])
p[0] = ParserContext(label, mnemonic, operands)
# Grammar rule: a statement is either an instruction or a directive.
# The docstring is the grammar specification read by the parser generator.
def p_statement(p):
    '''statement : instruction
                 | directive'''
    # Pass the child's value (a list starting with the operation name) through.
    p[0] = p[1]
def p_command(p):
r'''command : MNEMONIC
| MNEMONIC args
'''
def p_instruction(p):
'''instruction : MNEMONIC
| MNEMONIC args'''
if len(p) == 2:
p[0] = (p[1], [])
p[0] = [p[1]]
else:
p[0] = (p[1], list(p[2]))
p[0] = [p[1]] + list(p[2])
# Grammar rule for a directive with optional arguments. The result is a flat
# list: the directive name followed by its arguments, if any.
def p_directive(p):
    '''directive : DIRECTIVE
                 | DIRECTIVE directive_args'''
    name = p[1]
    # len(p) == 2 means the bare form without arguments matched.
    extra = p[2] if len(p) > 2 else ()
    p[0] = [name, *extra]
def p_args(p):
r'''args : operand
| operand COMMA operand
'''
# Grammar rule for one or two comma-separated directive arguments; yields a
# list with the argument values (the COMMA itself is dropped).
def p_directive_args(p):
    '''directive_args : directive_arg
                      | directive_arg COMMA directive_arg'''
    first = p[1]
    p[0] = [first] if len(p) == 2 else [first, p[3]]
# Grammar rule for a single directive argument.
#
# NUMBER and SYMBOL are deliberately NOT listed here: they are already
# reachable through `operand -> address -> NUMBER | SYMBOL`, and listing them
# a second time makes the grammar ambiguous (reduce/reduce conflicts between
# `directive_arg -> NUMBER` and `address -> NUMBER`, likewise for SYMBOL).
# The accepted input and the resulting value are the same either way, since
# the address/operand rules pass a bare NUMBER or SYMBOL through unchanged.
def p_directive_arg(p):
    '''directive_arg : STRING
                     | operand'''
    p[0] = p[1]
# Grammar rule for one or two comma-separated instruction operands; yields a
# list with the operand values (the COMMA itself is dropped).
def p_args(p):
    '''args : operand
            | operand COMMA operand'''
    head = p[1]
    p[0] = [head] if len(p) == 2 else [head, p[3]]
def p_operand(p):
r'''operand : REGISTER
'''operand : REGISTER
| AT address
| HASH address
| address
'''
| PLUS address
| address'''
if len(p) == 2:
p[0] = p[1]
else:
# @X ali #5 → zadrži znak
# Combine prefix with address
p[0] = (p[1], p[2])
def p_address(p):
r'''address : NUMBER
| SYMBOL
'''
'''address : NUMBER
| SYMBOL'''
p[0] = p[1]
def p_error(p):
if p:
raise SyntaxError(f"Syntax error at token {p}")
print(f'Syntax error at token {p.type}={p.value!r}')
else:
raise SyntaxError("Syntax error at EOF")
print('Syntax error at EOF')
parser = ply.yacc.yacc()
if __name__ == '__main__':
import sys
parser = ply.yacc.yacc()
for line in sys.stdin:
print(parser.parse(line))
#if __name__ == '__main__':
# for line in sys.stdin:
# line = line.rstrip('\n\r')
# if line and not line.lstrip().startswith('.'):
# at_line_start = True
# seen_mnemonic_or_directive = False
# result = parser.parse(line, lexer=lexer)
# print(result)

View file

@ -1,12 +1,6 @@
class ParserContext:
def __init__(self, parsed):
"""
parsed je rezultat PLY parserja, npr.:
('loop', ('LDA', (('#', 5),)))
(None, ('FIX',))
('COMMENT', 'to je komentar')
"""
# Handle COMMENT special case (if you still need it)
if isinstance(parsed, tuple) and parsed[0] == 'COMMENT':
self.label = None
self.mnemonic = 'COMMENT'
@ -14,11 +8,16 @@ class ParserContext:
self.comment = parsed[1]
return
self.label, command = parsed
self.mnemonic = command[0]
self.operands = list(command[1]) if len(command) > 1 else []
# New parser format: (label, mnemonic, *operands)
if not isinstance(parsed, tuple) or len(parsed) < 2:
raise ValueError(f"Invalid parsed format: {parsed}")
self.label = parsed[0] # Can be None
self.mnemonic = parsed[1]
# All remaining elements are operands
self.operands = list(parsed[2:]) if len(parsed) > 2 else []
self.comment = None
def has_operand(self) -> bool:
return len(self.operands) > 0
@ -28,35 +27,59 @@ class ParserContext:
return self.operands.pop(0)
def read_reg(self) -> str:
"""Read a register operand (e.g., 'A', 'X', 'L')"""
op = self.next_op()
return op[0]
# If it's a plain string, return it
if isinstance(op, str):
return op
# If it's a tuple (shouldn't happen for registers), take first element
if isinstance(op, tuple):
return op[0]
return str(op)
def read_num_sym(self):
"""Read a number or symbol operand"""
op = self.next_op()
if (len(op) == 1):
# If it's already a simple value (number or string), return it
if isinstance(op, (int, str)):
return op
# If it's a tuple with prefix (like ('@', 'buffer')), return the value
if isinstance(op, tuple) and len(op) == 2:
return op[1]
# If it's a single-element tuple, unwrap it
if isinstance(op, tuple) and len(op) == 1:
return op[0]
return op[1]
return op
def read_addressed_operand(self):
from adressing import AddrMode
"""Read an operand with addressing mode information"""
from zbirnik.adressing import AddrMode
indexed = False
addr_mode = AddrMode.SIMPLE
op = self.next_operand()
# immediate / indirect
if len(op) == 2:
op = self.next_op()
# Check if operand has a prefix (immediate/indirect/indexed)
if isinstance(op, tuple) and len(op) == 2:
prefix, value = op
addr_mode = {'#': AddrMode.IMMEDIATE, '@': AddrMode.INDIRECT}[prefix]
if prefix == '#':
addr_mode = AddrMode.IMMEDIATE
elif prefix == '@':
addr_mode = AddrMode.INDIRECT
elif prefix == '+':
# Extended format (SIC/XE)
addr_mode = AddrMode.EXTENDED
else:
# Unknown prefix, treat as simple
value = op
else:
value = op[0]
#indeksiranje
if self.operands and self.operands[0] == ('X',):
# Simple operand (no prefix)
value = op
# Check for indexed addressing (X register)
# In new parser, indexed would be a second operand that's just 'X'
if self.operands and self.operands[0] == 'X':
self.operands.pop(0)
indexed = True
return addr_mode, value, indexed
return addr_mode, value, indexed

View file

@ -6,9 +6,9 @@ _tabversion = '3.10'
_lr_method = 'LALR'
_lr_signature = 'AT COMMA HASH LABEL MNEMONIC NUMBER REGISTER SYMBOLstart : LABEL command\n | commandcommand : MNEMONIC\n | MNEMONIC argsargs : operand\n | operand COMMA operandoperand : REGISTER\n | AT address\n | HASH address\n | addressaddress : NUMBER\n | SYMBOL'
_lr_signature = 'AT COMMA DIRECTIVE HASH LABEL MNEMONIC NUMBER PLUS REGISTER STRING SYMBOLstart : LABEL statement\n | statementstatement : instruction\n | directiveinstruction : MNEMONIC\n | MNEMONIC argsdirective : DIRECTIVE\n | DIRECTIVE directive_argsdirective_args : directive_arg\n | directive_arg COMMA directive_argdirective_arg : NUMBER\n | SYMBOL\n | STRING\n | operandargs : operand\n | operand COMMA operandoperand : REGISTER\n | AT address\n | HASH address\n | PLUS address\n | addressaddress : NUMBER\n | SYMBOL'
_lr_action_items = {'LABEL':([0,],[2,]),'MNEMONIC':([0,2,],[4,4,]),'$end':([1,3,4,5,6,7,8,10,12,13,15,16,17,],[0,-2,-3,-1,-4,-5,-7,-10,-11,-12,-8,-9,-6,]),'REGISTER':([4,14,],[8,8,]),'AT':([4,14,],[9,9,]),'HASH':([4,14,],[11,11,]),'NUMBER':([4,9,11,14,],[12,12,12,12,]),'SYMBOL':([4,9,11,14,],[13,13,13,13,]),'COMMA':([7,8,10,12,13,15,16,],[14,-7,-10,-11,-12,-8,-9,]),}
_lr_action_items = {'LABEL':([0,],[2,]),'MNEMONIC':([0,2,],[6,6,]),'DIRECTIVE':([0,2,],[7,7,]),'$end':([1,3,4,5,6,7,8,9,10,11,13,16,17,18,19,20,21,22,23,25,26,27,29,30,],[0,-2,-3,-4,-5,-7,-1,-6,-15,-17,-21,-22,-23,-8,-9,-11,-12,-13,-14,-18,-19,-20,-16,-10,]),'REGISTER':([6,7,24,28,],[11,11,11,11,]),'AT':([6,7,24,28,],[12,12,12,12,]),'HASH':([6,7,24,28,],[14,14,14,14,]),'PLUS':([6,7,24,28,],[15,15,15,15,]),'NUMBER':([6,7,12,14,15,24,28,],[16,20,16,16,16,16,20,]),'SYMBOL':([6,7,12,14,15,24,28,],[17,21,17,17,17,17,21,]),'STRING':([7,28,],[22,22,]),'COMMA':([10,11,13,16,17,19,20,21,22,23,25,26,27,],[24,-17,-21,-22,-23,28,-11,-12,-13,-14,-18,-19,-20,]),}
_lr_action = {}
for _k, _v in _lr_action_items.items():
@ -17,7 +17,7 @@ for _k, _v in _lr_action_items.items():
_lr_action[_x][_k] = _y
del _lr_action_items
_lr_goto_items = {'start':([0,],[1,]),'command':([0,2,],[3,5,]),'args':([4,],[6,]),'operand':([4,14,],[7,17,]),'address':([4,9,11,14,],[10,15,16,10,]),}
_lr_goto_items = {'start':([0,],[1,]),'statement':([0,2,],[3,8,]),'instruction':([0,2,],[4,4,]),'directive':([0,2,],[5,5,]),'args':([6,],[9,]),'operand':([6,7,24,28,],[10,23,29,23,]),'address':([6,7,12,14,15,24,28,],[13,13,25,26,27,13,13,]),'directive_args':([7,],[18,]),'directive_arg':([7,28,],[19,30,]),}
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
@ -27,16 +27,27 @@ for _k, _v in _lr_goto_items.items():
del _lr_goto_items
_lr_productions = [
("S' -> start","S'",1,None,None,None),
('start -> LABEL command','start',2,'p_start','parser.py',41),
('start -> command','start',1,'p_start','parser.py',42),
('command -> MNEMONIC','command',1,'p_command','parser.py',48),
('command -> MNEMONIC args','command',2,'p_command','parser.py',49),
('args -> operand','args',1,'p_args','parser.py',53),
('args -> operand COMMA operand','args',3,'p_args','parser.py',54),
('operand -> REGISTER','operand',1,'p_operand','parser.py',60),
('operand -> AT address','operand',2,'p_operand','parser.py',61),
('operand -> HASH address','operand',2,'p_operand','parser.py',62),
('operand -> address','operand',1,'p_operand','parser.py',63),
('address -> NUMBER','address',1,'p_address','parser.py',67),
('address -> SYMBOL','address',1,'p_address','parser.py',68),
('start -> LABEL statement','start',2,'p_start','parser.py',92),
('start -> statement','start',1,'p_start','parser.py',93),
('statement -> instruction','statement',1,'p_statement','parser.py',102),
('statement -> directive','statement',1,'p_statement','parser.py',103),
('instruction -> MNEMONIC','instruction',1,'p_instruction','parser.py',107),
('instruction -> MNEMONIC args','instruction',2,'p_instruction','parser.py',108),
('directive -> DIRECTIVE','directive',1,'p_directive','parser.py',115),
('directive -> DIRECTIVE directive_args','directive',2,'p_directive','parser.py',116),
('directive_args -> directive_arg','directive_args',1,'p_directive_args','parser.py',123),
('directive_args -> directive_arg COMMA directive_arg','directive_args',3,'p_directive_args','parser.py',124),
('directive_arg -> NUMBER','directive_arg',1,'p_directive_arg','parser.py',131),
('directive_arg -> SYMBOL','directive_arg',1,'p_directive_arg','parser.py',132),
('directive_arg -> STRING','directive_arg',1,'p_directive_arg','parser.py',133),
('directive_arg -> operand','directive_arg',1,'p_directive_arg','parser.py',134),
('args -> operand','args',1,'p_args','parser.py',138),
('args -> operand COMMA operand','args',3,'p_args','parser.py',139),
('operand -> REGISTER','operand',1,'p_operand','parser.py',146),
('operand -> AT address','operand',2,'p_operand','parser.py',147),
('operand -> HASH address','operand',2,'p_operand','parser.py',148),
('operand -> PLUS address','operand',2,'p_operand','parser.py',149),
('operand -> address','operand',1,'p_operand','parser.py',150),
('address -> NUMBER','address',1,'p_address','parser.py',158),
('address -> SYMBOL','address',1,'p_address','parser.py',159),
]

View file

@ -0,0 +1,6 @@
TEST START 0
LDA NUM
STA RES
NUM WORD 5
RES RESW 1
END TEST

View file

@ -1,8 +1,8 @@
from zbirnik.ukazi.node import Node
class directive(Node):
def __init__(self, direktiva : str, operand : str | int | None, label : str | None = None):
self.direktiva = direktiva
def __init__(self, name : str, operand : str | int | None, label : str | None = None):
self.name = name
super().__init__(label)
self.operand = operand
@ -12,10 +12,10 @@ class directive(Node):
def emit(self, ctx):
# BASE directive
if self.name == "BASE":
if isinstance(self.value, str):
ctx.base = ctx.symtab[self.value]
elif isinstance(self.value, int):
ctx.base = self.value
if isinstance(self.operand, str):
ctx.base = ctx.symtab[self.operand]
elif isinstance(self.operand, int):
ctx.base = self.operand
else:
raise ValueError("Invalid BASE operand")
return None