SPO_JakaFurlan/ass3/zbirnik/lib/semanticAnalyzer.ml
2025-12-14 23:31:52 +01:00

320 lines
12 KiB
OCaml

(*prebere parsano kodo in izvede semantično analizo*)
type register = | A | X | L | B | S | T | F | PC | SW
let string_of_register (r : register) : string =
match r with
| A -> "A" | X -> "X" | L -> "L" | B -> "B" | S -> "S" | T -> "T" | F -> "F" | PC -> "PC" | SW -> "SW"
let parse_register = function
| "A" -> A | "X" -> X | "L" -> L | "B" -> B
| "S" -> S | "T" -> T | "F" -> F | "PC" -> PC | "SW" -> SW
| r -> failwith ("Unknown register: " ^ r)
type mnemonic_type =
| MnemonicD (* direktiva brez operandov (NOBASE, LTORG) *)
| MnemonicDn of string (* direktiva z enim številskim operandom (lahko tudi simbol) (START, END, ORG, EQU) *)
| MnemonicF1 (* ukaza formata 1 (brez operandov) (FIX, FLOAT, ...) *)
| MnemonicF2n of int (* F2 z enim številskim operandom (SVC) *)
| MnemonicF2r of register (* F2 z enim registrskim operandom (CLEAR, TIXR) *)
| MnemonicF2rn of register * int (* F2 z enim registrskim in enim številskim operandom (SHIFTL, SHIFTR) *)
| MnemonicF2rr of register * register(* F2 z dvema registrskima operandoma (ADDR, SUBR, ...) *)
| MnemonicF3 (* F3 brez operandov (RSUB) *)
| MnemonicF3m of string(* F3 z enim operandom (LDA, STA, ...) *)
| MnemonicF4m of string (* F4 z enim operandom (+LDA, +STA, ...) *)
| MnemonicSd of string (* pomnilniška direktiva s podatki (BYTE, WORD) *)
| MnemonicSn of int(* pomnilniška direktiva za rezervacijo (RESB, RESW) *)
| COMMENT
(* Print mnemonic_type as string *)
let string_of_mnemonic_type = function
| MnemonicD -> "MnemonicD"
| MnemonicDn s -> "MnemonicDn(" ^ s ^ ")"
| MnemonicF1 -> "MnemonicF1"
| MnemonicF2n n -> "MnemonicF2n(" ^ string_of_int n ^ ")"
| MnemonicF2r r1 -> "MnemonicF2r(" ^ string_of_register r1 ^ ")"
| MnemonicF2rn (r1, n) -> "MnemonicF2rn(" ^ string_of_register r1 ^ ", " ^ string_of_int n ^ ")"
| MnemonicF2rr (r1, r2) -> "MnemonicF2rr(" ^ string_of_register r1 ^ ", " ^ string_of_register r2 ^ ")"
| MnemonicF3 -> "MnemonicF3"
| MnemonicF3m s -> "MnemonicF3m(" ^ s ^ ")"
| MnemonicF4m s -> "MnemonicF4m(" ^ s ^ ")"
| MnemonicSd s -> "MnemonicSd(" ^ s ^ ")"
| MnemonicSn n -> "MnemonicSn(" ^ string_of_int n ^ ")"
| COMMENT -> "Comment"
let get_string_from_mnemType (m : mnemonic_type) : string option =
match m with
| MnemonicDn s
| MnemonicF3m s
| MnemonicF4m s
| MnemonicSd s -> Some s
| _ -> None
type mnemonic =
(* ------------------- Format 1 instructions ------------------- *)
| FIX | FLOAT | HIO | NORM | SIO | TIO
(* ------------------- Format 2 instructions ------------------- *)
| ADDR | CLEAR | COMPR | DIVR | MULR | RMO
| SHIFTL | SHIFTR | SUBR | SVC | TIXR
(* ------------------- Format 3/4 instructions ------------------- *)
| ADD | ADDF | AND | COMP | COMPF | DIV
| J | JEQ | JGT | JLT | JSUB | LDA | LDB | LDCH | LDF
| LDL | LDS | LDT | LDX | LPS | MUL | OR | RD
| RSUB | STA | STB | STCH | STF | STL | STS | STSW
| STT | STX | SUB | SUBF | TD | TIX | WD
(* ------------------- SIC/XE Directives ------------------- *)
| START (* Set program starting address *)
| END (* End of program *)
| ORG (* Set location counter *)
| EQU (* Define a symbol *)
| BASE (* Set base register *)
| NOBASE (* Clear base register *)
| LTORG (* Literal pool *)
| RESW (* Reserve words *)
| RESB (* Reserve bytes *)
| WORD (* Define a word *)
| BYTE (* Define a byte or character constant *)
(*comment -> for ignoring full line comments*)
|COMMENT
let string_of_mnemonic = function
(* ------------------- Format 1 instructions ------------------- *)
| FIX -> "FIX" | FLOAT -> "FLOAT" | HIO -> "HIO" | NORM -> "NORM"
| SIO -> "SIO" | TIO -> "TIO"
(* ------------------- Format 2 instructions ------------------- *)
| ADDR -> "ADDR" | CLEAR -> "CLEAR" | COMPR -> "COMPR" | DIVR -> "DIVR"
| MULR -> "MULR" | RMO -> "RMO" | SHIFTL -> "SHIFTL" | SHIFTR -> "SHIFTR"
| SUBR -> "SUBR" | SVC -> "SVC" | TIXR -> "TIXR"
(* ------------------- Format 3/4 instructions ------------------- *)
| ADD -> "ADD" | ADDF -> "ADDF" | AND -> "AND" | COMP -> "COMP"
| COMPF -> "COMPF" | DIV -> "DIV" | J -> "J" | JEQ -> "JEQ" | JGT -> "JGT"
| JLT -> "JLT" | JSUB -> "JSUB" | LDA -> "LDA" | LDB -> "LDB" | LDCH -> "LDCH"
| LDF -> "LDF" | LDL -> "LDL" | LDS -> "LDS" | LDT -> "LDT" | LDX -> "LDX"
| LPS -> "LPS" | MUL -> "MUL" | OR -> "OR" | RD -> "RD" | RSUB -> "RSUB"
| STA -> "STA" | STB -> "STB" | STCH -> "STCH" | STF -> "STF" | STL -> "STL"
| STS -> "STS" | STSW -> "STSW" | STT -> "STT" | STX -> "STX" | SUB -> "SUB"
| SUBF -> "SUBF" | TD -> "TD" | TIX -> "TIX" | WD -> "WD"
(* ------------------- SIC/XE Directives ------------------- *)
| START -> "START" | END -> "END" | ORG -> "ORG" | EQU -> "EQU"
| BASE -> "BASE" | NOBASE -> "NOBASE" | LTORG -> "LTORG"
| RESW -> "RESW" | RESB -> "RESB" | WORD -> "WORD" | BYTE -> "BYTE"
(*comment*)
| COMMENT -> "COMMENT"
type lineSemantic = {
line_no : int;
label : string option;
ext : bool; (*za razlikovanje med F3 in F4*)
opcode : mnemonic;
mnem : mnemonic_type;
x : bool; (*ali imamo X naslavljanje*)
n : bool;
i : bool;
comment : string option;
len : int; (*dolžina, pove za koliko moramo povečati locctr*)
mutable loc : int option; (* assigned in pass1 *)
}
open Parser
let getMnem (opcode : string) : mnemonic * bool =
(*preveri če gre za ext in odstrani +*)
let opcode, ext = if opcode.[0] = '+' then String.sub opcode 1 (String.length opcode - 1), true else opcode, false in
let o =
match opcode with
(* ------------------- Format 1 instructions ------------------- *)
| "FIX" -> FIX | "FLOAT" -> FLOAT | "HIO" -> HIO
| "NORM" -> NORM | "SIO" -> SIO | "TIO" -> TIO
(* ------------------- Format 2 instructions ------------------- *)
| "ADDR" -> ADDR | "CLEAR" -> CLEAR | "COMPR" -> COMPR | "DIVR" -> DIVR
| "MULR" -> MULR | "RMO" -> RMO | "SHIFTL" -> SHIFTL | "SHIFTR" -> SHIFTR
| "SUBR" -> SUBR | "SVC" -> SVC | "TIXR" -> TIXR
(* ------------------- Format 3/4 instructions ------------------- *)
| "ADD" -> ADD | "ADDF" -> ADDF | "AND" -> AND | "COMP" -> COMP
| "COMPF" -> COMPF | "DIV" -> DIV | "J" -> J | "JEQ" -> JEQ
| "JGT" -> JGT | "JLT" -> JLT | "JSUB" -> JSUB | "LDA" -> LDA
| "LDB" -> LDB | "LDCH" -> LDCH | "LDF" -> LDF | "LDL" -> LDL
| "LDS" -> LDS | "LDT" -> LDT | "LDX" -> LDX | "LPS" -> LPS
| "MUL" -> MUL | "OR" -> OR | "RD" -> RD | "RSUB" -> RSUB | "STA" -> STA
| "STB" -> STB | "STCH" -> STCH | "STF" -> STF | "STL" -> STL | "STS" -> STS
| "STSW" -> STSW | "STT" -> STT | "STX" -> STX | "SUB" -> SUB
| "SUBF" -> SUBF | "TD" -> TD | "TIX" -> TIX | "WD" -> WD
(* ------------------- SIC/XE Directives ------------------- *)
| "START" -> START | "END" -> END | "ORG" -> ORG
| "EQU" -> EQU | "BASE" -> BASE | "NOBASE" -> NOBASE
| "LTORG" -> LTORG | "RESW" -> RESW | "RESB" -> RESB
| "WORD" -> WORD | "BYTE" -> BYTE
(*comment*)
| "COMMENT" -> COMMENT
| _ -> failwith ("Unknown opcode: " ^ opcode)
in
o, ext (*vrnemo opcode in extended*)
let getMnemType (opcode : mnemonic) (ext : bool) (operand : string list) : mnemonic_type =
match opcode, operand with
|COMMENT, _ -> COMMENT
(* ------------------- Format 1 ------------------- *)
| (FIX | FLOAT | HIO | NORM | SIO | TIO), [] ->
MnemonicF1
| (FIX | FLOAT | HIO | NORM | SIO | TIO), _ ->
failwith (Printf.sprintf "%s takes no operands, but got %d"
(string_of_mnemonic opcode) (List.length operand))
(* ------------------- Format 2 ------------------- *)
| (SVC), [n] ->
MnemonicF2n (int_of_string n)
| SVC, _ -> failwith "SVC requires exactly 1 numeric operand"
| (CLEAR | TIXR), [r] ->
MnemonicF2r (parse_register r)
| (CLEAR | TIXR), _ -> failwith (Printf.sprintf "%s requires 1 register" (string_of_mnemonic opcode))
| (SHIFTL | SHIFTR), [r; n] ->
MnemonicF2rn (parse_register r, int_of_string n)
| (SHIFTL | SHIFTR), _ -> failwith "SHIFTL/SHIFTR require 2 operands: reg, number"
| (ADDR | COMPR | DIVR | MULR | RMO | SUBR), [r1; r2] ->
MnemonicF2rr (parse_register r1, parse_register r2)
| (ADDR | COMPR | DIVR | MULR | RMO | SUBR), _ ->
failwith (Printf.sprintf "%s requires 2 registers" (string_of_mnemonic opcode))
(* ------------------- Directives ------------------- *)
| (START | END | ORG | EQU), [opd] -> MnemonicDn opd
| (START | END | ORG | EQU), _ ->
failwith (Printf.sprintf "%s requires exactly 1 operand" (string_of_mnemonic opcode))
| (NOBASE | LTORG), [] -> MnemonicD
| (NOBASE | LTORG), _ ->
failwith (Printf.sprintf "%s takes no operands" (string_of_mnemonic opcode))
| (BASE), [opd] -> MnemonicDn opd
| BASE, _ ->
failwith "BASE directive requires exactly 1 operand"
| (RESB | RESW), [n] -> MnemonicSn (int_of_string n)
| (RESB | RESW), _ ->
failwith (Printf.sprintf "%s requires exactly 1 numeric operand" (string_of_mnemonic opcode))
| (WORD | BYTE), [opd] -> MnemonicSd opd
| (WORD | BYTE), _ ->
failwith (Printf.sprintf "%s requires exactly 1 operand" (string_of_mnemonic opcode))
(* ------------------- Format 4 (extended) ------------------- *)
| (ADD | ADDF | AND | COMP | COMPF | DIV
| J | JEQ | JGT | JLT | JSUB | LDA | LDB | LDCH | LDF
| LDL | LDS | LDT | LDX | LPS | MUL | OR | RD
| STA | STB | STCH | STF | STL | STS | STSW
| STT | STX | SUB | SUBF | TD | TIX | WD), [opd] when ext ->
MnemonicF4m opd
(* ------------------- Format 3 ------------------- *)
| RSUB, [] -> MnemonicF3
| RSUB, _ -> failwith "RSUB takes no operands"
| (ADD | ADDF | AND | COMP | COMPF | DIV
| J | JEQ | JGT | JLT | JSUB | LDA | LDB | LDCH | LDF
| LDL | LDS | LDT | LDX | LPS | MUL | OR | RD
| STA | STB | STCH | STF | STL | STS | STSW
| STT | STX | SUB | SUBF | TD | TIX | WD), [opd] ->
MnemonicF3m opd
| _, _ -> failwith (Printf.sprintf "Invalid operands for opcode %s: %d operands"
(string_of_mnemonic opcode) (List.length operand))
(*vrni dolzino ukaza*)
let getLen (mnemType : mnemonic_type) (mnem : mnemonic) : int =
match mnemType with
(* ----------- directives with no memory ----------- *)
| MnemonicD
| MnemonicDn _
| COMMENT -> 0
(* ----------- instruction formats ----------- *)
| MnemonicF1 -> 1
| MnemonicF2n _
| MnemonicF2r _
| MnemonicF2rn _
| MnemonicF2rr _ -> 2
| MnemonicF3
| MnemonicF3m _ -> 3
| MnemonicF4m _ -> 4
(* ----------- storage directives ----------- *)
| MnemonicSn n ->
begin
match mnem with
| RESB -> n
| RESW -> 3*n
| _ -> failwith "Narobe mnemonic glede na mnemType" (* RESB *)
end
| MnemonicSd _ ->
begin
match mnem with
| BYTE -> 1
| WORD -> 3
| _ -> failwith "Narobe mnemoic glede na mnemType"
end
(*preveri ali imaš X naslavljanje*)
let has_index (ops : string list) : bool * string list = (*vrne bool x in operande brez X*)
match ops with
| [lst; x] when String.uppercase_ascii x = "X" -> true, [lst] (*predpostavimo da se X pojavi na drugem mestu in nikoli na tretjem*)
| _ -> false, ops
(*dobi n in i bita in operande brez # ali @*)
let get_ni_bits (operands : string list) : bool * bool * string list =
match operands with
| [] ->
(true, true, [])
| op :: rest ->
if String.length op = 0 then
(true, true, operands)
else
match op.[0] with
| '#' ->
let stripped =String.sub op 1 (String.length op - 1) in
(false, true, stripped :: rest)
| '@' ->
let stripped = String.sub op 1 (String.length op - 1) in
(true, false, stripped :: rest)
| _ ->
(true, true, operands)
let checkLineSemantic (line : line) : lineSemantic =
let mnem, ext = getMnem line.opcode in
let x, ops = has_index line.operand in
let n, i, ops = get_ni_bits ops in
let mnemType = getMnemType mnem ext ops in
let dolzina = getLen mnemType mnem in
{ line_no = line.line_no;
label = line.label;
ext = ext;
opcode = mnem;
mnem = mnemType;
x = x;
n = n;
i = i;
comment = line.comment;
len = dolzina;
loc = line.loc}
let checkLineSemanticOfCode (code : line list) : lineSemantic list =
List.map (checkLineSemantic) code