added macros, control flow, structs and comparison operators

This commit is contained in:
IgorCielniak
2025-12-06 18:30:29 +01:00
parent 643a4960c2
commit 3a63347380
4 changed files with 491 additions and 1 deletions

17
SPEC.md
View File

@@ -32,6 +32,23 @@
- **Typed views**: Optional helper words interpret memory as bytes, half-words, floats, or structs but core semantics stay cell-based.
- **User-defined types**: `struct`, `union`, and `enum` builders produce layout descriptors plus accessor words that expand to raw loads/stores.
### 4.1 Struct Builder
```
struct: Point
field x 8
field y 8
;struct
```
- `struct:` is an immediate word. It consumes field declarations until the matching `;struct` token.
- Each `field <name> <bytes>` line appends a member with byte size `<bytes>`; fields are laid out sequentially without implicit padding.
- The builder expands into ordinary word definitions:
  - `<Struct>.size` plus `<Struct>.<field>.size` and `<Struct>.<field>.offset` constants.
  - `<Struct>.<field>@ ( addr -- value )` loads a field by computing `addr + offset` and applying `@`.
  - `<Struct>.<field>! ( value addr -- )` stores a field via `addr + offset !`.
- Because the output is plain L2 code, users can inspect or override any generated word, and additional helpers (e.g., pointer arithmetic or iterators) can be layered on top with regular macros.
## 5. Stacks & Calling Convention
- **Data stack**: Unlimited (up to memory). Manipulated via standard words (`dup`, `swap`, `rot`, `over`). Compiled code keeps top-of-stack in registers when possible for performance.
- **Return stack**: Used for control flow. Directly accessible for meta-programming; users must avoid corrupting call frames unless intentional.

310
main.py
View File

@@ -15,7 +15,7 @@ import subprocess
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, Dict, Iterable, List, Optional, Sequence, Set, Union
from typing import Callable, Dict, Iterable, List, Optional, Sequence, Set, Union, Tuple
class ParseError(Exception):
@@ -131,6 +131,47 @@ class Module(ASTNode):
forms: List[ASTNode]
@dataclass
class MacroDefinition:
    """Text macro that is currently being recorded by the parser.

    The parser creates one instance when recording starts, appends every raw
    body lexeme to ``tokens``, and turns the result into a dictionary word
    when the closing ``;macro`` token is reached.
    """

    # Name under which the finished macro is registered.
    name: str
    # Raw body lexemes, in source order.
    tokens: List[str]
    # Number of argument tokens consumed at each use site.
    param_count: int = 0
@dataclass
class StructField:
    """One member of a ``struct:`` block, as laid out by the struct builder."""

    # Field name as written after the ``field`` keyword.
    name: str
    # Byte offset of the field from the start of the struct.
    offset: int
    # Byte size of the field.
    size: int
@dataclass
class BranchZero(ASTNode):
    """Conditional branch node produced for ``if``.

    The assembler loads a value into ``rax`` through ``builder.pop_to`` and
    jumps to ``target`` when that value is zero.
    """

    # Label jumped to when the tested value is zero.
    target: str
@dataclass
class Jump(ASTNode):
    """Unconditional jump node; the assembler emits ``jmp`` to ``target``."""

    # Label that control is transferred to.
    target: str
@dataclass
class Label(ASTNode):
    """Label placement node; the assembler emits ``<name>:`` at this point."""

    # Label text, emitted verbatim followed by a colon.
    name: str
@dataclass
class ForBegin(ASTNode):
    """Opening node of a ``for`` ... ``next`` loop.

    Carries the two labels shared with the matching ``ForNext`` node.
    """

    # Label placed at the top of the loop body.
    loop_label: str
    # Label jumped to when the loop is skipped entirely.
    end_label: str
@dataclass
class ForNext(ASTNode):
    """Closing node of a ``for`` ... ``next`` loop.

    Carries the same two labels as the ``ForBegin`` node it closes.
    """

    # Label jumped back to while the counter is still positive.
    loop_label: str
    # Label placed immediately after the loop.
    end_label: str
MacroHandler = Callable[["Parser"], Optional[List[ASTNode]]]
IntrinsicEmitter = Callable[["FunctionEmitter"], None]
@@ -143,6 +184,8 @@ class Word:
definition: Optional[Union[Definition, AsmDefinition]] = None
macro: Optional[MacroHandler] = None
intrinsic: Optional[IntrinsicEmitter] = None
macro_expansion: Optional[List[str]] = None
macro_params: int = 0
@dataclass
@@ -175,6 +218,9 @@ class Parser:
self.definition_stack: List[Word] = []
self.last_defined: Optional[Word] = None
self.source: str = ""
self.macro_recording: Optional[MacroDefinition] = None
self.control_stack: List[Dict[str, str]] = []
self.label_counter = 0
# Public helpers for macros ------------------------------------------------
def next_token(self) -> Token:
@@ -197,9 +243,13 @@ class Parser:
self.context_stack = [Module(forms=[])]
self.definition_stack.clear()
self.last_defined = None
self.control_stack = []
self.label_counter = 0
while not self._eof():
token = self._consume()
if self._handle_macro_recording(token):
continue
lexeme = token.lexeme
if lexeme == ":":
self._begin_definition(token)
@@ -210,10 +260,29 @@ class Parser:
if lexeme == ":asm":
self._parse_asm_definition(token)
continue
if lexeme == "if":
self._handle_if_control()
continue
if lexeme == "else":
self._handle_else_control()
continue
if lexeme == "then":
self._handle_then_control()
continue
if lexeme == "for":
self._handle_for_control()
continue
if lexeme == "next":
self._handle_next_control()
continue
if self._maybe_expand_macro(token):
continue
self._handle_token(token)
if len(self.context_stack) != 1:
raise ParseError("unclosed definition at EOF")
if self.control_stack:
raise ParseError("unclosed control structure at EOF")
module = self.context_stack.pop()
if not isinstance(module, Module): # pragma: no cover - defensive
@@ -237,6 +306,107 @@ class Parser:
self._append_node(WordRef(name=token.lexeme))
def _handle_macro_recording(self, token: Token) -> bool:
    """Divert *token* into the macro being recorded, if there is one.

    Returns ``False`` when no macro is being recorded, so the caller should
    process the token normally. Returns ``True`` when the token was taken
    by the recorder: either stored in the macro body or, for ``;macro``,
    used to finish the recording.
    """
    recording = self.macro_recording
    if recording is None:
        return False
    if token.lexeme != ";macro":
        # Body tokens are stored as raw lexemes; they are not interpreted here.
        recording.tokens.append(token.lexeme)
        return True
    self._finish_macro_recording(token)
    return True
def _maybe_expand_macro(self, token: Token) -> bool:
    """Expand *token* in place when it names a text macro.

    Looks the lexeme up in the dictionary. If the word carries a
    ``macro_expansion``, the declared number of argument tokens is consumed
    and the substituted body is spliced into the token stream.

    Returns ``True`` when an expansion happened, ``False`` otherwise.
    """
    entry = self.dictionary.lookup(token.lexeme)
    if not entry or entry.macro_expansion is None:
        return False
    call_args = self._collect_macro_args(entry.macro_params)
    self._inject_macro_tokens(entry, token, call_args)
    return True
def _inject_macro_tokens(self, word: Word, token: Token, args: List[str]) -> None:
    """Splice the expansion of macro *word* into the token stream.

    Each body lexeme of the form ``$N`` (``N`` an integer, 1-based) is
    replaced by ``args[N - 1]``. Every other lexeme is copied unchanged,
    including ``$``-prefixed lexemes whose suffix is not an integer (for
    example ``$`` or ``$foo``), which previously crashed the parser with an
    uncaught ``ValueError``.

    The generated tokens reuse the position of the invoking *token* and are
    inserted at ``self.pos``, so the parser reads them next.

    Raises:
        ParseError: if a ``$N`` placeholder refers to an argument that was
            not supplied (``N < 1`` or ``N > len(args)``).
    """
    replaced: List[str] = []
    for lex in word.macro_expansion or []:
        if not lex.startswith("$"):
            replaced.append(lex)
            continue
        try:
            idx = int(lex[1:]) - 1
        except ValueError:
            # Not a numeric placeholder: treat it as an ordinary lexeme
            # instead of letting the ValueError escape.
            replaced.append(lex)
            continue
        if idx < 0 or idx >= len(args):
            raise ParseError(f"macro {word.name} missing argument for {lex}")
        replaced.append(args[idx])
    insertion = [
        Token(lexeme=lex, line=token.line, column=token.column, start=token.start, end=token.end)
        for lex in replaced
    ]
    # Zero-width slice assignment inserts without overwriting pending tokens.
    self.tokens[self.pos:self.pos] = insertion
def _collect_macro_args(self, count: int) -> List[str]:
args: List[str] = []
for _ in range(count):
if self._eof():
raise ParseError("macro invocation missing arguments")
args.append(self._consume().lexeme)
return args
def _start_macro_recording(self, name: str, param_count: int) -> None:
    """Begin recording a text macro called *name*.

    Raises:
        ParseError: if another macro is already being recorded.
    """
    if self.macro_recording is None:
        self.macro_recording = MacroDefinition(name=name, tokens=[], param_count=param_count)
        return
    raise ParseError("nested macro definitions are not supported")
def _finish_macro_recording(self, token: Token) -> None:
    """Close the current macro recording and register it as a word.

    *token* is the ``;macro`` token; it is only used for the error position.

    Raises:
        ParseError: if no macro is being recorded.
    """
    finished = self.macro_recording
    if finished is None:
        raise ParseError(f"unexpected ';macro' at {token.line}:{token.column}")
    self.macro_recording = None
    macro_word = Word(name=finished.name)
    macro_word.macro_params = finished.param_count
    # Copy the list so the registered word does not alias the recorder's list.
    macro_word.macro_expansion = list(finished.tokens)
    self.dictionary.register(macro_word)
def _push_control(self, entry: Dict[str, str]) -> None:
self.control_stack.append(entry)
def _pop_control(self, expected: Tuple[str, ...]) -> Dict[str, str]:
if not self.control_stack:
raise ParseError("control stack underflow")
entry = self.control_stack.pop()
if entry.get("type") not in expected:
raise ParseError(f"mismatched control word '{entry.get('type')}'")
return entry
def _new_label(self, prefix: str) -> str:
label = f"L_{prefix}_{self.label_counter}"
self.label_counter += 1
return label
def _handle_if_control(self) -> None:
    """Handle ``if``: emit a branch-on-zero and open an ``if`` frame.

    The branch target is a fresh label that a later ``else`` or ``then``
    places.
    """
    skip_target = self._new_label("if_false")
    branch = BranchZero(target=skip_target)
    self._append_node(branch)
    self._push_control({"type": "if", "false": skip_target})
def _handle_else_control(self) -> None:
    """Handle ``else``: close the true branch and start the false branch.

    Emits a jump over the false branch to a fresh end label, places the
    ``if`` frame's false label, and replaces the ``if`` frame with an
    ``else`` frame that remembers the end label.

    Raises:
        ParseError: (from ``_pop_control``) if the innermost open control
            structure is not an ``if``.
    """
    if_frame = self._pop_control(("if",))
    after_label = self._new_label("if_end")
    for node in (Jump(target=after_label), Label(name=if_frame["false"])):
        self._append_node(node)
    self._push_control({"type": "else", "end": after_label})
def _handle_then_control(self) -> None:
    """Handle ``then``: place the label that ends the conditional.

    For a bare ``if`` this is the frame's false label; after an ``else`` it
    is the frame's end label.

    Raises:
        ParseError: (from ``_pop_control``) if the innermost open control
            structure is neither ``if`` nor ``else``.
    """
    frame = self._pop_control(("if", "else"))
    label_key = "false" if frame["type"] == "if" else "end"
    self._append_node(Label(name=frame[label_key]))
def _handle_for_control(self) -> None:
    """Handle ``for``: emit the loop prologue node and open a ``for`` frame.

    Two fresh labels are allocated (loop first, then end) and stored in the
    frame so the matching ``next`` can reuse them.
    """
    top_label = self._new_label("for_loop")
    exit_label = self._new_label("for_end")
    self._append_node(ForBegin(loop_label=top_label, end_label=exit_label))
    frame = {"type": "for", "loop": top_label, "end": exit_label}
    self._push_control(frame)
def _handle_next_control(self) -> None:
    """Handle ``next``: close the innermost ``for`` with a ``ForNext`` node.

    Raises:
        ParseError: (from ``_pop_control``) if the innermost open control
            structure is not a ``for``.
    """
    frame = self._pop_control(("for",))
    closing = ForNext(loop_label=frame["loop"], end_label=frame["end"])
    self._append_node(closing)
def _begin_definition(self, token: Token) -> None:
if self._eof():
raise ParseError(f"definition name missing after ':' at {token.line}:{token.column}")
@@ -447,6 +617,21 @@ class Assembler:
if isinstance(node, WordRef):
self._emit_wordref(node, builder)
return
if isinstance(node, BranchZero):
self._emit_branch_zero(node, builder)
return
if isinstance(node, Jump):
builder.emit(f" jmp {node.target}")
return
if isinstance(node, Label):
builder.emit(f"{node.name}:")
return
if isinstance(node, ForBegin):
self._emit_for_begin(node, builder)
return
if isinstance(node, ForNext):
self._emit_for_next(node, builder)
return
raise CompileError(f"unsupported AST node {node!r}")
def _emit_wordref(self, ref: WordRef, builder: FunctionEmitter) -> None:
@@ -458,6 +643,27 @@ class Assembler:
return
builder.emit(f" call {sanitize_label(ref.name)}")
def _emit_branch_zero(self, node: BranchZero, builder: FunctionEmitter) -> None:
    """Emit code that jumps to ``node.target`` when the tested value is zero.

    The value is loaded into ``rax`` through ``builder.pop_to``.
    """
    builder.pop_to("rax")
    for line in (" test rax, rax", f" jz {node.target}"):
        builder.emit(line)
def _emit_for_begin(self, node: ForBegin, builder: FunctionEmitter) -> None:
    """Emit the prologue of a ``for`` loop.

    The loop count is loaded into ``rax`` through ``builder.pop_to``. A
    count of zero or less jumps straight to ``node.end_label``, before any
    counter slot is reserved. Otherwise an 8-byte slot is reserved below
    ``r13``, the count is stored there, and the loop label is placed.
    """
    # NOTE(review): r13 is presumably the return-stack pointer - confirm.
    builder.pop_to("rax")
    prologue = (
        " cmp rax, 0",
        f" jle {node.end_label}",
        " sub r13, 8",
        " mov [r13], rax",
        f"{node.loop_label}:",
    )
    for line in prologue:
        builder.emit(line)
def _emit_for_next(self, node: ForNext, builder: FunctionEmitter) -> None:
    """Emit the epilogue of a ``for`` loop.

    The counter at ``[r13]`` is decremented and written back. While the
    result is greater than zero, control returns to ``node.loop_label``.
    Otherwise the 8-byte counter slot is released and ``node.end_label`` is
    placed.
    """
    epilogue = (
        " mov rax, [r13]",
        " dec rax",
        # mov leaves the flags alone, so the jg below still tests the dec.
        " mov [r13], rax",
        f" jg {node.loop_label}",
        " add r13, 8",
        f"{node.end_label}:",
    )
    for line in epilogue:
        builder.emit(line)
def _runtime_prelude(self) -> List[str]:
return [
"%define DSTK_BYTES 65536",
@@ -513,9 +719,111 @@ def macro_immediate(parser: Parser) -> Optional[List[ASTNode]]:
return None
def macro_begin_text_macro(parser: Parser) -> Optional[List[ASTNode]]:
    """Handle ``macro:``: read the macro header and start recording its body.

    The token after ``macro:`` is the macro name. If the token after that
    parses as an integer (``int(..., 0)``, so prefixes such as ``0x`` are
    accepted), it is consumed as the parameter count; otherwise the count is
    0 and the token is left as the first body token.

    Raises:
        ParseError: if the input ends right after ``macro:``.
    """
    if parser._eof():
        raise ParseError("macro name missing after 'macro:'")
    macro_name = parser.next_token().lexeme
    arity = 0
    lookahead = parser.peek_token()
    if lookahead is not None:
        try:
            arity = int(lookahead.lexeme, 0)
            # Only consume the token once it is known to be the count.
            parser.next_token()
        except ValueError:
            arity = 0
    parser._start_macro_recording(macro_name, arity)
    return None
def macro_end_text_macro(parser: Parser) -> Optional[List[ASTNode]]:
    """Handle a ``;macro`` that reaches the dictionary as an immediate word.

    Raises:
        ParseError: if no macro is being recorded.
    """
    if parser.macro_recording is not None:
        # Actual closing handled in parser loop when ';macro' token is seen.
        return None
    raise ParseError("';macro' without matching 'macro:'")
def _struct_emit_definition(tokens: List[Token], template: Token, name: str, body: Sequence[str]) -> None:
    """Append the tokens of ``: <name> <body...> ;`` to *tokens*.

    Every generated token copies its source position (line, column, start,
    end) from *template*.
    """
    lexemes = [":", name, *body, ";"]
    generated = [
        Token(
            lexeme=lexeme,
            line=template.line,
            column=template.column,
            start=template.start,
            end=template.end,
        )
        for lexeme in lexemes
    ]
    tokens.extend(generated)
def macro_struct_begin(parser: Parser) -> Optional[List[ASTNode]]:
    """Handle ``struct:``: parse the field list and generate accessor words.

    Syntax: ``struct: <Name> (field <name> <bytes>)* ;struct``. Fields are
    laid out sequentially with no padding. The sizes are parsed with
    ``int(..., 0)``, so prefixed literals such as ``0x10`` are accepted.

    The following definitions are generated as ordinary ``: name ... ;``
    token sequences and spliced into the token stream at ``parser.pos``:

    * ``<Name>.size`` - total byte size of the struct.
    * ``<Name>.<field>.size`` and ``<Name>.<field>.offset`` - constants.
    * ``<Name>.<field>@`` - body ``<offset word> + @``.
    * ``<Name>.<field>!`` - body ``<offset word> + !``.

    Raises:
        ParseError: if the struct name, a field name or a field size is
            missing; if a token other than ``field`` or ``;struct`` appears
            in the block; if ``;struct`` is never reached; or if a field
            size is not an integer or is negative.
    """
    if parser._eof():
        raise ParseError("struct name missing after 'struct:'")
    name_token = parser.next_token()
    struct_name = name_token.lexeme
    fields: List[StructField] = []
    current_offset = 0
    while True:
        if parser._eof():
            raise ParseError("unterminated struct definition (missing ';struct')")
        token = parser.next_token()
        if token.lexeme == ";struct":
            break
        if token.lexeme != "field":
            raise ParseError(f"expected 'field' or ';struct' in struct '{struct_name}' definition")
        if parser._eof():
            raise ParseError("field name missing in struct definition")
        field_name_token = parser.next_token()
        if parser._eof():
            raise ParseError(f"field size missing for '{field_name_token.lexeme}'")
        size_token = parser.next_token()
        try:
            field_size = int(size_token.lexeme, 0)
        except ValueError as exc:
            raise ParseError(
                f"invalid field size '{size_token.lexeme}' in struct '{struct_name}'"
            ) from exc
        if field_size < 0:
            # A negative size would move later offsets backwards and shrink
            # the reported struct size, so reject it like any other bad size.
            raise ParseError(
                f"invalid field size '{size_token.lexeme}' in struct '{struct_name}'"
            )
        fields.append(StructField(field_name_token.lexeme, current_offset, field_size))
        current_offset += field_size

    generated: List[Token] = []
    _struct_emit_definition(generated, name_token, f"{struct_name}.size", [str(current_offset)])
    # Named ``member`` rather than ``field`` so the loop variable does not
    # shadow the ``field`` imported from ``dataclasses``.
    for member in fields:
        size_word = f"{struct_name}.{member.name}.size"
        offset_word = f"{struct_name}.{member.name}.offset"
        _struct_emit_definition(generated, name_token, size_word, [str(member.size)])
        _struct_emit_definition(generated, name_token, offset_word, [str(member.offset)])
        _struct_emit_definition(
            generated,
            name_token,
            f"{struct_name}.{member.name}@",
            [offset_word, "+", "@"],
        )
        _struct_emit_definition(
            generated,
            name_token,
            f"{struct_name}.{member.name}!",
            [offset_word, "+", "!"],
        )

    # Zero-width slice assignment: the generated definitions are parsed next.
    parser.tokens[parser.pos:parser.pos] = generated
    return None
def macro_struct_end(parser: Parser) -> Optional[List[ASTNode]]:
    """Reject a ``;struct`` that reaches the dictionary as an immediate word.

    Raises:
        ParseError: always.
    """
    message = "';struct' must follow a 'struct:' block"
    raise ParseError(message)
def bootstrap_dictionary() -> Dictionary:
    """Create a dictionary pre-populated with the built-in immediate words."""
    dictionary = Dictionary()
    # (word name, macro handler) pairs, registered in this order.
    immediate_words = (
        ("immediate", macro_immediate),
        ("macro:", macro_begin_text_macro),
        (";macro", macro_end_text_macro),
        ("struct:", macro_struct_begin),
        (";struct", macro_struct_end),
    )
    for word_name, handler in immediate_words:
        dictionary.register(Word(name=word_name, immediate=True, macro=handler))
    return dictionary

View File

@@ -109,6 +109,72 @@ puts_finish_digits:
}
;
:asm == {
mov rax, [r12]
add r12, 8
mov rbx, [r12]
cmp rbx, rax
mov rbx, 0
sete bl
mov [r12], rbx
}
;
:asm != {
mov rax, [r12]
add r12, 8
mov rbx, [r12]
cmp rbx, rax
mov rbx, 0
setne bl
mov [r12], rbx
}
;
:asm < {
mov rax, [r12]
add r12, 8
mov rbx, [r12]
cmp rbx, rax
mov rbx, 0
setl bl
mov [r12], rbx
}
;
:asm > {
mov rax, [r12]
add r12, 8
mov rbx, [r12]
cmp rbx, rax
mov rbx, 0
setg bl
mov [r12], rbx
}
;
:asm <= {
mov rax, [r12]
add r12, 8
mov rbx, [r12]
cmp rbx, rax
mov rbx, 0
setle bl
mov [r12], rbx
}
;
:asm >= {
mov rax, [r12]
add r12, 8
mov rbx, [r12]
cmp rbx, rax
mov rbx, 0
setge bl
mov [r12], rbx
}
;
:asm @ {
mov rax, [r12]
mov rax, [rax]

99
test.sl
View File

@@ -7,6 +7,30 @@ import stdlib.sl
}
;
macro: square
dup *
;macro
macro: defconst 2
: $1
$2
;
;macro
macro: defadder 3
: $1
$2 $3 +
;
;macro
defconst MAGIC 99
defadder add13 5 8
struct: Point
field x 8
field y 8
;struct
: test-add
5 7 + puts
;
@@ -61,6 +85,74 @@ import stdlib.sl
4096 munmap drop
;
: test-macro
9 square puts
MAGIC puts
add13 puts
;
: test-if
5 5 == if
111 puts
else
222 puts
then
;
: test-else-if
2
dup 1 == if
50 puts
else
dup 2 == if
60 puts
else
70 puts
then
then
drop
;
: test-for
0
5 for
1 +
next
puts
;
: test-for-zero
123
0 for
drop
next
puts
;
: test-struct
mem-slot
dup 111 swap Point.x!
dup 222 swap Point.y!
dup Point.x@ puts
Point.y@ puts
Point.size puts
;
: test-cmp
5 5 == puts
5 4 == puts
5 4 != puts
4 4 != puts
3 5 < puts
5 3 < puts
5 3 > puts
3 5 > puts
5 5 <= puts
6 5 <= puts
5 5 >= puts
4 5 >= puts
;
: main
test-add
test-sub
@@ -72,5 +164,12 @@ import stdlib.sl
test-swap
test-store
test-mmap
test-macro
test-if
test-else-if
test-for
test-for-zero
test-cmp
test-struct
0
;