diff --git a/SPEC.md b/SPEC.md index d0b30b1..1520986 100644 --- a/SPEC.md +++ b/SPEC.md @@ -32,6 +32,23 @@ - **Typed views**: Optional helper words interpret memory as bytes, half-words, floats, or structs but core semantics stay cell-based. - **User-defined types**: `struct`, `union`, and `enum` builders produce layout descriptors plus accessor words that expand to raw loads/stores. +### 4.1 Struct Builder + +``` +struct: Point + field x 8 + field y 8 +;struct +``` + +- `struct:` is an immediate word. It consumes field declarations until the matching `;struct` token. +- Each `field <name> <size>` line appends a member with byte size `<size>`; fields are laid out sequentially without implicit padding. +- The builder expands into ordinary word definitions: + - `<Struct>.size` plus `<Struct>.<field>.size` and `<Struct>.<field>.offset` constants. + - `<Struct>.<field>@ ( addr -- value )` loads a field by computing `addr + offset` and applying `@`. + - `<Struct>.<field>! ( value addr -- )` stores a field via `addr + offset !`. +- Because the output is plain L2 code, users can inspect or override any generated word, and additional helpers (e.g., pointer arithmetic or iterators) can be layered on top with regular macros. + ## 5. Stacks & Calling Convention - **Data stack**: Unlimited (up to memory). Manipulated via standard words (`dup`, `swap`, `rot`, `over`). Compiled code keeps top-of-stack in registers when possible for performance. - **Return stack**: Used for control flow. Directly accessible for meta-programming; users must avoid corrupting call frames unless intentional. 
diff --git a/main.py b/main.py index 169a633..e3eb762 100644 --- a/main.py +++ b/main.py @@ -15,7 +15,7 @@ import subprocess import sys from dataclasses import dataclass, field from pathlib import Path -from typing import Callable, Dict, Iterable, List, Optional, Sequence, Set, Union +from typing import Callable, Dict, Iterable, List, Optional, Sequence, Set, Union, Tuple class ParseError(Exception): @@ -131,6 +131,47 @@ class Module(ASTNode): forms: List[ASTNode] +@dataclass +class MacroDefinition: + name: str + tokens: List[str] + param_count: int = 0 + + +@dataclass +class StructField: + name: str + offset: int + size: int + + +@dataclass +class BranchZero(ASTNode): + target: str + + +@dataclass +class Jump(ASTNode): + target: str + + +@dataclass +class Label(ASTNode): + name: str + + +@dataclass +class ForBegin(ASTNode): + loop_label: str + end_label: str + + +@dataclass +class ForNext(ASTNode): + loop_label: str + end_label: str + + MacroHandler = Callable[["Parser"], Optional[List[ASTNode]]] IntrinsicEmitter = Callable[["FunctionEmitter"], None] @@ -143,6 +184,8 @@ class Word: definition: Optional[Union[Definition, AsmDefinition]] = None macro: Optional[MacroHandler] = None intrinsic: Optional[IntrinsicEmitter] = None + macro_expansion: Optional[List[str]] = None + macro_params: int = 0 @dataclass @@ -175,6 +218,9 @@ class Parser: self.definition_stack: List[Word] = [] self.last_defined: Optional[Word] = None self.source: str = "" + self.macro_recording: Optional[MacroDefinition] = None + self.control_stack: List[Dict[str, str]] = [] + self.label_counter = 0 # Public helpers for macros ------------------------------------------------ def next_token(self) -> Token: @@ -197,9 +243,13 @@ class Parser: self.context_stack = [Module(forms=[])] self.definition_stack.clear() self.last_defined = None + self.control_stack = [] + self.label_counter = 0 while not self._eof(): token = self._consume() + if self._handle_macro_recording(token): + continue lexeme = 
token.lexeme if lexeme == ":": self._begin_definition(token) @@ -210,10 +260,29 @@ class Parser: if lexeme == ":asm": self._parse_asm_definition(token) continue + if lexeme == "if": + self._handle_if_control() + continue + if lexeme == "else": + self._handle_else_control() + continue + if lexeme == "then": + self._handle_then_control() + continue + if lexeme == "for": + self._handle_for_control() + continue + if lexeme == "next": + self._handle_next_control() + continue + if self._maybe_expand_macro(token): + continue self._handle_token(token) if len(self.context_stack) != 1: raise ParseError("unclosed definition at EOF") + if self.control_stack: + raise ParseError("unclosed control structure at EOF") module = self.context_stack.pop() if not isinstance(module, Module): # pragma: no cover - defensive @@ -237,6 +306,107 @@ class Parser: self._append_node(WordRef(name=token.lexeme)) + def _handle_macro_recording(self, token: Token) -> bool: + if self.macro_recording is None: + return False + if token.lexeme == ";macro": + self._finish_macro_recording(token) + else: + self.macro_recording.tokens.append(token.lexeme) + return True + + def _maybe_expand_macro(self, token: Token) -> bool: + word = self.dictionary.lookup(token.lexeme) + if word and word.macro_expansion is not None: + args = self._collect_macro_args(word.macro_params) + self._inject_macro_tokens(word, token, args) + return True + return False + + def _inject_macro_tokens(self, word: Word, token: Token, args: List[str]) -> None: + replaced: List[str] = [] + for lex in word.macro_expansion or []: + if lex.startswith("$") and lex[1:].isdecimal():  # only "$<digits>" is a parameter reference; any other "$" token is copied verbatim rather than crashing int() + idx = int(lex[1:]) - 1 + if idx < 0 or idx >= len(args): + raise ParseError(f"macro {word.name} missing argument for {lex}") + replaced.append(args[idx]) + else: + replaced.append(lex) + insertion = [ + Token(lexeme=lex, line=token.line, column=token.column, start=token.start, end=token.end) + for lex in replaced + ] + self.tokens[self.pos:self.pos] = insertion + + def 
_collect_macro_args(self, count: int) -> List[str]: + args: List[str] = [] + for _ in range(count): + if self._eof(): + raise ParseError("macro invocation missing arguments") + args.append(self._consume().lexeme) + return args + + def _start_macro_recording(self, name: str, param_count: int) -> None: + if self.macro_recording is not None: + raise ParseError("nested macro definitions are not supported") + self.macro_recording = MacroDefinition(name=name, tokens=[], param_count=param_count) + + def _finish_macro_recording(self, token: Token) -> None: + if self.macro_recording is None: + raise ParseError(f"unexpected ';macro' at {token.line}:{token.column}") + macro_def = self.macro_recording + self.macro_recording = None + word = Word(name=macro_def.name) + word.macro_expansion = list(macro_def.tokens) + word.macro_params = macro_def.param_count + self.dictionary.register(word) + + def _push_control(self, entry: Dict[str, str]) -> None: + self.control_stack.append(entry) + + def _pop_control(self, expected: Tuple[str, ...]) -> Dict[str, str]: + if not self.control_stack: + raise ParseError("control stack underflow") + entry = self.control_stack.pop() + if entry.get("type") not in expected: + raise ParseError(f"mismatched control word '{entry.get('type')}'") + return entry + + def _new_label(self, prefix: str) -> str: + label = f"L_{prefix}_{self.label_counter}" + self.label_counter += 1 + return label + + def _handle_if_control(self) -> None: + false_label = self._new_label("if_false") + self._append_node(BranchZero(target=false_label)) + self._push_control({"type": "if", "false": false_label}) + + def _handle_else_control(self) -> None: + entry = self._pop_control(("if",)) + end_label = self._new_label("if_end") + self._append_node(Jump(target=end_label)) + self._append_node(Label(name=entry["false"])) + self._push_control({"type": "else", "end": end_label}) + + def _handle_then_control(self) -> None: + entry = self._pop_control(("if", "else")) + if entry["type"] == 
"if": + self._append_node(Label(name=entry["false"])) + else: + self._append_node(Label(name=entry["end"])) + + def _handle_for_control(self) -> None: + loop_label = self._new_label("for_loop") + end_label = self._new_label("for_end") + self._append_node(ForBegin(loop_label=loop_label, end_label=end_label)) + self._push_control({"type": "for", "loop": loop_label, "end": end_label}) + + def _handle_next_control(self) -> None: + entry = self._pop_control(("for",)) + self._append_node(ForNext(loop_label=entry["loop"], end_label=entry["end"])) + def _begin_definition(self, token: Token) -> None: if self._eof(): raise ParseError(f"definition name missing after ':' at {token.line}:{token.column}") @@ -447,6 +617,21 @@ class Assembler: if isinstance(node, WordRef): self._emit_wordref(node, builder) return + if isinstance(node, BranchZero): + self._emit_branch_zero(node, builder) + return + if isinstance(node, Jump): + builder.emit(f" jmp {node.target}") + return + if isinstance(node, Label): + builder.emit(f"{node.name}:") + return + if isinstance(node, ForBegin): + self._emit_for_begin(node, builder) + return + if isinstance(node, ForNext): + self._emit_for_next(node, builder) + return raise CompileError(f"unsupported AST node {node!r}") def _emit_wordref(self, ref: WordRef, builder: FunctionEmitter) -> None: @@ -458,6 +643,27 @@ class Assembler: return builder.emit(f" call {sanitize_label(ref.name)}") + def _emit_branch_zero(self, node: BranchZero, builder: FunctionEmitter) -> None: + builder.pop_to("rax") + builder.emit(" test rax, rax") + builder.emit(f" jz {node.target}") + + def _emit_for_begin(self, node: ForBegin, builder: FunctionEmitter) -> None: + builder.pop_to("rax") + builder.emit(" cmp rax, 0") + builder.emit(f" jle {node.end_label}") + builder.emit(" sub r13, 8") + builder.emit(" mov [r13], rax") + builder.emit(f"{node.loop_label}:") + + def _emit_for_next(self, node: ForNext, builder: FunctionEmitter) -> None: + builder.emit(" mov rax, [r13]") + 
builder.emit(" dec rax") + builder.emit(" mov [r13], rax") + builder.emit(f" jg {node.loop_label}") + builder.emit(" add r13, 8") + builder.emit(f"{node.end_label}:") + def _runtime_prelude(self) -> List[str]: return [ "%define DSTK_BYTES 65536", @@ -513,9 +719,111 @@ def macro_immediate(parser: Parser) -> Optional[List[ASTNode]]: return None +def macro_begin_text_macro(parser: Parser) -> Optional[List[ASTNode]]: + if parser._eof(): + raise ParseError("macro name missing after 'macro:'") + name_token = parser.next_token() + param_count = 0 + peek = parser.peek_token() + if peek is not None: + try: + param_count = int(peek.lexeme, 0) + parser.next_token() + except ValueError: + param_count = 0 + parser._start_macro_recording(name_token.lexeme, param_count) + return None + + +def macro_end_text_macro(parser: Parser) -> Optional[List[ASTNode]]: + if parser.macro_recording is None: + raise ParseError("';macro' without matching 'macro:'") + # Actual closing handled in parser loop when ';macro' token is seen. 
+ return None + + +def _struct_emit_definition(tokens: List[Token], template: Token, name: str, body: Sequence[str]) -> None: + def make_token(lexeme: str) -> Token: + return Token( + lexeme=lexeme, + line=template.line, + column=template.column, + start=template.start, + end=template.end, + ) + + tokens.append(make_token(":")) + tokens.append(make_token(name)) + for lexeme in body: + tokens.append(make_token(lexeme)) + tokens.append(make_token(";")) + + +def macro_struct_begin(parser: Parser) -> Optional[List[ASTNode]]: + if parser._eof(): + raise ParseError("struct name missing after 'struct:'") + name_token = parser.next_token() + struct_name = name_token.lexeme + fields: List[StructField] = [] + current_offset = 0 + while True: + if parser._eof(): + raise ParseError("unterminated struct definition (missing ';struct')") + token = parser.next_token() + if token.lexeme == ";struct": + break + if token.lexeme != "field": + raise ParseError(f"expected 'field' or ';struct' in struct '{struct_name}' definition") + if parser._eof(): + raise ParseError("field name missing in struct definition") + field_name_token = parser.next_token() + if parser._eof(): + raise ParseError(f"field size missing for '{field_name_token.lexeme}'") + size_token = parser.next_token() + try: + field_size = int(size_token.lexeme, 0) + except ValueError as exc: + raise ParseError( + f"invalid field size '{size_token.lexeme}' in struct '{struct_name}'" + ) from exc + fields.append(StructField(field_name_token.lexeme, current_offset, field_size)) + current_offset += field_size + + generated: List[Token] = [] + _struct_emit_definition(generated, name_token, f"{struct_name}.size", [str(current_offset)]) + for field in fields: + size_word = f"{struct_name}.{field.name}.size" + offset_word = f"{struct_name}.{field.name}.offset" + _struct_emit_definition(generated, name_token, size_word, [str(field.size)]) + _struct_emit_definition(generated, name_token, offset_word, [str(field.offset)]) + 
_struct_emit_definition( + generated, + name_token, + f"{struct_name}.{field.name}@", + [offset_word, "+", "@"], + ) + _struct_emit_definition( + generated, + name_token, + f"{struct_name}.{field.name}!", + [offset_word, "+", "!"], + ) + + parser.tokens[parser.pos:parser.pos] = generated + return None + + +def macro_struct_end(parser: Parser) -> Optional[List[ASTNode]]: + raise ParseError("';struct' must follow a 'struct:' block") + + def bootstrap_dictionary() -> Dictionary: dictionary = Dictionary() dictionary.register(Word(name="immediate", immediate=True, macro=macro_immediate)) + dictionary.register(Word(name="macro:", immediate=True, macro=macro_begin_text_macro)) + dictionary.register(Word(name=";macro", immediate=True, macro=macro_end_text_macro)) + dictionary.register(Word(name="struct:", immediate=True, macro=macro_struct_begin)) + dictionary.register(Word(name=";struct", immediate=True, macro=macro_struct_end)) return dictionary diff --git a/stdlib.sl b/stdlib.sl index 0415b26..cb008ce 100644 --- a/stdlib.sl +++ b/stdlib.sl @@ -109,6 +109,72 @@ puts_finish_digits: } ; +:asm == { + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + sete bl + mov [r12], rbx +} +; + +:asm != { + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setne bl + mov [r12], rbx +} +; + +:asm < { + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setl bl + mov [r12], rbx +} +; + +:asm > { + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setg bl + mov [r12], rbx +} +; + +:asm <= { + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setle bl + mov [r12], rbx +} +; + +:asm >= { + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setge bl + mov [r12], rbx +} +; + :asm @ { mov rax, [r12] mov rax, [rax] diff --git a/test.sl b/test.sl index e6e258c..1da9570 100644 --- a/test.sl +++ b/test.sl @@ -7,6 +7,30 @@ import stdlib.sl } ; +macro: 
square + dup * +;macro + +macro: defconst 2 + : $1 + $2 + ; +;macro + +macro: defadder 3 + : $1 + $2 $3 + + ; +;macro + +defconst MAGIC 99 +defadder add13 5 8 + +struct: Point + field x 8 + field y 8 +;struct + : test-add 5 7 + puts ; @@ -61,6 +85,74 @@ import stdlib.sl 4096 munmap drop ; +: test-macro + 9 square puts + MAGIC puts + add13 puts +; + +: test-if + 5 5 == if + 111 puts + else + 222 puts + then +; + +: test-else-if + 2 + dup 1 == if + 50 puts + else + dup 2 == if + 60 puts + else + 70 puts + then + then + drop +; + +: test-for + 0 + 5 for + 1 + + next + puts +; + +: test-for-zero + 123 + 0 for + drop + next + puts +; + +: test-struct + mem-slot + dup 111 swap Point.x! + dup 222 swap Point.y! + dup Point.x@ puts + Point.y@ puts + Point.size puts +; + +: test-cmp + 5 5 == puts + 5 4 == puts + 5 4 != puts + 4 4 != puts + 3 5 < puts + 5 3 < puts + 5 3 > puts + 3 5 > puts + 5 5 <= puts + 6 5 <= puts + 5 5 >= puts + 4 5 >= puts +; + : main test-add test-sub @@ -72,5 +164,12 @@ import stdlib.sl test-swap test-store test-mmap + test-macro + test-if + test-else-if + test-for + test-for-zero + test-cmp + test-struct 0 ;