From 5030138f3087ecca9a7f96dc014321389552ecc0 Mon Sep 17 00:00:00 2001
From: igor
Date: Wed, 18 Feb 2026 14:18:41 +0100
Subject: [PATCH] optimizations

---
 SPEC.md |   2 +-
 main.py | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+), 1 deletion(-)

diff --git a/SPEC.md b/SPEC.md
index 5f76531..b42ba91 100644
--- a/SPEC.md
+++ b/SPEC.md
@@ -9,7 +9,7 @@ This document reflects the implementation that ships in this repository today (`
 - **Unsafe by design** – Memory, syscalls, inline assembly, and FFI expose raw machine power. The standard library is intentionally thin and policy-free.
 
 ## 2. Toolchain and Repository Layout
-- **Driver (`main.py`)** – Supports `python main.py source.sl -o a.out`, `--emit-asm`, `--run`, `--dbg`, `--repl`, `--temp-dir`, `--clean`, repeated `-I/--include` paths, and repeated `-l` linker flags (either `-lfoo` or `-l libc.so.6`). Unknown `-l` flags are collected and forwarded to the linker. Pass `--ct-run-main` to run the program's `main` word on the compile-time VM before NASM/ld run, which surfaces discrepancies between compile-time and runtime semantics. Pass `--no-artifact` to stop after compilation/assembly emission without building an output file, or use `--script` as shorthand for `--no-artifact --ct-run-main`. Pass `--docs` to open a searchable TUI that scans stack-effect comments and nearby docs from `.sl` files (`--docs-query` sets initial filter and `--docs-root` adds scan roots). `--no-folding` disables constant folding and `--no-peephole` disables peephole rewrites (for example `swap drop` → `nip`, `dup drop` removed, `swap over` → `tuck`, `nip drop` → `2drop`).
+- **Driver (`main.py`)** – Supports `python main.py source.sl -o a.out`, `--emit-asm`, `--run`, `--dbg`, `--repl`, `--temp-dir`, `--clean`, repeated `-I/--include` paths, and repeated `-l` linker flags (either `-lfoo` or `-l libc.so.6`). Unknown `-l` flags are collected and forwarded to the linker. Pass `--ct-run-main` to run the program's `main` word on the compile-time VM before NASM/ld run, which surfaces discrepancies between compile-time and runtime semantics. Pass `--no-artifact` to stop after compilation/assembly emission without building an output file, or use `--script` as shorthand for `--no-artifact --ct-run-main`. Pass `--docs` to open a searchable TUI that scans stack-effect comments and nearby docs from `.sl` files (`--docs-query` sets initial filter and `--docs-root` adds scan roots). `--no-folding` disables constant folding and `--no-peephole` disables peephole rewrites (for example `swap drop` → `nip`, `dup drop` removed, `swap over` → `tuck`, `nip drop` → `2drop`, `x 0 +` → `x`, `x 1 *` → `x`, `x -1 *` → `x neg`, and `not not` removed).
 - **REPL** – `--repl` launches a stateful session with commands such as `:help`, `:reset`, `:load`, `:call `, `:edit`, and `:show`. The REPL still emits/links entire programs for each run; it simply manages the session source for you.
 - **Imports** – `import relative/or/absolute/path.sl` inserts the referenced file textually. Resolution order: (1) absolute path, (2) relative to the importing file, (3) each include path (defaults: project root and `./stdlib`). Each file is included at most once per compilation unit. Import lines leave blank placeholders so error spans stay meaningful.
 - **Workspace** – `stdlib/` holds library modules, `tests/` contains executable samples with `.expected` outputs, `extra_tests/` houses standalone integration demos, and `libs/` collects opt-in extensions such as `libs/fn.sl` and `libs/nob.sl`.
diff --git a/main.py b/main.py
index d8c9819..ee6fa88 100644
--- a/main.py
+++ b/main.py
@@ -2510,6 +2510,18 @@ class CompileTimeVM:
                 ip += 1
                 continue
 
+            if kind == "list_literal":
+                # Pre-folded static list, stored as [count][item 0][item 1]...
+                values = list(node.data or [])
+                count = len(values)
+                addr = self.memory.allocate((count + 1) * 8)
+                CTMemory.write_qword(addr, count)
+                for idx_item, val in enumerate(values):
+                    CTMemory.write_qword(addr + 8 + idx_item * 8, int(val))
+                _push(addr)
+                ip += 1
+                continue
+
             if kind == "list_end":
                 if not self._list_capture_stack:
                     raise ParseError("']' without matching '['")
@@ -2931,6 +2943,82 @@ class Assembler:
             optimized.append(nodes[idx])
             idx += 1
         nodes = optimized
+
+        # Literal-aware algebraic identities and redundant unary chains.
+        # Repeat until nothing changes: a rewrite can make a new pair adjacent.
+        changed = True
+        while changed:
+            changed = False
+            optimized = []
+            idx = 0
+
+            while idx < len(nodes):
+                # Redundant unary pairs.
+                # NOTE(review): dropping `not not` is an identity only if
+                # `not` is bitwise, whereas `0 ==` -> `not` below is only
+                # right if `not` is logical. Both cannot be sound for
+                # arbitrary integers -- confirm what `not` computes.
+                if idx + 1 < len(nodes):
+                    a = nodes[idx]
+                    b = nodes[idx + 1]
+                    if a.op == "word" and b.op == "word":
+                        wa = str(a.data)
+                        wb = str(b.data)
+                        if (wa, wb) in {
+                            ("not", "not"),
+                            ("neg", "neg"),
+                        }:
+                            idx += 2
+                            changed = True
+                            continue
+
+                # Binary op identities where right operand is a literal.
+                if idx + 1 < len(nodes):
+                    lit = nodes[idx]
+                    op = nodes[idx + 1]
+                    if lit.op == "literal" and isinstance(lit.data, int) and op.op == "word":
+                        k = int(lit.data)
+                        w = str(op.data)
+                        base_loc = lit.loc or op.loc
+
+                        # Neutral right operand (x+0, x-0, x*1, x/1, `bor` or
+                        # `bxor` with 0, `band` with -1, shift by 0): the value
+                        # under the literal is unchanged, so drop both ops.
+                        if (
+                            (k == 0 and w in {"+", "-", "bor", "bxor", "shl", "shr", "sar"})
+                            or (k == 1 and w in {"*", "/"})
+                            or (k == -1 and w == "band")
+                        ):
+                            idx += 2
+                            changed = True
+                            continue
+
+                        # x * -1 is rewritten to `neg`.
+                        if w == "*" and k == -1:
+                            optimized.append(Op(op="word", data="neg", loc=base_loc))
+                            idx += 2
+                            changed = True
+                            continue
+
+                        # x % 1 is always 0, but x still has to be consumed.
+                        if w == "%" and k == 1:
+                            optimized.append(Op(op="word", data="drop", loc=base_loc))
+                            optimized.append(Op(op="literal", data=0, loc=base_loc))
+                            idx += 2
+                            changed = True
+                            continue
+
+                        # Comparing against 0 becomes a single `not`.
+                        if w == "==" and k == 0:
+                            optimized.append(Op(op="word", data="not", loc=base_loc))
+                            idx += 2
+                            changed = True
+                            continue
+
+                optimized.append(nodes[idx])
+                idx += 1
+
+            nodes = optimized
         definition.body = nodes
 
     def _fold_constants_in_definition(self, definition: Definition) -> None:
@@ -3084,6 +3172,58 @@ class Assembler:
 
         definition.body = rebuilt
 
+    def _fold_static_list_literals_definition(self, definition: Definition) -> None:
+        """Fold all-integer list literals in ``definition.body`` into one op.
+
+        A ``list_begin`` followed only by integer ``literal`` ops and then a
+        ``list_end`` is replaced by a single ``list_literal`` op. Its ``data``
+        is the list of integer values and its ``loc`` is taken from the
+        opening ``list_begin``. An empty list (``list_begin list_end``) also
+        folds, producing a ``list_literal`` with no values.
+
+        Every other list is left as-is: one that contains a nested list, a
+        non-integer or non-literal element, or that has no ``list_end``.
+        Lists nested inside such a list are still folded on their own when
+        they qualify.
+
+        Example (ops shown by kind):
+            ``list_begin literal(1) literal(2) list_end``
+            becomes ``list_literal([1, 2])``.
+
+        Args:
+            definition: Definition whose ``body`` is rewritten in place.
+        """
+        nodes = definition.body
+        rebuilt: List[Op] = []
+        idx = 0
+        while idx < len(nodes):
+            node = nodes[idx]
+            if node.op == "list_begin":
+                # A list is static exactly when an unbroken run of integer
+                # literals leads straight to its `list_end`, so the scan can
+                # stop at the first other op. That keeps the pass linear
+                # instead of walking to the matching `]` of every dynamic list.
+                end = idx + 1
+                static_values: List[int] = []
+                while end < len(nodes):
+                    cur = nodes[end]
+                    if cur.op != "literal" or not isinstance(cur.data, int):
+                        break
+                    static_values.append(int(cur.data))
+                    end += 1
+
+                if end < len(nodes) and nodes[end].op == "list_end":
+                    rebuilt.append(Op(op="list_literal", data=static_values, loc=node.loc))
+                    idx = end + 1
+                    continue
+
+            # Not a foldable list (or not a list at all): keep the op unchanged
+            # and move on to the next one.
+            rebuilt.append(node)
+            idx += 1
+
+        definition.body = rebuilt
+
     def _reachable_runtime_defs(self, runtime_defs: Sequence[Union[Definition, AsmDefinition]]) -> Set[str]:
         edges: Dict[str, Set[str]] = {}
         for definition in runtime_defs:
@@ -3139,6 +3279,9 @@ class Assembler:
         for defn in definitions:
             if isinstance(defn, Definition):
                 self._fold_constants_in_definition(defn)
+        for defn in definitions:
+            if isinstance(defn, Definition):
+                self._fold_static_list_literals_definition(defn)
         stray_forms = [form for form in module.forms if not isinstance(form, valid_defs)]
         if stray_forms:
             raise CompileError("top-level literals or word references are not supported yet")
@@ -3386,6 +3529,26 @@ class Assembler:
             builder.emit("    mov [rel list_capture_sp], rax")
             return
 
+        if kind == "list_literal":
+            # Items go through rcx (already clobbered by syscall): `mov m64, imm` is imm32-only.
+            values = list(data or [])
+            builder.comment("list literal")
+            builder.emit("    xor rdi, rdi")
+            builder.emit(f"    mov rsi, {(len(values) + 1) * 8}")
+            builder.emit("    mov rdx, 3")
+            builder.emit("    mov r10, 34")
+            builder.emit("    mov r8, -1")
+            builder.emit("    xor r9, r9")
+            builder.emit("    mov rax, 9")
+            builder.emit("    syscall")
+            builder.emit(f"    mov qword [rax], {len(values)}")
+            for idx_item, value in enumerate(values):
+                builder.emit(f"    mov rcx, {int(value)}")
+                builder.emit(f"    mov [rax + {8 + idx_item * 8}], rcx")
+            builder.emit("    sub r12, 8")
+            builder.emit("    mov [r12], rax")
+            return
+
         if kind == "list_end":
             base = str(data)
             loop_label = f"{base}_copy_loop"