From 8e8faf3c913955d0560edefbd85591cab40a5ba5 Mon Sep 17 00:00:00 2001 From: IgorCielniak Date: Sat, 6 Dec 2025 16:30:58 +0100 Subject: [PATCH] Initial commit --- SPEC.md | 102 +++++++++ main.py | 601 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ main.sl | 7 + stdlib.sl | 118 +++++++++++ test.sl | 45 ++++ 5 files changed, 873 insertions(+) create mode 100644 SPEC.md create mode 100644 main.py create mode 100644 main.sl create mode 100644 stdlib.sl create mode 100644 test.sl diff --git a/SPEC.md b/SPEC.md new file mode 100644 index 0000000..d0b30b1 --- /dev/null +++ b/SPEC.md @@ -0,0 +1,102 @@ +# L2 Language Specification (Draft) + +## 1. Design Goals +- **Meta-language first**: L2 is a minimal core designed to be reshaped into other languages at runtime, matching Forth's malleability with modern tooling. +- **Native code generation**: Source compiles directly to NASM-compatible x86-64 assembly, enabling both AOT binaries and JIT-style pipelines. +- **Runtime self-modification**: Parsers, macro expanders, and the execution pipeline are ordinary user-defined words that can be swapped or rewritten on demand. +- **Total control**: Provide unchecked memory access, inline assembly, and ABI-level hooks for syscalls/FFI, leaving safety policies to user space. +- **Self-hosting path**: The bootstrap reference implementation lives in Python, but the language must be able to reimplement its toolchain using its own facilities plus inline asm. + +## 2. Program Model +- **Execution units (words)**: Everything is a word. Words can be defined in high-level L2, inline asm, or as parser/runtime hooks. +- **Compilation pipeline**: + 1. Source stream tokenized via active reader (user-overridable). + 2. Tokens dispatched to interpreter or compiler hooks (also user-overridable). + 3. Resulting IR is a threaded list of word references. + 4. Code generator emits NASM `.text` with helper macros. + 5. `nasm` + `ld` (or custom linker) build an ELF64 executable. 
+- **Interpreted mode**: For REPLs or rapid experimentation, the compiler can emit temporary asm, assemble to an object in memory, and `dlopen` or `execve` it. +- **Bootstrapping**: `main.py` orchestrates tokenizer, dictionary, IR, and final asm emission. + +## 3. Parsing & Macro System +- **Reader hooks**: + - `read-token`: splits the byte stream; default is whitespace delimited with numeric/string literal recognizers. + - `on-token`: user code decides whether to interpret, compile, or treat the token as syntax. + - `lookup`: resolves token → word entry; can be replaced to build new namespaces or module systems. +- **Compile vs interpret**: Each word advertises stack effect + immediacy. Immediate words execute during compilation (macro behavior). Others emit code or inline asm. +- **Syntax morphing**: Provide primitives `set-reader`, `with-reader`, and word-lists so layers (e.g., Lisp-like forms) can be composed. + +## 4. Core Types & Data Model +- **Cells**: 64-bit signed integers; all stack operations use cells. +- **Double cells**: 128-bit values formed by two cells; used for addresses or 128-bit arithmetic. +- **Typed views**: Optional helper words interpret memory as bytes, half-words, floats, or structs but core semantics stay cell-based. +- **User-defined types**: `struct`, `union`, and `enum` builders produce layout descriptors plus accessor words that expand to raw loads/stores. + +## 5. Stacks & Calling Convention +- **Data stack**: Unlimited (up to memory). Manipulated via standard words (`dup`, `swap`, `rot`, `over`). Compiled code keeps top-of-stack in registers when possible for performance. +- **Return stack**: Used for control flow. Directly accessible for meta-programming; users must avoid corrupting call frames unless intentional. +- **Control stack**: Optional third stack for advanced flow transformations (e.g., continuations) implemented in the standard library. 
+- **Call ABI**: Compiled words follow System V: arguments mapped from data stack into registers before `call`, results pushed back afterward. + +## 6. Memory & Allocation +- **Linear memory primitives**: `@` (fetch), `!` (store), `+!`, `-!`, `memcpy`, `memset` translate to plain loads/stores without checks. +- **Address spaces**: Single flat 64-bit space; no segmentation. Users may map devices via `mmap` or syscalls. +- **Allocators**: + - Default bump allocator in the runtime prelude. + - `install-allocator` allows swapping malloc/free pairs at runtime. + - Allocators are just words; nothing prevents multiple domains. + +## 7. Control Flow +- **Branching**: `if ... else ... then`, `begin ... until`, `case ... endcase` compile to standard conditional jumps. Users can redefine the parsing words to create new control forms. +- **Tail calls**: `tail` word emits `jmp` instead of `call`, enabling explicit TCO. +- **Exceptions**: Not baked in; provide optional libraries that implement condition stacks via return-stack manipulation. + +## 8. Inline Assembly & Low-Level Hooks +- **Asm blocks**: `asm { ... }` injects raw NASM inside a word. The compiler preserves stack/register invariants by letting asm declare its stack effect signature. +- **Asm-defined words**: `:asm name ( in -- out ) { ... } ;` generates a label and copies the block verbatim, wrapping prologue/epilogue helpers. +- **Macro assembler helpers**: Provide macros for stack slots (`.tos`, `.nos`), temporary registers, and calling runtime helpers. + +## 9. Foreign Function Interface +- **Symbol import**: `c-import "libc.so" clock_gettime` loads a symbol and records its address as a constant word. Multiple libraries can be opened and cached. +- **Call sites**: `c-call ( in -- out ) symbol` pops arguments, loads System V argument registers, issues `call symbol`, then pushes return values. Variadic calls require the user to manage `al` for arg count. 
+- **Struct marshalling**: Helper words `with-struct` and `field` macros emit raw loads/stores so C structs can be passed by pointer without extra runtime support. +- **Error handling**: The runtime never inspects `errno`; users can read/write the TLS slot through provided helper words. + +## 10. Syscalls & OS Integration +- **Primitive syscall**: `syscall ( args... nr -- ret )` expects the syscall number at the top of stack, maps previous values to `rdi`, `rsi`, `rdx`, `r10`, `r8`, `r9`, runs `syscall`, and returns `rax`. +- **Wrappers**: The standard library layers ergonomic words (`open`, `mmap`, `clone`, etc.) over the primitive but exposes hooks to override or extend them. +- **Process bootstrap**: Entry stub captures `argc`, `argv`, `envp`, stores them in global cells (`argc`, `argv-base`), and pushes them on the data stack before invoking the user `main` word. + +## 11. Module & Namespace System +- **Wordlists**: Dictionaries can be stacked; `within wordlist ... end` temporarily searches a specific namespace. +- **Sealing**: Wordlists may be frozen to prevent redefinition, but the default remains open-world recompilation. +- **Import forms**: `use module-name` copies references into the active wordlist; advanced loaders can be authored entirely in L2. + +## 12. Build & Tooling Pipeline +- **Compiler driver**: `main.py` exposes modes: `build -o a.out`, `repl`, `emit-asm`, `emit-obj`. +- **External tools**: Default path is `nasm -f elf64` then `ld`; flags pass-through so users can link against custom CRT or libc replacements. +- **Incremental/JIT**: Driver may pipe asm into `nasm` via stdin and `dlopen` the resulting shared object for REPL-like workflows. +- **Configuration**: A manifest (TOML or `.sl`) records include paths, default allocators, and target triples for future cross-compilation. + +## 13. Self-Hosting Strategy +- **Phase 1**: Python host provides tokenizer, parser hooks, dictionary, and code emitter. 
+- **Phase 2**: Re-implement tokenizer + dictionary in L2 using inline asm for hot paths; Python shrinks to a thin driver. +- **Phase 3**: Full self-host—compiler, assembler helpers, and driver written in L2, requiring only `nasm`/`ld`. + +## 14. Standard Library Sketch +- **Core words**: Arithmetic, logic, stack ops, comparison, memory access, control flow combinators. +- **Meta words**: Reader management, dictionary inspection, definition forms (`:`, `:noninline`, `:asm`, `immediate`). +- **Allocators**: Default bump allocator, arena allocator, and hook to install custom malloc/free pairs. +- **FFI/syscalls**: Thin wrappers plus convenience words for POSIX-level APIs. +- **Diagnostics**: Minimal `type`, `emit`, `cr`, `dump`, and tracing hooks for debugging emitted asm. + +## 15. Command-Line & Environment +- **Entry contract**: `main` receives `argc argv -- exit-code` on the data stack. Programs push the desired exit code before invoking `exit` or returning to runtime epilogue. +- **Environment access**: `envp` pointer stored in `.data`; helper words convert entries to counted strings or key/value maps. +- **Args parsing**: Library combinators transform `argv` into richer domain structures, though raw pointer arithmetic remains available. + +## 16. Extensibility & Safety Considerations +- **Hot reload**: Redefining a word overwrites its dictionary entry and emits fresh asm. Users must relink or patch call sites if binaries are already running. +- **Sandboxing**: None by default. Documented patterns show how to wrap memory/syscall words to build capability subsets without touching the core. +- **Testing hooks**: Interpreter-mode trace prints emitted asm per word to aid verification. +- **Portability**: Spec targets x86-64 System V for now but the abstraction layers (stack macros, calling helpers) permit future backends. 
\ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..065565f --- /dev/null +++ b/main.py @@ -0,0 +1,601 @@ +"""Bootstrap compiler for the L2 language. + +This file now contains working scaffolding for: + +* Parsing definitions, literals, and ordinary word references. +* Respecting immediate/macro words so syntax can be rewritten on the fly. +* Emitting NASM-compatible x86-64 assembly with explicit data and return stacks. +* Driving the toolchain via ``nasm`` + ``ld``. +""" + +from __future__ import annotations + +import argparse +import subprocess +import sys +from dataclasses import dataclass, field +from pathlib import Path +from typing import Callable, Dict, Iterable, List, Optional, Sequence, Set, Union + + +class ParseError(Exception): + """Raised when the source stream cannot be parsed.""" + + +class CompileError(Exception): + """Raised when IR cannot be turned into assembly.""" + + +# --------------------------------------------------------------------------- +# Tokenizer / Reader +# --------------------------------------------------------------------------- + + +@dataclass +class Token: + lexeme: str + line: int + column: int + start: int + end: int + + def __repr__(self) -> str: # pragma: no cover - debug helper + return f"Token({self.lexeme!r}@{self.line}:{self.column})" + + +class Reader: + """Default reader; users can swap implementations at runtime.""" + + def __init__(self) -> None: + self.line = 1 + self.column = 0 + + def tokenize(self, source: str) -> Iterable[Token]: + self.line = 1 + self.column = 0 + index = 0 + lexeme: List[str] = [] + token_start = 0 + token_line = 1 + token_column = 0 + for char in source: + if char.isspace(): + if lexeme: + yield Token( + "".join(lexeme), + token_line, + token_column, + token_start, + index, + ) + lexeme.clear() + if char == "\n": + self.line += 1 + self.column = 0 + else: + self.column += 1 + index += 1 + continue + if not lexeme: + token_start = index + token_line 
= self.line + token_column = self.column + lexeme.append(char) + self.column += 1 + index += 1 + if lexeme: + yield Token("".join(lexeme), token_line, token_column, token_start, index) + + +# --------------------------------------------------------------------------- +# Dictionary / Words +# --------------------------------------------------------------------------- + + +class ASTNode: + """Base class for all AST nodes.""" + + +@dataclass +class WordRef(ASTNode): + name: str + + +@dataclass +class Literal(ASTNode): + value: int + + +@dataclass +class Definition(ASTNode): + name: str + body: List[ASTNode] + immediate: bool = False + + +@dataclass +class AsmDefinition(ASTNode): + name: str + body: str + immediate: bool = False + + +@dataclass +class Module(ASTNode): + forms: List[ASTNode] + + +MacroHandler = Callable[["Parser"], Optional[List[ASTNode]]] +IntrinsicEmitter = Callable[["FunctionEmitter"], None] + + +@dataclass +class Word: + name: str + immediate: bool = False + stack_effect: str = "( -- )" + definition: Optional[Union[Definition, AsmDefinition]] = None + macro: Optional[MacroHandler] = None + intrinsic: Optional[IntrinsicEmitter] = None + + +@dataclass +class Dictionary: + words: Dict[str, Word] = field(default_factory=dict) + + def register(self, word: Word) -> None: + if word.name in self.words: + sys.stderr.write(f"[warn] redefining word {word.name}\n") + self.words[word.name] = word + + def lookup(self, name: str) -> Optional[Word]: + return self.words.get(name) + + +# --------------------------------------------------------------------------- +# Parser +# --------------------------------------------------------------------------- + + +Context = Union[Module, Definition] + + +class Parser: + def __init__(self, dictionary: Dictionary) -> None: + self.dictionary = dictionary + self.tokens: List[Token] = [] + self.pos = 0 + self.context_stack: List[Context] = [] + self.definition_stack: List[Word] = [] + self.last_defined: Optional[Word] = None + 
        self.source: str = ""

    # Public helpers for macros ------------------------------------------------
    def next_token(self) -> Token:
        # Macros may pull tokens directly from the stream (e.g. to read a name).
        return self._consume()

    def peek_token(self) -> Optional[Token]:
        # Non-destructive lookahead; None at end of stream.
        return None if self._eof() else self.tokens[self.pos]

    def emit_node(self, node: ASTNode) -> None:
        # Macros append nodes into whatever context is currently open.
        self._append_node(node)

    def most_recent_definition(self) -> Optional[Word]:
        # Used by words like `immediate` that modify the definition just closed.
        return self.last_defined

    # Parsing ------------------------------------------------------------------
    def parse(self, tokens: Iterable[Token], source: str) -> Module:
        # Entry point: consumes the whole token stream and returns a Module.
        # The raw source text is kept so `:asm` bodies can be sliced verbatim.
        self.tokens = list(tokens)
        self.source = source
        self.pos = 0
        self.context_stack = [Module(forms=[])]
        self.definition_stack.clear()
        self.last_defined = None

        while not self._eof():
            token = self._consume()
            lexeme = token.lexeme
            # `:`, `;` and `:asm` are structural and handled before dictionary
            # lookup so they cannot be shadowed accidentally.
            if lexeme == ":":
                self._begin_definition(token)
                continue
            if lexeme == ";":
                self._end_definition(token)
                continue
            if lexeme == ":asm":
                self._parse_asm_definition(token)
                continue
            self._handle_token(token)

        if len(self.context_stack) != 1:
            raise ParseError("unclosed definition at EOF")

        module = self.context_stack.pop()
        if not isinstance(module, Module):  # pragma: no cover - defensive
            raise ParseError("internal parser state corrupt")
        return module

    # Internal helpers ---------------------------------------------------------
    def _handle_token(self, token: Token) -> None:
        # Order: integer literal, then immediate (macro) word, then a plain
        # word reference (which may resolve only at emit time).
        if self._try_literal(token):
            return

        word = self.dictionary.lookup(token.lexeme)
        if word and word.immediate:
            if not word.macro:
                raise ParseError(f"immediate word {word.name} lacks macro handler")
            # Immediate words run during parsing; they may return nodes to splice.
            produced = word.macro(self)
            if produced:
                for node in produced:
                    self._append_node(node)
            return

        self._append_node(WordRef(name=token.lexeme))

    def _begin_definition(self, token: Token) -> None:
        # `: name` opens a Definition context; the Word is registered eagerly
        # so the body may reference the word being defined (recursion).
        if self._eof():
            raise ParseError(f"definition name missing after ':' at {token.line}:{token.column}")
        name_token = self._consume()
        definition = Definition(name=name_token.lexeme, body=[])
        self.context_stack.append(definition)
        word = self.dictionary.lookup(definition.name)
        if word is None:
            word = Word(name=definition.name)
            self.dictionary.register(word)
        word.definition = definition
        self.definition_stack.append(word)

    def _end_definition(self, token: Token) -> None:
        # `;` closes the innermost Definition and attaches it to the Module.
        if len(self.context_stack) <= 1:
            raise ParseError(f"unexpected ';' at {token.line}:{token.column}")
        ctx = self.context_stack.pop()
        if not isinstance(ctx, Definition):
            raise ParseError("';' can only close definitions")
        word = self.definition_stack.pop()
        # Immediacy may have been toggled on the Word while parsing the body.
        ctx.immediate = word.immediate
        module = self.context_stack[-1]
        if not isinstance(module, Module):
            raise ParseError("nested definitions are not supported yet")
        module.forms.append(ctx)
        self.last_defined = word

    def _parse_asm_definition(self, token: Token) -> None:
        # `:asm name { ... } ;` — the braces delimit a verbatim slice of the
        # original source text (taken by byte offsets, not re-tokenized).
        if self._eof():
            raise ParseError(f"definition name missing after ':asm' at {token.line}:{token.column}")
        name_token = self._consume()
        brace_token = self._consume()
        if brace_token.lexeme != "{":
            raise ParseError(f"expected '{{' after asm name at {brace_token.line}:{brace_token.column}")
        block_start = brace_token.end
        block_end: Optional[int] = None
        while not self._eof():
            next_token = self._consume()
            if next_token.lexeme == "}":
                block_end = next_token.start
                break
        if block_end is None:
            raise ParseError("missing '}' to terminate asm body")
        asm_body = self.source[block_start:block_end]
        definition = AsmDefinition(name=name_token.lexeme, body=asm_body)
        word = self.dictionary.lookup(definition.name)
        if word is None:
            word = Word(name=definition.name)
            self.dictionary.register(word)
        word.definition = definition
        definition.immediate = word.immediate
        module = self.context_stack[-1]
        if not isinstance(module, Module):
            raise ParseError("asm definitions must be top-level forms")
        module.forms.append(definition)
        self.last_defined = word
        # A trailing `;` is still required after the closing brace.
        if self._eof():
            raise ParseError("asm definition missing terminator ';'")
        terminator = self._consume()
        if terminator.lexeme != ";":
            raise ParseError(f"expected ';' after asm definition at {terminator.line}:{terminator.column}")

    def _append_node(self, node: ASTNode) -> None:
        # Route a node into whichever context is innermost: the Module's
        # top-level forms or the open Definition's body.
        target = self.context_stack[-1]
        if isinstance(target, Module):
            target.forms.append(node)
        elif isinstance(target, Definition):
            target.body.append(node)
        else:  # pragma: no cover - defensive
            raise ParseError("unknown parse context")

    def _try_literal(self, token: Token) -> bool:
        # Base 0 so 0x.., 0o.., 0b.. prefixes all parse as integers.
        try:
            value = int(token.lexeme, 0)
        except ValueError:
            return False
        self._append_node(Literal(value=value))
        return True

    def _consume(self) -> Token:
        # Advance past and return the current token; error at end of stream.
        if self._eof():
            raise ParseError("unexpected EOF")
        token = self.tokens[self.pos]
        self.pos += 1
        return token

    def _eof(self) -> bool:
        return self.pos >= len(self.tokens)


# ---------------------------------------------------------------------------
# NASM Emitter
# ---------------------------------------------------------------------------


@dataclass
class Emission:
    """Accumulated assembly output, split by section."""

    text: List[str] = field(default_factory=list)
    data: List[str] = field(default_factory=list)
    bss: List[str] = field(default_factory=list)

    def snapshot(self) -> str:
        # Render only the sections that actually received content.
        parts: List[str] = []
        if self.text:
            parts.extend(["section .text", *self.text])
        if self.data:
            parts.extend(["section .data", *self.data])
        if self.bss:
            parts.extend(["section .bss", *self.bss])
        return "\n".join(parts)


class FunctionEmitter:
    """Utility for emitting per-word assembly.

    r12 is the data-stack pointer throughout the generated code; the stack
    grows downward, so push = sub r12, 8 then store.
    """

    def __init__(self, text: List[str]) -> None:
        self.text = text

    def emit(self, line: str) -> None:
        self.text.append(line)

    def comment(self, message: str) -> None:
        self.text.append(f"    ; {message}")

    def push_literal(self, value: int) -> None:
        # Push an immediate cell onto the data stack.
        self.text.extend([
            f"    ; push {value}",
            "    sub r12, 8",
            f"    mov qword [r12], {value}",
        ])

    def push_from(self, register: str) -> None:
        # Push the contents of *register* onto the data stack.
        self.text.extend([
            "    sub r12, 8",
            f"    mov [r12], {register}",
        ])

    def pop_to(self, register: str) -> None:
        # Pop the top data-stack cell into *register*.
        self.text.extend([
            f"    mov {register}, [r12]",
            "    add r12, 8",
        ])


def sanitize_label(name: str) -> str:
    # Map an arbitrary word name (e.g. "+", "%") to a valid NASM label by
    # hex-escaping every non-identifier character; prefix avoids collisions.
    parts: List[str] = []
    for ch in name:
        if ch.isalnum() or ch == "_":
            parts.append(ch)
        else:
            parts.append(f"_{ord(ch):02x}")
    safe = "".join(parts) or "anon"
    return f"word_{safe}"


class Assembler:
    """Lowers a parsed Module into an Emission of NASM text."""

    def __init__(self, dictionary: Dictionary) -> None:
        self.dictionary = dictionary
        self.stack_bytes = 65536
        self.io_buffer_bytes = 128

    def emit(self, module: Module) -> Emission:
        # Emit runtime prelude, then one labelled body per definition, then
        # the bss layout for the stacks and print buffer.
        emission = Emission()
        emission.text.extend(self._runtime_prelude())

        valid_defs = (Definition, AsmDefinition)
        definitions = [form for form in module.forms if isinstance(form, valid_defs)]
        stray_forms = [form for form in module.forms if not isinstance(form, valid_defs)]
        if stray_forms:
            raise CompileError("top-level literals or word references are not supported yet")

        if not any(defn.name == "main" for defn in definitions):
            raise CompileError("missing 'main' definition")

        for definition in definitions:
            self._emit_definition(definition, emission.text)

        emission.bss.extend(self._bss_layout())
        return emission

    def _emit_definition(self, definition: Union[Definition, AsmDefinition], text: List[str]) -> None:
        # Every word becomes a label followed by its body and a `ret`.
        label = sanitize_label(definition.name)
        text.append(f"{label}:")
        builder = FunctionEmitter(text)
        if isinstance(definition, Definition):
            for node in definition.body:
                self._emit_node(node, builder)
        elif isinstance(definition, AsmDefinition):
            self._emit_asm_body(definition, builder)
        else:  # pragma: no cover - defensive
            raise CompileError("unknown definition type")
        builder.emit("    ret")

    def _emit_asm_body(self, definition: AsmDefinition, builder: FunctionEmitter) -> None:
        # Copy the user's asm text through verbatim, line by line.
        body = definition.body.strip("\n")
        if not body:
            return
        for line in body.splitlines():
            if line.strip():
                builder.emit(line)
            else:
                builder.emit("")

    def _emit_node(self, node: ASTNode, builder: FunctionEmitter) -> None:
        # Literals become pushes; word references become intrinsics or calls.
        if isinstance(node, Literal):
            builder.push_literal(node.value)
            return
        if isinstance(node, WordRef):
            self._emit_wordref(node, builder)
            return
        raise CompileError(f"unsupported AST node {node!r}")

    def _emit_wordref(self, ref: WordRef, builder: FunctionEmitter) -> None:
        # Intrinsics expand inline; everything else is a plain `call`.
        word = self.dictionary.lookup(ref.name)
        if word is None:
            raise CompileError(f"unknown word '{ref.name}'")
        if word.intrinsic:
            word.intrinsic(builder)
            return
        builder.emit(f"    call {sanitize_label(ref.name)}")

    def _runtime_prelude(self) -> List[str]:
        # _start: set up r12 (data stack) / r13 (return stack), remember the
        # empty-stack mark in r15, run main, then exit(2) with the top cell
        # (if any) as the status.  Linux syscall 60 = exit.
        return [
            "%define DSTK_BYTES 65536",
            "%define RSTK_BYTES 65536",
            "%define PRINT_BUF_BYTES 128",
            "global _start",
            "_start:",
            "    ; initialize data/return stack pointers",
            "    lea r12, [rel dstack_top]",
            "    mov r15, r12",
            "    lea r13, [rel rstack_top]",
            "    call word_main",
            "    mov rax, 0",
            "    cmp r12, r15",
            "    je .no_exit_value",
            "    mov rax, [r12]",
            "    add r12, 8",
            ".no_exit_value:",
            "    mov rdi, rax",
            "    mov rax, 60",
            "    syscall",
        ]

    def _bss_layout(self) -> List[str]:
        # Stacks grow downward, so the `_top` labels are the initial pointers.
        return [
            "align 16",
            "dstack: resb DSTK_BYTES",
            "dstack_top:",
            "align 16",
            "rstack: resb RSTK_BYTES",
            "rstack_top:",
            "align 16",
            "print_buf: resb PRINT_BUF_BYTES",
            "print_buf_end:",
        ]

    def write_asm(self, emission: Emission, path: Path) -> None:
        path.write_text(emission.snapshot())


# ---------------------------------------------------------------------------
# Built-in macros and intrinsics
# ---------------------------------------------------------------------------


def macro_immediate(parser: Parser) -> Optional[List[ASTNode]]:
    # `immediate` marks the most recently closed definition as a macro word.
    word = parser.most_recent_definition()
    if word is None:
        raise ParseError("'immediate' must follow a definition")
    word.immediate = True
    if word.definition is not None:
        word.definition.immediate = True
    return None


def bootstrap_dictionary() -> Dictionary:
    # The only built-in word is `immediate`; everything else comes from source.
    dictionary = Dictionary()
    dictionary.register(Word(name="immediate", immediate=True, macro=macro_immediate))
    return dictionary


# ---------------------------------------------------------------------------
# Driver
# ---------------------------------------------------------------------------


class Compiler:
    """Ties reader, parser, and assembler together for one compilation."""

    def __init__(self) -> None:
        self.reader = Reader()
        self.dictionary = bootstrap_dictionary()
        self.parser = Parser(self.dictionary)
        self.assembler = Assembler(self.dictionary)

    def compile_source(self, source: str) -> Emission:
        tokens = list(self.reader.tokenize(source))
        module = self.parser.parse(tokens, source)
        return self.assembler.emit(module)

    def compile_file(self, path: Path) -> Emission:
        # Inline `import` directives textually before compiling.
        source = self._load_with_imports(path.resolve())
        return self.compile_source(source)

    def _load_with_imports(self, path: Path, seen: Optional[Set[Path]] = None) -> str:
        # Recursively splice `import <relative-path>` lines; *seen* makes each
        # file load at most once, so import cycles terminate.
        if seen is None:
            seen = set()
        path = path.resolve()
        if path in seen:
            return ""
        seen.add(path)
        try:
            contents = path.read_text()
        except FileNotFoundError as exc:
            raise ParseError(f"cannot import {path}: {exc}") from exc
        lines: List[str] = []
        for idx, line in enumerate(contents.splitlines()):
            stripped = line.strip()
            if stripped.startswith("import "):
                target = stripped.split(None, 1)[1].strip()
                if not target:
                    raise ParseError(f"empty import target in {path}:{idx + 1}")
                target_path = (path.parent / target).resolve()
                lines.append(self._load_with_imports(target_path, seen))
                continue
            lines.append(line)
        return "\n".join(lines) + "\n"


def run_nasm(asm_path: Path, obj_path: Path) -> None:
    # check=True: a non-zero nasm exit aborts the build with CalledProcessError.
    subprocess.run(["nasm", "-f", "elf64", "-o", str(obj_path), str(asm_path)], check=True)


def run_linker(obj_path: Path, exe_path: Path) -> None:
    subprocess.run(["ld", "-o", str(exe_path), str(obj_path)], check=True)


def cli(argv: Sequence[str]) -> int:
    # Command-line driver: compile, write asm, then optionally assemble+link.
    parser = argparse.ArgumentParser(description="L2 compiler driver")
argparse.ArgumentParser(description="L2 compiler driver") + parser.add_argument("source", type=Path, help="input .sl file") + parser.add_argument("-o", dest="output", type=Path, default=Path("a.out")) + parser.add_argument("--emit-asm", action="store_true", help="stop after generating asm") + parser.add_argument("--temp-dir", type=Path, default=Path("build")) + args = parser.parse_args(argv) + + compiler = Compiler() + emission = compiler.compile_file(args.source) + + args.temp_dir.mkdir(parents=True, exist_ok=True) + asm_path = args.temp_dir / (args.source.stem + ".asm") + obj_path = args.temp_dir / (args.source.stem + ".o") + compiler.assembler.write_asm(emission, asm_path) + + if args.emit_asm: + print(f"[info] wrote {asm_path}") + return 0 + + run_nasm(asm_path, obj_path) + run_linker(obj_path, args.output) + print(f"[info] built {args.output}") + return 0 + + +def main() -> None: + sys.exit(cli(sys.argv[1:])) + + +if __name__ == "__main__": + main() diff --git a/main.sl b/main.sl new file mode 100644 index 0000000..28f6825 --- /dev/null +++ b/main.sl @@ -0,0 +1,7 @@ +import stdlib.sl + +: main + 2 40 + + puts + 0 +; \ No newline at end of file diff --git a/stdlib.sl b/stdlib.sl new file mode 100644 index 0000000..045f3a6 --- /dev/null +++ b/stdlib.sl @@ -0,0 +1,118 @@ +:asm puts { + mov rax, [r12] + add r12, 8 + mov rbx, rax + mov r8, 0 + cmp rbx, 0 + jge puts_abs + neg rbx + mov r8, 1 +puts_abs: + lea rsi, [rel print_buf_end] + mov rcx, 0 + mov r10, 10 + cmp rbx, 0 + jne puts_digits + dec rsi + mov byte [rsi], '0' + inc rcx + jmp puts_sign +puts_digits: +puts_loop: + xor rdx, rdx + mov rax, rbx + div r10 + add dl, '0' + dec rsi + mov [rsi], dl + inc rcx + mov rbx, rax + test rbx, rbx + jne puts_loop +puts_sign: + cmp r8, 0 + je puts_finish_digits + dec rsi + mov byte [rsi], '-' + inc rcx +puts_finish_digits: + mov byte [rsi + rcx], 10 + inc rcx + mov rax, 1 + mov rdi, 1 + mov rdx, rcx + mov r9, rsi + mov rsi, r9 + syscall +} +; + +:asm dup { + mov rax, [r12] 
+ sub r12, 8 + mov [r12], rax +} +; + +:asm drop { + add r12, 8 +} +; + +:asm swap { + mov rax, [r12] + mov rbx, [r12 + 8] + mov [r12], rbx + mov [r12 + 8], rax +} +; + +:asm + { + mov rax, [r12] + add r12, 8 + add qword [r12], rax +} +; + +:asm - { + mov rax, [r12] + add r12, 8 + sub qword [r12], rax +} +; + +:asm * { + mov rax, [r12] + add r12, 8 + imul qword [r12] + mov [r12], rax +} +; + +:asm / { + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rax +} +; + +:asm % { + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rdx +} +; + +:asm exit { + mov rdi, [r12] + add r12, 8 + mov rax, 60 + syscall +} +; diff --git a/test.sl b/test.sl new file mode 100644 index 0000000..29163b8 --- /dev/null +++ b/test.sl @@ -0,0 +1,45 @@ +import stdlib.sl + +: test-add + 5 7 + puts +; + +: test-sub + 10 3 - puts +; + +: test-mul + 6 7 * puts +; + +: test-div + 84 7 / puts +; + +: test-mod + 85 7 % puts +; + +: test-drop + 10 20 drop puts +; + +: test-dup + 11 dup + puts +; + +: test-swap + 2 5 swap - puts +; + +: main + test-add + test-sub + test-mul + test-div + test-mod + test-drop + test-dup + test-swap + 0 +;