From 2ea49d26f72ba44161de8a78c185c0ba4cc0b943 Mon Sep 17 00:00:00 2001 From: IgorCielniak Date: Mon, 8 Dec 2025 18:14:52 +0100 Subject: [PATCH] commit --- SPEC.md | 20 + build/test.asm | 730 +++++++++++++++++++++++++++++++ build/test.o | Bin 0 -> 6240 bytes main.py | 1108 +++++++++++++++++++++++++++++++++++++++++++++++- main.sl | 10 +- stdlib.sl | 235 ++++++++++ test.bin | Bin 0 -> 10112 bytes test.sl | 14 + 8 files changed, 2099 insertions(+), 18 deletions(-) create mode 100644 build/test.asm create mode 100644 build/test.o create mode 100755 test.bin diff --git a/SPEC.md b/SPEC.md index 1520986..5f600e2 100644 --- a/SPEC.md +++ b/SPEC.md @@ -25,6 +25,7 @@ - `lookup`: resolves token → word entry; can be replaced to build new namespaces or module systems. - **Compile vs interpret**: Each word advertises stack effect + immediacy. Immediate words execute during compilation (macro behavior). Others emit code or inline asm. - **Syntax morphing**: Provide primitives `set-reader`, `with-reader`, and word-lists so layers (e.g., Lisp-like forms) can be composed. +- **Inline Python hooks**: `:py name { ... } ;` executes the enclosed Python block immediately, then registers `name` as a word whose behavior is provided by that block. Define a `macro(ctx)` function to intercept compilation (receiving a `MacroContext` with helpers like `next_token`, `emit_literal`, `new_label`, `inject_tokens`, and direct access to the active parser), and/or an `intrinsic(builder)` function to emit custom assembly. This lets end users extend the language—parsing source, manipulating AST nodes, or writing NASM—without touching the bootstrap source. The standard library’s `extend-syntax` and `fn` forms are ordinary `:py` blocks built with these APIs, so users can clone or replace them entirely from L2 source files. ## 4. Core Types & Data Model - **Cells**: 64-bit signed integers; all stack operations use cells. @@ -49,6 +50,24 @@ struct: Point - `.! 
( value addr -- )` stores a field via `addr + offset !`. - Because the output is plain L2 code, users can inspect or override any generated word, and additional helpers (e.g., pointer arithmetic or iterators) can be layered on top with regular macros. +### 4.2 Lightweight C-style Sugar + +- `extend-syntax` is implemented as a `:py` macro that toggles a reader mode where identifiers suffixed with `()` (e.g., `foo()`) are rewritten as ordinary word calls. The call still obeys data-stack calling conventions; the parentheses are purely syntactic sugar. +- The same user-defined macro stack unlocks a compact function form: + + ``` + fn add(int left, int right){ + return (left + right) * right; + } + ``` + + expands into a normal colon definition which consumes two stack arguments (`left` and `right`), mirrors them onto the return stack, evaluates the infix expression, and cleans up the temporary frame before returning. +- Current limitations: + - Only `int` parameters are recognized. + - Function bodies must be a single `return <expr>;` statement. `<expr>` may contain parameter names, integer literals, parentheses, and the binary operators `+ - * / %`. + - Parameter names become available by index via `rpick`, so advanced bodies can still drop into raw L2 code if needed. +- Since the generated code uses the return stack to store arguments, it happily composes with loops/conditionals—the frame lives beneath any subsequent `for` counters and is explicitly released before the word returns. Because `fn` lives in user space, nothing stops you from swapping it out for a completely different parser (pattern matching, keyword arguments, etc.) using the same `:py` facility. + ## 5. Stacks & Calling Convention - **Data stack**: Unlimited (up to memory). Manipulated via standard words (`dup`, `swap`, `rot`, `over`). Compiled code keeps top-of-stack in registers when possible for performance. - **Return stack**: Used for control flow. 
Directly accessible for meta-programming; users must avoid corrupting call frames unless intentional. @@ -102,6 +121,7 @@ struct: Point ## 14. Standard Library Sketch - **Core words**: Arithmetic, logic, stack ops, comparison, memory access, control flow combinators. +- **Return-stack helpers**: `>r`, `r>`, `rdrop`, and `rpick` shuffle values between the data stack and the return stack. They’re used by the `fn` sugar but also available to user code for building custom control constructs. - **Meta words**: Reader management, dictionary inspection, definition forms (`:`, `:noninline`, `:asm`, `immediate`). - **Allocators**: Default bump allocator, arena allocator, and hook to install custom malloc/free pairs. - **FFI/syscalls**: Thin wrappers plus convenience words for POSIX-level APIs. diff --git a/build/test.asm b/build/test.asm new file mode 100644 index 0000000..bc3c542 --- /dev/null +++ b/build/test.asm @@ -0,0 +1,730 @@ +section .text +%define DSTK_BYTES 65536 +%define RSTK_BYTES 65536 +%define PRINT_BUF_BYTES 128 +global _start +_start: + ; initialize data/return stack pointers + lea r12, [rel dstack_top] + mov r15, r12 + lea r13, [rel rstack_top] + call word_main + mov rax, 0 + cmp r12, r15 + je .no_exit_value + mov rax, [r12] + add r12, 8 +.no_exit_value: + mov rdi, rax + mov rax, 60 + syscall +word_puts: + mov rax, [r12] + add r12, 8 + mov rbx, rax + mov r8, 0 + cmp rbx, 0 + jge puts_abs + neg rbx + mov r8, 1 +puts_abs: + lea rsi, [rel print_buf_end - 1] ; start one byte early: the last buffer byte is reserved for the newline + mov rcx, 0 + mov r10, 10 + cmp rbx, 0 + jne puts_digits + dec rsi + mov byte [rsi], '0' + inc rcx + jmp puts_sign +puts_digits: +puts_loop: + xor rdx, rdx + mov rax, rbx + div r10 + add dl, '0' + dec rsi + mov [rsi], dl + inc rcx + mov rbx, rax + test rbx, rbx + jne puts_loop +puts_sign: + cmp r8, 0 + je puts_finish_digits + dec rsi + mov byte [rsi], '-' + inc rcx +puts_finish_digits: + mov byte [rsi + rcx], 10 ; rsi + rcx == print_buf_end - 1, so the newline stays inside print_buf + inc rcx + mov rax, 1 + mov rdi, 1 + mov rdx, rcx + mov r9, rsi + mov rsi, r9 + syscall + ret 
+word_dup: + mov rax, [r12] + sub r12, 8 + mov [r12], rax + ret +word_drop: + add r12, 8 + ret +word_swap: + mov rax, [r12] + mov rbx, [r12 + 8] + mov [r12], rbx + mov [r12 + 8], rax + ret +word__2b: + mov rax, [r12] + add r12, 8 + add qword [r12], rax + ret +word__2d: + mov rax, [r12] + add r12, 8 + sub qword [r12], rax + ret +word__2a: + mov rax, [r12] + add r12, 8 + imul qword [r12] + mov [r12], rax + ret +word__2f: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rax + ret +word__25: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rdx + ret +word__3d_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + sete bl + mov [r12], rbx + ret +word__21_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setne bl + mov [r12], rbx + ret +word__3c: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setl bl + mov [r12], rbx + ret +word__3e: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setg bl + mov [r12], rbx + ret +word__3c_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setle bl + mov [r12], rbx + ret +word__3e_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setge bl + mov [r12], rbx + ret +word__40: + mov rax, [r12] + mov rax, [rax] + mov [r12], rax + ret +word__21: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + mov [rax], rbx + add r12, 8 + ret +word_mmap: + mov r9, [r12] + add r12, 8 + mov r8, [r12] + add r12, 8 + mov r10, [r12] + add r12, 8 + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 9 + syscall + mov [r12], rax + ret +word_munmap: + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 11 + syscall + mov [r12], rax + ret +word_exit: + mov rdi, [r12] + add r12, 8 + mov rax, 60 + syscall + ret +word__3er: + mov rax, [r12] + add r12, 8 + sub r13, 8 + mov [r13], rax + ret +word_r_3e: + mov 
rax, [r13] + add r13, 8 + sub r12, 8 + mov [r12], rax + ret +word_rdrop: + add r13, 8 + ret +word_rpick: + mov rcx, [r12] + add r12, 8 + mov rax, [r13 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_mem_2dslot: + lea rax, [rel print_buf] + sub r12, 8 + mov [r12], rax + ret +word_MAGIC: + ; push 99 + sub r12, 8 + mov qword [r12], 99 + ret +word_add13: + ; push 5 + sub r12, 8 + mov qword [r12], 5 + ; push 8 + sub r12, 8 + mov qword [r12], 8 + call word__2b + ret +word_Point_2esize: + ; push 16 + sub r12, 8 + mov qword [r12], 16 + ret +word_Point_2ex_2esize: + ; push 8 + sub r12, 8 + mov qword [r12], 8 + ret +word_Point_2ex_2eoffset: + ; push 0 + sub r12, 8 + mov qword [r12], 0 + ret +word_Point_2ex_40: + call word_Point_2ex_2eoffset + call word__2b + call word__40 + ret +word_Point_2ex_21: + call word_Point_2ex_2eoffset + call word__2b + call word__21 + ret +word_Point_2ey_2esize: + ; push 8 + sub r12, 8 + mov qword [r12], 8 + ret +word_Point_2ey_2eoffset: + ; push 8 + sub r12, 8 + mov qword [r12], 8 + ret +word_Point_2ey_40: + call word_Point_2ey_2eoffset + call word__2b + call word__40 + ret +word_Point_2ey_21: + call word_Point_2ey_2eoffset + call word__2b + call word__21 + ret +word_fancy_add: + call word__3er + call word__3er + ; push 0 + sub r12, 8 + mov qword [r12], 0 + call word_rpick + ; push 1 + sub r12, 8 + mov qword [r12], 1 + call word_rpick + call word__2b + ; push 1 + sub r12, 8 + mov qword [r12], 1 + call word_rpick + call word__2a + call word_rdrop + call word_rdrop + ret +word_test_2dadd: + ; push 5 + sub r12, 8 + mov qword [r12], 5 + ; push 7 + sub r12, 8 + mov qword [r12], 7 + call word__2b + call word_puts + ret +word_test_2dsub: + ; push 10 + sub r12, 8 + mov qword [r12], 10 + ; push 3 + sub r12, 8 + mov qword [r12], 3 + call word__2d + call word_puts + ret +word_test_2dmul: + ; push 6 + sub r12, 8 + mov qword [r12], 6 + ; push 7 + sub r12, 8 + mov qword [r12], 7 + call word__2a + call word_puts + ret +word_test_2ddiv: + ; push 84 + sub 
r12, 8 + mov qword [r12], 84 + ; push 7 + sub r12, 8 + mov qword [r12], 7 + call word__2f + call word_puts + ret +word_test_2dmod: + ; push 85 + sub r12, 8 + mov qword [r12], 85 + ; push 7 + sub r12, 8 + mov qword [r12], 7 + call word__25 + call word_puts + ret +word_test_2ddrop: + ; push 10 + sub r12, 8 + mov qword [r12], 10 + ; push 20 + sub r12, 8 + mov qword [r12], 20 + call word_drop + call word_puts + ret +word_test_2ddup: + ; push 11 + sub r12, 8 + mov qword [r12], 11 + call word_dup + call word__2b + call word_puts + ret +word_test_2dswap: + ; push 2 + sub r12, 8 + mov qword [r12], 2 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + call word_swap + call word__2d + call word_puts + ret +word_test_2dstore: + call word_mem_2dslot + call word_dup + ; push 123 + sub r12, 8 + mov qword [r12], 123 + call word_swap + call word__21 + call word__40 + call word_puts + ret +word_test_2dmmap: + ; push 0 + sub r12, 8 + mov qword [r12], 0 + ; push 4096 + sub r12, 8 + mov qword [r12], 4096 + ; push 3 + sub r12, 8 + mov qword [r12], 3 + ; push 34 + sub r12, 8 + mov qword [r12], 34 + ; push -1 + sub r12, 8 + mov qword [r12], -1 + ; push 0 + sub r12, 8 + mov qword [r12], 0 + call word_mmap + call word_dup + ; push 1337 + sub r12, 8 + mov qword [r12], 1337 + call word_swap + call word__21 + call word_dup + call word__40 + call word_puts + ; push 4096 + sub r12, 8 + mov qword [r12], 4096 + call word_munmap + call word_drop + ret +word_test_2dmacro: + ; push 9 + sub r12, 8 + mov qword [r12], 9 + call word_dup + call word__2a + call word_puts + call word_MAGIC + call word_puts + call word_add13 + call word_puts + ret +word_test_2dif: + ; push 5 + sub r12, 8 + mov qword [r12], 5 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + call word__3d_3d + mov rax, [r12] + add r12, 8 + test rax, rax + jz L_if_false_0 + ; push 111 + sub r12, 8 + mov qword [r12], 111 + call word_puts + jmp L_if_end_1 +L_if_false_0: + ; push 222 + sub r12, 8 + mov qword [r12], 222 + call word_puts +L_if_end_1: + 
ret +word_test_2delse_2dif: + ; push 2 + sub r12, 8 + mov qword [r12], 2 + call word_dup + ; push 1 + sub r12, 8 + mov qword [r12], 1 + call word__3d_3d + mov rax, [r12] + add r12, 8 + test rax, rax + jz L_if_false_2 + ; push 50 + sub r12, 8 + mov qword [r12], 50 + call word_puts + jmp L_if_end_3 +L_if_false_2: + call word_dup + ; push 2 + sub r12, 8 + mov qword [r12], 2 + call word__3d_3d + mov rax, [r12] + add r12, 8 + test rax, rax + jz L_if_false_4 + ; push 60 + sub r12, 8 + mov qword [r12], 60 + call word_puts + jmp L_if_end_5 +L_if_false_4: + ; push 70 + sub r12, 8 + mov qword [r12], 70 + call word_puts +L_if_end_5: +L_if_end_3: + call word_drop + ret +word_test_2dfor: + ; push 0 + sub r12, 8 + mov qword [r12], 0 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + mov rax, [r12] + add r12, 8 + cmp rax, 0 + jle L_for_end_7 + sub r13, 8 + mov [r13], rax +L_for_loop_6: + ; push 1 + sub r12, 8 + mov qword [r12], 1 + call word__2b + mov rax, [r13] + dec rax + mov [r13], rax + jg L_for_loop_6 + add r13, 8 +L_for_end_7: + call word_puts + ret +word_test_2dfor_2dzero: + ; push 123 + sub r12, 8 + mov qword [r12], 123 + ; push 0 + sub r12, 8 + mov qword [r12], 0 + mov rax, [r12] + add r12, 8 + cmp rax, 0 + jle L_for_end_9 + sub r13, 8 + mov [r13], rax +L_for_loop_8: + call word_drop + mov rax, [r13] + dec rax + mov [r13], rax + jg L_for_loop_8 + add r13, 8 +L_for_end_9: + call word_puts + ret +word_test_2dstruct: + call word_mem_2dslot + call word_dup + ; push 111 + sub r12, 8 + mov qword [r12], 111 + call word_swap + call word_Point_2ex_21 + call word_dup + ; push 222 + sub r12, 8 + mov qword [r12], 222 + call word_swap + call word_Point_2ey_21 + call word_dup + call word_Point_2ex_40 + call word_puts + call word_Point_2ey_40 + call word_puts + call word_Point_2esize + call word_puts + ret +word_test_2dcmp: + ; push 5 + sub r12, 8 + mov qword [r12], 5 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + call word__3d_3d + call word_puts + ; push 5 + sub r12, 8 + mov qword [r12], 
5 + ; push 4 + sub r12, 8 + mov qword [r12], 4 + call word__3d_3d + call word_puts + ; push 5 + sub r12, 8 + mov qword [r12], 5 + ; push 4 + sub r12, 8 + mov qword [r12], 4 + call word__21_3d + call word_puts + ; push 4 + sub r12, 8 + mov qword [r12], 4 + ; push 4 + sub r12, 8 + mov qword [r12], 4 + call word__21_3d + call word_puts + ; push 3 + sub r12, 8 + mov qword [r12], 3 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + call word__3c + call word_puts + ; push 5 + sub r12, 8 + mov qword [r12], 5 + ; push 3 + sub r12, 8 + mov qword [r12], 3 + call word__3c + call word_puts + ; push 5 + sub r12, 8 + mov qword [r12], 5 + ; push 3 + sub r12, 8 + mov qword [r12], 3 + call word__3e + call word_puts + ; push 3 + sub r12, 8 + mov qword [r12], 3 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + call word__3e + call word_puts + ; push 5 + sub r12, 8 + mov qword [r12], 5 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + call word__3c_3d + call word_puts + ; push 6 + sub r12, 8 + mov qword [r12], 6 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + call word__3c_3d + call word_puts + ; push 5 + sub r12, 8 + mov qword [r12], 5 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + call word__3e_3d + call word_puts + ; push 4 + sub r12, 8 + mov qword [r12], 4 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + call word__3e_3d + call word_puts + ret +word_test_2dc_2dfn: + ; push 3 + sub r12, 8 + mov qword [r12], 3 + ; push 7 + sub r12, 8 + mov qword [r12], 7 + call word_fancy_add + call word_puts + ret +word_main: + call word_test_2dadd + call word_test_2dsub + call word_test_2dmul + call word_test_2ddiv + call word_test_2dmod + call word_test_2ddrop + call word_test_2ddup + call word_test_2dswap + call word_test_2dstore + call word_test_2dmmap + call word_test_2dmacro + call word_test_2dif + call word_test_2delse_2dif + call word_test_2dfor + call word_test_2dfor_2dzero + call word_test_2dcmp + call word_test_2dstruct + call word_test_2dc_2dfn + ; push 0 + sub r12, 8 + mov qword [r12], 0 + ret 
+section .bss +align 16 +dstack: resb DSTK_BYTES +dstack_top: +align 16 +rstack: resb RSTK_BYTES +rstack_top: +align 16 +print_buf: resb PRINT_BUF_BYTES +print_buf_end: \ No newline at end of file diff --git a/build/test.o b/build/test.o new file mode 100644 index 0000000000000000000000000000000000000000..92599760b610482f185dcfcdab07c44c678508cd GIT binary patch literal 6240 zcmbW5e~esJ702(iTiCYLR$5ENwb6ELMCrOe+RBm!S=}u&xGO)Tl7R6!J3nA#XJ&ab zL%V=&byBnAX8qBWq!29fj}c11Xuwq<)LqIpM3RHZ{ZTr5foYd-AKLzHpD?L>wpMFPSA4QWM|GaporHW;HqLTkw_t;`WcC zgLEyT&&~8fUvaJRZ)z>wW#8dCywVq3)8OAtDcj#b4k7WS+`zXv5E{hF_IpM?14VE3 z&7P5su)JoP@lnk>W=f6m;f83u@?vAGIx)JH=<-D&8Sm7(SmlEH?&8?giOx~95F6Qy z=-;8}{QVLtAt*16)h1uMs3lhW!&^rdonS8N2&O<32CiH2yBAT2nl zr~-+xvAw+|AJl4r7Y}?;fjg*>t;W4~YPFho{Cz5pA-qv@dY??wIW*|4Bx(Tmv!`XZQdk{*o8r9Nk?b{C$C1$p>;dC=@jeb+YQ~b_ zu`_ZUFWy2(I&BF~TqDt0PLtPLYfEmCk`s_zxL`H3T}mb)Ic`hrzN|uDZieJ(TY~-L zH3NSQ#e=pFr>p5FM((YFq#ecG(0PBw(9V!)RQEx3mVmbuu4zcO*(u2%q*Sy)Odw{; z*y}8tmMln)SZ@L;cUet#P|v*GXz~DDNm$>9H92jCQd8cGpf}pR!9(S>BH}I-4^xD< z5*^OqQG-qbb=c`JzuvEj7>VAMpPVaa z8|3HCQ_kzW`>iOQ=1l-TNa}imSBE!-sGuj?fSw}cecA2~(O?bqMmOo`y|C+ZaXv-r z`g*x;wOhq%M{&aJejUe)cMGKs<_ufH#ON06tZPkHZo_Q87L92H}*o`!WRtV3i49o0j&4mNK;Vuwg#nza>p zt9vdKnfVm`P;^-e#F>0L$MsOGvlRBDpf_0;6v|TA{m@6uIw(#~&E8Y3XoBK7OYxyr zrl|q%AxmMmqWke46gw@2eU53xF(|%aDL$0rk5Fu~6!v`R9FIeBDJg=5vAe2|%O=`& z-n;+7)=0wq9*Rd$Jdff8it{Kg!y-4MxCzA$6bh%n=TVeV)byc%C;dGXKS5Cuht4?m zty0aU^v;rM?kg5mbMek%$t@-N$QsP~cdonX{)DgeqyA@!hrU6bsm&%_IO{J0d~d}= zUw>Vus}c3tdaSNwj^o3=w&+Ske>b-ImzbArNcLCf;CK(=8}3i_JuDn;M%2$kq%D{E z^Md1`K%SQc-^=_J!GFyBb-@oXe^>BFna>FRIP+y#2k)NY@DVhdf)yhHE}%x@CBg?U2otC_ok zcQLOBegpF%!Ea-JQ1I_Ce^T%=^JfL$!~9jj_c1>u_%E6NTkylo<&VZ=%va!?vaUCn ze@gIa<{JgSRJRvVR|_6xe!bvZnBOLNH}fY1-_HCM!PCso3SMUZF&xp>wU_x-ft!3Fc#h`|C&hS@3_d{yw}utZRn(A;FjGaYmFU_!{Qt1z*p6TJR3$ zTk#sSu5HX+!N1PDBKWuF;92HXp&w-aTfu+8{8hpCG5?$3k1=1y|C5*@Ji~mw;A70M z5d1I9hXnrz^G}QakZ*p9pw;t_^eW7Ur}4U$uhX zM{{0%|3qvewXUPZ|OD+-Xl*-gIbhYB4!meIu7MUCEX6fsL66Qo6q7I_;d@4M*Y*WRrJs 
zrgV-5QS2{pWN+tn@vr!lm?Bj{`y2OXJ_l$$R`IK8iqdzY#|z}tQX6?nC#D$`|I?n>^*h$T%kT+IIWx@ z*g9Mf_l*Iig3Hrr+6>mvfJD+Q)HR#Us2(SqaWaYiV%ll(E9rd7;lXlX8s4^4y}Hc~ z-4upy4ozNY&XOs(nGaIr!A${7_{4)vA}6@3iS9j6rR8U6*Z ZjBH#X_s<~zdv?h0=idO2@-Zj%{{>~(bol@P literal 0 HcmV?d00001 diff --git a/main.py b/main.py index e3eb762..7a655b3 100644 --- a/main.py +++ b/main.py @@ -13,9 +13,10 @@ from __future__ import annotations import argparse import subprocess import sys +import textwrap from dataclasses import dataclass, field from pathlib import Path -from typing import Callable, Dict, Iterable, List, Optional, Sequence, Set, Union, Tuple +from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set, Union, Tuple class ParseError(Exception): @@ -109,7 +110,7 @@ class WordRef(ASTNode): @dataclass class Literal(ASTNode): - value: int + value: Any @dataclass @@ -117,6 +118,7 @@ class Definition(ASTNode): name: str body: List[ASTNode] immediate: bool = False + compile_only: bool = False @dataclass @@ -124,6 +126,7 @@ class AsmDefinition(ASTNode): name: str body: str immediate: bool = False + compile_only: bool = False @dataclass @@ -172,7 +175,63 @@ class ForNext(ASTNode): end_label: str -MacroHandler = Callable[["Parser"], Optional[List[ASTNode]]] +class MacroContext: + """Small facade exposed to Python-defined macros.""" + + def __init__(self, parser: "Parser") -> None: + self._parser = parser + + @property + def parser(self) -> "Parser": + return self._parser + + def next_token(self) -> Token: + return self._parser.next_token() + + def peek_token(self) -> Optional[Token]: + return self._parser.peek_token() + + def emit_literal(self, value: int) -> None: + self._parser.emit_node(Literal(value=value)) + + def emit_word(self, name: str) -> None: + self._parser.emit_node(WordRef(name=name)) + + def emit_node(self, node: ASTNode) -> None: + self._parser.emit_node(node) + + def inject_tokens(self, tokens: Sequence[str], template: Optional[Token] = None) -> None: + if template is 
None: + template = Token(lexeme="", line=0, column=0, start=0, end=0) + generated = [ + Token( + lexeme=lex, + line=template.line, + column=template.column, + start=template.start, + end=template.end, + ) + for lex in tokens + ] + self.inject_token_objects(generated) + + def inject_token_objects(self, tokens: Sequence[Token]) -> None: + self._parser.tokens[self._parser.pos:self._parser.pos] = list(tokens) + + def enable_call_syntax(self) -> None: + self._parser.call_syntax_enabled = True + + def disable_call_syntax(self) -> None: + self._parser.call_syntax_enabled = False + + def new_label(self, prefix: str) -> str: + return self._parser._new_label(prefix) + + def most_recent_definition(self) -> Optional[Word]: + return self._parser.most_recent_definition() + + +MacroHandler = Callable[[MacroContext], Optional[List[ASTNode]]] IntrinsicEmitter = Callable[["FunctionEmitter"], None] @@ -186,6 +245,8 @@ class Word: intrinsic: Optional[IntrinsicEmitter] = None macro_expansion: Optional[List[str]] = None macro_params: int = 0 + compile_time_intrinsic: Optional[Callable[["CompileTimeVM"], None]] = None + compile_only: bool = False @dataclass @@ -221,6 +282,8 @@ class Parser: self.macro_recording: Optional[MacroDefinition] = None self.control_stack: List[Dict[str, str]] = [] self.label_counter = 0 + self.call_syntax_enabled = False + self.compile_time_vm = CompileTimeVM(self) # Public helpers for macros ------------------------------------------------ def next_token(self) -> Token: @@ -245,6 +308,7 @@ class Parser: self.last_defined = None self.control_stack = [] self.label_counter = 0 + self.call_syntax_enabled = False while not self._eof(): token = self._consume() @@ -260,6 +324,9 @@ class Parser: if lexeme == ":asm": self._parse_asm_definition(token) continue + if lexeme == ":py": + self._parse_py_definition(token) + continue if lexeme == "if": self._handle_if_control() continue @@ -291,21 +358,35 @@ class Parser: # Internal helpers 
--------------------------------------------------------- def _handle_token(self, token: Token) -> None: + if self.call_syntax_enabled: + call_target = self._maybe_call_form(token.lexeme) + if call_target is not None: + self._append_node(WordRef(name=call_target)) + return if self._try_literal(token): return word = self.dictionary.lookup(token.lexeme) if word and word.immediate: - if not word.macro: - raise ParseError(f"immediate word {word.name} lacks macro handler") - produced = word.macro(self) - if produced: - for node in produced: - self._append_node(node) + if word.macro: + produced = word.macro(MacroContext(self)) + if produced: + for node in produced: + self._append_node(node) + else: + self._execute_immediate_word(word) return self._append_node(WordRef(name=token.lexeme)) + def _execute_immediate_word(self, word: Word) -> None: + try: + self.compile_time_vm.invoke(word) + except ParseError: + raise + except Exception as exc: # pragma: no cover - defensive + raise ParseError(f"compile-time word '{word.name}' failed: {exc}") from exc + def _handle_macro_recording(self, token: Token) -> bool: if self.macro_recording is None: return False @@ -378,6 +459,14 @@ class Parser: self.label_counter += 1 return label + def _maybe_call_form(self, lexeme: str) -> Optional[str]: + if len(lexeme) <= 2 or not lexeme.endswith("()"): + return None + name = lexeme[:-2] + if not name or not _is_identifier(name): + return None + return name + def _handle_if_control(self) -> None: false_label = self._new_label("if_false") self._append_node(BranchZero(target=false_label)) @@ -428,6 +517,7 @@ class Parser: raise ParseError("';' can only close definitions") word = self.definition_stack.pop() ctx.immediate = word.immediate + ctx.compile_only = word.compile_only module = self.context_stack[-1] if not isinstance(module, Module): raise ParseError("nested definitions are not supported yet") @@ -458,6 +548,7 @@ class Parser: self.dictionary.register(word) word.definition = definition 
definition.immediate = word.immediate + definition.compile_only = word.compile_only module = self.context_stack[-1] if not isinstance(module, Module): raise ParseError("asm definitions must be top-level forms") @@ -469,6 +560,50 @@ class Parser: if terminator.lexeme != ";": raise ParseError(f"expected ';' after asm definition at {terminator.line}:{terminator.column}") + def _parse_py_definition(self, token: Token) -> None: + if self._eof(): + raise ParseError(f"definition name missing after ':py' at {token.line}:{token.column}") + name_token = self._consume() + brace_token = self._consume() + if brace_token.lexeme != "{": + raise ParseError(f"expected '{{' after py name at {brace_token.line}:{brace_token.column}") + block_start = brace_token.end + block_end: Optional[int] = None + while not self._eof(): + next_token = self._consume() + if next_token.lexeme == "}": + block_end = next_token.start + break + if block_end is None: + raise ParseError("missing '}' to terminate py body") + py_body = textwrap.dedent(self.source[block_start:block_end]) + word = self.dictionary.lookup(name_token.lexeme) + if word is None: + word = Word(name=name_token.lexeme) + namespace = self._py_exec_namespace() + try: + exec(py_body, namespace) + except Exception as exc: # pragma: no cover - user code + raise ParseError(f"python macro body for '{word.name}' raised: {exc}") from exc + macro_fn = namespace.get("macro") + intrinsic_fn = namespace.get("intrinsic") + if macro_fn is None and intrinsic_fn is None: + raise ParseError("python definition must define 'macro' or 'intrinsic'") + if macro_fn is not None: + word.macro = macro_fn + word.immediate = True + if intrinsic_fn is not None: + word.intrinsic = intrinsic_fn + self.dictionary.register(word) + if self._eof(): + raise ParseError("py definition missing terminator ';'") + terminator = self._consume() + if terminator.lexeme != ";": + raise ParseError(f"expected ';' after py definition at {terminator.line}:{terminator.column}") + + def 
_py_exec_namespace(self) -> Dict[str, Any]: + return dict(PY_EXEC_GLOBALS) + def _append_node(self, node: ASTNode) -> None: target = self.context_stack[-1] if isinstance(target, Module): @@ -482,7 +617,11 @@ class Parser: try: value = int(token.lexeme, 0) except ValueError: - return False + string_value = _parse_string_literal(token) + if string_value is None: + return False + self._append_node(Literal(value=string_value)) + return True self._append_node(Literal(value=value)) return True @@ -497,6 +636,163 @@ class Parser: return self.pos >= len(self.tokens) +class CompileTimeVM: + def __init__(self, parser: Parser) -> None: + self.parser = parser + self.dictionary = parser.dictionary + self.stack: List[Any] = [] + self.return_stack: List[Any] = [] + + def reset(self) -> None: + self.stack.clear() + self.return_stack.clear() + + def push(self, value: Any) -> None: + self.stack.append(value) + + def pop(self) -> Any: + if not self.stack: + raise ParseError("compile-time stack underflow") + return self.stack.pop() + + def peek(self) -> Any: + if not self.stack: + raise ParseError("compile-time stack underflow") + return self.stack[-1] + + def pop_int(self) -> int: + value = self.pop() + if not isinstance(value, int): + raise ParseError("expected integer on compile-time stack") + return value + + def pop_str(self) -> str: + value = self.pop() + if not isinstance(value, str): + raise ParseError("expected string on compile-time stack") + return value + + def pop_list(self) -> List[Any]: + value = self.pop() + if not isinstance(value, list): + raise ParseError("expected list on compile-time stack") + return value + + def pop_token(self) -> Token: + value = self.pop() + if not isinstance(value, Token): + raise ParseError("expected token on compile-time stack") + return value + + def invoke(self, word: Word) -> None: + self.reset() + self._call_word(word) + + def _call_word(self, word: Word) -> None: + if word.compile_time_intrinsic is not None: + 
word.compile_time_intrinsic(self) + return + definition = word.definition + if definition is None: + raise ParseError(f"word '{word.name}' has no compile-time definition") + if isinstance(definition, AsmDefinition): + raise ParseError(f"word '{word.name}' cannot run at compile time") + self._execute_nodes(definition.body) + + def _call_word_by_name(self, name: str) -> None: + word = self.dictionary.lookup(name) + if word is None: + raise ParseError(f"unknown word '{name}' during compile-time execution") + self._call_word(word) + + def _execute_nodes(self, nodes: Sequence[ASTNode]) -> None: + label_positions = self._label_positions(nodes) + loop_pairs = self._for_pairs(nodes) + loop_stack: List[Dict[str, Any]] = [] + ip = 0 + while ip < len(nodes): + node = nodes[ip] + if isinstance(node, Literal): + self.push(node.value) + ip += 1 + continue + if isinstance(node, WordRef): + self._call_word_by_name(node.name) + ip += 1 + continue + if isinstance(node, BranchZero): + condition = self.pop() + flag: bool + if isinstance(condition, bool): + flag = condition + elif isinstance(condition, int): + flag = condition != 0 + else: + raise ParseError("branch expects integer or boolean condition") + if not flag: + ip = self._jump_to_label(label_positions, node.target) + else: + ip += 1 + continue + if isinstance(node, Jump): + ip = self._jump_to_label(label_positions, node.target) + continue + if isinstance(node, Label): + ip += 1 + continue + if isinstance(node, ForBegin): + count = self.pop_int() + if count <= 0: + match = loop_pairs.get(ip) + if match is None: + raise ParseError("internal loop bookkeeping error") + ip = match + 1 + continue + loop_stack.append({"remaining": count, "begin": ip}) + ip += 1 + continue + if isinstance(node, ForNext): + if not loop_stack: + raise ParseError("'next' without matching 'for'") + frame = loop_stack[-1] + frame["remaining"] -= 1 + if frame["remaining"] > 0: + ip = frame["begin"] + 1 + continue + loop_stack.pop() + ip += 1 + continue + 
raise ParseError(f"unsupported compile-time AST node {node!r}") + + def _label_positions(self, nodes: Sequence[ASTNode]) -> Dict[str, int]: + positions: Dict[str, int] = {} + for idx, node in enumerate(nodes): + if isinstance(node, Label): + positions[node.name] = idx + return positions + + def _for_pairs(self, nodes: Sequence[ASTNode]) -> Dict[int, int]: + stack: List[int] = [] + pairs: Dict[int, int] = {} + for idx, node in enumerate(nodes): + if isinstance(node, ForBegin): + stack.append(idx) + elif isinstance(node, ForNext): + if not stack: + raise ParseError("'next' without matching 'for'") + begin_idx = stack.pop() + pairs[begin_idx] = idx + pairs[idx] = begin_idx + if stack: + raise ParseError("'for' without matching 'next'") + return pairs + + def _jump_to_label(self, labels: Dict[str, int], target: str) -> int: + if target not in labels: + raise ParseError(f"unknown label '{target}' during compile-time execution") + return labels[target] + + # --------------------------------------------------------------------------- # NASM Emitter # --------------------------------------------------------------------------- @@ -562,6 +858,54 @@ def sanitize_label(name: str) -> str: return f"word_{safe}" +def _is_identifier(text: str) -> bool: + if not text: + return False + first = text[0] + if not (first.isalpha() or first == "_"): + return False + return all(ch.isalnum() or ch == "_" for ch in text) + + +def _parse_string_literal(token: Token) -> Optional[str]: + text = token.lexeme + if len(text) < 2 or text[0] != '"' or text[-1] != '"': + return None + body = text[1:-1] + result: List[str] = [] + idx = 0 + while idx < len(body): + char = body[idx] + if char != "\\": + result.append(char) + idx += 1 + continue + idx += 1 + if idx >= len(body): + raise ParseError( + f"unterminated escape sequence in string literal at {token.line}:{token.column}" + ) + escape = body[idx] + idx += 1 + if escape == 'n': + result.append("\n") + elif escape == 't': + result.append("\t") + 
elif escape == 'r': + result.append("\r") + elif escape == '0': + result.append("\0") + elif escape == '"': + result.append('"') + elif escape == "\\": + result.append("\\") + else: + raise ParseError( + f"unsupported escape sequence '\\{escape}' in string literal at {token.line}:{token.column}" + ) + return "".join(result) + + class Assembler: def __init__(self, dictionary: Dictionary) -> None: self.dictionary = dictionary @@ -578,10 +922,13 @@ class Assembler: if stray_forms: raise CompileError("top-level literals or word references are not supported yet") - if not any(defn.name == "main" for defn in definitions): + runtime_defs = [ + defn for defn in definitions if not getattr(defn, "compile_only", False) + ] + if not any(defn.name == "main" for defn in runtime_defs): raise CompileError("missing 'main' definition") - for definition in definitions: + for definition in runtime_defs: self._emit_definition(definition, emission.text) emission.bss.extend(self._bss_layout()) @@ -612,6 +959,8 @@ class Assembler: def _emit_node(self, node: ASTNode, builder: FunctionEmitter) -> None: if isinstance(node, Literal): + if not isinstance(node.value, int): + raise CompileError("string literals are compile-time only") builder.push_literal(node.value) return if isinstance(node, WordRef): @@ -638,6 +987,8 @@ class Assembler: word = self.dictionary.lookup(ref.name) if word is None: raise CompileError(f"unknown word '{ref.name}'") + if word.compile_only: + raise CompileError(f"word '{ref.name}' is compile-time only") if word.intrinsic: word.intrinsic(builder) return @@ -709,7 +1060,8 @@ class Assembler: # --------------------------------------------------------------------------- -def macro_immediate(parser: Parser) -> Optional[List[ASTNode]]: +def macro_immediate(ctx: MacroContext) -> Optional[List[ASTNode]]: + parser = ctx.parser word = parser.most_recent_definition() if word is None: raise ParseError("'immediate' must follow a definition") @@ -719,7 +1071,19 @@ def 
macro_immediate(parser: Parser) -> Optional[List[ASTNode]]: return None -def macro_begin_text_macro(parser: Parser) -> Optional[List[ASTNode]]: +def macro_compile_only(ctx: MacroContext) -> Optional[List[ASTNode]]: + parser = ctx.parser + word = parser.most_recent_definition() + if word is None: + raise ParseError("'compile-only' must follow a definition") + word.compile_only = True + if word.definition is not None: + word.definition.compile_only = True + return None + + +def macro_begin_text_macro(ctx: MacroContext) -> Optional[List[ASTNode]]: + parser = ctx.parser if parser._eof(): raise ParseError("macro name missing after 'macro:'") name_token = parser.next_token() @@ -735,7 +1099,8 @@ def macro_begin_text_macro(parser: Parser) -> Optional[List[ASTNode]]: return None -def macro_end_text_macro(parser: Parser) -> Optional[List[ASTNode]]: +def macro_end_text_macro(ctx: MacroContext) -> Optional[List[ASTNode]]: + parser = ctx.parser if parser.macro_recording is None: raise ParseError("';macro' without matching 'macro:'") # Actual closing handled in parser loop when ';macro' token is seen. 
@@ -759,7 +1124,714 @@ def _struct_emit_definition(tokens: List[Token], template: Token, name: str, bod tokens.append(make_token(";")) -def macro_struct_begin(parser: Parser) -> Optional[List[ASTNode]]: +class SplitLexer: + def __init__(self, parser: Parser, separators: str) -> None: + self.parser = parser + self.separators = set(separators) + self.buffer: List[Token] = [] + + def _fill(self) -> None: + while not self.buffer: + if self.parser._eof(): + raise ParseError("unexpected EOF inside custom lexer") + token = self.parser.next_token() + parts = _split_token_by_chars(token, self.separators) + if not parts: + continue + self.buffer.extend(parts) + + def peek(self) -> Token: + self._fill() + return self.buffer[0] + + def pop(self) -> Token: + token = self.peek() + self.buffer.pop(0) + return token + + def expect(self, lexeme: str) -> Token: + token = self.pop() + if token.lexeme != lexeme: + raise ParseError(f"expected '{lexeme}' but found '{token.lexeme}'") + return token + + def collect_brace_block(self) -> List[Token]: + depth = 1 + collected: List[Token] = [] + while depth > 0: + token = self.pop() + if token.lexeme == "{": + depth += 1 + collected.append(token) + continue + if token.lexeme == "}": + depth -= 1 + if depth == 0: + break + collected.append(token) + continue + collected.append(token) + return collected + + def push_back(self) -> None: + if not self.buffer: + return + self.parser.tokens[self.parser.pos:self.parser.pos] = self.buffer + self.buffer = [] + + +def _split_token_by_chars(token: Token, separators: Set[str]) -> List[Token]: + lex = token.lexeme + if not lex: + return [] + parts: List[Token] = [] + idx = 0 + while idx < len(lex): + char = lex[idx] + if char in separators: + parts.append(Token( + lexeme=char, + line=token.line, + column=token.column + idx, + start=token.start + idx, + end=token.start + idx + 1, + )) + idx += 1 + continue + start_idx = idx + while idx < len(lex) and lex[idx] not in separators: + idx += 1 + segment = 
lex[start_idx:idx] + if segment: + parts.append(Token( + lexeme=segment, + line=token.line, + column=token.column + start_idx, + start=token.start + start_idx, + end=token.start + idx, + )) + return parts + + +def _ensure_list(value: Any) -> List[Any]: + if not isinstance(value, list): + raise ParseError("expected list value") + return value + + +def _ensure_dict(value: Any) -> Dict[Any, Any]: + if not isinstance(value, dict): + raise ParseError("expected map value") + return value + + +def _ensure_lexer(value: Any) -> SplitLexer: + if not isinstance(value, SplitLexer): + raise ParseError("expected lexer value") + return value + + +def _truthy(value: Any) -> bool: + if isinstance(value, bool): + return value + if isinstance(value, int): + return value != 0 + return value is not None + + +def _coerce_str(value: Any) -> str: + if isinstance(value, str): + return value + if isinstance(value, bool): + return "1" if value else "0" + if isinstance(value, int): + return str(value) + raise ParseError("expected string-compatible value") + + +def _default_template(template: Optional[Token]) -> Token: + if template is None: + return Token(lexeme="", line=0, column=0, start=0, end=0) + if not isinstance(template, Token): + raise ParseError("expected token for template") + return template + + +def _trunc_divmod(a: int, b: int) -> Tuple[int, int]: + if b == 0: + raise ParseError("division by zero") + quot = abs(a) // abs(b) + if (a < 0) ^ (b < 0): + quot = -quot + rem = a - quot * b + return quot, rem + + +def _ct_dup(vm: CompileTimeVM) -> None: + vm.push(vm.peek()) + + +def _ct_drop(vm: CompileTimeVM) -> None: + vm.pop() + + +def _ct_swap(vm: CompileTimeVM) -> None: + a = vm.pop() + b = vm.pop() + vm.push(a) + vm.push(b) + + +def _ct_over(vm: CompileTimeVM) -> None: + if len(vm.stack) < 2: + raise ParseError("over requires two stack values") + vm.push(vm.stack[-2]) + + +def _ct_rot(vm: CompileTimeVM) -> None: + if len(vm.stack) < 3: + raise ParseError("rot requires three stack 
values")
+    vm.stack[-3], vm.stack[-2], vm.stack[-1] = vm.stack[-2], vm.stack[-1], vm.stack[-3]
+
+
+def _ct_nip(vm: CompileTimeVM) -> None:  # ( a b -- b )
+    if len(vm.stack) < 2:
+        raise ParseError("nip requires two stack values")
+    top = vm.pop()
+    vm.pop()
+    vm.push(top)
+
+
+def _ct_tuck(vm: CompileTimeVM) -> None:  # ( a b -- b a b )
+    if len(vm.stack) < 2:
+        raise ParseError("tuck requires two stack values")
+    first = vm.pop()
+    second = vm.pop()
+    vm.push(first)
+    vm.push(second)
+    vm.push(first)
+
+
+def _ct_2dup(vm: CompileTimeVM) -> None:  # ( a b -- a b a b )
+    if len(vm.stack) < 2:
+        raise ParseError("2dup requires two stack values")
+    second = vm.pop()
+    first = vm.pop()
+    vm.push(first)
+    vm.push(second)
+    vm.push(first)
+    vm.push(second)
+
+
+def _ct_2drop(vm: CompileTimeVM) -> None:  # ( a b -- )
+    if len(vm.stack) < 2:
+        raise ParseError("2drop requires two stack values")
+    vm.pop()
+    vm.pop()
+
+
+def _ct_2swap(vm: CompileTimeVM) -> None:  # ( x1 x2 x3 x4 -- x3 x4 x1 x2 )
+    if len(vm.stack) < 4:
+        raise ParseError("2swap requires four stack values")
+    x4 = vm.pop()
+    x3 = vm.pop()
+    x2 = vm.pop()
+    x1 = vm.pop()
+    vm.push(x3)  # upper pair goes underneath, keeping its internal order
+    vm.push(x4)
+    vm.push(x1)  # lower pair ends up on top, keeping its internal order
+    vm.push(x2)
+
+
+def _ct_2over(vm: CompileTimeVM) -> None:  # ( x1 x2 x3 x4 -- x1 x2 x3 x4 x1 x2 )
+    if len(vm.stack) < 4:
+        raise ParseError("2over requires four stack values")
+    vm.push(vm.stack[-4])  # copies x1
+    vm.push(vm.stack[-4])  # copies x2: the push above moved it one slot deeper
+
+
+def _ct_minus_rot(vm: CompileTimeVM) -> None:  # ( a b c -- c a b )
+    if len(vm.stack) < 3:
+        raise ParseError("-rot requires three stack values")
+    vm.stack[-3], vm.stack[-2], vm.stack[-1] = vm.stack[-1], vm.stack[-3], vm.stack[-2]
+
+
+def _ct_binary_int(vm: CompileTimeVM, func: Callable[[int, int], int]) -> None:
+    b = vm.pop_int()  # right operand was pushed last, so it is popped first
+    a = vm.pop_int()
+    vm.push(func(a, b))
+
+
+def _ct_add(vm: CompileTimeVM) -> None:
+    _ct_binary_int(vm, lambda a, b: a + b)
+
+
+def _ct_sub(vm: CompileTimeVM) -> None:
+    _ct_binary_int(vm, lambda a, b: a - b)
+
+
+def _ct_mul(vm: CompileTimeVM) -> None:
+    _ct_binary_int(vm, lambda a, b: a * b)
+
+
+def _ct_div(vm: CompileTimeVM) -> None:
+    divisor = vm.pop_int()
+    dividend = vm.pop_int()
+    quot, _ = 
_trunc_divmod(dividend, divisor) + vm.push(quot) + + +def _ct_mod(vm: CompileTimeVM) -> None: + divisor = vm.pop_int() + dividend = vm.pop_int() + _, rem = _trunc_divmod(dividend, divisor) + vm.push(rem) + + +def _ct_compare(vm: CompileTimeVM, predicate: Callable[[Any, Any], bool]) -> None: + b = vm.pop() + a = vm.pop() + vm.push(1 if predicate(a, b) else 0) + + +def _ct_eq(vm: CompileTimeVM) -> None: + _ct_compare(vm, lambda a, b: a == b) + + +def _ct_ne(vm: CompileTimeVM) -> None: + _ct_compare(vm, lambda a, b: a != b) + + +def _ct_lt(vm: CompileTimeVM) -> None: + _ct_compare(vm, lambda a, b: a < b) + + +def _ct_le(vm: CompileTimeVM) -> None: + _ct_compare(vm, lambda a, b: a <= b) + + +def _ct_gt(vm: CompileTimeVM) -> None: + _ct_compare(vm, lambda a, b: a > b) + + +def _ct_ge(vm: CompileTimeVM) -> None: + _ct_compare(vm, lambda a, b: a >= b) + + +def _ct_and(vm: CompileTimeVM) -> None: + b = _truthy(vm.pop()) + a = _truthy(vm.pop()) + vm.push(1 if (a and b) else 0) + + +def _ct_or(vm: CompileTimeVM) -> None: + b = _truthy(vm.pop()) + a = _truthy(vm.pop()) + vm.push(1 if (a or b) else 0) + + +def _ct_not(vm: CompileTimeVM) -> None: + vm.push(1 if not _truthy(vm.pop()) else 0) + + +def _ct_to_r(vm: CompileTimeVM) -> None: + vm.return_stack.append(vm.pop()) + + +def _ct_r_from(vm: CompileTimeVM) -> None: + if not vm.return_stack: + raise ParseError("return stack underflow") + vm.push(vm.return_stack.pop()) + + +def _ct_rdrop(vm: CompileTimeVM) -> None: + if not vm.return_stack: + raise ParseError("return stack underflow") + vm.return_stack.pop() + + +def _ct_rpick(vm: CompileTimeVM) -> None: + index = vm.pop_int() + if index < 0 or index >= len(vm.return_stack): + raise ParseError("rpick index out of range") + vm.push(vm.return_stack[-1 - index]) + + +def _ct_nil(vm: CompileTimeVM) -> None: + vm.push(None) + + +def _ct_nil_p(vm: CompileTimeVM) -> None: + vm.push(1 if vm.pop() is None else 0) + + +def _ct_list_new(vm: CompileTimeVM) -> None: + vm.push([]) + + +def 
_ct_list_clone(vm: CompileTimeVM) -> None: + lst = _ensure_list(vm.pop()) + vm.push(list(lst)) + + +def _ct_list_append(vm: CompileTimeVM) -> None: + value = vm.pop() + lst = _ensure_list(vm.pop()) + lst.append(value) + vm.push(lst) + + +def _ct_list_pop(vm: CompileTimeVM) -> None: + lst = _ensure_list(vm.pop()) + if not lst: + raise ParseError("cannot pop from empty list") + value = lst.pop() + vm.push(lst) + vm.push(value) + + +def _ct_list_pop_front(vm: CompileTimeVM) -> None: + lst = _ensure_list(vm.pop()) + if not lst: + raise ParseError("cannot pop from empty list") + value = lst.pop(0) + vm.push(lst) + vm.push(value) + + +def _ct_list_length(vm: CompileTimeVM) -> None: + lst = _ensure_list(vm.pop()) + vm.push(len(lst)) + + +def _ct_list_empty(vm: CompileTimeVM) -> None: + lst = _ensure_list(vm.pop()) + vm.push(1 if not lst else 0) + + +def _ct_list_get(vm: CompileTimeVM) -> None: + index = vm.pop_int() + lst = _ensure_list(vm.pop()) + try: + vm.push(lst[index]) + except IndexError as exc: + raise ParseError("list index out of range") from exc + + +def _ct_list_set(vm: CompileTimeVM) -> None: + value = vm.pop() + index = vm.pop_int() + lst = _ensure_list(vm.pop()) + try: + lst[index] = value + except IndexError as exc: + raise ParseError("list index out of range") from exc + vm.push(lst) + + +def _ct_list_clear(vm: CompileTimeVM) -> None: + lst = _ensure_list(vm.pop()) + lst.clear() + vm.push(lst) + + +def _ct_list_extend(vm: CompileTimeVM) -> None: + source = _ensure_list(vm.pop()) + target = _ensure_list(vm.pop()) + target.extend(source) + vm.push(target) + + +def _ct_list_last(vm: CompileTimeVM) -> None: + lst = _ensure_list(vm.pop()) + if not lst: + raise ParseError("list is empty") + vm.push(lst[-1]) + + +def _ct_map_new(vm: CompileTimeVM) -> None: + vm.push({}) + + +def _ct_map_set(vm: CompileTimeVM) -> None: + value = vm.pop() + key = vm.pop() + map_obj = _ensure_dict(vm.pop()) + map_obj[key] = value + vm.push(map_obj) + + +def _ct_map_get(vm: 
CompileTimeVM) -> None: + key = vm.pop() + map_obj = _ensure_dict(vm.pop()) + vm.push(map_obj) + if key in map_obj: + vm.push(map_obj[key]) + vm.push(1) + else: + vm.push(None) + vm.push(0) + + +def _ct_map_has(vm: CompileTimeVM) -> None: + key = vm.pop() + map_obj = _ensure_dict(vm.pop()) + vm.push(map_obj) + vm.push(1 if key in map_obj else 0) + + +def _ct_string_eq(vm: CompileTimeVM) -> None: + right = vm.pop_str() + left = vm.pop_str() + vm.push(1 if left == right else 0) + + +def _ct_string_length(vm: CompileTimeVM) -> None: + value = vm.pop_str() + vm.push(len(value)) + + +def _ct_string_append(vm: CompileTimeVM) -> None: + right = vm.pop_str() + left = vm.pop_str() + vm.push(left + right) + + +def _ct_string_to_number(vm: CompileTimeVM) -> None: + text = vm.pop_str() + try: + value = int(text, 0) + vm.push(value) + vm.push(1) + except ValueError: + vm.push(0) + vm.push(0) + + +def _ct_int_to_string(vm: CompileTimeVM) -> None: + value = vm.pop_int() + vm.push(str(value)) + + +def _ct_identifier_p(vm: CompileTimeVM) -> None: + value = vm.pop_str() + vm.push(1 if _is_identifier(value) else 0) + + +def _ct_token_lexeme(vm: CompileTimeVM) -> None: + token = vm.pop_token() + vm.push(token.lexeme) + + +def _ct_token_from_lexeme(vm: CompileTimeVM) -> None: + template_value = vm.pop() + lexeme = vm.pop_str() + template = _default_template(template_value) + vm.push(Token( + lexeme=lexeme, + line=template.line, + column=template.column, + start=template.start, + end=template.end, + )) + + +def _ct_next_token(vm: CompileTimeVM) -> None: + token = vm.parser.next_token() + vm.push(token) + + +def _ct_peek_token(vm: CompileTimeVM) -> None: + vm.push(vm.parser.peek_token()) + + +def _ct_inject_tokens(vm: CompileTimeVM) -> None: + tokens = _ensure_list(vm.pop()) + if not all(isinstance(item, Token) for item in tokens): + raise ParseError("inject-tokens expects a list of tokens") + vm.parser.inject_token_objects(tokens) + + +def _ct_emit_definition(vm: CompileTimeVM) -> None: 
+ body = _ensure_list(vm.pop()) + name_value = vm.pop() + if isinstance(name_value, Token): + template = name_value + name = name_value.lexeme + elif isinstance(name_value, str): + template = _default_template(vm.pop()) + name = name_value + else: + raise ParseError("emit-definition expects token or string for name") + lexemes = [ + item.lexeme if isinstance(item, Token) else _coerce_str(item) + for item in body + ] + generated: List[Token] = [] + _struct_emit_definition(generated, template, name, lexemes) + vm.parser.inject_token_objects(generated) + + +def _ct_parse_error(vm: CompileTimeVM) -> None: + message = vm.pop_str() + raise ParseError(message) + + +def _ct_enable_call_syntax(vm: CompileTimeVM) -> None: + vm.parser.call_syntax_enabled = True + + +def _ct_disable_call_syntax(vm: CompileTimeVM) -> None: + vm.parser.call_syntax_enabled = False + + +def _ct_lexer_new(vm: CompileTimeVM) -> None: + separators = vm.pop_str() + vm.push(SplitLexer(vm.parser, separators)) + + +def _ct_lexer_pop(vm: CompileTimeVM) -> None: + lexer = _ensure_lexer(vm.pop()) + token = lexer.pop() + vm.push(lexer) + vm.push(token) + + +def _ct_lexer_peek(vm: CompileTimeVM) -> None: + lexer = _ensure_lexer(vm.pop()) + vm.push(lexer) + vm.push(lexer.peek()) + + +def _ct_lexer_expect(vm: CompileTimeVM) -> None: + lexeme = vm.pop_str() + lexer = _ensure_lexer(vm.pop()) + token = lexer.expect(lexeme) + vm.push(lexer) + vm.push(token) + + +def _ct_lexer_collect_brace(vm: CompileTimeVM) -> None: + lexer = _ensure_lexer(vm.pop()) + vm.push(lexer) + vm.push(lexer.collect_brace_block()) + + +def _ct_lexer_push_back(vm: CompileTimeVM) -> None: + lexer = _ensure_lexer(vm.pop()) + lexer.push_back() + vm.push(lexer) + + +def _register_compile_time_primitives(dictionary: Dictionary) -> None: + def register(name: str, func: Callable[[CompileTimeVM], None], *, compile_only: bool = False) -> None: + word = dictionary.lookup(name) + if word is None: + word = Word(name=name) + dictionary.register(word) + 
word.compile_time_intrinsic = func + if compile_only: + word.compile_only = True + + register("dup", _ct_dup) + register("drop", _ct_drop) + register("swap", _ct_swap) + register("over", _ct_over) + register("rot", _ct_rot) + register("nip", _ct_nip) + register("tuck", _ct_tuck) + register("2dup", _ct_2dup) + register("2drop", _ct_2drop) + register("2swap", _ct_2swap) + register("2over", _ct_2over) + register("-rot", _ct_minus_rot) + register("+", _ct_add) + register("-", _ct_sub) + register("*", _ct_mul) + register("/", _ct_div) + register("%", _ct_mod) + register("==", _ct_eq) + register("!=", _ct_ne) + register("<", _ct_lt) + register("<=", _ct_le) + register(">", _ct_gt) + register(">=", _ct_ge) + register("and", _ct_and) + register("or", _ct_or) + register("not", _ct_not) + register(">r", _ct_to_r) + register("r>", _ct_r_from) + register("rdrop", _ct_rdrop) + register("rpick", _ct_rpick) + + register("nil", _ct_nil, compile_only=True) + register("nil?", _ct_nil_p, compile_only=True) + register("list-new", _ct_list_new, compile_only=True) + register("list-clone", _ct_list_clone, compile_only=True) + register("list-append", _ct_list_append, compile_only=True) + register("list-pop", _ct_list_pop, compile_only=True) + register("list-pop-front", _ct_list_pop_front, compile_only=True) + register("list-length", _ct_list_length, compile_only=True) + register("list-empty?", _ct_list_empty, compile_only=True) + register("list-get", _ct_list_get, compile_only=True) + register("list-set", _ct_list_set, compile_only=True) + register("list-clear", _ct_list_clear, compile_only=True) + register("list-extend", _ct_list_extend, compile_only=True) + register("list-last", _ct_list_last, compile_only=True) + + register("map-new", _ct_map_new, compile_only=True) + register("map-set", _ct_map_set, compile_only=True) + register("map-get", _ct_map_get, compile_only=True) + register("map-has?", _ct_map_has, compile_only=True) + + register("string=", _ct_string_eq, compile_only=True) + 
register("string-length", _ct_string_length, compile_only=True) + register("string-append", _ct_string_append, compile_only=True) + register("string>number", _ct_string_to_number, compile_only=True) + register("int>string", _ct_int_to_string, compile_only=True) + register("identifier?", _ct_identifier_p, compile_only=True) + + register("token-lexeme", _ct_token_lexeme, compile_only=True) + register("token-from-lexeme", _ct_token_from_lexeme, compile_only=True) + register("next-token", _ct_next_token, compile_only=True) + register("peek-token", _ct_peek_token, compile_only=True) + register("inject-tokens", _ct_inject_tokens, compile_only=True) + register("emit-definition", _ct_emit_definition, compile_only=True) + register("parse-error", _ct_parse_error, compile_only=True) + register("enable-call-syntax", _ct_enable_call_syntax, compile_only=True) + register("disable-call-syntax", _ct_disable_call_syntax, compile_only=True) + + register("lexer-new", _ct_lexer_new, compile_only=True) + register("lexer-pop", _ct_lexer_pop, compile_only=True) + register("lexer-peek", _ct_lexer_peek, compile_only=True) + register("lexer-expect", _ct_lexer_expect, compile_only=True) + register("lexer-collect-brace", _ct_lexer_collect_brace, compile_only=True) + register("lexer-push-back", _ct_lexer_push_back, compile_only=True) + + + + +PY_EXEC_GLOBALS: Dict[str, Any] = { + "MacroContext": MacroContext, + "Token": Token, + "Literal": Literal, + "WordRef": WordRef, + "BranchZero": BranchZero, + "Jump": Jump, + "Label": Label, + "ForBegin": ForBegin, + "ForNext": ForNext, + "StructField": StructField, + "Definition": Definition, + "Module": Module, + "ParseError": ParseError, + "emit_definition": _struct_emit_definition, + "is_identifier": _is_identifier, +} + + +def macro_struct_begin(ctx: MacroContext) -> Optional[List[ASTNode]]: + parser = ctx.parser if parser._eof(): raise ParseError("struct name missing after 'struct:'") name_token = parser.next_token() @@ -813,17 +1885,19 @@ def 
macro_struct_begin(parser: Parser) -> Optional[List[ASTNode]]: return None -def macro_struct_end(parser: Parser) -> Optional[List[ASTNode]]: +def macro_struct_end(ctx: MacroContext) -> Optional[List[ASTNode]]: raise ParseError("';struct' must follow a 'struct:' block") def bootstrap_dictionary() -> Dictionary: dictionary = Dictionary() dictionary.register(Word(name="immediate", immediate=True, macro=macro_immediate)) + dictionary.register(Word(name="compile-only", immediate=True, macro=macro_compile_only)) dictionary.register(Word(name="macro:", immediate=True, macro=macro_begin_text_macro)) dictionary.register(Word(name=";macro", immediate=True, macro=macro_end_text_macro)) dictionary.register(Word(name="struct:", immediate=True, macro=macro_struct_begin)) dictionary.register(Word(name=";struct", immediate=True, macro=macro_struct_end)) + _register_compile_time_primitives(dictionary) return dictionary diff --git a/main.sl b/main.sl index 28f6825..44cc4e8 100644 --- a/main.sl +++ b/main.sl @@ -3,5 +3,13 @@ import stdlib.sl : main 2 40 + puts + extend-syntax + 1 + 2 + foo() + puts 0 -; \ No newline at end of file +; +fn foo(int a, int b){ + return a + b; +} \ No newline at end of file diff --git a/stdlib.sl b/stdlib.sl index cb008ce..4cfa60e 100644 --- a/stdlib.sl +++ b/stdlib.sl @@ -47,6 +47,211 @@ puts_finish_digits: } ; +: extend-syntax + enable-call-syntax +; +immediate +compile-only + +:py fn { + FN_SPLIT_CHARS = set("(),{};+-*/%,") + + def split_token(token): + lex = token.lexeme + parts = [] + idx = 0 + while idx < len(lex): + char = lex[idx] + if char in FN_SPLIT_CHARS: + parts.append(Token( + lexeme=char, + line=token.line, + column=token.column + idx, + start=token.start + idx, + end=token.start + idx + 1, + )) + idx += 1 + continue + start_idx = idx + while idx < len(lex) and lex[idx] not in FN_SPLIT_CHARS: + idx += 1 + segment = lex[start_idx:idx] + if segment: + parts.append(Token( + lexeme=segment, + line=token.line, + column=token.column + start_idx, 
+ start=token.start + start_idx, + end=token.start + idx, + )) + return [part for part in parts if part.lexeme] + + class FnLexer: + def __init__(self, parser): + self.parser = parser + self.buffer = [] + + def _fill(self): + while not self.buffer: + if self.parser._eof(): + raise ParseError("unexpected EOF inside fn definition") + token = self.parser.next_token() + split = split_token(token) + if not split: + continue + self.buffer.extend(split) + + def peek(self): + self._fill() + return self.buffer[0] + + def pop(self): + token = self.peek() + self.buffer.pop(0) + return token + + def expect(self, lexeme): + token = self.pop() + if token.lexeme != lexeme: + raise ParseError(f"expected '{lexeme}' but found '{token.lexeme}'") + return token + + def push_back_remaining(self): + if not self.buffer: + return + self.parser.tokens[self.parser.pos:self.parser.pos] = self.buffer + self.buffer = [] + + def collect_block_tokens(self): + depth = 1 + collected = [] + while depth > 0: + token = self.pop() + if token.lexeme == "{": + depth += 1 + collected.append(token) + continue + if token.lexeme == "}": + depth -= 1 + if depth == 0: + break + collected.append(token) + continue + collected.append(token) + return collected + + OP_PRECEDENCE = {} + OP_PRECEDENCE["+"] = 1 + OP_PRECEDENCE["-"] = 1 + OP_PRECEDENCE["*"] = 2 + OP_PRECEDENCE["/"] = 2 + OP_PRECEDENCE["%"] = 2 + + def parse_fn_body(tokens): + if not tokens: + raise ParseError("empty function body") + lexemes = [tok.lexeme for tok in tokens if tok.lexeme] + if not lexemes or lexemes[0] != "return": + raise ParseError("function body must start with 'return'") + if lexemes[-1] != ";": + raise ParseError("function body must terminate with ';'") + extra = lexemes[1:-1] + if not extra: + raise ParseError("missing return expression") + return extra + + def shunting_yard(tokens): + output = [] + stack = [] + for token in tokens: + if token == "(": + stack.append(token) + continue + if token == ")": + while stack and stack[-1] 
!= "(": + output.append(stack.pop()) + if not stack: + raise ParseError("mismatched parentheses in return expression") + stack.pop() + continue + if token in OP_PRECEDENCE: + while stack and stack[-1] in OP_PRECEDENCE and OP_PRECEDENCE[stack[-1]] >= OP_PRECEDENCE[token]: + output.append(stack.pop()) + stack.append(token) + continue + output.append(token) + while stack: + top = stack.pop() + if top == "(": + raise ParseError("mismatched parentheses in return expression") + output.append(top) + return output + + def is_int_literal(text): + try: + int(text, 0) + return True + except ValueError: + return False + + def translate_postfix(postfix, params): + indices = {name: idx for idx, name in enumerate(params)} + translated = [] + for token in postfix: + if token in indices: + translated.append(str(indices[token])) + translated.append("rpick") + continue + if is_int_literal(token): + translated.append(token) + continue + translated.append(token) + return translated + + def macro(ctx): + parser = ctx.parser + if not isinstance(parser.context_stack[-1], Module): + raise ParseError("'fn' definitions must be top-level") + lexer = FnLexer(parser) + name_token = lexer.pop() + name = name_token.lexeme + if not is_identifier(name): + raise ParseError("invalid function name for 'fn'") + lexer.expect("(") + params = [] + if lexer.peek().lexeme != ")": + while True: + type_token = lexer.pop() + if type_token.lexeme != "int": + raise ParseError("only 'int' parameters are supported in fn definitions") + param_token = lexer.pop() + if not is_identifier(param_token.lexeme): + raise ParseError("invalid parameter name in fn definition") + params.append(param_token.lexeme) + if lexer.peek().lexeme == ",": + lexer.pop() + continue + break + lexer.expect(")") + lexer.expect("{") + body_tokens = lexer.collect_block_tokens() + lexer.push_back_remaining() + if len(params) != len(set(params)): + raise ParseError("duplicate parameter names in fn definition") + return_tokens = 
parse_fn_body(body_tokens) + postfix = shunting_yard(return_tokens) + body_words = [] + for _ in reversed(params): + body_words.append(">r") + body_words.extend(translate_postfix(postfix, params)) + for _ in params: + body_words.append("rdrop") + generated = [] + emit_definition(generated, name_token, name, body_words) + ctx.inject_token_objects(generated) +} +; + :asm dup { mov rax, [r12] sub r12, 8 @@ -226,3 +431,33 @@ puts_finish_digits: syscall } ; + +:asm >r { + mov rax, [r12] + add r12, 8 + sub r13, 8 + mov [r13], rax +} +; + +:asm r> { + mov rax, [r13] + add r13, 8 + sub r12, 8 + mov [r12], rax +} +; + +:asm rdrop { + add r13, 8 +} +; + +:asm rpick { + mov rcx, [r12] + add r12, 8 + mov rax, [r13 + rcx * 8] + sub r12, 8 + mov [r12], rax +} +; diff --git a/test.bin b/test.bin new file mode 100755 index 0000000000000000000000000000000000000000..b883df02a0990299fe76974eba4d991b2dacafc9 GIT binary patch literal 10112 zcmeI2e{fXQ6~}KvLIad0n269ujTmidOCTGlq?6i21MV^jgH}tYboyqqKXixeZrpt< zMB6}{aVG0d9Chf7G8U@+tKvwfGmNp+QHh9SYnh!EVp@zH{H+ zcgg0D{@Z;s+;{Hhe$To0o_pV2_GNc%=-XIbRVCC_Ep8RE*3d1HTatORtTJ0K=80}G zU(`x%O{s~jZmzj^himFC8eD)rYGHC+aDVE4V1cVsccp{he7DIhTe|&sZrd%YT}#Dh zUb$5=P{}|g1CKqUi}3{*1k|CoWk@tZEzSBd`oM}6a~&em58 zr;WaOPaqZjotN#pP@$$ZH2Qj7Fn?^Ki(q|iiA?rLU2t?p{B%KZ`ee@pzMTih*L}62 zTD*i1J-=TFLW10)U~%e=`D=p3*G@MF+m8qHCqmQbcOM}zHMW#!Fh3c5_+;*%q0tM% zULeJ*2)|MDrG<2pkzbMF^KXUn=lb$f^|h1kP(H2;m4KWeB~|dL;Pgy6+N4D)G`9vjCl6z~^?N4KZ>bl(?H6;jJv~R; z{yEzA&C&MDQd_VfcqZ;*hA^)DGMTad!WZS7`#CfHg(l8{y07JWSL*~`%FqsQbEWY` zI`5Kt-YunEjGm!I*&qmP5TVhdb)ivj;f`XJ++lC-9HwDLQw=J$N)n&u6n$`&Q zM+;6Ws^B$}vg^op7XGJL^jh)$$E%>(tB@@wollCzqI2SNDvra=IfeS0s88u(e4eu6 zdhUbr$&%-zsPB>WYDD^+Of@)k=rj^l0Q=dIvaRIiqaTx>vkk%CgLMKCl|Y~H7Z74O zbkWP21dpAP&x2$hBt5zWZ_$)QMma;R&UL!vZc}m+l9?H;qkBxr6eK5fi9VJljO8v! 
z4(Sr?AEym?Gm5A5HoV7E(RIy>1Y};+r^Oc71^3#kre8dpX0FP(HE;J?eETsxv`8IGSLK136S;yeNCl?zlHfprMhR#*qf@&+oWtQ~C59{7%t zM0Kkx@aEmgKoOryF$hJkrhuP{Cu3}aVuhyA9|gI|dZ7@SLLY}bVpc$LYI^pON<}ji zf6x?H^m2(haQ12ny%#x-&!E_;DfDwpDvm?(eNAyij6XrKR#WKtkTITv;wn;j3!@KJ z9+%Cu>zuRy#@2|y{27Yz?1$NipNll z8i&p#_HEe<4{eP&S2P9gpFgU^*VNxvSa6fPSBZeZS2MrM;Om*^4BlIYZ)X0uq5lE% zUm1Km^Op^7Gk?qABh241_!#pK4E{Ltvj*SG{IbD+&3y5aZfU9FdFD47{1Ed_gTKc7 z9)rKl{CzsJ1A;GZ&IW$;D#T%~pSw!yDuzRBRLng7t>w=#d;;G3DhWAGm{KX339 z^Q)Wu^BrNn-r$ci-)8Xr%ts7D`RzV^h{|cnW02vJ4cEoX%gANG<){<+!4CnZ3;)D z?H#VUC7npwRv?y5Jjl@$*;$sRk0u`Q)sty|yi!^@fVXwHUfMSb*y#+XQT6Gqp#tGZ zChhAs5f^<{B5uXQgV~t1#;wFsQHv+bjxn?bqJDKicU@a@T~{)Bp*c!CouMWwEj%vP zx444tZt;x@l0fvq7!6j_uh;aQK7H2C;l?*3RB1(@sZCCeZ z+4MMOT;kp&8OmUT4&>s(vn(2|u&jY>);%kP6^n-Lun=w8;iMfNpu3%M@AoOoj_tHX z8@c$-s{f_(6>`FVUA*69ssBeexY4}dZ*g$OTod93vhX@h9`m5;{A@u*2=8}XZdLsZ zjB9S+bn0Gl{dT#F{)X>rbkT7uw$MNHg=?966L|UJ_00^o$1pIuHo;N5FjxIrLY3M4 Q|LT6h(tkE5-Aw=g0vB14YXATM literal 0 HcmV?d00001 diff --git a/test.sl b/test.sl index 1da9570..c26daa1 100644 --- a/test.sl +++ b/test.sl @@ -31,6 +31,12 @@ struct: Point field y 8 ;struct +extend-syntax + +fn fancy_add(int left, int right){ + return (left + right) * right; +} + : test-add 5 7 + puts ; @@ -153,6 +159,13 @@ struct: Point 4 5 >= puts ; +: test-c-fn + 3 + 7 + fancy_add() + puts +; + : main test-add test-sub @@ -171,5 +184,6 @@ struct: Point test-for-zero test-cmp test-struct + test-c-fn 0 ;