diff --git a/__pycache__/main.cpython-314.pyc b/__pycache__/main.cpython-314.pyc new file mode 100644 index 0000000..8c2e145 Binary files /dev/null and b/__pycache__/main.cpython-314.pyc differ diff --git a/a.out b/a.out new file mode 100755 index 0000000..9a54c1f Binary files /dev/null and b/a.out differ diff --git a/a.sl b/a.sl new file mode 100644 index 0000000..9f3cb37 --- /dev/null +++ b/a.sl @@ -0,0 +1,5 @@ +import stdlib.sl + +: main + "hello world" puts +; \ No newline at end of file diff --git a/build/call_syntax_parens.asm b/build/call_syntax_parens.asm new file mode 100644 index 0000000..0bd0700 --- /dev/null +++ b/build/call_syntax_parens.asm @@ -0,0 +1,289 @@ +section .text +%define DSTK_BYTES 65536 +%define RSTK_BYTES 65536 +%define PRINT_BUF_BYTES 128 +global _start +_start: + ; initialize data/return stack pointers + lea r12, [rel dstack_top] + mov r15, r12 + lea r13, [rel rstack_top] + call word_main + mov rax, 0 + cmp r12, r15 + je .no_exit_value + mov rax, [r12] + add r12, 8 +.no_exit_value: + mov rdi, rax + mov rax, 60 + syscall +word_puts: + mov rax, [r12] + add r12, 8 + mov rbx, rax + mov r8, 0 + cmp rbx, 0 + jge puts_abs + neg rbx + mov r8, 1 +puts_abs: + lea rsi, [rel print_buf_end] + mov rcx, 0 + mov r10, 10 + cmp rbx, 0 + jne puts_digits + dec rsi + mov byte [rsi], '0' + inc rcx + jmp puts_sign +puts_digits: +puts_loop: + xor rdx, rdx + mov rax, rbx + div r10 + add dl, '0' + dec rsi + mov [rsi], dl + inc rcx + mov rbx, rax + test rbx, rbx + jne puts_loop +puts_sign: + cmp r8, 0 + je puts_finish_digits + dec rsi + mov byte [rsi], '-' + inc rcx +puts_finish_digits: + mov byte [rsi + rcx], 10 + inc rcx + mov rax, 1 + mov rdi, 1 + mov rdx, rcx + mov r9, rsi + mov rsi, r9 + syscall + ret +word_dup: + mov rax, [r12] + sub r12, 8 + mov [r12], rax + ret +word_drop: + add r12, 8 + ret +word_over: + mov rax, [r12 + 8] + sub r12, 8 + mov [r12], rax + ret +word_swap: + mov rax, [r12] + mov rbx, [r12 + 8] + mov [r12], rbx + mov [r12 + 8], rax + ret +word__2b: + mov rax, [r12] + add r12, 8 + add qword [r12], rax + ret +word__2d: + mov rax, [r12] + add r12, 8 + sub qword [r12], rax + ret +word__2a: + mov rax, [r12] + add r12, 8 + imul qword [r12] + mov [r12], rax + ret +word__2f: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rax + ret +word__25: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rdx + ret +word__3d_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + sete bl + mov [r12], rbx + ret +word__21_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setne bl + mov [r12], rbx + ret +word__3c: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setl bl + mov [r12], rbx + ret +word__3e: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setg bl + mov [r12], rbx + ret +word__3c_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setle bl + mov [r12], rbx + ret +word__3e_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setge bl + mov [r12], rbx + ret +word__40: + mov rax, [r12] + mov rax, [rax] + mov [r12], rax + ret +word__21: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + mov [rax], rbx + add r12, 8 + ret +word_mmap: + mov r9, [r12] + add r12, 8 + mov r8, [r12] + add r12, 8 + mov r10, [r12] + add r12, 8 + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 9 + syscall + mov [r12], rax + ret +word_munmap: + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 11 + syscall + mov [r12], rax + ret +word_exit: + mov rdi, [r12] + add r12, 8 + mov rax, 60 + syscall + ret +word__3er: + mov rax, [r12] + add r12, 8 + sub r13, 8 + mov [r13], rax + ret +word_r_3e: + mov rax, [r13] + add r13, 8 + sub r12, 8 + mov [r12], rax + ret +word_rdrop: + add r13, 8 + ret +word_pick: + mov rcx, [r12] + add r12, 8 + mov rax, [r12 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_rpick: + mov rcx, [r12] + add r12, 8 + mov rax, [r13 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_main: + ; push 2 + sub r12, 8 + mov qword [r12], 2 + ; push 40 + sub r12, 8 + mov qword [r12], 40 + call word__2b + call word_puts + ; push 1 + sub r12, 8 + mov qword [r12], 1 + ; push 2 + sub r12, 8 + mov qword [r12], 2 + call word_foo + call word_puts + ; push 0 + sub r12, 8 + mov qword [r12], 0 + ret +word_foo: + call word__3er + call word__3er + ; push 0 + sub r12, 8 + mov qword [r12], 0 + call word_rpick + ; push 1 + sub r12, 8 + mov qword [r12], 1 + call word_rpick + call word__2b + call word_rdrop + call word_rdrop + ret +section .bss +align 16 +dstack: resb DSTK_BYTES +dstack_top: +align 16 +rstack: resb RSTK_BYTES +rstack_top: +align 16 +print_buf: resb PRINT_BUF_BYTES +print_buf_end: \ No newline at end of file diff --git a/build/call_syntax_parens.o b/build/call_syntax_parens.o new file mode 100644 index 0000000..2cb5c54 Binary files /dev/null and b/build/call_syntax_parens.o differ diff --git a/build/loops_and_cmp.asm b/build/loops_and_cmp.asm new file mode 100644 index 0000000..c6ae2cf --- /dev/null +++ b/build/loops_and_cmp.asm @@ -0,0 +1,298 @@ +section .text +%define DSTK_BYTES 65536 +%define RSTK_BYTES 65536 +%define PRINT_BUF_BYTES 128 +global _start +_start: + ; initialize data/return stack pointers + lea r12, [rel dstack_top] + mov r15, r12 + lea r13, [rel rstack_top] + call word_main + mov rax, 0 + cmp r12, r15 + je .no_exit_value + mov rax, [r12] + add r12, 8 +.no_exit_value: + mov rdi, rax + mov rax, 60 + syscall +word_puts: + mov rax, [r12] + add r12, 8 + mov rbx, rax + mov r8, 0 + cmp rbx, 0 + jge puts_abs + neg rbx + mov r8, 1 +puts_abs: + lea rsi, [rel print_buf_end] + mov rcx, 0 + mov r10, 10 + cmp rbx, 0 + jne puts_digits + dec rsi + mov byte [rsi], '0' + inc rcx + jmp puts_sign +puts_digits: +puts_loop: + xor rdx, rdx + mov rax, rbx + div r10 + add dl, '0' + dec rsi + mov [rsi], dl + inc rcx + mov rbx, rax + test rbx, rbx + jne puts_loop +puts_sign: + cmp r8, 0 + je puts_finish_digits + dec rsi + mov byte [rsi], '-' + inc rcx +puts_finish_digits: + mov byte [rsi + rcx], 10 + inc rcx + mov rax, 1 + mov rdi, 1 + mov rdx, rcx + mov r9, rsi + mov rsi, r9 + syscall + ret +word_dup: + mov rax, [r12] + sub r12, 8 + mov [r12], rax + ret +word_drop: + add r12, 8 + ret +word_over: + mov rax, [r12 + 8] + sub r12, 8 + mov [r12], rax + ret +word_swap: + mov rax, [r12] + mov rbx, [r12 + 8] + mov [r12], rbx + mov [r12 + 8], rax + ret +word__2b: + mov rax, [r12] + add r12, 8 + add qword [r12], rax + ret +word__2d: + mov rax, [r12] + add r12, 8 + sub qword [r12], rax + ret +word__2a: + mov rax, [r12] + add r12, 8 + imul qword [r12] + mov [r12], rax + ret +word__2f: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rax + ret +word__25: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rdx + ret +word__3d_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + sete bl + mov [r12], rbx + ret +word__21_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setne bl + mov [r12], rbx + ret +word__3c: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setl bl + mov [r12], rbx + ret +word__3e: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setg bl + mov [r12], rbx + ret +word__3c_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setle bl + mov [r12], rbx + ret +word__3e_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setge bl + mov [r12], rbx + ret +word__40: + mov rax, [r12] + mov rax, [rax] + mov [r12], rax + ret +word__21: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + mov [rax], rbx + add r12, 8 + ret +word_mmap: + mov r9, [r12] + add r12, 8 + mov r8, [r12] + add r12, 8 + mov r10, [r12] + add r12, 8 + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 9 + syscall + mov [r12], rax + ret +word_munmap: + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 11 + syscall + mov [r12], rax + ret +word_exit: + mov rdi, [r12] + add r12, 8 + mov rax, 60 + syscall + ret +word__3er: + mov rax, [r12] + add r12, 8 + sub r13, 8 + mov [r13], rax + ret +word_r_3e: + mov rax, [r13] + add r13, 8 + sub r12, 8 + mov [r12], rax + ret +word_rdrop: + add r13, 8 + ret +word_pick: + mov rcx, [r12] + add r12, 8 + mov rax, [r12 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_rpick: + mov rcx, [r12] + add r12, 8 + mov rax, [r13 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_main: + ; push 0 + sub r12, 8 + mov qword [r12], 0 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + mov rax, [r12] + add r12, 8 + cmp rax, 0 + jle L_for_end_1 + sub r13, 8 + mov [r13], rax +L_for_loop_0: + ; push 1 + sub r12, 8 + mov qword [r12], 1 + call word__2b + mov rax, [r13] + dec rax + mov [r13], rax + jg L_for_loop_0 + add r13, 8 +L_for_end_1: + call word_puts + ; push 5 + sub r12, 8 + mov qword [r12], 5 + ; push 5 + sub r12, 8 + mov qword [r12], 5 + call word__3d_3d + call word_puts + ; push 5 + sub r12, 8 + mov qword [r12], 5 + ; push 4 + sub r12, 8 + mov qword [r12], 4 + call word__3d_3d + call word_puts + ; push 0 + sub r12, 8 + mov qword [r12], 0 + ret +section .bss +align 16 +dstack: resb DSTK_BYTES +dstack_top: +align 16 +rstack: resb RSTK_BYTES +rstack_top: +align 16 +print_buf: resb PRINT_BUF_BYTES +print_buf_end: \ No newline at end of file diff --git a/build/loops_and_cmp.o b/build/loops_and_cmp.o new file mode 100644 index 0000000..1f7e196 Binary files /dev/null and b/build/loops_and_cmp.o differ diff --git a/build/main.asm b/build/main.asm new file mode 100644 index 0000000..0bd0700 --- /dev/null +++ b/build/main.asm @@ -0,0 +1,289 @@ +section .text +%define DSTK_BYTES 65536 +%define RSTK_BYTES 65536 +%define PRINT_BUF_BYTES 128 +global _start +_start: + ; initialize data/return stack pointers + lea r12, [rel dstack_top] + mov r15, r12 + lea r13, [rel rstack_top] + call word_main + mov rax, 0 + cmp r12, r15 + je .no_exit_value + mov rax, [r12] + add r12, 8 +.no_exit_value: + mov rdi, rax + mov rax, 60 + syscall +word_puts: + mov rax, [r12] + add r12, 8 + mov rbx, rax + mov r8, 0 + cmp rbx, 0 + jge puts_abs + neg rbx + mov r8, 1 +puts_abs: + lea rsi, [rel print_buf_end] + mov rcx, 0 + mov r10, 10 + cmp rbx, 0 + jne puts_digits + dec rsi + mov byte [rsi], '0' + inc rcx + jmp puts_sign +puts_digits: +puts_loop: + xor rdx, rdx + mov rax, rbx + div r10 + add dl, '0' + dec rsi + mov [rsi], dl + inc rcx + mov rbx, rax + test rbx, rbx + jne puts_loop +puts_sign: + cmp r8, 0 + je puts_finish_digits + dec rsi + mov byte [rsi], '-' + inc rcx +puts_finish_digits: + mov byte [rsi + rcx], 10 + inc rcx + mov rax, 1 + mov rdi, 1 + mov rdx, rcx + mov r9, rsi + mov rsi, r9 + syscall + ret +word_dup: + mov rax, [r12] + sub r12, 8 + mov [r12], rax + ret +word_drop: + add r12, 8 + ret +word_over: + mov rax, [r12 + 8] + sub r12, 8 + mov [r12], rax + ret +word_swap: + mov rax, [r12] + mov rbx, [r12 + 8] + mov [r12], rbx + mov [r12 + 8], rax + ret +word__2b: + mov rax, [r12] + add r12, 8 + add qword [r12], rax + ret +word__2d: + mov rax, [r12] + add r12, 8 + sub qword [r12], rax + ret +word__2a: + mov rax, [r12] + add r12, 8 + imul qword [r12] + mov [r12], rax + ret +word__2f: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rax + ret +word__25: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rdx + ret +word__3d_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + sete bl + mov [r12], rbx + ret +word__21_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setne bl + mov [r12], rbx + ret +word__3c: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setl bl + mov [r12], rbx + ret +word__3e: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setg bl + mov [r12], rbx + ret +word__3c_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setle bl + mov [r12], rbx + ret +word__3e_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setge bl + mov [r12], rbx + ret +word__40: + mov rax, [r12] + mov rax, [rax] + mov [r12], rax + ret +word__21: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + mov [rax], rbx + add r12, 8 + ret +word_mmap: + mov r9, [r12] + add r12, 8 + mov r8, [r12] + add r12, 8 + mov r10, [r12] + add r12, 8 + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 9 + syscall + mov [r12], rax + ret +word_munmap: + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 11 + syscall + mov [r12], rax + ret +word_exit: + mov rdi, [r12] + add r12, 8 + mov rax, 60 + syscall + ret +word__3er: + mov rax, [r12] + add r12, 8 + sub r13, 8 + mov [r13], rax + ret +word_r_3e: + mov rax, [r13] + add r13, 8 + sub r12, 8 + mov [r12], rax + ret +word_rdrop: + add r13, 8 + ret +word_pick: + mov rcx, [r12] + add r12, 8 + mov rax, [r12 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_rpick: + mov rcx, [r12] + add r12, 8 + mov rax, [r13 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_main: + ; push 2 + sub r12, 8 + mov qword [r12], 2 + ; push 40 + sub r12, 8 + mov qword [r12], 40 + call word__2b + call word_puts + ; push 1 + sub r12, 8 + mov qword [r12], 1 + ; push 2 + sub r12, 8 + mov qword [r12], 2 + call word_foo + call word_puts + ; push 0 + sub r12, 8 + mov qword [r12], 0 + ret +word_foo: + call word__3er + call word__3er + ; push 0 + sub r12, 8 + mov qword [r12], 0 + call word_rpick + ; push 1 + sub r12, 8 + mov qword [r12], 1 + call word_rpick + call word__2b + call word_rdrop + call word_rdrop + ret +section .bss +align 16 +dstack: resb DSTK_BYTES +dstack_top: +align 16 +rstack: resb RSTK_BYTES +rstack_top: +align 16 +print_buf: resb PRINT_BUF_BYTES +print_buf_end: \ No newline at end of file diff --git a/build/main.o b/build/main.o new file mode 100644 index 0000000..1c49082 Binary files /dev/null and b/build/main.o differ diff --git a/build/override_dup_compile_time.asm b/build/override_dup_compile_time.asm new file mode 100644 index 0000000..1fc2fbb --- /dev/null +++ b/build/override_dup_compile_time.asm @@ -0,0 +1,262 @@ +section .text +%define DSTK_BYTES 65536 +%define RSTK_BYTES 65536 +%define PRINT_BUF_BYTES 128 +global _start +_start: + ; initialize data/return stack pointers + lea r12, [rel dstack_top] + mov r15, r12 + lea r13, [rel rstack_top] + call word_main + mov rax, 0 + cmp r12, r15 + je .no_exit_value + mov rax, [r12] + add r12, 8 +.no_exit_value: + mov rdi, rax + mov rax, 60 + syscall +word_puts: + mov rax, [r12] + add r12, 8 + mov rbx, rax + mov r8, 0 + cmp rbx, 0 + jge puts_abs + neg rbx + mov r8, 1 +puts_abs: + lea rsi, [rel print_buf_end] + mov rcx, 0 + mov r10, 10 + cmp rbx, 0 + jne puts_digits + dec rsi + mov byte [rsi], '0' + inc rcx + jmp puts_sign +puts_digits: +puts_loop: + xor rdx, rdx + mov rax, rbx + div r10 + add dl, '0' + dec rsi + mov [rsi], dl + inc rcx + mov rbx, rax + test rbx, rbx + jne puts_loop +puts_sign: + cmp r8, 0 + je puts_finish_digits + dec rsi + mov byte [rsi], '-' + inc rcx +puts_finish_digits: + mov byte [rsi + rcx], 10 + inc rcx + mov rax, 1 + mov rdi, 1 + mov rdx, rcx + mov r9, rsi + mov rsi, r9 + syscall + ret +word_dup: + mov rax, [r12] + sub r12, 8 + mov [r12], rax + ret +word_drop: + add r12, 8 + ret +word_over: + mov rax, [r12 + 8] + sub r12, 8 + mov [r12], rax + ret +word_swap: + mov rax, [r12] + mov rbx, [r12 + 8] + mov [r12], rbx + mov [r12 + 8], rax + ret +word__2b: + mov rax, [r12] + add r12, 8 + add qword [r12], rax + ret +word__2d: + mov rax, [r12] + add r12, 8 + sub qword [r12], rax + ret +word__2a: + mov rax, [r12] + add r12, 8 + imul qword [r12] + mov [r12], rax + ret +word__2f: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rax + ret +word__25: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rdx + ret +word__3d_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + sete bl + mov [r12], rbx + ret +word__21_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setne bl + mov [r12], rbx + ret +word__3c: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setl bl + mov [r12], rbx + ret +word__3e: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setg bl + mov [r12], rbx + ret +word__3c_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setle bl + mov [r12], rbx + ret +word__3e_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setge bl + mov [r12], rbx + ret +word__40: + mov rax, [r12] + mov rax, [rax] + mov [r12], rax + ret +word__21: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + mov [rax], rbx + add r12, 8 + ret +word_mmap: + mov r9, [r12] + add r12, 8 + mov r8, [r12] + add r12, 8 + mov r10, [r12] + add r12, 8 + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 9 + syscall + mov [r12], rax + ret +word_munmap: + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 11 + syscall + mov [r12], rax + ret +word_exit: + mov rdi, [r12] + add r12, 8 + mov rax, 60 + syscall + ret +word__3er: + mov rax, [r12] + add r12, 8 + sub r13, 8 + mov [r13], rax + ret +word_r_3e: + mov rax, [r13] + add r13, 8 + sub r12, 8 + mov [r12], rax + ret +word_rdrop: + add r13, 8 + ret +word_pick: + mov rcx, [r12] + add r12, 8 + mov rax, [r12 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_rpick: + mov rcx, [r12] + add r12, 8 + mov rax, [r13 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_main: + ; push 6 + sub r12, 8 + mov qword [r12], 6 + call word_puts + ; push 0 + sub r12, 8 + mov qword [r12], 0 + ret +section .bss +align 16 +dstack: resb DSTK_BYTES +dstack_top: +align 16 +rstack: resb RSTK_BYTES +rstack_top: +align 16 +print_buf: resb PRINT_BUF_BYTES +print_buf_end: \ No newline at end of file diff --git a/build/override_dup_compile_time.o b/build/override_dup_compile_time.o new file mode 100644 index 0000000..14b8148 Binary files /dev/null and b/build/override_dup_compile_time.o differ diff --git a/build/test.asm b/build/test.asm index bc3c542..81c4db2 100644 --- a/build/test.asm +++ b/build/test.asm @@ -73,6 +73,11 @@ word_dup: word_drop: add r12, 8 ret +word_over: + mov rax, [r12 + 8] + sub r12, 8 + mov [r12], rax + ret word_swap: mov rax, [r12] mov rbx, [r12 + 8] @@ -222,6 +227,13 @@ word_r_3e: word_rdrop: add r13, 8 ret +word_pick: + mov rcx, [r12] + add r12, 8 + mov rax, [r12 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret word_rpick: mov rcx, [r12] add r12, 8 @@ -461,18 +473,18 @@ word_test_2dif: mov rax, [r12] add r12, 8 test rax, rax - jz L_if_false_0 + jz L_if_false_24 ; push 111 sub r12, 8 mov qword [r12], 111 call word_puts - jmp L_if_end_1 -L_if_false_0: + jmp L_if_end_25 +L_if_false_24: ; push 222 sub r12, 8 mov qword [r12], 222 call word_puts -L_if_end_1: +L_if_end_25: ret word_test_2delse_2dif: ; push 2 @@ -486,13 +498,13 @@ word_test_2delse_2dif: mov rax, [r12] add r12, 8 test rax, rax - jz L_if_false_2 + jz L_if_false_26 ; push 50 sub r12, 8 mov qword [r12], 50 call word_puts - jmp L_if_end_3 -L_if_false_2: + jmp L_if_end_27 +L_if_false_26: call word_dup ; push 2 sub r12, 8 @@ -501,19 +513,19 @@ L_if_false_2: mov rax, [r12] add r12, 8 test rax, rax - jz L_if_false_4 + jz L_if_false_28 ; push 60 sub r12, 8 mov qword [r12], 60 call word_puts - jmp L_if_end_5 -L_if_false_4: + jmp L_if_end_29 +L_if_false_28: ; push 70 sub r12, 8 mov qword [r12], 70 call word_puts -L_if_end_5: -L_if_end_3: +L_if_end_29: +L_if_end_27: call word_drop ret word_test_2dfor: @@ -526,10 +538,10 @@ word_test_2dfor: mov rax, [r12] add r12, 8 cmp rax, 0 - jle L_for_end_7 + jle L_for_end_31 sub r13, 8 mov [r13], rax -L_for_loop_6: +L_for_loop_30: ; push 1 sub r12, 8 mov qword [r12], 1 @@ -537,9 +549,9 @@ L_for_loop_6: mov rax, [r13] dec rax mov [r13], rax - jg L_for_loop_6 + jg L_for_loop_30 add r13, 8 -L_for_end_7: +L_for_end_31: call word_puts ret word_test_2dfor_2dzero: @@ -552,17 +564,17 @@ word_test_2dfor_2dzero: mov rax, [r12] add r12, 8 cmp rax, 0 - jle L_for_end_9 + jle L_for_end_33 sub r13, 8 mov [r13], rax -L_for_loop_8: +L_for_loop_32: call word_drop mov rax, [r13] dec rax mov [r13], rax - jg L_for_loop_8 + jg L_for_loop_32 add r13, 8 -L_for_end_9: +L_for_end_33: call word_puts ret word_test_2dstruct: diff --git a/build/test.o b/build/test.o index 9259976..03b5da4 100644 Binary files a/build/test.o and b/build/test.o differ diff --git a/fn.sl b/fn.sl new file mode 100644 index 0000000..45b0c4b --- /dev/null +++ b/fn.sl @@ -0,0 +1,155 @@ +: call-syntax-rewrite # ( fnameToken -- handled ) + dup token-lexeme identifier? 0 == if drop 0 exit then + peek-token dup nil? if drop drop 0 exit then + dup token-lexeme "(" string= 0 == if drop drop 0 exit then + swap >r # stash fnameTok + drop # discard peeked '(' + next-token drop # consume '(' + list-new # out + list-new # out cur +begin + next-token dup nil? if "unterminated call expression" parse-error then + dup token-lexeme ")" string= if + drop + # flush current arg + list-extend # out' + r> list-append # out'' + inject-tokens + 1 exit + then + dup token-lexeme "," string= if + drop + list-extend # out' + list-new # out' cur + continue + then + # default: append tok to cur + list-append +again +; +immediate +compile-only + +: extend-syntax + "call-syntax-rewrite" set-token-hook +; +immediate +compile-only + + +: fn-op-prec + dup "+" string= if drop 1 exit then + dup "-" string= if drop 1 exit then + dup "*" string= if drop 2 exit then + dup "/" string= if drop 2 exit then + dup "%" string= if drop 2 exit then + drop 0 +; +compile-only + +: fn-operator? + fn-op-prec 0 > +; +compile-only + +: fn-check-dup + >r # params (r: name) + 0 # params idx +begin + over list-length swap >= if # params flag + r> exit + then + dup >r # params idx (r: idx name) + over swap list-get # params elem + 1 rpick string= if "duplicate parameter names in fn definition" parse-error then + drop # drop comparison flag when no error + r> 1 + # params idx+1 +again +; +compile-only + +: fn-params + list-new # lexer params + swap # params lexer + >r # params (r: lexer) +begin + 0 rpick lexer-pop token-lexeme # params lex + swap drop # params lex (drop returned lexer) + dup ")" string= if drop r> exit then + dup "int" string= 0 == if "only 'int' parameters are supported in fn definitions" parse-error then + drop # params + 0 rpick lexer-pop token-lexeme # params lexer pname + swap drop # params pname + dup identifier? 0 == if "invalid parameter name in fn definition" parse-error then + fn-check-dup # params pname + list-append # params + 0 rpick lexer-pop token-lexeme # params lexer sep + swap drop # params sep + dup "," string= if drop continue then + dup ")" string= if drop r> exit then + "expected ',' or ')' in parameter list" parse-error +again +; +compile-only + +: fn-collect-body + "{" lexer-expect drop # consume opening brace, keep lexer + lexer-collect-brace # lexer bodyTokens + swap drop # bodyTokens +; +compile-only + +: fn-lexemes-from-tokens + list-new >r # tokens (r: acc) + 0 # tokens idx +begin + over list-length over swap >= if # stop when idx >= len + drop drop # drop idx and tokens (flag consumed by if) + r> exit # return acc + then + over over list-get token-lexeme # tokens idx lex + r> swap list-append >r # tokens idx + 1 + # tokens idx+1 +again +; +compile-only + +: fn-validate-body + dup list-length 0 == if "empty function body" parse-error then + dup >r 0 r> swap list-get "return" string= 0 == if "function body must start with 'return'" parse-error then + dup list-last ";" string= 0 == if "function body must terminate with ';'" parse-error then + list-clone # work on a copy + list-pop drop # drop trailing ';' + list-pop-front drop # drop leading 'return' + dup list-length 0 == if "missing return expression" parse-error then +; +compile-only + +: fn-build-body + fn-translate-postfix # words +; +compile-only + +: fn + "(),{};+-*/%," lexer-new # lexer + dup lexer-pop # lexer nameTok + dup >r # save nameTok + token-lexeme # lexer name + dup identifier? 0 == if "invalid function name for 'fn'" parse-error then + >r # save name string + drop # leave lexer only for params + "(" lexer-expect drop # consume '(' keep lexer + fn-params # params lexer + fn-collect-body # params bodyTokens + swap >r # bodyTokens (r: params) + fn-lexemes-from-tokens # lexemes + fn-validate-body # expr + shunt # postfix + r> # postfix params + fn-build-body # body + r> drop # drop name string + r> # name token + swap emit-definition +; +immediate +compile-only diff --git a/main b/main new file mode 100755 index 0000000..922fbf7 Binary files /dev/null and b/main differ diff --git a/main.bin b/main.bin new file mode 100755 index 0000000..9a54c1f Binary files /dev/null and b/main.bin differ diff --git a/main.py b/main.py index 7a655b3..7bffc89 100644 --- a/main.py +++ b/main.py @@ -50,6 +50,22 @@ class Reader: def __init__(self) -> None: self.line = 1 self.column = 0 + self.custom_tokens: Set[str] = {"(", ")", "{", "}", ";", ",", "[", "]"} + self._token_order: List[str] = sorted(self.custom_tokens, key=len, reverse=True) + + def add_tokens(self, tokens: Iterable[str]) -> None: + updated = False + for tok in tokens: + if not tok: + continue + if tok not in self.custom_tokens: + self.custom_tokens.add(tok) + updated = True + if updated: + self._token_order = sorted(self.custom_tokens, key=len, reverse=True) + + def add_token_chars(self, chars: str) -> None: + self.add_tokens(chars) def tokenize(self, source: str) -> Iterable[Token]: self.line = 1 @@ -62,19 +78,78 @@ class Reader: source_len = len(source) while index < source_len: char = source[index] + if char == '"': + if lexeme: + yield Token("".join(lexeme), token_line, token_column, token_start, index) + lexeme.clear() + token_start = index + token_line = self.line + token_column = self.column + index += 1 + self.column += 1 + string_parts = ['"'] + while True: + if index >= source_len: + raise ParseError("unterminated string literal") + ch = source[index] + string_parts.append(ch) + index += 1 + if ch == "\n": + self.line += 1 + self.column = 0 + else: + self.column += 1 + if ch == "\\": + if index >= source_len: + raise ParseError("unterminated string literal") + next_ch = source[index] + string_parts.append(next_ch) + index += 1 + if next_ch == "\n": + self.line += 1 + self.column = 0 + else: + self.column += 1 + continue + if ch == '"': + yield Token("".join(string_parts), token_line, token_column, token_start, index) + break + continue if char == "#": while index < source_len and source[index] != "\n": index += 1 continue + if char == ";" and index + 1 < source_len and source[index + 1].isalpha(): + if not lexeme: + token_start = index + token_line = self.line + token_column = self.column + lexeme.append(";") + index += 1 + self.column += 1 + continue + matched_token: Optional[str] = None + for tok in self._token_order: + if source.startswith(tok, index): + matched_token = tok + break + if matched_token is not None: + if lexeme: + yield Token("".join(lexeme), token_line, token_column, token_start, index) + lexeme.clear() + token_start = index + token_line = self.line + token_column = self.column + yield Token(matched_token, self.line, self.column, index, index + len(matched_token)) + index += len(matched_token) + self.column += len(matched_token) + token_start = index + token_line = self.line + token_column = self.column + continue if char.isspace(): if lexeme: - yield Token( - "".join(lexeme), - token_line, - token_column, - token_start, - index, - ) + yield Token("".join(lexeme), token_line, token_column, token_start, index) lexeme.clear() if char == "\n": self.line += 1 @@ -82,6 +157,9 @@ class Reader: else: self.column += 1 index += 1 + token_start = index + token_line = self.line + token_column = self.column continue if not lexeme: token_start = index @@ -91,7 +169,7 @@ class Reader: self.column += 1 index += 1 if lexeme: - yield Token("".join(lexeme), token_line, token_column, token_start, index) + yield Token("".join(lexeme), token_line, token_column, token_start, source_len) # --------------------------------------------------------------------------- @@ -218,11 +296,8 @@ class MacroContext: def inject_token_objects(self, tokens: Sequence[Token]) -> None: self._parser.tokens[self._parser.pos:self._parser.pos] = list(tokens) - def enable_call_syntax(self) -> None: - self._parser.call_syntax_enabled = True - - def disable_call_syntax(self) -> None: - self._parser.call_syntax_enabled = False + def set_token_hook(self, handler: Optional[str]) -> None: + self._parser.token_hook = handler def new_label(self, prefix: str) -> str: return self._parser._new_label(prefix) @@ -247,6 +322,7 @@ class Word: macro_params: int = 0 compile_time_intrinsic: Optional[Callable[["CompileTimeVM"], None]] = None compile_only: bool = False + compile_time_override: bool = False @dataclass @@ -271,9 +347,12 @@ Context = Union[Module, Definition] class Parser: - def __init__(self, dictionary: Dictionary) -> None: + def __init__(self, dictionary: Dictionary, reader: Optional[Reader] = None) -> None: self.dictionary = dictionary + self.reader = reader or Reader() self.tokens: List[Token] = [] + self._token_iter: Optional[Iterable[Token]] = None + self._token_iter_exhausted = True self.pos = 0 self.context_stack: List[Context] = [] self.definition_stack: List[Word] = [] @@ -282,14 +361,20 @@ class Parser: self.macro_recording: Optional[MacroDefinition] = None self.control_stack: List[Dict[str, str]] = [] self.label_counter = 0 - self.call_syntax_enabled = False + self.token_hook: Optional[str] = None + self._last_token: Optional[Token] = None self.compile_time_vm = CompileTimeVM(self) + def inject_token_objects(self, tokens: Sequence[Token]) -> None: + """Insert tokens at the current parse position.""" + self.tokens[self.pos:self.pos] = list(tokens) + # Public helpers for macros ------------------------------------------------ def next_token(self) -> Token: return self._consume() def peek_token(self) -> Optional[Token]: + self._ensure_tokens(self.pos) return None if self._eof() else self.tokens[self.pos] def emit_node(self, node: ASTNode) -> None: @@ -300,7 +385,9 @@ class Parser: # Parsing ------------------------------------------------------------------ def parse(self, tokens: Iterable[Token], source: str) -> Module: - self.tokens = list(tokens) + self.tokens = [] + self._token_iter = iter(tokens) + self._token_iter_exhausted = False self.source = source self.pos = 0 self.context_stack = [Module(forms=[])] @@ -308,10 +395,14 @@ class Parser: self.last_defined = None self.control_stack = [] self.label_counter = 0 - self.call_syntax_enabled = False + self.token_hook = None + self._last_token = None while not self._eof(): token = self._consume() + self._last_token = token + if self._run_token_hook(token): + continue if self._handle_macro_recording(token): continue lexeme = token.lexeme @@ -358,11 +449,6 @@ class Parser: # Internal helpers --------------------------------------------------------- def _handle_token(self, token: Token) -> None: - if self.call_syntax_enabled: - call_target = self._maybe_call_form(token.lexeme) - if call_target is not None: - self._append_node(WordRef(name=call_target)) - return if self._try_literal(token): return @@ -444,6 +530,12 @@ class Parser: self.dictionary.register(word) def _push_control(self, entry: Dict[str, str]) -> None: + if "line" not in entry or "column" not in entry: + tok = self._last_token + if tok is not None: + entry = dict(entry) + entry["line"] = tok.line + entry["column"] = tok.column self.control_stack.append(entry) def _pop_control(self, expected: Tuple[str, ...]) -> Dict[str, str]: @@ -451,7 +543,14 @@ class Parser: raise ParseError("control stack underflow") entry = self.control_stack.pop() if entry.get("type") not in expected: - raise ParseError(f"mismatched control word '{entry.get('type')}'") + tok = self._last_token + location = "" + if tok is not None: + location = f" at {tok.line}:{tok.column} near '{tok.lexeme}'" + origin = "" + if "line" in entry and "column" in entry: + origin = f" (opened at {entry['line']}:{entry['column']})" + raise ParseError(f"mismatched control word '{entry.get('type')}'" + origin + location) return entry def _new_label(self, prefix: str) -> str: @@ -459,13 +558,16 @@ class Parser: self.label_counter += 1 return label - def _maybe_call_form(self, lexeme: str) -> Optional[str]: - if len(lexeme) <= 2 or not lexeme.endswith("()"): - return None - name = lexeme[:-2] - if not name or not _is_identifier(name): - return None - return name + def _run_token_hook(self, token: Token) -> bool: + if not self.token_hook: + return False + hook_word = self.dictionary.lookup(self.token_hook) + if hook_word is None: + raise ParseError(f"token hook '{self.token_hook}' not defined") + self.compile_time_vm.invoke_with_args(hook_word, [token]) + # Convention: hook leaves handled flag on stack (int truthy means consumed) + handled = self.compile_time_vm.pop() + return bool(handled) def _handle_if_control(self) -> None: false_label = self._new_label("if_false") @@ -518,6 +620,9 @@ class Parser: word = self.definition_stack.pop() ctx.immediate = word.immediate ctx.compile_only = word.compile_only + if word.compile_only or word.immediate: + word.compile_time_override = True + word.compile_time_intrinsic = None module = self.context_stack[-1] if not isinstance(module, Module): raise ParseError("nested definitions are not supported yet") @@ -626,6 +731,7 @@ class Parser: return True def _consume(self) -> Token: + self._ensure_tokens(self.pos) if self._eof(): raise ParseError("unexpected EOF") token = self.tokens[self.pos] @@ -633,8 +739,23 @@ class Parser: return token def _eof(self) -> bool: + self._ensure_tokens(self.pos) return self.pos >= len(self.tokens) + def _ensure_tokens(self, upto: int) -> None: + if self._token_iter_exhausted: + return + if self._token_iter is None: + self._token_iter_exhausted = True + return + while len(self.tokens) <= upto and not self._token_iter_exhausted: + try: + next_tok = next(self._token_iter) + except StopIteration: + self._token_iter_exhausted = True + break + self.tokens.append(next_tok) + class CompileTimeVM: def __init__(self, parser: Parser) -> None: @@ -642,10 +763,12 @@ class CompileTimeVM: self.dictionary = parser.dictionary self.stack: List[Any] = [] self.return_stack: List[Any] = [] + self.loop_stack: List[Dict[str, Any]] = [] def reset(self) -> None: self.stack.clear() self.return_stack.clear() + self.loop_stack.clear() def push(self, value: Any) -> None: self.stack.append(value) @@ -688,11 +811,18 @@ class CompileTimeVM: self.reset() self._call_word(word) + def invoke_with_args(self, word: Word, args: Sequence[Any]) -> None: + self.reset() + for value in args: + self.push(value) + self._call_word(word) + def _call_word(self, word: Word) -> None: - if word.compile_time_intrinsic is not None: + definition = word.definition + prefer_definition = word.compile_time_override or (isinstance(definition, Definition) and (word.immediate or word.compile_only)) + if not prefer_definition and word.compile_time_intrinsic is not None: word.compile_time_intrinsic(self) return - definition = word.definition if definition is None: raise ParseError(f"word '{word.name}' has no compile-time definition") if isinstance(definition, AsmDefinition): @@ -708,7 +838,9 @@ class CompileTimeVM: def _execute_nodes(self, nodes: Sequence[ASTNode]) -> None: label_positions = self._label_positions(nodes) loop_pairs = self._for_pairs(nodes) - loop_stack: List[Dict[str, Any]] = [] + begin_pairs = self._begin_pairs(nodes) + self.loop_stack = [] + begin_stack: List[Dict[str, int]] = [] ip = 0 while ip < len(nodes): node = nodes[ip] @@ -717,7 +849,31 @@ class CompileTimeVM: ip += 1 continue if isinstance(node, WordRef): - self._call_word_by_name(node.name) + name = node.name + if name == "begin": + end_idx = begin_pairs.get(ip) + if end_idx is None: + raise ParseError("'begin' without matching 'again'") + begin_stack.append({"begin": ip, "end": end_idx}) + ip += 1 + continue + if name == "again": + if not begin_stack or begin_stack[-1]["end"] != ip: + raise ParseError("'again' without matching 'begin'") + ip = begin_stack[-1]["begin"] + 1 + continue + if name == "continue": + if not begin_stack: + raise ParseError("'continue' outside begin/again loop") + ip = begin_stack[-1]["begin"] + 1 + continue + if name == "exit": + if begin_stack: + frame = begin_stack.pop() + ip = frame["end"] + 1 + continue + return + self._call_word_by_name(name) ip += 1 continue if isinstance(node, BranchZero): @@ -748,18 +904,18 @@ class CompileTimeVM: raise ParseError("internal loop bookkeeping error") ip = match + 1 continue - loop_stack.append({"remaining": count, "begin": ip}) + self.loop_stack.append({"remaining": count, "begin": ip, "initial": count}) ip += 1 continue if isinstance(node, ForNext): - if not loop_stack: + if not self.loop_stack: raise ParseError("'next' without matching 'for'") - frame = loop_stack[-1] + frame = self.loop_stack[-1] frame["remaining"] -= 1 if frame["remaining"] > 0: ip = frame["begin"] + 1 continue - loop_stack.pop() + self.loop_stack.pop() ip += 1 continue raise ParseError(f"unsupported compile-time AST node {node!r}") @@ -787,6 +943,22 @@ class CompileTimeVM: raise ParseError("'for' without matching 'next'") return pairs + def _begin_pairs(self, nodes: Sequence[ASTNode]) -> Dict[int, int]: + stack: List[int] = [] + pairs: Dict[int, int] = {} + for idx, node in enumerate(nodes): + if isinstance(node, WordRef) and node.name == "begin": + stack.append(idx) + elif isinstance(node, WordRef) and node.name == "again": + if not stack: + raise ParseError("'again' without matching 'begin'") + begin_idx = stack.pop() + pairs[begin_idx] = idx + pairs[idx] = begin_idx + if stack: + raise ParseError("'begin' without matching 'again'") + return pairs + def _jump_to_label(self, labels: Dict[str, int], target: str) -> int: if target not in labels: raise ParseError(f"unknown label '{target}' during compile-time execution") @@ -1457,6 +1629,13 @@ def _ct_rpick(vm: CompileTimeVM) -> None: vm.push(vm.return_stack[-1 - index]) +def _ct_pick(vm: CompileTimeVM) -> None: + index = vm.pop_int() + if index < 0 or index >= len(vm.stack): + raise ParseError("pick index out of range") + vm.push(vm.stack[-1 - index]) + + def _ct_nil(vm: CompileTimeVM) -> None: vm.push(None) @@ -1509,6 +1688,14 @@ def _ct_list_empty(vm: CompileTimeVM) -> None: vm.push(1 if not lst else 0) +def _ct_loop_index(vm: CompileTimeVM) -> None: + if not vm.loop_stack: + raise ParseError("'i' used outside of a for loop") + frame = vm.loop_stack[-1] + idx = frame["initial"] - frame["remaining"] + vm.push(idx) + + def _ct_list_get(vm: CompileTimeVM) -> None: index = vm.pop_int() lst = _ensure_list(vm.pop()) @@ -1608,11 +1795,121 @@ def _ct_string_to_number(vm: CompileTimeVM) -> None: vm.push(0) +def _ct_set_token_hook(vm: CompileTimeVM) -> None: + hook_name = vm.pop_str() + vm.parser.token_hook = hook_name + + +def _ct_clear_token_hook(vm: CompileTimeVM) -> None: + vm.parser.token_hook = None + + +def _ct_use_l2_compile_time(vm: CompileTimeVM) -> None: + if vm.stack: + name = vm.pop_str() + word = vm.dictionary.lookup(name) + else: + word = vm.parser.most_recent_definition() + if word is None: + raise ParseError("use-l2-ct with empty stack and no recent definition") + name = word.name + if word is None: + raise ParseError(f"unknown word '{name}' for use-l2-ct") + word.compile_time_intrinsic = None + word.compile_time_override = True + + +def _ct_add_token(vm: CompileTimeVM) -> None: + tok = vm.pop_str() + vm.parser.reader.add_tokens([tok]) + + +def _ct_add_token_chars(vm: CompileTimeVM) -> None: + chars = vm.pop_str() + vm.parser.reader.add_token_chars(chars) + + +def _ct_fn_param_index(vm: CompileTimeVM) -> None: + name = vm.pop_str() + params = _ensure_list(vm.pop()) + try: + idx = params.index(name) + vm.push(params) + vm.push(idx) + vm.push(1) + except ValueError: + vm.push(params) + vm.push(-1) + vm.push(0) + + +def _ct_fn_translate_postfix(vm: CompileTimeVM) -> None: + params = _ensure_list(vm.pop()) + postfix = _ensure_list(vm.pop()) + prologue: List[Any] = [">r"] * len(params) + translated: List[Any] = [] + for tok in postfix: + if isinstance(tok, int): + translated.append(tok) + continue + if isinstance(tok, str): + try: + num_value = int(tok, 0) + translated.append(num_value) + continue + except ValueError: + pass + if isinstance(tok, str) and tok in params: + idx = params.index(tok) + translated.append(idx) + translated.append("rpick") + else: + translated.append(tok) + epilogue: List[Any] = ["rdrop"] * len(params) + out: List[Any] = prologue + translated + epilogue + vm.push(out) + + +def _ct_shunt(vm: CompileTimeVM) -> None: + """Convert an infix token list (strings) to postfix using +,-,*,/,%.""" + ops: List[str] = [] + output: List[str] = [] + prec = {"+": 1, "-": 1, "*": 2, "/": 2, "%": 2} + tokens = _ensure_list(vm.pop()) + for tok in tokens: + if not isinstance(tok, str): + raise ParseError("shunt expects list of strings") + if tok == "(": + ops.append(tok) + continue + if tok == ")": + while ops and ops[-1] != "(": + output.append(ops.pop()) + if not ops: + raise ParseError("mismatched parentheses in expression") + ops.pop() + continue + if tok in prec: + while ops and ops[-1] in prec and prec[ops[-1]] >= prec[tok]: + output.append(ops.pop()) + ops.append(tok) + continue + output.append(tok) + while ops: + top = ops.pop() + if top == "(": + raise ParseError("mismatched parentheses in expression") + output.append(top) + vm.push(output) + + def _ct_int_to_string(vm: CompileTimeVM) -> None: value = vm.pop_int() vm.push(str(value)) + + def _ct_identifier_p(vm: CompileTimeVM) -> None: value = vm.pop_str() vm.push(1 if _is_identifier(value) else 0) @@ -1677,12 +1974,6 @@ def _ct_parse_error(vm: CompileTimeVM) -> None: raise ParseError(message) -def _ct_enable_call_syntax(vm: CompileTimeVM) -> None: - vm.parser.call_syntax_enabled = True - - -def _ct_disable_call_syntax(vm: CompileTimeVM) -> None: - vm.parser.call_syntax_enabled = False def _ct_lexer_new(vm: CompileTimeVM) -> None: @@ -1763,6 +2054,7 @@ def _register_compile_time_primitives(dictionary: Dictionary) -> None: register("r>", _ct_r_from) register("rdrop", _ct_rdrop) register("rpick", _ct_rpick) + register("pick", _ct_pick) register("nil", _ct_nil, compile_only=True) register("nil?", _ct_nil_p, compile_only=True) @@ -1778,6 +2070,7 @@ def _register_compile_time_primitives(dictionary: Dictionary) -> None: register("list-clear", _ct_list_clear, compile_only=True) register("list-extend", _ct_list_extend, compile_only=True) register("list-last", _ct_list_last, compile_only=True) + register("i", _ct_loop_index, compile_only=True) register("map-new", _ct_map_new, compile_only=True) register("map-set", _ct_map_set, compile_only=True) @@ -1788,18 +2081,27 @@ def _register_compile_time_primitives(dictionary: Dictionary) -> None: register("string-length", _ct_string_length, compile_only=True) register("string-append", _ct_string_append, compile_only=True) register("string>number", _ct_string_to_number, compile_only=True) + register("fn-param-index", _ct_fn_param_index, compile_only=True) + register("fn-translate-postfix", _ct_fn_translate_postfix, compile_only=True) register("int>string", _ct_int_to_string, compile_only=True) register("identifier?", _ct_identifier_p, compile_only=True) + register("shunt", _ct_shunt, compile_only=True) register("token-lexeme", _ct_token_lexeme, compile_only=True) register("token-from-lexeme", _ct_token_from_lexeme, compile_only=True) register("next-token", _ct_next_token, compile_only=True) register("peek-token", _ct_peek_token, compile_only=True) register("inject-tokens", _ct_inject_tokens, compile_only=True) + register("add-token", _ct_add_token, compile_only=True) + register("add-token-chars", _ct_add_token_chars, compile_only=True) + register("set-token-hook", _ct_set_token_hook, compile_only=True) + register("clear-token-hook", _ct_clear_token_hook, compile_only=True) + register("use-l2-ct", _ct_use_l2_compile_time, compile_only=True) + word_use_l2 = dictionary.lookup("use-l2-ct") + if word_use_l2: + word_use_l2.immediate = True register("emit-definition", _ct_emit_definition, compile_only=True) register("parse-error", _ct_parse_error, compile_only=True) - register("enable-call-syntax", _ct_enable_call_syntax, compile_only=True) - register("disable-call-syntax", _ct_disable_call_syntax, compile_only=True) register("lexer-new", _ct_lexer_new, compile_only=True) register("lexer-pop", _ct_lexer_pop, compile_only=True) @@ -1910,11 +2212,11 @@ class Compiler: def __init__(self) -> None: self.reader = Reader() self.dictionary = bootstrap_dictionary() - self.parser = Parser(self.dictionary) + self.parser = Parser(self.dictionary, self.reader) self.assembler = Assembler(self.dictionary) def compile_source(self, source: str) -> Emission: - tokens = list(self.reader.tokenize(source)) + tokens = self.reader.tokenize(source) module = self.parser.parse(tokens, source) return self.assembler.emit(module) diff --git a/main.sl b/main.sl index 44cc4e8..7548779 100644 --- a/main.sl +++ b/main.sl @@ -1,15 +1,15 @@ import stdlib.sl +import fn.sl : main 2 40 + puts extend-syntax - 1 - 2 - foo() + foo(1, 2) puts 0 ; + fn foo(int a, int b){ return a + b; } \ No newline at end of file diff --git a/stdlib.sl b/stdlib.sl index 4cfa60e..28381c8 100644 --- a/stdlib.sl +++ b/stdlib.sl @@ -47,211 +47,6 @@ puts_finish_digits: } ; -: extend-syntax - enable-call-syntax -; -immediate -compile-only - -:py fn { - FN_SPLIT_CHARS = set("(),{};+-*/%,") - - def split_token(token): - lex = token.lexeme - parts = [] - idx = 0 - while idx < len(lex): - char = lex[idx] - if char in FN_SPLIT_CHARS: - parts.append(Token( - lexeme=char, - line=token.line, - column=token.column + idx, - start=token.start + idx, - end=token.start + idx + 1, - )) - idx += 1 - continue - start_idx = idx - while idx < len(lex) and lex[idx] not in FN_SPLIT_CHARS: - idx += 1 - segment = lex[start_idx:idx] - if segment: - parts.append(Token( - lexeme=segment, - line=token.line, - column=token.column + start_idx, - start=token.start + start_idx, - end=token.start + idx, - )) - return [part for part in parts if part.lexeme] - - class FnLexer: - def __init__(self, parser): - self.parser = parser - self.buffer = [] - - def _fill(self): - while not self.buffer: - if self.parser._eof(): - raise ParseError("unexpected EOF inside fn definition") - token = self.parser.next_token() - split = split_token(token) - if not split: - continue - self.buffer.extend(split) - - def peek(self): - self._fill() - return self.buffer[0] - - def pop(self): - token = self.peek() - self.buffer.pop(0) - return token - - def expect(self, lexeme): - token = self.pop() - if token.lexeme != lexeme: - raise ParseError(f"expected '{lexeme}' but found '{token.lexeme}'") - return token - - def push_back_remaining(self): - if not self.buffer: - return - self.parser.tokens[self.parser.pos:self.parser.pos] = self.buffer - self.buffer = [] - - def collect_block_tokens(self): - depth = 1 - collected = [] - while depth > 0: - token = self.pop() - if token.lexeme == "{": - depth += 1 - collected.append(token) - continue - if token.lexeme == "}": - depth -= 1 - if depth == 0: - break - collected.append(token) - continue - collected.append(token) - return collected - - OP_PRECEDENCE = {} - OP_PRECEDENCE["+"] = 1 - OP_PRECEDENCE["-"] = 1 - OP_PRECEDENCE["*"] = 2 - OP_PRECEDENCE["/"] = 2 - OP_PRECEDENCE["%"] = 2 - - def parse_fn_body(tokens): - if not tokens: - raise ParseError("empty function body") - lexemes = [tok.lexeme for tok in tokens if tok.lexeme] - if not lexemes or lexemes[0] != "return": - raise ParseError("function body must start with 'return'") - if lexemes[-1] != ";": - raise ParseError("function body must terminate with ';'") - extra = lexemes[1:-1] - if not extra: - raise ParseError("missing return expression") - return extra - - def shunting_yard(tokens): - output = [] - stack = [] - for token in tokens: - if token == "(": - stack.append(token) - continue - if token == ")": - while stack and stack[-1] != "(": - output.append(stack.pop()) - if not stack: - raise ParseError("mismatched parentheses in return expression") - stack.pop() - continue - if token in OP_PRECEDENCE: - while stack and stack[-1] in OP_PRECEDENCE and OP_PRECEDENCE[stack[-1]] >= OP_PRECEDENCE[token]: - output.append(stack.pop()) - stack.append(token) - continue - output.append(token) - while stack: - top = stack.pop() - if top == "(": - raise ParseError("mismatched parentheses in return expression") - output.append(top) - return output - - def is_int_literal(text): - try: - int(text, 0) - return True - except ValueError: - return False - - def translate_postfix(postfix, params): - indices = {name: idx for idx, name in enumerate(params)} - translated = [] - for token in postfix: - if token in indices: - translated.append(str(indices[token])) - translated.append("rpick") - continue - if is_int_literal(token): - translated.append(token) - continue - translated.append(token) - return translated - - def macro(ctx): - parser = ctx.parser - if not isinstance(parser.context_stack[-1], Module): - raise ParseError("'fn' definitions must be top-level") - lexer = FnLexer(parser) - name_token = lexer.pop() - name = name_token.lexeme - if not is_identifier(name): - raise ParseError("invalid function name for 'fn'") - lexer.expect("(") - params = [] - if lexer.peek().lexeme != ")": - while True: - type_token = lexer.pop() - if type_token.lexeme != "int": - raise ParseError("only 'int' parameters are supported in fn definitions") - param_token = lexer.pop() - if not is_identifier(param_token.lexeme): - raise ParseError("invalid parameter name in fn definition") - params.append(param_token.lexeme) - if lexer.peek().lexeme == ",": - lexer.pop() - continue - break - lexer.expect(")") - lexer.expect("{") - body_tokens = lexer.collect_block_tokens() - lexer.push_back_remaining() - if len(params) != len(set(params)): - raise ParseError("duplicate parameter names in fn definition") - return_tokens = parse_fn_body(body_tokens) - postfix = shunting_yard(return_tokens) - body_words = [] - for _ in reversed(params): - body_words.append(">r") - body_words.extend(translate_postfix(postfix, params)) - for _ in params: - body_words.append("rdrop") - generated = [] - emit_definition(generated, name_token, name, body_words) - ctx.inject_token_objects(generated) -} -; - :asm dup { mov rax, [r12] sub r12, 8 @@ -264,6 +59,13 @@ compile-only } ; +:asm over { + mov rax, [r12 + 8] + sub r12, 8 + mov [r12], rax +} +; + :asm swap { mov rax, [r12] mov rbx, [r12 + 8] @@ -453,6 +255,15 @@ compile-only } ; +:asm pick { + mov rcx, [r12] + add r12, 8 + mov rax, [r12 + rcx * 8] + sub r12, 8 + mov [r12], rax +} +; + :asm rpick { mov rcx, [r12] add r12, 8 diff --git a/test.bin b/test.bin index b883df0..792e00c 100755 Binary files a/test.bin and b/test.bin differ diff --git a/test.sl b/test.sl index c26daa1..5dd56e1 100644 --- a/test.sl +++ b/test.sl @@ -1,4 +1,5 @@ import stdlib.sl +import fn.sl :asm mem-slot { lea rax, [rel print_buf] @@ -33,8 +34,8 @@ struct: Point extend-syntax -fn fancy_add(int left, int right){ - return (left + right) * right; +fn fancy_add(int a, int b){ + return (a + b) * b; } : test-add diff --git a/tests/run_tests.py b/tests/run_tests.py new file mode 100644 index 0000000..68a6af3 --- /dev/null +++ b/tests/run_tests.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +"""Simple end-to-end test runner for L2. + +Each test case provides an L2 program source and an expected stdout. The runner +invokes the bootstrap compiler on the fly and executes the produced binary. +""" + +from __future__ import annotations + +import subprocess +import sys +import tempfile +from dataclasses import dataclass +from pathlib import Path +from typing import List + +ROOT = Path(__file__).resolve().parents[1] +COMPILER = ROOT / "main.py" +PYTHON = Path(sys.executable) + + +@dataclass +class TestCase: + name: str + source: str + expected_stdout: str + + +CASES: List[TestCase] = [ + TestCase( + name="call_syntax_parens", + source=f""" +import {ROOT / 'stdlib.sl'} +import {ROOT / 'fn.sl'} + +: main + 2 40 + + puts + extend-syntax + foo(1, 2) + puts + 0 +; + +fn foo(int a, int b){{ + return a + b; +}} +""", + expected_stdout="42\n3\n", + ), + TestCase( + name="loops_and_cmp", + source=f""" +import {ROOT / 'stdlib.sl'} + +: main + 0 + 5 for + 1 + + next + puts + 5 5 == puts + 5 4 == puts + 0 +; +""", + expected_stdout="5\n1\n0\n", + ), + TestCase( + name="override_dup_compile_time", + source=f""" +import {ROOT / 'stdlib.sl'} + +: dup + 6 +; +compile-only + +: emit-overridden + "dup" use-l2-ct + 42 + dup + int>string + nil + token-from-lexeme + list-new + swap + list-append + inject-tokens +; +immediate +compile-only + +: main + emit-overridden + puts + 0 +; +""", + expected_stdout="6\n", + ), +] + + +def run_case(case: TestCase) -> None: + print(f"[run] {case.name}") + with tempfile.TemporaryDirectory() as tmp: + src_path = Path(tmp) / f"{case.name}.sl" + exe_path = Path(tmp) / f"{case.name}.out" + src_path.write_text(case.source.strip() + "\n", encoding="utf-8") + + compile_cmd = [str(PYTHON), str(COMPILER), str(src_path), "-o", str(exe_path)] + compile_result = subprocess.run( + compile_cmd, + capture_output=True, + text=True, + cwd=ROOT, + ) + if compile_result.returncode != 0: + sys.stderr.write("[fail] compile error\n") + sys.stderr.write(compile_result.stdout) + sys.stderr.write(compile_result.stderr) + raise SystemExit(compile_result.returncode) + + run_result = subprocess.run( + [str(exe_path)], + capture_output=True, + text=True, + cwd=ROOT, + ) + if run_result.returncode != 0: + sys.stderr.write("[fail] execution error\n") + sys.stderr.write(run_result.stdout) + sys.stderr.write(run_result.stderr) + raise SystemExit(run_result.returncode) + + if run_result.stdout != case.expected_stdout: + sys.stderr.write(f"[fail] output mismatch for {case.name}\n") + sys.stderr.write("expected:\n" + case.expected_stdout) + sys.stderr.write("got:\n" + run_result.stdout) + raise SystemExit(1) + + print(f"[ok] {case.name}") + + +def main() -> None: + for case in CASES: + run_case(case) + print("[all tests passed]") + + +if __name__ == "__main__": + main()