diff --git a/a.out b/a.out index 9a54c1f..8bc6f17 100755 Binary files a/a.out and b/a.out differ diff --git a/a.sl b/a.sl index 9f3cb37..5ce78b8 100644 --- a/a.sl +++ b/a.sl @@ -1,5 +1,5 @@ import stdlib.sl : main - "hello world" puts + "hello world" puts_str ; \ No newline at end of file diff --git a/build/a.asm b/build/a.asm new file mode 100644 index 0000000..26aed90 --- /dev/null +++ b/build/a.asm @@ -0,0 +1,284 @@ +section .text +%define DSTK_BYTES 65536 +%define RSTK_BYTES 65536 +%define PRINT_BUF_BYTES 128 +global _start +_start: + ; initialize data/return stack pointers + lea r12, [rel dstack_top] + mov r15, r12 + lea r13, [rel rstack_top] + call word_main + mov rax, 0 + cmp r12, r15 + je .no_exit_value + mov rax, [r12] + add r12, 8 +.no_exit_value: + mov rdi, rax + mov rax, 60 + syscall +word_puts: + mov rax, [r12] + add r12, 8 + mov rbx, rax + mov r8, 0 + cmp rbx, 0 + jge puts_abs + neg rbx + mov r8, 1 +puts_abs: + lea rsi, [rel print_buf_end] + mov rcx, 0 + mov r10, 10 + cmp rbx, 0 + jne puts_digits + dec rsi + mov byte [rsi], '0' + inc rcx + jmp puts_sign +puts_digits: +puts_loop: + xor rdx, rdx + mov rax, rbx + div r10 + add dl, '0' + dec rsi + mov [rsi], dl + inc rcx + mov rbx, rax + test rbx, rbx + jne puts_loop +puts_sign: + cmp r8, 0 + je puts_finish_digits + dec rsi + mov byte [rsi], '-' + inc rcx +puts_finish_digits: + mov byte [rsi + rcx], 10 + inc rcx + mov rax, 1 + mov rdi, 1 + mov rdx, rcx + mov r9, rsi + mov rsi, r9 + syscall + ret +word_puts_str: + ; expects (addr, len) on data stack + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + cmp rdx, 0 + je puts_str_write_newline + mov rax, 1 + mov rdi, 1 + syscall +puts_str_write_newline: + mov byte [rel print_buf], 10 + mov rax, 1 + mov rdi, 1 + lea rsi, [rel print_buf] + mov rdx, 1 + syscall + ret +word_dup: + mov rax, [r12] + sub r12, 8 + mov [r12], rax + ret +word_drop: + add r12, 8 + ret +word_over: + mov rax, [r12 + 8] + sub r12, 8 + mov [r12], rax + ret +word_swap: + mov rax, [r12] + mov rbx, [r12 + 8] + mov [r12], rbx + mov [r12 + 8], rax + ret +word__2b: + mov rax, [r12] + add r12, 8 + add qword [r12], rax + ret +word__2d: + mov rax, [r12] + add r12, 8 + sub qword [r12], rax + ret +word__2a: + mov rax, [r12] + add r12, 8 + imul qword [r12] + mov [r12], rax + ret +word__2f: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rax + ret +word__25: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rdx + ret +word__3d_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + sete bl + mov [r12], rbx + ret +word__21_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setne bl + mov [r12], rbx + ret +word__3c: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setl bl + mov [r12], rbx + ret +word__3e: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setg bl + mov [r12], rbx + ret +word__3c_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setle bl + mov [r12], rbx + ret +word__3e_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setge bl + mov [r12], rbx + ret +word__40: + mov rax, [r12] + mov rax, [rax] + mov [r12], rax + ret +word__21: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + mov [rax], rbx + add r12, 8 + ret +word_mmap: + mov r9, [r12] + add r12, 8 + mov r8, [r12] + add r12, 8 + mov r10, [r12] + add r12, 8 + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 9 + syscall + mov [r12], rax + ret +word_munmap: + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 11 + syscall + mov [r12], rax + ret +word_exit: + mov rdi, [r12] + add r12, 8 + mov rax, 60 + syscall + ret +word__3er: + mov rax, [r12] + add r12, 8 + sub r13, 8 + mov [r13], rax + ret +word_r_3e: + mov rax, [r13] + add r13, 8 + sub r12, 8 + mov [r12], rax + ret +word_rdrop: + add r13, 8 + ret +word_pick: + mov rcx, [r12] + add r12, 8 + mov rax, [r12 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_rpick: + mov rcx, [r12] + add r12, 8 + mov rax, [r13 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_main: + ; push str_0 + sub r12, 8 + mov qword [r12], str_0 + ; push 11 + sub r12, 8 + mov qword [r12], 11 + call word_puts_str + ret +section .data +str_0: db 104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100, 0 +str_0_len equ 11 +section .bss +align 16 +dstack: resb DSTK_BYTES +dstack_top: +align 16 +rstack: resb RSTK_BYTES +rstack_top: +align 16 +print_buf: resb PRINT_BUF_BYTES +print_buf_end: \ No newline at end of file diff --git a/build/a.o b/build/a.o new file mode 100644 index 0000000..515fd45 Binary files /dev/null and b/build/a.o differ diff --git a/build/call_syntax_parens.asm b/build/call_syntax_parens.asm index 0bd0700..7df47c7 100644 --- a/build/call_syntax_parens.asm +++ b/build/call_syntax_parens.asm @@ -65,6 +65,25 @@ puts_finish_digits: mov rsi, r9 syscall ret +word_puts_str: + ; expects (addr, len) on data stack + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + cmp rdx, 0 + je puts_str_write_newline + mov rax, 1 + mov rdi, 1 + syscall +puts_str_write_newline: + mov byte [rel print_buf], 10 + mov rax, 1 + mov rdi, 1 + lea rsi, [rel print_buf] + mov rdx, 1 + syscall + ret word_dup: mov rax, [r12] sub r12, 8 diff --git a/build/call_syntax_parens.o b/build/call_syntax_parens.o index 2cb5c54..4f2a189 100644 Binary files a/build/call_syntax_parens.o and b/build/call_syntax_parens.o differ diff --git a/build/loops_and_cmp.asm b/build/loops_and_cmp.asm index c6ae2cf..58926cf 100644 --- a/build/loops_and_cmp.asm +++ b/build/loops_and_cmp.asm @@ -65,6 +65,25 @@ puts_finish_digits: mov rsi, r9 syscall ret +word_puts_str: + ; expects (addr, len) on data stack + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + cmp rdx, 0 + je puts_str_write_newline + mov rax, 1 + mov rdi, 1 + syscall +puts_str_write_newline: + mov byte [rel print_buf], 10 + mov rax, 1 + mov rdi, 1 + lea rsi, [rel print_buf] + mov rdx, 1 + syscall + ret word_dup: mov rax, [r12] sub r12, 8 diff --git a/build/loops_and_cmp.o b/build/loops_and_cmp.o index 1f7e196..8e4f71d 100644 Binary files a/build/loops_and_cmp.o and b/build/loops_and_cmp.o differ diff --git a/build/override_dup_compile_time.asm b/build/override_dup_compile_time.asm index 1fc2fbb..99c6ae4 100644 --- a/build/override_dup_compile_time.asm +++ b/build/override_dup_compile_time.asm @@ -65,6 +65,25 @@ puts_finish_digits: mov rsi, r9 syscall ret +word_puts_str: + ; expects (addr, len) on data stack + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + cmp rdx, 0 + je puts_str_write_newline + mov rax, 1 + mov rdi, 1 + syscall +puts_str_write_newline: + mov byte [rel print_buf], 10 + mov rax, 1 + mov rdi, 1 + lea rsi, [rel print_buf] + mov rdx, 1 + syscall + ret word_dup: mov rax, [r12] sub r12, 8 diff --git a/build/override_dup_compile_time.o b/build/override_dup_compile_time.o index 14b8148..388f9eb 100644 Binary files a/build/override_dup_compile_time.o and b/build/override_dup_compile_time.o differ diff --git a/build/string_puts.asm b/build/string_puts.asm new file mode 100644 index 0000000..edace3c --- /dev/null +++ b/build/string_puts.asm @@ -0,0 +1,305 @@ +section .text +%define DSTK_BYTES 65536 +%define RSTK_BYTES 65536 +%define PRINT_BUF_BYTES 128 +global _start +_start: + ; initialize data/return stack pointers + lea r12, [rel dstack_top] + mov r15, r12 + lea r13, [rel rstack_top] + call word_main + mov rax, 0 + cmp r12, r15 + je .no_exit_value + mov rax, [r12] + add r12, 8 +.no_exit_value: + mov rdi, rax + mov rax, 60 + syscall +word_puts: + mov rax, [r12] + add r12, 8 + mov rbx, rax + mov r8, 0 + cmp rbx, 0 + jge puts_abs + neg rbx + mov r8, 1 +puts_abs: + lea rsi, [rel print_buf_end] + mov rcx, 0 + mov r10, 10 + cmp rbx, 0 + jne puts_digits + dec rsi + mov byte [rsi], '0' + inc rcx + jmp puts_sign +puts_digits: +puts_loop: + xor rdx, rdx + mov rax, rbx + div r10 + add dl, '0' + dec rsi + mov [rsi], dl + inc rcx + mov rbx, rax + test rbx, rbx + jne puts_loop +puts_sign: + cmp r8, 0 + je puts_finish_digits + dec rsi + mov byte [rsi], '-' + inc rcx +puts_finish_digits: + mov byte [rsi + rcx], 10 + inc rcx + mov rax, 1 + mov rdi, 1 + mov rdx, rcx + mov r9, rsi + mov rsi, r9 + syscall + ret +word_puts_str: + ; expects (addr, len) on data stack + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + cmp rdx, 0 + je puts_str_write_newline + mov rax, 1 + mov rdi, 1 + syscall +puts_str_write_newline: + mov byte [rel print_buf], 10 + mov rax, 1 + mov rdi, 1 + lea rsi, [rel print_buf] + mov rdx, 1 + syscall + ret +word_dup: + mov rax, [r12] + sub r12, 8 + mov [r12], rax + ret +word_drop: + add r12, 8 + ret +word_over: + mov rax, [r12 + 8] + sub r12, 8 + mov [r12], rax + ret +word_swap: + mov rax, [r12] + mov rbx, [r12 + 8] + mov [r12], rbx + mov [r12 + 8], rax + ret +word__2b: + mov rax, [r12] + add r12, 8 + add qword [r12], rax + ret +word__2d: + mov rax, [r12] + add r12, 8 + sub qword [r12], rax + ret +word__2a: + mov rax, [r12] + add r12, 8 + imul qword [r12] + mov [r12], rax + ret +word__2f: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rax + ret +word__25: + mov rbx, [r12] + add r12, 8 + mov rax, [r12] + cqo + idiv rbx + mov [r12], rdx + ret +word__3d_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + sete bl + mov [r12], rbx + ret +word__21_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setne bl + mov [r12], rbx + ret +word__3c: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setl bl + mov [r12], rbx + ret +word__3e: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setg bl + mov [r12], rbx + ret +word__3c_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setle bl + mov [r12], rbx + ret +word__3e_3d: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + cmp rbx, rax + mov rbx, 0 + setge bl + mov [r12], rbx + ret +word__40: + mov rax, [r12] + mov rax, [rax] + mov [r12], rax + ret +word__21: + mov rax, [r12] + add r12, 8 + mov rbx, [r12] + mov [rax], rbx + add r12, 8 + ret +word_mmap: + mov r9, [r12] + add r12, 8 + mov r8, [r12] + add r12, 8 + mov r10, [r12] + add r12, 8 + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 9 + syscall + mov [r12], rax + ret +word_munmap: + mov rsi, [r12] + add r12, 8 + mov rdi, [r12] + mov rax, 11 + syscall + mov [r12], rax + ret +word_exit: + mov rdi, [r12] + add r12, 8 + mov rax, 60 + syscall + ret +word__3er: + mov rax, [r12] + add r12, 8 + sub r13, 8 + mov [r13], rax + ret +word_r_3e: + mov rax, [r13] + add r13, 8 + sub r12, 8 + mov [r12], rax + ret +word_rdrop: + add r13, 8 + ret +word_pick: + mov rcx, [r12] + add r12, 8 + mov rax, [r12 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_rpick: + mov rcx, [r12] + add r12, 8 + mov rax, [r13 + rcx * 8] + sub r12, 8 + mov [r12], rax + ret +word_main: + ; push str_0 + sub r12, 8 + mov qword [r12], str_0 + ; push 11 + sub r12, 8 + mov qword [r12], 11 + call word_puts_str + ; push str_1 + sub r12, 8 + mov qword [r12], str_1 + ; push 11 + sub r12, 8 + mov qword [r12], 11 + call word_puts_str + ; push str_2 + sub r12, 8 + mov qword [r12], str_2 + ; push 0 + sub r12, 8 + mov qword [r12], 0 + call word_puts_str + ; push 0 + sub r12, 8 + mov qword [r12], 0 + ret +section .data +str_0: db 104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100, 0 +str_0_len equ 11 +str_1: db 108, 105, 110, 101, 49, 10, 108, 105, 110, 101, 50, 0 +str_1_len equ 11 +str_2: db 0 +str_2_len equ 0 +section .bss +align 16 +dstack: resb DSTK_BYTES +dstack_top: +align 16 +rstack: resb RSTK_BYTES +rstack_top: +align 16 +print_buf: resb PRINT_BUF_BYTES +print_buf_end: \ No newline at end of file diff --git a/build/string_puts.o b/build/string_puts.o new file mode 100644 index 0000000..8b8d1c8 Binary files /dev/null and b/build/string_puts.o differ diff --git a/hello.sl b/hello.sl new file mode 100644 index 0000000..5ce78b8 --- /dev/null +++ b/hello.sl @@ -0,0 +1,5 @@ +import stdlib.sl + +: main + "hello world" puts_str +; \ No newline at end of file diff --git a/main.py b/main.py index 7bffc89..1ea71d1 100644 --- a/main.py +++ b/main.py @@ -1006,6 +1006,13 @@ class FunctionEmitter: f" mov qword [r12], {value}", ]) + def push_label(self, label: str) -> None: + self.text.extend([ + f" ; push {label}", + " sub r12, 8", + f" mov qword [r12], {label}", + ]) + def push_from(self, register: str) -> None: self.text.extend([ " sub r12, 8", @@ -1083,10 +1090,14 @@ class Assembler: self.dictionary = dictionary self.stack_bytes = 65536 self.io_buffer_bytes = 128 + self._string_literals: Dict[str, Tuple[str, int]] = {} + self._data_section: Optional[List[str]] = None def emit(self, module: Module) -> Emission: emission = Emission() emission.text.extend(self._runtime_prelude()) + self._string_literals = {} + self._data_section = emission.data valid_defs = (Definition, AsmDefinition) definitions = [form for form in module.forms if isinstance(form, valid_defs)] @@ -1104,8 +1115,23 @@ class Assembler: self._emit_definition(definition, emission.text) emission.bss.extend(self._bss_layout()) + self._data_section = None return emission + def _intern_string_literal(self, value: str) -> Tuple[str, int]: + if self._data_section is None: + raise CompileError("string literal emission requested without data section") + if value in self._string_literals: + return self._string_literals[value] + label = f"str_{len(self._string_literals)}" + encoded = value.encode("utf-8") + bytes_with_nul = list(encoded) + [0] + byte_list = ", ".join(str(b) for b in bytes_with_nul) + self._data_section.append(f"{label}: db {byte_list}") + self._data_section.append(f"{label}_len equ {len(encoded)}") + self._string_literals[value] = (label, len(encoded)) + return self._string_literals[value] + def _emit_definition(self, definition: Union[Definition, AsmDefinition], text: List[str]) -> None: label = sanitize_label(definition.name) text.append(f"{label}:") @@ -1131,9 +1157,15 @@ class Assembler: def _emit_node(self, node: ASTNode, builder: FunctionEmitter) -> None: if isinstance(node, Literal): - if not isinstance(node.value, int): - raise CompileError("string literals are compile-time only") - builder.push_literal(node.value) + if isinstance(node.value, int): + builder.push_literal(node.value) + return + if isinstance(node.value, str): + label, length = self._intern_string_literal(node.value) + builder.push_label(label) + builder.push_literal(length) + return + raise CompileError(f"unsupported literal type {type(node.value)!r}") return if isinstance(node, WordRef): self._emit_wordref(node, builder) diff --git a/stdlib.sl b/stdlib.sl index 28381c8..d7b2c85 100644 --- a/stdlib.sl +++ b/stdlib.sl @@ -47,6 +47,27 @@ puts_finish_digits: } ; +:asm puts_str { + ; expects (addr, len) on data stack + mov rdx, [r12] + add r12, 8 + mov rsi, [r12] + add r12, 8 + cmp rdx, 0 + je puts_str_write_newline + mov rax, 1 + mov rdi, 1 + syscall +puts_str_write_newline: + mov byte [rel print_buf], 10 + mov rax, 1 + mov rdi, 1 + lea rsi, [rel print_buf] + mov rdx, 1 + syscall +} +; + :asm dup { mov rax, [r12] sub r12, 8 diff --git a/tests/run_tests.py b/tests/run_tests.py index 68a6af3..4578995 100644 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -99,6 +99,20 @@ compile-only """, expected_stdout="6\n", ), + TestCase( + name="string_puts", + source=f""" +import {ROOT / 'stdlib.sl'} + +: main + \"hello world\" puts_str + \"line1\\nline2\" puts_str + \"\" puts_str + 0 +; +""", + expected_stdout="hello world\nline1\nline2\n\n", + ), ]