#include #include #include #include #include #include #include #include #include #include #include #include #define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0])) static void *xmalloc(size_t size) { void *ptr = malloc(size); if (!ptr) { fprintf(stderr, "[error] out of memory\n"); exit(1); } return ptr; } static void *xrealloc(void *ptr, size_t size) { void *out = realloc(ptr, size); if (!out) { fprintf(stderr, "[error] out of memory\n"); exit(1); } return out; } static char *str_dup(const char *src) { if (!src) { return NULL; } size_t len = strlen(src); char *out = (char *)xmalloc(len + 1); memcpy(out, src, len + 1); return out; } static char *str_printf(const char *fmt, ...) { va_list args; va_start(args, fmt); va_list args2; va_copy(args2, args); int needed = vsnprintf(NULL, 0, fmt, args2); va_end(args2); if (needed < 0) { va_end(args); return str_dup(""); } char *buf = (char *)xmalloc((size_t)needed + 1); vsnprintf(buf, (size_t)needed + 1, fmt, args); va_end(args); return buf; } static bool str_starts_with(const char *text, const char *prefix) { if (!text || !prefix) { return false; } size_t len = strlen(prefix); return strncmp(text, prefix, len) == 0; } static bool str_equals(const char *a, const char *b) { if (!a || !b) { return false; } return strcmp(a, b) == 0; } static uint64_t hash_str(const char *text) { uint64_t hash = 1469598103934665603ULL; while (*text) { hash ^= (unsigned char)(*text++); hash *= 1099511628211ULL; } return hash; } #define VEC_DECL(name, type) \ typedef struct { \ type *data; \ size_t len; \ size_t cap; \ } name #define VEC_INIT(vec) do { (vec)->data = NULL; (vec)->len = 0; (vec)->cap = 0; } while (0) #define VEC_FREE(vec) do { free((vec)->data); (vec)->data = NULL; (vec)->len = 0; (vec)->cap = 0; } while (0) #define VEC_PUSH(vec, value) do { \ if ((vec)->len + 1 > (vec)->cap) { \ (vec)->cap = (vec)->cap ? (vec)->cap * 2 : 8; \ (vec)->data = xrealloc((vec)->data, (vec)->cap * sizeof(*(vec)->data)); \ } \ (vec)->data[(vec)->len++] = (value); \ } while (0) #define VEC_POP(vec) ((vec)->len ? (vec)->data[--(vec)->len] : (vec)->data[0]) VEC_DECL(StrVec, char *); VEC_DECL(IntVec, int); static bool strvec_contains(StrVec *vec, const char *value) { if (!vec || !value) { return false; } for (size_t i = 0; i < vec->len; i++) { if (strcmp(vec->data[i], value) == 0) { return true; } } return false; } typedef struct { char *lexeme; int line; int column; int start; int end; } Token; VEC_DECL(TokenVec, Token); typedef struct { char *path; int line; int column; } SourceLocation; typedef struct { char *path; int start_line; int end_line; int local_start_line; } FileSpan; VEC_DECL(FileSpanVec, FileSpan); typedef enum { OP_LITERAL, OP_WORD, OP_BRANCH_ZERO, OP_JUMP, OP_LABEL, OP_FOR_BEGIN, OP_FOR_END, OP_LIST_BEGIN, OP_LIST_END } OpKind; typedef enum { LIT_INT, LIT_FLOAT, LIT_STRING } LiteralKind; typedef struct { OpKind kind; LiteralKind lit_kind; SourceLocation *loc; union { int64_t i64; double f64; char *str; char *word; char *label; struct { char *loop; char *end; } loop; } data; } Op; VEC_DECL(OpVec, Op); typedef struct { char *name; OpVec body; bool immediate; bool compile_only; char *terminator; bool inline_def; } Definition; typedef struct { char *name; char *body; bool immediate; bool compile_only; bool effect_string_io; } AsmDefinition; typedef enum { FORM_DEF, FORM_ASM } FormKind; typedef struct { FormKind kind; void *ptr; } Form; VEC_DECL(FormVec, Form); typedef struct { StrVec text; StrVec data; StrVec bss; } Emission; typedef struct { StrVec *text; bool debug_enabled; SourceLocation *current_loc; } FunctionEmitter; typedef struct Word Word; typedef struct CompileTimeVM CompileTimeVM; typedef struct Parser Parser; typedef void (*MacroFn)(Parser *parser); typedef void (*IntrinsicEmitter)(FunctionEmitter *builder); typedef void (*CompileTimeIntrinsic)(CompileTimeVM *vm); struct Word { char *name; bool immediate; bool compile_only; bool compile_time_override; bool is_extern; int extern_inputs; int extern_outputs; char **extern_arg_types; int extern_arg_count; char *extern_ret_type; bool inline_def; Definition *definition; Definition *ct_definition; Definition *prev_definition; AsmDefinition *asm_def; AsmDefinition *ct_asm_def; AsmDefinition *prev_asm_def; MacroFn macro; IntrinsicEmitter intrinsic; CompileTimeIntrinsic ct_intrinsic; char **macro_expansion; int macro_param_count; }; typedef struct { char **keys; void **values; size_t cap; size_t len; } StrMap; static void strmap_init(StrMap *map) { map->keys = NULL; map->values = NULL; map->cap = 0; map->len = 0; } static void strmap_free(StrMap *map) { free(map->keys); free(map->values); map->keys = NULL; map->values = NULL; map->cap = 0; map->len = 0; } static void strmap_grow(StrMap *map) { size_t new_cap = map->cap ? map->cap * 2 : 128; char **new_keys = (char **)xmalloc(new_cap * sizeof(char *)); void **new_vals = (void **)xmalloc(new_cap * sizeof(void *)); for (size_t i = 0; i < new_cap; i++) { new_keys[i] = NULL; new_vals[i] = NULL; } if (map->keys) { for (size_t i = 0; i < map->cap; i++) { if (!map->keys[i]) { continue; } uint64_t hash = hash_str(map->keys[i]); size_t idx = (size_t)(hash & (new_cap - 1)); while (new_keys[idx]) { idx = (idx + 1) & (new_cap - 1); } new_keys[idx] = map->keys[i]; new_vals[idx] = map->values[i]; } } free(map->keys); free(map->values); map->keys = new_keys; map->values = new_vals; map->cap = new_cap; } static void strmap_set(StrMap *map, const char *key, void *value) { if (!map->cap || (map->len + 1) * 3 >= map->cap * 2) { strmap_grow(map); } uint64_t hash = hash_str(key); size_t idx = (size_t)(hash & (map->cap - 1)); while (map->keys[idx]) { if (strcmp(map->keys[idx], key) == 0) { map->values[idx] = value; return; } idx = (idx + 1) & (map->cap - 1); } map->keys[idx] = str_dup(key); map->values[idx] = value; map->len++; } static void *strmap_get(StrMap *map, const char *key) { if (!map->cap) { return NULL; } uint64_t hash = hash_str(key); size_t idx = (size_t)(hash & (map->cap - 1)); size_t start = idx; while (map->keys[idx]) { if (strcmp(map->keys[idx], key) == 0) { return map->values[idx]; } idx = (idx + 1) & (map->cap - 1); if (idx == start) { break; } } return NULL; } static bool strmap_has(StrMap *map, const char *key) { return strmap_get(map, key) != NULL; } typedef struct { StrMap words; } Dictionary; static void dictionary_init(Dictionary *dict) { strmap_init(&dict->words); } static Word *dictionary_lookup(Dictionary *dict, const char *name) { return (Word *)strmap_get(&dict->words, name); } static void dictionary_register(Dictionary *dict, Word *word) { strmap_set(&dict->words, word->name, word); } typedef struct { StrVec custom_tokens; StrVec token_order; } Reader; static void reader_init(Reader *reader) { VEC_INIT(&reader->custom_tokens); VEC_INIT(&reader->token_order); const char *defaults[] = {"(", ")", "{", "}", ";", ",", "[", "]"}; for (size_t i = 0; i < ARRAY_LEN(defaults); i++) { VEC_PUSH(&reader->custom_tokens, str_dup(defaults[i])); } for (size_t i = 0; i < reader->custom_tokens.len; i++) { VEC_PUSH(&reader->token_order, reader->custom_tokens.data[i]); } } static void reader_resort(Reader *reader) { for (size_t i = 0; i < reader->token_order.len; i++) { for (size_t j = i + 1; j < reader->token_order.len; j++) { if (strlen(reader->token_order.data[j]) > strlen(reader->token_order.data[i])) { char *tmp = reader->token_order.data[i]; reader->token_order.data[i] = reader->token_order.data[j]; reader->token_order.data[j] = tmp; } } } } static void reader_add_tokens(Reader *reader, const char *tok) { if (!tok || !*tok) { return; } for (size_t i = 0; i < reader->custom_tokens.len; i++) { if (strcmp(reader->custom_tokens.data[i], tok) == 0) { return; } } VEC_PUSH(&reader->custom_tokens, str_dup(tok)); VEC_PUSH(&reader->token_order, reader->custom_tokens.data[reader->custom_tokens.len - 1]); reader_resort(reader); } static void reader_add_token_chars(Reader *reader, const char *chars) { if (!chars) { return; } char buf[2] = {0, 0}; for (const char *p = chars; *p; p++) { buf[0] = *p; reader_add_tokens(reader, buf); } } typedef struct { const char *source; size_t length; size_t index; int line; int column; Reader *reader; } Tokenizer; static void tokenizer_init(Tokenizer *tokenizer, Reader *reader, const char *source) { tokenizer->source = source; tokenizer->length = strlen(source); tokenizer->index = 0; tokenizer->line = 1; tokenizer->column = 0; tokenizer->reader = reader; } static bool tokenizer_next(Tokenizer *tokenizer, Token *out) { const char *src = tokenizer->source; size_t len = tokenizer->length; size_t idx = tokenizer->index; int line = tokenizer->line; int col = tokenizer->column; while (idx < len) { char ch = src[idx]; if (ch == '"') { size_t start = idx; int token_line = line; int token_col = col; idx++; col++; bool escape = false; while (idx < len) { char c = src[idx++]; if (c == '\n') { line++; col = 0; } else { col++; } if (escape) { escape = false; continue; } if (c == '\\') { escape = true; continue; } if (c == '"') { size_t end = idx; size_t tok_len = end - start; char *lex = (char *)xmalloc(tok_len + 1); memcpy(lex, src + start, tok_len); lex[tok_len] = '\0'; out->lexeme = lex; out->line = token_line; out->column = token_col; out->start = (int)start; out->end = (int)end; tokenizer->index = idx; tokenizer->line = line; tokenizer->column = col; return true; } } fprintf(stderr, "[error] unterminated string literal\n"); exit(1); } if (ch == '#') { while (idx < len && src[idx] != '\n') { idx++; } continue; } if (ch == ';' && idx + 1 < len && isalpha((unsigned char)src[idx + 1])) { size_t start = idx; int token_line = line; int token_col = col; idx++; col++; size_t tok_len = idx - start; char *lex = (char *)xmalloc(tok_len + 1); memcpy(lex, src + start, tok_len); lex[tok_len] = '\0'; out->lexeme = lex; out->line = token_line; out->column = token_col; out->start = (int)start; out->end = (int)idx; tokenizer->index = idx; tokenizer->line = line; tokenizer->column = col; return true; } bool matched = false; const char *matched_tok = NULL; for (size_t i = 0; i < tokenizer->reader->token_order.len; i++) { const char *tok = tokenizer->reader->token_order.data[i]; size_t tok_len = strlen(tok); if (tok_len == 0) { continue; } if (idx + tok_len <= len && strncmp(src + idx, tok, tok_len) == 0) { matched = true; matched_tok = tok; size_t start = idx; int token_line = line; int token_col = col; idx += tok_len; col += (int)tok_len; out->lexeme = str_dup(matched_tok); out->line = token_line; out->column = token_col; out->start = (int)start; out->end = (int)idx; tokenizer->index = idx; tokenizer->line = line; tokenizer->column = col; return true; } } if (matched) { continue; } if (isspace((unsigned char)ch)) { if (ch == '\n') { line++; col = 0; } else { col++; } idx++; continue; } size_t start = idx; int token_line = line; int token_col = col; while (idx < len) { char c = src[idx]; bool is_sep = isspace((unsigned char)c) || c == '"' || c == '#'; if (is_sep) { break; } bool token_hit = false; for (size_t i = 0; i < tokenizer->reader->token_order.len; i++) { const char *tok = tokenizer->reader->token_order.data[i]; size_t tok_len = strlen(tok); if (tok_len && idx + tok_len <= len && strncmp(src + idx, tok, tok_len) == 0) { token_hit = true; break; } } if (token_hit) { break; } idx++; col++; } size_t tok_len = idx - start; if (tok_len) { char *lex = (char *)xmalloc(tok_len + 1); memcpy(lex, src + start, tok_len); lex[tok_len] = '\0'; out->lexeme = lex; out->line = token_line; out->column = token_col; out->start = (int)start; out->end = (int)idx; tokenizer->index = idx; tokenizer->line = line; tokenizer->column = col; return true; } idx++; col++; } tokenizer->index = idx; tokenizer->line = line; tokenizer->column = col; return false; } struct Parser { Dictionary *dictionary; Reader *reader; TokenVec tokens; size_t pos; Tokenizer tokenizer; bool tokenizer_exhausted; struct { FormVec forms; StrMap variables; StrVec *prelude; StrVec *bss; } module; Definition *current_def; Word **definition_stack; size_t definition_stack_len; size_t definition_stack_cap; Word *last_defined; FileSpanVec file_spans; char *source; struct { char *name; StrVec tokens; int param_count; bool active; } macro_recording; struct { char *type; char *false_label; char *end_label; char *begin_label; char *loop_label; int line; int column; StrVec with_names; } *control_stack; size_t control_len; size_t control_cap; int label_counter; char *token_hook; Token last_token; bool has_last_token; StrMap variable_labels; StrMap variable_words; CompileTimeVM *ct_vm; StrVec *custom_prelude; StrVec *custom_bss; bool pending_inline_def; bool uses_libc; bool uses_libm; char *primary_path; }; typedef enum { CT_NIL, CT_INT, CT_STR, CT_TOKEN, CT_LIST, CT_MAP, CT_LEXER } CtValueKind; typedef struct CtValue CtValue; VEC_DECL(CtValueVec, CtValue); typedef struct { CtValueVec items; } CtList; typedef struct { char **keys; CtValue *values; size_t cap; size_t len; } CtMap; typedef struct { Parser *parser; bool separators[256]; TokenVec buffer; } SplitLexer; struct CtValue { CtValueKind kind; union { int64_t i64; char *str; Token token; CtList *list; CtMap *map; SplitLexer *lexer; } as; }; struct CompileTimeVM { Parser *parser; Dictionary *dictionary; CtValueVec stack; CtValueVec rstack; IntVec loop_remaining; IntVec loop_begin; IntVec loop_initial; StrVec call_stack; }; static void ct_value_free(CtValue *value); static CtValue ct_make_nil(void) { CtValue v = {0}; v.kind = CT_NIL; return v; } static CtValue ct_make_int(int64_t i) { CtValue v = {0}; v.kind = CT_INT; v.as.i64 = i; return v; } static CtValue ct_make_str(const char *s) { CtValue v = {0}; v.kind = CT_STR; v.as.str = str_dup(s); return v; } static CtValue ct_make_token(Token token) { CtValue v = {0}; v.kind = CT_TOKEN; v.as.token = token; return v; } static CtValue ct_make_list(CtList *list) { CtValue v = {0}; v.kind = CT_LIST; v.as.list = list; return v; } static CtValue ct_make_map(CtMap *map) { CtValue v = {0}; v.kind = CT_MAP; v.as.map = map; return v; } static CtValue ct_make_lexer(SplitLexer *lexer) { CtValue v = {0}; v.kind = CT_LEXER; v.as.lexer = lexer; return v; } static void ct_value_free(CtValue *value) { if (!value) { return; } if (value->kind == CT_STR) { free(value->as.str); } } static void ct_stack_init(CtValueVec *vec) { VEC_INIT(vec); } static void ct_stack_push(CtValueVec *vec, CtValue value) { VEC_PUSH(vec, value); } static CtValue ct_stack_pop(CtValueVec *vec) { if (!vec->len) { CtValue v = ct_make_nil(); return v; } return VEC_POP(vec); } static CtValue ct_stack_peek(CtValueVec *vec) { if (!vec->len) { CtValue v = ct_make_nil(); return v; } return vec->data[vec->len - 1]; } static CtList *ct_list_new(void) { CtList *list = (CtList *)xmalloc(sizeof(CtList)); VEC_INIT(&list->items); return list; } static CtMap *ct_map_new(void) { CtMap *map = (CtMap *)xmalloc(sizeof(CtMap)); map->keys = NULL; map->values = NULL; map->cap = 0; map->len = 0; return map; } static void ct_map_grow(CtMap *map) { size_t new_cap = map->cap ? map->cap * 2 : 64; char **new_keys = (char **)xmalloc(new_cap * sizeof(char *)); CtValue *new_vals = (CtValue *)xmalloc(new_cap * sizeof(CtValue)); for (size_t i = 0; i < new_cap; i++) { new_keys[i] = NULL; } if (map->keys) { for (size_t i = 0; i < map->cap; i++) { if (!map->keys[i]) { continue; } uint64_t hash = hash_str(map->keys[i]); size_t idx = (size_t)(hash & (new_cap - 1)); while (new_keys[idx]) { idx = (idx + 1) & (new_cap - 1); } new_keys[idx] = map->keys[i]; new_vals[idx] = map->values[i]; } } free(map->keys); free(map->values); map->keys = new_keys; map->values = new_vals; map->cap = new_cap; } static void ct_map_set(CtMap *map, const char *key, CtValue value) { if (!map->cap || (map->len + 1) * 3 >= map->cap * 2) { ct_map_grow(map); } uint64_t hash = hash_str(key); size_t idx = (size_t)(hash & (map->cap - 1)); while (map->keys[idx]) { if (strcmp(map->keys[idx], key) == 0) { ct_value_free(&map->values[idx]); map->values[idx] = value; return; } idx = (idx + 1) & (map->cap - 1); } map->keys[idx] = str_dup(key); map->values[idx] = value; map->len++; } static bool ct_map_get(CtMap *map, const char *key, CtValue *out) { if (!map->cap) { return false; } uint64_t hash = hash_str(key); size_t idx = (size_t)(hash & (map->cap - 1)); size_t start = idx; while (map->keys[idx]) { if (strcmp(map->keys[idx], key) == 0) { *out = map->values[idx]; return true; } idx = (idx + 1) & (map->cap - 1); if (idx == start) { break; } } return false; } static void emit_line(FunctionEmitter *builder, const char *line) { VEC_PUSH(builder->text, str_dup(line)); } static void emitter_init(FunctionEmitter *builder, StrVec *text, bool debug) { builder->text = text; builder->debug_enabled = debug; builder->current_loc = NULL; } static char *sanitize_label(const char *name) { size_t len = strlen(name); char *out = (char *)xmalloc(len * 4 + 2); size_t pos = 0; for (size_t i = 0; i < len; i++) { unsigned char ch = (unsigned char)name[i]; if (isalnum(ch) || ch == '_') { out[pos++] = ch; } else { pos += (size_t)sprintf(out + pos, "_%02x", ch); } } if (pos == 0) { out[pos++] = 'a'; } if (isdigit((unsigned char)out[0])) { memmove(out + 1, out, pos); out[0] = '_'; pos++; } out[pos] = '\0'; return out; } static bool is_identifier(const char *text) { if (!text || !*text) { return false; } if (!(isalpha((unsigned char)text[0]) || text[0] == '_')) { return false; } for (const char *p = text + 1; *p; p++) { if (!(isalnum((unsigned char)*p) || *p == '_')) { return false; } } return true; } static char *path_basename(const char *path); static SourceLocation *location_for_token(Parser *parser, Token token) { for (size_t i = 0; i < parser->file_spans.len; i++) { FileSpan span = parser->file_spans.data[i]; if (token.line >= span.start_line && token.line < span.end_line) { int local_line = span.local_start_line + (token.line - span.start_line); SourceLocation *loc = (SourceLocation *)xmalloc(sizeof(SourceLocation)); loc->path = path_basename(span.path); loc->line = local_line; loc->column = token.column; return loc; } } SourceLocation *loc = (SourceLocation *)xmalloc(sizeof(SourceLocation)); loc->path = parser->primary_path ? path_basename(parser->primary_path) : str_dup(""); loc->line = token.line; loc->column = token.column; return loc; } static void parser_push_control(Parser *parser, const char *type) { if (parser->control_len + 1 > parser->control_cap) { parser->control_cap = parser->control_cap ? parser->control_cap * 2 : 16; parser->control_stack = xrealloc(parser->control_stack, parser->control_cap * sizeof(*parser->control_stack)); } parser->control_stack[parser->control_len].type = str_dup(type); parser->control_stack[parser->control_len].false_label = NULL; parser->control_stack[parser->control_len].end_label = NULL; parser->control_stack[parser->control_len].begin_label = NULL; parser->control_stack[parser->control_len].loop_label = NULL; parser->control_stack[parser->control_len].line = parser->has_last_token ? parser->last_token.line : 0; parser->control_stack[parser->control_len].column = parser->has_last_token ? parser->last_token.column : 0; VEC_INIT(&parser->control_stack[parser->control_len].with_names); parser->control_len++; } static int parser_pop_control(Parser *parser, const char *expected_type) { if (!parser->control_len) { return -1; } if (expected_type && strcmp(parser->control_stack[parser->control_len - 1].type, expected_type) != 0) { return -2; } parser->control_len--; return 0; } static void parser_emit_op(Parser *parser, Op op) { if (op.loc == NULL && parser->has_last_token) { op.loc = location_for_token(parser, parser->last_token); } if (parser->current_def) { VEC_PUSH(&parser->current_def->body, op); } else { Form form = {0}; form.kind = FORM_DEF; Definition *dummy = (Definition *)xmalloc(sizeof(Definition)); *dummy = (Definition){0}; dummy->name = str_dup(""); VEC_INIT(&dummy->body); VEC_PUSH(&dummy->body, op); form.ptr = dummy; VEC_PUSH(&parser->module.forms, form); } } static void parser_init(Parser *parser, Dictionary *dict, Reader *reader) { parser->dictionary = dict; parser->reader = reader; VEC_INIT(&parser->tokens); parser->pos = 0; parser->tokenizer_exhausted = false; VEC_INIT(&parser->module.forms); strmap_init(&parser->module.variables); parser->module.prelude = NULL; parser->module.bss = NULL; parser->current_def = NULL; parser->definition_stack = NULL; parser->definition_stack_len = 0; parser->definition_stack_cap = 0; parser->last_defined = NULL; VEC_INIT(&parser->file_spans); parser->source = NULL; parser->macro_recording.active = false; parser->control_stack = NULL; parser->control_len = 0; parser->control_cap = 0; parser->label_counter = 0; parser->token_hook = NULL; parser->has_last_token = false; strmap_init(&parser->variable_labels); strmap_init(&parser->variable_words); parser->ct_vm = NULL; parser->custom_prelude = NULL; parser->custom_bss = NULL; parser->pending_inline_def = false; parser->uses_libc = false; parser->uses_libm = false; parser->primary_path = NULL; } static void register_builtin_syscall(Parser *parser) { AsmDefinition *def = (AsmDefinition *)xmalloc(sizeof(AsmDefinition)); memset(def, 0, sizeof(AsmDefinition)); def->name = str_dup("syscall"); def->body = str_dup( " mov rax, [r12]\n" " add r12, 8\n" " mov rcx, [r12]\n" " add r12, 8\n" " cmp rcx, 6\n" " jle .sys_args\n" " mov rcx, 6\n" ".sys_args:\n" " cmp rcx, 6\n" " jl .arg5\n" " mov r9, [r12]\n" " add r12, 8\n" ".arg5:\n" " cmp rcx, 5\n" " jl .arg4\n" " mov r8, [r12]\n" " add r12, 8\n" ".arg4:\n" " cmp rcx, 4\n" " jl .arg3\n" " mov r10, [r12]\n" " add r12, 8\n" ".arg3:\n" " cmp rcx, 3\n" " jl .arg2\n" " mov rdx, [r12]\n" " add r12, 8\n" ".arg2:\n" " cmp rcx, 2\n" " jl .arg1\n" " mov rsi, [r12]\n" " add r12, 8\n" ".arg1:\n" " cmp rcx, 1\n" " jl .do_syscall\n" " mov rdi, [r12]\n" " add r12, 8\n" ".do_syscall:\n" " syscall\n" " sub r12, 8\n" " mov [r12], rax\n" ); Word *word = dictionary_lookup(parser->dictionary, def->name); if (!word) { word = (Word *)xmalloc(sizeof(Word)); memset(word, 0, sizeof(Word)); word->name = str_dup(def->name); dictionary_register(parser->dictionary, word); } word->asm_def = def; Form form = {0}; form.kind = FORM_ASM; form.ptr = def; VEC_PUSH(&parser->module.forms, form); } static void ensure_tokens(Parser *parser, size_t upto) { if (parser->tokenizer_exhausted) { return; } while (parser->tokens.len <= upto && !parser->tokenizer_exhausted) { Token tok = {0}; if (!tokenizer_next(&parser->tokenizer, &tok)) { parser->tokenizer_exhausted = true; break; } VEC_PUSH(&parser->tokens, tok); } } static bool parser_eof(Parser *parser) { ensure_tokens(parser, parser->pos); return parser->pos >= parser->tokens.len; } static Token parser_peek_token(Parser *parser) { ensure_tokens(parser, parser->pos); if (parser->pos >= parser->tokens.len) { Token empty = {0}; empty.lexeme = NULL; return empty; } return parser->tokens.data[parser->pos]; } static Token parser_next_token(Parser *parser) { ensure_tokens(parser, parser->pos); if (parser->pos >= parser->tokens.len) { Token empty = {0}; empty.lexeme = NULL; return empty; } Token tok = parser->tokens.data[parser->pos++]; parser->last_token = tok; parser->has_last_token = true; return tok; } static char *parser_new_label(Parser *parser, const char *prefix) { char *label = str_printf("L_%s_%d", prefix, parser->label_counter++); return label; } static void ct_vm_init(CompileTimeVM *vm, Parser *parser) { vm->parser = parser; vm->dictionary = parser->dictionary; ct_stack_init(&vm->stack); ct_stack_init(&vm->rstack); VEC_INIT(&vm->loop_remaining); VEC_INIT(&vm->loop_begin); VEC_INIT(&vm->loop_initial); VEC_INIT(&vm->call_stack); } static void ct_vm_reset(CompileTimeVM *vm) { vm->stack.len = 0; vm->rstack.len = 0; vm->loop_remaining.len = 0; vm->loop_begin.len = 0; vm->loop_initial.len = 0; vm->call_stack.len = 0; } static bool try_parse_int(const char *lexeme, int64_t *out); static void parser_inject_tokens(Parser *parser, TokenVec *injected); static void ct_trace_error(CompileTimeVM *vm, const char *msg) { fprintf(stderr, "[error] %s\n", msg); if (vm && vm->call_stack.len) { fprintf(stderr, "[error] compile-time call stack:\n"); for (size_t i = 0; i < vm->call_stack.len; i++) { fprintf(stderr, " - %s\n", vm->call_stack.data[i]); } } exit(1); } static int64_t ct_pop_int(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); if (v.kind == CT_STR) { int64_t out = 0; if (try_parse_int(v.as.str, &out)) { return out; } } if (v.kind != CT_INT) { const char *kind = "unknown"; const char *extra = ""; if (v.kind == CT_NIL) { kind = "nil"; } else if (v.kind == CT_STR) { kind = "string"; extra = v.as.str ? v.as.str : ""; } else if (v.kind == CT_TOKEN) { kind = "token"; extra = v.as.token.lexeme ? v.as.token.lexeme : ""; } else if (v.kind == CT_LIST) { kind = "list"; } else if (v.kind == CT_MAP) { kind = "map"; } else if (v.kind == CT_LEXER) { kind = "lexer"; } char *msg = NULL; if (extra[0] != '\0') { msg = str_printf("expected integer on compile-time stack (got %s: %s)", kind, extra); } else { msg = str_printf("expected integer on compile-time stack (got %s)", kind); } ct_trace_error(vm, msg); free(msg); } return v.as.i64; } static char *ct_pop_str(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); if (v.kind == CT_TOKEN) { return str_dup(v.as.token.lexeme); } if (v.kind != CT_STR) { ct_trace_error(vm, "expected string on compile-time stack"); } return str_dup(v.as.str); } static CtList *ct_pop_list(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); if (v.kind != CT_LIST) { ct_trace_error(vm, "expected list on compile-time stack"); } return v.as.list; } static CtMap *ct_pop_map(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); if (v.kind != CT_MAP) { ct_trace_error(vm, "expected map on compile-time stack"); } return v.as.map; } static Token ct_pop_token(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); if (v.kind == CT_TOKEN) { return v.as.token; } if (v.kind == CT_STR) { Token tok = {0}; tok.lexeme = v.as.str; tok.line = 0; tok.column = 0; tok.start = 0; tok.end = 0; return tok; } ct_trace_error(vm, "expected token on compile-time stack"); } static void ct_word_call(CompileTimeVM *vm, Word *word); static bool ct_try_asm_io(CompileTimeVM *vm, Word *word, AsmDefinition *asm_def) { if (asm_def && asm_def->effect_string_io) { CtValue v = ct_stack_pop(&vm->stack); if (v.kind == CT_STR) { FILE *out = stdout; if (strcmp(word->name, "ewrite_buf") == 0) { out = stderr; } fputs(v.as.str ? v.as.str : "", out); } else { ct_stack_pop(&vm->stack); } return true; } if (strcmp(word->name, "putc") == 0) { CtValue v = ct_stack_pop(&vm->stack); int ch = 0; if (v.kind == CT_INT) { ch = (int)v.as.i64; } else if (v.kind == CT_STR && v.as.str && v.as.str[0]) { ch = (unsigned char)v.as.str[0]; } fputc(ch, stdout); return true; } return false; } static void ct_execute_nodes(CompileTimeVM *vm, OpVec *nodes) { StrMap labels; strmap_init(&labels); for (size_t i = 0; i < nodes->len; i++) { Op *node = &nodes->data[i]; if (node->kind == OP_LABEL) { strmap_set(&labels, node->data.label, (void *)(uintptr_t)i); } } IntVec begin_stack; VEC_INIT(&begin_stack); size_t ip = 0; while (ip < nodes->len) { Op node = nodes->data[ip]; if (node.kind == OP_LITERAL) { if (node.lit_kind == LIT_INT) { ct_stack_push(&vm->stack, ct_make_int(node.data.i64)); } else if (node.lit_kind == LIT_FLOAT) { ct_stack_push(&vm->stack, ct_make_int((int64_t)node.data.f64)); } else if (node.lit_kind == LIT_STRING) { ct_stack_push(&vm->stack, ct_make_str(node.data.str)); } ip++; continue; } if (node.kind == OP_WORD) { const char *name = node.data.word; if (strcmp(name, "begin") == 0) { VEC_PUSH(&begin_stack, (int)ip); ip++; continue; } if (strcmp(name, "again") == 0) { if (!begin_stack.len) { fprintf(stderr, "[error] 'again' without matching 'begin'\n"); exit(1); } ip = (size_t)begin_stack.data[begin_stack.len - 1] + 1; continue; } if (strcmp(name, "continue") == 0) { if (!begin_stack.len) { fprintf(stderr, "[error] 'continue' outside begin/again loop\n"); exit(1); } ip = (size_t)begin_stack.data[begin_stack.len - 1] + 1; continue; } if (strcmp(name, "exit") == 0) { return; } Word *word = dictionary_lookup(vm->dictionary, name); if (!word) { fprintf(stderr, "[error] unknown word '%s' during compile-time execution\n", name); exit(1); } ct_word_call(vm, word); ip++; continue; } if (node.kind == OP_BRANCH_ZERO) { CtValue v = ct_stack_pop(&vm->stack); bool flag = false; if (v.kind == CT_INT) { flag = v.as.i64 != 0; } if (!flag) { void *target = strmap_get(&labels, node.data.label); if (!target) { fprintf(stderr, "[error] unknown label '%s' during compile-time execution\n", node.data.label); exit(1); } ip = (size_t)(uintptr_t)target; } else { ip++; } continue; } if (node.kind == OP_JUMP) { void *target = strmap_get(&labels, node.data.label); if (!target) { fprintf(stderr, "[error] unknown label '%s' during compile-time execution\n", node.data.label); exit(1); } ip = (size_t)(uintptr_t)target; continue; } if (node.kind == OP_FOR_BEGIN) { int64_t count = ct_pop_int(vm); if (count <= 0) { ip++; continue; } VEC_PUSH(&vm->loop_remaining, (int)count); VEC_PUSH(&vm->loop_begin, (int)ip); VEC_PUSH(&vm->loop_initial, (int)count); ip++; continue; } if (node.kind == OP_FOR_END) { if (!vm->loop_remaining.len) { fprintf(stderr, "[error] 'next' without matching 'for'\n"); exit(1); } int idx = (int)vm->loop_remaining.len - 1; vm->loop_remaining.data[idx] -= 1; if (vm->loop_remaining.data[idx] > 0) { ip = (size_t)vm->loop_begin.data[idx] + 1; } else { vm->loop_remaining.len--; vm->loop_begin.len--; vm->loop_initial.len--; ip++; } continue; } ip++; } } static void ct_word_call(CompileTimeVM *vm, Word *word) { VEC_PUSH(&vm->call_stack, str_dup(word->name)); if (word->compile_time_override) { if (word->ct_definition) { ct_execute_nodes(vm, &word->ct_definition->body); vm->call_stack.len--; return; } if (word->definition) { ct_execute_nodes(vm, &word->definition->body); vm->call_stack.len--; return; } if (word->ct_intrinsic) { word->ct_intrinsic(vm); vm->call_stack.len--; return; } if (word->ct_asm_def) { if (ct_try_asm_io(vm, word, word->ct_asm_def)) { vm->call_stack.len--; return; } vm->call_stack.len--; return; } } bool prefer_def = (word->definition && (word->immediate || word->compile_only)); if (!prefer_def && word->ct_intrinsic) { word->ct_intrinsic(vm); vm->call_stack.len--; return; } Definition *def = word->definition; if (word->compile_only && word->ct_definition) { def = word->ct_definition; } if (!def) { if (word->asm_def || word->ct_asm_def) { AsmDefinition *asm_def = word->ct_asm_def ? word->ct_asm_def : word->asm_def; ct_try_asm_io(vm, word, asm_def); vm->call_stack.len--; return; } if (word->is_extern) { int pops = word->extern_arg_count > 0 ? word->extern_arg_count : word->extern_inputs; for (int i = 0; i < pops; i++) { ct_stack_pop(&vm->stack); } int outputs = 0; if (word->extern_arg_count > 0) { if (!word->extern_ret_type || strcmp(word->extern_ret_type, "void") != 0) { outputs = 1; } } else { outputs = word->extern_outputs; } for (int i = 0; i < outputs; i++) { ct_stack_push(&vm->stack, ct_make_int(0)); } vm->call_stack.len--; return; } fprintf(stderr, "[error] word '%s' has no compile-time definition\n", word->name); exit(1); } ct_execute_nodes(vm, &def->body); vm->call_stack.len--; } static bool ct_truthy(CtValue v) { if (v.kind == CT_NIL) { return false; } if (v.kind == CT_INT) { return v.as.i64 != 0; } if (v.kind == CT_STR) { return v.as.str && v.as.str[0] != '\0'; } return true; } static char *ct_string_from_value(CtValue v) { if (v.kind == CT_TOKEN) { return str_dup(v.as.token.lexeme); } if (v.kind == CT_STR) { return str_dup(v.as.str); } if (v.kind == CT_INT) { return str_printf("%lld", (long long)v.as.i64); } return str_dup(""); } static void ct_intrinsic_dup(CompileTimeVM *vm) { CtValue v = ct_stack_peek(&vm->stack); ct_stack_push(&vm->stack, v); } static void ct_intrinsic_drop(CompileTimeVM *vm) { ct_stack_pop(&vm->stack); } static void ct_intrinsic_swap(CompileTimeVM *vm) { CtValue a = ct_stack_pop(&vm->stack); CtValue b = ct_stack_pop(&vm->stack); ct_stack_push(&vm->stack, a); ct_stack_push(&vm->stack, b); } static void ct_intrinsic_over(CompileTimeVM *vm) { if (vm->stack.len < 2) { fprintf(stderr, "[error] over expects at least 2 items\n"); exit(1); } CtValue v = vm->stack.data[vm->stack.len - 2]; ct_stack_push(&vm->stack, v); } static void ct_intrinsic_rot(CompileTimeVM *vm) { if (vm->stack.len < 3) { fprintf(stderr, "[error] rot expects at least 3 items\n"); exit(1); } CtValue a = vm->stack.data[vm->stack.len - 3]; CtValue b = vm->stack.data[vm->stack.len - 2]; CtValue c = vm->stack.data[vm->stack.len - 1]; vm->stack.data[vm->stack.len - 3] = b; vm->stack.data[vm->stack.len - 2] = c; vm->stack.data[vm->stack.len - 1] = a; } static void ct_intrinsic_pick(CompileTimeVM *vm) { int64_t idx = ct_pop_int(vm); if (idx < 0 || (size_t)(idx + 1) > vm->stack.len) { fprintf(stderr, "[error] pick index out of range\n"); exit(1); } CtValue v = vm->stack.data[vm->stack.len - 1 - (size_t)idx]; ct_stack_push(&vm->stack, v); } static void ct_intrinsic_rpick(CompileTimeVM *vm) { int64_t idx = ct_pop_int(vm); if (idx < 0 || (size_t)(idx + 1) > vm->rstack.len) { fprintf(stderr, "[error] rpick index out of range\n"); exit(1); } CtValue v = vm->rstack.data[vm->rstack.len - 1 - (size_t)idx]; ct_stack_push(&vm->stack, v); } static void ct_intrinsic_to_r(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); ct_stack_push(&vm->rstack, v); } static void ct_intrinsic_from_r(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->rstack); ct_stack_push(&vm->stack, v); } static void ct_intrinsic_rdrop(CompileTimeVM *vm) { ct_stack_pop(&vm->rstack); } static void ct_intrinsic_add(CompileTimeVM *vm) { int64_t b = ct_pop_int(vm); int64_t a = ct_pop_int(vm); ct_stack_push(&vm->stack, ct_make_int(a + b)); } static void ct_intrinsic_sub(CompileTimeVM *vm) { int64_t b = ct_pop_int(vm); int64_t a = ct_pop_int(vm); ct_stack_push(&vm->stack, ct_make_int(a - b)); } static void ct_intrinsic_mul(CompileTimeVM *vm) { int64_t b = ct_pop_int(vm); int64_t a = ct_pop_int(vm); ct_stack_push(&vm->stack, ct_make_int(a * b)); } static void ct_intrinsic_div(CompileTimeVM *vm) { int64_t b = ct_pop_int(vm); int64_t a = ct_pop_int(vm); if (b == 0) { fprintf(stderr, "[error] division by zero in compile-time VM\n"); exit(1); } ct_stack_push(&vm->stack, ct_make_int(a / b)); } static void ct_intrinsic_mod(CompileTimeVM *vm) { int64_t b = ct_pop_int(vm); int64_t a = ct_pop_int(vm); if (b == 0) { fprintf(stderr, "[error] modulo by zero in compile-time VM\n"); exit(1); } ct_stack_push(&vm->stack, ct_make_int(a % b)); } static void ct_intrinsic_eq(CompileTimeVM *vm) { CtValue b = ct_stack_pop(&vm->stack); CtValue a = ct_stack_pop(&vm->stack); if (a.kind == CT_INT && b.kind == CT_INT) { ct_stack_push(&vm->stack, ct_make_int(a.as.i64 == b.as.i64)); return; } char *sa = ct_string_from_value(a); char *sb = ct_string_from_value(b); bool eq = strcmp(sa, sb) == 0; free(sa); free(sb); ct_stack_push(&vm->stack, ct_make_int(eq)); } static void ct_intrinsic_gt(CompileTimeVM *vm) { int64_t b = ct_pop_int(vm); int64_t a = ct_pop_int(vm); ct_stack_push(&vm->stack, ct_make_int(a > b)); } static void ct_intrinsic_lt(CompileTimeVM *vm) { int64_t b = ct_pop_int(vm); int64_t a = ct_pop_int(vm); ct_stack_push(&vm->stack, ct_make_int(a < b)); } static void ct_intrinsic_ge(CompileTimeVM *vm) { int64_t b = ct_pop_int(vm); int64_t a = ct_pop_int(vm); ct_stack_push(&vm->stack, ct_make_int(a >= b)); } static void ct_intrinsic_le(CompileTimeVM *vm) { int64_t b = ct_pop_int(vm); int64_t a = ct_pop_int(vm); ct_stack_push(&vm->stack, ct_make_int(a <= b)); } static void ct_intrinsic_ne(CompileTimeVM *vm) { CtValue b = ct_stack_pop(&vm->stack); CtValue a = ct_stack_pop(&vm->stack); if (a.kind == CT_INT && b.kind == CT_INT) { ct_stack_push(&vm->stack, ct_make_int(a.as.i64 != b.as.i64)); return; } char *sa = ct_string_from_value(a); char *sb = ct_string_from_value(b); bool ne = strcmp(sa, sb) != 0; free(sa); free(sb); ct_stack_push(&vm->stack, ct_make_int(ne)); } static void ct_intrinsic_not(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); ct_stack_push(&vm->stack, ct_make_int(!ct_truthy(v))); } static void ct_intrinsic_nil(CompileTimeVM *vm) { ct_stack_push(&vm->stack, ct_make_nil()); } static void ct_intrinsic_nilp(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); ct_stack_push(&vm->stack, ct_make_int(v.kind == CT_NIL)); } static void ct_intrinsic_string_eq(CompileTimeVM *vm) { char *b = ct_pop_str(vm); char *a = ct_pop_str(vm); bool eq = strcmp(a, b) == 0; free(a); free(b); ct_stack_push(&vm->stack, ct_make_int(eq)); } static void ct_intrinsic_string_length(CompileTimeVM *vm) { char *s = ct_pop_str(vm); ct_stack_push(&vm->stack, ct_make_int((int64_t)strlen(s))); free(s); } static void ct_intrinsic_string_append(CompileTimeVM *vm) { char *b = ct_pop_str(vm); char *a = ct_pop_str(vm); char *out = str_printf("%s%s", a, b); free(a); free(b); ct_stack_push(&vm->stack, ct_make_str(out)); free(out); } static void ct_intrinsic_string_to_number(CompileTimeVM *vm) { char *s = ct_pop_str(vm); int64_t out = 0; bool ok = try_parse_int(s, &out); ct_stack_push(&vm->stack, ct_make_int(out)); ct_stack_push(&vm->stack, ct_make_int(ok ? 1 : 0)); free(s); } static void ct_intrinsic_int_to_string(CompileTimeVM *vm) { int64_t v = ct_pop_int(vm); char *out = str_printf("%lld", (long long)v); ct_stack_push(&vm->stack, ct_make_str(out)); free(out); } static void ct_intrinsic_identifierp(CompileTimeVM *vm) { char *s = ct_pop_str(vm); ct_stack_push(&vm->stack, ct_make_int(is_identifier(s))); free(s); } static void ct_intrinsic_list_new(CompileTimeVM *vm) { CtList *list = ct_list_new(); ct_stack_push(&vm->stack, ct_make_list(list)); } static void ct_intrinsic_list_append(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); CtList *list = ct_pop_list(vm); VEC_PUSH(&list->items, v); ct_stack_push(&vm->stack, ct_make_list(list)); } static void ct_intrinsic_list_pop(CompileTimeVM *vm) { CtList *list = ct_pop_list(vm); if (!list->items.len) { ct_stack_push(&vm->stack, ct_make_list(list)); ct_stack_push(&vm->stack, ct_make_nil()); return; } CtValue v = VEC_POP(&list->items); ct_stack_push(&vm->stack, ct_make_list(list)); ct_stack_push(&vm->stack, v); } static void ct_intrinsic_list_pop_front(CompileTimeVM *vm) { CtList *list = ct_pop_list(vm); if (!list->items.len) { ct_stack_push(&vm->stack, ct_make_list(list)); ct_stack_push(&vm->stack, ct_make_nil()); return; } CtValue v = list->items.data[0]; memmove(&list->items.data[0], &list->items.data[1], (list->items.len - 1) * sizeof(CtValue)); list->items.len--; ct_stack_push(&vm->stack, ct_make_list(list)); ct_stack_push(&vm->stack, v); } static void ct_intrinsic_list_length(CompileTimeVM *vm) { CtList *list = ct_pop_list(vm); ct_stack_push(&vm->stack, ct_make_int((int64_t)list->items.len)); } static void ct_intrinsic_list_empty(CompileTimeVM *vm) { CtList *list = ct_pop_list(vm); ct_stack_push(&vm->stack, ct_make_int(list->items.len == 0)); } static void ct_intrinsic_list_get(CompileTimeVM *vm) { int64_t idx = ct_pop_int(vm); CtList *list = ct_pop_list(vm); CtValue v = ct_make_nil(); if (idx >= 0 && (size_t)idx < list->items.len) { v = list->items.data[idx]; } ct_stack_push(&vm->stack, v); } static void ct_intrinsic_list_set(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); int64_t idx = ct_pop_int(vm); CtList *list = ct_pop_list(vm); if (idx < 0 || (size_t)idx >= list->items.len) { fprintf(stderr, "[error] list-set index out of range\n"); exit(1); } list->items.data[idx] = v; ct_stack_push(&vm->stack, ct_make_list(list)); } static void ct_intrinsic_list_extend(CompileTimeVM *vm) { CtList *list2 = ct_pop_list(vm); CtList *list1 = ct_pop_list(vm); for (size_t i = 0; i < list2->items.len; i++) { VEC_PUSH(&list1->items, list2->items.data[i]); } ct_stack_push(&vm->stack, ct_make_list(list1)); } static void ct_intrinsic_list_last(CompileTimeVM *vm) { CtList *list = ct_pop_list(vm); CtValue v = ct_make_nil(); if (list->items.len) { v = list->items.data[list->items.len - 1]; } ct_stack_push(&vm->stack, v); } static void ct_intrinsic_list_clone(CompileTimeVM *vm) { CtList *list = ct_pop_list(vm); CtList *out = ct_list_new(); for (size_t i = 0; i < list->items.len; i++) { VEC_PUSH(&out->items, list->items.data[i]); } ct_stack_push(&vm->stack, ct_make_list(list)); ct_stack_push(&vm->stack, ct_make_list(out)); } static void ct_intrinsic_map_new(CompileTimeVM *vm) { CtMap *map = ct_map_new(); ct_stack_push(&vm->stack, ct_make_map(map)); } static void ct_intrinsic_map_set(CompileTimeVM *vm) { CtValue val = ct_stack_pop(&vm->stack); char *key = ct_pop_str(vm); CtMap *map = ct_pop_map(vm); ct_map_set(map, key, val); free(key); ct_stack_push(&vm->stack, ct_make_map(map)); } static void ct_intrinsic_map_get(CompileTimeVM *vm) { char *key = ct_pop_str(vm); CtMap *map = ct_pop_map(vm); CtValue out = ct_make_nil(); bool ok = ct_map_get(map, key, &out); ct_stack_push(&vm->stack, ct_make_map(map)); ct_stack_push(&vm->stack, out); ct_stack_push(&vm->stack, ct_make_int(ok)); free(key); } static void ct_intrinsic_map_has(CompileTimeVM *vm) { char *key = ct_pop_str(vm); CtMap *map = ct_pop_map(vm); CtValue out = ct_make_nil(); bool ok = ct_map_get(map, key, &out); ct_stack_push(&vm->stack, ct_make_map(map)); ct_stack_push(&vm->stack, ct_make_int(ok)); free(key); } static void ct_intrinsic_token_lexeme(CompileTimeVM *vm) { Token tok = ct_pop_token(vm); ct_stack_push(&vm->stack, ct_make_str(tok.lexeme)); } static void ct_intrinsic_token_from_lexeme(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); if (v.kind == CT_NIL) { v = ct_stack_pop(&vm->stack); } char *lex = NULL; if (v.kind == CT_STR) { lex = str_dup(v.as.str); } else if (v.kind == CT_TOKEN) { lex = str_dup(v.as.token.lexeme); } else { ct_trace_error(vm, "expected string for token-from-lexeme"); } Token tok = {0}; tok.lexeme = lex; tok.line = 0; tok.column = 0; tok.start = 0; tok.end = 0; ct_stack_push(&vm->stack, ct_make_token(tok)); } static void ct_intrinsic_next_token(CompileTimeVM *vm) { Token tok = parser_next_token(vm->parser); if (!tok.lexeme) { ct_stack_push(&vm->stack, ct_make_nil()); return; } ct_stack_push(&vm->stack, ct_make_token(tok)); } static void ct_intrinsic_peek_token(CompileTimeVM *vm) { Token tok = parser_peek_token(vm->parser); if (!tok.lexeme) { ct_stack_push(&vm->stack, ct_make_nil()); return; } ct_stack_push(&vm->stack, ct_make_token(tok)); } static void ct_intrinsic_inject_tokens(CompileTimeVM *vm) { CtList *list = ct_pop_list(vm); TokenVec injected; VEC_INIT(&injected); for (size_t i = 0; i < list->items.len; i++) { CtValue v = list->items.data[i]; Token tok = {0}; if (v.kind == CT_TOKEN) { tok = v.as.token; } else if (v.kind == CT_STR) { tok.lexeme = str_dup(v.as.str); } else { tok.lexeme = ct_string_from_value(v); } VEC_PUSH(&injected, tok); } parser_inject_tokens(vm->parser, &injected); } static void ct_intrinsic_set_token_hook(CompileTimeVM *vm) { char *name = ct_pop_str(vm); if (vm->parser->token_hook) { free(vm->parser->token_hook); } vm->parser->token_hook = name; } static void ct_intrinsic_clear_token_hook(CompileTimeVM *vm) { if (vm->parser->token_hook) { free(vm->parser->token_hook); vm->parser->token_hook = NULL; } } static void ct_intrinsic_parse_error(CompileTimeVM *vm) { char *msg = ct_pop_str(vm); fprintf(stderr, "[error] %s\n", msg); free(msg); exit(1); } static void ct_intrinsic_add_token(CompileTimeVM *vm) { char *tok = ct_pop_str(vm); reader_add_tokens(vm->parser->reader, tok); free(tok); } static void ct_intrinsic_add_token_chars(CompileTimeVM *vm) { char *chars = ct_pop_str(vm); reader_add_token_chars(vm->parser->reader, chars); free(chars); } static void ct_intrinsic_prelude_clear(CompileTimeVM *vm) { if (!vm->parser->custom_prelude) { vm->parser->custom_prelude = (StrVec *)xmalloc(sizeof(StrVec)); VEC_INIT(vm->parser->custom_prelude); } vm->parser->custom_prelude->len = 0; } static void ct_intrinsic_prelude_append(CompileTimeVM *vm) { char *line = ct_pop_str(vm); if (!vm->parser->custom_prelude) { vm->parser->custom_prelude = (StrVec *)xmalloc(sizeof(StrVec)); VEC_INIT(vm->parser->custom_prelude); } VEC_PUSH(vm->parser->custom_prelude, line); } static void ct_intrinsic_bss_clear(CompileTimeVM *vm) { if (!vm->parser->custom_bss) { vm->parser->custom_bss = (StrVec *)xmalloc(sizeof(StrVec)); VEC_INIT(vm->parser->custom_bss); } vm->parser->custom_bss->len = 0; } static void ct_intrinsic_bss_append(CompileTimeVM *vm) { char *line = ct_pop_str(vm); if (!vm->parser->custom_bss) { vm->parser->custom_bss = (StrVec *)xmalloc(sizeof(StrVec)); VEC_INIT(vm->parser->custom_bss); } VEC_PUSH(vm->parser->custom_bss, line); } static void ct_intrinsic_use_l2_ct(CompileTimeVM *vm) { char *name = ct_pop_str(vm); Word *word = dictionary_lookup(vm->dictionary, name); if (!word) { word = (Word *)xmalloc(sizeof(Word)); memset(word, 0, sizeof(Word)); word->name = str_dup(name); dictionary_register(vm->dictionary, word); } word->compile_time_override = true; free(name); } static CtList *ct_list_from_tokens(const char **tokens, size_t count) { CtList *list = ct_list_new(); for (size_t i = 0; i < count; i++) { VEC_PUSH(&list->items, ct_make_str(tokens[i])); } return list; } static void ct_intrinsic_shunt(CompileTimeVM *vm) { CtList *list = ct_pop_list(vm); CtList *output = ct_list_new(); CtList *ops = ct_list_new(); for (size_t i = 0; i < list->items.len; i++) { CtValue tok = list->items.data[i]; char *lex = ct_string_from_value(tok); if (strcmp(lex, "(") == 0) { VEC_PUSH(&ops->items, ct_make_str(lex)); free(lex); continue; } if (strcmp(lex, ")") == 0) { while (ops->items.len) { CtValue top = ops->items.data[ops->items.len - 1]; char *top_lex = ct_string_from_value(top); if (strcmp(top_lex, "(") == 0) { ops->items.len--; free(top_lex); break; } VEC_PUSH(&output->items, top); ops->items.len--; free(top_lex); } free(lex); continue; } int prec = 0; if (strcmp(lex, "+") == 0 || strcmp(lex, "-") == 0) { prec = 1; } else if (strcmp(lex, "*") == 0 || strcmp(lex, "/") == 0 || strcmp(lex, "%") == 0) { prec = 2; } if (prec > 0) { while (ops->items.len) { CtValue top = ops->items.data[ops->items.len - 1]; char *top_lex = ct_string_from_value(top); int top_prec = 0; if (strcmp(top_lex, "+") == 0 || strcmp(top_lex, "-") == 0) { top_prec = 1; } else if (strcmp(top_lex, "*") == 0 || strcmp(top_lex, "/") == 0 || strcmp(top_lex, "%") == 0) { top_prec = 2; } if (top_prec >= prec) { VEC_PUSH(&output->items, top); ops->items.len--; } else { free(top_lex); break; } free(top_lex); } VEC_PUSH(&ops->items, ct_make_str(lex)); free(lex); continue; } VEC_PUSH(&output->items, ct_make_str(lex)); free(lex); } while (ops->items.len) { CtValue top = VEC_POP(&ops->items); VEC_PUSH(&output->items, top); } ct_stack_push(&vm->stack, ct_make_list(output)); } static SplitLexer *split_lexer_new(Parser *parser, const char *seps) { SplitLexer *lexer = (SplitLexer *)xmalloc(sizeof(SplitLexer)); lexer->parser = parser; memset(lexer->separators, 0, sizeof(lexer->separators)); for (const char *p = seps; p && *p; p++) { lexer->separators[(unsigned char)*p] = true; } VEC_INIT(&lexer->buffer); return lexer; } static void split_lexer_buffer_token(SplitLexer *lexer, Token tok) { if (!tok.lexeme) { return; } size_t len = strlen(tok.lexeme); if (len == 0 || tok.lexeme[0] == '"') { VEC_PUSH(&lexer->buffer, tok); return; } size_t start = 0; for (size_t i = 0; i <= len; i++) { bool is_sep = (i < len) && lexer->separators[(unsigned char)tok.lexeme[i]]; bool at_end = (i == len); if (is_sep || at_end) { if (i > start) { size_t tok_len = i - start; char *lex = (char *)xmalloc(tok_len + 1); memcpy(lex, tok.lexeme + start, tok_len); lex[tok_len] = '\0'; Token out = tok; out.lexeme = lex; VEC_PUSH(&lexer->buffer, out); } if (is_sep) { char sep[2] = {tok.lexeme[i], '\0'}; Token out = tok; out.lexeme = str_dup(sep); VEC_PUSH(&lexer->buffer, out); } start = i + 1; } } } static Token split_lexer_pop(SplitLexer *lexer) { if (lexer->buffer.len == 0) { Token tok = parser_next_token(lexer->parser); if (!tok.lexeme) { Token empty = {0}; empty.lexeme = NULL; return empty; } split_lexer_buffer_token(lexer, tok); } if (lexer->buffer.len == 0) { Token empty = {0}; empty.lexeme = NULL; return empty; } Token out = lexer->buffer.data[0]; memmove(&lexer->buffer.data[0], &lexer->buffer.data[1], (lexer->buffer.len - 1) * sizeof(Token)); lexer->buffer.len--; return out; } static Token split_lexer_peek(SplitLexer *lexer) { if (lexer->buffer.len == 0) { Token tok = parser_next_token(lexer->parser); if (!tok.lexeme) { Token empty = {0}; empty.lexeme = NULL; return empty; } split_lexer_buffer_token(lexer, tok); } if (lexer->buffer.len == 0) { Token empty = {0}; empty.lexeme = NULL; return empty; } return lexer->buffer.data[0]; } static void split_lexer_push_back(SplitLexer *lexer, Token tok) { if (lexer->buffer.len + 1 > lexer->buffer.cap) { lexer->buffer.cap = lexer->buffer.cap ? lexer->buffer.cap * 2 : 8; lexer->buffer.data = xrealloc(lexer->buffer.data, lexer->buffer.cap * sizeof(Token)); } memmove(&lexer->buffer.data[1], &lexer->buffer.data[0], lexer->buffer.len * sizeof(Token)); lexer->buffer.data[0] = tok; lexer->buffer.len++; } static void ct_intrinsic_lexer_new(CompileTimeVM *vm) { char *seps = ct_pop_str(vm); SplitLexer *lexer = split_lexer_new(vm->parser, seps); free(seps); ct_stack_push(&vm->stack, ct_make_lexer(lexer)); } static void ct_intrinsic_lexer_pop(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); if (v.kind != CT_LEXER) { fprintf(stderr, "[error] lexer-pop expects lexer\n"); exit(1); } Token tok = split_lexer_pop(v.as.lexer); ct_stack_push(&vm->stack, ct_make_lexer(v.as.lexer)); if (!tok.lexeme) { ct_stack_push(&vm->stack, ct_make_nil()); } else { ct_stack_push(&vm->stack, ct_make_token(tok)); } } static void ct_intrinsic_lexer_peek(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); if (v.kind != CT_LEXER) { fprintf(stderr, "[error] lexer-peek expects lexer\n"); exit(1); } Token tok = split_lexer_peek(v.as.lexer); ct_stack_push(&vm->stack, ct_make_lexer(v.as.lexer)); if (!tok.lexeme) { ct_stack_push(&vm->stack, ct_make_nil()); } else { ct_stack_push(&vm->stack, ct_make_token(tok)); } } static void ct_intrinsic_lexer_expect(CompileTimeVM *vm) { char *expected = ct_pop_str(vm); CtValue v = ct_stack_pop(&vm->stack); if (v.kind != CT_LEXER) { fprintf(stderr, "[error] lexer-expect expects lexer\n"); exit(1); } Token tok = split_lexer_pop(v.as.lexer); if (!tok.lexeme || strcmp(tok.lexeme, expected) != 0) { fprintf(stderr, "[error] lexer-expect expected '%s'\n", expected); exit(1); } ct_stack_push(&vm->stack, ct_make_lexer(v.as.lexer)); ct_stack_push(&vm->stack, ct_make_token(tok)); free(expected); } static void ct_intrinsic_lexer_collect_brace(CompileTimeVM *vm) { CtValue v = ct_stack_pop(&vm->stack); if (v.kind != CT_LEXER) { fprintf(stderr, "[error] lexer-collect-brace expects lexer\n"); exit(1); } int depth = 1; CtList *list = ct_list_new(); while (depth > 0) { Token tok = split_lexer_pop(v.as.lexer); if (!tok.lexeme) { fprintf(stderr, "[error] unterminated brace in lexer\n"); exit(1); } if (strcmp(tok.lexeme, "{") == 0) { depth++; } else if (strcmp(tok.lexeme, "}") == 0) { depth--; if (depth == 0) { break; } } VEC_PUSH(&list->items, ct_make_token(tok)); } ct_stack_push(&vm->stack, ct_make_lexer(v.as.lexer)); ct_stack_push(&vm->stack, ct_make_list(list)); } static void ct_intrinsic_lexer_push_back(CompileTimeVM *vm) { Token tok = ct_pop_token(vm); CtValue v = ct_stack_pop(&vm->stack); if (v.kind != CT_LEXER) { fprintf(stderr, "[error] lexer-push-back expects lexer\n"); exit(1); } split_lexer_push_back(v.as.lexer, tok); ct_stack_push(&vm->stack, ct_make_lexer(v.as.lexer)); } static void ct_intrinsic_emit_definition(CompileTimeVM *vm) { CtList *body = ct_pop_list(vm); Token name = ct_pop_token(vm); TokenVec injected; VEC_INIT(&injected); Token tok = {0}; tok.lexeme = str_dup("word"); VEC_PUSH(&injected, tok); VEC_PUSH(&injected, name); for (size_t i = 0; i < body->items.len; i++) { CtValue item = body->items.data[i]; Token t = {0}; if (item.kind == CT_TOKEN) { t = item.as.token; } else if (item.kind == CT_STR) { t.lexeme = str_dup(item.as.str); } else if (item.kind == CT_INT) { t.lexeme = str_printf("%lld", (long long)item.as.i64); } else { t.lexeme = ct_string_from_value(item); } VEC_PUSH(&injected, t); } tok.lexeme = str_dup("end"); VEC_PUSH(&injected, tok); parser_inject_tokens(vm->parser, &injected); } static void ct_intrinsic_prelude_set(CompileTimeVM *vm) { ct_intrinsic_prelude_clear(vm); ct_intrinsic_prelude_append(vm); } static void ct_intrinsic_bss_set(CompileTimeVM *vm) { ct_intrinsic_bss_clear(vm); ct_intrinsic_bss_append(vm); } static Word *register_ct_intrinsic(Dictionary *dict, const char *name, CompileTimeIntrinsic fn) { Word *word = dictionary_lookup(dict, name); if (!word) { word = (Word *)xmalloc(sizeof(Word)); memset(word, 0, sizeof(Word)); word->name = str_dup(name); dictionary_register(dict, word); } word->ct_intrinsic = fn; word->compile_only = true; return word; } static void bootstrap_dictionary(Dictionary *dict, Parser *parser, CompileTimeVM *vm) { (void)parser; register_ct_intrinsic(dict, "dup", ct_intrinsic_dup); register_ct_intrinsic(dict, "drop", ct_intrinsic_drop); register_ct_intrinsic(dict, "swap", ct_intrinsic_swap); register_ct_intrinsic(dict, "over", ct_intrinsic_over); register_ct_intrinsic(dict, "rot", ct_intrinsic_rot); register_ct_intrinsic(dict, "pick", ct_intrinsic_pick); register_ct_intrinsic(dict, "rpick", ct_intrinsic_rpick); register_ct_intrinsic(dict, ">r", ct_intrinsic_to_r); register_ct_intrinsic(dict, "r>", ct_intrinsic_from_r); register_ct_intrinsic(dict, "rdrop", ct_intrinsic_rdrop); register_ct_intrinsic(dict, "+", ct_intrinsic_add); register_ct_intrinsic(dict, "-", ct_intrinsic_sub); register_ct_intrinsic(dict, "*", ct_intrinsic_mul); register_ct_intrinsic(dict, "/", ct_intrinsic_div); register_ct_intrinsic(dict, "%", ct_intrinsic_mod); register_ct_intrinsic(dict, "==", ct_intrinsic_eq); register_ct_intrinsic(dict, "!=", ct_intrinsic_ne); register_ct_intrinsic(dict, ">", ct_intrinsic_gt); register_ct_intrinsic(dict, "<", ct_intrinsic_lt); register_ct_intrinsic(dict, ">=", ct_intrinsic_ge); register_ct_intrinsic(dict, "<=", ct_intrinsic_le); register_ct_intrinsic(dict, "not", ct_intrinsic_not); register_ct_intrinsic(dict, "nil", ct_intrinsic_nil); register_ct_intrinsic(dict, "nil?", ct_intrinsic_nilp); register_ct_intrinsic(dict, "string=", ct_intrinsic_string_eq); register_ct_intrinsic(dict, "string-length", ct_intrinsic_string_length); register_ct_intrinsic(dict, "string-append", ct_intrinsic_string_append); register_ct_intrinsic(dict, "string>number", ct_intrinsic_string_to_number); register_ct_intrinsic(dict, "int>string", ct_intrinsic_int_to_string); register_ct_intrinsic(dict, "identifier?", ct_intrinsic_identifierp); register_ct_intrinsic(dict, "list-new", ct_intrinsic_list_new); register_ct_intrinsic(dict, "list-append", ct_intrinsic_list_append); register_ct_intrinsic(dict, "list-pop", ct_intrinsic_list_pop); register_ct_intrinsic(dict, "list-pop-front", ct_intrinsic_list_pop_front); register_ct_intrinsic(dict, "list-length", ct_intrinsic_list_length); register_ct_intrinsic(dict, "list-empty?", ct_intrinsic_list_empty); register_ct_intrinsic(dict, "list-get", ct_intrinsic_list_get); register_ct_intrinsic(dict, "list-set", ct_intrinsic_list_set); register_ct_intrinsic(dict, "list-extend", ct_intrinsic_list_extend); register_ct_intrinsic(dict, "list-last", ct_intrinsic_list_last); register_ct_intrinsic(dict, "list-clone", ct_intrinsic_list_clone); register_ct_intrinsic(dict, "map-new", ct_intrinsic_map_new); register_ct_intrinsic(dict, "map-set", ct_intrinsic_map_set); register_ct_intrinsic(dict, "map-get", ct_intrinsic_map_get); register_ct_intrinsic(dict, "map-has?", ct_intrinsic_map_has); register_ct_intrinsic(dict, "token-lexeme", ct_intrinsic_token_lexeme); register_ct_intrinsic(dict, "token-from-lexeme", ct_intrinsic_token_from_lexeme); register_ct_intrinsic(dict, "next-token", ct_intrinsic_next_token); register_ct_intrinsic(dict, "peek-token", ct_intrinsic_peek_token); register_ct_intrinsic(dict, "inject-tokens", ct_intrinsic_inject_tokens); register_ct_intrinsic(dict, "set-token-hook", ct_intrinsic_set_token_hook); register_ct_intrinsic(dict, "clear-token-hook", ct_intrinsic_clear_token_hook); register_ct_intrinsic(dict, "parse-error", ct_intrinsic_parse_error); register_ct_intrinsic(dict, "add-token", ct_intrinsic_add_token); register_ct_intrinsic(dict, "add-token-chars", ct_intrinsic_add_token_chars); register_ct_intrinsic(dict, "prelude-clear", ct_intrinsic_prelude_clear); register_ct_intrinsic(dict, "prelude-append", ct_intrinsic_prelude_append); register_ct_intrinsic(dict, "prelude-set", ct_intrinsic_prelude_set); register_ct_intrinsic(dict, "bss-clear", ct_intrinsic_bss_clear); register_ct_intrinsic(dict, "bss-append", ct_intrinsic_bss_append); register_ct_intrinsic(dict, "bss-set", ct_intrinsic_bss_set); register_ct_intrinsic(dict, "use-l2-ct", ct_intrinsic_use_l2_ct); register_ct_intrinsic(dict, "shunt", ct_intrinsic_shunt); register_ct_intrinsic(dict, "emit-definition", ct_intrinsic_emit_definition); register_ct_intrinsic(dict, "lexer-new", ct_intrinsic_lexer_new); register_ct_intrinsic(dict, "lexer-pop", ct_intrinsic_lexer_pop); register_ct_intrinsic(dict, "lexer-peek", ct_intrinsic_lexer_peek); register_ct_intrinsic(dict, "lexer-expect", ct_intrinsic_lexer_expect); register_ct_intrinsic(dict, "lexer-collect-brace", ct_intrinsic_lexer_collect_brace); register_ct_intrinsic(dict, "lexer-push-back", ct_intrinsic_lexer_push_back); vm->dictionary = dict; } static void emit_push_literal(FunctionEmitter *builder, int64_t value) { emit_line(builder, str_printf(" ; push %lld", (long long)value)); emit_line(builder, " sub r12, 8"); emit_line(builder, str_printf(" mov qword [r12], %lld", (long long)value)); } static void emit_push_literal_u64(FunctionEmitter *builder, uint64_t value) { emit_line(builder, str_printf(" ; push %llu", (unsigned long long)value)); emit_line(builder, " sub r12, 8"); emit_line(builder, str_printf(" mov rax, %llu", (unsigned long long)value)); emit_line(builder, " mov [r12], rax"); } static void emit_push_label(FunctionEmitter *builder, const char *label) { emit_line(builder, str_printf(" ; push %s", label)); emit_line(builder, str_printf(" lea rax, [rel %s]", label)); emit_line(builder, " sub r12, 8"); emit_line(builder, " mov [r12], rax"); } static void emit_push_from(FunctionEmitter *builder, const char *reg) { emit_line(builder, " sub r12, 8"); emit_line(builder, str_printf(" mov [r12], %s", reg)); } static void emit_pop_to(FunctionEmitter *builder, const char *reg) { emit_line(builder, str_printf(" mov %s, [r12]", reg)); emit_line(builder, " add r12, 8"); } static void emission_init(Emission *emission) { VEC_INIT(&emission->text); VEC_INIT(&emission->data); VEC_INIT(&emission->bss); } typedef struct { Emission *emission; Dictionary *dictionary; StrMap string_labels; StrMap externs; StrMap label_cache; int unique_id; bool debug; } EmitContext; static void emit_extern(EmitContext *ctx, const char *name) { if (strmap_has(&ctx->externs, name)) { return; } strmap_set(&ctx->externs, name, (void *)1); VEC_PUSH(&ctx->emission->text, str_printf("extern %s", name)); } static const char *emit_string_literal(EmitContext *ctx, const char *value) { char *label = (char *)strmap_get(&ctx->string_labels, value); if (label) { return label; } label = str_printf("__str_%d", ctx->unique_id++); strmap_set(&ctx->string_labels, value, label); StrVec bytes; VEC_INIT(&bytes); for (const unsigned char *p = (const unsigned char *)value; *p; p++) { VEC_PUSH(&bytes, str_printf("%u", (unsigned int)*p)); } VEC_PUSH(&bytes, str_dup("0")); size_t total = 0; for (size_t i = 0; i < bytes.len; i++) { total += strlen(bytes.data[i]) + 2; } char *line = (char *)xmalloc(total + strlen(label) + 6); strcpy(line, label); strcat(line, ": db "); for (size_t i = 0; i < bytes.len; i++) { strcat(line, bytes.data[i]); if (i + 1 < bytes.len) { strcat(line, ", "); } } VEC_PUSH(&ctx->emission->data, line); return label; } static const char *emit_word_label(EmitContext *ctx, const char *name) { char *label = (char *)strmap_get(&ctx->label_cache, name); if (label) { return label; } char *sanitized = sanitize_label(name); label = str_printf("w_%s", sanitized); free(sanitized); strmap_set(&ctx->label_cache, name, label); return label; } static bool inline_stack_has(StrVec *stack, const char *name) { for (size_t i = 0; i < stack->len; i++) { if (strcmp(stack->data[i], name) == 0) { return true; } } return false; } static bool is_float_type(const char *type) { return type && (strcmp(type, "double") == 0 || strcmp(type, "float") == 0); } static void emit_extern_call(EmitContext *ctx, FunctionEmitter *builder, Word *word) { emit_extern(ctx, word->name); if (!word->extern_arg_types || word->extern_arg_count == 0) { emit_line(builder, str_printf(" call %s", word->name)); if (word->extern_ret_type && strcmp(word->extern_ret_type, "void") != 0) { emit_push_from(builder, "rax"); } return; } const char *int_regs[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"}; const char *float_regs[] = {"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"}; int int_idx = 0; int float_idx = 0; for (int i = 0; i < word->extern_arg_count; i++) { const char *type = word->extern_arg_types[i]; int offset = (word->extern_arg_count - 1 - i) * 8; if (is_float_type(type)) { if (float_idx >= 8) { fprintf(stderr, "[error] too many float args for extern %s\n", word->name); exit(1); } emit_line(builder, str_printf(" movq %s, [r12 + %d]", float_regs[float_idx], offset)); float_idx++; } else { if (int_idx >= 6) { fprintf(stderr, "[error] too many int args for extern %s\n", word->name); exit(1); } emit_line(builder, str_printf(" mov %s, [r12 + %d]", int_regs[int_idx], offset)); int_idx++; } } emit_line(builder, str_printf(" add r12, %d", word->extern_arg_count * 8)); emit_line(builder, " mov r11, rsp"); emit_line(builder, " and r11, 15"); char *align_label = str_printf(".L_align_%d", ctx->unique_id++); emit_line(builder, str_printf(" cmp r11, 0")); emit_line(builder, str_printf(" je %s", align_label)); emit_line(builder, " sub rsp, 8"); emit_line(builder, " xor eax, eax"); emit_line(builder, str_printf(" mov al, %d", float_idx)); emit_line(builder, str_printf(" call %s", word->name)); emit_line(builder, " add rsp, 8"); emit_line(builder, str_printf(" jmp %s_done", align_label)); emit_line(builder, str_printf("%s:", align_label)); emit_line(builder, " xor eax, eax"); emit_line(builder, str_printf(" mov al, %d", float_idx)); emit_line(builder, str_printf(" call %s", word->name)); emit_line(builder, str_printf("%s_done:", align_label)); free(align_label); if (word->extern_ret_type && strcmp(word->extern_ret_type, "void") == 0) { return; } if (word->extern_ret_type && is_float_type(word->extern_ret_type)) { emit_line(builder, " sub r12, 8"); emit_line(builder, " movq [r12], xmm0"); } else { emit_push_from(builder, "rax"); } } static void emit_ops(EmitContext *ctx, FunctionEmitter *builder, OpVec *body, StrVec *inline_stack); static void emit_word_call(EmitContext *ctx, FunctionEmitter *builder, const char *name, StrVec *inline_stack) { Word *word = dictionary_lookup(ctx->dictionary, name); if (!word) { fprintf(stderr, "[error] unknown word '%s' during emission\n", name); exit(1); } if (word->inline_def && word->definition) { if (inline_stack_has(inline_stack, word->name)) { fprintf(stderr, "[error] recursive inline word '%s'\n", word->name); exit(1); } VEC_PUSH(inline_stack, word->name); emit_ops(ctx, builder, &word->definition->body, inline_stack); inline_stack->len--; return; } if (word->is_extern && !word->extern_arg_types) { emit_extern(ctx, word->name); emit_line(builder, str_printf(" call %s", word->name)); return; } if (word->asm_def) { emit_line(builder, str_printf(" call %s", emit_word_label(ctx, word->name))); return; } if (word->is_extern && word->extern_arg_types) { emit_extern_call(ctx, builder, word); return; } emit_line(builder, str_printf(" call %s", emit_word_label(ctx, word->name))); } static void emit_op(EmitContext *ctx, FunctionEmitter *builder, Op *op, StrVec *inline_stack) { switch (op->kind) { case OP_LITERAL: { if (op->lit_kind == LIT_INT) { emit_push_literal(builder, op->data.i64); } else if (op->lit_kind == LIT_FLOAT) { union { double f; uint64_t u; } conv; conv.f = op->data.f64; emit_push_literal_u64(builder, conv.u); } else if (op->lit_kind == LIT_STRING) { const char *label = emit_string_literal(ctx, op->data.str); emit_push_label(builder, label); emit_push_literal(builder, (int64_t)strlen(op->data.str)); } break; } case OP_WORD: emit_word_call(ctx, builder, op->data.word, inline_stack); break; case OP_BRANCH_ZERO: emit_pop_to(builder, "rax"); emit_line(builder, " cmp rax, 0"); emit_line(builder, str_printf(" je %s", op->data.label)); break; case OP_JUMP: emit_line(builder, str_printf(" jmp %s", op->data.label)); break; case OP_LABEL: emit_line(builder, str_printf("%s:", op->data.label)); break; case OP_FOR_BEGIN: emit_pop_to(builder, "rax"); emit_line(builder, " cmp rax, 0"); emit_line(builder, str_printf(" jle %s", op->data.loop.end)); emit_line(builder, " sub r13, 8"); emit_line(builder, " mov [r13], rax"); emit_line(builder, str_printf("%s:", op->data.loop.loop)); break; case OP_FOR_END: emit_line(builder, " mov rax, [r13]"); emit_line(builder, " dec rax"); emit_line(builder, " mov [r13], rax"); emit_line(builder, " cmp rax, 0"); emit_line(builder, str_printf(" jg %s", op->data.loop.loop)); emit_line(builder, " add r13, 8"); emit_line(builder, str_printf("%s:", op->data.loop.end)); break; case OP_LIST_BEGIN: emit_line(builder, " mov rax, [rel list_capture_sp]"); emit_line(builder, " mov [rax], r12"); emit_line(builder, " add rax, 8"); emit_line(builder, " mov [rel list_capture_sp], rax"); break; case OP_LIST_END: char *list_done = str_printf(".list_copy_done_%d", ctx->unique_id++); char *list_loop = str_printf(".list_copy_loop_%d", ctx->unique_id++); emit_line(builder, " mov rax, [rel list_capture_sp]"); emit_line(builder, " sub rax, 8"); emit_line(builder, " mov [rel list_capture_sp], rax"); emit_line(builder, " mov rbx, [rax]"); emit_line(builder, " mov rcx, rbx"); emit_line(builder, " sub rcx, r12"); emit_line(builder, " shr rcx, 3"); emit_line(builder, " mov r15, rcx"); emit_line(builder, " mov rdi, 0"); emit_line(builder, " mov rsi, rcx"); emit_line(builder, " add rsi, 1"); emit_line(builder, " shl rsi, 3"); emit_line(builder, " mov rdx, 3"); emit_line(builder, " mov r10, 34"); emit_line(builder, " mov r8, -1"); emit_line(builder, " mov r9, 0"); emit_line(builder, " mov rax, 9"); emit_line(builder, " syscall"); emit_line(builder, " mov [rax], r15"); emit_line(builder, " mov rcx, r15"); emit_line(builder, " cmp rcx, 0"); emit_line(builder, str_printf(" je %s", list_done)); emit_line(builder, " lea rsi, [r12 + rcx*8 - 8]"); emit_line(builder, " lea rdi, [rax + 8]"); emit_line(builder, str_printf("%s:", list_loop)); emit_line(builder, " mov rdx, [rsi]"); emit_line(builder, " mov [rdi], rdx"); emit_line(builder, " sub rsi, 8"); emit_line(builder, " add rdi, 8"); emit_line(builder, " dec rcx"); emit_line(builder, str_printf(" jnz %s", list_loop)); emit_line(builder, str_printf("%s:", list_done)); emit_line(builder, " mov r12, rbx"); emit_line(builder, " sub r12, 8"); emit_line(builder, " mov [r12], rax"); free(list_done); free(list_loop); break; } } static void emit_ops(EmitContext *ctx, FunctionEmitter *builder, OpVec *body, StrVec *inline_stack) { for (size_t i = 0; i < body->len; i++) { emit_op(ctx, builder, &body->data[i], inline_stack); } } static void emit_definition(EmitContext *ctx, Definition *def) { FunctionEmitter builder; emitter_init(&builder, &ctx->emission->text, ctx->debug); const char *label = emit_word_label(ctx, def->name); if (strcmp(def->name, "main") == 0) { emit_line(&builder, str_printf("global %s", label)); } emit_line(&builder, str_printf("%s:", label)); StrVec inline_stack; VEC_INIT(&inline_stack); emit_ops(ctx, &builder, &def->body, &inline_stack); emit_line(&builder, " ret"); } static void emit_asm_definition(EmitContext *ctx, AsmDefinition *def) { if (!def || !def->body) { return; } VEC_PUSH(&ctx->emission->text, str_printf("%s:", emit_word_label(ctx, def->name))); const char *cursor = def->body; while (*cursor) { const char *line_end = strchr(cursor, '\n'); size_t len = line_end ? (size_t)(line_end - cursor) : strlen(cursor); char *line = (char *)xmalloc(len + 1); memcpy(line, cursor, len); line[len] = '\0'; if (len > 0) { char *trim = line; while (*trim && isspace((unsigned char)*trim)) { trim++; } size_t trim_len = strlen(trim); if (trim_len > 0 && trim[trim_len - 1] == ':') { VEC_PUSH(&ctx->emission->text, str_dup(trim)); free(line); } else { VEC_PUSH(&ctx->emission->text, line); } } else { free(line); } if (!line_end) { break; } cursor = line_end + 1; } VEC_PUSH(&ctx->emission->text, str_dup(" ret")); } static void emit_default_prelude(Emission *emission) { VEC_PUSH(&emission->text, str_dup("%define DSTK_BYTES 65536")); VEC_PUSH(&emission->text, str_dup("%define RSTK_BYTES 65536")); VEC_PUSH(&emission->text, str_dup("%define PRINT_BUF_BYTES 4096")); VEC_PUSH(&emission->text, str_dup("global _start")); VEC_PUSH(&emission->text, str_dup("_start:")); VEC_PUSH(&emission->text, str_dup(" mov rbx, rsp")); VEC_PUSH(&emission->text, str_dup(" mov rax, [rbx]")); VEC_PUSH(&emission->text, str_dup(" mov [rel sys_argc], rax")); VEC_PUSH(&emission->text, str_dup(" lea rax, [rbx + 8]")); VEC_PUSH(&emission->text, str_dup(" mov [rel sys_argv], rax")); VEC_PUSH(&emission->text, str_dup(" lea r12, [rel dstack_top]")); VEC_PUSH(&emission->text, str_dup(" lea r13, [rel rstack_top]")); VEC_PUSH(&emission->text, str_dup(" lea rax, [rel list_capture_stack]")); VEC_PUSH(&emission->text, str_dup(" mov [rel list_capture_sp], rax")); VEC_PUSH(&emission->text, str_dup(" call w_main")); VEC_PUSH(&emission->text, str_dup(" mov rax, [r12]")); VEC_PUSH(&emission->text, str_dup(" mov rdi, rax")); VEC_PUSH(&emission->text, str_dup(" mov rax, 60")); VEC_PUSH(&emission->text, str_dup(" syscall")); } static void emit_libc_prelude(Emission *emission) { VEC_PUSH(&emission->text, str_dup("%define DSTK_BYTES 65536")); VEC_PUSH(&emission->text, str_dup("%define RSTK_BYTES 65536")); VEC_PUSH(&emission->text, str_dup("%define PRINT_BUF_BYTES 4096")); VEC_PUSH(&emission->text, str_dup("global main")); VEC_PUSH(&emission->text, str_dup("main:")); VEC_PUSH(&emission->text, str_dup(" mov [rel sys_argc], rdi")); VEC_PUSH(&emission->text, str_dup(" mov [rel sys_argv], rsi")); VEC_PUSH(&emission->text, str_dup(" lea r12, [rel dstack_top]")); VEC_PUSH(&emission->text, str_dup(" lea r13, [rel rstack_top]")); VEC_PUSH(&emission->text, str_dup(" lea rax, [rel list_capture_stack]")); VEC_PUSH(&emission->text, str_dup(" mov [rel list_capture_sp], rax")); VEC_PUSH(&emission->text, str_dup(" call w_main")); VEC_PUSH(&emission->text, str_dup(" mov rax, [r12]")); VEC_PUSH(&emission->text, str_dup(" ret")); } static void emit_default_bss(Emission *emission) { VEC_PUSH(&emission->bss, str_dup("align 16")); VEC_PUSH(&emission->bss, str_dup("dstack: resb DSTK_BYTES")); VEC_PUSH(&emission->bss, str_dup("dstack_top:")); VEC_PUSH(&emission->bss, str_dup("align 16")); VEC_PUSH(&emission->bss, str_dup("rstack: resb RSTK_BYTES")); VEC_PUSH(&emission->bss, str_dup("rstack_top:")); VEC_PUSH(&emission->bss, str_dup("align 16")); VEC_PUSH(&emission->bss, str_dup("print_buf: resb PRINT_BUF_BYTES")); VEC_PUSH(&emission->bss, str_dup("print_buf_end:")); VEC_PUSH(&emission->bss, str_dup("align 16")); VEC_PUSH(&emission->bss, str_dup("persistent: resb 64")); VEC_PUSH(&emission->bss, str_dup("persistent_end:")); VEC_PUSH(&emission->bss, str_dup("align 16")); VEC_PUSH(&emission->bss, str_dup("list_capture_sp: resq 1")); VEC_PUSH(&emission->bss, str_dup("list_capture_tmp: resq 1")); VEC_PUSH(&emission->bss, str_dup("list_capture_stack: resq 1024")); } static Emission emit_module(Parser *parser, Dictionary *dict, bool debug) { Emission emission; emission_init(&emission); EmitContext ctx; ctx.emission = &emission; ctx.dictionary = dict; strmap_init(&ctx.string_labels); strmap_init(&ctx.externs); strmap_init(&ctx.label_cache); ctx.unique_id = 0; ctx.debug = debug; if (parser->custom_prelude) { for (size_t i = 0; i < parser->custom_prelude->len; i++) { VEC_PUSH(&emission.text, str_dup(parser->custom_prelude->data[i])); } } else if (parser->uses_libc) { emit_libc_prelude(&emission); } else { emit_default_prelude(&emission); } VEC_PUSH(&emission.data, str_dup("sys_argc: dq 0")); VEC_PUSH(&emission.data, str_dup("sys_argv: dq 0")); if (parser->custom_bss) { for (size_t i = 0; i < parser->custom_bss->len; i++) { VEC_PUSH(&emission.bss, str_dup(parser->custom_bss->data[i])); } } else { emit_default_bss(&emission); } for (size_t i = 0; i < parser->module.forms.len; i++) { Form form = parser->module.forms.data[i]; if (form.kind == FORM_DEF) { Definition *def = (Definition *)form.ptr; if (def->compile_only) { continue; } Word *word = dictionary_lookup(dict, def->name); if (!word || word->definition != def) { continue; } emit_definition(&ctx, def); } else if (form.kind == FORM_ASM) { AsmDefinition *def = (AsmDefinition *)form.ptr; if (def->compile_only) { continue; } Word *word = dictionary_lookup(dict, def->name); if (!word || word->asm_def != def) { continue; } emit_asm_definition(&ctx, def); } } for (size_t i = 0; i < parser->variable_labels.cap; i++) { if (!parser->variable_labels.keys || !parser->variable_labels.keys[i]) { continue; } const char *label = (const char *)parser->variable_labels.values[i]; if (label) { VEC_PUSH(&emission.data, str_printf("%s: dq 0", label)); } } return emission; } static char *emission_snapshot(Emission *emission) { StrVec parts; VEC_INIT(&parts); if (emission->text.len) { VEC_PUSH(&parts, str_dup("section .text")); for (size_t i = 0; i < emission->text.len; i++) { if (emission->text.data[i]) { VEC_PUSH(&parts, str_dup(emission->text.data[i])); } } } if (emission->data.len) { VEC_PUSH(&parts, str_dup("section .data")); VEC_PUSH(&parts, str_dup("data_start:")); for (size_t i = 0; i < emission->data.len; i++) { if (emission->data.data[i]) { VEC_PUSH(&parts, str_dup(emission->data.data[i])); } } VEC_PUSH(&parts, str_dup("data_end:")); } if (emission->bss.len) { VEC_PUSH(&parts, str_dup("section .bss")); for (size_t i = 0; i < emission->bss.len; i++) { if (emission->bss.data[i]) { VEC_PUSH(&parts, str_dup(emission->bss.data[i])); } } } VEC_PUSH(&parts, str_dup("section .note.GNU-stack noalloc noexec nowrite")); size_t total = 0; for (size_t i = 0; i < parts.len; i++) { if (parts.data[i]) { total += strlen(parts.data[i]) + 1; } } char *buf = (char *)xmalloc(total + 1); buf[0] = '\0'; for (size_t i = 0; i < parts.len; i++) { strcat(buf, parts.data[i]); strcat(buf, "\n"); } return buf; } static void write_file(const char *path, const char *data) { FILE *f = fopen(path, "w"); if (!f) { fprintf(stderr, "[error] failed to write %s: %s\n", path, strerror(errno)); exit(1); } fputs(data, f); fclose(f); } static void run_cmd(char *const argv[]) { pid_t pid = fork(); if (pid < 0) { fprintf(stderr, "[error] fork failed: %s\n", strerror(errno)); exit(1); } if (pid == 0) { execvp(argv[0], argv); fprintf(stderr, "[error] failed to exec %s: %s\n", argv[0], strerror(errno)); _exit(1); } int status = 0; if (waitpid(pid, &status, 0) < 0) { fprintf(stderr, "[error] waitpid failed: %s\n", strerror(errno)); exit(1); } if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { fprintf(stderr, "[error] command failed\n"); exit(1); } } static void run_nasm(const char *asm_path, const char *obj_path, bool debug) { char *argv[8]; int idx = 0; argv[idx++] = "nasm"; argv[idx++] = "-f"; argv[idx++] = "elf64"; if (debug) { argv[idx++] = "-g"; argv[idx++] = "-F"; argv[idx++] = "dwarf"; } argv[idx++] = "-o"; argv[idx++] = (char *)obj_path; argv[idx++] = (char *)asm_path; argv[idx++] = NULL; run_cmd(argv); } static void run_linker(const char *obj_path, const char *exe_path, bool debug, StrVec *libs, bool shared, bool use_libc) { const char *linker = NULL; if (use_libc) { if (access("/usr/bin/cc", X_OK) == 0) { linker = "cc"; } else if (access("/usr/bin/gcc", X_OK) == 0) { linker = "gcc"; } else { fprintf(stderr, "[error] no C compiler found for libc linking\n"); exit(1); } } else if (access("/usr/bin/ld.lld", X_OK) == 0) { linker = "ld.lld"; } else if (access("/usr/bin/ld", X_OK) == 0) { linker = "ld"; } else { fprintf(stderr, "[error] no linker found\n"); exit(1); } StrVec argv; VEC_INIT(&argv); VEC_PUSH(&argv, str_dup((char *)linker)); if (!use_libc && strstr(linker, "lld")) { VEC_PUSH(&argv, str_dup("-m")); VEC_PUSH(&argv, str_dup("elf_x86_64")); } if (shared) { VEC_PUSH(&argv, str_dup("-shared")); } VEC_PUSH(&argv, str_dup("-o")); VEC_PUSH(&argv, str_dup((char *)exe_path)); VEC_PUSH(&argv, str_dup((char *)obj_path)); if (use_libc) { VEC_PUSH(&argv, str_dup("-no-pie")); } else if (!shared && (!libs || libs->len == 0)) { VEC_PUSH(&argv, str_dup("-nostdlib")); VEC_PUSH(&argv, str_dup("-static")); } else if (!shared) { const char *candidates[] = { "/lib64/ld-linux-x86-64.so.2", "/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2", "/lib/ld-linux-x86-64.so.2", "/lib/ld64.so.1" }; const char *interp = NULL; for (size_t i = 0; i < ARRAY_LEN(candidates); i++) { if (access(candidates[i], R_OK) == 0) { interp = candidates[i]; break; } } if (interp) { VEC_PUSH(&argv, str_dup("-dynamic-linker")); VEC_PUSH(&argv, str_dup(interp)); } } if (libs) { for (size_t i = 0; i < libs->len; i++) { VEC_PUSH(&argv, str_dup(libs->data[i])); } } if (debug) { VEC_PUSH(&argv, str_dup("-g")); } VEC_PUSH(&argv, NULL); run_cmd(argv.data); } static char *read_text_file(const char *path) { FILE *f = fopen(path, "r"); if (!f) { return NULL; } fseek(f, 0, SEEK_END); long size = ftell(f); fseek(f, 0, SEEK_SET); if (size < 0) { fclose(f); return NULL; } char *buf = (char *)xmalloc((size_t)size + 1); size_t n = fread(buf, 1, (size_t)size, f); buf[n] = '\0'; fclose(f); return buf; } static bool file_exists(const char *path) { return access(path, R_OK) == 0; } static char *path_dirname(const char *path) { const char *slash = strrchr(path, '/'); if (!slash) { return str_dup("."); } size_t len = (size_t)(slash - path); if (len == 0) { return str_dup("/"); } char *out = (char *)xmalloc(len + 1); memcpy(out, path, len); out[len] = '\0'; return out; } static char *path_basename(const char *path) { if (!path) { return str_dup(""); } const char *slash = strrchr(path, '/'); if (!slash || !slash[1]) { return str_dup(path); } return str_dup(slash + 1); } static char *path_join(const char *a, const char *b) { if (!a || !*a) { return str_dup(b); } if (!b || !*b) { return str_dup(a); } size_t len_a = strlen(a); bool has_sep = a[len_a - 1] == '/'; return str_printf("%s%s%s", a, has_sep ? "" : "/", b); } static char *resolve_import(const char *base_dir, const char *import_path, StrVec *include_dirs) { if (!import_path || !*import_path) { return NULL; } if (import_path[0] == '/') { return file_exists(import_path) ? str_dup(import_path) : NULL; } if (base_dir) { char *candidate = path_join(base_dir, import_path); if (file_exists(candidate)) { return candidate; } free(candidate); } if (include_dirs) { for (size_t i = 0; i < include_dirs->len; i++) { char *candidate = path_join(include_dirs->data[i], import_path); if (file_exists(candidate)) { return candidate; } free(candidate); } } return NULL; } static char *expand_imports(const char *path, StrVec *include_dirs, StrMap *visited, FileSpanVec *spans, int *line_counter) { if (strmap_has(visited, path)) { return str_dup(""); } strmap_set(visited, path, (void *)1); char *content = read_text_file(path); if (!content) { fprintf(stderr, "[error] failed to read %s\n", path); exit(1); } char *base_dir = path_dirname(path); StrVec parts; VEC_INIT(&parts); const char *cursor = content; int local_line = 1; int span_start = *line_counter; int span_local_start = local_line; bool span_active = false; while (*cursor) { const char *line_end = strchr(cursor, '\n'); size_t len = line_end ? (size_t)(line_end - cursor) : strlen(cursor); char *line = (char *)xmalloc(len + 1); memcpy(line, cursor, len); line[len] = '\0'; char *trim = line; while (*trim && isspace((unsigned char)*trim)) { trim++; } bool is_import = false; if (str_starts_with(trim, "import") && (trim[6] == ' ' || trim[6] == '\t')) { trim += 6; while (*trim && isspace((unsigned char)*trim)) { trim++; } char *end = trim; while (*end && !isspace((unsigned char)*end) && *end != '#') { end++; } if (end > trim) { char *import_path = (char *)xmalloc((size_t)(end - trim) + 1); memcpy(import_path, trim, (size_t)(end - trim)); import_path[end - trim] = '\0'; char *resolved = resolve_import(base_dir, import_path, include_dirs); if (!resolved) { fprintf(stderr, "[error] import not found: %s\n", import_path); exit(1); } if (span_active) { FileSpan span = {0}; span.path = str_dup(path); span.start_line = span_start; span.end_line = *line_counter; span.local_start_line = span_local_start; VEC_PUSH(spans, span); span_active = false; } char *expanded = expand_imports(resolved, include_dirs, visited, spans, line_counter); if (expanded && *expanded) { VEC_PUSH(&parts, expanded); } VEC_PUSH(&parts, str_dup("\n")); (*line_counter)++; local_line++; free(resolved); free(import_path); is_import = true; } } if (!is_import) { if (!span_active) { span_start = *line_counter; span_local_start = local_line; span_active = true; } VEC_PUSH(&parts, line); VEC_PUSH(&parts, str_dup("\n")); (*line_counter)++; local_line++; } else { free(line); } if (!line_end) { break; } cursor = line_end + 1; } if (span_active) { FileSpan span = {0}; span.path = str_dup(path); span.start_line = span_start; span.end_line = *line_counter; span.local_start_line = span_local_start; VEC_PUSH(spans, span); } size_t total = 0; for (size_t i = 0; i < parts.len; i++) { total += strlen(parts.data[i]); } char *out = (char *)xmalloc(total + 1); out[0] = '\0'; for (size_t i = 0; i < parts.len; i++) { strcat(out, parts.data[i]); } free(content); free(base_dir); return out; } static bool parse_string_literal(const char *lexeme, char **out) { size_t len = strlen(lexeme); if (len < 2 || lexeme[0] != '"' || lexeme[len - 1] != '"') { return false; } const char *body = lexeme + 1; size_t body_len = len - 2; char *buf = (char *)xmalloc(body_len + 1); size_t pos = 0; for (size_t i = 0; i < body_len; i++) { char ch = body[i]; if (ch != '\\') { buf[pos++] = ch; continue; } i++; if (i >= body_len) { fprintf(stderr, "[error] unterminated escape sequence\n"); exit(1); } char esc = body[i]; if (esc == 'n') { buf[pos++] = '\n'; } else if (esc == 't') { buf[pos++] = '\t'; } else if (esc == 'r') { buf[pos++] = '\r'; } else if (esc == '0') { buf[pos++] = '\0'; } else if (esc == '"') { buf[pos++] = '"'; } else if (esc == '\\') { buf[pos++] = '\\'; } else { fprintf(stderr, "[error] unsupported escape sequence \\%c\n", esc); exit(1); } } buf[pos] = '\0'; *out = buf; return true; } static bool try_parse_int(const char *lexeme, int64_t *out) { char *end = NULL; errno = 0; long long val = strtoll(lexeme, &end, 0); if (errno != 0 || !end || *end != '\0') { return false; } *out = (int64_t)val; return true; } static bool try_parse_float(const char *lexeme, double *out) { if (!strchr(lexeme, '.') && !strchr(lexeme, 'e') && !strchr(lexeme, 'E')) { return false; } char *end = NULL; errno = 0; double val = strtod(lexeme, &end); if (errno != 0 || !end || *end != '\0') { return false; } *out = val; return true; } static void parser_inject_tokens(Parser *parser, TokenVec *injected) { if (!injected || injected->len == 0) { return; } if (parser->pos > parser->tokens.len) { parser->pos = parser->tokens.len; } size_t new_len = parser->tokens.len + injected->len; if (new_len > parser->tokens.cap) { parser->tokens.cap = new_len + 16; parser->tokens.data = xrealloc(parser->tokens.data, parser->tokens.cap * sizeof(Token)); } memmove(&parser->tokens.data[parser->pos + injected->len], &parser->tokens.data[parser->pos], (parser->tokens.len - parser->pos) * sizeof(Token)); for (size_t i = 0; i < injected->len; i++) { parser->tokens.data[parser->pos + i] = injected->data[i]; } parser->tokens.len = new_len; } static void parser_start_macro(Parser *parser, const char *name, int param_count) { if (parser->macro_recording.active) { fprintf(stderr, "[error] nested macro definitions are not supported\n"); exit(1); } parser->macro_recording.active = true; parser->macro_recording.name = str_dup(name); VEC_INIT(&parser->macro_recording.tokens); parser->macro_recording.param_count = param_count; } static void parser_finish_macro(Parser *parser) { if (!parser->macro_recording.active) { fprintf(stderr, "[error] unexpected ';' closing a macro\n"); exit(1); } Word *word = (Word *)xmalloc(sizeof(Word)); memset(word, 0, sizeof(Word)); word->name = str_dup(parser->macro_recording.name); word->macro_expansion = (char **)xmalloc((parser->macro_recording.tokens.len + 1) * sizeof(char *)); word->macro_param_count = parser->macro_recording.param_count; for (size_t i = 0; i < parser->macro_recording.tokens.len; i++) { word->macro_expansion[i] = str_dup(parser->macro_recording.tokens.data[i]); } word->macro_expansion[parser->macro_recording.tokens.len] = NULL; dictionary_register(parser->dictionary, word); parser->macro_recording.active = false; } static void parser_emit_literal(Parser *parser, LiteralKind kind, int64_t i64, double f64, const char *str) { Op op = {0}; op.kind = OP_LITERAL; op.lit_kind = kind; if (kind == LIT_INT) { op.data.i64 = i64; } else if (kind == LIT_FLOAT) { op.data.f64 = f64; } else { op.data.str = str_dup(str); } parser_emit_op(parser, op); } static void parser_handle_token(Parser *parser, Token token); static void parse_tokens(Parser *parser, const char *source) { parser->source = str_dup(source); tokenizer_init(&parser->tokenizer, parser->reader, source); parser->tokenizer_exhausted = false; parser->pos = 0; parser->current_def = NULL; parser->control_len = 0; parser->label_counter = 0; parser->token_hook = NULL; parser->has_last_token = false; parser->custom_prelude = NULL; parser->custom_bss = NULL; parser->pending_inline_def = false; while (!parser_eof(parser)) { Token token = parser_next_token(parser); if (!token.lexeme) { break; } if (parser->macro_recording.active) { if (strcmp(token.lexeme, ";") == 0) { parser_finish_macro(parser); } else { VEC_PUSH(&parser->macro_recording.tokens, str_dup(token.lexeme)); } continue; } if (strcmp(token.lexeme, "[") == 0) { Op op = {0}; op.kind = OP_LIST_BEGIN; op.data.label = parser_new_label(parser, "list"); parser_emit_op(parser, op); parser_push_control(parser, "list"); parser->control_stack[parser->control_len - 1].begin_label = op.data.label; continue; } if (strcmp(token.lexeme, "]") == 0) { if (!parser->control_len || strcmp(parser->control_stack[parser->control_len - 1].type, "list") != 0) { fprintf(stderr, "[error] mismatched ']'\n"); exit(1); } char *label = parser->control_stack[parser->control_len - 1].begin_label; parser->control_len--; Op op = {0}; op.kind = OP_LIST_END; op.data.label = str_dup(label); parser_emit_op(parser, op); continue; } if (strcmp(token.lexeme, "word") == 0) { Token name_tok = parser_next_token(parser); if (!name_tok.lexeme) { fprintf(stderr, "[error] definition name missing after 'word'\n"); exit(1); } Definition *def = (Definition *)xmalloc(sizeof(Definition)); memset(def, 0, sizeof(Definition)); def->name = str_dup(name_tok.lexeme); VEC_INIT(&def->body); def->terminator = str_dup("end"); def->inline_def = parser->pending_inline_def; parser->pending_inline_def = false; parser->current_def = def; Word *word = dictionary_lookup(parser->dictionary, def->name); if (!word) { word = (Word *)xmalloc(sizeof(Word)); memset(word, 0, sizeof(Word)); word->name = str_dup(def->name); dictionary_register(parser->dictionary, word); } word->prev_definition = word->definition; word->prev_asm_def = word->asm_def; word->immediate = false; word->compile_only = false; word->definition = def; word->asm_def = NULL; word->inline_def = def->inline_def; if (parser->definition_stack_len + 1 > parser->definition_stack_cap) { parser->definition_stack_cap = parser->definition_stack_cap ? parser->definition_stack_cap * 2 : 8; parser->definition_stack = xrealloc(parser->definition_stack, parser->definition_stack_cap * sizeof(Word *)); } parser->definition_stack[parser->definition_stack_len++] = word; continue; } if (strcmp(token.lexeme, "end") == 0) { if (parser->control_len) { const char *type = parser->control_stack[parser->control_len - 1].type; if (strcmp(type, "if") == 0 || strcmp(type, "elif") == 0) { if (parser->control_stack[parser->control_len - 1].false_label) { Op op = {0}; op.kind = OP_LABEL; op.data.label = str_dup(parser->control_stack[parser->control_len - 1].false_label); parser_emit_op(parser, op); } if (parser->control_stack[parser->control_len - 1].end_label) { Op op = {0}; op.kind = OP_LABEL; op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label); parser_emit_op(parser, op); } parser->control_len--; continue; } if (strcmp(type, "else") == 0) { Op op = {0}; op.kind = OP_LABEL; op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label); parser_emit_op(parser, op); parser->control_len--; continue; } if (strcmp(type, "begin") == 0) { Op op = {0}; op.kind = OP_JUMP; op.data.label = str_dup(parser->control_stack[parser->control_len - 1].begin_label); parser_emit_op(parser, op); op.kind = OP_LABEL; op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label); parser_emit_op(parser, op); parser->control_len--; continue; } if (strcmp(type, "for") == 0) { Op op = {0}; op.kind = OP_FOR_END; op.data.loop.loop = str_dup(parser->control_stack[parser->control_len - 1].loop_label); op.data.loop.end = str_dup(parser->control_stack[parser->control_len - 1].end_label); parser_emit_op(parser, op); parser->control_len--; continue; } if (strcmp(type, "with") == 0) { StrVec *with_names = &parser->control_stack[parser->control_len - 1].with_names; for (size_t i = 0; i < with_names->len; i++) { const char *name = with_names->data[i]; strmap_set(&parser->variable_words, name, NULL); free(with_names->data[i]); } VEC_FREE(with_names); parser->control_len--; continue; } } if (parser->current_def) { Definition *def = parser->current_def; Word *word = parser->definition_stack[parser->definition_stack_len - 1]; def->immediate = word->immediate; def->compile_only = word->compile_only; def->inline_def = word->inline_def; Form form = {0}; form.kind = FORM_DEF; form.ptr = def; VEC_PUSH(&parser->module.forms, form); parser->current_def = NULL; parser->definition_stack_len--; parser->last_defined = word; continue; } fprintf(stderr, "[error] unexpected 'end'\n"); exit(1); } if (strcmp(token.lexeme, ":asm") == 0) { Token name_tok = parser_next_token(parser); if (!name_tok.lexeme) { fprintf(stderr, "[error] definition name missing after ':asm'\n"); exit(1); } bool effect_string_io = false; Token brace = parser_next_token(parser); if (brace.lexeme && strcmp(brace.lexeme, "(") == 0) { while (!parser_eof(parser)) { Token meta = parser_next_token(parser); if (!meta.lexeme) { break; } if (strcmp(meta.lexeme, ")") == 0) { break; } if (strcmp(meta.lexeme, "string-io") == 0) { effect_string_io = true; } } brace = parser_next_token(parser); } if (!brace.lexeme || strcmp(brace.lexeme, "{") != 0) { fprintf(stderr, "[error] expected '{' after asm name, got '%s'\n", brace.lexeme ? brace.lexeme : ""); exit(1); } size_t body_start = (size_t)brace.end; size_t body_end = body_start; while (!parser_eof(parser)) { Token next = parser_next_token(parser); if (next.lexeme && strcmp(next.lexeme, "}") == 0) { body_end = (size_t)next.start; break; } } if (body_end <= body_start) { fprintf(stderr, "[error] missing '}' to terminate asm body\n"); exit(1); } size_t body_len = body_end - body_start; char *body = (char *)xmalloc(body_len + 1); memcpy(body, parser->source + body_start, body_len); body[body_len] = '\0'; AsmDefinition *def = (AsmDefinition *)xmalloc(sizeof(AsmDefinition)); memset(def, 0, sizeof(AsmDefinition)); def->name = str_dup(name_tok.lexeme); def->body = body; def->effect_string_io = effect_string_io; Token term = parser_next_token(parser); if (!term.lexeme || strcmp(term.lexeme, ";") != 0) { fprintf(stderr, "[error] expected ';' after asm definition\n"); exit(1); } Word *word = dictionary_lookup(parser->dictionary, def->name); if (!word) { word = (Word *)xmalloc(sizeof(Word)); memset(word, 0, sizeof(Word)); word->name = str_dup(def->name); dictionary_register(parser->dictionary, word); } word->prev_definition = word->definition; word->prev_asm_def = word->asm_def; word->immediate = false; word->compile_only = false; word->asm_def = def; word->definition = NULL; Form form = {0}; form.kind = FORM_ASM; form.ptr = def; VEC_PUSH(&parser->module.forms, form); parser->last_defined = word; continue; } if (strcmp(token.lexeme, "extern") == 0) { Token tok1 = parser_next_token(parser); if (!tok1.lexeme) { fprintf(stderr, "[error] extern missing name or return type\n"); exit(1); } Token peek = parser_peek_token(parser); if (peek.lexeme && isdigit((unsigned char)peek.lexeme[0])) { Word *word = dictionary_lookup(parser->dictionary, tok1.lexeme); if (!word) { word = (Word *)xmalloc(sizeof(Word)); memset(word, 0, sizeof(Word)); word->name = str_dup(tok1.lexeme); dictionary_register(parser->dictionary, word); } word->is_extern = true; parser_next_token(parser); word->extern_inputs = atoi(peek.lexeme); Token next = parser_peek_token(parser); if (next.lexeme && isdigit((unsigned char)next.lexeme[0])) { parser_next_token(parser); word->extern_outputs = atoi(next.lexeme); } else { word->extern_outputs = 0; } continue; } Token tok2 = parser_next_token(parser); Token tok3 = parser_next_token(parser); if (tok2.lexeme && tok3.lexeme && strcmp(tok3.lexeme, "(") == 0) { Word *word = dictionary_lookup(parser->dictionary, tok2.lexeme); if (!word) { word = (Word *)xmalloc(sizeof(Word)); memset(word, 0, sizeof(Word)); word->name = str_dup(tok2.lexeme); dictionary_register(parser->dictionary, word); } word->is_extern = true; word->extern_ret_type = str_dup(tok1.lexeme); parser->uses_libc = true; if (strcmp(tok1.lexeme, "double") == 0 || strcmp(tok1.lexeme, "float") == 0) { if (strcmp(tok2.lexeme, "printf") != 0) { parser->uses_libm = true; } } word->extern_arg_types = NULL; word->extern_arg_count = 0; int cap = 0; Token arg = parser_peek_token(parser); if (arg.lexeme && strcmp(arg.lexeme, ")") == 0) { parser_next_token(parser); } else { while (true) { Token type_tok = parser_next_token(parser); if (!type_tok.lexeme) { fprintf(stderr, "[error] unterminated extern signature\n"); exit(1); } if (word->extern_arg_count + 1 > cap) { cap = cap ? cap * 2 : 4; word->extern_arg_types = xrealloc(word->extern_arg_types, (size_t)cap * sizeof(char *)); } word->extern_arg_types[word->extern_arg_count++] = str_dup(type_tok.lexeme); if (strcmp(type_tok.lexeme, "double") == 0 || strcmp(type_tok.lexeme, "float") == 0) { if (strcmp(tok2.lexeme, "printf") != 0) { parser->uses_libm = true; } } Token maybe_name = parser_peek_token(parser); if (maybe_name.lexeme && strcmp(maybe_name.lexeme, ",") != 0 && strcmp(maybe_name.lexeme, ")") != 0) { parser_next_token(parser); } Token sep = parser_next_token(parser); if (!sep.lexeme) { fprintf(stderr, "[error] unterminated extern signature\n"); exit(1); } if (strcmp(sep.lexeme, ")") == 0) { break; } if (strcmp(sep.lexeme, ",") != 0) { fprintf(stderr, "[error] expected ',' or ')' in extern signature\n"); exit(1); } } } continue; } TokenVec reinject; VEC_INIT(&reinject); if (tok2.lexeme) { VEC_PUSH(&reinject, tok2); } if (tok3.lexeme) { VEC_PUSH(&reinject, tok3); } parser_inject_tokens(parser, &reinject); Word *word = dictionary_lookup(parser->dictionary, tok1.lexeme); if (!word) { word = (Word *)xmalloc(sizeof(Word)); memset(word, 0, sizeof(Word)); word->name = str_dup(tok1.lexeme); dictionary_register(parser->dictionary, word); } word->is_extern = true; continue; } if (strcmp(token.lexeme, "if") == 0) { char *false_label = parser_new_label(parser, "if_false"); Op op = {0}; op.kind = OP_BRANCH_ZERO; op.data.label = str_dup(false_label); parser_emit_op(parser, op); parser_push_control(parser, "if"); parser->control_stack[parser->control_len - 1].false_label = false_label; continue; } if (strcmp(token.lexeme, "else") == 0) { if (!parser->control_len || (strcmp(parser->control_stack[parser->control_len - 1].type, "if") != 0 && strcmp(parser->control_stack[parser->control_len - 1].type, "elif") != 0)) { fprintf(stderr, "[error] 'else' without matching if\n"); exit(1); } char *end_label = parser->control_stack[parser->control_len - 1].end_label; if (!end_label) { end_label = parser_new_label(parser, "if_end"); } Op jump = {0}; jump.kind = OP_JUMP; jump.data.label = str_dup(end_label); parser_emit_op(parser, jump); Op label = {0}; label.kind = OP_LABEL; label.data.label = str_dup(parser->control_stack[parser->control_len - 1].false_label); parser_emit_op(parser, label); Token next = parser_peek_token(parser); if (next.lexeme && next.line == token.line && strcmp(next.lexeme, "if") != 0) { TokenVec cond_tokens; VEC_INIT(&cond_tokens); bool shorthand = false; while (!parser_eof(parser)) { Token cond = parser_next_token(parser); if (!cond.lexeme) { break; } if (cond.line != token.line) { VEC_PUSH(&cond_tokens, cond); break; } if (strcmp(cond.lexeme, "if") == 0) { shorthand = true; break; } VEC_PUSH(&cond_tokens, cond); } if (shorthand) { for (size_t i = 0; i < cond_tokens.len; i++) { parser_handle_token(parser, cond_tokens.data[i]); } char *false_label = parser_new_label(parser, "if_false"); Op br = {0}; br.kind = OP_BRANCH_ZERO; br.data.label = str_dup(false_label); parser_emit_op(parser, br); parser->control_stack[parser->control_len - 1].type = str_dup("elif"); parser->control_stack[parser->control_len - 1].false_label = false_label; parser->control_stack[parser->control_len - 1].end_label = end_label; } else { parser_inject_tokens(parser, &cond_tokens); parser->control_stack[parser->control_len - 1].type = str_dup("else"); parser->control_stack[parser->control_len - 1].end_label = end_label; } } else { parser->control_stack[parser->control_len - 1].type = str_dup("else"); parser->control_stack[parser->control_len - 1].end_label = end_label; } continue; } if (strcmp(token.lexeme, "for") == 0) { char *loop_label = parser_new_label(parser, "for_loop"); char *end_label = parser_new_label(parser, "for_end"); Op op = {0}; op.kind = OP_FOR_BEGIN; op.data.loop.loop = str_dup(loop_label); op.data.loop.end = str_dup(end_label); parser_emit_op(parser, op); parser_push_control(parser, "for"); parser->control_stack[parser->control_len - 1].loop_label = loop_label; parser->control_stack[parser->control_len - 1].end_label = end_label; continue; } if (strcmp(token.lexeme, "while") == 0) { char *begin_label = parser_new_label(parser, "begin"); char *end_label = parser_new_label(parser, "end"); Op label = {0}; label.kind = OP_LABEL; label.data.label = str_dup(begin_label); parser_emit_op(parser, label); parser_push_control(parser, "begin"); parser->control_stack[parser->control_len - 1].begin_label = begin_label; parser->control_stack[parser->control_len - 1].end_label = end_label; continue; } if (strcmp(token.lexeme, "do") == 0) { if (!parser->control_len || strcmp(parser->control_stack[parser->control_len - 1].type, "begin") != 0) { fprintf(stderr, "[error] 'do' without matching while\n"); exit(1); } Op op = {0}; op.kind = OP_BRANCH_ZERO; op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label); parser_emit_op(parser, op); continue; } parser_handle_token(parser, token); } if (parser->macro_recording.active) { fprintf(stderr, "[error] unterminated macro definition\n"); exit(1); } if (parser->control_len) { fprintf(stderr, "[error] unclosed control structure\n"); exit(1); } if (parser->current_def) { fprintf(stderr, "[error] unclosed definition at EOF\n"); exit(1); } } static void parser_expand_macro(Parser *parser, Word *word) { int param_count = word->macro_param_count; char **params = NULL; if (param_count > 0) { params = (char **)xmalloc((size_t)param_count * sizeof(char *)); for (int i = 0; i < param_count; i++) { Token tok = parser_next_token(parser); if (!tok.lexeme) { fprintf(stderr, "[error] not enough macro parameters for '%s'\n", word->name); exit(1); } params[i] = str_dup(tok.lexeme); } } TokenVec injected; VEC_INIT(&injected); for (size_t i = 0; word->macro_expansion && word->macro_expansion[i]; i++) { const char *item = word->macro_expansion[i]; if (item && item[0] == '$' && isdigit((unsigned char)item[1])) { int idx = atoi(item + 1) - 1; if (idx >= 0 && idx < param_count) { Token tok = {0}; tok.lexeme = str_dup(params[idx]); VEC_PUSH(&injected, tok); continue; } } Token tok = {0}; tok.lexeme = str_dup(item); VEC_PUSH(&injected, tok); } parser_inject_tokens(parser, &injected); for (int i = 0; i < param_count; i++) { free(params[i]); } free(params); } static void parser_handle_struct(Parser *parser) { Token name_tok = parser_next_token(parser); if (!name_tok.lexeme) { fprintf(stderr, "[error] struct missing name\n"); exit(1); } typedef struct { char *name; int64_t size; int64_t offset; } Field; Field *fields = NULL; size_t field_len = 0; size_t field_cap = 0; int64_t offset = 0; while (!parser_eof(parser)) { Token tok = parser_next_token(parser); if (!tok.lexeme) { break; } if (strcmp(tok.lexeme, "end") == 0) { break; } if (strcmp(tok.lexeme, "field") != 0) { fprintf(stderr, "[error] unexpected token '%s' in struct\n", tok.lexeme); exit(1); } Token field_name = parser_next_token(parser); Token field_size = parser_next_token(parser); if (!field_name.lexeme || !field_size.lexeme) { fprintf(stderr, "[error] malformed struct field\n"); exit(1); } int64_t size = 0; if (!try_parse_int(field_size.lexeme, &size)) { fprintf(stderr, "[error] invalid struct field size '%s'\n", field_size.lexeme); exit(1); } if (field_len + 1 > field_cap) { field_cap = field_cap ? field_cap * 2 : 8; fields = xrealloc(fields, field_cap * sizeof(Field)); } fields[field_len++] = (Field){str_dup(field_name.lexeme), size, offset}; offset += size; } TokenVec injected; VEC_INIT(&injected); Token tok = {0}; tok.lexeme = str_dup("word"); VEC_PUSH(&injected, tok); tok.lexeme = str_printf("%s.size", name_tok.lexeme); VEC_PUSH(&injected, tok); tok.lexeme = str_printf("%lld", (long long)offset); VEC_PUSH(&injected, tok); tok.lexeme = str_dup("end"); VEC_PUSH(&injected, tok); for (size_t i = 0; i < field_len; i++) { Field f = fields[i]; Token t = {0}; t.lexeme = str_dup("word"); VEC_PUSH(&injected, t); t.lexeme = str_printf("%s.%s.size", name_tok.lexeme, f.name); VEC_PUSH(&injected, t); t.lexeme = str_printf("%lld", (long long)f.size); VEC_PUSH(&injected, t); t.lexeme = str_dup("end"); VEC_PUSH(&injected, t); t.lexeme = str_dup("word"); VEC_PUSH(&injected, t); t.lexeme = str_printf("%s.%s.offset", name_tok.lexeme, f.name); VEC_PUSH(&injected, t); t.lexeme = str_printf("%lld", (long long)f.offset); VEC_PUSH(&injected, t); t.lexeme = str_dup("end"); VEC_PUSH(&injected, t); t.lexeme = str_dup("word"); VEC_PUSH(&injected, t); t.lexeme = str_printf("%s.%s@", name_tok.lexeme, f.name); VEC_PUSH(&injected, t); t.lexeme = str_printf("%s.%s.offset", name_tok.lexeme, f.name); VEC_PUSH(&injected, t); t.lexeme = str_dup("+"); VEC_PUSH(&injected, t); t.lexeme = str_dup("@"); VEC_PUSH(&injected, t); t.lexeme = str_dup("end"); VEC_PUSH(&injected, t); t.lexeme = str_dup("word"); VEC_PUSH(&injected, t); t.lexeme = str_printf("%s.%s!", name_tok.lexeme, f.name); VEC_PUSH(&injected, t); t.lexeme = str_dup("swap"); VEC_PUSH(&injected, t); t.lexeme = str_printf("%s.%s.offset", name_tok.lexeme, f.name); VEC_PUSH(&injected, t); t.lexeme = str_dup("+"); VEC_PUSH(&injected, t); t.lexeme = str_dup("swap"); VEC_PUSH(&injected, t); t.lexeme = str_dup("!"); VEC_PUSH(&injected, t); t.lexeme = str_dup("end"); VEC_PUSH(&injected, t); free(f.name); } free(fields); parser_inject_tokens(parser, &injected); } static void parser_handle_with(Parser *parser) { StrVec names; VEC_INIT(&names); while (!parser_eof(parser)) { Token tok = parser_next_token(parser); if (!tok.lexeme) { fprintf(stderr, "[error] unterminated with block\n"); exit(1); } if (strcmp(tok.lexeme, "in") == 0) { break; } VEC_PUSH(&names, str_dup(tok.lexeme)); } for (size_t i = 0; i < names.len; i++) { const char *name = names.data[i]; int id = parser->label_counter++; char *cell_label = str_printf("__with_%s_%d_cell", name, id); char *word_name = str_printf("__with_%s_%d", name, id); strmap_set(&parser->variable_labels, name, cell_label); strmap_set(&parser->variable_words, name, str_dup(word_name)); AsmDefinition *def = (AsmDefinition *)xmalloc(sizeof(AsmDefinition)); memset(def, 0, sizeof(AsmDefinition)); def->name = str_dup(word_name); def->body = str_printf(" lea rax, [rel %s]\n sub r12, 8\n mov [r12], rax\n", cell_label); Word *word = dictionary_lookup(parser->dictionary, word_name); if (!word) { word = (Word *)xmalloc(sizeof(Word)); memset(word, 0, sizeof(Word)); word->name = str_dup(word_name); dictionary_register(parser->dictionary, word); } word->asm_def = def; Form form = {0}; form.kind = FORM_ASM; form.ptr = def; VEC_PUSH(&parser->module.forms, form); } parser_push_control(parser, "with"); parser->control_stack[parser->control_len - 1].with_names = names; TokenVec injected; VEC_INIT(&injected); for (size_t i = names.len; i-- > 0;) { Token t = {0}; char *label = (char *)strmap_get(&parser->variable_words, names.data[i]); t.lexeme = str_dup(label); VEC_PUSH(&injected, t); t.lexeme = str_dup("swap"); VEC_PUSH(&injected, t); t.lexeme = str_dup("!"); VEC_PUSH(&injected, t); } parser_inject_tokens(parser, &injected); names.data = NULL; names.len = 0; names.cap = 0; } static void parser_handle_token(Parser *parser, Token token) { if (parser->token_hook) { Word *hook = dictionary_lookup(parser->dictionary, parser->token_hook); if (!hook) { fprintf(stderr, "[error] unknown token hook '%s'\n", parser->token_hook); exit(1); } ct_stack_push(&parser->ct_vm->stack, ct_make_token(token)); ct_word_call(parser->ct_vm, hook); CtValue handled = ct_stack_pop(&parser->ct_vm->stack); if (ct_truthy(handled)) { return; } } if (strcmp(token.lexeme, "macro") == 0) { Token name = parser_next_token(parser); if (!name.lexeme) { fprintf(stderr, "[error] macro missing name\n"); exit(1); } int param_count = 0; Token maybe_num = parser_peek_token(parser); if (maybe_num.lexeme && isdigit((unsigned char)maybe_num.lexeme[0])) { parser_next_token(parser); param_count = atoi(maybe_num.lexeme); } parser_start_macro(parser, name.lexeme, param_count); return; } if (strcmp(token.lexeme, "inline") == 0) { parser->pending_inline_def = true; return; } if (strcmp(token.lexeme, "immediate") == 0) { if (!parser->last_defined) { fprintf(stderr, "[error] immediate used without a preceding definition\n"); exit(1); } parser->last_defined->immediate = true; if (parser->last_defined->definition) { parser->last_defined->definition->immediate = true; } if (parser->last_defined->asm_def) { parser->last_defined->asm_def->immediate = true; } return; } if (strcmp(token.lexeme, "compile-only") == 0) { if (!parser->last_defined) { fprintf(stderr, "[error] compile-only used without a preceding definition\n"); exit(1); } parser->last_defined->compile_only = true; if (parser->last_defined->definition) { parser->last_defined->definition->compile_only = true; } if (parser->last_defined->asm_def) { parser->last_defined->asm_def->compile_only = true; } if (parser->last_defined->prev_definition) { parser->last_defined->ct_definition = parser->last_defined->definition; parser->last_defined->definition = parser->last_defined->prev_definition; parser->last_defined->prev_definition = NULL; } if (parser->last_defined->prev_asm_def) { parser->last_defined->ct_asm_def = parser->last_defined->asm_def; parser->last_defined->asm_def = parser->last_defined->prev_asm_def; parser->last_defined->prev_asm_def = NULL; } return; } if (strcmp(token.lexeme, "compile-time") == 0) { Token name = parser_next_token(parser); if (!name.lexeme) { fprintf(stderr, "[error] compile-time missing word name\n"); exit(1); } Word *word = dictionary_lookup(parser->dictionary, name.lexeme); if (!word) { fprintf(stderr, "[error] unknown word '%s' for compile-time\n", name.lexeme); exit(1); } ct_word_call(parser->ct_vm, word); if (parser->current_def) { Op op = {0}; op.kind = OP_WORD; op.data.word = str_dup(name.lexeme); parser_emit_op(parser, op); } return; } if (strcmp(token.lexeme, "here") == 0) { SourceLocation *loc = location_for_token(parser, token); char *text = str_printf("%s:%d:%d", loc->path, loc->line, loc->column); parser_emit_literal(parser, LIT_STRING, 0, 0.0, text); free(text); return; } if (strcmp(token.lexeme, "label") == 0) { Token name = parser_next_token(parser); if (!name.lexeme) { fprintf(stderr, "[error] label missing name\n"); exit(1); } Op op = {0}; op.kind = OP_LABEL; op.data.label = str_dup(name.lexeme); parser_emit_op(parser, op); return; } if (strcmp(token.lexeme, "goto") == 0) { Token name = parser_next_token(parser); if (!name.lexeme) { fprintf(stderr, "[error] goto missing label\n"); exit(1); } Op op = {0}; op.kind = OP_JUMP; op.data.label = str_dup(name.lexeme); parser_emit_op(parser, op); return; } if (strcmp(token.lexeme, "struct") == 0) { parser_handle_struct(parser); return; } if (strcmp(token.lexeme, "with") == 0) { parser_handle_with(parser); return; } char *str_lit = NULL; if (parse_string_literal(token.lexeme, &str_lit)) { parser_emit_literal(parser, LIT_STRING, 0, 0.0, str_lit); free(str_lit); return; } int64_t int_val = 0; if (try_parse_int(token.lexeme, &int_val)) { parser_emit_literal(parser, LIT_INT, int_val, 0.0, NULL); return; } double float_val = 0.0; if (try_parse_float(token.lexeme, &float_val)) { parser_emit_literal(parser, LIT_FLOAT, 0, float_val, NULL); return; } const char *var_label = (const char *)strmap_get(&parser->variable_words, token.lexeme); if (var_label) { Token peek = parser_peek_token(parser); Op op = {0}; op.kind = OP_WORD; op.data.word = str_dup(var_label); parser_emit_op(parser, op); if (!peek.lexeme || strcmp(peek.lexeme, "!") != 0) { op.data.word = str_dup("@"); parser_emit_op(parser, op); } return; } Word *word = dictionary_lookup(parser->dictionary, token.lexeme); if (word && word->macro_expansion) { parser_expand_macro(parser, word); return; } if (word && word->immediate) { ct_word_call(parser->ct_vm, word); if (parser->current_def && !word->compile_only) { Op op = {0}; op.kind = OP_WORD; op.data.word = str_dup(word->name); parser_emit_op(parser, op); } return; } if (word && word->compile_only && parser->current_def && parser->definition_stack_len) { Word *current = parser->definition_stack[parser->definition_stack_len - 1]; current->compile_only = true; if (current->definition) { current->definition->compile_only = true; } } if (!word) { word = (Word *)xmalloc(sizeof(Word)); memset(word, 0, sizeof(Word)); word->name = str_dup(token.lexeme); dictionary_register(parser->dictionary, word); } Op op = {0}; op.kind = OP_WORD; op.data.word = str_dup(token.lexeme); parser_emit_op(parser, op); } int main(int argc, char **argv) { StrVec inputs; StrVec include_dirs; StrVec libs; VEC_INIT(&inputs); VEC_INIT(&include_dirs); VEC_INIT(&libs); const char *output = "a.out"; const char *temp_dir = "build"; bool emit_asm = false; bool debug = false; for (int i = 1; i < argc; i++) { const char *arg = argv[i]; if (strcmp(arg, "-o") == 0 && i + 1 < argc) { output = argv[++i]; continue; } if (strcmp(arg, "--emit-asm") == 0) { emit_asm = true; continue; } if (strcmp(arg, "--dbg") == 0) { debug = true; continue; } if ((strcmp(arg, "-I") == 0 || strcmp(arg, "--include") == 0) && i + 1 < argc) { VEC_PUSH(&include_dirs, str_dup(argv[++i])); continue; } if (strncmp(arg, "-I", 2) == 0 && strlen(arg) > 2) { VEC_PUSH(&include_dirs, str_dup(arg + 2)); continue; } if ((strcmp(arg, "-l") == 0) && i + 1 < argc) { const char *lib = argv[++i]; if (strchr(lib, '/') || strstr(lib, ".so") || strstr(lib, ".a")) { VEC_PUSH(&libs, str_printf("-l:%s", lib)); } else { VEC_PUSH(&libs, str_printf("-l%s", lib)); } continue; } if (strncmp(arg, "-l", 2) == 0 && strlen(arg) > 2) { VEC_PUSH(&libs, str_dup(arg)); continue; } if (strcmp(arg, "--temp-dir") == 0 && i + 1 < argc) { temp_dir = argv[++i]; continue; } if (arg[0] == '-') { fprintf(stderr, "[error] unknown option: %s\n", arg); return 1; } VEC_PUSH(&inputs, str_dup(arg)); } if (inputs.len == 0) { fprintf(stderr, "usage: %s [-o output] [--emit-asm]\n", argv[0]); return 1; } VEC_PUSH(&include_dirs, str_dup(".")); VEC_PUSH(&include_dirs, str_dup("./stdlib")); StrMap visited; strmap_init(&visited); StrVec sources; VEC_INIT(&sources); FileSpanVec file_spans; VEC_INIT(&file_spans); int line_counter = 1; for (size_t i = 0; i < inputs.len; i++) { char *expanded = expand_imports(inputs.data[i], &include_dirs, &visited, &file_spans, &line_counter); VEC_PUSH(&sources, expanded); } size_t total = 0; for (size_t i = 0; i < sources.len; i++) { total += strlen(sources.data[i]); } char *combined = (char *)xmalloc(total + 1); combined[0] = '\0'; for (size_t i = 0; i < sources.len; i++) { strcat(combined, sources.data[i]); } Dictionary dict; dictionary_init(&dict); Reader reader; reader_init(&reader); Parser parser; parser_init(&parser, &dict, &reader); parser.file_spans = file_spans; parser.primary_path = inputs.len ? str_dup(inputs.data[0]) : NULL; CompileTimeVM vm; ct_vm_init(&vm, &parser); parser.ct_vm = &vm; bootstrap_dictionary(&dict, &parser, &vm); register_builtin_syscall(&parser); parse_tokens(&parser, combined); if (parser.uses_libc && !strvec_contains(&libs, "-lc")) { VEC_PUSH(&libs, str_dup("-lc")); } if (parser.uses_libm && !strvec_contains(&libs, "-lm")) { VEC_PUSH(&libs, str_dup("-lm")); } Emission emission = emit_module(&parser, &dict, debug); char *asm_text = emission_snapshot(&emission); char *asm_path = NULL; char *obj_path = NULL; if (emit_asm) { asm_path = str_dup(output); } else { mkdir(temp_dir, 0755); const char *base = strrchr(output, '/'); base = base ? base + 1 : output; asm_path = str_printf("%s/%s.asm", temp_dir, base); obj_path = str_printf("%s/%s.o", temp_dir, base); } write_file(asm_path, asm_text); if (emit_asm) { return 0; } run_nasm(asm_path, obj_path, debug); run_linker(obj_path, output, debug, &libs, false, parser.uses_libc); return 0; }