From 02f54b8f8614d1731ce7fc1a56ca6ea9d7130d39 Mon Sep 17 00:00:00 2001 From: IgorCielniak Date: Mon, 16 Feb 2026 09:20:34 +0100 Subject: [PATCH] draft of the c version of the compiler --- main.c | 4384 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 4384 insertions(+) create mode 100644 main.c diff --git a/main.c b/main.c new file mode 100644 index 0000000..3c7431e --- /dev/null +++ b/main.c @@ -0,0 +1,4384 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0])) + +static void *xmalloc(size_t size) { + void *ptr = malloc(size); + if (!ptr) { + fprintf(stderr, "[error] out of memory\n"); + exit(1); + } + return ptr; +} + +static void *xrealloc(void *ptr, size_t size) { + void *out = realloc(ptr, size); + if (!out) { + fprintf(stderr, "[error] out of memory\n"); + exit(1); + } + return out; +} + +static char *str_dup(const char *src) { + if (!src) { + return NULL; + } + size_t len = strlen(src); + char *out = (char *)xmalloc(len + 1); + memcpy(out, src, len + 1); + return out; +} + +static char *str_printf(const char *fmt, ...) { + va_list args; + va_start(args, fmt); + va_list args2; + va_copy(args2, args); + int needed = vsnprintf(NULL, 0, fmt, args2); + va_end(args2); + if (needed < 0) { + va_end(args); + return str_dup(""); + } + char *buf = (char *)xmalloc((size_t)needed + 1); + vsnprintf(buf, (size_t)needed + 1, fmt, args); + va_end(args); + return buf; +} + +static bool str_starts_with(const char *text, const char *prefix) { + if (!text || !prefix) { + return false; + } + size_t len = strlen(prefix); + return strncmp(text, prefix, len) == 0; +} + +static bool str_equals(const char *a, const char *b) { + if (!a || !b) { + return false; + } + return strcmp(a, b) == 0; +} + +static uint64_t hash_str(const char *text) { + uint64_t hash = 1469598103934665603ULL; + while (*text) { + hash ^= (unsigned char)(*text++); + hash *= 1099511628211ULL; + } + return hash; +} + +#define VEC_DECL(name, type) \ + typedef struct { \ + type *data; \ + size_t len; \ + size_t cap; \ + } name + +#define VEC_INIT(vec) do { (vec)->data = NULL; (vec)->len = 0; (vec)->cap = 0; } while (0) + +#define VEC_FREE(vec) do { free((vec)->data); (vec)->data = NULL; (vec)->len = 0; (vec)->cap = 0; } while (0) + +#define VEC_PUSH(vec, value) do { \ + if ((vec)->len + 1 > (vec)->cap) { \ + (vec)->cap = (vec)->cap ? (vec)->cap * 2 : 8; \ + (vec)->data = xrealloc((vec)->data, (vec)->cap * sizeof(*(vec)->data)); \ + } \ + (vec)->data[(vec)->len++] = (value); \ +} while (0) + +#define VEC_POP(vec) ((vec)->len ? (vec)->data[--(vec)->len] : (vec)->data[0]) + +VEC_DECL(StrVec, char *); +VEC_DECL(IntVec, int); + +static bool strvec_contains(StrVec *vec, const char *value) { + if (!vec || !value) { + return false; + } + for (size_t i = 0; i < vec->len; i++) { + if (strcmp(vec->data[i], value) == 0) { + return true; + } + } + return false; +} + +typedef struct { + char *lexeme; + int line; + int column; + int start; + int end; +} Token; + +VEC_DECL(TokenVec, Token); + +typedef struct { + char *path; + int line; + int column; +} SourceLocation; + +typedef struct { + char *path; + int start_line; + int end_line; + int local_start_line; +} FileSpan; + +VEC_DECL(FileSpanVec, FileSpan); + +typedef enum { + OP_LITERAL, + OP_WORD, + OP_BRANCH_ZERO, + OP_JUMP, + OP_LABEL, + OP_FOR_BEGIN, + OP_FOR_END, + OP_LIST_BEGIN, + OP_LIST_END +} OpKind; + +typedef enum { + LIT_INT, + LIT_FLOAT, + LIT_STRING +} LiteralKind; + +typedef struct { + OpKind kind; + LiteralKind lit_kind; + SourceLocation *loc; + union { + int64_t i64; + double f64; + char *str; + char *word; + char *label; + struct { + char *loop; + char *end; + } loop; + } data; +} Op; + +VEC_DECL(OpVec, Op); + +typedef struct { + char *name; + OpVec body; + bool immediate; + bool compile_only; + char *terminator; + bool inline_def; +} Definition; + +typedef struct { + char *name; + char *body; + bool immediate; + bool compile_only; + bool effect_string_io; +} AsmDefinition; + +typedef enum { + FORM_DEF, + FORM_ASM +} FormKind; + +typedef struct { + FormKind kind; + void *ptr; +} Form; + +VEC_DECL(FormVec, Form); + +typedef struct { + StrVec text; + StrVec data; + StrVec bss; +} Emission; + +typedef struct { + StrVec *text; + bool debug_enabled; + SourceLocation *current_loc; +} FunctionEmitter; + +typedef struct Word Word; +typedef struct CompileTimeVM CompileTimeVM; +typedef struct Parser Parser; + +typedef void (*MacroFn)(Parser *parser); +typedef void (*IntrinsicEmitter)(FunctionEmitter *builder); +typedef void (*CompileTimeIntrinsic)(CompileTimeVM *vm); + +struct Word { + char *name; + bool immediate; + bool compile_only; + bool compile_time_override; + bool is_extern; + int extern_inputs; + int extern_outputs; + char **extern_arg_types; + int extern_arg_count; + char *extern_ret_type; + bool inline_def; + Definition *definition; + Definition *ct_definition; + Definition *prev_definition; + AsmDefinition *asm_def; + AsmDefinition *ct_asm_def; + AsmDefinition *prev_asm_def; + MacroFn macro; + IntrinsicEmitter intrinsic; + CompileTimeIntrinsic ct_intrinsic; + char **macro_expansion; + int macro_param_count; +}; + +typedef struct { + char **keys; + void **values; + size_t cap; + size_t len; +} StrMap; + +static void strmap_init(StrMap *map) { + map->keys = NULL; + map->values = NULL; + map->cap = 0; + map->len = 0; +} + +static void strmap_free(StrMap *map) { + free(map->keys); + free(map->values); + map->keys = NULL; + map->values = NULL; + map->cap = 0; + map->len = 0; +} + +static void strmap_grow(StrMap *map) { + size_t new_cap = map->cap ? map->cap * 2 : 128; + char **new_keys = (char **)xmalloc(new_cap * sizeof(char *)); + void **new_vals = (void **)xmalloc(new_cap * sizeof(void *)); + for (size_t i = 0; i < new_cap; i++) { + new_keys[i] = NULL; + new_vals[i] = NULL; + } + if (map->keys) { + for (size_t i = 0; i < map->cap; i++) { + if (!map->keys[i]) { + continue; + } + uint64_t hash = hash_str(map->keys[i]); + size_t idx = (size_t)(hash & (new_cap - 1)); + while (new_keys[idx]) { + idx = (idx + 1) & (new_cap - 1); + } + new_keys[idx] = map->keys[i]; + new_vals[idx] = map->values[i]; + } + } + free(map->keys); + free(map->values); + map->keys = new_keys; + map->values = new_vals; + map->cap = new_cap; +} + +static void strmap_set(StrMap *map, const char *key, void *value) { + if (!map->cap || (map->len + 1) * 3 >= map->cap * 2) { + strmap_grow(map); + } + uint64_t hash = hash_str(key); + size_t idx = (size_t)(hash & (map->cap - 1)); + while (map->keys[idx]) { + if (strcmp(map->keys[idx], key) == 0) { + map->values[idx] = value; + return; + } + idx = (idx + 1) & (map->cap - 1); + } + map->keys[idx] = str_dup(key); + map->values[idx] = value; + map->len++; +} + +static void *strmap_get(StrMap *map, const char *key) { + if (!map->cap) { + return NULL; + } + uint64_t hash = hash_str(key); + size_t idx = (size_t)(hash & (map->cap - 1)); + size_t start = idx; + while (map->keys[idx]) { + if (strcmp(map->keys[idx], key) == 0) { + return map->values[idx]; + } + idx = (idx + 1) & (map->cap - 1); + if (idx == start) { + break; + } + } + return NULL; +} + +static bool strmap_has(StrMap *map, const char *key) { + return strmap_get(map, key) != NULL; +} + +typedef struct { + StrMap words; +} Dictionary; + +static void dictionary_init(Dictionary *dict) { + strmap_init(&dict->words); +} + +static Word *dictionary_lookup(Dictionary *dict, const char *name) { + return (Word *)strmap_get(&dict->words, name); +} + +static void dictionary_register(Dictionary *dict, Word *word) { + strmap_set(&dict->words, word->name, word); +} + +typedef struct { + StrVec custom_tokens; + StrVec token_order; +} Reader; + +static void reader_init(Reader *reader) { + VEC_INIT(&reader->custom_tokens); + VEC_INIT(&reader->token_order); + const char *defaults[] = {"(", ")", "{", "}", ";", ",", "[", "]"}; + for (size_t i = 0; i < ARRAY_LEN(defaults); i++) { + VEC_PUSH(&reader->custom_tokens, str_dup(defaults[i])); + } + for (size_t i = 0; i < reader->custom_tokens.len; i++) { + VEC_PUSH(&reader->token_order, reader->custom_tokens.data[i]); + } +} + +static void reader_resort(Reader *reader) { + for (size_t i = 0; i < reader->token_order.len; i++) { + for (size_t j = i + 1; j < reader->token_order.len; j++) { + if (strlen(reader->token_order.data[j]) > strlen(reader->token_order.data[i])) { + char *tmp = reader->token_order.data[i]; + reader->token_order.data[i] = reader->token_order.data[j]; + reader->token_order.data[j] = tmp; + } + } + } +} + +static void reader_add_tokens(Reader *reader, const char *tok) { + if (!tok || !*tok) { + return; + } + for (size_t i = 0; i < reader->custom_tokens.len; i++) { + if (strcmp(reader->custom_tokens.data[i], tok) == 0) { + return; + } + } + VEC_PUSH(&reader->custom_tokens, str_dup(tok)); + VEC_PUSH(&reader->token_order, reader->custom_tokens.data[reader->custom_tokens.len - 1]); + reader_resort(reader); +} + +static void reader_add_token_chars(Reader *reader, const char *chars) { + if (!chars) { + return; + } + char buf[2] = {0, 0}; + for (const char *p = chars; *p; p++) { + buf[0] = *p; + reader_add_tokens(reader, buf); + } +} + +typedef struct { + const char *source; + size_t length; + size_t index; + int line; + int column; + Reader *reader; +} Tokenizer; + +static void tokenizer_init(Tokenizer *tokenizer, Reader *reader, const char *source) { + tokenizer->source = source; + tokenizer->length = strlen(source); + tokenizer->index = 0; + tokenizer->line = 1; + tokenizer->column = 0; + tokenizer->reader = reader; +} + +static bool tokenizer_next(Tokenizer *tokenizer, Token *out) { + const char *src = tokenizer->source; + size_t len = tokenizer->length; + size_t idx = tokenizer->index; + int line = tokenizer->line; + int col = tokenizer->column; + + while (idx < len) { + char ch = src[idx]; + if (ch == '"') { + size_t start = idx; + int token_line = line; + int token_col = col; + idx++; + col++; + bool escape = false; + while (idx < len) { + char c = src[idx++]; + if (c == '\n') { + line++; + col = 0; + } else { + col++; + } + if (escape) { + escape = false; + continue; + } + if (c == '\\') { + escape = true; + continue; + } + if (c == '"') { + size_t end = idx; + size_t tok_len = end - start; + char *lex = (char *)xmalloc(tok_len + 1); + memcpy(lex, src + start, tok_len); + lex[tok_len] = '\0'; + out->lexeme = lex; + out->line = token_line; + out->column = token_col; + out->start = (int)start; + out->end = (int)end; + tokenizer->index = idx; + tokenizer->line = line; + tokenizer->column = col; + return true; + } + } + fprintf(stderr, "[error] unterminated string literal\n"); + exit(1); + } + if (ch == '#') { + while (idx < len && src[idx] != '\n') { + idx++; + } + continue; + } + if (ch == ';' && idx + 1 < len && isalpha((unsigned char)src[idx + 1])) { + size_t start = idx; + int token_line = line; + int token_col = col; + idx++; + col++; + size_t tok_len = idx - start; + char *lex = (char *)xmalloc(tok_len + 1); + memcpy(lex, src + start, tok_len); + lex[tok_len] = '\0'; + out->lexeme = lex; + out->line = token_line; + out->column = token_col; + out->start = (int)start; + out->end = (int)idx; + tokenizer->index = idx; + tokenizer->line = line; + tokenizer->column = col; + return true; + } + + bool matched = false; + const char *matched_tok = NULL; + for (size_t i = 0; i < tokenizer->reader->token_order.len; i++) { + const char *tok = tokenizer->reader->token_order.data[i]; + size_t tok_len = strlen(tok); + if (tok_len == 0) { + continue; + } + if (idx + tok_len <= len && strncmp(src + idx, tok, tok_len) == 0) { + matched = true; + matched_tok = tok; + size_t start = idx; + int token_line = line; + int token_col = col; + idx += tok_len; + col += (int)tok_len; + out->lexeme = str_dup(matched_tok); + out->line = token_line; + out->column = token_col; + out->start = (int)start; + out->end = (int)idx; + tokenizer->index = idx; + tokenizer->line = line; + tokenizer->column = col; + return true; + } + } + if (matched) { + continue; + } + if (isspace((unsigned char)ch)) { + if (ch == '\n') { + line++; + col = 0; + } else { + col++; + } + idx++; + continue; + } + size_t start = idx; + int token_line = line; + int token_col = col; + while (idx < len) { + char c = src[idx]; + bool is_sep = isspace((unsigned char)c) || c == '"' || c == '#'; + if (is_sep) { + break; + } + bool token_hit = false; + for (size_t i = 0; i < tokenizer->reader->token_order.len; i++) { + const char *tok = tokenizer->reader->token_order.data[i]; + size_t tok_len = strlen(tok); + if (tok_len && idx + tok_len <= len && strncmp(src + idx, tok, tok_len) == 0) { + token_hit = true; + break; + } + } + if (token_hit) { + break; + } + idx++; + col++; + } + size_t tok_len = idx - start; + if (tok_len) { + char *lex = (char *)xmalloc(tok_len + 1); + memcpy(lex, src + start, tok_len); + lex[tok_len] = '\0'; + out->lexeme = lex; + out->line = token_line; + out->column = token_col; + out->start = (int)start; + out->end = (int)idx; + tokenizer->index = idx; + tokenizer->line = line; + tokenizer->column = col; + return true; + } + idx++; + col++; + } + + tokenizer->index = idx; + tokenizer->line = line; + tokenizer->column = col; + return false; +} + +struct Parser { + Dictionary *dictionary; + Reader *reader; + TokenVec tokens; + size_t pos; + Tokenizer tokenizer; + bool tokenizer_exhausted; + struct { + FormVec forms; + StrMap variables; + StrVec *prelude; + StrVec *bss; + } module; + Definition *current_def; + Word **definition_stack; + size_t definition_stack_len; + size_t definition_stack_cap; + Word *last_defined; + FileSpanVec file_spans; + char *source; + struct { + char *name; + StrVec tokens; + int param_count; + bool active; + } macro_recording; + struct { + char *type; + char *false_label; + char *end_label; + char *begin_label; + char *loop_label; + int line; + int column; + StrVec with_names; + } *control_stack; + size_t control_len; + size_t control_cap; + int label_counter; + char *token_hook; + Token last_token; + bool has_last_token; + StrMap variable_labels; + StrMap variable_words; + CompileTimeVM *ct_vm; + StrVec *custom_prelude; + StrVec *custom_bss; + bool pending_inline_def; + bool uses_libc; + bool uses_libm; + char *primary_path; +}; + +typedef enum { + CT_NIL, + CT_INT, + CT_STR, + CT_TOKEN, + CT_LIST, + CT_MAP, + CT_LEXER +} CtValueKind; + +typedef struct CtValue CtValue; + +VEC_DECL(CtValueVec, CtValue); + +typedef struct { + CtValueVec items; +} CtList; + +typedef struct { + char **keys; + CtValue *values; + size_t cap; + size_t len; +} CtMap; + +typedef struct { + Parser *parser; + bool separators[256]; + TokenVec buffer; +} SplitLexer; + +struct CtValue { + CtValueKind kind; + union { + int64_t i64; + char *str; + Token token; + CtList *list; + CtMap *map; + SplitLexer *lexer; + } as; +}; + +struct CompileTimeVM { + Parser *parser; + Dictionary *dictionary; + CtValueVec stack; + CtValueVec rstack; + IntVec loop_remaining; + IntVec loop_begin; + IntVec loop_initial; + StrVec call_stack; +}; + +static void ct_value_free(CtValue *value); + +static CtValue ct_make_nil(void) { + CtValue v = {0}; + v.kind = CT_NIL; + return v; +} + +static CtValue ct_make_int(int64_t i) { + CtValue v = {0}; + v.kind = CT_INT; + v.as.i64 = i; + return v; +} + +static CtValue ct_make_str(const char *s) { + CtValue v = {0}; + v.kind = CT_STR; + v.as.str = str_dup(s); + return v; +} + +static CtValue ct_make_token(Token token) { + CtValue v = {0}; + v.kind = CT_TOKEN; + v.as.token = token; + return v; +} + +static CtValue ct_make_list(CtList *list) { + CtValue v = {0}; + v.kind = CT_LIST; + v.as.list = list; + return v; +} + +static CtValue ct_make_map(CtMap *map) { + CtValue v = {0}; + v.kind = CT_MAP; + v.as.map = map; + return v; +} + +static CtValue ct_make_lexer(SplitLexer *lexer) { + CtValue v = {0}; + v.kind = CT_LEXER; + v.as.lexer = lexer; + return v; +} + +static void ct_value_free(CtValue *value) { + if (!value) { + return; + } + if (value->kind == CT_STR) { + free(value->as.str); + } +} + +static void ct_stack_init(CtValueVec *vec) { + VEC_INIT(vec); +} + +static void ct_stack_push(CtValueVec *vec, CtValue value) { + VEC_PUSH(vec, value); +} + +static CtValue ct_stack_pop(CtValueVec *vec) { + if (!vec->len) { + CtValue v = ct_make_nil(); + return v; + } + return VEC_POP(vec); +} + +static CtValue ct_stack_peek(CtValueVec *vec) { + if (!vec->len) { + CtValue v = ct_make_nil(); + return v; + } + return vec->data[vec->len - 1]; +} + +static CtList *ct_list_new(void) { + CtList *list = (CtList *)xmalloc(sizeof(CtList)); + VEC_INIT(&list->items); + return list; +} + +static CtMap *ct_map_new(void) { + CtMap *map = (CtMap *)xmalloc(sizeof(CtMap)); + map->keys = NULL; + map->values = NULL; + map->cap = 0; + map->len = 0; + return map; +} + +static void ct_map_grow(CtMap *map) { + size_t new_cap = map->cap ? map->cap * 2 : 64; + char **new_keys = (char **)xmalloc(new_cap * sizeof(char *)); + CtValue *new_vals = (CtValue *)xmalloc(new_cap * sizeof(CtValue)); + for (size_t i = 0; i < new_cap; i++) { + new_keys[i] = NULL; + } + if (map->keys) { + for (size_t i = 0; i < map->cap; i++) { + if (!map->keys[i]) { + continue; + } + uint64_t hash = hash_str(map->keys[i]); + size_t idx = (size_t)(hash & (new_cap - 1)); + while (new_keys[idx]) { + idx = (idx + 1) & (new_cap - 1); + } + new_keys[idx] = map->keys[i]; + new_vals[idx] = map->values[i]; + } + } + free(map->keys); + free(map->values); + map->keys = new_keys; + map->values = new_vals; + map->cap = new_cap; +} + +static void ct_map_set(CtMap *map, const char *key, CtValue value) { + if (!map->cap || (map->len + 1) * 3 >= map->cap * 2) { + ct_map_grow(map); + } + uint64_t hash = hash_str(key); + size_t idx = (size_t)(hash & (map->cap - 1)); + while (map->keys[idx]) { + if (strcmp(map->keys[idx], key) == 0) { + ct_value_free(&map->values[idx]); + map->values[idx] = value; + return; + } + idx = (idx + 1) & (map->cap - 1); + } + map->keys[idx] = str_dup(key); + map->values[idx] = value; + map->len++; +} + +static bool ct_map_get(CtMap *map, const char *key, CtValue *out) { + if (!map->cap) { + return false; + } + uint64_t hash = hash_str(key); + size_t idx = (size_t)(hash & (map->cap - 1)); + size_t start = idx; + while (map->keys[idx]) { + if (strcmp(map->keys[idx], key) == 0) { + *out = map->values[idx]; + return true; + } + idx = (idx + 1) & (map->cap - 1); + if (idx == start) { + break; + } + } + return false; +} + +static void emit_line(FunctionEmitter *builder, const char *line) { + VEC_PUSH(builder->text, str_dup(line)); +} + +static void emitter_init(FunctionEmitter *builder, StrVec *text, bool debug) { + builder->text = text; + builder->debug_enabled = debug; + builder->current_loc = NULL; +} + +static char *sanitize_label(const char *name) { + size_t len = strlen(name); + char *out = (char *)xmalloc(len * 4 + 2); + size_t pos = 0; + for (size_t i = 0; i < len; i++) { + unsigned char ch = (unsigned char)name[i]; + if (isalnum(ch) || ch == '_') { + out[pos++] = ch; + } else { + pos += (size_t)sprintf(out + pos, "_%02x", ch); + } + } + if (pos == 0) { + out[pos++] = 'a'; + } + if (isdigit((unsigned char)out[0])) { + memmove(out + 1, out, pos); + out[0] = '_'; + pos++; + } + out[pos] = '\0'; + return out; +} + +static bool is_identifier(const char *text) { + if (!text || !*text) { + return false; + } + if (!(isalpha((unsigned char)text[0]) || text[0] == '_')) { + return false; + } + for (const char *p = text + 1; *p; p++) { + if (!(isalnum((unsigned char)*p) || *p == '_')) { + return false; + } + } + return true; +} + +static char *path_basename(const char *path); + +static SourceLocation *location_for_token(Parser *parser, Token token) { + for (size_t i = 0; i < parser->file_spans.len; i++) { + FileSpan span = parser->file_spans.data[i]; + if (token.line >= span.start_line && token.line < span.end_line) { + int local_line = span.local_start_line + (token.line - span.start_line); + SourceLocation *loc = (SourceLocation *)xmalloc(sizeof(SourceLocation)); + loc->path = path_basename(span.path); + loc->line = local_line; + loc->column = token.column; + return loc; + } + } + SourceLocation *loc = (SourceLocation *)xmalloc(sizeof(SourceLocation)); + loc->path = parser->primary_path ? path_basename(parser->primary_path) : str_dup(""); + loc->line = token.line; + loc->column = token.column; + return loc; +} + +static void parser_push_control(Parser *parser, const char *type) { + if (parser->control_len + 1 > parser->control_cap) { + parser->control_cap = parser->control_cap ? parser->control_cap * 2 : 16; + parser->control_stack = xrealloc(parser->control_stack, parser->control_cap * sizeof(*parser->control_stack)); + } + parser->control_stack[parser->control_len].type = str_dup(type); + parser->control_stack[parser->control_len].false_label = NULL; + parser->control_stack[parser->control_len].end_label = NULL; + parser->control_stack[parser->control_len].begin_label = NULL; + parser->control_stack[parser->control_len].loop_label = NULL; + parser->control_stack[parser->control_len].line = parser->has_last_token ? parser->last_token.line : 0; + parser->control_stack[parser->control_len].column = parser->has_last_token ? parser->last_token.column : 0; + VEC_INIT(&parser->control_stack[parser->control_len].with_names); + parser->control_len++; +} + +static int parser_pop_control(Parser *parser, const char *expected_type) { + if (!parser->control_len) { + return -1; + } + if (expected_type && strcmp(parser->control_stack[parser->control_len - 1].type, expected_type) != 0) { + return -2; + } + parser->control_len--; + return 0; +} + +static void parser_emit_op(Parser *parser, Op op) { + if (op.loc == NULL && parser->has_last_token) { + op.loc = location_for_token(parser, parser->last_token); + } + if (parser->current_def) { + VEC_PUSH(&parser->current_def->body, op); + } else { + Form form = {0}; + form.kind = FORM_DEF; + Definition *dummy = (Definition *)xmalloc(sizeof(Definition)); + *dummy = (Definition){0}; + dummy->name = str_dup(""); + VEC_INIT(&dummy->body); + VEC_PUSH(&dummy->body, op); + form.ptr = dummy; + VEC_PUSH(&parser->module.forms, form); + } +} + +static void parser_init(Parser *parser, Dictionary *dict, Reader *reader) { + parser->dictionary = dict; + parser->reader = reader; + VEC_INIT(&parser->tokens); + parser->pos = 0; + parser->tokenizer_exhausted = false; + VEC_INIT(&parser->module.forms); + strmap_init(&parser->module.variables); + parser->module.prelude = NULL; + parser->module.bss = NULL; + parser->current_def = NULL; + parser->definition_stack = NULL; + parser->definition_stack_len = 0; + parser->definition_stack_cap = 0; + parser->last_defined = NULL; + VEC_INIT(&parser->file_spans); + parser->source = NULL; + parser->macro_recording.active = false; + parser->control_stack = NULL; + parser->control_len = 0; + parser->control_cap = 0; + parser->label_counter = 0; + parser->token_hook = NULL; + parser->has_last_token = false; + strmap_init(&parser->variable_labels); + strmap_init(&parser->variable_words); + parser->ct_vm = NULL; + parser->custom_prelude = NULL; + parser->custom_bss = NULL; + parser->pending_inline_def = false; + parser->uses_libc = false; + parser->uses_libm = false; + parser->primary_path = NULL; +} + +static void register_builtin_syscall(Parser *parser) { + AsmDefinition *def = (AsmDefinition *)xmalloc(sizeof(AsmDefinition)); + memset(def, 0, sizeof(AsmDefinition)); + def->name = str_dup("syscall"); + def->body = str_dup( + " mov rax, [r12]\n" + " add r12, 8\n" + " mov rcx, [r12]\n" + " add r12, 8\n" + " cmp rcx, 6\n" + " jle .sys_args\n" + " mov rcx, 6\n" + ".sys_args:\n" + " cmp rcx, 6\n" + " jl .arg5\n" + " mov r9, [r12]\n" + " add r12, 8\n" + ".arg5:\n" + " cmp rcx, 5\n" + " jl .arg4\n" + " mov r8, [r12]\n" + " add r12, 8\n" + ".arg4:\n" + " cmp rcx, 4\n" + " jl .arg3\n" + " mov r10, [r12]\n" + " add r12, 8\n" + ".arg3:\n" + " cmp rcx, 3\n" + " jl .arg2\n" + " mov rdx, [r12]\n" + " add r12, 8\n" + ".arg2:\n" + " cmp rcx, 2\n" + " jl .arg1\n" + " mov rsi, [r12]\n" + " add r12, 8\n" + ".arg1:\n" + " cmp rcx, 1\n" + " jl .do_syscall\n" + " mov rdi, [r12]\n" + " add r12, 8\n" + ".do_syscall:\n" + " syscall\n" + " sub r12, 8\n" + " mov [r12], rax\n" + ); + + Word *word = dictionary_lookup(parser->dictionary, def->name); + if (!word) { + word = (Word *)xmalloc(sizeof(Word)); + memset(word, 0, sizeof(Word)); + word->name = str_dup(def->name); + dictionary_register(parser->dictionary, word); + } + word->asm_def = def; + Form form = {0}; + form.kind = FORM_ASM; + form.ptr = def; + VEC_PUSH(&parser->module.forms, form); +} + +static void ensure_tokens(Parser *parser, size_t upto) { + if (parser->tokenizer_exhausted) { + return; + } + while (parser->tokens.len <= upto && !parser->tokenizer_exhausted) { + Token tok = {0}; + if (!tokenizer_next(&parser->tokenizer, &tok)) { + parser->tokenizer_exhausted = true; + break; + } + VEC_PUSH(&parser->tokens, tok); + } +} + +static bool parser_eof(Parser *parser) { + ensure_tokens(parser, parser->pos); + return parser->pos >= parser->tokens.len; +} + +static Token parser_peek_token(Parser *parser) { + ensure_tokens(parser, parser->pos); + if (parser->pos >= parser->tokens.len) { + Token empty = {0}; + empty.lexeme = NULL; + return empty; + } + return parser->tokens.data[parser->pos]; +} + +static Token parser_next_token(Parser *parser) { + ensure_tokens(parser, parser->pos); + if (parser->pos >= parser->tokens.len) { + Token empty = {0}; + empty.lexeme = NULL; + return empty; + } + Token tok = parser->tokens.data[parser->pos++]; + parser->last_token = tok; + parser->has_last_token = true; + return tok; +} + +static char *parser_new_label(Parser *parser, const char *prefix) { + char *label = str_printf("L_%s_%d", prefix, parser->label_counter++); + return label; +} + +static void ct_vm_init(CompileTimeVM *vm, Parser *parser) { + vm->parser = parser; + vm->dictionary = parser->dictionary; + ct_stack_init(&vm->stack); + ct_stack_init(&vm->rstack); + VEC_INIT(&vm->loop_remaining); + VEC_INIT(&vm->loop_begin); + VEC_INIT(&vm->loop_initial); + VEC_INIT(&vm->call_stack); +} + +static void ct_vm_reset(CompileTimeVM *vm) { + vm->stack.len = 0; + vm->rstack.len = 0; + vm->loop_remaining.len = 0; + vm->loop_begin.len = 0; + vm->loop_initial.len = 0; + vm->call_stack.len = 0; +} + +static bool try_parse_int(const char *lexeme, int64_t *out); +static void parser_inject_tokens(Parser *parser, TokenVec *injected); + +static void ct_trace_error(CompileTimeVM *vm, const char *msg) { + fprintf(stderr, "[error] %s\n", msg); + if (vm && vm->call_stack.len) { + fprintf(stderr, "[error] compile-time call stack:\n"); + for (size_t i = 0; i < vm->call_stack.len; i++) { + fprintf(stderr, " - %s\n", vm->call_stack.data[i]); + } + } + exit(1); +} + +static int64_t ct_pop_int(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind == CT_STR) { + int64_t out = 0; + if (try_parse_int(v.as.str, &out)) { + return out; + } + } + if (v.kind != CT_INT) { + const char *kind = "unknown"; + const char *extra = ""; + if (v.kind == CT_NIL) { + kind = "nil"; + } else if (v.kind == CT_STR) { + kind = "string"; + extra = v.as.str ? v.as.str : ""; + } else if (v.kind == CT_TOKEN) { + kind = "token"; + extra = v.as.token.lexeme ? v.as.token.lexeme : ""; + } else if (v.kind == CT_LIST) { + kind = "list"; + } else if (v.kind == CT_MAP) { + kind = "map"; + } else if (v.kind == CT_LEXER) { + kind = "lexer"; + } + char *msg = NULL; + if (extra[0] != '\0') { + msg = str_printf("expected integer on compile-time stack (got %s: %s)", kind, extra); + } else { + msg = str_printf("expected integer on compile-time stack (got %s)", kind); + } + ct_trace_error(vm, msg); + free(msg); + } + return v.as.i64; +} + + +static char *ct_pop_str(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind == CT_TOKEN) { + return str_dup(v.as.token.lexeme); + } + if (v.kind != CT_STR) { + ct_trace_error(vm, "expected string on compile-time stack"); + } + return str_dup(v.as.str); +} + +static CtList *ct_pop_list(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind != CT_LIST) { + ct_trace_error(vm, "expected list on compile-time stack"); + } + return v.as.list; +} + +static CtMap *ct_pop_map(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind != CT_MAP) { + ct_trace_error(vm, "expected map on compile-time stack"); + } + return v.as.map; +} + +static Token ct_pop_token(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind == CT_TOKEN) { + return v.as.token; + } + if (v.kind == CT_STR) { + Token tok = {0}; + tok.lexeme = v.as.str; + tok.line = 0; + tok.column = 0; + tok.start = 0; + tok.end = 0; + return tok; + } + ct_trace_error(vm, "expected token on compile-time stack"); +} + +static void ct_word_call(CompileTimeVM *vm, Word *word); + +static bool ct_try_asm_io(CompileTimeVM *vm, Word *word, AsmDefinition *asm_def) { + if (asm_def && asm_def->effect_string_io) { + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind == CT_STR) { + FILE *out = stdout; + if (strcmp(word->name, "ewrite_buf") == 0) { + out = stderr; + } + fputs(v.as.str ? v.as.str : "", out); + } else { + ct_stack_pop(&vm->stack); + } + return true; + } + if (strcmp(word->name, "putc") == 0) { + CtValue v = ct_stack_pop(&vm->stack); + int ch = 0; + if (v.kind == CT_INT) { + ch = (int)v.as.i64; + } else if (v.kind == CT_STR && v.as.str && v.as.str[0]) { + ch = (unsigned char)v.as.str[0]; + } + fputc(ch, stdout); + return true; + } + return false; +} + +static void ct_execute_nodes(CompileTimeVM *vm, OpVec *nodes) { + StrMap labels; + strmap_init(&labels); + for (size_t i = 0; i < nodes->len; i++) { + Op *node = &nodes->data[i]; + if (node->kind == OP_LABEL) { + strmap_set(&labels, node->data.label, (void *)(uintptr_t)i); + } + } + + IntVec begin_stack; + VEC_INIT(&begin_stack); + size_t ip = 0; + while (ip < nodes->len) { + Op node = nodes->data[ip]; + if (node.kind == OP_LITERAL) { + if (node.lit_kind == LIT_INT) { + ct_stack_push(&vm->stack, ct_make_int(node.data.i64)); + } else if (node.lit_kind == LIT_FLOAT) { + ct_stack_push(&vm->stack, ct_make_int((int64_t)node.data.f64)); + } else if (node.lit_kind == LIT_STRING) { + ct_stack_push(&vm->stack, ct_make_str(node.data.str)); + } + ip++; + continue; + } + if (node.kind == OP_WORD) { + const char *name = node.data.word; + if (strcmp(name, "begin") == 0) { + VEC_PUSH(&begin_stack, (int)ip); + ip++; + continue; + } + if (strcmp(name, "again") == 0) { + if (!begin_stack.len) { + fprintf(stderr, "[error] 'again' without matching 'begin'\n"); + exit(1); + } + ip = (size_t)begin_stack.data[begin_stack.len - 1] + 1; + continue; + } + if (strcmp(name, "continue") == 0) { + if (!begin_stack.len) { + fprintf(stderr, "[error] 'continue' outside begin/again loop\n"); + exit(1); + } + ip = (size_t)begin_stack.data[begin_stack.len - 1] + 1; + continue; + } + if (strcmp(name, "exit") == 0) { + return; + } + Word *word = dictionary_lookup(vm->dictionary, name); + if (!word) { + fprintf(stderr, "[error] unknown word '%s' during compile-time execution\n", name); + exit(1); + } + ct_word_call(vm, word); + ip++; + continue; + } + if (node.kind == OP_BRANCH_ZERO) { + CtValue v = ct_stack_pop(&vm->stack); + bool flag = false; + if (v.kind == CT_INT) { + flag = v.as.i64 != 0; + } + if (!flag) { + void *target = strmap_get(&labels, node.data.label); + if (!target) { + fprintf(stderr, "[error] unknown label '%s' during compile-time execution\n", node.data.label); + exit(1); + } + ip = (size_t)(uintptr_t)target; + } else { + ip++; + } + continue; + } + if (node.kind == OP_JUMP) { + void *target = strmap_get(&labels, node.data.label); + if (!target) { + fprintf(stderr, "[error] unknown label '%s' during compile-time execution\n", node.data.label); + exit(1); + } + ip = (size_t)(uintptr_t)target; + continue; + } + if (node.kind == OP_FOR_BEGIN) { + int64_t count = ct_pop_int(vm); + if (count <= 0) { + ip++; + continue; + } + VEC_PUSH(&vm->loop_remaining, (int)count); + VEC_PUSH(&vm->loop_begin, (int)ip); + VEC_PUSH(&vm->loop_initial, (int)count); + ip++; + continue; + } + if (node.kind == OP_FOR_END) { + if (!vm->loop_remaining.len) { + fprintf(stderr, "[error] 'next' without matching 'for'\n"); + exit(1); + } + int idx = (int)vm->loop_remaining.len - 1; + vm->loop_remaining.data[idx] -= 1; + if (vm->loop_remaining.data[idx] > 0) { + ip = (size_t)vm->loop_begin.data[idx] + 1; + } else { + vm->loop_remaining.len--; + vm->loop_begin.len--; + vm->loop_initial.len--; + ip++; + } + continue; + } + ip++; + } +} + +static void ct_word_call(CompileTimeVM *vm, Word *word) { + VEC_PUSH(&vm->call_stack, str_dup(word->name)); + if (word->compile_time_override) { + if (word->ct_definition) { + ct_execute_nodes(vm, &word->ct_definition->body); + vm->call_stack.len--; + return; + } + if (word->definition) { + ct_execute_nodes(vm, &word->definition->body); + vm->call_stack.len--; + return; + } + if (word->ct_intrinsic) { + word->ct_intrinsic(vm); + vm->call_stack.len--; + return; + } + if (word->ct_asm_def) { + if (ct_try_asm_io(vm, word, word->ct_asm_def)) { + vm->call_stack.len--; + return; + } + vm->call_stack.len--; + return; + } + } + bool prefer_def = (word->definition && (word->immediate || word->compile_only)); + if (!prefer_def && word->ct_intrinsic) { + word->ct_intrinsic(vm); + vm->call_stack.len--; + return; + } + Definition *def = word->definition; + if (word->compile_only && word->ct_definition) { + def = word->ct_definition; + } + if (!def) { + if (word->asm_def || word->ct_asm_def) { + AsmDefinition *asm_def = word->ct_asm_def ? word->ct_asm_def : word->asm_def; + ct_try_asm_io(vm, word, asm_def); + vm->call_stack.len--; + return; + } + if (word->is_extern) { + int pops = word->extern_arg_count > 0 ? word->extern_arg_count : word->extern_inputs; + for (int i = 0; i < pops; i++) { + ct_stack_pop(&vm->stack); + } + int outputs = 0; + if (word->extern_arg_count > 0) { + if (!word->extern_ret_type || strcmp(word->extern_ret_type, "void") != 0) { + outputs = 1; + } + } else { + outputs = word->extern_outputs; + } + for (int i = 0; i < outputs; i++) { + ct_stack_push(&vm->stack, ct_make_int(0)); + } + vm->call_stack.len--; + return; + } + fprintf(stderr, "[error] word '%s' has no compile-time definition\n", word->name); + exit(1); + } + ct_execute_nodes(vm, &def->body); + vm->call_stack.len--; +} + +static bool ct_truthy(CtValue v) { + if (v.kind == CT_NIL) { + return false; + } + if (v.kind == CT_INT) { + return v.as.i64 != 0; + } + if (v.kind == CT_STR) { + return v.as.str && v.as.str[0] != '\0'; + } + return true; +} + +static char *ct_string_from_value(CtValue v) { + if (v.kind == CT_TOKEN) { + return str_dup(v.as.token.lexeme); + } + if (v.kind == CT_STR) { + return str_dup(v.as.str); + } + if (v.kind == CT_INT) { + return str_printf("%lld", (long long)v.as.i64); + } + return str_dup(""); +} + +static void ct_intrinsic_dup(CompileTimeVM *vm) { + CtValue v = ct_stack_peek(&vm->stack); + ct_stack_push(&vm->stack, v); +} + +static void ct_intrinsic_drop(CompileTimeVM *vm) { + ct_stack_pop(&vm->stack); +} + +static void ct_intrinsic_swap(CompileTimeVM *vm) { + CtValue a = ct_stack_pop(&vm->stack); + CtValue b = ct_stack_pop(&vm->stack); + ct_stack_push(&vm->stack, a); + ct_stack_push(&vm->stack, b); +} + +static void ct_intrinsic_over(CompileTimeVM *vm) { + if (vm->stack.len < 2) { + fprintf(stderr, "[error] over expects at least 2 items\n"); + exit(1); + } + CtValue v = vm->stack.data[vm->stack.len - 2]; + ct_stack_push(&vm->stack, v); +} + +static void ct_intrinsic_rot(CompileTimeVM *vm) { + if (vm->stack.len < 3) { + fprintf(stderr, "[error] rot expects at least 3 items\n"); + exit(1); + } + CtValue a = vm->stack.data[vm->stack.len - 3]; + CtValue b = vm->stack.data[vm->stack.len - 2]; + CtValue c = vm->stack.data[vm->stack.len - 1]; + vm->stack.data[vm->stack.len - 3] = b; + vm->stack.data[vm->stack.len - 2] = c; + vm->stack.data[vm->stack.len - 1] = a; +} + +static void ct_intrinsic_pick(CompileTimeVM *vm) { + int64_t idx = ct_pop_int(vm); + if (idx < 0 || (size_t)(idx + 1) > vm->stack.len) { + fprintf(stderr, "[error] pick index out of range\n"); + exit(1); + } + CtValue v = vm->stack.data[vm->stack.len - 1 - (size_t)idx]; + ct_stack_push(&vm->stack, v); +} + +static void ct_intrinsic_rpick(CompileTimeVM *vm) { + int64_t idx = ct_pop_int(vm); + if (idx < 0 || (size_t)(idx + 1) > vm->rstack.len) { + fprintf(stderr, "[error] rpick index out of range\n"); + exit(1); + } + CtValue v = vm->rstack.data[vm->rstack.len - 1 - (size_t)idx]; + ct_stack_push(&vm->stack, v); +} + +static void ct_intrinsic_to_r(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + ct_stack_push(&vm->rstack, v); +} + +static void ct_intrinsic_from_r(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->rstack); + ct_stack_push(&vm->stack, v); +} + +static void ct_intrinsic_rdrop(CompileTimeVM *vm) { + ct_stack_pop(&vm->rstack); +} + +static void ct_intrinsic_add(CompileTimeVM *vm) { + int64_t b = ct_pop_int(vm); + int64_t a = ct_pop_int(vm); + ct_stack_push(&vm->stack, ct_make_int(a + b)); +} + +static void ct_intrinsic_sub(CompileTimeVM *vm) { + int64_t b = ct_pop_int(vm); + int64_t a = ct_pop_int(vm); + ct_stack_push(&vm->stack, ct_make_int(a - b)); +} + +static void ct_intrinsic_mul(CompileTimeVM *vm) { + int64_t b = ct_pop_int(vm); + int64_t a = ct_pop_int(vm); + ct_stack_push(&vm->stack, ct_make_int(a * b)); +} + +static void ct_intrinsic_div(CompileTimeVM *vm) { + int64_t b = ct_pop_int(vm); + int64_t a = ct_pop_int(vm); + if (b == 0) { + fprintf(stderr, "[error] division by zero in compile-time VM\n"); + exit(1); + } + ct_stack_push(&vm->stack, ct_make_int(a / b)); +} + +static void ct_intrinsic_mod(CompileTimeVM *vm) { + int64_t b = ct_pop_int(vm); + int64_t a = ct_pop_int(vm); + if (b == 0) { + fprintf(stderr, "[error] modulo by zero in compile-time VM\n"); + exit(1); + } + ct_stack_push(&vm->stack, ct_make_int(a % b)); +} + +static void ct_intrinsic_eq(CompileTimeVM *vm) { + CtValue b = ct_stack_pop(&vm->stack); + CtValue a = ct_stack_pop(&vm->stack); + if (a.kind == CT_INT && b.kind == CT_INT) { + ct_stack_push(&vm->stack, ct_make_int(a.as.i64 == b.as.i64)); + return; + } + char *sa = ct_string_from_value(a); + char *sb = ct_string_from_value(b); + bool eq = strcmp(sa, sb) == 0; + free(sa); + free(sb); + ct_stack_push(&vm->stack, ct_make_int(eq)); +} + +static void ct_intrinsic_gt(CompileTimeVM *vm) { + int64_t b = ct_pop_int(vm); + int64_t a = ct_pop_int(vm); + ct_stack_push(&vm->stack, ct_make_int(a > b)); +} + +static void ct_intrinsic_lt(CompileTimeVM *vm) { + int64_t b = ct_pop_int(vm); + int64_t a = ct_pop_int(vm); + ct_stack_push(&vm->stack, ct_make_int(a < b)); +} + +static void ct_intrinsic_ge(CompileTimeVM *vm) { + int64_t b = ct_pop_int(vm); + int64_t a = ct_pop_int(vm); + ct_stack_push(&vm->stack, ct_make_int(a >= b)); +} + +static void ct_intrinsic_le(CompileTimeVM *vm) { + int64_t b = ct_pop_int(vm); + int64_t a = ct_pop_int(vm); + ct_stack_push(&vm->stack, ct_make_int(a <= b)); +} + +static void ct_intrinsic_ne(CompileTimeVM *vm) { + CtValue b = ct_stack_pop(&vm->stack); + CtValue a = ct_stack_pop(&vm->stack); + if (a.kind == CT_INT && b.kind == CT_INT) { + ct_stack_push(&vm->stack, ct_make_int(a.as.i64 != b.as.i64)); + return; + } + char *sa = ct_string_from_value(a); + char *sb = ct_string_from_value(b); + bool ne = strcmp(sa, sb) != 0; + free(sa); + free(sb); + ct_stack_push(&vm->stack, ct_make_int(ne)); +} + +static void ct_intrinsic_not(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + ct_stack_push(&vm->stack, ct_make_int(!ct_truthy(v))); +} + +static void ct_intrinsic_nil(CompileTimeVM *vm) { + ct_stack_push(&vm->stack, ct_make_nil()); +} + +static void ct_intrinsic_nilp(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + ct_stack_push(&vm->stack, ct_make_int(v.kind == CT_NIL)); +} + +static void ct_intrinsic_string_eq(CompileTimeVM *vm) { + char *b = ct_pop_str(vm); + char *a = ct_pop_str(vm); + bool eq = strcmp(a, b) == 0; + free(a); + free(b); + ct_stack_push(&vm->stack, ct_make_int(eq)); +} + +static void ct_intrinsic_string_length(CompileTimeVM *vm) { + char *s = ct_pop_str(vm); + ct_stack_push(&vm->stack, ct_make_int((int64_t)strlen(s))); + free(s); +} + +static void ct_intrinsic_string_append(CompileTimeVM *vm) { + char *b = ct_pop_str(vm); + char *a = ct_pop_str(vm); + char *out = str_printf("%s%s", a, b); + free(a); + free(b); + ct_stack_push(&vm->stack, ct_make_str(out)); + free(out); +} + +static void ct_intrinsic_string_to_number(CompileTimeVM *vm) { + char *s = ct_pop_str(vm); + int64_t out = 0; + bool ok = try_parse_int(s, &out); + ct_stack_push(&vm->stack, ct_make_int(out)); + ct_stack_push(&vm->stack, ct_make_int(ok ? 1 : 0)); + free(s); +} + +static void ct_intrinsic_int_to_string(CompileTimeVM *vm) { + int64_t v = ct_pop_int(vm); + char *out = str_printf("%lld", (long long)v); + ct_stack_push(&vm->stack, ct_make_str(out)); + free(out); +} + +static void ct_intrinsic_identifierp(CompileTimeVM *vm) { + char *s = ct_pop_str(vm); + ct_stack_push(&vm->stack, ct_make_int(is_identifier(s))); + free(s); +} + +static void ct_intrinsic_list_new(CompileTimeVM *vm) { + CtList *list = ct_list_new(); + ct_stack_push(&vm->stack, ct_make_list(list)); +} + +static void ct_intrinsic_list_append(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + CtList *list = ct_pop_list(vm); + VEC_PUSH(&list->items, v); + ct_stack_push(&vm->stack, ct_make_list(list)); +} + +static void ct_intrinsic_list_pop(CompileTimeVM *vm) { + CtList *list = ct_pop_list(vm); + if (!list->items.len) { + ct_stack_push(&vm->stack, ct_make_list(list)); + ct_stack_push(&vm->stack, ct_make_nil()); + return; + } + CtValue v = VEC_POP(&list->items); + ct_stack_push(&vm->stack, ct_make_list(list)); + ct_stack_push(&vm->stack, v); +} + +static void ct_intrinsic_list_pop_front(CompileTimeVM *vm) { + CtList *list = ct_pop_list(vm); + if (!list->items.len) { + ct_stack_push(&vm->stack, ct_make_list(list)); + ct_stack_push(&vm->stack, ct_make_nil()); + return; + } + CtValue v = list->items.data[0]; + memmove(&list->items.data[0], &list->items.data[1], (list->items.len - 1) * sizeof(CtValue)); + list->items.len--; + ct_stack_push(&vm->stack, ct_make_list(list)); + ct_stack_push(&vm->stack, v); +} + +static void ct_intrinsic_list_length(CompileTimeVM *vm) { + CtList *list = ct_pop_list(vm); + ct_stack_push(&vm->stack, ct_make_int((int64_t)list->items.len)); +} + +static void ct_intrinsic_list_empty(CompileTimeVM *vm) { + CtList *list = ct_pop_list(vm); + ct_stack_push(&vm->stack, ct_make_int(list->items.len == 0)); +} + +static void ct_intrinsic_list_get(CompileTimeVM *vm) { + int64_t idx = ct_pop_int(vm); + CtList *list = ct_pop_list(vm); + CtValue v = ct_make_nil(); + if (idx >= 0 && (size_t)idx < list->items.len) { + v = list->items.data[idx]; + } + ct_stack_push(&vm->stack, v); +} + +static void ct_intrinsic_list_set(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + int64_t idx = ct_pop_int(vm); + CtList *list = ct_pop_list(vm); + if (idx < 0 || (size_t)idx >= list->items.len) { + fprintf(stderr, "[error] list-set index out of range\n"); + exit(1); + } + list->items.data[idx] = v; + ct_stack_push(&vm->stack, ct_make_list(list)); +} + +static void ct_intrinsic_list_extend(CompileTimeVM *vm) { + CtList *list2 = ct_pop_list(vm); + CtList *list1 = ct_pop_list(vm); + for (size_t i = 0; i < list2->items.len; i++) { + VEC_PUSH(&list1->items, list2->items.data[i]); + } + ct_stack_push(&vm->stack, ct_make_list(list1)); +} + +static void ct_intrinsic_list_last(CompileTimeVM *vm) { + CtList *list = ct_pop_list(vm); + CtValue v = ct_make_nil(); + if (list->items.len) { + v = list->items.data[list->items.len - 1]; + } + ct_stack_push(&vm->stack, v); +} + +static void ct_intrinsic_list_clone(CompileTimeVM *vm) { + CtList *list = ct_pop_list(vm); + CtList *out = ct_list_new(); + for (size_t i = 0; i < list->items.len; i++) { + VEC_PUSH(&out->items, list->items.data[i]); + } + ct_stack_push(&vm->stack, ct_make_list(list)); + ct_stack_push(&vm->stack, ct_make_list(out)); +} + +static void ct_intrinsic_map_new(CompileTimeVM *vm) { + CtMap *map = ct_map_new(); + ct_stack_push(&vm->stack, ct_make_map(map)); +} + +static void ct_intrinsic_map_set(CompileTimeVM *vm) { + CtValue val = ct_stack_pop(&vm->stack); + char *key = ct_pop_str(vm); + CtMap *map = ct_pop_map(vm); + ct_map_set(map, key, val); + free(key); + ct_stack_push(&vm->stack, ct_make_map(map)); +} + +static void ct_intrinsic_map_get(CompileTimeVM *vm) { + char *key = ct_pop_str(vm); + CtMap *map = ct_pop_map(vm); + CtValue out = ct_make_nil(); + bool ok = ct_map_get(map, key, &out); + ct_stack_push(&vm->stack, ct_make_map(map)); + ct_stack_push(&vm->stack, out); + ct_stack_push(&vm->stack, ct_make_int(ok)); + free(key); +} + +static void ct_intrinsic_map_has(CompileTimeVM *vm) { + char *key = ct_pop_str(vm); + CtMap *map = ct_pop_map(vm); + CtValue out = ct_make_nil(); + bool ok = ct_map_get(map, key, &out); + ct_stack_push(&vm->stack, ct_make_map(map)); + ct_stack_push(&vm->stack, ct_make_int(ok)); + free(key); +} + +static void ct_intrinsic_token_lexeme(CompileTimeVM *vm) { + Token tok = ct_pop_token(vm); + ct_stack_push(&vm->stack, ct_make_str(tok.lexeme)); +} + +static void ct_intrinsic_token_from_lexeme(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind == CT_NIL) { + v = ct_stack_pop(&vm->stack); + } + char *lex = NULL; + if (v.kind == CT_STR) { + lex = str_dup(v.as.str); + } else if (v.kind == CT_TOKEN) { + lex = str_dup(v.as.token.lexeme); + } else { + ct_trace_error(vm, "expected string for token-from-lexeme"); + } + Token tok = {0}; + tok.lexeme = lex; + tok.line = 0; + tok.column = 0; + tok.start = 0; + tok.end = 0; + ct_stack_push(&vm->stack, ct_make_token(tok)); +} + +static void ct_intrinsic_next_token(CompileTimeVM *vm) { + Token tok = parser_next_token(vm->parser); + if (!tok.lexeme) { + ct_stack_push(&vm->stack, ct_make_nil()); + return; + } + ct_stack_push(&vm->stack, ct_make_token(tok)); +} + +static void ct_intrinsic_peek_token(CompileTimeVM *vm) { + Token tok = parser_peek_token(vm->parser); + if (!tok.lexeme) { + ct_stack_push(&vm->stack, ct_make_nil()); + return; + } + ct_stack_push(&vm->stack, ct_make_token(tok)); +} + +static void ct_intrinsic_inject_tokens(CompileTimeVM *vm) { + CtList *list = ct_pop_list(vm); + TokenVec injected; + VEC_INIT(&injected); + for (size_t i = 0; i < list->items.len; i++) { + CtValue v = list->items.data[i]; + Token tok = {0}; + if (v.kind == CT_TOKEN) { + tok = v.as.token; + } else if (v.kind == CT_STR) { + tok.lexeme = str_dup(v.as.str); + } else { + tok.lexeme = ct_string_from_value(v); + } + VEC_PUSH(&injected, tok); + } + parser_inject_tokens(vm->parser, &injected); +} + +static void ct_intrinsic_set_token_hook(CompileTimeVM *vm) { + char *name = ct_pop_str(vm); + if (vm->parser->token_hook) { + free(vm->parser->token_hook); + } + vm->parser->token_hook = name; +} + +static void ct_intrinsic_clear_token_hook(CompileTimeVM *vm) { + if (vm->parser->token_hook) { + free(vm->parser->token_hook); + vm->parser->token_hook = NULL; + } +} + +static void ct_intrinsic_parse_error(CompileTimeVM *vm) { + char *msg = ct_pop_str(vm); + fprintf(stderr, "[error] %s\n", msg); + free(msg); + exit(1); +} + +static void ct_intrinsic_add_token(CompileTimeVM *vm) { + char *tok = ct_pop_str(vm); + reader_add_tokens(vm->parser->reader, tok); + free(tok); +} + +static void ct_intrinsic_add_token_chars(CompileTimeVM *vm) { + char *chars = ct_pop_str(vm); + reader_add_token_chars(vm->parser->reader, chars); + free(chars); +} + +static void ct_intrinsic_prelude_clear(CompileTimeVM *vm) { + if (!vm->parser->custom_prelude) { + vm->parser->custom_prelude = (StrVec *)xmalloc(sizeof(StrVec)); + VEC_INIT(vm->parser->custom_prelude); + } + vm->parser->custom_prelude->len = 0; +} + +static void ct_intrinsic_prelude_append(CompileTimeVM *vm) { + char *line = ct_pop_str(vm); + if (!vm->parser->custom_prelude) { + vm->parser->custom_prelude = (StrVec *)xmalloc(sizeof(StrVec)); + VEC_INIT(vm->parser->custom_prelude); + } + VEC_PUSH(vm->parser->custom_prelude, line); +} + +static void ct_intrinsic_bss_clear(CompileTimeVM *vm) { + if (!vm->parser->custom_bss) { + vm->parser->custom_bss = (StrVec *)xmalloc(sizeof(StrVec)); + VEC_INIT(vm->parser->custom_bss); + } + vm->parser->custom_bss->len = 0; +} + +static void ct_intrinsic_bss_append(CompileTimeVM *vm) { + char *line = ct_pop_str(vm); + if (!vm->parser->custom_bss) { + vm->parser->custom_bss = (StrVec *)xmalloc(sizeof(StrVec)); + VEC_INIT(vm->parser->custom_bss); + } + VEC_PUSH(vm->parser->custom_bss, line); +} + +static void ct_intrinsic_use_l2_ct(CompileTimeVM *vm) { + char *name = ct_pop_str(vm); + Word *word = dictionary_lookup(vm->dictionary, name); + if (!word) { + word = (Word *)xmalloc(sizeof(Word)); + memset(word, 0, sizeof(Word)); + word->name = str_dup(name); + dictionary_register(vm->dictionary, word); + } + word->compile_time_override = true; + free(name); +} + +static CtList *ct_list_from_tokens(const char **tokens, size_t count) { + CtList *list = ct_list_new(); + for (size_t i = 0; i < count; i++) { + VEC_PUSH(&list->items, ct_make_str(tokens[i])); + } + return list; +} + +static void ct_intrinsic_shunt(CompileTimeVM *vm) { + CtList *list = ct_pop_list(vm); + CtList *output = ct_list_new(); + CtList *ops = ct_list_new(); + for (size_t i = 0; i < list->items.len; i++) { + CtValue tok = list->items.data[i]; + char *lex = ct_string_from_value(tok); + if (strcmp(lex, "(") == 0) { + VEC_PUSH(&ops->items, ct_make_str(lex)); + free(lex); + continue; + } + if (strcmp(lex, ")") == 0) { + while (ops->items.len) { + CtValue top = ops->items.data[ops->items.len - 1]; + char *top_lex = ct_string_from_value(top); + if (strcmp(top_lex, "(") == 0) { + ops->items.len--; + free(top_lex); + break; + } + VEC_PUSH(&output->items, top); + ops->items.len--; + free(top_lex); + } + free(lex); + continue; + } + int prec = 0; + if (strcmp(lex, "+") == 0 || strcmp(lex, "-") == 0) { + prec = 1; + } else if (strcmp(lex, "*") == 0 || strcmp(lex, "/") == 0 || strcmp(lex, "%") == 0) { + prec = 2; + } + if (prec > 0) { + while (ops->items.len) { + CtValue top = ops->items.data[ops->items.len - 1]; + char *top_lex = ct_string_from_value(top); + int top_prec = 0; + if (strcmp(top_lex, "+") == 0 || strcmp(top_lex, "-") == 0) { + top_prec = 1; + } else if (strcmp(top_lex, "*") == 0 || strcmp(top_lex, "/") == 0 || strcmp(top_lex, "%") == 0) { + top_prec = 2; + } + if (top_prec >= prec) { + VEC_PUSH(&output->items, top); + ops->items.len--; + } else { + free(top_lex); + break; + } + free(top_lex); + } + VEC_PUSH(&ops->items, ct_make_str(lex)); + free(lex); + continue; + } + VEC_PUSH(&output->items, ct_make_str(lex)); + free(lex); + } + while (ops->items.len) { + CtValue top = VEC_POP(&ops->items); + VEC_PUSH(&output->items, top); + } + ct_stack_push(&vm->stack, ct_make_list(output)); +} + +static SplitLexer *split_lexer_new(Parser *parser, const char *seps) { + SplitLexer *lexer = (SplitLexer *)xmalloc(sizeof(SplitLexer)); + lexer->parser = parser; + memset(lexer->separators, 0, sizeof(lexer->separators)); + for (const char *p = seps; p && *p; p++) { + lexer->separators[(unsigned char)*p] = true; + } + VEC_INIT(&lexer->buffer); + return lexer; +} + +static void split_lexer_buffer_token(SplitLexer *lexer, Token tok) { + if (!tok.lexeme) { + return; + } + size_t len = strlen(tok.lexeme); + if (len == 0 || tok.lexeme[0] == '"') { + VEC_PUSH(&lexer->buffer, tok); + return; + } + size_t start = 0; + for (size_t i = 0; i <= len; i++) { + bool is_sep = (i < len) && lexer->separators[(unsigned char)tok.lexeme[i]]; + bool at_end = (i == len); + if (is_sep || at_end) { + if (i > start) { + size_t tok_len = i - start; + char *lex = (char *)xmalloc(tok_len + 1); + memcpy(lex, tok.lexeme + start, tok_len); + lex[tok_len] = '\0'; + Token out = tok; + out.lexeme = lex; + VEC_PUSH(&lexer->buffer, out); + } + if (is_sep) { + char sep[2] = {tok.lexeme[i], '\0'}; + Token out = tok; + out.lexeme = str_dup(sep); + VEC_PUSH(&lexer->buffer, out); + } + start = i + 1; + } + } +} + +static Token split_lexer_pop(SplitLexer *lexer) { + if (lexer->buffer.len == 0) { + Token tok = parser_next_token(lexer->parser); + if (!tok.lexeme) { + Token empty = {0}; + empty.lexeme = NULL; + return empty; + } + split_lexer_buffer_token(lexer, tok); + } + if (lexer->buffer.len == 0) { + Token empty = {0}; + empty.lexeme = NULL; + return empty; + } + Token out = lexer->buffer.data[0]; + memmove(&lexer->buffer.data[0], &lexer->buffer.data[1], (lexer->buffer.len - 1) * sizeof(Token)); + lexer->buffer.len--; + return out; +} + +static Token split_lexer_peek(SplitLexer *lexer) { + if (lexer->buffer.len == 0) { + Token tok = parser_next_token(lexer->parser); + if (!tok.lexeme) { + Token empty = {0}; + empty.lexeme = NULL; + return empty; + } + split_lexer_buffer_token(lexer, tok); + } + if (lexer->buffer.len == 0) { + Token empty = {0}; + empty.lexeme = NULL; + return empty; + } + return lexer->buffer.data[0]; +} + +static void split_lexer_push_back(SplitLexer *lexer, Token tok) { + if (lexer->buffer.len + 1 > lexer->buffer.cap) { + lexer->buffer.cap = lexer->buffer.cap ? lexer->buffer.cap * 2 : 8; + lexer->buffer.data = xrealloc(lexer->buffer.data, lexer->buffer.cap * sizeof(Token)); + } + memmove(&lexer->buffer.data[1], &lexer->buffer.data[0], lexer->buffer.len * sizeof(Token)); + lexer->buffer.data[0] = tok; + lexer->buffer.len++; +} + +static void ct_intrinsic_lexer_new(CompileTimeVM *vm) { + char *seps = ct_pop_str(vm); + SplitLexer *lexer = split_lexer_new(vm->parser, seps); + free(seps); + ct_stack_push(&vm->stack, ct_make_lexer(lexer)); +} + +static void ct_intrinsic_lexer_pop(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind != CT_LEXER) { + fprintf(stderr, "[error] lexer-pop expects lexer\n"); + exit(1); + } + Token tok = split_lexer_pop(v.as.lexer); + ct_stack_push(&vm->stack, ct_make_lexer(v.as.lexer)); + if (!tok.lexeme) { + ct_stack_push(&vm->stack, ct_make_nil()); + } else { + ct_stack_push(&vm->stack, ct_make_token(tok)); + } +} + +static void ct_intrinsic_lexer_peek(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind != CT_LEXER) { + fprintf(stderr, "[error] lexer-peek expects lexer\n"); + exit(1); + } + Token tok = split_lexer_peek(v.as.lexer); + ct_stack_push(&vm->stack, ct_make_lexer(v.as.lexer)); + if (!tok.lexeme) { + ct_stack_push(&vm->stack, ct_make_nil()); + } else { + ct_stack_push(&vm->stack, ct_make_token(tok)); + } +} + +static void ct_intrinsic_lexer_expect(CompileTimeVM *vm) { + char *expected = ct_pop_str(vm); + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind != CT_LEXER) { + fprintf(stderr, "[error] lexer-expect expects lexer\n"); + exit(1); + } + Token tok = split_lexer_pop(v.as.lexer); + if (!tok.lexeme || strcmp(tok.lexeme, expected) != 0) { + fprintf(stderr, "[error] lexer-expect expected '%s'\n", expected); + exit(1); + } + ct_stack_push(&vm->stack, ct_make_lexer(v.as.lexer)); + ct_stack_push(&vm->stack, ct_make_token(tok)); + free(expected); +} + +static void ct_intrinsic_lexer_collect_brace(CompileTimeVM *vm) { + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind != CT_LEXER) { + fprintf(stderr, "[error] lexer-collect-brace expects lexer\n"); + exit(1); + } + int depth = 1; + CtList *list = ct_list_new(); + while (depth > 0) { + Token tok = split_lexer_pop(v.as.lexer); + if (!tok.lexeme) { + fprintf(stderr, "[error] unterminated brace in lexer\n"); + exit(1); + } + if (strcmp(tok.lexeme, "{") == 0) { + depth++; + } else if (strcmp(tok.lexeme, "}") == 0) { + depth--; + if (depth == 0) { + break; + } + } + VEC_PUSH(&list->items, ct_make_token(tok)); + } + ct_stack_push(&vm->stack, ct_make_lexer(v.as.lexer)); + ct_stack_push(&vm->stack, ct_make_list(list)); +} + +static void ct_intrinsic_lexer_push_back(CompileTimeVM *vm) { + Token tok = ct_pop_token(vm); + CtValue v = ct_stack_pop(&vm->stack); + if (v.kind != CT_LEXER) { + fprintf(stderr, "[error] lexer-push-back expects lexer\n"); + exit(1); + } + split_lexer_push_back(v.as.lexer, tok); + ct_stack_push(&vm->stack, ct_make_lexer(v.as.lexer)); +} + +static void ct_intrinsic_emit_definition(CompileTimeVM *vm) { + CtList *body = ct_pop_list(vm); + Token name = ct_pop_token(vm); + TokenVec injected; + VEC_INIT(&injected); + Token tok = {0}; + tok.lexeme = str_dup("word"); + VEC_PUSH(&injected, tok); + VEC_PUSH(&injected, name); + for (size_t i = 0; i < body->items.len; i++) { + CtValue item = body->items.data[i]; + Token t = {0}; + if (item.kind == CT_TOKEN) { + t = item.as.token; + } else if (item.kind == CT_STR) { + t.lexeme = str_dup(item.as.str); + } else if (item.kind == CT_INT) { + t.lexeme = str_printf("%lld", (long long)item.as.i64); + } else { + t.lexeme = ct_string_from_value(item); + } + VEC_PUSH(&injected, t); + } + tok.lexeme = str_dup("end"); + VEC_PUSH(&injected, tok); + parser_inject_tokens(vm->parser, &injected); +} + +static void ct_intrinsic_prelude_set(CompileTimeVM *vm) { + ct_intrinsic_prelude_clear(vm); + ct_intrinsic_prelude_append(vm); +} + +static void ct_intrinsic_bss_set(CompileTimeVM *vm) { + ct_intrinsic_bss_clear(vm); + ct_intrinsic_bss_append(vm); +} + +static Word *register_ct_intrinsic(Dictionary *dict, const char *name, CompileTimeIntrinsic fn) { + Word *word = dictionary_lookup(dict, name); + if (!word) { + word = (Word *)xmalloc(sizeof(Word)); + memset(word, 0, sizeof(Word)); + word->name = str_dup(name); + dictionary_register(dict, word); + } + word->ct_intrinsic = fn; + word->compile_only = true; + return word; +} + +static void bootstrap_dictionary(Dictionary *dict, Parser *parser, CompileTimeVM *vm) { + (void)parser; + register_ct_intrinsic(dict, "dup", ct_intrinsic_dup); + register_ct_intrinsic(dict, "drop", ct_intrinsic_drop); + register_ct_intrinsic(dict, "swap", ct_intrinsic_swap); + register_ct_intrinsic(dict, "over", ct_intrinsic_over); + register_ct_intrinsic(dict, "rot", ct_intrinsic_rot); + register_ct_intrinsic(dict, "pick", ct_intrinsic_pick); + register_ct_intrinsic(dict, "rpick", ct_intrinsic_rpick); + register_ct_intrinsic(dict, ">r", ct_intrinsic_to_r); + register_ct_intrinsic(dict, "r>", ct_intrinsic_from_r); + register_ct_intrinsic(dict, "rdrop", ct_intrinsic_rdrop); + register_ct_intrinsic(dict, "+", ct_intrinsic_add); + register_ct_intrinsic(dict, "-", ct_intrinsic_sub); + register_ct_intrinsic(dict, "*", ct_intrinsic_mul); + register_ct_intrinsic(dict, "/", ct_intrinsic_div); + register_ct_intrinsic(dict, "%", ct_intrinsic_mod); + register_ct_intrinsic(dict, "==", ct_intrinsic_eq); + register_ct_intrinsic(dict, "!=", ct_intrinsic_ne); + register_ct_intrinsic(dict, ">", ct_intrinsic_gt); + register_ct_intrinsic(dict, "<", ct_intrinsic_lt); + register_ct_intrinsic(dict, ">=", ct_intrinsic_ge); + register_ct_intrinsic(dict, "<=", ct_intrinsic_le); + register_ct_intrinsic(dict, "not", ct_intrinsic_not); + register_ct_intrinsic(dict, "nil", ct_intrinsic_nil); + register_ct_intrinsic(dict, "nil?", ct_intrinsic_nilp); + register_ct_intrinsic(dict, "string=", ct_intrinsic_string_eq); + register_ct_intrinsic(dict, "string-length", ct_intrinsic_string_length); + register_ct_intrinsic(dict, "string-append", ct_intrinsic_string_append); + register_ct_intrinsic(dict, "string>number", ct_intrinsic_string_to_number); + register_ct_intrinsic(dict, "int>string", ct_intrinsic_int_to_string); + register_ct_intrinsic(dict, "identifier?", ct_intrinsic_identifierp); + register_ct_intrinsic(dict, "list-new", ct_intrinsic_list_new); + register_ct_intrinsic(dict, "list-append", ct_intrinsic_list_append); + register_ct_intrinsic(dict, "list-pop", ct_intrinsic_list_pop); + register_ct_intrinsic(dict, "list-pop-front", ct_intrinsic_list_pop_front); + register_ct_intrinsic(dict, "list-length", ct_intrinsic_list_length); + register_ct_intrinsic(dict, "list-empty?", ct_intrinsic_list_empty); + register_ct_intrinsic(dict, "list-get", ct_intrinsic_list_get); + register_ct_intrinsic(dict, "list-set", ct_intrinsic_list_set); + register_ct_intrinsic(dict, "list-extend", ct_intrinsic_list_extend); + register_ct_intrinsic(dict, "list-last", ct_intrinsic_list_last); + register_ct_intrinsic(dict, "list-clone", ct_intrinsic_list_clone); + register_ct_intrinsic(dict, "map-new", ct_intrinsic_map_new); + register_ct_intrinsic(dict, "map-set", ct_intrinsic_map_set); + register_ct_intrinsic(dict, "map-get", ct_intrinsic_map_get); + register_ct_intrinsic(dict, "map-has?", ct_intrinsic_map_has); + register_ct_intrinsic(dict, "token-lexeme", ct_intrinsic_token_lexeme); + register_ct_intrinsic(dict, "token-from-lexeme", ct_intrinsic_token_from_lexeme); + register_ct_intrinsic(dict, "next-token", ct_intrinsic_next_token); + register_ct_intrinsic(dict, "peek-token", ct_intrinsic_peek_token); + register_ct_intrinsic(dict, "inject-tokens", ct_intrinsic_inject_tokens); + register_ct_intrinsic(dict, "set-token-hook", ct_intrinsic_set_token_hook); + register_ct_intrinsic(dict, "clear-token-hook", ct_intrinsic_clear_token_hook); + register_ct_intrinsic(dict, "parse-error", ct_intrinsic_parse_error); + register_ct_intrinsic(dict, "add-token", ct_intrinsic_add_token); + register_ct_intrinsic(dict, "add-token-chars", ct_intrinsic_add_token_chars); + register_ct_intrinsic(dict, "prelude-clear", ct_intrinsic_prelude_clear); + register_ct_intrinsic(dict, "prelude-append", ct_intrinsic_prelude_append); + register_ct_intrinsic(dict, "prelude-set", ct_intrinsic_prelude_set); + register_ct_intrinsic(dict, "bss-clear", ct_intrinsic_bss_clear); + register_ct_intrinsic(dict, "bss-append", ct_intrinsic_bss_append); + register_ct_intrinsic(dict, "bss-set", ct_intrinsic_bss_set); + register_ct_intrinsic(dict, "use-l2-ct", ct_intrinsic_use_l2_ct); + register_ct_intrinsic(dict, "shunt", ct_intrinsic_shunt); + register_ct_intrinsic(dict, "emit-definition", ct_intrinsic_emit_definition); + register_ct_intrinsic(dict, "lexer-new", ct_intrinsic_lexer_new); + register_ct_intrinsic(dict, "lexer-pop", ct_intrinsic_lexer_pop); + register_ct_intrinsic(dict, "lexer-peek", ct_intrinsic_lexer_peek); + register_ct_intrinsic(dict, "lexer-expect", ct_intrinsic_lexer_expect); + register_ct_intrinsic(dict, "lexer-collect-brace", ct_intrinsic_lexer_collect_brace); + register_ct_intrinsic(dict, "lexer-push-back", ct_intrinsic_lexer_push_back); + vm->dictionary = dict; +} + +static void emit_push_literal(FunctionEmitter *builder, int64_t value) { + emit_line(builder, str_printf(" ; push %lld", (long long)value)); + emit_line(builder, " sub r12, 8"); + emit_line(builder, str_printf(" mov qword [r12], %lld", (long long)value)); +} + +static void emit_push_literal_u64(FunctionEmitter *builder, uint64_t value) { + emit_line(builder, str_printf(" ; push %llu", (unsigned long long)value)); + emit_line(builder, " sub r12, 8"); + emit_line(builder, str_printf(" mov rax, %llu", (unsigned long long)value)); + emit_line(builder, " mov [r12], rax"); +} + +static void emit_push_label(FunctionEmitter *builder, const char *label) { + emit_line(builder, str_printf(" ; push %s", label)); + emit_line(builder, str_printf(" lea rax, [rel %s]", label)); + emit_line(builder, " sub r12, 8"); + emit_line(builder, " mov [r12], rax"); +} + +static void emit_push_from(FunctionEmitter *builder, const char *reg) { + emit_line(builder, " sub r12, 8"); + emit_line(builder, str_printf(" mov [r12], %s", reg)); +} + +static void emit_pop_to(FunctionEmitter *builder, const char *reg) { + emit_line(builder, str_printf(" mov %s, [r12]", reg)); + emit_line(builder, " add r12, 8"); +} + +static void emission_init(Emission *emission) { + VEC_INIT(&emission->text); + VEC_INIT(&emission->data); + VEC_INIT(&emission->bss); +} + +typedef struct { + Emission *emission; + Dictionary *dictionary; + StrMap string_labels; + StrMap externs; + StrMap label_cache; + int unique_id; + bool debug; +} EmitContext; + +static void emit_extern(EmitContext *ctx, const char *name) { + if (strmap_has(&ctx->externs, name)) { + return; + } + strmap_set(&ctx->externs, name, (void *)1); + VEC_PUSH(&ctx->emission->text, str_printf("extern %s", name)); +} + +static const char *emit_string_literal(EmitContext *ctx, const char *value) { + char *label = (char *)strmap_get(&ctx->string_labels, value); + if (label) { + return label; + } + label = str_printf("__str_%d", ctx->unique_id++); + strmap_set(&ctx->string_labels, value, label); + StrVec bytes; + VEC_INIT(&bytes); + for (const unsigned char *p = (const unsigned char *)value; *p; p++) { + VEC_PUSH(&bytes, str_printf("%u", (unsigned int)*p)); + } + VEC_PUSH(&bytes, str_dup("0")); + size_t total = 0; + for (size_t i = 0; i < bytes.len; i++) { + total += strlen(bytes.data[i]) + 2; + } + char *line = (char *)xmalloc(total + strlen(label) + 6); + strcpy(line, label); + strcat(line, ": db "); + for (size_t i = 0; i < bytes.len; i++) { + strcat(line, bytes.data[i]); + if (i + 1 < bytes.len) { + strcat(line, ", "); + } + } + VEC_PUSH(&ctx->emission->data, line); + return label; +} + +static const char *emit_word_label(EmitContext *ctx, const char *name) { + char *label = (char *)strmap_get(&ctx->label_cache, name); + if (label) { + return label; + } + char *sanitized = sanitize_label(name); + label = str_printf("w_%s", sanitized); + free(sanitized); + strmap_set(&ctx->label_cache, name, label); + return label; +} + +static bool inline_stack_has(StrVec *stack, const char *name) { + for (size_t i = 0; i < stack->len; i++) { + if (strcmp(stack->data[i], name) == 0) { + return true; + } + } + return false; +} + +static bool is_float_type(const char *type) { + return type && (strcmp(type, "double") == 0 || strcmp(type, "float") == 0); +} + +static void emit_extern_call(EmitContext *ctx, FunctionEmitter *builder, Word *word) { + emit_extern(ctx, word->name); + if (!word->extern_arg_types || word->extern_arg_count == 0) { + emit_line(builder, str_printf(" call %s", word->name)); + if (word->extern_ret_type && strcmp(word->extern_ret_type, "void") != 0) { + emit_push_from(builder, "rax"); + } + return; + } + const char *int_regs[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"}; + const char *float_regs[] = {"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"}; + int int_idx = 0; + int float_idx = 0; + for (int i = 0; i < word->extern_arg_count; i++) { + const char *type = word->extern_arg_types[i]; + int offset = (word->extern_arg_count - 1 - i) * 8; + if (is_float_type(type)) { + if (float_idx >= 8) { + fprintf(stderr, "[error] too many float args for extern %s\n", word->name); + exit(1); + } + emit_line(builder, str_printf(" movq %s, [r12 + %d]", float_regs[float_idx], offset)); + float_idx++; + } else { + if (int_idx >= 6) { + fprintf(stderr, "[error] too many int args for extern %s\n", word->name); + exit(1); + } + emit_line(builder, str_printf(" mov %s, [r12 + %d]", int_regs[int_idx], offset)); + int_idx++; + } + } + emit_line(builder, str_printf(" add r12, %d", word->extern_arg_count * 8)); + emit_line(builder, " mov r11, rsp"); + emit_line(builder, " and r11, 15"); + char *align_label = str_printf(".L_align_%d", ctx->unique_id++); + emit_line(builder, str_printf(" cmp r11, 0")); + emit_line(builder, str_printf(" je %s", align_label)); + emit_line(builder, " sub rsp, 8"); + emit_line(builder, " xor eax, eax"); + emit_line(builder, str_printf(" mov al, %d", float_idx)); + emit_line(builder, str_printf(" call %s", word->name)); + emit_line(builder, " add rsp, 8"); + emit_line(builder, str_printf(" jmp %s_done", align_label)); + emit_line(builder, str_printf("%s:", align_label)); + emit_line(builder, " xor eax, eax"); + emit_line(builder, str_printf(" mov al, %d", float_idx)); + emit_line(builder, str_printf(" call %s", word->name)); + emit_line(builder, str_printf("%s_done:", align_label)); + free(align_label); + if (word->extern_ret_type && strcmp(word->extern_ret_type, "void") == 0) { + return; + } + if (word->extern_ret_type && is_float_type(word->extern_ret_type)) { + emit_line(builder, " sub r12, 8"); + emit_line(builder, " movq [r12], xmm0"); + } else { + emit_push_from(builder, "rax"); + } +} + +static void emit_ops(EmitContext *ctx, FunctionEmitter *builder, OpVec *body, StrVec *inline_stack); + +static void emit_word_call(EmitContext *ctx, FunctionEmitter *builder, const char *name, StrVec *inline_stack) { + Word *word = dictionary_lookup(ctx->dictionary, name); + if (!word) { + fprintf(stderr, "[error] unknown word '%s' during emission\n", name); + exit(1); + } + if (word->inline_def && word->definition) { + if (inline_stack_has(inline_stack, word->name)) { + fprintf(stderr, "[error] recursive inline word '%s'\n", word->name); + exit(1); + } + VEC_PUSH(inline_stack, word->name); + emit_ops(ctx, builder, &word->definition->body, inline_stack); + inline_stack->len--; + return; + } + if (word->is_extern && !word->extern_arg_types) { + emit_extern(ctx, word->name); + emit_line(builder, str_printf(" call %s", word->name)); + return; + } + if (word->asm_def) { + emit_line(builder, str_printf(" call %s", emit_word_label(ctx, word->name))); + return; + } + if (word->is_extern && word->extern_arg_types) { + emit_extern_call(ctx, builder, word); + return; + } + emit_line(builder, str_printf(" call %s", emit_word_label(ctx, word->name))); +} + +static void emit_op(EmitContext *ctx, FunctionEmitter *builder, Op *op, StrVec *inline_stack) { + switch (op->kind) { + case OP_LITERAL: { + if (op->lit_kind == LIT_INT) { + emit_push_literal(builder, op->data.i64); + } else if (op->lit_kind == LIT_FLOAT) { + union { double f; uint64_t u; } conv; + conv.f = op->data.f64; + emit_push_literal_u64(builder, conv.u); + } else if (op->lit_kind == LIT_STRING) { + const char *label = emit_string_literal(ctx, op->data.str); + emit_push_label(builder, label); + emit_push_literal(builder, (int64_t)strlen(op->data.str)); + } + break; + } + case OP_WORD: + emit_word_call(ctx, builder, op->data.word, inline_stack); + break; + case OP_BRANCH_ZERO: + emit_pop_to(builder, "rax"); + emit_line(builder, " cmp rax, 0"); + emit_line(builder, str_printf(" je %s", op->data.label)); + break; + case OP_JUMP: + emit_line(builder, str_printf(" jmp %s", op->data.label)); + break; + case OP_LABEL: + emit_line(builder, str_printf("%s:", op->data.label)); + break; + case OP_FOR_BEGIN: + emit_pop_to(builder, "rax"); + emit_line(builder, " cmp rax, 0"); + emit_line(builder, str_printf(" jle %s", op->data.loop.end)); + emit_line(builder, " sub r13, 8"); + emit_line(builder, " mov [r13], rax"); + emit_line(builder, str_printf("%s:", op->data.loop.loop)); + break; + case OP_FOR_END: + emit_line(builder, " mov rax, [r13]"); + emit_line(builder, " dec rax"); + emit_line(builder, " mov [r13], rax"); + emit_line(builder, " cmp rax, 0"); + emit_line(builder, str_printf(" jg %s", op->data.loop.loop)); + emit_line(builder, " add r13, 8"); + emit_line(builder, str_printf("%s:", op->data.loop.end)); + break; + case OP_LIST_BEGIN: + emit_line(builder, " mov rax, [rel list_capture_sp]"); + emit_line(builder, " mov [rax], r12"); + emit_line(builder, " add rax, 8"); + emit_line(builder, " mov [rel list_capture_sp], rax"); + break; + case OP_LIST_END: + char *list_done = str_printf(".list_copy_done_%d", ctx->unique_id++); + char *list_loop = str_printf(".list_copy_loop_%d", ctx->unique_id++); + emit_line(builder, " mov rax, [rel list_capture_sp]"); + emit_line(builder, " sub rax, 8"); + emit_line(builder, " mov [rel list_capture_sp], rax"); + emit_line(builder, " mov rbx, [rax]"); + emit_line(builder, " mov rcx, rbx"); + emit_line(builder, " sub rcx, r12"); + emit_line(builder, " shr rcx, 3"); + emit_line(builder, " mov r15, rcx"); + emit_line(builder, " mov rdi, 0"); + emit_line(builder, " mov rsi, rcx"); + emit_line(builder, " add rsi, 1"); + emit_line(builder, " shl rsi, 3"); + emit_line(builder, " mov rdx, 3"); + emit_line(builder, " mov r10, 34"); + emit_line(builder, " mov r8, -1"); + emit_line(builder, " mov r9, 0"); + emit_line(builder, " mov rax, 9"); + emit_line(builder, " syscall"); + emit_line(builder, " mov [rax], r15"); + emit_line(builder, " mov rcx, r15"); + emit_line(builder, " cmp rcx, 0"); + emit_line(builder, str_printf(" je %s", list_done)); + emit_line(builder, " lea rsi, [r12 + rcx*8 - 8]"); + emit_line(builder, " lea rdi, [rax + 8]"); + emit_line(builder, str_printf("%s:", list_loop)); + emit_line(builder, " mov rdx, [rsi]"); + emit_line(builder, " mov [rdi], rdx"); + emit_line(builder, " sub rsi, 8"); + emit_line(builder, " add rdi, 8"); + emit_line(builder, " dec rcx"); + emit_line(builder, str_printf(" jnz %s", list_loop)); + emit_line(builder, str_printf("%s:", list_done)); + emit_line(builder, " mov r12, rbx"); + emit_line(builder, " sub r12, 8"); + emit_line(builder, " mov [r12], rax"); + free(list_done); + free(list_loop); + break; + } +} + +static void emit_ops(EmitContext *ctx, FunctionEmitter *builder, OpVec *body, StrVec *inline_stack) { + for (size_t i = 0; i < body->len; i++) { + emit_op(ctx, builder, &body->data[i], inline_stack); + } +} + +static void emit_definition(EmitContext *ctx, Definition *def) { + FunctionEmitter builder; + emitter_init(&builder, &ctx->emission->text, ctx->debug); + const char *label = emit_word_label(ctx, def->name); + if (strcmp(def->name, "main") == 0) { + emit_line(&builder, str_printf("global %s", label)); + } + emit_line(&builder, str_printf("%s:", label)); + StrVec inline_stack; + VEC_INIT(&inline_stack); + emit_ops(ctx, &builder, &def->body, &inline_stack); + emit_line(&builder, " ret"); +} + +static void emit_asm_definition(EmitContext *ctx, AsmDefinition *def) { + if (!def || !def->body) { + return; + } + VEC_PUSH(&ctx->emission->text, str_printf("%s:", emit_word_label(ctx, def->name))); + const char *cursor = def->body; + while (*cursor) { + const char *line_end = strchr(cursor, '\n'); + size_t len = line_end ? (size_t)(line_end - cursor) : strlen(cursor); + char *line = (char *)xmalloc(len + 1); + memcpy(line, cursor, len); + line[len] = '\0'; + if (len > 0) { + char *trim = line; + while (*trim && isspace((unsigned char)*trim)) { + trim++; + } + size_t trim_len = strlen(trim); + if (trim_len > 0 && trim[trim_len - 1] == ':') { + VEC_PUSH(&ctx->emission->text, str_dup(trim)); + free(line); + } else { + VEC_PUSH(&ctx->emission->text, line); + } + } else { + free(line); + } + if (!line_end) { + break; + } + cursor = line_end + 1; + } + VEC_PUSH(&ctx->emission->text, str_dup(" ret")); +} + +static void emit_default_prelude(Emission *emission) { + VEC_PUSH(&emission->text, str_dup("%define DSTK_BYTES 65536")); + VEC_PUSH(&emission->text, str_dup("%define RSTK_BYTES 65536")); + VEC_PUSH(&emission->text, str_dup("%define PRINT_BUF_BYTES 4096")); + VEC_PUSH(&emission->text, str_dup("global _start")); + VEC_PUSH(&emission->text, str_dup("_start:")); + VEC_PUSH(&emission->text, str_dup(" mov rbx, rsp")); + VEC_PUSH(&emission->text, str_dup(" mov rax, [rbx]")); + VEC_PUSH(&emission->text, str_dup(" mov [rel sys_argc], rax")); + VEC_PUSH(&emission->text, str_dup(" lea rax, [rbx + 8]")); + VEC_PUSH(&emission->text, str_dup(" mov [rel sys_argv], rax")); + VEC_PUSH(&emission->text, str_dup(" lea r12, [rel dstack_top]")); + VEC_PUSH(&emission->text, str_dup(" lea r13, [rel rstack_top]")); + VEC_PUSH(&emission->text, str_dup(" lea rax, [rel list_capture_stack]")); + VEC_PUSH(&emission->text, str_dup(" mov [rel list_capture_sp], rax")); + VEC_PUSH(&emission->text, str_dup(" call w_main")); + VEC_PUSH(&emission->text, str_dup(" mov rax, [r12]")); + VEC_PUSH(&emission->text, str_dup(" mov rdi, rax")); + VEC_PUSH(&emission->text, str_dup(" mov rax, 60")); + VEC_PUSH(&emission->text, str_dup(" syscall")); +} + +static void emit_libc_prelude(Emission *emission) { + VEC_PUSH(&emission->text, str_dup("%define DSTK_BYTES 65536")); + VEC_PUSH(&emission->text, str_dup("%define RSTK_BYTES 65536")); + VEC_PUSH(&emission->text, str_dup("%define PRINT_BUF_BYTES 4096")); + VEC_PUSH(&emission->text, str_dup("global main")); + VEC_PUSH(&emission->text, str_dup("main:")); + VEC_PUSH(&emission->text, str_dup(" mov [rel sys_argc], rdi")); + VEC_PUSH(&emission->text, str_dup(" mov [rel sys_argv], rsi")); + VEC_PUSH(&emission->text, str_dup(" lea r12, [rel dstack_top]")); + VEC_PUSH(&emission->text, str_dup(" lea r13, [rel rstack_top]")); + VEC_PUSH(&emission->text, str_dup(" lea rax, [rel list_capture_stack]")); + VEC_PUSH(&emission->text, str_dup(" mov [rel list_capture_sp], rax")); + VEC_PUSH(&emission->text, str_dup(" call w_main")); + VEC_PUSH(&emission->text, str_dup(" mov rax, [r12]")); + VEC_PUSH(&emission->text, str_dup(" ret")); +} + +static void emit_default_bss(Emission *emission) { + VEC_PUSH(&emission->bss, str_dup("align 16")); + VEC_PUSH(&emission->bss, str_dup("dstack: resb DSTK_BYTES")); + VEC_PUSH(&emission->bss, str_dup("dstack_top:")); + VEC_PUSH(&emission->bss, str_dup("align 16")); + VEC_PUSH(&emission->bss, str_dup("rstack: resb RSTK_BYTES")); + VEC_PUSH(&emission->bss, str_dup("rstack_top:")); + VEC_PUSH(&emission->bss, str_dup("align 16")); + VEC_PUSH(&emission->bss, str_dup("print_buf: resb PRINT_BUF_BYTES")); + VEC_PUSH(&emission->bss, str_dup("print_buf_end:")); + VEC_PUSH(&emission->bss, str_dup("align 16")); + VEC_PUSH(&emission->bss, str_dup("persistent: resb 64")); + VEC_PUSH(&emission->bss, str_dup("persistent_end:")); + VEC_PUSH(&emission->bss, str_dup("align 16")); + VEC_PUSH(&emission->bss, str_dup("list_capture_sp: resq 1")); + VEC_PUSH(&emission->bss, str_dup("list_capture_tmp: resq 1")); + VEC_PUSH(&emission->bss, str_dup("list_capture_stack: resq 1024")); +} + +static Emission emit_module(Parser *parser, Dictionary *dict, bool debug) { + Emission emission; + emission_init(&emission); + EmitContext ctx; + ctx.emission = &emission; + ctx.dictionary = dict; + strmap_init(&ctx.string_labels); + strmap_init(&ctx.externs); + strmap_init(&ctx.label_cache); + ctx.unique_id = 0; + ctx.debug = debug; + + if (parser->custom_prelude) { + for (size_t i = 0; i < parser->custom_prelude->len; i++) { + VEC_PUSH(&emission.text, str_dup(parser->custom_prelude->data[i])); + } + } else if (parser->uses_libc) { + emit_libc_prelude(&emission); + } else { + emit_default_prelude(&emission); + } + + VEC_PUSH(&emission.data, str_dup("sys_argc: dq 0")); + VEC_PUSH(&emission.data, str_dup("sys_argv: dq 0")); + + if (parser->custom_bss) { + for (size_t i = 0; i < parser->custom_bss->len; i++) { + VEC_PUSH(&emission.bss, str_dup(parser->custom_bss->data[i])); + } + } else { + emit_default_bss(&emission); + } + + for (size_t i = 0; i < parser->module.forms.len; i++) { + Form form = parser->module.forms.data[i]; + if (form.kind == FORM_DEF) { + Definition *def = (Definition *)form.ptr; + if (def->compile_only) { + continue; + } + Word *word = dictionary_lookup(dict, def->name); + if (!word || word->definition != def) { + continue; + } + emit_definition(&ctx, def); + } else if (form.kind == FORM_ASM) { + AsmDefinition *def = (AsmDefinition *)form.ptr; + if (def->compile_only) { + continue; + } + Word *word = dictionary_lookup(dict, def->name); + if (!word || word->asm_def != def) { + continue; + } + emit_asm_definition(&ctx, def); + } + } + + for (size_t i = 0; i < parser->variable_labels.cap; i++) { + if (!parser->variable_labels.keys || !parser->variable_labels.keys[i]) { + continue; + } + const char *label = (const char *)parser->variable_labels.values[i]; + if (label) { + VEC_PUSH(&emission.data, str_printf("%s: dq 0", label)); + } + } + + return emission; +} + +static char *emission_snapshot(Emission *emission) { + StrVec parts; + VEC_INIT(&parts); + if (emission->text.len) { + VEC_PUSH(&parts, str_dup("section .text")); + for (size_t i = 0; i < emission->text.len; i++) { + if (emission->text.data[i]) { + VEC_PUSH(&parts, str_dup(emission->text.data[i])); + } + } + } + if (emission->data.len) { + VEC_PUSH(&parts, str_dup("section .data")); + VEC_PUSH(&parts, str_dup("data_start:")); + for (size_t i = 0; i < emission->data.len; i++) { + if (emission->data.data[i]) { + VEC_PUSH(&parts, str_dup(emission->data.data[i])); + } + } + VEC_PUSH(&parts, str_dup("data_end:")); + } + if (emission->bss.len) { + VEC_PUSH(&parts, str_dup("section .bss")); + for (size_t i = 0; i < emission->bss.len; i++) { + if (emission->bss.data[i]) { + VEC_PUSH(&parts, str_dup(emission->bss.data[i])); + } + } + } + VEC_PUSH(&parts, str_dup("section .note.GNU-stack noalloc noexec nowrite")); + size_t total = 0; + for (size_t i = 0; i < parts.len; i++) { + if (parts.data[i]) { + total += strlen(parts.data[i]) + 1; + } + } + char *buf = (char *)xmalloc(total + 1); + buf[0] = '\0'; + for (size_t i = 0; i < parts.len; i++) { + strcat(buf, parts.data[i]); + strcat(buf, "\n"); + } + return buf; +} + +static void write_file(const char *path, const char *data) { + FILE *f = fopen(path, "w"); + if (!f) { + fprintf(stderr, "[error] failed to write %s: %s\n", path, strerror(errno)); + exit(1); + } + fputs(data, f); + fclose(f); +} + +static void run_cmd(char *const argv[]) { + pid_t pid = fork(); + if (pid < 0) { + fprintf(stderr, "[error] fork failed: %s\n", strerror(errno)); + exit(1); + } + if (pid == 0) { + execvp(argv[0], argv); + fprintf(stderr, "[error] failed to exec %s: %s\n", argv[0], strerror(errno)); + _exit(1); + } + int status = 0; + if (waitpid(pid, &status, 0) < 0) { + fprintf(stderr, "[error] waitpid failed: %s\n", strerror(errno)); + exit(1); + } + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + fprintf(stderr, "[error] command failed\n"); + exit(1); + } +} + +static void run_nasm(const char *asm_path, const char *obj_path, bool debug) { + char *argv[8]; + int idx = 0; + argv[idx++] = "nasm"; + argv[idx++] = "-f"; + argv[idx++] = "elf64"; + if (debug) { + argv[idx++] = "-g"; + argv[idx++] = "-F"; + argv[idx++] = "dwarf"; + } + argv[idx++] = "-o"; + argv[idx++] = (char *)obj_path; + argv[idx++] = (char *)asm_path; + argv[idx++] = NULL; + run_cmd(argv); +} + +static void run_linker(const char *obj_path, const char *exe_path, bool debug, StrVec *libs, bool shared, bool use_libc) { + const char *linker = NULL; + if (use_libc) { + if (access("/usr/bin/cc", X_OK) == 0) { + linker = "cc"; + } else if (access("/usr/bin/gcc", X_OK) == 0) { + linker = "gcc"; + } else { + fprintf(stderr, "[error] no C compiler found for libc linking\n"); + exit(1); + } + } else if (access("/usr/bin/ld.lld", X_OK) == 0) { + linker = "ld.lld"; + } else if (access("/usr/bin/ld", X_OK) == 0) { + linker = "ld"; + } else { + fprintf(stderr, "[error] no linker found\n"); + exit(1); + } + StrVec argv; + VEC_INIT(&argv); + VEC_PUSH(&argv, str_dup((char *)linker)); + if (!use_libc && strstr(linker, "lld")) { + VEC_PUSH(&argv, str_dup("-m")); + VEC_PUSH(&argv, str_dup("elf_x86_64")); + } + if (shared) { + VEC_PUSH(&argv, str_dup("-shared")); + } + VEC_PUSH(&argv, str_dup("-o")); + VEC_PUSH(&argv, str_dup((char *)exe_path)); + VEC_PUSH(&argv, str_dup((char *)obj_path)); + if (use_libc) { + VEC_PUSH(&argv, str_dup("-no-pie")); + } else if (!shared && (!libs || libs->len == 0)) { + VEC_PUSH(&argv, str_dup("-nostdlib")); + VEC_PUSH(&argv, str_dup("-static")); + } else if (!shared) { + const char *candidates[] = { + "/lib64/ld-linux-x86-64.so.2", + "/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2", + "/lib/ld-linux-x86-64.so.2", + "/lib/ld64.so.1" + }; + const char *interp = NULL; + for (size_t i = 0; i < ARRAY_LEN(candidates); i++) { + if (access(candidates[i], R_OK) == 0) { + interp = candidates[i]; + break; + } + } + if (interp) { + VEC_PUSH(&argv, str_dup("-dynamic-linker")); + VEC_PUSH(&argv, str_dup(interp)); + } + } + if (libs) { + for (size_t i = 0; i < libs->len; i++) { + VEC_PUSH(&argv, str_dup(libs->data[i])); + } + } + if (debug) { + VEC_PUSH(&argv, str_dup("-g")); + } + VEC_PUSH(&argv, NULL); + run_cmd(argv.data); +} + +static char *read_text_file(const char *path) { + FILE *f = fopen(path, "r"); + if (!f) { + return NULL; + } + fseek(f, 0, SEEK_END); + long size = ftell(f); + fseek(f, 0, SEEK_SET); + if (size < 0) { + fclose(f); + return NULL; + } + char *buf = (char *)xmalloc((size_t)size + 1); + size_t n = fread(buf, 1, (size_t)size, f); + buf[n] = '\0'; + fclose(f); + return buf; +} + +static bool file_exists(const char *path) { + return access(path, R_OK) == 0; +} + +static char *path_dirname(const char *path) { + const char *slash = strrchr(path, '/'); + if (!slash) { + return str_dup("."); + } + size_t len = (size_t)(slash - path); + if (len == 0) { + return str_dup("/"); + } + char *out = (char *)xmalloc(len + 1); + memcpy(out, path, len); + out[len] = '\0'; + return out; +} + +static char *path_basename(const char *path) { + if (!path) { + return str_dup(""); + } + const char *slash = strrchr(path, '/'); + if (!slash || !slash[1]) { + return str_dup(path); + } + return str_dup(slash + 1); +} + +static char *path_join(const char *a, const char *b) { + if (!a || !*a) { + return str_dup(b); + } + if (!b || !*b) { + return str_dup(a); + } + size_t len_a = strlen(a); + bool has_sep = a[len_a - 1] == '/'; + return str_printf("%s%s%s", a, has_sep ? "" : "/", b); +} + +static char *resolve_import(const char *base_dir, const char *import_path, StrVec *include_dirs) { + if (!import_path || !*import_path) { + return NULL; + } + if (import_path[0] == '/') { + return file_exists(import_path) ? str_dup(import_path) : NULL; + } + if (base_dir) { + char *candidate = path_join(base_dir, import_path); + if (file_exists(candidate)) { + return candidate; + } + free(candidate); + } + if (include_dirs) { + for (size_t i = 0; i < include_dirs->len; i++) { + char *candidate = path_join(include_dirs->data[i], import_path); + if (file_exists(candidate)) { + return candidate; + } + free(candidate); + } + } + return NULL; +} + +static char *expand_imports(const char *path, StrVec *include_dirs, StrMap *visited, FileSpanVec *spans, int *line_counter) { + if (strmap_has(visited, path)) { + return str_dup(""); + } + strmap_set(visited, path, (void *)1); + char *content = read_text_file(path); + if (!content) { + fprintf(stderr, "[error] failed to read %s\n", path); + exit(1); + } + char *base_dir = path_dirname(path); + StrVec parts; + VEC_INIT(&parts); + const char *cursor = content; + int local_line = 1; + int span_start = *line_counter; + int span_local_start = local_line; + bool span_active = false; + while (*cursor) { + const char *line_end = strchr(cursor, '\n'); + size_t len = line_end ? (size_t)(line_end - cursor) : strlen(cursor); + char *line = (char *)xmalloc(len + 1); + memcpy(line, cursor, len); + line[len] = '\0'; + char *trim = line; + while (*trim && isspace((unsigned char)*trim)) { + trim++; + } + bool is_import = false; + if (str_starts_with(trim, "import") && (trim[6] == ' ' || trim[6] == '\t')) { + trim += 6; + while (*trim && isspace((unsigned char)*trim)) { + trim++; + } + char *end = trim; + while (*end && !isspace((unsigned char)*end) && *end != '#') { + end++; + } + if (end > trim) { + char *import_path = (char *)xmalloc((size_t)(end - trim) + 1); + memcpy(import_path, trim, (size_t)(end - trim)); + import_path[end - trim] = '\0'; + char *resolved = resolve_import(base_dir, import_path, include_dirs); + if (!resolved) { + fprintf(stderr, "[error] import not found: %s\n", import_path); + exit(1); + } + if (span_active) { + FileSpan span = {0}; + span.path = str_dup(path); + span.start_line = span_start; + span.end_line = *line_counter; + span.local_start_line = span_local_start; + VEC_PUSH(spans, span); + span_active = false; + } + char *expanded = expand_imports(resolved, include_dirs, visited, spans, line_counter); + if (expanded && *expanded) { + VEC_PUSH(&parts, expanded); + } + VEC_PUSH(&parts, str_dup("\n")); + (*line_counter)++; + local_line++; + free(resolved); + free(import_path); + is_import = true; + } + } + if (!is_import) { + if (!span_active) { + span_start = *line_counter; + span_local_start = local_line; + span_active = true; + } + VEC_PUSH(&parts, line); + VEC_PUSH(&parts, str_dup("\n")); + (*line_counter)++; + local_line++; + } else { + free(line); + } + if (!line_end) { + break; + } + cursor = line_end + 1; + } + if (span_active) { + FileSpan span = {0}; + span.path = str_dup(path); + span.start_line = span_start; + span.end_line = *line_counter; + span.local_start_line = span_local_start; + VEC_PUSH(spans, span); + } + size_t total = 0; + for (size_t i = 0; i < parts.len; i++) { + total += strlen(parts.data[i]); + } + char *out = (char *)xmalloc(total + 1); + out[0] = '\0'; + for (size_t i = 0; i < parts.len; i++) { + strcat(out, parts.data[i]); + } + free(content); + free(base_dir); + return out; +} + +static bool parse_string_literal(const char *lexeme, char **out) { + size_t len = strlen(lexeme); + if (len < 2 || lexeme[0] != '"' || lexeme[len - 1] != '"') { + return false; + } + const char *body = lexeme + 1; + size_t body_len = len - 2; + char *buf = (char *)xmalloc(body_len + 1); + size_t pos = 0; + for (size_t i = 0; i < body_len; i++) { + char ch = body[i]; + if (ch != '\\') { + buf[pos++] = ch; + continue; + } + i++; + if (i >= body_len) { + fprintf(stderr, "[error] unterminated escape sequence\n"); + exit(1); + } + char esc = body[i]; + if (esc == 'n') { + buf[pos++] = '\n'; + } else if (esc == 't') { + buf[pos++] = '\t'; + } else if (esc == 'r') { + buf[pos++] = '\r'; + } else if (esc == '0') { + buf[pos++] = '\0'; + } else if (esc == '"') { + buf[pos++] = '"'; + } else if (esc == '\\') { + buf[pos++] = '\\'; + } else { + fprintf(stderr, "[error] unsupported escape sequence \\%c\n", esc); + exit(1); + } + } + buf[pos] = '\0'; + *out = buf; + return true; +} + +static bool try_parse_int(const char *lexeme, int64_t *out) { + char *end = NULL; + errno = 0; + long long val = strtoll(lexeme, &end, 0); + if (errno != 0 || !end || *end != '\0') { + return false; + } + *out = (int64_t)val; + return true; +} + +static bool try_parse_float(const char *lexeme, double *out) { + if (!strchr(lexeme, '.') && !strchr(lexeme, 'e') && !strchr(lexeme, 'E')) { + return false; + } + char *end = NULL; + errno = 0; + double val = strtod(lexeme, &end); + if (errno != 0 || !end || *end != '\0') { + return false; + } + *out = val; + return true; +} + +static void parser_inject_tokens(Parser *parser, TokenVec *injected) { + if (!injected || injected->len == 0) { + return; + } + if (parser->pos > parser->tokens.len) { + parser->pos = parser->tokens.len; + } + size_t new_len = parser->tokens.len + injected->len; + if (new_len > parser->tokens.cap) { + parser->tokens.cap = new_len + 16; + parser->tokens.data = xrealloc(parser->tokens.data, parser->tokens.cap * sizeof(Token)); + } + memmove(&parser->tokens.data[parser->pos + injected->len], + &parser->tokens.data[parser->pos], + (parser->tokens.len - parser->pos) * sizeof(Token)); + for (size_t i = 0; i < injected->len; i++) { + parser->tokens.data[parser->pos + i] = injected->data[i]; + } + parser->tokens.len = new_len; +} + +static void parser_start_macro(Parser *parser, const char *name, int param_count) { + if (parser->macro_recording.active) { + fprintf(stderr, "[error] nested macro definitions are not supported\n"); + exit(1); + } + parser->macro_recording.active = true; + parser->macro_recording.name = str_dup(name); + VEC_INIT(&parser->macro_recording.tokens); + parser->macro_recording.param_count = param_count; +} + +static void parser_finish_macro(Parser *parser) { + if (!parser->macro_recording.active) { + fprintf(stderr, "[error] unexpected ';' closing a macro\n"); + exit(1); + } + Word *word = (Word *)xmalloc(sizeof(Word)); + memset(word, 0, sizeof(Word)); + word->name = str_dup(parser->macro_recording.name); + word->macro_expansion = (char **)xmalloc((parser->macro_recording.tokens.len + 1) * sizeof(char *)); + word->macro_param_count = parser->macro_recording.param_count; + for (size_t i = 0; i < parser->macro_recording.tokens.len; i++) { + word->macro_expansion[i] = str_dup(parser->macro_recording.tokens.data[i]); + } + word->macro_expansion[parser->macro_recording.tokens.len] = NULL; + dictionary_register(parser->dictionary, word); + parser->macro_recording.active = false; +} + +static void parser_emit_literal(Parser *parser, LiteralKind kind, int64_t i64, double f64, const char *str) { + Op op = {0}; + op.kind = OP_LITERAL; + op.lit_kind = kind; + if (kind == LIT_INT) { + op.data.i64 = i64; + } else if (kind == LIT_FLOAT) { + op.data.f64 = f64; + } else { + op.data.str = str_dup(str); + } + parser_emit_op(parser, op); +} + +static void parser_handle_token(Parser *parser, Token token); + +static void parse_tokens(Parser *parser, const char *source) { + parser->source = str_dup(source); + tokenizer_init(&parser->tokenizer, parser->reader, source); + parser->tokenizer_exhausted = false; + parser->pos = 0; + parser->current_def = NULL; + parser->control_len = 0; + parser->label_counter = 0; + parser->token_hook = NULL; + parser->has_last_token = false; + parser->custom_prelude = NULL; + parser->custom_bss = NULL; + parser->pending_inline_def = false; + + while (!parser_eof(parser)) { + Token token = parser_next_token(parser); + if (!token.lexeme) { + break; + } + if (parser->macro_recording.active) { + if (strcmp(token.lexeme, ";") == 0) { + parser_finish_macro(parser); + } else { + VEC_PUSH(&parser->macro_recording.tokens, str_dup(token.lexeme)); + } + continue; + } + if (strcmp(token.lexeme, "[") == 0) { + Op op = {0}; + op.kind = OP_LIST_BEGIN; + op.data.label = parser_new_label(parser, "list"); + parser_emit_op(parser, op); + parser_push_control(parser, "list"); + parser->control_stack[parser->control_len - 1].begin_label = op.data.label; + continue; + } + if (strcmp(token.lexeme, "]") == 0) { + if (!parser->control_len || strcmp(parser->control_stack[parser->control_len - 1].type, "list") != 0) { + fprintf(stderr, "[error] mismatched ']'\n"); + exit(1); + } + char *label = parser->control_stack[parser->control_len - 1].begin_label; + parser->control_len--; + Op op = {0}; + op.kind = OP_LIST_END; + op.data.label = str_dup(label); + parser_emit_op(parser, op); + continue; + } + if (strcmp(token.lexeme, "word") == 0) { + Token name_tok = parser_next_token(parser); + if (!name_tok.lexeme) { + fprintf(stderr, "[error] definition name missing after 'word'\n"); + exit(1); + } + Definition *def = (Definition *)xmalloc(sizeof(Definition)); + memset(def, 0, sizeof(Definition)); + def->name = str_dup(name_tok.lexeme); + VEC_INIT(&def->body); + def->terminator = str_dup("end"); + def->inline_def = parser->pending_inline_def; + parser->pending_inline_def = false; + parser->current_def = def; + Word *word = dictionary_lookup(parser->dictionary, def->name); + if (!word) { + word = (Word *)xmalloc(sizeof(Word)); + memset(word, 0, sizeof(Word)); + word->name = str_dup(def->name); + dictionary_register(parser->dictionary, word); + } + word->prev_definition = word->definition; + word->prev_asm_def = word->asm_def; + word->immediate = false; + word->compile_only = false; + word->definition = def; + word->asm_def = NULL; + word->inline_def = def->inline_def; + if (parser->definition_stack_len + 1 > parser->definition_stack_cap) { + parser->definition_stack_cap = parser->definition_stack_cap ? parser->definition_stack_cap * 2 : 8; + parser->definition_stack = xrealloc(parser->definition_stack, parser->definition_stack_cap * sizeof(Word *)); + } + parser->definition_stack[parser->definition_stack_len++] = word; + continue; + } + if (strcmp(token.lexeme, "end") == 0) { + if (parser->control_len) { + const char *type = parser->control_stack[parser->control_len - 1].type; + if (strcmp(type, "if") == 0 || strcmp(type, "elif") == 0) { + if (parser->control_stack[parser->control_len - 1].false_label) { + Op op = {0}; + op.kind = OP_LABEL; + op.data.label = str_dup(parser->control_stack[parser->control_len - 1].false_label); + parser_emit_op(parser, op); + } + if (parser->control_stack[parser->control_len - 1].end_label) { + Op op = {0}; + op.kind = OP_LABEL; + op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label); + parser_emit_op(parser, op); + } + parser->control_len--; + continue; + } + if (strcmp(type, "else") == 0) { + Op op = {0}; + op.kind = OP_LABEL; + op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label); + parser_emit_op(parser, op); + parser->control_len--; + continue; + } + if (strcmp(type, "begin") == 0) { + Op op = {0}; + op.kind = OP_JUMP; + op.data.label = str_dup(parser->control_stack[parser->control_len - 1].begin_label); + parser_emit_op(parser, op); + op.kind = OP_LABEL; + op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label); + parser_emit_op(parser, op); + parser->control_len--; + continue; + } + if (strcmp(type, "for") == 0) { + Op op = {0}; + op.kind = OP_FOR_END; + op.data.loop.loop = str_dup(parser->control_stack[parser->control_len - 1].loop_label); + op.data.loop.end = str_dup(parser->control_stack[parser->control_len - 1].end_label); + parser_emit_op(parser, op); + parser->control_len--; + continue; + } + if (strcmp(type, "with") == 0) { + StrVec *with_names = &parser->control_stack[parser->control_len - 1].with_names; + for (size_t i = 0; i < with_names->len; i++) { + const char *name = with_names->data[i]; + strmap_set(&parser->variable_words, name, NULL); + free(with_names->data[i]); + } + VEC_FREE(with_names); + parser->control_len--; + continue; + } + } + if (parser->current_def) { + Definition *def = parser->current_def; + Word *word = parser->definition_stack[parser->definition_stack_len - 1]; + def->immediate = word->immediate; + def->compile_only = word->compile_only; + def->inline_def = word->inline_def; + Form form = {0}; + form.kind = FORM_DEF; + form.ptr = def; + VEC_PUSH(&parser->module.forms, form); + parser->current_def = NULL; + parser->definition_stack_len--; + parser->last_defined = word; + continue; + } + fprintf(stderr, "[error] unexpected 'end'\n"); + exit(1); + } + if (strcmp(token.lexeme, ":asm") == 0) { + Token name_tok = parser_next_token(parser); + if (!name_tok.lexeme) { + fprintf(stderr, "[error] definition name missing after ':asm'\n"); + exit(1); + } + bool effect_string_io = false; + Token brace = parser_next_token(parser); + if (brace.lexeme && strcmp(brace.lexeme, "(") == 0) { + while (!parser_eof(parser)) { + Token meta = parser_next_token(parser); + if (!meta.lexeme) { + break; + } + if (strcmp(meta.lexeme, ")") == 0) { + break; + } + if (strcmp(meta.lexeme, "string-io") == 0) { + effect_string_io = true; + } + } + brace = parser_next_token(parser); + } + if (!brace.lexeme || strcmp(brace.lexeme, "{") != 0) { + fprintf(stderr, "[error] expected '{' after asm name, got '%s'\n", brace.lexeme ? brace.lexeme : ""); + exit(1); + } + size_t body_start = (size_t)brace.end; + size_t body_end = body_start; + while (!parser_eof(parser)) { + Token next = parser_next_token(parser); + if (next.lexeme && strcmp(next.lexeme, "}") == 0) { + body_end = (size_t)next.start; + break; + } + } + if (body_end <= body_start) { + fprintf(stderr, "[error] missing '}' to terminate asm body\n"); + exit(1); + } + size_t body_len = body_end - body_start; + char *body = (char *)xmalloc(body_len + 1); + memcpy(body, parser->source + body_start, body_len); + body[body_len] = '\0'; + AsmDefinition *def = (AsmDefinition *)xmalloc(sizeof(AsmDefinition)); + memset(def, 0, sizeof(AsmDefinition)); + def->name = str_dup(name_tok.lexeme); + def->body = body; + def->effect_string_io = effect_string_io; + Token term = parser_next_token(parser); + if (!term.lexeme || strcmp(term.lexeme, ";") != 0) { + fprintf(stderr, "[error] expected ';' after asm definition\n"); + exit(1); + } + Word *word = dictionary_lookup(parser->dictionary, def->name); + if (!word) { + word = (Word *)xmalloc(sizeof(Word)); + memset(word, 0, sizeof(Word)); + word->name = str_dup(def->name); + dictionary_register(parser->dictionary, word); + } + word->prev_definition = word->definition; + word->prev_asm_def = word->asm_def; + word->immediate = false; + word->compile_only = false; + word->asm_def = def; + word->definition = NULL; + Form form = {0}; + form.kind = FORM_ASM; + form.ptr = def; + VEC_PUSH(&parser->module.forms, form); + parser->last_defined = word; + continue; + } + if (strcmp(token.lexeme, "extern") == 0) { + Token tok1 = parser_next_token(parser); + if (!tok1.lexeme) { + fprintf(stderr, "[error] extern missing name or return type\n"); + exit(1); + } + Token peek = parser_peek_token(parser); + if (peek.lexeme && isdigit((unsigned char)peek.lexeme[0])) { + Word *word = dictionary_lookup(parser->dictionary, tok1.lexeme); + if (!word) { + word = (Word *)xmalloc(sizeof(Word)); + memset(word, 0, sizeof(Word)); + word->name = str_dup(tok1.lexeme); + dictionary_register(parser->dictionary, word); + } + word->is_extern = true; + parser_next_token(parser); + word->extern_inputs = atoi(peek.lexeme); + Token next = parser_peek_token(parser); + if (next.lexeme && isdigit((unsigned char)next.lexeme[0])) { + parser_next_token(parser); + word->extern_outputs = atoi(next.lexeme); + } else { + word->extern_outputs = 0; + } + continue; + } + Token tok2 = parser_next_token(parser); + Token tok3 = parser_next_token(parser); + if (tok2.lexeme && tok3.lexeme && strcmp(tok3.lexeme, "(") == 0) { + Word *word = dictionary_lookup(parser->dictionary, tok2.lexeme); + if (!word) { + word = (Word *)xmalloc(sizeof(Word)); + memset(word, 0, sizeof(Word)); + word->name = str_dup(tok2.lexeme); + dictionary_register(parser->dictionary, word); + } + word->is_extern = true; + word->extern_ret_type = str_dup(tok1.lexeme); + parser->uses_libc = true; + if (strcmp(tok1.lexeme, "double") == 0 || strcmp(tok1.lexeme, "float") == 0) { + if (strcmp(tok2.lexeme, "printf") != 0) { + parser->uses_libm = true; + } + } + word->extern_arg_types = NULL; + word->extern_arg_count = 0; + int cap = 0; + Token arg = parser_peek_token(parser); + if (arg.lexeme && strcmp(arg.lexeme, ")") == 0) { + parser_next_token(parser); + } else { + while (true) { + Token type_tok = parser_next_token(parser); + if (!type_tok.lexeme) { + fprintf(stderr, "[error] unterminated extern signature\n"); + exit(1); + } + if (word->extern_arg_count + 1 > cap) { + cap = cap ? cap * 2 : 4; + word->extern_arg_types = xrealloc(word->extern_arg_types, (size_t)cap * sizeof(char *)); + } + word->extern_arg_types[word->extern_arg_count++] = str_dup(type_tok.lexeme); + if (strcmp(type_tok.lexeme, "double") == 0 || strcmp(type_tok.lexeme, "float") == 0) { + if (strcmp(tok2.lexeme, "printf") != 0) { + parser->uses_libm = true; + } + } + Token maybe_name = parser_peek_token(parser); + if (maybe_name.lexeme && strcmp(maybe_name.lexeme, ",") != 0 && strcmp(maybe_name.lexeme, ")") != 0) { + parser_next_token(parser); + } + Token sep = parser_next_token(parser); + if (!sep.lexeme) { + fprintf(stderr, "[error] unterminated extern signature\n"); + exit(1); + } + if (strcmp(sep.lexeme, ")") == 0) { + break; + } + if (strcmp(sep.lexeme, ",") != 0) { + fprintf(stderr, "[error] expected ',' or ')' in extern signature\n"); + exit(1); + } + } + } + continue; + } + TokenVec reinject; + VEC_INIT(&reinject); + if (tok2.lexeme) { + VEC_PUSH(&reinject, tok2); + } + if (tok3.lexeme) { + VEC_PUSH(&reinject, tok3); + } + parser_inject_tokens(parser, &reinject); + Word *word = dictionary_lookup(parser->dictionary, tok1.lexeme); + if (!word) { + word = (Word *)xmalloc(sizeof(Word)); + memset(word, 0, sizeof(Word)); + word->name = str_dup(tok1.lexeme); + dictionary_register(parser->dictionary, word); + } + word->is_extern = true; + continue; + } + if (strcmp(token.lexeme, "if") == 0) { + char *false_label = parser_new_label(parser, "if_false"); + Op op = {0}; + op.kind = OP_BRANCH_ZERO; + op.data.label = str_dup(false_label); + parser_emit_op(parser, op); + parser_push_control(parser, "if"); + parser->control_stack[parser->control_len - 1].false_label = false_label; + continue; + } + if (strcmp(token.lexeme, "else") == 0) { + if (!parser->control_len || (strcmp(parser->control_stack[parser->control_len - 1].type, "if") != 0 && strcmp(parser->control_stack[parser->control_len - 1].type, "elif") != 0)) { + fprintf(stderr, "[error] 'else' without matching if\n"); + exit(1); + } + char *end_label = parser->control_stack[parser->control_len - 1].end_label; + if (!end_label) { + end_label = parser_new_label(parser, "if_end"); + } + Op jump = {0}; + jump.kind = OP_JUMP; + jump.data.label = str_dup(end_label); + parser_emit_op(parser, jump); + Op label = {0}; + label.kind = OP_LABEL; + label.data.label = str_dup(parser->control_stack[parser->control_len - 1].false_label); + parser_emit_op(parser, label); + Token next = parser_peek_token(parser); + if (next.lexeme && next.line == token.line && strcmp(next.lexeme, "if") != 0) { + TokenVec cond_tokens; + VEC_INIT(&cond_tokens); + bool shorthand = false; + while (!parser_eof(parser)) { + Token cond = parser_next_token(parser); + if (!cond.lexeme) { + break; + } + if (cond.line != token.line) { + VEC_PUSH(&cond_tokens, cond); + break; + } + if (strcmp(cond.lexeme, "if") == 0) { + shorthand = true; + break; + } + VEC_PUSH(&cond_tokens, cond); + } + if (shorthand) { + for (size_t i = 0; i < cond_tokens.len; i++) { + parser_handle_token(parser, cond_tokens.data[i]); + } + char *false_label = parser_new_label(parser, "if_false"); + Op br = {0}; + br.kind = OP_BRANCH_ZERO; + br.data.label = str_dup(false_label); + parser_emit_op(parser, br); + parser->control_stack[parser->control_len - 1].type = str_dup("elif"); + parser->control_stack[parser->control_len - 1].false_label = false_label; + parser->control_stack[parser->control_len - 1].end_label = end_label; + } else { + parser_inject_tokens(parser, &cond_tokens); + parser->control_stack[parser->control_len - 1].type = str_dup("else"); + parser->control_stack[parser->control_len - 1].end_label = end_label; + } + } else { + parser->control_stack[parser->control_len - 1].type = str_dup("else"); + parser->control_stack[parser->control_len - 1].end_label = end_label; + } + continue; + } + if (strcmp(token.lexeme, "for") == 0) { + char *loop_label = parser_new_label(parser, "for_loop"); + char *end_label = parser_new_label(parser, "for_end"); + Op op = {0}; + op.kind = OP_FOR_BEGIN; + op.data.loop.loop = str_dup(loop_label); + op.data.loop.end = str_dup(end_label); + parser_emit_op(parser, op); + parser_push_control(parser, "for"); + parser->control_stack[parser->control_len - 1].loop_label = loop_label; + parser->control_stack[parser->control_len - 1].end_label = end_label; + continue; + } + if (strcmp(token.lexeme, "while") == 0) { + char *begin_label = parser_new_label(parser, "begin"); + char *end_label = parser_new_label(parser, "end"); + Op label = {0}; + label.kind = OP_LABEL; + label.data.label = str_dup(begin_label); + parser_emit_op(parser, label); + parser_push_control(parser, "begin"); + parser->control_stack[parser->control_len - 1].begin_label = begin_label; + parser->control_stack[parser->control_len - 1].end_label = end_label; + continue; + } + if (strcmp(token.lexeme, "do") == 0) { + if (!parser->control_len || strcmp(parser->control_stack[parser->control_len - 1].type, "begin") != 0) { + fprintf(stderr, "[error] 'do' without matching while\n"); + exit(1); + } + Op op = {0}; + op.kind = OP_BRANCH_ZERO; + op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label); + parser_emit_op(parser, op); + continue; + } + parser_handle_token(parser, token); + } + if (parser->macro_recording.active) { + fprintf(stderr, "[error] unterminated macro definition\n"); + exit(1); + } + if (parser->control_len) { + fprintf(stderr, "[error] unclosed control structure\n"); + exit(1); + } + if (parser->current_def) { + fprintf(stderr, "[error] unclosed definition at EOF\n"); + exit(1); + } +} + + +static void parser_expand_macro(Parser *parser, Word *word) { + int param_count = word->macro_param_count; + char **params = NULL; + if (param_count > 0) { + params = (char **)xmalloc((size_t)param_count * sizeof(char *)); + for (int i = 0; i < param_count; i++) { + Token tok = parser_next_token(parser); + if (!tok.lexeme) { + fprintf(stderr, "[error] not enough macro parameters for '%s'\n", word->name); + exit(1); + } + params[i] = str_dup(tok.lexeme); + } + } + TokenVec injected; + VEC_INIT(&injected); + for (size_t i = 0; word->macro_expansion && word->macro_expansion[i]; i++) { + const char *item = word->macro_expansion[i]; + if (item && item[0] == '$' && isdigit((unsigned char)item[1])) { + int idx = atoi(item + 1) - 1; + if (idx >= 0 && idx < param_count) { + Token tok = {0}; + tok.lexeme = str_dup(params[idx]); + VEC_PUSH(&injected, tok); + continue; + } + } + Token tok = {0}; + tok.lexeme = str_dup(item); + VEC_PUSH(&injected, tok); + } + parser_inject_tokens(parser, &injected); + for (int i = 0; i < param_count; i++) { + free(params[i]); + } + free(params); +} + +static void parser_handle_struct(Parser *parser) { + Token name_tok = parser_next_token(parser); + if (!name_tok.lexeme) { + fprintf(stderr, "[error] struct missing name\n"); + exit(1); + } + typedef struct { + char *name; + int64_t size; + int64_t offset; + } Field; + Field *fields = NULL; + size_t field_len = 0; + size_t field_cap = 0; + int64_t offset = 0; + while (!parser_eof(parser)) { + Token tok = parser_next_token(parser); + if (!tok.lexeme) { + break; + } + if (strcmp(tok.lexeme, "end") == 0) { + break; + } + if (strcmp(tok.lexeme, "field") != 0) { + fprintf(stderr, "[error] unexpected token '%s' in struct\n", tok.lexeme); + exit(1); + } + Token field_name = parser_next_token(parser); + Token field_size = parser_next_token(parser); + if (!field_name.lexeme || !field_size.lexeme) { + fprintf(stderr, "[error] malformed struct field\n"); + exit(1); + } + int64_t size = 0; + if (!try_parse_int(field_size.lexeme, &size)) { + fprintf(stderr, "[error] invalid struct field size '%s'\n", field_size.lexeme); + exit(1); + } + if (field_len + 1 > field_cap) { + field_cap = field_cap ? field_cap * 2 : 8; + fields = xrealloc(fields, field_cap * sizeof(Field)); + } + fields[field_len++] = (Field){str_dup(field_name.lexeme), size, offset}; + offset += size; + } + TokenVec injected; + VEC_INIT(&injected); + Token tok = {0}; + tok.lexeme = str_dup("word"); + VEC_PUSH(&injected, tok); + tok.lexeme = str_printf("%s.size", name_tok.lexeme); + VEC_PUSH(&injected, tok); + tok.lexeme = str_printf("%lld", (long long)offset); + VEC_PUSH(&injected, tok); + tok.lexeme = str_dup("end"); + VEC_PUSH(&injected, tok); + for (size_t i = 0; i < field_len; i++) { + Field f = fields[i]; + Token t = {0}; + t.lexeme = str_dup("word"); + VEC_PUSH(&injected, t); + t.lexeme = str_printf("%s.%s.size", name_tok.lexeme, f.name); + VEC_PUSH(&injected, t); + t.lexeme = str_printf("%lld", (long long)f.size); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("end"); + VEC_PUSH(&injected, t); + + t.lexeme = str_dup("word"); + VEC_PUSH(&injected, t); + t.lexeme = str_printf("%s.%s.offset", name_tok.lexeme, f.name); + VEC_PUSH(&injected, t); + t.lexeme = str_printf("%lld", (long long)f.offset); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("end"); + VEC_PUSH(&injected, t); + + t.lexeme = str_dup("word"); + VEC_PUSH(&injected, t); + t.lexeme = str_printf("%s.%s@", name_tok.lexeme, f.name); + VEC_PUSH(&injected, t); + t.lexeme = str_printf("%s.%s.offset", name_tok.lexeme, f.name); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("+"); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("@"); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("end"); + VEC_PUSH(&injected, t); + + t.lexeme = str_dup("word"); + VEC_PUSH(&injected, t); + t.lexeme = str_printf("%s.%s!", name_tok.lexeme, f.name); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("swap"); + VEC_PUSH(&injected, t); + t.lexeme = str_printf("%s.%s.offset", name_tok.lexeme, f.name); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("+"); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("swap"); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("!"); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("end"); + VEC_PUSH(&injected, t); + free(f.name); + } + free(fields); + parser_inject_tokens(parser, &injected); +} + +static void parser_handle_with(Parser *parser) { + StrVec names; + VEC_INIT(&names); + while (!parser_eof(parser)) { + Token tok = parser_next_token(parser); + if (!tok.lexeme) { + fprintf(stderr, "[error] unterminated with block\n"); + exit(1); + } + if (strcmp(tok.lexeme, "in") == 0) { + break; + } + VEC_PUSH(&names, str_dup(tok.lexeme)); + } + for (size_t i = 0; i < names.len; i++) { + const char *name = names.data[i]; + int id = parser->label_counter++; + char *cell_label = str_printf("__with_%s_%d_cell", name, id); + char *word_name = str_printf("__with_%s_%d", name, id); + strmap_set(&parser->variable_labels, name, cell_label); + strmap_set(&parser->variable_words, name, str_dup(word_name)); + + AsmDefinition *def = (AsmDefinition *)xmalloc(sizeof(AsmDefinition)); + memset(def, 0, sizeof(AsmDefinition)); + def->name = str_dup(word_name); + def->body = str_printf(" lea rax, [rel %s]\n sub r12, 8\n mov [r12], rax\n", cell_label); + Word *word = dictionary_lookup(parser->dictionary, word_name); + if (!word) { + word = (Word *)xmalloc(sizeof(Word)); + memset(word, 0, sizeof(Word)); + word->name = str_dup(word_name); + dictionary_register(parser->dictionary, word); + } + word->asm_def = def; + Form form = {0}; + form.kind = FORM_ASM; + form.ptr = def; + VEC_PUSH(&parser->module.forms, form); + } + + parser_push_control(parser, "with"); + parser->control_stack[parser->control_len - 1].with_names = names; + TokenVec injected; + VEC_INIT(&injected); + for (size_t i = names.len; i-- > 0;) { + Token t = {0}; + char *label = (char *)strmap_get(&parser->variable_words, names.data[i]); + t.lexeme = str_dup(label); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("swap"); + VEC_PUSH(&injected, t); + t.lexeme = str_dup("!"); + VEC_PUSH(&injected, t); + } + parser_inject_tokens(parser, &injected); + names.data = NULL; + names.len = 0; + names.cap = 0; +} + +static void parser_handle_token(Parser *parser, Token token) { + if (parser->token_hook) { + Word *hook = dictionary_lookup(parser->dictionary, parser->token_hook); + if (!hook) { + fprintf(stderr, "[error] unknown token hook '%s'\n", parser->token_hook); + exit(1); + } + ct_stack_push(&parser->ct_vm->stack, ct_make_token(token)); + ct_word_call(parser->ct_vm, hook); + CtValue handled = ct_stack_pop(&parser->ct_vm->stack); + if (ct_truthy(handled)) { + return; + } + } + + if (strcmp(token.lexeme, "macro") == 0) { + Token name = parser_next_token(parser); + if (!name.lexeme) { + fprintf(stderr, "[error] macro missing name\n"); + exit(1); + } + int param_count = 0; + Token maybe_num = parser_peek_token(parser); + if (maybe_num.lexeme && isdigit((unsigned char)maybe_num.lexeme[0])) { + parser_next_token(parser); + param_count = atoi(maybe_num.lexeme); + } + parser_start_macro(parser, name.lexeme, param_count); + return; + } + + if (strcmp(token.lexeme, "inline") == 0) { + parser->pending_inline_def = true; + return; + } + + if (strcmp(token.lexeme, "immediate") == 0) { + if (!parser->last_defined) { + fprintf(stderr, "[error] immediate used without a preceding definition\n"); + exit(1); + } + parser->last_defined->immediate = true; + if (parser->last_defined->definition) { + parser->last_defined->definition->immediate = true; + } + if (parser->last_defined->asm_def) { + parser->last_defined->asm_def->immediate = true; + } + return; + } + + if (strcmp(token.lexeme, "compile-only") == 0) { + if (!parser->last_defined) { + fprintf(stderr, "[error] compile-only used without a preceding definition\n"); + exit(1); + } + parser->last_defined->compile_only = true; + if (parser->last_defined->definition) { + parser->last_defined->definition->compile_only = true; + } + if (parser->last_defined->asm_def) { + parser->last_defined->asm_def->compile_only = true; + } + if (parser->last_defined->prev_definition) { + parser->last_defined->ct_definition = parser->last_defined->definition; + parser->last_defined->definition = parser->last_defined->prev_definition; + parser->last_defined->prev_definition = NULL; + } + if (parser->last_defined->prev_asm_def) { + parser->last_defined->ct_asm_def = parser->last_defined->asm_def; + parser->last_defined->asm_def = parser->last_defined->prev_asm_def; + parser->last_defined->prev_asm_def = NULL; + } + return; + } + + if (strcmp(token.lexeme, "compile-time") == 0) { + Token name = parser_next_token(parser); + if (!name.lexeme) { + fprintf(stderr, "[error] compile-time missing word name\n"); + exit(1); + } + Word *word = dictionary_lookup(parser->dictionary, name.lexeme); + if (!word) { + fprintf(stderr, "[error] unknown word '%s' for compile-time\n", name.lexeme); + exit(1); + } + ct_word_call(parser->ct_vm, word); + if (parser->current_def) { + Op op = {0}; + op.kind = OP_WORD; + op.data.word = str_dup(name.lexeme); + parser_emit_op(parser, op); + } + return; + } + + if (strcmp(token.lexeme, "here") == 0) { + SourceLocation *loc = location_for_token(parser, token); + char *text = str_printf("%s:%d:%d", loc->path, loc->line, loc->column); + parser_emit_literal(parser, LIT_STRING, 0, 0.0, text); + free(text); + return; + } + + if (strcmp(token.lexeme, "label") == 0) { + Token name = parser_next_token(parser); + if (!name.lexeme) { + fprintf(stderr, "[error] label missing name\n"); + exit(1); + } + Op op = {0}; + op.kind = OP_LABEL; + op.data.label = str_dup(name.lexeme); + parser_emit_op(parser, op); + return; + } + + if (strcmp(token.lexeme, "goto") == 0) { + Token name = parser_next_token(parser); + if (!name.lexeme) { + fprintf(stderr, "[error] goto missing label\n"); + exit(1); + } + Op op = {0}; + op.kind = OP_JUMP; + op.data.label = str_dup(name.lexeme); + parser_emit_op(parser, op); + return; + } + + if (strcmp(token.lexeme, "struct") == 0) { + parser_handle_struct(parser); + return; + } + + if (strcmp(token.lexeme, "with") == 0) { + parser_handle_with(parser); + return; + } + + char *str_lit = NULL; + if (parse_string_literal(token.lexeme, &str_lit)) { + parser_emit_literal(parser, LIT_STRING, 0, 0.0, str_lit); + free(str_lit); + return; + } + int64_t int_val = 0; + if (try_parse_int(token.lexeme, &int_val)) { + parser_emit_literal(parser, LIT_INT, int_val, 0.0, NULL); + return; + } + double float_val = 0.0; + if (try_parse_float(token.lexeme, &float_val)) { + parser_emit_literal(parser, LIT_FLOAT, 0, float_val, NULL); + return; + } + + const char *var_label = (const char *)strmap_get(&parser->variable_words, token.lexeme); + if (var_label) { + Token peek = parser_peek_token(parser); + Op op = {0}; + op.kind = OP_WORD; + op.data.word = str_dup(var_label); + parser_emit_op(parser, op); + if (!peek.lexeme || strcmp(peek.lexeme, "!") != 0) { + op.data.word = str_dup("@"); + parser_emit_op(parser, op); + } + return; + } + + Word *word = dictionary_lookup(parser->dictionary, token.lexeme); + if (word && word->macro_expansion) { + parser_expand_macro(parser, word); + return; + } + if (word && word->immediate) { + ct_word_call(parser->ct_vm, word); + if (parser->current_def && !word->compile_only) { + Op op = {0}; + op.kind = OP_WORD; + op.data.word = str_dup(word->name); + parser_emit_op(parser, op); + } + return; + } + if (word && word->compile_only && parser->current_def && parser->definition_stack_len) { + Word *current = parser->definition_stack[parser->definition_stack_len - 1]; + current->compile_only = true; + if (current->definition) { + current->definition->compile_only = true; + } + } + + if (!word) { + word = (Word *)xmalloc(sizeof(Word)); + memset(word, 0, sizeof(Word)); + word->name = str_dup(token.lexeme); + dictionary_register(parser->dictionary, word); + } + Op op = {0}; + op.kind = OP_WORD; + op.data.word = str_dup(token.lexeme); + parser_emit_op(parser, op); +} + +int main(int argc, char **argv) { + StrVec inputs; + StrVec include_dirs; + StrVec libs; + VEC_INIT(&inputs); + VEC_INIT(&include_dirs); + VEC_INIT(&libs); + const char *output = "a.out"; + const char *temp_dir = "build"; + bool emit_asm = false; + bool debug = false; + + for (int i = 1; i < argc; i++) { + const char *arg = argv[i]; + if (strcmp(arg, "-o") == 0 && i + 1 < argc) { + output = argv[++i]; + continue; + } + if (strcmp(arg, "--emit-asm") == 0) { + emit_asm = true; + continue; + } + if (strcmp(arg, "--dbg") == 0) { + debug = true; + continue; + } + if ((strcmp(arg, "-I") == 0 || strcmp(arg, "--include") == 0) && i + 1 < argc) { + VEC_PUSH(&include_dirs, str_dup(argv[++i])); + continue; + } + if (strncmp(arg, "-I", 2) == 0 && strlen(arg) > 2) { + VEC_PUSH(&include_dirs, str_dup(arg + 2)); + continue; + } + if ((strcmp(arg, "-l") == 0) && i + 1 < argc) { + const char *lib = argv[++i]; + if (strchr(lib, '/') || strstr(lib, ".so") || strstr(lib, ".a")) { + VEC_PUSH(&libs, str_printf("-l:%s", lib)); + } else { + VEC_PUSH(&libs, str_printf("-l%s", lib)); + } + continue; + } + if (strncmp(arg, "-l", 2) == 0 && strlen(arg) > 2) { + VEC_PUSH(&libs, str_dup(arg)); + continue; + } + if (strcmp(arg, "--temp-dir") == 0 && i + 1 < argc) { + temp_dir = argv[++i]; + continue; + } + if (arg[0] == '-') { + fprintf(stderr, "[error] unknown option: %s\n", arg); + return 1; + } + VEC_PUSH(&inputs, str_dup(arg)); + } + + if (inputs.len == 0) { + fprintf(stderr, "usage: %s [-o output] [--emit-asm]\n", argv[0]); + return 1; + } + + VEC_PUSH(&include_dirs, str_dup(".")); + VEC_PUSH(&include_dirs, str_dup("./stdlib")); + + StrMap visited; + strmap_init(&visited); + StrVec sources; + VEC_INIT(&sources); + FileSpanVec file_spans; + VEC_INIT(&file_spans); + int line_counter = 1; + for (size_t i = 0; i < inputs.len; i++) { + char *expanded = expand_imports(inputs.data[i], &include_dirs, &visited, &file_spans, &line_counter); + VEC_PUSH(&sources, expanded); + } + size_t total = 0; + for (size_t i = 0; i < sources.len; i++) { + total += strlen(sources.data[i]); + } + char *combined = (char *)xmalloc(total + 1); + combined[0] = '\0'; + for (size_t i = 0; i < sources.len; i++) { + strcat(combined, sources.data[i]); + } + + Dictionary dict; + dictionary_init(&dict); + Reader reader; + reader_init(&reader); + Parser parser; + parser_init(&parser, &dict, &reader); + parser.file_spans = file_spans; + parser.primary_path = inputs.len ? str_dup(inputs.data[0]) : NULL; + CompileTimeVM vm; + ct_vm_init(&vm, &parser); + parser.ct_vm = &vm; + bootstrap_dictionary(&dict, &parser, &vm); + register_builtin_syscall(&parser); + + parse_tokens(&parser, combined); + + if (parser.uses_libc && !strvec_contains(&libs, "-lc")) { + VEC_PUSH(&libs, str_dup("-lc")); + } + if (parser.uses_libm && !strvec_contains(&libs, "-lm")) { + VEC_PUSH(&libs, str_dup("-lm")); + } + + Emission emission = emit_module(&parser, &dict, debug); + char *asm_text = emission_snapshot(&emission); + + char *asm_path = NULL; + char *obj_path = NULL; + if (emit_asm) { + asm_path = str_dup(output); + } else { + mkdir(temp_dir, 0755); + const char *base = strrchr(output, '/'); + base = base ? base + 1 : output; + asm_path = str_printf("%s/%s.asm", temp_dir, base); + obj_path = str_printf("%s/%s.o", temp_dir, base); + } + + write_file(asm_path, asm_text); + if (emit_asm) { + return 0; + } + run_nasm(asm_path, obj_path, debug); + run_linker(obj_path, output, debug, &libs, false, parser.uses_libc); + return 0; +}