4385 lines
142 KiB
C
4385 lines
142 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include <stdbool.h>
|
|
#include <ctype.h>
|
|
#include <stdarg.h>
|
|
#include <errno.h>
|
|
#include <limits.h>
|
|
#include <unistd.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/wait.h>
|
|
|
|
/* Element count of a real array; do not use on a pointer. */
#define ARRAY_LEN(x) (sizeof(x) / sizeof(*(x)))
|
|
|
|
/*
 * Allocate `size` bytes or terminate the process.
 *
 * On failure prints "[error] out of memory" to stderr and exits with
 * status 1, so the returned pointer is never NULL.  A request for zero
 * bytes is rounded up to one byte: malloc(0) is allowed to return NULL,
 * which would otherwise be misreported as an out-of-memory condition.
 */
static void *xmalloc(size_t size) {
    void *ptr = malloc(size ? size : 1);
    if (!ptr) {
        fprintf(stderr, "[error] out of memory\n");
        exit(1);
    }
    return ptr;
}
|
|
|
|
/*
 * Resize `ptr` to `size` bytes or terminate the process.
 *
 * On failure prints "[error] out of memory" to stderr and exits with
 * status 1, so the returned pointer is never NULL.  A zero-byte request is
 * rounded up to one byte: realloc(ptr, 0) may free the block and return
 * NULL, which would otherwise be treated as a fatal allocation failure.
 */
static void *xrealloc(void *ptr, size_t size) {
    void *out = realloc(ptr, size ? size : 1);
    if (!out) {
        fprintf(stderr, "[error] out of memory\n");
        exit(1);
    }
    return out;
}
|
|
|
|
/*
 * Return a freshly allocated copy of `src` (via xmalloc), or NULL when
 * `src` itself is NULL.
 */
static char *str_dup(const char *src) {
    if (src == NULL) {
        return NULL;
    }
    size_t bytes = strlen(src) + 1; /* text plus terminator */
    char *copy = (char *)xmalloc(bytes);
    memcpy(copy, src, bytes);
    return copy;
}
|
|
|
|
/*
 * printf-style formatting into a newly allocated string.
 *
 * The output is measured with a first vsnprintf pass and rendered with a
 * second one.  If the measuring pass reports an error, an allocated empty
 * string is returned instead.
 */
static char *str_printf(const char *fmt, ...) {
    va_list render;
    va_list measure;
    va_start(render, fmt);
    va_copy(measure, render);
    int needed = vsnprintf(NULL, 0, fmt, measure);
    va_end(measure);

    char *result;
    if (needed >= 0) {
        size_t bytes = (size_t)needed + 1;
        result = (char *)xmalloc(bytes);
        vsnprintf(result, bytes, fmt, render);
    } else {
        result = str_dup("");
    }
    va_end(render);
    return result;
}
|
|
|
|
/*
 * True when `text` begins with `prefix`.  A NULL argument on either side
 * yields false; an empty prefix matches any non-NULL text.
 */
static bool str_starts_with(const char *text, const char *prefix) {
    if (text == NULL || prefix == NULL) {
        return false;
    }
    return strncmp(text, prefix, strlen(prefix)) == 0;
}
|
|
|
|
/*
 * NULL-tolerant string equality: false if either argument is NULL,
 * otherwise whether strcmp reports the strings as equal.
 */
static bool str_equals(const char *a, const char *b) {
    return a != NULL && b != NULL && strcmp(a, b) == 0;
}
|
|
|
|
/*
 * 64-bit string hash: for every byte, XOR it into the state and multiply
 * by 1099511628211 (an FNV-1a style step).
 *
 * NOTE(review): the starting value 1469598103934665603 is not the standard
 * 64-bit FNV offset basis (14695981039346656037); results therefore do not
 * match reference FNV-1a.  `text` must not be NULL.
 */
static uint64_t hash_str(const char *text) {
    const uint64_t multiplier = 1099511628211ULL;
    uint64_t state = 1469598103934665603ULL;
    for (const unsigned char *p = (const unsigned char *)text; *p != '\0'; p++) {
        state = (state ^ *p) * multiplier;
    }
    return state;
}
|
|
|
|
/*
 * Minimal growable-array helpers.
 *
 * VEC_DECL(name, type) declares a struct type `name` holding a heap buffer
 * of `type` together with its element count and capacity.  Every other
 * VEC_* macro takes a pointer to such a struct; note that the pointer
 * argument is evaluated more than once.
 */
#define VEC_DECL(name, type) \
    typedef struct {         \
        type *data;          \
        size_t len;          \
        size_t cap;          \
    } name

/* Put a vector into the empty state without releasing anything. */
#define VEC_INIT(vec)       \
    do {                    \
        (vec)->data = NULL; \
        (vec)->len = 0;     \
        (vec)->cap = 0;     \
    } while (0)

/* Release the buffer (not the elements) and return to the empty state. */
#define VEC_FREE(vec)       \
    do {                    \
        free((vec)->data);  \
        (vec)->data = NULL; \
        (vec)->len = 0;     \
        (vec)->cap = 0;     \
    } while (0)

/*
 * Append `value`.  Capacity starts at 8 and doubles whenever the buffer is
 * full; growth goes through xrealloc.
 */
#define VEC_PUSH(vec, value)                                                        \
    do {                                                                            \
        if ((vec)->len + 1 > (vec)->cap) {                                          \
            (vec)->cap = (vec)->cap ? (vec)->cap * 2 : 8;                           \
            (vec)->data = xrealloc((vec)->data, (vec)->cap * sizeof(*(vec)->data)); \
        }                                                                           \
        (vec)->data[(vec)->len++] = (value);                                        \
    } while (0)

/*
 * Remove and yield the last element.  On an empty vector this evaluates
 * data[0] without changing len, so callers must check len first.
 */
#define VEC_POP(vec) \
    ((vec)->len ? (vec)->data[--(vec)->len] : (vec)->data[0])

VEC_DECL(StrVec, char *);
VEC_DECL(IntVec, int);
|
|
|
|
/*
 * Linear search: true when some element of `vec` compares equal (strcmp)
 * to `value`.  A NULL vector or NULL value yields false.
 */
static bool strvec_contains(StrVec *vec, const char *value) {
    if (vec == NULL || value == NULL) {
        return false;
    }
    bool found = false;
    for (size_t i = 0; !found && i < vec->len; i++) {
        found = strcmp(vec->data[i], value) == 0;
    }
    return found;
}
|
|
|
|
typedef struct {
|
|
char *lexeme;
|
|
int line;
|
|
int column;
|
|
int start;
|
|
int end;
|
|
} Token;
|
|
|
|
VEC_DECL(TokenVec, Token);
|
|
|
|
/* A (path, line, column) triple attached to emitted operations. */
typedef struct {
    char *path;
    int line;
    int column;
} SourceLocation;
|
|
|
|
typedef struct {
|
|
char *path;
|
|
int start_line;
|
|
int end_line;
|
|
int local_start_line;
|
|
} FileSpan;
|
|
|
|
VEC_DECL(FileSpanVec, FileSpan);
|
|
|
|
/* Discriminator for Op. */
typedef enum {
    OP_LITERAL,
    OP_WORD,
    OP_BRANCH_ZERO,
    OP_JUMP,
    OP_LABEL,
    OP_FOR_BEGIN,
    OP_FOR_END,
    OP_LIST_BEGIN,
    OP_LIST_END
} OpKind;
|
|
|
|
/* Which member of Op.data an OP_LITERAL carries. */
typedef enum {
    LIT_INT,
    LIT_FLOAT,
    LIT_STRING
} LiteralKind;
|
|
|
|
typedef struct {
|
|
OpKind kind;
|
|
LiteralKind lit_kind;
|
|
SourceLocation *loc;
|
|
union {
|
|
int64_t i64;
|
|
double f64;
|
|
char *str;
|
|
char *word;
|
|
char *label;
|
|
struct {
|
|
char *loop;
|
|
char *end;
|
|
} loop;
|
|
} data;
|
|
} Op;
|
|
|
|
VEC_DECL(OpVec, Op);
|
|
|
|
typedef struct {
|
|
char *name;
|
|
OpVec body;
|
|
bool immediate;
|
|
bool compile_only;
|
|
char *terminator;
|
|
bool inline_def;
|
|
} Definition;
|
|
|
|
/* A named word whose body is raw assembly text. */
typedef struct {
    char *name;
    char *body; /* assembly source, one instruction or label per line */
    bool immediate;
    bool compile_only;
    bool effect_string_io; /* consulted by ct_try_asm_io */
} AsmDefinition;
|
|
|
|
/* Discriminator for Form.ptr. */
typedef enum {
    FORM_DEF,
    FORM_ASM
} FormKind;
|
|
|
|
typedef struct {
|
|
FormKind kind;
|
|
void *ptr;
|
|
} Form;
|
|
|
|
VEC_DECL(FormVec, Form);
|
|
|
|
typedef struct {
|
|
StrVec text;
|
|
StrVec data;
|
|
StrVec bss;
|
|
} Emission;
|
|
|
|
typedef struct {
|
|
StrVec *text;
|
|
bool debug_enabled;
|
|
SourceLocation *current_loc;
|
|
} FunctionEmitter;
|
|
|
|
typedef struct Word Word;
|
|
typedef struct CompileTimeVM CompileTimeVM;
|
|
typedef struct Parser Parser;
|
|
|
|
typedef void (*MacroFn)(Parser *parser);
|
|
typedef void (*IntrinsicEmitter)(FunctionEmitter *builder);
|
|
typedef void (*CompileTimeIntrinsic)(CompileTimeVM *vm);
|
|
|
|
struct Word {
|
|
char *name;
|
|
bool immediate;
|
|
bool compile_only;
|
|
bool compile_time_override;
|
|
bool is_extern;
|
|
int extern_inputs;
|
|
int extern_outputs;
|
|
char **extern_arg_types;
|
|
int extern_arg_count;
|
|
char *extern_ret_type;
|
|
bool inline_def;
|
|
Definition *definition;
|
|
Definition *ct_definition;
|
|
Definition *prev_definition;
|
|
AsmDefinition *asm_def;
|
|
AsmDefinition *ct_asm_def;
|
|
AsmDefinition *prev_asm_def;
|
|
MacroFn macro;
|
|
IntrinsicEmitter intrinsic;
|
|
CompileTimeIntrinsic ct_intrinsic;
|
|
char **macro_expansion;
|
|
int macro_param_count;
|
|
};
|
|
|
|
/*
 * Open-addressing hash map from strings to opaque pointers.
 * `keys` and `values` are parallel arrays of `cap` slots; a NULL key marks
 * an empty slot.  `cap` is zero or a power of two (slots are selected by
 * masking with cap - 1).
 */
typedef struct {
    char **keys;
    void **values;
    size_t cap; /* slot count */
    size_t len; /* occupied slots */
} StrMap;
|
|
|
|
/* Reset `map` to the empty, unallocated state. */
static void strmap_init(StrMap *map) {
    *map = (StrMap){0};
}
|
|
|
|
/*
 * Release everything owned by `map` and return it to the empty state.
 *
 * Keys are private copies made by strmap_set (via str_dup), so they are
 * freed here together with the two slot arrays; previously only the arrays
 * were released and every key leaked.  Values are caller-owned and are not
 * touched.
 */
static void strmap_free(StrMap *map) {
    if (map->keys) {
        for (size_t i = 0; i < map->cap; i++) {
            free(map->keys[i]); /* free(NULL) is a no-op for empty slots */
        }
    }
    free(map->keys);
    free(map->values);
    map->keys = NULL;
    map->values = NULL;
    map->cap = 0;
    map->len = 0;
}
|
|
|
|
/*
 * Double the slot count (128 on first use) and re-insert every existing
 * entry using linear probing.  Key pointers are moved, not copied.
 */
static void strmap_grow(StrMap *map) {
    size_t grown = map->cap != 0 ? map->cap * 2 : 128;
    size_t mask = grown - 1;
    char **keys = (char **)xmalloc(grown * sizeof(char *));
    void **vals = (void **)xmalloc(grown * sizeof(void *));

    for (size_t slot = 0; slot < grown; slot++) {
        keys[slot] = NULL;
        vals[slot] = NULL;
    }

    for (size_t old = 0; map->keys != NULL && old < map->cap; old++) {
        char *key = map->keys[old];
        if (key == NULL) {
            continue;
        }
        size_t slot = (size_t)(hash_str(key) & mask);
        while (keys[slot] != NULL) {
            slot = (slot + 1) & mask;
        }
        keys[slot] = key;
        vals[slot] = map->values[old];
    }

    free(map->keys);
    free(map->values);
    map->keys = keys;
    map->values = vals;
    map->cap = grown;
}
|
|
|
|
/*
 * Insert or overwrite `key`.  The map grows first when it is unallocated
 * or when the insertion would reach two thirds occupancy.  A new key is
 * stored as a private copy; an existing key just has its value replaced.
 */
static void strmap_set(StrMap *map, const char *key, void *value) {
    bool needs_room = map->cap == 0 || (map->len + 1) * 3 >= map->cap * 2;
    if (needs_room) {
        strmap_grow(map);
    }

    size_t mask = map->cap - 1;
    size_t slot = (size_t)(hash_str(key) & mask);
    for (; map->keys[slot] != NULL; slot = (slot + 1) & mask) {
        if (strcmp(map->keys[slot], key) == 0) {
            map->values[slot] = value;
            return;
        }
    }

    map->keys[slot] = str_dup(key);
    map->values[slot] = value;
    map->len++;
}
|
|
|
|
/*
 * Look up `key` and return its value, or NULL when the key is absent.
 * Probing stops at the first empty slot or after one full lap.
 */
static void *strmap_get(StrMap *map, const char *key) {
    if (map->cap == 0) {
        return NULL;
    }

    size_t mask = map->cap - 1;
    size_t home = (size_t)(hash_str(key) & mask);
    size_t slot = home;
    do {
        const char *candidate = map->keys[slot];
        if (candidate == NULL) {
            break;
        }
        if (strcmp(candidate, key) == 0) {
            return map->values[slot];
        }
        slot = (slot + 1) & mask;
    } while (slot != home);

    return NULL;
}
|
|
|
|
/*
 * True when `key` is present in `map`, whatever value it maps to.
 *
 * This probes the key array directly instead of testing the result of
 * strmap_get against NULL: a key stored with a NULL value (for example an
 * index of 0 cast to a pointer) is a legitimate entry and must be reported
 * as present.
 */
static bool strmap_has(StrMap *map, const char *key) {
    if (!map->cap) {
        return false;
    }
    size_t mask = map->cap - 1;
    size_t start = (size_t)(hash_str(key) & mask);
    size_t idx = start;
    while (map->keys[idx]) {
        if (strcmp(map->keys[idx], key) == 0) {
            return true;
        }
        idx = (idx + 1) & mask;
        if (idx == start) {
            break; /* full lap without a match */
        }
    }
    return false;
}
|
|
|
|
typedef struct {
|
|
StrMap words;
|
|
} Dictionary;
|
|
|
|
/* Start `dict` with an empty word table. */
static void dictionary_init(Dictionary *dict) {
    StrMap *table = &dict->words;
    strmap_init(table);
}
|
|
|
|
/* Return the Word registered under `name`, or NULL if there is none. */
static Word *dictionary_lookup(Dictionary *dict, const char *name) {
    void *entry = strmap_get(&dict->words, name);
    return (Word *)entry;
}
|
|
|
|
/* Store `word` under word->name, replacing any previous entry of that name. */
static void dictionary_register(Dictionary *dict, Word *word) {
    const char *key = word->name;
    strmap_set(&dict->words, key, word);
}
|
|
|
|
typedef struct {
|
|
StrVec custom_tokens;
|
|
StrVec token_order;
|
|
} Reader;
|
|
|
|
/*
 * Initialise `reader` with the built-in single-character tokens
 * ( ) { } ; , [ ].  Both lists end up referring to the same copies.
 */
static void reader_init(Reader *reader) {
    static const char *const defaults[] = {"(", ")", "{", "}", ";", ",", "[", "]"};

    VEC_INIT(&reader->custom_tokens);
    VEC_INIT(&reader->token_order);

    for (size_t i = 0; i < ARRAY_LEN(defaults); i++) {
        char *owned = str_dup(defaults[i]);
        VEC_PUSH(&reader->custom_tokens, owned);
        VEC_PUSH(&reader->token_order, owned);
    }
}
|
|
|
|
/*
 * Reorder token_order so that longer tokens come first, letting the
 * tokenizer prefer the longest match.  Simple in-place exchange sort.
 */
static void reader_resort(Reader *reader) {
    StrVec *order = &reader->token_order;
    for (size_t i = 0; i < order->len; i++) {
        for (size_t j = i + 1; j < order->len; j++) {
            char *current = order->data[i];
            char *candidate = order->data[j];
            if (strlen(candidate) <= strlen(current)) {
                continue;
            }
            order->data[i] = candidate;
            order->data[j] = current;
        }
    }
}
|
|
|
|
/*
 * Register `tok` as a token string.  NULL, empty and already-known
 * strings are ignored.  The match order is re-sorted after every addition.
 */
static void reader_add_tokens(Reader *reader, const char *tok) {
    if (tok == NULL || tok[0] == '\0') {
        return;
    }
    if (strvec_contains(&reader->custom_tokens, tok)) {
        return;
    }

    char *owned = str_dup(tok);
    VEC_PUSH(&reader->custom_tokens, owned);
    VEC_PUSH(&reader->token_order, owned);
    reader_resort(reader);
}
|
|
|
|
/* Register every character of `chars` as its own one-character token. */
static void reader_add_token_chars(Reader *reader, const char *chars) {
    if (chars == NULL) {
        return;
    }
    for (size_t i = 0; chars[i] != '\0'; i++) {
        char single[2] = {chars[i], '\0'};
        reader_add_tokens(reader, single);
    }
}
|
|
|
|
typedef struct {
|
|
const char *source;
|
|
size_t length;
|
|
size_t index;
|
|
int line;
|
|
int column;
|
|
Reader *reader;
|
|
} Tokenizer;
|
|
|
|
/*
 * Point `tokenizer` at the start of `source` (line 1, column 0).
 * `source` must be a valid NUL-terminated string.
 */
static void tokenizer_init(Tokenizer *tokenizer, Reader *reader, const char *source) {
    *tokenizer = (Tokenizer){
        .source = source,
        .length = strlen(source),
        .index = 0,
        .line = 1,
        .column = 0,
        .reader = reader,
    };
}
|
|
|
|
static bool tokenizer_next(Tokenizer *tokenizer, Token *out) {
|
|
const char *src = tokenizer->source;
|
|
size_t len = tokenizer->length;
|
|
size_t idx = tokenizer->index;
|
|
int line = tokenizer->line;
|
|
int col = tokenizer->column;
|
|
|
|
while (idx < len) {
|
|
char ch = src[idx];
|
|
if (ch == '"') {
|
|
size_t start = idx;
|
|
int token_line = line;
|
|
int token_col = col;
|
|
idx++;
|
|
col++;
|
|
bool escape = false;
|
|
while (idx < len) {
|
|
char c = src[idx++];
|
|
if (c == '\n') {
|
|
line++;
|
|
col = 0;
|
|
} else {
|
|
col++;
|
|
}
|
|
if (escape) {
|
|
escape = false;
|
|
continue;
|
|
}
|
|
if (c == '\\') {
|
|
escape = true;
|
|
continue;
|
|
}
|
|
if (c == '"') {
|
|
size_t end = idx;
|
|
size_t tok_len = end - start;
|
|
char *lex = (char *)xmalloc(tok_len + 1);
|
|
memcpy(lex, src + start, tok_len);
|
|
lex[tok_len] = '\0';
|
|
out->lexeme = lex;
|
|
out->line = token_line;
|
|
out->column = token_col;
|
|
out->start = (int)start;
|
|
out->end = (int)end;
|
|
tokenizer->index = idx;
|
|
tokenizer->line = line;
|
|
tokenizer->column = col;
|
|
return true;
|
|
}
|
|
}
|
|
fprintf(stderr, "[error] unterminated string literal\n");
|
|
exit(1);
|
|
}
|
|
if (ch == '#') {
|
|
while (idx < len && src[idx] != '\n') {
|
|
idx++;
|
|
}
|
|
continue;
|
|
}
|
|
if (ch == ';' && idx + 1 < len && isalpha((unsigned char)src[idx + 1])) {
|
|
size_t start = idx;
|
|
int token_line = line;
|
|
int token_col = col;
|
|
idx++;
|
|
col++;
|
|
size_t tok_len = idx - start;
|
|
char *lex = (char *)xmalloc(tok_len + 1);
|
|
memcpy(lex, src + start, tok_len);
|
|
lex[tok_len] = '\0';
|
|
out->lexeme = lex;
|
|
out->line = token_line;
|
|
out->column = token_col;
|
|
out->start = (int)start;
|
|
out->end = (int)idx;
|
|
tokenizer->index = idx;
|
|
tokenizer->line = line;
|
|
tokenizer->column = col;
|
|
return true;
|
|
}
|
|
|
|
bool matched = false;
|
|
const char *matched_tok = NULL;
|
|
for (size_t i = 0; i < tokenizer->reader->token_order.len; i++) {
|
|
const char *tok = tokenizer->reader->token_order.data[i];
|
|
size_t tok_len = strlen(tok);
|
|
if (tok_len == 0) {
|
|
continue;
|
|
}
|
|
if (idx + tok_len <= len && strncmp(src + idx, tok, tok_len) == 0) {
|
|
matched = true;
|
|
matched_tok = tok;
|
|
size_t start = idx;
|
|
int token_line = line;
|
|
int token_col = col;
|
|
idx += tok_len;
|
|
col += (int)tok_len;
|
|
out->lexeme = str_dup(matched_tok);
|
|
out->line = token_line;
|
|
out->column = token_col;
|
|
out->start = (int)start;
|
|
out->end = (int)idx;
|
|
tokenizer->index = idx;
|
|
tokenizer->line = line;
|
|
tokenizer->column = col;
|
|
return true;
|
|
}
|
|
}
|
|
if (matched) {
|
|
continue;
|
|
}
|
|
if (isspace((unsigned char)ch)) {
|
|
if (ch == '\n') {
|
|
line++;
|
|
col = 0;
|
|
} else {
|
|
col++;
|
|
}
|
|
idx++;
|
|
continue;
|
|
}
|
|
size_t start = idx;
|
|
int token_line = line;
|
|
int token_col = col;
|
|
while (idx < len) {
|
|
char c = src[idx];
|
|
bool is_sep = isspace((unsigned char)c) || c == '"' || c == '#';
|
|
if (is_sep) {
|
|
break;
|
|
}
|
|
bool token_hit = false;
|
|
for (size_t i = 0; i < tokenizer->reader->token_order.len; i++) {
|
|
const char *tok = tokenizer->reader->token_order.data[i];
|
|
size_t tok_len = strlen(tok);
|
|
if (tok_len && idx + tok_len <= len && strncmp(src + idx, tok, tok_len) == 0) {
|
|
token_hit = true;
|
|
break;
|
|
}
|
|
}
|
|
if (token_hit) {
|
|
break;
|
|
}
|
|
idx++;
|
|
col++;
|
|
}
|
|
size_t tok_len = idx - start;
|
|
if (tok_len) {
|
|
char *lex = (char *)xmalloc(tok_len + 1);
|
|
memcpy(lex, src + start, tok_len);
|
|
lex[tok_len] = '\0';
|
|
out->lexeme = lex;
|
|
out->line = token_line;
|
|
out->column = token_col;
|
|
out->start = (int)start;
|
|
out->end = (int)idx;
|
|
tokenizer->index = idx;
|
|
tokenizer->line = line;
|
|
tokenizer->column = col;
|
|
return true;
|
|
}
|
|
idx++;
|
|
col++;
|
|
}
|
|
|
|
tokenizer->index = idx;
|
|
tokenizer->line = line;
|
|
tokenizer->column = col;
|
|
return false;
|
|
}
|
|
|
|
struct Parser {
|
|
Dictionary *dictionary;
|
|
Reader *reader;
|
|
TokenVec tokens;
|
|
size_t pos;
|
|
Tokenizer tokenizer;
|
|
bool tokenizer_exhausted;
|
|
struct {
|
|
FormVec forms;
|
|
StrMap variables;
|
|
StrVec *prelude;
|
|
StrVec *bss;
|
|
} module;
|
|
Definition *current_def;
|
|
Word **definition_stack;
|
|
size_t definition_stack_len;
|
|
size_t definition_stack_cap;
|
|
Word *last_defined;
|
|
FileSpanVec file_spans;
|
|
char *source;
|
|
struct {
|
|
char *name;
|
|
StrVec tokens;
|
|
int param_count;
|
|
bool active;
|
|
} macro_recording;
|
|
struct {
|
|
char *type;
|
|
char *false_label;
|
|
char *end_label;
|
|
char *begin_label;
|
|
char *loop_label;
|
|
int line;
|
|
int column;
|
|
StrVec with_names;
|
|
} *control_stack;
|
|
size_t control_len;
|
|
size_t control_cap;
|
|
int label_counter;
|
|
char *token_hook;
|
|
Token last_token;
|
|
bool has_last_token;
|
|
StrMap variable_labels;
|
|
StrMap variable_words;
|
|
CompileTimeVM *ct_vm;
|
|
StrVec *custom_prelude;
|
|
StrVec *custom_bss;
|
|
bool pending_inline_def;
|
|
bool uses_libc;
|
|
bool uses_libm;
|
|
char *primary_path;
|
|
};
|
|
|
|
/* Discriminator for CtValue. */
typedef enum {
    CT_NIL,
    CT_INT,
    CT_STR,
    CT_TOKEN,
    CT_LIST,
    CT_MAP,
    CT_LEXER
} CtValueKind;
|
|
|
|
typedef struct CtValue CtValue;
|
|
|
|
VEC_DECL(CtValueVec, CtValue);
|
|
|
|
typedef struct {
|
|
CtValueVec items;
|
|
} CtList;
|
|
|
|
typedef struct {
|
|
char **keys;
|
|
CtValue *values;
|
|
size_t cap;
|
|
size_t len;
|
|
} CtMap;
|
|
|
|
typedef struct {
|
|
Parser *parser;
|
|
bool separators[256];
|
|
TokenVec buffer;
|
|
} SplitLexer;
|
|
|
|
struct CtValue {
|
|
CtValueKind kind;
|
|
union {
|
|
int64_t i64;
|
|
char *str;
|
|
Token token;
|
|
CtList *list;
|
|
CtMap *map;
|
|
SplitLexer *lexer;
|
|
} as;
|
|
};
|
|
|
|
struct CompileTimeVM {
|
|
Parser *parser;
|
|
Dictionary *dictionary;
|
|
CtValueVec stack;
|
|
CtValueVec rstack;
|
|
IntVec loop_remaining;
|
|
IntVec loop_begin;
|
|
IntVec loop_initial;
|
|
StrVec call_stack;
|
|
};
|
|
|
|
static void ct_value_free(CtValue *value);
|
|
|
|
static CtValue ct_make_nil(void) {
|
|
CtValue v = {0};
|
|
v.kind = CT_NIL;
|
|
return v;
|
|
}
|
|
|
|
static CtValue ct_make_int(int64_t i) {
|
|
CtValue v = {0};
|
|
v.kind = CT_INT;
|
|
v.as.i64 = i;
|
|
return v;
|
|
}
|
|
|
|
static CtValue ct_make_str(const char *s) {
|
|
CtValue v = {0};
|
|
v.kind = CT_STR;
|
|
v.as.str = str_dup(s);
|
|
return v;
|
|
}
|
|
|
|
/* Wrap `token` as a CT_TOKEN value; the lexeme pointer is shared, not copied. */
static CtValue ct_make_token(Token token) {
    CtValue result = { .kind = CT_TOKEN };
    result.as.token = token;
    return result;
}
|
|
|
|
/* Wrap `list` as a CT_LIST value; the pointer is stored as given. */
static CtValue ct_make_list(CtList *list) {
    CtValue result = { .kind = CT_LIST };
    result.as.list = list;
    return result;
}
|
|
|
|
/* Wrap `map` as a CT_MAP value; the pointer is stored as given. */
static CtValue ct_make_map(CtMap *map) {
    CtValue result = { .kind = CT_MAP };
    result.as.map = map;
    return result;
}
|
|
|
|
/* Wrap `lexer` as a CT_LEXER value; the pointer is stored as given. */
static CtValue ct_make_lexer(SplitLexer *lexer) {
    CtValue result = { .kind = CT_LEXER };
    result.as.lexer = lexer;
    return result;
}
|
|
|
|
/*
 * Release what `value` owns.  Only CT_STR payloads are freed; every other
 * kind (and a NULL `value`) is left untouched.
 */
static void ct_value_free(CtValue *value) {
    if (value != NULL && value->kind == CT_STR) {
        free(value->as.str);
    }
}
|
|
|
|
/* Put a compile-time value stack into the empty state. */
static void ct_stack_init(CtValueVec *vec) {
    vec->data = NULL;
    vec->len = 0;
    vec->cap = 0;
}
|
|
|
|
/* Push `value` (copied by value) onto the stack, growing it as needed. */
static void ct_stack_push(CtValueVec *vec, CtValue value) {
    CtValueVec *stack = vec;
    VEC_PUSH(stack, value);
}
|
|
|
|
/* Remove and return the top value; an empty stack yields nil. */
static CtValue ct_stack_pop(CtValueVec *vec) {
    if (vec->len == 0) {
        return ct_make_nil();
    }
    vec->len--;
    return vec->data[vec->len];
}
|
|
|
|
/* Return a copy of the top value without removing it; an empty stack yields nil. */
static CtValue ct_stack_peek(CtValueVec *vec) {
    size_t depth = vec->len;
    if (depth == 0) {
        return ct_make_nil();
    }
    return vec->data[depth - 1];
}
|
|
|
|
static CtList *ct_list_new(void) {
|
|
CtList *list = (CtList *)xmalloc(sizeof(CtList));
|
|
VEC_INIT(&list->items);
|
|
return list;
|
|
}
|
|
|
|
static CtMap *ct_map_new(void) {
|
|
CtMap *map = (CtMap *)xmalloc(sizeof(CtMap));
|
|
map->keys = NULL;
|
|
map->values = NULL;
|
|
map->cap = 0;
|
|
map->len = 0;
|
|
return map;
|
|
}
|
|
|
|
/*
 * Double the slot count (64 on first use) and re-insert every entry with
 * linear probing.  Only the key array is cleared: a value slot is
 * meaningful only where its key is non-NULL.
 */
static void ct_map_grow(CtMap *map) {
    size_t grown = map->cap != 0 ? map->cap * 2 : 64;
    size_t mask = grown - 1;
    char **keys = (char **)xmalloc(grown * sizeof(char *));
    CtValue *vals = (CtValue *)xmalloc(grown * sizeof(CtValue));

    for (size_t slot = 0; slot < grown; slot++) {
        keys[slot] = NULL;
    }

    for (size_t old = 0; map->keys != NULL && old < map->cap; old++) {
        char *key = map->keys[old];
        if (key == NULL) {
            continue;
        }
        size_t slot = (size_t)(hash_str(key) & mask);
        while (keys[slot] != NULL) {
            slot = (slot + 1) & mask;
        }
        keys[slot] = key;
        vals[slot] = map->values[old];
    }

    free(map->keys);
    free(map->values);
    map->keys = keys;
    map->values = vals;
    map->cap = grown;
}
|
|
|
|
/*
 * Insert or overwrite `key`.  When the key already exists the old value is
 * passed to ct_value_free before being replaced; a new key is stored as a
 * private copy.  Grows first when unallocated or about to reach two thirds
 * occupancy.
 */
static void ct_map_set(CtMap *map, const char *key, CtValue value) {
    bool needs_room = map->cap == 0 || (map->len + 1) * 3 >= map->cap * 2;
    if (needs_room) {
        ct_map_grow(map);
    }

    size_t mask = map->cap - 1;
    size_t slot = (size_t)(hash_str(key) & mask);
    for (; map->keys[slot] != NULL; slot = (slot + 1) & mask) {
        if (strcmp(map->keys[slot], key) == 0) {
            ct_value_free(&map->values[slot]);
            map->values[slot] = value;
            return;
        }
    }

    map->keys[slot] = str_dup(key);
    map->values[slot] = value;
    map->len++;
}
|
|
|
|
/*
 * Look up `key`.  On a hit, copy the value into *out and return true;
 * otherwise return false and leave *out untouched.
 */
static bool ct_map_get(CtMap *map, const char *key, CtValue *out) {
    if (map->cap == 0) {
        return false;
    }

    size_t mask = map->cap - 1;
    size_t home = (size_t)(hash_str(key) & mask);
    size_t slot = home;
    do {
        const char *candidate = map->keys[slot];
        if (candidate == NULL) {
            break;
        }
        if (strcmp(candidate, key) == 0) {
            *out = map->values[slot];
            return true;
        }
        slot = (slot + 1) & mask;
    } while (slot != home);

    return false;
}
|
|
|
|
/* Append a private copy of `line` to the emitter's text list. */
static void emit_line(FunctionEmitter *builder, const char *line) {
    char *copy = str_dup(line);
    VEC_PUSH(builder->text, copy);
}
|
|
|
|
/* Bind `builder` to the output list `text`, with no current location. */
static void emitter_init(FunctionEmitter *builder, StrVec *text, bool debug) {
    *builder = (FunctionEmitter){
        .text = text,
        .debug_enabled = debug,
        .current_loc = NULL,
    };
}
|
|
|
|
/*
 * Turn an arbitrary name into a label made only of [A-Za-z0-9_].
 *
 * Alphanumerics and '_' are kept; every other byte becomes '_' followed by
 * its two-digit lowercase hex code.  An empty name becomes "a", and a
 * result starting with a digit gets a leading '_'.  Returns a new
 * heap-allocated string.
 */
static char *sanitize_label(const char *name) {
    static const char hex_digits[] = "0123456789abcdef";
    size_t len = strlen(name);
    /* Worst case: three output bytes per input byte, plus a prefix and NUL. */
    char *out = (char *)xmalloc(len * 4 + 2);
    size_t used = 0;

    for (const char *p = name; *p != '\0'; p++) {
        unsigned char ch = (unsigned char)*p;
        if (ch == '_' || isalnum(ch)) {
            out[used++] = (char)ch;
            continue;
        }
        out[used++] = '_';
        out[used++] = hex_digits[ch >> 4];
        out[used++] = hex_digits[ch & 0x0f];
    }

    if (used == 0) {
        out[used++] = 'a';
    }
    if (isdigit((unsigned char)out[0])) {
        memmove(out + 1, out, used);
        out[0] = '_';
        used++;
    }
    out[used] = '\0';
    return out;
}
|
|
|
|
/*
 * True when `text` is a non-empty C-style identifier: a letter or '_'
 * followed by letters, digits or '_'.  NULL and "" yield false.
 */
static bool is_identifier(const char *text) {
    if (text == NULL || text[0] == '\0') {
        return false;
    }
    for (size_t i = 0; text[i] != '\0'; i++) {
        unsigned char ch = (unsigned char)text[i];
        bool allowed = ch == '_' || (i == 0 ? isalpha(ch) != 0 : isalnum(ch) != 0);
        if (!allowed) {
            return false;
        }
    }
    return true;
}
|
|
|
|
static char *path_basename(const char *path);
|
|
|
|
/*
 * Build a heap-allocated SourceLocation for `token`.
 *
 * If the token's line falls inside a recorded FileSpan, the path is taken
 * from path_basename(span path) and the line is translated to be relative
 * to that file.  Otherwise the token's own line is used together with
 * path_basename(primary_path), or "<source>" when no primary path is set.
 */
static SourceLocation *location_for_token(Parser *parser, Token token) {
    const FileSpan *hit = NULL;
    for (size_t i = 0; hit == NULL && i < parser->file_spans.len; i++) {
        const FileSpan *span = &parser->file_spans.data[i];
        if (token.line >= span->start_line && token.line < span->end_line) {
            hit = span;
        }
    }

    SourceLocation *loc = (SourceLocation *)xmalloc(sizeof(*loc));
    if (hit != NULL) {
        loc->path = path_basename(hit->path);
        loc->line = hit->local_start_line + (token.line - hit->start_line);
    } else {
        loc->path = parser->primary_path ? path_basename(parser->primary_path)
                                         : str_dup("<source>");
        loc->line = token.line;
    }
    loc->column = token.column;
    return loc;
}
|
|
|
|
/*
 * Push a new control-structure entry of the given `type`.
 *
 * All label fields start as NULL, with_names starts empty, and line/column
 * are copied from the last consumed token (0 if there is none).  The stack
 * starts at 16 entries and doubles when full.
 */
static void parser_push_control(Parser *parser, const char *type) {
    if (parser->control_len + 1 > parser->control_cap) {
        size_t grown = parser->control_cap ? parser->control_cap * 2 : 16;
        parser->control_cap = grown;
        parser->control_stack =
            xrealloc(parser->control_stack, grown * sizeof(*parser->control_stack));
    }

    size_t top = parser->control_len;
    bool have_token = parser->has_last_token;

    parser->control_stack[top].type = str_dup(type);
    parser->control_stack[top].false_label = NULL;
    parser->control_stack[top].end_label = NULL;
    parser->control_stack[top].begin_label = NULL;
    parser->control_stack[top].loop_label = NULL;
    parser->control_stack[top].line = have_token ? parser->last_token.line : 0;
    parser->control_stack[top].column = have_token ? parser->last_token.column : 0;
    VEC_INIT(&parser->control_stack[top].with_names);

    parser->control_len = top + 1;
}
|
|
|
|
/*
 * Drop the top control entry.
 *
 * Returns 0 on success, -1 when the stack is empty, and -2 when
 * `expected_type` is given and differs from the top entry's type (the
 * entry is then left in place).  The entry's storage is not released, so
 * its fields stay readable at index control_len until the next push.
 */
static int parser_pop_control(Parser *parser, const char *expected_type) {
    size_t depth = parser->control_len;
    if (depth == 0) {
        return -1;
    }

    const char *top_type = parser->control_stack[depth - 1].type;
    bool mismatch = expected_type != NULL && strcmp(top_type, expected_type) != 0;
    if (mismatch) {
        return -2;
    }

    parser->control_len = depth - 1;
    return 0;
}
|
|
|
|
/*
 * Emit `op` into the current output.
 *
 * An op without a location inherits one from the last consumed token.
 * Inside a definition the op is appended to that definition's body; at
 * top level it is wrapped in a fresh single-op Definition named "<top>"
 * and appended to the module as a FORM_DEF.
 */
static void parser_emit_op(Parser *parser, Op op) {
    if (parser->has_last_token && op.loc == NULL) {
        op.loc = location_for_token(parser, parser->last_token);
    }

    if (parser->current_def != NULL) {
        VEC_PUSH(&parser->current_def->body, op);
        return;
    }

    Definition *top = (Definition *)xmalloc(sizeof(*top));
    *top = (Definition){0};
    top->name = str_dup("<top>");
    VEC_INIT(&top->body);
    VEC_PUSH(&top->body, op);

    Form form = {0};
    form.kind = FORM_DEF;
    form.ptr = top;
    VEC_PUSH(&parser->module.forms, form);
}
|
|
|
|
/*
 * Initialise `parser` for a new parse bound to `dict` and `reader`.
 *
 * Every field is zero-initialised (NULL pointers, empty vectors and maps,
 * false flags, zero counters) and then the two bindings are stored.  The
 * previous field-by-field version left `tokenizer`, `last_token` and most
 * of `macro_recording` (name, tokens, param_count) indeterminate.
 *
 * The embedded tokenizer is only zeroed here; it still has to be set up
 * with tokenizer_init before tokens are requested.
 */
static void parser_init(Parser *parser, Dictionary *dict, Reader *reader) {
    *parser = (Parser){
        .dictionary = dict,
        .reader = reader,
    };
}
|
|
|
|
/*
 * Register the built-in assembly word "syscall".
 *
 * The assembly pops a value into rax and a count into rcx from the stack
 * addressed by r12, clamps the count to 6, pops up to six further values
 * into r9, r8, r10, rdx, rsi, rdi (in that order), executes `syscall`, and
 * pushes rax back.  The definition is attached to the dictionary word
 * (created if missing) and appended to the module as a FORM_ASM.
 */
static void register_builtin_syscall(Parser *parser) {
    static const char syscall_body[] =
        " mov rax, [r12]\n"
        " add r12, 8\n"
        " mov rcx, [r12]\n"
        " add r12, 8\n"
        " cmp rcx, 6\n"
        " jle .sys_args\n"
        " mov rcx, 6\n"
        ".sys_args:\n"
        " cmp rcx, 6\n"
        " jl .arg5\n"
        " mov r9, [r12]\n"
        " add r12, 8\n"
        ".arg5:\n"
        " cmp rcx, 5\n"
        " jl .arg4\n"
        " mov r8, [r12]\n"
        " add r12, 8\n"
        ".arg4:\n"
        " cmp rcx, 4\n"
        " jl .arg3\n"
        " mov r10, [r12]\n"
        " add r12, 8\n"
        ".arg3:\n"
        " cmp rcx, 3\n"
        " jl .arg2\n"
        " mov rdx, [r12]\n"
        " add r12, 8\n"
        ".arg2:\n"
        " cmp rcx, 2\n"
        " jl .arg1\n"
        " mov rsi, [r12]\n"
        " add r12, 8\n"
        ".arg1:\n"
        " cmp rcx, 1\n"
        " jl .do_syscall\n"
        " mov rdi, [r12]\n"
        " add r12, 8\n"
        ".do_syscall:\n"
        " syscall\n"
        " sub r12, 8\n"
        " mov [r12], rax\n";

    AsmDefinition *def = (AsmDefinition *)xmalloc(sizeof(*def));
    memset(def, 0, sizeof(*def));
    def->name = str_dup("syscall");
    def->body = str_dup(syscall_body);

    Word *word = dictionary_lookup(parser->dictionary, def->name);
    if (word == NULL) {
        word = (Word *)xmalloc(sizeof(*word));
        memset(word, 0, sizeof(*word));
        word->name = str_dup(def->name);
        dictionary_register(parser->dictionary, word);
    }
    word->asm_def = def;

    Form form = {0};
    form.kind = FORM_ASM;
    form.ptr = def;
    VEC_PUSH(&parser->module.forms, form);
}
|
|
|
|
/*
 * Pull tokens from the tokenizer until index `upto` exists in
 * parser->tokens or the input runs out (which sets tokenizer_exhausted).
 */
static void ensure_tokens(Parser *parser, size_t upto) {
    while (!parser->tokenizer_exhausted && parser->tokens.len <= upto) {
        Token tok = {0};
        if (tokenizer_next(&parser->tokenizer, &tok)) {
            VEC_PUSH(&parser->tokens, tok);
        } else {
            parser->tokenizer_exhausted = true;
        }
    }
}
|
|
|
|
/* True when no token is available at the current position. */
static bool parser_eof(Parser *parser) {
    ensure_tokens(parser, parser->pos);
    bool have_token = parser->pos < parser->tokens.len;
    return !have_token;
}
|
|
|
|
/*
 * Return the token at the current position without consuming it.
 * At end of input an all-zero Token (lexeme == NULL) is returned.
 */
static Token parser_peek_token(Parser *parser) {
    ensure_tokens(parser, parser->pos);
    if (parser->pos < parser->tokens.len) {
        return parser->tokens.data[parser->pos];
    }
    return (Token){0};
}
|
|
|
|
/*
 * Consume and return the token at the current position, remembering it as
 * last_token.  At end of input an all-zero Token (lexeme == NULL) is
 * returned and neither the position nor last_token changes.
 */
static Token parser_next_token(Parser *parser) {
    ensure_tokens(parser, parser->pos);
    if (parser->pos >= parser->tokens.len) {
        return (Token){0};
    }

    Token consumed = parser->tokens.data[parser->pos];
    parser->pos++;
    parser->last_token = consumed;
    parser->has_last_token = true;
    return consumed;
}
|
|
|
|
/*
 * Return a new heap-allocated label "L_<prefix>_<n>", where n is the
 * parser's label counter, which is incremented afterwards.
 */
static char *parser_new_label(Parser *parser, const char *prefix) {
    int serial = parser->label_counter++;
    return str_printf("L_%s_%d", prefix, serial);
}
|
|
|
|
/* Bind `vm` to `parser` (and its dictionary) and start with empty stacks. */
static void ct_vm_init(CompileTimeVM *vm, Parser *parser) {
    vm->parser = parser;
    vm->dictionary = parser->dictionary;

    /* value stacks */
    ct_stack_init(&vm->stack);
    ct_stack_init(&vm->rstack);

    /* loop bookkeeping and call trace */
    VEC_INIT(&vm->loop_remaining);
    VEC_INIT(&vm->loop_begin);
    VEC_INIT(&vm->loop_initial);
    VEC_INIT(&vm->call_stack);
}
|
|
|
|
/*
 * Empty every stack of `vm` by resetting its length.  Buffers are kept for
 * reuse and the values that were on the stacks are not freed.
 */
static void ct_vm_reset(CompileTimeVM *vm) {
    vm->stack.len = vm->rstack.len = 0;
    vm->loop_remaining.len = vm->loop_begin.len = vm->loop_initial.len = 0;
    vm->call_stack.len = 0;
}
|
|
|
|
static bool try_parse_int(const char *lexeme, int64_t *out);
|
|
static void parser_inject_tokens(Parser *parser, TokenVec *injected);
|
|
|
|
/*
 * Report a fatal compile-time error and exit with status 1.
 * When `vm` is given and its call stack is non-empty, the call stack is
 * printed after the message.  This function does not return.
 */
static void ct_trace_error(CompileTimeVM *vm, const char *msg) {
    fprintf(stderr, "[error] %s\n", msg);

    size_t depth = vm != NULL ? vm->call_stack.len : 0;
    if (depth != 0) {
        fprintf(stderr, "[error] compile-time call stack:\n");
        for (size_t frame = 0; frame < depth; frame++) {
            fprintf(stderr, " - %s\n", vm->call_stack.data[frame]);
        }
    }
    exit(1);
}
|
|
|
|
/*
 * Pop an integer from the compile-time stack.
 *
 * A CT_INT is returned directly; a CT_STR is accepted when try_parse_int
 * can convert it.  Anything else is a fatal error whose message names the
 * kind found (and its text for strings and tokens).
 */
static int64_t ct_pop_int(CompileTimeVM *vm) {
    CtValue top = ct_stack_pop(&vm->stack);

    if (top.kind == CT_INT) {
        return top.as.i64;
    }
    if (top.kind == CT_STR) {
        int64_t parsed = 0;
        if (try_parse_int(top.as.str, &parsed)) {
            return parsed;
        }
    }

    const char *kind = "unknown";
    const char *extra = "";
    switch (top.kind) {
    case CT_NIL:
        kind = "nil";
        break;
    case CT_STR:
        kind = "string";
        extra = top.as.str ? top.as.str : "";
        break;
    case CT_TOKEN:
        kind = "token";
        extra = top.as.token.lexeme ? top.as.token.lexeme : "";
        break;
    case CT_LIST:
        kind = "list";
        break;
    case CT_MAP:
        kind = "map";
        break;
    case CT_LEXER:
        kind = "lexer";
        break;
    default:
        break;
    }

    char *msg = extra[0] != '\0'
        ? str_printf("expected integer on compile-time stack (got %s: %s)", kind, extra)
        : str_printf("expected integer on compile-time stack (got %s)", kind);
    ct_trace_error(vm, msg); /* exits */
    free(msg);
    return top.as.i64;
}
|
|
|
|
|
|
/*
 * Pop a string from the compile-time stack and return a fresh copy of it.
 * A token is accepted too (its lexeme is copied); any other kind is a
 * fatal error.
 */
static char *ct_pop_str(CompileTimeVM *vm) {
    CtValue top = ct_stack_pop(&vm->stack);

    const char *text;
    if (top.kind == CT_TOKEN) {
        text = top.as.token.lexeme;
    } else {
        if (top.kind != CT_STR) {
            ct_trace_error(vm, "expected string on compile-time stack");
        }
        text = top.as.str;
    }
    return str_dup(text);
}
|
|
|
|
/* Pop a list from the compile-time stack; any other kind is a fatal error. */
static CtList *ct_pop_list(CompileTimeVM *vm) {
    CtValue top = ct_stack_pop(&vm->stack);
    bool is_list = top.kind == CT_LIST;
    if (!is_list) {
        ct_trace_error(vm, "expected list on compile-time stack");
    }
    return top.as.list;
}
|
|
|
|
/* Pop a map from the compile-time stack; any other kind is a fatal error. */
static CtMap *ct_pop_map(CompileTimeVM *vm) {
    CtValue top = ct_stack_pop(&vm->stack);
    bool is_map = top.kind == CT_MAP;
    if (!is_map) {
        ct_trace_error(vm, "expected map on compile-time stack");
    }
    return top.as.map;
}
|
|
|
|
/*
 * Pop a token from the compile-time stack.
 *
 * A CT_TOKEN is returned as is.  A CT_STR is wrapped in a Token whose
 * lexeme is the string pointer itself (not a copy) and whose position
 * fields are all zero.  Any other kind is a fatal error.
 */
static Token ct_pop_token(CompileTimeVM *vm) {
    CtValue v = ct_stack_pop(&vm->stack);
    if (v.kind == CT_TOKEN) {
        return v.as.token;
    }
    if (v.kind == CT_STR) {
        Token tok = {0};
        tok.lexeme = v.as.str;
        return tok;
    }
    ct_trace_error(vm, "expected token on compile-time stack");
    /*
     * ct_trace_error exits, but the compiler cannot see that; return an
     * empty token so control never falls off the end of a non-void function.
     */
    Token none = {0};
    return none;
}
|
|
|
|
static void ct_word_call(CompileTimeVM *vm, Word *word);
|
|
|
|
/*
 * Emulate the I/O effect of certain assembly words at compile time.
 *
 * - If `asm_def` is flagged effect_string_io: pop one value; a string is
 *   written to stdout (stderr when the word is named "ewrite_buf"), any
 *   other kind causes a second value to be popped and discarded.
 * - Otherwise, if the word is named "putc": pop one value and write a
 *   single character (the integer itself, or the first byte of a non-empty
 *   string, else 0) to stdout.
 *
 * Returns true when one of these cases applied, false otherwise.
 */
static bool ct_try_asm_io(CompileTimeVM *vm, Word *word, AsmDefinition *asm_def) {
    if (asm_def != NULL && asm_def->effect_string_io) {
        CtValue top = ct_stack_pop(&vm->stack);
        if (top.kind != CT_STR) {
            ct_stack_pop(&vm->stack);
            return true;
        }
        FILE *stream = strcmp(word->name, "ewrite_buf") == 0 ? stderr : stdout;
        fputs(top.as.str != NULL ? top.as.str : "", stream);
        return true;
    }

    if (strcmp(word->name, "putc") != 0) {
        return false;
    }

    CtValue top = ct_stack_pop(&vm->stack);
    int ch = 0;
    switch (top.kind) {
    case CT_INT:
        ch = (int)top.as.i64;
        break;
    case CT_STR:
        if (top.as.str != NULL && top.as.str[0] != '\0') {
            ch = (unsigned char)top.as.str[0];
        }
        break;
    default:
        break;
    }
    fputc(ch, stdout);
    return true;
}
|
|
|
|
static void ct_execute_nodes(CompileTimeVM *vm, OpVec *nodes) {
|
|
StrMap labels;
|
|
strmap_init(&labels);
|
|
for (size_t i = 0; i < nodes->len; i++) {
|
|
Op *node = &nodes->data[i];
|
|
if (node->kind == OP_LABEL) {
|
|
strmap_set(&labels, node->data.label, (void *)(uintptr_t)i);
|
|
}
|
|
}
|
|
|
|
IntVec begin_stack;
|
|
VEC_INIT(&begin_stack);
|
|
size_t ip = 0;
|
|
while (ip < nodes->len) {
|
|
Op node = nodes->data[ip];
|
|
if (node.kind == OP_LITERAL) {
|
|
if (node.lit_kind == LIT_INT) {
|
|
ct_stack_push(&vm->stack, ct_make_int(node.data.i64));
|
|
} else if (node.lit_kind == LIT_FLOAT) {
|
|
ct_stack_push(&vm->stack, ct_make_int((int64_t)node.data.f64));
|
|
} else if (node.lit_kind == LIT_STRING) {
|
|
ct_stack_push(&vm->stack, ct_make_str(node.data.str));
|
|
}
|
|
ip++;
|
|
continue;
|
|
}
|
|
if (node.kind == OP_WORD) {
|
|
const char *name = node.data.word;
|
|
if (strcmp(name, "begin") == 0) {
|
|
VEC_PUSH(&begin_stack, (int)ip);
|
|
ip++;
|
|
continue;
|
|
}
|
|
if (strcmp(name, "again") == 0) {
|
|
if (!begin_stack.len) {
|
|
fprintf(stderr, "[error] 'again' without matching 'begin'\n");
|
|
exit(1);
|
|
}
|
|
ip = (size_t)begin_stack.data[begin_stack.len - 1] + 1;
|
|
continue;
|
|
}
|
|
if (strcmp(name, "continue") == 0) {
|
|
if (!begin_stack.len) {
|
|
fprintf(stderr, "[error] 'continue' outside begin/again loop\n");
|
|
exit(1);
|
|
}
|
|
ip = (size_t)begin_stack.data[begin_stack.len - 1] + 1;
|
|
continue;
|
|
}
|
|
if (strcmp(name, "exit") == 0) {
|
|
return;
|
|
}
|
|
Word *word = dictionary_lookup(vm->dictionary, name);
|
|
if (!word) {
|
|
fprintf(stderr, "[error] unknown word '%s' during compile-time execution\n", name);
|
|
exit(1);
|
|
}
|
|
ct_word_call(vm, word);
|
|
ip++;
|
|
continue;
|
|
}
|
|
if (node.kind == OP_BRANCH_ZERO) {
|
|
CtValue v = ct_stack_pop(&vm->stack);
|
|
bool flag = false;
|
|
if (v.kind == CT_INT) {
|
|
flag = v.as.i64 != 0;
|
|
}
|
|
if (!flag) {
|
|
void *target = strmap_get(&labels, node.data.label);
|
|
if (!target) {
|
|
fprintf(stderr, "[error] unknown label '%s' during compile-time execution\n", node.data.label);
|
|
exit(1);
|
|
}
|
|
ip = (size_t)(uintptr_t)target;
|
|
} else {
|
|
ip++;
|
|
}
|
|
continue;
|
|
}
|
|
if (node.kind == OP_JUMP) {
|
|
void *target = strmap_get(&labels, node.data.label);
|
|
if (!target) {
|
|
fprintf(stderr, "[error] unknown label '%s' during compile-time execution\n", node.data.label);
|
|
exit(1);
|
|
}
|
|
ip = (size_t)(uintptr_t)target;
|
|
continue;
|
|
}
|
|
if (node.kind == OP_FOR_BEGIN) {
|
|
int64_t count = ct_pop_int(vm);
|
|
if (count <= 0) {
|
|
ip++;
|
|
continue;
|
|
}
|
|
VEC_PUSH(&vm->loop_remaining, (int)count);
|
|
VEC_PUSH(&vm->loop_begin, (int)ip);
|
|
VEC_PUSH(&vm->loop_initial, (int)count);
|
|
ip++;
|
|
continue;
|
|
}
|
|
if (node.kind == OP_FOR_END) {
|
|
if (!vm->loop_remaining.len) {
|
|
fprintf(stderr, "[error] 'next' without matching 'for'\n");
|
|
exit(1);
|
|
}
|
|
int idx = (int)vm->loop_remaining.len - 1;
|
|
vm->loop_remaining.data[idx] -= 1;
|
|
if (vm->loop_remaining.data[idx] > 0) {
|
|
ip = (size_t)vm->loop_begin.data[idx] + 1;
|
|
} else {
|
|
vm->loop_remaining.len--;
|
|
vm->loop_begin.len--;
|
|
vm->loop_initial.len--;
|
|
ip++;
|
|
}
|
|
continue;
|
|
}
|
|
ip++;
|
|
}
|
|
}
|
|
|
|
/*
 * Select and run the compile-time behaviour of `word`.
 *
 * With compile_time_override set, the first available of ct_definition,
 * definition, ct_intrinsic and ct_asm_def is used. Otherwise a
 * ct_intrinsic runs unless an immediate/compile-only word also has a
 * definition; then the definition (or ct_definition for compile-only
 * words) is executed; failing that, asm I/O emulation or extern stubbing
 * (pop the inputs, push zero for each output) is applied. A word with
 * none of these terminates the process with an error.
 */
static void ct_word_call_dispatch(CompileTimeVM *vm, Word *word) {
    if (word->compile_time_override) {
        if (word->ct_definition) {
            ct_execute_nodes(vm, &word->ct_definition->body);
            return;
        }
        if (word->definition) {
            ct_execute_nodes(vm, &word->definition->body);
            return;
        }
        if (word->ct_intrinsic) {
            word->ct_intrinsic(vm);
            return;
        }
        if (word->ct_asm_def) {
            /* Handled or not, an overridden asm word ends the call here. */
            ct_try_asm_io(vm, word, word->ct_asm_def);
            return;
        }
    }

    bool prefer_def = (word->definition && (word->immediate || word->compile_only));
    if (!prefer_def && word->ct_intrinsic) {
        word->ct_intrinsic(vm);
        return;
    }

    Definition *def = word->definition;
    if (word->compile_only && word->ct_definition) {
        def = word->ct_definition;
    }
    if (def) {
        ct_execute_nodes(vm, &def->body);
        return;
    }

    if (word->asm_def || word->ct_asm_def) {
        AsmDefinition *asm_def = word->ct_asm_def ? word->ct_asm_def : word->asm_def;
        ct_try_asm_io(vm, word, asm_def);
        return;
    }

    if (word->is_extern) {
        int pops = word->extern_arg_count > 0 ? word->extern_arg_count : word->extern_inputs;
        for (int i = 0; i < pops; i++) {
            ct_stack_pop(&vm->stack);
        }
        int outputs = 0;
        if (word->extern_arg_count > 0) {
            /* Typed extern: one result unless the return type is "void". */
            if (!word->extern_ret_type || strcmp(word->extern_ret_type, "void") != 0) {
                outputs = 1;
            }
        } else {
            outputs = word->extern_outputs;
        }
        for (int i = 0; i < outputs; i++) {
            ct_stack_push(&vm->stack, ct_make_int(0));
        }
        return;
    }

    fprintf(stderr, "[error] word '%s' has no compile-time definition\n", word->name);
    exit(1);
}

/*
 * Invoke `word` on the compile-time VM.
 *
 * A copy of the word's name is pushed on vm->call_stack for the duration
 * of the call and released again afterwards (previously the copy was
 * dropped without being freed, leaking one string per call).
 */
static void ct_word_call(CompileTimeVM *vm, Word *word) {
    VEC_PUSH(&vm->call_stack, str_dup(word->name));
    ct_word_call_dispatch(vm, word);
    vm->call_stack.len--;
    free((void *)vm->call_stack.data[vm->call_stack.len]);
}
|
|
|
|
/*
 * Truthiness of a compile-time value: nil is false, integers are true
 * when non-zero, strings are true when non-NULL and non-empty, and every
 * other kind is true.
 */
static bool ct_truthy(CtValue v) {
    bool result = true;
    if (v.kind == CT_NIL) {
        result = false;
    } else if (v.kind == CT_INT) {
        result = (v.as.i64 != 0);
    } else if (v.kind == CT_STR) {
        result = (v.as.str != NULL && v.as.str[0] != '\0');
    }
    return result;
}
|
|
|
|
/*
 * Render a compile-time value as a newly allocated string: a token's
 * lexeme, a string's text, an integer in decimal, or "" for any other
 * kind. The caller frees the result.
 */
static char *ct_string_from_value(CtValue v) {
    char *text;
    if (v.kind == CT_TOKEN) {
        text = str_dup(v.as.token.lexeme);
    } else if (v.kind == CT_STR) {
        text = str_dup(v.as.str);
    } else if (v.kind == CT_INT) {
        text = str_printf("%lld", (long long)v.as.i64);
    } else {
        text = str_dup("");
    }
    return text;
}
|
|
|
|
/* dup: push a second copy of the value currently on top of the stack. */
static void ct_intrinsic_dup(CompileTimeVM *vm) {
    CtValue top = ct_stack_peek(&vm->stack);
    ct_stack_push(&vm->stack, top);
}
|
|
|
|
/* drop: discard the value on top of the stack. */
static void ct_intrinsic_drop(CompileTimeVM *vm) {
    (void)ct_stack_pop(&vm->stack);
}
|
|
|
|
/* swap: exchange the two topmost stack values. */
static void ct_intrinsic_swap(CompileTimeVM *vm) {
    CtValue top = ct_stack_pop(&vm->stack);
    CtValue below = ct_stack_pop(&vm->stack);
    /* Push in the order they were popped so their positions trade places. */
    ct_stack_push(&vm->stack, top);
    ct_stack_push(&vm->stack, below);
}
|
|
|
|
/*
 * over: push a copy of the second-from-top stack value.
 * Exits with an error when fewer than two values are present.
 */
static void ct_intrinsic_over(CompileTimeVM *vm) {
    if (vm->stack.len < 2) {
        fprintf(stderr, "[error] over expects at least 2 items\n");
        exit(1);
    }
    CtValue second = vm->stack.data[vm->stack.len - 2];
    ct_stack_push(&vm->stack, second);
}
|
|
|
|
/*
 * rot: rotate the top three stack values in place so that
 * (a b c) becomes (b c a), with c originally on top.
 * Exits with an error when fewer than three values are present.
 */
static void ct_intrinsic_rot(CompileTimeVM *vm) {
    if (vm->stack.len < 3) {
        fprintf(stderr, "[error] rot expects at least 3 items\n");
        exit(1);
    }
    CtValue *base = &vm->stack.data[vm->stack.len - 3];
    CtValue deepest = base[0];
    base[0] = base[1];
    base[1] = base[2];
    base[2] = deepest;
}
|
|
|
|
/*
 * pick: pop an index n and push a copy of the n-th value from the top of
 * the data stack (0 = top, counted after the index was popped).
 * Exits with an error when n is negative or beyond the stack depth.
 */
static void ct_intrinsic_pick(CompileTimeVM *vm) {
    int64_t idx = ct_pop_int(vm);
    /* Compare as unsigned 64-bit instead of computing idx + 1, which
     * overflows for INT64_MAX and can truncate on narrow size_t. */
    if (idx < 0 || (uint64_t)idx >= (uint64_t)vm->stack.len) {
        fprintf(stderr, "[error] pick index out of range\n");
        exit(1);
    }
    CtValue v = vm->stack.data[vm->stack.len - 1 - (size_t)idx];
    ct_stack_push(&vm->stack, v);
}
|
|
|
|
/*
 * rpick: pop an index n from the data stack and push a copy of the n-th
 * value from the top of the return stack (0 = top).
 * Exits with an error when n is negative or beyond the return-stack depth.
 */
static void ct_intrinsic_rpick(CompileTimeVM *vm) {
    int64_t idx = ct_pop_int(vm);
    /* Compare as unsigned 64-bit instead of computing idx + 1, which
     * overflows for INT64_MAX and can truncate on narrow size_t. */
    if (idx < 0 || (uint64_t)idx >= (uint64_t)vm->rstack.len) {
        fprintf(stderr, "[error] rpick index out of range\n");
        exit(1);
    }
    CtValue v = vm->rstack.data[vm->rstack.len - 1 - (size_t)idx];
    ct_stack_push(&vm->stack, v);
}
|
|
|
|
/* >r: move the top data-stack value onto the return stack. */
static void ct_intrinsic_to_r(CompileTimeVM *vm) {
    CtValue moved = ct_stack_pop(&vm->stack);
    ct_stack_push(&vm->rstack, moved);
}
|
|
|
|
/* r>: move the top return-stack value onto the data stack. */
static void ct_intrinsic_from_r(CompileTimeVM *vm) {
    CtValue moved = ct_stack_pop(&vm->rstack);
    ct_stack_push(&vm->stack, moved);
}
|
|
|
|
/* rdrop: discard the value on top of the return stack. */
static void ct_intrinsic_rdrop(CompileTimeVM *vm) {
    (void)ct_stack_pop(&vm->rstack);
}
|
|
|
|
/*
 * +: pop b, then a, and push a + b.
 * The sum is formed in unsigned arithmetic so that overflow wraps
 * (two's complement) instead of being undefined behaviour.
 */
static void ct_intrinsic_add(CompileTimeVM *vm) {
    int64_t b = ct_pop_int(vm);
    int64_t a = ct_pop_int(vm);
    uint64_t sum = (uint64_t)a + (uint64_t)b;
    ct_stack_push(&vm->stack, ct_make_int((int64_t)sum));
}
|
|
|
|
/*
 * -: pop b, then a, and push a - b.
 * The difference is formed in unsigned arithmetic so that overflow wraps
 * (two's complement) instead of being undefined behaviour.
 */
static void ct_intrinsic_sub(CompileTimeVM *vm) {
    int64_t b = ct_pop_int(vm);
    int64_t a = ct_pop_int(vm);
    uint64_t diff = (uint64_t)a - (uint64_t)b;
    ct_stack_push(&vm->stack, ct_make_int((int64_t)diff));
}
|
|
|
|
/*
 * *: pop b, then a, and push a * b.
 * The product is formed in unsigned arithmetic so that overflow wraps
 * (two's complement) instead of being undefined behaviour.
 */
static void ct_intrinsic_mul(CompileTimeVM *vm) {
    int64_t b = ct_pop_int(vm);
    int64_t a = ct_pop_int(vm);
    uint64_t product = (uint64_t)a * (uint64_t)b;
    ct_stack_push(&vm->stack, ct_make_int((int64_t)product));
}
|
|
|
|
/*
 * /: pop b, then a, and push the truncated quotient a / b.
 * Exits with an error on division by zero. Division by -1 is computed
 * as a wrapping negation so INT64_MIN / -1 cannot overflow (which is
 * undefined and traps on common hardware).
 */
static void ct_intrinsic_div(CompileTimeVM *vm) {
    int64_t b = ct_pop_int(vm);
    int64_t a = ct_pop_int(vm);
    if (b == 0) {
        fprintf(stderr, "[error] division by zero in compile-time VM\n");
        exit(1);
    }
    int64_t quotient;
    if (b == -1) {
        quotient = (int64_t)(0 - (uint64_t)a);
    } else {
        quotient = a / b;
    }
    ct_stack_push(&vm->stack, ct_make_int(quotient));
}
|
|
|
|
/*
 * %: pop b, then a, and push the remainder a % b.
 * Exits with an error on modulo by zero. A divisor of -1 always yields 0
 * and is special-cased so INT64_MIN % -1 cannot overflow.
 */
static void ct_intrinsic_mod(CompileTimeVM *vm) {
    int64_t b = ct_pop_int(vm);
    int64_t a = ct_pop_int(vm);
    if (b == 0) {
        fprintf(stderr, "[error] modulo by zero in compile-time VM\n");
        exit(1);
    }
    int64_t remainder = (b == -1) ? 0 : a % b;
    ct_stack_push(&vm->stack, ct_make_int(remainder));
}
|
|
|
|
/*
 * ==: pop two values and push 1 when they are equal, else 0.
 * Two integers compare numerically; any other pairing compares the
 * string renderings produced by ct_string_from_value().
 */
static void ct_intrinsic_eq(CompileTimeVM *vm) {
    CtValue rhs = ct_stack_pop(&vm->stack);
    CtValue lhs = ct_stack_pop(&vm->stack);
    bool equal;
    if (lhs.kind == CT_INT && rhs.kind == CT_INT) {
        equal = (lhs.as.i64 == rhs.as.i64);
    } else {
        char *lhs_text = ct_string_from_value(lhs);
        char *rhs_text = ct_string_from_value(rhs);
        equal = (strcmp(lhs_text, rhs_text) == 0);
        free(lhs_text);
        free(rhs_text);
    }
    ct_stack_push(&vm->stack, ct_make_int(equal));
}
|
|
|
|
/* >: pop b, then a, and push 1 when a > b, else 0. */
static void ct_intrinsic_gt(CompileTimeVM *vm) {
    int64_t rhs = ct_pop_int(vm);
    int64_t lhs = ct_pop_int(vm);
    bool holds = lhs > rhs;
    ct_stack_push(&vm->stack, ct_make_int(holds));
}
|
|
|
|
/* <: pop b, then a, and push 1 when a < b, else 0. */
static void ct_intrinsic_lt(CompileTimeVM *vm) {
    int64_t rhs = ct_pop_int(vm);
    int64_t lhs = ct_pop_int(vm);
    bool holds = lhs < rhs;
    ct_stack_push(&vm->stack, ct_make_int(holds));
}
|
|
|
|
/* >=: pop b, then a, and push 1 when a >= b, else 0. */
static void ct_intrinsic_ge(CompileTimeVM *vm) {
    int64_t rhs = ct_pop_int(vm);
    int64_t lhs = ct_pop_int(vm);
    bool holds = lhs >= rhs;
    ct_stack_push(&vm->stack, ct_make_int(holds));
}
|
|
|
|
/* <=: pop b, then a, and push 1 when a <= b, else 0. */
static void ct_intrinsic_le(CompileTimeVM *vm) {
    int64_t rhs = ct_pop_int(vm);
    int64_t lhs = ct_pop_int(vm);
    bool holds = lhs <= rhs;
    ct_stack_push(&vm->stack, ct_make_int(holds));
}
|
|
|
|
/*
 * !=: pop two values and push 1 when they differ, else 0.
 * Two integers compare numerically; any other pairing compares the
 * string renderings produced by ct_string_from_value().
 */
static void ct_intrinsic_ne(CompileTimeVM *vm) {
    CtValue rhs = ct_stack_pop(&vm->stack);
    CtValue lhs = ct_stack_pop(&vm->stack);
    bool differ;
    if (lhs.kind == CT_INT && rhs.kind == CT_INT) {
        differ = (lhs.as.i64 != rhs.as.i64);
    } else {
        char *lhs_text = ct_string_from_value(lhs);
        char *rhs_text = ct_string_from_value(rhs);
        differ = (strcmp(lhs_text, rhs_text) != 0);
        free(lhs_text);
        free(rhs_text);
    }
    ct_stack_push(&vm->stack, ct_make_int(differ));
}
|
|
|
|
/* not: pop a value and push 1 when it is falsy (per ct_truthy), else 0. */
static void ct_intrinsic_not(CompileTimeVM *vm) {
    CtValue operand = ct_stack_pop(&vm->stack);
    bool negated = !ct_truthy(operand);
    ct_stack_push(&vm->stack, ct_make_int(negated));
}
|
|
|
|
/* nil: push a nil value. */
static void ct_intrinsic_nil(CompileTimeVM *vm) {
    CtValue nil_value = ct_make_nil();
    ct_stack_push(&vm->stack, nil_value);
}
|
|
|
|
/* nil?: pop a value and push 1 when it is nil, else 0. */
static void ct_intrinsic_nilp(CompileTimeVM *vm) {
    CtValue probe = ct_stack_pop(&vm->stack);
    bool is_nil = (probe.kind == CT_NIL);
    ct_stack_push(&vm->stack, ct_make_int(is_nil));
}
|
|
|
|
/* string=: pop two strings and push 1 when they are byte-wise equal, else 0. */
static void ct_intrinsic_string_eq(CompileTimeVM *vm) {
    char *rhs = ct_pop_str(vm);
    char *lhs = ct_pop_str(vm);
    bool same = (strcmp(lhs, rhs) == 0);
    /* ct_pop_str returns owned copies; release both before pushing. */
    free(lhs);
    free(rhs);
    ct_stack_push(&vm->stack, ct_make_int(same));
}
|
|
|
|
/* string-length: pop a string and push its length in bytes. */
static void ct_intrinsic_string_length(CompileTimeVM *vm) {
    char *text = ct_pop_str(vm);
    int64_t length = (int64_t)strlen(text);
    free(text);
    ct_stack_push(&vm->stack, ct_make_int(length));
}
|
|
|
|
/* string-append: pop b, then a, and push the concatenation a followed by b. */
static void ct_intrinsic_string_append(CompileTimeVM *vm) {
    char *tail = ct_pop_str(vm);
    char *head = ct_pop_str(vm);
    char *joined = str_printf("%s%s", head, tail);
    free(head);
    free(tail);
    ct_stack_push(&vm->stack, ct_make_str(joined));
    /* The temporary is released here after handing it to ct_make_str(). */
    free(joined);
}
|
|
|
|
/*
 * string>number: pop a string and try to parse it with try_parse_int().
 * Pushes the parsed value (initialised to 0 before parsing) followed by
 * a success flag (1 or 0).
 */
static void ct_intrinsic_string_to_number(CompileTimeVM *vm) {
    char *text = ct_pop_str(vm);
    int64_t value = 0;
    int64_t flag = try_parse_int(text, &value) ? 1 : 0;
    ct_stack_push(&vm->stack, ct_make_int(value));
    ct_stack_push(&vm->stack, ct_make_int(flag));
    free(text);
}
|
|
|
|
/* int>string: pop an integer and push its decimal string form. */
static void ct_intrinsic_int_to_string(CompileTimeVM *vm) {
    long long number = (long long)ct_pop_int(vm);
    char *digits = str_printf("%lld", number);
    ct_stack_push(&vm->stack, ct_make_str(digits));
    free(digits);
}
|
|
|
|
/* identifier?: pop a string and push the result of is_identifier() on it. */
static void ct_intrinsic_identifierp(CompileTimeVM *vm) {
    char *candidate = ct_pop_str(vm);
    CtValue verdict = ct_make_int(is_identifier(candidate));
    ct_stack_push(&vm->stack, verdict);
    free(candidate);
}
|
|
|
|
/* list-new: push a freshly created list. */
static void ct_intrinsic_list_new(CompileTimeVM *vm) {
    CtList *fresh = ct_list_new();
    ct_stack_push(&vm->stack, ct_make_list(fresh));
}
|
|
|
|
/*
 * list-append: pop a value, then a list; append the value to the list
 * and push the list back.
 */
static void ct_intrinsic_list_append(CompileTimeVM *vm) {
    CtValue element = ct_stack_pop(&vm->stack);
    CtList *target = ct_pop_list(vm);
    VEC_PUSH(&target->items, element);
    ct_stack_push(&vm->stack, ct_make_list(target));
}
|
|
|
|
/*
 * list-pop: pop a list, remove its last element, then push the list
 * followed by the removed element (nil when the list was empty).
 */
static void ct_intrinsic_list_pop(CompileTimeVM *vm) {
    CtList *target = ct_pop_list(vm);
    CtValue removed = ct_make_nil();
    if (target->items.len) {
        removed = VEC_POP(&target->items);
    }
    ct_stack_push(&vm->stack, ct_make_list(target));
    ct_stack_push(&vm->stack, removed);
}
|
|
|
|
/*
 * list-pop-front: pop a list, remove its first element, then push the
 * list followed by the removed element (nil when the list was empty).
 */
static void ct_intrinsic_list_pop_front(CompileTimeVM *vm) {
    CtList *target = ct_pop_list(vm);
    CtValue removed = ct_make_nil();
    if (target->items.len) {
        CtValue *slots = target->items.data;
        removed = slots[0];
        /* Shift the remaining elements down by one position. */
        target->items.len--;
        memmove(slots, slots + 1, target->items.len * sizeof(CtValue));
    }
    ct_stack_push(&vm->stack, ct_make_list(target));
    ct_stack_push(&vm->stack, removed);
}
|
|
|
|
/* list-length: pop a list and push its element count (the list is not pushed back). */
static void ct_intrinsic_list_length(CompileTimeVM *vm) {
    CtList *subject = ct_pop_list(vm);
    int64_t count = (int64_t)subject->items.len;
    ct_stack_push(&vm->stack, ct_make_int(count));
}
|
|
|
|
/* list-empty: pop a list and push 1 when it has no elements, else 0. */
static void ct_intrinsic_list_empty(CompileTimeVM *vm) {
    CtList *subject = ct_pop_list(vm);
    bool is_empty = (subject->items.len == 0);
    ct_stack_push(&vm->stack, ct_make_int(is_empty));
}
|
|
|
|
/*
 * list-get: pop an index, then a list, and push the element at that
 * index, or nil when the index is out of range. The list itself is not
 * pushed back.
 */
static void ct_intrinsic_list_get(CompileTimeVM *vm) {
    int64_t position = ct_pop_int(vm);
    CtList *subject = ct_pop_list(vm);
    bool in_range = position >= 0 && (size_t)position < subject->items.len;
    CtValue element = ct_make_nil();
    if (in_range) {
        element = subject->items.data[position];
    }
    ct_stack_push(&vm->stack, element);
}
|
|
|
|
/*
 * list-set: pop a value, an index and a list; store the value at that
 * index and push the list back. Exits with an error when the index is
 * out of range.
 */
static void ct_intrinsic_list_set(CompileTimeVM *vm) {
    CtValue replacement = ct_stack_pop(&vm->stack);
    int64_t position = ct_pop_int(vm);
    CtList *target = ct_pop_list(vm);
    bool in_range = position >= 0 && (size_t)position < target->items.len;
    if (!in_range) {
        fprintf(stderr, "[error] list-set index out of range\n");
        exit(1);
    }
    target->items.data[position] = replacement;
    ct_stack_push(&vm->stack, ct_make_list(target));
}
|
|
|
|
/*
 * list-extend: pop list2, then list1; append every element of list2 to
 * list1 and push list1 back.
 *
 * The number of elements to copy is fixed before the loop and each
 * element is read into a local first, so extending a list with itself
 * terminates and never reads through storage that the append may have
 * reallocated.
 */
static void ct_intrinsic_list_extend(CompileTimeVM *vm) {
    CtList *list2 = ct_pop_list(vm);
    CtList *list1 = ct_pop_list(vm);
    size_t count = list2->items.len;
    for (size_t i = 0; i < count; i++) {
        CtValue item = list2->items.data[i];
        VEC_PUSH(&list1->items, item);
    }
    ct_stack_push(&vm->stack, ct_make_list(list1));
}
|
|
|
|
/*
 * list-last: pop a list and push its final element, or nil when the
 * list is empty. The list itself is not pushed back.
 */
static void ct_intrinsic_list_last(CompileTimeVM *vm) {
    CtList *subject = ct_pop_list(vm);
    size_t count = subject->items.len;
    CtValue last = ct_make_nil();
    if (count != 0) {
        last = subject->items.data[count - 1];
    }
    ct_stack_push(&vm->stack, last);
}
|
|
|
|
/*
 * list-clone: pop a list, build a new list holding the same element
 * values (element-wise copy), then push the original followed by the copy.
 */
static void ct_intrinsic_list_clone(CompileTimeVM *vm) {
    CtList *original = ct_pop_list(vm);
    CtList *copy = ct_list_new();
    size_t index = 0;
    while (index < original->items.len) {
        VEC_PUSH(&copy->items, original->items.data[index]);
        index++;
    }
    ct_stack_push(&vm->stack, ct_make_list(original));
    ct_stack_push(&vm->stack, ct_make_list(copy));
}
|
|
|
|
/* map-new: push a freshly created map. */
static void ct_intrinsic_map_new(CompileTimeVM *vm) {
    CtMap *fresh = ct_map_new();
    ct_stack_push(&vm->stack, ct_make_map(fresh));
}
|
|
|
|
/*
 * map-set: pop a value, a string key and a map; store the value under
 * the key via ct_map_set() and push the map back.
 */
static void ct_intrinsic_map_set(CompileTimeVM *vm) {
    CtValue value = ct_stack_pop(&vm->stack);
    char *name = ct_pop_str(vm);
    CtMap *target = ct_pop_map(vm);
    ct_map_set(target, name, value);
    /* The popped key copy is released here, right after the insert. */
    free(name);
    ct_stack_push(&vm->stack, ct_make_map(target));
}
|
|
|
|
/*
 * map-get: pop a string key and a map, look the key up with
 * ct_map_get(), then push the map, the looked-up value (initialised to
 * nil before the lookup) and the lookup's success flag.
 */
static void ct_intrinsic_map_get(CompileTimeVM *vm) {
    char *name = ct_pop_str(vm);
    CtMap *source = ct_pop_map(vm);
    CtValue found = ct_make_nil();
    bool present = ct_map_get(source, name, &found);
    free(name);
    ct_stack_push(&vm->stack, ct_make_map(source));
    ct_stack_push(&vm->stack, found);
    ct_stack_push(&vm->stack, ct_make_int(present));
}
|
|
|
|
/*
 * map-has: pop a string key and a map, then push the map followed by
 * the success flag of ct_map_get() for that key.
 */
static void ct_intrinsic_map_has(CompileTimeVM *vm) {
    char *name = ct_pop_str(vm);
    CtMap *source = ct_pop_map(vm);
    CtValue ignored = ct_make_nil(); /* the value itself is not needed */
    bool present = ct_map_get(source, name, &ignored);
    free(name);
    ct_stack_push(&vm->stack, ct_make_map(source));
    ct_stack_push(&vm->stack, ct_make_int(present));
}
|
|
|
|
/* token-lexeme: pop a token and push its lexeme as a string value. */
static void ct_intrinsic_token_lexeme(CompileTimeVM *vm) {
    Token popped = ct_pop_token(vm);
    CtValue text = ct_make_str(popped.lexeme);
    ct_stack_push(&vm->stack, text);
}
|
|
|
|
/*
 * token-from-lexeme: pop a value (popping once more when the first one
 * is nil) and push a token whose lexeme is a copy of the string or of
 * the token's lexeme; all position fields are zero. Any other kind is
 * reported through ct_trace_error() and leaves the lexeme NULL.
 */
static void ct_intrinsic_token_from_lexeme(CompileTimeVM *vm) {
    CtValue source = ct_stack_pop(&vm->stack);
    if (source.kind == CT_NIL) {
        source = ct_stack_pop(&vm->stack);
    }

    Token made = {0}; /* lexeme NULL; line/column/start/end all 0 */
    if (source.kind == CT_STR) {
        made.lexeme = str_dup(source.as.str);
    } else if (source.kind == CT_TOKEN) {
        made.lexeme = str_dup(source.as.token.lexeme);
    } else {
        ct_trace_error(vm, "expected string for token-from-lexeme");
    }
    ct_stack_push(&vm->stack, ct_make_token(made));
}
|
|
|
|
/*
 * next-token: fetch a token with parser_next_token() and push it, or
 * push nil when the returned token has no lexeme.
 */
static void ct_intrinsic_next_token(CompileTimeVM *vm) {
    Token fetched = parser_next_token(vm->parser);
    if (fetched.lexeme) {
        ct_stack_push(&vm->stack, ct_make_token(fetched));
    } else {
        ct_stack_push(&vm->stack, ct_make_nil());
    }
}
|
|
|
|
/*
 * peek-token: obtain a token with parser_peek_token() and push it, or
 * push nil when the returned token has no lexeme.
 */
static void ct_intrinsic_peek_token(CompileTimeVM *vm) {
    Token peeked = parser_peek_token(vm->parser);
    if (peeked.lexeme) {
        ct_stack_push(&vm->stack, ct_make_token(peeked));
    } else {
        ct_stack_push(&vm->stack, ct_make_nil());
    }
}
|
|
|
|
/*
 * inject-tokens: pop a list and hand its elements to
 * parser_inject_tokens() as tokens. Token elements are passed through
 * unchanged; every other element becomes a zeroed token whose lexeme is
 * the element's string rendering (a fresh copy).
 */
static void ct_intrinsic_inject_tokens(CompileTimeVM *vm) {
    CtList *source = ct_pop_list(vm);
    TokenVec pending;
    VEC_INIT(&pending);

    size_t index = 0;
    while (index < source->items.len) {
        CtValue element = source->items.data[index++];
        Token made = {0};
        if (element.kind == CT_TOKEN) {
            made = element.as.token;
        } else {
            /* For strings this yields a copy of the text, for integers
             * the decimal form, otherwise "". */
            made.lexeme = ct_string_from_value(element);
        }
        VEC_PUSH(&pending, made);
    }
    parser_inject_tokens(vm->parser, &pending);
}
|
|
|
|
/*
 * set-token-hook: pop a string and install it as the parser's
 * token_hook, releasing any previously installed name. The parser takes
 * over the popped copy.
 */
static void ct_intrinsic_set_token_hook(CompileTimeVM *vm) {
    char *hook_name = ct_pop_str(vm);
    free(vm->parser->token_hook); /* free(NULL) is a no-op */
    vm->parser->token_hook = hook_name;
}
|
|
|
|
/* clear-token-hook: release the parser's token_hook name and reset it to NULL. */
static void ct_intrinsic_clear_token_hook(CompileTimeVM *vm) {
    free(vm->parser->token_hook); /* free(NULL) is a no-op */
    vm->parser->token_hook = NULL;
}
|
|
|
|
/*
 * parse-error: pop a message string, print it to stderr with the
 * "[error] " prefix and terminate the process with status 1.
 */
static void ct_intrinsic_parse_error(CompileTimeVM *vm) {
    char *message = ct_pop_str(vm);
    fprintf(stderr, "[error] %s\n", message);
    free(message);
    exit(1);
}
|
|
|
|
/* add-token: pop a string and pass it to reader_add_tokens() on the parser's reader. */
static void ct_intrinsic_add_token(CompileTimeVM *vm) {
    char *spelling = ct_pop_str(vm);
    reader_add_tokens(vm->parser->reader, spelling);
    free(spelling);
}
|
|
|
|
/* add-token-chars: pop a string and pass it to reader_add_token_chars() on the parser's reader. */
static void ct_intrinsic_add_token_chars(CompileTimeVM *vm) {
    char *charset = ct_pop_str(vm);
    reader_add_token_chars(vm->parser->reader, charset);
    free(charset);
}
|
|
|
|
/*
 * prelude-clear: make sure the parser has a custom_prelude vector
 * (allocating and initialising one if needed) and reset its length to 0.
 * Existing entries are not freed here.
 */
static void ct_intrinsic_prelude_clear(CompileTimeVM *vm) {
    if (vm->parser->custom_prelude == NULL) {
        StrVec *fresh = (StrVec *)xmalloc(sizeof(StrVec));
        VEC_INIT(fresh);
        vm->parser->custom_prelude = fresh;
    }
    vm->parser->custom_prelude->len = 0;
}
|
|
|
|
/*
 * prelude-append: pop a string and append it to the parser's
 * custom_prelude vector, creating the vector on first use. The vector
 * keeps the popped copy.
 */
static void ct_intrinsic_prelude_append(CompileTimeVM *vm) {
    char *text = ct_pop_str(vm);
    if (vm->parser->custom_prelude == NULL) {
        StrVec *fresh = (StrVec *)xmalloc(sizeof(StrVec));
        VEC_INIT(fresh);
        vm->parser->custom_prelude = fresh;
    }
    VEC_PUSH(vm->parser->custom_prelude, text);
}
|
|
|
|
/*
 * bss-clear: make sure the parser has a custom_bss vector (allocating
 * and initialising one if needed) and reset its length to 0.
 * Existing entries are not freed here.
 */
static void ct_intrinsic_bss_clear(CompileTimeVM *vm) {
    if (vm->parser->custom_bss == NULL) {
        StrVec *fresh = (StrVec *)xmalloc(sizeof(StrVec));
        VEC_INIT(fresh);
        vm->parser->custom_bss = fresh;
    }
    vm->parser->custom_bss->len = 0;
}
|
|
|
|
/*
 * bss-append: pop a string and append it to the parser's custom_bss
 * vector, creating the vector on first use. The vector keeps the popped
 * copy.
 */
static void ct_intrinsic_bss_append(CompileTimeVM *vm) {
    char *text = ct_pop_str(vm);
    if (vm->parser->custom_bss == NULL) {
        StrVec *fresh = (StrVec *)xmalloc(sizeof(StrVec));
        VEC_INIT(fresh);
        vm->parser->custom_bss = fresh;
    }
    VEC_PUSH(vm->parser->custom_bss, text);
}
|
|
|
|
/*
 * use-l2-ct: pop a word name and set compile_time_override on that word.
 * When the dictionary has no such word, a zero-initialised Word with
 * that name is created and registered first.
 */
static void ct_intrinsic_use_l2_ct(CompileTimeVM *vm) {
    char *wanted = ct_pop_str(vm);
    Word *entry = dictionary_lookup(vm->dictionary, wanted);
    if (entry == NULL) {
        entry = (Word *)xmalloc(sizeof(Word));
        memset(entry, 0, sizeof(Word));
        entry->name = str_dup(wanted);
        dictionary_register(vm->dictionary, entry);
    }
    entry->compile_time_override = true;
    free(wanted);
}
|
|
|
|
static CtList *ct_list_from_tokens(const char **tokens, size_t count) {
|
|
CtList *list = ct_list_new();
|
|
for (size_t i = 0; i < count; i++) {
|
|
VEC_PUSH(&list->items, ct_make_str(tokens[i]));
|
|
}
|
|
return list;
|
|
}
|
|
|
|
/*
 * Binding strength used by ct_intrinsic_shunt: 1 for "+" and "-",
 * 2 for "*", "/" and "%", 0 for anything else (operands, parentheses).
 */
static int ct_shunt_precedence(const char *lex) {
    if (strcmp(lex, "+") == 0 || strcmp(lex, "-") == 0) {
        return 1;
    }
    if (strcmp(lex, "*") == 0 || strcmp(lex, "/") == 0 || strcmp(lex, "%") == 0) {
        return 2;
    }
    return 0;
}

/*
 * shunt: pop a list of infix items and push a new list with the same
 * items in postfix order (operator-stack algorithm over + - * / % and
 * parentheses, equal precedence popping first, i.e. left-associative).
 *
 * Each item is compared by its string rendering. Operands and operators
 * are re-emitted as string values; parentheses are dropped, except that
 * any "(" still on the operator stack at the end is flushed to the
 * output like an operator. The input list is not pushed back.
 */
static void ct_intrinsic_shunt(CompileTimeVM *vm) {
    CtList *input = ct_pop_list(vm);
    CtList *output = ct_list_new();
    CtList *ops = ct_list_new();

    for (size_t i = 0; i < input->items.len; i++) {
        char *lex = ct_string_from_value(input->items.data[i]);

        if (strcmp(lex, "(") == 0) {
            VEC_PUSH(&ops->items, ct_make_str(lex));
        } else if (strcmp(lex, ")") == 0) {
            /* Unwind operators to the output until the matching "(". */
            while (ops->items.len) {
                CtValue top = ops->items.data[ops->items.len - 1];
                char *top_lex = ct_string_from_value(top);
                bool is_open = (strcmp(top_lex, "(") == 0);
                free(top_lex);
                if (!is_open) {
                    VEC_PUSH(&output->items, top);
                }
                ops->items.len--;
                if (is_open) {
                    break;
                }
            }
        } else {
            int prec = ct_shunt_precedence(lex);
            if (prec > 0) {
                /* Pop operators that bind at least as tightly. */
                while (ops->items.len) {
                    CtValue top = ops->items.data[ops->items.len - 1];
                    char *top_lex = ct_string_from_value(top);
                    int top_prec = ct_shunt_precedence(top_lex);
                    free(top_lex);
                    if (top_prec < prec) {
                        break;
                    }
                    VEC_PUSH(&output->items, top);
                    ops->items.len--;
                }
                VEC_PUSH(&ops->items, ct_make_str(lex));
            } else {
                VEC_PUSH(&output->items, ct_make_str(lex));
            }
        }
        free(lex);
    }

    /* Flush whatever is left on the operator stack, top first. */
    while (ops->items.len) {
        CtValue top = VEC_POP(&ops->items);
        VEC_PUSH(&output->items, top);
    }
    ct_stack_push(&vm->stack, ct_make_list(output));
}
|
|
|
|
/*
 * Allocate a SplitLexer bound to `parser`. Every byte of `seps` (which
 * may be NULL) is marked as a separator; the token buffer starts empty.
 */
static SplitLexer *split_lexer_new(Parser *parser, const char *seps) {
    SplitLexer *created = (SplitLexer *)xmalloc(sizeof(SplitLexer));
    created->parser = parser;
    memset(created->separators, 0, sizeof(created->separators));
    if (seps != NULL) {
        for (size_t i = 0; seps[i] != '\0'; i++) {
            created->separators[(unsigned char)seps[i]] = true;
        }
    }
    VEC_INIT(&created->buffer);
    return created;
}
|
|
|
|
/*
 * Split `tok` at the lexer's separator characters and append the pieces
 * to the lexer's buffer.
 *
 * A token without a lexeme is ignored. An empty lexeme, or one starting
 * with a double quote, is buffered unchanged. Otherwise every maximal
 * run of non-separator bytes and every single separator byte becomes its
 * own token: a copy of `tok` with a newly allocated lexeme.
 */
static void split_lexer_buffer_token(SplitLexer *lexer, Token tok) {
    if (!tok.lexeme) {
        return;
    }
    size_t len = strlen(tok.lexeme);
    if (len == 0 || tok.lexeme[0] == '"') {
        VEC_PUSH(&lexer->buffer, tok);
        return;
    }

    size_t piece_start = 0;
    for (size_t pos = 0; pos <= len; pos++) {
        /* pos == len is the virtual boundary that ends the last piece. */
        bool on_sep = (pos < len) && lexer->separators[(unsigned char)tok.lexeme[pos]];
        if (!on_sep && pos != len) {
            continue;
        }
        if (pos > piece_start) {
            size_t piece_len = pos - piece_start;
            char *piece = (char *)xmalloc(piece_len + 1);
            memcpy(piece, tok.lexeme + piece_start, piece_len);
            piece[piece_len] = '\0';
            Token part = tok;
            part.lexeme = piece;
            VEC_PUSH(&lexer->buffer, part);
        }
        if (on_sep) {
            char single[2] = {tok.lexeme[pos], '\0'};
            Token part = tok;
            part.lexeme = str_dup(single);
            VEC_PUSH(&lexer->buffer, part);
        }
        piece_start = pos + 1;
    }
}
|
|
|
|
/*
 * Remove and return the first buffered token. When the buffer is empty
 * it is refilled from one parser_next_token() result; a zeroed token
 * (NULL lexeme) is returned when no token is available.
 */
static Token split_lexer_pop(SplitLexer *lexer) {
    Token none = {0};
    if (lexer->buffer.len == 0) {
        Token fresh = parser_next_token(lexer->parser);
        if (!fresh.lexeme) {
            return none;
        }
        split_lexer_buffer_token(lexer, fresh);
        if (lexer->buffer.len == 0) {
            return none;
        }
    }
    Token head = lexer->buffer.data[0];
    lexer->buffer.len--;
    /* Close the gap left by the removed head. */
    memmove(lexer->buffer.data, lexer->buffer.data + 1, lexer->buffer.len * sizeof(Token));
    return head;
}
|
|
|
|
/*
 * Return the first buffered token without removing it. When the buffer
 * is empty it is refilled from one parser_next_token() result (so the
 * parser does advance); a zeroed token (NULL lexeme) is returned when no
 * token is available.
 */
static Token split_lexer_peek(SplitLexer *lexer) {
    Token none = {0};
    if (lexer->buffer.len == 0) {
        Token fresh = parser_next_token(lexer->parser);
        if (!fresh.lexeme) {
            return none;
        }
        split_lexer_buffer_token(lexer, fresh);
        if (lexer->buffer.len == 0) {
            return none;
        }
    }
    return lexer->buffer.data[0];
}
|
|
|
|
/*
 * Insert `tok` at the front of the lexer's buffer so it is the next
 * token returned. Capacity doubles when full (starting at 8).
 */
static void split_lexer_push_back(SplitLexer *lexer, Token tok) {
    size_t used = lexer->buffer.len;
    if (used + 1 > lexer->buffer.cap) {
        lexer->buffer.cap = lexer->buffer.cap ? lexer->buffer.cap * 2 : 8;
        lexer->buffer.data = xrealloc(lexer->buffer.data, lexer->buffer.cap * sizeof(Token));
    }
    /* Shift everything up by one slot to free index 0. */
    memmove(lexer->buffer.data + 1, lexer->buffer.data, used * sizeof(Token));
    lexer->buffer.data[0] = tok;
    lexer->buffer.len = used + 1;
}
|
|
|
|
/*
 * lexer-new: pop a string of separator characters and push a new split
 * lexer bound to the VM's parser.
 */
static void ct_intrinsic_lexer_new(CompileTimeVM *vm) {
    char *separators = ct_pop_str(vm);
    SplitLexer *created = split_lexer_new(vm->parser, separators);
    free(separators);
    ct_stack_push(&vm->stack, ct_make_lexer(created));
}
|
|
|
|
/*
 * lexer-pop: pop a lexer, take its next token, then push the lexer
 * followed by that token (nil when no token is available).
 * Exits with an error when the popped value is not a lexer.
 */
static void ct_intrinsic_lexer_pop(CompileTimeVM *vm) {
    CtValue handle = ct_stack_pop(&vm->stack);
    if (handle.kind != CT_LEXER) {
        fprintf(stderr, "[error] lexer-pop expects lexer\n");
        exit(1);
    }
    Token taken = split_lexer_pop(handle.as.lexer);
    ct_stack_push(&vm->stack, ct_make_lexer(handle.as.lexer));
    if (taken.lexeme) {
        ct_stack_push(&vm->stack, ct_make_token(taken));
    } else {
        ct_stack_push(&vm->stack, ct_make_nil());
    }
}
|
|
|
|
/*
 * lexer-peek: pop a lexer, look at its next token without removing it,
 * then push the lexer followed by that token (nil when none is available).
 * Exits with an error when the popped value is not a lexer.
 */
static void ct_intrinsic_lexer_peek(CompileTimeVM *vm) {
    CtValue handle = ct_stack_pop(&vm->stack);
    if (handle.kind != CT_LEXER) {
        fprintf(stderr, "[error] lexer-peek expects lexer\n");
        exit(1);
    }
    Token seen = split_lexer_peek(handle.as.lexer);
    ct_stack_push(&vm->stack, ct_make_lexer(handle.as.lexer));
    if (seen.lexeme) {
        ct_stack_push(&vm->stack, ct_make_token(seen));
    } else {
        ct_stack_push(&vm->stack, ct_make_nil());
    }
}
|
|
|
|
/*
 * lexer-expect: pop the expected lexeme string and a lexer, take the
 * lexer's next token and require its lexeme to match. On success pushes
 * the lexer followed by the token; otherwise (no token, mismatch, or a
 * non-lexer value) exits with an error.
 */
static void ct_intrinsic_lexer_expect(CompileTimeVM *vm) {
    char *wanted = ct_pop_str(vm);
    CtValue handle = ct_stack_pop(&vm->stack);
    if (handle.kind != CT_LEXER) {
        fprintf(stderr, "[error] lexer-expect expects lexer\n");
        exit(1);
    }
    Token taken = split_lexer_pop(handle.as.lexer);
    bool matches = taken.lexeme && strcmp(taken.lexeme, wanted) == 0;
    if (!matches) {
        fprintf(stderr, "[error] lexer-expect expected '%s'\n", wanted);
        exit(1);
    }
    ct_stack_push(&vm->stack, ct_make_lexer(handle.as.lexer));
    ct_stack_push(&vm->stack, ct_make_token(taken));
    free(wanted);
}
|
|
|
|
/*
 * lexer-collect-brace: pop a lexer and gather tokens up to the "}" that
 * balances an already-consumed "{" (nesting depth starts at 1). Nested
 * braces are kept in the result; the final closing brace is not.
 * Pushes the lexer followed by the list of collected tokens. Exits with
 * an error on a non-lexer value or when tokens run out first.
 */
static void ct_intrinsic_lexer_collect_brace(CompileTimeVM *vm) {
    CtValue handle = ct_stack_pop(&vm->stack);
    if (handle.kind != CT_LEXER) {
        fprintf(stderr, "[error] lexer-collect-brace expects lexer\n");
        exit(1);
    }

    CtList *collected = ct_list_new();
    int depth = 1;
    for (;;) {
        Token taken = split_lexer_pop(handle.as.lexer);
        if (!taken.lexeme) {
            fprintf(stderr, "[error] unterminated brace in lexer\n");
            exit(1);
        }
        if (strcmp(taken.lexeme, "{") == 0) {
            depth++;
        } else if (strcmp(taken.lexeme, "}") == 0) {
            depth--;
            if (depth == 0) {
                break; /* the balancing brace itself is not collected */
            }
        }
        VEC_PUSH(&collected->items, ct_make_token(taken));
    }

    ct_stack_push(&vm->stack, ct_make_lexer(handle.as.lexer));
    ct_stack_push(&vm->stack, ct_make_list(collected));
}
|
|
|
|
/*
 * lexer-push-back: pop a token, then a lexer; put the token back at the
 * front of the lexer's buffer and push the lexer.
 * Exits with an error when the second value is not a lexer.
 */
static void ct_intrinsic_lexer_push_back(CompileTimeVM *vm) {
    Token returned = ct_pop_token(vm);
    CtValue handle = ct_stack_pop(&vm->stack);
    if (handle.kind != CT_LEXER) {
        fprintf(stderr, "[error] lexer-push-back expects lexer\n");
        exit(1);
    }
    split_lexer_push_back(handle.as.lexer, returned);
    ct_stack_push(&vm->stack, ct_make_lexer(handle.as.lexer));
}
|
|
|
|
/*
 * emit-definition: pop a body list, then a name token, and inject the
 * token sequence  word <name> <body...> end  into the parser.
 * Body tokens pass through unchanged; strings and integers become zeroed
 * tokens with a freshly allocated lexeme, and any other kind uses its
 * ct_string_from_value() rendering.
 */
static void ct_intrinsic_emit_definition(CompileTimeVM *vm) {
    CtList *body = ct_pop_list(vm);
    Token name = ct_pop_token(vm);

    TokenVec pending;
    VEC_INIT(&pending);

    Token opener = {0};
    opener.lexeme = str_dup("word");
    VEC_PUSH(&pending, opener);
    VEC_PUSH(&pending, name);

    size_t index = 0;
    while (index < body->items.len) {
        CtValue element = body->items.data[index++];
        Token made = {0};
        if (element.kind == CT_TOKEN) {
            made = element.as.token;
        } else if (element.kind == CT_STR) {
            made.lexeme = str_dup(element.as.str);
        } else if (element.kind == CT_INT) {
            made.lexeme = str_printf("%lld", (long long)element.as.i64);
        } else {
            made.lexeme = ct_string_from_value(element);
        }
        VEC_PUSH(&pending, made);
    }

    Token closer = {0};
    closer.lexeme = str_dup("end");
    VEC_PUSH(&pending, closer);

    parser_inject_tokens(vm->parser, &pending);
}
|
|
|
|
/*
 * prelude-set: replace the custom prelude with the single string on top
 * of the stack (clear, then append).
 */
static void ct_intrinsic_prelude_set(CompileTimeVM *vm) {
    ct_intrinsic_prelude_clear(vm);  /* vector exists and is empty */
    ct_intrinsic_prelude_append(vm); /* pops the string and stores it */
}
|
|
|
|
/*
 * bss-set: replace the custom bss lines with the single string on top
 * of the stack (clear, then append).
 */
static void ct_intrinsic_bss_set(CompileTimeVM *vm) {
    ct_intrinsic_bss_clear(vm);  /* vector exists and is empty */
    ct_intrinsic_bss_append(vm); /* pops the string and stores it */
}
|
|
|
|
/*
 * Attach the compile-time intrinsic `fn` to the word called `name`,
 * creating and registering a zero-initialised Word when the dictionary
 * has none. The word is also marked compile_only. Returns the word.
 */
static Word *register_ct_intrinsic(Dictionary *dict, const char *name, CompileTimeIntrinsic fn) {
    Word *entry = dictionary_lookup(dict, name);
    if (entry == NULL) {
        entry = (Word *)xmalloc(sizeof(Word));
        memset(entry, 0, sizeof(Word));
        entry->name = str_dup(name);
        dictionary_register(dict, entry);
    }
    entry->compile_only = true;
    entry->ct_intrinsic = fn;
    return entry;
}
|
|
|
|
/*
 * Registers every built-in compile-time intrinsic in `dict` (in the order
 * listed in the table below) and then points the VM at that dictionary.
 * `parser` is accepted but not used.
 */
static void bootstrap_dictionary(Dictionary *dict, Parser *parser, CompileTimeVM *vm) {
    static const struct {
        const char *name;
        CompileTimeIntrinsic fn;
    } builtins[] = {
        /* stack manipulation */
        {"dup", ct_intrinsic_dup},
        {"drop", ct_intrinsic_drop},
        {"swap", ct_intrinsic_swap},
        {"over", ct_intrinsic_over},
        {"rot", ct_intrinsic_rot},
        {"pick", ct_intrinsic_pick},
        {"rpick", ct_intrinsic_rpick},
        {">r", ct_intrinsic_to_r},
        {"r>", ct_intrinsic_from_r},
        {"rdrop", ct_intrinsic_rdrop},
        /* arithmetic and comparison */
        {"+", ct_intrinsic_add},
        {"-", ct_intrinsic_sub},
        {"*", ct_intrinsic_mul},
        {"/", ct_intrinsic_div},
        {"%", ct_intrinsic_mod},
        {"==", ct_intrinsic_eq},
        {"!=", ct_intrinsic_ne},
        {">", ct_intrinsic_gt},
        {"<", ct_intrinsic_lt},
        {">=", ct_intrinsic_ge},
        {"<=", ct_intrinsic_le},
        {"not", ct_intrinsic_not},
        {"nil", ct_intrinsic_nil},
        {"nil?", ct_intrinsic_nilp},
        /* strings */
        {"string=", ct_intrinsic_string_eq},
        {"string-length", ct_intrinsic_string_length},
        {"string-append", ct_intrinsic_string_append},
        {"string>number", ct_intrinsic_string_to_number},
        {"int>string", ct_intrinsic_int_to_string},
        {"identifier?", ct_intrinsic_identifierp},
        /* lists */
        {"list-new", ct_intrinsic_list_new},
        {"list-append", ct_intrinsic_list_append},
        {"list-pop", ct_intrinsic_list_pop},
        {"list-pop-front", ct_intrinsic_list_pop_front},
        {"list-length", ct_intrinsic_list_length},
        {"list-empty?", ct_intrinsic_list_empty},
        {"list-get", ct_intrinsic_list_get},
        {"list-set", ct_intrinsic_list_set},
        {"list-extend", ct_intrinsic_list_extend},
        {"list-last", ct_intrinsic_list_last},
        {"list-clone", ct_intrinsic_list_clone},
        /* maps */
        {"map-new", ct_intrinsic_map_new},
        {"map-set", ct_intrinsic_map_set},
        {"map-get", ct_intrinsic_map_get},
        {"map-has?", ct_intrinsic_map_has},
        /* tokens and parser hooks */
        {"token-lexeme", ct_intrinsic_token_lexeme},
        {"token-from-lexeme", ct_intrinsic_token_from_lexeme},
        {"next-token", ct_intrinsic_next_token},
        {"peek-token", ct_intrinsic_peek_token},
        {"inject-tokens", ct_intrinsic_inject_tokens},
        {"set-token-hook", ct_intrinsic_set_token_hook},
        {"clear-token-hook", ct_intrinsic_clear_token_hook},
        {"parse-error", ct_intrinsic_parse_error},
        {"add-token", ct_intrinsic_add_token},
        {"add-token-chars", ct_intrinsic_add_token_chars},
        /* prelude / bss customisation */
        {"prelude-clear", ct_intrinsic_prelude_clear},
        {"prelude-append", ct_intrinsic_prelude_append},
        {"prelude-set", ct_intrinsic_prelude_set},
        {"bss-clear", ct_intrinsic_bss_clear},
        {"bss-append", ct_intrinsic_bss_append},
        {"bss-set", ct_intrinsic_bss_set},
        /* miscellaneous */
        {"use-l2-ct", ct_intrinsic_use_l2_ct},
        {"shunt", ct_intrinsic_shunt},
        {"emit-definition", ct_intrinsic_emit_definition},
        /* split lexer */
        {"lexer-new", ct_intrinsic_lexer_new},
        {"lexer-pop", ct_intrinsic_lexer_pop},
        {"lexer-peek", ct_intrinsic_lexer_peek},
        {"lexer-expect", ct_intrinsic_lexer_expect},
        {"lexer-collect-brace", ct_intrinsic_lexer_collect_brace},
        {"lexer-push-back", ct_intrinsic_lexer_push_back},
    };

    (void)parser;

    for (size_t i = 0; i < ARRAY_LEN(builtins); i++) {
        register_ct_intrinsic(dict, builtins[i].name, builtins[i].fn);
    }

    vm->dictionary = dict;
}
|
|
|
|
/*
 * Emits assembly that pushes a signed 64-bit literal onto the data stack
 * (r12 is decremented by 8 and the value stored at [r12]).
 *
 * x86-64 `mov qword [mem], imm` can only encode a sign-extended 32-bit
 * immediate, so values outside the int32 range are loaded into rax first
 * (rax is already used as scratch by the other push helpers) and stored
 * from there. Values that fit keep the original single-instruction form.
 */
static void emit_push_literal(FunctionEmitter *builder, int64_t value) {
    emit_line(builder, str_printf(" ; push %lld", (long long)value));
    emit_line(builder, " sub r12, 8");

    if (value >= INT32_MIN && value <= INT32_MAX) {
        emit_line(builder, str_printf(" mov qword [r12], %lld", (long long)value));
    } else {
        /* Full 64-bit immediate: only `mov r64, imm64` can carry it. */
        emit_line(builder, str_printf(" mov rax, %lld", (long long)value));
        emit_line(builder, " mov [r12], rax");
    }
}
|
|
|
|
/*
 * Emits assembly that pushes an unsigned 64-bit literal onto the data
 * stack: a comment line, the r12 decrement, a load of the immediate into
 * rax, and the store to [r12].
 */
static void emit_push_literal_u64(FunctionEmitter *builder, uint64_t value) {
    unsigned long long raw = (unsigned long long)value;

    char *comment = str_printf(" ; push %llu", raw);
    emit_line(builder, comment);

    emit_line(builder, " sub r12, 8");

    char *load = str_printf(" mov rax, %llu", raw);
    emit_line(builder, load);

    emit_line(builder, " mov [r12], rax");
}
|
|
|
|
/*
 * Emits assembly that pushes the address of `label` onto the data stack:
 * the address is computed RIP-relative into rax, r12 is decremented by 8
 * and rax is stored at [r12].
 */
static void emit_push_label(FunctionEmitter *builder, const char *label) {
    char *comment = str_printf(" ; push %s", label);
    emit_line(builder, comment);

    char *load_address = str_printf(" lea rax, [rel %s]", label);
    emit_line(builder, load_address);

    emit_line(builder, " sub r12, 8");
    emit_line(builder, " mov [r12], rax");
}
|
|
|
|
/*
 * Emits assembly that pushes register `reg` onto the data stack
 * (decrement r12 by 8, then store the register at [r12]).
 */
static void emit_push_from(FunctionEmitter *builder, const char *reg) {
    emit_line(builder, " sub r12, 8");

    char *store = str_printf(" mov [r12], %s", reg);
    emit_line(builder, store);
}
|
|
|
|
/*
 * Emits assembly that pops the top of the data stack into register `reg`
 * (load from [r12], then increment r12 by 8).
 */
static void emit_pop_to(FunctionEmitter *builder, const char *reg) {
    char *load = str_printf(" mov %s, [r12]", reg);
    emit_line(builder, load);

    emit_line(builder, " add r12, 8");
}
|
|
|
|
/*
 * Initialises the three line vectors of an Emission (text, data and bss)
 * with VEC_INIT.
 */
static void emission_init(Emission *emission) {
    VEC_INIT(&emission->text); /* code lines */
    VEC_INIT(&emission->data); /* initialised data lines */
    VEC_INIT(&emission->bss);  /* uninitialised data lines */
}
|
|
|
|
typedef struct {
|
|
Emission *emission;
|
|
Dictionary *dictionary;
|
|
StrMap string_labels;
|
|
StrMap externs;
|
|
StrMap label_cache;
|
|
int unique_id;
|
|
bool debug;
|
|
} EmitContext;
|
|
|
|
/*
 * Emits an `extern <name>` line into the text section the first time a
 * given name is seen; later calls with the same name do nothing.
 */
static void emit_extern(EmitContext *ctx, const char *name) {
    if (!strmap_has(&ctx->externs, name)) {
        /* The stored value is only a "seen" marker. */
        strmap_set(&ctx->externs, name, (void *)1);
        VEC_PUSH(&ctx->emission->text, str_printf("extern %s", name));
    }
}
|
|
|
|
/*
 * Returns the data label for string literal `value`, emitting its
 * definition on first use.
 *
 * Each distinct value gets one label of the form `__str_<id>` (cached in
 * ctx->string_labels) and one data line `<label>: db b0, b1, ..., 0` where
 * the bytes are written as unsigned decimal numbers and a terminating 0 is
 * appended.
 *
 * The line is formatted in a single pass into one buffer, so no temporary
 * per-byte strings are allocated (the previous version leaked them) and
 * the cost is linear in the string length.
 */
static const char *emit_string_literal(EmitContext *ctx, const char *value) {
    char *label = (char *)strmap_get(&ctx->string_labels, value);
    if (label) {
        return label;
    }

    label = str_printf("__str_%d", ctx->unique_id++);
    strmap_set(&ctx->string_labels, value, label);

    /*
     * Capacity: "<label>: db " (label + 5), at most "255, " (5 chars) per
     * byte, the final "0" and the terminating NUL.
     */
    size_t capacity = strlen(label) + 5 + strlen(value) * 5 + 2;
    char *line = (char *)xmalloc(capacity);

    size_t used = (size_t)snprintf(line, capacity, "%s: db ", label);
    for (const unsigned char *p = (const unsigned char *)value; *p; p++) {
        used += (size_t)snprintf(line + used, capacity - used, "%u, ", (unsigned int)*p);
    }
    snprintf(line + used, capacity - used, "0");

    VEC_PUSH(&ctx->emission->data, line);
    return label;
}
|
|
|
|
/*
 * Returns the assembly label for word `name`: `w_` followed by the
 * sanitize_label() form of the name. Labels are memoised in
 * ctx->label_cache, so repeated calls return the same string.
 */
static const char *emit_word_label(EmitContext *ctx, const char *name) {
    char *cached = (char *)strmap_get(&ctx->label_cache, name);
    if (cached != NULL) {
        return cached;
    }

    char *safe_name = sanitize_label(name);
    char *fresh = str_printf("w_%s", safe_name);
    free(safe_name);

    strmap_set(&ctx->label_cache, name, fresh);
    return fresh;
}
|
|
|
|
/*
 * Reports whether `name` is currently on the inline-expansion stack
 * (exact string comparison against every entry).
 */
static bool inline_stack_has(StrVec *stack, const char *name) {
    size_t index = 0;

    while (index < stack->len) {
        if (strcmp(stack->data[index], name) == 0) {
            return true;
        }
        index++;
    }

    return false;
}
|
|
|
|
/*
 * Returns true when `type` is exactly "double" or "float"; false for any
 * other string and for NULL.
 */
static bool is_float_type(const char *type) {
    if (type == NULL) {
        return false;
    }
    if (strcmp(type, "double") == 0) {
        return true;
    }
    return strcmp(type, "float") == 0;
}
|
|
|
|
/*
 * Emits a call to an extern (C) function described by `word`.
 *
 * Without a typed argument list (or with zero arguments) a bare `call` is
 * emitted and, if a non-void return type is declared, the result is pushed
 * onto the data stack.
 *
 * With typed arguments, each argument is loaded from the data stack into
 * the next free integer register (rdi, rsi, rdx, rcx, r8, r9) or, for
 * "float"/"double" types, the next xmm register (xmm0..xmm7). The first
 * declared argument is the deepest of the arguments on the stack. Running
 * out of registers is a fatal error. The arguments are then dropped, rsp
 * is adjusted by 8 around the call when it is not 16-byte aligned, and al
 * is set to the number of xmm registers used. Unless the return type is
 * "void", the result is pushed (from xmm0 for float types, rax otherwise).
 *
 * Fix: the zero-argument path used to push rax even for float/double
 * return types, whose result is delivered in xmm0.
 */
static void emit_extern_call(EmitContext *ctx, FunctionEmitter *builder, Word *word) {
    static const char *const int_regs[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};
    static const char *const float_regs[] = {"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"};

    emit_extern(ctx, word->name);

    if (!word->extern_arg_types || word->extern_arg_count == 0) {
        emit_line(builder, str_printf(" call %s", word->name));
        if (word->extern_ret_type && strcmp(word->extern_ret_type, "void") != 0) {
            if (is_float_type(word->extern_ret_type)) {
                /* Floating-point results come back in xmm0, not rax. */
                emit_line(builder, " sub r12, 8");
                emit_line(builder, " movq [r12], xmm0");
            } else {
                emit_push_from(builder, "rax");
            }
        }
        return;
    }

    int int_idx = 0;
    int float_idx = 0;
    for (int i = 0; i < word->extern_arg_count; i++) {
        const char *type = word->extern_arg_types[i];
        /* Argument i sits (count - 1 - i) cells below the top of stack. */
        int offset = (word->extern_arg_count - 1 - i) * 8;

        if (is_float_type(type)) {
            if (float_idx >= (int)ARRAY_LEN(float_regs)) {
                fprintf(stderr, "[error] too many float args for extern %s\n", word->name);
                exit(1);
            }
            emit_line(builder, str_printf(" movq %s, [r12 + %d]", float_regs[float_idx], offset));
            float_idx++;
        } else {
            if (int_idx >= (int)ARRAY_LEN(int_regs)) {
                fprintf(stderr, "[error] too many int args for extern %s\n", word->name);
                exit(1);
            }
            emit_line(builder, str_printf(" mov %s, [r12 + %d]", int_regs[int_idx], offset));
            int_idx++;
        }
    }

    /* Drop the consumed arguments from the data stack. */
    emit_line(builder, str_printf(" add r12, %d", word->extern_arg_count * 8));

    /* Branch on rsp alignment: aligned -> direct call, otherwise pad by 8. */
    emit_line(builder, " mov r11, rsp");
    emit_line(builder, " and r11, 15");
    char *align_label = str_printf(".L_align_%d", ctx->unique_id++);
    emit_line(builder, " cmp r11, 0");
    emit_line(builder, str_printf(" je %s", align_label));

    /* Misaligned path. */
    emit_line(builder, " sub rsp, 8");
    emit_line(builder, " xor eax, eax");
    emit_line(builder, str_printf(" mov al, %d", float_idx));
    emit_line(builder, str_printf(" call %s", word->name));
    emit_line(builder, " add rsp, 8");
    emit_line(builder, str_printf(" jmp %s_done", align_label));

    /* Already aligned path. */
    emit_line(builder, str_printf("%s:", align_label));
    emit_line(builder, " xor eax, eax");
    emit_line(builder, str_printf(" mov al, %d", float_idx));
    emit_line(builder, str_printf(" call %s", word->name));
    emit_line(builder, str_printf("%s_done:", align_label));
    free(align_label);

    if (word->extern_ret_type && strcmp(word->extern_ret_type, "void") == 0) {
        return;
    }
    if (word->extern_ret_type && is_float_type(word->extern_ret_type)) {
        emit_line(builder, " sub r12, 8");
        emit_line(builder, " movq [r12], xmm0");
    } else {
        emit_push_from(builder, "rax");
    }
}
|
|
|
|
/* Defined below; emit_word_call() needs it to expand inline words. */
static void emit_ops(EmitContext *ctx, FunctionEmitter *builder, OpVec *body, StrVec *inline_stack);

/*
 * Emits the code for invoking word `name`.
 *
 *  - unknown word: fatal error;
 *  - inline word with a definition: its body is emitted in place, with
 *    `inline_stack` used to reject recursive inlining;
 *  - extern without a typed argument list: `extern` declaration plus a
 *    bare `call`;
 *  - extern with a typed argument list (and no asm definition): handled by
 *    emit_extern_call();
 *  - everything else (asm words and ordinary definitions): a `call` to the
 *    word's generated label.
 */
static void emit_word_call(EmitContext *ctx, FunctionEmitter *builder, const char *name, StrVec *inline_stack) {
    Word *target = dictionary_lookup(ctx->dictionary, name);
    if (target == NULL) {
        fprintf(stderr, "[error] unknown word '%s' during emission\n", name);
        exit(1);
    }

    if (target->inline_def && target->definition) {
        if (inline_stack_has(inline_stack, target->name)) {
            fprintf(stderr, "[error] recursive inline word '%s'\n", target->name);
            exit(1);
        }
        VEC_PUSH(inline_stack, target->name);
        emit_ops(ctx, builder, &target->definition->body, inline_stack);
        inline_stack->len--; /* pop the name pushed above */
        return;
    }

    if (target->is_extern && !target->extern_arg_types) {
        emit_extern(ctx, target->name);
        emit_line(builder, str_printf(" call %s", target->name));
        return;
    }

    /*
     * Any extern reaching this point has a typed argument list; an asm
     * definition takes precedence over the typed extern call.
     */
    if (!target->asm_def && target->is_extern) {
        emit_extern_call(ctx, builder, target);
        return;
    }

    emit_line(builder, str_printf(" call %s", emit_word_label(ctx, target->name)));
}
|
|
|
|
/*
 * Emits the assembly for a single op.
 *
 *  - OP_LITERAL: pushes an int, the bit pattern of a double, or for a
 *    string its data label followed by its strlen();
 *  - OP_WORD: delegates to emit_word_call();
 *  - OP_BRANCH_ZERO / OP_JUMP / OP_LABEL: pop-and-branch-if-zero,
 *    unconditional jump, label definition;
 *  - OP_FOR_BEGIN / OP_FOR_END: counted loop whose counter is kept on the
 *    r13 stack; a count <= 0 skips the loop entirely;
 *  - OP_LIST_BEGIN: saves the current r12 on the list capture stack;
 *  - OP_LIST_END: counts the cells pushed since the matching begin, makes
 *    syscall 9 for (count + 1) * 8 bytes, stores the count in the first
 *    cell followed by the items (deepest first), restores r12 to the saved
 *    value and pushes the block address. The syscall result is not
 *    checked by the emitted code.
 *
 * Fix: the OP_LIST_END case started with a declaration directly after the
 * `case` label, which is not valid C11; the case body is now a block.
 * Ops of any other kind emit nothing.
 */
static void emit_op(EmitContext *ctx, FunctionEmitter *builder, Op *op, StrVec *inline_stack) {
    switch (op->kind) {
    case OP_LITERAL: {
        if (op->lit_kind == LIT_INT) {
            emit_push_literal(builder, op->data.i64);
        } else if (op->lit_kind == LIT_FLOAT) {
            /* Push the raw IEEE bit pattern of the double. */
            union { double f; uint64_t u; } conv;
            conv.f = op->data.f64;
            emit_push_literal_u64(builder, conv.u);
        } else if (op->lit_kind == LIT_STRING) {
            const char *label = emit_string_literal(ctx, op->data.str);
            emit_push_label(builder, label);
            emit_push_literal(builder, (int64_t)strlen(op->data.str));
        }
        break;
    }
    case OP_WORD:
        emit_word_call(ctx, builder, op->data.word, inline_stack);
        break;
    case OP_BRANCH_ZERO:
        emit_pop_to(builder, "rax");
        emit_line(builder, " cmp rax, 0");
        emit_line(builder, str_printf(" je %s", op->data.label));
        break;
    case OP_JUMP:
        emit_line(builder, str_printf(" jmp %s", op->data.label));
        break;
    case OP_LABEL:
        emit_line(builder, str_printf("%s:", op->data.label));
        break;
    case OP_FOR_BEGIN:
        emit_pop_to(builder, "rax");
        emit_line(builder, " cmp rax, 0");
        emit_line(builder, str_printf(" jle %s", op->data.loop.end));
        emit_line(builder, " sub r13, 8");
        emit_line(builder, " mov [r13], rax");
        emit_line(builder, str_printf("%s:", op->data.loop.loop));
        break;
    case OP_FOR_END:
        emit_line(builder, " mov rax, [r13]");
        emit_line(builder, " dec rax");
        emit_line(builder, " mov [r13], rax");
        emit_line(builder, " cmp rax, 0");
        emit_line(builder, str_printf(" jg %s", op->data.loop.loop));
        emit_line(builder, " add r13, 8");
        emit_line(builder, str_printf("%s:", op->data.loop.end));
        break;
    case OP_LIST_BEGIN:
        emit_line(builder, " mov rax, [rel list_capture_sp]");
        emit_line(builder, " mov [rax], r12");
        emit_line(builder, " add rax, 8");
        emit_line(builder, " mov [rel list_capture_sp], rax");
        break;
    case OP_LIST_END: {
        char *list_done = str_printf(".list_copy_done_%d", ctx->unique_id++);
        char *list_loop = str_printf(".list_copy_loop_%d", ctx->unique_id++);

        /* Pop the saved r12 and compute the item count into rcx / r15. */
        emit_line(builder, " mov rax, [rel list_capture_sp]");
        emit_line(builder, " sub rax, 8");
        emit_line(builder, " mov [rel list_capture_sp], rax");
        emit_line(builder, " mov rbx, [rax]");
        emit_line(builder, " mov rcx, rbx");
        emit_line(builder, " sub rcx, r12");
        emit_line(builder, " shr rcx, 3");
        emit_line(builder, " mov r15, rcx");

        /* syscall 9 with length (count + 1) * 8. */
        emit_line(builder, " mov rdi, 0");
        emit_line(builder, " mov rsi, rcx");
        emit_line(builder, " add rsi, 1");
        emit_line(builder, " shl rsi, 3");
        emit_line(builder, " mov rdx, 3");
        emit_line(builder, " mov r10, 34");
        emit_line(builder, " mov r8, -1");
        emit_line(builder, " mov r9, 0");
        emit_line(builder, " mov rax, 9");
        emit_line(builder, " syscall");

        /* Header cell = count; then copy items, deepest stack cell first. */
        emit_line(builder, " mov [rax], r15");
        emit_line(builder, " mov rcx, r15");
        emit_line(builder, " cmp rcx, 0");
        emit_line(builder, str_printf(" je %s", list_done));
        emit_line(builder, " lea rsi, [r12 + rcx*8 - 8]");
        emit_line(builder, " lea rdi, [rax + 8]");
        emit_line(builder, str_printf("%s:", list_loop));
        emit_line(builder, " mov rdx, [rsi]");
        emit_line(builder, " mov [rdi], rdx");
        emit_line(builder, " sub rsi, 8");
        emit_line(builder, " add rdi, 8");
        emit_line(builder, " dec rcx");
        emit_line(builder, str_printf(" jnz %s", list_loop));
        emit_line(builder, str_printf("%s:", list_done));

        /* Drop the captured items and push the block address. */
        emit_line(builder, " mov r12, rbx");
        emit_line(builder, " sub r12, 8");
        emit_line(builder, " mov [r12], rax");

        free(list_done);
        free(list_loop);
        break;
    }
    default:
        break;
    }
}
|
|
|
|
/*
 * Emits every op of `body` in order by calling emit_op() on each element.
 */
static void emit_ops(EmitContext *ctx, FunctionEmitter *builder, OpVec *body, StrVec *inline_stack) {
    size_t position = 0;

    while (position < body->len) {
        emit_op(ctx, builder, &body->data[position], inline_stack);
        position++;
    }
}
|
|
|
|
/*
 * Emits one word definition into the text section: an optional
 * `global <label>` line (only for the word named "main"), the label, the
 * body ops, and a trailing `ret`.
 */
static void emit_definition(EmitContext *ctx, Definition *def) {
    FunctionEmitter builder;
    emitter_init(&builder, &ctx->emission->text, ctx->debug);

    const char *label = emit_word_label(ctx, def->name);
    bool is_entry_point = strcmp(def->name, "main") == 0;

    if (is_entry_point) {
        emit_line(&builder, str_printf("global %s", label));
    }
    emit_line(&builder, str_printf("%s:", label));

    /* Fresh inline-expansion stack for recursion detection in this body. */
    StrVec inline_stack;
    VEC_INIT(&inline_stack);
    emit_ops(ctx, &builder, &def->body, &inline_stack);

    emit_line(&builder, " ret");
}
|
|
|
|
/*
 * Emits a word whose body is raw assembly text.
 *
 * Writes the word's label, then copies the body line by line into the
 * text section, and finally appends ` ret`. Empty lines are dropped. A
 * line whose content, after skipping leading whitespace, ends in ':' is
 * emitted without its leading whitespace; every other line is emitted
 * unchanged. Does nothing when `def` or its body is NULL.
 */
static void emit_asm_definition(EmitContext *ctx, AsmDefinition *def) {
    if (def == NULL || def->body == NULL) {
        return;
    }

    VEC_PUSH(&ctx->emission->text, str_printf("%s:", emit_word_label(ctx, def->name)));

    const char *cursor = def->body;
    while (*cursor) {
        const char *newline = strchr(cursor, '\n');
        size_t length = newline ? (size_t)(newline - cursor) : strlen(cursor);

        if (length > 0) {
            char *copy = (char *)xmalloc(length + 1);
            memcpy(copy, cursor, length);
            copy[length] = '\0';

            const char *content = copy;
            while (*content && isspace((unsigned char)*content)) {
                content++;
            }

            size_t content_len = strlen(content);
            bool is_label = content_len > 0 && content[content_len - 1] == ':';
            if (is_label) {
                /* Labels are emitted flush left. */
                VEC_PUSH(&ctx->emission->text, str_dup(content));
                free(copy);
            } else {
                VEC_PUSH(&ctx->emission->text, copy);
            }
        }

        if (newline == NULL) {
            break;
        }
        cursor = newline + 1;
    }

    VEC_PUSH(&ctx->emission->text, str_dup(" ret"));
}
|
|
|
|
/*
 * Appends the default (no libc) prelude to the text section: buffer size
 * defines and a `_start` entry that records argc/argv from the initial
 * stack, sets up r12 (dstack_top), r13 (rstack_top) and the list capture
 * stack pointer, calls w_main, and then issues syscall 60 with the value
 * at [r12] in rdi.
 */
static void emit_default_prelude(Emission *emission) {
    static const char *const prelude_lines[] = {
        "%define DSTK_BYTES 65536",
        "%define RSTK_BYTES 65536",
        "%define PRINT_BUF_BYTES 4096",
        "global _start",
        "_start:",
        " mov rbx, rsp",
        " mov rax, [rbx]",
        " mov [rel sys_argc], rax",
        " lea rax, [rbx + 8]",
        " mov [rel sys_argv], rax",
        " lea r12, [rel dstack_top]",
        " lea r13, [rel rstack_top]",
        " lea rax, [rel list_capture_stack]",
        " mov [rel list_capture_sp], rax",
        " call w_main",
        " mov rax, [r12]",
        " mov rdi, rax",
        " mov rax, 60",
        " syscall",
    };

    for (size_t i = 0; i < ARRAY_LEN(prelude_lines); i++) {
        VEC_PUSH(&emission->text, str_dup(prelude_lines[i]));
    }
}
|
|
|
|
/*
 * Appends the libc-hosted prelude to the text section: buffer size defines
 * and a global `main` that stores rdi/rsi as sys_argc/sys_argv, sets up
 * r12 (dstack_top), r13 (rstack_top) and the list capture stack pointer,
 * calls w_main, and returns with the value at [r12] in rax.
 */
static void emit_libc_prelude(Emission *emission) {
    static const char *const prelude_lines[] = {
        "%define DSTK_BYTES 65536",
        "%define RSTK_BYTES 65536",
        "%define PRINT_BUF_BYTES 4096",
        "global main",
        "main:",
        " mov [rel sys_argc], rdi",
        " mov [rel sys_argv], rsi",
        " lea r12, [rel dstack_top]",
        " lea r13, [rel rstack_top]",
        " lea rax, [rel list_capture_stack]",
        " mov [rel list_capture_sp], rax",
        " call w_main",
        " mov rax, [r12]",
        " ret",
    };

    for (size_t i = 0; i < ARRAY_LEN(prelude_lines); i++) {
        VEC_PUSH(&emission->text, str_dup(prelude_lines[i]));
    }
}
|
|
|
|
/*
 * Appends the default .bss reservations: the two stacks (with their `_top`
 * labels placed after the reserved bytes), the print buffer, a 64-byte
 * `persistent` area, and the list capture pointer, temporary and stack.
 */
static void emit_default_bss(Emission *emission) {
    static const char *const bss_lines[] = {
        "align 16",
        "dstack: resb DSTK_BYTES",
        "dstack_top:",
        "align 16",
        "rstack: resb RSTK_BYTES",
        "rstack_top:",
        "align 16",
        "print_buf: resb PRINT_BUF_BYTES",
        "print_buf_end:",
        "align 16",
        "persistent: resb 64",
        "persistent_end:",
        "align 16",
        "list_capture_sp: resq 1",
        "list_capture_tmp: resq 1",
        "list_capture_stack: resq 1024",
    };

    for (size_t i = 0; i < ARRAY_LEN(bss_lines); i++) {
        VEC_PUSH(&emission->bss, str_dup(bss_lines[i]));
    }
}
|
|
|
|
/*
 * Produces the complete Emission for a parsed module.
 *
 * Steps, in order:
 *  1. prelude: the parser's custom prelude if present, else the libc
 *     prelude when parser->uses_libc is set, else the default prelude;
 *  2. the sys_argc / sys_argv data cells;
 *  3. bss: the parser's custom bss if present, else the default bss;
 *  4. every FORM_DEF / FORM_ASM form that is not compile-only and is still
 *     the definition the dictionary holds for its name;
 *  5. one `dq 0` data cell per label stored in parser->variable_labels.
 */
static Emission emit_module(Parser *parser, Dictionary *dict, bool debug) {
    Emission emission;
    emission_init(&emission);

    EmitContext ctx = {
        .emission = &emission,
        .dictionary = dict,
        .unique_id = 0,
        .debug = debug,
    };
    strmap_init(&ctx.string_labels);
    strmap_init(&ctx.externs);
    strmap_init(&ctx.label_cache);

    /* 1. prelude */
    if (parser->custom_prelude) {
        for (size_t i = 0; i < parser->custom_prelude->len; i++) {
            VEC_PUSH(&emission.text, str_dup(parser->custom_prelude->data[i]));
        }
    } else if (parser->uses_libc) {
        emit_libc_prelude(&emission);
    } else {
        emit_default_prelude(&emission);
    }

    /* 2. argc / argv cells */
    VEC_PUSH(&emission.data, str_dup("sys_argc: dq 0"));
    VEC_PUSH(&emission.data, str_dup("sys_argv: dq 0"));

    /* 3. bss */
    if (parser->custom_bss) {
        for (size_t i = 0; i < parser->custom_bss->len; i++) {
            VEC_PUSH(&emission.bss, str_dup(parser->custom_bss->data[i]));
        }
    } else {
        emit_default_bss(&emission);
    }

    /* 4. definitions; shadowed or compile-only forms are skipped */
    for (size_t i = 0; i < parser->module.forms.len; i++) {
        Form form = parser->module.forms.data[i];

        if (form.kind == FORM_DEF) {
            Definition *def = (Definition *)form.ptr;
            if (!def->compile_only) {
                Word *owner = dictionary_lookup(dict, def->name);
                if (owner && owner->definition == def) {
                    emit_definition(&ctx, def);
                }
            }
        } else if (form.kind == FORM_ASM) {
            AsmDefinition *def = (AsmDefinition *)form.ptr;
            if (!def->compile_only) {
                Word *owner = dictionary_lookup(dict, def->name);
                if (owner && owner->asm_def == def) {
                    emit_asm_definition(&ctx, def);
                }
            }
        }
    }

    /* 5. variables: walk the map's slots directly, skipping empty ones */
    for (size_t slot = 0; slot < parser->variable_labels.cap; slot++) {
        if (parser->variable_labels.keys && parser->variable_labels.keys[slot]) {
            const char *label = (const char *)parser->variable_labels.values[slot];
            if (label) {
                VEC_PUSH(&emission.data, str_printf("%s: dq 0", label));
            }
        }
    }

    return emission;
}
|
|
|
|
/*
 * Appends `line` plus a newline to a growable, NUL-terminated buffer.
 * `*buf` may start as NULL with `*len` and `*cap` both 0; capacity is
 * doubled as needed.
 */
static void snapshot_append_line(char **buf, size_t *len, size_t *cap, const char *line) {
    size_t line_len = strlen(line);
    size_t needed = *len + line_len + 2; /* line + '\n' + NUL */

    if (needed > *cap) {
        size_t grown = *cap ? *cap : 256;
        while (grown < needed) {
            grown *= 2;
        }
        *buf = (char *)xrealloc(*buf, grown);
        *cap = grown;
    }

    memcpy(*buf + *len, line, line_len);
    *len += line_len;
    (*buf)[(*len)++] = '\n';
    (*buf)[*len] = '\0';
}

/*
 * Renders an Emission as one newline-terminated assembly listing.
 *
 * Each non-empty vector is written under its section header (.text,
 * .data, .bss); the data section is additionally wrapped in `data_start:`
 * and `data_end:` labels. NULL entries are skipped. A
 * `.note.GNU-stack` section line is always appended last.
 *
 * Returns a heap-allocated string owned by the caller.
 *
 * The listing is built directly in a growable buffer: the previous
 * version duplicated every line into a temporary vector that was never
 * freed and joined them with repeated strcat(), which is quadratic in the
 * output size.
 */
static char *emission_snapshot(Emission *emission) {
    char *buf = NULL;
    size_t len = 0;
    size_t cap = 0;

    if (emission->text.len) {
        snapshot_append_line(&buf, &len, &cap, "section .text");
        for (size_t i = 0; i < emission->text.len; i++) {
            if (emission->text.data[i]) {
                snapshot_append_line(&buf, &len, &cap, emission->text.data[i]);
            }
        }
    }

    if (emission->data.len) {
        snapshot_append_line(&buf, &len, &cap, "section .data");
        snapshot_append_line(&buf, &len, &cap, "data_start:");
        for (size_t i = 0; i < emission->data.len; i++) {
            if (emission->data.data[i]) {
                snapshot_append_line(&buf, &len, &cap, emission->data.data[i]);
            }
        }
        snapshot_append_line(&buf, &len, &cap, "data_end:");
    }

    if (emission->bss.len) {
        snapshot_append_line(&buf, &len, &cap, "section .bss");
        for (size_t i = 0; i < emission->bss.len; i++) {
            if (emission->bss.data[i]) {
                snapshot_append_line(&buf, &len, &cap, emission->bss.data[i]);
            }
        }
    }

    snapshot_append_line(&buf, &len, &cap, "section .note.GNU-stack noalloc noexec nowrite");
    return buf;
}
|
|
|
|
/*
 * Writes the NUL-terminated string `data` to `path`, replacing any
 * existing file.
 *
 * Any failure (open, write, or close) prints an error including the
 * system error text to stderr and terminates the process with status 1.
 * Write and close errors were previously ignored, so a full disk could
 * leave a truncated file without any diagnostic.
 */
static void write_file(const char *path, const char *data) {
    FILE *f = fopen(path, "w");
    if (!f) {
        fprintf(stderr, "[error] failed to write %s: %s\n", path, strerror(errno));
        exit(1);
    }

    if (fputs(data, f) == EOF) {
        int saved = errno; /* fclose() may overwrite errno */
        fclose(f);
        fprintf(stderr, "[error] failed to write %s: %s\n", path, strerror(saved));
        exit(1);
    }

    /* Buffered data is flushed here, so the close result matters too. */
    if (fclose(f) != 0) {
        fprintf(stderr, "[error] failed to write %s: %s\n", path, strerror(errno));
        exit(1);
    }
}
|
|
|
|
/*
 * Runs the command described by the NULL-terminated `argv` (looked up via
 * PATH by execvp) in a child process and waits for it.
 *
 * Terminates the calling process with status 1 if the fork fails, the
 * wait fails, or the child does not exit normally with status 0. If the
 * exec itself fails, the child reports it on stderr and exits with 1.
 *
 * waitpid() is retried when interrupted by a signal (EINTR) instead of
 * treating the interruption as a failure.
 */
static void run_cmd(char *const argv[]) {
    pid_t pid = fork();
    if (pid < 0) {
        fprintf(stderr, "[error] fork failed: %s\n", strerror(errno));
        exit(1);
    }

    if (pid == 0) {
        execvp(argv[0], argv);
        /* Only reached when exec failed. */
        fprintf(stderr, "[error] failed to exec %s: %s\n", argv[0], strerror(errno));
        _exit(1);
    }

    int status = 0;
    pid_t waited;
    do {
        waited = waitpid(pid, &status, 0);
    } while (waited < 0 && errno == EINTR);

    if (waited < 0) {
        fprintf(stderr, "[error] waitpid failed: %s\n", strerror(errno));
        exit(1);
    }

    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
        fprintf(stderr, "[error] command failed\n");
        exit(1);
    }
}
|
|
|
|
/*
 * Assembles `asm_path` into the ELF64 object `obj_path` by running
 * `nasm -f elf64 [-g -F dwarf] -o <obj> <asm>` through run_cmd().
 *
 * Fix: the argument array had 8 slots, but with `debug` set ten entries
 * are written (3 fixed + 3 debug + 3 output/input + the NULL terminator),
 * overflowing the stack buffer.
 */
static void run_nasm(const char *asm_path, const char *obj_path, bool debug) {
    /* Worst case: nasm -f elf64 -g -F dwarf -o OBJ ASM NULL = 10 entries. */
    char *argv[10];
    int idx = 0;

    argv[idx++] = "nasm";
    argv[idx++] = "-f";
    argv[idx++] = "elf64";
    if (debug) {
        argv[idx++] = "-g";
        argv[idx++] = "-F";
        argv[idx++] = "dwarf";
    }
    argv[idx++] = "-o";
    argv[idx++] = (char *)obj_path;
    argv[idx++] = (char *)asm_path;
    argv[idx++] = NULL;

    run_cmd(argv);
}
|
|
|
|
/*
 * Links `obj_path` into `exe_path` by building a linker command line and
 * running it through run_cmd().
 *
 * Linker choice: with `use_libc`, `cc` (if /usr/bin/cc is executable) or
 * else `gcc`; otherwise `ld.lld` or else `ld`, probed the same way. It is
 * a fatal error when none is found.
 *
 * Options: `-m elf_x86_64` for lld, `-shared` when requested, `-o`, the
 * object, then either `-no-pie` (libc), `-nostdlib -static` (no libs, not
 * shared) or `-dynamic-linker <path>` using the first readable loader
 * candidate (libs given, not shared); then the `libs` entries and finally
 * `-g` when `debug` is set.
 */
static void run_linker(const char *obj_path, const char *exe_path, bool debug, StrVec *libs, bool shared, bool use_libc) {
    const char *linker = NULL;

    if (use_libc) {
        if (access("/usr/bin/cc", X_OK) == 0) {
            linker = "cc";
        } else if (access("/usr/bin/gcc", X_OK) == 0) {
            linker = "gcc";
        } else {
            fprintf(stderr, "[error] no C compiler found for libc linking\n");
            exit(1);
        }
    } else {
        if (access("/usr/bin/ld.lld", X_OK) == 0) {
            linker = "ld.lld";
        } else if (access("/usr/bin/ld", X_OK) == 0) {
            linker = "ld";
        } else {
            fprintf(stderr, "[error] no linker found\n");
            exit(1);
        }
    }

    StrVec args;
    VEC_INIT(&args);
    VEC_PUSH(&args, str_dup((char *)linker));

    if (!use_libc && strstr(linker, "lld")) {
        VEC_PUSH(&args, str_dup("-m"));
        VEC_PUSH(&args, str_dup("elf_x86_64"));
    }
    if (shared) {
        VEC_PUSH(&args, str_dup("-shared"));
    }

    VEC_PUSH(&args, str_dup("-o"));
    VEC_PUSH(&args, str_dup((char *)exe_path));
    VEC_PUSH(&args, str_dup((char *)obj_path));

    bool has_libs = libs && libs->len != 0;

    if (use_libc) {
        VEC_PUSH(&args, str_dup("-no-pie"));
    } else if (!shared) {
        if (!has_libs) {
            VEC_PUSH(&args, str_dup("-nostdlib"));
            VEC_PUSH(&args, str_dup("-static"));
        } else {
            /* Dynamic executable: name the first loader that is readable. */
            static const char *const loaders[] = {
                "/lib64/ld-linux-x86-64.so.2",
                "/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2",
                "/lib/ld-linux-x86-64.so.2",
                "/lib/ld64.so.1"
            };
            const char *chosen = NULL;
            for (size_t i = 0; i < ARRAY_LEN(loaders) && !chosen; i++) {
                if (access(loaders[i], R_OK) == 0) {
                    chosen = loaders[i];
                }
            }
            if (chosen) {
                VEC_PUSH(&args, str_dup("-dynamic-linker"));
                VEC_PUSH(&args, str_dup(chosen));
            }
        }
    }

    if (libs) {
        for (size_t i = 0; i < libs->len; i++) {
            VEC_PUSH(&args, str_dup(libs->data[i]));
        }
    }
    if (debug) {
        VEC_PUSH(&args, str_dup("-g"));
    }

    VEC_PUSH(&args, NULL); /* argv terminator */
    run_cmd(args.data);
}
|
|
|
|
/*
 * Reads the whole file at `path` into a freshly allocated, NUL-terminated
 * buffer.
 *
 * Returns the buffer (owned by the caller), or NULL when the file cannot
 * be opened, its size cannot be determined, or reading fails. Seek and
 * read errors were previously ignored, which could silently yield
 * truncated content.
 */
static char *read_text_file(const char *path) {
    long size = 0;
    char *buf = NULL;

    FILE *f = fopen(path, "r");
    if (!f) {
        return NULL;
    }

    if (fseek(f, 0, SEEK_END) != 0) {
        goto fail;
    }
    size = ftell(f);
    if (size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        goto fail;
    }

    buf = (char *)xmalloc((size_t)size + 1);
    size_t n = fread(buf, 1, (size_t)size, f);
    if (ferror(f)) {
        goto fail;
    }

    /* Terminate at the count actually read, which may be below `size`. */
    buf[n] = '\0';
    fclose(f);
    return buf;

fail:
    free(buf);
    fclose(f);
    return NULL;
}
|
|
|
|
/*
 * Returns true when `path` can be accessed for reading by the calling
 * process (access() with R_OK), false otherwise.
 */
static bool file_exists(const char *path) {
    if (access(path, R_OK) != 0) {
        return false;
    }
    return true;
}
|
|
|
|
/*
 * Returns a newly allocated copy of the directory part of `path`:
 * everything before the last '/'. A path without any '/' yields ".", and
 * a path whose only '/' is its first character yields "/".
 */
static char *path_dirname(const char *path) {
    const char *last_slash = strrchr(path, '/');

    if (last_slash == NULL) {
        return str_dup(".");
    }
    if (last_slash == path) {
        return str_dup("/");
    }

    size_t dir_len = (size_t)(last_slash - path);
    char *dir = (char *)xmalloc(dir_len + 1);
    memcpy(dir, path, dir_len);
    dir[dir_len] = '\0';
    return dir;
}
|
|
|
|
/*
 * Returns a newly allocated copy of the part of `path` after its last
 * '/'. When there is no '/' or nothing follows the last '/', the whole
 * path is copied instead. A NULL path yields an empty string.
 */
static char *path_basename(const char *path) {
    if (path == NULL) {
        return str_dup("");
    }

    const char *last_slash = strrchr(path, '/');
    bool has_tail = last_slash != NULL && last_slash[1] != '\0';

    return str_dup(has_tail ? last_slash + 1 : path);
}
|
|
|
|
/*
 * Joins two path components into a newly allocated string, inserting a
 * single '/' unless `a` already ends with one. If `a` is NULL or empty a
 * copy of `b` is returned; otherwise if `b` is NULL or empty a copy of `a`
 * is returned.
 */
static char *path_join(const char *a, const char *b) {
    if (a == NULL || a[0] == '\0') {
        return str_dup(b);
    }
    if (b == NULL || b[0] == '\0') {
        return str_dup(a);
    }

    const char *separator = (a[strlen(a) - 1] == '/') ? "" : "/";
    return str_printf("%s%s%s", a, separator, b);
}
|
|
|
|
/*
 * Resolves an import path to an existing file.
 *
 * Absolute paths are used as given. Relative paths are tried first
 * against `base_dir` (when non-NULL) and then against each entry of
 * `include_dirs` in order. Returns a newly allocated path for the first
 * candidate that file_exists() accepts, or NULL when the import path is
 * NULL/empty or nothing matches.
 */
static char *resolve_import(const char *base_dir, const char *import_path, StrVec *include_dirs) {
    if (import_path == NULL || *import_path == '\0') {
        return NULL;
    }

    if (import_path[0] == '/') {
        if (!file_exists(import_path)) {
            return NULL;
        }
        return str_dup(import_path);
    }

    if (base_dir) {
        char *beside_importer = path_join(base_dir, import_path);
        if (file_exists(beside_importer)) {
            return beside_importer;
        }
        free(beside_importer);
    }

    size_t dir_count = include_dirs ? include_dirs->len : 0;
    for (size_t i = 0; i < dir_count; i++) {
        char *in_include_dir = path_join(include_dirs->data[i], import_path);
        if (file_exists(in_include_dir)) {
            return in_include_dir;
        }
        free(in_include_dir);
    }

    return NULL;
}
|
|
|
|
/*
 * Records that merged lines [start_line, end_line) came from `path`,
 * starting at `local_start_line` within that file.
 */
static void expand_imports_push_span(FileSpanVec *spans, const char *path, int start_line, int end_line, int local_start_line) {
    FileSpan span = {0};
    span.path = str_dup(path);
    span.start_line = start_line;
    span.end_line = end_line;
    span.local_start_line = local_start_line;
    VEC_PUSH(spans, span);
}

/*
 * Returns the text of `path` with every `import <file>` line replaced by
 * the recursively expanded content of that file followed by one newline.
 *
 * - A file already present in `visited` expands to an empty string, so
 *   each file is included at most once.
 * - An import line is a line whose first non-blank word is `import`
 *   followed by a space or tab and a non-empty path; the path ends at
 *   whitespace or '#'. Imports are resolved with resolve_import() relative
 *   to the importing file's directory and then `include_dirs`.
 * - `*line_counter` counts lines of the merged output; `spans` receives
 *   one FileSpan per contiguous run of lines taken from `path`.
 * - An unreadable file or an unresolved import is a fatal error.
 *
 * The result is heap-allocated and owned by the caller.
 *
 * Compared with the previous version the pieces are joined with a running
 * write pointer instead of repeated strcat() (which was quadratic), and
 * the intermediate pieces, the piece array, and empty sub-expansions are
 * now freed.
 */
static char *expand_imports(const char *path, StrVec *include_dirs, StrMap *visited, FileSpanVec *spans, int *line_counter) {
    if (strmap_has(visited, path)) {
        return str_dup("");
    }
    strmap_set(visited, path, (void *)1);

    char *content = read_text_file(path);
    if (!content) {
        fprintf(stderr, "[error] failed to read %s\n", path);
        exit(1);
    }
    char *base_dir = path_dirname(path);

    StrVec parts;
    VEC_INIT(&parts);

    const char *cursor = content;
    int local_line = 1;
    int span_start = *line_counter;
    int span_local_start = local_line;
    bool span_active = false;

    while (*cursor) {
        const char *line_end = strchr(cursor, '\n');
        size_t len = line_end ? (size_t)(line_end - cursor) : strlen(cursor);
        char *line = (char *)xmalloc(len + 1);
        memcpy(line, cursor, len);
        line[len] = '\0';

        char *trim = line;
        while (*trim && isspace((unsigned char)*trim)) {
            trim++;
        }

        bool is_import = false;
        if (str_starts_with(trim, "import") && (trim[6] == ' ' || trim[6] == '\t')) {
            trim += 6;
            while (*trim && isspace((unsigned char)*trim)) {
                trim++;
            }
            char *end = trim;
            while (*end && !isspace((unsigned char)*end) && *end != '#') {
                end++;
            }

            if (end > trim) {
                size_t import_len = (size_t)(end - trim);
                char *import_path = (char *)xmalloc(import_len + 1);
                memcpy(import_path, trim, import_len);
                import_path[import_len] = '\0';

                char *resolved = resolve_import(base_dir, import_path, include_dirs);
                if (!resolved) {
                    fprintf(stderr, "[error] import not found: %s\n", import_path);
                    exit(1);
                }

                /* Close the current run before the imported file's spans. */
                if (span_active) {
                    expand_imports_push_span(spans, path, span_start, *line_counter, span_local_start);
                    span_active = false;
                }

                char *expanded = expand_imports(resolved, include_dirs, visited, spans, line_counter);
                if (expanded && *expanded) {
                    VEC_PUSH(&parts, expanded);
                } else {
                    free(expanded);
                }

                /* The import line itself becomes a single empty line. */
                VEC_PUSH(&parts, str_dup("\n"));
                (*line_counter)++;
                local_line++;

                free(resolved);
                free(import_path);
                is_import = true;
            }
        }

        if (!is_import) {
            if (!span_active) {
                span_start = *line_counter;
                span_local_start = local_line;
                span_active = true;
            }
            VEC_PUSH(&parts, line); /* ownership moves to `parts` */
            VEC_PUSH(&parts, str_dup("\n"));
            (*line_counter)++;
            local_line++;
        } else {
            free(line);
        }

        if (!line_end) {
            break;
        }
        cursor = line_end + 1;
    }

    if (span_active) {
        expand_imports_push_span(spans, path, span_start, *line_counter, span_local_start);
    }

    size_t total = 0;
    for (size_t i = 0; i < parts.len; i++) {
        total += strlen(parts.data[i]);
    }

    char *out = (char *)xmalloc(total + 1);
    char *write_pos = out;
    for (size_t i = 0; i < parts.len; i++) {
        size_t part_len = strlen(parts.data[i]);
        memcpy(write_pos, parts.data[i], part_len);
        write_pos += part_len;
        free(parts.data[i]);
    }
    *write_pos = '\0';

    free(parts.data);
    free(content);
    free(base_dir);
    return out;
}
|
|
|
|
/*
 * Decodes a double-quoted lexeme into a freshly allocated C string.
 *
 * Returns false (leaving *out untouched) unless `lexeme` is at least two
 * characters long and both starts and ends with '"'. Otherwise the text
 * between the quotes is copied with the escapes \n \t \r \0 \" \\ translated,
 * *out receives the buffer and true is returned. A trailing lone backslash or
 * any other escape character terminates the process with an error.
 */
static bool parse_string_literal(const char *lexeme, char **out) {
    const size_t total = strlen(lexeme);
    const bool quoted = total >= 2 && lexeme[0] == '"' && lexeme[total - 1] == '"';
    if (!quoted) {
        return false;
    }

    const char *src = lexeme + 1;           /* first character after the opening quote */
    const char *stop = lexeme + total - 1;  /* the closing quote */
    char *decoded = (char *)xmalloc((total - 2) + 1);
    char *dst = decoded;

    while (src < stop) {
        const char ch = *src++;
        if (ch != '\\') {
            *dst++ = ch;
            continue;
        }
        if (src >= stop) {
            fprintf(stderr, "[error] unterminated escape sequence\n");
            exit(1);
        }
        const char esc = *src++;
        switch (esc) {
        case 'n':
            *dst++ = '\n';
            break;
        case 't':
            *dst++ = '\t';
            break;
        case 'r':
            *dst++ = '\r';
            break;
        case '0':
            *dst++ = '\0';
            break;
        case '"':
            *dst++ = '"';
            break;
        case '\\':
            *dst++ = '\\';
            break;
        default:
            fprintf(stderr, "[error] unsupported escape sequence \\%c\n", esc);
            exit(1);
        }
    }

    *dst = '\0';
    *out = decoded;
    return true;
}
|
|
|
|
/*
 * Parses `lexeme` as a whole-string integer using strtoll with base 0
 * (so decimal, 0x-prefixed hex and 0-prefixed octal are accepted).
 *
 * Returns true and stores the value in *out only when at least one digit was
 * consumed, the entire lexeme was consumed, and no range error occurred.
 * *out is left untouched on failure.
 */
static bool try_parse_int(const char *lexeme, int64_t *out) {
    char *end = NULL;
    errno = 0;
    long long val = strtoll(lexeme, &end, 0);
    if (errno != 0 || !end) {
        return false;
    }
    /* end == lexeme means nothing was converted; without this check an empty
     * lexeme would be reported as the integer 0. */
    if (end == lexeme || *end != '\0') {
        return false;
    }
    *out = (int64_t)val;
    return true;
}
|
|
|
|
/*
 * Parses `lexeme` as a whole-string floating-point number.
 *
 * Lexemes containing none of '.', 'e' or 'E' are rejected up front. Otherwise
 * strtod must consume the entire lexeme without setting errno; on success the
 * value is written to *out and true is returned. *out is untouched on failure.
 */
static bool try_parse_float(const char *lexeme, double *out) {
    if (strpbrk(lexeme, ".eE") == NULL) {
        return false;
    }

    errno = 0;
    char *stop = NULL;
    const double parsed = strtod(lexeme, &stop);
    const bool clean = errno == 0 && stop != NULL && *stop == '\0';
    if (clean) {
        *out = parsed;
    }
    return clean;
}
|
|
|
|
/*
 * Splices the tokens of `injected` into the parser's token array at the
 * current read position, so they are the next tokens at that position.
 *
 * Tokens are copied by value; `injected` itself is not modified. A NULL or
 * empty `injected` is a no-op. A read position past the end of the array is
 * clamped to the end before inserting.
 */
static void parser_inject_tokens(Parser *parser, TokenVec *injected) {
    if (injected == NULL || injected->len == 0) {
        return;
    }
    if (parser->pos > parser->tokens.len) {
        parser->pos = parser->tokens.len;
    }

    const size_t insert_at = parser->pos;
    const size_t extra = injected->len;
    const size_t tail = parser->tokens.len - insert_at;
    const size_t grown = parser->tokens.len + extra;

    if (parser->tokens.cap < grown) {
        /* Grow with a little slack beyond what is strictly needed. */
        parser->tokens.cap = grown + 16;
        parser->tokens.data = xrealloc(parser->tokens.data, parser->tokens.cap * sizeof(Token));
    }

    /* Shift the unread tail up, then drop the new tokens into the gap. */
    Token *slot = parser->tokens.data + insert_at;
    memmove(slot + extra, slot, tail * sizeof(Token));
    memcpy(slot, injected->data, extra * sizeof(Token));
    parser->tokens.len = grown;
}
|
|
|
|
/*
 * Begins recording a macro called `name` that takes `param_count` parameters.
 * The recording state stores its own copy of `name` and starts with an empty
 * token list. Starting a macro while another is being recorded is fatal.
 */
static void parser_start_macro(Parser *parser, const char *name, int param_count) {
    if (!parser->macro_recording.active) {
        parser->macro_recording.param_count = param_count;
        VEC_INIT(&parser->macro_recording.tokens);
        parser->macro_recording.name = str_dup(name);
        parser->macro_recording.active = true;
        return;
    }

    fprintf(stderr, "[error] nested macro definitions are not supported\n");
    exit(1);
}
|
|
|
|
/*
 * Ends the macro recording in progress and registers the result in the
 * dictionary as a Word whose macro_expansion is a NULL-terminated array of the
 * recorded lexemes and whose macro_param_count is the recorded parameter count.
 *
 * Calling this while no recording is active is fatal. On return the recording
 * state is inactive and holds no allocations: the recorded strings now belong
 * to the new Word, and the recording's name copy and token storage are freed
 * (previously they were abandoned when the next macro started recording).
 */
static void parser_finish_macro(Parser *parser) {
    if (!parser->macro_recording.active) {
        fprintf(stderr, "[error] unexpected ';' closing a macro\n");
        exit(1);
    }

    const size_t count = parser->macro_recording.tokens.len;

    Word *word = (Word *)xmalloc(sizeof(Word));
    memset(word, 0, sizeof(Word));
    word->name = str_dup(parser->macro_recording.name);
    word->macro_param_count = parser->macro_recording.param_count;
    word->macro_expansion = (char **)xmalloc((count + 1) * sizeof(char *));
    for (size_t i = 0; i < count; i++) {
        /* Hand the recorded string over to the word instead of duplicating it. */
        word->macro_expansion[i] = parser->macro_recording.tokens.data[i];
    }
    word->macro_expansion[count] = NULL;
    dictionary_register(parser->dictionary, word);

    /* Release the recording's storage and leave it empty for the next macro. */
    VEC_FREE(&parser->macro_recording.tokens);
    VEC_INIT(&parser->macro_recording.tokens);
    free(parser->macro_recording.name);
    parser->macro_recording.name = NULL;
    parser->macro_recording.active = false;
}
|
|
|
|
/*
 * Emits an OP_LITERAL op of the given kind through parser_emit_op().
 *
 * LIT_INT uses `i64`, LIT_FLOAT uses `f64`; every other kind stores a copy of
 * `str` (made with str_dup). Arguments not relevant to `kind` are ignored.
 */
static void parser_emit_literal(Parser *parser, LiteralKind kind, int64_t i64, double f64, const char *str) {
    Op literal = {0};
    literal.kind = OP_LITERAL;
    literal.lit_kind = kind;

    switch (kind) {
    case LIT_INT:
        literal.data.i64 = i64;
        break;
    case LIT_FLOAT:
        literal.data.f64 = f64;
        break;
    default:
        literal.data.str = str_dup(str);
        break;
    }

    parser_emit_op(parser, literal);
}
|
|
|
|
/* Forward declaration: parse_tokens() below calls parser_handle_token(), whose
 * definition appears later in this file. */
static void parser_handle_token(Parser *parser, Token token);
|
|
|
|
static void parse_tokens(Parser *parser, const char *source) {
|
|
parser->source = str_dup(source);
|
|
tokenizer_init(&parser->tokenizer, parser->reader, source);
|
|
parser->tokenizer_exhausted = false;
|
|
parser->pos = 0;
|
|
parser->current_def = NULL;
|
|
parser->control_len = 0;
|
|
parser->label_counter = 0;
|
|
parser->token_hook = NULL;
|
|
parser->has_last_token = false;
|
|
parser->custom_prelude = NULL;
|
|
parser->custom_bss = NULL;
|
|
parser->pending_inline_def = false;
|
|
|
|
while (!parser_eof(parser)) {
|
|
Token token = parser_next_token(parser);
|
|
if (!token.lexeme) {
|
|
break;
|
|
}
|
|
if (parser->macro_recording.active) {
|
|
if (strcmp(token.lexeme, ";") == 0) {
|
|
parser_finish_macro(parser);
|
|
} else {
|
|
VEC_PUSH(&parser->macro_recording.tokens, str_dup(token.lexeme));
|
|
}
|
|
continue;
|
|
}
|
|
if (strcmp(token.lexeme, "[") == 0) {
|
|
Op op = {0};
|
|
op.kind = OP_LIST_BEGIN;
|
|
op.data.label = parser_new_label(parser, "list");
|
|
parser_emit_op(parser, op);
|
|
parser_push_control(parser, "list");
|
|
parser->control_stack[parser->control_len - 1].begin_label = op.data.label;
|
|
continue;
|
|
}
|
|
if (strcmp(token.lexeme, "]") == 0) {
|
|
if (!parser->control_len || strcmp(parser->control_stack[parser->control_len - 1].type, "list") != 0) {
|
|
fprintf(stderr, "[error] mismatched ']'\n");
|
|
exit(1);
|
|
}
|
|
char *label = parser->control_stack[parser->control_len - 1].begin_label;
|
|
parser->control_len--;
|
|
Op op = {0};
|
|
op.kind = OP_LIST_END;
|
|
op.data.label = str_dup(label);
|
|
parser_emit_op(parser, op);
|
|
continue;
|
|
}
|
|
if (strcmp(token.lexeme, "word") == 0) {
|
|
Token name_tok = parser_next_token(parser);
|
|
if (!name_tok.lexeme) {
|
|
fprintf(stderr, "[error] definition name missing after 'word'\n");
|
|
exit(1);
|
|
}
|
|
Definition *def = (Definition *)xmalloc(sizeof(Definition));
|
|
memset(def, 0, sizeof(Definition));
|
|
def->name = str_dup(name_tok.lexeme);
|
|
VEC_INIT(&def->body);
|
|
def->terminator = str_dup("end");
|
|
def->inline_def = parser->pending_inline_def;
|
|
parser->pending_inline_def = false;
|
|
parser->current_def = def;
|
|
Word *word = dictionary_lookup(parser->dictionary, def->name);
|
|
if (!word) {
|
|
word = (Word *)xmalloc(sizeof(Word));
|
|
memset(word, 0, sizeof(Word));
|
|
word->name = str_dup(def->name);
|
|
dictionary_register(parser->dictionary, word);
|
|
}
|
|
word->prev_definition = word->definition;
|
|
word->prev_asm_def = word->asm_def;
|
|
word->immediate = false;
|
|
word->compile_only = false;
|
|
word->definition = def;
|
|
word->asm_def = NULL;
|
|
word->inline_def = def->inline_def;
|
|
if (parser->definition_stack_len + 1 > parser->definition_stack_cap) {
|
|
parser->definition_stack_cap = parser->definition_stack_cap ? parser->definition_stack_cap * 2 : 8;
|
|
parser->definition_stack = xrealloc(parser->definition_stack, parser->definition_stack_cap * sizeof(Word *));
|
|
}
|
|
parser->definition_stack[parser->definition_stack_len++] = word;
|
|
continue;
|
|
}
|
|
if (strcmp(token.lexeme, "end") == 0) {
|
|
if (parser->control_len) {
|
|
const char *type = parser->control_stack[parser->control_len - 1].type;
|
|
if (strcmp(type, "if") == 0 || strcmp(type, "elif") == 0) {
|
|
if (parser->control_stack[parser->control_len - 1].false_label) {
|
|
Op op = {0};
|
|
op.kind = OP_LABEL;
|
|
op.data.label = str_dup(parser->control_stack[parser->control_len - 1].false_label);
|
|
parser_emit_op(parser, op);
|
|
}
|
|
if (parser->control_stack[parser->control_len - 1].end_label) {
|
|
Op op = {0};
|
|
op.kind = OP_LABEL;
|
|
op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label);
|
|
parser_emit_op(parser, op);
|
|
}
|
|
parser->control_len--;
|
|
continue;
|
|
}
|
|
if (strcmp(type, "else") == 0) {
|
|
Op op = {0};
|
|
op.kind = OP_LABEL;
|
|
op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label);
|
|
parser_emit_op(parser, op);
|
|
parser->control_len--;
|
|
continue;
|
|
}
|
|
if (strcmp(type, "begin") == 0) {
|
|
Op op = {0};
|
|
op.kind = OP_JUMP;
|
|
op.data.label = str_dup(parser->control_stack[parser->control_len - 1].begin_label);
|
|
parser_emit_op(parser, op);
|
|
op.kind = OP_LABEL;
|
|
op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label);
|
|
parser_emit_op(parser, op);
|
|
parser->control_len--;
|
|
continue;
|
|
}
|
|
if (strcmp(type, "for") == 0) {
|
|
Op op = {0};
|
|
op.kind = OP_FOR_END;
|
|
op.data.loop.loop = str_dup(parser->control_stack[parser->control_len - 1].loop_label);
|
|
op.data.loop.end = str_dup(parser->control_stack[parser->control_len - 1].end_label);
|
|
parser_emit_op(parser, op);
|
|
parser->control_len--;
|
|
continue;
|
|
}
|
|
if (strcmp(type, "with") == 0) {
|
|
StrVec *with_names = &parser->control_stack[parser->control_len - 1].with_names;
|
|
for (size_t i = 0; i < with_names->len; i++) {
|
|
const char *name = with_names->data[i];
|
|
strmap_set(&parser->variable_words, name, NULL);
|
|
free(with_names->data[i]);
|
|
}
|
|
VEC_FREE(with_names);
|
|
parser->control_len--;
|
|
continue;
|
|
}
|
|
}
|
|
if (parser->current_def) {
|
|
Definition *def = parser->current_def;
|
|
Word *word = parser->definition_stack[parser->definition_stack_len - 1];
|
|
def->immediate = word->immediate;
|
|
def->compile_only = word->compile_only;
|
|
def->inline_def = word->inline_def;
|
|
Form form = {0};
|
|
form.kind = FORM_DEF;
|
|
form.ptr = def;
|
|
VEC_PUSH(&parser->module.forms, form);
|
|
parser->current_def = NULL;
|
|
parser->definition_stack_len--;
|
|
parser->last_defined = word;
|
|
continue;
|
|
}
|
|
fprintf(stderr, "[error] unexpected 'end'\n");
|
|
exit(1);
|
|
}
|
|
if (strcmp(token.lexeme, ":asm") == 0) {
|
|
Token name_tok = parser_next_token(parser);
|
|
if (!name_tok.lexeme) {
|
|
fprintf(stderr, "[error] definition name missing after ':asm'\n");
|
|
exit(1);
|
|
}
|
|
bool effect_string_io = false;
|
|
Token brace = parser_next_token(parser);
|
|
if (brace.lexeme && strcmp(brace.lexeme, "(") == 0) {
|
|
while (!parser_eof(parser)) {
|
|
Token meta = parser_next_token(parser);
|
|
if (!meta.lexeme) {
|
|
break;
|
|
}
|
|
if (strcmp(meta.lexeme, ")") == 0) {
|
|
break;
|
|
}
|
|
if (strcmp(meta.lexeme, "string-io") == 0) {
|
|
effect_string_io = true;
|
|
}
|
|
}
|
|
brace = parser_next_token(parser);
|
|
}
|
|
if (!brace.lexeme || strcmp(brace.lexeme, "{") != 0) {
|
|
fprintf(stderr, "[error] expected '{' after asm name, got '%s'\n", brace.lexeme ? brace.lexeme : "<eof>");
|
|
exit(1);
|
|
}
|
|
size_t body_start = (size_t)brace.end;
|
|
size_t body_end = body_start;
|
|
while (!parser_eof(parser)) {
|
|
Token next = parser_next_token(parser);
|
|
if (next.lexeme && strcmp(next.lexeme, "}") == 0) {
|
|
body_end = (size_t)next.start;
|
|
break;
|
|
}
|
|
}
|
|
if (body_end <= body_start) {
|
|
fprintf(stderr, "[error] missing '}' to terminate asm body\n");
|
|
exit(1);
|
|
}
|
|
size_t body_len = body_end - body_start;
|
|
char *body = (char *)xmalloc(body_len + 1);
|
|
memcpy(body, parser->source + body_start, body_len);
|
|
body[body_len] = '\0';
|
|
AsmDefinition *def = (AsmDefinition *)xmalloc(sizeof(AsmDefinition));
|
|
memset(def, 0, sizeof(AsmDefinition));
|
|
def->name = str_dup(name_tok.lexeme);
|
|
def->body = body;
|
|
def->effect_string_io = effect_string_io;
|
|
Token term = parser_next_token(parser);
|
|
if (!term.lexeme || strcmp(term.lexeme, ";") != 0) {
|
|
fprintf(stderr, "[error] expected ';' after asm definition\n");
|
|
exit(1);
|
|
}
|
|
Word *word = dictionary_lookup(parser->dictionary, def->name);
|
|
if (!word) {
|
|
word = (Word *)xmalloc(sizeof(Word));
|
|
memset(word, 0, sizeof(Word));
|
|
word->name = str_dup(def->name);
|
|
dictionary_register(parser->dictionary, word);
|
|
}
|
|
word->prev_definition = word->definition;
|
|
word->prev_asm_def = word->asm_def;
|
|
word->immediate = false;
|
|
word->compile_only = false;
|
|
word->asm_def = def;
|
|
word->definition = NULL;
|
|
Form form = {0};
|
|
form.kind = FORM_ASM;
|
|
form.ptr = def;
|
|
VEC_PUSH(&parser->module.forms, form);
|
|
parser->last_defined = word;
|
|
continue;
|
|
}
|
|
if (strcmp(token.lexeme, "extern") == 0) {
|
|
Token tok1 = parser_next_token(parser);
|
|
if (!tok1.lexeme) {
|
|
fprintf(stderr, "[error] extern missing name or return type\n");
|
|
exit(1);
|
|
}
|
|
Token peek = parser_peek_token(parser);
|
|
if (peek.lexeme && isdigit((unsigned char)peek.lexeme[0])) {
|
|
Word *word = dictionary_lookup(parser->dictionary, tok1.lexeme);
|
|
if (!word) {
|
|
word = (Word *)xmalloc(sizeof(Word));
|
|
memset(word, 0, sizeof(Word));
|
|
word->name = str_dup(tok1.lexeme);
|
|
dictionary_register(parser->dictionary, word);
|
|
}
|
|
word->is_extern = true;
|
|
parser_next_token(parser);
|
|
word->extern_inputs = atoi(peek.lexeme);
|
|
Token next = parser_peek_token(parser);
|
|
if (next.lexeme && isdigit((unsigned char)next.lexeme[0])) {
|
|
parser_next_token(parser);
|
|
word->extern_outputs = atoi(next.lexeme);
|
|
} else {
|
|
word->extern_outputs = 0;
|
|
}
|
|
continue;
|
|
}
|
|
Token tok2 = parser_next_token(parser);
|
|
Token tok3 = parser_next_token(parser);
|
|
if (tok2.lexeme && tok3.lexeme && strcmp(tok3.lexeme, "(") == 0) {
|
|
Word *word = dictionary_lookup(parser->dictionary, tok2.lexeme);
|
|
if (!word) {
|
|
word = (Word *)xmalloc(sizeof(Word));
|
|
memset(word, 0, sizeof(Word));
|
|
word->name = str_dup(tok2.lexeme);
|
|
dictionary_register(parser->dictionary, word);
|
|
}
|
|
word->is_extern = true;
|
|
word->extern_ret_type = str_dup(tok1.lexeme);
|
|
parser->uses_libc = true;
|
|
if (strcmp(tok1.lexeme, "double") == 0 || strcmp(tok1.lexeme, "float") == 0) {
|
|
if (strcmp(tok2.lexeme, "printf") != 0) {
|
|
parser->uses_libm = true;
|
|
}
|
|
}
|
|
word->extern_arg_types = NULL;
|
|
word->extern_arg_count = 0;
|
|
int cap = 0;
|
|
Token arg = parser_peek_token(parser);
|
|
if (arg.lexeme && strcmp(arg.lexeme, ")") == 0) {
|
|
parser_next_token(parser);
|
|
} else {
|
|
while (true) {
|
|
Token type_tok = parser_next_token(parser);
|
|
if (!type_tok.lexeme) {
|
|
fprintf(stderr, "[error] unterminated extern signature\n");
|
|
exit(1);
|
|
}
|
|
if (word->extern_arg_count + 1 > cap) {
|
|
cap = cap ? cap * 2 : 4;
|
|
word->extern_arg_types = xrealloc(word->extern_arg_types, (size_t)cap * sizeof(char *));
|
|
}
|
|
word->extern_arg_types[word->extern_arg_count++] = str_dup(type_tok.lexeme);
|
|
if (strcmp(type_tok.lexeme, "double") == 0 || strcmp(type_tok.lexeme, "float") == 0) {
|
|
if (strcmp(tok2.lexeme, "printf") != 0) {
|
|
parser->uses_libm = true;
|
|
}
|
|
}
|
|
Token maybe_name = parser_peek_token(parser);
|
|
if (maybe_name.lexeme && strcmp(maybe_name.lexeme, ",") != 0 && strcmp(maybe_name.lexeme, ")") != 0) {
|
|
parser_next_token(parser);
|
|
}
|
|
Token sep = parser_next_token(parser);
|
|
if (!sep.lexeme) {
|
|
fprintf(stderr, "[error] unterminated extern signature\n");
|
|
exit(1);
|
|
}
|
|
if (strcmp(sep.lexeme, ")") == 0) {
|
|
break;
|
|
}
|
|
if (strcmp(sep.lexeme, ",") != 0) {
|
|
fprintf(stderr, "[error] expected ',' or ')' in extern signature\n");
|
|
exit(1);
|
|
}
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
TokenVec reinject;
|
|
VEC_INIT(&reinject);
|
|
if (tok2.lexeme) {
|
|
VEC_PUSH(&reinject, tok2);
|
|
}
|
|
if (tok3.lexeme) {
|
|
VEC_PUSH(&reinject, tok3);
|
|
}
|
|
parser_inject_tokens(parser, &reinject);
|
|
Word *word = dictionary_lookup(parser->dictionary, tok1.lexeme);
|
|
if (!word) {
|
|
word = (Word *)xmalloc(sizeof(Word));
|
|
memset(word, 0, sizeof(Word));
|
|
word->name = str_dup(tok1.lexeme);
|
|
dictionary_register(parser->dictionary, word);
|
|
}
|
|
word->is_extern = true;
|
|
continue;
|
|
}
|
|
if (strcmp(token.lexeme, "if") == 0) {
|
|
char *false_label = parser_new_label(parser, "if_false");
|
|
Op op = {0};
|
|
op.kind = OP_BRANCH_ZERO;
|
|
op.data.label = str_dup(false_label);
|
|
parser_emit_op(parser, op);
|
|
parser_push_control(parser, "if");
|
|
parser->control_stack[parser->control_len - 1].false_label = false_label;
|
|
continue;
|
|
}
|
|
if (strcmp(token.lexeme, "else") == 0) {
|
|
if (!parser->control_len || (strcmp(parser->control_stack[parser->control_len - 1].type, "if") != 0 && strcmp(parser->control_stack[parser->control_len - 1].type, "elif") != 0)) {
|
|
fprintf(stderr, "[error] 'else' without matching if\n");
|
|
exit(1);
|
|
}
|
|
char *end_label = parser->control_stack[parser->control_len - 1].end_label;
|
|
if (!end_label) {
|
|
end_label = parser_new_label(parser, "if_end");
|
|
}
|
|
Op jump = {0};
|
|
jump.kind = OP_JUMP;
|
|
jump.data.label = str_dup(end_label);
|
|
parser_emit_op(parser, jump);
|
|
Op label = {0};
|
|
label.kind = OP_LABEL;
|
|
label.data.label = str_dup(parser->control_stack[parser->control_len - 1].false_label);
|
|
parser_emit_op(parser, label);
|
|
Token next = parser_peek_token(parser);
|
|
if (next.lexeme && next.line == token.line && strcmp(next.lexeme, "if") != 0) {
|
|
TokenVec cond_tokens;
|
|
VEC_INIT(&cond_tokens);
|
|
bool shorthand = false;
|
|
while (!parser_eof(parser)) {
|
|
Token cond = parser_next_token(parser);
|
|
if (!cond.lexeme) {
|
|
break;
|
|
}
|
|
if (cond.line != token.line) {
|
|
VEC_PUSH(&cond_tokens, cond);
|
|
break;
|
|
}
|
|
if (strcmp(cond.lexeme, "if") == 0) {
|
|
shorthand = true;
|
|
break;
|
|
}
|
|
VEC_PUSH(&cond_tokens, cond);
|
|
}
|
|
if (shorthand) {
|
|
for (size_t i = 0; i < cond_tokens.len; i++) {
|
|
parser_handle_token(parser, cond_tokens.data[i]);
|
|
}
|
|
char *false_label = parser_new_label(parser, "if_false");
|
|
Op br = {0};
|
|
br.kind = OP_BRANCH_ZERO;
|
|
br.data.label = str_dup(false_label);
|
|
parser_emit_op(parser, br);
|
|
parser->control_stack[parser->control_len - 1].type = str_dup("elif");
|
|
parser->control_stack[parser->control_len - 1].false_label = false_label;
|
|
parser->control_stack[parser->control_len - 1].end_label = end_label;
|
|
} else {
|
|
parser_inject_tokens(parser, &cond_tokens);
|
|
parser->control_stack[parser->control_len - 1].type = str_dup("else");
|
|
parser->control_stack[parser->control_len - 1].end_label = end_label;
|
|
}
|
|
} else {
|
|
parser->control_stack[parser->control_len - 1].type = str_dup("else");
|
|
parser->control_stack[parser->control_len - 1].end_label = end_label;
|
|
}
|
|
continue;
|
|
}
|
|
if (strcmp(token.lexeme, "for") == 0) {
|
|
char *loop_label = parser_new_label(parser, "for_loop");
|
|
char *end_label = parser_new_label(parser, "for_end");
|
|
Op op = {0};
|
|
op.kind = OP_FOR_BEGIN;
|
|
op.data.loop.loop = str_dup(loop_label);
|
|
op.data.loop.end = str_dup(end_label);
|
|
parser_emit_op(parser, op);
|
|
parser_push_control(parser, "for");
|
|
parser->control_stack[parser->control_len - 1].loop_label = loop_label;
|
|
parser->control_stack[parser->control_len - 1].end_label = end_label;
|
|
continue;
|
|
}
|
|
if (strcmp(token.lexeme, "while") == 0) {
|
|
char *begin_label = parser_new_label(parser, "begin");
|
|
char *end_label = parser_new_label(parser, "end");
|
|
Op label = {0};
|
|
label.kind = OP_LABEL;
|
|
label.data.label = str_dup(begin_label);
|
|
parser_emit_op(parser, label);
|
|
parser_push_control(parser, "begin");
|
|
parser->control_stack[parser->control_len - 1].begin_label = begin_label;
|
|
parser->control_stack[parser->control_len - 1].end_label = end_label;
|
|
continue;
|
|
}
|
|
if (strcmp(token.lexeme, "do") == 0) {
|
|
if (!parser->control_len || strcmp(parser->control_stack[parser->control_len - 1].type, "begin") != 0) {
|
|
fprintf(stderr, "[error] 'do' without matching while\n");
|
|
exit(1);
|
|
}
|
|
Op op = {0};
|
|
op.kind = OP_BRANCH_ZERO;
|
|
op.data.label = str_dup(parser->control_stack[parser->control_len - 1].end_label);
|
|
parser_emit_op(parser, op);
|
|
continue;
|
|
}
|
|
parser_handle_token(parser, token);
|
|
}
|
|
if (parser->macro_recording.active) {
|
|
fprintf(stderr, "[error] unterminated macro definition\n");
|
|
exit(1);
|
|
}
|
|
if (parser->control_len) {
|
|
fprintf(stderr, "[error] unclosed control structure\n");
|
|
exit(1);
|
|
}
|
|
if (parser->current_def) {
|
|
fprintf(stderr, "[error] unclosed definition at EOF\n");
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
|
|
/*
 * Expands the macro stored in `word` at the parser's current position.
 *
 * Reads word->macro_param_count tokens from the input as arguments (fatal if
 * the input runs out), then injects a copy of every lexeme in
 * word->macro_expansion. An item of the form "$N" whose N (1-based) names a
 * supplied argument is replaced by that argument; any other item, including an
 * out-of-range "$N", is injected verbatim.
 */
static void parser_expand_macro(Parser *parser, Word *word) {
    int param_count = word->macro_param_count;
    char **params = NULL;
    if (param_count > 0) {
        params = (char **)xmalloc((size_t)param_count * sizeof(char *));
        for (int i = 0; i < param_count; i++) {
            Token tok = parser_next_token(parser);
            if (!tok.lexeme) {
                fprintf(stderr, "[error] not enough macro parameters for '%s'\n", word->name);
                exit(1);
            }
            params[i] = str_dup(tok.lexeme);
        }
    }

    TokenVec injected;
    VEC_INIT(&injected);
    for (size_t i = 0; word->macro_expansion && word->macro_expansion[i]; i++) {
        const char *item = word->macro_expansion[i];
        const char *text = item;
        if (item[0] == '$' && isdigit((unsigned char)item[1])) {
            int idx = atoi(item + 1) - 1;
            if (idx >= 0 && idx < param_count) {
                text = params[idx];
            }
        }
        Token tok = {0};
        tok.lexeme = str_dup(text);
        VEC_PUSH(&injected, tok);
    }

    parser_inject_tokens(parser, &injected);
    /* The tokens were copied into the parser's array by value; only the
     * temporary vector's storage is released here. */
    VEC_FREE(&injected);

    for (int i = 0; i < param_count; i++) {
        free(params[i]);
    }
    free(params);
}
|
|
|
|
static void parser_handle_struct(Parser *parser) {
|
|
Token name_tok = parser_next_token(parser);
|
|
if (!name_tok.lexeme) {
|
|
fprintf(stderr, "[error] struct missing name\n");
|
|
exit(1);
|
|
}
|
|
typedef struct {
|
|
char *name;
|
|
int64_t size;
|
|
int64_t offset;
|
|
} Field;
|
|
Field *fields = NULL;
|
|
size_t field_len = 0;
|
|
size_t field_cap = 0;
|
|
int64_t offset = 0;
|
|
while (!parser_eof(parser)) {
|
|
Token tok = parser_next_token(parser);
|
|
if (!tok.lexeme) {
|
|
break;
|
|
}
|
|
if (strcmp(tok.lexeme, "end") == 0) {
|
|
break;
|
|
}
|
|
if (strcmp(tok.lexeme, "field") != 0) {
|
|
fprintf(stderr, "[error] unexpected token '%s' in struct\n", tok.lexeme);
|
|
exit(1);
|
|
}
|
|
Token field_name = parser_next_token(parser);
|
|
Token field_size = parser_next_token(parser);
|
|
if (!field_name.lexeme || !field_size.lexeme) {
|
|
fprintf(stderr, "[error] malformed struct field\n");
|
|
exit(1);
|
|
}
|
|
int64_t size = 0;
|
|
if (!try_parse_int(field_size.lexeme, &size)) {
|
|
fprintf(stderr, "[error] invalid struct field size '%s'\n", field_size.lexeme);
|
|
exit(1);
|
|
}
|
|
if (field_len + 1 > field_cap) {
|
|
field_cap = field_cap ? field_cap * 2 : 8;
|
|
fields = xrealloc(fields, field_cap * sizeof(Field));
|
|
}
|
|
fields[field_len++] = (Field){str_dup(field_name.lexeme), size, offset};
|
|
offset += size;
|
|
}
|
|
TokenVec injected;
|
|
VEC_INIT(&injected);
|
|
Token tok = {0};
|
|
tok.lexeme = str_dup("word");
|
|
VEC_PUSH(&injected, tok);
|
|
tok.lexeme = str_printf("%s.size", name_tok.lexeme);
|
|
VEC_PUSH(&injected, tok);
|
|
tok.lexeme = str_printf("%lld", (long long)offset);
|
|
VEC_PUSH(&injected, tok);
|
|
tok.lexeme = str_dup("end");
|
|
VEC_PUSH(&injected, tok);
|
|
for (size_t i = 0; i < field_len; i++) {
|
|
Field f = fields[i];
|
|
Token t = {0};
|
|
t.lexeme = str_dup("word");
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_printf("%s.%s.size", name_tok.lexeme, f.name);
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_printf("%lld", (long long)f.size);
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_dup("end");
|
|
VEC_PUSH(&injected, t);
|
|
|
|
t.lexeme = str_dup("word");
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_printf("%s.%s.offset", name_tok.lexeme, f.name);
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_printf("%lld", (long long)f.offset);
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_dup("end");
|
|
VEC_PUSH(&injected, t);
|
|
|
|
t.lexeme = str_dup("word");
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_printf("%s.%s@", name_tok.lexeme, f.name);
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_printf("%s.%s.offset", name_tok.lexeme, f.name);
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_dup("+");
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_dup("@");
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_dup("end");
|
|
VEC_PUSH(&injected, t);
|
|
|
|
t.lexeme = str_dup("word");
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_printf("%s.%s!", name_tok.lexeme, f.name);
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_dup("swap");
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_printf("%s.%s.offset", name_tok.lexeme, f.name);
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_dup("+");
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_dup("swap");
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_dup("!");
|
|
VEC_PUSH(&injected, t);
|
|
t.lexeme = str_dup("end");
|
|
VEC_PUSH(&injected, t);
|
|
free(f.name);
|
|
}
|
|
free(fields);
|
|
parser_inject_tokens(parser, &injected);
|
|
}
|
|
|
|
/*
 * Handles "with NAME... in".
 *
 * For every name a uniquely numbered asm word is created whose body pushes
 * the address of a per-variable cell label; the name is mapped to that label
 * in parser->variable_labels and to the word in parser->variable_words. A
 * "with" control frame that owns the name list is pushed, and for the names
 * in reverse order the tokens "<word> swap !" are injected.
 *
 * Input that ends before "in" is fatal.
 */
static void parser_handle_with(Parser *parser) {
    StrVec names;
    VEC_INIT(&names);

    bool found_in = false;
    while (!parser_eof(parser)) {
        Token tok = parser_next_token(parser);
        if (!tok.lexeme) {
            break;
        }
        if (strcmp(tok.lexeme, "in") == 0) {
            found_in = true;
            break;
        }
        VEC_PUSH(&names, str_dup(tok.lexeme));
    }
    /* Covers both ways of running out of input: a NULL lexeme and
     * parser_eof() ending the loop before "in" was seen. */
    if (!found_in) {
        fprintf(stderr, "[error] unterminated with block\n");
        exit(1);
    }

    for (size_t i = 0; i < names.len; i++) {
        const char *name = names.data[i];
        int id = parser->label_counter++;
        char *cell_label = str_printf("__with_%s_%d_cell", name, id);
        char *word_name = str_printf("__with_%s_%d", name, id);
        strmap_set(&parser->variable_labels, name, cell_label);
        strmap_set(&parser->variable_words, name, str_dup(word_name));

        AsmDefinition *def = (AsmDefinition *)xmalloc(sizeof(AsmDefinition));
        memset(def, 0, sizeof(AsmDefinition));
        def->name = str_dup(word_name);
        def->body = str_printf("    lea rax, [rel %s]\n    sub r12, 8\n    mov [r12], rax\n", cell_label);

        Word *word = dictionary_lookup(parser->dictionary, word_name);
        if (!word) {
            word = (Word *)xmalloc(sizeof(Word));
            memset(word, 0, sizeof(Word));
            word->name = str_dup(word_name);
            dictionary_register(parser->dictionary, word);
        }
        word->asm_def = def;

        Form form = {0};
        form.kind = FORM_ASM;
        form.ptr = def;
        VEC_PUSH(&parser->module.forms, form);
    }

    /* The control frame takes over the name list (struct copy). */
    parser_push_control(parser, "with");
    parser->control_stack[parser->control_len - 1].with_names = names;

    TokenVec injected;
    VEC_INIT(&injected);
    for (size_t i = names.len; i-- > 0;) {
        Token t = {0};
        char *label = (char *)strmap_get(&parser->variable_words, names.data[i]);
        t.lexeme = str_dup(label);
        VEC_PUSH(&injected, t);
        t.lexeme = str_dup("swap");
        VEC_PUSH(&injected, t);
        t.lexeme = str_dup("!");
        VEC_PUSH(&injected, t);
    }
    parser_inject_tokens(parser, &injected);
    /* Tokens were copied by value into the parser; free only the temporary array. */
    VEC_FREE(&injected);
}
|
|
|
|
static void parser_handle_token(Parser *parser, Token token) {
|
|
if (parser->token_hook) {
|
|
Word *hook = dictionary_lookup(parser->dictionary, parser->token_hook);
|
|
if (!hook) {
|
|
fprintf(stderr, "[error] unknown token hook '%s'\n", parser->token_hook);
|
|
exit(1);
|
|
}
|
|
ct_stack_push(&parser->ct_vm->stack, ct_make_token(token));
|
|
ct_word_call(parser->ct_vm, hook);
|
|
CtValue handled = ct_stack_pop(&parser->ct_vm->stack);
|
|
if (ct_truthy(handled)) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (strcmp(token.lexeme, "macro") == 0) {
|
|
Token name = parser_next_token(parser);
|
|
if (!name.lexeme) {
|
|
fprintf(stderr, "[error] macro missing name\n");
|
|
exit(1);
|
|
}
|
|
int param_count = 0;
|
|
Token maybe_num = parser_peek_token(parser);
|
|
if (maybe_num.lexeme && isdigit((unsigned char)maybe_num.lexeme[0])) {
|
|
parser_next_token(parser);
|
|
param_count = atoi(maybe_num.lexeme);
|
|
}
|
|
parser_start_macro(parser, name.lexeme, param_count);
|
|
return;
|
|
}
|
|
|
|
if (strcmp(token.lexeme, "inline") == 0) {
|
|
parser->pending_inline_def = true;
|
|
return;
|
|
}
|
|
|
|
if (strcmp(token.lexeme, "immediate") == 0) {
|
|
if (!parser->last_defined) {
|
|
fprintf(stderr, "[error] immediate used without a preceding definition\n");
|
|
exit(1);
|
|
}
|
|
parser->last_defined->immediate = true;
|
|
if (parser->last_defined->definition) {
|
|
parser->last_defined->definition->immediate = true;
|
|
}
|
|
if (parser->last_defined->asm_def) {
|
|
parser->last_defined->asm_def->immediate = true;
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (strcmp(token.lexeme, "compile-only") == 0) {
|
|
if (!parser->last_defined) {
|
|
fprintf(stderr, "[error] compile-only used without a preceding definition\n");
|
|
exit(1);
|
|
}
|
|
parser->last_defined->compile_only = true;
|
|
if (parser->last_defined->definition) {
|
|
parser->last_defined->definition->compile_only = true;
|
|
}
|
|
if (parser->last_defined->asm_def) {
|
|
parser->last_defined->asm_def->compile_only = true;
|
|
}
|
|
if (parser->last_defined->prev_definition) {
|
|
parser->last_defined->ct_definition = parser->last_defined->definition;
|
|
parser->last_defined->definition = parser->last_defined->prev_definition;
|
|
parser->last_defined->prev_definition = NULL;
|
|
}
|
|
if (parser->last_defined->prev_asm_def) {
|
|
parser->last_defined->ct_asm_def = parser->last_defined->asm_def;
|
|
parser->last_defined->asm_def = parser->last_defined->prev_asm_def;
|
|
parser->last_defined->prev_asm_def = NULL;
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (strcmp(token.lexeme, "compile-time") == 0) {
|
|
Token name = parser_next_token(parser);
|
|
if (!name.lexeme) {
|
|
fprintf(stderr, "[error] compile-time missing word name\n");
|
|
exit(1);
|
|
}
|
|
Word *word = dictionary_lookup(parser->dictionary, name.lexeme);
|
|
if (!word) {
|
|
fprintf(stderr, "[error] unknown word '%s' for compile-time\n", name.lexeme);
|
|
exit(1);
|
|
}
|
|
ct_word_call(parser->ct_vm, word);
|
|
if (parser->current_def) {
|
|
Op op = {0};
|
|
op.kind = OP_WORD;
|
|
op.data.word = str_dup(name.lexeme);
|
|
parser_emit_op(parser, op);
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (strcmp(token.lexeme, "here") == 0) {
|
|
SourceLocation *loc = location_for_token(parser, token);
|
|
char *text = str_printf("%s:%d:%d", loc->path, loc->line, loc->column);
|
|
parser_emit_literal(parser, LIT_STRING, 0, 0.0, text);
|
|
free(text);
|
|
return;
|
|
}
|
|
|
|
if (strcmp(token.lexeme, "label") == 0) {
|
|
Token name = parser_next_token(parser);
|
|
if (!name.lexeme) {
|
|
fprintf(stderr, "[error] label missing name\n");
|
|
exit(1);
|
|
}
|
|
Op op = {0};
|
|
op.kind = OP_LABEL;
|
|
op.data.label = str_dup(name.lexeme);
|
|
parser_emit_op(parser, op);
|
|
return;
|
|
}
|
|
|
|
if (strcmp(token.lexeme, "goto") == 0) {
|
|
Token name = parser_next_token(parser);
|
|
if (!name.lexeme) {
|
|
fprintf(stderr, "[error] goto missing label\n");
|
|
exit(1);
|
|
}
|
|
Op op = {0};
|
|
op.kind = OP_JUMP;
|
|
op.data.label = str_dup(name.lexeme);
|
|
parser_emit_op(parser, op);
|
|
return;
|
|
}
|
|
|
|
if (strcmp(token.lexeme, "struct") == 0) {
|
|
parser_handle_struct(parser);
|
|
return;
|
|
}
|
|
|
|
if (strcmp(token.lexeme, "with") == 0) {
|
|
parser_handle_with(parser);
|
|
return;
|
|
}
|
|
|
|
char *str_lit = NULL;
|
|
if (parse_string_literal(token.lexeme, &str_lit)) {
|
|
parser_emit_literal(parser, LIT_STRING, 0, 0.0, str_lit);
|
|
free(str_lit);
|
|
return;
|
|
}
|
|
int64_t int_val = 0;
|
|
if (try_parse_int(token.lexeme, &int_val)) {
|
|
parser_emit_literal(parser, LIT_INT, int_val, 0.0, NULL);
|
|
return;
|
|
}
|
|
double float_val = 0.0;
|
|
if (try_parse_float(token.lexeme, &float_val)) {
|
|
parser_emit_literal(parser, LIT_FLOAT, 0, float_val, NULL);
|
|
return;
|
|
}
|
|
|
|
const char *var_label = (const char *)strmap_get(&parser->variable_words, token.lexeme);
|
|
if (var_label) {
|
|
Token peek = parser_peek_token(parser);
|
|
Op op = {0};
|
|
op.kind = OP_WORD;
|
|
op.data.word = str_dup(var_label);
|
|
parser_emit_op(parser, op);
|
|
if (!peek.lexeme || strcmp(peek.lexeme, "!") != 0) {
|
|
op.data.word = str_dup("@");
|
|
parser_emit_op(parser, op);
|
|
}
|
|
return;
|
|
}
|
|
|
|
Word *word = dictionary_lookup(parser->dictionary, token.lexeme);
|
|
if (word && word->macro_expansion) {
|
|
parser_expand_macro(parser, word);
|
|
return;
|
|
}
|
|
if (word && word->immediate) {
|
|
ct_word_call(parser->ct_vm, word);
|
|
if (parser->current_def && !word->compile_only) {
|
|
Op op = {0};
|
|
op.kind = OP_WORD;
|
|
op.data.word = str_dup(word->name);
|
|
parser_emit_op(parser, op);
|
|
}
|
|
return;
|
|
}
|
|
if (word && word->compile_only && parser->current_def && parser->definition_stack_len) {
|
|
Word *current = parser->definition_stack[parser->definition_stack_len - 1];
|
|
current->compile_only = true;
|
|
if (current->definition) {
|
|
current->definition->compile_only = true;
|
|
}
|
|
}
|
|
|
|
if (!word) {
|
|
word = (Word *)xmalloc(sizeof(Word));
|
|
memset(word, 0, sizeof(Word));
|
|
word->name = str_dup(token.lexeme);
|
|
dictionary_register(parser->dictionary, word);
|
|
}
|
|
Op op = {0};
|
|
op.kind = OP_WORD;
|
|
op.data.word = str_dup(token.lexeme);
|
|
parser_emit_op(parser, op);
|
|
}
|
|
|
|
int main(int argc, char **argv) {
|
|
StrVec inputs;
|
|
StrVec include_dirs;
|
|
StrVec libs;
|
|
VEC_INIT(&inputs);
|
|
VEC_INIT(&include_dirs);
|
|
VEC_INIT(&libs);
|
|
const char *output = "a.out";
|
|
const char *temp_dir = "build";
|
|
bool emit_asm = false;
|
|
bool debug = false;
|
|
|
|
for (int i = 1; i < argc; i++) {
|
|
const char *arg = argv[i];
|
|
if (strcmp(arg, "-o") == 0 && i + 1 < argc) {
|
|
output = argv[++i];
|
|
continue;
|
|
}
|
|
if (strcmp(arg, "--emit-asm") == 0) {
|
|
emit_asm = true;
|
|
continue;
|
|
}
|
|
if (strcmp(arg, "--dbg") == 0) {
|
|
debug = true;
|
|
continue;
|
|
}
|
|
if ((strcmp(arg, "-I") == 0 || strcmp(arg, "--include") == 0) && i + 1 < argc) {
|
|
VEC_PUSH(&include_dirs, str_dup(argv[++i]));
|
|
continue;
|
|
}
|
|
if (strncmp(arg, "-I", 2) == 0 && strlen(arg) > 2) {
|
|
VEC_PUSH(&include_dirs, str_dup(arg + 2));
|
|
continue;
|
|
}
|
|
if ((strcmp(arg, "-l") == 0) && i + 1 < argc) {
|
|
const char *lib = argv[++i];
|
|
if (strchr(lib, '/') || strstr(lib, ".so") || strstr(lib, ".a")) {
|
|
VEC_PUSH(&libs, str_printf("-l:%s", lib));
|
|
} else {
|
|
VEC_PUSH(&libs, str_printf("-l%s", lib));
|
|
}
|
|
continue;
|
|
}
|
|
if (strncmp(arg, "-l", 2) == 0 && strlen(arg) > 2) {
|
|
VEC_PUSH(&libs, str_dup(arg));
|
|
continue;
|
|
}
|
|
if (strcmp(arg, "--temp-dir") == 0 && i + 1 < argc) {
|
|
temp_dir = argv[++i];
|
|
continue;
|
|
}
|
|
if (arg[0] == '-') {
|
|
fprintf(stderr, "[error] unknown option: %s\n", arg);
|
|
return 1;
|
|
}
|
|
VEC_PUSH(&inputs, str_dup(arg));
|
|
}
|
|
|
|
if (inputs.len == 0) {
|
|
fprintf(stderr, "usage: %s <source.sl> [-o output] [--emit-asm]\n", argv[0]);
|
|
return 1;
|
|
}
|
|
|
|
VEC_PUSH(&include_dirs, str_dup("."));
|
|
VEC_PUSH(&include_dirs, str_dup("./stdlib"));
|
|
|
|
StrMap visited;
|
|
strmap_init(&visited);
|
|
StrVec sources;
|
|
VEC_INIT(&sources);
|
|
FileSpanVec file_spans;
|
|
VEC_INIT(&file_spans);
|
|
int line_counter = 1;
|
|
for (size_t i = 0; i < inputs.len; i++) {
|
|
char *expanded = expand_imports(inputs.data[i], &include_dirs, &visited, &file_spans, &line_counter);
|
|
VEC_PUSH(&sources, expanded);
|
|
}
|
|
size_t total = 0;
|
|
for (size_t i = 0; i < sources.len; i++) {
|
|
total += strlen(sources.data[i]);
|
|
}
|
|
char *combined = (char *)xmalloc(total + 1);
|
|
combined[0] = '\0';
|
|
for (size_t i = 0; i < sources.len; i++) {
|
|
strcat(combined, sources.data[i]);
|
|
}
|
|
|
|
Dictionary dict;
|
|
dictionary_init(&dict);
|
|
Reader reader;
|
|
reader_init(&reader);
|
|
Parser parser;
|
|
parser_init(&parser, &dict, &reader);
|
|
parser.file_spans = file_spans;
|
|
parser.primary_path = inputs.len ? str_dup(inputs.data[0]) : NULL;
|
|
CompileTimeVM vm;
|
|
ct_vm_init(&vm, &parser);
|
|
parser.ct_vm = &vm;
|
|
bootstrap_dictionary(&dict, &parser, &vm);
|
|
register_builtin_syscall(&parser);
|
|
|
|
parse_tokens(&parser, combined);
|
|
|
|
if (parser.uses_libc && !strvec_contains(&libs, "-lc")) {
|
|
VEC_PUSH(&libs, str_dup("-lc"));
|
|
}
|
|
if (parser.uses_libm && !strvec_contains(&libs, "-lm")) {
|
|
VEC_PUSH(&libs, str_dup("-lm"));
|
|
}
|
|
|
|
Emission emission = emit_module(&parser, &dict, debug);
|
|
char *asm_text = emission_snapshot(&emission);
|
|
|
|
char *asm_path = NULL;
|
|
char *obj_path = NULL;
|
|
if (emit_asm) {
|
|
asm_path = str_dup(output);
|
|
} else {
|
|
mkdir(temp_dir, 0755);
|
|
const char *base = strrchr(output, '/');
|
|
base = base ? base + 1 : output;
|
|
asm_path = str_printf("%s/%s.asm", temp_dir, base);
|
|
obj_path = str_printf("%s/%s.o", temp_dir, base);
|
|
}
|
|
|
|
write_file(asm_path, asm_text);
|
|
if (emit_asm) {
|
|
return 0;
|
|
}
|
|
run_nasm(asm_path, obj_path, debug);
|
|
run_linker(obj_path, output, debug, &libs, false, parser.uses_libc);
|
|
return 0;
|
|
}
|