From ecf90feab94c8681b8c16fd6d2a7dce893ddd1a4 Mon Sep 17 00:00:00 2001
From: igor <igorcielniak.contact@gmail.com>
Date: Wed, 11 Mar 2026 21:27:48 +0100
Subject: [PATCH] general update, updated cimport so it parses struct
 definitions from imported headers, improved the error reporting, added ifdef
 and ifndef, added proper support for extern variadic functions, added some
 new sections and pages to the doc tool and added --check to only check the
 program corectness and run compile time defs without producing a binary

---
 main.py                               | 1047 +++++++++++++++++++++++--
 test.py                               |   14 +-
 tests/cimport_structs.c               |    9 +
 tests/cimport_structs.expected        |    6 +
 tests/cimport_structs.h               |   18 +
 tests/cimport_structs.sl              |   31 +
 tests/error_recovery.compile.expected |   12 +
 tests/error_recovery.meta.json        |    4 +
 tests/error_recovery.sl               |   15 +
 tests/ifdef.expected                  |    7 +
 tests/ifdef.meta.json                 |    3 +
 tests/ifdef.sl                        |   44 ++
 tests/ifndef.expected                 |    3 +
 tests/ifndef.sl                       |   33 +
 tests/variadic_extern.c               |   20 +
 tests/variadic_extern.expected        |    5 +
 tests/variadic_extern.meta.json       |    4 +
 tests/variadic_extern.sl              |   34 +
 18 files changed, 1225 insertions(+), 84 deletions(-)
 create mode 100644 tests/cimport_structs.c
 create mode 100644 tests/cimport_structs.expected
 create mode 100644 tests/cimport_structs.h
 create mode 100644 tests/cimport_structs.sl
 create mode 100644 tests/error_recovery.compile.expected
 create mode 100644 tests/error_recovery.meta.json
 create mode 100644 tests/error_recovery.sl
 create mode 100644 tests/ifdef.expected
 create mode 100644 tests/ifdef.meta.json
 create mode 100644 tests/ifdef.sl
 create mode 100644 tests/ifndef.expected
 create mode 100644 tests/ifndef.sl
 create mode 100644 tests/variadic_extern.c
 create mode 100644 tests/variadic_extern.expected
 create mode 100644 tests/variadic_extern.meta.json
 create mode 100644 tests/variadic_extern.sl

diff --git a/main.py b/main.py
index 9e8fa54..1c28106 100644
--- a/main.py
+++ b/main.py
@@ -50,12 +50,94 @@ def _get_struct():
     return _struct_mod
 
 
+class Diagnostic:
+    """Structured error/warning with optional source context and suggestions."""
+    __slots__ = ('level', 'message', 'path', 'line', 'column', 'length', 'hint', 'suggestion')
+
+    def __init__(
+        self,
+        level: str,
+        message: str,
+        path: Optional[Path] = None,
+        line: int = 0,
+        column: int = 0,
+        length: int = 0,
+        hint: str = "",
+        suggestion: str = "",
+    ) -> None:
+        self.level = level       # "error", "warning", "note"
+        self.message = message
+        self.path = path
+        self.line = line
+        self.column = column
+        self.length = length
+        self.hint = hint
+        self.suggestion = suggestion
+
+    def format(self, *, color: bool = True) -> str:
+        """Format the diagnostic in Rust-style with source context."""
+        _RED = "\033[1;31m" if color else ""
+        _YELLOW = "\033[1;33m" if color else ""
+        _BLUE = "\033[1;34m" if color else ""
+        _CYAN = "\033[1;36m" if color else ""
+        _BOLD = "\033[1m" if color else ""
+        _DIM = "\033[2m" if color else ""
+        _RST = "\033[0m" if color else ""
+
+        level_color = _RED if self.level == "error" else (_YELLOW if self.level == "warning" else _BLUE)
+        parts: List[str] = []
+        parts.append(f"{level_color}{self.level}{_RST}{_BOLD}: {self.message}{_RST}")
+
+        if self.path and self.line > 0:
+            loc = f"{self.path}:{self.line}"
+            if self.column > 0:
+                loc += f":{self.column}"
+            parts.append(f"  {_BLUE}-->{_RST} {loc}")
+
+            # Try to show the source line
+            try:
+                src_lines = self.path.read_text(encoding="utf-8", errors="ignore").splitlines()
+                if 0 < self.line <= len(src_lines):
+                    src_line = src_lines[self.line - 1]
+                    line_no = str(self.line)
+                    pad = " " * len(line_no)
+                    parts.append(f"  {_BLUE}{pad} |{_RST}")
+                    parts.append(f"  {_BLUE}{line_no} |{_RST} {src_line}")
+                    if self.column > 0:
+                        caret_len = max(1, self.length) if self.length else 1
+                        arrow = " " * (self.column - 1) + level_color + "^" * caret_len + _RST
+                        parts.append(f"  {_BLUE}{pad} |{_RST} {arrow}")
+                    if self.hint:
+                        parts.append(f"  {_BLUE}{pad} |{_RST} {_CYAN}= note: {self.hint}{_RST}")
+                    if self.suggestion:
+                        parts.append(f"  {_BLUE}{pad} |{_RST}")
+                        parts.append(f"  {_BLUE}{pad} = {_CYAN}help{_RST}: {self.suggestion}")
+            except Exception:
+                pass
+
+        elif self.hint:
+            parts.append(f"  {_CYAN}= note: {self.hint}{_RST}")
+
+        return "\n".join(parts)
+
+    def __str__(self) -> str:
+        if self.path and self.line > 0:
+            return f"{self.level}: {self.message} at {self.path}:{self.line}:{self.column}"
+        return f"{self.level}: {self.message}"
+
+
 class ParseError(Exception):
     """Raised when the source stream cannot be parsed."""
+    def __init__(self, message: str = "", *, diagnostic: Optional[Diagnostic] = None) -> None:
+        self.diagnostic = diagnostic
+        super().__init__(message)
 
 
 class CompileError(Exception):
     """Raised when IR cannot be turned into assembly."""
+    def __init__(self, message: str = "", *, diagnostic: Optional[Diagnostic] = None) -> None:
+        self.diagnostic = diagnostic
+        super().__init__(message)
 
 
 class CompileTimeError(ParseError):
@@ -321,7 +403,7 @@ _PEEPHOLE_WORD_RULES: List[Tuple[Tuple[str, ...], Tuple[str, ...]]] = [
     (("swap", "or"), ("or",)),
     (("swap", "min"), ("min",)),
     (("swap", "max"), ("max",)),
-    # --- dup + self-idempotent binary → identity ---
+    # --- dup + self-idempotent binary -> identity ---
     (("dup", "bor"), ()),
     (("dup", "band"), ()),
     (("dup", "bxor"), ("drop", "literal_0")),
@@ -339,7 +421,7 @@ for _pat, _repl in _PEEPHOLE_WORD_RULES:
 
 _PEEPHOLE_MAX_PAT_LEN = max(len(p) for p, _ in _PEEPHOLE_WORD_RULES) if _PEEPHOLE_WORD_RULES else 0
 
-# Unified dict: pattern tuple → replacement tuple (for O(1) lookup)
+# Unified dict: pattern tuple -> replacement tuple (for O(1) lookup)
 _PEEPHOLE_ALL_RULES: Dict[Tuple[str, ...], Tuple[str, ...]] = {}
 for _pat, _repl in _PEEPHOLE_WORD_RULES:
     _PEEPHOLE_ALL_RULES[_pat] = _repl
@@ -573,7 +655,8 @@ class Word:
     __slots__ = ('name', 'priority', 'immediate', 'definition', 'macro', 'intrinsic',
                  'macro_expansion', 'macro_params', 'compile_time_intrinsic',
                  'runtime_intrinsic', 'compile_only', 'compile_time_override',
-                 'is_extern', 'extern_inputs', 'extern_outputs', 'extern_signature', 'inline')
+                 'is_extern', 'extern_inputs', 'extern_outputs', 'extern_signature',
+                 'extern_variadic', 'inline')
 
     def __init__(self, name: str, priority: int = 0, immediate: bool = False,
                  definition=None, macro=None, intrinsic=None,
@@ -581,7 +664,8 @@ class Word:
                  compile_time_intrinsic=None, runtime_intrinsic=None,
                  compile_only: bool = False, compile_time_override: bool = False,
                  is_extern: bool = False, extern_inputs: int = 0, extern_outputs: int = 0,
-                 extern_signature=None, inline: bool = False) -> None:
+                 extern_signature=None, extern_variadic: bool = False,
+                 inline: bool = False) -> None:
         self.name = name
         self.priority = priority
         self.immediate = immediate
@@ -598,6 +682,7 @@ class Word:
         self.extern_inputs = extern_inputs
         self.extern_outputs = extern_outputs
         self.extern_signature = extern_signature
+        self.extern_variadic = extern_variadic
         self.inline = inline
 
 
@@ -610,10 +695,11 @@ def _suppress_redefine_warnings_set(value: bool) -> None:
 
 
 class Dictionary:
-    __slots__ = ('words',)
+    __slots__ = ('words', 'warn_callback')
 
     def __init__(self, words: Dict[str, Word] = None) -> None:
         self.words = words if words is not None else {}
+        self.warn_callback: Optional[Callable] = None
 
     def register(self, word: Word) -> Word:
         existing = self.words.get(word.name)
@@ -646,7 +732,10 @@ class Dictionary:
             return word
 
         if not _suppress_redefine_warnings:
-            sys.stderr.write(f"[warn] redefining word {word.name} (priority {word.priority})\n")
+            if self.warn_callback is not None:
+                self.warn_callback(word.name, word.priority)
+            else:
+                sys.stderr.write(f"[warn] redefining word {word.name} (priority {word.priority})\n")
         self.words[word.name] = word
         return word
 
@@ -702,6 +791,10 @@ class Parser:
         self.cstruct_layouts: Dict[str, CStructLayout] = {}
         self._pending_inline_definition: bool = False
         self._pending_priority: Optional[int] = None
+        self.diagnostics: List[Diagnostic] = []
+        self._max_errors: int = 20
+        self._warnings_enabled: Set[str] = set()
+        self._werror: bool = False
 
     def _rebuild_span_index(self) -> None:
         """Rebuild bisect index after file_spans changes."""
@@ -731,6 +824,57 @@ class Parser:
                 return _make_loc(span.path, span.local_start_line + (tl - span.start_line), token.column)
         return _make_loc(_SOURCE_PATH, tl, token.column)
 
+    def _record_diagnostic(self, token: Optional[Token], message: str, *, level: str = "error", hint: str = "", suggestion: str = "") -> None:
+        """Record a diagnostic and raise ParseError if too many errors."""
+        loc = self.location_for_token(token) if token else _make_loc(_SOURCE_PATH, 0, 0)
+        diag = Diagnostic(
+            level=level, message=message,
+            path=loc.path, line=loc.line, column=loc.column,
+            length=len(token.lexeme) if token else 0,
+            hint=hint, suggestion=suggestion,
+        )
+        self.diagnostics.append(diag)
+        if level == "error" and sum(1 for d in self.diagnostics if d.level == "error") >= self._max_errors:
+            raise ParseError(f"too many errors ({self._max_errors}), aborting", diagnostic=diag)
+
+    def _warn(self, token: Optional[Token], category: str, message: str, *, hint: str = "", suggestion: str = "") -> None:
+        """Record a warning if the category is enabled. Promotes to error under --Werror."""
+        if "all" not in self._warnings_enabled and category not in self._warnings_enabled:
+            return
+        level = "error" if self._werror else "warning"
+        self._record_diagnostic(token, message, level=level, hint=hint, suggestion=suggestion)
+
+    def _skip_to_recovery_point(self) -> None:
+        """Skip tokens until we reach a safe recovery point (end, ;, or top-level definition keyword)."""
+        _recovery_keywords = {"word", "end", ";", ":asm", ":py", "extern", "macro"}
+        depth = 0
+        while self.pos < len(self.tokens):
+            lex = self.tokens[self.pos].lexeme
+            if lex == "word" or lex == ":asm" or lex == ":py":
+                if depth == 0:
+                    break  # Don't consume — let the main loop pick it up
+                depth += 1
+            elif lex == "end":
+                if depth <= 1:
+                    self.pos += 1
+                    break
+                depth -= 1
+            elif lex == ";":
+                self.pos += 1
+                break
+            elif lex == "extern" and depth == 0:
+                break
+            self.pos += 1
+        # Reset state for recovery
+        self.macro_recording = None
+        self._pending_priority = None
+        self._pending_inline_definition = False
+        while self.definition_stack:
+            self.definition_stack.pop()
+        while len(self.context_stack) > 1:
+            self.context_stack.pop()
+        self.control_stack.clear()
+
     def inject_token_objects(self, tokens: Sequence[Token]) -> None:
         """Insert tokens at the current parse position."""
         self.tokens[self.pos:self.pos] = list(tokens)
@@ -867,6 +1011,7 @@ class Parser:
         _tokens = self.tokens
         try:
             while self.pos < len(_tokens):
+              try:
                 token = _tokens[self.pos]
                 self.pos += 1
                 self._last_token = token
@@ -922,6 +1067,15 @@ class Parser:
                     continue
                 if self._handle_token(token):
                     _tokens = self.tokens
+              except CompileTimeError:
+                raise
+              except ParseError as _recov_exc:
+                self._record_diagnostic(self._last_token, str(_recov_exc))
+                self._skip_to_recovery_point()
+                _tokens = self.tokens
+                continue
+        except CompileTimeError:
+            raise
         except ParseError:
             raise
         except Exception as exc:
@@ -933,14 +1087,19 @@ class Parser:
             ) from None
 
         if self.macro_recording is not None:
-            raise ParseError("unterminated macro definition (missing ';')")
+            self._record_diagnostic(self._last_token, "unterminated macro definition (missing ';')")
         if self._pending_priority is not None:
-            raise ParseError(f"dangling priority {self._pending_priority} without following definition")
+            self._record_diagnostic(self._last_token, f"dangling priority {self._pending_priority} without following definition")
 
         if len(self.context_stack) != 1:
-            raise ParseError("unclosed definition at EOF")
+            self._record_diagnostic(self._last_token, "unclosed definition at EOF")
         if self.control_stack:
-            raise ParseError("unclosed control structure at EOF")
+            self._record_diagnostic(self._last_token, "unclosed control structure at EOF")
+
+        # If any errors were accumulated, raise with all diagnostics
+        error_count = sum(1 for d in self.diagnostics if d.level == "error")
+        if error_count > 0:
+            raise ParseError(f"compilation failed with {error_count} error(s)")
 
         module = self.context_stack.pop()
         if not isinstance(module, Module):  # pragma: no cover - defensive
@@ -1048,23 +1207,37 @@ class Parser:
             raise ParseError(f"extern expected identifier before '(' but got '{name_lexeme}'")
 
         ret_type = _normalize_c_type_tokens(prefix_tokens, allow_default=True)
-        inputs, arg_types = self._parse_c_param_list()
+        inputs, arg_types, variadic = self._parse_c_param_list()
         outputs = 0 if ret_type == "void" else 1
-        self._register_c_extern(name_lexeme, inputs, outputs, arg_types, ret_type, priority=priority)
+        self._register_c_extern(name_lexeme, inputs, outputs, arg_types, ret_type,
+                                priority=priority, variadic=variadic)
         return True
 
-    def _parse_c_param_list(self) -> Tuple[int, List[str]]:
+    def _parse_c_param_list(self) -> Tuple[int, List[str], bool]:
+        """Parse C-style parameter list. Returns (count, types, is_variadic)."""
         inputs = 0
         arg_types: List[str] = []
+        variadic = False
 
         if self._eof():
             raise ParseError("extern unclosed '('")
         peek = self.peek_token()
         if peek.lexeme == ")":
             self._consume()
-            return inputs, arg_types
+            return inputs, arg_types, False
 
         while True:
+            # Check for ... (variadic)
+            peek = self.peek_token()
+            if peek is not None and peek.lexeme == "...":
+                self._consume()
+                variadic = True
+                if self._eof():
+                    raise ParseError("extern unclosed '(' after '...'")
+                closing = self._consume()
+                if closing.lexeme != ")":
+                    raise ParseError("expected ')' after '...' in extern parameter list")
+                break
             lexemes = self._collect_c_param_lexemes()
             arg_type = _normalize_c_type_tokens(lexemes, allow_default=False)
             if arg_type == "void" and inputs == 0:
@@ -1073,7 +1246,7 @@ class Parser:
                 closing = self._consume()
                 if closing.lexeme != ")":
                     raise ParseError("expected ')' after 'void' in extern parameter list")
-                return 0, []
+                return 0, [], False
             inputs += 1
             arg_types.append(arg_type)
             if self._eof():
@@ -1085,7 +1258,7 @@ class Parser:
                 raise ParseError(
                     f"expected ',' or ')' in extern parameter list, got '{separator.lexeme}'"
                 )
-        return inputs, arg_types
+        return inputs, arg_types, variadic
 
     def _collect_c_param_lexemes(self) -> List[str]:
         lexemes: List[str] = []
@@ -1121,6 +1294,7 @@ class Parser:
         ret_type: str,
         *,
         priority: int = 0,
+        variadic: bool = False,
     ) -> None:
         candidate = Word(name=name, priority=priority)
         word = self.dictionary.register(candidate)
@@ -1130,6 +1304,7 @@ class Parser:
         word.extern_inputs = inputs
         word.extern_outputs = outputs
         word.extern_signature = (arg_types, ret_type)
+        word.extern_variadic = variadic
 
     def _handle_token(self, token: Token) -> bool:
         """Handle a token. Returns True if the token list was modified (macro expansion)."""
@@ -1824,7 +1999,7 @@ class CompileTimeVM:
         self._repl_libs: List[str] = []
         self._native_return_stack: Optional[Any] = None  # ctypes buffer
         self._native_return_top: int = 0
-        # JIT cache: word name → ctypes callable
+        # JIT cache: word name -> ctypes callable
         self._jit_cache: Dict[str, Any] = {}
         self._jit_code_pages: List[Any] = []  # keep mmap pages alive
         # Pre-allocated output structs for JIT calls (lazily allocated)
@@ -1836,7 +2011,7 @@ class CompileTimeVM:
         self._bss_symbols: Dict[str, int] = {}
         # dlopen handles for C extern support
         self._dl_handles: List[Any] = []  # ctypes.CDLL handles
-        self._dl_func_cache: Dict[str, Any] = {}  # name → ctypes callable
+        self._dl_func_cache: Dict[str, Any] = {}  # name -> ctypes callable
         self._ct_libs: List[str] = []  # library names from -l flags
         self._ctypes_struct_cache: Dict[str, Any] = {}
         self.current_location: Optional[SourceLocation] = None
@@ -2325,7 +2500,18 @@ class CompileTimeVM:
 
         inputs = func._ct_inputs
         outputs = func._ct_outputs
-        arg_types = func._ct_signature[0] if func._ct_signature else []
+        arg_types = list(func._ct_signature[0]) if func._ct_signature else []
+
+        # For variadic externs, the TOS value is the extra arg count
+        # (consumed by the compiler, not passed to C).
+        va_extra = 0
+        if getattr(word, "extern_variadic", False):
+            va_extra = int(self.pop())
+            inputs += va_extra
+            for _ in range(va_extra):
+                arg_types.append("long")
+            # Update ctypes argtypes to include the variadic args
+            func.argtypes = list(func._ct_arg_types) + [ctypes.c_int64] * va_extra
 
         # Pop arguments off the native data stack (right-to-left / reverse order)
         raw_args = []
@@ -2513,13 +2699,13 @@ class CompileTimeVM:
                 line = re.sub(rf'(?<!\w){re.escape(lbl)}(?=\s|:|,|$|\]|\))',
                               '_jl' + lbl[1:], line)
 
-            # Patch [rel SYMBOL] → concrete address
+            # Patch [rel SYMBOL] -> concrete address
             m = _RE_REL_PAT.search(line)
             if m and m.group(1) in bss:
                 sym = m.group(1)
                 addr = bss[sym]
                 if line.lstrip().startswith("lea"):
-                    # lea REG, [rel X] → mov REG, addr
+                    # lea REG, [rel X] -> mov REG, addr
                     line = _RE_REL_PAT.sub(str(addr), line).replace("lea", "mov", 1)
                 else:
                     # e.g. mov rax, [rel X] or mov byte [rel X], val
@@ -3302,7 +3488,7 @@ class CompileTimeVM:
         ])
         if asm_body:
             patched_body = []
-            # Build BSS symbol table for [rel X] → concrete address substitution
+            # Build BSS symbol table for [rel X] -> concrete address substitution
             _bss_symbols: Dict[str, int] = {
                 "data_start": data_start,
                 "data_end": data_end,
@@ -3323,7 +3509,7 @@ class CompileTimeVM:
                 if m and m.group(1) in _bss_symbols:
                     sym = m.group(1)
                     addr = _bss_symbols[sym]
-                    # lea REG, [rel X]  →  mov REG, addr
+                    # lea REG, [rel X]  ->  mov REG, addr
                     if line.lstrip().startswith("lea"):
                         line = _RE_REL_PAT.sub(str(addr), line).replace("lea", "mov", 1)
                     else:
@@ -3461,7 +3647,7 @@ class CompileTimeVM:
         self._call_word(word)
 
     def _resolve_words_in_body(self, defn: Definition) -> None:
-        """Pre-resolve word name → Word objects on Op nodes (once per Definition)."""
+        """Pre-resolve word name -> Word objects on Op nodes (once per Definition)."""
         if defn._words_resolved:
             return
         lookup = self.dictionary.lookup
@@ -3568,7 +3754,7 @@ class CompileTimeVM:
                 if line == "ret":
                     # Last word: jmp to save; others: fall through
                     if word_idx < len(words) - 1:
-                        continue  # just skip ret → fall through
+                        continue  # just skip ret -> fall through
                     else:
                         line = "jmp _ct_save"
 
@@ -3577,7 +3763,7 @@ class CompileTimeVM:
                     # Use word-boundary replacement to avoid partial matches
                     line = re.sub(rf'(?<!\w){re.escape(label)}(?=\s|:|,|$|\]|\))', prefix + label, line)
 
-                # Patch [rel SYMBOL] → concrete address
+                # Patch [rel SYMBOL] -> concrete address
                 m = _RE_REL_PAT.search(line)
                 if m and m.group(1) in bss:
                     sym = m.group(1)
@@ -5013,7 +5199,7 @@ class Assembler:
                         b = nodes[idx + 1]
                         b_oc = b._opcode
 
-                        # literal + dup → literal literal
+                        # literal + dup -> literal literal
                         if b_oc == OP_WORD and b.data == "dup":
                             _opt_a(node)
                             _opt_a(_make_literal_op(node.data, node.loc))
@@ -5021,7 +5207,7 @@ class Assembler:
                             changed = True
                             continue
 
-                        # literal + drop → (nothing)
+                        # literal + drop -> (nothing)
                         if b_oc == OP_WORD and b.data == "drop":
                             idx += 2
                             changed = True
@@ -5432,7 +5618,7 @@ class Assembler:
             # depth tracks values on the data stack relative to entry.
             # 'main' starts with an empty stack.  For other words we can
             # only check underflows when a stack-effect comment provides
-            # the input count (e.g. ``# a b -- c`` → 2 inputs).
+            # the input count (e.g. ``# a b -- c`` -> 2 inputs).
             si = defn.stack_inputs
             if si is not None:
                 known_entry_depth = si
@@ -5872,7 +6058,7 @@ class Assembler:
             # stub now (after definitions) so the emitter does not
             # produce duplicate `_start` labels.
             if is_program and not (user_has_start or getattr(self, "_emitted_start", False)):
-                emission.text.extend([
+                _start_lines = [
                     "; __ORIGIN__ default_stub",
                     "global _start",
                     "_start:",
@@ -5892,10 +6078,13 @@ class Assembler:
                     "    mov rax, [r12]",
                     "    add r12, 8",
                     ".no_exit_value:",
+                ]
+                _start_lines.extend([
                     "    mov rdi, rax",
                     "    mov rax, 60",
                     "    syscall",
                 ])
+                emission.text.extend(_start_lines)
 
             self._emit_variables(module.variables)
 
@@ -6304,6 +6493,27 @@ class Assembler:
             builder.emit(f"    mov byte [{dst_expr} + {copied}], r11b")
             copied += 1
 
+    @staticmethod
+    def _pop_preceding_literal(builder: FunctionEmitter) -> Optional[int]:
+        """If the last emitted instructions are a literal push, remove them and return the value."""
+        text = builder.text
+        n = len(text)
+        if n < 3:
+            return None
+        # push_literal emits:  "; push N" / "sub r12, 8" / "mov qword [r12], N"
+        mov_line = text[n - 1].strip()
+        sub_line = text[n - 2].strip()
+        cmt_line = text[n - 3].strip()
+        if not (sub_line == "sub r12, 8" and mov_line.startswith("mov qword [r12],") and cmt_line.startswith("; push")):
+            return None
+        val_str = mov_line.split(",", 1)[1].strip()
+        try:
+            value = int(val_str)
+        except ValueError:
+            return None
+        del text[n - 3:n]
+        return value
+
     def _emit_extern_wordref(self, name: str, word: Word, builder: FunctionEmitter) -> None:
         inputs = getattr(word, "extern_inputs", 0)
         outputs = getattr(word, "extern_outputs", 0)
@@ -6316,6 +6526,21 @@ class Assembler:
         arg_types = list(signature[0]) if signature else ["long"] * inputs
         ret_type = signature[1] if signature else ("long" if outputs > 0 else "void")
 
+        # For variadic externs, consume the preceding literal as the count of
+        # extra variadic arguments.  These are NOT passed to the C function as
+        # a count parameter – they simply tell the compiler how many additional
+        # stack values to pop and place into registers / the C stack.
+        if getattr(word, "extern_variadic", False):
+            va_count = self._pop_preceding_literal(builder)
+            if va_count is None:
+                suffix = f" while emitting '{self._emit_stack[-1]}'" if self._emit_stack else ""
+                raise CompileError(
+                    f"variadic extern '{name}' requires a literal arg count on TOS{suffix}"
+                )
+            for _ in range(va_count):
+                arg_types.append("long")
+            inputs += va_count
+
         if len(arg_types) != inputs and signature is not None:
             suffix = f" while emitting '{self._emit_stack[-1]}'" if self._emit_stack else ""
             raise CompileError(f"extern '{name}' mismatch: {inputs} inputs vs {len(arg_types)} types{suffix}")
@@ -7449,8 +7674,8 @@ def _compile_syscall_stub(vm: CompileTimeVM) -> Any:
     # Output struct: [r12, r13, exit_flag, exit_code]
     #
     # Stack protocol (matching _emit_syscall_intrinsic):
-    #   TOS:   syscall number → rax
-    #   TOS-1: arg count → rcx
+    #   TOS:   syscall number -> rax
+    #   TOS-1: arg count -> rcx
     #   then args on stack as ... arg0 arg1 ... argN (argN is top)
     #
 
@@ -7590,7 +7815,7 @@ def _compile_syscall_stub(vm: CompileTimeVM) -> Any:
         raise RuntimeError("mmap failed for JIT syscall stub")
     ctypes.memmove(ptr, code, len(code))
     vm._jit_code_pages.append((ptr, page_size))
-    # Same signature: (r12, r13, out_ptr) → void
+    # Same signature: (r12, r13, out_ptr) -> void
     if CompileTimeVM._JIT_FUNC_TYPE is None:
         CompileTimeVM._JIT_FUNC_TYPE = ctypes.CFUNCTYPE(None, ctypes.c_int64, ctypes.c_int64, ctypes.c_void_p)
     func = CompileTimeVM._JIT_FUNC_TYPE(ptr)
@@ -7939,7 +8164,12 @@ def _parse_c_header_externs(header_text: str) -> List[str]:
         params_raw = m.group(2).strip()
 
         if "..." in params_raw:
-            continue
+            # Variadic function: strip the ... from parameter list, keep fixed args
+            params_fixed = re.sub(r",?\s*\.\.\.", "", params_raw).strip()
+            param_str = "void" if params_fixed in ("void", "") else params_fixed
+            is_variadic = True
+        else:
+            is_variadic = False
 
         tokens = prefix.split()
         if len(tokens) < 2:
@@ -7950,8 +8180,9 @@ def _parse_c_header_externs(header_text: str) -> List[str]:
         if not func_name or not re.match(r"^[A-Za-z_]\w*$", func_name):
             continue
 
-        # Skip typedef, struct/enum/union definitions
-        if tokens[0] in ("typedef", "struct", "enum", "union"):
+        # Skip typedef (struct/enum/union return types are fine — the regex
+        # already ensures this matched a function declaration with parentheses)
+        if tokens[0] in ("typedef",):
             continue
 
         # Build return type: strip API macros and calling-convention qualifiers
@@ -7974,18 +8205,109 @@ def _parse_c_header_externs(header_text: str) -> List[str]:
         if not ret_type:
             ret_type = "int"
 
-        param_str = "void" if params_raw in ("void", "") else params_raw
+        if not is_variadic:
+            param_str = "void" if params_raw in ("void", "") else params_raw
 
-        results.append(f"extern {ret_type} {func_name}({param_str})")
+        va_suffix = ", ..." if is_variadic else ""
+        results.append(f"extern {ret_type} {func_name}({param_str}{va_suffix})")
     return results
 
 
+# Map C types to L2 cstruct field types
+_C_TO_L2_FIELD_TYPE: Dict[str, str] = {
+    "char": "i8", "signed char": "i8", "unsigned char": "u8",
+    "short": "i16", "unsigned short": "u16", "short int": "i16",
+    "int": "i32", "unsigned int": "u32", "unsigned": "u32",
+    "long": "i64", "unsigned long": "u64", "long int": "i64",
+    "long long": "i64", "unsigned long long": "u64",
+    "float": "f32", "double": "f64",
+    "size_t": "u64", "ssize_t": "i64",
+    "int8_t": "i8", "uint8_t": "u8",
+    "int16_t": "i16", "uint16_t": "u16",
+    "int32_t": "i32", "uint32_t": "u32",
+    "int64_t": "i64", "uint64_t": "u64",
+}
+
+
+def _parse_c_header_structs(header_text: str) -> List[str]:
+    """Extract struct definitions from C header text and return L2 ``cstruct`` lines."""
+    text = re.sub(r"/\*.*?\*/", " ", header_text, flags=re.DOTALL)
+    text = re.sub(r"//[^\n]*", "", text)
+    text = re.sub(r"#[^\n]*", "", text)
+    text = re.sub(r"\s+", " ", text)
+
+    results: List[str] = []
+    # Match: struct Name { fields }; or typedef struct Name { fields } Alias;
+    # or typedef struct { fields } Name;
+    _RE_STRUCT = re.compile(
+        r"(?:typedef\s+)?struct\s*(\w*)\s*\{([^}]*)\}\s*(\w*)\s*;",
+    )
+    for m in _RE_STRUCT.finditer(text):
+        struct_name = m.group(1).strip()
+        body = m.group(2).strip()
+        typedef_name = m.group(3).strip()
+        # Prefer typedef name if present
+        name = typedef_name if typedef_name else struct_name
+        if not name or name.startswith("_"):
+            continue
+        fields = _extract_struct_fields(body)
+        if not fields:
+            continue
+        # Generate L2 cstruct declaration
+        field_parts = []
+        for fname, ftype in fields:
+            field_parts.append(f"cfield {fname} {ftype}")
+        results.append(f"cstruct {name} {' '.join(field_parts)} end")
+    return results
+
+
+def _extract_struct_fields(body: str) -> List[Tuple[str, str]]:
+    """Parse C struct field declarations into (name, l2_type) pairs."""
+    fields: List[Tuple[str, str]] = []
+    for decl in body.split(";"):
+        decl = decl.strip()
+        if not decl:
+            continue
+        # Skip bitfields
+        if ":" in decl:
+            continue
+        # Skip nested struct/union definitions (but allow struct pointers)
+        if ("struct " in decl or "union " in decl) and "*" not in decl:
+            continue
+        tokens = decl.split()
+        if len(tokens) < 2:
+            continue
+        # Last token is field name (may have * prefix for pointers)
+        field_name = tokens[-1].lstrip("*")
+        if not field_name or not re.match(r"^[A-Za-z_]\w*$", field_name):
+            continue
+        # Check if pointer
+        is_ptr = "*" in decl
+        if is_ptr:
+            fields.append((field_name, "ptr"))
+            continue
+        # Build type from all tokens except field name
+        type_tokens = tokens[:-1]
+        # Remove qualifiers
+        type_tokens = [t for t in type_tokens if t not in ("const", "volatile", "static",
+                                                            "register", "restrict", "_Atomic")]
+        ctype = " ".join(type_tokens)
+        l2_type = _C_TO_L2_FIELD_TYPE.get(ctype)
+        if l2_type is None:
+            # Unknown type, treat as pointer-sized
+            fields.append((field_name, "ptr"))
+        else:
+            fields.append((field_name, l2_type))
+    return fields
+
+
 class Compiler:
     def __init__(
         self,
         include_paths: Optional[Sequence[Path]] = None,
         *,
         macro_expansion_limit: int = DEFAULT_MACRO_EXPANSION_LIMIT,
+        defines: Optional[Sequence[str]] = None,
     ) -> None:
         self.reader = Reader()
         self.dictionary = bootstrap_dictionary()
@@ -8001,6 +8323,7 @@ class Compiler:
             include_paths = [Path("."), Path("./stdlib")]
         self.include_paths: List[Path] = [p.expanduser().resolve() for p in include_paths]
         self._loaded_files: Set[Path] = set()
+        self.defines: Set[str] = set(defines or [])
 
     def compile_source(
         self,
@@ -8044,6 +8367,23 @@ class Compiler:
 
     _import_resolve_cache: Dict[Tuple[Path, str], Path] = {}
 
+    def _preprocess_c_header(self, header_path: Path, raw_text: str) -> str:
+        """Try running the C preprocessor on a header file for accurate parsing.
+
+        Falls back to raw_text if the preprocessor is not available."""
+        import subprocess
+        try:
+            result = subprocess.run(
+                ["cc", "-E", "-P", "-D__attribute__(x)=", "-D__extension__=",
+                 "-D__restrict=", "-D__asm__(x)=", str(header_path)],
+                capture_output=True, text=True, timeout=10,
+            )
+            if result.returncode == 0 and result.stdout.strip():
+                return result.stdout
+        except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
+            pass
+        return raw_text
+
     def _resolve_import_target(self, importing_file: Path, target: str) -> Path:
         cache_key = (importing_file.parent, target)
         cached = self._import_resolve_cache.get(cache_key)
@@ -8160,9 +8500,54 @@ class Compiler:
         _spans_append = spans.append
         _FileSpan = FileSpan
 
+        # ifdef/ifndef/else/endif conditional compilation stack
+        # Each entry is True (include lines) or False (skip lines)
+        _ifdef_stack: List[bool] = []
+
+        def _ifdef_active() -> bool:
+            return all(_ifdef_stack)
+
         for line in contents.splitlines():
             stripped = line.strip()
 
+            # --- Conditional compilation directives ---
+            if stripped[:6] == "ifdef " or stripped == "ifdef":
+                name = stripped[6:].strip() if len(stripped) > 6 else ""
+                if not name:
+                    raise ParseError(f"ifdef missing symbol name at {path}:{file_line_no}")
+                _ifdef_stack.append(name in self.defines if _ifdef_active() else False)
+                _out_append("")  # placeholder to keep line numbers aligned
+                file_line_no += 1
+                continue
+            if stripped[:7] == "ifndef " or stripped == "ifndef":
+                name = stripped[7:].strip() if len(stripped) > 7 else ""
+                if not name:
+                    raise ParseError(f"ifndef missing symbol name at {path}:{file_line_no}")
+                _ifdef_stack.append(name not in self.defines if _ifdef_active() else False)
+                _out_append("")
+                file_line_no += 1
+                continue
+            if stripped == "elsedef":
+                if not _ifdef_stack:
+                    raise ParseError(f"elsedef without matching ifdef/ifndef at {path}:{file_line_no}")
+                _ifdef_stack[-1] = not _ifdef_stack[-1]
+                _out_append("")
+                file_line_no += 1
+                continue
+            if stripped == "endif":
+                if not _ifdef_stack:
+                    raise ParseError(f"endif without matching ifdef/ifndef at {path}:{file_line_no}")
+                _ifdef_stack.pop()
+                _out_append("")
+                file_line_no += 1
+                continue
+
+            # If inside a false ifdef branch, skip the line
+            if not _ifdef_active():
+                _out_append("")
+                file_line_no += 1
+                continue
+
             if not in_py_block and stripped[:3] == ":py" and "{" in stripped:
                 in_py_block = True
                 brace_depth = 0
@@ -8259,15 +8644,22 @@ class Compiler:
                     header_text = header_path.read_text()
                 except FileNotFoundError as exc:
                     raise ParseError(f"cimport cannot read {header_path}: {exc}") from exc
+
+                # Try running the C preprocessor for more accurate parsing
+                header_text = self._preprocess_c_header(header_path, header_text)
+
                 extern_lines = _parse_c_header_externs(header_text)
+                struct_lines = _parse_c_header_structs(header_text)
 
                 # begin_segment_if_needed inline
                 if segment_start_global is None:
                     segment_start_global = len(out_lines) + 1
                     segment_start_local = file_line_no
-                # Replace the cimport line with the extracted extern declarations
+                # Replace the cimport line with the extracted extern + struct declarations
                 for ext_line in extern_lines:
                     _out_append(ext_line)
+                for st_line in struct_lines:
+                    _out_append(st_line)
                 _out_append("")  # blank line after externs
                 file_line_no += 1
                 continue
@@ -8290,6 +8682,9 @@ class Compiler:
                 )
             )
 
+        if _ifdef_stack:
+            raise ParseError(f"unterminated ifdef/ifndef ({len(_ifdef_stack)} level(s) deep) in {path}")
+
 
 class BuildCache:
     """Caches compilation artifacts keyed by source content and compiler flags."""
@@ -8492,7 +8887,12 @@ def run_linker(obj_path: Path, exe_path: Path, debug: bool = False, libs=None, *
         cmd.extend(["-nostdlib", "-static"])
 
     if libs:
-        if not shared:
+        # Determine if any libs require dynamic linking (shared libraries).
+        needs_dynamic = any(
+            not (str(lib).endswith(".a") or str(lib).endswith(".o"))
+            for lib in libs if lib
+        )
+        if not shared and needs_dynamic:
             cmd.extend([
                 "-dynamic-linker", "/lib64/ld-linux-x86-64.so.2",
             ])
@@ -9129,7 +9529,7 @@ _DOC_STACK_RE = re.compile(r"^\s*#\s*([^\s]+)\s*(.*)$")
 _DOC_WORD_RE = re.compile(r"^\s*(?:inline\s+)?word\s+([^\s]+)\b")
 _DOC_ASM_RE = re.compile(r"^\s*:asm\s+([^\s{]+)")
 _DOC_PY_RE = re.compile(r"^\s*:py\s+([^\s{]+)")
-_DOC_MACRO_RE = re.compile(r"^\s*macro\s+([^\s]+)")
+_DOC_MACRO_RE = re.compile(r"^\s*macro\s+([^\s]+)(?:\s+(\d+))?")
 
 
 def _extract_stack_comment(text: str) -> Optional[Tuple[str, str]]:
@@ -9145,15 +9545,16 @@ def _extract_stack_comment(text: str) -> Optional[Tuple[str, str]]:
     return name, tail
 
 
-def _extract_definition_name(text: str, *, include_macros: bool = False) -> Optional[Tuple[str, str]]:
+def _extract_definition_name(text: str, *, include_macros: bool = False) -> Optional[Tuple[str, str, int]]:
     for kind, regex in (("word", _DOC_WORD_RE), ("asm", _DOC_ASM_RE), ("py", _DOC_PY_RE)):
         match = regex.match(text)
         if match is not None:
-            return kind, match.group(1)
+            return kind, match.group(1), -1
     if include_macros:
         match = _DOC_MACRO_RE.match(text)
         if match is not None:
-            return "macro", match.group(1)
+            arg_count = int(match.group(2)) if match.group(2) is not None else 0
+            return "macro", match.group(1), arg_count
     return None
 
 
@@ -9215,11 +9616,18 @@ def _scan_doc_file(
         parsed = _extract_definition_name(line, include_macros=include_macros)
         if parsed is None:
             continue
-        kind, name = parsed
+        kind, name, macro_args = parsed
         if not _is_doc_symbol_name(name, include_private=include_private):
             continue
         defined_names.add(name)
         stack_effect, description = _collect_leading_doc_comments(lines, idx, name)
+        # Auto-generate stack effect for macros from arg count
+        if kind == "macro" and not stack_effect:
+            if macro_args > 0:
+                params = " ".join(f"${i}" for i in range(macro_args))
+                stack_effect = f"macro({macro_args}): {params} -> expanded"
+            else:
+                stack_effect = "macro(0): -> expanded"
         if not include_undocumented and not stack_effect and not description:
             continue
         entries.append(
@@ -9416,6 +9824,8 @@ def _run_docs_tui(
     _MODE_LICENSE = 6
     _MODE_PHILOSOPHY = 7
     _MODE_CT_REF = 8
+    _MODE_QA = 9
+    _MODE_HOW = 10
 
     _TAB_LIBRARY = 0
     _TAB_LANG_REF = 1
@@ -9499,20 +9909,20 @@ def _run_docs_tui(
         {
             "name": "macro ... ;",
             "category": "Definitions",
-            "syntax": "macro <name> [<param_count>] <tokens...> ;",
+            "syntax": "macro <name> <param_count> <tokens...> ;",
             "summary": "Define a text macro with positional substitution.",
             "detail": (
                 "Records raw tokens until `;`. On expansion, `$0`, `$1`, ... "
                 "are replaced by positional arguments. Macros cannot nest.\n\n"
                 "Example:\n"
-                "  macro max2 [2] $0 $1 > if $0 else $1 end ;\n"
+                "  macro max2 2 $0 $1 > if $0 else $1 end ;\n"
                 "  5 3 max2   # leaves 5 on stack"
             ),
         },
         {
             "name": "struct ... end",
             "category": "Definitions",
-            "syntax": "struct <Name>\n  <size> <field>\n  ...\nend",
+            "syntax": "struct <Name>\n  field <field> <size>\n  ...\nend",
             "summary": "Define a packed struct with auto-generated accessors.",
             "detail": (
                 "Emits helper words:\n"
@@ -9524,8 +9934,8 @@ def _run_docs_tui(
                 "Layout is tightly packed with no implicit padding.\n\n"
                 "Example:\n"
                 "  struct Point\n"
-                "    8 x\n"
-                "    8 y\n"
+                "    field x 8\n"
+                "    field y 8\n"
                 "  end\n"
                 "  # Now Point.x@, Point.x!, Point.y@, Point.y! exist"
             ),
@@ -9575,8 +9985,8 @@ def _run_docs_tui(
             "summary": "Counted loop — pops count, loops that many times.",
             "detail": (
                 "Pops the loop count from the stack, stores it on the return stack, "
-                "and decrements it each pass. The loop index is accessible via "
-                "the compile-time word `i` inside macros.\n\n"
+                "and decrements it each pass. Use `r@` (return "
+                "stack peek) to read the current counter value.\n\n"
                 "Example:\n"
                 "  10 for\n"
                 "    \"hello\" puts\n"
@@ -9590,8 +10000,8 @@ def _run_docs_tui(
             "syntax": "begin <body> again",
             "summary": "Infinite loop (use `exit` or `goto` to break out).",
             "detail": (
-                "Creates an unconditional loop. The body repeats forever "
-                "available only at compile time.\n\n"
+                "Creates an unconditional loop. The body repeats forever.\n"
+                "Available only at compile time.\n\n"
                 "Example:\n"
                 "  begin\n"
                 "    read_stdin\n"
@@ -9607,7 +10017,7 @@ def _run_docs_tui(
             "summary": "Local jumps within a definition.",
             "detail": (
                 "Defines a local label and jumps to it. "
-                "to the enclosing word definition.\n\n"
+                "Labels are scoped to the enclosing word definition.\n\n"
                 "Example:\n"
                 "  word example\n"
                 "    label start\n"
@@ -10001,10 +10411,40 @@ def _run_docs_tui(
         "\n"
         "───────────────────────────────────────────────────────────\n"
         "\n"
-        "  L2 is a stack-based systems language that compiles\n"
-        "  ahead-of-time to native x86-64 Linux binaries. It\n"
-        "  descends from the Forth tradition: small words compose\n"
-        "  into larger words, and the machine is always visible.\n"
+        "  WHAT IS L2?\n"
+        "\n"
+        "  At its core, L2 is a programmable assembly templating\n"
+        "  engine with a Forth-style stack interface. You write\n"
+        "  small 'words' that compose into larger programs, and\n"
+        "  each word compiles to a known, inspectable sequence of\n"
+        "  x86-64 instructions. The language sits just above raw\n"
+        "  assembly — close enough to see every byte, high enough\n"
+        "  to be genuinely productive.\n"
+        "\n"
+        "  But L2 is more than a glorified macro assembler. Its\n"
+        "  compile-time virtual machine lets you run arbitrary L2\n"
+        "  code at compile time: generate words, compute lookup\n"
+        "  tables, build structs, or emit entire subsystems before\n"
+        "  a single byte of native code is produced. Text macros,\n"
+        "  :py blocks, and token hooks extend the syntax in ways\n"
+        "  that feel like language features — because they are.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────\n"
+        "\n"
+        "  WHY DOES L2 EXIST?\n"
+        "\n"
+        "  L2 was built for fun — and that's a feature, not an\n"
+        "  excuse. It exists because writing a compiler is deeply\n"
+        "  satisfying, because Forth's ideas deserve to be pushed\n"
+        "  further, and because sometimes you want to write a\n"
+        "  program that does exactly what you told it to.\n"
+        "\n"
+        "  That said, 'fun' doesn't mean 'toy'. L2 produces real\n"
+        "  native binaries, links against C libraries, and handles\n"
+        "  practical tasks like file I/O, hashmap manipulation,\n"
+        "  and async scheduling — all with a minimal runtime.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────\n"
         "\n"
         "  CORE TENETS\n"
         "\n"
@@ -10025,9 +10465,12 @@ def _run_docs_tui(
         "     it composes.\n"
         "\n"
         "  4. META-PROGRAMMABILITY\n"
-        "     The front-end is user-extensible: immediate words,\n"
-        "     text macros, :py blocks, and token hooks let you\n"
-        "     reshape syntax without forking the compiler.\n"
+        "     The front-end is user-extensible: text macros, :py\n"
+        "     blocks, immediate words, and token hooks reshape\n"
+        "     syntax at compile time. The compile-time VM can\n"
+        "     execute full L2 programs during compilation, making\n"
+        "     the boundary between 'language' and 'metaprogram'\n"
+        "     deliberately blurry.\n"
         "\n"
         "  5. UNSAFE BY DESIGN\n"
         "     Safety is the programmer's job, not the language's.\n"
@@ -10039,6 +10482,12 @@ def _run_docs_tui(
         "     It gives you alloc/free, puts/puti, arrays, and\n"
         "     file I/O. Everything else is your choice.\n"
         "\n"
+        "  7. FUN FIRST\n"
+        "     If using L2 feels like a chore, the design has\n"
+        "     failed. The language should reward curiosity and\n"
+        "     make you want to dig deeper into how things work.\n"
+        "     At least its fun for me to write programs in. ;)"
+        "\n"
         "───────────────────────────────────────────────────────────\n"
         "\n"
         "  L2 is for programmers who want to understand every\n"
@@ -10434,14 +10883,14 @@ def _run_docs_tui(
         "  § 16  WITH (SCOPED VARIABLES)\n"
         "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
         "\n"
-        "  with <name> ... end\n"
-        "    Bind the top of the data stack to a compile-time name.\n"
-        "    Inside the with block, referencing <name> pushes the\n"
-        "    bound value. At the end of the block the binding is\n"
-        "    removed. Useful for readability and avoiding stack shuffling.\n"
+        "  with <names...> in <body> end\n"
+        "    Pop values from the stack into named local variables.\n"
+        "    Inside the body, referencing a name reads the variable;\n"
+        "    `name !` writes to it. Variables are backed by hidden\n"
+        "    globals and are NOT re-entrant.\n"
         "\n"
-        "      10 with x\n"
-        "        x x +   # pushes 10 twice, adds → 20\n"
+        "      10 20 with x y in\n"
+        "        x y +   # reads x (10) and y (20), adds -> 30\n"
         "      end\n"
         "\n"
         "\n"
@@ -10510,6 +10959,282 @@ def _run_docs_tui(
         "═══════════════════════════════════════════════════════════════\n"
     )
 
+    _L2_QA_TEXT = (
+        "═══════════════════════════════════════════════════════════\n"
+        "              Q & A   /   T I P S   &   T R I C K S\n"
+        "═══════════════════════════════════════════════════════════\n"
+        "\n"
+        "  HOW DO I DEBUG AN L2 PROGRAM?\n"
+        "\n"
+        "    Compile with --debug to embed DWARF debug info, then\n"
+        "    launch with --dbg to drop straight into GDB:\n"
+        "\n"
+        "      python3 main.py my_program.sl --debug --dbg\n"
+        "\n"
+        "    Inside GDB you can:\n"
+        "      - Set breakpoints on word labels  (b word_main)\n"
+        "      - Inspect the data stack via r12  (x/8gx $r12)\n"
+        "      - Step through asm instructions   (si / ni)\n"
+        "      - View registers                  (info registers)\n"
+        "      - Disassemble a word              (disas word_foo)\n"
+        "\n"
+        "    Tip: r12 is the stack pointer. [r12] = TOS,\n"
+        "    [r12+8] = second element, etc.\n"
+        "\n"
+        "  HOW DO I VIEW THE GENERATED ASSEMBLY?\n"
+        "\n"
+        "    Use --emit-asm to stop after generating assembly:\n"
+        "\n"
+        "      python3 main.py my_program.sl --emit-asm\n"
+        "\n"
+        "    The .asm file is written to build/<name>.asm.\n"
+        "    You can also use -v1 or higher for timing info,\n"
+        "    -v2 for per-function details, and -v3 or -v4 for\n"
+        "    full optimization tracing.\n"
+        "\n"
+        "  HOW DO I CALL C FUNCTIONS?\n"
+        "\n"
+        "    Declare them with the C-style extern syntax:\n"
+        "\n"
+        "      extern int printf(const char* fmt, ...)\n"
+        "      extern void* malloc(size_t size)\n"
+        "\n"
+        "    Or use the legacy style:\n"
+        "\n"
+        "      extern printf 2 1\n"
+        "\n"
+        "    Link the library with -l:\n"
+        "\n"
+        "      python3 main.py my_program.sl -l c\n"
+        "\n"
+        "    You can also use cimport to auto-extract externs:\n"
+        "\n"
+        "      cimport \"my_header.h\"\n"
+        "\n"
+        "  HOW DO MACROS WORK?\n"
+        "\n"
+        "    Text macros are template expansions. Define with\n"
+        "    an optional parameter count:\n"
+        "\n"
+        "      macro square       # 0-arg: inline expansion\n"
+        "        dup *\n"
+        "      ;\n"
+        "\n"
+        "      macro defconst 2   # 2-arg: $0 and $1 are args\n"
+        "        word $0\n"
+        "          $1\n"
+        "        end\n"
+        "      ;\n"
+        "\n"
+        "    Use them normally; macro args are positional:\n"
+        "\n"
+        "      5 square           # expands to: 5 dup *\n"
+        "      defconst TEN 10    # defines: word TEN 10 end\n"
+        "\n"
+        "  HOW DO I RUN CODE AT COMPILE TIME?\n"
+        "\n"
+        "    Use --ct-run-main or --script to execute 'main' at\n"
+        "    compile time. The CT VM supports most stack ops, I/O,\n"
+        "    lists, hashmaps, and string manipulation.\n"
+        "\n"
+        "    You can also mark words as compile-time:\n"
+        "\n"
+        "      word generate-table\n"
+        "        # ... runs during compilation\n"
+        "      end\n"
+        "      compile-time generate-table\n"
+        "\n"
+        "  WHAT IS THE --SCRIPT FLAG?\n"
+        "\n"
+        "    Shorthand for --no-artifact --ct-run-main. It parses\n"
+        "    and runs 'main' in the compile-time VM without\n"
+        "    producing a binary — useful for scripts as the name suggests.\n"
+        "\n"
+        "  HOW DO I USE THE BUILD CACHE?\n"
+        "\n"
+        "    The cache is automatic. It stores assembly output\n"
+        "    and skips recompilation when source files haven't\n"
+        "    changed. Disable with --no-cache if needed.\n"
+        "\n"
+        "  HOW DO I DUMP THE CONTROL-FLOW GRAPH?\n"
+        "\n"
+        "    Use --dump-cfg to produce a Graphviz DOT file:\n"
+        "\n"
+        "      python3 main.py prog.sl --dump-cfg\n"
+        "      dot -Tpng build/prog.cfg.dot -o cfg.png\n"
+        "\n"
+        "  WHAT OPTIMIZATIONS DOES L2 PERFORM?\n"
+        "\n"
+        "    - Constant folding (--no-folding to disable)\n"
+        "    - Peephole optimization (--no-peephole)\n"
+        "    - Loop unrolling (--no-loop-unroll)\n"
+        "    - Auto-inlining of small asm bodies (--no-auto-inline)\n"
+        "    - Static list folding (--no-static-list-folding)\n"
+        "    - Dead code elimination (automatic)\n"
+        "    - -O0 disables all optimizations\n"
+        "    - -O2 disables all optimizations AND checks\n"
+        "\n"
+        "══════════════════════════════════════════════════════════\n"
+    )
+
+    _L2_HOW_TEXT = (
+        "═══════════════════════════════════════════════════════════════\n"
+        "          H O W   L 2   W O R K S   (I N T E R N A L S)\n"
+        "═══════════════════════════════════════════════════════════════\n"
+        "\n"
+        "  ARCHITECTURE OVERVIEW\n"
+        "\n"
+        "    The L2 compiler is a single-pass, single-file Python\n"
+        "    program (~13K lines) with these major stages:\n"
+        "\n"
+        "    1. READER/TOKENIZER\n"
+        "       Splits source into whitespace-delimited tokens.\n"
+        "       Tracks line, column, and byte offsets per token.\n"
+        "       Comment lines (starting with #) in word bodies are\n"
+        "       preserved as metadata but not compiled.\n"
+        "\n"
+        "    2. IMPORT RESOLUTION\n"
+        "       'import' and 'cimport' directives are resolved\n"
+        "       recursively. Each file is loaded once. Imports are\n"
+        "       concatenated into a single token stream with\n"
+        "       FileSpan markers for error reporting.\n"
+        "\n"
+        "    3. PARSER\n"
+        "       Walks the token stream and builds an IR Module of\n"
+        "       Op lists (one per word definition). Key features:\n"
+        "       - Word/asm/py/extern definitions -> dictionary\n"
+        "       - Control flow (if/else/end, while/do/end, for)\n"
+        "         compiled to label-based jumps\n"
+        "       - Macro expansion (text macros with $N params)\n"
+        "       - Token hooks for user-extensible syntax\n"
+        "       - Compile-time VM execution of immediate words\n"
+        "\n"
+        "    4. ASSEMBLER / CODE GENERATOR\n"
+        "       Converts the Op IR into NASM x86-64 assembly.\n"
+        "       Handles calling conventions, extern C FFI with\n"
+        "       full System V ABI support (register classification,\n"
+        "       struct passing, SSE arguments).\n"
+        "\n"
+        "    5. NASM + LINKER\n"
+        "       The assembly is assembled by NASM into an object\n"
+        "       file, then linked (via ld or gcc) into the final\n"
+        "       binary.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────────\n"
+        "\n"
+        "  THE STACKS\n"
+        "\n"
+        "    L2 uses register r12 as the stack pointer for its data\n"
+        "    stack. The stack grows downward:\n"
+        "\n"
+        "      push:  sub r12, 8; mov [r12], rax\n"
+        "      pop:   mov rax, [r12]; add r12, 8\n"
+        "\n"
+        "    The return stack lives in a separate buffer with r13 as\n"
+        "    its stack pointer (also grows downward). The native x86\n"
+        "    call/ret stack (rsp) is used only for word call/return\n"
+        "    linkage and C interop.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────────\n"
+        "\n"
+        "  THE COMPILE-TIME VM\n"
+        "\n"
+        "    The CT VM is a stack-based interpreter that runs during\n"
+        "    parsing. It maintains:\n"
+        "\n"
+        "      - A value stack (Python list of ints/strings/lists)\n"
+        "      - A dictionary of CT-callable words\n"
+        "      - A return stack for nested calls\n"
+        "\n"
+        "    CT words can:\n"
+        "      - Emit token sequences into the compiler's stream\n"
+        "      - Inspect/modify the parser state\n"
+        "      - Call other CT words or builtins\n"
+        "      - Perform I/O, string ops, list/hashmap manipulation\n"
+        "\n"
+        "    When --ct-run-main is used, the CT VM can also JIT-compile\n"
+        "    and execute native x86-64 code via the Keystone assembler\n"
+        "    engine (for words that need native performance).\n"
+        "\n"
+        "───────────────────────────────────────────────────────────────\n"
+        "\n"
+        "  OPTIMIZATION PASSES\n"
+        "\n"
+        "    CONSTANT FOLDING\n"
+        "      Evaluates pure arithmetic sequences (e.g., 3 4 +\n"
+        "      becomes push 7). Works across word boundaries for\n"
+        "      inlined words.\n"
+        "\n"
+        "    PEEPHOLE OPTIMIZATION\n"
+        "      Pattern-matches instruction sequences and\n"
+        "      replaces them with shorter equivalents. Examples:\n"
+        "        swap drop -> nip\n"
+        "        swap nip  -> drop\n"
+        "\n"
+        "    LOOP UNROLLING\n"
+        "      Small deterministic loops (e.g., '4 for ... next')\n"
+        "      are unrolled into straight-line code when the\n"
+        "      iteration count is known at compile time.\n"
+        "\n"
+        "    AUTO-INLINING\n"
+        "      Small asm-body words (below a size threshold) are\n"
+        "      automatically inlined at call sites, eliminating\n"
+        "      call/ret overhead.\n"
+        "\n"
+        "    STATIC LIST FOLDING\n"
+        "      List literals like [1 2 3] with all-constant\n"
+        "      elements are placed in .data instead of being\n"
+        "      built at runtime.\n"
+        "\n"
+        "    DEAD CODE ELIMINATION\n"
+        "      Words that are never called (and not 'main') are\n"
+        "      excluded from the final assembly output.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────────\n"
+        "\n"
+        "  EXTERN C FFI\n"
+        "\n"
+        "    L2's extern system supports the full System V AMD64 ABI:\n"
+        "\n"
+        "    - Integer args -> rdi, rsi, rdx, rcx, r8, r9, then stack\n"
+        "    - Float/double args -> xmm0..xmm7, then stack\n"
+        "    - Struct args classified per ABI eightbyte rules\n"
+        "    - Return values in rax (int), xmm0 (float), or via\n"
+        "      hidden sret pointer for large structs\n"
+        "    - RSP is aligned to 16 bytes before each call\n"
+        "\n"
+        "    The compiler auto-classifies argument types from the\n"
+        "    C-style declaration and generates the correct register\n"
+        "    shuffle and stack layout.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────────\n"
+        "\n"
+        "  QUIRKS & GOTCHAS\n"
+        "\n"
+        "    - No type system: everything is a 64-bit integer on\n"
+        "      the stack. Pointers, booleans, characters — all\n"
+        "      just numbers. Type safety is your responsibility.\n"
+        "\n"
+        "    - Macro expansion depth: macros can expand macros,\n"
+        "      but there's a limit (default 64, configurable via\n"
+        "      --macro-expansion-limit).\n"
+        "\n"
+        "    - :py blocks: Python code embedded in :py { ... }\n"
+        "      runs in the compiler's Python process. It has full\n"
+        "      access to the parser and dictionary — powerful but\n"
+        "      dangerous.\n"
+        "\n"
+        "    - The CT VM and native codegen share a dictionary\n"
+        "      but have separate stacks. A word defined at CT\n"
+        "      exists at CT only unless also compiled normally.\n"
+        "\n"
+        "    - The build cache tracks file mtimes and a hash of\n"
+        "      compiler flags. CT side effects invalidate the\n"
+        "      cache for that file.\n"
+        "\n"
+        "═══════════════════════════════════════════════════════════════\n"
+    )
+
     def _parse_sig_counts(effect: str) -> Tuple[int, int]:
         """Parse stack effect to (n_args, n_returns).
 
@@ -10629,6 +11354,15 @@ def _run_docs_tui(
                         extra_after += 1
                         if extra_after >= 3 or not stripped:
                             break
+            elif entry.kind == "macro":
+                # Show macro body until closing ';'
+                end = min(len(src_lines), start + 200)
+                for i in range(start, end):
+                    prefix = f"  {i + 1:4d}| "
+                    lines.append(prefix + src_lines[i])
+                    stripped = src_lines[i].strip()
+                    if stripped.endswith(";") and i >= start:
+                        break
             else:
                 end = min(len(src_lines), start + 30)
                 for i in range(start, end):
@@ -10645,6 +11379,27 @@ def _run_docs_tui(
             pass
         stdscr.keypad(True)
 
+        # Initialize color pairs for kind tags
+        _has_colors = False
+        try:
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(1, curses.COLOR_CYAN, -1)     # word
+                curses.init_pair(2, curses.COLOR_GREEN, -1)    # asm
+                curses.init_pair(3, curses.COLOR_YELLOW, -1)   # py
+                curses.init_pair(4, curses.COLOR_MAGENTA, -1)  # macro
+                _has_colors = True
+        except Exception:
+            pass
+
+        _KIND_COLORS = {
+            "word": curses.color_pair(1) if _has_colors else 0,
+            "asm": curses.color_pair(2) if _has_colors else 0,
+            "py": curses.color_pair(3) if _has_colors else 0,
+            "macro": curses.color_pair(4) if _has_colors else 0,
+        }
+
         nonlocal entries
         query = initial_query
         selected = 0
@@ -10738,7 +11493,7 @@ def _run_docs_tui(
                 _safe_addnstr(scr, y, x, label, width - x - 1, attr)
                 x += len(label) + 1
             # Right-aligned shortcuts
-            shortcuts = " L license  P philosophy "
+            shortcuts = " ? Q&A  H how  P philosophy  L license "
             if x + len(shortcuts) < width:
                 _safe_addnstr(scr, y, width - len(shortcuts) - 1, shortcuts, len(shortcuts), curses.A_DIM)
 
@@ -11104,7 +11859,7 @@ def _run_docs_tui(
                 cat_label = cat_names[lang_cat_filter]
                 header = f" Language Reference  {len(lang_entries)} entries  category: {cat_label}"
                 _safe_addnstr(stdscr, 1, 0, header, width - 1, curses.A_BOLD)
-                hint = " c category  Enter detail  j/k nav  Tab switch  C ct-ref  L license  P philosophy  q quit"
+                hint = " c category  Enter detail  j/k nav  Tab switch  C ct-ref  ? Q&A  H how  P philosophy  q quit"
                 _safe_addnstr(stdscr, 2, 0, hint, width - 1, curses.A_DIM)
 
                 for row in range(list_height):
@@ -11173,6 +11928,16 @@ def _run_docs_tui(
                     info_scroll = 0
                     mode = _MODE_PHILOSOPHY
                     continue
+                if key == ord("?"):
+                    info_lines = _L2_QA_TEXT.splitlines()
+                    info_scroll = 0
+                    mode = _MODE_QA
+                    continue
+                if key == ord("H"):
+                    info_lines = _L2_HOW_TEXT.splitlines()
+                    info_scroll = 0
+                    mode = _MODE_HOW
+                    continue
                 if key == ord("C"):
                     active_tab = _TAB_CT_REF
                     info_lines = _L2_CT_REF_TEXT.splitlines()
@@ -11227,9 +11992,15 @@ def _run_docs_tui(
                     continue
                 continue
 
-            # -- LICENSE / PHILOSOPHY MODE --
-            if mode in (_MODE_LICENSE, _MODE_PHILOSOPHY):
-                title = "License" if mode == _MODE_LICENSE else "Philosophy of L2"
+            # -- LICENSE / PHILOSOPHY / Q&A / HOW-IT-WORKS MODE --
+            if mode in (_MODE_LICENSE, _MODE_PHILOSOPHY, _MODE_QA, _MODE_HOW):
+                _info_titles = {
+                    _MODE_LICENSE: "License",
+                    _MODE_PHILOSOPHY: "Philosophy of L2",
+                    _MODE_QA: "Q&A / Tips & Tricks",
+                    _MODE_HOW: "How L2 Works (Internals)",
+                }
+                title = _info_titles.get(mode, "")
                 stdscr.erase()
                 _safe_addnstr(stdscr, 0, 0, f" {title} ", width - 1, curses.A_BOLD)
                 _safe_addnstr(stdscr, 1, 0, " q/Esc: back  j/k: scroll  PgUp/PgDn ", width - 1, curses.A_DIM)
@@ -11249,6 +12020,10 @@ def _run_docs_tui(
                 prev_mode = _MODE_LANG_REF if active_tab == _TAB_LANG_REF else (_MODE_CT_REF if active_tab == _TAB_CT_REF else _MODE_BROWSE)
                 if key in (27, ord("q"), ord("h"), curses.KEY_LEFT):
                     mode = prev_mode
+                    # Restore info_lines when returning to CT ref
+                    if prev_mode == _MODE_CT_REF:
+                        info_lines = _L2_CT_REF_TEXT.splitlines()
+                        info_scroll = 0
                     continue
                 if key in (curses.KEY_DOWN, ord("j")):
                     if info_scroll < max_iscroll:
@@ -11277,7 +12052,7 @@ def _run_docs_tui(
                 stdscr.erase()
                 _safe_addnstr(stdscr, 0, 0, " Compile-Time Reference ", width - 1, curses.A_BOLD)
                 _render_tab_bar(stdscr, 1, width)
-                _safe_addnstr(stdscr, 2, 0, " j/k scroll  PgUp/PgDn  Tab switch  L license  P philosophy  q quit", width - 1, curses.A_DIM)
+                _safe_addnstr(stdscr, 2, 0, " j/k scroll  PgUp/PgDn  Tab switch  ? Q&A  H how  P philosophy  L license  q quit", width - 1, curses.A_DIM)
                 body_height = max(1, height - 4)
                 max_iscroll = max(0, len(info_lines) - body_height)
                 if info_scroll > max_iscroll:
@@ -11327,6 +12102,16 @@ def _run_docs_tui(
                     info_scroll = 0
                     mode = _MODE_PHILOSOPHY
                     continue
+                if key == ord("?"):
+                    info_lines = _L2_QA_TEXT.splitlines()
+                    info_scroll = 0
+                    mode = _MODE_QA
+                    continue
+                if key == ord("H"):
+                    info_lines = _L2_HOW_TEXT.splitlines()
+                    info_scroll = 0
+                    mode = _MODE_HOW
+                    continue
                 if key == ord("C"):
                     active_tab = _TAB_CT_REF
                     info_lines = _L2_CT_REF_TEXT.splitlines()
@@ -11372,7 +12157,7 @@ def _run_docs_tui(
             header = f" L2 docs  {len(filtered)}/{len(entries)}" + (f"  search: {query}" if query else "") + filter_info
             _safe_addnstr(stdscr, 0, 0, header, width - 1, curses.A_BOLD)
             _render_tab_bar(stdscr, 1, width)
-            hint = " / search  f filters  r reload  Enter detail  j/k nav  Tab switch  C ct-ref  L license  P philosophy  q quit"
+            hint = " / search  f filters  r reload  Enter detail  Tab switch  C ct-ref  ? Q&A  H how  P philosophy  L license  q quit"
             _safe_addnstr(stdscr, 2, 0, hint, width - 1, curses.A_DIM)
 
             for row in range(list_height):
@@ -11381,10 +12166,23 @@ def _run_docs_tui(
                     break
                 entry = filtered[idx]
                 effect = entry.stack_effect if entry.stack_effect else ""
-                kind_tag = f"[{entry.kind}]"
-                line = f" {entry.name:24} {effect:30} {kind_tag}"
-                attr = curses.A_REVERSE if idx == selected else 0
-                _safe_addnstr(stdscr, 3 + row, 0, line, width - 1, attr)
+                kind_tag = f"[{entry.kind:5}]"
+                name_part = f" {entry.name:24} "
+                effect_part = f"{effect:30} "
+                is_sel = idx == selected
+                base_attr = curses.A_REVERSE if is_sel else 0
+                y = 3 + row
+                # Draw name
+                _safe_addnstr(stdscr, y, 0, name_part, width - 1, base_attr | curses.A_BOLD if is_sel else base_attr)
+                # Draw stack effect
+                x = len(name_part)
+                if x < width - 1:
+                    _safe_addnstr(stdscr, y, x, effect_part, width - x - 1, base_attr)
+                # Draw kind tag with color
+                x2 = x + len(effect_part)
+                if x2 < width - 1:
+                    kind_color = _KIND_COLORS.get(entry.kind, 0) if not is_sel else 0
+                    _safe_addnstr(stdscr, y, x2, kind_tag, width - x2 - 1, base_attr | kind_color)
 
             if filtered:
                 current = filtered[selected]
@@ -11414,6 +12212,16 @@ def _run_docs_tui(
                 info_scroll = 0
                 mode = _MODE_PHILOSOPHY
                 continue
+            if key == ord("?"):
+                info_lines = _L2_QA_TEXT.splitlines()
+                info_scroll = 0
+                mode = _MODE_QA
+                continue
+            if key == ord("H"):
+                info_lines = _L2_HOW_TEXT.splitlines()
+                info_scroll = 0
+                mode = _MODE_HOW
+                continue
             if key == ord("C"):
                 active_tab = _TAB_CT_REF
                 info_lines = _L2_CT_REF_TEXT.splitlines()
@@ -11608,6 +12416,32 @@ def cli(argv: Sequence[str]) -> int:
         default=DEFAULT_MACRO_EXPANSION_LIMIT,
         help="maximum nested macro expansion depth (default: %(default)s)",
     )
+    parser.add_argument(
+        "-D",
+        dest="defines",
+        action="append",
+        default=[],
+        metavar="NAME",
+        help="define a preprocessor symbol for ifdef/ifndef (repeatable)",
+    )
+    parser.add_argument(
+        "--check",
+        action="store_true",
+        help="validate source without producing artifacts (parse + compile only)",
+    )
+    parser.add_argument(
+        "-W",
+        dest="warnings",
+        action="append",
+        default=[],
+        metavar="NAME",
+        help="enable warning (e.g. -W redefine, -W stack-depth, -W all)",
+    )
+    parser.add_argument(
+        "--Werror",
+        action="store_true",
+        help="treat all warnings as errors",
+    )
 
     # Parse known and unknown args to allow -l flags anywhere
     args, unknown = parser.parse_known_args(argv)
@@ -11627,6 +12461,9 @@ def cli(argv: Sequence[str]) -> int:
         args.no_artifact = True
         args.ct_run_main = True
 
+    if args.check:
+        args.no_artifact = True
+
     if args.macro_expansion_limit < 1:
         parser.error("--macro-expansion-limit must be >= 1")
 
@@ -11740,6 +12577,7 @@ def cli(argv: Sequence[str]) -> int:
     compiler = Compiler(
         include_paths=[Path("."), Path("./stdlib"), *args.include_paths],
         macro_expansion_limit=args.macro_expansion_limit,
+        defines=args.defines,
     )
     compiler.assembler.enable_constant_folding = folding_enabled
     compiler.assembler.enable_static_list_folding = static_list_folding_enabled
@@ -11751,7 +12589,26 @@ def cli(argv: Sequence[str]) -> int:
     compiler.assembler.verbosity = verbosity
     if args.dump_cfg is not None:
         compiler.assembler._need_cfg = True
-
+    # Warning configuration
+    warnings_set = set(args.warnings)
+    werror = args.Werror
+    # Support GCC-style -Werror (single dash, parsed as -W error)
+    if "error" in warnings_set:
+        warnings_set.discard("error")
+        werror = True
+    # -Werror without explicit -W categories implies -W all
+    if werror and not warnings_set:
+        warnings_set.add("all")
+    compiler.parser._warnings_enabled = warnings_set
+    compiler.parser._werror = werror
+    # Route dictionary redefine warnings through the parser's _warn system
+    if warnings_set or werror:
+        def _dict_warn_cb(name: str, priority: int) -> None:
+            compiler.parser._warn(
+                compiler.parser._last_token, "redefine",
+                f"redefining word {name} (priority {priority})",
+            )
+        compiler.parser.dictionary.warn_callback = _dict_warn_cb
     cache: Optional[BuildCache] = None
     if not args.no_cache:
         cache = BuildCache(args.temp_dir / ".l2cache")
@@ -11822,12 +12679,36 @@ def cli(argv: Sequence[str]) -> int:
                 print(f"[error] compile-time execution of 'main' failed: {exc}")
                 return 1
     except (ParseError, CompileError, CompileTimeError) as exc:
-        print(f"[error] {exc}")
+        # Print all collected diagnostics in Rust-style format
+        use_color = sys.stderr.isatty()
+        diags = getattr(compiler.parser, 'diagnostics', []) if 'compiler' in dir() else []
+        if diags:
+            for diag in diags:
+                print(diag.format(color=use_color), file=sys.stderr)
+            error_count = sum(1 for d in diags if d.level == "error")
+            warn_count = sum(1 for d in diags if d.level == "warning")
+            summary_parts = []
+            if error_count:
+                summary_parts.append(f"{error_count} error(s)")
+            if warn_count:
+                summary_parts.append(f"{warn_count} warning(s)")
+            if summary_parts:
+                print(f"\n{' and '.join(summary_parts)} emitted", file=sys.stderr)
+        else:
+            print(f"[error] {exc}", file=sys.stderr)
         return 1
     except Exception as exc:
-        print(f"[error] unexpected failure: {exc}")
+        print(f"[error] unexpected failure: {exc}", file=sys.stderr)
         return 1
 
+    # Print any warnings accumulated during successful compilation
+    use_color = sys.stderr.isatty()
+    warnings = [d for d in compiler.parser.diagnostics if d.level == "warning"]
+    if warnings:
+        for diag in warnings:
+            print(diag.format(color=use_color), file=sys.stderr)
+        print(f"\n{len(warnings)} warning(s) emitted", file=sys.stderr)
+
     args.temp_dir.mkdir(parents=True, exist_ok=True)
     asm_path = args.temp_dir / (args.source.stem + ".asm")
     obj_path = args.temp_dir / (args.source.stem + ".o")
diff --git a/test.py b/test.py
index d5a5f83..5ffaad3 100644
--- a/test.py
+++ b/test.py
@@ -152,6 +152,7 @@ class TestCaseConfig:
     tags: List[str] = field(default_factory=list)
     requires: List[str] = field(default_factory=list)
     libs: List[str] = field(default_factory=list)
+    compile_args: List[str] = field(default_factory=list)
 
     @classmethod
     def from_meta(cls, data: Dict[str, Any]) -> "TestCaseConfig":
@@ -207,6 +208,11 @@ class TestCaseConfig:
             if not isinstance(libs, list) or not all(isinstance(item, str) for item in libs):
                 raise ValueError("libs must be a list of strings")
             cfg.libs = [item.strip() for item in libs if item.strip()]
+        if "compile_args" in data:
+            ca = data["compile_args"]
+            if not isinstance(ca, list) or not all(isinstance(item, str) for item in ca):
+                raise ValueError("compile_args must be a list of strings")
+            cfg.compile_args = list(ca)
         return cfg
 
 
@@ -442,6 +448,7 @@ class TestRunner:
             cmd.extend(["-l", lib])
         for lib in (extra_libs or []):
             cmd.extend(["-l", lib])
+        cmd.extend(case.config.compile_args)
         if self.args.ct_run_main:
             cmd.append("--ct-run-main")
         if self.args.verbose:
@@ -477,7 +484,7 @@ class TestRunner:
         obj_path = case.build_dir / f"{case.binary_stub}_fixture.o"
         archive_path = case.build_dir / f"lib{case.binary_stub}_fixture.a"
 
-        compile_cmd = [cc, "-O2", "-c", str(c_source), "-o", str(obj_path)]
+        compile_cmd = [cc, "-O2", "-fno-stack-protector", "-c", str(c_source), "-o", str(obj_path)]
         archive_cmd = [ar, "rcs", str(archive_path), str(obj_path)]
 
         if self.args.verbose:
@@ -655,6 +662,11 @@ class TestRunner:
             return self._sort_lines(text)
         if case.source.stem == "ct_test" and label == "compile":
             return self._mask_build_path(text, case.binary_stub)
+        if label == "compile":
+            # Normalize absolute source paths to relative for stable compile error comparison
+            source_dir = str(case.source.parent.resolve())
+            if source_dir:
+                text = text.replace(source_dir + "/", "")
         return text
 
     def _sort_lines(self, text: str) -> str:
diff --git a/tests/cimport_structs.c b/tests/cimport_structs.c
new file mode 100644
index 0000000..d58ff46
--- /dev/null
+++ b/tests/cimport_structs.c
@@ -0,0 +1,9 @@
+#include "cimport_structs.h"
+
+long point_sum_ptr(struct Point *p) {
+    return p->x + p->y;
+}
+
+long pair_sum_ptr(struct Pair *p) {
+    return p->a + p->b;
+}
diff --git a/tests/cimport_structs.expected b/tests/cimport_structs.expected
new file mode 100644
index 0000000..647a40f
--- /dev/null
+++ b/tests/cimport_structs.expected
@@ -0,0 +1,6 @@
+16
+10
+20
+30
+16
+300
diff --git a/tests/cimport_structs.h b/tests/cimport_structs.h
new file mode 100644
index 0000000..006753a
--- /dev/null
+++ b/tests/cimport_structs.h
@@ -0,0 +1,18 @@
+#ifndef TEST_STRUCTS_H
+#define TEST_STRUCTS_H
+
+struct Point {
+    long x;
+    long y;
+};
+
+struct Pair {
+    long a;
+    long b;
+};
+
+/* Pointer-based helpers (simple scalar ABI). */
+long point_sum_ptr(struct Point *p);
+long pair_sum_ptr(struct Pair *p);
+
+#endif
diff --git a/tests/cimport_structs.sl b/tests/cimport_structs.sl
new file mode 100644
index 0000000..a140f07
--- /dev/null
+++ b/tests/cimport_structs.sl
@@ -0,0 +1,31 @@
+import stdlib.sl
+
+# Test cimport: extract struct definitions and extern functions from a C header.
+cimport "cimport_structs.h"
+
+word main
+    # Verify that cstruct Point was generated with correct layout
+    Point.size puti cr       # 16 bytes (two i64 = 8+8)
+
+    # Allocate a Point, set fields, read them back
+    Point.size alloc dup >r
+    r@ 10 Point.x!
+    r@ 20 Point.y!
+    r@ Point.x@ puti cr      # 10
+    r@ Point.y@ puti cr      # 20
+
+    # Call C helper that takes a pointer (simple scalar ABI)
+    r@ point_sum_ptr puti cr  # 30
+    r> Point.size free
+
+    # Verify Pair struct layout
+    Pair.size puti cr         # 16
+
+    Pair.size alloc dup >r
+    r@ 100 Pair.a!
+    r@ 200 Pair.b!
+    r@ pair_sum_ptr puti cr   # 300
+    r> Pair.size free
+
+    0
+end
diff --git a/tests/error_recovery.compile.expected b/tests/error_recovery.compile.expected
new file mode 100644
index 0000000..01c570f
--- /dev/null
+++ b/tests/error_recovery.compile.expected
@@ -0,0 +1,12 @@
+error: unexpected 'end' at 6:8
+  --> error_recovery.sl:6:8
+    |
+  6 |     end end
+    |        ^^^
+error: unexpected 'end' at 10:8
+  --> error_recovery.sl:10:8
+     |
+  10 |     end end
+     |        ^^^
+
+2 error(s) emitted
diff --git a/tests/error_recovery.meta.json b/tests/error_recovery.meta.json
new file mode 100644
index 0000000..fa8dc2f
--- /dev/null
+++ b/tests/error_recovery.meta.json
@@ -0,0 +1,4 @@
+{
+  "expect_compile_error": true,
+  "description": "Compiler reports multiple errors via error recovery"
+}
diff --git a/tests/error_recovery.sl b/tests/error_recovery.sl
new file mode 100644
index 0000000..9bffb6b
--- /dev/null
+++ b/tests/error_recovery.sl
@@ -0,0 +1,15 @@
+# This file intentionally has multiple errors to test error recovery.
+# The compiler should report all of them rather than stopping at the first.
+# No stdlib import — keeps line numbers stable.
+
+word foo
+    end end
+end
+
+word bar
+    end end
+end
+
+word main
+    0
+end
diff --git a/tests/ifdef.expected b/tests/ifdef.expected
new file mode 100644
index 0000000..6c3f03c
--- /dev/null
+++ b/tests/ifdef.expected
@@ -0,0 +1,7 @@
+flag_on
+
+nope_off
+
+yes
+
+nested_ok
diff --git a/tests/ifdef.meta.json b/tests/ifdef.meta.json
new file mode 100644
index 0000000..dadba46
--- /dev/null
+++ b/tests/ifdef.meta.json
@@ -0,0 +1,3 @@
+{
+  "compile_args": ["-D", "TESTFLAG"]
+}
diff --git a/tests/ifdef.sl b/tests/ifdef.sl
new file mode 100644
index 0000000..baea3d9
--- /dev/null
+++ b/tests/ifdef.sl
@@ -0,0 +1,44 @@
+import stdlib/stdlib.sl
+import stdlib/io.sl
+
+# Test ifdef: TESTFLAG is defined via -D TESTFLAG
+ifdef TESTFLAG
+word show_flag
+    "flag_on" puts cr
+end
+endif
+
+# Test ifndef: NOPE is NOT defined
+ifndef NOPE
+word show_nope
+    "nope_off" puts cr
+end
+endif
+
+# Test ifdef with elsedef
+ifdef TESTFLAG
+word branch
+    "yes" puts cr
+end
+elsedef
+word branch
+    "no" puts cr
+end
+endif
+
+# Test nested: inner depends on outer
+ifdef TESTFLAG
+ifndef NOPE
+word nested
+    "nested_ok" puts cr
+end
+endif
+endif
+
+word main
+    show_flag
+    show_nope
+    branch
+    nested
+    0
+end
diff --git a/tests/ifndef.expected b/tests/ifndef.expected
new file mode 100644
index 0000000..81ddc2a
--- /dev/null
+++ b/tests/ifndef.expected
@@ -0,0 +1,3 @@
+guard_ok
+
+else_ok
diff --git a/tests/ifndef.sl b/tests/ifndef.sl
new file mode 100644
index 0000000..fb76c57
--- /dev/null
+++ b/tests/ifndef.sl
@@ -0,0 +1,33 @@
+import stdlib/stdlib.sl
+import stdlib/io.sl
+
+# No -D flags, so ifdef FOO is false, ifndef FOO is true
+
+ifdef FOO
+word dead_code
+    "BUG" puts cr
+end
+endif
+
+ifndef FOO
+word guarded
+    "guard_ok" puts cr
+end
+endif
+
+# elsedef: ifdef FALSE → skip, elsedef → include
+ifdef MISSING
+word wrong
+    "BUG" puts cr
+end
+elsedef
+word right
+    "else_ok" puts cr
+end
+endif
+
+word main
+    guarded
+    right
+    0
+end
diff --git a/tests/variadic_extern.c b/tests/variadic_extern.c
new file mode 100644
index 0000000..2ac2f2b
--- /dev/null
+++ b/tests/variadic_extern.c
@@ -0,0 +1,20 @@
+#include <stdarg.h>
+
+/* Sums variadic long args until sentinel -1 is seen. */
+long va_sum_sentinel(long first, ...) {
+    va_list ap;
+    va_start(ap, first);
+    long total = first;
+    while (1) {
+        long v = va_arg(ap, long);
+        if (v == -1) break;
+        total += v;
+    }
+    va_end(ap);
+    return total;
+}
+
+/* Non-variadic helper for comparison. */
+long add_two(long a, long b) {
+    return a + b;
+}
diff --git a/tests/variadic_extern.expected b/tests/variadic_extern.expected
new file mode 100644
index 0000000..889f330
--- /dev/null
+++ b/tests/variadic_extern.expected
@@ -0,0 +1,5 @@
+30
+hello
+42 99
+60
+
diff --git a/tests/variadic_extern.meta.json b/tests/variadic_extern.meta.json
new file mode 100644
index 0000000..79c4ecd
--- /dev/null
+++ b/tests/variadic_extern.meta.json
@@ -0,0 +1,4 @@
+{
+  "description": "Test variadic and non-variadic extern declarations with companion C file",
+  "libs": ["libc.so.6"]
+}
diff --git a/tests/variadic_extern.sl b/tests/variadic_extern.sl
new file mode 100644
index 0000000..600fe10
--- /dev/null
+++ b/tests/variadic_extern.sl
@@ -0,0 +1,34 @@
+import stdlib.sl
+
+# Test variadic extern declarations.
+# For variadic externs, the TOS literal before the call is the number of
+# extra variadic arguments.  The compiler consumes it (not passed to C).
+# String literals push (ptr, len) — use drop to discard the length for C.
+
+# printf: 1 fixed param (fmt), variadic args via TOS count
+extern int printf(const char *fmt, ...)
+extern int fflush(long stream)
+
+# Custom C variadic: sums args until sentinel -1 is seen
+extern long va_sum_sentinel(long first, ...)
+
+# Non-variadic extern for comparison
+extern long add_two(long a, long b)
+
+word main
+    # Test 1: non-variadic add_two
+    10 20 add_two puti cr
+
+    # Test 2: printf with 0 variadic args (just format string)
+    "hello\n" drop 0 printf drop
+    0 fflush drop
+
+    # Test 3: printf with 2 variadic args
+    "%d %d\n" drop 42 99 2 printf drop
+    0 fflush drop
+
+    # Test 4: va_sum_sentinel(10, 20, 30, -1) = 60
+    10 20 30 -1 3 va_sum_sentinel puti cr
+
+    0
+end