diff --git a/main.py b/main.py
index 9e8fa54..1c28106 100644
--- a/main.py
+++ b/main.py
@@ -50,12 +50,94 @@ def _get_struct():
     return _struct_mod
 
 
+class Diagnostic:
+    """Structured error/warning with optional source context and suggestions.
+
+    ``line`` and ``column`` are 1-based; 0 means the position is unknown.
+    """
+    __slots__ = ('level', 'message', 'path', 'line', 'column', 'length', 'hint', 'suggestion')
+
+    def __init__(
+        self,
+        level: str,
+        message: str,
+        path: Optional[Path] = None,
+        line: int = 0,
+        column: int = 0,
+        length: int = 0,
+        hint: str = "",
+        suggestion: str = "",
+    ) -> None:
+        self.level = level       # "error", "warning", "note"
+        self.message = message
+        self.path = path
+        self.line = line
+        self.column = column
+        self.length = length
+        self.hint = hint
+        self.suggestion = suggestion
+
+    def _source_line(self) -> Optional[str]:
+        """Return the text of line ``self.line`` of ``self.path``, or None if unavailable."""
+        try:
+            src_lines = self.path.read_text(encoding="utf-8", errors="ignore").splitlines()
+        except (OSError, ValueError, AttributeError):
+            return None  # best effort: the report is still useful without an excerpt
+        return src_lines[self.line - 1] if 0 < self.line <= len(src_lines) else None
+
+    def format(self, *, color: bool = True) -> str:
+        """Render a Rust-style report; ``color=False`` leaves out the ANSI escapes.
+
+        The hint and suggestion are kept even when no source line can be shown."""
+        _BLUE, _CYAN, _BOLD, _RST = (
+            ("\033[1;34m", "\033[1;36m", "\033[1m", "\033[0m") if color else ("", "", "", ""))
+        level_color = {"error": "\033[1;31m", "warning": "\033[1;33m"}.get(self.level, "\033[1;34m") if color else ""
+        parts: List[str] = [f"{level_color}{self.level}{_RST}{_BOLD}: {self.message}{_RST}"]
+        located = bool(self.path) and self.line > 0
+        src_line = self._source_line() if located else None
+        if located:
+            loc = f"{self.path}:{self.line}" + (f":{self.column}" if self.column > 0 else "")
+            parts.append(f"  {_BLUE}-->{_RST} {loc}")
+        if src_line is None:
+            # No excerpt to hang the advice on: print it directly under the header.
+            if self.hint:
+                parts.append(f"  {_CYAN}= note: {self.hint}{_RST}")
+            if self.suggestion:
+                parts.append(f"  {_CYAN}= help{_RST}: {self.suggestion}")
+            return "\n".join(parts)
+        line_no = str(self.line)
+        gutter = f"  {_BLUE}{' ' * len(line_no)} "
+        parts.append(f"{gutter}|{_RST}")
+        parts.append(f"  {_BLUE}{line_no} |{_RST} {src_line}")
+        if self.column > 0:
+            arrow = " " * (self.column - 1) + level_color + "^" * max(1, self.length) + _RST
+            parts.append(f"{gutter}|{_RST} {arrow}")
+        if self.hint:
+            parts.append(f"{gutter}|{_RST} {_CYAN}= note: {self.hint}{_RST}")
+        if self.suggestion:
+            parts.append(f"{gutter}|{_RST}")
+            parts.append(f"{gutter}= {_CYAN}help{_RST}: {self.suggestion}")
+        return "\n".join(parts)
+
+    def __str__(self) -> str:
+        """Return a one-line ``level: message [at path:line:column]`` summary."""
+        if self.path and self.line > 0:
+            return f"{self.level}: {self.message} at {self.path}:{self.line}:{self.column}"
+        return f"{self.level}: {self.message}"
+
+
 class ParseError(Exception):
     """Raised when the source stream cannot be parsed."""
+    def __init__(self, message: str = "", *, diagnostic: Optional[Diagnostic] = None) -> None:
+        super().__init__(message)
+        self.diagnostic = diagnostic  # optional Diagnostic supplied by the raiser
 
 
 class CompileError(Exception):
     """Raised when IR cannot be turned into assembly."""
+    def __init__(self, message: str = "", *, diagnostic: Optional[Diagnostic] = None) -> None:
+        super().__init__(message)
+        self.diagnostic = diagnostic  # optional Diagnostic supplied by the raiser
 
 
 class CompileTimeError(ParseError):
@@ -321,7 +403,7 @@ _PEEPHOLE_WORD_RULES: List[Tuple[Tuple[str, ...], Tuple[str, ...]]] = [
     (("swap", "or"), ("or",)),
     (("swap", "min"), ("min",)),
     (("swap", "max"), ("max",)),
-    # --- dup + self-idempotent binary → identity ---
+    # --- dup + self-idempotent binary -> identity ---
     (("dup", "bor"), ()),
     (("dup", "band"), ()),
     (("dup", "bxor"), ("drop", "literal_0")),
@@ -339,7 +421,7 @@ for _pat, _repl in _PEEPHOLE_WORD_RULES:
 
 _PEEPHOLE_MAX_PAT_LEN = max(len(p) for p, _ in _PEEPHOLE_WORD_RULES) if _PEEPHOLE_WORD_RULES else 0
 
-# Unified dict: pattern tuple → replacement tuple (for O(1) lookup)
+# Unified dict: pattern tuple -> replacement tuple (for O(1) lookup)
 _PEEPHOLE_ALL_RULES: Dict[Tuple[str, ...], Tuple[str, ...]] = {}
 for _pat, _repl in _PEEPHOLE_WORD_RULES:
     _PEEPHOLE_ALL_RULES[_pat] = _repl
@@ -573,7 +655,8 @@ class Word:
     __slots__ = ('name', 'priority', 'immediate', 'definition', 'macro', 'intrinsic',
                  'macro_expansion', 'macro_params', 'compile_time_intrinsic',
                  'runtime_intrinsic', 'compile_only', 'compile_time_override',
-                 'is_extern', 'extern_inputs', 'extern_outputs', 'extern_signature', 'inline')
+                 'is_extern', 'extern_inputs', 'extern_outputs', 'extern_signature',
+                 'extern_variadic', 'inline')
 
     def __init__(self, name: str, priority: int = 0, immediate: bool = False,
                  definition=None, macro=None, intrinsic=None,
@@ -581,7 +664,8 @@ class Word:
                  compile_time_intrinsic=None, runtime_intrinsic=None,
                  compile_only: bool = False, compile_time_override: bool = False,
                  is_extern: bool = False, extern_inputs: int = 0, extern_outputs: int = 0,
-                 extern_signature=None, inline: bool = False) -> None:
+                 extern_signature=None, extern_variadic: bool = False,
+                 inline: bool = False) -> None:
         self.name = name
         self.priority = priority
         self.immediate = immediate
@@ -598,6 +682,7 @@ class Word:
         self.extern_inputs = extern_inputs
         self.extern_outputs = extern_outputs
         self.extern_signature = extern_signature
+        self.extern_variadic = extern_variadic
         self.inline = inline
 
 
@@ -610,10 +695,11 @@ def _suppress_redefine_warnings_set(value: bool) -> None:
 
 
 class Dictionary:
-    __slots__ = ('words',)
+    __slots__ = ('words', 'warn_callback')
 
     def __init__(self, words: Dict[str, Word] = None) -> None:
         self.words = words if words is not None else {}
+        self.warn_callback: Optional[Callable] = None
 
     def register(self, word: Word) -> Word:
         existing = self.words.get(word.name)
@@ -646,7 +732,10 @@ class Dictionary:
             return word
 
         if not _suppress_redefine_warnings:
-            sys.stderr.write(f"[warn] redefining word {word.name} (priority {word.priority})\n")
+            if self.warn_callback is not None:
+                self.warn_callback(word.name, word.priority)
+            else:
+                sys.stderr.write(f"[warn] redefining word {word.name} (priority {word.priority})\n")
         self.words[word.name] = word
         return word
 
@@ -702,6 +791,10 @@ class Parser:
         self.cstruct_layouts: Dict[str, CStructLayout] = {}
         self._pending_inline_definition: bool = False
         self._pending_priority: Optional[int] = None
+        self.diagnostics: List[Diagnostic] = []
+        self._max_errors: int = 20
+        self._warnings_enabled: Set[str] = set()
+        self._werror: bool = False
 
     def _rebuild_span_index(self) -> None:
         """Rebuild bisect index after file_spans changes."""
@@ -731,6 +824,57 @@ class Parser:
                 return _make_loc(span.path, span.local_start_line + (tl - span.start_line), token.column)
         return _make_loc(_SOURCE_PATH, tl, token.column)
 
+    def _record_diagnostic(self, token: Optional[Token], message: str, *, level: str = "error", hint: str = "", suggestion: str = "") -> None:
+        """Append a Diagnostic for ``token``; raise ParseError once ``_max_errors`` errors exist."""
+        loc = self.location_for_token(token) if token else _make_loc(_SOURCE_PATH, 0, 0)
+        span = len(token.lexeme) if token else 0
+        diag = Diagnostic(level, message, path=loc.path, line=loc.line, column=loc.column, length=span, hint=hint, suggestion=suggestion)
+        self.diagnostics.append(diag)
+        if level != "error":
+            return
+        # Only error-level entries count toward the abort threshold.
+        errors_so_far = len([d for d in self.diagnostics if d.level == "error"])
+        if errors_so_far >= self._max_errors:
+            raise ParseError(f"too many errors ({self._max_errors}), aborting", diagnostic=diag)
+
+    def _warn(self, token: Optional[Token], category: str, message: str, *, hint: str = "", suggestion: str = "") -> None:
+        """Record a warning when ``category`` (or "all") is enabled; an error if ``_werror`` is set."""
+        enabled = self._warnings_enabled
+        if "all" in enabled or category in enabled:
+            level = "error" if self._werror else "warning"
+            self._record_diagnostic(token, message, level=level, hint=hint, suggestion=suggestion)
+
+    def _skip_to_recovery_point(self) -> None:
+        """Skip ahead to a safe point to resume parsing and drop in-progress parser state.
+
+        Scanning stops *before* the next ``word``, ``:asm``, ``:py`` or ``extern``
+        token so the main loop can parse it, or just *after* the first ``end`` or
+        ``;`` token; if neither is found, ``self.pos`` ends at the end of input.
+        Pending macro, priority, inline, definition and control-structure state is
+        then cleared and the context stack is cut back to its first entry.
+        """
+        resume_before = ("word", ":asm", ":py", "extern")
+        resume_after = ("end", ";")
+        # NOTE(review): the first ``end`` stops the scan even if it closes a nested
+        # construct rather than the definition itself — confirm that is intended.
+        while self.pos < len(self.tokens):
+            lex = self.tokens[self.pos].lexeme
+            if lex in resume_before:
+                break  # Don't consume — let the main loop pick it up
+            self.pos += 1
+            if lex in resume_after:
+                break
+        # Reset state for recovery
+        self.macro_recording = None
+        self._pending_priority = None
+        self._pending_inline_definition = False
+        while self.definition_stack:
+            self.definition_stack.pop()
+        while len(self.context_stack) > 1:
+            self.context_stack.pop()
+        self.control_stack.clear()
+
+
     def inject_token_objects(self, tokens: Sequence[Token]) -> None:
         """Insert tokens at the current parse position."""
         self.tokens[self.pos:self.pos] = list(tokens)
@@ -867,6 +1011,7 @@ class Parser:
         _tokens = self.tokens
         try:
             while self.pos < len(_tokens):
+              try:
                 token = _tokens[self.pos]
                 self.pos += 1
                 self._last_token = token
@@ -922,6 +1067,15 @@ class Parser:
                     continue
                 if self._handle_token(token):
                     _tokens = self.tokens
+              except CompileTimeError:
+                raise
+              except ParseError as _recov_exc:
+                self._record_diagnostic(self._last_token, str(_recov_exc))
+                self._skip_to_recovery_point()
+                _tokens = self.tokens
+                continue
+        except CompileTimeError:
+            raise
         except ParseError:
             raise
         except Exception as exc:
@@ -933,14 +1087,19 @@ class Parser:
             ) from None
 
         if self.macro_recording is not None:
-            raise ParseError("unterminated macro definition (missing ';')")
+            self._record_diagnostic(self._last_token, "unterminated macro definition (missing ';')")
         if self._pending_priority is not None:
-            raise ParseError(f"dangling priority {self._pending_priority} without following definition")
+            self._record_diagnostic(self._last_token, f"dangling priority {self._pending_priority} without following definition")
 
         if len(self.context_stack) != 1:
-            raise ParseError("unclosed definition at EOF")
+            self._record_diagnostic(self._last_token, "unclosed definition at EOF")
         if self.control_stack:
-            raise ParseError("unclosed control structure at EOF")
+            self._record_diagnostic(self._last_token, "unclosed control structure at EOF")
+
+        # If any errors were accumulated, raise with all diagnostics
+        error_count = sum(1 for d in self.diagnostics if d.level == "error")
+        if error_count > 0:
+            raise ParseError(f"compilation failed with {error_count} error(s)")
 
         module = self.context_stack.pop()
         if not isinstance(module, Module):  # pragma: no cover - defensive
@@ -1048,23 +1207,37 @@ class Parser:
             raise ParseError(f"extern expected identifier before '(' but got '{name_lexeme}'")
 
         ret_type = _normalize_c_type_tokens(prefix_tokens, allow_default=True)
-        inputs, arg_types = self._parse_c_param_list()
+        inputs, arg_types, variadic = self._parse_c_param_list()
         outputs = 0 if ret_type == "void" else 1
-        self._register_c_extern(name_lexeme, inputs, outputs, arg_types, ret_type, priority=priority)
+        self._register_c_extern(name_lexeme, inputs, outputs, arg_types, ret_type,
+                                priority=priority, variadic=variadic)
         return True
 
-    def _parse_c_param_list(self) -> Tuple[int, List[str]]:
+    def _parse_c_param_list(self) -> Tuple[int, List[str], bool]:
+        """Parse C-style parameter list. Returns (count, types, is_variadic)."""
         inputs = 0
         arg_types: List[str] = []
+        variadic = False
 
         if self._eof():
             raise ParseError("extern unclosed '('")
         peek = self.peek_token()
         if peek.lexeme == ")":
             self._consume()
-            return inputs, arg_types
+            return inputs, arg_types, False
 
         while True:
+            # Check for ... (variadic)
+            peek = self.peek_token()
+            if peek is not None and peek.lexeme == "...":
+                self._consume()
+                variadic = True
+                if self._eof():
+                    raise ParseError("extern unclosed '(' after '...'")
+                closing = self._consume()
+                if closing.lexeme != ")":
+                    raise ParseError("expected ')' after '...' in extern parameter list")
+                break
             lexemes = self._collect_c_param_lexemes()
             arg_type = _normalize_c_type_tokens(lexemes, allow_default=False)
             if arg_type == "void" and inputs == 0:
@@ -1073,7 +1246,7 @@ class Parser:
                 closing = self._consume()
                 if closing.lexeme != ")":
                     raise ParseError("expected ')' after 'void' in extern parameter list")
-                return 0, []
+                return 0, [], False
             inputs += 1
             arg_types.append(arg_type)
             if self._eof():
@@ -1085,7 +1258,7 @@ class Parser:
                 raise ParseError(
                     f"expected ',' or ')' in extern parameter list, got '{separator.lexeme}'"
                 )
-        return inputs, arg_types
+        return inputs, arg_types, variadic
 
     def _collect_c_param_lexemes(self) -> List[str]:
         lexemes: List[str] = []
@@ -1121,6 +1294,7 @@ class Parser:
         ret_type: str,
         *,
         priority: int = 0,
+        variadic: bool = False,
     ) -> None:
         candidate = Word(name=name, priority=priority)
         word = self.dictionary.register(candidate)
@@ -1130,6 +1304,7 @@ class Parser:
         word.extern_inputs = inputs
         word.extern_outputs = outputs
         word.extern_signature = (arg_types, ret_type)
+        word.extern_variadic = variadic
 
     def _handle_token(self, token: Token) -> bool:
         """Handle a token. Returns True if the token list was modified (macro expansion)."""
@@ -1824,7 +1999,7 @@ class CompileTimeVM:
         self._repl_libs: List[str] = []
         self._native_return_stack: Optional[Any] = None  # ctypes buffer
         self._native_return_top: int = 0
-        # JIT cache: word name → ctypes callable
+        # JIT cache: word name -> ctypes callable
         self._jit_cache: Dict[str, Any] = {}
         self._jit_code_pages: List[Any] = []  # keep mmap pages alive
         # Pre-allocated output structs for JIT calls (lazily allocated)
@@ -1836,7 +2011,7 @@ class CompileTimeVM:
         self._bss_symbols: Dict[str, int] = {}
         # dlopen handles for C extern support
         self._dl_handles: List[Any] = []  # ctypes.CDLL handles
-        self._dl_func_cache: Dict[str, Any] = {}  # name → ctypes callable
+        self._dl_func_cache: Dict[str, Any] = {}  # name -> ctypes callable
         self._ct_libs: List[str] = []  # library names from -l flags
         self._ctypes_struct_cache: Dict[str, Any] = {}
         self.current_location: Optional[SourceLocation] = None
@@ -2325,7 +2500,18 @@ class CompileTimeVM:
 
         inputs = func._ct_inputs
         outputs = func._ct_outputs
-        arg_types = func._ct_signature[0] if func._ct_signature else []
+        arg_types = list(func._ct_signature[0]) if func._ct_signature else []
+
+        # For variadic externs, the TOS value is the extra arg count
+        # (consumed by the compiler, not passed to C).
+        va_extra = 0
+        if getattr(word, "extern_variadic", False):
+            va_extra = int(self.pop())
+            inputs += va_extra
+            for _ in range(va_extra):
+                arg_types.append("long")
+            # Update ctypes argtypes to include the variadic args
+            func.argtypes = list(func._ct_arg_types) + [ctypes.c_int64] * va_extra
 
         # Pop arguments off the native data stack (right-to-left / reverse order)
         raw_args = []
@@ -2513,13 +2699,13 @@ class CompileTimeVM:
                 line = re.sub(rf'(?<!\w){re.escape(lbl)}(?=\s|:|,|$|\]|\))',
                               '_jl' + lbl[1:], line)
 
-            # Patch [rel SYMBOL] → concrete address
+            # Patch [rel SYMBOL] -> concrete address
             m = _RE_REL_PAT.search(line)
             if m and m.group(1) in bss:
                 sym = m.group(1)
                 addr = bss[sym]
                 if line.lstrip().startswith("lea"):
-                    # lea REG, [rel X] → mov REG, addr
+                    # lea REG, [rel X] -> mov REG, addr
                     line = _RE_REL_PAT.sub(str(addr), line).replace("lea", "mov", 1)
                 else:
                     # e.g. mov rax, [rel X] or mov byte [rel X], val
@@ -3302,7 +3488,7 @@ class CompileTimeVM:
         ])
         if asm_body:
             patched_body = []
-            # Build BSS symbol table for [rel X] → concrete address substitution
+            # Build BSS symbol table for [rel X] -> concrete address substitution
             _bss_symbols: Dict[str, int] = {
                 "data_start": data_start,
                 "data_end": data_end,
@@ -3323,7 +3509,7 @@ class CompileTimeVM:
                 if m and m.group(1) in _bss_symbols:
                     sym = m.group(1)
                     addr = _bss_symbols[sym]
-                    # lea REG, [rel X]  →  mov REG, addr
+                    # lea REG, [rel X]  ->  mov REG, addr
                     if line.lstrip().startswith("lea"):
                         line = _RE_REL_PAT.sub(str(addr), line).replace("lea", "mov", 1)
                     else:
@@ -3461,7 +3647,7 @@ class CompileTimeVM:
         self._call_word(word)
 
     def _resolve_words_in_body(self, defn: Definition) -> None:
-        """Pre-resolve word name → Word objects on Op nodes (once per Definition)."""
+        """Pre-resolve word name -> Word objects on Op nodes (once per Definition)."""
         if defn._words_resolved:
             return
         lookup = self.dictionary.lookup
@@ -3568,7 +3754,7 @@ class CompileTimeVM:
                 if line == "ret":
                     # Last word: jmp to save; others: fall through
                     if word_idx < len(words) - 1:
-                        continue  # just skip ret → fall through
+                        continue  # just skip ret -> fall through
                     else:
                         line = "jmp _ct_save"
 
@@ -3577,7 +3763,7 @@ class CompileTimeVM:
                     # Use word-boundary replacement to avoid partial matches
                     line = re.sub(rf'(?<!\w){re.escape(label)}(?=\s|:|,|$|\]|\))', prefix + label, line)
 
-                # Patch [rel SYMBOL] → concrete address
+                # Patch [rel SYMBOL] -> concrete address
                 m = _RE_REL_PAT.search(line)
                 if m and m.group(1) in bss:
                     sym = m.group(1)
@@ -5013,7 +5199,7 @@ class Assembler:
                         b = nodes[idx + 1]
                         b_oc = b._opcode
 
-                        # literal + dup → literal literal
+                        # literal + dup -> literal literal
                         if b_oc == OP_WORD and b.data == "dup":
                             _opt_a(node)
                             _opt_a(_make_literal_op(node.data, node.loc))
@@ -5021,7 +5207,7 @@ class Assembler:
                             changed = True
                             continue
 
-                        # literal + drop → (nothing)
+                        # literal + drop -> (nothing)
                         if b_oc == OP_WORD and b.data == "drop":
                             idx += 2
                             changed = True
@@ -5432,7 +5618,7 @@ class Assembler:
             # depth tracks values on the data stack relative to entry.
             # 'main' starts with an empty stack.  For other words we can
             # only check underflows when a stack-effect comment provides
-            # the input count (e.g. ``# a b -- c`` → 2 inputs).
+            # the input count (e.g. ``# a b -- c`` -> 2 inputs).
             si = defn.stack_inputs
             if si is not None:
                 known_entry_depth = si
@@ -5872,7 +6058,7 @@ class Assembler:
             # stub now (after definitions) so the emitter does not
             # produce duplicate `_start` labels.
             if is_program and not (user_has_start or getattr(self, "_emitted_start", False)):
-                emission.text.extend([
+                _start_lines = [
                     "; __ORIGIN__ default_stub",
                     "global _start",
                     "_start:",
@@ -5892,10 +6078,13 @@ class Assembler:
                     "    mov rax, [r12]",
                     "    add r12, 8",
                     ".no_exit_value:",
+                ]
+                _start_lines.extend([
                     "    mov rdi, rax",
                     "    mov rax, 60",
                     "    syscall",
                 ])
+                emission.text.extend(_start_lines)
 
             self._emit_variables(module.variables)
 
@@ -6304,6 +6493,27 @@ class Assembler:
             builder.emit(f"    mov byte [{dst_expr} + {copied}], r11b")
             copied += 1
 
+    @staticmethod
+    def _pop_preceding_literal(builder: FunctionEmitter) -> Optional[int]:
+        """Undo a trailing literal push in ``builder.text`` and return its integer value.
+
+        Returns None (text untouched) unless the text ends with "; push N" /
+        "sub r12, 8" / "mov qword [r12], N" and N parses with ``int()``.
+        """
+        text = builder.text
+        if len(text) < 3:
+            return None
+        comment, adjust, store = (entry.strip() for entry in text[-3:])
+        if not (comment.startswith("; push") and adjust == "sub r12, 8"
+                and store.startswith("mov qword [r12],")):
+            return None
+        try:
+            value = int(store.partition(",")[2].strip())
+        except ValueError:
+            return None
+        del text[-3:]
+        return value
+
     def _emit_extern_wordref(self, name: str, word: Word, builder: FunctionEmitter) -> None:
         inputs = getattr(word, "extern_inputs", 0)
         outputs = getattr(word, "extern_outputs", 0)
@@ -6316,6 +6526,21 @@ class Assembler:
         arg_types = list(signature[0]) if signature else ["long"] * inputs
         ret_type = signature[1] if signature else ("long" if outputs > 0 else "void")
 
+        # For variadic externs, consume the preceding literal as the count of
+        # extra variadic arguments.  These are NOT passed to the C function as
+        # a count parameter – they simply tell the compiler how many additional
+        # stack values to pop and place into registers / the C stack.
+        if getattr(word, "extern_variadic", False):
+            va_count = self._pop_preceding_literal(builder)
+            if va_count is None:
+                suffix = f" while emitting '{self._emit_stack[-1]}'" if self._emit_stack else ""
+                raise CompileError(
+                    f"variadic extern '{name}' requires a literal arg count on TOS{suffix}"
+                )
+            for _ in range(va_count):
+                arg_types.append("long")
+            inputs += va_count
+
         if len(arg_types) != inputs and signature is not None:
             suffix = f" while emitting '{self._emit_stack[-1]}'" if self._emit_stack else ""
             raise CompileError(f"extern '{name}' mismatch: {inputs} inputs vs {len(arg_types)} types{suffix}")
@@ -7449,8 +7674,8 @@ def _compile_syscall_stub(vm: CompileTimeVM) -> Any:
     # Output struct: [r12, r13, exit_flag, exit_code]
     #
     # Stack protocol (matching _emit_syscall_intrinsic):
-    #   TOS:   syscall number → rax
-    #   TOS-1: arg count → rcx
+    #   TOS:   syscall number -> rax
+    #   TOS-1: arg count -> rcx
     #   then args on stack as ... arg0 arg1 ... argN (argN is top)
     #
 
@@ -7590,7 +7815,7 @@ def _compile_syscall_stub(vm: CompileTimeVM) -> Any:
         raise RuntimeError("mmap failed for JIT syscall stub")
     ctypes.memmove(ptr, code, len(code))
     vm._jit_code_pages.append((ptr, page_size))
-    # Same signature: (r12, r13, out_ptr) → void
+    # Same signature: (r12, r13, out_ptr) -> void
     if CompileTimeVM._JIT_FUNC_TYPE is None:
         CompileTimeVM._JIT_FUNC_TYPE = ctypes.CFUNCTYPE(None, ctypes.c_int64, ctypes.c_int64, ctypes.c_void_p)
     func = CompileTimeVM._JIT_FUNC_TYPE(ptr)
@@ -7939,7 +8164,12 @@ def _parse_c_header_externs(header_text: str) -> List[str]:
         params_raw = m.group(2).strip()
 
         if "..." in params_raw:
-            continue
+            # Variadic function: strip the ... from parameter list, keep fixed args
+            params_fixed = re.sub(r",?\s*\.\.\.", "", params_raw).strip()
+            param_str = "void" if params_fixed in ("void", "") else params_fixed
+            is_variadic = True
+        else:
+            is_variadic = False
 
         tokens = prefix.split()
         if len(tokens) < 2:
@@ -7950,8 +8180,9 @@ def _parse_c_header_externs(header_text: str) -> List[str]:
         if not func_name or not re.match(r"^[A-Za-z_]\w*$", func_name):
             continue
 
-        # Skip typedef, struct/enum/union definitions
-        if tokens[0] in ("typedef", "struct", "enum", "union"):
+        # Skip typedef (struct/enum/union return types are fine — the regex
+        # already ensures this matched a function declaration with parentheses)
+        if tokens[0] in ("typedef",):
             continue
 
         # Build return type: strip API macros and calling-convention qualifiers
@@ -7974,18 +8205,109 @@ def _parse_c_header_externs(header_text: str) -> List[str]:
         if not ret_type:
             ret_type = "int"
 
-        param_str = "void" if params_raw in ("void", "") else params_raw
+        if not is_variadic:
+            param_str = "void" if params_raw in ("void", "") else params_raw
 
-        results.append(f"extern {ret_type} {func_name}({param_str})")
+        va_suffix = ", ..." if is_variadic else ""
+        results.append(f"extern {ret_type} {func_name}({param_str}{va_suffix})")
     return results
 
 
+# Map C types to L2 cstruct field types
+# (multi-word integer types are listed with and without the optional trailing "int")
+_C_TO_L2_FIELD_TYPE: Dict[str, str] = {
+    "char": "i8", "signed char": "i8", "unsigned char": "u8",
+    "short": "i16", "short int": "i16", "unsigned short": "u16", "unsigned short int": "u16",
+    "int": "i32", "signed": "i32", "signed int": "i32",
+    "unsigned": "u32", "unsigned int": "u32",
+    "long": "i64", "long int": "i64", "unsigned long": "u64", "unsigned long int": "u64",
+    "long long": "i64", "long long int": "i64",
+    "unsigned long long": "u64", "unsigned long long int": "u64",
+    "float": "f32", "double": "f64", "size_t": "u64", "ssize_t": "i64",
+    "int8_t": "i8", "uint8_t": "u8", "int16_t": "i16", "uint16_t": "u16",
+    "int32_t": "i32", "uint32_t": "u32", "int64_t": "i64", "uint64_t": "u64",
+}
+
+
+def _parse_c_header_structs(header_text: str) -> List[str]:
+    """Extract struct definitions from C header text and return L2 ``cstruct`` lines.
+
+    A ``typedef struct ... Alias;`` is emitted under its alias; a plain
+    ``struct Tag { ... }`` under its tag.  An identifier after the closing brace
+    of a non-typedef struct declares a variable and is never used as the name.
+    Structs with no usable name, a name starting with "_", or no fields that
+    ``_extract_struct_fields`` accepts are skipped.
+    """
+    text = re.sub(r"/\*.*?\*/", " ", header_text, flags=re.DOTALL)
+    text = re.sub(r"//[^\n]*", "", text)
+    text = re.sub(r"#[^\n]*", "", text)
+    text = re.sub(r"\s+", " ", text)
+
+    results: List[str] = []
+    # Match: struct Name { fields }; or typedef struct Name { fields } Alias;
+    # or typedef struct { fields } Name;  (const/volatile may follow "typedef")
+    _RE_STRUCT = re.compile(r"(typedef\s+(?:(?:const|volatile)\s+)*)?struct\s*(\w*)\s*\{([^}]*)\}\s*(\w*)\s*;")
+    for m in _RE_STRUCT.finditer(text):
+        is_typedef, tag, body, trailing = m.groups()
+        # Prefer the typedef alias; without "typedef" the trailing word is a variable.
+        name = trailing if is_typedef and trailing else tag
+        if not name or name.startswith("_"):
+            continue
+        fields = _extract_struct_fields(body.strip())
+        if not fields:
+            continue
+        field_parts = " ".join(f"cfield {fname} {ftype}" for fname, ftype in fields)
+        results.append(f"cstruct {name} {field_parts} end")
+    return results
+
+
+def _extract_struct_fields(body: str) -> List[Tuple[str, str]]:
+    """Parse C struct field declarations into (name, l2_type) pairs.
+
+    ``body`` is the text between the struct's braces.  Bitfields, by-value
+    struct/union members and declarators whose name is not a plain identifier
+    (arrays, function pointers) are skipped; pointers and types missing from
+    ``_C_TO_L2_FIELD_TYPE`` become "ptr".  One declaration may name several
+    fields: ``int x, *y;`` yields ("x", "i32") and ("y", "ptr").
+    """
+    qualifiers = ("const", "volatile", "static", "register", "restrict", "_Atomic")
+    fields: List[Tuple[str, str]] = []
+    for decl in body.split(";"):
+        decl = decl.strip()
+        # Skip empty statements and bitfields
+        if not decl or ":" in decl:
+            continue
+        # Skip nested struct/union definitions (but allow struct pointers)
+        if ("struct " in decl or "union " in decl) and "*" not in decl:
+            continue
+        # A comma separates declarators sharing one base type, except inside the
+        # parentheses of a function-pointer parameter list.
+        declarators = [decl] if "(" in decl else decl.split(",")
+        head = declarators[0].split()
+        if len(head) < 2:
+            continue
+        # The last token of the first declarator is its name; the rest is the
+        # base type, minus qualifiers and any "*" (which binds to the name).
+        base = " ".join(head[:-1]).replace("*", " ").split()
+        ctype = " ".join(tok for tok in base if tok not in qualifiers)
+        value_type = _C_TO_L2_FIELD_TYPE.get(ctype, "ptr")  # unknown: pointer-sized
+        # Re-attach the pointer marker so the loop below treats every declarator alike.
+        declarators[0] = ("*" if "*" in declarators[0] else "") + head[-1]
+        for declarator in declarators:
+            tokens = declarator.split()
+            field_name = tokens[-1].lstrip("*") if tokens else ""
+            if re.match(r"^[A-Za-z_]\w*$", field_name):
+                fields.append((field_name, "ptr" if "*" in declarator else value_type))
+    return fields
+
+
 class Compiler:
     def __init__(
         self,
         include_paths: Optional[Sequence[Path]] = None,
         *,
         macro_expansion_limit: int = DEFAULT_MACRO_EXPANSION_LIMIT,
+        defines: Optional[Sequence[str]] = None,
     ) -> None:
         self.reader = Reader()
         self.dictionary = bootstrap_dictionary()
@@ -8001,6 +8323,7 @@ class Compiler:
             include_paths = [Path("."), Path("./stdlib")]
         self.include_paths: List[Path] = [p.expanduser().resolve() for p in include_paths]
         self._loaded_files: Set[Path] = set()
+        self.defines: Set[str] = set(defines or [])
 
     def compile_source(
         self,
@@ -8044,6 +8367,23 @@ class Compiler:
 
     _import_resolve_cache: Dict[Tuple[Path, str], Path] = {}
 
+    def _preprocess_c_header(self, header_path: Path, raw_text: str) -> str:
+        """Return header_path as expanded by ``cc -E -P``, or raw_text when that fails.
+
+        raw_text is returned if cc cannot be started, times out, exits non-zero, or prints only whitespace."""
+        import subprocess  # function-scope import
+        try:
+            result = subprocess.run(
+                ["cc", "-E", "-P", "-D__attribute__(x)=", "-D__extension__=",  # each -D defines a compiler keyword to nothing
+                 "-D__restrict=", "-D__asm__(x)=", str(header_path)],
+                capture_output=True, text=True, timeout=10,  # stdout/stderr captured as str; 10 second limit
+            )
+            if result.returncode == 0 and result.stdout.strip():  # trust the output only if cc succeeded and printed something
+                return result.stdout
+        except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
+            pass  # best effort: fall through to the unprocessed text
+        return raw_text
+
     def _resolve_import_target(self, importing_file: Path, target: str) -> Path:
         cache_key = (importing_file.parent, target)
         cached = self._import_resolve_cache.get(cache_key)
@@ -8160,9 +8500,54 @@ class Compiler:
         _spans_append = spans.append
         _FileSpan = FileSpan
 
+        # ifdef/ifndef/else/endif conditional compilation stack
+        # Each entry is True (include lines) or False (skip lines)
+        _ifdef_stack: List[bool] = []
+
+        def _ifdef_active() -> bool:
+            return all(_ifdef_stack)
+
         for line in contents.splitlines():
             stripped = line.strip()
 
+            # --- Conditional compilation directives ---
+            if stripped[:6] == "ifdef " or stripped == "ifdef":
+                name = stripped[6:].strip() if len(stripped) > 6 else ""
+                if not name:
+                    raise ParseError(f"ifdef missing symbol name at {path}:{file_line_no}")
+                _ifdef_stack.append(name in self.defines if _ifdef_active() else False)
+                _out_append("")  # placeholder to keep line numbers aligned
+                file_line_no += 1
+                continue
+            if stripped[:7] == "ifndef " or stripped == "ifndef":
+                name = stripped[7:].strip() if len(stripped) > 7 else ""
+                if not name:
+                    raise ParseError(f"ifndef missing symbol name at {path}:{file_line_no}")
+                _ifdef_stack.append(name not in self.defines if _ifdef_active() else False)
+                _out_append("")
+                file_line_no += 1
+                continue
+            if stripped == "elsedef":
+                if not _ifdef_stack:
+                    raise ParseError(f"elsedef without matching ifdef/ifndef at {path}:{file_line_no}")
+                _ifdef_stack[-1] = not _ifdef_stack[-1]
+                _out_append("")
+                file_line_no += 1
+                continue
+            if stripped == "endif":
+                if not _ifdef_stack:
+                    raise ParseError(f"endif without matching ifdef/ifndef at {path}:{file_line_no}")
+                _ifdef_stack.pop()
+                _out_append("")
+                file_line_no += 1
+                continue
+
+            # If inside a false ifdef branch, skip the line
+            if not _ifdef_active():
+                _out_append("")
+                file_line_no += 1
+                continue
+
             if not in_py_block and stripped[:3] == ":py" and "{" in stripped:
                 in_py_block = True
                 brace_depth = 0
@@ -8259,15 +8644,22 @@ class Compiler:
                     header_text = header_path.read_text()
                 except FileNotFoundError as exc:
                     raise ParseError(f"cimport cannot read {header_path}: {exc}") from exc
+
+                # Try running the C preprocessor for more accurate parsing
+                header_text = self._preprocess_c_header(header_path, header_text)
+
                 extern_lines = _parse_c_header_externs(header_text)
+                struct_lines = _parse_c_header_structs(header_text)
 
                 # begin_segment_if_needed inline
                 if segment_start_global is None:
                     segment_start_global = len(out_lines) + 1
                     segment_start_local = file_line_no
-                # Replace the cimport line with the extracted extern declarations
+                # Replace the cimport line with the extracted extern + struct declarations
                 for ext_line in extern_lines:
                     _out_append(ext_line)
+                for st_line in struct_lines:
+                    _out_append(st_line)
                 _out_append("")  # blank line after externs
                 file_line_no += 1
                 continue
@@ -8290,6 +8682,9 @@ class Compiler:
                 )
             )
 
+        if _ifdef_stack:
+            raise ParseError(f"unterminated ifdef/ifndef ({len(_ifdef_stack)} level(s) deep) in {path}")
+
 
 class BuildCache:
     """Caches compilation artifacts keyed by source content and compiler flags."""
@@ -8492,7 +8887,12 @@ def run_linker(obj_path: Path, exe_path: Path, debug: bool = False, libs=None, *
         cmd.extend(["-nostdlib", "-static"])
 
     if libs:
-        if not shared:
+        # Determine if any libs require dynamic linking (shared libraries).
+        needs_dynamic = any(
+            not (str(lib).endswith(".a") or str(lib).endswith(".o"))
+            for lib in libs if lib
+        )
+        if not shared and needs_dynamic:
             cmd.extend([
                 "-dynamic-linker", "/lib64/ld-linux-x86-64.so.2",
             ])
@@ -9129,7 +9529,7 @@ _DOC_STACK_RE = re.compile(r"^\s*#\s*([^\s]+)\s*(.*)$")
 _DOC_WORD_RE = re.compile(r"^\s*(?:inline\s+)?word\s+([^\s]+)\b")
 _DOC_ASM_RE = re.compile(r"^\s*:asm\s+([^\s{]+)")
 _DOC_PY_RE = re.compile(r"^\s*:py\s+([^\s{]+)")
-_DOC_MACRO_RE = re.compile(r"^\s*macro\s+([^\s]+)")
+_DOC_MACRO_RE = re.compile(r"^\s*macro\s+([^\s]+)(?:\s+(\d+))?")
 
 
 def _extract_stack_comment(text: str) -> Optional[Tuple[str, str]]:
@@ -9145,15 +9545,16 @@ def _extract_stack_comment(text: str) -> Optional[Tuple[str, str]]:
     return name, tail
 
 
-def _extract_definition_name(text: str, *, include_macros: bool = False) -> Optional[Tuple[str, str]]:
+def _extract_definition_name(text: str, *, include_macros: bool = False) -> Optional[Tuple[str, str, int]]:
     for kind, regex in (("word", _DOC_WORD_RE), ("asm", _DOC_ASM_RE), ("py", _DOC_PY_RE)):
         match = regex.match(text)
         if match is not None:
-            return kind, match.group(1)
+            return kind, match.group(1), -1  # -1: placeholder, word/asm/py definitions carry no parameter count
     if include_macros:
         match = _DOC_MACRO_RE.match(text)
         if match is not None:
-            return "macro", match.group(1)
+            arg_count = int(match.group(2)) if match.group(2) is not None else 0  # omitted count means 0 parameters
+            return "macro", match.group(1), arg_count
     return None
 
 
@@ -9215,11 +9616,18 @@ def _scan_doc_file(
         parsed = _extract_definition_name(line, include_macros=include_macros)
         if parsed is None:
             continue
-        kind, name = parsed
+        kind, name, macro_args = parsed
         if not _is_doc_symbol_name(name, include_private=include_private):
             continue
         defined_names.add(name)
         stack_effect, description = _collect_leading_doc_comments(lines, idx, name)
+        # Auto-generate stack effect for macros from arg count
+        if kind == "macro" and not stack_effect:
+            if macro_args > 0:
+                params = " ".join(f"${i}" for i in range(macro_args))
+                stack_effect = f"macro({macro_args}): {params} -> expanded"
+            else:
+                stack_effect = "macro(0): -> expanded"
         if not include_undocumented and not stack_effect and not description:
             continue
         entries.append(
@@ -9416,6 +9824,8 @@ def _run_docs_tui(
     _MODE_LICENSE = 6
     _MODE_PHILOSOPHY = 7
     _MODE_CT_REF = 8
+    _MODE_QA = 9
+    _MODE_HOW = 10
 
     _TAB_LIBRARY = 0
     _TAB_LANG_REF = 1
@@ -9499,20 +9909,20 @@ def _run_docs_tui(
         {
             "name": "macro ... ;",
             "category": "Definitions",
-            "syntax": "macro <name> [<param_count>] <tokens...> ;",
+            "syntax": "macro <name> <param_count> <tokens...> ;",
             "summary": "Define a text macro with positional substitution.",
             "detail": (
                 "Records raw tokens until `;`. On expansion, `$0`, `$1`, ... "
                 "are replaced by positional arguments. Macros cannot nest.\n\n"
                 "Example:\n"
-                "  macro max2 [2] $0 $1 > if $0 else $1 end ;\n"
+                "  macro max2 2 $0 $1 > if $0 else $1 end ;\n"
                 "  5 3 max2   # leaves 5 on stack"
             ),
         },
         {
             "name": "struct ... end",
             "category": "Definitions",
-            "syntax": "struct <Name>\n  <size> <field>\n  ...\nend",
+            "syntax": "struct <Name>\n  field <field> <size>\n  ...\nend",
             "summary": "Define a packed struct with auto-generated accessors.",
             "detail": (
                 "Emits helper words:\n"
@@ -9524,8 +9934,8 @@ def _run_docs_tui(
                 "Layout is tightly packed with no implicit padding.\n\n"
                 "Example:\n"
                 "  struct Point\n"
-                "    8 x\n"
-                "    8 y\n"
+                "    field x 8\n"
+                "    field y 8\n"
                 "  end\n"
                 "  # Now Point.x@, Point.x!, Point.y@, Point.y! exist"
             ),
@@ -9575,8 +9985,8 @@ def _run_docs_tui(
             "summary": "Counted loop — pops count, loops that many times.",
             "detail": (
                 "Pops the loop count from the stack, stores it on the return stack, "
-                "and decrements it each pass. The loop index is accessible via "
-                "the compile-time word `i` inside macros.\n\n"
+                "and decrements it each pass. Use `r@` (return "
+                "stack peek) to read the current counter value.\n\n"
                 "Example:\n"
                 "  10 for\n"
                 "    \"hello\" puts\n"
@@ -9590,8 +10000,8 @@ def _run_docs_tui(
             "syntax": "begin <body> again",
             "summary": "Infinite loop (use `exit` or `goto` to break out).",
             "detail": (
-                "Creates an unconditional loop. The body repeats forever "
-                "available only at compile time.\n\n"
+                "Creates an unconditional loop. The body repeats forever.\n"
+                "Available only at compile time.\n\n"
                 "Example:\n"
                 "  begin\n"
                 "    read_stdin\n"
@@ -9607,7 +10017,7 @@ def _run_docs_tui(
             "summary": "Local jumps within a definition.",
             "detail": (
                 "Defines a local label and jumps to it. "
-                "to the enclosing word definition.\n\n"
+                "Labels are scoped to the enclosing word definition.\n\n"
                 "Example:\n"
                 "  word example\n"
                 "    label start\n"
@@ -10001,10 +10411,40 @@ def _run_docs_tui(
         "\n"
         "───────────────────────────────────────────────────────────\n"
         "\n"
-        "  L2 is a stack-based systems language that compiles\n"
-        "  ahead-of-time to native x86-64 Linux binaries. It\n"
-        "  descends from the Forth tradition: small words compose\n"
-        "  into larger words, and the machine is always visible.\n"
+        "  WHAT IS L2?\n"
+        "\n"
+        "  At its core, L2 is a programmable assembly templating\n"
+        "  engine with a Forth-style stack interface. You write\n"
+        "  small 'words' that compose into larger programs, and\n"
+        "  each word compiles to a known, inspectable sequence of\n"
+        "  x86-64 instructions. The language sits just above raw\n"
+        "  assembly — close enough to see every byte, high enough\n"
+        "  to be genuinely productive.\n"
+        "\n"
+        "  But L2 is more than a glorified macro assembler. Its\n"
+        "  compile-time virtual machine lets you run arbitrary L2\n"
+        "  code at compile time: generate words, compute lookup\n"
+        "  tables, build structs, or emit entire subsystems before\n"
+        "  a single byte of native code is produced. Text macros,\n"
+        "  :py blocks, and token hooks extend the syntax in ways\n"
+        "  that feel like language features — because they are.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────\n"
+        "\n"
+        "  WHY DOES L2 EXIST?\n"
+        "\n"
+        "  L2 was built for fun — and that's a feature, not an\n"
+        "  excuse. It exists because writing a compiler is deeply\n"
+        "  satisfying, because Forth's ideas deserve to be pushed\n"
+        "  further, and because sometimes you want to write a\n"
+        "  program that does exactly what you told it to.\n"
+        "\n"
+        "  That said, 'fun' doesn't mean 'toy'. L2 produces real\n"
+        "  native binaries, links against C libraries, and handles\n"
+        "  practical tasks like file I/O, hashmap manipulation,\n"
+        "  and async scheduling — all with a minimal runtime.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────\n"
         "\n"
         "  CORE TENETS\n"
         "\n"
@@ -10025,9 +10465,12 @@ def _run_docs_tui(
         "     it composes.\n"
         "\n"
         "  4. META-PROGRAMMABILITY\n"
-        "     The front-end is user-extensible: immediate words,\n"
-        "     text macros, :py blocks, and token hooks let you\n"
-        "     reshape syntax without forking the compiler.\n"
+        "     The front-end is user-extensible: text macros, :py\n"
+        "     blocks, immediate words, and token hooks reshape\n"
+        "     syntax at compile time. The compile-time VM can\n"
+        "     execute full L2 programs during compilation, making\n"
+        "     the boundary between 'language' and 'metaprogram'\n"
+        "     deliberately blurry.\n"
         "\n"
         "  5. UNSAFE BY DESIGN\n"
         "     Safety is the programmer's job, not the language's.\n"
@@ -10039,6 +10482,12 @@ def _run_docs_tui(
         "     It gives you alloc/free, puts/puti, arrays, and\n"
         "     file I/O. Everything else is your choice.\n"
         "\n"
+        "  7. FUN FIRST\n"
+        "     If using L2 feels like a chore, the design has\n"
+        "     failed. The language should reward curiosity and\n"
+        "     make you want to dig deeper into how things work.\n"
+        "     At least it's fun for me to write programs in. ;)\n"
+        "\n"
         "───────────────────────────────────────────────────────────\n"
         "\n"
         "  L2 is for programmers who want to understand every\n"
@@ -10434,14 +10883,14 @@ def _run_docs_tui(
         "  § 16  WITH (SCOPED VARIABLES)\n"
         "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
         "\n"
-        "  with <name> ... end\n"
-        "    Bind the top of the data stack to a compile-time name.\n"
-        "    Inside the with block, referencing <name> pushes the\n"
-        "    bound value. At the end of the block the binding is\n"
-        "    removed. Useful for readability and avoiding stack shuffling.\n"
+        "  with <names...> in <body> end\n"
+        "    Pop values from the stack into named local variables.\n"
+        "    Inside the body, referencing a name reads the variable;\n"
+        "    `name !` writes to it. Variables are backed by hidden\n"
+        "    globals and are NOT re-entrant.\n"
         "\n"
-        "      10 with x\n"
-        "        x x +   # pushes 10 twice, adds → 20\n"
+        "      10 20 with x y in\n"
+        "        x y +   # reads x (10) and y (20), adds -> 30\n"
         "      end\n"
         "\n"
         "\n"
@@ -10510,6 +10959,282 @@ def _run_docs_tui(
         "═══════════════════════════════════════════════════════════════\n"
     )
 
+    _L2_QA_TEXT = (
+        "═══════════════════════════════════════════════════════════\n"
+        "              Q & A   /   T I P S   &   T R I C K S\n"
+        "═══════════════════════════════════════════════════════════\n"
+        "\n"
+        "  HOW DO I DEBUG AN L2 PROGRAM?\n"
+        "\n"
+        "    Compile with --debug to embed DWARF debug info, then\n"
+        "    launch with --dbg to drop straight into GDB:\n"
+        "\n"
+        "      python3 main.py my_program.sl --debug --dbg\n"
+        "\n"
+        "    Inside GDB you can:\n"
+        "      - Set breakpoints on word labels  (b word_main)\n"
+        "      - Inspect the data stack via r12  (x/8gx $r12)\n"
+        "      - Step through asm instructions   (si / ni)\n"
+        "      - View registers                  (info registers)\n"
+        "      - Disassemble a word              (disas word_foo)\n"
+        "\n"
+        "    Tip: r12 is the stack pointer. [r12] = TOS,\n"
+        "    [r12+8] = second element, etc.\n"
+        "\n"
+        "  HOW DO I VIEW THE GENERATED ASSEMBLY?\n"
+        "\n"
+        "    Use --emit-asm to stop after generating assembly:\n"
+        "\n"
+        "      python3 main.py my_program.sl --emit-asm\n"
+        "\n"
+        "    The .asm file is written to build/<name>.asm.\n"
+        "    You can also use -v1 or higher for timing info,\n"
+        "    -v2 for per-function details, and -v3 or -v4 for\n"
+        "    full optimization tracing.\n"
+        "\n"
+        "  HOW DO I CALL C FUNCTIONS?\n"
+        "\n"
+        "    Declare them with the C-style extern syntax:\n"
+        "\n"
+        "      extern int printf(const char* fmt, ...)\n"
+        "      extern void* malloc(size_t size)\n"
+        "\n"
+        "    Or use the legacy style:\n"
+        "\n"
+        "      extern printf 2 1\n"
+        "\n"
+        "    Link the library with -l:\n"
+        "\n"
+        "      python3 main.py my_program.sl -l c\n"
+        "\n"
+        "    You can also use cimport to auto-extract externs:\n"
+        "\n"
+        "      cimport \"my_header.h\"\n"
+        "\n"
+        "  HOW DO MACROS WORK?\n"
+        "\n"
+        "    Text macros are template expansions. Define with\n"
+        "    an optional parameter count:\n"
+        "\n"
+        "      macro square       # 0-arg: inline expansion\n"
+        "        dup *\n"
+        "      ;\n"
+        "\n"
+        "      macro defconst 2   # 2-arg: $0 and $1 are args\n"
+        "        word $0\n"
+        "          $1\n"
+        "        end\n"
+        "      ;\n"
+        "\n"
+        "    Use them normally; macro args are positional:\n"
+        "\n"
+        "      5 square           # expands to: 5 dup *\n"
+        "      defconst TEN 10    # defines: word TEN 10 end\n"
+        "\n"
+        "  HOW DO I RUN CODE AT COMPILE TIME?\n"
+        "\n"
+        "    Use --ct-run-main or --script to execute 'main' at\n"
+        "    compile time. The CT VM supports most stack ops, I/O,\n"
+        "    lists, hashmaps, and string manipulation.\n"
+        "\n"
+        "    You can also mark words as compile-time:\n"
+        "\n"
+        "      word generate-table\n"
+        "        # ... runs during compilation\n"
+        "      end\n"
+        "      compile-time generate-table\n"
+        "\n"
+        "  WHAT IS THE --SCRIPT FLAG?\n"
+        "\n"
+        "    Shorthand for --no-artifact --ct-run-main. It parses\n"
+        "    and runs 'main' in the compile-time VM without\n"
+        "    producing a binary — useful for scripts as the name suggests.\n"
+        "\n"
+        "  HOW DO I USE THE BUILD CACHE?\n"
+        "\n"
+        "    The cache is automatic. It stores assembly output\n"
+        "    and skips recompilation when source files haven't\n"
+        "    changed. Disable with --no-cache if needed.\n"
+        "\n"
+        "  HOW DO I DUMP THE CONTROL-FLOW GRAPH?\n"
+        "\n"
+        "    Use --dump-cfg to produce a Graphviz DOT file:\n"
+        "\n"
+        "      python3 main.py prog.sl --dump-cfg\n"
+        "      dot -Tpng build/prog.cfg.dot -o cfg.png\n"
+        "\n"
+        "  WHAT OPTIMIZATIONS DOES L2 PERFORM?\n"
+        "\n"
+        "    - Constant folding (--no-folding to disable)\n"
+        "    - Peephole optimization (--no-peephole)\n"
+        "    - Loop unrolling (--no-loop-unroll)\n"
+        "    - Auto-inlining of small asm bodies (--no-auto-inline)\n"
+        "    - Static list folding (--no-static-list-folding)\n"
+        "    - Dead code elimination (automatic)\n"
+        "    - -O0 disables all optimizations\n"
+        "    - -O2 disables all optimizations AND checks\n"
+        "\n"
+        "══════════════════════════════════════════════════════════\n"
+    )
+
+    _L2_HOW_TEXT = (
+        "═══════════════════════════════════════════════════════════════\n"
+        "          H O W   L 2   W O R K S   (I N T E R N A L S)\n"
+        "═══════════════════════════════════════════════════════════════\n"
+        "\n"
+        "  ARCHITECTURE OVERVIEW\n"
+        "\n"
+        "    The L2 compiler is a single-pass, single-file Python\n"
+        "    program (~13K lines) with these major stages:\n"
+        "\n"
+        "    1. READER/TOKENIZER\n"
+        "       Splits source into whitespace-delimited tokens.\n"
+        "       Tracks line, column, and byte offsets per token.\n"
+        "       Comment lines (starting with #) in word bodies are\n"
+        "       preserved as metadata but not compiled.\n"
+        "\n"
+        "    2. IMPORT RESOLUTION\n"
+        "       'import' and 'cimport' directives are resolved\n"
+        "       recursively. Each file is loaded once. Imports are\n"
+        "       concatenated into a single token stream with\n"
+        "       FileSpan markers for error reporting.\n"
+        "\n"
+        "    3. PARSER\n"
+        "       Walks the token stream and builds an IR Module of\n"
+        "       Op lists (one per word definition). Key features:\n"
+        "       - Word/asm/py/extern definitions -> dictionary\n"
+        "       - Control flow (if/else/end, while/do/end, for)\n"
+        "         compiled to label-based jumps\n"
+        "       - Macro expansion (text macros with $N params)\n"
+        "       - Token hooks for user-extensible syntax\n"
+        "       - Compile-time VM execution of immediate words\n"
+        "\n"
+        "    4. ASSEMBLER / CODE GENERATOR\n"
+        "       Converts the Op IR into NASM x86-64 assembly.\n"
+        "       Handles calling conventions, extern C FFI with\n"
+        "       full System V ABI support (register classification,\n"
+        "       struct passing, SSE arguments).\n"
+        "\n"
+        "    5. NASM + LINKER\n"
+        "       The assembly is assembled by NASM into an object\n"
+        "       file, then linked (via ld or gcc) into the final\n"
+        "       binary.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────────\n"
+        "\n"
+        "  THE STACKS\n"
+        "\n"
+        "    L2 uses register r12 as the stack pointer for its data\n"
+        "    stack. The stack grows downward:\n"
+        "\n"
+        "      push:  sub r12, 8; mov [r12], rax\n"
+        "      pop:   mov rax, [r12]; add r12, 8\n"
+        "\n"
+        "    The return stack lives in a separate buffer with r13 as\n"
+        "    its stack pointer (also grows downward). The native x86\n"
+        "    call/ret stack (rsp) is used only for word call/return\n"
+        "    linkage and C interop.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────────\n"
+        "\n"
+        "  THE COMPILE-TIME VM\n"
+        "\n"
+        "    The CT VM is a stack-based interpreter that runs during\n"
+        "    parsing. It maintains:\n"
+        "\n"
+        "      - A value stack (Python list of ints/strings/lists)\n"
+        "      - A dictionary of CT-callable words\n"
+        "      - A return stack for nested calls\n"
+        "\n"
+        "    CT words can:\n"
+        "      - Emit token sequences into the compiler's stream\n"
+        "      - Inspect/modify the parser state\n"
+        "      - Call other CT words or builtins\n"
+        "      - Perform I/O, string ops, list/hashmap manipulation\n"
+        "\n"
+        "    When --ct-run-main is used, the CT VM can also JIT-compile\n"
+        "    and execute native x86-64 code via the Keystone assembler\n"
+        "    engine (for words that need native performance).\n"
+        "\n"
+        "───────────────────────────────────────────────────────────────\n"
+        "\n"
+        "  OPTIMIZATION PASSES\n"
+        "\n"
+        "    CONSTANT FOLDING\n"
+        "      Evaluates pure arithmetic sequences (e.g., 3 4 +\n"
+        "      becomes push 7). Works across word boundaries for\n"
+        "      inlined words.\n"
+        "\n"
+        "    PEEPHOLE OPTIMIZATION\n"
+        "      Pattern-matches instruction sequences and\n"
+        "      replaces them with shorter equivalents. Examples:\n"
+        "        swap drop -> nip\n"
+        "        swap nip  -> drop\n"
+        "\n"
+        "    LOOP UNROLLING\n"
+        "      Small deterministic loops (e.g., '4 for ... next')\n"
+        "      are unrolled into straight-line code when the\n"
+        "      iteration count is known at compile time.\n"
+        "\n"
+        "    AUTO-INLINING\n"
+        "      Small asm-body words (below a size threshold) are\n"
+        "      automatically inlined at call sites, eliminating\n"
+        "      call/ret overhead.\n"
+        "\n"
+        "    STATIC LIST FOLDING\n"
+        "      List literals like [1 2 3] with all-constant\n"
+        "      elements are placed in .data instead of being\n"
+        "      built at runtime.\n"
+        "\n"
+        "    DEAD CODE ELIMINATION\n"
+        "      Words that are never called (and not 'main') are\n"
+        "      excluded from the final assembly output.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────────\n"
+        "\n"
+        "  EXTERN C FFI\n"
+        "\n"
+        "    L2's extern system supports the full System V AMD64 ABI:\n"
+        "\n"
+        "    - Integer args -> rdi, rsi, rdx, rcx, r8, r9, then stack\n"
+        "    - Float/double args -> xmm0..xmm7, then stack\n"
+        "    - Struct args classified per ABI eightbyte rules\n"
+        "    - Return values in rax (int), xmm0 (float), or via\n"
+        "      hidden sret pointer for large structs\n"
+        "    - RSP is aligned to 16 bytes before each call\n"
+        "\n"
+        "    The compiler auto-classifies argument types from the\n"
+        "    C-style declaration and generates the correct register\n"
+        "    shuffle and stack layout.\n"
+        "\n"
+        "───────────────────────────────────────────────────────────────\n"
+        "\n"
+        "  QUIRKS & GOTCHAS\n"
+        "\n"
+        "    - No type system: everything is a 64-bit integer on\n"
+        "      the stack. Pointers, booleans, characters — all\n"
+        "      just numbers. Type safety is your responsibility.\n"
+        "\n"
+        "    - Macro expansion depth: macros can expand macros,\n"
+        "      but there's a limit (default 64, configurable via\n"
+        "      --macro-expansion-limit).\n"
+        "\n"
+        "    - :py blocks: Python code embedded in :py { ... }\n"
+        "      runs in the compiler's Python process. It has full\n"
+        "      access to the parser and dictionary — powerful but\n"
+        "      dangerous.\n"
+        "\n"
+        "    - The CT VM and native codegen share a dictionary\n"
+        "      but have separate stacks. A word defined at CT\n"
+        "      exists at CT only unless also compiled normally.\n"
+        "\n"
+        "    - The build cache tracks file mtimes and a hash of\n"
+        "      compiler flags. CT side effects invalidate the\n"
+        "      cache for that file.\n"
+        "\n"
+        "═══════════════════════════════════════════════════════════════\n"
+    )
+
     def _parse_sig_counts(effect: str) -> Tuple[int, int]:
         """Parse stack effect to (n_args, n_returns).
 
@@ -10629,6 +11354,15 @@ def _run_docs_tui(
                         extra_after += 1
                         if extra_after >= 3 or not stripped:
                             break
+            elif entry.kind == "macro":
+                # Show macro body until closing ';'
+                end = min(len(src_lines), start + 200)
+                for i in range(start, end):
+                    prefix = f"  {i + 1:4d}| "
+                    lines.append(prefix + src_lines[i])
+                    stripped = src_lines[i].strip()
+                    if stripped.endswith(";") and i >= start:
+                        break
             else:
                 end = min(len(src_lines), start + 30)
                 for i in range(start, end):
@@ -10645,6 +11379,27 @@ def _run_docs_tui(
             pass
         stdscr.keypad(True)
 
+        # Initialize color pairs for kind tags
+        _has_colors = False
+        try:
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(1, curses.COLOR_CYAN, -1)     # word
+                curses.init_pair(2, curses.COLOR_GREEN, -1)    # asm
+                curses.init_pair(3, curses.COLOR_YELLOW, -1)   # py
+                curses.init_pair(4, curses.COLOR_MAGENTA, -1)  # macro
+                _has_colors = True
+        except Exception:
+            pass
+
+        _KIND_COLORS = {
+            "word": curses.color_pair(1) if _has_colors else 0,
+            "asm": curses.color_pair(2) if _has_colors else 0,
+            "py": curses.color_pair(3) if _has_colors else 0,
+            "macro": curses.color_pair(4) if _has_colors else 0,
+        }
+
         nonlocal entries
         query = initial_query
         selected = 0
@@ -10738,7 +11493,7 @@ def _run_docs_tui(
                 _safe_addnstr(scr, y, x, label, width - x - 1, attr)
                 x += len(label) + 1
             # Right-aligned shortcuts
-            shortcuts = " L license  P philosophy "
+            shortcuts = " ? Q&A  H how  P philosophy  L license "
             if x + len(shortcuts) < width:
                 _safe_addnstr(scr, y, width - len(shortcuts) - 1, shortcuts, len(shortcuts), curses.A_DIM)
 
@@ -11104,7 +11859,7 @@ def _run_docs_tui(
                 cat_label = cat_names[lang_cat_filter]
                 header = f" Language Reference  {len(lang_entries)} entries  category: {cat_label}"
                 _safe_addnstr(stdscr, 1, 0, header, width - 1, curses.A_BOLD)
-                hint = " c category  Enter detail  j/k nav  Tab switch  C ct-ref  L license  P philosophy  q quit"
+                hint = " c category  Enter detail  j/k nav  Tab switch  C ct-ref  ? Q&A  H how  P philosophy  q quit"
                 _safe_addnstr(stdscr, 2, 0, hint, width - 1, curses.A_DIM)
 
                 for row in range(list_height):
@@ -11173,6 +11928,16 @@ def _run_docs_tui(
                     info_scroll = 0
                     mode = _MODE_PHILOSOPHY
                     continue
+                if key == ord("?"):
+                    info_lines = _L2_QA_TEXT.splitlines()
+                    info_scroll = 0
+                    mode = _MODE_QA
+                    continue
+                if key == ord("H"):
+                    info_lines = _L2_HOW_TEXT.splitlines()
+                    info_scroll = 0
+                    mode = _MODE_HOW
+                    continue
                 if key == ord("C"):
                     active_tab = _TAB_CT_REF
                     info_lines = _L2_CT_REF_TEXT.splitlines()
@@ -11227,9 +11992,15 @@ def _run_docs_tui(
                     continue
                 continue
 
-            # -- LICENSE / PHILOSOPHY MODE --
-            if mode in (_MODE_LICENSE, _MODE_PHILOSOPHY):
-                title = "License" if mode == _MODE_LICENSE else "Philosophy of L2"
+            # -- LICENSE / PHILOSOPHY / Q&A / HOW-IT-WORKS MODE --
+            if mode in (_MODE_LICENSE, _MODE_PHILOSOPHY, _MODE_QA, _MODE_HOW):
+                _info_titles = {
+                    _MODE_LICENSE: "License",
+                    _MODE_PHILOSOPHY: "Philosophy of L2",
+                    _MODE_QA: "Q&A / Tips & Tricks",
+                    _MODE_HOW: "How L2 Works (Internals)",
+                }
+                title = _info_titles.get(mode, "")
                 stdscr.erase()
                 _safe_addnstr(stdscr, 0, 0, f" {title} ", width - 1, curses.A_BOLD)
                 _safe_addnstr(stdscr, 1, 0, " q/Esc: back  j/k: scroll  PgUp/PgDn ", width - 1, curses.A_DIM)
@@ -11249,6 +12020,10 @@ def _run_docs_tui(
                 prev_mode = _MODE_LANG_REF if active_tab == _TAB_LANG_REF else (_MODE_CT_REF if active_tab == _TAB_CT_REF else _MODE_BROWSE)
                 if key in (27, ord("q"), ord("h"), curses.KEY_LEFT):
                     mode = prev_mode
+                    # Restore info_lines when returning to CT ref
+                    if prev_mode == _MODE_CT_REF:
+                        info_lines = _L2_CT_REF_TEXT.splitlines()
+                        info_scroll = 0
                     continue
                 if key in (curses.KEY_DOWN, ord("j")):
                     if info_scroll < max_iscroll:
@@ -11277,7 +12052,7 @@ def _run_docs_tui(
                 stdscr.erase()
                 _safe_addnstr(stdscr, 0, 0, " Compile-Time Reference ", width - 1, curses.A_BOLD)
                 _render_tab_bar(stdscr, 1, width)
-                _safe_addnstr(stdscr, 2, 0, " j/k scroll  PgUp/PgDn  Tab switch  L license  P philosophy  q quit", width - 1, curses.A_DIM)
+                _safe_addnstr(stdscr, 2, 0, " j/k scroll  PgUp/PgDn  Tab switch  ? Q&A  H how  P philosophy  L license  q quit", width - 1, curses.A_DIM)
                 body_height = max(1, height - 4)
                 max_iscroll = max(0, len(info_lines) - body_height)
                 if info_scroll > max_iscroll:
@@ -11327,6 +12102,16 @@ def _run_docs_tui(
                     info_scroll = 0
                     mode = _MODE_PHILOSOPHY
                     continue
+                if key == ord("?"):
+                    info_lines = _L2_QA_TEXT.splitlines()
+                    info_scroll = 0
+                    mode = _MODE_QA
+                    continue
+                if key == ord("H"):
+                    info_lines = _L2_HOW_TEXT.splitlines()
+                    info_scroll = 0
+                    mode = _MODE_HOW
+                    continue
                 if key == ord("C"):
                     active_tab = _TAB_CT_REF
                     info_lines = _L2_CT_REF_TEXT.splitlines()
@@ -11372,7 +12157,7 @@ def _run_docs_tui(
             header = f" L2 docs  {len(filtered)}/{len(entries)}" + (f"  search: {query}" if query else "") + filter_info
             _safe_addnstr(stdscr, 0, 0, header, width - 1, curses.A_BOLD)
             _render_tab_bar(stdscr, 1, width)
-            hint = " / search  f filters  r reload  Enter detail  j/k nav  Tab switch  C ct-ref  L license  P philosophy  q quit"
+            hint = " / search  f filters  r reload  Enter detail  Tab switch  C ct-ref  ? Q&A  H how  P philosophy  L license  q quit"
             _safe_addnstr(stdscr, 2, 0, hint, width - 1, curses.A_DIM)
 
             for row in range(list_height):
@@ -11381,10 +12166,23 @@ def _run_docs_tui(
                     break
                 entry = filtered[idx]
                 effect = entry.stack_effect if entry.stack_effect else ""
-                kind_tag = f"[{entry.kind}]"
-                line = f" {entry.name:24} {effect:30} {kind_tag}"
-                attr = curses.A_REVERSE if idx == selected else 0
-                _safe_addnstr(stdscr, 3 + row, 0, line, width - 1, attr)
+                kind_tag = f"[{entry.kind:5}]"
+                name_part = f" {entry.name:24} "
+                effect_part = f"{effect:30} "
+                is_sel = idx == selected
+                base_attr = curses.A_REVERSE if is_sel else 0
+                y = 3 + row
+                # Draw name (reverse + bold on the selected row, plain otherwise)
+                _safe_addnstr(stdscr, y, 0, name_part, width - 1, (base_attr | curses.A_BOLD) if is_sel else base_attr)
+                # Draw stack effect
+                x = len(name_part)
+                if x < width - 1:
+                    _safe_addnstr(stdscr, y, x, effect_part, width - x - 1, base_attr)
+                # Draw kind tag; the per-kind color is applied only on unselected rows
+                x2 = x + len(effect_part)
+                if x2 < width - 1:
+                    kind_color = 0 if is_sel else _KIND_COLORS.get(entry.kind, 0)
+                    _safe_addnstr(stdscr, y, x2, kind_tag, width - x2 - 1, base_attr | kind_color)
 
             if filtered:
                 current = filtered[selected]
@@ -11414,6 +12212,16 @@ def _run_docs_tui(
                 info_scroll = 0
                 mode = _MODE_PHILOSOPHY
                 continue
+            if key == ord("?"):
+                info_lines = _L2_QA_TEXT.splitlines()
+                info_scroll = 0
+                mode = _MODE_QA
+                continue
+            if key == ord("H"):
+                info_lines = _L2_HOW_TEXT.splitlines()
+                info_scroll = 0
+                mode = _MODE_HOW
+                continue
             if key == ord("C"):
                 active_tab = _TAB_CT_REF
                 info_lines = _L2_CT_REF_TEXT.splitlines()
@@ -11608,6 +12416,32 @@ def cli(argv: Sequence[str]) -> int:
         default=DEFAULT_MACRO_EXPANSION_LIMIT,
         help="maximum nested macro expansion depth (default: %(default)s)",
     )
+    parser.add_argument(
+        "-D",
+        dest="defines",
+        action="append",
+        default=[],
+        metavar="NAME",
+        help="define a preprocessor symbol for ifdef/ifndef (repeatable)",
+    )
+    parser.add_argument(
+        "--check",
+        action="store_true",
+        help="validate source without producing artifacts (parse + compile only)",
+    )
+    parser.add_argument(
+        "-W",
+        dest="warnings",
+        action="append",
+        default=[],
+        metavar="NAME",
+        help="enable warning (e.g. -W redefine, -W stack-depth, -W all)",
+    )
+    parser.add_argument(
+        "--Werror",
+        action="store_true",
+        help="treat all warnings as errors",
+    )
 
     # Parse known and unknown args to allow -l flags anywhere
     args, unknown = parser.parse_known_args(argv)
@@ -11627,6 +12461,9 @@ def cli(argv: Sequence[str]) -> int:
         args.no_artifact = True
         args.ct_run_main = True
 
+    if args.check:
+        args.no_artifact = True
+
     if args.macro_expansion_limit < 1:
         parser.error("--macro-expansion-limit must be >= 1")
 
@@ -11740,6 +12577,7 @@ def cli(argv: Sequence[str]) -> int:
     compiler = Compiler(
         include_paths=[Path("."), Path("./stdlib"), *args.include_paths],
         macro_expansion_limit=args.macro_expansion_limit,
+        defines=args.defines,
     )
     compiler.assembler.enable_constant_folding = folding_enabled
     compiler.assembler.enable_static_list_folding = static_list_folding_enabled
@@ -11751,7 +12589,26 @@ def cli(argv: Sequence[str]) -> int:
     compiler.assembler.verbosity = verbosity
     if args.dump_cfg is not None:
         compiler.assembler._need_cfg = True
-
+    # Warning configuration
+    warnings_set = set(args.warnings)
+    werror = args.Werror
+    # Support GCC-style -Werror (single dash, parsed as -W error)
+    if "error" in warnings_set:
+        warnings_set.discard("error")
+        werror = True
+    # -Werror without explicit -W categories implies -W all
+    if werror and not warnings_set:
+        warnings_set.add("all")
+    compiler.parser._warnings_enabled = warnings_set
+    compiler.parser._werror = werror
+    # Route dictionary redefine warnings through the parser's _warn system
+    if warnings_set or werror:
+        def _dict_warn_cb(name: str, priority: int) -> None:
+            compiler.parser._warn(
+                compiler.parser._last_token, "redefine",
+                f"redefining word {name} (priority {priority})",
+            )
+        compiler.parser.dictionary.warn_callback = _dict_warn_cb
     cache: Optional[BuildCache] = None
     if not args.no_cache:
         cache = BuildCache(args.temp_dir / ".l2cache")
@@ -11822,12 +12679,36 @@ def cli(argv: Sequence[str]) -> int:
                 print(f"[error] compile-time execution of 'main' failed: {exc}")
                 return 1
     except (ParseError, CompileError, CompileTimeError) as exc:
-        print(f"[error] {exc}")
+        # Print collected diagnostics (Rust-style); fall back to the raw exception when none is an error
+        use_color = sys.stderr.isatty()
+        diags = getattr(compiler.parser, 'diagnostics', [])
+        for diag in diags:
+            print(diag.format(color=use_color), file=sys.stderr)
+        error_count = sum(1 for d in diags if d.level == "error")
+        warn_count = sum(1 for d in diags if d.level == "warning")
+        if not error_count:
+            # No error-level diagnostic explains this failure; report exc itself
+            print(f"[error] {exc}", file=sys.stderr)
+        summary_parts = []
+        if error_count:
+            summary_parts.append(f"{error_count} error(s)")
+        if warn_count:
+            summary_parts.append(f"{warn_count} warning(s)")
+        if summary_parts:
+            print(f"\n{' and '.join(summary_parts)} emitted", file=sys.stderr)
         return 1
     except Exception as exc:
-        print(f"[error] unexpected failure: {exc}")
+        print(f"[error] unexpected failure: {exc}", file=sys.stderr)
         return 1
 
+    # Print any warnings accumulated during successful compilation
+    use_color = sys.stderr.isatty()
+    warnings = [d for d in compiler.parser.diagnostics if d.level == "warning"]
+    if warnings:
+        for diag in warnings:
+            print(diag.format(color=use_color), file=sys.stderr)
+        print(f"\n{len(warnings)} warning(s) emitted", file=sys.stderr)
+
     args.temp_dir.mkdir(parents=True, exist_ok=True)
     asm_path = args.temp_dir / (args.source.stem + ".asm")
     obj_path = args.temp_dir / (args.source.stem + ".o")
diff --git a/test.py b/test.py
index d5a5f83..5ffaad3 100644
--- a/test.py
+++ b/test.py
@@ -152,6 +152,7 @@ class TestCaseConfig:
     tags: List[str] = field(default_factory=list)
     requires: List[str] = field(default_factory=list)
     libs: List[str] = field(default_factory=list)
+    compile_args: List[str] = field(default_factory=list)
 
     @classmethod
     def from_meta(cls, data: Dict[str, Any]) -> "TestCaseConfig":
@@ -207,6 +208,11 @@ class TestCaseConfig:
             if not isinstance(libs, list) or not all(isinstance(item, str) for item in libs):
                 raise ValueError("libs must be a list of strings")
             cfg.libs = [item.strip() for item in libs if item.strip()]
+        if "compile_args" in data:
+            ca = data["compile_args"]
+            if not isinstance(ca, list) or not all(isinstance(item, str) for item in ca):
+                raise ValueError("compile_args must be a list of strings")
+            cfg.compile_args = list(ca)
         return cfg
 
 
@@ -442,6 +448,7 @@ class TestRunner:
             cmd.extend(["-l", lib])
         for lib in (extra_libs or []):
             cmd.extend(["-l", lib])
+        cmd.extend(case.config.compile_args)
         if self.args.ct_run_main:
             cmd.append("--ct-run-main")
         if self.args.verbose:
@@ -477,7 +484,7 @@ class TestRunner:
         obj_path = case.build_dir / f"{case.binary_stub}_fixture.o"
         archive_path = case.build_dir / f"lib{case.binary_stub}_fixture.a"
 
-        compile_cmd = [cc, "-O2", "-c", str(c_source), "-o", str(obj_path)]
+        compile_cmd = [cc, "-O2", "-fno-stack-protector", "-c", str(c_source), "-o", str(obj_path)]
         archive_cmd = [ar, "rcs", str(archive_path), str(obj_path)]
 
         if self.args.verbose:
@@ -655,6 +662,11 @@ class TestRunner:
             return self._sort_lines(text)
         if case.source.stem == "ct_test" and label == "compile":
             return self._mask_build_path(text, case.binary_stub)
+        if label == "compile":
+            # Normalize absolute source paths to relative for stable compile error comparison
+            source_dir = str(case.source.parent.resolve())
+            if source_dir:
+                text = text.replace(source_dir + "/", "")
         return text
 
     def _sort_lines(self, text: str) -> str:
diff --git a/tests/cimport_structs.c b/tests/cimport_structs.c
new file mode 100644
index 0000000..d58ff46
--- /dev/null
+++ b/tests/cimport_structs.c
@@ -0,0 +1,9 @@
+#include "cimport_structs.h"
+
+long point_sum_ptr(struct Point *p) {
+    return p->x + p->y;
+}
+
+long pair_sum_ptr(struct Pair *p) {
+    return p->a + p->b;
+}
diff --git a/tests/cimport_structs.expected b/tests/cimport_structs.expected
new file mode 100644
index 0000000..647a40f
--- /dev/null
+++ b/tests/cimport_structs.expected
@@ -0,0 +1,6 @@
+16
+10
+20
+30
+16
+300
diff --git a/tests/cimport_structs.h b/tests/cimport_structs.h
new file mode 100644
index 0000000..006753a
--- /dev/null
+++ b/tests/cimport_structs.h
@@ -0,0 +1,18 @@
+#ifndef TEST_STRUCTS_H
+#define TEST_STRUCTS_H
+
+struct Point {
+    long x;
+    long y;
+};
+
+struct Pair {
+    long a;
+    long b;
+};
+
+/* Pointer-based helpers (simple scalar ABI). */
+long point_sum_ptr(struct Point *p);
+long pair_sum_ptr(struct Pair *p);
+
+#endif
diff --git a/tests/cimport_structs.sl b/tests/cimport_structs.sl
new file mode 100644
index 0000000..a140f07
--- /dev/null
+++ b/tests/cimport_structs.sl
@@ -0,0 +1,31 @@
+import stdlib.sl
+
+# Test cimport: extract struct definitions and extern functions from a C header.
+cimport "cimport_structs.h"
+
+word main
+    # Verify that cstruct Point was generated with correct layout
+    Point.size puti cr       # 16 bytes (two i64 = 8+8)
+
+    # Allocate a Point, set fields, read them back
+    Point.size alloc dup >r
+    r@ 10 Point.x!
+    r@ 20 Point.y!
+    r@ Point.x@ puti cr      # 10
+    r@ Point.y@ puti cr      # 20
+
+    # Call C helper that takes a pointer (simple scalar ABI)
+    r@ point_sum_ptr puti cr  # 30
+    r> Point.size free
+
+    # Verify Pair struct layout
+    Pair.size puti cr         # 16
+
+    Pair.size alloc dup >r
+    r@ 100 Pair.a!
+    r@ 200 Pair.b!
+    r@ pair_sum_ptr puti cr   # 300
+    r> Pair.size free
+
+    0
+end
diff --git a/tests/error_recovery.compile.expected b/tests/error_recovery.compile.expected
new file mode 100644
index 0000000..01c570f
--- /dev/null
+++ b/tests/error_recovery.compile.expected
@@ -0,0 +1,12 @@
+error: unexpected 'end' at 6:8
+  --> error_recovery.sl:6:8
+    |
+  6 |     end end
+    |        ^^^
+error: unexpected 'end' at 10:8
+  --> error_recovery.sl:10:8
+     |
+  10 |     end end
+     |        ^^^
+
+2 error(s) emitted
diff --git a/tests/error_recovery.meta.json b/tests/error_recovery.meta.json
new file mode 100644
index 0000000..fa8dc2f
--- /dev/null
+++ b/tests/error_recovery.meta.json
@@ -0,0 +1,4 @@
+{
+  "expect_compile_error": true,
+  "description": "Compiler reports multiple errors via error recovery"
+}
diff --git a/tests/error_recovery.sl b/tests/error_recovery.sl
new file mode 100644
index 0000000..9bffb6b
--- /dev/null
+++ b/tests/error_recovery.sl
@@ -0,0 +1,15 @@
+# This file intentionally has multiple errors to test error recovery.
+# The compiler should report all of them rather than stopping at the first.
+# No stdlib import — keeps line numbers stable.
+
+word foo
+    end end
+end
+
+word bar
+    end end
+end
+
+word main
+    0
+end
diff --git a/tests/ifdef.expected b/tests/ifdef.expected
new file mode 100644
index 0000000..6c3f03c
--- /dev/null
+++ b/tests/ifdef.expected
@@ -0,0 +1,7 @@
+flag_on
+
+nope_off
+
+yes
+
+nested_ok
diff --git a/tests/ifdef.meta.json b/tests/ifdef.meta.json
new file mode 100644
index 0000000..dadba46
--- /dev/null
+++ b/tests/ifdef.meta.json
@@ -0,0 +1,3 @@
+{
+  "compile_args": ["-D", "TESTFLAG"]
+}
diff --git a/tests/ifdef.sl b/tests/ifdef.sl
new file mode 100644
index 0000000..baea3d9
--- /dev/null
+++ b/tests/ifdef.sl
@@ -0,0 +1,44 @@
+import stdlib/stdlib.sl
+import stdlib/io.sl
+
+# Test ifdef: TESTFLAG is defined via -D TESTFLAG
+ifdef TESTFLAG
+word show_flag
+    "flag_on" puts cr
+end
+endif
+
+# Test ifndef: NOPE is NOT defined
+ifndef NOPE
+word show_nope
+    "nope_off" puts cr
+end
+endif
+
+# Test ifdef with elsedef
+ifdef TESTFLAG
+word branch
+    "yes" puts cr
+end
+elsedef
+word branch
+    "no" puts cr
+end
+endif
+
+# Test nested: inner depends on outer
+ifdef TESTFLAG
+ifndef NOPE
+word nested
+    "nested_ok" puts cr
+end
+endif
+endif
+
+word main
+    show_flag
+    show_nope
+    branch
+    nested
+    0
+end
diff --git a/tests/ifndef.expected b/tests/ifndef.expected
new file mode 100644
index 0000000..81ddc2a
--- /dev/null
+++ b/tests/ifndef.expected
@@ -0,0 +1,3 @@
+guard_ok
+
+else_ok
diff --git a/tests/ifndef.sl b/tests/ifndef.sl
new file mode 100644
index 0000000..fb76c57
--- /dev/null
+++ b/tests/ifndef.sl
@@ -0,0 +1,33 @@
+import stdlib/stdlib.sl
+import stdlib/io.sl
+
+# No -D flags, so ifdef FOO is false, ifndef FOO is true
+
+ifdef FOO
+word dead_code
+    "BUG" puts cr
+end
+endif
+
+ifndef FOO
+word guarded
+    "guard_ok" puts cr
+end
+endif
+
+# elsedef: ifdef FALSE → skip, elsedef → include
+ifdef MISSING
+word wrong
+    "BUG" puts cr
+end
+elsedef
+word right
+    "else_ok" puts cr
+end
+endif
+
+word main
+    guarded
+    right
+    0
+end
diff --git a/tests/variadic_extern.c b/tests/variadic_extern.c
new file mode 100644
index 0000000..2ac2f2b
--- /dev/null
+++ b/tests/variadic_extern.c
@@ -0,0 +1,20 @@
+#include <stdarg.h>
+
+/* Returns first plus each following variadic long, stopping at (and excluding) the -1 sentinel. */
+long va_sum_sentinel(long first, ...) {
+    va_list ap;
+    va_start(ap, first);
+    long total = first;
+    while (1) {
+        long v = va_arg(ap, long);
+        if (v == -1) break;
+        total += v;
+    }
+    va_end(ap);
+    return total;
+}
+
+/* Non-variadic helper for comparison. */
+long add_two(long a, long b) {
+    return a + b;
+}
diff --git a/tests/variadic_extern.expected b/tests/variadic_extern.expected
new file mode 100644
index 0000000..889f330
--- /dev/null
+++ b/tests/variadic_extern.expected
@@ -0,0 +1,5 @@
+30
+hello
+42 99
+60
+
diff --git a/tests/variadic_extern.meta.json b/tests/variadic_extern.meta.json
new file mode 100644
index 0000000..79c4ecd
--- /dev/null
+++ b/tests/variadic_extern.meta.json
@@ -0,0 +1,4 @@
+{
+  "description": "Test variadic and non-variadic extern declarations with companion C file",
+  "libs": ["libc.so.6"]
+}
diff --git a/tests/variadic_extern.sl b/tests/variadic_extern.sl
new file mode 100644
index 0000000..600fe10
--- /dev/null
+++ b/tests/variadic_extern.sl
@@ -0,0 +1,34 @@
+import stdlib.sl
+
+# Test variadic extern declarations.
+# For variadic externs, the TOS literal before the call is the number of
+# extra variadic arguments.  The compiler consumes it (not passed to C).
+# String literals push (ptr, len) — use drop to discard the length for C.
+
+# printf: 1 fixed param (fmt), variadic args via TOS count
+extern int printf(const char *fmt, ...)
+extern int fflush(long stream)
+
+# Custom C variadic: sums args until sentinel -1 is seen
+extern long va_sum_sentinel(long first, ...)
+
+# Non-variadic extern for comparison
+extern long add_two(long a, long b)
+
+word main
+    # Test 1: non-variadic add_two
+    10 20 add_two puti cr
+
+    # Test 2: printf with 0 variadic args (just format string)
+    "hello\n" drop 0 printf drop
+    0 fflush drop
+
+    # Test 3: printf with 2 variadic args
+    "%d %d\n" drop 42 99 2 printf drop
+    0 fflush drop
+
+    # Test 4: va_sum_sentinel(10, 20, 30, -1) = 60
+    10 20 30 -1 3 va_sum_sentinel puti cr
+
+    0
+end