added the option to customize _start
This commit is contained in:
204
main.py
204
main.py
@@ -2918,6 +2918,11 @@ _sanitize_label_cache: Dict[str, str] = {}
|
|||||||
|
|
||||||
|
|
||||||
def sanitize_label(name: str) -> str:
|
def sanitize_label(name: str) -> str:
|
||||||
|
# Keep the special `_start` label unchanged so the program entrypoint
|
||||||
|
# remains a plain `_start` symbol expected by the linker.
|
||||||
|
if name == "_start":
|
||||||
|
_sanitize_label_cache[name] = name
|
||||||
|
return name
|
||||||
cached = _sanitize_label_cache.get(name)
|
cached = _sanitize_label_cache.get(name)
|
||||||
if cached is not None:
|
if cached is not None:
|
||||||
return cached
|
return cached
|
||||||
@@ -2930,8 +2935,13 @@ def sanitize_label(name: str) -> str:
|
|||||||
safe = "".join(parts) or "anon"
|
safe = "".join(parts) or "anon"
|
||||||
if safe[0].isdigit():
|
if safe[0].isdigit():
|
||||||
safe = "_" + safe
|
safe = "_" + safe
|
||||||
_sanitize_label_cache[name] = safe
|
# Prefix sanitized labels to avoid accidental collisions with
|
||||||
return safe
|
# assembler pseudo-ops or common identifiers (e.g. `abs`). The
|
||||||
|
# prefix is applied consistently so all emitted references using
|
||||||
|
# `sanitize_label` remain correct.
|
||||||
|
prefixed = f"w_{safe}"
|
||||||
|
_sanitize_label_cache[name] = prefixed
|
||||||
|
return prefixed
|
||||||
|
|
||||||
|
|
||||||
def _is_identifier(text: str) -> bool:
|
def _is_identifier(text: str) -> bool:
|
||||||
@@ -3467,8 +3477,34 @@ class Assembler:
|
|||||||
self._export_all_defs = not is_program
|
self._export_all_defs = not is_program
|
||||||
try:
|
try:
|
||||||
self._emit_externs(emission.text)
|
self._emit_externs(emission.text)
|
||||||
prelude_lines = module.prelude if module.prelude is not None else self._runtime_prelude(entry_mode)
|
# Determine whether user provided a top-level `:asm _start` in
|
||||||
emission.text.extend(prelude_lines)
|
# the module forms so the prelude can avoid emitting the
|
||||||
|
# default startup stub.
|
||||||
|
# Detect whether the user supplied a `_start` either as a top-level
|
||||||
|
# AsmDefinition form or as a registered dictionary word (imports
|
||||||
|
# or CT execution may register it). This influences prelude
|
||||||
|
# generation so the default stub is suppressed when present.
|
||||||
|
user_has_start = any(
|
||||||
|
isinstance(f, AsmDefinition) and f.name == "_start" for f in module.forms
|
||||||
|
) or (
|
||||||
|
(self.dictionary.lookup("_start") is not None)
|
||||||
|
and isinstance(self.dictionary.lookup("_start").definition, AsmDefinition)
|
||||||
|
) or (
|
||||||
|
(module.prelude is not None) and any(l.strip().startswith("_start:") for l in module.prelude)
|
||||||
|
)
|
||||||
|
# Defer runtime prelude generation until after top-level forms are
|
||||||
|
# parsed into `definitions` so we can accurately detect a user
|
||||||
|
# provided `_start` AsmDefinition and suppress the default stub.
|
||||||
|
# Note: module.prelude was already inspected above when
|
||||||
|
# computing `user_has_start`, so avoid referencing
|
||||||
|
# `prelude_lines` before it's constructed.
|
||||||
|
# Prelude will be generated after definitions are known.
|
||||||
|
# If user provided a raw assembly `_start` via `:asm _start {...}`
|
||||||
|
# inject it verbatim into the text section so it becomes the
|
||||||
|
# program entrypoint. Emit the raw body (no automatic `ret`).
|
||||||
|
# Do not inject `_start` body here; rely on definitions emission
|
||||||
|
# and the earlier `user_has_start` check to suppress the default
|
||||||
|
# startup stub. This avoids emitting `_start` twice.
|
||||||
self._string_literals = {}
|
self._string_literals = {}
|
||||||
self._float_literals = {}
|
self._float_literals = {}
|
||||||
self._data_section = emission.data
|
self._data_section = emission.data
|
||||||
@@ -3494,15 +3530,19 @@ class Assembler:
|
|||||||
if stray_forms:
|
if stray_forms:
|
||||||
raise CompileError("top-level literals or word references are not supported yet")
|
raise CompileError("top-level literals or word references are not supported yet")
|
||||||
|
|
||||||
runtime_defs = [
|
runtime_defs = [defn for defn in definitions if not getattr(defn, "compile_only", False)]
|
||||||
defn for defn in definitions if not getattr(defn, "compile_only", False)
|
|
||||||
]
|
|
||||||
if is_program:
|
if is_program:
|
||||||
if not any(defn.name == "main" for defn in runtime_defs):
|
if not any(defn.name == "main" for defn in runtime_defs):
|
||||||
raise CompileError("missing 'main' definition")
|
raise CompileError("missing 'main' definition")
|
||||||
reachable = self._reachable_runtime_defs(runtime_defs)
|
reachable = self._reachable_runtime_defs(runtime_defs)
|
||||||
if len(reachable) != len(runtime_defs):
|
if len(reachable) != len(runtime_defs):
|
||||||
runtime_defs = [defn for defn in runtime_defs if defn.name in reachable]
|
runtime_defs = [defn for defn in runtime_defs if defn.name in reachable]
|
||||||
|
# Always include any top-level assembly definitions so user
|
||||||
|
# provided `:asm` bodies (including `_start`) are emitted even
|
||||||
|
# if they aren't referenced from `main`.
|
||||||
|
for defn in definitions:
|
||||||
|
if isinstance(defn, AsmDefinition) and defn not in runtime_defs:
|
||||||
|
runtime_defs.append(defn)
|
||||||
elif self._export_all_defs:
|
elif self._export_all_defs:
|
||||||
exported = sorted({sanitize_label(defn.name) for defn in runtime_defs})
|
exported = sorted({sanitize_label(defn.name) for defn in runtime_defs})
|
||||||
for label in exported:
|
for label in exported:
|
||||||
@@ -3514,6 +3554,80 @@ class Assembler:
|
|||||||
for definition in runtime_defs:
|
for definition in runtime_defs:
|
||||||
self._emit_definition(definition, emission.text, debug=debug)
|
self._emit_definition(definition, emission.text, debug=debug)
|
||||||
|
|
||||||
|
# --- now generate and emit the runtime prelude ---
|
||||||
|
# Determine whether a user-provided `_start` exists among the
|
||||||
|
# parsed definitions or in a compile-time-injected prelude. If
|
||||||
|
# present, suppress the default startup stub emitted by the
|
||||||
|
# runtime prelude.
|
||||||
|
user_has_start = any(isinstance(d, AsmDefinition) and d.name == "_start" for d in definitions)
|
||||||
|
if module.prelude is not None and not user_has_start:
|
||||||
|
if any(line.strip().startswith("_start:") for line in module.prelude):
|
||||||
|
user_has_start = True
|
||||||
|
base_prelude = self._runtime_prelude(entry_mode, has_user_start=user_has_start)
|
||||||
|
# Use the generated base prelude. Avoid directly prepending
|
||||||
|
# `module.prelude` which can contain raw, unsanitized assembly
|
||||||
|
# fragments (often sourced from cached stdlib assembly) that
|
||||||
|
# duplicate or conflict with the sanitized definitions the
|
||||||
|
# emitter produces. Prepending `module.prelude` has caused
|
||||||
|
# duplicate `_start` and symbol conflicts; prefer the
|
||||||
|
# canonical `base_prelude` produced by the emitter.
|
||||||
|
prelude_lines = base_prelude
|
||||||
|
if user_has_start and prelude_lines is not None:
|
||||||
|
# Avoid re-declaring the default startup symbol when the
|
||||||
|
# user provided their own `_start`. Do not remove the
|
||||||
|
# user's `_start` body. Only
|
||||||
|
# filter out any stray `global _start` markers.
|
||||||
|
prelude_lines = [l for l in prelude_lines if l.strip() != "global _start"]
|
||||||
|
# Tag any `_start:` occurrences in the prelude with a
|
||||||
|
# provenance comment so generated ASM files make it easy
|
||||||
|
# to see where each `_start` originated. This is
|
||||||
|
# non-destructive (comments only) and helps debug duplicates.
|
||||||
|
if prelude_lines is not None:
|
||||||
|
tagged = []
|
||||||
|
for l in prelude_lines:
|
||||||
|
if l.strip().startswith("_start:"):
|
||||||
|
tagged.append("; __ORIGIN__ prelude")
|
||||||
|
tagged.append(l)
|
||||||
|
else:
|
||||||
|
tagged.append(l)
|
||||||
|
prelude_lines = tagged
|
||||||
|
# Prepend prelude lines to any already-emitted text (definitions).
|
||||||
|
emission.text = (prelude_lines if prelude_lines is not None else []) + list(emission.text)
|
||||||
|
try:
|
||||||
|
self._emitted_start = any(l.strip().startswith("_start:") for l in emission.text)
|
||||||
|
except Exception:
|
||||||
|
self._emitted_start = False
|
||||||
|
# If no `_start` has been emitted (either detected in
|
||||||
|
# definitions/module.prelude or already present in the
|
||||||
|
# composed `emission.text`), append the default startup
|
||||||
|
# stub now (after definitions) so the emitter does not
|
||||||
|
# produce duplicate `_start` labels.
|
||||||
|
if is_program and not (user_has_start or getattr(self, "_emitted_start", False)):
|
||||||
|
emission.text.extend([
|
||||||
|
"; __ORIGIN__ default_stub",
|
||||||
|
"global _start",
|
||||||
|
"_start:",
|
||||||
|
" ; Linux x86-64 startup: argc/argv from stack",
|
||||||
|
" mov rdi, [rsp]", # argc
|
||||||
|
" lea rsi, [rsp+8]", # argv
|
||||||
|
" mov [rel sys_argc], rdi",
|
||||||
|
" mov [rel sys_argv], rsi",
|
||||||
|
" ; initialize data/return stack pointers",
|
||||||
|
" lea r12, [rel dstack_top]",
|
||||||
|
" mov r15, r12",
|
||||||
|
" lea r13, [rel rstack_top]",
|
||||||
|
f" call {sanitize_label('main')}",
|
||||||
|
" mov rax, 0",
|
||||||
|
" cmp r12, r15",
|
||||||
|
" je .no_exit_value",
|
||||||
|
" mov rax, [r12]",
|
||||||
|
" add r12, 8",
|
||||||
|
".no_exit_value:",
|
||||||
|
" mov rdi, rax",
|
||||||
|
" mov rax, 60",
|
||||||
|
" syscall",
|
||||||
|
])
|
||||||
|
|
||||||
self._emit_variables(module.variables)
|
self._emit_variables(module.variables)
|
||||||
|
|
||||||
if self._data_section is not None:
|
if self._data_section is not None:
|
||||||
@@ -3597,7 +3711,28 @@ class Assembler:
|
|||||||
*,
|
*,
|
||||||
debug: bool = False,
|
debug: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
# If a `_start` label has already been emitted in the prelude,
|
||||||
|
# skip emitting a second `_start` definition which would cause
|
||||||
|
# assembler redefinition errors. The prelude-provided `_start`
|
||||||
|
# (if present) is taken to be authoritative.
|
||||||
|
if definition.name == "_start" and getattr(self, "_emitted_start", False):
|
||||||
|
return
|
||||||
|
# If this is a raw assembly definition, tag its origin so the
|
||||||
|
# generated ASM clearly shows the source of the label (helpful
|
||||||
|
# when diagnosing duplicate `_start` occurrences).
|
||||||
|
if isinstance(definition, AsmDefinition):
|
||||||
|
text.append(f"; __ORIGIN__ AsmDefinition {definition.name}")
|
||||||
label = sanitize_label(definition.name)
|
label = sanitize_label(definition.name)
|
||||||
|
|
||||||
|
# Record start index so we can write a per-definition snapshot
|
||||||
|
start_index = len(text)
|
||||||
|
# If this definition is the program entry `_start`, ensure it's
|
||||||
|
# exported as a global symbol so the linker sets the process
|
||||||
|
# entry point correctly. Some earlier sanitizer passes may
|
||||||
|
# remove `global _start` from prelude fragments; make sure user
|
||||||
|
# provided `_start` remains globally visible.
|
||||||
|
if label == "_start":
|
||||||
|
text.append("global _start")
|
||||||
text.append(f"{label}:")
|
text.append(f"{label}:")
|
||||||
builder = FunctionEmitter(text, debug_enabled=debug)
|
builder = FunctionEmitter(text, debug_enabled=debug)
|
||||||
self._emit_stack.append(definition.name)
|
self._emit_stack.append(definition.name)
|
||||||
@@ -3671,11 +3806,25 @@ class Assembler:
|
|||||||
body = definition.body.strip("\n")
|
body = definition.body.strip("\n")
|
||||||
if not body:
|
if not body:
|
||||||
return
|
return
|
||||||
|
import re
|
||||||
for line in body.splitlines():
|
for line in body.splitlines():
|
||||||
if line.strip():
|
if not line.strip():
|
||||||
|
continue
|
||||||
|
# Sanitize symbol references in raw asm bodies so they match
|
||||||
|
# the sanitized labels emitted for high-level definitions.
|
||||||
|
# Handle common patterns: `call NAME`, `global NAME`, `extern NAME`.
|
||||||
|
def repl_sym(m: re.Match) -> str:
|
||||||
|
name = m.group(1)
|
||||||
|
return m.group(0).replace(name, sanitize_label(name))
|
||||||
|
|
||||||
|
# `call NAME`
|
||||||
|
line = re.sub(r"\bcall\s+([A-Za-z_][A-Za-z0-9_]*)\b", repl_sym, line)
|
||||||
|
# `global NAME`
|
||||||
|
line = re.sub(r"\bglobal\s+([A-Za-z_][A-Za-z0-9_]*)\b", repl_sym, line)
|
||||||
|
# `extern NAME`
|
||||||
|
line = re.sub(r"\bextern\s+([A-Za-z_][A-Za-z0-9_]*)\b", repl_sym, line)
|
||||||
|
|
||||||
builder.emit(line)
|
builder.emit(line)
|
||||||
else:
|
|
||||||
builder.emit("")
|
|
||||||
|
|
||||||
def _emit_node(self, node: Op, builder: FunctionEmitter) -> None:
|
def _emit_node(self, node: Op, builder: FunctionEmitter) -> None:
|
||||||
kind = node._opcode
|
kind = node._opcode
|
||||||
@@ -3939,14 +4088,13 @@ class Assembler:
|
|||||||
builder.emit(" add r13, 8")
|
builder.emit(" add r13, 8")
|
||||||
builder.emit(f"{end_label}:")
|
builder.emit(f"{end_label}:")
|
||||||
|
|
||||||
def _runtime_prelude(self, entry_mode: str) -> List[str]:
|
def _runtime_prelude(self, entry_mode: str, has_user_start: bool = False) -> List[str]:
|
||||||
lines: List[str] = [
|
lines: List[str] = [
|
||||||
"%define DSTK_BYTES 65536",
|
"%define DSTK_BYTES 65536",
|
||||||
"%define RSTK_BYTES 65536",
|
"%define RSTK_BYTES 65536",
|
||||||
"%define PRINT_BUF_BYTES 128",
|
"%define PRINT_BUF_BYTES 128",
|
||||||
]
|
]
|
||||||
if entry_mode == "program":
|
is_program = entry_mode == "program"
|
||||||
lines.append("global _start")
|
|
||||||
lines.extend([
|
lines.extend([
|
||||||
"global sys_argc",
|
"global sys_argc",
|
||||||
"global sys_argv",
|
"global sys_argv",
|
||||||
@@ -3955,32 +4103,9 @@ class Assembler:
|
|||||||
"sys_argv: dq 0",
|
"sys_argv: dq 0",
|
||||||
"section .text",
|
"section .text",
|
||||||
])
|
])
|
||||||
|
# Do not emit the default `_start` stub here; it will be appended
|
||||||
if entry_mode == "program":
|
# after definitions have been emitted if no user `_start` was
|
||||||
lines.extend([
|
# provided. This avoids duplicate or partial `_start` blocks.
|
||||||
"_start:",
|
|
||||||
" ; Linux x86-64 startup: argc/argv from stack",
|
|
||||||
" mov rdi, [rsp]", # argc
|
|
||||||
" lea rsi, [rsp+8]", # argv
|
|
||||||
" mov [rel sys_argc], rdi",
|
|
||||||
" mov [rel sys_argv], rsi",
|
|
||||||
" ; initialize data/return stack pointers",
|
|
||||||
" lea r12, [rel dstack_top]",
|
|
||||||
" mov r15, r12",
|
|
||||||
" lea r13, [rel rstack_top]",
|
|
||||||
" call main",
|
|
||||||
" mov rax, 0",
|
|
||||||
" cmp r12, r15",
|
|
||||||
" je .no_exit_value",
|
|
||||||
" mov rax, [r12]",
|
|
||||||
" add r12, 8",
|
|
||||||
".no_exit_value:",
|
|
||||||
" mov rdi, rax",
|
|
||||||
" mov rax, 60",
|
|
||||||
" syscall",
|
|
||||||
])
|
|
||||||
else:
|
|
||||||
lines.append(" ; library build: provide your own entry point")
|
|
||||||
|
|
||||||
return lines
|
return lines
|
||||||
|
|
||||||
@@ -4010,7 +4135,6 @@ class Assembler:
|
|||||||
def write_asm(self, emission: Emission, path: Path) -> None:
|
def write_asm(self, emission: Emission, path: Path) -> None:
|
||||||
path.write_text(emission.snapshot())
|
path.write_text(emission.snapshot())
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Built-in macros and intrinsics
|
# Built-in macros and intrinsics
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
2
tests/start_override.expected
Normal file
2
tests/start_override.expected
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
hello world
|
||||||
|
24
|
||||||
44
tests/start_override.sl
Normal file
44
tests/start_override.sl
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
import stdlib.sl
|
||||||
|
|
||||||
|
:asm _start {
|
||||||
|
mov rdi, [rsp]
|
||||||
|
lea rsi, [rsp+8]
|
||||||
|
mov [rel sys_argc], rdi
|
||||||
|
mov [rel sys_argv], rsi
|
||||||
|
lea r12, [rel dstack_top]
|
||||||
|
mov r15, r12
|
||||||
|
lea r13, [rel rstack_top]
|
||||||
|
; print "hello world\n" before calling main using runtime `print_buf`
|
||||||
|
mov byte [rel print_buf], 'h'
|
||||||
|
mov byte [rel print_buf + 1], 'e'
|
||||||
|
mov byte [rel print_buf + 2], 'l'
|
||||||
|
mov byte [rel print_buf + 3], 'l'
|
||||||
|
mov byte [rel print_buf + 4], 'o'
|
||||||
|
mov byte [rel print_buf + 5], ' '
|
||||||
|
mov byte [rel print_buf + 6], 'w'
|
||||||
|
mov byte [rel print_buf + 7], 'o'
|
||||||
|
mov byte [rel print_buf + 8], 'r'
|
||||||
|
mov byte [rel print_buf + 9], 'l'
|
||||||
|
mov byte [rel print_buf + 10], 'd'
|
||||||
|
mov byte [rel print_buf + 11], 10
|
||||||
|
lea rsi, [rel print_buf]
|
||||||
|
mov rdx, 12
|
||||||
|
mov rax, 1
|
||||||
|
mov rdi, 1
|
||||||
|
syscall
|
||||||
|
call main
|
||||||
|
mov rax, 0
|
||||||
|
cmp r12, r15
|
||||||
|
je .no_exit_value
|
||||||
|
mov rax, [r12]
|
||||||
|
add r12, 8
|
||||||
|
.no_exit_value:
|
||||||
|
mov rdi, rax
|
||||||
|
mov rax, 60
|
||||||
|
syscall
|
||||||
|
};
|
||||||
|
|
||||||
|
word main
|
||||||
|
24 puti cr
|
||||||
|
end
|
||||||
|
|
||||||
Reference in New Issue
Block a user