Fixed dead-code elimination (DCE) when the user defines a custom `_start`
This commit is contained in:
82
main.py
82
main.py
@@ -3442,7 +3442,7 @@ class Assembler:
|
|||||||
|
|
||||||
definition.body = rebuilt
|
definition.body = rebuilt
|
||||||
|
|
||||||
def _reachable_runtime_defs(self, runtime_defs: Sequence[Union[Definition, AsmDefinition]]) -> Set[str]:
|
def _reachable_runtime_defs(self, runtime_defs: Sequence[Union[Definition, AsmDefinition]], extra_roots: Optional[Sequence[str]] = None) -> Set[str]:
|
||||||
edges: Dict[str, Set[str]] = {}
|
edges: Dict[str, Set[str]] = {}
|
||||||
for definition in runtime_defs:
|
for definition in runtime_defs:
|
||||||
refs: Set[str] = set()
|
refs: Set[str] = set()
|
||||||
@@ -3450,20 +3450,57 @@ class Assembler:
|
|||||||
for node in definition.body:
|
for node in definition.body:
|
||||||
if node.op in {"word", "word_ptr"}:
|
if node.op in {"word", "word_ptr"}:
|
||||||
refs.add(str(node.data))
|
refs.add(str(node.data))
|
||||||
|
elif isinstance(definition, AsmDefinition):
|
||||||
|
# Collect obvious textual `call` targets from asm bodies so
|
||||||
|
# asm-defined entry points can create edges into the word
|
||||||
|
# graph. The extractor below will tolerate common call forms
|
||||||
|
# such as `call foo` and `call [rel foo]`.
|
||||||
|
asm_calls = self._extract_called_symbols_from_asm(definition.body)
|
||||||
|
for sym in asm_calls:
|
||||||
|
refs.add(sym)
|
||||||
edges[definition.name] = refs
|
edges[definition.name] = refs
|
||||||
|
|
||||||
|
# Map sanitized labels back to their original definition names so
|
||||||
|
# calls to emitted/sanitized labels (e.g. `w_foo`) can be resolved
|
||||||
|
# to the corresponding word names present in `edges`.
|
||||||
|
sanitized_map: Dict[str, str] = {sanitize_label(n): n for n in edges}
|
||||||
|
|
||||||
reachable: Set[str] = set()
|
reachable: Set[str] = set()
|
||||||
stack: List[str] = ["main"]
|
stack: List[str] = ["main"]
|
||||||
|
if extra_roots:
|
||||||
|
for r in extra_roots:
|
||||||
|
if r and r not in stack:
|
||||||
|
stack.append(r)
|
||||||
while stack:
|
while stack:
|
||||||
name = stack.pop()
|
name = stack.pop()
|
||||||
if name in reachable:
|
if name in reachable:
|
||||||
continue
|
continue
|
||||||
reachable.add(name)
|
reachable.add(name)
|
||||||
for dep in edges.get(name, ()):
|
for dep in edges.get(name, ()):
|
||||||
if dep not in reachable and dep in edges:
|
# Direct name hit
|
||||||
|
if dep in edges and dep not in reachable:
|
||||||
stack.append(dep)
|
stack.append(dep)
|
||||||
|
continue
|
||||||
|
# Possibly a sanitized label; resolve back to original name
|
||||||
|
resolved = sanitized_map.get(dep)
|
||||||
|
if resolved and resolved not in reachable:
|
||||||
|
stack.append(resolved)
|
||||||
return reachable
|
return reachable
|
||||||
|
|
||||||
|
def _extract_called_symbols_from_asm(self, asm_body: str) -> Set[str]:
|
||||||
|
"""Return set of symbol names called from a raw asm body.
|
||||||
|
|
||||||
|
This looks for typical `call <symbol>` forms and also
|
||||||
|
`call [rel <symbol>]` and `call qword [rel <symbol>]`.
|
||||||
|
"""
|
||||||
|
calls: Set[str] = set()
|
||||||
|
pattern = re.compile(r"call\s+(?:qword\s+)?(?:\[rel\s+([A-Za-z0-9_.$@]+)\]|([A-Za-z0-9_.$@]+))")
|
||||||
|
for m in pattern.finditer(asm_body):
|
||||||
|
sym = m.group(1) or m.group(2)
|
||||||
|
if sym:
|
||||||
|
calls.add(sym)
|
||||||
|
return calls
|
||||||
|
|
||||||
def _emit_externs(self, text: List[str]) -> None:
|
def _emit_externs(self, text: List[str]) -> None:
|
||||||
externs = sorted([w.name for w in self.dictionary.words.values() if getattr(w, "is_extern", False)])
|
externs = sorted([w.name for w in self.dictionary.words.values() if getattr(w, "is_extern", False)])
|
||||||
for name in externs:
|
for name in externs:
|
||||||
@@ -3534,15 +3571,42 @@ class Assembler:
|
|||||||
if is_program:
|
if is_program:
|
||||||
if not any(defn.name == "main" for defn in runtime_defs):
|
if not any(defn.name == "main" for defn in runtime_defs):
|
||||||
raise CompileError("missing 'main' definition")
|
raise CompileError("missing 'main' definition")
|
||||||
reachable = self._reachable_runtime_defs(runtime_defs)
|
# Determine if any user-provided `_start` asm calls into
|
||||||
|
# defined words and use those call targets as additional
|
||||||
|
# reachability roots. This avoids unconditionally emitting
|
||||||
|
# every `:asm` body while still preserving functions that
|
||||||
|
# are invoked from a custom `_start` stub.
|
||||||
|
# Build a quick lookup of runtime definition names -> defn
|
||||||
|
name_to_def: Dict[str, Union[Definition, AsmDefinition]] = {d.name: d for d in runtime_defs}
|
||||||
|
# Look for an asm `_start` among parsed definitions (not just runtime_defs)
|
||||||
|
asm_start = next((d for d in definitions if isinstance(d, AsmDefinition) and d.name == "_start"), None)
|
||||||
|
extra_roots: List[str] = []
|
||||||
|
if asm_start is not None:
|
||||||
|
called = self._extract_called_symbols_from_asm(asm_start.body)
|
||||||
|
# Resolve called symbols to definition names using both
|
||||||
|
# raw and sanitized forms.
|
||||||
|
sanitized_map = {sanitize_label(n): n for n in name_to_def}
|
||||||
|
for sym in called:
|
||||||
|
if sym in name_to_def:
|
||||||
|
extra_roots.append(sym)
|
||||||
|
else:
|
||||||
|
resolved = sanitized_map.get(sym)
|
||||||
|
if resolved:
|
||||||
|
extra_roots.append(resolved)
|
||||||
|
|
||||||
|
# Ensure a user-provided raw `_start` asm definition is
|
||||||
|
# always emitted (it should override the default stub).
|
||||||
|
if asm_start is not None and asm_start not in runtime_defs:
|
||||||
|
runtime_defs.append(asm_start)
|
||||||
|
|
||||||
|
reachable = self._reachable_runtime_defs(runtime_defs, extra_roots=extra_roots)
|
||||||
if len(reachable) != len(runtime_defs):
|
if len(reachable) != len(runtime_defs):
|
||||||
runtime_defs = [defn for defn in runtime_defs if defn.name in reachable]
|
runtime_defs = [defn for defn in runtime_defs if defn.name in reachable]
|
||||||
# Always include any top-level assembly definitions so user
|
# Ensure `_start` is preserved even if not reachable from
|
||||||
# provided `:asm` bodies (including `_start`) are emitted even
|
# `main` or the discovered roots; user-provided `_start`
|
||||||
# if they aren't referenced from `main`.
|
# must override the default stub.
|
||||||
for defn in definitions:
|
if asm_start is not None and asm_start not in runtime_defs:
|
||||||
if isinstance(defn, AsmDefinition) and defn not in runtime_defs:
|
runtime_defs.append(asm_start)
|
||||||
runtime_defs.append(defn)
|
|
||||||
elif self._export_all_defs:
|
elif self._export_all_defs:
|
||||||
exported = sorted({sanitize_label(defn.name) for defn in runtime_defs})
|
exported = sorted({sanitize_label(defn.name) for defn in runtime_defs})
|
||||||
for label in exported:
|
for label in exported:
|
||||||
|
|||||||
Reference in New Issue
Block a user