update to the C FFI
This commit is contained in:
271
main.py
271
main.py
@@ -556,76 +556,135 @@ class Parser:
|
|||||||
if self._eof():
|
if self._eof():
|
||||||
raise ParseError(f"extern missing name at {token.line}:{token.column}")
|
raise ParseError(f"extern missing name at {token.line}:{token.column}")
|
||||||
|
|
||||||
# Heuristic: check if the first token is a likely C type
|
first_token = self._consume()
|
||||||
c_types = {"void", "int", "long", "char", "bool", "size_t", "float", "double"}
|
if self._try_parse_c_extern(first_token):
|
||||||
|
return
|
||||||
|
self._parse_legacy_extern(first_token)
|
||||||
|
|
||||||
# Peek at tokens to decide mode
|
def _parse_legacy_extern(self, name_token: Token) -> None:
|
||||||
t1 = self._consume()
|
|
||||||
is_c_decl = False
|
|
||||||
|
|
||||||
# If t1 is a type (or type*), and next is name, and next is '(', it's C-style.
|
|
||||||
# But since we already consumed t1, we have to proceed carefully.
|
|
||||||
|
|
||||||
# Check if t1 looks like a type
|
|
||||||
base_type = t1.lexeme.rstrip("*")
|
|
||||||
if base_type in c_types:
|
|
||||||
# Likely C-style, but let's confirm next token is not a number (which would mean t1 was the name in raw mode)
|
|
||||||
peek = self.peek_token()
|
|
||||||
if peek is not None and peek.lexeme != "(" and not peek.lexeme.isdigit():
|
|
||||||
# t1=type, peek=name. Confirm peek2='('?
|
|
||||||
# Actually, if t1 is "int", it's extremely unlikely to be a function name in L2.
|
|
||||||
is_c_decl = True
|
|
||||||
|
|
||||||
if is_c_decl:
|
|
||||||
# C-Style Parsing
|
|
||||||
ret_type = t1.lexeme
|
|
||||||
if self._eof():
|
|
||||||
raise ParseError("extern missing name after return type")
|
|
||||||
name_token = self._consume()
|
|
||||||
name = name_token.lexeme
|
name = name_token.lexeme
|
||||||
|
word = self.dictionary.lookup(name)
|
||||||
|
if word is None:
|
||||||
|
word = Word(name=name)
|
||||||
|
self.dictionary.register(word)
|
||||||
|
word.is_extern = True
|
||||||
|
|
||||||
# Handle pointers in name token if tokenizer didn't split them (e.g. *name)
|
peek = self.peek_token()
|
||||||
while name.startswith("*"):
|
if peek is not None and peek.lexeme.isdigit():
|
||||||
name = name[1:]
|
word.extern_inputs = int(self._consume().lexeme)
|
||||||
|
peek = self.peek_token()
|
||||||
|
if peek is not None and peek.lexeme.isdigit():
|
||||||
|
word.extern_outputs = int(self._consume().lexeme)
|
||||||
|
else:
|
||||||
|
word.extern_outputs = 0
|
||||||
|
else:
|
||||||
|
word.extern_inputs = 0
|
||||||
|
word.extern_outputs = 0
|
||||||
|
|
||||||
if self._eof() or self._consume().lexeme != "(":
|
def _try_parse_c_extern(self, first_token: Token) -> bool:
|
||||||
raise ParseError(f"expected '(' after extern function name '{name}'")
|
saved_pos = self.pos
|
||||||
|
prefix_tokens: List[str] = [first_token.lexeme]
|
||||||
|
|
||||||
|
while True:
|
||||||
|
if self._eof():
|
||||||
|
self.pos = saved_pos
|
||||||
|
return False
|
||||||
|
lookahead = self._consume()
|
||||||
|
if lookahead.lexeme == "(":
|
||||||
|
break
|
||||||
|
if lookahead.lexeme.isdigit():
|
||||||
|
self.pos = saved_pos
|
||||||
|
return False
|
||||||
|
prefix_tokens.append(lookahead.lexeme)
|
||||||
|
|
||||||
|
if not prefix_tokens:
|
||||||
|
raise ParseError("extern missing return type/name before '('")
|
||||||
|
|
||||||
|
name_lexeme = prefix_tokens.pop()
|
||||||
|
if not _is_identifier(name_lexeme):
|
||||||
|
prefix_name, suffix_name = _split_trailing_identifier(name_lexeme)
|
||||||
|
if suffix_name is None:
|
||||||
|
raise ParseError(f"extern expected identifier before '(' but got '{name_lexeme}'")
|
||||||
|
name_lexeme = suffix_name
|
||||||
|
if prefix_name:
|
||||||
|
prefix_tokens.append(prefix_name)
|
||||||
|
|
||||||
|
if not _is_identifier(name_lexeme):
|
||||||
|
raise ParseError(f"extern expected identifier before '(' but got '{name_lexeme}'")
|
||||||
|
|
||||||
|
ret_type = _normalize_c_type_tokens(prefix_tokens, allow_default=True)
|
||||||
|
inputs, arg_types = self._parse_c_param_list()
|
||||||
|
outputs = 0 if ret_type == "void" else 1
|
||||||
|
self._register_c_extern(name_lexeme, inputs, outputs, arg_types, ret_type)
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _parse_c_param_list(self) -> Tuple[int, List[str]]:
|
||||||
inputs = 0
|
inputs = 0
|
||||||
arg_types: List[str] = []
|
arg_types: List[str] = []
|
||||||
while True:
|
|
||||||
if self._eof():
|
if self._eof():
|
||||||
raise ParseError("extern unclosed '('")
|
raise ParseError("extern unclosed '('")
|
||||||
peek = self.peek_token()
|
peek = self.peek_token()
|
||||||
if peek.lexeme == ")":
|
if peek.lexeme == ")":
|
||||||
self._consume()
|
self._consume()
|
||||||
break
|
return inputs, arg_types
|
||||||
|
|
||||||
# Parse argument type
|
while True:
|
||||||
arg_type_tok = self._consume()
|
lexemes = self._collect_c_param_lexemes()
|
||||||
arg_type = arg_type_tok.lexeme
|
arg_type = _normalize_c_type_tokens(lexemes, allow_default=False)
|
||||||
|
if arg_type == "void" and inputs == 0:
|
||||||
# Handle "type *" sequence
|
if self._eof():
|
||||||
peek_ptr = self.peek_token()
|
raise ParseError("extern unclosed '(' after 'void'")
|
||||||
while peek_ptr and peek_ptr.lexeme == "*":
|
closing = self._consume()
|
||||||
self._consume()
|
if closing.lexeme != ")":
|
||||||
arg_type += "*"
|
raise ParseError("expected ')' after 'void' in extern parameter list")
|
||||||
peek_ptr = self.peek_token()
|
return 0, []
|
||||||
|
|
||||||
if arg_type != "void":
|
|
||||||
inputs += 1
|
inputs += 1
|
||||||
arg_types.append(arg_type)
|
arg_types.append(arg_type)
|
||||||
# Optional argument name
|
if self._eof():
|
||||||
peek_name = self.peek_token()
|
raise ParseError("extern unclosed '('")
|
||||||
if peek_name and peek_name.lexeme not in (",", ")"):
|
separator = self._consume()
|
||||||
self._consume() # Consume arg name
|
if separator.lexeme == ")":
|
||||||
|
break
|
||||||
|
if separator.lexeme != ",":
|
||||||
|
raise ParseError(
|
||||||
|
f"expected ',' or ')' in extern parameter list, got '{separator.lexeme}'"
|
||||||
|
)
|
||||||
|
return inputs, arg_types
|
||||||
|
|
||||||
peek_sep = self.peek_token()
|
def _collect_c_param_lexemes(self) -> List[str]:
|
||||||
if peek_sep and peek_sep.lexeme == ",":
|
lexemes: List[str] = []
|
||||||
self._consume()
|
while True:
|
||||||
|
if self._eof():
|
||||||
|
raise ParseError("extern unclosed '('")
|
||||||
|
peek = self.peek_token()
|
||||||
|
if peek.lexeme in (",", ")"):
|
||||||
|
break
|
||||||
|
lexemes.append(self._consume().lexeme)
|
||||||
|
|
||||||
outputs = 0 if ret_type == "void" else 1
|
if not lexemes:
|
||||||
|
raise ParseError("missing parameter type in extern declaration")
|
||||||
|
|
||||||
|
if len(lexemes) > 1 and _is_identifier(lexemes[-1]):
|
||||||
|
lexemes.pop()
|
||||||
|
return lexemes
|
||||||
|
|
||||||
|
prefix, suffix = _split_trailing_identifier(lexemes[-1])
|
||||||
|
if suffix is not None:
|
||||||
|
if prefix:
|
||||||
|
lexemes[-1] = prefix
|
||||||
|
else:
|
||||||
|
lexemes.pop()
|
||||||
|
return lexemes
|
||||||
|
|
||||||
|
def _register_c_extern(
|
||||||
|
self,
|
||||||
|
name: str,
|
||||||
|
inputs: int,
|
||||||
|
outputs: int,
|
||||||
|
arg_types: List[str],
|
||||||
|
ret_type: str,
|
||||||
|
) -> None:
|
||||||
word = self.dictionary.lookup(name)
|
word = self.dictionary.lookup(name)
|
||||||
if word is None:
|
if word is None:
|
||||||
word = Word(name=name)
|
word = Word(name=name)
|
||||||
@@ -635,26 +694,6 @@ class Parser:
|
|||||||
word.extern_outputs = outputs
|
word.extern_outputs = outputs
|
||||||
word.extern_signature = (arg_types, ret_type)
|
word.extern_signature = (arg_types, ret_type)
|
||||||
|
|
||||||
else:
|
|
||||||
# Raw/Legacy Parsing
|
|
||||||
name = t1.lexeme
|
|
||||||
word = self.dictionary.lookup(name)
|
|
||||||
if word is None:
|
|
||||||
word = Word(name=name)
|
|
||||||
self.dictionary.register(word)
|
|
||||||
word.is_extern = True
|
|
||||||
|
|
||||||
# Check for optional inputs/outputs
|
|
||||||
peek = self.peek_token()
|
|
||||||
if peek is not None and peek.lexeme.isdigit():
|
|
||||||
word.extern_inputs = int(self._consume().lexeme)
|
|
||||||
peek = self.peek_token()
|
|
||||||
if peek is not None and peek.lexeme.isdigit():
|
|
||||||
word.extern_outputs = int(self._consume().lexeme)
|
|
||||||
else:
|
|
||||||
word.extern_inputs = 0
|
|
||||||
word.extern_outputs = 0
|
|
||||||
|
|
||||||
def _handle_token(self, token: Token) -> None:
|
def _handle_token(self, token: Token) -> None:
|
||||||
if self._try_literal(token):
|
if self._try_literal(token):
|
||||||
return
|
return
|
||||||
@@ -1576,6 +1615,70 @@ def _is_identifier(text: str) -> bool:
|
|||||||
return all(ch.isalnum() or ch == "_" for ch in text)
|
return all(ch.isalnum() or ch == "_" for ch in text)
|
||||||
|
|
||||||
|
|
||||||
|
_C_TYPE_IGNORED_QUALIFIERS = {
|
||||||
|
"const",
|
||||||
|
"volatile",
|
||||||
|
"register",
|
||||||
|
"restrict",
|
||||||
|
"static",
|
||||||
|
"extern",
|
||||||
|
"_Atomic",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _split_trailing_identifier(text: str) -> Tuple[str, Optional[str]]:
|
||||||
|
if not text:
|
||||||
|
return text, None
|
||||||
|
idx = len(text)
|
||||||
|
while idx > 0 and (text[idx - 1].isalnum() or text[idx - 1] == "_"):
|
||||||
|
idx -= 1
|
||||||
|
if idx == 0 or idx == len(text):
|
||||||
|
return text, None
|
||||||
|
prefix = text[:idx]
|
||||||
|
suffix = text[idx:]
|
||||||
|
if any(not ch.isalnum() and ch != "_" for ch in prefix):
|
||||||
|
return prefix, suffix
|
||||||
|
return text, None
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_c_type_tokens(tokens: Sequence[str], *, allow_default: bool) -> str:
    """Collapse the lexemes of a C type into one canonical type string.

    Words found in ``_C_TYPE_IGNORED_QUALIFIERS`` are dropped, the remaining
    words are joined with single spaces, and one ``*`` per pointer level is
    appended at the end (for example ``["const", "char", "*"]`` becomes
    ``"char*"``).

    Args:
        tokens: Raw lexemes that make up the type, in source order.
        allow_default: When no type word remains, fall back to ``"int"``
            if true; otherwise raise.

    Returns:
        The normalized type name followed by its pointer markers.

    Raises:
        ParseError: If no type word remains and ``allow_default`` is false.
    """
    pointer_count = 0
    parts: List[str] = []
    for raw in tokens:
        text = raw.strip()
        if not text:
            continue
        # Every '*' inside a type lexeme is a pointer declarator, whether it
        # leads, trails, stands alone, or is glued between two words
        # (e.g. "char*const"), so count them all and split on them.
        pointer_count += text.count("*")
        parts.extend(
            piece
            for piece in text.split("*")
            if piece and piece not in _C_TYPE_IGNORED_QUALIFIERS
        )

    if parts:
        base = " ".join(parts)
    elif allow_default:
        # No explicit type word was given; fall back to "int".
        base = "int"
    else:
        raise ParseError("expected C type before parameter name")
    return base + "*" * pointer_count
|
||||||
|
|
||||||
|
|
||||||
|
def _ctype_uses_sse(type_name: Optional[str]) -> bool:
|
||||||
|
if type_name is None:
|
||||||
|
return False
|
||||||
|
base = type_name.rstrip("*")
|
||||||
|
return base in {"float", "double"}
|
||||||
|
|
||||||
|
|
||||||
def _parse_string_literal(token: Token) -> Optional[str]:
|
def _parse_string_literal(token: Token) -> Optional[str]:
|
||||||
text = token.lexeme
|
text = token.lexeme
|
||||||
if len(text) < 2 or text[0] != '"' or text[-1] != '"':
|
if len(text) < 2 or text[0] != '"' or text[-1] != '"':
|
||||||
@@ -1998,7 +2101,7 @@ class Assembler:
|
|||||||
outputs = getattr(word, "extern_outputs", 0)
|
outputs = getattr(word, "extern_outputs", 0)
|
||||||
signature = getattr(word, "extern_signature", None)
|
signature = getattr(word, "extern_signature", None)
|
||||||
|
|
||||||
if inputs > 0 or outputs > 0:
|
if signature is not None or inputs > 0 or outputs > 0:
|
||||||
regs = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"]
|
regs = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"]
|
||||||
xmm_regs = [f"xmm{i}" for i in range(8)]
|
xmm_regs = [f"xmm{i}" for i in range(8)]
|
||||||
|
|
||||||
@@ -2048,7 +2151,7 @@ class Assembler:
|
|||||||
builder.emit(" leave")
|
builder.emit(" leave")
|
||||||
|
|
||||||
# Handle Return Value
|
# Handle Return Value
|
||||||
if ret_type in ("float", "double"):
|
if _ctype_uses_sse(ret_type):
|
||||||
# Result in xmm0, move to stack
|
# Result in xmm0, move to stack
|
||||||
builder.emit(" sub r12, 8")
|
builder.emit(" sub r12, 8")
|
||||||
builder.emit(" movq rax, xmm0")
|
builder.emit(" movq rax, xmm0")
|
||||||
@@ -3320,7 +3423,25 @@ def run_linker(obj_path: Path, exe_path: Path, debug: bool = False, libs=None):
|
|||||||
"-dynamic-linker", "/lib64/ld-linux-x86-64.so.2",
|
"-dynamic-linker", "/lib64/ld-linux-x86-64.so.2",
|
||||||
])
|
])
|
||||||
for lib in libs:
|
for lib in libs:
|
||||||
cmd.append(f"-l:{lib}" if ".so" in lib else f"-l{lib}")
|
if not lib:
|
||||||
|
continue
|
||||||
|
lib = str(lib)
|
||||||
|
if lib.startswith(("-L", "-l", "-Wl,")):
|
||||||
|
cmd.append(lib)
|
||||||
|
continue
|
||||||
|
if lib.startswith(":"):
|
||||||
|
cmd.append(f"-l{lib}")
|
||||||
|
continue
|
||||||
|
if os.path.isabs(lib) or lib.startswith("./") or lib.startswith("../"):
|
||||||
|
cmd.append(lib)
|
||||||
|
continue
|
||||||
|
if os.path.sep in lib or lib.endswith(".a"):
|
||||||
|
cmd.append(lib)
|
||||||
|
continue
|
||||||
|
if ".so" in lib:
|
||||||
|
cmd.append(f"-l:{lib}")
|
||||||
|
continue
|
||||||
|
cmd.append(f"-l{lib}")
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
cmd.append("-g")
|
cmd.append("-g")
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user