Files
l2/stdlib/hashmap.sl

458 lines
12 KiB
Plaintext
Raw Permalink Normal View History

# Hash Map (open-addressing, linear probing)
#
# Layout at address `hm`:
# [hm + 0] count (qword) — number of live entries
# [hm + 8] capacity (qword) — number of slots (always power of 2)
# [hm + 16] keys_ptr (qword) — pointer to keys array (cap * 8 bytes)
# [hm + 24] vals_ptr (qword) — pointer to values array (cap * 8 bytes)
# [hm + 32] flags_ptr (qword) — pointer to flags array (cap bytes, 0=empty 1=live 2=tombstone)
#
# Keys and values are 64-bit integers. For string keys, store
# a hash or pointer; the caller is responsible for hashing.
#
# Allocation: mmap; free: munmap.
# Growth: doubles capacity when load factor exceeds 70%.
import mem.sl
# ── Hash function ─────────────────────────────────────────────
#__hm_hash [* | key] -> [* | hash]
# Integer hash (splitmix64-style mixing)
:asm __hm_hash {
mov rax, [r12]
mov rcx, rax
shr rcx, 30
xor rax, rcx
mov rcx, 0xbf58476d1ce4e5b9
imul rax, rcx
mov rcx, rax
shr rcx, 27
xor rax, rcx
mov rcx, 0x94d049bb133111eb
imul rax, rcx
mov rcx, rax
shr rcx, 31
xor rax, rcx
mov [r12], rax
} ;
# ── Accessors ─────────────────────────────────────────────────
#hm_count [* | hm] -> [* | count]
word hm_count @ end
#hm_capacity [* | hm] -> [* | cap]
word hm_capacity 8 + @ end
#hm_keys [* | hm] -> [* | ptr]
word hm_keys 16 + @ end
#hm_vals [* | hm] -> [* | ptr]
word hm_vals 24 + @ end
#hm_flags [* | hm] -> [* | ptr]
word hm_flags 32 + @ end
# ── Constructor / Destructor ──────────────────────────────────
#hm_new [* | cap_hint] -> [* | hm]
# Create a new hash map. Capacity is rounded up to next power of 2 (min 8).
# Note: alloc uses mmap(MAP_ANONYMOUS) which returns zeroed pages.
word hm_new
dup 8 < if drop 8 end
# Round up to power of 2
1 while 2dup swap < do 2 * end nip
>r # r0 = cap
# Allocate header (40 bytes)
40 alloc # stack: [* | hm]
# count = 0
0 over swap !
# capacity
r@ over 8 + swap !
# keys array: cap * 8 (zeroed by mmap)
r@ 8 * alloc
over 16 + swap !
# vals array: cap * 8 (zeroed by mmap)
r@ 8 * alloc
over 24 + swap !
# flags array: cap bytes (zeroed by mmap)
r> alloc
over 32 + swap !
end
#hm_free [* | hm] -> [*]
# Free a hash map and all its internal buffers.
word hm_free
dup hm_capacity >r
dup hm_keys r@ 8 * free
dup hm_vals r@ 8 * free
dup hm_flags r> free
40 free
end
# ── Core probe: find slot in assembly ─────────────────────────
#__hm_probe [*, hm | key] -> [*, slot_idx | found_flag]
# Linear probe. Returns slot index and 1 if found, or first empty slot and 0.
:asm __hm_probe {
; TOS = key, NOS = hm
push r14 ; save callee-saved reg
mov rdi, [r12] ; key
mov rsi, [r12 + 8] ; hm ptr
; Hash the key
mov rax, rdi
mov rcx, rax
shr rcx, 30
xor rax, rcx
mov rcx, 0xbf58476d1ce4e5b9
imul rax, rcx
mov rcx, rax
shr rcx, 27
xor rax, rcx
mov rcx, 0x94d049bb133111eb
imul rax, rcx
mov rcx, rax
shr rcx, 31
xor rax, rcx
; rax = hash
mov r8, [rsi + 8] ; capacity
mov r9, r8
dec r9 ; mask = cap - 1
and rax, r9 ; idx = hash & mask
mov r10, [rsi + 16] ; keys_ptr
mov r11, [rsi + 32] ; flags_ptr
; r14 = first tombstone slot (-1 = none)
mov r14, -1
.loop:
movzx ecx, byte [r11 + rax] ; flags[idx]
cmp ecx, 0 ; empty?
je .empty
cmp ecx, 2 ; tombstone?
je .tombstone
; live: check key match
cmp rdi, [r10 + rax*8]
je .found
; advance
inc rax
and rax, r9
jmp .loop
.tombstone:
; remember first tombstone
cmp r14, -1
jne .skip_save
mov r14, rax
.skip_save:
inc rax
and rax, r9
jmp .loop
.empty:
; Use first tombstone if available
cmp r14, -1
je .use_empty
mov rax, r14
.use_empty:
; Return: slot=rax, found=0
mov [r12 + 8], rax ; overwrite hm slot with idx
mov qword [r12], 0 ; found = 0
pop r14
ret
.found:
; Return: slot=rax, found=1
mov [r12 + 8], rax
mov qword [r12], 1
pop r14
} ;
# ── Internal: rehash ──────────────────────────────────────────
#__hm_rehash [* | hm] -> [* | hm]
# Double capacity and re-insert all live entries.
# Strategy: create new map, copy entries, swap internals, free old arrays.
:asm __hm_rehash {
push r14 ; save callee-saved regs
push r15
mov rbx, [r12] ; hm
; Load old state
mov r8, [rbx + 8] ; old_cap
mov r9, [rbx + 16] ; old_keys
mov r10, [rbx + 24] ; old_vals
mov r11, [rbx + 32] ; old_flags
; New capacity = old_cap * 2
mov rdi, r8
shl rdi, 1 ; new_cap
; Save hm, old_cap, old_keys, old_vals, old_flags, new_cap on x86 stack
push rbx
push r8
push r9
push r10
push r11
push rdi
; Allocate new_keys = alloc(new_cap * 8)
; mmap(0, size, PROT_READ|PROT_WRITE=3, MAP_PRIVATE|MAP_ANON=34, -1, 0)
mov rax, 9
xor rdi, rdi
mov rsi, [rsp] ; new_cap
shl rsi, 3 ; new_cap * 8
mov rdx, 3
mov r10, 34
push r8 ; save r8
mov r8, -1
xor r9, r9
syscall
pop r8
push rax ; save new_keys
; Allocate new_vals = alloc(new_cap * 8)
mov rax, 9
xor rdi, rdi
mov rsi, [rsp + 8] ; new_cap
shl rsi, 3
mov rdx, 3
mov r10, 34
push r8
mov r8, -1
xor r9, r9
syscall
pop r8
push rax ; save new_vals
; Allocate new_flags = alloc(new_cap)
mov rax, 9
xor rdi, rdi
mov rsi, [rsp + 16] ; new_cap
mov rdx, 3
mov r10, 34
push r8
mov r8, -1
xor r9, r9
syscall
pop r8
push rax ; save new_flags
; Stack: new_flags, new_vals, new_keys, new_cap, old_flags, old_vals, old_keys, old_cap, hm
; Offsets: [rsp]=new_flags, [rsp+8]=new_vals, [rsp+16]=new_keys
; [rsp+24]=new_cap, [rsp+32]=old_flags, [rsp+40]=old_vals
; [rsp+48]=old_keys, [rsp+56]=old_cap, [rsp+64]=hm
mov r14, [rsp + 24] ; new_cap
dec r14 ; new_mask
; Re-insert loop: for i in 0..old_cap
xor rcx, rcx ; i = 0
mov r8, [rsp + 56] ; old_cap
.rehash_loop:
cmp rcx, r8
jge .rehash_done
; Check old_flags[i]
mov rdi, [rsp + 32] ; old_flags
movzx eax, byte [rdi + rcx]
cmp eax, 1 ; live?
jne .rehash_next
; Get key and val
mov rdi, [rsp + 48] ; old_keys
mov rsi, [rdi + rcx*8] ; key
mov rdi, [rsp + 40] ; old_vals
mov rdx, [rdi + rcx*8] ; val
; Hash key to find slot in new map
push rcx
push rsi
push rdx
; Hash rsi (key)
mov rax, rsi
mov rbx, rax
shr rbx, 30
xor rax, rbx
mov rbx, 0xbf58476d1ce4e5b9
imul rax, rbx
mov rbx, rax
shr rbx, 27
xor rax, rbx
mov rbx, 0x94d049bb133111eb
imul rax, rbx
mov rbx, rax
shr rbx, 31
xor rax, rbx
and rax, r14 ; slot = hash & new_mask
; Linear probe (new map is all empty, so first empty slot is fine)
mov rdi, [rsp + 24] ; new_flags (3 pushes offset: +24)
.probe_new:
movzx ebx, byte [rdi + rax]
cmp ebx, 0
je .probe_found
inc rax
and rax, r14
jmp .probe_new
.probe_found:
; Store key, val, flag
pop rdx ; val
pop rsi ; key
mov rdi, [rsp + 16 + 8] ; new_keys (adjusted for 1 remaining push: rcx)
mov [rdi + rax*8], rsi
mov rdi, [rsp + 8 + 8] ; new_vals
mov [rdi + rax*8], rdx
mov rdi, [rsp + 0 + 8] ; new_flags
mov byte [rdi + rax], 1
pop rcx ; restore i
.rehash_next:
inc rcx
jmp .rehash_loop
.rehash_done:
; Free old arrays
; munmap(old_keys, old_cap * 8)
mov rax, 11
mov rdi, [rsp + 48] ; old_keys
mov rsi, [rsp + 56] ; old_cap
shl rsi, 3
syscall
; munmap(old_vals, old_cap * 8)
mov rax, 11
mov rdi, [rsp + 40] ; old_vals
mov rsi, [rsp + 56]
shl rsi, 3
syscall
; munmap(old_flags, old_cap)
mov rax, 11
mov rdi, [rsp + 32] ; old_flags
mov rsi, [rsp + 56] ; old_cap
syscall
; Update hm header
mov rbx, [rsp + 64] ; hm
mov rax, [rsp + 24] ; new_cap
mov [rbx + 8], rax
mov rax, [rsp + 16] ; new_keys
mov [rbx + 16], rax
mov rax, [rsp + 8] ; new_vals
mov [rbx + 24], rax
mov rax, [rsp] ; new_flags
mov [rbx + 32], rax
; Clean up x86 stack (9 pushes + 2 callee-saved)
add rsp, 72
pop r15
pop r14
; hm is still on r12 stack, unchanged
} ;
# ── Public API ────────────────────────────────────────────────
#hm_set [*, hm, key | val] -> [* | hm]
# Insert or update a key-value pair. Returns the (possibly moved) hm.
word hm_set
>r >r # r0 = val, r1 = key, stack: [* | hm]
# Return stack: [... | val | key] (key on top, 0 rpick=key, 1 rpick=val)
# Check load: count * 10 >= capacity * 7 → rehash
dup hm_count 10 * over hm_capacity 7 * >= if
__hm_rehash
end
# Probe for key (r@ = key, top of return stack)
dup r@ __hm_probe # stack: [*, hm | slot, found]
swap >r # push slot; R: [val, key, slot]
# Now: 0 rpick=slot, 1 rpick=key, 2 rpick=val
# Store key at keys[slot]
over hm_keys r@ 8 * + 1 rpick !
# Store val at vals[slot]
over hm_vals r@ 8 * + 2 rpick !
# Set flag = 1
over hm_flags r> + 1 c!
# If found=0 (new entry), increment count
0 == if
dup @ 1 + over swap !
end
rdrop rdrop # drop key, val
end
#hm_get [*, hm | key] -> [*, hm | val, found_flag]
# Look up a key. Returns (val 1) if found, (0 0) if not.
word hm_get
over swap __hm_probe # stack: [*, hm | slot, found]
dup 0 == if
nip 0 swap # stack: [*, hm | 0, 0]
else
swap
2 pick hm_vals swap 8 * + @
swap # stack: [*, hm | val, 1]
end
end
#hm_has [*, hm | key] -> [*, hm | bool]
# Check if key exists. Returns 1 or 0.
word hm_has
hm_get nip
end
#hm_del [*, hm | key] -> [*, hm | deleted_flag]
# Delete a key. Returns 1 if deleted, 0 if not found.
word hm_del
over swap __hm_probe # stack: [*, hm | slot, found]
dup 0 == if
nip # stack: [*, hm | 0]
else
drop # drop found=1; stack: [*, hm | slot]
# Set flag to tombstone (2)
over hm_flags over + 2 c!
drop # drop slot
# Decrement count
dup @ 1 - over swap !
1 # stack: [*, hm | 1]
end
end
#__hm_bzero [*, len | addr] -> [*]
# Zero len bytes at addr
:asm __hm_bzero {
mov rdi, [r12] ; addr
mov rcx, [r12 + 8] ; len
add r12, 16
xor al, al
rep stosb
} ;
#hm_clear [* | hm] -> [*]
# Remove all entries without freeing the map.
word hm_clear
dup 0 ! # count = 0
dup hm_capacity
over hm_flags __hm_bzero
end