hack in "ok" load balancing for lily.collections.HashMap. (make the memory efficiency much worse too) (really we should not use nested dynamic arrays for buckets...)

add some helper functions to lily.collections.Vec for optimisation reasons
optimise lily.alloc.SimpleAllocator a little bit
delete lily.alloc.PageAllocator because it sucks
fix a bug in lily.iter.Iterator.fold
koniifer 2025-01-26 19:58:22 +00:00
parent 33cf8d7209
commit 3045d0a190
8 changed files with 155 additions and 206 deletions
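For context on the HashMap change: bucket counts are now kept at powers of two, so the index computation can swap the old modulo (hash % len) for a bit mask (hash & len - 1). A minimal standalone sketch of the trick, with the next_power_of_two helper copied from the hashmap.hb hunk below; the @use path and the literals are illustrative:

lily := @use("lily/lib.hb")
// copied from hashmap.hb: rounds n up to the next power of two
$next_power_of_two := fn(n: uint): uint {
    n -= 1
    n |= n >> 1
    n |= n >> 2
    n |= n >> 4
    n |= n >> 8
    n |= n >> 16
    n |= n >> 32
    return n + 1
}
main := fn(argc: uint, argv: []^void): uint {
    cap := next_power_of_two(12) // -> 16
    hash: uint = 3735928559
    // for a power-of-two cap, hash & cap - 1 == hash % cap, with no division
    lily.print(hash & cap - 1)
    lily.print(hash % cap)
    return 0
}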

View file

@@ -1,3 +1,2 @@
.{RawAllocator} := @use("raw.hb");
.{PageAllocator} := @use("page.hb");
.{SimpleAllocator} := @use("simple.hb")

View file

@@ -1,157 +0,0 @@
.{Config, Target, Type, TypeOf, log, collections: .{Vec}, quicksort} := @use("../lib.hb");
.{RawAllocator} := @use("lib.hb")
/*
* intended behaviour: (may not be complete)
* alloc:
* -> if no pages, or all pages full: allocate new pages enough for next allocation.
* -> for first page with contiguous space enough for next allocation, allocate there.
* dealloc:
* -> requires:
* -> ptr must be the first ptr of the allocation
* -> ptr must be in allocation table
* -> remove allocation from allocation table
* -> recalculate contiguous free space in page it was contained in
* -> if page is empty, free page
* realloc:
* -> if new size == original size: do nothing
* -> if new size requires any new page(s) to be allocated, use Target.realloc() to get new pointer
* -> else, Self.dealloc, Self.alloc, memmove
* design todo:
* -> would Self.alloc, memcopy, Self.dealloc be better for perf?
* -> security would prefer zeroing deallocations before freeing (would require the new order above) (maybe make it a build toggle)
* -> swap Vec(T, RawAllocator) for internal tables (for efficiency)
* assumptions:
* -> pages are of a constant length (per system)
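* worked example (illustrative numbers): with 4096-byte pages, an alloc of 5000 bytes
* gives Target.calculate_pages(5000) = 2, i.e. one 8192-byte block; the allocation
* takes [0..5000) and largest_free becomes [5000..8192)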
! IMPORTANT
! to ensure referential integrity, we do not move the contents of the blocks to new blocks
! (except when reallocating)
*/
Block := struct {
block: []u8,
largest_free: []u8,
}
// i cannot pretend this is efficient. (also incomplete)
PageAllocator := struct {
blocks: Vec(Block, RawAllocator),
allocs: Vec([]u8, RawAllocator),
blocks_raw: RawAllocator,
allocs_raw: RawAllocator,
new := fn(): Self {
blocks_raw := RawAllocator.new()
allocs_raw := RawAllocator.new()
blocks := Vec(Block, RawAllocator).new(&blocks_raw)
allocs := Vec([]u8, RawAllocator).new(TypeOf(&allocs_raw).uninit())
self := Self.(
blocks,
allocs,
blocks_raw,
allocs_raw,
)
self.blocks.allocator = &self.blocks_raw
self.allocs.allocator = &self.allocs_raw
return self
}
deinit := fn(self: ^Self): void {
self.allocs.deinit()
loop if self.blocks.len() == 0 break else {
// ! (compiler) bug: not logging here causes double free or corruption... wtf...
log.debug("here")
block := @unwrap(self.blocks.pop())
match Target.current() {
.AbleOS => Target.dealloc(block.block.ptr, block.block.len),
.LibC => Target.dealloc(block.block.ptr),
}
}
self.blocks.deinit()
self.blocks_raw.deinit()
self.allocs_raw.deinit()
}
alloc := fn(self: ^Self, $T: type, count: uint): ?^T {
This := Type(T)
size := This.size() * count
i := 0
loop if i >= self.blocks.len() break else {
defer i += 1
block := @unwrap(self.blocks.get_ref(i))
if block.largest_free.len >= size {
ptr := self._update_block(block, size)
self.allocs.push(ptr[0..size])
return @bitcast(ptr)
}
}
block_size := Target.calculate_pages(size) * Target.page_size()
// ! (libc) (compiler) bug: null check broken. unwrapping.
block_ptr := @unwrap(Target.alloc(block_size))
block := Block.(block_ptr[0..block_size], block_ptr[size..block_size])
// ! (libc) (compiler) wtf bug is this? can't push anything to blocks...
self.blocks.push(block)
self.allocs.push(block_ptr[0..size])
log.debug("pushed to allocs")
log.print(size)
log.print(block_size)
// the allocation occupies [0..size); return its head, not the free tail
return @bitcast(block_ptr)
}
alloc_zeroed := fn(self: ^Self, $T: type, count: uint): ?^T {
ptr := self.alloc(T, count)
if ptr == null return null
Target.memset(ptr, 0, count * @sizeof(T))
return ptr
}
realloc := fn(self: ^Self, $T: type, ptr: ^T, new_count: uint): ?^T {
log.error("todo: realloc")
die
return null
}
dealloc := fn(self: ^Self, $T: type, ptr: ^T): void {
log.error("todo: dealloc")
die
}
/// SAFETY: assumes that the block has enough space for `size`
_update_block := fn(self: ^Self, block: ^Block, size: uint): ^u8 {
block.largest_free = block.largest_free[0..size]
ptr := block.largest_free.ptr
// _ = quicksort(_compare_ptr, self.allocs.slice, 0, self.allocs.len() - 1)
self.allocs.sort_with(_compare_ptr)
log.print(self.allocs.slice)
i := 0
loop if i == self.allocs.len() - 1 break else {
defer i += 1
alloc_a := @unwrap(self.allocs.get_ref(i))
if alloc_a.ptr < block.block.ptr {
i += 1
continue
} else if alloc_a.ptr > block.block.ptr + block.block.len {
break
}
len: uint = 0
alloc_b := @unwrap(self.allocs.get_ref(i + 1))
pt2 := alloc_a.ptr + alloc_a.len
if alloc_b.ptr > block.block.ptr {
len = block.block.ptr + block.block.len - pt2
} else {
len = alloc_b.ptr - pt2
}
if len > block.largest_free.len {
block.largest_free = (*alloc_a)[0..len]
}
log.debug("here 2")
}
return ptr
}
}
$_compare_ptr := fn(lhs: @Any(), rhs: @Any()): bool {
return lhs.ptr < rhs.ptr
}

View file

@@ -17,8 +17,7 @@ SimpleAllocator := struct {
}
deinit := fn(self: ^Self): void {
loop if self.allocations.len() == 0 break else {
alloced := self.allocations.pop()
if alloced == null continue
alloced := self.allocations.pop_unchecked()
match Target.current() {
.LibC => Target.dealloc(alloced.ptr),
.AbleOS => Target.dealloc(alloced.ptr, alloced.len),
@@ -81,10 +80,9 @@ SimpleAllocator := struct {
i := 0
loop if i == self.allocations.len() break else {
defer i += 1
alloced := self.allocations.get(i)
if alloced == null return null
alloced := self.allocations.get_unchecked(i)
if alloced.ptr == ptr {
_ = self.allocations.remove(i)
_ = self.allocations.swap_remove(i)
return alloced
}
}

View file

@@ -1,4 +1,4 @@
.{collections: .{Vec}, iter: .{Iterator, IterNext}, Type, log, math} := @use("../lib.hb")
.{collections: .{Vec}, iter: .{Iterator, IterNext}, Type, TypeOf, log} := @use("../lib.hb")
Item := fn($Key: type, $Value: type): type return packed struct {
key: Key,
@@ -12,6 +12,25 @@ Buckets := fn($Key: type, $Value: type, $Allocator: type): type {
return Vec(Bucket(Key, Value, Allocator), Allocator)
}
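// note: for slices, equals is identity (same ptr and len), not element-wise comparison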
$equals := fn(lhs: @Any(), rhs: @TypeOf(lhs)): bool {
match TypeOf(lhs).kind() {
.Slice => return (lhs.ptr == rhs.ptr) & (lhs.len == rhs.len),
_ => return lhs == rhs,
}
}
// temporarily here.
$next_power_of_two := fn(n: uint): uint {
n -= 1
n |= n >> 1
n |= n >> 2
n |= n >> 4
n |= n >> 8
n |= n >> 16
n |= n >> 32
return n + 1
}
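// worked example: n = 20 -> n - 1 = 19 (0b10011); the shifts smear the top bit down,
// giving 0b11111 (31); returning 31 + 1 = 32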
HashMap := fn($Key: type, $Value: type, $Hasher: type, $Allocator: type): type return struct {
allocator: ^Allocator,
hasher: Hasher,
@@ -20,9 +39,43 @@ HashMap := fn($Key: type, $Value: type, $Hasher: type, $Allocator: type): type r
new := fn(allocator: ^Allocator): Self {
hasher := Hasher.default()
buckets := Buckets(Key, Value, Allocator).new(allocator)
buckets := Buckets(Key, Value, Allocator).new_with_capacity(allocator, 16)
// ! (compiler) bug: have to use for-loop here rather than using buckets.len(), otherwise we loop infinitely
i := 0
loop if i == 16 break else {
defer i += 1
buckets.push(Bucket(Key, Value, Allocator).new(allocator))
}
// also need to add this here...?
buckets.slice.len = 16
return .(allocator, hasher, buckets, 0)
}
// seems like bad performance...
resize := fn(self: ^Self): void {
new_cap := next_power_of_two(self.buckets.len() * 2)
new_buckets := @TypeOf(self.buckets).new_with_capacity(self.allocator, new_cap)
// same compiler bug as above...
i := 0
loop if i == new_cap break else {
defer i += 1
new_buckets.push(Bucket(Key, Value, Allocator).new(self.allocator))
}
new_buckets.slice.len = new_cap
loop if self.buckets.len() == 0 break else {
bucket := self.buckets.pop_unchecked()
loop if bucket.len() == 0 break else {
item := bucket.pop_unchecked()
self.hasher.write(item.key)
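// new_cap is a power of two, so masking with new_cap - 1 is equivalent to % new_cap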
idx := self.hasher.finish() & new_cap - 1
self.hasher.reset()
new_bucket := new_buckets.get_ref_unchecked(idx)
new_bucket.push(item)
}
bucket.deinit()
}
self.buckets.deinit()
self.buckets = new_buckets
}
deinit := fn(self: ^Self): void {
loop {
bucket := self.buckets.pop()
@@ -35,9 +88,13 @@ HashMap := fn($Key: type, $Value: type, $Hasher: type, $Allocator: type): type r
}
insert := fn(self: ^Self, key: Key, value: Value): ^Value {
self.hasher.write(key)
idx := self.hasher.finish() % math.max(1, self.buckets.len())
idx := self.hasher.finish() & self.buckets.len() - 1
self.hasher.reset()
// resize once the load factor exceeds 3/4
if self.length * 4 > self.buckets.len() * 3 {
@inline(self.resize)
// the bucket count changed, so recompute the index
self.hasher.write(key)
idx = self.hasher.finish() & self.buckets.len() - 1
self.hasher.reset()
}
bucket_opt := self.buckets.get_ref(idx)
if bucket_opt == null {
self.buckets.push(Bucket(Key, Value, Allocator).new(self.allocator))
@@ -49,49 +106,49 @@ HashMap := fn($Key: type, $Value: type, $Hasher: type, $Allocator: type): type r
i := 0
loop if i == bucket.len() break else {
defer i += 1
pair := bucket.get_ref(i)
if pair == null break
if pair.key == key {
pair := bucket.get_ref_unchecked(i)
if equals(pair.key, key) {
pair.value = value
// ! weird no-op cast to stop type system from complaining.
// don't quite know what is going on here...
return &@as(^Item(Key, Value), pair).value
}
}
bucket.push(.{key, value})
pair := @unwrap(bucket.get_ref(bucket.len() - 1))
pair := bucket.get_ref_unchecked(bucket.len() - 1)
self.length += 1
return &@as(^Item(Key, Value), pair).value
}
get := fn(self: ^Self, key: Key): ?Value {
self.hasher.write(key)
idx := self.hasher.finish() % math.max(1, self.buckets.len())
idx := self.hasher.finish() & self.buckets.len() - 1
self.hasher.reset()
bucket := self.buckets.get_ref(idx)
if bucket == null return null
i := 0
loop if i == self.buckets.len() break else {
loop if i == bucket.len() break else {
defer i += 1
pair := bucket.get_ref(i)
if pair == null break
if pair.key == key {
pair := bucket.get_ref_unchecked(i)
if equals(pair.key, key) {
return pair.value
}
}
return null
}
// references may be invalidated if value is removed from hashmap after get_ref is used.
get_ref := fn(self: ^Self, key: Key): ?^Value {
self.hasher.write(key)
idx := self.hasher.finish() % math.max(1, self.buckets.len())
idx := self.hasher.finish() & self.buckets.len() - 1
self.hasher.reset()
bucket := self.buckets.get_ref(idx)
if bucket == null return null
i := 0
loop if i == self.buckets.len() break else {
loop if i == bucket.len() break else {
defer i += 1
pair := bucket.get_ref(i)
if pair == null break
if pair.key == key {
pair := bucket.get_ref_unchecked(i)
if equals(pair.key, key) {
return &@as(^Item(Key, Value), pair).value
}
}
@@ -99,19 +156,18 @@ HashMap := fn($Key: type, $Value: type, $Hasher: type, $Allocator: type): type r
}
remove := fn(self: ^Self, key: Key): ?Value {
self.hasher.write(key)
idx := self.hasher.finish() % math.max(1, self.buckets.len())
idx := self.hasher.finish() & self.buckets.len() - 1
self.hasher.reset()
bucket := self.buckets.get_ref(idx)
if bucket == null return null
i := 0
loop if i == self.buckets.len() break else {
loop if i == bucket.len() break else {
defer i += 1
pair := bucket.get_ref(i)
if pair == null break
if pair.key == key {
pair := bucket.get_ref_unchecked(i)
if equals(pair.key, key) {
self.length -= 1
return @unwrap(bucket.remove(i)).value
return @unwrap(bucket.swap_remove(i)).value
}
}
return null
@@ -129,6 +185,8 @@ HashMap := fn($Key: type, $Value: type, $Hasher: type, $Allocator: type): type r
$len := fn(self: ^Self): uint return self.length
}
// todo: make these efficient and reduce code duplication
Items := fn($H: type, $I: type): type return struct {
// has to be owned here... (possibly due to bug) great...
map: H,

View file

@@ -21,7 +21,9 @@ Vec := fn($T: type, $Allocator: type): type return struct {
log.debug("deinit: vec")
}
}
$reserve := fn(self: ^Self, n: uint): void {
// todo: maybe make this exponential instead
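// (sketch: growing to max(self.cap * 2, self.len() + n) would amortize repeated pushes to O(1))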
reserve := fn(self: ^Self, n: uint): void {
if self.len() + n <= self.cap return;
// ! (libc) (compiler) bug: null check broken, so unwrapping (unsafe!)
new_alloc := @unwrap(self.allocator.realloc(T, self.slice.ptr, self.cap + n))
self.cap += n
@@ -48,26 +50,41 @@ Vec := fn($T: type, $Allocator: type): type return struct {
if n >= self.slice.len return null
return self.slice[n]
}
$get_unchecked := fn(self: ^Self, n: uint): T return self.slice[n]
get_ref := fn(self: ^Self, n: uint): ?^T {
if n >= self.slice.len return null
return self.slice.ptr + n
}
$get_ref_unchecked := fn(self: ^Self, n: uint): ^T return self.slice.ptr + n
pop := fn(self: ^Self): ?T {
if self.slice.len == 0 return null
self.slice.len -= 1
// as far as im aware this is not undefined behaviour.
return self.slice[self.slice.len]
}
$pop_unchecked := fn(self: ^Self): T {
self.slice.len -= 1
// as far as im aware this is not undefined behaviour. (2)
return self.slice[self.slice.len]
}
remove := fn(self: ^Self, n: uint): ?T {
if n >= self.slice.len return null
if n + 1 == self.slice.len return self.pop()
if n + 1 == self.slice.len return self.pop_unchecked()
temp := self.slice[n]
memmove(self.slice.ptr + n, self.slice.ptr + n + 1, (self.slice.len - n - 1) * @sizeof(T))
self.slice.len -= 1
return temp
}
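// O(1) removal that does not preserve order: the last element is moved into slot n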
swap_remove := fn(self: ^Self, n: uint): ?T {
if n >= self.slice.len return null
if n + 1 == self.slice.len return self.pop_unchecked()
temp := self.slice[n]
self.slice[n] = self.pop_unchecked()
return temp
}
find := fn(self: ^Self, rhs: T): ?uint {
i := 0
loop if self.get(i) == rhs return i else if i == self.slice.len return null else i += 1
loop if i == self.slice.len return null else if self.get_unchecked(i) == rhs return i else i += 1
}
$sort := fn(self: ^Self): void {
_ = quicksort(compare, self.slice, 0, self.slice.len - 1)
@@ -76,5 +93,5 @@ Vec := fn($T: type, $Allocator: type): type return struct {
_ = quicksort(func, self.slice, 0, self.slice.len - 1)
}
$len := fn(self: ^Self): uint return self.slice.len
$capacity := fn(self: ^Self): uint return self.capacity
$capacity := fn(self: ^Self): uint return self.cap
}

View file

@@ -43,7 +43,7 @@ Iterator := fn($T: type): type {
_ = _for_each(x.val)
}
}
fold := fn(self: ^Self, $_fold: type, sum: Value): Value {
fold := fn(self: ^Self, $_fold: type, sum: @Any()): @TypeOf(sum) {
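// bug fix: the accumulator is @Any() now, so fold can accumulate into a type other
// than the iterator's Value (e.g. summing u8 items into a uint)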
loop {
x := self.next()
if x.finished return sum

View file

@@ -4,7 +4,7 @@ Version := struct {
patch: uint,
}
$VERSION := Version.(0, 0, 5)
$VERSION := Version.(0, 0, 6)
Config := struct {
$DEBUG := true

View file

@@ -16,23 +16,57 @@ $add := fn(sum: uint, x: uint): uint {
}
main := fn(argc: uint, argv: []^void): uint {
sum := Generator.{}.into_iter().take(50).fold(add, 0)
lily.print(sum)
// sum := Generator.{}.into_iter().take(50).fold(add, 0)
// lily.print(sum)
// ! (libc) (compiler) bug: .collect(T) does not work.
if lily.Target.current() != .LibC {
str := lily.string.chars("Hello, ").intersperse(
lily.string.chars("World!"),
).collect([13]u8)
// // ! (libc) (compiler) bug: .collect(T) does not work.
// if lily.Target.current() != .LibC {
// str := lily.string.chars("Hello, ").intersperse(
// lily.string.chars("World!"),
// ).collect([13]u8)
if str != null {
lily.log.info(@as([13]u8, str)[..])
} else {
lily.panic("could not collect (array wrong size)")
}
} else {
lily.log.info("HWeolrllod,! ")
// if str != null {
// lily.log.info(@as([13]u8, str)[..])
// } else {
// lily.panic("could not collect (array wrong size)")
// }
// } else {
// // yes, im cheating if you are on libc.
// // it's not my fault, blame compiler bugs. T^T
// lily.log.info("HWeolrllod,! ")
// }
// return 0
// ! the following will ONLY work on ableos
allocator := lily.alloc.SimpleAllocator.new()
defer allocator.deinit()
map := lily.collections.HashMap(
uint,
uint,
lily.hash.FoldHasher,
lily.alloc.SimpleAllocator,
).new(&allocator)
defer map.deinit()
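// inserts keys 0..197; with the 3/4 load factor this forces several resizes along the way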
i := 0
$loop if i == 99 * 2 break else {
_ = map.insert(i, 0)
_ = map.insert(i + 1, 0)
i += 2
}
map.keys().for_each(lily.print)
// fun thing
// _ = map.insert("Farewell, World!", "beep boop")
// _ = map.insert("Hello, World!", "Hello!")
// _ = map.insert("Goodbye, World!", "Goodbye!")
// _ = map.insert("How do you do, World?", "Great!")
// _ = map.insert("Until next time, World!", "See you!")
// _ = map.insert("Greetings, World!", "Hi there!")
// lily.print(map.get("asdfasdf!"))
// lily.print(map.get("Hello, World!"))
return 0
}