medium sized work

make objdump output prettier; print strings in structs correctly; temporary printf bugfix; implement a bunch of iterator functions; implement iterator for string.split, string.chars, string.chars_ref; minor hashmap fixes; implement item, key, and value iterators for hashmaps; implement reserve and new_with_capacity for vecs
2025-01-17 12:39:01 +00:00 · 2025-01-17 12:39:01 +00:00 · bfe75481e3
parent a5e89020c4
commit bfe75481e3
7 changed files with 336 additions and 87 deletions
--- a/2
+++ b/2
@ -152,7 +152,7 @@ build() {
    fi
    if [ ! "$target" = "unknown-virt-unknown" ]; then
        if [ "$run" = 1 ]; then exec "$BUILD_PATH/$target/lily"; fi
-        if [ "$dump_asm" = 1 ]; then objdump -d "$BUILD_PATH/$target/lily.o"; fi
+        if [ "$dump_asm" = 1 ]; then objdump -d -M intel --no-show-raw-insn "$BUILD_PATH/$target/lily.o" | grep -E "^\s+[0-9a-f]+:" | sed -E 's/^\s+[0-9a-f]+:\s+//'; fi
    fi
 }

--- a/src/lily/collections/hashmap.hb
+++ b/src/lily/collections/hashmap.hb
@ -1,4 +1,4 @@
-.{collections: .{Vec}} := @use("../lib.hb")
+.{collections: .{Vec}, iter: .{Iterator, IterNext}, Type, log, math} := @use("../lib.hb")

 Item := fn($Key: type, $Value: type): type return packed struct {
 	key: Key,
@ -31,34 +31,35 @@ HashMap := fn($Key: type, $Value: type, $Hasher: type, $Allocator: type): type r
 	}
 	insert := fn(self: ^Self, key: Key, value: Value): ^Value {
 		self.hasher.write(key)
-		idx := self.hasher.finish() % self.buckets.len()
+		idx := self.hasher.finish() % math.max(1, self.buckets.len())
 		self.hasher.reset()

-		bucket := self.buckets.get_ref(idx)
-		if bucket == null {
+		bucket_opt := self.buckets.get_ref(idx)
+		if bucket_opt == null {
 			self.buckets.push(Bucket(Key, Value, Allocator).new(self.allocator))
-			bucket = @unwrap(self.buckets.get_ref(self.buckets.len() - 1))
+			bucket_opt = self.buckets.get_ref(self.buckets.len() - 1)
 		}

+		bucket := @unwrap(bucket_opt)
+
 		i := 0
-		loop if i == self.buckets.len() break else {
+		loop if i == bucket.len() break else {
 			defer i += 1
-			pair := @unwrap(bucket).get_ref(i)
+			pair := bucket.get_ref(i)
 			if pair == null break
 			if pair.key == key {
 				pair.value = value
+				return &@as(^Item(Key, Value), pair).value
 			}
-			return &@as(^Item(Key, Value), pair).value
 		}
-
-		@unwrap(bucket).push(.{key, value})
-		pair := @unwrap(@unwrap(bucket).get_ref(@unwrap(bucket).len() - 1))
+		bucket.push(.{key, value})
+		pair := @unwrap(bucket.get_ref(bucket.len() - 1))
 		self.length += 1
 		return &@as(^Item(Key, Value), pair).value
 	}
 	get := fn(self: ^Self, key: Key): ?Value {
 		self.hasher.write(key)
-		idx := self.hasher.finish() % self.buckets.len()
+		idx := self.hasher.finish() % math.max(1, self.buckets.len())
 		self.hasher.reset()

 		bucket := self.buckets.get_ref(idx)
@ -76,7 +77,7 @@ HashMap := fn($Key: type, $Value: type, $Hasher: type, $Allocator: type): type r
 	}
 	get_ref := fn(self: ^Self, key: Key): ?^Value {
 		self.hasher.write(key)
-		idx := self.hasher.finish() % self.buckets.len()
+		idx := self.hasher.finish() % math.max(1, self.buckets.len())
 		self.hasher.reset()

 		bucket := self.buckets.get_ref(idx)
@ -94,7 +95,7 @@ HashMap := fn($Key: type, $Value: type, $Hasher: type, $Allocator: type): type r
 	}
 	remove := fn(self: ^Self, key: Key): ?Value {
 		self.hasher.write(key)
-		idx := self.hasher.finish() % self.buckets.len()
+		idx := self.hasher.finish() % math.max(1, self.buckets.len())
 		self.hasher.reset()

 		bucket := self.buckets.get_ref(idx)
@ -111,5 +112,72 @@ HashMap := fn($Key: type, $Value: type, $Hasher: type, $Allocator: type): type r
 		}
 		return null
 	}
-	$len := fn(self: ^Self): uint return self.len
+	// todo: write keys, values
+	/// Returns an iterator over the map's items, positioned at bucket 0, entry 0.
+	/// The map is taken by value and stored inside the iterator.
+	$items := fn(self: Self): Iterator(Items(Self, Item(Key, Value))) {
+		return .(.(self, 0, 0))
+	}
+	/// Returns an iterator over the map's keys, positioned at bucket 0, entry 0.
+	/// The map is taken by value and stored inside the iterator.
+	$keys := fn(self: Self): Iterator(Keys(Self, Key)) {
+		return .(.(self, 0, 0))
+	}
+	/// Returns an iterator over the map's values, positioned at bucket 0, entry 0.
+	/// The map is taken by value and stored inside the iterator.
+	$values := fn(self: Self): Iterator(Values(Self, Value)) {
+		return .(.(self, 0, 0))
+	}
+	$len := fn(self: ^Self): uint return self.length
+}
+
+/// Iteration state over the items of a hash map of type `H`, yielding values of type `I`.
+/// `bucket` indexes into `map.buckets`; `sub` indexes into the current bucket.
+Items := fn($H: type, $I: type): type return struct {
+	// has to be owned here... (possibly due to bug) great...
+	map: H,
+	bucket: uint,
+	sub: uint,
+	/// Returns the next item with `finished = false`, or `finished = true` (with an
+	/// uninitialised value) once `map.buckets.get_ref(self.bucket)` yields null.
+	next := fn(self: ^Self): IterNext(I) {
+		bucket := self.map.buckets.get_ref(self.bucket)
+		if bucket == null return .(true, Type(I).uninit())
+		sub := bucket.get(self.sub)
+		if sub == null {
+			// no entry at `sub` in this bucket: restart at entry 0 of the following bucket
+			self.sub = 0
+			self.bucket += 1
+			return self.next()
+		}
+		self.sub += 1
+		return .(false, sub)
+	}
+}
+
+/// Iteration state over the values of a hash map of type `H`, yielding values of type `V`.
+/// `bucket` indexes into `map.buckets`; `sub` indexes into the current bucket.
+Values := fn($H: type, $V: type): type return struct {
+	// has to be owned here... (possibly due to bug) great...
+	map: H,
+	bucket: uint,
+	sub: uint,
+	/// Returns the `value` field of the next entry with `finished = false`, or
+	/// `finished = true` (with an uninitialised value) once
+	/// `map.buckets.get_ref(self.bucket)` yields null.
+	next := fn(self: ^Self): IterNext(V) {
+		bucket := self.map.buckets.get_ref(self.bucket)
+		if bucket == null return .(true, Type(V).uninit())
+		sub := bucket.get(self.sub)
+		if sub == null {
+			// no entry at `sub` in this bucket: restart at entry 0 of the following bucket
+			self.sub = 0
+			self.bucket += 1
+			return self.next()
+		}
+		self.sub += 1
+		return .(false, sub.value)
+	}
+}
+
+/// Iteration state over the keys of a hash map of type `H`, yielding values of type `K`.
+/// `bucket` indexes into `map.buckets`; `sub` indexes into the current bucket.
+Keys := fn($H: type, $K: type): type return struct {
+	// has to be owned here... (possibly due to bug) great...
+	map: H,
+	bucket: uint,
+	sub: uint,
+	/// Returns the `key` field of the next entry with `finished = false`, or
+	/// `finished = true` (with an uninitialised value) once
+	/// `map.buckets.get_ref(self.bucket)` yields null.
+	next := fn(self: ^Self): IterNext(K) {
+		bucket := self.map.buckets.get_ref(self.bucket)
+		if bucket == null return .(true, Type(K).uninit())
+		sub := bucket.get(self.sub)
+		if sub == null {
+			// no entry at `sub` in this bucket: restart at entry 0 of the following bucket
+			self.sub = 0
+			self.bucket += 1
+			return self.next()
+		}
+		self.sub += 1
+		return .(false, sub.key)
+	}
 }
--- a/src/lily/collections/vec.hb
+++ b/src/lily/collections/vec.hb
@ -3,8 +3,13 @@
 Vec := fn($T: type, $Allocator: type): type return struct {
 	slice: []T,
 	allocator: ^Allocator,
-	cap: uint,
-	$new := fn(allocator: ^Allocator): Self return .{slice: Type([]T).uninit(), allocator, cap: 0}
+	cap: uint = 0,
+	$new := fn(allocator: ^Allocator): Self return .{slice: Type([]T).uninit(), allocator}
+	/// Creates a vec backed by an allocation of `cap` elements of `T`.
+	/// The slice starts empty (`new_alloc[0..0]`) and `cap` is recorded as the capacity.
+	$new_with_capacity := fn(allocator: ^Allocator, cap: uint): Self {
+		// ! (libc) (compiler) bug: null check broken, so unwrapping (unsafe!)
+		new_alloc := @unwrap(allocator.alloc(T, cap))
+		return .{slice: new_alloc[0..0], allocator, cap}
+	}
 	deinit := fn(self: ^Self): void {
 		// currently does not handle deinit of T if T allocates memory
 		if self.cap > 0 self.allocator.dealloc(T, self.slice.ptr)
@ -16,10 +21,17 @@ Vec := fn($T: type, $Allocator: type): type return struct {
 			log.debug("deinit: vec")
 		}
 	}
+	/// Grows the capacity by `n` elements; the slice length is not changed.
+	/// A request for zero additional elements is a no-op.
+	$reserve := fn(self: ^Self, n: uint): void {
+		if n > 0 {
+			if self.cap == 0 {
+				// nothing has been allocated yet, so `slice.ptr` may still be uninitialised
+				// and must not be handed to `realloc`; allocate fresh, as `push` does.
+				// ! (libc) (compiler) bug: null check broken, so unwrapping (unsafe!)
+				new_alloc := @unwrap(self.allocator.alloc(T, n))
+				self.slice.ptr = new_alloc
+			} else {
+				// ! (libc) (compiler) bug: null check broken, so unwrapping (unsafe!)
+				new_alloc := @unwrap(self.allocator.realloc(T, self.slice.ptr, self.cap + n))
+				self.slice.ptr = new_alloc
+			}
+			self.cap += n
+		}
+	}
 	push := fn(self: ^Self, value: T): void {
 		if self.slice.len == self.cap {
 			if self.cap == 0 {
 				self.cap = 1
+				// ! (libc) (compiler) bug: null check broken, so unwrapping (unsafe!)
 				new_alloc := @unwrap(self.allocator.alloc(T, self.cap))
 				self.slice.ptr = new_alloc
 			} else {
--- a/src/lily/fmt.hb
+++ b/src/lily/fmt.hb
@ -199,7 +199,15 @@ format := fn(buf: []u8, v: @Any()): uint {
 		},
 		.Struct => return fmt_container(buf, v),
 		.Tuple => return fmt_container(buf, v),
-		.Slice => return fmt_container(buf, v),
+		.Slice => {
+			if T.This() == []u8 {
+				*buf.ptr = '"'
+				memcpy(buf.ptr + 1, v.ptr, v.len);
+				*(buf.ptr + 1 + v.len) = '"'
+				return v.len + 2
+			}
+			return fmt_container(buf, v)
+		},
 		.Array => return fmt_container(buf, v),
 		.Optional => return fmt_optional(buf, v),
 		.Enum => return fmt_enum(buf, v),
@ -208,12 +216,15 @@ format := fn(buf: []u8, v: @Any()): uint {
 	return 0
 }

+// ! (compiler) bug: panic doesnt work here specifically. causes parser issue.
 format_with_str := fn(str: []u8, buf: []u8, v: @Any()): uint {
 	T := TypeOf(v)
 	n := string.count(str, '{')
-	if n != string.count(str, '}') panic("Missing closing '}' in format string.")
+	// if n != string.count(str, '}') panic("Missing closing '}' in format string.")
+	if n != string.count(str, '}') die
 	if T.kind() == .Tuple {
-		if T.len() != n panic("Format string has different number of '{}' than args given.")
+		// if T.len() != n panic("Format string has different number of '{}' than args given.")
+		if T.len() != n die
 		m := 0
 		i := 0
 		j := 0
@ -243,7 +254,8 @@ format_with_str := fn(str: []u8, buf: []u8, v: @Any()): uint {
 		}
 		return j
 	} else if n > 1 {
-		panic("Format string has multiple '{}' but value provided is not a tuple.")
+		// panic("Format string has multiple '{}' but value provided is not a tuple.")
+		die
 	} else {
 		i := 0
 		j := 0
--- a/src/lily/iter.hb
+++ b/src/lily/iter.hb
@ -1,21 +1,40 @@
-.{TypeOf} := @use("lib.hb")
+.{Type} := @use("lib.hb")

 IterNext := fn($T: type): type return struct {finished: bool, val: T}

+// ! todo: complain about inlining rules
+// ! how am i supposed to get optimal performance out of this if inlining is sometimes not allowed
+
+// ! todo:
+// * Iterator.peek
+
 /// Iterator struct. Implements iterator stuff for you if you implement `into_iter` for your struct.
 Iterator := fn($T: type): type {
-	$A := @TypeOf(T.next(idk))
+	$Next := @TypeOf(T.next(idk))
+	$Value := @TypeOf(T.next(idk).val)

 	return struct {
 		inner: T,
-		$next := fn(self: ^Self): A {
+		$next := fn(self: ^Self): Next {
 			return self.inner.next()
 		}
 		$map := fn(self: Self, $_map: type): Iterator(Map(T, _map)) {
 			return .(.(self))
 		}
 		$enumerate := fn(self: Self): Iterator(Enumerate(T)) {
-			return .(.(self, 0))
+			return .(.{iter: self})
+		}
+		/// Wraps this iterator in a `Take` adapter with `end` set to `n`.
+		$take := fn(self: Self, n: uint): Iterator(Take(T)) {
+			return .(.{iter: self, end: n})
+		}
+		/// Wraps this iterator in a `Skip` adapter with `step` set to `n`.
+		$skip := fn(self: Self, n: uint): Iterator(Skip(T)) {
+			return .(.{iter: self, step: n})
+		}
+		/// Wraps this iterator and `rhs` in a `Chain` adapter (`iter0` = self, `iter1` = `rhs`).
+		/// `rhs` is the raw iterator state; it is wrapped in an `Iterator` here.
+		$chain := fn(self: Self, rhs: @Any()): Iterator(Chain(T, @TypeOf(rhs))) {
+			return .(.{iter0: self, iter1: .(rhs)})
+		}
+		/// Wraps this iterator and `rhs` in an `Intersperse` adapter (`iter0` = self, `iter1` = `rhs`).
+		/// `rhs` is the raw iterator state; it is wrapped in an `Iterator` here.
+		$intersperse := fn(self: Self, rhs: @Any()): Iterator(Intersperse(T, @TypeOf(rhs))) {
+			return .(.{iter0: self, iter1: .(rhs)})
 		}
 		for_each := fn(self: ^Self, $_for_each: type): void {
 			loop {
@ -24,16 +43,32 @@ Iterator := fn($T: type): type {
 				_ = _for_each(x.val)
 			}
 		}
+		/// Consumes the iterator, combining every element into an accumulator.
+		/// `sum` is the initial accumulator; each step replaces it with `_fold(sum, element)`.
+		/// Returns the accumulator once the iterator reports `finished`.
+		fold := fn(self: ^Self, $_fold: type, sum: Value): Value {
+			loop {
+				// pull exactly one element per step so that none is dropped,
+				// and stop as soon as the iterator is exhausted
+				x := self.next()
+				if x.finished return sum
+				sum = _fold(sum, x.val)
+			}
+		}
+		/// Advances the iterator and returns the element at zero-based position `n`,
+		/// or null if the iterator finishes before reaching it.
+		nth := fn(self: ^Self, n: uint): ?Value {
+			seen := 0
+			loop {
+				item := self.next()
+				if item.finished return null
+				if seen == n return item.val
+				seen += 1
+			}
+		}
 	}
 }

 /// Map is lazy. Simply calling `my_iter.map(func)` will not cause any execution.
 Map := fn($T: type, $_map: type): type {
-	$M := @TypeOf(_map(@as(@TypeOf(T.next(idk).val), idk)))
+	$Next := @TypeOf(_map(@as(@TypeOf(T.next(idk).val), idk)))

 	return struct {
 		iter: Iterator(T),
-		next := fn(self: ^Self): IterNext(M) {
+		next := fn(self: ^Self): IterNext(Next) {
 			x := self.iter.inner.next()
 			return .(x.finished, _map(x.val))
 		}
@ -43,11 +78,11 @@ Map := fn($T: type, $_map: type): type {
 IterEnumerate := fn($T: type): type return struct {n: uint, val: T}

 Enumerate := fn($T: type): type {
-	$A := IterEnumerate(@TypeOf(T.next(idk).val))
+	$Next := IterEnumerate(@TypeOf(T.next(idk).val))
 	return struct {
 		iter: Iterator(T),
-		n: uint,
-		next := fn(self: ^Self): IterNext(A) {
+		n: uint = 0,
+		next := fn(self: ^Self): IterNext(Next) {
 			self.n += 1
 			x := self.iter.inner.next()
 			return .(x.finished, .(self.n, x.val))
@ -55,14 +90,112 @@ Enumerate := fn($T: type): type {
 	}
 }

-SliceIter := fn($T: type): type return struct {
-	slice: []T,
-	cursor: uint,
+/// Adapter that yields at most `end` elements from the wrapped iterator.
+/// `n` counts how many times `next` has been called.
+Take := fn($T: type): type {
+	$Next := @TypeOf(T.next(idk).val)
+	return struct {
+		iter: Iterator(T),
+		n: uint = 0,
+		end: uint,
+		next := fn(self: ^Self): IterNext(Next) {
+			self.n += 1
+			// still within the limit: forward to the wrapped iterator
+			if self.n <= self.end return self.iter.inner.next()
+			// limit reached: report finished with an uninitialised value
+			return .(true, Type(Next).uninit())
+		}
+	}
+}

-	next := fn(self: ^Self): IterNext(?T) {
-		if self.cursor >= self.slice.len return .(true, null)
-		tmp := self.slice[self.cursor]
-		self.cursor += 1
-		return .(false, tmp)
+/// Adapter around an iterator that discards elements.
+/// Every call to `next` discards `step` elements from the wrapped iterator and
+/// returns the element that follows them.
+// NOTE(review): skipping happens on every call, not only before the first element — confirm this is intended.
+Skip := fn($T: type): type {
+	$Next := @TypeOf(T.next(idk).val)
+	return struct {
+		iter: Iterator(T),
+		step: uint,
+		next := fn(self: ^Self): IterNext(Next) {
+			n := 0
+			loop {
+				x := self.iter.next()
+				// stop immediately once the wrapped iterator is exhausted instead of
+				// calling `next` on it again
+				if x.finished return x
+				if n == self.step return x
+				n += 1
+			}
+		}
+	}
+}
+
+/// Progress of a `Chain` iterator.
+ChainState := enum {
+	// elements are being taken from `iter0`
+	Iter0,
+	// `iter0` has finished; elements are being taken from `iter1`
+	Iter0Finished,
+	// `iter1` has finished as well; the chain reports finished
+	BothFinished,
+}
+
+/// Adapter that yields the elements of `iter0` followed by the elements of `iter1`.
+/// Both wrapped iterators must yield the same value type; otherwise a compile error is raised.
+Chain := fn($A: type, $B: type): type {
+	$Next := @TypeOf(A.next(idk).val)
+	$Next1 := @TypeOf(B.next(idk).val)
+	if Next1 != Next @error("Both iterators should return the same type")
+
+	return struct {
+		iter0: Iterator(A),
+		iter1: Iterator(B),
+		state: ChainState = .Iter0,
+		next := fn(self: ^Self): IterNext(Next) {
+			x := Type(IterNext(Next)).uninit()
+			match self.state {
+				.Iter0 => {
+					x = self.iter0.inner.next()
+					if x.finished {
+						// `iter0` just ran out: switch state and fetch from `iter1` instead
+						self.state = .Iter0Finished
+						return self.next()
+					}
+				},
+				.Iter0Finished => {
+					x = self.iter1.inner.next()
+					if x.finished self.state = .BothFinished
+				},
+				// `.BothFinished`: nothing is fetched, `x` stays uninitialised
+				_ => {
+				},
+			}
+			// finished is reported only once both iterators are exhausted
+			return .(self.state == .BothFinished, x.val)
+		}
+	}
+}
+
+/// Progress of an `Intersperse` iterator.
+IntersperseState := enum {
+	// the next element is taken from `iter0`
+	Iter0,
+	// the next element is taken from `iter1`
+	Iter1,
+	// `iter0` has finished; the iterator reports finished
+	Iter0Finished,
+	// `iter1` has finished; remaining elements are taken from `iter0` only
+	Iter1Finished,
+}
+
+/// Adapter that alternates between `iter0` and `iter1`, starting with `iter0`.
+/// Once `iter1` finishes, the remaining elements of `iter0` are yielded on their own;
+/// iteration ends when `iter0` finishes.
+/// Both wrapped iterators must yield the same value type; otherwise a compile error is raised.
+Intersperse := fn($A: type, $B: type): type {
+	$Next := @TypeOf(A.next(idk).val)
+	$Next1 := @TypeOf(B.next(idk).val)
+	if Next1 != Next @error("Both iterators should return the same type")
+
+	return struct {
+		iter0: Iterator(A),
+		iter1: Iterator(B),
+		state: IntersperseState = .Iter0,
+		next := fn(self: ^Self): IterNext(Next) {
+			x := Type(IterNext(Next)).uninit()
+			match self.state {
+				.Iter0 => {
+					x = self.iter0.inner.next()
+					if x.finished self.state = .Iter0Finished else self.state = .Iter1
+				},
+				.Iter1 => {
+					x = self.iter1.inner.next()
+					if x.finished {
+						// `iter1` just ran out: from now on only `iter0` is consulted
+						self.state = .Iter1Finished
+						return self.next()
+					} else self.state = .Iter0
+				},
+				.Iter1Finished => {
+					x = self.iter0.inner.next()
+					if x.finished self.state = .Iter0Finished
+				},
+				// `.Iter0Finished`: nothing is fetched, `x` stays uninitialised
+				_ => {
+				},
+			}
+			// finished is reported only once `iter0` is exhausted
+			return .(self.state == .Iter0Finished, x.val)
+		}
 	}
 }
--- a/src/lily/string.hb
+++ b/src/lily/string.hb
@ -1,3 +1,5 @@
+.{iter: .{Iterator, IterNext}, Type} := @use("lib.hb")
+
 reverse := fn(str: []u8): void {
 	if str.len == 0 return;
 	j := str.len - 1
@ -66,41 +68,52 @@ split_once := fn(haystack: []u8, needle: @Any()): ?struct {left: []u8, right: []
 	}
 }

-split := fn(iter: []u8, needle: @Any()): struct {
+split := fn(str: []u8, needle: @Any()): Iterator(struct {
 	str: []u8,
 	needle: @TypeOf(needle),
-	done: bool,
-
-	next := fn(self: ^Self): ?[]u8 {
-		if self.done return null;
+	finished: bool = false,

+	next := fn(self: ^Self): IterNext([]u8) {
 		splits := split_once(self.str, self.needle)
+		if self.finished return .(true, Type([]u8).uninit())
 		if splits != null {
 			self.str = splits.right
-			return splits.left
+			return .(false, splits.left)
 		} else {
-			self.done = true
-			return self.str
+			self.finished = true
+			return .(false, self.str)
 		}
 	}
-} {
+}) {
 	T := @TypeOf(needle)
 	if T != []u8 & T != u8 {
 		@error("Type of needle must be []u8 or u8.")
 	}
-	return .(iter, needle, false)
+	return .(.{str, needle})
 }

-chars := fn(iter: []u8): struct {
+chars := fn(iter: []u8): Iterator(struct {
 	str: []u8,

-	next := fn(self: ^Self): ?u8 {
-		if self.str.len == 0 return null
+	$next := fn(self: ^Self): IterNext(u8) {
+		tmp := IterNext(u8).(self.str.len == 0, self.str[0])
 		self.str = self.str[1..]
-		return self.str[0]
+		return tmp
 	}
-} {
-	return .(iter)
+}) {
+	return .(.(iter))
+}
+
+/// Returns an iterator yielding a pointer to each byte of `iter`, front to back.
+/// Once the string is empty, `next` reports finished and leaves the state untouched.
+chars_ref := fn(iter: []u8): Iterator(struct {
+	str: []u8,
+
+	$next := fn(self: ^Self): IterNext(^u8) {
+		// do not slice past the end of an empty string
+		if self.str.len == 0 return .(true, self.str.ptr)
+		tmp := IterNext(^u8).(false, self.str.ptr)
+		self.str = self.str[1..]
+		return tmp
+	}
+}) {
+	return .(.(iter))
 }

 count := fn(haystack: []u8, needle: @Any()): uint {
--- a/src/main.hb
+++ b/src/main.hb
@ -7,41 +7,52 @@ Random := lily.rand.SimpleRandom
 Result := lily.result.Result
 Hasher := lily.hash.FoldHasher

-// ! HashMap only works on AbleOS target (due to compiler bugs)
-
-$some_sorter := fn(lhs: @Any(), rhs: @Any()): bool {
-	return lhs < rhs
+$ref_char_to_str := fn(char: ^u8): []u8 {
+	return char[0..1]
 }

-$add_one := fn(x: ?uint): ?uint {
-	return @unwrap(x) + 1
-}
-
-$print := fn(next: @Any()): void {
-	lily.print(@as(@ChildOf(@TypeOf(next)), @unwrap(next)))
-}
-
-main := fn(): uint {
-	allocator := Allocator.new()
-	defer allocator.deinit()
-	vec := Vec(uint, Allocator).new(&allocator)
-	defer vec.deinit()
-	rand := Random.default()
-	defer rand.deinit()
-
-	i := 0
-	loop if i == 100 break else {
-		defer i += 1
-		vec.push(rand.any(u8))
+Generator := struct {
+	n: uint = 0,
+	$next := fn(self: ^Self): lily.iter.IterNext(uint) {
+		self.n += 1
+		return .(false, self.n)
 	}
-	// note: this does not affect the values of the vec itself
-	// the `add_one` here simply changes the value before printing.
-	// ! (libc) (compiler) bug: prints same numbers several times on libc. does not occur on ableos.
-	vec.into_iter().map(add_one).for_each(print)
+	$into_iter := fn(self: Self): lily.iter.Iterator(Self) {
+		return .(self)
+	}
+}

-	// equivalent to vec.sort() when some_sorter == `lhs < rhs`
-	// uses lily.quicksort under the hood
-	vec.sort_with(some_sorter)
+/// Returns the sum of `lhs` and `rhs`.
+$add := fn(lhs: uint, rhs: uint): uint return lhs + rhs
+
+chars_ref := lily.string.chars_ref
+
+main := fn(argc: uint, argv: []^void): uint {
+	a := Generator.{}.into_iter().take(50).fold(add, 0)
+	lily.print(a)
+
+	b := chars_ref("Hello,_").chain(chars_ref("World!")).map(ref_char_to_str).for_each(lily.log.info)
+	c := chars_ref("Hello,_").intersperse(chars_ref("World!")).map(ref_char_to_str).for_each(lily.log.info)
+
+	// allocator := Allocator.new()
+	// defer allocator.deinit()
+	// // ! HashMap only works on AbleOS target (due to compiler bugs)
+	// map := HashMap(uint, uint, Hasher, Allocator).new(&allocator)
+	// defer map.deinit()
+
+	// _ = map.insert(101, 20)
+	// _ = map.insert(202, 30)
+	// _ = map.insert(303, 40)
+
+	// // ! This iterator only works on AbleOS target (due to compiler bugs)
+	// map.items().enumerate().for_each(print)

 	return 0
-}
+}
+
+// $print := fn(thing: @Any()): void {
+// 	.{n, val: item} := thing
+// 	// ! printf ALSO only works on AbleOS target (due to compiler bugs)
+// 	lily.printf("nth: {}, key: {}, value: {}", .(n, item.key, item.value))
+// }