optimize transposition for powers of two

This commit is contained in:
bendn 2023-09-22 06:52:40 +07:00
parent 05eb1c8e10
commit f74d3dc450
No known key found for this signature in database
GPG key ID: 0D9D3A2A3B2A93D6
7 changed files with 157 additions and 2 deletions

View file

@ -1,6 +1,6 @@
[package]
name = "fimg"
version = "0.4.1"
version = "0.4.2"
authors = ["bend-n <bend.n@outlook.com>"]
license = "MIT"
edition = "2021"

BIN
benches/4_128x128.imgbuf Normal file

Binary file not shown.

Binary file not shown.

View file

@ -4,7 +4,7 @@ macro_rules! bench {
(fn $name: ident() { run $fn: ident() }) => {
fn $name() {
let mut img: Image<_, 4> =
Image::build(160, 160).buf(include_bytes!("4_160x160.imgbuf").to_vec());
Image::build(128, 128).buf(include_bytes!("4_128x128.imgbuf").to_vec());
for _ in 0..256 {
#[allow(unused_unsafe)]
unsafe {

View file

@ -138,6 +138,22 @@ impl<const CHANNELS: usize> Image<&mut [u8], CHANNELS> {
///
/// UB if supplied image rectangular
unsafe fn transpose<const CHANNELS: usize>(img: &mut Image<&mut [u8], CHANNELS>) {
debug_assert_eq!(img.width(), img.height());
if img.width().is_power_of_two() {
// SAFETY: caller gurantees
unsafe { transpose_diag(img, 0, img.width() as usize) };
} else {
// SAFETY: caller gurantees
unsafe { transpose_non_power_of_two(img) };
}
}
/// Transpose a square (non power of two) image.
///
/// # Safety
///
/// UB if image not square
unsafe fn transpose_non_power_of_two<const CHANNELS: usize>(img: &mut Image<&mut [u8], CHANNELS>) {
debug_assert_eq!(img.width(), img.height());
let size = img.width() as usize;
// SAFETY: no half pixels
@ -152,11 +168,135 @@ unsafe fn transpose<const CHANNELS: usize>(img: &mut Image<&mut [u8], CHANNELS>)
}
}
/// break it down until
const TILE: usize = 4;
/// # Safety
///
/// be careful
unsafe fn transpose_tile<const CHANNELS: usize>(
img: &mut Image<&mut [u8], CHANNELS>,
row: usize,
col: usize,
size: usize,
) {
if size > TILE {
#[allow(
clippy::multiple_unsafe_ops_per_block,
clippy::undocumented_unsafe_blocks
)]
unsafe {
// top left
transpose_tile(img, row, col, size / 2);
// top right
transpose_tile(img, row, col + size / 2, size / 2);
// bottom left
transpose_tile(img, row + size / 2, col, size / 2);
// bottom right
transpose_tile(img, row + size / 2, col + size / 2, size / 2);
}
} else {
let s = img.width() as usize;
let b = img.flatten_mut();
for i in 0..size {
for j in 0..size {
// SAFETY: this should be okay if we careful
unsafe { b.swap_unchecked((row + i) * s + (col + j), (col + j) * s + (row + i)) };
}
}
}
}
/// # Safety
///
/// be careful
unsafe fn transpose_diag<const CHANNELS: usize>(
img: &mut Image<&mut [u8], CHANNELS>,
pos: usize,
size: usize,
) {
if size > TILE {
#[allow(
clippy::multiple_unsafe_ops_per_block,
clippy::undocumented_unsafe_blocks
)]
unsafe {
transpose_diag(img, pos, size / 2);
transpose_tile(img, pos, pos + size / 2, size / 2);
transpose_diag(img, pos + size / 2, size / 2);
}
} else {
let s = img.width() as usize;
let b = img.flatten_mut();
for i in 1..size {
for j in 0..i {
// SAFETY: this is fine unless pos / size is out of bounds, which it cant be
unsafe { b.swap_unchecked((pos + i) * s + (pos + j), (pos + j) * s + (pos + i)) };
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::img;
#[test]
fn transp() {
#[rustfmt::skip]
let mut i = Image::<_, 1>::build(8, 8).buf(vec![
0, 0, 1, 1, 0, 0, 1, 1,
0, 1, 0, 1, 1, 0, 1, 1,
0, 1, 1, 0, 1, 0, 1, 1,
0, 1, 1, 1, 0, 0, 1, 1,
0, 1, 1, 1, 1, 0, 1, 1,
0, 1, 1, 1, 1, 0, 0, 1,
0, 1, 1, 1, 1, 0, 1, 0,
0, 0, 1, 1, 1, 0, 1, 1,
]);
unsafe { transpose(&mut i.as_mut()) };
#[rustfmt::skip]
assert_eq!(i.take_buffer(), vec![
0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0,
1, 0, 1, 1, 1, 1, 1, 1,
1, 1, 0, 1, 1, 1, 1, 1,
0, 1, 1, 0, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 0, 1, 1,
1, 1, 1, 1, 1, 1, 0, 1
]);
}
#[test]
fn transp9() {
#[rustfmt::skip]
let mut i = Image::<_, 1>::build(9, 9).buf(vec![
0, 0, 1, 1, 0, 0, 1, 1, 0,
0, 1, 0, 1, 1, 0, 1, 1, 1,
0, 1, 1, 0, 1, 0, 1, 1, 0,
0, 1, 1, 1, 0, 0, 1, 1, 0,
0, 1, 1, 1, 1, 0, 1, 1, 1,
0, 1, 1, 1, 1, 0, 0, 1, 1,
0, 1, 1, 1, 1, 0, 1, 0, 1,
0, 0, 1, 1, 1, 0, 1, 1, 0,
1, 1, 1, 0, 1, 1, 0, 1, 0,
]);
unsafe { transpose(&mut i.as_mut()) };
#[rustfmt::skip]
assert_eq!(i.take_buffer(), vec![
0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 0, 1,
1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 0, 1, 1, 1, 1, 1, 0,
0, 1, 1, 0, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 1, 0, 1, 1, 0,
1, 1, 1, 1, 1, 1, 0, 1, 1,
0, 1, 0, 0, 1, 1, 1, 0, 0
]);
}
#[test]
fn rotate_90() {
let mut from = img![

View file

@ -3,6 +3,7 @@
//! Provides fast image operations, such as rotation, flipping, and overlaying.
#![feature(
slice_swap_unchecked,
stmt_expr_attributes,
generic_const_exprs,
slice_as_chunks,
unchecked_math,
@ -218,6 +219,13 @@ impl<T: std::ops::Deref<Target = [u8]>, const CHANNELS: usize> Image<T, CHANNELS
self.buffer.array_chunks::<CHANNELS>()
}
#[inline]
/// Flatten the chunks of this image into a slice of slices.
pub fn flatten(&mut self) -> &[[u8; CHANNELS]] {
// SAFETY: buffer cannot have half pixels
unsafe { self.buffer.as_chunks_unchecked::<CHANNELS>() }
}
/// Return a pixel at (x, y).
/// # Safety
///
@ -258,6 +266,13 @@ impl<T: std::ops::DerefMut<Target = [u8]>, const CHANNELS: usize> Image<T, CHANN
self.buffer.array_chunks_mut::<CHANNELS>()
}
#[inline]
/// Flatten the chunks of this image into a mutable slice of slices.
pub fn flatten_mut(&mut self) -> &mut [[u8; CHANNELS]] {
// SAFETY: buffer cannot have half pixels
unsafe { self.buffer.as_chunks_unchecked_mut::<CHANNELS>() }
}
/// Set the pixel at x, y
///
/// # Safety

Binary file not shown.

Before

Width:  |  Height:  |  Size: 89 KiB

After

Width:  |  Height:  |  Size: 73 KiB