From 853b51fc5029537fe28301d8148e1a31d10e2b96 Mon Sep 17 00:00:00 2001 From: bendn Date: Thu, 5 Oct 2023 07:03:41 +0700 Subject: [PATCH] add a `madd` function --- src/drawing/poly.rs | 21 ++++++++++----------- src/drawing/tri.rs | 23 +++++++++++++---------- src/lib.rs | 8 ++++---- src/math.rs | 8 ++++++++ 4 files changed, 35 insertions(+), 25 deletions(-) create mode 100644 src/math.rs diff --git a/src/drawing/poly.rs b/src/drawing/poly.rs index b73f321..69ca4a3 100644 --- a/src/drawing/poly.rs +++ b/src/drawing/poly.rs @@ -1,8 +1,7 @@ //! draw polygons -use std::{ - cmp::{max, min}, - f32::consts::TAU, -}; +use crate::math::madd; +use std::cmp::{max, min}; +use std::f32::consts::TAU; use crate::Image; @@ -43,7 +42,7 @@ impl + AsRef<[u8]>, const CHANNELS: usize> Image { } } else { let fraction = (y - p0.1) as f32 / (p1.1 - p0.1) as f32; - let inter = fraction.mul_add((p1.0 - p0.0) as f32, p0.0 as f32); + let inter = madd(fraction, (p1.0 - p0.0) as f32, p0.0 as f32); intersections.push(inter.round() as i32); } } @@ -113,7 +112,7 @@ impl + AsRef<[u8]>, const CHANNELS: usize> Image { self.tri( add(trans(space + rotation)), add(trans(rotation)), - add(trans(space.mul_add(2.0, rotation))), + add(trans(madd(space, 2.0, rotation))), c, ); } @@ -122,9 +121,9 @@ impl + AsRef<[u8]>, const CHANNELS: usize> Image { for i in (0..sides - 1).step_by(2).map(|i| i as f32) { self.quad( r((x, y)), - r(add(trans(space.mul_add(i, rotation)))), - r(add(trans(space.mul_add(i + 1., rotation)))), - r(add(trans(space.mul_add(i + 2., rotation)))), + r(add(trans(madd(space, i, rotation)))), + r(add(trans(madd(space, i + 1., rotation)))), + r(add(trans(madd(space, i + 2., rotation)))), c, ); } @@ -134,8 +133,8 @@ impl + AsRef<[u8]>, const CHANNELS: usize> Image { // the missing piece self.tri( (x, y), - add(trans(space.mul_add(i, rotation))), - add(trans(space.mul_add(i + 1., rotation))), + add(trans(madd(space, i, rotation))), + add(trans(madd(space, i + 1., rotation))), c, ); } diff --git a/src/drawing/tri.rs b/src/drawing/tri.rs index b50dfe4..ca44509 100644 --- a/src/drawing/tri.rs +++ b/src/drawing/tri.rs @@ -1,5 +1,7 @@ //! trongle drawing +use crate::math::madd; +use std::cmp::{max, min}; use crate::Image; @@ -25,21 +27,22 @@ impl + AsRef<[u8]>, const CHANNELS: usize> Image { c: [u8; CHANNELS], ) { // TODO optimize - for y in y1.min(y2).min(y3) as u32..y1.max(y2).max(y3) as u32 { - for x in x1.min(x2).min(x3) as u32..x1.max(x2).max(x3) as u32 { - let s = (x1 - x3).mul_add(y as f32 - y3, -(y1 - y2) * (x as f32 - x3)); - let t = (x2 - x1).mul_add(y as f32 - y1, -(y2 - y1) * (x as f32 - x1)); + let ymin = max(y1.min(y2).min(y3) as u32, 0); + let ymax = min(y1.max(y2).max(y3) as u32, self.height()); + let xmin = max(x1.min(x2).min(x3) as u32, 0); + let xmax = min(x1.max(x2).max(x3) as u32, self.width()); + for y in ymin..ymax { + for x in xmin..xmax { + let s = madd(x1 - x3, y as f32 - y3, -(y1 - y2) * (x as f32 - x3)); + let t = madd(x2 - x1, y as f32 - y1, -(y2 - y1) * (x as f32 - x1)); if (s < 0.0) != (t < 0.0) && s != 0.0 && t != 0.0 { continue; } - let d = (x3 - x2).mul_add(y as f32 - y2, -(y3 - y2) * (x as f32 - x2)); - if (d == 0.0 || (d < 0.0) == (s + t <= 0.0)) - && x < self.width() - && y < self.height() - { - // SAFETY: we just checked the bounds + let d = madd(x3 - x2, y as f32 - y2, -(y3 - y2) * (x as f32 - x2)); + if d == 0.0 || (d < 0.0) == (s + t <= 0.0) { + // SAFETY: x, y are bounded unsafe { self.set_pixel(x, y, c) }; } } diff --git a/src/lib.rs b/src/lib.rs index 40f3bbe..d59f652 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,13 +25,13 @@ missing_docs )] #![allow(clippy::zero_prefixed_literal, incomplete_features)] - use std::{num::NonZeroU32, slice::SliceIndex}; mod affine; pub mod builder; pub mod cloner; mod drawing; +pub(crate) mod math; mod overlay; pub mod scale; use cloner::ImageCloner; @@ -271,10 +271,10 @@ impl Image, CHANNELS> { macro_rules! make { ($channels:literal channels $w:literal x $h: literal) => { unsafe { - Image::<_, $channels>::new( + $crate::Image::<_, $channels>::new( match ::core::num::NonZeroU32::new($w) { - Some(n) => n, - None => panic!("width is 0"), + ::core::option::Option::Some(n) => n, + ::core::option::Option::None => panic!("width is 0"), }, match ::core::num::NonZeroU32::new($h) { ::core::option::Option::Some(n) => n, diff --git a/src/math.rs b/src/math.rs new file mode 100644 index 0000000..212798c --- /dev/null +++ b/src/math.rs @@ -0,0 +1,8 @@ +/// Calculates `a * b + c`, with hardware support if possible. +pub fn madd(a: f32, b: f32, c: f32) -> f32 { + if cfg!(target_feature = "fma") { + a.mul_add(b, c) + } else { + a * b + c + } +}