mirror of
https://github.com/bend-n/fimg.git
synced 2024-12-22 02:28:19 -06:00
faster b64
This commit is contained in:
parent
b464d4b0fc
commit
3bc1dd6c07
|
@ -67,7 +67,8 @@
|
||||||
doc_auto_cfg,
|
doc_auto_cfg,
|
||||||
const_option,
|
const_option,
|
||||||
array_chunks,
|
array_chunks,
|
||||||
let_chains
|
let_chains,
|
||||||
|
test
|
||||||
)]
|
)]
|
||||||
#![warn(
|
#![warn(
|
||||||
clippy::undocumented_unsafe_blocks,
|
clippy::undocumented_unsafe_blocks,
|
||||||
|
|
163
src/term/b64.rs
163
src/term/b64.rs
|
@ -1,8 +1,17 @@
|
||||||
|
#![allow(clippy::undocumented_unsafe_blocks)]
|
||||||
|
use core::intrinsics::simd::simd_cast;
|
||||||
|
use std::{
|
||||||
|
arch::x86_64::*,
|
||||||
|
intrinsics::transmute_unchecked,
|
||||||
|
simd::{prelude::*, LaneCount, MaskElement, SimdElement, SupportedLaneCount},
|
||||||
|
};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn b64() {
|
fn b64() {
|
||||||
fn t(i: &'static str, o: &'static str) {
|
fn t(i: &'static str, o: &'static str) {
|
||||||
let mut x = vec![];
|
let mut x = Vec::with_capacity(size(i.as_bytes()));
|
||||||
encode(i.as_bytes(), &mut x).unwrap();
|
unsafe { portable(i.as_bytes(), x.as_mut_ptr()) };
|
||||||
|
unsafe { x.set_len(size(i.as_bytes())) };
|
||||||
assert_eq!(x, o.as_bytes());
|
assert_eq!(x, o.as_bytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,16 +19,151 @@ fn b64() {
|
||||||
t("Hello World", "SGVsbG8gV29ybGQ=");
|
t("Hello World", "SGVsbG8gV29ybGQ=");
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encode(mut input: &[u8], output: &mut impl std::io::Write) -> std::io::Result<()> {
|
extern crate test;
|
||||||
|
|
||||||
|
/// Encodes `i` as base64 and returns the text in a newly allocated `String`.
///
/// The buffer is reserved with `size(i)` bytes, filled through a raw pointer by
/// `portable`, and only afterwards given its length.
pub fn encode(i: &[u8]) -> String {
|
||||||
|
// Capacity only: the vector's length stays 0 until `set_len` below.
let mut x = Vec::with_capacity(size(i));
|
||||||
|
// NOTE(review): sound only if `portable` writes at most `size(i)` bytes — the body of `size` is not visible here; confirm.
unsafe { portable(i, x.as_mut_ptr()) };
|
||||||
|
// NOTE(review): assumes `portable` initialised exactly `size(i)` bytes — confirm.
unsafe { x.set_len(size(i)) };
|
||||||
|
// The scalar tail writes only alphabet bytes and `=`; the vector path is expected to emit ASCII as well.
unsafe { String::from_utf8_unchecked(x) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reinterpreting conversion from a SIMD vector or mask into another `SimdT` type.
///
/// The implementations in this file check that both types have the same size and
/// then transmute, so `cas` changes the type but not the bits.
trait Cast<T, const N: usize> {
|
||||||
|
/// Returns `self` reinterpreted as `U`.
fn cas<U: SimdT>(self) -> U;
|
||||||
|
}
|
||||||
|
/// Marker for the types `Cast::cas` may target: every `Simd` and `Mask` with a supported lane count.
trait SimdT {}
|
||||||
|
impl<T: SimdElement, const N: usize> SimdT for Simd<T, N> where LaneCount<N>: SupportedLaneCount {}
|
||||||
|
impl<T: MaskElement, const N: usize> SimdT for Mask<T, N> where LaneCount<N>: SupportedLaneCount {}
|
||||||
|
/// `Cast` for vectors: a size-checked bit reinterpretation.
impl<T: SimdElement, const N: usize> Cast<T, N> for Simd<T, N>
|
||||||
|
where
|
||||||
|
LaneCount<N>: SupportedLaneCount,
|
||||||
|
{
|
||||||
|
// Note: this impl omits the `U: SimdT` bound that the trait declaration carries.
fn cas<U>(self) -> U {
|
||||||
|
// Run-time guard: panics if source and target differ in size.
assert!(std::mem::size_of::<U>() == std::mem::size_of::<Simd<T, N>>());
|
||||||
|
// Sizes were just checked to be equal; the bits are reused unchanged.
unsafe { transmute_unchecked(self) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `Cast` for masks: a size-checked bit reinterpretation.
impl<T: MaskElement, const N: usize> Cast<T, N> for Mask<T, N>
|
||||||
|
where
|
||||||
|
LaneCount<N>: SupportedLaneCount,
|
||||||
|
{
|
||||||
|
// Note: this impl omits the `U: SimdT` bound that the trait declaration carries.
fn cas<U>(self) -> U {
|
||||||
|
// Run-time guard: panics if the mask and the target type differ in size.
assert!(std::mem::size_of::<U>() == std::mem::size_of::<Mask<T, N>>());
|
||||||
|
// NOTE(review): the result depends on the in-memory layout of `Mask` — presumably one all-ones/all-zeros element per lane; confirm.
unsafe { transmute_unchecked(self) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lower-case alias on purpose, hence the lint exemption.
#[allow(non_camel_case_types)]
|
||||||
|
/// Shorthand for the 32-lane byte vector the encoder works on.
type c = u8x32;
|
||||||
|
/// Encodes `input` as base64 into the memory at `output`, turning 24 input bytes
/// into 32 output bytes per vector step.
///
/// Once fewer than 32 input bytes remain, the rest is handed to `encode_simple`.
///
/// # Safety
/// `output` must be valid for writes of every byte produced: 32 bytes per loop
/// iteration plus whatever `encode_simple` writes for the tail.
/// NOTE(review): the callers visible in this file size the buffer with `size(input)` — confirm that is sufficient.
unsafe fn portable(mut input: &[u8], mut output: *mut u8) {
|
||||||
|
// 32 bytes must be readable although only 24 are consumed: `c::from_slice`
// takes 32 lanes and the AVX2 loads reach up to byte 27.
while input.len() >= 32 {
|
||||||
|
// The intrinsic calls in the first branch are made without their own `unsafe` block.
#[allow(unsafe_op_in_unsafe_fn)]
|
||||||
|
// `cfg!` is a compile-time constant; Miri always takes the portable branch.
let indices = if cfg!(all(target_feature = "avx2", not(miri))) {
|
||||||
|
// Two overlapping 16-byte loads, at offsets 0 and 12, one per 128-bit half.
let lo = _mm_loadu_si128(input.as_ptr() as *const __m128i);
|
||||||
|
let hi = _mm_loadu_si128(input.as_ptr().add(12) as *const __m128i);
|
||||||
|
// Spread each half's first 12 bytes over 16 bytes (every 3-byte group becomes 4 bytes).
let i = _mm256_shuffle_epi8(
|
||||||
|
_mm256_set_m128i(hi, lo),
|
||||||
|
// `_mm256_set_epi8` lists bytes from the highest down, so read right to left: 1, 0, 2, 1, 4, 3, ...
_mm256_set_epi8(
|
||||||
|
10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1, //
|
||||||
|
10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1, //
|
||||||
|
),
|
||||||
|
);
|
||||||
|
// Mask + mulhi by a power of two is a per-16-bit right shift:
// 0x0400 gives `>> 6`, 0x0040 gives `>> 10`.
let t0 = _mm256_and_si256(i, _mm256_set1_epi32(0x0fc0fc00));
|
||||||
|
let t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));
|
||||||
|
// Mask + mullo by a power of two is a per-16-bit left shift:
// 0x0100 gives `<< 8`, 0x0010 gives `<< 4`.
let t2 = _mm256_and_si256(i, _mm256_set1_epi32(0x003f03f0));
|
||||||
|
let t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));
|
||||||
|
|
||||||||
|
// Every byte of the combined vector now holds one 6-bit value.
c::from(_mm256_or_si256(t1, t3))
|
||||||
|
} else {
|
||||||
|
// Portable path: load 32 bytes, of which the swizzle uses bytes 0..=23.
let v = c::from_slice(input);
|
||||||
|
// Same byte arrangement as the AVX2 shuffle, written as absolute lane indices.
let i = simd_swizzle!(
|
||||||
|
v,
|
||||||
|
[
|
||||||
|
1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10, 13, 12, //
|
||||||
|
14, 13, 16, 15, 17, 16, 19, 18, 20, 19, 22, 21, 23, 22
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||||
|
// https://github.com/WojciechMula/base64simd
|
||||||
|
// Same masks and multipliers as the AVX2 branch, with `mulhi`/`mullo` standing in for the intrinsics.
let t0 = i & u32x8::splat(0x0fc0fc00).cas::<c>();
|
||||||
|
let t1 = Cast::cas::<c>(mulhi(t0.cas(), u32x8::splat(0x04000040).cas()));
|
||||||
|
let t2 = i & u32x8::splat(0x003f03f0).cas::<c>();
|
||||||
|
let t3 = mullo(t2.cas(), u32x8::splat(0x01000010).cas()).cas::<c>();
|
||||||
|
t1 | t3
|
||||||
|
};
|
||||||
|
// Translate the 32 indices to characters and store them at `output`.
lookup(indices).copy_to_slice(unsafe { std::slice::from_raw_parts_mut(output, 32) });
|
||||||
|
output = unsafe { output.add(32) };
|
||||||
|
|
||||||||
|
// 32 output characters correspond to 24 consumed input bytes.
input = &input[24..];
|
||||||
|
}
|
||||||
|
// Fewer than 32 bytes left: finish with the scalar encoder, which also writes the `=` padding.
unsafe { encode_simple(input, output) };
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Per-lane unsigned multiply that keeps the upper 16 bits of each 32-bit product.
///
/// Used by the non-AVX2 branch of `portable` where the AVX2 branch calls `_mm256_mulhi_epu16`.
fn mulhi(x: u16x16, y: u16x16) -> u16x16 {
|
||||||
|
unsafe {
|
||||||
|
// Narrow back to 16 bits after the shift.
simd_cast::<_, u16x16>(
|
||||||
|
// Widen both operands to 32 bits, multiply, then shift the product down by 16
// (`*` binds tighter than `>>`).
simd_cast::<_, u32x16>(x) * simd_cast::<_, u32x16>(y) >> u32x16::splat(16),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Per-lane multiply of two `u16x16` vectors with a 16-bit result per lane.
///
/// Used by the non-AVX2 branch of `portable` where the AVX2 branch calls `_mm256_mullo_epi16`.
fn mullo(x: u16x16, y: u16x16) -> u16x16 {
|
||||||
|
// NOTE(review): relies on `std::simd` integer multiplication wrapping on overflow (keeping the low 16 bits) — confirm.
x * y
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Maps each lane's base64 index to its ASCII character by adding a per-range offset.
///
/// A small selector is computed per lane, the selector picks an offset from `LUT`,
/// and the offset is added to the original index.
/// NOTE(review): assumes every lane of `x` is in `0..=63` — confirm against the caller.
fn lookup(x: c) -> c {
|
||||||
|
// 0 for indices 0..=51, then 1..=12 for indices 52..=63.
let result = x.saturating_sub(c::splat(51));
|
||||||
|
// Lanes where `26 > x`, i.e. the upper-case range.
let less = cmpgt(c::splat(26), x);
|
||||||
|
// Upper-case lanes get selector 13 (this relies on true lanes of `less` having all bits set).
let result = result | (less & c::splat(13));
|
||||||
|
|
||||||||
|
#[rustfmt::skip]
|
||||||
|
// Offsets by selector: 0 -> lower-case, 1..=10 -> digits, 11 -> '+', 12 -> '/', 13 -> upper-case.
// The 16 entries appear twice, once per 16-byte half of the 32-lane vector.
const LUT: i8x32 = i8x32::from_array([
|
||||||
|
b'a' as i8 - 26, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52,
|
||||||
|
b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'+' as i8 - 62,
|
||||||
|
b'/' as i8 - 63, b'A' as i8, 0, 0,
|
||||||
|
|
||||||||
|
b'a' as i8 - 26, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52,
|
||||||
|
b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'+' as i8 - 62,
|
||||||
|
b'/' as i8 - 63, b'A' as i8, 0, 0
|
||||||
|
]);
|
||||||
|
// Table lookup: the selector in each lane picks that lane's offset.
let result = if cfg!(all(target_feature = "avx2", not(miri))) {
|
||||||
|
unsafe { i8x32::from(_mm256_shuffle_epi8(LUT.into(), result.into())) }
|
||||||
|
} else {
|
||||||
|
// Portable equivalent; selectors never exceed 13, so only the first copy of the table is read.
(LUT.cas::<c>().swizzle_dyn(result)).cas::<i8x32>()
|
||||||
|
};
|
||||||
|
|
||||||||
|
// character = offset + index, then reinterpret the signed lanes as bytes.
Cast::cas(result + x.cas::<i8x32>())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lane-wise `x > y`, comparing the bytes as signed `i8`, with the mask returned as a byte vector.
///
/// Because the comparison is signed, bytes of 128 and above compare as negative.
pub fn cmpgt(x: c, y: c) -> c {
|
||||||
|
// NOTE(review): the reinterpreted mask presumably has all bits set in true lanes and zero elsewhere — confirm.
x.cas::<i8x32>().simd_gt(y.cas::<i8x32>()).cas()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// "Push" for a raw output cursor: write a fixed-size byte array and advance.
trait P {
|
||||||
|
/// Writes the `N` bytes of `data` at the cursor and moves the cursor past them.
///
/// # Safety
/// The cursor must be valid for `N` bytes of writes.
unsafe fn p<const N: usize>(&mut self, data: [u8; N]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lets a `*mut u8` act as a write cursor.
impl P for *mut u8 {
|
||||||
|
/// # Safety
/// `*self` must be valid for `N` bytes of writes.
unsafe fn p<const N: usize>(&mut self, data: [u8; N]) {
|
||||||
|
// Copy the `N` bytes of `data` to the current position.
unsafe { self.copy_from(data.as_ptr(), N) };
|
||||||
|
// Advance the stored pointer so the next call writes right after this one.
*self = unsafe { self.add(N) };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(unsafe_op_in_unsafe_fn)]
|
||||||
|
/// # Safety
|
||||||
|
/// ptr valid for [`size`]`(input)` writes.
|
||||||
|
pub unsafe fn encode_simple(mut input: &[u8], mut output: *mut u8) {
|
||||||
const Α: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
const Α: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||||
while let [a, b, c, rest @ ..] = input {
|
while let [a, b, c, rest @ ..] = input {
|
||||||
let α = ((*a as usize) << 16) | ((*b as usize) << 8) | *c as usize;
|
let α = ((*a as usize) << 16) | ((*b as usize) << 8) | *c as usize;
|
||||||
output.write_all(&[
|
output.p([
|
||||||
Α[α >> 18],
|
Α[α >> 18],
|
||||||
Α[(α >> 12) & 0x3F],
|
Α[(α >> 12) & 0x3F],
|
||||||
Α[(α >> 6) & 0x3F],
|
Α[(α >> 6) & 0x3F],
|
||||||
Α[α & 0x3F],
|
Α[α & 0x3F],
|
||||||
])?;
|
]);
|
||||||
input = rest;
|
input = rest;
|
||||||
}
|
}
|
||||||
if !input.is_empty() {
|
if !input.is_empty() {
|
||||||
|
@ -27,15 +171,14 @@ pub fn encode(mut input: &[u8], output: &mut impl std::io::Write) -> std::io::Re
|
||||||
if input.len() > 1 {
|
if input.len() > 1 {
|
||||||
α |= (input[1] as usize) << 8;
|
α |= (input[1] as usize) << 8;
|
||||||
}
|
}
|
||||||
output.write_all(&[Α[α >> 18], Α[α >> 12 & 0x3F]])?;
|
output.p([Α[α >> 18], Α[α >> 12 & 0x3F]]);
|
||||||
if input.len() > 1 {
|
if input.len() > 1 {
|
||||||
output.write_all(&[Α[α >> 6 & 0x3f]])?;
|
output.p([Α[α >> 6 & 0x3f]]);
|
||||||
} else {
|
} else {
|
||||||
output.write_all(&[b'='])?;
|
output.p([b'=']);
|
||||||
}
|
}
|
||||||
output.write_all(&[b'='])?;
|
output.p([b'=']);
|
||||||
}
|
}
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const fn size(of: &[u8]) -> usize {
|
pub const fn size(of: &[u8]) -> usize {
|
||||||
|
|
|
@ -55,14 +55,11 @@ where
|
||||||
4 => n![4],
|
4 => n![4],
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
let mut e = Vec::with_capacity(b64::size(&d));
|
let e = b64::encode(&d);
|
||||||
b64::encode(&d, &mut e).unwrap();
|
|
||||||
writeln!(
|
writeln!(
|
||||||
to,
|
to,
|
||||||
"]1337;File=inline=1;preserveAspectRatio=1;size={}:{}",
|
"]1337;File=inline=1;preserveAspectRatio=1;size={}:{e}",
|
||||||
d.len(),
|
d.len(),
|
||||||
// SAFETY: b64
|
|
||||||
unsafe { std::str::from_utf8_unchecked(&e) }
|
|
||||||
)?;
|
)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,9 +65,9 @@ impl<T: AsRef<[u8]>, const N: usize> Kitty<T, N> {
|
||||||
};
|
};
|
||||||
let (w, h) = (self.width(), self.height());
|
let (w, h) = (self.width(), self.height());
|
||||||
|
|
||||||
let mut enc = Vec::with_capacity(b64::size(&bytes));
|
let enc = b64::encode(&bytes);
|
||||||
b64::encode(&bytes, &mut enc).unwrap();
|
|
||||||
let mut chunks = enc
|
let mut chunks = enc
|
||||||
|
.as_bytes()
|
||||||
.chunks(4096)
|
.chunks(4096)
|
||||||
// SAFETY: b64
|
// SAFETY: b64
|
||||||
.map(|x| unsafe { std::str::from_utf8_unchecked(x) });
|
.map(|x| unsafe { std::str::from_utf8_unchecked(x) });
|
||||||
|
|
Loading…
Reference in a new issue