Fix base64 for 32-bit targets

This commit is contained in:
bendn 2024-11-29 08:01:08 +07:00
parent 4082ca3dda
commit 273058dcef
No known key found for this signature in database
GPG key ID: 0D9D3A2A3B2A93D6

View file

@ -1,7 +1,8 @@
#![allow(clippy::undocumented_unsafe_blocks)] #![allow(clippy::undocumented_unsafe_blocks)]
use core::intrinsics::simd::simd_cast; use core::intrinsics::simd::simd_cast;
#[cfg(all(target_feature = "avx2", not(miri)))]
use std::arch::x86_64::*;
use std::{ use std::{
arch::x86_64::*,
intrinsics::transmute_unchecked, intrinsics::transmute_unchecked,
simd::{prelude::*, LaneCount, MaskElement, SimdElement, SupportedLaneCount}, simd::{prelude::*, LaneCount, MaskElement, SimdElement, SupportedLaneCount},
}; };
@ -57,7 +58,8 @@ type c = u8x32;
unsafe fn portable(mut input: &[u8], mut output: *mut u8) { unsafe fn portable(mut input: &[u8], mut output: *mut u8) {
while input.len() >= 32 { while input.len() >= 32 {
#[allow(unsafe_op_in_unsafe_fn)] #[allow(unsafe_op_in_unsafe_fn)]
let indices = if cfg!(all(target_feature = "avx2", not(miri))) { #[cfg(all(target_feature = "avx2", not(miri)))]
let indices = {
let lo = _mm_loadu_si128(input.as_ptr() as *const __m128i); let lo = _mm_loadu_si128(input.as_ptr() as *const __m128i);
let hi = _mm_loadu_si128(input.as_ptr().add(12) as *const __m128i); let hi = _mm_loadu_si128(input.as_ptr().add(12) as *const __m128i);
let i = _mm256_shuffle_epi8( let i = _mm256_shuffle_epi8(
@ -73,7 +75,9 @@ unsafe fn portable(mut input: &[u8], mut output: *mut u8) {
let t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010)); let t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));
c::from(_mm256_or_si256(t1, t3)) c::from(_mm256_or_si256(t1, t3))
} else { };
#[cfg(not(all(target_feature = "avx2", not(miri))))]
let indices = {
let v = c::from_slice(input); let v = c::from_slice(input);
let i = simd_swizzle!( let i = simd_swizzle!(
v, v,
@ -125,11 +129,10 @@ fn lookup(x: c) -> c {
b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'+' as i8 - 62, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'0' as i8 - 52, b'+' as i8 - 62,
b'/' as i8 - 63, b'A' as i8, 0, 0 b'/' as i8 - 63, b'A' as i8, 0, 0
]); ]);
let result = if cfg!(all(target_feature = "avx2", not(miri))) { #[cfg(all(target_feature = "avx2", not(miri)))]
unsafe { i8x32::from(_mm256_shuffle_epi8(LUT.into(), result.into())) } let result = unsafe { i8x32::from(_mm256_shuffle_epi8(LUT.into(), result.into())) };
} else { #[cfg(not(all(target_feature = "avx2", not(miri))))]
(LUT.cas::<c>().swizzle_dyn(result)).cas::<i8x32>() let result = (LUT.cas::<c>().swizzle_dyn(result)).cas::<i8x32>();
};
Cast::cas(result + x.cas::<i8x32>()) Cast::cas(result + x.cas::<i8x32>())
} }