34 lines
861 B
Rust
34 lines
861 B
Rust
use std::{
|
|
arch::arm::{vld1q_u8, vst1q_u8, vuzpq_u8},
|
|
convert::TryInto,
|
|
};
|
|
|
|
/// Number of input bytes `run` consumes per loop iteration: two 16-byte
/// vectors loaded back to back.
const GROUP_BY: usize = 32;
|
|
/// Returns the output length for an input of `in_size` bytes.
///
/// The result is half of `in_size`, rounded down.
pub const fn out_size(in_size: usize) -> usize {
    // Halving an unsigned integer is the same as shifting right by one bit.
    in_size >> 1
}
|
|
|
|
pub fn run(buf: &mut [u8]) {
|
|
let n_raw = buf.len();
|
|
let mut in_cursor = 0;
|
|
let mut out_cursor = 0;
|
|
|
|
let mut res = [0u8; 16];
|
|
|
|
while in_cursor + GROUP_BY <= n_raw {
|
|
let a: &[u8; 16] = buf[in_cursor..in_cursor + 16].try_into().unwrap();
|
|
let b: &[u8; 16] = buf[in_cursor + 16..in_cursor + 32].try_into().unwrap();
|
|
|
|
unsafe {
|
|
let a = vld1q_u8(a as *const u8);
|
|
let b = vld1q_u8(b as *const u8);
|
|
let z = vuzpq_u8(a, b);
|
|
vst1q_u8(&mut res as *mut u8, z.0);
|
|
}
|
|
|
|
buf[out_cursor..out_cursor + 16].copy_from_slice(&res);
|
|
out_cursor += 16;
|
|
in_cursor += GROUP_BY;
|
|
}
|
|
}
|