cleanup
This commit is contained in:
16
src/reduce/gray8.rs
Normal file
16
src/reduce/gray8.rs
Normal file
@@ -0,0 +1,16 @@
|
||||
/// Number of input bytes consumed per iteration of `run`; each iteration emits one output byte.
const GROUP_BY: usize = 2;
|
||||
/// Returns the number of output bytes produced for `in_size` input bytes.
///
/// Every two input bytes yield one output byte; an odd trailing byte yields nothing.
pub const fn out_size(in_size: usize) -> usize {
    const INPUT_BYTES_PER_OUTPUT: usize = 2;
    in_size / INPUT_BYTES_PER_OUTPUT
}
|
||||
|
||||
pub fn run(buf: &mut [u8]) {
|
||||
let n_raw = buf.len();
|
||||
let mut in_cursor = 0;
|
||||
let mut out_cursor = 0;
|
||||
|
||||
while in_cursor + GROUP_BY <= n_raw {
|
||||
buf[out_cursor] = buf[in_cursor];
|
||||
out_cursor += 1;
|
||||
in_cursor += GROUP_BY;
|
||||
}
|
||||
}
|
||||
33
src/reduce/gray8_simd.rs
Normal file
33
src/reduce/gray8_simd.rs
Normal file
@@ -0,0 +1,33 @@
|
||||
use std::{
|
||||
arch::arm::{vld1q_u8, vst1q_u8, vuzpq_u8},
|
||||
convert::TryInto,
|
||||
};
|
||||
|
||||
/// Number of input bytes consumed per iteration of the vector loop in `run` (two 16-byte loads).
const GROUP_BY: usize = 32;
|
||||
/// Returns the number of output bytes corresponding to `in_size` input bytes.
///
/// The reduction keeps one byte out of every two, so the result is `in_size`
/// halved, rounding down.
pub const fn out_size(in_size: usize) -> usize {
    const INPUT_BYTES_PER_OUTPUT: usize = 2;
    in_size / INPUT_BYTES_PER_OUTPUT
}
|
||||
|
||||
pub fn run(buf: &mut [u8]) {
|
||||
let n_raw = buf.len();
|
||||
let mut in_cursor = 0;
|
||||
let mut out_cursor = 0;
|
||||
|
||||
let mut res = [0u8; 16];
|
||||
|
||||
while in_cursor + GROUP_BY <= n_raw {
|
||||
let a: &[u8; 16] = buf[in_cursor..in_cursor + 16].try_into().unwrap();
|
||||
let b: &[u8; 16] = buf[in_cursor + 16..in_cursor + 32].try_into().unwrap();
|
||||
|
||||
unsafe {
|
||||
let a = vld1q_u8(a as *const u8);
|
||||
let b = vld1q_u8(b as *const u8);
|
||||
let z = vuzpq_u8(a, b);
|
||||
vst1q_u8(&mut res as *mut u8, z.0);
|
||||
}
|
||||
|
||||
buf[out_cursor..out_cursor + 16].copy_from_slice(&res);
|
||||
out_cursor += 16;
|
||||
in_cursor += GROUP_BY;
|
||||
}
|
||||
}
|
||||
4
src/reduce/mod.rs
Normal file
4
src/reduce/mod.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
/// Scalar in-place reduction that keeps the first byte of every 2-byte group.
pub mod gray8;
|
||||
/// Variant of the 2-to-1 byte reduction built on `std::arch::arm` SIMD intrinsics.
pub mod gray8_simd;
|
||||
/// Scalar in-place reduction that packs each 16-byte group into a single output byte (one bit per 2-byte pair).
pub mod mono;
|
||||
/// Variant of the 16-to-1 bit-packing reduction built on `std::arch::arm` SIMD intrinsics.
pub mod mono_simd;
|
||||
47
src/reduce/mono.rs
Normal file
47
src/reduce/mono.rs
Normal file
@@ -0,0 +1,47 @@
|
||||
use std::convert::TryInto;
|
||||
|
||||
/// Number of input bytes consumed per iteration of `run`; each iteration emits one packed output byte.
const GROUP_BY: usize = 16;
|
||||
/// Returns the number of output bytes produced for `in_size` input bytes.
///
/// Sixteen input bytes are packed into one output byte; a trailing partial
/// group of fewer than sixteen bytes yields nothing.
pub const fn out_size(in_size: usize) -> usize {
    const INPUT_BYTES_PER_OUTPUT: usize = 16;
    in_size / INPUT_BYTES_PER_OUTPUT
}
|
||||
|
||||
pub fn run(buf: &mut [u8]) {
|
||||
let n_raw = buf.len();
|
||||
let mut in_cursor = 0;
|
||||
let mut out_cursor = 0;
|
||||
|
||||
while in_cursor + GROUP_BY <= n_raw {
|
||||
let a: &[u8; 16] = buf[in_cursor..in_cursor + 16].try_into().unwrap();
|
||||
|
||||
let mut out = 0u8;
|
||||
|
||||
if a[0] == 0x1E {
|
||||
out |= 0b10000000;
|
||||
}
|
||||
if a[2] == 0x1E {
|
||||
out |= 0b10000000 >> 1;
|
||||
}
|
||||
if a[4] == 0x1E {
|
||||
out |= 0b10000000 >> 2;
|
||||
}
|
||||
if a[6] == 0x1E {
|
||||
out |= 0b10000000 >> 3;
|
||||
}
|
||||
if a[8] == 0x1E {
|
||||
out |= 0b10000000 >> 4;
|
||||
}
|
||||
if a[10] == 0x1E {
|
||||
out |= 0b10000000 >> 5;
|
||||
}
|
||||
if a[12] == 0x1E {
|
||||
out |= 0b10000000 >> 6;
|
||||
}
|
||||
if a[14] == 0x1E {
|
||||
out |= 0b10000000 >> 7;
|
||||
}
|
||||
|
||||
buf[out_cursor] = out;
|
||||
out_cursor += 1;
|
||||
in_cursor += GROUP_BY;
|
||||
}
|
||||
}
|
||||
69
src/reduce/mono_simd.rs
Normal file
69
src/reduce/mono_simd.rs
Normal file
@@ -0,0 +1,69 @@
|
||||
use std::{
|
||||
arch::arm::{
|
||||
vandq_u8, vgetq_lane_u64, vld1q_s8, vld1q_u8, vpaddlq_u16, vpaddlq_u32, vpaddlq_u8,
|
||||
vshlq_u8, vshrq_n_u8, vuzpq_u8,
|
||||
},
|
||||
convert::TryInto,
|
||||
};
|
||||
|
||||
/// Number of input bytes consumed per iteration of the vector loop in `run`; each iteration emits two output bytes.
const GROUP_BY: usize = 32;
|
||||
/// Returns the number of output bytes corresponding to `in_size` input bytes.
///
/// One output byte is produced per sixteen input bytes, rounding down.
pub const fn out_size(in_size: usize) -> usize {
    const INPUT_BYTES_PER_OUTPUT: usize = 16;
    in_size / INPUT_BYTES_PER_OUTPUT
}
|
||||
|
||||
pub fn run(buf: &mut [u8]) {
|
||||
let n_raw = buf.len();
|
||||
let mut in_cursor = 0;
|
||||
let mut out_cursor = 0;
|
||||
|
||||
let m = unsafe {
|
||||
let mask = &[0x01u8; 16];
|
||||
vld1q_u8(mask as *const u8)
|
||||
};
|
||||
|
||||
let h = unsafe {
|
||||
let mask = &[
|
||||
0x07i8, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02,
|
||||
0x01, 0x00,
|
||||
];
|
||||
vld1q_s8(mask as *const i8)
|
||||
};
|
||||
|
||||
while in_cursor + GROUP_BY <= n_raw {
|
||||
let a: &[u8; 16] = buf[in_cursor..in_cursor + 16].try_into().unwrap();
|
||||
let b: &[u8; 16] = buf[in_cursor + 16..in_cursor + 32].try_into().unwrap();
|
||||
|
||||
let res: (u8, u8) = unsafe {
|
||||
// Load 32 bytes
|
||||
let a = vld1q_u8(a as *const u8);
|
||||
let b = vld1q_u8(b as *const u8);
|
||||
|
||||
// Unzip and get first byte of each pair
|
||||
let a = vuzpq_u8(a, b).0;
|
||||
|
||||
// White = 0b1110, so >> 4.
|
||||
let a = vshrq_n_u8::<4>(a);
|
||||
|
||||
// and with 0x01 mask
|
||||
let a = vandq_u8(a, m);
|
||||
|
||||
// shift each bit left by an appropriate amount
|
||||
// (h is [0x07, 0x06, .., 0x00, 0x07, .., 0x00])
|
||||
let a = vshlq_u8(a, h);
|
||||
|
||||
// Sum everything
|
||||
let s = vpaddlq_u8(a);
|
||||
let s = vpaddlq_u16(s);
|
||||
let s = vpaddlq_u32(s);
|
||||
(
|
||||
vgetq_lane_u64(s, 0).try_into().unwrap(),
|
||||
vgetq_lane_u64(s, 1).try_into().unwrap(),
|
||||
)
|
||||
};
|
||||
|
||||
buf[out_cursor] = res.0;
|
||||
buf[out_cursor + 1] = res.1;
|
||||
out_cursor += 2;
|
||||
in_cursor += GROUP_BY;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user