diff --git a/codecs/rotate/benchmark.js b/codecs/rotate/benchmark.js new file mode 100644 index 00000000..abc7478a --- /dev/null +++ b/codecs/rotate/benchmark.js @@ -0,0 +1,45 @@ +// THIS IS NOT A NODE SCRIPT +// This is a d8 script. Please install jsvu[1] and install v8. +// Then run `npm run --silent benchmark`. +// [1]: https://github.com/GoogleChromeLabs/jsvu +async function init() { + // Adjustable constants. + const imageDimensions = 4096; + const iterations = new Array(100); + + // Constants. Don’t change. + const imageByteSize = imageDimensions * imageDimensions * 4; + const wasmPageSize = 64 * 1024; + + const buffer = readbuffer("rotate.wasm"); + const { instance } = await WebAssembly.instantiate(buffer); + + const pagesAvailable = Math.floor( + instance.exports.memory.buffer.byteLength / wasmPageSize + ); + const pagesNeeded = Math.floor((imageByteSize * 2 + 4) / wasmPageSize) + 1; + const additionalPagesNeeded = pagesNeeded - pagesAvailable; + if (additionalPagesNeeded > 0) { + instance.exports.memory.grow(additionalPagesNeeded); + } + + [0, 90, 180, 270].forEach(rotation => { + print(`\n${rotation} degrees`); + print(`==============================`); + for (let i = 0; i < 100; i++) { + const start = Date.now(); + instance.exports.rotate(imageDimensions, imageDimensions, rotation); + iterations[i] = Date.now() - start; + } + const average = iterations.reduce((sum, c) => sum + c) / iterations.length; + const stddev = Math.sqrt( + iterations + .map(i => Math.pow(i - average, 2)) + .reduce((sum, c) => sum + c) / iterations.length + ); + print(`n = ${iterations.length}`); + print(`Average: ${average}`); + print(`StdDev: ${stddev}`); + }); +} +init().catch(e => console.error(e.stack)); diff --git a/codecs/rotate/package.json b/codecs/rotate/package.json index d7388aaa..add6a95a 100644 --- a/codecs/rotate/package.json +++ b/codecs/rotate/package.json @@ -2,6 +2,10 @@ "name": "rotate", "scripts": { "build:image": "docker build -t squoosh-rotate .", - "build": "docker run --rm -v $(pwd):/src squoosh-rotate ./build.sh" + "build": "docker run --rm -v $(pwd):/src squoosh-rotate ./build.sh", + "benchmark": "echo File size after gzip && npm run benchmark:filesize && echo Optimizing && npm run -s benchmark:optimizing", + "benchmark:baseline": "v8 --liftoff --no-wasm-tier-up --no-opt ./benchmark.js", + "benchmark:optimizing": "v8 --no-liftoff --no-wasm-tier-up ./benchmark.js", + "benchmark:filesize": "cat rotate.wasm | gzip -c9n | wc -c" } } diff --git a/codecs/rotate/rotate.rs b/codecs/rotate/rotate.rs index 4a766c1e..550318e8 100644 --- a/codecs/rotate/rotate.rs +++ b/codecs/rotate/rotate.rs @@ -1,90 +1,113 @@ -use std::slice::from_raw_parts_mut; +use std::slice::{from_raw_parts, from_raw_parts_mut}; -// This function is taken from -// https://rustwasm.github.io/book/reference/code-size.html -#[cfg(not(debug_assertions))] -#[inline] -pub fn unwrap_abort(o: Option) -> T { - use std::process; - match o { - Some(t) => t, - None => process::abort(), +// This function is taken from Zachary Dremann +// https://github.com/GoogleChromeLabs/squoosh/pull/462 +trait HardUnwrap { + fn unwrap_hard(self) -> T; +} + +impl HardUnwrap for Option { + #[cfg(not(debug_assertions))] + #[inline] + fn unwrap_hard(self) -> T { + match self { + Some(t) => t, + None => std::process::abort(), + } + } + + #[cfg(debug_assertions)] + fn unwrap_hard(self) -> T { + self.unwrap() } } -// Normal panic-y behavior for debug builds -#[cfg(debug_assertions)] -unsafe fn unchecked_unwrap(o: Option) -> T { - o.unwrap() +const TILE_SIZE: usize = 16; + +fn get_buffers<'a>(width: usize, height: usize) -> (&'a [u32], &'a mut [u32]) { + let num_pixels = width * height; + let in_b: &[u32]; + let out_b: &mut [u32]; + unsafe { + in_b = from_raw_parts::(8 as *const u32, num_pixels); + out_b = from_raw_parts_mut::((num_pixels * 4 + 8) as *mut u32, num_pixels); + } + return (in_b, out_b); +} + +#[inline(never)] +fn rotate_0(width: usize, height: usize) { + let (in_b, out_b) = get_buffers(width, height); + for (in_p, out_p) in in_b.iter().zip(out_b.iter_mut()) { + *out_p = *in_p; + } +} + +#[inline(never)] +fn rotate_90(width: usize, height: usize) { + let (in_b, out_b) = get_buffers(width, height); + let new_width = height; + let _new_height = width; + for y_start in (0..height).step_by(TILE_SIZE) { + for x_start in (0..width).step_by(TILE_SIZE) { + for y in y_start..(y_start + TILE_SIZE).min(height) { + let in_offset = y * width; + let in_bounds = if x_start + TILE_SIZE < width { + (in_offset + x_start)..(in_offset + x_start + TILE_SIZE) + } else { + (in_offset + x_start)..(in_offset + width) + }; + let in_chunk = in_b.get(in_bounds).unwrap_hard(); + for (x, in_p) in in_chunk.iter().enumerate() { + let new_x = (new_width - 1) - y; + let new_y = x + x_start; + *out_b.get_mut(new_y * new_width + new_x).unwrap_hard() = *in_p; + } + } + } + } +} + +#[inline(never)] +fn rotate_180(width: usize, height: usize) { + let (in_b, out_b) = get_buffers(width, height); + for (in_p, out_p) in in_b.iter().zip(out_b.iter_mut().rev()) { + *out_p = *in_p; + } +} + +#[inline(never)] +fn rotate_270(width: usize, height: usize) { + let (in_b, out_b) = get_buffers(width, height); + let new_width = height; + let new_height = width; + for y_start in (0..height).step_by(TILE_SIZE) { + for x_start in (0..width).step_by(TILE_SIZE) { + for y in y_start..(y_start + TILE_SIZE).min(height) { + let in_offset = y * width; + let in_bounds = if x_start + TILE_SIZE < width { + (in_offset + x_start)..(in_offset + x_start + TILE_SIZE) + } else { + (in_offset + x_start)..(in_offset + width) + }; + let in_chunk = in_b.get(in_bounds).unwrap_hard(); + for (x, in_p) in in_chunk.iter().enumerate() { + let new_x = y; + let new_y = new_height - 1 - (x_start + x); + *out_b.get_mut(new_y * new_width + new_x).unwrap_hard() = *in_p; + } + } + } + } } #[no_mangle] -fn rotate(input_width: isize, input_height: isize, rotate: isize) { - let mut i = 0isize; - - // In the straight-copy case, d1 is x, d2 is y. - // x starts at 0 and increases. - // y starts at 0 and increases. - let mut d1_start: isize = 0; - let mut d1_limit: isize = input_width; - let mut d1_advance: isize = 1; - let mut d1_multiplier: isize = 1; - let mut d2_start: isize = 0; - let mut d2_limit: isize = input_height; - let mut d2_advance: isize = 1; - let mut d2_multiplier: isize = input_width; - - if rotate == 90 { - // d1 is y, d2 is x. - // y starts at its max value and decreases. - // x starts at 0 and increases. - d1_start = input_height - 1; - d1_limit = input_height; - d1_advance = -1; - d1_multiplier = input_width; - d2_start = 0; - d2_limit = input_width; - d2_advance = 1; - d2_multiplier = 1; - } else if rotate == 180 { - // d1 is x, d2 is y. - // x starts at its max and decreases. - // y starts at its max and decreases. - d1_start = input_width - 1; - d1_limit = input_width; - d1_advance = -1; - d1_multiplier = 1; - d2_start = input_height - 1; - d2_limit = input_height; - d2_advance = -1; - d2_multiplier = input_width; - } else if rotate == 270 { - // d1 is y, d2 is x. - // y starts at 0 and increases. - // x starts at its max and decreases. - d1_start = 0; - d1_limit = input_height; - d1_advance = 1; - d1_multiplier = input_width; - d2_start = input_width - 1; - d2_limit = input_width; - d2_advance = -1; - d2_multiplier = 1; - } - - let num_pixels = (input_width * input_height) as usize; - let in_b: &mut [u32]; - let out_b: &mut [u32]; - unsafe { - in_b = from_raw_parts_mut::(4 as *mut u32, num_pixels); - out_b = from_raw_parts_mut::((input_width * input_height * 4 + 4) as *mut u32, num_pixels); - } - - for d2 in 0..d2_limit { - for d1 in 0..d1_limit { - let in_idx = (d1_start + d1 * d1_advance) * d1_multiplier + (d2_start + d2 * d2_advance) * d2_multiplier; - *unwrap_abort(out_b.get_mut(i as usize)) = *unwrap_abort(in_b.get(in_idx as usize)); - i += 1; +fn rotate(width: usize, height: usize, rotate: usize) { + match rotate { + 0 => rotate_0(width, height), + 90 => rotate_90(width, height), + 180 => rotate_180(width, height), + 270 => rotate_270(width, height), + _ => std::process::abort(), } - } } diff --git a/codecs/rotate/rotate.wasm b/codecs/rotate/rotate.wasm index a3ba4265..246fe6b7 100755 Binary files a/codecs/rotate/rotate.wasm and b/codecs/rotate/rotate.wasm differ diff --git a/src/codecs/rotate/processor.ts b/src/codecs/rotate/processor.ts index 30bac75c..d880ac97 100644 --- a/src/codecs/rotate/processor.ts +++ b/src/codecs/rotate/processor.ts @@ -11,7 +11,7 @@ export async function rotate( // Number of wasm memory pages (á 64KiB) needed to store the image twice. const bytesPerImage = data.width * data.height * 4; - const numPagesNeeded = Math.ceil((bytesPerImage * 2 + 4) / (64 * 1024)); + const numPagesNeeded = Math.ceil((bytesPerImage * 2 + 8) / (64 * 1024)); // Only count full pages, just to be safe. const numPagesAvailable = Math.floor(instance.exports.memory.buffer.byteLength / (64 * 1024)); const additionalPagesToAllocate = numPagesNeeded - numPagesAvailable; @@ -20,13 +20,13 @@ export async function rotate( instance.exports.memory.grow(additionalPagesToAllocate); } const view = new Uint8ClampedArray(instance.exports.memory.buffer); - view.set(data.data, 4); + view.set(data.data, 8); instance.exports.rotate(data.width, data.height, opts.rotate); const flipDimensions = opts.rotate % 180 !== 0; return new ImageData( - view.slice(bytesPerImage + 4, bytesPerImage * 2 + 4), + view.slice(bytesPerImage + 8, bytesPerImage * 2 + 8), flipDimensions ? data.height : data.width, flipDimensions ? data.width : data.height, );