#[cfg(feature = "std")]
use crate::error::StreamResult;
use crate::error::{BufferResult, LzwError, LzwStatus};
use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE};
use crate::alloc::{boxed::Box, vec, vec::Vec};
#[cfg(feature = "std")]
use std::io::{self, BufRead, Write};
pub struct Decoder {
state: Box<dyn Stateful + Send + 'static>,
}
#[cfg_attr(
not(feature = "std"),
deprecated = "This type is only useful with the `std` feature."
)]
#[cfg_attr(not(feature = "std"), allow(dead_code))]
pub struct IntoStream<'d, W> {
decoder: &'d mut Decoder,
writer: W,
buffer: Option<StreamBuf<'d>>,
default_size: usize,
}
#[cfg(feature = "async")]
pub struct IntoAsync<'d, W> {
decoder: &'d mut Decoder,
writer: W,
buffer: Option<StreamBuf<'d>>,
default_size: usize,
}
trait Stateful {
fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult;
fn has_ended(&self) -> bool;
fn restart(&mut self);
fn reset(&mut self);
}
#[derive(Clone)]
struct Link {
prev: Code,
byte: u8,
}
#[derive(Default)]
struct MsbBuffer {
bit_buffer: u64,
code_mask: u16,
code_size: u8,
bits: u8,
}
#[derive(Default)]
struct LsbBuffer {
bit_buffer: u64,
code_mask: u16,
code_size: u8,
bits: u8,
}
trait CodeBuffer {
fn new(min_size: u8) -> Self;
fn reset(&mut self, min_size: u8);
fn bump_code_size(&mut self);
fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code>;
fn refill_bits(&mut self, inp: &mut &[u8]);
fn get_bits(&mut self) -> Option<Code>;
fn max_code(&self) -> Code;
fn code_size(&self) -> u8;
}
struct DecodeState<CodeBuffer> {
min_size: u8,
table: Table,
buffer: Buffer,
last: Option<(Code, Link)>,
next_code: Code,
clear_code: Code,
end_code: Code,
has_ended: bool,
is_tiff: bool,
code_buffer: CodeBuffer,
}
struct Buffer {
bytes: Box<[u8]>,
read_mark: usize,
write_mark: usize,
}
struct Table {
inner: Vec<Link>,
depths: Vec<u16>,
}
impl Decoder {
pub fn new(order: BitOrder, size: u8) -> Self {
type Boxed = Box<dyn Stateful + Send + 'static>;
super::assert_decode_size(size);
let state = match order {
BitOrder::Lsb => Box::new(DecodeState::<LsbBuffer>::new(size)) as Boxed,
BitOrder::Msb => Box::new(DecodeState::<MsbBuffer>::new(size)) as Boxed,
};
Decoder { state }
}
pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
type Boxed = Box<dyn Stateful + Send + 'static>;
super::assert_decode_size(size);
let state = match order {
BitOrder::Lsb => {
let mut state = Box::new(DecodeState::<LsbBuffer>::new(size));
state.is_tiff = true;
state as Boxed
}
BitOrder::Msb => {
let mut state = Box::new(DecodeState::<MsbBuffer>::new(size));
state.is_tiff = true;
state as Boxed
}
};
Decoder { state }
}
pub fn decode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult {
self.state.advance(inp, out)
}
#[cfg(feature = "std")]
pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> {
IntoStream {
decoder: self,
writer,
buffer: None,
default_size: STREAM_BUF_SIZE,
}
}
#[cfg(feature = "async")]
pub fn into_async<W: futures::io::AsyncWrite>(&mut self, writer: W) -> IntoAsync<'_, W> {
IntoAsync {
decoder: self,
writer,
buffer: None,
default_size: STREAM_BUF_SIZE,
}
}
pub fn has_ended(&self) -> bool {
self.state.has_ended()
}
#[allow(dead_code)]
pub(crate) fn restart(&mut self) {
self.state.restart();
}
pub fn reset(&mut self) {
self.state.reset();
}
}
#[cfg(feature = "std")]
impl<'d, W: Write> IntoStream<'d, W> {
pub fn decode(&mut self, read: impl BufRead) -> StreamResult {
self.decode_part(read, false)
}
pub fn decode_all(mut self, read: impl BufRead) -> StreamResult {
self.decode_part(read, true)
}
pub fn set_buffer_size(&mut self, size: usize) {
assert_ne!(size, 0, "Attempted to set empty buffer");
self.default_size = size;
}
pub fn set_buffer(&mut self, buffer: &'d mut [u8]) {
assert_ne!(buffer.len(), 0, "Attempted to set empty buffer");
self.buffer = Some(StreamBuf::Borrowed(buffer));
}
fn decode_part(&mut self, mut read: impl BufRead, must_finish: bool) -> StreamResult {
let IntoStream {
decoder,
writer,
buffer,
default_size,
} = self;
enum Progress {
Ok,
Done,
}
let mut bytes_read = 0;
let mut bytes_written = 0;
let read_bytes = &mut bytes_read;
let write_bytes = &mut bytes_written;
let outbuf: &mut [u8] =
match { buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) } {
StreamBuf::Borrowed(slice) => &mut *slice,
StreamBuf::Owned(vec) => &mut *vec,
};
assert!(!outbuf.is_empty());
let once = move || {
let data = read.fill_buf()?;
let result = decoder.decode_bytes(data, &mut outbuf[..]);
*read_bytes += result.consumed_in;
*write_bytes += result.consumed_out;
read.consume(result.consumed_in);
let done = result.status.map_err(|err| {
io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err))
})?;
if let LzwStatus::NoProgress = done {
debug_assert_eq!(
result.consumed_out, 0,
"No progress means we have not decoded any data"
);
if must_finish {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"No more data but no end marker detected",
));
} else {
return Ok(Progress::Done);
}
}
writer.write_all(&outbuf[..result.consumed_out])?;
Ok(if let LzwStatus::Done = done {
Progress::Done
} else {
Progress::Ok
})
};
let status = core::iter::repeat_with(once)
.scan((), |(), result| match result {
Ok(Progress::Ok) => Some(Ok(())),
Err(err) => Some(Err(err)),
Ok(Progress::Done) => None,
})
.fuse()
.collect();
StreamResult {
bytes_read,
bytes_written,
status,
}
}
}
#[cfg(feature = "async")]
#[path = "decode_into_async.rs"]
mod impl_decode_into_async;
impl<C: CodeBuffer> DecodeState<C> {
fn new(min_size: u8) -> Self {
DecodeState {
min_size: min_size,
table: Table::new(),
buffer: Buffer::new(),
last: None,
clear_code: 1 << min_size,
end_code: (1 << min_size) + 1,
next_code: (1 << min_size) + 2,
has_ended: false,
is_tiff: false,
code_buffer: CodeBuffer::new(min_size),
}
}
fn init_tables(&mut self) {
self.code_buffer.reset(self.min_size);
self.next_code = (1 << self.min_size) + 2;
self.table.init(self.min_size);
}
fn reset_tables(&mut self) {
self.code_buffer.reset(self.min_size);
self.next_code = (1 << self.min_size) + 2;
self.table.clear(self.min_size);
}
}
impl<C: CodeBuffer> Stateful for DecodeState<C> {
fn has_ended(&self) -> bool {
self.has_ended
}
fn restart(&mut self) {
self.has_ended = false;
}
fn reset(&mut self) {
self.table.init(self.min_size);
self.buffer.read_mark = 0;
self.buffer.write_mark = 0;
self.last = None;
self.restart();
self.code_buffer = CodeBuffer::new(self.min_size);
}
fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
if self.has_ended {
return BufferResult {
consumed_in: 0,
consumed_out: 0,
status: Ok(LzwStatus::Done),
};
}
let o_in = inp.len();
let o_out = out.len();
let mut code_link = None;
let mut status = Ok(LzwStatus::Ok);
match self.last.take() {
None => {
match self.next_symbol(&mut inp) {
Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode),
Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode),
None => status = Ok(LzwStatus::NoProgress),
Some(init_code) => {
if init_code == self.clear_code {
self.init_tables();
} else if init_code == self.end_code {
self.has_ended = true;
status = Ok(LzwStatus::Done);
} else if self.table.is_empty() {
status = Err(LzwError::InvalidCode);
} else {
self.buffer.fill_reconstruct(&self.table, init_code);
let link = self.table.at(init_code).clone();
code_link = Some((init_code, link));
}
}
}
}
Some(tup) => code_link = Some(tup),
};
let mut burst_required_for_progress = false;
if let Some((code, link)) = code_link.take() {
code_link = Some((code, link));
let remain = self.buffer.buffer();
if remain.len() > out.len() {
if out.is_empty() {
status = Ok(LzwStatus::NoProgress);
} else {
out.copy_from_slice(&remain[..out.len()]);
self.buffer.consume(out.len());
out = &mut [];
}
} else if remain.is_empty() {
status = Ok(LzwStatus::NoProgress);
burst_required_for_progress = true;
} else {
let consumed = remain.len();
out[..consumed].copy_from_slice(remain);
self.buffer.consume(consumed);
out = &mut out[consumed..];
burst_required_for_progress = false;
}
}
let mut burst = [0; 6];
let mut bytes = [0u16; 6];
let mut target: [&mut [u8]; 6] = Default::default();
let mut last_decoded: Option<&[u8]> = None;
while let Some((mut code, mut link)) = code_link.take() {
if out.is_empty() && !self.buffer.buffer().is_empty() {
code_link = Some((code, link));
break;
}
let mut burst_size = 0;
self.refill_bits(&mut inp);
for b in &mut burst {
*b = match self.get_bits() {
None => break,
Some(code) => code,
};
if burst_size > 0 {
let len = bytes[burst_size - 1];
let (into, tail) = out.split_at_mut(usize::from(len));
target[burst_size - 1] = into;
out = tail;
}
let potential_code = self.next_code + burst_size as u16;
burst_size += 1;
if potential_code == self.code_buffer.max_code() - Code::from(self.is_tiff) {
break;
}
if *b == self.clear_code || *b == self.end_code || *b >= self.next_code {
break;
}
let len = self.table.depths[usize::from(*b)];
if out.len() < usize::from(len) {
break;
}
bytes[burst_size - 1] = len;
}
if burst_size == 0 {
if burst_required_for_progress {
status = Ok(LzwStatus::NoProgress);
}
code_link = Some((code, link));
break;
}
burst_required_for_progress = false;
let (&new_code, burst) = burst[..burst_size].split_last().unwrap();
for (&burst, target) in burst.iter().zip(&mut target[..burst_size - 1]) {
let cha = self.table.reconstruct(burst, target);
let new_link = self.table.derive(&link, cha, code);
self.next_code += 1;
code = burst;
link = new_link;
}
if let Some(new_last) = target[..burst_size - 1].last_mut() {
let slice = core::mem::replace(new_last, &mut []);
last_decoded = Some(&*slice);
}
if new_code == self.clear_code {
self.reset_tables();
last_decoded = None;
continue;
}
if new_code == self.end_code {
self.has_ended = true;
status = Ok(LzwStatus::Done);
last_decoded = None;
break;
}
if new_code > self.next_code {
status = Err(LzwError::InvalidCode);
last_decoded = None;
break;
}
let required_len = if new_code == self.next_code {
self.table.depths[usize::from(code)] + 1
} else {
self.table.depths[usize::from(new_code)]
};
let cha;
let is_in_buffer;
if usize::from(required_len) > out.len() {
is_in_buffer = true;
if new_code == self.next_code {
if let Some(last) = last_decoded.take() {
self.buffer.bytes[..last.len()].copy_from_slice(last);
self.buffer.write_mark = last.len();
self.buffer.read_mark = last.len();
}
cha = self.buffer.fill_cscsc();
} else {
last_decoded = None;
cha = self.buffer.fill_reconstruct(&self.table, new_code);
}
} else {
is_in_buffer = false;
let (target, tail) = out.split_at_mut(usize::from(required_len));
out = tail;
if new_code == self.next_code {
let source = match last_decoded.take() {
Some(last) => last,
None => &self.buffer.bytes[..self.buffer.write_mark],
};
cha = source[0];
target[..source.len()].copy_from_slice(source);
target[source.len()..][0] = source[0];
} else {
cha = self.table.reconstruct(new_code, target);
}
last_decoded = Some(target);
}
let new_link;
if !self.table.is_full() {
let link = self.table.derive(&link, cha, code);
if self.next_code == self.code_buffer.max_code() - Code::from(self.is_tiff)
&& self.code_buffer.code_size() < MAX_CODESIZE
{
self.bump_code_size();
}
self.next_code += 1;
new_link = link;
} else {
new_link = link.clone();
}
code_link = Some((new_code, new_link));
if is_in_buffer {
break;
}
}
if let Some(tail) = last_decoded {
self.buffer.bytes[..tail.len()].copy_from_slice(tail);
self.buffer.write_mark = tail.len();
self.buffer.read_mark = tail.len();
}
if o_in > inp.len() {
if let Ok(LzwStatus::NoProgress) = status {
status = Ok(LzwStatus::Ok);
}
}
self.last = code_link;
BufferResult {
consumed_in: o_in.wrapping_sub(inp.len()),
consumed_out: o_out.wrapping_sub(out.len()),
status,
}
}
}
impl<C: CodeBuffer> DecodeState<C> {
fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
self.code_buffer.next_symbol(inp)
}
fn bump_code_size(&mut self) {
self.code_buffer.bump_code_size()
}
fn refill_bits(&mut self, inp: &mut &[u8]) {
self.code_buffer.refill_bits(inp)
}
fn get_bits(&mut self) -> Option<Code> {
self.code_buffer.get_bits()
}
}
impl CodeBuffer for MsbBuffer {
fn new(min_size: u8) -> Self {
MsbBuffer {
code_size: min_size + 1,
code_mask: (1u16 << (min_size + 1)) - 1,
bit_buffer: 0,
bits: 0,
}
}
fn reset(&mut self, min_size: u8) {
self.code_size = min_size + 1;
self.code_mask = (1 << self.code_size) - 1;
}
fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
if self.bits < self.code_size {
self.refill_bits(inp);
}
self.get_bits()
}
fn bump_code_size(&mut self) {
self.code_size += 1;
self.code_mask = (self.code_mask << 1) | 1;
}
fn refill_bits(&mut self, inp: &mut &[u8]) {
let wish_count = (64 - self.bits) / 8;
let mut buffer = [0u8; 8];
let new_bits = match inp.get(..usize::from(wish_count)) {
Some(bytes) => {
buffer[..usize::from(wish_count)].copy_from_slice(bytes);
*inp = &inp[usize::from(wish_count)..];
wish_count * 8
}
None => {
let new_bits = inp.len() * 8;
buffer[..inp.len()].copy_from_slice(inp);
*inp = &[];
new_bits as u8
}
};
self.bit_buffer |= u64::from_be_bytes(buffer) >> self.bits;
self.bits += new_bits;
}
fn get_bits(&mut self) -> Option<Code> {
if self.bits < self.code_size {
return None;
}
let mask = u64::from(self.code_mask);
let rotbuf = self.bit_buffer.rotate_left(self.code_size.into());
self.bit_buffer = rotbuf & !mask;
self.bits -= self.code_size;
Some((rotbuf & mask) as u16)
}
fn max_code(&self) -> Code {
self.code_mask
}
fn code_size(&self) -> u8 {
self.code_size
}
}
impl CodeBuffer for LsbBuffer {
fn new(min_size: u8) -> Self {
LsbBuffer {
code_size: min_size + 1,
code_mask: (1u16 << (min_size + 1)) - 1,
bit_buffer: 0,
bits: 0,
}
}
fn reset(&mut self, min_size: u8) {
self.code_size = min_size + 1;
self.code_mask = (1 << self.code_size) - 1;
}
fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
if self.bits < self.code_size {
self.refill_bits(inp);
}
self.get_bits()
}
fn bump_code_size(&mut self) {
self.code_size += 1;
self.code_mask = (self.code_mask << 1) | 1;
}
fn refill_bits(&mut self, inp: &mut &[u8]) {
let wish_count = (64 - self.bits) / 8;
let mut buffer = [0u8; 8];
let new_bits = match inp.get(..usize::from(wish_count)) {
Some(bytes) => {
buffer[..usize::from(wish_count)].copy_from_slice(bytes);
*inp = &inp[usize::from(wish_count)..];
wish_count * 8
}
None => {
let new_bits = inp.len() * 8;
buffer[..inp.len()].copy_from_slice(inp);
*inp = &[];
new_bits as u8
}
};
self.bit_buffer |= u64::from_be_bytes(buffer).swap_bytes() << self.bits;
self.bits += new_bits;
}
fn get_bits(&mut self) -> Option<Code> {
if self.bits < self.code_size {
return None;
}
let mask = u64::from(self.code_mask);
let code = self.bit_buffer & mask;
self.bit_buffer >>= self.code_size;
self.bits -= self.code_size;
Some(code as u16)
}
fn max_code(&self) -> Code {
self.code_mask
}
fn code_size(&self) -> u8 {
self.code_size
}
}
impl Buffer {
fn new() -> Self {
Buffer {
bytes: vec![0; MAX_ENTRIES].into_boxed_slice(),
read_mark: 0,
write_mark: 0,
}
}
fn fill_cscsc(&mut self) -> u8 {
self.bytes[self.write_mark] = self.bytes[0];
self.write_mark += 1;
self.read_mark = 0;
self.bytes[0]
}
fn fill_reconstruct(&mut self, table: &Table, code: Code) -> u8 {
self.write_mark = 0;
self.read_mark = 0;
let depth = table.depths[usize::from(code)];
let mut memory = core::mem::replace(&mut self.bytes, Box::default());
let out = &mut memory[..usize::from(depth)];
let last = table.reconstruct(code, out);
self.bytes = memory;
self.write_mark = usize::from(depth);
last
}
fn buffer(&self) -> &[u8] {
&self.bytes[self.read_mark..self.write_mark]
}
fn consume(&mut self, amt: usize) {
self.read_mark += amt;
}
}
impl Table {
fn new() -> Self {
Table {
inner: Vec::with_capacity(MAX_ENTRIES),
depths: Vec::with_capacity(MAX_ENTRIES),
}
}
fn clear(&mut self, min_size: u8) {
let static_count = usize::from(1u16 << u16::from(min_size)) + 2;
self.inner.truncate(static_count);
self.depths.truncate(static_count);
}
fn init(&mut self, min_size: u8) {
self.inner.clear();
self.depths.clear();
for i in 0..(1u16 << u16::from(min_size)) {
self.inner.push(Link::base(i as u8));
self.depths.push(1);
}
self.inner.push(Link::base(0));
self.depths.push(0);
self.inner.push(Link::base(0));
self.depths.push(0);
}
fn at(&self, code: Code) -> &Link {
&self.inner[usize::from(code)]
}
fn is_empty(&self) -> bool {
self.inner.is_empty()
}
fn is_full(&self) -> bool {
self.inner.len() >= MAX_ENTRIES
}
fn derive(&mut self, from: &Link, byte: u8, prev: Code) -> Link {
let link = from.derive(byte, prev);
let depth = self.depths[usize::from(prev)] + 1;
self.inner.push(link.clone());
self.depths.push(depth);
link
}
fn reconstruct(&self, code: Code, out: &mut [u8]) -> u8 {
let mut code_iter = code;
let table = &self.inner[..=usize::from(code)];
let len = code_iter;
for ch in out.iter_mut().rev() {
let entry = &table[usize::from(code_iter)];
code_iter = core::cmp::min(len, entry.prev);
*ch = entry.byte;
}
out[0]
}
}
impl Link {
fn base(byte: u8) -> Self {
Link { prev: 0, byte }
}
fn derive(&self, byte: u8, prev: Code) -> Self {
Link { prev, byte }
}
}
#[cfg(test)]
mod tests {
use crate::alloc::vec::Vec;
#[cfg(feature = "std")]
use crate::StreamBuf;
use crate::{decode::Decoder, BitOrder};
#[test]
fn invalid_code_size_low() {
let _ = Decoder::new(BitOrder::Msb, 0);
let _ = Decoder::new(BitOrder::Msb, 1);
}
#[test]
#[should_panic]
fn invalid_code_size_high() {
let _ = Decoder::new(BitOrder::Msb, 14);
}
fn make_encoded() -> Vec<u8> {
const FILE: &'static [u8] = include_bytes!(concat!(
env!("CARGO_MANIFEST_DIR"),
"/benches/binary-8-msb.lzw"
));
return Vec::from(FILE);
}
#[test]
#[cfg(feature = "std")]
fn into_stream_buffer_no_alloc() {
let encoded = make_encoded();
let mut decoder = Decoder::new(BitOrder::Msb, 8);
let mut output = vec![];
let mut buffer = [0; 512];
let mut istream = decoder.into_stream(&mut output);
istream.set_buffer(&mut buffer[..]);
istream.decode(&encoded[..]).status.unwrap();
match istream.buffer {
Some(StreamBuf::Borrowed(_)) => {}
None => panic!("Decoded without buffer??"),
Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"),
}
}
#[test]
#[cfg(feature = "std")]
fn into_stream_buffer_small_alloc() {
struct WriteTap<W: std::io::Write>(W);
const BUF_SIZE: usize = 512;
impl<W: std::io::Write> std::io::Write for WriteTap<W> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
assert!(buf.len() <= BUF_SIZE);
self.0.write(buf)
}
fn flush(&mut self) -> std::io::Result<()> {
self.0.flush()
}
}
let encoded = make_encoded();
let mut decoder = Decoder::new(BitOrder::Msb, 8);
let mut output = vec![];
let mut istream = decoder.into_stream(WriteTap(&mut output));
istream.set_buffer_size(512);
istream.decode(&encoded[..]).status.unwrap();
match istream.buffer {
Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE),
Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"),
None => panic!("Decoded without buffer??"),
}
}
#[test]
#[cfg(feature = "std")]
fn reset() {
let encoded = make_encoded();
let mut decoder = Decoder::new(BitOrder::Msb, 8);
let mut reference = None;
for _ in 0..2 {
let mut output = vec![];
let mut buffer = [0; 512];
let mut istream = decoder.into_stream(&mut output);
istream.set_buffer(&mut buffer[..]);
istream.decode_all(&encoded[..]).status.unwrap();
decoder.reset();
if let Some(reference) = &reference {
assert_eq!(output, *reference);
} else {
reference = Some(output);
}
}
}
}