use std::clone::Clone;
use std::iter::Iterator;
#[derive(Debug, PartialEq, Eq)]
pub enum EncodedLength {
Length(u8),
CopyPrevious(u8),
RepeatZero3Bits(u8),
RepeatZero7Bits(u8),
}
impl EncodedLength {
fn from_prev_and_repeat(prev: u8, repeat: u8) -> EncodedLength {
match prev {
0 => {
if repeat <= 10 {
EncodedLength::RepeatZero3Bits(repeat)
} else {
EncodedLength::RepeatZero7Bits(repeat)
}
}
1..=15 => EncodedLength::CopyPrevious(repeat),
_ => panic!(),
}
}
}
pub const COPY_PREVIOUS: usize = 16;
pub const REPEAT_ZERO_3_BITS: usize = 17;
pub const REPEAT_ZERO_7_BITS: usize = 18;
const MIN_REPEAT: u8 = 3;
fn update_out_and_freq(
encoded: EncodedLength,
output: &mut Vec<EncodedLength>,
frequencies: &mut [u16; 19],
) {
let index = match encoded {
EncodedLength::Length(l) => usize::from(l),
EncodedLength::CopyPrevious(_) => COPY_PREVIOUS,
EncodedLength::RepeatZero3Bits(_) => REPEAT_ZERO_3_BITS,
EncodedLength::RepeatZero7Bits(_) => REPEAT_ZERO_7_BITS,
};
frequencies[index] += 1;
output.push(encoded);
}
fn not_max_repetitions(length_value: u8, repeats: u8) -> bool {
(length_value == 0 && repeats < 138) || repeats < 6
}
#[cfg(test)]
pub fn encode_lengths<'a, I>(lengths: I) -> (Vec<EncodedLength>, [u16; 19])
where
I: Iterator<Item = &'a u8> + Clone,
{
let mut freqs = [0u16; 19];
let mut encoded: Vec<EncodedLength> = Vec::new();
encode_lengths_m(lengths, &mut encoded, &mut freqs);
(encoded, freqs)
}
pub fn encode_lengths_m<'a, I>(
lengths: I,
mut out: &mut Vec<EncodedLength>,
mut frequencies: &mut [u16; 19],
) where
I: Iterator<Item = &'a u8> + Clone,
{
out.clear();
let mut repeat = 0;
let mut iter = lengths.clone().enumerate().peekable();
let mut prev = !iter.peek().expect("No length values!").1;
while let Some((n, &l)) = iter.next() {
if l == prev && not_max_repetitions(l, repeat) {
repeat += 1;
}
if l != prev || iter.peek().is_none() || !not_max_repetitions(l, repeat) {
if repeat >= MIN_REPEAT {
let val = EncodedLength::from_prev_and_repeat(prev, repeat);
update_out_and_freq(val, &mut out, &mut frequencies);
repeat = 0;
if l != prev {
if l != 0 || iter.peek().is_none() {
update_out_and_freq(EncodedLength::Length(l), &mut out, &mut frequencies);
repeat = 0;
} else {
repeat = 1;
}
}
} else {
let extra_skip = if iter.peek().is_none() && l == prev {
1
} else {
0
};
let b_iter = lengths.clone().skip(n + extra_skip - repeat as usize);
let extra = if l != 0 || iter.peek().is_none() {
1
} else {
0
};
for &i in b_iter.take(repeat as usize + extra) {
update_out_and_freq(EncodedLength::Length(i), &mut out, &mut frequencies);
}
repeat = 1 - extra as u8;
}
}
prev = l;
}
}
#[cfg(test)]
pub fn huffman_lengths_from_frequency(frequencies: &[u16], max_len: usize) -> Vec<u8> {
in_place::gen_lengths(frequencies, max_len)
}
pub type LeafVec = Vec<in_place::Node>;
pub fn huffman_lengths_from_frequency_m(
frequencies: &[u16],
max_len: usize,
leaf_buffer: &mut LeafVec,
lens: &mut [u8],
) {
in_place::in_place_lengths(frequencies, max_len, leaf_buffer, lens);
}
mod in_place {
type WeightType = u32;
pub fn validate_lengths(lengths: &[u8]) -> bool {
if cfg!(any(
target_arch = "mips",
target_arch = "mipsel",
target_arch = "mips64",
target_arch = "mipsel64"
)) {
true
} else {
let v = lengths.iter().fold(0f64, |acc, &n| {
acc + if n != 0 {
2f64.powi(-(i32::from(n)))
} else {
0f64
}
});
match v.partial_cmp(&1.0) {
Some(std::cmp::Ordering::Greater) => false,
_ => true,
}
}
}
#[derive(Eq, PartialEq, Debug)]
pub struct Node {
value: WeightType,
symbol: u16,
}
fn step_1(leaves: &mut [Node]) {
debug_assert!(leaves.len() >= 2);
let mut root = 0;
let mut leaf = 2;
leaves[0].value += leaves[1].value;
for next in 1..leaves.len() - 1 {
if (leaf >= leaves.len()) || (leaves[root].value < leaves[leaf].value) {
leaves[next].value = leaves[root].value;
leaves[root].value = next as WeightType;
root += 1;
} else {
leaves[next].value = leaves[leaf].value;
leaf += 1;
}
if (leaf >= leaves.len()) || (root < next && (leaves[root].value < leaves[leaf].value))
{
leaves[next].value += leaves[root].value;
leaves[root].value = next as WeightType;
root += 1;
} else {
leaves[next].value += leaves[leaf].value;
leaf += 1;
}
}
}
fn step_2(leaves: &mut [Node]) {
debug_assert!(leaves.len() >= 2);
let n = leaves.len();
leaves[n - 2].value = 0;
for t in (0..(n + 1 - 3)).rev() {
leaves[t].value = leaves[leaves[t].value as usize].value + 1;
}
let mut available = 1 as usize;
let mut used = 0;
let mut depth = 0;
let mut root = n as isize - 2;
let mut next = n as isize - 1;
while available > 0 {
while root >= 0 && leaves[root as usize].value == depth {
used += 1;
root -= 1;
}
while available > used {
leaves[next as usize].value = depth;
next -= 1;
available -= 1;
}
available = 2 * used;
depth += 1;
used = 0;
}
}
const MAX_NUMBER_OF_CODES: usize = 32;
const NUM_CODES_LENGTH: usize = MAX_NUMBER_OF_CODES + 1;
pub fn enforce_max_code_lengths(
num_codes: &mut [u16; NUM_CODES_LENGTH],
num_used: usize,
max_len: usize,
) {
debug_assert!(max_len <= 15);
if num_used <= 1 {
return;
} else {
let mut num_above_max = 0u16;
for &l in num_codes[(max_len as usize + 1)..].iter() {
num_above_max += l;
}
num_codes[max_len] += num_above_max;
let mut total = 0u32;
for i in (1..=max_len).rev() {
total += (u32::from(num_codes[i])) << (max_len - i);
}
while total != 1u32 << max_len {
num_codes[max_len] -= 1;
for i in (1..max_len).rev() {
if num_codes[i] != 0 {
num_codes[i] -= 1;
num_codes[i + 1] += 2;
break;
}
}
total -= 1;
}
}
}
#[cfg(test)]
pub fn gen_lengths(frequencies: &[u16], max_len: usize) -> Vec<u8> {
let mut lens = vec![0u8; frequencies.len()];
let mut leaves = Vec::new();
in_place_lengths(frequencies, max_len, &mut leaves, lens.as_mut_slice());
lens
}
pub fn in_place_lengths(
frequencies: &[u16],
max_len: usize,
mut leaves: &mut Vec<Node>,
lengths: &mut [u8],
) {
debug_assert!(lengths.len() >= frequencies.len());
for l in lengths.iter_mut() {
*l = 0;
}
leaves.clear();
leaves.extend(frequencies.iter().enumerate().filter_map(|(n, f)| {
if *f > 0 {
Some(Node {
value: u32::from(*f),
symbol: n as u16,
})
} else {
None
}
}));
if leaves.len() == 1 {
lengths[leaves[0].symbol as usize] = 1;
return;
} else if leaves.is_empty() {
return;
}
leaves.sort_by(|a, b| a.value.cmp(&b.value));
step_1(&mut leaves);
step_2(&mut leaves);
let mut num_codes = [0u16; NUM_CODES_LENGTH];
for l in leaves.iter() {
num_codes[l.value as usize] += 1;
}
enforce_max_code_lengths(&mut num_codes, leaves.len(), max_len);
let mut leaf_it = leaves.iter().rev();
for (&n_codes, i) in num_codes[1..=max_len].iter().zip(1..=(max_len as u8)) {
for _ in 0..n_codes {
lengths[leaf_it.next().unwrap().symbol as usize] = i;
}
}
debug_assert_eq!(leaf_it.next(), None);
debug_assert!(
validate_lengths(lengths),
"The generated length codes were not valid!"
);
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::huffman_table::NUM_LITERALS_AND_LENGTHS;
use std::u16;
fn lit(value: u8) -> EncodedLength {
EncodedLength::Length(value)
}
fn zero(repeats: u8) -> EncodedLength {
match repeats {
0..=1 => EncodedLength::Length(0),
2..=10 => EncodedLength::RepeatZero3Bits(repeats),
_ => EncodedLength::RepeatZero7Bits(repeats),
}
}
fn copy(copies: u8) -> EncodedLength {
EncodedLength::CopyPrevious(copies)
}
#[test]
fn test_encode_lengths() {
use crate::huffman_table::FIXED_CODE_LENGTHS;
let enc = encode_lengths(FIXED_CODE_LENGTHS.iter());
assert_eq!(enc.1[0..7], [0, 0, 0, 0, 0, 0, 0]);
assert_eq!(enc.1[10..16], [0, 0, 0, 0, 0, 0]);
assert_eq!(enc.1[17..19], [0, 0]);
let test_lengths = [0, 0, 5, 0, 15, 1, 0, 0, 0, 2, 4, 4, 4, 4, 3, 5, 5, 5, 5];
let enc = encode_lengths(test_lengths.iter()).0;
assert_eq!(
enc,
vec![
lit(0),
lit(0),
lit(5),
lit(0),
lit(15),
lit(1),
zero(3),
lit(2),
lit(4),
copy(3),
lit(3),
lit(5),
copy(3),
]
);
let test_lengths = [0, 0, 0, 5, 2, 3, 0, 0, 0];
let enc = encode_lengths(test_lengths.iter()).0;
assert_eq!(enc, vec![zero(3), lit(5), lit(2), lit(3), zero(3)]);
let test_lengths = [0, 0, 0, 3, 3, 3, 5, 4, 4, 4, 4, 0, 0];
let enc = encode_lengths(test_lengths.iter()).0;
assert_eq!(
enc,
vec![
zero(3),
lit(3),
lit(3),
lit(3),
lit(5),
lit(4),
copy(3),
lit(0),
lit(0),
]
);
let lens = [
0, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
];
let _ = encode_lengths(lens.iter()).0;
let lens = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 7, 8, 7, 8, 6, 6, 8, 0, 7, 6, 7, 8, 7, 7,
8, 0, 0, 0, 0, 0, 8, 8, 0, 8, 7, 0, 10, 8, 0, 8, 0, 10, 10, 8, 8, 10, 8, 0, 8, 7, 0,
10, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 7, 7, 6, 7, 8, 8, 6, 0, 0, 8, 8, 7, 8, 8, 0,
7, 6, 6, 8, 8, 8, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 4,
3, 3, 4, 4, 5, 5, 5, 5, 5, 8, 8, 6, 7, 8, 10, 10, 0, 9,
0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 6, 6, 5, 5, 5, 5, 6, 5, 5, 4, 4, 4, 4, 4, 4, 3, 4, 3,
4,
];
let enc = encode_lengths(lens.iter()).0;
assert_eq!(
&enc[..10],
&[
zero(10),
lit(9),
lit(0),
lit(0),
lit(9),
zero(18),
lit(6),
zero(3),
lit(8),
zero(4)
]
);
assert_eq!(
&enc[10..20],
&[
lit(8),
lit(0),
lit(0),
lit(7),
lit(8),
lit(7),
lit(8),
lit(6),
lit(6),
lit(8)
]
);
let enc = encode_lengths([1, 1, 1, 2].iter()).0;
assert_eq!(enc, vec![lit(1), lit(1), lit(1), lit(2)]);
let enc = encode_lengths([0, 0, 3].iter()).0;
assert_eq!(enc, vec![lit(0), lit(0), lit(3)]);
let enc = encode_lengths([0, 0, 0, 5, 2].iter()).0;
assert_eq!(enc, vec![zero(3), lit(5), lit(2)]);
let enc = encode_lengths([0, 0, 0, 5, 0].iter()).0;
assert!(*enc.last().unwrap() != lit(5));
let enc = encode_lengths([0, 4, 4, 4, 4, 0].iter()).0;
assert_eq!(*enc.last().unwrap(), zero(0));
}
#[test]
fn test_lengths_from_frequencies() {
let frequencies = [1, 1, 5, 7, 10, 14];
let expected = [4, 4, 3, 2, 2, 2];
let res = huffman_lengths_from_frequency(&frequencies, 4);
assert_eq!(expected, res.as_slice());
let frequencies = [1, 5, 1, 7, 10, 14];
let expected = [4, 3, 4, 2, 2, 2];
let res = huffman_lengths_from_frequency(&frequencies, 4);
assert_eq!(expected, res.as_slice());
let frequencies = [0, 25, 0, 10, 2, 4];
let res = huffman_lengths_from_frequency(&frequencies, 4);
assert_eq!(res[0], 0);
assert_eq!(res[2], 0);
assert!(res[1] < 4);
let frequencies = [0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0];
let res = huffman_lengths_from_frequency(&frequencies, 5);
let expected = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0];
assert_eq!(expected, res.as_slice());
let frequencies = [0; 30];
let res = huffman_lengths_from_frequency(&frequencies, 5);
for (a, b) in frequencies.iter().zip(res.iter()) {
assert_eq!(*a, (*b).into());
}
let mut frequencies = vec![3; NUM_LITERALS_AND_LENGTHS];
frequencies[55] = u16::MAX / 3;
frequencies[125] = u16::MAX / 3;
let res = huffman_lengths_from_frequency(&frequencies, 15);
assert_eq!(res.len(), NUM_LITERALS_AND_LENGTHS);
assert!(res[55] < 3);
assert!(res[125] < 3);
}
#[test]
fn optimal_lengths() {
let freqs = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 68, 0, 14, 0, 0, 0, 0, 3, 7, 6, 1, 0, 12, 14, 9, 2, 6, 9, 4, 1, 1, 4, 1, 1, 0,
0, 1, 3, 0, 6, 0, 0, 0, 4, 4, 1, 2, 5, 3, 2, 2, 9, 0, 0, 3, 1, 5, 5, 8, 0, 6, 10, 5, 2,
0, 0, 1, 2, 0, 8, 11, 4, 0, 1, 3, 31, 13, 23, 22, 56, 22, 8, 11, 43, 0, 7, 33, 15, 45,
40, 16, 1, 28, 37, 35, 26, 3, 7, 11, 9, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 126, 114, 66, 31, 41, 25, 15, 21, 20, 16, 15, 10, 7, 5, 1, 1,
];
let lens = huffman_lengths_from_frequency(&freqs, 15);
let num_bits = lens
.iter()
.zip(freqs.iter())
.fold(0, |a, (&f, &l)| a + (f as u16 * l));
assert_eq!(num_bits, 7701);
}
}