1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
use crate::lz77::{buffer_full, ProcessStatus};
use crate::output_writer::{BufferStatus, DynamicWriter};

use std::cmp;
use std::ops::Range;

const MIN_MATCH: usize = crate::huffman_table::MIN_MATCH as usize;
const MAX_MATCH: usize = crate::huffman_table::MAX_MATCH as usize;

/// Simple match function for run-length encoding.
///
/// Checks how many of the next bytes from the start of the slice `data` matches prev.
fn get_match_length_rle(data: &[u8], prev: u8) -> usize {
    data.iter()
        .take(MAX_MATCH)
        .take_while(|&&b| b == prev)
        .count()
}

/// L77-Compress data using the RLE(Run-length encoding) strategy
///
/// This function simply looks for runs of data of at least length 3.
pub fn process_chunk_greedy_rle(
    data: &[u8],
    iterated_data: &Range<usize>,
    writer: &mut DynamicWriter,
) -> (usize, ProcessStatus) {
    if data.is_empty() {
        return (0, ProcessStatus::Ok);
    };

    let end = cmp::min(data.len(), iterated_data.end);
    // Start on at least byte 1.
    let start = cmp::max(iterated_data.start, 1);
    // The previous byte.
    let mut prev = data[start - 1];
    // Iterate through the requested range, but avoid going off the end.
    let current_chunk = &data[cmp::min(start, end)..end];
    let mut insert_it = current_chunk.iter().enumerate();
    let mut overlap = 0;
    // Make sure to output the first byte
    if iterated_data.start == 0 && !data.is_empty() {
        write_literal!(writer, data[0], 1);
    }

    while let Some((n, &b)) = insert_it.next() {
        let position = n + start;
        let match_len = if prev == b {
            //TODO: Avoid comparing with self here.
            // Would use as_slice() but that doesn't work on an enumerated iterator.
            get_match_length_rle(&data[position..], prev)
        } else {
            0
        };
        if match_len >= MIN_MATCH {
            if position + match_len > end {
                overlap = position + match_len - end;
            };
            let b_status = writer.write_length_rle(match_len as u16);
            if b_status == BufferStatus::Full {
                return (overlap, buffer_full(position + match_len));
            }
            insert_it.nth(match_len - 2);
        } else {
            write_literal!(writer, b, position + 1);
        }
        prev = b;
    }

    (overlap, ProcessStatus::Ok)
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::lzvalue::{ld, lit, LZValue};

    fn l(c: char) -> LZValue {
        lit(c as u8)
    }

    #[test]
    fn rle_compress() {
        let input = b"textaaaaaaaaatext";
        let mut w = DynamicWriter::new();
        let r = 0..input.len();
        let (overlap, _) = process_chunk_greedy_rle(input, &r, &mut w);
        let expected = [
            l('t'),
            l('e'),
            l('x'),
            l('t'),
            l('a'),
            ld(8, 1),
            l('t'),
            l('e'),
            l('x'),
            l('t'),
        ];
        //println!("expected: {:?}", expected);
        //println!("actual: {:?}", w.get_buffer());
        assert!(w.get_buffer() == expected);
        assert_eq!(overlap, 0);
    }
}