Files
addr2line
adler
adler32
ahash
aho_corasick
angle
approx
backtrace
bitflags
blender
bytemuck
byteorder
case
cast_trait
cfg_if
chrono
color
color_quant
const_fn
crc32fast
crossbeam
crossbeam_channel
crossbeam_deque
crossbeam_epoch
crossbeam_queue
crossbeam_skiplist
crossbeam_utils
darling
darling_core
darling_macro
dds
deflate
densevec
derive_builder
derive_builder_core
dot
downcast_rs
dual_quat
either
erased_serde
failure
failure_derive
fixedbitset
float_cmp
fnv
freeimage
freeimage_sys
freetype
freetype_gl_sys
freetype_sys
freetypegl
futures
futures_channel
futures_core
futures_executor
futures_io
futures_macro
futures_sink
futures_task
futures_util
async_await
future
io
lock
sink
stream
task
fxhash
generational_arena
generic_array
getrandom
gif
gimli
glfw
glfw_sys
glin
glin_derive
glsl
half
harfbuzz
harfbuzz_ft_sys
harfbuzz_sys
hashbrown
human_sort
ident_case
image
indexmap
instant
itertools
itoa
jpeg_decoder
lazy_static
libc
libm
lock_api
log
lut_parser
matrixmultiply
memchr
memoffset
meshopt
miniz_oxide
monotonic_clock
mopa
mutiny_derive
na
nalgebra
base
geometry
linalg
ncollide3d
bounding_volume
interpolation
partitioning
pipeline
procedural
query
algorithms
closest_points
contact
distance
nonlinear_time_of_impact
point
proximity
ray
time_of_impact
visitors
shape
transformation
utils
nom
num_complex
num_cpus
num_integer
num_iter
num_rational
num_traits
numext_constructor
numext_fixed_uint
numext_fixed_uint_core
numext_fixed_uint_hack
object
once_cell
parking_lot
parking_lot_core
pathfinding
pennereq
petgraph
pin_project_lite
pin_utils
png
polygon2
ppv_lite86
proc_macro2
proc_macro_crate
proc_macro_hack
proc_macro_nested
quote
rand
rand_chacha
rand_core
rand_distr
raw_window_handle
rawpointer
rayon
rayon_core
rect_packer
regex
regex_syntax
retain_mut
rin
rin_app
rin_blender
rin_core
rin_gl
rin_graphics
rin_gui
rin_material
rin_math
rin_postpo
rin_scene
rin_util
rin_window
rinblender
rinecs
rinecs_derive
rinecs_derive_utils
ringui_derive
rustc_demangle
rusty_pool
ryu
scopeguard
seitan
seitan_derive
semver
semver_parser
serde
serde_derive
serde_json
shaderdata_derive
simba
slab
slice_of_array
slotmap
smallvec
std140_data
streaming_iterator
strsim
syn
synstructure
thiserror
thiserror_impl
thread_local
tiff
time
toml
typenum
unchecked_unwrap
unicode_xid
vec2
vec3
weezl
x11
zlib_sys
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
use std::fmt;

/// A representation of byte oriented equivalence classes.
///
/// This is used in an FSM to reduce the size of the transition table. This can
/// have a particularly large impact not only on the total size of an FSM, but
/// also on compile times.
#[derive(Clone, Copy)]
pub struct ByteClasses([u8; 256]);

impl ByteClasses {
    /// Creates a new set of equivalence classes where all bytes are mapped to
    /// the same class.
    pub fn empty() -> ByteClasses {
        ByteClasses([0; 256])
    }

    /// Creates a new set of equivalence classes where each byte belongs to
    /// its own equivalence class.
    pub fn singletons() -> ByteClasses {
        let mut classes = ByteClasses::empty();
        for i in 0..256 {
            classes.set(i as u8, i as u8);
        }
        classes
    }

    /// Set the equivalence class for the given byte.
    #[inline]
    pub fn set(&mut self, byte: u8, class: u8) {
        self.0[byte as usize] = class;
    }

    /// Get the equivalence class for the given byte.
    #[inline]
    pub fn get(&self, byte: u8) -> u8 {
        // SAFETY: This is safe because all dense transitions have
        // exactly 256 elements, so all u8 values are valid indices.
        self.0[byte as usize]
    }

    /// Return the total number of elements in the alphabet represented by
    /// these equivalence classes. Equivalently, this returns the total number
    /// of equivalence classes.
    #[inline]
    pub fn alphabet_len(&self) -> usize {
        self.0[255] as usize + 1
    }

    /// Returns true if and only if every byte in this class maps to its own
    /// equivalence class. Equivalently, there are 256 equivalence classes
    /// and each class contains exactly one byte.
    #[inline]
    pub fn is_singleton(&self) -> bool {
        self.alphabet_len() == 256
    }

    /// Returns an iterator over a sequence of representative bytes from each
    /// equivalence class. Namely, this yields exactly N items, where N is
    /// equivalent to the number of equivalence classes. Each item is an
    /// arbitrary byte drawn from each equivalence class.
    ///
    /// This is useful when one is determinizing an NFA and the NFA's alphabet
    /// hasn't been converted to equivalence classes yet. Picking an arbitrary
    /// byte from each equivalence class then permits a full exploration of
    /// the NFA instead of using every possible byte value.
    pub fn representatives(&self) -> ByteClassRepresentatives {
        ByteClassRepresentatives { classes: self, byte: 0, last_class: None }
    }

    /// Returns all of the bytes in the given equivalence class.
    ///
    /// The second element in the tuple indicates the number of elements in
    /// the array.
    fn elements(&self, equiv: u8) -> ([u8; 256], usize) {
        let (mut array, mut len) = ([0; 256], 0);
        for b in 0..256 {
            if self.get(b as u8) == equiv {
                array[len] = b as u8;
                len += 1;
            }
        }
        (array, len)
    }
}

impl fmt::Debug for ByteClasses {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.is_singleton() {
            write!(f, "ByteClasses({{singletons}})")
        } else {
            write!(f, "ByteClasses(")?;
            for equiv in 0..self.alphabet_len() {
                let (members, len) = self.elements(equiv as u8);
                write!(f, " {} => {:?}", equiv, &members[..len])?;
            }
            write!(f, ")")
        }
    }
}

/// An iterator over representative bytes from each equivalence class.
#[derive(Debug)]
pub struct ByteClassRepresentatives<'a> {
    classes: &'a ByteClasses,
    byte: usize,
    last_class: Option<u8>,
}

impl<'a> Iterator for ByteClassRepresentatives<'a> {
    type Item = u8;

    fn next(&mut self) -> Option<u8> {
        while self.byte < 256 {
            let byte = self.byte as u8;
            let class = self.classes.get(byte);
            self.byte += 1;

            if self.last_class != Some(class) {
                self.last_class = Some(class);
                return Some(byte);
            }
        }
        None
    }
}

/// A byte class builder keeps track of an *approximation* of equivalence
/// classes of bytes during NFA construction. That is, every byte in an
/// equivalence class cannot discriminate between a match and a non-match.
///
/// For example, in the literals `abc` and `xyz`, the bytes [\x00-`], [d-w]
/// and [{-\xFF] never discriminate between a match and a non-match, precisely
/// because they never occur in the literals anywhere.
///
/// Note though that this does not necessarily compute the minimal set of
/// equivalence classes. For example, in the literals above, the byte ranges
/// [\x00-`], [d-w] and [{-\xFF] are all treated as distinct equivalence
/// classes even though they could be treated a single class. The reason for
/// this is implementation complexity. In the future, we should endeavor to
/// compute the minimal equivalence classes since they can have a rather large
/// impact on the size of the DFA.
///
/// The representation here is 256 booleans, all initially set to false. Each
/// boolean maps to its corresponding byte based on position. A `true` value
/// indicates the end of an equivalence class, where its corresponding byte
/// and all of the bytes corresponding to all previous contiguous `false`
/// values are in the same equivalence class.
///
/// This particular representation only permits contiguous ranges of bytes to
/// be in the same equivalence class, which means that we can never discover
/// the true minimal set of equivalence classes.
#[derive(Debug)]
pub struct ByteClassBuilder(Vec<bool>);

impl ByteClassBuilder {
    /// Create a new builder of byte classes where all bytes are part of the
    /// same equivalence class.
    pub fn new() -> ByteClassBuilder {
        ByteClassBuilder(vec![false; 256])
    }

    /// Indicate the the range of byte given (inclusive) can discriminate a
    /// match between it and all other bytes outside of the range.
    pub fn set_range(&mut self, start: u8, end: u8) {
        debug_assert!(start <= end);
        if start > 0 {
            self.0[start as usize - 1] = true;
        }
        self.0[end as usize] = true;
    }

    /// Build byte classes that map all byte values to their corresponding
    /// equivalence class. The last mapping indicates the largest equivalence
    /// class identifier (which is never bigger than 255).
    pub fn build(&self) -> ByteClasses {
        let mut classes = ByteClasses::empty();
        let mut class = 0u8;
        let mut i = 0;
        loop {
            classes.set(i as u8, class as u8);
            if i >= 255 {
                break;
            }
            if self.0[i] {
                class = class.checked_add(1).unwrap();
            }
            i += 1;
        }
        classes
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn byte_classes() {
        let mut set = ByteClassBuilder::new();
        set.set_range(b'a', b'z');

        let classes = set.build();
        assert_eq!(classes.get(0), 0);
        assert_eq!(classes.get(1), 0);
        assert_eq!(classes.get(2), 0);
        assert_eq!(classes.get(b'a' - 1), 0);
        assert_eq!(classes.get(b'a'), 1);
        assert_eq!(classes.get(b'm'), 1);
        assert_eq!(classes.get(b'z'), 1);
        assert_eq!(classes.get(b'z' + 1), 2);
        assert_eq!(classes.get(254), 2);
        assert_eq!(classes.get(255), 2);

        let mut set = ByteClassBuilder::new();
        set.set_range(0, 2);
        set.set_range(4, 6);
        let classes = set.build();
        assert_eq!(classes.get(0), 0);
        assert_eq!(classes.get(1), 0);
        assert_eq!(classes.get(2), 0);
        assert_eq!(classes.get(3), 1);
        assert_eq!(classes.get(4), 2);
        assert_eq!(classes.get(5), 2);
        assert_eq!(classes.get(6), 2);
        assert_eq!(classes.get(7), 3);
        assert_eq!(classes.get(255), 3);
    }

    #[test]
    fn full_byte_classes() {
        let mut set = ByteClassBuilder::new();
        for i in 0..256u16 {
            set.set_range(i as u8, i as u8);
        }
        assert_eq!(set.build().alphabet_len(), 256);
    }
}