Files
addr2line
adler
adler32
ahash
aho_corasick
angle
approx
backtrace
bitflags
blender
bytemuck
byteorder
case
cast_trait
cfg_if
chrono
color
color_quant
const_fn
crc32fast
crossbeam
crossbeam_channel
crossbeam_deque
crossbeam_epoch
crossbeam_queue
crossbeam_skiplist
crossbeam_utils
darling
darling_core
darling_macro
dds
deflate
densevec
derive_builder
derive_builder_core
dot
downcast_rs
dual_quat
either
erased_serde
failure
failure_derive
fixedbitset
float_cmp
fnv
freeimage
freeimage_sys
freetype
freetype_gl_sys
freetype_sys
freetypegl
futures
futures_channel
futures_core
futures_executor
futures_io
futures_macro
futures_sink
futures_task
futures_util
async_await
future
io
lock
sink
stream
task
fxhash
generational_arena
generic_array
getrandom
gif
gimli
glfw
glfw_sys
glin
glin_derive
glsl
half
harfbuzz
harfbuzz_ft_sys
harfbuzz_sys
hashbrown
human_sort
ident_case
image
indexmap
instant
itertools
itoa
jpeg_decoder
lazy_static
libc
libm
lock_api
log
lut_parser
matrixmultiply
memchr
memoffset
meshopt
miniz_oxide
monotonic_clock
mopa
mutiny_derive
na
nalgebra
base
geometry
linalg
ncollide3d
bounding_volume
interpolation
partitioning
pipeline
procedural
query
algorithms
closest_points
contact
distance
nonlinear_time_of_impact
point
proximity
ray
time_of_impact
visitors
shape
transformation
utils
nom
num_complex
num_cpus
num_integer
num_iter
num_rational
num_traits
numext_constructor
numext_fixed_uint
numext_fixed_uint_core
numext_fixed_uint_hack
object
once_cell
parking_lot
parking_lot_core
pathfinding
pennereq
petgraph
pin_project_lite
pin_utils
png
polygon2
ppv_lite86
proc_macro2
proc_macro_crate
proc_macro_hack
proc_macro_nested
quote
rand
rand_chacha
rand_core
rand_distr
raw_window_handle
rawpointer
rayon
rayon_core
rect_packer
regex
regex_syntax
retain_mut
rin
rin_app
rin_blender
rin_core
rin_gl
rin_graphics
rin_gui
rin_material
rin_math
rin_postpo
rin_scene
rin_util
rin_window
rinblender
rinecs
rinecs_derive
rinecs_derive_utils
ringui_derive
rustc_demangle
rusty_pool
ryu
scopeguard
seitan
seitan_derive
semver
semver_parser
serde
serde_derive
serde_json
shaderdata_derive
simba
slab
slice_of_array
slotmap
smallvec
std140_data
streaming_iterator
strsim
syn
synstructure
thiserror
thiserror_impl
thread_local
tiff
time
toml
typenum
unchecked_unwrap
unicode_xid
vec2
vec3
weezl
x11
zlib_sys
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
// Copyright 2016 - 2018 Ulrik Sverdrup "bluss"
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use core::ops::{AddAssign, MulAssign};

/// General matrix multiply kernel
pub trait GemmKernel {
    type Elem: Element;

    /// Kernel rows
    const MR: usize = Self::MRTy::VALUE;
    /// Kernel cols
    const NR: usize = Self::NRTy::VALUE;
    /// Kernel rows as const num type
    type MRTy: ConstNum;
    /// Kernel cols as const num type
    type NRTy: ConstNum;

    /// align inputs to this
    fn align_to() -> usize;

    /// Whether to always use the masked wrapper around the kernel.
    fn always_masked() -> bool;

    fn nc() -> usize;
    fn kc() -> usize;
    fn mc() -> usize;

    /// Matrix multiplication kernel
    ///
    /// This does the matrix multiplication:
    ///
    /// C ← α A B + β C
    ///
    /// + `k`: length of data in a, b
    /// + a, b are packed
    /// + c has general strides
    /// + rsc: row stride of c
    /// + csc: col stride of c
    /// + `alpha`: scaling factor for A B product
    /// + `beta`: scaling factor for c.
    ///   Note: if `beta` is `0.`, the kernel should not (and must not)
    ///   read from c, its value is to be treated as if it was zero.
    ///
    /// When masked, the kernel is always called with β=0 but α is passed
    /// as usual. (This is only useful information if you return `true` from
    /// `always_masked`.)
    unsafe fn kernel(
        k: usize,
        alpha: Self::Elem,
        a: *const Self::Elem,
        b: *const Self::Elem,
        beta: Self::Elem,
        c: *mut Self::Elem, rsc: isize, csc: isize);
}

pub trait Element : Copy + AddAssign + MulAssign + Send + Sync {
    fn zero() -> Self;
    fn one() -> Self;
    fn is_zero(&self) -> bool;
}

impl Element for f32 {
    fn zero() -> Self { 0. }
    fn one() -> Self { 1. }
    fn is_zero(&self) -> bool { *self == 0. }
}

impl Element for f64 {
    fn zero() -> Self { 0. }
    fn one() -> Self { 1. }
    fn is_zero(&self) -> bool { *self == 0. }
}

/// Kernel selector
pub(crate) trait GemmSelect<T> {
    /// Call `select` with the selected kernel for this configuration
    fn select<K>(self, kernel: K)
        where K: GemmKernel<Elem=T>,
              T: Element;
}


pub trait ConstNum {
    const VALUE: usize;
}

pub struct U4;
pub struct U8;

impl ConstNum for U4 { const VALUE: usize = 4; }
impl ConstNum for U8 { const VALUE: usize = 8; }