kernel.rs - source

Files

addr2line

adler

adler32

ahash

convert.rs fallback_hash.rs lib.rs operations.rs random_state.rs specialize.rs

aho_corasick

packed

teddy

compile.rs mod.rs runtime.rs

api.rs mod.rs pattern.rs rabinkarp.rs vector.rs

ahocorasick.rs automaton.rs buffer.rs byte_frequencies.rs classes.rs dfa.rs error.rs lib.rs nfa.rs prefilter.rs state_id.rs

angle

approx

abs_diff_eq.rs lib.rs macros.rs relative_eq.rs ulps_eq.rs

backtrace

backtrace

libunwind.rs mod.rs

symbolize

gimli

elf.rs mmap_unix.rs stash.rs

gimli.rs mod.rs

capture.rs lib.rs print.rs types.rs

bitflags

blender

file.rs file_block.rs lib.rs object.rs sdna.rs structure.rs types.rs utils.rs

bytemuck

contiguous.rs lib.rs offset_of.rs pod.rs transparent.rs zeroable.rs

byteorder

case

cast_trait

cfg_if

chrono

format

mod.rs parse.rs parsed.rs scan.rs strftime.rs

naive

date.rs datetime.rs internals.rs isoweek.rs time.rs

offset

fixed.rs local.rs mod.rs utc.rs

sys

date.rs datetime.rs div.rs lib.rs round.rs sys.rs

color

alpha.rs channel.rs color_space.rs hsv.rs lab.rs lib.rs luma.rs rgb.rs xyz.rs ycbcr.rs yxy.rs

color_quant

const_fn

ast.rs error.rs iter.rs lib.rs to_tokens.rs utils.rs

crc32fast

specialized

mod.rs pclmulqdq.rs

baseline.rs combine.rs lib.rs table.rs

crossbeam

crossbeam_channel

flavors

array.rs at.rs list.rs mod.rs never.rs tick.rs zero.rs

channel.rs context.rs counter.rs err.rs lib.rs select.rs select_macro.rs utils.rs waker.rs

crossbeam_deque

deque.rs lib.rs

crossbeam_epoch

sync

list.rs mod.rs queue.rs

atomic.rs collector.rs default.rs deferred.rs epoch.rs guard.rs internal.rs lib.rs

crossbeam_queue

array_queue.rs lib.rs seg_queue.rs

crossbeam_skiplist

base.rs lib.rs map.rs set.rs

crossbeam_utils

atomic

atomic_cell.rs consume.rs mod.rs seq_lock.rs

sync

mod.rs parker.rs sharded_lock.rs wait_group.rs

backoff.rs cache_padded.rs lib.rs thread.rs

darling

lib.rs macros_public.rs

darling_core

darling_macro

dds

deflate

densevec

derive_builder

options

darling_opts.rs mod.rs

lib.rs log_disabled.rs

derive_builder_core

bindings.rs block.rs build_method.rs builder.rs builder_field.rs deprecation_notes.rs doc_comment.rs initializer.rs lib.rs log_disabled.rs options.rs setter.rs

dot

downcast_rs

dual_quat

either

erased_serde

features_check

any.rs de.rs error.rs lib.rs macros.rs private.rs ser.rs

failure

backtrace

internal.rs mod.rs

error

error_impl.rs mod.rs

as_fail.rs box_std.rs compat.rs context.rs error_message.rs lib.rs macros.rs result_ext.rs sync_failure.rs

failure_derive

fixedbitset

lib.rs range.rs

float_cmp

eq.rs lib.rs macros.rs ratio.rs ulps.rs ulps_eq.rs ulps_ord.rs

fnv

freeimage

consts.rs ffi.rs lib.rs

freeimage_sys

freetype

freetype.rs lib.rs tt_os2.rs

freetype_gl_sys

freetype_sys

freetypegl

ffi.rs lib.rs link.rs

futures

futures_channel

mpsc

mod.rs queue.rs sink_impl.rs

lib.rs lock.rs oneshot.rs

futures_core

task

__internal

atomic_waker.rs mod.rs

future.rs lib.rs stream.rs

futures_executor

enter.rs lib.rs local_pool.rs

futures_io

futures_macro

join.rs lib.rs select.rs

futures_sink

futures_task

arc_wake.rs future_obj.rs lib.rs noop_waker.rs spawn.rs waker.rs waker_ref.rs

futures_util

async_await

join_mod.rs mod.rs pending.rs poll.rs random.rs select_mod.rs

future

io

lock

bilock.rs mod.rs mutex.rs

sink

buffer.rs close.rs drain.rs err_into.rs fanout.rs feed.rs flush.rs map_err.rs mod.rs send.rs send_all.rs unfold.rs with.rs with_flat_map.rs

stream

task

mod.rs spawn.rs

fns.rs lib.rs never.rs unfold_state.rs

fxhash

generational_arena

generic_array

arr.rs functional.rs hex.rs impls.rs iter.rs lib.rs sequence.rs

getrandom

error.rs error_impls.rs lib.rs linux_android.rs use_file.rs util.rs util_libc.rs

gif

reader

decoder.rs mod.rs

common.rs encoder.rs lib.rs traits.rs

gimli

glfw

ffi

callbacks.rs lib.rs

glfw_sys

glin

glin_derive

glsl

parsers

transpiler

lib.rs parser.rs parsers.rs syntax.rs visitor.rs

half

bfloat

binary16

bfloat.rs binary16.rs lib.rs slice.rs

harfbuzz

blob.rs buffer.rs direction.rs language.rs lib.rs

harfbuzz_ft_sys

harfbuzz_sys

hashbrown

external_trait_impls

rayon

helpers.rs map.rs mod.rs raw.rs set.rs

mod.rs serde.rs

raw

bitmask.rs mod.rs sse2.rs

lib.rs macros.rs map.rs scopeguard.rs set.rs

human_sort

iter_pair.rs lib.rs

ident_case

image

indexmap

map

core

equivalent.rs lib.rs macros.rs map.rs mutable_keys.rs set.rs util.rs

instant

lib.rs native.rs

itertools

itoa

jpeg_decoder

worker

immediate.rs mod.rs multithreaded.rs

decoder.rs error.rs huffman.rs idct.rs lib.rs marker.rs parser.rs upsampler.rs

lazy_static

inline_lazy.rs lib.rs

libc

unix

linux_like

linux

gnu

b64

x86_64

align.rs mod.rs not_x32.rs

align.rs mod.rs

align.rs mod.rs

align.rs mod.rs

fixed_width_ints.rs lib.rs macros.rs

libm

math

acos.rs acosf.rs acosh.rs acoshf.rs asin.rs asinf.rs asinh.rs asinhf.rs atan.rs atan2.rs atan2f.rs atanf.rs atanh.rs atanhf.rs cbrt.rs cbrtf.rs ceil.rs ceilf.rs copysign.rs copysignf.rs cos.rs cosf.rs cosh.rs coshf.rs erf.rs erff.rs exp.rs exp10.rs exp10f.rs exp2.rs exp2f.rs expf.rs expm1.rs expm1f.rs expo2.rs fabs.rs fabsf.rs fdim.rs fdimf.rs fenv.rs floor.rs floorf.rs fma.rs fmaf.rs fmax.rs fmaxf.rs fmin.rs fminf.rs fmod.rs fmodf.rs frexp.rs frexpf.rs hypot.rs hypotf.rs ilogb.rs ilogbf.rs j0.rs j0f.rs j1.rs j1f.rs jn.rs jnf.rs k_cos.rs k_cosf.rs k_expo2.rs k_expo2f.rs k_sin.rs k_sinf.rs k_tan.rs k_tanf.rs ldexp.rs ldexpf.rs lgamma.rs lgamma_r.rs lgammaf.rs lgammaf_r.rs log.rs log10.rs log10f.rs log1p.rs log1pf.rs log2.rs log2f.rs logf.rs mod.rs modf.rs modff.rs nextafter.rs nextafterf.rs pow.rs powf.rs rem_pio2.rs rem_pio2_large.rs rem_pio2f.rs remainder.rs remainderf.rs remquo.rs remquof.rs round.rs roundf.rs scalbn.rs scalbnf.rs sin.rs sincos.rs sincosf.rs sinf.rs sinh.rs sinhf.rs sqrt.rs sqrtf.rs tan.rs tanf.rs tanh.rs tanhf.rs tgamma.rs tgammaf.rs trunc.rs truncf.rs

lock_api

lib.rs mutex.rs remutex.rs rwlock.rs

log

lib.rs macros.rs

lut_parser

matrixmultiply

x86

macros.rs mod.rs

aligned_alloc.rs archparam.rs debugmacros.rs dgemm_kernel.rs gemm.rs kernel.rs lib.rs loopmacros.rs sgemm_kernel.rs util.rs

memchr

x86

avx.rs mod.rs sse2.rs

fallback.rs iter.rs lib.rs naive.rs

memoffset

lib.rs offset_of.rs raw_field.rs span_of.rs

meshopt

gen

analyze.rs clusterize.rs encoding.rs error.rs ffi.rs lib.rs optimize.rs packing.rs remap.rs shadow.rs simplify.rs stripify.rs utilities.rs

miniz_oxide

deflate

buffer.rs core.rs mod.rs stream.rs

inflate

core.rs mod.rs output_buffer.rs stream.rs

lib.rs shared.rs

monotonic_clock

mopa

mutiny_derive

lib.rs material.rs

na

lib.rs traits.rs

nalgebra

base

geometry

abstract_rotation.rs dual_quaternion.rs dual_quaternion_construction.rs dual_quaternion_ops.rs isometry.rs isometry_alias.rs isometry_construction.rs isometry_conversion.rs isometry_interpolation.rs isometry_ops.rs isometry_simba.rs mod.rs op_macros.rs orthographic.rs perspective.rs point.rs point_alias.rs point_construction.rs point_conversion.rs point_coordinates.rs point_ops.rs point_simba.rs quaternion.rs quaternion_construction.rs quaternion_conversion.rs quaternion_coordinates.rs quaternion_ops.rs quaternion_simba.rs reflection.rs rotation.rs rotation_alias.rs rotation_construction.rs rotation_conversion.rs rotation_interpolation.rs rotation_ops.rs rotation_simba.rs rotation_specialization.rs similarity.rs similarity_alias.rs similarity_construction.rs similarity_conversion.rs similarity_ops.rs similarity_simba.rs swizzle.rs transform.rs transform_alias.rs transform_construction.rs transform_conversion.rs transform_ops.rs transform_simba.rs translation.rs translation_alias.rs translation_construction.rs translation_conversion.rs translation_coordinates.rs translation_ops.rs translation_simba.rs unit_complex.rs unit_complex_construction.rs unit_complex_conversion.rs unit_complex_ops.rs unit_complex_simba.rs

linalg

ncollide3d

bounding_volume

interpolation

mod.rs rigid_motion.rs

partitioning

bvh.rs bvt.rs dbvt.rs mod.rs visitor.rs

pipeline

broad_phase

broad_phase.rs broad_phase_pair_filter.rs dbvt_broad_phase.rs mod.rs

glue

mod.rs query.rs setup.rs update.rs

narrow_phase

object

collision_groups.rs collision_object.rs collision_object_set.rs mod.rs query_type.rs

mod.rs world.rs

procedural

path

arrowhead_cap.rs mod.rs no_cap.rs path.rs polyline_path.rs polyline_pattern.rs

bezier.rs capsule.rs cone.rs cuboid.rs cylinder.rs mod.rs quad.rs sphere.rs trimesh.rs utils.rs

query

algorithms

cso_point.rs epa3.rs gjk.rs mod.rs special_support_maps.rs voronoi_simplex3.rs

closest_points

closest_points.rs closest_points_ball_ball.rs closest_points_composite_shape_shape.rs closest_points_line_line.rs closest_points_plane_support_map.rs closest_points_segment_segment.rs closest_points_shape_shape.rs closest_points_support_map_support_map.rs mod.rs

contact

contact.rs contact_ball_ball.rs contact_ball_convex_polyhedron.rs contact_composite_shape_shape.rs contact_kinematic.rs contact_manifold.rs contact_plane_support_map.rs contact_preprocessor.rs contact_shape_shape.rs contact_support_map_support_map.rs mod.rs

distance

distance.rs distance_ball_ball.rs distance_composite_shape_shape.rs distance_plane_support_map.rs distance_support_map_support_map.rs mod.rs

nonlinear_time_of_impact

mod.rs nonlinear_time_of_impact.rs nonlinear_time_of_impact_ball_ball.rs nonlinear_time_of_impact_composite_shape_shape.rs nonlinear_time_of_impact_support_map_support_map.rs

point

mod.rs point_aabb.rs point_ball.rs point_bounding_sphere.rs point_capsule.rs point_compound.rs point_cuboid.rs point_heightfield.rs point_plane.rs point_polyline.rs point_query.rs point_segment.rs point_shape.rs point_support_map.rs point_tetrahedron.rs point_triangle.rs point_trimesh.rs

proximity

mod.rs proximity.rs proximity_ball_ball.rs proximity_composite_shape_shape.rs proximity_plane_support_map.rs proximity_shape_shape.rs proximity_support_map_support_map.rs

ray

mod.rs ray.rs ray_aabb.rs ray_ball.rs ray_bounding_sphere.rs ray_compound.rs ray_cuboid.rs ray_heightfield.rs ray_plane.rs ray_polyline.rs ray_shape.rs ray_support_map.rs ray_triangle.rs ray_trimesh.rs

time_of_impact

mod.rs time_of_impact.rs time_of_impact_ball_ball.rs time_of_impact_composite_shape_shape.rs time_of_impact_plane_support_map.rs time_of_impact_support_map_support_map.rs

visitors

aabb_sets_interferences_collector.rs bounding_volume_interferences_collector.rs composite_closest_point_visitor.rs composite_point_containment_test.rs mod.rs point_interferences_collector.rs ray_interferences_collector.rs ray_intersection_cost_fn_visitor.rs

error.rs mod.rs toi_dispatcher.rs

shape

transformation

to_trimesh

ball_to_trimesh.rs capsule_to_trimesh.rs cone_to_trimesh.rs cuboid_to_trimesh.rs cylinder_to_trimesh.rs heightfield_to_trimesh.rs mod.rs to_trimesh.rs triangle_to_trimesh.rs

convex_hull2.rs convex_hull3.rs convex_hull_utils.rs hacd.rs mod.rs

utils

as_bytes.rs ccw_face_normal.rs center.rs cleanup.rs deterministic_state.rs hashable_partial_eq.rs isometry_ops.rs median.rs mod.rs point_cloud_support_point.rs point_in_poly2d.rs ref_with_cost.rs sort.rs sorted_pair.rs tetrahedron.rs triangle.rs

nom

bits

complete.rs macros.rs mod.rs streaming.rs

branch

macros.rs mod.rs

bytes

complete.rs macros.rs mod.rs streaming.rs

character

complete.rs macros.rs mod.rs streaming.rs

combinator

macros.rs mod.rs

multi

macros.rs mod.rs

number

complete.rs macros.rs mod.rs streaming.rs

sequence

macros.rs mod.rs

error.rs internal.rs lib.rs methods.rs str.rs traits.rs util.rs whitespace.rs

num_complex

cast.rs lib.rs pow.rs

num_cpus

lib.rs linux.rs

num_integer

average.rs lib.rs roots.rs

num_iter

num_rational

num_traits

ops

checked.rs inv.rs mod.rs mul_add.rs overflowing.rs saturating.rs wrapping.rs

bounds.rs cast.rs float.rs identities.rs int.rs lib.rs macros.rs pow.rs real.rs sign.rs

numext_constructor

fixed_hash

fixed_uint

definition.rs lib.rs utils.rs

numext_fixed_uint

numext_fixed_uint_core

numext_fixed_uint_hack

object

once_cell

imp_std.rs lib.rs

parking_lot

condvar.rs deadlock.rs elision.rs fair_mutex.rs lib.rs mutex.rs once.rs raw_fair_mutex.rs raw_mutex.rs raw_rwlock.rs remutex.rs rwlock.rs util.rs

parking_lot_core

thread_parker

linux.rs mod.rs

lib.rs parking_lot.rs spinwait.rs util.rs word_lock.rs

pathfinding

directed

astar.rs bfs.rs dfs.rs dijkstra.rs edmonds_karp.rs fringe.rs idastar.rs iddfs.rs mod.rs strongly_connected_components.rs topological_sort.rs

undirected

connected_components.rs kruskal.rs mod.rs

grid.rs kuhn_munkres.rs lib.rs matrix.rs utils.rs

pennereq

petgraph

pin_project_lite

pin_utils

lib.rs projection.rs stack_pin.rs

png

decoder

mod.rs stream.rs zlib.rs

chunk.rs common.rs encoder.rs filter.rs lib.rs traits.rs utils.rs

polygon2

ppv_lite86

x86_64

lib.rs soft.rs types.rs

proc_macro2

detection.rs fallback.rs lib.rs marker.rs parse.rs wrapper.rs

proc_macro_crate

proc_macro_hack

error.rs iter.rs lib.rs parse.rs quote.rs

proc_macro_nested

quote

ext.rs format.rs ident_fragment.rs lib.rs runtime.rs spanned.rs to_tokens.rs

rand

rand_chacha

chacha.rs guts.rs lib.rs

rand_core

block.rs error.rs impls.rs le.rs lib.rs os.rs

rand_distr

raw_window_handle

rawpointer

rayon

delegate.rs lib.rs math.rs option.rs par_either.rs prelude.rs private.rs range.rs range_inclusive.rs result.rs split_producer.rs str.rs string.rs vec.rs

rayon_core

compile_fail

mod.rs quicksort_race1.rs quicksort_race2.rs quicksort_race3.rs rc_return.rs rc_upvar.rs scope_join_bad.rs

join

scope

sleep

counters.rs mod.rs

spawn

thread_pool

job.rs latch.rs lib.rs log.rs private.rs registry.rs unwind.rs util.rs

rect_packer

packer

mod.rs packer.rs

regex

regex_syntax

retain_mut

rin

rin_app

app.rs application_callbacks.rs lib.rs

rin_blender

skinning

components.rs mod.rs

actions.rs animation_system.rs components.rs lib.rs scene_data.rs textures.rs

rin_core

rin_gl

autoload.rs basic_material.rs bitmap_font.rs cubemap.rs image_based_light.rs lib.rs macros.rs material.rs object.rs outline_material.rs renderer.rs shader_material.rs simple_fbo.rs traits.rs ttf.rs uniforms_cache.rs vao_mesh.rs vao_path.rs

rin_graphics

camera.rs freeimage.rs gradient.rs lib.rs mesh.rs mesh_slice.rs mvp.rs node.rs path.rs polyline.rs polyline_slice.rs primitives.rs projection.rs ttf.rs vertex.rs

rin_gui

rin_material

basic_material.rs lib.rs outline_material.rs parameter.rs pbr_material.rs texture.rs

rin_math

lib.rs rectangle.rs

rin_postpo

bloom.rs dof.rs fxaa.rs lib.rs lut.rs ssao.rs tonemap.rs

rin_scene

animation

mod.rs path_follower.rs

events

light

mod.rs shadow.rs

physics

mod.rs rigidbody.rs

postprocessing

renderer

time

transformation

bundle.rs components.rs geometry.rs immediate_renderer.rs lib.rs scene.rs skybox.rs water.rs

rin_util

autoloader.rs enum_set.rs error.rs lazy_value.rs lib.rs value_cache.rs

rin_window

events.rs glfw_window.rs lib.rs window.rs

rinblender

bones.rs catmullclark.rs curves.rs empty.rs enum_set.rs idtree.rs lamp.rs lib.rs loader.rs material.rs mesh.rs model.rs modifiers.rs packedfile.rs scene.rs shape_keys.rs texture.rs trimesh.rs utils.rs

rinecs

rinecs_derive

changes.rs component.rs debug.rs debug_param_fn.rs filter.rs hierarchical.rs hierarchical_oneton.rs lib.rs oneton.rs onetoone.rs

rinecs_derive_utils

lib.rs system.rs

ringui_derive

rustc_demangle

legacy.rs lib.rs v0.rs

rusty_pool

ryu

buffer

pretty

exponent.rs mantissa.rs mod.rs

common.rs d2s.rs d2s_full_table.rs d2s_intrinsics.rs digit_table.rs f2s.rs f2s_intrinsics.rs lib.rs

scopeguard

seitan

seitan_derive

semver

lib.rs version.rs version_req.rs

semver_parser

common.rs lib.rs range.rs recognize.rs version.rs

serde

de

ignored_any.rs impls.rs mod.rs seed.rs utf8.rs value.rs

private

de.rs doc.rs mod.rs ser.rs size_hint.rs

ser

fmt.rs impls.rs impossible.rs mod.rs

integer128.rs lib.rs macros.rs

serde_derive

internals

ast.rs attr.rs case.rs check.rs ctxt.rs mod.rs receiver.rs respan.rs symbol.rs

bound.rs de.rs dummy.rs fragment.rs lib.rs pretend.rs ser.rs try.rs

serde_json

features_check

io

value

de.rs from.rs index.rs mod.rs partial_eq.rs ser.rs

de.rs error.rs iter.rs lib.rs macros.rs map.rs number.rs read.rs ser.rs

shaderdata_derive

simba

scalar

complex.rs field.rs mod.rs real.rs subset.rs

simd

auto_simd_impl.rs mod.rs simd_bool.rs simd_complex.rs simd_option.rs simd_partial_ord.rs simd_real.rs simd_signed.rs simd_value.rs

slab

slice_of_array

slotmap

dense.rs hop.rs lib.rs normal.rs secondary.rs sparse_secondary.rs

smallvec

std140_data

streaming_iterator

lib.rs sources.rs

strsim

syn

synstructure

lib.rs macros.rs

thiserror

aserror.rs display.rs lib.rs

thiserror_impl

ast.rs attr.rs expand.rs fmt.rs lib.rs prop.rs valid.rs

thread_local

cached.rs lib.rs thread_id.rs unreachable.rs

tiff

decoder

ifd.rs mod.rs stream.rs

encoder

colortype.rs mod.rs writer.rs

bytecast.rs error.rs lib.rs tags.rs

time

display.rs duration.rs lib.rs parse.rs sys.rs

toml

datetime.rs de.rs lib.rs macros.rs map.rs ser.rs spanned.rs tokens.rs value.rs

typenum

array.rs bit.rs int.rs lib.rs marker_traits.rs operator_aliases.rs private.rs type_operators.rs uint.rs

unchecked_unwrap

unicode_xid

lib.rs tables.rs

vec2

cross.rs extract.rs inv.rs len.rs lerp.rs lib.rs min_max.rs new.rs norm.rs ops.rs set.rs transform.rs

vec3

cross.rs extract.rs inv.rs len.rs lerp.rs lib.rs min_max.rs new.rs norm.rs ops.rs set.rs transform.rs

weezl

decode.rs encode.rs error.rs lib.rs

x11

dpms.rs glx.rs internal.rs keysym.rs lib.rs link.rs xcursor.rs xf86vmode.rs xfixes.rs xft.rs xinerama.rs xinput.rs xinput2.rs xlib.rs xlib_xcb.rs xmd.rs xmu.rs xrandr.rs xrecord.rs xrender.rs xss.rs xt.rs xtest.rs

zlib_sys

// Copyright 2016 - 2018 Ulrik Sverdrup "bluss"
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use core::ops::{AddAssign, MulAssign};

/// General matrix multiply kernel
///
/// Describes one micro-kernel: its element type, its tile dimensions
/// (`MR` × `NR`), a handful of tuning parameters, and the `kernel`
/// function itself.
pub trait GemmKernel {
    /// Scalar type used for `alpha`, `beta` and the `a`, `b`, `c` data.
    type Elem: Element;

    /// Kernel rows (defaults to the value carried by `MRTy`)
    const MR: usize = Self::MRTy::VALUE;
    /// Kernel cols (defaults to the value carried by `NRTy`)
    const NR: usize = Self::NRTy::VALUE;
    /// Kernel rows as const num type
    type MRTy: ConstNum;
    /// Kernel cols as const num type
    type NRTy: ConstNum;

    /// align inputs to this
    ///
    /// NOTE(review): presumably a byte alignment for the packed input
    /// buffers — confirm against the packing code.
    fn align_to() -> usize;

    /// Whether to always use the masked wrapper around the kernel.
    fn always_masked() -> bool;

    // NOTE(review): `nc`, `kc` and `mc` are undocumented here; presumably
    // they are the blocking sizes along the n, k and m dimensions used by
    // the gemm driver — confirm against the caller.
    fn nc() -> usize;
    fn kc() -> usize;
    fn mc() -> usize;

    /// Matrix multiplication kernel
    ///
    /// This does the matrix multiplication:
    ///
    /// C ← α A B + β C
    ///
    /// + `k`: length of data in a, b
    /// + a, b are packed
    /// + c has general strides
    /// + rsc: row stride of c
    /// + csc: col stride of c
    /// + `alpha`: scaling factor for A B product
    /// + `beta`: scaling factor for c.
    ///   Note: if `beta` is `0.`, the kernel should not (and must not)
    ///   read from c, its value is to be treated as if it was zero.
    ///
    /// When masked, the kernel is always called with β=0 but α is passed
    /// as usual. (This is only useful information if you return `true` from
    /// `always_masked`.)
    ///
    /// # Safety
    ///
    /// NOTE(review): the pointer contract is not spelled out in this file.
    /// Presumably `a` and `b` must be valid for reads of the packed data
    /// for `k` steps, and `c` must be valid for writes (and for reads when
    /// `beta` is nonzero) over the `MR` × `NR` tile addressed through
    /// `rsc`/`csc` — confirm against the callers.
    unsafe fn kernel(
        k: usize,
        alpha: Self::Elem,
        a: *const Self::Elem,
        b: *const Self::Elem,
        beta: Self::Elem,
        c: *mut Self::Elem, rsc: isize, csc: isize);
}

/// Scalar element type that the matrix multiplication kernels operate on.
///
/// Implemented below for `f32` and `f64`.
pub trait Element: Copy + AddAssign + MulAssign + Send + Sync {
    /// The additive identity.
    fn zero() -> Self;

    /// The multiplicative identity.
    fn one() -> Self;

    /// Whether this value compares equal to zero.
    fn is_zero(&self) -> bool;
}

impl Element for f32 {
    fn zero() -> Self {
        0.0_f32
    }

    fn one() -> Self {
        1.0_f32
    }

    fn is_zero(&self) -> bool {
        // Plain float equality: `-0.0` counts as zero, NaN does not.
        *self == 0.0_f32
    }
}

impl Element for f64 {
    fn zero() -> Self {
        0.0_f64
    }

    fn one() -> Self {
        1.0_f64
    }

    fn is_zero(&self) -> bool {
        // Plain float equality: `-0.0` counts as zero, NaN does not.
        *self == 0.0_f64
    }
}

/// Kernel selector
///
/// `T` is the element type; `select` only accepts kernels whose
/// `Elem` is `T`.
pub(crate) trait GemmSelect<T> {
    /// Call `select` with the selected kernel for this configuration
    ///
    /// Consumes `self` and receives the kernel by value.
    ///
    /// NOTE(review): presumably implementors are callbacks that run the
    /// actual computation with whichever kernel was chosen — confirm at
    /// the call sites.
    fn select<K>(self, kernel: K)
        where K: GemmKernel<Elem=T>,
              T: Element;
}


/// A `usize` constant carried at the type level.
///
/// Implementing types expose their number through `VALUE`.
pub trait ConstNum {
    /// The number this type stands for.
    const VALUE: usize;
}

/// Type-level constant 4.
pub struct U4;

impl ConstNum for U4 {
    const VALUE: usize = 4;
}

/// Type-level constant 8.
pub struct U8;

impl ConstNum for U8 {
    const VALUE: usize = 8;
}