Class ILKernelGenerator

Namespace: NumSharp.Backends.Kernels

Assembly: NumSharp.dll

Binary operations (same-type) - contiguous kernels and generic helpers.

public static class ILKernelGenerator

Inheritance: object

ILKernelGenerator

Inherited Members: object.Equals(object)

object.Equals(object, object)

object.GetHashCode()

object.GetType()

object.MemberwiseClone()

object.ReferenceEquals(object, object)

object.ToString()

Fields

VectorBits

Detected vector width at startup: 512, 256, 128, or 0 (no SIMD).

public static readonly int VectorBits

Field Value

int

VectorBytes

Number of bytes per vector register.

public static readonly int VectorBytes

Field Value

int

Properties

AxisReductionCachedCount

Number of axis reduction kernels in cache.

public static int AxisReductionCachedCount { get; }

Property Value

int

AxisScanCachedCount

Number of axis scan kernels in cache.

public static int AxisScanCachedCount { get; }

Property Value

int

BinaryScalarCachedCount

Number of binary scalar kernels in cache.

public static int BinaryScalarCachedCount { get; }

Property Value

int

CachedCount

Number of IL-generated kernels in cache.

public static int CachedCount { get; }

Property Value

int

ComparisonCachedCount

Number of comparison kernels in cache.

public static int ComparisonCachedCount { get; }

Property Value

int

ComparisonScalarCachedCount

Number of comparison scalar kernels in cache.

public static int ComparisonScalarCachedCount { get; }

Property Value

int

ElementReductionCachedCount

Number of element reduction kernels in cache.

public static int ElementReductionCachedCount { get; }

Property Value

int

Enabled

Whether IL generation is enabled. Can be disabled for debugging.

public static bool Enabled { get; set; }

Property Value

bool

MixedTypeCachedCount

Number of mixed-type kernels in cache.

public static int MixedTypeCachedCount { get; }

Property Value

int

Name

Provider name for diagnostics.

public static string Name { get; }

Property Value

string

NanAxisReductionCachedCount

Number of NaN axis reduction kernels in cache.

public static int NanAxisReductionCachedCount { get; }

Property Value

int

NanElementReductionCachedCount

Number of NaN element reduction kernels in cache.

public static int NanElementReductionCachedCount { get; }

Property Value

int

ScanCachedCount

Number of scan kernels in cache.

public static int ScanCachedCount { get; }

Property Value

int

UnaryCachedCount

Number of unary kernels in cache.

public static int UnaryCachedCount { get; }

Property Value

int

UnaryScalarCachedCount

Number of unary scalar kernels in cache.

public static int UnaryScalarCachedCount { get; }

Property Value

int

Methods

ClipArrayBounds<T>(T*, T*, T*, long)

Clip with element-wise array bounds (both min and max arrays). All three arrays must be broadcast to the same shape by the caller. For contiguous arrays of SIMD-supported types, uses Vector operations.

public static void ClipArrayBounds<T>(T* output, T* minArr, T* maxArr, long size) where T : unmanaged, IComparable<T>

Parameters

output T*
minArr T*
maxArr T*
size long

Type Parameters

T

Remarks

NumPy clip semantics: result[i] = min(max(a[i], min[i]), max[i]). When min[i] > max[i], the result is max[i] (per NumPy behavior).

ClipArrayMax<T>(T*, T*, long)

Clip with element-wise max array bounds only (no min).

public static void ClipArrayMax<T>(T* output, T* maxArr, long size) where T : unmanaged, IComparable<T>

Parameters

output T*
maxArr T*
size long

Type Parameters

T

ClipArrayMin<T>(T*, T*, long)

Clip with element-wise min array bounds only (no max).

public static void ClipArrayMin<T>(T* output, T* minArr, long size) where T : unmanaged, IComparable<T>

Parameters

output T*
minArr T*
size long

Type Parameters

T

ClipHelper<T>(T*, long, T, T)

SIMD-optimized Clip operation for contiguous arrays (min and max). Modifies the array in-place: data[i] = Min(Max(data[i], minVal), maxVal)

public static void ClipHelper<T>(T* data, long size, T minVal, T maxVal) where T : unmanaged, IComparable<T>

Parameters

data T*
size long
minVal T
maxVal T

Type Parameters

T

ClipMaxHelper<T>(T*, long, T)

SIMD-optimized Max-only Clip operation (no lower bound).

public static void ClipMaxHelper<T>(T* data, long size, T maxVal) where T : unmanaged, IComparable<T>

Parameters

data T*
size long
maxVal T

Type Parameters

T

ClipMaxStrided<T>(T*, long, T, Shape)

Max-only Clip operation for strided arrays.

public static void ClipMaxStrided<T>(T* data, long size, T maxVal, Shape shape) where T : unmanaged, IComparable<T>

Parameters

data T*
size long
maxVal T
shape Shape

Type Parameters

T

ClipMaxUnified<T>(T*, long, T, Shape)

Unified Max-only Clip operation.

public static void ClipMaxUnified<T>(T* data, long size, T maxVal, Shape shape) where T : unmanaged, IComparable<T>

Parameters

data T*
size long
maxVal T
shape Shape

Type Parameters

T

ClipMinHelper<T>(T*, long, T)

SIMD-optimized Min-only Clip operation (no upper bound).

public static void ClipMinHelper<T>(T* data, long size, T minVal) where T : unmanaged, IComparable<T>

Parameters

data T*
size long
minVal T

Type Parameters

T

ClipMinStrided<T>(T*, long, T, Shape)

Min-only Clip operation for strided arrays.

public static void ClipMinStrided<T>(T* data, long size, T minVal, Shape shape) where T : unmanaged, IComparable<T>

Parameters

data T*
size long
minVal T
shape Shape

Type Parameters

T

ClipMinUnified<T>(T*, long, T, Shape)

Unified Min-only Clip operation.

public static void ClipMinUnified<T>(T* data, long size, T minVal, Shape shape) where T : unmanaged, IComparable<T>

Parameters

data T*
size long
minVal T
shape Shape

Type Parameters

T

ClipStrided<T>(T*, long, T, T, Shape)

Clip operation for strided (non-contiguous) arrays. Uses coordinate-based iteration via Shape.TransformOffset.

public static void ClipStrided<T>(T* data, long size, T minVal, T maxVal, Shape shape) where T : unmanaged, IComparable<T>

Parameters

data T*
size long
minVal T
maxVal T
shape Shape

Type Parameters

T

Remarks

This handles arrays that are:

Transposed (stride order differs from dimension order)
Sliced with step (e.g., arr[::2])
Views with non-standard memory layout

Performance is O(n) with coordinate overhead per element. For contiguous arrays, use ClipHelper instead.

ClipUnified<T>(T*, long, T, T, Shape)

Unified Clip operation that handles both contiguous and strided arrays. Automatically selects the optimal path based on array contiguity.

public static void ClipUnified<T>(T* data, long size, T minVal, T maxVal, Shape shape) where T : unmanaged, IComparable<T>

Parameters

data T*: Pointer to the data buffer (at offset 0, not adjusted for shape.offset)
size long: Number of elements to process
minVal T: Minimum value to clip to
maxVal T: Maximum value to clip to
shape Shape: Shape describing the memory layout

Type Parameters

T

CumSumHelper<TIn, TOut>(void*, void*, long)

SIMD-optimized cumulative sum for contiguous arrays with type conversion. Called directly by DefaultEngine for the fast path.

public static void CumSumHelper<TIn, TOut>(void* input, void* output, long totalSize) where TIn : unmanaged where TOut : unmanaged

Parameters

input void*: Pointer to input data
output void*: Pointer to output data
totalSize long: Number of elements

Type Parameters

TIn: Input element type
TOut: Output element type

GetBinaryScalarDelegate(BinaryScalarKernelKey)

Get or generate an IL-based binary scalar delegate. Returns a Func<TLhs, TRhs, TResult> delegate.

public static Delegate GetBinaryScalarDelegate(BinaryScalarKernelKey key)

Parameters

key BinaryScalarKernelKey

Returns

Delegate

GetComparisonKernel(ComparisonKernelKey)

Get or generate a comparison kernel for the specified key.

public static ComparisonKernel GetComparisonKernel(ComparisonKernelKey key)

Parameters

key ComparisonKernelKey

Returns

ComparisonKernel

GetComparisonScalarDelegate(ComparisonScalarKernelKey)

Get or generate a comparison scalar delegate. Returns a Func<TLhs, TRhs, bool> delegate.

public static Delegate GetComparisonScalarDelegate(ComparisonScalarKernelKey key)

Parameters

key ComparisonScalarKernelKey

Returns

Delegate

GetContiguousKernel<T>(BinaryOp)

Get or generate an IL-based kernel for contiguous (SimdFull) operations. Returns null if IL generation is not supported for this type/operation.

public static ContiguousKernel<T>? GetContiguousKernel<T>(BinaryOp op) where T : unmanaged

Parameters

op BinaryOp

Returns

ContiguousKernel<T>

Type Parameters

T

GetCumulativeAxisKernel(CumulativeAxisKernelKey)

Get or generate a cumulative axis (scan along axis) kernel. Returns a delegate that computes running accumulation along a specific axis.

public static CumulativeAxisKernel GetCumulativeAxisKernel(CumulativeAxisKernelKey key)

Parameters

key CumulativeAxisKernelKey

Returns

CumulativeAxisKernel

GetCumulativeKernel(CumulativeKernelKey)

Get or generate a cumulative (scan) kernel. Returns a delegate that computes running accumulation over all elements.

public static CumulativeKernel GetCumulativeKernel(CumulativeKernelKey key)

Parameters

key CumulativeKernelKey

Returns

CumulativeKernel

GetMatMulKernel<T>()

Get or generate an IL-based high-performance MatMul kernel. Returns null if the type is not supported for SIMD optimization.

public static MatMul2DKernel<T>? GetMatMulKernel<T>() where T : unmanaged

Returns

MatMul2DKernel<T>

Type Parameters

T

GetMixedTypeKernel(MixedTypeKernelKey)

Get or generate a mixed-type kernel for the specified key.

public static MixedTypeKernel GetMixedTypeKernel(MixedTypeKernelKey key)

Parameters

key MixedTypeKernelKey

Returns

MixedTypeKernel

GetShiftArrayKernel<T>(bool)

Get or generate a shift kernel for element-wise shift amounts.

public static ILKernelGenerator.ShiftArrayKernel<T>? GetShiftArrayKernel<T>(bool isLeftShift) where T : unmanaged

Parameters

isLeftShift bool: True for left shift, false for right shift

Returns

ILKernelGenerator.ShiftArrayKernel<T>: Kernel delegate or null if not supported

Type Parameters

T: Integer element type

GetShiftScalarKernel<T>(bool)

Get or generate a SIMD-optimized shift kernel for uniform shift amount.

public static ILKernelGenerator.ShiftScalarKernel<T>? GetShiftScalarKernel<T>(bool isLeftShift) where T : unmanaged

Parameters

isLeftShift bool: True for left shift, false for right shift

Returns

ILKernelGenerator.ShiftScalarKernel<T>: Kernel delegate or null if not supported

Type Parameters

T: Integer element type

GetTypedElementReductionKernel<TResult>(ElementReductionKernelKey)

Get or generate a typed element-wise reduction kernel. Returns a delegate that reduces all elements to a single value of type TResult.

public static TypedElementReductionKernel<TResult> GetTypedElementReductionKernel<TResult>(ElementReductionKernelKey key) where TResult : unmanaged

Parameters

key ElementReductionKernelKey

Returns

TypedElementReductionKernel<TResult>

Type Parameters

TResult

GetUnaryKernel(UnaryKernelKey)

Get or generate a unary kernel for the specified key.

public static UnaryKernel GetUnaryKernel(UnaryKernelKey key)

Parameters

key UnaryKernelKey

Returns

UnaryKernel

GetUnaryScalarDelegate(UnaryScalarKernelKey)

Get or generate an IL-based unary scalar delegate. Returns a Func<TInput, TOutput> delegate.

public static Delegate GetUnaryScalarDelegate(UnaryScalarKernelKey key)

Parameters

key UnaryScalarKernelKey

Returns

Delegate

GetWhereKernel<T>()

Get or generate an IL-based where kernel for the specified type. Returns null if IL generation is disabled or fails.

public static WhereKernel<T>? GetWhereKernel<T>() where T : unmanaged

Returns

WhereKernel<T>

Type Parameters

T

ModfHelper(double*, double*, long)

SIMD-optimized Modf operation for contiguous double arrays. Computes fractional and integral parts in-place. Handles special values (NaN, Inf) according to C standard modf.

public static void ModfHelper(double* data, double* integral, long size)

Parameters

data double*: Input array (will contain fractional parts after)
integral double*: Output array for integral parts
size long: Number of elements

ModfHelper(float*, float*, long)

SIMD-optimized Modf operation for contiguous float arrays. Computes fractional and integral parts in-place. Handles special values (NaN, Inf) according to C standard modf.

public static void ModfHelper(float* data, float* integral, long size)

Parameters

data float*: Input array (will contain fractional parts after)
integral float*: Output array for integral parts
size long: Number of elements

TryGetAxisReductionKernel(AxisReductionKernelKey)

Try to get an axis reduction kernel. Supports all reduction operations and all types including type promotion. Uses SIMD for capable types, scalar loop for others.

public static AxisReductionKernel? TryGetAxisReductionKernel(AxisReductionKernelKey key)

Parameters

key AxisReductionKernelKey

Returns

AxisReductionKernel

TryGetComparisonKernel(ComparisonKernelKey)

Try to get or generate a comparison kernel. Returns null if generation fails.

public static ComparisonKernel? TryGetComparisonKernel(ComparisonKernelKey key)

Parameters

key ComparisonKernelKey

Returns

ComparisonKernel

TryGetCumulativeAxisKernel(CumulativeAxisKernelKey)

Try to get or generate a cumulative axis kernel.

public static CumulativeAxisKernel? TryGetCumulativeAxisKernel(CumulativeAxisKernelKey key)

Parameters

key CumulativeAxisKernelKey

Returns

CumulativeAxisKernel

TryGetCumulativeKernel(CumulativeKernelKey)

Try to get or generate a cumulative kernel.

public static CumulativeKernel? TryGetCumulativeKernel(CumulativeKernelKey key)

Parameters

key CumulativeKernelKey

Returns

CumulativeKernel

TryGetMixedTypeKernel(MixedTypeKernelKey)

Try to get or generate a mixed-type kernel. Returns null if generation fails.

public static MixedTypeKernel? TryGetMixedTypeKernel(MixedTypeKernelKey key)

Parameters

key MixedTypeKernelKey

Returns

MixedTypeKernel

TryGetNanAxisReductionKernel(AxisReductionKernelKey)

Try to get a NaN-aware axis reduction kernel. SIMD kernels exist only for float/double; Half and Complex route to scalar fallback paths (Default.Reduction.Nan.cs ExecuteNanAxisReductionScalar / np.nanmean.cs / np.nanvar.cs / np.nanstd.cs) which handle them directly.

public static AxisReductionKernel? TryGetNanAxisReductionKernel(AxisReductionKernelKey key)

Parameters

key AxisReductionKernelKey

Returns

AxisReductionKernel

TryGetNanElementReductionKernel<TResult>(ElementReductionKernelKey)

Try to get an IL-generated NaN element reduction kernel. Only supports float and double types (NaN is only defined for floating-point).

public static TypedElementReductionKernel<TResult>? TryGetNanElementReductionKernel<TResult>(ElementReductionKernelKey key) where TResult : unmanaged

Parameters

key ElementReductionKernelKey

Returns

TypedElementReductionKernel<TResult>

Type Parameters

TResult

TryGetTypedElementReductionKernel<TResult>(ElementReductionKernelKey)

Try to get or generate an element reduction kernel.

public static TypedElementReductionKernel<TResult>? TryGetTypedElementReductionKernel<TResult>(ElementReductionKernelKey key) where TResult : unmanaged

Parameters

key ElementReductionKernelKey

Returns

TypedElementReductionKernel<TResult>

Type Parameters

TResult

TryGetUnaryKernel(UnaryKernelKey)

Try to get or generate a unary kernel. Returns null if generation fails.

public static UnaryKernel? TryGetUnaryKernel(UnaryKernelKey key)

Parameters

key UnaryKernelKey

Returns

UnaryKernel

WhereExecute<T>(bool*, T*, T*, T*, long)

Execute where operation using IL-generated kernel or fallback to static helper.

public static void WhereExecute<T>(bool* cond, T* x, T* y, T* result, long count) where T : unmanaged

Parameters

cond bool*
x T*
y T*
result T*
count long

Type Parameters

T

Table of Contents

Class ILKernelGenerator

Fields

VectorBits

Field Value

VectorBytes

Field Value

Properties

AxisReductionCachedCount

Property Value

AxisScanCachedCount

Property Value

BinaryScalarCachedCount

Property Value

CachedCount

Property Value

ComparisonCachedCount

Property Value

ComparisonScalarCachedCount

Property Value

ElementReductionCachedCount

Property Value

Enabled

Property Value

MixedTypeCachedCount

Property Value

Name

Property Value

NanAxisReductionCachedCount

Property Value

NanElementReductionCachedCount

Property Value

ScanCachedCount

Property Value

UnaryCachedCount

Property Value

UnaryScalarCachedCount

Property Value

Methods

ClipArrayBounds<T>(T*, T*, T*, long)

Parameters

Type Parameters

Remarks

ClipArrayMax<T>(T*, T*, long)

Parameters

Type Parameters

ClipArrayMin<T>(T*, T*, long)

Parameters

Type Parameters

ClipHelper<T>(T*, long, T, T)

Parameters

Type Parameters

ClipMaxHelper<T>(T*, long, T)

Parameters

Type Parameters

ClipMaxStrided<T>(T*, long, T, Shape)

Parameters

Type Parameters

ClipMaxUnified<T>(T*, long, T, Shape)

Parameters

Type Parameters

ClipMinHelper<T>(T*, long, T)

Parameters

Type Parameters

ClipMinStrided<T>(T*, long, T, Shape)

Parameters

Type Parameters

ClipMinUnified<T>(T*, long, T, Shape)

Parameters

Type Parameters

ClipStrided<T>(T*, long, T, T, Shape)

Parameters

Type Parameters

Remarks

ClipUnified<T>(T*, long, T, T, Shape)

Parameters

Type Parameters

CumSumHelper<TIn, TOut>(void*, void*, long)

Parameters

Type Parameters

ClipArrayBounds<T>(T*, T*, T*, long)

ClipArrayMax<T>(T*, T*, long)

ClipArrayMin<T>(T*, T*, long)

CumSumHelper<TIn, TOut>(void*, void*, long)

ModfHelper(double*, double*, long)

ModfHelper(float*, float*, long)