Skip to content

< Back


LLamaModelQuantizeParams

Namespace: LLama.Native

Quantizer parameters used in the native API

1
public struct LLamaModelQuantizeParams

Inheritance Object → ValueType → LLamaModelQuantizeParams

Remarks:

llama_model_quantize_params

Fields

nthread

number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()

1
public int nthread;

ftype

quantize to this llama_ftype

1
public LLamaFtype ftype;

output_tensor_type

output tensor type

1
public GGMLType output_tensor_type;

token_embedding_type

token embeddings tensor type

1
public GGMLType token_embedding_type;

imatrix

pointer to importance matrix data

1
public IntPtr imatrix;

kv_overrides

pointer to vector containing key-value metadata overrides (llama_model_kv_override entries)

1
public IntPtr kv_overrides;

tensor_types

pointer to vector containing tensor types

1
public IntPtr tensor_types;

Properties

allow_requantize

allow quantizing non-f32/f16 tensors

1
public bool allow_requantize { get; set; }

Property Value

Boolean

quantize_output_tensor

quantize output.weight

1
public bool quantize_output_tensor { get; set; }

Property Value

Boolean

only_copy

only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored

1
public bool only_copy { get; set; }

Property Value

Boolean

pure

quantize all tensors to the default type

1
public bool pure { get; set; }

Property Value

Boolean

keep_split

quantize to the same number of shards

1
public bool keep_split { get; set; }

Property Value

Boolean

Methods

Default()

Create a LLamaModelQuantizeParams with default values

1
public static LLamaModelQuantizeParams Default()

Returns

LLamaModelQuantizeParams


< Back