LLamaContextParams
Namespace: LLama.Native
A C# representation of the llama.cpp llama_context_params
struct
public struct LLamaContextParams
Inheritance Object → ValueType → LLamaContextParams
Fields
seed
RNG seed, -1 for random
public int seed;
n_ctx
size of the text context window, in tokens
public int n_ctx;
n_batch
prompt processing batch size
public int n_batch;
n_gpu_layers
number of layers to store in VRAM
public int n_gpu_layers;
main_gpu
the GPU that is used for scratch and small tensors
public int main_gpu;
tensor_split
how to split layers across multiple GPUs
public IntPtr tensor_split;
rope_freq_base
ref: https://github.com/ggerganov/llama.cpp/pull/2054 RoPE base frequency
public float rope_freq_base;
rope_freq_scale
ref: https://github.com/ggerganov/llama.cpp/pull/2054 RoPE frequency scaling factor
public float rope_freq_scale;
progress_callback
called periodically with a progress value between 0 and 1; pass NULL to disable
public IntPtr progress_callback;
progress_callback_user_data
context pointer passed to the progress callback
public IntPtr progress_callback_user_data;
Properties
low_vram
if true, reduce VRAM usage at the cost of performance
public bool low_vram { get; set; }
Property Value
mul_mat_q
if true, use experimental mul_mat_q kernels
public bool mul_mat_q { get; set; }
Property Value
f16_kv
use fp16 for KV cache
public bool f16_kv { get; set; }
Property Value
logits_all
the llama_eval() call computes all logits, not just the last one
public bool logits_all { get; set; }
Property Value
vocab_only
only load the vocabulary, no weights
public bool vocab_only { get; set; }
Property Value
use_mmap
use mmap if possible
public bool use_mmap { get; set; }
Property Value
use_mlock
force the system to keep the model in RAM
public bool use_mlock { get; set; }
Property Value
embedding
embedding mode only
public bool embedding { get; set; }