LLamaContextParams
Namespace: LLama.Native
A C# representation of the llama.cpp llama_context_params
struct
public struct LLamaContextParams
Inheritance Object → ValueType → LLamaContextParams
Fields
seed
RNG seed; uint.MaxValue (0xFFFFFFFF, the unsigned equivalent of -1) selects a random seed
public uint seed;
n_ctx
text context, 0 = from model
public uint n_ctx;
n_batch
prompt processing batch size
public uint n_batch;
n_threads
number of threads to use for generation
public uint n_threads;
n_threads_batch
number of threads to use for batch processing
public uint n_threads_batch;
rope_scaling_type
RoPE scaling type, from enum llama_rope_scaling_type
public RopeScalingType rope_scaling_type;
rope_freq_base
RoPE base frequency, 0 = from model
public float rope_freq_base;
rope_freq_scale
RoPE frequency scaling factor, 0 = from model
public float rope_freq_scale;
yarn_ext_factor
YaRN extrapolation mix factor, negative = from model
public float yarn_ext_factor;
yarn_attn_factor
YaRN magnitude scaling factor
public float yarn_attn_factor;
yarn_beta_fast
YaRN low correction dim
public float yarn_beta_fast;
yarn_beta_slow
YaRN high correction dim
public float yarn_beta_slow;
yarn_orig_ctx
YaRN original context size
public uint yarn_orig_ctx;
defrag_threshold
Defragment the KV cache if holes/size > defrag_threshold; set to a value < 0 to disable (the default)
public float defrag_threshold;
cb_eval
ggml_backend_sched_eval_callback
public IntPtr cb_eval;
cb_eval_user_data
User data passed into cb_eval
public IntPtr cb_eval_user_data;
type_k
data type for K cache
public GGMLType type_k;
type_v
data type for V cache
public GGMLType type_v;
Properties
embedding
Whether to run in embedding-only mode
public bool embedding { get; set; }
Property Value
offload_kqv
whether to offload the KQV ops (including the KV cache) to GPU
public bool offload_kqv { get; set; }
Property Value
do_pooling
Whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
public bool do_pooling { get; set; }