IContextParams
Namespace: LLama.Abstractions
The parameters for initializing a LLama context from a model.
public interface IContextParams
Properties
ContextSize
Model context size (n_ctx)
public abstract Nullable<uint> ContextSize { get; }
Property Value
BatchSize
Batch size for prompt processing (must be >= 32 to use BLAS) (n_batch)
public abstract uint BatchSize { get; }
Property Value
Seed
Seed for the random number generator (seed)
public abstract uint Seed { get; }
Property Value
EmbeddingMode
Whether to use embedding mode (embedding). Note that if this is set to true, the LLamaModel won't produce text responses anymore.
public abstract bool EmbeddingMode { get; }
Property Value
RopeFrequencyBase
RoPE base frequency (null to fetch from the model)
public abstract Nullable<float> RopeFrequencyBase { get; }
Property Value
RopeFrequencyScale
RoPE frequency scaling factor (null to fetch from the model)
public abstract Nullable<float> RopeFrequencyScale { get; }
Property Value
Encoding
The encoding to use for models
public abstract Encoding Encoding { get; }
Property Value
Threads
Number of threads (null = autodetect) (n_threads)
public abstract Nullable<uint> Threads { get; }
Property Value
BatchThreads
Number of threads to use for batch processing (null = autodetect) (n_threads_batch)
public abstract Nullable<uint> BatchThreads { get; }
Property Value
YarnExtrapolationFactor
YaRN extrapolation mix factor (null = from model)
public abstract Nullable<float> YarnExtrapolationFactor { get; }
Property Value
YarnAttentionFactor
YaRN magnitude scaling factor (null = from model)
public abstract Nullable<float> YarnAttentionFactor { get; }
Property Value
YarnBetaFast
YaRN low correction dim (null = from model)
public abstract Nullable<float> YarnBetaFast { get; }
Property Value
YarnBetaSlow
YaRN high correction dim (null = from model)
public abstract Nullable<float> YarnBetaSlow { get; }
Property Value
YarnOriginalContext
YaRN original context length (null = from model)
public abstract Nullable<uint> YarnOriginalContext { get; }
Property Value
YarnScalingType
YaRN scaling method to use.
public abstract Nullable<RopeScalingType> YarnScalingType { get; }
Property Value
TypeK
Override the type of the K cache
public abstract Nullable<GGMLType> TypeK { get; }
Property Value
TypeV
Override the type of the V cache
public abstract Nullable<GGMLType> TypeV { get; }
Property Value
NoKqvOffload
Whether to disable offloading the KQV cache to the GPU
public abstract bool NoKqvOffload { get; }
Property Value
DefragThreshold
Defragment the KV cache if holes/size > defrag_threshold. Set to < 0 to disable (default).
public abstract float DefragThreshold { get; }
Property Value
DoPooling
Whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
public abstract bool DoPooling { get; }