IContextParams

Namespace: LLama.Abstractions

The parameters for initializing a LLama context from a model.

public interface IContextParams

Properties

ContextSize

Model context size (n_ctx)

public abstract Nullable<uint> ContextSize { get; }

Property Value

Nullable<UInt32>

BatchSize

Batch size for prompt processing (must be >= 32 to use BLAS) (n_batch)

public abstract uint BatchSize { get; }

Property Value

UInt32

Seed

Seed for the random number generator (seed)

public abstract uint Seed { get; }

Property Value

UInt32

EmbeddingMode

Whether to use embedding mode (embedding). Note that if this is set to true, the LLamaModel won't produce text responses anymore.

public abstract bool EmbeddingMode { get; }

Property Value

Boolean

RopeFrequencyBase

RoPE base frequency (null to fetch from the model)

public abstract Nullable<float> RopeFrequencyBase { get; }

Property Value

Nullable<Single>

RopeFrequencyScale

RoPE frequency scaling factor (null to fetch from the model)

public abstract Nullable<float> RopeFrequencyScale { get; }

Property Value

Nullable<Single>

Encoding

The encoding to use for models

public abstract Encoding Encoding { get; }

Property Value

Encoding

Threads

Number of threads (null = autodetect) (n_threads)

public abstract Nullable<uint> Threads { get; }

Property Value

Nullable<UInt32>

BatchThreads

Number of threads to use for batch processing (null = autodetect) (n_threads_batch)

public abstract Nullable<uint> BatchThreads { get; }

Property Value

Nullable<UInt32>

YarnExtrapolationFactor

YaRN extrapolation mix factor (null = from model)

public abstract Nullable<float> YarnExtrapolationFactor { get; }

Property Value

Nullable<Single>

YarnAttentionFactor

YaRN magnitude scaling factor (null = from model)

public abstract Nullable<float> YarnAttentionFactor { get; }

Property Value

Nullable<Single>

YarnBetaFast

YaRN low correction dim (null = from model)

public abstract Nullable<float> YarnBetaFast { get; }

Property Value

Nullable<Single>

YarnBetaSlow

YaRN high correction dim (null = from model)

public abstract Nullable<float> YarnBetaSlow { get; }

Property Value

Nullable<Single>

YarnOriginalContext

YaRN original context length (null = from model)

public abstract Nullable<uint> YarnOriginalContext { get; }

Property Value

Nullable<UInt32>

YarnScalingType

YaRN scaling method to use.

public abstract Nullable<RopeScalingType> YarnScalingType { get; }

Property Value

Nullable<RopeScalingType>

TypeK

Override the type of the K cache

public abstract Nullable<GGMLType> TypeK { get; }

Property Value

Nullable<GGMLType>

TypeV

Override the type of the V cache

public abstract Nullable<GGMLType> TypeV { get; }

Property Value

Nullable<GGMLType>

NoKqvOffload

Whether to disable offloading the KQV cache to the GPU

public abstract bool NoKqvOffload { get; }

Property Value

Boolean

DefragThreshold

Defragment the KV cache if holes/size > defrag_threshold. Set to < 0 to disable (default).

public abstract float DefragThreshold { get; }

Property Value

Single

DoPooling

Whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)

public abstract bool DoPooling { get; }

Property Value

Boolean