Skip to content

< Back


IContextParams

Namespace: LLama.Abstractions

The parameters for initializing a LLama context from a model.

1
public interface IContextParams

Attributes NullableContextAttribute

Properties

ContextSize

Model context size (n_ctx)

1
public abstract Nullable<uint> ContextSize { get; }

Property Value

Nullable<UInt32>

BatchSize

Maximum batch size that can be submitted at once (must be >=32 to use BLAS) (n_batch)

1
public abstract uint BatchSize { get; }

Property Value

UInt32

UBatchSize

Physical batch size

1
public abstract uint UBatchSize { get; }

Property Value

UInt32

SeqMax

Maximum number of sequences (i.e. distinct states for recurrent models)

1
public abstract uint SeqMax { get; }

Property Value

UInt32

Embeddings

If true, extract embeddings (together with logits).

1
public abstract bool Embeddings { get; }

Property Value

Boolean

RopeFrequencyBase

RoPE base frequency (null to fetch from the model)

1
public abstract Nullable<float> RopeFrequencyBase { get; }

Property Value

Nullable<Single>

RopeFrequencyScale

RoPE frequency scaling factor (null to fetch from the model)

1
public abstract Nullable<float> RopeFrequencyScale { get; }

Property Value

Nullable<Single>

Encoding

The encoding to use for models

1
public abstract Encoding Encoding { get; }

Property Value

Encoding

Threads

Number of threads (null = autodetect) (n_threads)

1
public abstract Nullable<int> Threads { get; }

Property Value

Nullable<Int32>

BatchThreads

Number of threads to use for batch processing (null = autodetect) (n_threads_batch)

1
public abstract Nullable<int> BatchThreads { get; }

Property Value

Nullable<Int32>

YarnExtrapolationFactor

YaRN extrapolation mix factor (null = from model)

1
public abstract Nullable<float> YarnExtrapolationFactor { get; }

Property Value

Nullable<Single>

YarnAttentionFactor

YaRN magnitude scaling factor (null = from model)

1
public abstract Nullable<float> YarnAttentionFactor { get; }

Property Value

Nullable<Single>

YarnBetaFast

YaRN low correction dim (null = from model)

1
public abstract Nullable<float> YarnBetaFast { get; }

Property Value

Nullable<Single>

YarnBetaSlow

YaRN high correction dim (null = from model)

1
public abstract Nullable<float> YarnBetaSlow { get; }

Property Value

Nullable<Single>

YarnOriginalContext

YaRN original context length (null = from model)

1
public abstract Nullable<uint> YarnOriginalContext { get; }

Property Value

Nullable<UInt32>

YarnScalingType

YaRN scaling method to use.

1
public abstract Nullable<RopeScalingType> YarnScalingType { get; }

Property Value

Nullable<RopeScalingType>

TypeK

Override the type of the K cache

1
public abstract Nullable<GGMLType> TypeK { get; }

Property Value

Nullable<GGMLType>

TypeV

Override the type of the V cache

1
public abstract Nullable<GGMLType> TypeV { get; }

Property Value

Nullable<GGMLType>

NoKqvOffload

Whether to disable offloading the KQV cache to the GPU

1
public abstract bool NoKqvOffload { get; }

Property Value

Boolean

FlashAttention

Whether to use flash attention

1
public abstract bool FlashAttention { get; }

Property Value

Boolean

DefragThreshold

Defragment the KV cache if holes/size > defrag_threshold. Set to null or < 0 to disable (default).

1
public abstract Nullable<float> DefragThreshold { get; }

Property Value

Nullable<Single>

PoolingType

How to pool (sum) embedding results by sequence id (ignored if no pooling layer)

1
public abstract LLamaPoolingType PoolingType { get; }

Property Value

LLamaPoolingType

AttentionType

Attention type to use for embeddings

1
public abstract LLamaAttentionType AttentionType { get; }

Property Value

LLamaAttentionType


< Back