Skip to content

Commit aa8f9df

Browse files
committed
Fixups
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 287dbe7 commit aa8f9df

File tree

6 files changed

+1128
-1357
lines changed

6 files changed

+1128
-1357
lines changed

core/config/backend_config.go

Lines changed: 129 additions & 126 deletions
Original file line number | Diff line number | Diff line change
@@ -19,67 +19,68 @@ const (
1919
type TTSConfig struct {
2020

2121
// Voice wav path or id
22-
Voice string `yaml:"voice"`
22+
Voice string `yaml:"voice" json:"voice"`
2323

24-
AudioPath string `yaml:"audio_path"`
24+
AudioPath string `yaml:"audio_path" json:"audio_path"`
2525
}
2626

27+
// ModelConfig represents a model configuration
2728
type ModelConfig struct {
28-
schema.PredictionOptions `yaml:"parameters"`
29-
Name string `yaml:"name"`
30-
31-
F16 *bool `yaml:"f16"`
32-
Threads *int `yaml:"threads"`
33-
Debug *bool `yaml:"debug"`
34-
Roles map[string]string `yaml:"roles"`
35-
Embeddings *bool `yaml:"embeddings"`
36-
Backend string `yaml:"backend"`
37-
TemplateConfig TemplateConfig `yaml:"template"`
38-
KnownUsecaseStrings []string `yaml:"known_usecases"`
39-
KnownUsecases *ModelConfigUsecases `yaml:"-"`
40-
Pipeline Pipeline `yaml:"pipeline"`
41-
42-
PromptStrings, InputStrings []string `yaml:"-"`
43-
InputToken [][]int `yaml:"-"`
44-
functionCallString, functionCallNameString string `yaml:"-"`
45-
ResponseFormat string `yaml:"-"`
46-
ResponseFormatMap map[string]interface{} `yaml:"-"`
47-
48-
FunctionsConfig functions.FunctionsConfig `yaml:"function"`
49-
50-
FeatureFlag FeatureFlag `yaml:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
29+
schema.PredictionOptions `yaml:"parameters" json:"parameters"`
30+
Name string `yaml:"name" json:"name"`
31+
32+
F16 *bool `yaml:"f16" json:"f16"`
33+
Threads *int `yaml:"threads" json:"threads"`
34+
Debug *bool `yaml:"debug" json:"debug"`
35+
Roles map[string]string `yaml:"roles" json:"roles"`
36+
Embeddings *bool `yaml:"embeddings" json:"embeddings"`
37+
Backend string `yaml:"backend" json:"backend"`
38+
TemplateConfig TemplateConfig `yaml:"template" json:"template"`
39+
KnownUsecaseStrings []string `yaml:"known_usecases" json:"known_usecases"`
40+
KnownUsecases *ModelConfigUsecases `yaml:"-" json:"-"`
41+
Pipeline Pipeline `yaml:"pipeline" json:"pipeline"`
42+
43+
PromptStrings, InputStrings []string `yaml:"-" json:"-"`
44+
InputToken [][]int `yaml:"-" json:"-"`
45+
functionCallString, functionCallNameString string `yaml:"-" json:"-"`
46+
ResponseFormat string `yaml:"-" json:"-"`
47+
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
48+
49+
FunctionsConfig functions.FunctionsConfig `yaml:"function" json:"function"`
50+
51+
FeatureFlag FeatureFlag `yaml:"feature_flags" json:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
5152
// LLM configs (GPT4ALL, Llama.cpp, ...)
52-
LLMConfig `yaml:",inline"`
53+
LLMConfig `yaml:",inline" json:",inline"`
5354

5455
// Diffusers
55-
Diffusers Diffusers `yaml:"diffusers"`
56-
Step int `yaml:"step"`
56+
Diffusers Diffusers `yaml:"diffusers" json:"diffusers"`
57+
Step int `yaml:"step" json:"step"`
5758

5859
// GRPC Options
59-
GRPC GRPC `yaml:"grpc"`
60+
GRPC GRPC `yaml:"grpc" json:"grpc"`
6061

6162
// TTS specifics
62-
TTSConfig `yaml:"tts"`
63+
TTSConfig `yaml:"tts" json:"tts"`
6364

6465
// CUDA
6566
// Explicitly enable CUDA or not (some backends might need it)
66-
CUDA bool `yaml:"cuda"`
67+
CUDA bool `yaml:"cuda" json:"cuda"`
6768

68-
DownloadFiles []File `yaml:"download_files"`
69+
DownloadFiles []File `yaml:"download_files" json:"download_files"`
6970

70-
Description string `yaml:"description"`
71-
Usage string `yaml:"usage"`
71+
Description string `yaml:"description" json:"description"`
72+
Usage string `yaml:"usage" json:"usage"`
7273

73-
Options []string `yaml:"options"`
74-
Overrides []string `yaml:"overrides"`
74+
Options []string `yaml:"options" json:"options"`
75+
Overrides []string `yaml:"overrides" json:"overrides"`
7576
}
7677

7778
// Pipeline defines other models to use for audio-to-audio
7879
type Pipeline struct {
79-
TTS string `yaml:"tts"`
80-
LLM string `yaml:"llm"`
81-
Transcription string `yaml:"transcription"`
82-
VAD string `yaml:"vad"`
80+
TTS string `yaml:"tts" json:"tts"`
81+
LLM string `yaml:"llm" json:"llm"`
82+
Transcription string `yaml:"transcription" json:"transcription"`
83+
VAD string `yaml:"vad" json:"vad"`
8384
}
8485

8586
type File struct {
@@ -91,130 +92,132 @@ type File struct {
9192
type FeatureFlag map[string]*bool
9293

9394
func (ff FeatureFlag) Enabled(s string) bool {
94-
v, exist := ff[s]
95-
return exist && v != nil && *v
95+
if v, exists := ff[s]; exists && v != nil {
96+
return *v
97+
}
98+
return false
9699
}
97100

98101
type GRPC struct {
99-
Attempts int `yaml:"attempts"`
100-
AttemptsSleepTime int `yaml:"attempts_sleep_time"`
102+
Attempts int `yaml:"attempts" json:"attempts"`
103+
AttemptsSleepTime int `yaml:"attempts_sleep_time" json:"attempts_sleep_time"`
101104
}
102105

103106
type Diffusers struct {
104-
CUDA bool `yaml:"cuda"`
105-
PipelineType string `yaml:"pipeline_type"`
106-
SchedulerType string `yaml:"scheduler_type"`
107-
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
108-
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
109-
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
110-
ClipModel string `yaml:"clip_model"` // Clip model to use
111-
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
112-
ControlNet string `yaml:"control_net"`
107+
CUDA bool `yaml:"cuda" json:"cuda"`
108+
PipelineType string `yaml:"pipeline_type" json:"pipeline_type"`
109+
SchedulerType string `yaml:"scheduler_type" json:"scheduler_type"`
110+
EnableParameters string `yaml:"enable_parameters" json:"enable_parameters"` // A list of comma separated parameters to specify
111+
IMG2IMG bool `yaml:"img2img" json:"img2img"` // Image to Image Diffuser
112+
ClipSkip int `yaml:"clip_skip" json:"clip_skip"` // Skip every N frames
113+
ClipModel string `yaml:"clip_model" json:"clip_model"` // Clip model to use
114+
ClipSubFolder string `yaml:"clip_subfolder" json:"clip_subfolder"` // Subfolder to use for clip model
115+
ControlNet string `yaml:"control_net" json:"control_net"`
113116
}
114117

115118
// LLMConfig is a struct that holds the configuration options that are
116119
// generic for most of the LLM backends.
117120
type LLMConfig struct {
118-
SystemPrompt string `yaml:"system_prompt"`
119-
TensorSplit string `yaml:"tensor_split"`
120-
MainGPU string `yaml:"main_gpu"`
121-
RMSNormEps float32 `yaml:"rms_norm_eps"`
122-
NGQA int32 `yaml:"ngqa"`
123-
PromptCachePath string `yaml:"prompt_cache_path"`
124-
PromptCacheAll bool `yaml:"prompt_cache_all"`
125-
PromptCacheRO bool `yaml:"prompt_cache_ro"`
126-
MirostatETA *float64 `yaml:"mirostat_eta"`
127-
MirostatTAU *float64 `yaml:"mirostat_tau"`
128-
Mirostat *int `yaml:"mirostat"`
129-
NGPULayers *int `yaml:"gpu_layers"`
130-
MMap *bool `yaml:"mmap"`
131-
MMlock *bool `yaml:"mmlock"`
132-
LowVRAM *bool `yaml:"low_vram"`
133-
Reranking *bool `yaml:"reranking"`
134-
Grammar string `yaml:"grammar"`
135-
StopWords []string `yaml:"stopwords"`
136-
Cutstrings []string `yaml:"cutstrings"`
137-
ExtractRegex []string `yaml:"extract_regex"`
138-
TrimSpace []string `yaml:"trimspace"`
139-
TrimSuffix []string `yaml:"trimsuffix"`
140-
141-
ContextSize *int `yaml:"context_size"`
142-
NUMA bool `yaml:"numa"`
143-
LoraAdapter string `yaml:"lora_adapter"`
144-
LoraBase string `yaml:"lora_base"`
145-
LoraAdapters []string `yaml:"lora_adapters"`
146-
LoraScales []float32 `yaml:"lora_scales"`
147-
LoraScale float32 `yaml:"lora_scale"`
148-
NoMulMatQ bool `yaml:"no_mulmatq"`
149-
DraftModel string `yaml:"draft_model"`
150-
NDraft int32 `yaml:"n_draft"`
151-
Quantization string `yaml:"quantization"`
152-
LoadFormat string `yaml:"load_format"`
153-
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
154-
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
155-
EnforceEager bool `yaml:"enforce_eager"` // vLLM
156-
SwapSpace int `yaml:"swap_space"` // vLLM
157-
MaxModelLen int `yaml:"max_model_len"` // vLLM
158-
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
159-
DisableLogStatus bool `yaml:"disable_log_stats"` // vLLM
160-
DType string `yaml:"dtype"` // vLLM
161-
LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt"` // vLLM
162-
MMProj string `yaml:"mmproj"`
163-
164-
FlashAttention bool `yaml:"flash_attention"`
165-
NoKVOffloading bool `yaml:"no_kv_offloading"`
166-
CacheTypeK string `yaml:"cache_type_k"`
167-
CacheTypeV string `yaml:"cache_type_v"`
168-
169-
RopeScaling string `yaml:"rope_scaling"`
170-
ModelType string `yaml:"type"`
171-
172-
YarnExtFactor float32 `yaml:"yarn_ext_factor"`
173-
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
174-
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
175-
YarnBetaSlow float32 `yaml:"yarn_beta_slow"`
176-
177-
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
121+
SystemPrompt string `yaml:"system_prompt" json:"system_prompt"`
122+
TensorSplit string `yaml:"tensor_split" json:"tensor_split"`
123+
MainGPU string `yaml:"main_gpu" json:"main_gpu"`
124+
RMSNormEps float32 `yaml:"rms_norm_eps" json:"rms_norm_eps"`
125+
NGQA int32 `yaml:"ngqa" json:"ngqa"`
126+
PromptCachePath string `yaml:"prompt_cache_path" json:"prompt_cache_path"`
127+
PromptCacheAll bool `yaml:"prompt_cache_all" json:"prompt_cache_all"`
128+
PromptCacheRO bool `yaml:"prompt_cache_ro" json:"prompt_cache_ro"`
129+
MirostatETA *float64 `yaml:"mirostat_eta" json:"mirostat_eta"`
130+
MirostatTAU *float64 `yaml:"mirostat_tau" json:"mirostat_tau"`
131+
Mirostat *int `yaml:"mirostat" json:"mirostat"`
132+
NGPULayers *int `yaml:"gpu_layers" json:"gpu_layers"`
133+
MMap *bool `yaml:"mmap" json:"mmap"`
134+
MMlock *bool `yaml:"mmlock" json:"mmlock"`
135+
LowVRAM *bool `yaml:"low_vram" json:"low_vram"`
136+
Reranking *bool `yaml:"reranking" json:"reranking"`
137+
Grammar string `yaml:"grammar" json:"grammar"`
138+
StopWords []string `yaml:"stopwords" json:"stopwords"`
139+
Cutstrings []string `yaml:"cutstrings" json:"cutstrings"`
140+
ExtractRegex []string `yaml:"extract_regex" json:"extract_regex"`
141+
TrimSpace []string `yaml:"trimspace" json:"trimspace"`
142+
TrimSuffix []string `yaml:"trimsuffix" json:"trimsuffix"`
143+
144+
ContextSize *int `yaml:"context_size" json:"context_size"`
145+
NUMA bool `yaml:"numa" json:"numa"`
146+
LoraAdapter string `yaml:"lora_adapter" json:"lora_adapter"`
147+
LoraBase string `yaml:"lora_base" json:"lora_base"`
148+
LoraAdapters []string `yaml:"lora_adapters" json:"lora_adapters"`
149+
LoraScales []float32 `yaml:"lora_scales" json:"lora_scales"`
150+
LoraScale float32 `yaml:"lora_scale" json:"lora_scale"`
151+
NoMulMatQ bool `yaml:"no_mulmatq" json:"no_mulmatq"`
152+
DraftModel string `yaml:"draft_model" json:"draft_model"`
153+
NDraft int32 `yaml:"n_draft" json:"n_draft"`
154+
Quantization string `yaml:"quantization" json:"quantization"`
155+
LoadFormat string `yaml:"load_format" json:"load_format"`
156+
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization" json:"gpu_memory_utilization"` // vLLM
157+
TrustRemoteCode bool `yaml:"trust_remote_code" json:"trust_remote_code"` // vLLM
158+
EnforceEager bool `yaml:"enforce_eager" json:"enforce_eager"` // vLLM
159+
SwapSpace int `yaml:"swap_space" json:"swap_space"` // vLLM
160+
MaxModelLen int `yaml:"max_model_len" json:"max_model_len"` // vLLM
161+
TensorParallelSize int `yaml:"tensor_parallel_size" json:"tensor_parallel_size"` // vLLM
162+
DisableLogStatus bool `yaml:"disable_log_stats" json:"disable_log_stats"` // vLLM
163+
DType string `yaml:"dtype" json:"dtype"` // vLLM
164+
LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt" json:"limit_mm_per_prompt"` // vLLM
165+
MMProj string `yaml:"mmproj" json:"mmproj"`
166+
167+
FlashAttention bool `yaml:"flash_attention" json:"flash_attention"`
168+
NoKVOffloading bool `yaml:"no_kv_offloading" json:"no_kv_offloading"`
169+
CacheTypeK string `yaml:"cache_type_k" json:"cache_type_k"`
170+
CacheTypeV string `yaml:"cache_type_v" json:"cache_type_v"`
171+
172+
RopeScaling string `yaml:"rope_scaling" json:"rope_scaling"`
173+
ModelType string `yaml:"type" json:"type"`
174+
175+
YarnExtFactor float32 `yaml:"yarn_ext_factor" json:"yarn_ext_factor"`
176+
YarnAttnFactor float32 `yaml:"yarn_attn_factor" json:"yarn_attn_factor"`
177+
YarnBetaFast float32 `yaml:"yarn_beta_fast" json:"yarn_beta_fast"`
178+
YarnBetaSlow float32 `yaml:"yarn_beta_slow" json:"yarn_beta_slow"`
179+
180+
CFGScale float32 `yaml:"cfg_scale" json:"cfg_scale"` // Classifier-Free Guidance Scale
178181
}
179182

180183
// LimitMMPerPrompt is a struct that holds the limit-mm-per-prompt settings for vLLM
181184
type LimitMMPerPrompt struct {
182-
LimitImagePerPrompt int `yaml:"image"`
183-
LimitVideoPerPrompt int `yaml:"video"`
184-
LimitAudioPerPrompt int `yaml:"audio"`
185+
LimitImagePerPrompt int `yaml:"image" json:"image"`
186+
LimitVideoPerPrompt int `yaml:"video" json:"video"`
187+
LimitAudioPerPrompt int `yaml:"audio" json:"audio"`
185188
}
186189

187190
// TemplateConfig is a struct that holds the configuration of the templating system
188191
type TemplateConfig struct {
189192
// Chat is the template used in the chat completion endpoint
190-
Chat string `yaml:"chat"`
193+
Chat string `yaml:"chat" json:"chat"`
191194

192195
// ChatMessage is the template used for chat messages
193-
ChatMessage string `yaml:"chat_message"`
196+
ChatMessage string `yaml:"chat_message" json:"chat_message"`
194197

195198
// Completion is the template used for completion requests
196-
Completion string `yaml:"completion"`
199+
Completion string `yaml:"completion" json:"completion"`
197200

198201
// Edit is the template used for edit completion requests
199-
Edit string `yaml:"edit"`
202+
Edit string `yaml:"edit" json:"edit"`
200203

201204
// Functions is the template used when tools are present in the client requests
202-
Functions string `yaml:"function"`
205+
Functions string `yaml:"function" json:"function"`
203206

204207
// UseTokenizerTemplate is a flag that indicates if the tokenizer template should be used.
205208
// Note: this is mostly consumed for backends such as vllm and transformers
206209
// that can use the tokenizers specified in the JSON config files of the models
207-
UseTokenizerTemplate bool `yaml:"use_tokenizer_template"`
210+
UseTokenizerTemplate bool `yaml:"use_tokenizer_template" json:"use_tokenizer_template"`
208211

209212
// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
210213
// It defaults to \n
211-
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
214+
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character" json:"join_chat_messages_by_character"`
212215

213-
Multimodal string `yaml:"multimodal"`
216+
Multimodal string `yaml:"multimodal" json:"multimodal"`
214217

215-
JinjaTemplate bool `yaml:"jinja_template"`
218+
JinjaTemplate bool `yaml:"jinja_template" json:"jinja_template"`
216219

217-
ReplyPrefix string `yaml:"reply_prefix"`
220+
ReplyPrefix string `yaml:"reply_prefix" json:"reply_prefix"`
218221
}
219222

220223
func (c *ModelConfig) UnmarshalYAML(value *yaml.Node) error {

0 commit comments

Comments
 (0)