@@ -12,9 +12,9 @@ import (

	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
	"github.com/go-skynet/LocalAI/pkg/xsysinfo"
+	"github.com/klauspost/cpuid/v2"
	"github.com/phayes/freeport"
	"github.com/rs/zerolog/log"
-	"golang.org/x/sys/cpu"

	"github.com/elliotchance/orderedmap/v2"
)
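The import swap above is the core of the change: golang.org/x/sys/cpu only populates its cpu.X86 flags on x86 builds, while github.com/klauspost/cpuid/v2 exposes feature queries through cpuid.CPU.Supports. The diff reaches the new library via xsysinfo.HasCPUCaps, whose body is not shown here; the snippet below is only a guess at the shape such a helper could take, not the PR's code.

```go
package xsysinfo

import "github.com/klauspost/cpuid/v2"

// HasCPUCaps reports whether the host CPU advertises all of the requested
// features (e.g. cpuid.AVX2, cpuid.AVX). Sketch only; the real helper in
// pkg/xsysinfo may differ.
func HasCPUCaps(caps ...cpuid.FeatureID) bool {
	return cpuid.CPU.Supports(caps...)
}
```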
@@ -26,12 +26,13 @@ var Aliases map[string]string = map[string]string{
	"langchain-huggingface": LCHuggingFaceBackend,
}

+var autoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"
+
const (
	LlamaGGML = "llama-ggml"

	LLamaCPP = "llama-cpp"

-	LLamaCPPCUDA12   = "llama-cpp-cuda12"
	LLamaCPPAVX2     = "llama-cpp-avx2"
	LLamaCPPAVX      = "llama-cpp-avx"
	LLamaCPPFallback = "llama-cpp-fallback"
@@ -90,8 +91,9 @@

	// if we are autoDetecting, we want to show the llama.cpp variants as a single backend
	if autoDetect {
-		// if we find the llama.cpp variants, show them of as a single backend (llama-cpp)
-		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC := false, false, false, false
+		// if we find the llama.cpp variants, show them of as a single backend (llama-cpp) as later we are going to pick that up
+		// when starting the service
+		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda := false, false, false, false, false
		if _, ok := backends[LLamaCPP]; !ok {
			for _, e := range entry {
				if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
@@ -110,6 +112,10 @@ ENTRY:
					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPGRPC)
					foundLCPPGRPC = true
				}
+				if strings.Contains(e.Name(), LLamaCPPCUDA) && !foundLCPPCuda {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
+					foundLCPPCuda = true
+				}
			}
		}
	}
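Read on its own, the grouping done by this loop amounts to: scan the backend asset names and record each llama.cpp variant once under the generic llama-cpp key. The sketch below restates that as a standalone function for illustration; it is not code from the PR, and the llama-cpp-grpc / llama-cpp-cuda literals are assumed to follow the same naming scheme as the constants shown earlier.

```go
package main

import (
	"fmt"
	"slices"
	"strings"
)

// groupLlamaCPPVariants collapses llama.cpp variant binaries found in the
// backend asset dir under a single "llama-cpp" entry, mirroring the loop in
// the diff. Sketch only.
func groupLlamaCPPVariants(assetNames []string) map[string][]string {
	variants := []string{
		"llama-cpp-avx2",
		"llama-cpp-avx",
		"llama-cpp-fallback",
		"llama-cpp-grpc", // assumed literal for LLamaCPPGRPC
		"llama-cpp-cuda", // assumed literal for LLamaCPPCUDA
	}
	backends := map[string][]string{}
	for _, name := range assetNames {
		for _, v := range variants {
			// record each variant only once, keyed under the generic backend name
			if strings.Contains(name, v) && !slices.Contains(backends["llama-cpp"], v) {
				backends["llama-cpp"] = append(backends["llama-cpp"], v)
			}
		}
	}
	return backends
}

func main() {
	fmt.Println(groupLlamaCPPVariants([]string{"llama-cpp-avx2", "llama-cpp-avx", "llama-cpp-fallback"}))
}
```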
@@ -172,18 +178,21 @@ func selectGRPCProcess(backend, assetDir string) string {

	// Note: This environment variable is read by the LocalAI's llama.cpp grpc-server
	if os.Getenv("LLAMACPP_GRPC_SERVERS") != "" {
+		log.Info().Msgf("[%s] attempting to load with GRPC variant", LLamaCPPGRPC)
		return backendPath(assetDir, LLamaCPPGRPC)
	}

	gpus, err := xsysinfo.GPUs()
	if err == nil {
		for _, gpu := range gpus {
			if strings.Contains(gpu.String(), "nvidia") {
-				log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
				p := backendPath(assetDir, LLamaCPPCUDA)
				if _, err := os.Stat(p); err == nil {
+					log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
					grpcProcess = p
					foundCUDA = true
+				} else {
+					log.Info().Msgf("GPU device found but no CUDA backend present")
				}
			}
		}
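Condensed, the selection step added here prefers the CUDA build only when two things hold: an NVIDIA device shows up in xsysinfo.GPUs() and the CUDA binary actually exists in the asset directory; otherwise the new log line fires and selection falls through to the CPU variants. A standalone, hypothetical restatement of that check (the GPU descriptions, helper name, and example path are made up for illustration):

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// cudaBackendIfUsable mirrors the decision added in the hunk above: prefer the
// CUDA build only when an NVIDIA GPU is visible AND the binary was shipped in
// the asset dir. Hypothetical standalone helper, not the PR's code.
func cudaBackendIfUsable(gpuDescriptions []string, cudaBinaryPath string) (string, bool) {
	for _, gpu := range gpuDescriptions {
		if !strings.Contains(gpu, "nvidia") {
			continue
		}
		if _, err := os.Stat(cudaBinaryPath); err == nil {
			return cudaBinaryPath, true // CUDA build present, use it
		}
		fmt.Println("GPU device found but no CUDA backend present")
	}
	return "", false // fall through to AVX2/AVX/fallback selection
}

func main() {
	// example path for illustration only
	path, ok := cudaBackendIfUsable([]string{"nvidia corporation"}, "/tmp/backend-assets/llama-cpp-cuda")
	fmt.Println(path, ok)
}
```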
@@ -193,10 +202,10 @@ func selectGRPCProcess(backend, assetDir string) string {
		return grpcProcess
	}

-	if cpu.X86.HasAVX2 {
+	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
		log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
		grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
-	} else if cpu.X86.HasAVX {
+	} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
		log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
		grpcProcess = backendPath(assetDir, LLamaCPPAVX)
	} else {
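With the cpu.X86 flags gone, CPU capability checks go through klauspost/cpuid/v2. The same AVX2, then AVX, then fallback ladder can be written against the library directly, as in the sketch below; the cpuid calls are the library's real API, the helper itself is illustrative and uses the variant names from the constants above.

```go
package main

import (
	"fmt"

	"github.com/klauspost/cpuid/v2"
)

// pickCPUVariant chooses the most capable llama.cpp CPU build the host can
// run, in the same order as selectGRPCProcess: AVX2 first, then AVX, then
// the un-optimized fallback build.
func pickCPUVariant() string {
	switch {
	case cpuid.CPU.Supports(cpuid.AVX2):
		return "llama-cpp-avx2"
	case cpuid.CPU.Supports(cpuid.AVX):
		return "llama-cpp-avx"
	default:
		return "llama-cpp-fallback"
	}
}

func main() {
	fmt.Println("selected CPU variant:", pickCPUVariant())
}
```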
@@ -207,8 +216,6 @@ func selectGRPCProcess(backend, assetDir string) string {
	return grpcProcess
}

-var autoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"
-
// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (ModelAddress, error) {