
Commit 793c45d (parent: cacdf67)

minor fixups

Signed-off-by: mudler <[email protected]>

1 file changed: 16 additions, 9 deletions

pkg/model/initializers.go
@@ -12,9 +12,9 @@ import (
 
     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
     "github.com/go-skynet/LocalAI/pkg/xsysinfo"
+    "github.com/klauspost/cpuid/v2"
     "github.com/phayes/freeport"
     "github.com/rs/zerolog/log"
-    "golang.org/x/sys/cpu"
 
     "github.com/elliotchance/orderedmap/v2"
 )
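Note: the import swap replaces golang.org/x/sys/cpu with github.com/klauspost/cpuid/v2, matching the xsysinfo.HasCPUCaps calls further down. The xsysinfo helper itself is not part of this diff; a minimal sketch of what such a wrapper over cpuid/v2 could look like (an assumption, not the actual implementation):

```go
// Sketch of a HasCPUCaps-style helper built on klauspost/cpuid/v2.
// The real xsysinfo package may differ; this is only an illustration.
package xsysinfo

import "github.com/klauspost/cpuid/v2"

// HasCPUCaps reports whether the host CPU supports all given features.
// cpuid.CPU is populated once at package init, so this is a cheap lookup.
func HasCPUCaps(caps ...cpuid.FeatureID) bool {
	return cpuid.CPU.Supports(caps...)
}
```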
@@ -26,12 +26,13 @@ var Aliases map[string]string = map[string]string{
     "langchain-huggingface": LCHuggingFaceBackend,
 }
 
+var autoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"
+
 const (
     LlamaGGML = "llama-ggml"
 
     LLamaCPP = "llama-cpp"
 
-    LLamaCPPCUDA12   = "llama-cpp-cuda12"
     LLamaCPPAVX2     = "llama-cpp-avx2"
     LLamaCPPAVX      = "llama-cpp-avx"
     LLamaCPPFallback = "llama-cpp-fallback"
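Note: the autoDetect flag is moved up next to the backend constants (it previously sat below selectGRPCProcess; see the last hunk). Its semantics are narrow: only the literal string "true" disables autodetection. A small self-contained illustration of that comparison:

```go
// Illustration of the flag semantics: only DISABLE_AUTODETECT=true turns
// autodetection off; any other value, or an unset variable, leaves it on.
package main

import (
	"fmt"
	"os"
)

func main() {
	for _, v := range []string{"", "1", "true"} {
		os.Setenv("DISABLE_AUTODETECT", v)
		autoDetect := os.Getenv("DISABLE_AUTODETECT") != "true"
		fmt.Printf("DISABLE_AUTODETECT=%q -> autoDetect=%v\n", v, autoDetect)
	}
}
```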
@@ -90,8 +91,9 @@ ENTRY:
 
     // if we are autoDetecting, we want to show the llama.cpp variants as a single backend
     if autoDetect {
-        // if we find the llama.cpp variants, show them of as a single backend (llama-cpp)
-        foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC := false, false, false, false
+        // if we find the llama.cpp variants, show them as a single backend (llama-cpp), as later we are going to pick it up
+        // when starting the service
+        foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda := false, false, false, false, false
         if _, ok := backends[LLamaCPP]; !ok {
             for _, e := range entry {
                 if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
@@ -110,6 +112,10 @@ ENTRY:
                     backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPGRPC)
                     foundLCPPGRPC = true
                 }
+                if strings.Contains(e.Name(), LLamaCPPCUDA) && !foundLCPPCuda {
+                    backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
+                    foundLCPPCuda = true
+                }
             }
         }
     }
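Note: the grouping loop registers each discovered llama.cpp build once under the single llama-cpp name, so the API lists one backend while the concrete variant is chosen at startup; this hunk adds the CUDA build to that grouping. A condensed sketch of the idea (the asset-directory path and literal variant strings are assumptions mirroring the constants in the diff):

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// Variant names mirroring the constants in the diff (assumed values).
var variants = []string{
	"llama-cpp-avx2", "llama-cpp-avx", "llama-cpp-fallback",
	"llama-cpp-grpc", "llama-cpp-cuda",
}

// groupVariants registers every llama-cpp-* binary found in dir exactly once
// under the single "llama-cpp" key, as the autoDetect branch above does.
func groupVariants(dir string) (map[string][]string, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return nil, err
	}
	backends := map[string][]string{}
	seen := map[string]bool{}
	for _, e := range entries {
		for _, v := range variants {
			if strings.Contains(e.Name(), v) && !seen[v] {
				backends["llama-cpp"] = append(backends["llama-cpp"], v)
				seen[v] = true
			}
		}
	}
	return backends, nil
}

func main() {
	backends, err := groupVariants("backend-assets/grpc") // hypothetical path
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	fmt.Println(backends)
}
```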
@@ -172,18 +178,21 @@ func selectGRPCProcess(backend, assetDir string) string {
 
     // Note: This environment variable is read by the LocalAI's llama.cpp grpc-server
     if os.Getenv("LLAMACPP_GRPC_SERVERS") != "" {
+        log.Info().Msgf("[%s] attempting to load with GRPC variant", LLamaCPPGRPC)
         return backendPath(assetDir, LLamaCPPGRPC)
     }
 
     gpus, err := xsysinfo.GPUs()
     if err == nil {
         for _, gpu := range gpus {
             if strings.Contains(gpu.String(), "nvidia") {
-                log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
                 p := backendPath(assetDir, LLamaCPPCUDA)
                 if _, err := os.Stat(p); err == nil {
+                    log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
                     grpcProcess = p
                     foundCUDA = true
+                } else {
+                    log.Info().Msgf("GPU device found but no CUDA backend present")
                 }
             }
         }
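Note: the CUDA log line now fires only after the binary's existence is confirmed with os.Stat, and a new message flags the case of a GPU without a CUDA build present. xsysinfo.GPUs() itself is not shown in this diff; a plausible sketch of such a helper, assuming it wraps github.com/jaypipes/ghw (an assumption on my part), would be:

```go
// Hypothetical sketch of a GPUs() helper like xsysinfo.GPUs, assuming it
// wraps github.com/jaypipes/ghw; the real implementation is not in this diff.
package xsysinfo

import (
	"github.com/jaypipes/ghw"
	"github.com/jaypipes/ghw/pkg/gpu"
)

// GPUs enumerates the graphics cards ghw can discover on the host.
func GPUs() ([]*gpu.GraphicsCard, error) {
	info, err := ghw.GPU()
	if err != nil {
		return nil, err
	}
	return info.GraphicsCards, nil
}
```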
@@ -193,10 +202,10 @@ func selectGRPCProcess(backend, assetDir string) string {
         return grpcProcess
     }
 
-    if cpu.X86.HasAVX2 {
+    if xsysinfo.HasCPUCaps(cpuid.AVX2) {
         log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
         grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
-    } else if cpu.X86.HasAVX {
+    } else if xsysinfo.HasCPUCaps(cpuid.AVX) {
         log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
         grpcProcess = backendPath(assetDir, LLamaCPPAVX)
     } else {
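Note: putting the pieces together, the selection order in selectGRPCProcess after this commit is: the distributed gRPC build when LLAMACPP_GRPC_SERVERS is set, then CUDA when an NVIDIA GPU and the CUDA binary are both present, then AVX2, AVX, and finally the fallback build. A condensed, runnable restatement (the function and its boolean parameters are illustrative, not the actual code):

```go
package main

import (
	"fmt"
	"os"

	"github.com/klauspost/cpuid/v2"
)

// pickVariant restates the selection order from the diff; the boolean
// parameters stand in for the GPU scan and the os.Stat check performed above.
func pickVariant(hasNvidiaGPU, cudaBinaryPresent bool) string {
	switch {
	case os.Getenv("LLAMACPP_GRPC_SERVERS") != "":
		return "llama-cpp-grpc"
	case hasNvidiaGPU && cudaBinaryPresent:
		return "llama-cpp-cuda"
	case cpuid.CPU.Supports(cpuid.AVX2):
		return "llama-cpp-avx2"
	case cpuid.CPU.Supports(cpuid.AVX):
		return "llama-cpp-avx"
	default:
		return "llama-cpp-fallback"
	}
}

func main() {
	fmt.Println(pickVariant(false, false))
}
```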
@@ -207,8 +216,6 @@ func selectGRPCProcess(backend, assetDir string) string {
     return grpcProcess
 }
 
-var autoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"
-
 // starts the grpcModelProcess for the backend, and returns a grpc client
 // It also loads the model
 func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (ModelAddress, error) {
