
Commit 921e04a

chore: bump localai v2.26.0 (#487)
Signed-off-by: Sertac Ozercan <[email protected]>
1 parent a1624c3 commit 921e04a

File tree: 11 files changed (+11, -124 lines)

.github/workflows/test-docker.yaml

Lines changed: 0 additions & 22 deletions
@@ -29,13 +29,9 @@ jobs:
       matrix:
         backend:
           - llama
-          - stablediffusion
         arch:
           - amd64
           - arm64
-        exclude:
-          - backend: stablediffusion
-            arch: arm64
     steps:
       - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
         with:
@@ -127,28 +123,10 @@
            exit 1
          fi
 
-      - name: run stablediffusion test
-        if: matrix.backend == 'stablediffusion'
-        run: |
-          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{
-            "prompt": "A cute baby llama",
-            "size": "256x256"
-          }')
-          echo $result
-
-          url=$(echo "$result" | jq '.data[0].url')
-          if [ -z "$url" ]; then
-            exit 1
-          fi
-
       - name: save logs
         if: always()
         run: docker logs testmodel > /tmp/docker-${{ matrix.backend }}.log
 
-      - name: save generated image
-        if: matrix.backend == 'stablediffusion'
-        run: docker cp testmodel:/tmp/generated/images /tmp
-
       - name: publish test artifacts
         if: always()
         uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1

.github/workflows/test-podman-applesilicon.yaml

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ jobs:
 
       - name: run test (gguf)
         run: |
-          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+          result=$(curl --fail --retry 10 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
             "model": "llama-3.2-1b-instruct",
             "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
           }')
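
For context, this is the chat-completions smoke test the workflows drive with curl. Below is a minimal Go sketch of the same request, assuming a container is already serving on localhost:8080; the retry loop and error handling are illustrative and not taken from this repository:

// smoke.go — a sketch, not part of this commit: the same request the
// workflow sends with curl, against a container assumed to be listening
// on localhost:8080. The retry count mirrors curl's --retry 10.
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"os"
	"time"
)

func main() {
	payload := []byte(`{
  "model": "llama-3.2-1b-instruct",
  "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
}`)

	var resp *http.Response
	for attempt := 0; attempt < 10; attempt++ {
		r, err := http.Post("http://localhost:8080/v1/chat/completions",
			"application/json", bytes.NewReader(payload))
		if err == nil && r.StatusCode == http.StatusOK {
			resp = r
			break
		}
		if err == nil {
			r.Body.Close() // non-200 response: discard and retry
		}
		time.Sleep(2 * time.Second)
	}
	if resp == nil {
		fmt.Fprintln(os.Stderr, "chat completions endpoint never became ready")
		os.Exit(1)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(string(body)) // CI pipes the equivalent curl output through jq
}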

pkg/aikit/config/specs_test.go

Lines changed: 0 additions & 2 deletions
@@ -24,7 +24,6 @@ apiVersion: v1alpha1
 runtime: cuda
 backends:
   - exllama2
-  - stablediffusion
 models:
   - name: test
     source: foo
@@ -34,7 +33,6 @@ models:
 			Runtime:    utils.RuntimeNVIDIA,
 			Backends:   []string{
 				utils.BackendExllamaV2,
-				utils.BackendStableDiffusion,
 			},
 			Models: []Model{
 				{

pkg/aikit2llb/inference/convert.go

Lines changed: 1 addition & 3 deletions
@@ -14,7 +14,7 @@ import (
 const (
 	distrolessBase = "ghcr.io/sozercan/base:latest"
 	localAIRepo    = "https://github.com/mudler/LocalAI"
-	localAIVersion = "v2.25.0"
+	localAIVersion = "v2.26.0"
 	cudaVersion    = "12-5"
 )
 
@@ -49,8 +49,6 @@ func Aikit2LLB(c *config.InferenceConfig, platform *specs.Platform) (llb.State,
 		switch c.Backends[b] {
 		case utils.BackendExllamaV2:
 			merge = installExllama(state, merge)
-		case utils.BackendStableDiffusion:
-			merge = installOpenCV(state, merge)
 		case utils.BackendMamba:
 			merge = installMamba(state, merge)
 		case utils.BackendDiffusers:

pkg/aikit2llb/inference/stablediffusion.go

Lines changed: 0 additions & 30 deletions
This file was deleted.

pkg/build/build.go

Lines changed: 1 addition & 5 deletions
@@ -452,10 +452,6 @@ func validateInferenceConfig(c *config.InferenceConfig) error {
 		return errors.New("only one backend is supported at this time")
 	}
 
-	if slices.Contains(c.Backends, utils.BackendStableDiffusion) && (slices.Contains(c.Backends, utils.BackendExllamaV2)) {
-		return errors.New("cannot specify both stablediffusion with exllama2 at this time")
-	}
-
 	if (slices.Contains(c.Backends, utils.BackendExllamaV2) || slices.Contains(c.Backends, utils.BackendMamba) || slices.Contains(c.Backends, utils.BackendDiffusers)) && c.Runtime != utils.RuntimeNVIDIA {
 		return errors.New("exllama, mamba, and diffusers backends only supports nvidia cuda runtime. please add 'runtime: cuda' to your aikitfile.yaml")
 	}
@@ -464,7 +460,7 @@ func validateInferenceConfig(c *config.InferenceConfig) error {
 		return errors.New("apple silicon runtime only supports the default llama-cpp backend")
 	}
 
-	backends := []string{utils.BackendExllamaV2, utils.BackendStableDiffusion, utils.BackendMamba, utils.BackendDiffusers}
+	backends := []string{utils.BackendExllamaV2, utils.BackendMamba, utils.BackendDiffusers}
 	for _, b := range c.Backends {
 		if !slices.Contains(backends, b) {
 			return errors.Errorf("backend %s is not supported", b)
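
With the special-case conflict check removed, a config that still lists stablediffusion simply falls through to the generic supported-backends check above and is rejected there. The following is a sketch of that behavior, not code from this commit; it reuses field values from the tests in this change, and the import path and placement inside package build are assumptions (validateInferenceConfig is unexported):

// Hypothetical example test inside pkg/build — not part of this commit.
package build

import (
	"fmt"

	"github.com/sozercan/aikit/pkg/aikit/config" // import path assumed from the file tree
)

func Example_stableDiffusionRejected() {
	c := &config.InferenceConfig{
		APIVersion: "v1alpha1",
		Runtime:    "cuda",
		Backends:   []string{"stablediffusion"}, // no longer in the supported list
		Models: []config.Model{
			{Name: "test", Source: "foo"}, // values borrowed from specs_test.go; Source field name assumed
		},
	}
	fmt.Println(validateInferenceConfig(c))
	// Expected: backend stablediffusion is not supported
}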

pkg/build/build_test.go

Lines changed: 1 addition & 1 deletion
@@ -84,7 +84,7 @@ func Test_validateConfig(t *testing.T) {
 			args: args{c: &config.InferenceConfig{
 				APIVersion: "v1alpha1",
 				Runtime:    "cuda",
-				Backends:   []string{"exllama", "stablediffusion"},
+				Backends:   []string{"exllama", "diffusers"},
 				Models: []config.Model{
 					{
 						Name:   "test",

pkg/utils/const.go

Lines changed: 3 additions & 4 deletions
@@ -4,10 +4,9 @@ const (
 	RuntimeNVIDIA       = "cuda"
 	RuntimeAppleSilicon = "applesilicon" // experimental apple silicon runtime with vulkan arm64 support
 
-	BackendStableDiffusion = "stablediffusion"
-	BackendExllamaV2       = "exllama2"
-	BackendMamba           = "mamba"
-	BackendDiffusers       = "diffusers"
+	BackendExllamaV2 = "exllama2"
+	BackendMamba     = "mamba"
+	BackendDiffusers = "diffusers"
 
 	TargetUnsloth = "unsloth"
 

test/aikitfile-stablediffusion.yaml

Lines changed: 0 additions & 50 deletions
This file was deleted.

website/docs/diffusion.md

Lines changed: 3 additions & 3 deletions
@@ -2,7 +2,7 @@
 title: Diffusion
 ---
 
-AIKit supports [`diffusers`](#diffusers) and [`stablediffusion`](#stablediffusion) backends.
+AIKit supports [`diffusers`](#diffusers) backend.
 
 ## diffusers
 
@@ -16,7 +16,7 @@ Please make sure to change syntax to `#syntax=ghcr.io/sozercan/aikit:latest` in
 
 https://github.com/sozercan/aikit/blob/main/test/aikitfile-diffusers.yaml
 
-## stablediffusion
+## stablediffusion NCNN
 
 https://github.com/EdVince/Stable-Diffusion-NCNN
 
@@ -25,7 +25,7 @@ This backend:
 - does not support CUDA runtime yet
 
 :::note
-This is an experimental backend and it may change in the future.
+This has been deprecated as of `v0.18.0` release.
 :::
 
 ### Example

website/docs/specs-inference.md

Lines changed: 1 addition & 3 deletions
@@ -8,7 +8,7 @@ title: Inference API Specifications
 apiVersion: # required. only v1alpha1 is supported at the moment
 debug: # optional. if set to true, debug logs will be printed
 runtime: # optional. defaults to avx. can be "avx", "avx2", "avx512", "cuda"
-backends: # optional. list of additional backends. can be "stablediffusion", "exllama2", "diffusers", "mamba"
+backends: # optional. list of additional backends. can be "exllama2", "diffusers", "mamba"
 models: # required. list of models to build
   - name: # required. name of the model
     source: # required. source of the model. can be a url or a local file
@@ -26,8 +26,6 @@ Example:
 apiVersion: v1alpha1
 debug: true
 runtime: cuda
-backends:
-  - stablediffusion
 models:
   - name: llama-2-7b-chat
     source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
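
For comparison, a sketch of the struct form this example unmarshals into, using only the config.InferenceConfig fields visible in this commit's tests (APIVersion, Runtime, Backends, Models, Name, Source); the debug key is omitted because its struct field does not appear in the diff, and the import path is an assumption:

// Hypothetical snippet — not part of this commit.
package example

import "github.com/sozercan/aikit/pkg/aikit/config" // import path assumed

// exampleConfig mirrors the YAML example above. Backends is left empty:
// the key is optional, and stablediffusion is no longer accepted.
func exampleConfig() *config.InferenceConfig {
	return &config.InferenceConfig{
		APIVersion: "v1alpha1",
		Runtime:    "cuda",
		Models: []config.Model{
			{
				Name:   "llama-2-7b-chat",
				Source: "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf",
			},
		},
	}
}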
