
Commit 171c55d

Merge branch 'master' into fix-pr-folder-tasks
2 parents 3053f77 + 1392093

98 files changed: +1633, -663 lines changed


.devcontainer-scripts/utils.sh

Lines changed: 2 additions & 0 deletions
@@ -9,6 +9,7 @@
 # Param 2: email
 #
 config_user() {
+    echo "Configuring git for $1 <$2>"
     local gcn=$(git config --global user.name)
     if [ -z "${gcn}" ]; then
         echo "Setting up git user / remote"
@@ -24,6 +25,7 @@ config_user() {
 # Param 2: remote url
 #
 config_remote() {
+    echo "Adding git remote and fetching $2 as $1"
     local gr=$(git remote -v | grep $1)
     if [ -z "${gr}" ]; then
         git remote add $1 $2

.github/check_and_update.py

Lines changed: 8 additions & 3 deletions
@@ -29,9 +29,14 @@ def calculate_sha256(file_path):
 def manual_safety_check_hf(repo_id):
     scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
     scan = scanResponse.json()
-    if scan['hasUnsafeFile']:
-        return scan
-    return None
+    # Check if 'hasUnsafeFile' exists in the response
+    if 'hasUnsafeFile' in scan:
+        if scan['hasUnsafeFile']:
+            return scan
+        else:
+            return None
+    else:
+        return None

 download_type, repo_id_or_url = parse_uri(uri)
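
The added guard protects against scan responses that omit the 'hasUnsafeFile' key, which previously raised a KeyError. For comparison, a minimal sketch of equivalent logic using dict.get — the endpoint is copied from the diff; the rest is illustrative and not part of the commit:

import requests

def manual_safety_check_hf(repo_id):
    # Return the scan payload only when the API reports an unsafe file;
    # return None when the key is absent or falsy (same behaviour as the patch).
    scan = requests.get("https://huggingface.co/api/models/" + repo_id + "/scan").json()
    return scan if scan.get("hasUnsafeFile") else None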

.github/workflows/image.yml

Lines changed: 73 additions & 44 deletions
@@ -13,6 +13,78 @@ concurrency:
   cancel-in-progress: true

 jobs:
+  hipblas-jobs:
+    uses: ./.github/workflows/image_build.yml
+    with:
+      tag-latest: ${{ matrix.tag-latest }}
+      tag-suffix: ${{ matrix.tag-suffix }}
+      ffmpeg: ${{ matrix.ffmpeg }}
+      image-type: ${{ matrix.image-type }}
+      build-type: ${{ matrix.build-type }}
+      cuda-major-version: ${{ matrix.cuda-major-version }}
+      cuda-minor-version: ${{ matrix.cuda-minor-version }}
+      platforms: ${{ matrix.platforms }}
+      runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
+      grpc-base-image: ${{ matrix.grpc-base-image }}
+      aio: ${{ matrix.aio }}
+      makeflags: ${{ matrix.makeflags }}
+      latest-image: ${{ matrix.latest-image }}
+      latest-image-aio: ${{ matrix.latest-image-aio }}
+    secrets:
+      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+    strategy:
+      # Pushing with all jobs in parallel
+      # eats the bandwidth of all the nodes
+      max-parallel: 2
+      matrix:
+        include:
+          - build-type: 'hipblas'
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-hipblas-ffmpeg'
+            ffmpeg: 'true'
+            image-type: 'extras'
+            aio: "-aio-gpu-hipblas"
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            grpc-base-image: "ubuntu:22.04"
+            latest-image: 'latest-gpu-hipblas'
+            latest-image-aio: 'latest-aio-gpu-hipblas'
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+          - build-type: 'hipblas'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-hipblas'
+            ffmpeg: 'false'
+            image-type: 'extras'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            grpc-base-image: "ubuntu:22.04"
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+          - build-type: 'hipblas'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-hipblas-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            grpc-base-image: "ubuntu:22.04"
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+          - build-type: 'hipblas'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-hipblas-core'
+            ffmpeg: 'false'
+            image-type: 'core'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            grpc-base-image: "ubuntu:22.04"
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
   self-hosted-jobs:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -39,7 +111,7 @@ jobs:
     strategy:
       # Pushing with all jobs in parallel
      # eats the bandwidth of all the nodes
-      max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
+      max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
       matrix:
         include:
           # Extra images
@@ -122,29 +194,6 @@ jobs:
             base-image: "ubuntu:22.04"
             runs-on: 'arc-runner-set'
             makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-hipblas-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            aio: "-aio-gpu-hipblas"
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            latest-image: 'latest-gpu-hipblas'
-            latest-image-aio: 'latest-aio-gpu-hipblas'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas'
-            ffmpeg: 'false'
-            image-type: 'extras'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
@@ -212,26 +261,6 @@ jobs:
             image-type: 'core'
             runs-on: 'arc-runner-set'
             makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"

   core-image-build:
     uses: ./.github/workflows/image_build.yml

.github/workflows/secscan.yaml

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ jobs:
         if: ${{ github.actor != 'dependabot[bot]' }}
       - name: Run Gosec Security Scanner
         if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: securego/[email protected].0
+        uses: securego/[email protected].4
         with:
           # we let the report trigger content trigger a failure using the GitHub Security features.
           args: '-no-fail -fmt sarif -out results.sarif ./...'

.github/workflows/test.yml

Lines changed: 10 additions & 1 deletion
@@ -189,13 +189,22 @@ jobs:
         uses: actions/checkout@v4
         with:
           submodules: true
+      - name: Dependencies
+        run: |
+          # Install protoc
+          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
+          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
+          rm protoc.zip
+          go install google.golang.org/protobuf/cmd/[email protected]
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          PATH="$PATH:$HOME/go/bin" make protogen-go
       - name: Build images
         run: |
           docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
           BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
       - name: Test
         run: |
-          LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
+          PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
           make run-e2e-aio
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}

Dockerfile

Lines changed: 6 additions & 7 deletions
@@ -297,10 +297,10 @@ COPY .git .
 RUN make prepare

 ## Build the binary
-## If it's CUDA, we want to skip some of the llama-compat backends to save space
-## We only leave the most CPU-optimized variant and the fallback for the cublas build
-## (both will use CUDA for the actual computation)
-RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
+## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
+## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
+## (both will use CUDA or hipblas for the actual computation)
+RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
         SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
     else \
         make build; \
@@ -338,9 +338,8 @@ RUN if [ "${FFMPEG}" = "true" ]; then \

 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-        ssh less && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+        ssh less wget
+# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.

 RUN go install github.com/go-delve/delve/cmd/dlv@latest
Makefile

Lines changed: 6 additions & 3 deletions
@@ -8,15 +8,15 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=6262d13e0b2da91f230129a93a996609a2f5a2f2
+CPPLLAMA_VERSION?=6f1d9d71f4c568778a7637ff6582e6f6ba5fb9d3

 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=049b3a0e53c8a8e4c4576c06a1a4fccf0063a73f
+WHISPER_CPP_VERSION?=8feb375fbdf0277ad36958c218c6bf48fa0ba75a

 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -359,6 +359,9 @@ clean-tests:
     rm -rf test-dir
     rm -rf core/http/backend-assets

+clean-dc: clean
+    cp -r /build/backend-assets /workspace/backend-assets
+
 ## Build:
 build: prepare backend-assets grpcs ## Build the project
     $(info ${GREEN}I local-ai build info:${RESET})
@@ -465,7 +468,7 @@ run-e2e-image:
     ls -liah $(abspath ./tests/e2e-fixtures)
     docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests

-run-e2e-aio:
+run-e2e-aio: protogen-go
     @echo 'Running e2e AIO tests'
     $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio

aio/cpu/vision.yaml

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@ backend: llama-cpp
 context_size: 4096
 f16: true
 mmap: true
-name: gpt-4-vision-preview
+name: gpt-4o

 roles:
   user: "USER:"

aio/gpu-8g/vision.yaml

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@ backend: llama-cpp
 context_size: 4096
 f16: true
 mmap: true
-name: gpt-4-vision-preview
+name: gpt-4o

 roles:
   user: "USER:"

aio/intel/vision.yaml

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@ backend: llama-cpp
 context_size: 4096
 mmap: false
 f16: false
-name: gpt-4-vision-preview
+name: gpt-4o

 roles:
   user: "USER:"
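
All three AIO vision presets are renamed from gpt-4-vision-preview to gpt-4o. A minimal sketch of calling the renamed preset through LocalAI's OpenAI-compatible API — the base URL, API key, and image URL are illustrative assumptions, not part of the commit:

from openai import OpenAI

# Assumes a LocalAI AIO container listening on localhost:8080; the key is ignored locally.
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

resp = client.chat.completions.create(
    model="gpt-4o",  # the new preset name (previously gpt-4-vision-preview)
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    }],
)
print(resp.choices[0].message.content)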

backend/backend.proto

Lines changed: 3 additions & 0 deletions
@@ -134,6 +134,9 @@ message PredictOptions {
   repeated string Images = 42;
   bool UseTokenizerTemplate = 43;
   repeated Message Messages = 44;
+  repeated string Videos = 45;
+  repeated string Audios = 46;
+  string CorrelationId = 47;
 }

 // The response message containing the result
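
The three new fields extend PredictOptions with multimodal inputs and a request-tracing id. A minimal sketch of how a client could populate them, assuming Python stubs generated from backend.proto — the backend_pb2 module name and the Prompt field are assumptions, not shown in this diff:

import backend_pb2  # hypothetical: stubs generated with grpcio-tools from backend.proto

opts = backend_pb2.PredictOptions(
    Prompt="describe the clip",  # assumed pre-existing field
    Videos=["/tmp/clip.mp4"],    # repeated string Videos = 45 (new)
    Audios=["/tmp/track.wav"],   # repeated string Audios = 46 (new)
    CorrelationId="req-1234",    # string CorrelationId = 47 (new)
)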

backend/cpp/llama/grpc-server.cpp

Lines changed: 14 additions & 0 deletions
@@ -2106,6 +2106,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     data["ignore_eos"] = predict->ignoreeos();
     data["embeddings"] = predict->embeddings();

+    // Add the correlationid to json data
+    data["correlation_id"] = predict->correlationid();
+
     // for each image in the request, add the image data
     //
     for (int i = 0; i < predict->images_size(); i++) {
@@ -2344,6 +2347,11 @@ class BackendServiceImpl final : public backend::Backend::Service {
             int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
             reply.set_prompt_tokens(tokens_evaluated);

+            // Log Request Correlation Id
+            LOG_VERBOSE("correlation:", {
+                { "id", data["correlation_id"] }
+            });
+
             // Send the reply
             writer->Write(reply);

@@ -2367,6 +2375,12 @@ class BackendServiceImpl final : public backend::Backend::Service {
         std::string completion_text;
         task_result result = llama.queue_results.recv(task_id);
         if (!result.error && result.stop) {
+
+            // Log Request Correlation Id
+            LOG_VERBOSE("correlation:", {
+                { "id", data["correlation_id"] }
+            });
+
             completion_text = result.result_json.value("content", "");
             int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
             int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);

backend/python/autogptq/requirements-intel.txt

Lines changed: 1 addition & 1 deletion
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.66.1
+grpcio==1.66.2
 protobuf
 certifi
 transformers

backend/python/bark/requirements-intel.txt

Lines changed: 1 addition & 1 deletion
@@ -3,6 +3,6 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 transformers
 accelerate

backend/python/bark/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.66.1
+grpcio==1.66.2
 protobuf
 certifi

Lines changed: 1 addition & 1 deletion

@@ -1,2 +1,2 @@
-grpcio==1.66.1
+grpcio==1.66.2
 protobuf

backend/python/coqui/requirements-intel.txt

Lines changed: 1 addition & 1 deletion
@@ -3,6 +3,6 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 transformers
 accelerate
