
Commit a4048b7

[assets] fix docker images (#8203)
1 parent 73b12ba commit a4048b7

13 files changed (+56, -40 lines)

.dockerignore

Lines changed: 1 addition & 1 deletion

```diff
@@ -3,12 +3,12 @@
 .github
 .venv
 cache
-data
 docker
 saves
 hf_cache
 ms_cache
 om_cache
+shared_data
 output
 .dockerignore
 .gitattributes
```
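The net effect: the old `data` directory is no longer ignored and returns to the Docker build context, while the new `shared_data` directory is excluded. A quick way to sanity-check which files actually reach the builder; a throwaway sketch, assuming BuildKit (`--progress=plain` only makes the `RUN` output visible):

```bash
# Build a disposable image whose only job is to list the build context.
printf 'FROM busybox\nCOPY . /ctx\nRUN ls -la /ctx\n' \
    | docker build --progress=plain --no-cache -f - .
```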

.github/workflows/push_docker.yml renamed to .github/workflows/docker.yml

Lines changed: 10 additions & 2 deletions

```diff
@@ -1,4 +1,4 @@
-name: push_docker
+name: docker
 
 on:
   workflow_dispatch:
@@ -32,7 +32,15 @@ jobs:
       url: https://hub.docker.com/r/hiyouga/llamafactory
 
     steps:
-      - name: Checkout repository
+      - name: Free up disk space
+        run: |
+          df -h
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache
+          df -h
+
+      - name: Checkout
         uses: actions/checkout@v4
 
       - name: Set up Docker Buildx
```
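The cleanup step exists because GitHub-hosted runners ship with limited free disk, and the image built by this workflow grows considerably once the `-devel` base images below are used. A minimal sketch of the same idea for local experimentation, assuming an Ubuntu-runner-like layout (paths and reclaimed sizes vary by runner image):

```bash
#!/usr/bin/env bash
set -euo pipefail

df -h /                           # free space before cleanup
# Preinstalled toolchains that a Docker build job rarely needs;
# each occupies several gigabytes on a hosted runner.
sudo rm -rf /usr/share/dotnet     # .NET SDKs
sudo rm -rf /opt/ghc              # Haskell (GHC) toolchain
sudo rm -rf /opt/hostedtoolcache  # cached tool versions
df -h /                           # free space after cleanup
```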

README.md

Lines changed: 17 additions & 8 deletions

````diff
@@ -474,16 +474,25 @@ huggingface-cli login
 > [!IMPORTANT]
 > Installation is mandatory.
 
+#### Install from Source
+
 ```bash
 git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
 cd LLaMA-Factory
 pip install -e ".[torch,metrics]" --no-build-isolation
 ```
 
-Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, aqlm, vllm, sglang, galore, apollo, badam, adam-mini, qwen, minicpm_v, modelscope, openmind, swanlab, quality
+Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, aqlm, vllm, sglang, galore, apollo, badam, adam-mini, qwen, minicpm_v, modelscope, openmind, swanlab, dev
 
-> [!TIP]
-> Use `pip install -e . --no-deps --no-build-isolation` to resolve package conflicts.
+#### Install from Docker Image
+
+```bash
+docker run -it --rm --gpus=all --ipc=host hiyouga/llamafactory:latest
+```
+
+Find the pre-built images: https://hub.docker.com/r/hiyouga/llamafactory/tags
+
+Please refer to [build docker](#build-docker) to build the image yourself.
 
 <details><summary>Setting up a virtual environment with <b>uv</b></summary>
 
@@ -671,7 +680,7 @@ docker run -dit --ipc=host --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
-    -v ./data:/app/data \
+    -v ./shared_data:/app/shared_data \
     -v ./output:/app/output \
     -p 7860:7860 \
     -p 8000:8000 \
@@ -686,14 +695,14 @@ For Ascend NPU users:
 ```bash
 docker build -f ./docker/docker-npu/Dockerfile \
     --build-arg PIP_INDEX=https://pypi.org/simple \
-    --build-arg EXTRAS=metrics \
+    --build-arg EXTRAS=torch-npu,metrics \
     -t llamafactory:latest .
 
 docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
-    -v ./data:/app/data \
+    -v ./shared_data:/app/shared_data \
     -v ./output:/app/output \
     -v /usr/local/dcmi:/usr/local/dcmi \
     -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
@@ -723,7 +732,7 @@ docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
-    -v ./data:/app/data \
+    -v ./shared_data:/app/shared_data \
     -v ./output:/app/output \
     -p 7860:7860 \
     -p 8000:8000 \
@@ -742,7 +751,7 @@ docker exec -it llamafactory bash
 - `hf_cache`: Utilize Hugging Face cache on the host machine. Reassignable if a cache already exists in a different directory.
 - `ms_cache`: Similar to Hugging Face cache but for ModelScope users.
 - `om_cache`: Similar to Hugging Face cache but for Modelers users.
-- `data`: Place datasets on this dir of the host machine so that they can be selected on LLaMA Board GUI.
+- `shared_data`: Place datasets on this dir of the host machine so that they can be selected on LLaMA Board GUI.
 - `output`: Set export dir to this location so that the merged result can be accessed directly on the host machine.
 
 </details>
````
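To exercise the renamed mount end to end, a hedged sketch using the pre-built image; the dataset file name is hypothetical, and `--name llamafactory` is added so the `docker exec` line from the README works:

```bash
mkdir -p hf_cache ms_cache om_cache shared_data output
cp my_dataset.json shared_data/   # hypothetical dataset file

docker run -dit --name llamafactory --ipc=host --gpus=all \
    -v ./hf_cache:/root/.cache/huggingface \
    -v ./ms_cache:/root/.cache/modelscope \
    -v ./om_cache:/root/.cache/openmind \
    -v ./shared_data:/app/shared_data \
    -v ./output:/app/output \
    -p 7860:7860 \
    hiyouga/llamafactory:latest

# Files under ./shared_data appear at /app/shared_data inside the container
# and become selectable in the LLaMA Board GUI at http://localhost:7860.
docker exec -it llamafactory bash
```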

README_zh.md

Lines changed: 17 additions & 8 deletions

````diff
@@ -476,16 +476,25 @@ huggingface-cli login
 > [!IMPORTANT]
 > This step is required.
 
+#### Install from Source
+
 ```bash
 git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
 cd LLaMA-Factory
 pip install -e ".[torch,metrics]" --no-build-isolation
 ```
 
-Optional extra dependencies: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, aqlm, vllm, sglang, galore, apollo, badam, adam-mini, qwen, minicpm_v, modelscope, openmind, swanlab, quality
+Optional extra dependencies: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, aqlm, vllm, sglang, galore, apollo, badam, adam-mini, qwen, minicpm_v, modelscope, openmind, swanlab, dev
 
-> [!TIP]
-> When running into package conflicts, use `pip install -e . --no-deps --no-build-isolation` to resolve them.
+#### Install from Docker Image
+
+```bash
+docker run -it --rm --gpus=all --ipc=host hiyouga/llamafactory:latest
+```
+
+View all images: https://hub.docker.com/r/hiyouga/llamafactory/tags
+
+Please refer to [Build Docker](#构建-docker) to rebuild the image yourself.
 
 <details><summary>Setting up a virtual environment with <b>uv</b></summary>
 
@@ -673,7 +682,7 @@ docker run -dit --ipc=host --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
-    -v ./data:/app/data \
+    -v ./shared_data:/app/shared_data \
     -v ./output:/app/output \
     -p 7860:7860 \
     -p 8000:8000 \
@@ -688,14 +697,14 @@ docker exec -it llamafactory bash
 ```bash
 docker build -f ./docker/docker-npu/Dockerfile \
     --build-arg PIP_INDEX=https://pypi.org/simple \
-    --build-arg EXTRAS=metrics \
+    --build-arg EXTRAS=torch-npu,metrics \
     -t llamafactory:latest .
 
 docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
-    -v ./data:/app/data \
+    -v ./shared_data:/app/shared_data \
    -v ./output:/app/output \
     -v /usr/local/dcmi:/usr/local/dcmi \
     -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
@@ -725,7 +734,7 @@ docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
-    -v ./data:/app/data \
+    -v ./shared_data:/app/shared_data \
     -v ./output:/app/output \
     -p 7860:7860 \
     -p 8000:8000 \
@@ -744,7 +753,7 @@ docker exec -it llamafactory bash
 - `hf_cache`: Use the host machine's Hugging Face cache directory; it may be redirected to a new directory.
 - `ms_cache`: Similar to the Hugging Face cache directory, provided for ModelScope users.
 - `om_cache`: Similar to the Hugging Face cache directory, provided for Modelers users.
-- `data`: Host directory that stores the datasets.
+- `shared_data`: Host directory that stores the datasets.
 - `output`: Set the export directory to this path to access the merged model directly on the host machine.
 
 </details>
````

docker/docker-cuda/Dockerfile

Lines changed: 2 additions & 2 deletions

```diff
@@ -1,5 +1,5 @@
 # https://hub.docker.com/r/hiyouga/pytorch/tags
-ARG BASE_IMAGE=hiyouga/pytorch:th2.6.0-cu124-flashattn2.7.4-cxx11abi0
+ARG BASE_IMAGE=hiyouga/pytorch:th2.6.0-cu124-flashattn2.7.4-cxx11abi0-devel
 FROM ${BASE_IMAGE}
 
 # Installation arguments
@@ -47,7 +47,7 @@ RUN if [ "${INSTALL_FLASHATTN}" == "true" ]; then \
     fi
 
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/shared_data", "/app/output" ]
 
 # Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT=7860
```
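The `-devel` suffix on the base image brings the full CUDA toolchain (including `nvcc`), which is what lets extras such as flash-attn compile inside the build. A hedged build sketch; `INSTALL_FLASHATTN` is inferred from the `RUN if` conditional above and assumed to be declared as a build `ARG` in this Dockerfile:

```bash
docker build -f ./docker/docker-cuda/Dockerfile \
    --build-arg PIP_INDEX=https://pypi.org/simple \
    --build-arg INSTALL_FLASHATTN=true \
    -t llamafactory:latest .
```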

docker/docker-cuda/Dockerfile.base

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,6 +1,6 @@
 # Start from the pytorch official image (ubuntu-22.04 + cuda-12.4.1 + python-3.11)
 # https://hub.docker.com/r/pytorch/pytorch/tags
-FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime
+FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel
 
 # Define environments
 ENV MAX_JOBS=16
```
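Moving from the `-runtime` to the `-devel` upstream tag trades a larger download for a full CUDA compiler toolchain. A one-line check that `nvcc` is really present in the new base, assuming the tag is pullable from Docker Hub:

```bash
docker run --rm pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel nvcc --version
```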

docker/docker-cuda/docker-compose.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -11,7 +11,7 @@ services:
       - ../../hf_cache:/root/.cache/huggingface
       - ../../ms_cache:/root/.cache/modelscope
       - ../../om_cache:/root/.cache/openmind
-      - ../../data:/app/data
+      - ../../shared_data:/app/shared_data
       - ../../output:/app/output
     ports:
       - "7860:7860"
```

docker/docker-npu/Dockerfile

Lines changed: 2 additions & 2 deletions

```diff
@@ -4,7 +4,7 @@ FROM ${BASE_IMAGE}
 
 # Installation arguments
 ARG PIP_INDEX=https://pypi.org/simple
-ARG EXTRAS=metrics
+ARG EXTRAS=torch-npu,metrics
 ARG HTTP_PROXY=""
 
 # Define environments
@@ -39,7 +39,7 @@ COPY . /app
 RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
 
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/shared_data", "/app/output" ]
 
 # Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT=7860
```
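Since `torch-npu` is now pulled in through the default `EXTRAS`, a cheap smoke test of the finished image is an import check. A heavily hedged sketch: it assumes the image has no interfering `ENTRYPOINT` and that the CANN runtime in the base image lets `torch_npu` import without attached NPU devices:

```bash
docker build -f ./docker/docker-npu/Dockerfile \
    --build-arg PIP_INDEX=https://pypi.org/simple \
    -t llamafactory:latest .

# Import check only; real workloads still need the device mounts from the README.
docker run --rm llamafactory:latest python -c "import torch, torch_npu; print(torch.__version__)"
```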

docker/docker-npu/docker-compose.yml

Lines changed: 2 additions & 2 deletions

```diff
@@ -5,13 +5,13 @@ services:
       context: ../..
       args:
         PIP_INDEX: https://pypi.org/simple
-        EXTRAS: metrics
+        EXTRAS: torch-npu,metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
       - ../../ms_cache:/root/.cache/modelscope
       - ../../om_cache:/root/.cache/openmind
-      - ../../data:/app/data
+      - ../../shared_data:/app/shared_data
       - ../../output:/app/output
       - /usr/local/dcmi:/usr/local/dcmi
       - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
```
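The compose default now matches the Dockerfile's `ARG EXTRAS`. It can still be overridden per build without editing the file; appending `deepspeed` here is purely illustrative (the extras names come from setup.py):

```bash
cd docker/docker-npu
docker compose build --build-arg EXTRAS=torch-npu,metrics,deepspeed
docker compose up -d
```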

docker/docker-rocm/Dockerfile

Lines changed: 1 addition & 1 deletion

```diff
@@ -52,7 +52,7 @@ RUN if [ "${INSTALL_FLASHATTN}" == "true" ]; then \
     fi
 
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/shared_data", "/app/output" ]
 
 # Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT=7860
```

docker/docker-rocm/docker-compose.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -11,7 +11,7 @@ services:
       - ../../hf_cache:/root/.cache/huggingface
       - ../../ms_cache:/root/.cache/modelscope
       - ../../om_cache:/root/.cache/openmind
-      - ../../data:/app/data
+      - ../../shared_data:/app/shared_data
       - ../../output:/app/output
     ports:
       - "7860:7860"
```

pyproject.toml

Lines changed: 0 additions & 8 deletions

```diff
@@ -92,12 +92,4 @@ conflicts = [
         { extra = "torch-npu" },
         { extra = "sglang" },
     ],
-    [
-        { extra = "vllm" },
-        { extra = "sglang" },
-    ],
-    [
-        { extra = "sglang" },
-        { extra = "minicpm_v" },
-    ],
 ]
```
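Only the torch-npu/sglang pair is left in uv's `conflicts` table, so extras that were previously declared mutually exclusive can now be resolved together. A hedged check, assuming the project is managed with uv:

```bash
# Previously rejected by the conflicts table; should now resolve in one environment.
uv sync --extra vllm --extra sglang
```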

setup.py

Lines changed: 1 addition & 3 deletions

```diff
@@ -45,7 +45,7 @@ def get_console_scripts() -> list[str]:
     "torch": ["torch>=2.0.0", "torchvision>=0.15.0"],
     "torch-npu": ["torch==2.4.0", "torch-npu==2.4.0.post2", "decorator"],
     "metrics": ["nltk", "jieba", "rouge-chinese"],
-    "deepspeed": ["deepspeed>=0.10.0,<=0.16.5"],
+    "deepspeed": ["deepspeed>=0.10.0,<=0.16.9"],
     "liger-kernel": ["liger-kernel>=0.5.5"],
     "bitsandbytes": ["bitsandbytes>=0.39.0"],
     "hqq": ["hqq"],
@@ -58,7 +58,6 @@ def get_console_scripts() -> list[str]:
     "apollo": ["apollo-torch"],
     "badam": ["badam>=1.2.1"],
     "adam-mini": ["adam-mini"],
-    "qwen": ["transformers_stream_generator"],
     "minicpm_v": [
         "soundfile",
         "torchvision",
@@ -68,7 +67,6 @@ def get_console_scripts() -> list[str]:
         "msgpack",
         "referencing",
         "jsonschema_specifications",
-        "transformers==4.48.3",
     ],
     "modelscope": ["modelscope"],
     "openmind": ["openmind"],
```
