# Dockerfile: multi-stage image build (Node.js frontend builder + CoreX runtime).
# ==========================================
# Stage 1: frontend build (Node.js builder)
# ==========================================
FROM node:18-alpine AS frontend-builder

WORKDIR /build

# Copy only the package manifests first so the dependency layer stays cached
# until package*.json changes.
COPY web_ui/package*.json ./

# Install dependencies from the npmmirror registry. Use the reproducible
# `npm ci` when a lockfile was copied, and fall back to `npm install` otherwise
# (whether web_ui ships a package-lock.json is not visible from this file).
RUN npm config set registry https://registry.npmmirror.com && \
    if [ -f package-lock.json ]; then npm ci; else npm install; fi

# Copy the frontend sources and build them; the build output is expected in
# /build/dist, which the runtime stage copies from.
COPY web_ui/ .
RUN npm run build

# ==========================================
# Stage 2: final runtime environment (CoreX runtime)
# ==========================================
FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/corex:4.4.0_torch2.7.1_vllm0.11.2_py3.10

# Build-time only: keeps apt-get non-interactive in the RUN steps of this stage
# without leaking DEBIAN_FRONTEND into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Persisted into the runtime environment.
# NOTE(review): presumably tells MinerU to load models from local disk — confirm.
ENV MINERU_MODEL_SOURCE=local

# 1. System-level dependencies: install only the fonts and tools needed at runtime.
#    --no-install-recommends keeps optional "recommended" packages out of the image.
#    The font cache is rebuilt and the apt lists are removed in the same layer,
#    so the cleanup actually reduces the layer size.
RUN apt-get update && apt-get install -y --no-install-recommends \
        fontconfig \
        fonts-noto-cjk \
        fonts-noto-core \
        libgl1-mesa-glx \
    && fc-cache -fv \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# 2. Python dependency strategy:
#    mineru is installed with --no-deps so that pip does not replace the
#    torch 2.7.1 / vllm 0.11.2 builds preinstalled in the CoreX base image.
#    The lightweight dependencies MinerU needs at runtime, which the base image
#    may lack, are then added explicitly.
#    --no-cache-dir keeps pip's download cache out of the layer, so no separate
#    `pip cache purge` step is needed.
#    NOTE(review): the last install resolves dependencies normally (no --no-deps),
#    so "modelscope" / "magic-pdf" may still pull in or change other packages, and
#    "pydantic<2.0" may conflict with packages shipped in the base image — verify.
RUN python3 -m pip install --no-cache-dir -U pip -i https://mirrors.aliyun.com/pypi/simple && \
    python3 -m pip install --no-cache-dir 'mineru[core]>=2.7.4' --no-deps -i https://mirrors.aliyun.com/pypi/simple && \
    python3 -m pip install --no-cache-dir \
        "numpy==1.26.4" \
        "opencv-python==4.11.0.86" \
        "pydantic<2.0" \
        "modelscope" \
        "magic-pdf" \
        -i https://mirrors.aliyun.com/pypi/simple

WORKDIR /app

# 3. Copy the project sources from the build context.
#    NOTE(review): `COPY . .` copies everything not excluded by .dockerignore,
#    web_ui/ included. Stage 1 reads web_ui/ from the same build context, so it
#    cannot simply be ignored there — confirm how the exclusion is intended.
COPY . .

# 4. Copy the frontend build output from stage 1. COPY creates the destination
#    directory (and any missing parents), so no separate `RUN mkdir -p` layer
#    is needed.
COPY --from=frontend-builder /build/dist/ ./mineru/cli/static/web/

# 5. Write the configuration file; this must run before the model download step.
#    The trailing backslashes are Dockerfile line continuations, so the JSON
#    document is written to the file as a single line.
#    NOTE(review): MinerU 2.x is believed to read ~/mineru.json rather than
#    ~/magic-pdf.json — confirm this file is actually picked up.
RUN mkdir -p /root/.cache/modelscope/hub/models && \
    echo '{ \
        "models-dir": "/root/.cache/modelscope/hub/models", \
        "device-mode": "gpu", \
        "vlm-config": { \
            "kind": "vllm", \
            "precision": "fp16" \
        } \
    }' > /root/magic-pdf.json

# 6. Pre-download the models at build time so they are baked into the image.
#    NOTE(review): `-s modelscope -m all` presumably selects ModelScope as the
#    source and downloads every model set — confirm against the MinerU CLI help.
RUN /bin/bash -c "export MINERU_MODEL_SOURCE=local && mineru-models-download -s modelscope -m all"

# 7. Entrypoint: bash replaces itself (exec) with the command it is given, so
#    the CMD below, or any arguments passed to `docker run`, run as the
#    container's main process.
ENTRYPOINT ["/bin/bash", "-c", "exec \"$@\"", "--"]
CMD ["python3", "-m", "mineru.cli.main"]