# File-viewer residue (not part of the build recipe): 86 lines, 2.9 KiB, Docker
# ==========================================
# Stage 1: builder - compiles the front-end web UI
# ==========================================
FROM docker.m.daocloud.io/ubuntu:22.04 AS builder

# Build-time only: keeps apt non-interactive during RUN steps without
# persisting the variable in the image environment.
ARG DEBIAN_FRONTEND=noninteractive

# Make a failure on the left-hand side of a pipe fail the whole RUN step
# (otherwise a failed `curl` below would be masked by `bash -` exiting 0).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Point apt at the Aliyun mirror, then install Node.js 18 from NodeSource.
# The apt lists are removed in the same layer that created them.
RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    apt-get update && \
    apt-get install -y curl && \
    curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
    apt-get install -y nodejs && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app/web_ui

# Copy only the npm manifests first so the dependency layer stays cached
# until package.json / package-lock.json change.
COPY web_ui/package*.json ./

# Prefer a reproducible `npm ci` when a lockfile is present; fall back to
# `npm install` otherwise.
RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi

# Bring in the full source tree (same content the build saw before), then
# build the front-end web UI into /app/web_ui/dist.
COPY . /app
RUN npm run build

# ==========================================
# Stage 2: runtime - Iluvatar CoreX dedicated environment
# ==========================================
# Vendor-adapted CoreX image; the tag indicates CoreX 4.4.0, torch 2.7.1,
# vLLM 0.11.2 and Python 3.10 (the original author notes Python 3.10.18 and
# the CoreX driver are bundled).
FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/corex:4.4.0_torch2.7.1_vllm0.11.2_py3.10

# Build-time only, so the running container's environment is not polluted.
ARG DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# 1. Install fonts and the LibreOffice dependencies, refresh the font cache,
#    and drop the apt lists in the same layer.
RUN apt-get update && \
    apt-get install -y \
        fontconfig \
        fonts-noto-cjk \
        fonts-noto-core \
        libgl1 \
        libreoffice-core \
        libreoffice-writer && \
    fc-cache -fv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# 2. Install the Python dependencies through `python3 -m pip` so they land in
#    the interpreter shipped with the CoreX image (the original author notes
#    this avoids uvicorn not being found). --no-cache-dir keeps pip's download
#    cache out of the layer.
RUN python3 -m pip install --no-cache-dir -U pip -i https://mirrors.aliyun.com/pypi/simple && \
    python3 -m pip install --no-cache-dir \
        'mineru[core]>=2.7.4' \
        "uvicorn" \
        "fastapi" \
        "python-multipart" \
        "modelscope>=1.26.0" \
        "huggingface-hub>=0.32.4" \
        "mineru-vl-utils>=0.1.19.1" \
        "qwen-vl-utils>=0.0.14" \
        numpy==1.26.4 \
        opencv-python==4.11.0.86 \
        -i https://mirrors.aliyun.com/pypi/simple

# 3. Download the model weights (required for offline mode). This runs before
#    the source tree is copied so that source edits do not invalidate this
#    expensive layer.
# NOTE(review): assumes the mineru-models-download console script resolves the
# pip-installed package and does not need files from /app - confirm.
RUN /bin/bash -c "mineru-models-download -s modelscope -m all"

# 4. Write the configuration for the Iluvatar GPU (vLLM backend, fp16).
#    The Dockerfile line continuations collapse the JSON onto a single line.
# NOTE(review): MinerU 2.x appears to read ~/mineru.json rather than
# magic-pdf.json - verify that this file is still consumed.
RUN mkdir -p /root/ && \
    echo '{ \
    "models-dir": "/root/.cache/modelscope/hub/models", \
    "device-mode": "cuda", \
    "vlm-config": { \
    "kind": "vllm", \
    "precision": "fp16" \
    } \
    }' > /root/magic-pdf.json

# 5. Copy the application source, then overlay the front-end bundle produced
#    by the builder stage. Without --from=builder the stage-1 output would
#    never reach this image.
COPY . .
COPY --from=builder /app/web_ui/dist/ /app/mineru/cli/static/web/

# 6. Runtime environment: use the locally downloaded models and make the
#    source tree under /app importable.
ENV MINERU_MODEL_SOURCE=local
ENV PYTHONPATH=/app
EXPOSE 8000

# 7. Start the service through /bin/bash (the original author notes this is
#    needed to pick up the CoreX environment paths); `exec` replaces the shell
#    so the Python process receives container signals directly.
ENTRYPOINT ["/bin/bash", "-c", "PYTHONPATH=/app exec python3 -m mineru.cli.fast_api --host 0.0.0.0 --port 8000"]