UnisMindMap/docker/china/iluvatar.Dockerfile

86 lines
2.9 KiB
Docker

# ==========================================
# 阶段 1: 构建阶段 (Builder) - 负责前端 UI 编译
# ==========================================
FROM docker.m.daocloud.io/ubuntu:22.04 AS builder
ENV DEBIAN_FRONTEND=noninteractive
# 替换为阿里云镜像源并安装 Node.js [cite: 8, 9]
RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \
apt-get update && apt-get install -y curl && \
curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
apt-get install -y nodejs && \
apt-get clean
WORKDIR /app
COPY . .
# 构建前端 Web UI [cite: 12]
WORKDIR /app/web_ui
RUN npm install && npm run build
# ==========================================
# 阶段 2: 运行阶段 (Runtime) - 天数智芯专用环境
# ==========================================
# 使用天数智芯官方适配镜像,该镜像内置了 Python 3.10.18 和 CoreX 驱动 [cite: 15]
FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/corex:4.4.0_torch2.7.1_vllm0.11.2_py3.10
ENV DEBIAN_FRONTEND=noninteractive
WORKDIR /app
# 1. 安装字体与 LibreOffice 依赖 [cite: 15]
RUN apt-get update && \
apt-get install -y \
fonts-noto-core \
fonts-noto-cjk \
fontconfig \
libgl1 \
libreoffice-writer \
libreoffice-core && \
fc-cache -fv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# 2. 核心环境修复:确保在 CoreX 的 Python 路径下安装依赖
# 注意:使用 python3 -m pip 确保安装到 3.10.18 环境,避免 uvicorn 找不到 [cite: 16]
RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
python3 -m pip install \
'mineru[core]>=2.7.4' \
"uvicorn" \
"fastapi" \
"python-multipart" \
"modelscope>=1.26.0" \
"huggingface-hub>=0.32.4" \
"mineru-vl-utils>=0.1.19.1" \
"qwen-vl-utils>=0.0.14" \
numpy==1.26.4 \
opencv-python==4.11.0.86 \
-i https://mirrors.aliyun.com/pypi/simple
# 3. 拷贝源码及第一阶段的前端产物 [cite: 10, 13]
COPY . .
RUN mkdir -p /app/mineru/cli/static/web && \
cp -r /app/web_ui/dist/* /app/mineru/cli/static/web/
# 4. 下载模型权重 (离线模式必备) [cite: 16]
RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
# 5. 注入适配天数 GPU 的配置文件 (开启 vLLM 推理) [cite: 13]
RUN mkdir -p /root/ && \
echo '{ \
"models-dir": "/root/.cache/modelscope/hub/models", \
"device-mode": "cuda", \
"vlm-config": { \
"kind": "vllm", \
"precision": "fp16" \
} \
}' > /root/magic-pdf.json
# 6. 设置环境变量 [cite: 17]
ENV MINERU_MODEL_SOURCE=local
ENV PYTHONPATH=/app
EXPOSE 8000
# 7. 启动服务:使用 /bin/bash 包装以加载 CoreX 环境路径,解决二进制执行错误
ENTRYPOINT ["/bin/bash", "-c", "PYTHONPATH=/app exec python3 -m mineru.cli.fast_api --host 0.0.0.0 --port 8000"]