# ==========================================
# Stage 1: build
#   Installs the Python package, downloads the models and builds the web UI.
# ==========================================
FROM docker.m.daocloud.io/ubuntu:22.04 AS builder

# Build-time only: stops apt from prompting without persisting in the image env.
ARG DEBIAN_FRONTEND=noninteractive

# Run every RUN in this stage under bash with pipefail, so a failed download in
# `curl ... | bash -` aborts the build instead of being masked by the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Point apt at the Aliyun mirror.
RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list

# Install build dependencies, then Node.js 18 from NodeSource, and refresh the
# font cache. The apt lists are removed in the same layer that created them.
RUN apt-get update && \
    apt-get install -y \
        build-essential \
        curl \
        fontconfig \
        fonts-noto-cjk \
        fonts-noto-core \
        git \
        libgl1 \
        libreoffice-core \
        libreoffice-writer \
        python3 \
        python3-pip \
        python3-venv \
        wget && \
    curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
    apt-get install -y nodejs && \
    fc-cache -fv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Use the npmmirror registry for all npm commands below.
RUN npm config set registry https://registry.npmmirror.com

WORKDIR /app

# 1. Copy the packaging files and the Python sources, then install the package
#    in editable mode with its "core" extra.
#    --no-cache-dir keeps pip's cache out of /root/.cache, which the runtime
#    stage copies wholesale.
COPY setup.py* pyproject.toml* README.md* /app/
COPY mineru /app/mineru
RUN python3 -m pip install --no-cache-dir -U pip setuptools wheel -i https://mirrors.aliyun.com/pypi/simple && \
    python3 -m pip install --no-cache-dir -e '.[core]' -i https://mirrors.aliyun.com/pypi/simple

# 2. Download all models from ModelScope
#    (per the original note, they land in /root/.cache/modelscope by default).
RUN mineru-models-download -s modelscope -m all

# 3. Build the front-end web UI.
COPY web_ui /app/web_ui
WORKDIR /app/web_ui
RUN npm install && npm run build

# 4. Publish the built bundle into the package's static directory, then drop
#    node_modules: the runtime stage copies all of /app and has no Node.js, so
#    the build-only dependencies would just be dead weight in the final image.
WORKDIR /app
RUN mkdir -p mineru/cli/static/web && \
    cp -r web_ui/dist/* mineru/cli/static/web/ && \
    rm -rf web_ui/node_modules

# ==========================================
# Stage 2: runtime
# ==========================================
FROM docker.m.daocloud.io/ubuntu:22.04 AS runtime

# Build-time only, as in the build stage.
ARG DEBIAN_FRONTEND=noninteractive

# Point apt at the Aliyun mirror.
RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list

# Install the required runtime dependencies and refresh the font cache.
RUN apt-get update && \
    apt-get install -y \
        fontconfig \
        fonts-noto-cjk \
        fonts-noto-core \
        libgl1 \
        libreoffice-core \
        libreoffice-writer \
        python3 \
        python3-pip && \
    fc-cache -fv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# 1. Copy the model cache from the build stage so the models are not downloaded
#    a second time.
COPY --from=builder /root/.cache /root/.cache

# 2. Copy the application code, the installed Python packages and the
#    console scripts from the build stage.
COPY --from=builder /app /app
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# 3. Provide /root/magic-pdf.json (original note: works around a
#    "'NoneType' object" error). Use the project template when it exists,
#    otherwise write a minimal config; then rewrite any /tmp/models path so it
#    points at the copied model cache.
#    NOTE(review): nothing in this Dockerfile copies magic-pdf.template.json
#    into /app, so as written the fallback branch is the one that runs.
RUN if [ -f "/app/magic-pdf.template.json" ]; then \
        cp /app/magic-pdf.template.json /root/magic-pdf.json; \
    else \
        echo '{"models-dir": "/root/.cache/modelscope/hub"}' > /root/magic-pdf.json; \
    fi && \
    sed -i 's|/tmp/models|/root/.cache/modelscope/hub|g' /root/magic-pdf.json

# 4. Re-run the editable install in the runtime image so the package is
#    registered with this stage's Python.
RUN python3 -m pip install --no-cache-dir -e '.[core]' -i https://mirrors.aliyun.com/pypi/simple

# Runtime environment.
ENV MINERU_MODEL_SOURCE=local \
    PYTHONPATH=/app

EXPOSE 8000

# Start the FastAPI service on all interfaces, port 8000.
ENTRYPOINT ["python3", "-m", "mineru.cli.fast_api", "--host", "0.0.0.0", "--port", "8000"]