# Listing metadata from the original paste: 96 lines, 3.3 KiB, plaintext.
# ==========================================
# Stage 1: Build stage
# Installs the toolchain, the Python package (editable, with the "core"
# extra), downloads the models and builds the web UI. The runtime stage
# copies /app, /root/.cache and /usr/local from here.
# ==========================================
FROM docker.m.daocloud.io/ubuntu:22.04 AS builder

# Build-time only: keeps apt from prompting. ARG (not ENV) so the value is
# visible to RUN steps without being recorded as image environment.
ARG DEBIAN_FRONTEND=noninteractive

# Fail a RUN step when any command of a pipeline fails. Without pipefail a
# failed `curl` in `curl ... | bash -` is masked by the exit code of `bash`.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Switch apt to the Aliyun mirror (domestic mirror for faster downloads).
RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list

# Install build dependencies, then Node.js 18 from NodeSource.
# update + install + cleanup stay in one layer so no stale apt index is cached.
RUN apt-get update && \
    apt-get install -y \
        build-essential \
        curl \
        fontconfig \
        fonts-noto-cjk \
        fonts-noto-core \
        git \
        libgl1 \
        libreoffice-core \
        libreoffice-writer \
        python3 \
        python3-pip \
        python3-venv \
        wget && \
    curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
    apt-get install -y nodejs && \
    fc-cache -fv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Use the npmmirror registry for npm downloads.
RUN npm config set registry https://registry.npmmirror.com

WORKDIR /app

# 1. Copy the packaging files and the package source, then install the
#    Python environment. The trailing `*` makes each packaging file optional.
COPY setup.py* pyproject.toml* README.md* /app/
COPY mineru /app/mineru

# --no-cache-dir keeps pip's download cache out of /root/.cache, which the
# runtime stage copies in full.
RUN python3 -m pip install --no-cache-dir -U pip setuptools wheel -i https://mirrors.aliyun.com/pypi/simple && \
    python3 -m pip install --no-cache-dir -e '.[core]' -i https://mirrors.aliyun.com/pypi/simple

# 2. Download the models (by default into /root/.cache/modelscope).
RUN mineru-models-download -s modelscope -m all

# 3. Build the front-end web UI.
COPY web_ui /app/web_ui
WORKDIR /app/web_ui
RUN npm install && npm run build

# 4. Stage the built static files inside the Python package, then drop
#    node_modules: /app is copied wholesale into the runtime stage, which
#    has no Node.js and therefore cannot use it.
WORKDIR /app
RUN mkdir -p mineru/cli/static/web && \
    cp -r web_ui/dist/* mineru/cli/static/web/ && \
    rm -rf web_ui/node_modules

# ==========================================
# Stage 2: Runtime stage
# Installs only the runtime OS packages and copies the application, the
# Python packages and the model cache from the builder stage.
# ==========================================
FROM docker.m.daocloud.io/ubuntu:22.04 AS runtime

# Build-time only: ARG instead of ENV so DEBIAN_FRONTEND does not leak into
# the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Switch apt to the Aliyun mirror (domestic mirror for faster downloads).
RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list

# Install the required runtime dependencies.
RUN apt-get update && \
    apt-get install -y \
        fontconfig \
        fonts-noto-cjk \
        fonts-noto-core \
        libgl1 \
        libreoffice-core \
        libreoffice-writer \
        python3 \
        python3-pip && \
    fc-cache -fv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# 1. Copy the model cache (key point: avoids downloading the models again).
# NOTE(review): only /root/.cache is carried over. If the model download
# step in the builder also writes a config file elsewhere under /root, it
# will be missing here - confirm.
COPY --from=builder /root/.cache /root/.cache

# 2. Copy the code and the Python environment.
COPY --from=builder /app /app
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# 3. Key fix: provide /root/magic-pdf.json (works around a 'NoneType' object
#    error). Use the project's template when present, otherwise generate a
#    minimal config; then force any /tmp/models path in the file to point at
#    the location where the models are stored.
RUN if [ -f "/app/magic-pdf.template.json" ]; then \
        cp /app/magic-pdf.template.json /root/magic-pdf.json; \
    else \
        echo '{"models-dir": "/root/.cache/modelscope/hub"}' > /root/magic-pdf.json; \
    fi && \
    sed -i 's|/tmp/models|/root/.cache/modelscope/hub|g' /root/magic-pdf.json

# 4. Re-create the editable install so the module is importable.
#    --no-cache-dir keeps pip's download cache out of the final image.
RUN python3 -m pip install --no-cache-dir -e '.[core]' -i https://mirrors.aliyun.com/pypi/simple

# Runtime environment.
ENV MINERU_MODEL_SOURCE=local \
    PYTHONPATH=/app

# Documentation only: the API server below listens on port 8000.
EXPOSE 8000

# Start command (exec form, so the Python process is PID 1 and receives
# signals directly).
ENTRYPOINT ["python3", "-m", "mineru.cli.fast_api", "--host", "0.0.0.0", "--port", "8000"]