diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 8a566649a..000000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..92c02acca --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +.git* \ No newline at end of file diff --git a/.github/buildkitd.toml b/.github/buildkitd.toml deleted file mode 100644 index 19615ad72..000000000 --- a/.github/buildkitd.toml +++ /dev/null @@ -1,2 +0,0 @@ -[worker.oci] - max-parallelism=1 \ No newline at end of file diff --git a/.github/workflows/build-and-push-python-pg.yml b/.github/workflows/build-and-push-python-pg.yml index fff8265be..d82ca86bd 100644 --- a/.github/workflows/build-and-push-python-pg.yml +++ b/.github/workflows/build-and-push-python-pg.yml @@ -43,18 +43,15 @@ jobs: DOCKER_IMAGE_TAGS="--tag ${DOCKER_IMAGE}:${TAG_NAME} --tag ${DOCKER_IMAGE}:latest" echo ::set-output name=docker_image::${DOCKER_IMAGE} echo ::set-output name=version::${TAG_NAME} - echo ::set-output name=buildx_args::--platform ${DOCKER_PLATFORMS} \ + echo ::set-output name=buildx_args::--platform ${DOCKER_PLATFORMS} --no-cache \ --build-arg VERSION=${TAG_NAME} \ --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') \ - --build-arg --no-cache --provenance=false \ --build-arg VCS_REF=${GITHUB_SHA::8} \ ${DOCKER_IMAGE_TAGS} . 
- name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - with: - config: .github/buildkitd.toml - name: Login to GitHub Container Registry uses: docker/login-action@v3 with: diff --git a/.github/workflows/build-and-push-vector-model.yml b/.github/workflows/build-and-push-vector-model.yml index 6dc736bf9..1e8556461 100644 --- a/.github/workflows/build-and-push-vector-model.yml +++ b/.github/workflows/build-and-push-vector-model.yml @@ -48,18 +48,15 @@ jobs: DOCKER_IMAGE_TAGS="--tag ${DOCKER_IMAGE}:${TAG_NAME} --tag ${DOCKER_IMAGE}:latest" echo ::set-output name=docker_image::${DOCKER_IMAGE} echo ::set-output name=version::${TAG_NAME} - echo ::set-output name=buildx_args::--platform ${DOCKER_PLATFORMS} \ + echo ::set-output name=buildx_args::--platform ${DOCKER_PLATFORMS} --no-cache \ --build-arg VERSION=${TAG_NAME} \ --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') \ - --build-arg --no-cache --provenance=false \ --build-arg VCS_REF=${GITHUB_SHA::8} \ ${DOCKER_IMAGE_TAGS} . 
- name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - with: - config: .github/buildkitd.toml - name: Login to GitHub Container Registry uses: docker/login-action@v3 with: diff --git a/.github/workflows/build-and-push.yml b/.github/workflows/build-and-push.yml index dcff2a1fa..b1102252e 100644 --- a/.github/workflows/build-and-push.yml +++ b/.github/workflows/build-and-push.yml @@ -5,7 +5,7 @@ on: inputs: dockerImageTag: description: 'Docker Image Tag' - default: 'v1.0.0' + default: 'v1.0.0-dev' required: true architecture: description: 'Architecture' @@ -16,9 +16,19 @@ on: - linux/amd64 - linux/arm64 - linux/amd64,linux/arm64 + registry: + description: 'Push To Registry' + required: true + default: 'dockerhub, fit2cloud-registry' + type: choice + options: + - dockerhub + - fit2cloud-registry + - dockerhub, fit2cloud-registry jobs: - build-and-push-fit2cloud-registry: + build-and-push-to-fit2cloud-registry: + if: ${{ contains(github.event.inputs.registry, 'fit2cloud') }} runs-on: ubuntu-latest steps: - name: Check Disk Space @@ -45,38 +55,35 @@ jobs: DOCKER_IMAGE=registry-hkproxy.fit2cloud.com/maxkb/maxkb DOCKER_PLATFORMS=${{ github.event.inputs.architecture }} TAG_NAME=${{ github.event.inputs.dockerImageTag }} - DOCKER_IMAGE_TAGS="--tag ${DOCKER_IMAGE}:${TAG_NAME} --tag ${DOCKER_IMAGE}:latest" - echo ::set-output name=docker_image::${DOCKER_IMAGE} - echo ::set-output name=version::${TAG_NAME} - echo ::set-output name=buildx_args::--platform ${DOCKER_PLATFORMS} \ - --build-arg VERSION=${TAG_NAME} \ - --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') \ - --build-arg --no-cache \ - --build-arg VCS_REF=${GITHUB_SHA::8} \ + if [[ ${TAG_NAME} == *dev* ]]; then + DOCKER_IMAGE_TAGS="--tag ${DOCKER_IMAGE}:${TAG_NAME}" + else + DOCKER_IMAGE_TAGS="--tag ${DOCKER_IMAGE}:${TAG_NAME} --tag ${DOCKER_IMAGE}:latest" + fi + echo ::set-output name=buildx_args::--platform ${DOCKER_PLATFORMS} --no-cache \ 
${DOCKER_IMAGE_TAGS} . - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - with: - config: .github/buildkitd.toml - - name: Login to FIT2CLOUD Registry - uses: docker/login-action@v3 - with: - registry: registry-hkproxy.fit2cloud.com - username: ${{ secrets.FIT2CLOUD_REGISTRY_USERNAME }} - password: ${{ secrets.FIT2CLOUD_REGISTRY_PASSWORD }} - name: Login to GitHub Container Registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GH_TOKEN }} + - name: Login to FIT2CLOUD Registry + uses: docker/login-action@v3 + with: + registry: registry-hkproxy.fit2cloud.com + username: ${{ secrets.FIT2CLOUD_REGISTRY_USERNAME }} + password: ${{ secrets.FIT2CLOUD_REGISTRY_PASSWORD }} - name: Docker Buildx (build-and-push) run: | docker buildx build --output "type=image,push=true" ${{ steps.prepare.outputs.buildx_args }} -f installer/Dockerfile - build-and-push-to-ghcr: + build-and-push-to-dockerhub: + if: ${{ contains(github.event.inputs.registry, 'dockerhub') }} runs-on: ubuntu-latest steps: - name: Check Disk Space @@ -100,30 +107,32 @@ jobs: - name: Prepare id: prepare run: | - DOCKER_IMAGE=ghcr.io/1panel-dev/maxkb + DOCKER_IMAGE=1panel/maxkb DOCKER_PLATFORMS=${{ github.event.inputs.architecture }} TAG_NAME=${{ github.event.inputs.dockerImageTag }} - DOCKER_IMAGE_TAGS="--tag ${DOCKER_IMAGE}:${TAG_NAME} --tag ${DOCKER_IMAGE}:latest" - echo ::set-output name=docker_image::${DOCKER_IMAGE} - echo ::set-output name=version::${TAG_NAME} - echo ::set-output name=buildx_args::--platform ${DOCKER_PLATFORMS} \ - --build-arg VERSION=${TAG_NAME} \ - --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') \ - --build-arg --no-cache --provenance=false \ - --build-arg VCS_REF=${GITHUB_SHA::8} \ + if [[ ${TAG_NAME} == *dev* ]]; then + DOCKER_IMAGE_TAGS="--tag ${DOCKER_IMAGE}:${TAG_NAME}" + else + DOCKER_IMAGE_TAGS="--tag ${DOCKER_IMAGE}:${TAG_NAME} --tag 
${DOCKER_IMAGE}:latest" + fi + echo ::set-output name=buildx_args::--platform ${DOCKER_PLATFORMS} --no-cache \ ${DOCKER_IMAGE_TAGS} . + env - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - with: - config: .github/buildkitd.toml - name: Login to GitHub Container Registry uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GH_TOKEN }} + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Docker Buildx (build-and-push) run: | - docker buildx build --output "type=image,push=true" ${{ steps.prepare.outputs.buildx_args }} -f installer/Dockerfile \ No newline at end of file + docker buildx build --output "type=image,push=true" ${{ steps.prepare.outputs.buildx_args }} -f installer/Dockerfile diff --git a/.gitignore b/.gitignore index 7147555f8..8627829c0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,18 @@ +# Mac +.DS_Store +*/.DS_Store + +# VS Code +.vscode +*.project +*.factorypath + +# IntelliJ IDEA +.idea +*.iws +*.iml +*.ipr + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -165,4 +180,4 @@ models/ data .idea .dev -poetry.lock \ No newline at end of file +poetry.lock diff --git a/README.md b/README.md index 3c14b7a81..37904c30f 100644 --- a/README.md +++ b/README.md @@ -1,66 +1,59 @@ -

-

基于大语言模型的知识库问答系统

-

- License: GPL v3 - Codacy - Latest release - Stars - +

MaxKB

+

基于 LLM 大语言模型的知识库问答系统

+

+ License: GPL v3 + Codacy + Latest release + Stars


+MaxKB 是一款基于 LLM 大语言模型的知识库问答系统。 -MaxKB( Max Knowlegde Base 的简称)是一款基于大语言模型的知识库问答系统,其核心目标是利用大语言模型对自然语言理解和生成的能力揣摩用户意图,对原始知识点进行汇总、整合,生成更贴切的答案。 +- **多模型**:支持对接主流的大模型,包括本地私有大模型(如 Llama 2)、Azure OpenAI 和百度千帆大模型等; +- **开箱即用**:支持直接上传文档、自动爬取在线文档,支持文本自动拆分、向量化,智能问答交互体验好; +- **无缝嵌入**:支持零编码快速嵌入到第三方业务系统。 -MaxKB 的产品特性: -- **多模型对接** -MaxKB 支持对接市场上主流的模型供应商,包括百度千帆大模型、 Azure OpenAI 和 Ollama 私有模型平台等。 +## 快速开始 -- **开箱即用** -支持一键获取在线文本或直接上传文档,MaxKB 系统自动进行文本拆分、知识向量化;构建应用实现 LLM 问答功能,根据用户提问和知识库内容生成精确回答。 - -- **无缝集成** -MaxKB 应用支持零编码集成到企业第三方系统。 - -## 一键启动 - -MaxKB 支持一键启动,仅需执行以下命令: ``` -docker run --name="maxkb" -p 8000:8000 -d ghcr.io/1panel-dev/maxkb +docker run -d --name=maxkb -p 8080:8080 1panel/maxkb ``` +## 自定义持久化数据 -## 整体架构 -![arch](https://github.com/1Panel-dev/maxkb/assets/52996290/ca786342-8a0a-4921-b847-a96fff9a3e09) - -## 实现原理 - -- 获取本地文档; -- 读取文本; -- 文本分割; -- 文本向量化; -- Query 向量化; -- 向量匹配最相似的 TOP N 个文本; -- 匹配出的文本作为上下文和问题一起添加到 prompt 中; -- 提交给 LLM 做生成回答。 - -![Implementation principle](https://github.com/1Panel-dev/maxkb/assets/52996290/51956c12-1396-4625-8b29-005ac60ca11d) +- 在主机系统上创建一个存储数据的目录,例如:/opt/maxkb/data +- 在主机系统上创建一个存储配置文件的目录,例如:/opt/maxkb/conf +``` +docker run --name=maxkb -p 8080:8080 -v /opt/maxkb/data:/var/lib/postgresql/data -v /opt/maxkb/conf:/opt/maxkb/conf -d 1panel/maxkb +``` +也可以通过 [1Panel 应用商店](https://apps.fit2cloud.com/1panel) 快速部署 MaxKB + Ollama(Llama 2),30 分钟内即可上线基于本地大模型的知识库问答系统。 -## 社区 +## UI 展示 + + + + + + + + + +
Demo1Demo2
Demo3Demo4
-如果您在使用过程中有任何疑问或建议,欢迎提交 GitHub Issue 或到我们官方论坛进行交流沟通: +## 社区交流 + +[论坛](https://bbs.fit2cloud.com/c/mk/11) -- [论坛](https://bbs.fit2cloud.com/) ## 技术栈 -- 前端:[Vue3.js](https://cn.vuejs.org/)、[Element Plus](https://element-plus.org/zh-CN/)、[TypeScript](https://www.tslang.cn/) -- 后端:[django](https://www.djangoproject.com/)、[langchain](https://www.langchain.com/) -- 中间件:[postgresql](https://www.postgresql.org/) -- 基础设施:[Docker](https://www.docker.com/) - - +- 前端:[Vue.js](https://cn.vuejs.org/) +- 后端:[Django](https://www.djangoproject.com/) +- Langchain:[Langchain](https://www.langchain.com/) +- 向量数据库:[PostgreSQL](https://www.postgresql.org/) +- 大模型:Azure OpenAI、百度千帆大模型、[Ollama](https://github.com/ollama/ollama) ## License diff --git a/installer/Dockerfile b/installer/Dockerfile index ac81d8aeb..f95a4e814 100644 --- a/installer/Dockerfile +++ b/installer/Dockerfile @@ -12,32 +12,25 @@ FROM ghcr.io/1panel-dev/maxkb-python-pg:python3.11.8-pg15.3 RUN mkdir -p /opt/maxkb/app && mkdir -p /opt/maxkb/model && mkdir -p /opt/maxkb/conf # 拷贝项目 COPY . 
/opt/maxkb/app +RUN rm -rf /opt/maxkb/app/ui COPY installer/config.yaml /opt/maxkb/conf -ENV POSTGRES_USER root -ENV POSTGRES_PASSWORD Password123@postgres -RUN rm -rf /opt/maxkb/app/ui /opt/maxkb/app/build COPY --from=vector-model model /opt/maxkb/app/model COPY --from=web-build ui /opt/maxkb/app/ui -RUN ls -la /opt/maxkb/app -# 复制模型 -RUN mv /opt/maxkb/app/model/* /opt/maxkb/model -RUN ls /opt/maxkb/model -RUN cp -r /opt/maxkb/model/base/hub /opt/maxkb/model/tokenizer -# 设置工作目录 +ENV POSTGRES_USER root +ENV POSTGRES_PASSWORD Password123@postgres WORKDIR /opt/maxkb/app -# 更新apt-get包管理器 -RUN apt-get update&&apt-get install -y curl -# 更新pip -RUN pip3 install --upgrade pip -# 安装 poetry包管理器 -RUN pip3 install poetry -# 导出依赖 -RUN poetry export -f requirements.txt --output requirements.txt --without-hashes -# 下载python依赖 -RUN pip3 install --no-cache-dir -r requirements.txt +RUN mv /opt/maxkb/app/model/* /opt/maxkb/model && \ + cp -r /opt/maxkb/model/base/hub /opt/maxkb/model/tokenizer && \ + apt-get update && apt-get install -y curl && \ + pip3 install --upgrade pip && \ + pip3 install poetry && \ + poetry export -f requirements.txt --output requirements.txt --without-hashes && \ + pip3 install --no-cache-dir -r requirements.txt && \ + pip3 cache purge && \ + rm -rf /var/lib/apt/lists/* # 启动命令 VOLUME /opt/maxkb/conf -EXPOSE 8000 +EXPOSE 8080 COPY installer/run-maxkb.sh /usr/bin/ RUN chmod 755 /usr/bin/run-maxkb.sh ENTRYPOINT ["bash", "-c"] diff --git a/main.py b/main.py index 6f7480807..f925b09f8 100644 --- a/main.py +++ b/main.py @@ -44,7 +44,7 @@ def perform_db_migrate(): def start_services(): management.call_command('migrate') - management.call_command('runserver', "0.0.0.0:8000") + management.call_command('runserver', "0.0.0.0:8080") if __name__ == '__main__': diff --git a/pyproject.toml b/pyproject.toml index cf26ea8fc..cd9fea832 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.11" -django = 
"4.1.10" +django = "4.1.13" djangorestframework = "3.14.0" drf-yasg = "1.21.7" django-filter = "23.2" @@ -16,7 +16,7 @@ langchain = "^0.1.11" psycopg2-binary = "2.9.7" jieba = "^0.42.1" diskcache = "^5.6.3" -pillow = "9.5.0" +pillow = "^10.2.0" filetype = "^1.2.0" chardet = "^5.2.0" sentence-transformers = "^2.2.2" diff --git a/ui/package.json b/ui/package.json index ca9174f91..742ff4658 100644 --- a/ui/package.json +++ b/ui/package.json @@ -13,7 +13,7 @@ "format": "prettier --write src/" }, "dependencies": { - "axios": "^0.27.2", + "axios": "^0.28.0", "element-plus": "^2.5.6", "install": "^0.13.0", "lodash": "^4.17.21", diff --git a/ui/vite.config.ts b/ui/vite.config.ts index 36a2935ab..d7ac2ac19 100644 --- a/ui/vite.config.ts +++ b/ui/vite.config.ts @@ -11,7 +11,7 @@ export default defineConfig(({ mode }) => { const ENV = loadEnv(mode, envDir) const proxyConf: Record = {} proxyConf['/api'] = { - target: 'http://127.0.0.1:8000', + target: 'http://127.0.0.1:8080', changeOrigin: true, rewrite: (path) => path.replace(ENV.VITE_BASE_PATH, '/') }