From d9173cfa10fb26091d15f3d6d2661469eeaeaf0b Mon Sep 17 00:00:00 2001 From: panyy Date: Mon, 29 Jun 2026 18:38:51 +0800 Subject: [PATCH] =?UTF-8?q?feat:=E6=89=B9=E9=87=8F=E5=AF=BC=E5=87=BA?= =?UTF-8?q?=E6=BA=90=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/common/event/listener_manage.py | 2 + .../management/commands/services/command.py | 1 + .../commands/services/services/__init__.py | 1 + apps/knowledge/serializers/document.py | 51 ++++++++++++++++--- apps/knowledge/urls.py | 1 + apps/knowledge/views/document.py | 23 +++++++++ ui/src/api/knowledge/document.ts | 16 ++++++ .../system-resource-management/document.ts | 34 ++++++++++++- ui/src/api/system-shared/document.ts | 34 ++++++++++++- ui/src/views/document/index.vue | 18 +++++++ 10 files changed, 173 insertions(+), 8 deletions(-) diff --git a/apps/common/event/listener_manage.py b/apps/common/event/listener_manage.py index b7122b934..318b2f0bb 100644 --- a/apps/common/event/listener_manage.py +++ b/apps/common/event/listener_manage.py @@ -207,6 +207,8 @@ class ListenerManagement: @staticmethod def post_update_document_status(document_id, task_type: TaskType): _document = QuerySet(Document).filter(id=document_id).first() + if _document is None: + return status = Status(_document.status) if status[task_type] == State.REVOKE: diff --git a/apps/common/management/commands/services/command.py b/apps/common/management/commands/services/command.py index 5dfb7570d..ae86418ed 100644 --- a/apps/common/management/commands/services/command.py +++ b/apps/common/management/commands/services/command.py @@ -23,6 +23,7 @@ class Services(TextChoices): services_map = { cls.gunicorn.value: services.GunicornService, cls.celery_default: services.CeleryDefaultService, + cls.celery_model: services.CeleryModelService, cls.local_model: services.GunicornLocalModelService, } return services_map.get(name) diff --git a/apps/common/management/commands/services/services/__init__.py b/apps/common/management/commands/services/services/__init__.py index 7c6731c25..99125d1b7 100644 --- a/apps/common/management/commands/services/services/__init__.py +++ b/apps/common/management/commands/services/services/__init__.py @@ -1,4 +1,5 @@ from .celery_default import * +from .celery_model import * from .gunicorn import * from .local_model import * from .scheduler import * diff --git a/apps/knowledge/serializers/document.py b/apps/knowledge/serializers/document.py index 0d63c9665..98bb06516 100644 --- a/apps/knowledge/serializers/document.py +++ b/apps/knowledge/serializers/document.py @@ -632,8 +632,17 @@ class DocumentSerializers(serializers.Serializer): document = QuerySet(Document).filter( id=self.data.get('document_id'), knowledge_id=self.data.get('knowledge_id') ).first() - source_file_id = document.meta.get('source_file_id') if document and document.meta else None - file_query_set = QuerySet(File).filter(source_type=FileSourceType.DOCUMENT, source_id=self.data.get('document_id')) + file = DocumentSerializers.Operate.get_source_file(document, self.data.get('knowledge_id')) + if not file: + raise AppApiException(500, _('File not exist. Only manually uploaded documents are supported')) + return FileSerializer.Operate(data={'id': file.id}).get(with_valid=True) + + @staticmethod + def get_source_file(document, knowledge_id): + if not document: + return None + source_file_id = document.meta.get('source_file_id') if document.meta else None + file_query_set = QuerySet(File).filter(source_type=FileSourceType.DOCUMENT, source_id=document.id) file = file_query_set.filter(id=source_file_id).first() if source_file_id else None if not file: file = file_query_set.first() @@ -641,11 +650,9 @@ class DocumentSerializers(serializers.Serializer): file = QuerySet(File).filter( id=source_file_id, source_type=FileSourceType.KNOWLEDGE, - source_id=self.data.get('knowledge_id') + source_id=knowledge_id ).first() - if not file: - raise AppApiException(500, _('File not exist. Only manually uploaded documents are supported')) - return FileSerializer.Operate(data={'id': file.id}).get(with_valid=True) + return file def one(self, with_valid=False): self.is_valid(raise_exception=True) @@ -1292,6 +1299,38 @@ class DocumentSerializers(serializers.Serializer): response.write(zip_buffer.getvalue()) return response + def batch_download_source_file(self, instance: Dict, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + BatchDocumentExportSerializer(data=instance).is_valid(raise_exception=True) + knowledge_id = self.data.get('knowledge_id') + document_id_list = instance.get('document_id_list') + document_list = list(QuerySet(Document).filter(knowledge_id=knowledge_id, id__in=document_id_list)) + if len(document_list) != len(set([str(document_id) for document_id in document_id_list])): + raise AppApiException(500, _('Document id does not exist')) + + response = HttpResponse(content_type='application/zip') + response['Content-Disposition'] = 'attachment; filename="source_documents.zip"' + zip_buffer = io.BytesIO() + used_names = defaultdict(int) + file_count = 0 + with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file: + for document in document_list: + file = DocumentSerializers.Operate.get_source_file(document, knowledge_id) + if not file: + continue + safe_name = re.sub(r'[\\/:*?"<>|]', '_', file.file_name or document.name or str(document.id)).strip() + safe_name = safe_name or str(document.id) + base_name, ext = os.path.splitext(safe_name) + used_names[safe_name] += 1 + zip_name = safe_name if used_names[safe_name] == 1 else f'{base_name}_{used_names[safe_name]}{ext}' + zip_file.writestr(zip_name, file.get_bytes()) + file_count += 1 + if file_count == 0: + raise AppApiException(500, _('File not exist. Only manually uploaded documents are supported')) + response.write(zip_buffer.getvalue()) + return response + @transaction.atomic def batch_delete(self, instance: Dict, with_valid=True): if with_valid: diff --git a/apps/knowledge/urls.py b/apps/knowledge/urls.py index c2cfb2285..53de5d863 100644 --- a/apps/knowledge/urls.py +++ b/apps/knowledge/urls.py @@ -32,6 +32,7 @@ urlpatterns = [ path('workspace//knowledge//document/batch_delete', views.DocumentView.BatchDelete.as_view()), path('workspace//knowledge//document/batch_refresh', views.DocumentView.BatchRefresh.as_view()), path('workspace//knowledge//document/batch_export_zip', views.DocumentView.BatchExportZip.as_view()), + path('workspace//knowledge//document/batch_download_source_file', views.DocumentView.BatchDownloadSourceFile.as_view()), path('workspace//knowledge//document/batch_generate_related', views.DocumentView.BatchGenerateRelated.as_view()), path('workspace//knowledge//document/web', views.WebDocumentView.as_view()), path('workspace//knowledge//document/qa', views.QaDocumentView.as_view()), diff --git a/apps/knowledge/views/document.py b/apps/knowledge/views/document.py index 3c5afa5f3..0db49a6af 100644 --- a/apps/knowledge/views/document.py +++ b/apps/knowledge/views/document.py @@ -688,6 +688,29 @@ class DocumentView(APIView): 'knowledge_id': knowledge_id }).batch_export_zip(request.data) + class BatchDownloadSourceFile(APIView): + authentication_classes = [TokenAuth] + + @has_permissions( + PermissionConstants.KNOWLEDGE_DOCUMENT_DOWNLOAD_SOURCE_FILE.get_workspace_knowledge_permission(), + PermissionConstants.KNOWLEDGE_DOCUMENT_DOWNLOAD_SOURCE_FILE.get_workspace_permission_workspace_manage_role(), + RoleConstants.WORKSPACE_MANAGE.get_workspace_role(), + ViewPermission([RoleConstants.USER.get_workspace_role()], + [PermissionConstants.KNOWLEDGE.get_workspace_knowledge_permission()], CompareConstants.AND), + ) + @log( + menu='document', operate="Batch download source files", + get_operation_object=lambda r, keywords: get_knowledge_document_operation_object( + get_knowledge_operation_object(keywords.get('knowledge_id')), + get_document_operation_object_batch(r.data.get('document_id_list')) + ), + ) + def post(self, request: Request, workspace_id: str, knowledge_id: str): + return DocumentSerializers.Batch(data={ + 'workspace_id': workspace_id, + 'knowledge_id': knowledge_id + }).batch_download_source_file(request.data) + class DownloadSourceFile(APIView): authentication_classes = [TokenAuth] diff --git a/ui/src/api/knowledge/document.ts b/ui/src/api/knowledge/document.ts index 57d3d9647..2683fd42c 100644 --- a/ui/src/api/knowledge/document.ts +++ b/ui/src/api/knowledge/document.ts @@ -210,6 +210,21 @@ const exportMulDocumentZip: ( ) } +const exportMulSourceDocumentZip: ( + document_name: string, + knowledge_id: string, + document_id_list: string[], + loading?: Ref, +) => Promise = (document_name, knowledge_id, document_id_list, loading) => { + return exportExcelPost( + document_name.trim() + '_source.zip', + `${prefix.value}/${knowledge_id}/document/batch_download_source_file`, + {}, + { document_id_list }, + loading, + ) +} + /** * 刷新文档向量库 * @param 参数 @@ -635,6 +650,7 @@ export default { exportDocument, exportDocumentZip, exportMulDocumentZip, + exportMulSourceDocumentZip, putDocumentRefresh, putDocumentSync, putMulDocument, diff --git a/ui/src/api/system-resource-management/document.ts b/ui/src/api/system-resource-management/document.ts index c661bc5b8..27b43dad4 100644 --- a/ui/src/api/system-resource-management/document.ts +++ b/ui/src/api/system-resource-management/document.ts @@ -1,5 +1,5 @@ import { Result } from '@/request/Result' -import { get, post, del, put, exportExcel, exportFile } from '@/request/index' +import { get, post, del, put, exportExcel, exportFile, exportExcelPost } from '@/request/index' import type { Ref } from 'vue' import type { KeyValue } from '@/api/type/common' import type { pageRequest } from '@/api/type/common' @@ -187,6 +187,36 @@ const exportDocumentZip: ( ) } +const exportMulDocumentZip: ( + document_name: string, + knowledge_id: string, + document_id_list: string[], + loading?: Ref, +) => Promise = (document_name, knowledge_id, document_id_list, loading) => { + return exportExcelPost( + document_name.trim() + '.zip', + `${prefix}/${knowledge_id}/document/batch_export_zip`, + {}, + { document_id_list }, + loading, + ) +} + +const exportMulSourceDocumentZip: ( + document_name: string, + knowledge_id: string, + document_id_list: string[], + loading?: Ref, +) => Promise = (document_name, knowledge_id, document_id_list, loading) => { + return exportExcelPost( + document_name.trim() + '_source.zip', + `${prefix}/${knowledge_id}/document/batch_download_source_file`, + {}, + { document_id_list }, + loading, + ) +} + /** * 刷新文档向量库 * @param 参数 @@ -580,6 +610,8 @@ export default { postReplaceSourceFile, exportDocument, exportDocumentZip, + exportMulDocumentZip, + exportMulSourceDocumentZip, putDocumentRefresh, putDocumentSync, putMulDocument, diff --git a/ui/src/api/system-shared/document.ts b/ui/src/api/system-shared/document.ts index dc5c79fdc..43589f23d 100644 --- a/ui/src/api/system-shared/document.ts +++ b/ui/src/api/system-shared/document.ts @@ -1,5 +1,5 @@ import { Result } from '@/request/Result' -import { get, post, del, put, exportExcel, exportFile } from '@/request/index' +import { get, post, del, put, exportExcel, exportFile, exportExcelPost } from '@/request/index' import type { Ref } from 'vue' import type { KeyValue } from '@/api/type/common' import type { pageRequest } from '@/api/type/common' @@ -187,6 +187,36 @@ const exportDocumentZip: ( ) } +const exportMulDocumentZip: ( + document_name: string, + knowledge_id: string, + document_id_list: string[], + loading?: Ref, +) => Promise = (document_name, knowledge_id, document_id_list, loading) => { + return exportExcelPost( + document_name.trim() + '.zip', + `${prefix}/${knowledge_id}/document/batch_export_zip`, + {}, + { document_id_list }, + loading, + ) +} + +const exportMulSourceDocumentZip: ( + document_name: string, + knowledge_id: string, + document_id_list: string[], + loading?: Ref, +) => Promise = (document_name, knowledge_id, document_id_list, loading) => { + return exportExcelPost( + document_name.trim() + '_source.zip', + `${prefix}/${knowledge_id}/document/batch_download_source_file`, + {}, + { document_id_list }, + loading, + ) +} + /** * 刷新文档向量库 * @param 参数 @@ -581,6 +611,8 @@ export default { postReplaceSourceFile, exportDocument, exportDocumentZip, + exportMulDocumentZip, + exportMulSourceDocumentZip, putDocumentRefresh, putDocumentSync, putMulDocument, diff --git a/ui/src/views/document/index.vue b/ui/src/views/document/index.vue index f77fb2b41..2983539f5 100644 --- a/ui/src/views/document/index.vue +++ b/ui/src/views/document/index.vue @@ -85,6 +85,13 @@ > {{ $t('views.document.setting.export') }} Zip + + 批量导出源文档 + { (knowledgeDetail?.value.type === 1 && permissionPrecise.value.doc_sync(id)) || (knowledgeDetail?.value.type === 2 && permissionPrecise.value.doc_sync(id)) || permissionPrecise.value.doc_export(id) || + permissionPrecise.value.doc_download(id) || permissionPrecise.value.doc_delete(id) || permissionPrecise.value.doc_tag(id) ) } @@ -1131,6 +1139,16 @@ function exportMulDocumentZip() { }) } +function exportMulSourceDocumentZip() { + const arr: string[] = multipleSelection.value.map((v) => v.id) + loadSharedApi({ type: 'document', systemType: apiType.value }) + .exportMulSourceDocumentZip(knowledgeDetail.value?.name || 'documents', id, arr, loading) + .then(() => { + MsgSuccess(t('common.exportSuccess')) + multipleTableRef.value?.clearSelection() + }) +} + function batchRefresh() { const arr: string[] = multipleSelection.value.map((v) => v.id) const embeddingBatchDocument = (stateList: Array) => {