feat:批量导出源文件

v3.2
panyy 2026-06-29 18:38:51 +08:00
parent 1db5c59925
commit d9173cfa10
10 changed files with 173 additions and 8 deletions

View File

@ -207,6 +207,8 @@ class ListenerManagement:
@staticmethod
def post_update_document_status(document_id, task_type: TaskType):
_document = QuerySet(Document).filter(id=document_id).first()
if _document is None:
return
status = Status(_document.status)
if status[task_type] == State.REVOKE:

View File

@ -23,6 +23,7 @@ class Services(TextChoices):
services_map = {
cls.gunicorn.value: services.GunicornService,
cls.celery_default: services.CeleryDefaultService,
cls.celery_model: services.CeleryModelService,
cls.local_model: services.GunicornLocalModelService,
}
return services_map.get(name)

View File

@ -1,4 +1,5 @@
from .celery_default import *
from .celery_model import *
from .gunicorn import *
from .local_model import *
from .scheduler import *

View File

@ -632,8 +632,17 @@ class DocumentSerializers(serializers.Serializer):
document = QuerySet(Document).filter(
id=self.data.get('document_id'), knowledge_id=self.data.get('knowledge_id')
).first()
source_file_id = document.meta.get('source_file_id') if document and document.meta else None
file_query_set = QuerySet(File).filter(source_type=FileSourceType.DOCUMENT, source_id=self.data.get('document_id'))
file = DocumentSerializers.Operate.get_source_file(document, self.data.get('knowledge_id'))
if not file:
raise AppApiException(500, _('File not exist. Only manually uploaded documents are supported'))
return FileSerializer.Operate(data={'id': file.id}).get(with_valid=True)
@staticmethod
def get_source_file(document, knowledge_id):
if not document:
return None
source_file_id = document.meta.get('source_file_id') if document.meta else None
file_query_set = QuerySet(File).filter(source_type=FileSourceType.DOCUMENT, source_id=document.id)
file = file_query_set.filter(id=source_file_id).first() if source_file_id else None
if not file:
file = file_query_set.first()
@ -641,11 +650,9 @@ class DocumentSerializers(serializers.Serializer):
file = QuerySet(File).filter(
id=source_file_id,
source_type=FileSourceType.KNOWLEDGE,
source_id=self.data.get('knowledge_id')
source_id=knowledge_id
).first()
if not file:
raise AppApiException(500, _('File not exist. Only manually uploaded documents are supported'))
return FileSerializer.Operate(data={'id': file.id}).get(with_valid=True)
return file
def one(self, with_valid=False):
self.is_valid(raise_exception=True)
@ -1292,6 +1299,38 @@ class DocumentSerializers(serializers.Serializer):
response.write(zip_buffer.getvalue())
return response
def batch_download_source_file(self, instance: Dict, with_valid=True):
    """
    Pack the source files of the requested documents into a single zip response.

    :param instance: request payload; ``document_id_list`` holds the ids of the documents to export
    :param with_valid: when true, validate this serializer and the payload before doing any work
    :return: HttpResponse with content type ``application/zip``, served as ``source_documents.zip``
    :raises AppApiException: 500 when the number of documents found in this knowledge base
        differs from the number of distinct requested ids, or when none of the documents
        has a source file
    """
    if with_valid:
        self.is_valid(raise_exception=True)
        BatchDocumentExportSerializer(data=instance).is_valid(raise_exception=True)
    knowledge_id = self.data.get('knowledge_id')
    document_id_list = instance.get('document_id_list')
    document_list = list(QuerySet(Document).filter(knowledge_id=knowledge_id, id__in=document_id_list))
    # Compare against the distinct ids so a repeated id in the request is not reported as missing.
    if len(document_list) != len({str(document_id) for document_id in document_id_list}):
        raise AppApiException(500, _('Document id does not exist'))
    response = HttpResponse(content_type='application/zip')
    response['Content-Disposition'] = 'attachment; filename="source_documents.zip"'
    zip_buffer = io.BytesIO()
    # Characters replaced with "_" before the name is used as an archive member name.
    invalid_chars = re.compile(r'[\\/:*?"<>|]')
    # Every member name already written; a generated "name_2.ext" must not clash with a
    # file that is really called "name_2.ext", so uniqueness is checked against this set
    # instead of a per-name counter.
    used_names = set()
    file_count = 0
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for document in document_list:
            source_file = DocumentSerializers.Operate.get_source_file(document, knowledge_id)
            if not source_file:
                # Documents without a source file are skipped rather than failing the whole export.
                continue
            safe_name = invalid_chars.sub('_', source_file.file_name or document.name or str(document.id)).strip()
            safe_name = safe_name or str(document.id)
            base_name, ext = os.path.splitext(safe_name)
            zip_name = safe_name
            suffix = 1
            while zip_name in used_names:
                suffix += 1
                zip_name = f'{base_name}_{suffix}{ext}'
            used_names.add(zip_name)
            zip_file.writestr(zip_name, source_file.get_bytes())
            file_count += 1
    if file_count == 0:
        raise AppApiException(500, _('File not exist. Only manually uploaded documents are supported'))
    response.write(zip_buffer.getvalue())
    return response
@transaction.atomic
def batch_delete(self, instance: Dict, with_valid=True):
if with_valid:

View File

@ -32,6 +32,7 @@ urlpatterns = [
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_delete', views.DocumentView.BatchDelete.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_refresh', views.DocumentView.BatchRefresh.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_export_zip', views.DocumentView.BatchExportZip.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_download_source_file', views.DocumentView.BatchDownloadSourceFile.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_generate_related', views.DocumentView.BatchGenerateRelated.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/web', views.WebDocumentView.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/qa', views.QaDocumentView.as_view()),

View File

@ -688,6 +688,29 @@ class DocumentView(APIView):
'knowledge_id': knowledge_id
}).batch_export_zip(request.data)
class BatchDownloadSourceFile(APIView):
    # POST endpoint that hands the request payload to
    # DocumentSerializers.Batch.batch_download_source_file and returns its result.
    authentication_classes = [TokenAuth]

    @has_permissions(
        PermissionConstants.KNOWLEDGE_DOCUMENT_DOWNLOAD_SOURCE_FILE.get_workspace_knowledge_permission(),
        PermissionConstants.KNOWLEDGE_DOCUMENT_DOWNLOAD_SOURCE_FILE.get_workspace_permission_workspace_manage_role(),
        RoleConstants.WORKSPACE_MANAGE.get_workspace_role(),
        ViewPermission(
            [RoleConstants.USER.get_workspace_role()],
            [PermissionConstants.KNOWLEDGE.get_workspace_knowledge_permission()],
            CompareConstants.AND,
        ),
    )
    @log(
        menu='document',
        operate="Batch download source files",
        # The operation object combines the knowledge base taken from the URL keywords
        # with the documents listed in the request body.
        get_operation_object=lambda r, keywords: get_knowledge_document_operation_object(
            get_knowledge_operation_object(keywords.get('knowledge_id')),
            get_document_operation_object_batch(r.data.get('document_id_list')),
        ),
    )
    def post(self, request: Request, workspace_id: str, knowledge_id: str):
        # Path parameters become the serializer data; the body is passed through untouched.
        batch_serializer = DocumentSerializers.Batch(
            data={'workspace_id': workspace_id, 'knowledge_id': knowledge_id}
        )
        return batch_serializer.batch_download_source_file(request.data)
class DownloadSourceFile(APIView):
authentication_classes = [TokenAuth]

View File

@ -210,6 +210,21 @@ const exportMulDocumentZip: (
)
}
/**
 * Download the source files of several documents as one zip archive.
 * @param document_name base name of the download; trimmed and suffixed with `_source.zip`
 * @param knowledge_id knowledge base id used in the request path
 * @param document_id_list ids of the documents, passed to exportExcelPost as `{ document_id_list }`
 * @param loading optional ref forwarded to exportExcelPost
 */
const exportMulSourceDocumentZip: (
  document_name: string,
  knowledge_id: string,
  document_id_list: string[],
  loading?: Ref<boolean>,
) => Promise<any> = (document_name, knowledge_id, document_id_list, loading) => {
  const archiveName = `${document_name.trim()}_source.zip`
  const url = `${prefix.value}/${knowledge_id}/document/batch_download_source_file`
  return exportExcelPost(archiveName, url, {}, { document_id_list }, loading)
}
/**
*
* @param
@ -635,6 +650,7 @@ export default {
exportDocument,
exportDocumentZip,
exportMulDocumentZip,
exportMulSourceDocumentZip,
putDocumentRefresh,
putDocumentSync,
putMulDocument,

View File

@ -1,5 +1,5 @@
import { Result } from '@/request/Result'
import { get, post, del, put, exportExcel, exportFile } from '@/request/index'
import { get, post, del, put, exportExcel, exportFile, exportExcelPost } from '@/request/index'
import type { Ref } from 'vue'
import type { KeyValue } from '@/api/type/common'
import type { pageRequest } from '@/api/type/common'
@ -187,6 +187,36 @@ const exportDocumentZip: (
)
}
/**
 * Export several documents as one zip archive.
 * @param document_name base name of the download; trimmed and suffixed with `.zip`
 * @param knowledge_id knowledge base id used in the request path
 * @param document_id_list ids of the documents, passed to exportExcelPost as `{ document_id_list }`
 * @param loading optional ref forwarded to exportExcelPost
 */
const exportMulDocumentZip: (
  document_name: string,
  knowledge_id: string,
  document_id_list: string[],
  loading?: Ref<boolean>,
) => Promise<any> = (document_name, knowledge_id, document_id_list, loading) => {
  const archiveName = `${document_name.trim()}.zip`
  const url = `${prefix}/${knowledge_id}/document/batch_export_zip`
  return exportExcelPost(archiveName, url, {}, { document_id_list }, loading)
}
/**
 * Download the source files of several documents as one zip archive.
 * @param document_name base name of the download; trimmed and suffixed with `_source.zip`
 * @param knowledge_id knowledge base id used in the request path
 * @param document_id_list ids of the documents, passed to exportExcelPost as `{ document_id_list }`
 * @param loading optional ref forwarded to exportExcelPost
 */
const exportMulSourceDocumentZip: (
  document_name: string,
  knowledge_id: string,
  document_id_list: string[],
  loading?: Ref<boolean>,
) => Promise<any> = (document_name, knowledge_id, document_id_list, loading) => {
  const archiveName = `${document_name.trim()}_source.zip`
  const url = `${prefix}/${knowledge_id}/document/batch_download_source_file`
  return exportExcelPost(archiveName, url, {}, { document_id_list }, loading)
}
/**
*
* @param
@ -580,6 +610,8 @@ export default {
postReplaceSourceFile,
exportDocument,
exportDocumentZip,
exportMulDocumentZip,
exportMulSourceDocumentZip,
putDocumentRefresh,
putDocumentSync,
putMulDocument,

View File

@ -1,5 +1,5 @@
import { Result } from '@/request/Result'
import { get, post, del, put, exportExcel, exportFile } from '@/request/index'
import { get, post, del, put, exportExcel, exportFile, exportExcelPost } from '@/request/index'
import type { Ref } from 'vue'
import type { KeyValue } from '@/api/type/common'
import type { pageRequest } from '@/api/type/common'
@ -187,6 +187,36 @@ const exportDocumentZip: (
)
}
/**
 * Export several documents as one zip archive.
 * @param document_name base name of the download; trimmed and suffixed with `.zip`
 * @param knowledge_id knowledge base id used in the request path
 * @param document_id_list ids of the documents, passed to exportExcelPost as `{ document_id_list }`
 * @param loading optional ref forwarded to exportExcelPost
 */
const exportMulDocumentZip: (
  document_name: string,
  knowledge_id: string,
  document_id_list: string[],
  loading?: Ref<boolean>,
) => Promise<any> = (document_name, knowledge_id, document_id_list, loading) => {
  const archiveName = `${document_name.trim()}.zip`
  const url = `${prefix}/${knowledge_id}/document/batch_export_zip`
  return exportExcelPost(archiveName, url, {}, { document_id_list }, loading)
}
/**
 * Download the source files of several documents as one zip archive.
 * @param document_name base name of the download; trimmed and suffixed with `_source.zip`
 * @param knowledge_id knowledge base id used in the request path
 * @param document_id_list ids of the documents, passed to exportExcelPost as `{ document_id_list }`
 * @param loading optional ref forwarded to exportExcelPost
 */
const exportMulSourceDocumentZip: (
  document_name: string,
  knowledge_id: string,
  document_id_list: string[],
  loading?: Ref<boolean>,
) => Promise<any> = (document_name, knowledge_id, document_id_list, loading) => {
  const archiveName = `${document_name.trim()}_source.zip`
  const url = `${prefix}/${knowledge_id}/document/batch_download_source_file`
  return exportExcelPost(archiveName, url, {}, { document_id_list }, loading)
}
/**
*
* @param
@ -581,6 +611,8 @@ export default {
postReplaceSourceFile,
exportDocument,
exportDocumentZip,
exportMulDocumentZip,
exportMulSourceDocumentZip,
putDocumentRefresh,
putDocumentSync,
putMulDocument,

View File

@ -85,6 +85,13 @@
>
{{ $t('views.document.setting.export') }} Zip
</el-dropdown-item>
<!-- Batch download of the source files for the selected rows; disabled while nothing is selected. -->
<!-- NOTE(review): the label below is a hard-coded string, while the neighbouring item uses $t(...); consider moving it to an i18n key. -->
<el-dropdown-item
@click="exportMulSourceDocumentZip"
:disabled="multipleSelection.length === 0"
v-if="permissionPrecise.doc_download(id)"
>
批量导出源文档
</el-dropdown-item>
<el-dropdown-item
divided
@click="syncMulDocument"
@ -792,6 +799,7 @@ const MoreFilledPermission0 = (id: string) => {
(knowledgeDetail?.value.type === 1 && permissionPrecise.value.doc_sync(id)) ||
(knowledgeDetail?.value.type === 2 && permissionPrecise.value.doc_sync(id)) ||
permissionPrecise.value.doc_export(id) ||
permissionPrecise.value.doc_download(id) ||
permissionPrecise.value.doc_delete(id) || permissionPrecise.value.doc_tag(id)
)
}
@ -1131,6 +1139,16 @@ function exportMulDocumentZip() {
})
}
/**
 * Download the source files of all currently selected documents as one zip,
 * then show a success message and clear the table selection.
 */
function exportMulSourceDocumentZip() {
  const selectedIds: string[] = multipleSelection.value.map((row) => row.id)
  const documentApi = loadSharedApi({ type: 'document', systemType: apiType.value })
  // Falls back to 'documents' when the knowledge base name is not available.
  const archiveBaseName = knowledgeDetail.value?.name || 'documents'
  documentApi.exportMulSourceDocumentZip(archiveBaseName, id, selectedIds, loading).then(() => {
    MsgSuccess(t('common.exportSuccess'))
    multipleTableRef.value?.clearSelection()
  })
}
function batchRefresh() {
const arr: string[] = multipleSelection.value.map((v) => v.id)
const embeddingBatchDocument = (stateList: Array<string>) => {