nex_docus/backend/app/api/v1/search.py

129 lines
4.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""
文档搜索相关 API
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, or_
import os
import glob
from app.core.database import get_db
from app.core.deps import get_current_user
from app.models.user import User
from app.models.project import Project, ProjectMember
from app.services.storage import storage_service
from app.schemas.response import success_response
# Router that collects the search endpoints declared in this module.
# NOTE(review): presumably included into the v1 API by the application setup — confirm.
router = APIRouter()
# Maximum number of entries returned by a single search request.
_MAX_SEARCH_RESULTS = 100

# Glob patterns of the file types whose names are searched, in result order.
_SEARCHABLE_PATTERNS = ("*.md", "*.pdf")


def _project_matches(project, keyword_lower: str) -> bool:
    """Return True when the keyword occurs in the project's name or description."""
    if keyword_lower in project.name.lower():
        return True
    # The description is optional; only search it when it is non-empty.
    return bool(project.description) and keyword_lower in project.description.lower()


def _find_matching_files(project_id, project_name, storage_key, keyword_lower: str, limit: int) -> list:
    """Collect up to ``limit`` files of one project whose name or path matches.

    This is a plain synchronous function because it walks the filesystem; the
    caller is expected to run it outside the event loop thread.

    Args:
        project_id: Identifier copied into every result entry.
        project_name: Project name copied into every result entry.
        storage_key: Key handed to ``storage_service.get_secure_path`` to
            locate the project's directory.
        keyword_lower: The search keyword, already lower-cased.
        limit: Maximum number of entries to return.

    Returns:
        A list of ``{"type": "file", ...}`` result dictionaries. The list is
        empty when the directory is missing or cannot be resolved or traversed.
    """
    if limit <= 0:
        return []

    matches = []
    try:
        project_path = storage_service.get_secure_path(storage_key)
        if not project_path.exists() or not project_path.is_dir():
            return matches

        # Markdown files are listed before PDF files.
        for pattern in _SEARCHABLE_PATTERNS:
            for file_path in project_path.rglob(pattern):
                if len(matches) >= limit:
                    return matches

                try:
                    relative = file_path.relative_to(project_path)
                except ValueError:
                    # Skip a file that cannot be expressed relative to the project.
                    continue

                # Skip anything stored under an _assets directory. Only the
                # project-relative parts are inspected so that an "_assets"
                # component in the storage root itself cannot hide every file.
                if "_assets" in relative.parts:
                    continue

                relative_path = str(relative)
                # PDF entries keep their extension; Markdown entries drop it.
                if file_path.suffix.lower() == '.pdf':
                    file_name = file_path.name
                else:
                    file_name = file_path.stem

                if keyword_lower in file_name.lower() or keyword_lower in relative_path.lower():
                    matches.append({
                        "type": "file",
                        "project_id": project_id,
                        "project_name": project_name,
                        "file_path": relative_path,
                        "file_name": file_name,
                        "match_type": "文件",
                    })
    except Exception:
        # Best effort: a project whose directory cannot be resolved or walked
        # contributes no file results instead of failing the whole search.
        return []
    return matches


@router.get("/documents", response_model=dict)
async def search_documents(
    keyword: str = Query(..., min_length=1, description="搜索关键词"),
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db)
):
    """
    Document search (simplified).

    Searches the project name, the project description and the file names
    (``.md`` and ``.pdf`` files) of every project the current user owns or is
    a member of. Matching is a case-insensitive substring test.

    Args:
        keyword: The search term (at least one character).
        current_user: The authenticated user, injected by ``get_current_user``.
        db: The async database session, injected by ``get_db``.

    Returns:
        The ``success_response`` payload whose ``data`` is a list of at most
        ``_MAX_SEARCH_RESULTS`` result dictionaries of type ``"project"`` or
        ``"file"``.
    """
    # Imported locally so this block does not depend on a file-level import.
    from fastapi.concurrency import run_in_threadpool

    # Query() already enforces min_length=1; this guards direct calls.
    if not keyword:
        return success_response(data=[])

    keyword_lower = keyword.lower()

    # Projects the user may access.
    # 1. Projects owned by the user.
    # NOTE(review): status == 1 presumably means "active" — confirm against the model.
    owned_projects_result = await db.execute(
        select(Project).where(Project.owner_id == current_user.id, Project.status == 1)
    )
    owned_projects = owned_projects_result.scalars().all()

    # 2. Projects the user is a member of; owned ones are excluded here so
    #    they are not listed twice.
    member_projects_result = await db.execute(
        select(Project)
        .join(ProjectMember, ProjectMember.project_id == Project.id)
        .where(
            ProjectMember.user_id == current_user.id,
            Project.owner_id != current_user.id,
            Project.status == 1
        )
    )
    member_projects = member_projects_result.scalars().all()

    all_projects = list(owned_projects) + list(member_projects)

    search_results = []
    for project in all_projects:
        # Stop as soon as the cap is reached; later projects could only add
        # entries that would be cut off anyway.
        if len(search_results) >= _MAX_SEARCH_RESULTS:
            break

        if _project_matches(project, keyword_lower):
            search_results.append({
                "type": "project",
                "project_id": project.id,
                "project_name": project.name,
                "project_description": project.description or "",
                "match_type": "项目",
            })

        remaining = _MAX_SEARCH_RESULTS - len(search_results)
        if remaining <= 0:
            break

        # The directory walk is blocking I/O, so it runs in a worker thread.
        # Only plain attribute values are handed over, never the ORM object.
        file_matches = await run_in_threadpool(
            _find_matching_files,
            project.id,
            project.name,
            project.storage_key,
            keyword_lower,
            remaining,
        )
        search_results.extend(file_matches)

    # Defensive cap; the loop above already keeps the list within the limit.
    search_results = search_results[:_MAX_SEARCH_RESULTS]
    return success_response(data=search_results, message=f"找到 {len(search_results)} 个结果")