"""Document search API."""

import asyncio
import glob
import logging
import os
from typing import List, Tuple

from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import select, or_
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.database import get_db
from app.core.deps import get_current_user
from app.models.project import Project, ProjectMember
from app.models.user import User
from app.schemas.response import success_response
from app.services.storage import storage_service

logger = logging.getLogger(__name__)

router = APIRouter()

# Upper bound on the number of entries returned by a single search.
_MAX_RESULTS = 100

# Glob patterns searched inside every project directory. The order matters:
# all Markdown matches of a project are reported before its PDF matches.
_SEARCHABLE_PATTERNS = ("*.md", "*.pdf")


def _project_matches(project: Project, keyword_lower: str) -> bool:
    """Return True when the lowercased keyword occurs in the project's name or description."""
    if keyword_lower in project.name.lower():
        return True
    # The description may be empty/None, in which case only the name counts.
    return bool(project.description) and keyword_lower in project.description.lower()


def _find_matching_files(project_path, keyword_lower: str, limit: int) -> List[Tuple[str, str]]:
    """Collect files under ``project_path`` whose name or relative path contains the keyword.

    This function performs blocking filesystem access and is meant to be run
    in a worker thread, not directly on the event loop.

    Args:
        project_path: Project root directory. It must expose ``exists()``,
            ``is_dir()`` and ``rglob()`` (presumably a ``pathlib.Path``
            returned by the storage service -- confirm against that service).
        keyword_lower: Search keyword, already lowercased.
        limit: Maximum number of matches to return.

    Returns:
        A list of ``(relative_path, display_name)`` tuples, Markdown files
        first, then PDF files. ``display_name`` keeps the extension for PDF
        files and drops it for everything else.
    """
    if limit <= 0:
        return []
    if not project_path.exists() or not project_path.is_dir():
        return []

    candidates = []
    for pattern in _SEARCHABLE_PATTERNS:
        candidates.extend(project_path.rglob(pattern))

    matches: List[Tuple[str, str]] = []
    for file_path in candidates:
        try:
            relative = file_path.relative_to(project_path)
            # Only the part of the path inside the project decides whether a
            # file lives in an "_assets" directory; a storage root that merely
            # contains such a component must not hide every file.
            if "_assets" in relative.parts:
                continue
            relative_path = str(relative)
            if file_path.suffix.lower() == '.pdf':
                file_name = file_path.name  # PDF: keep the full file name
            else:
                file_name = file_path.stem  # Markdown: drop the extension
        except Exception:
            # Best effort: one unprocessable entry must not abort the scan.
            logger.debug("Skipping unprocessable file %s", file_path, exc_info=True)
            continue

        if keyword_lower in file_name.lower() or keyword_lower in relative_path.lower():
            matches.append((relative_path, file_name))
            if len(matches) >= limit:
                break
    return matches


@router.get("/documents", response_model=dict)
async def search_documents(
    keyword: str = Query(..., min_length=1, description="搜索关键词"),
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db)
):
    """
    Document search (simplified).

    Search scope: project name, project description, and file names
    (``.md`` and ``.pdf`` files) of every active project (``status == 1``)
    that the current user owns or is a member of.

    Args:
        keyword: Search keyword; matching is a case-insensitive substring test.
        current_user: The authenticated user performing the search.
        db: Async database session used to look up accessible projects.

    Returns:
        The ``success_response`` payload whose ``data`` is a list of at most
        100 entries. Each entry is either a ``"project"`` or a ``"file"``
        record; for every project the project record (if any) comes first,
        followed by its matching files.
    """
    # Defensive guard for direct calls that bypass the Query validation.
    if not keyword:
        return success_response(data=[])

    keyword_lower = keyword.lower()

    # Projects the user may access:
    # 1. projects owned by the user
    owned_projects_result = await db.execute(
        select(Project).where(Project.owner_id == current_user.id, Project.status == 1)
    )
    owned_projects = owned_projects_result.scalars().all()

    # 2. projects the user participates in (owned ones excluded to avoid duplicates)
    member_projects_result = await db.execute(
        select(Project)
        .join(ProjectMember, ProjectMember.project_id == Project.id)
        .where(
            ProjectMember.user_id == current_user.id,
            Project.owner_id != current_user.id,
            Project.status == 1
        )
    )
    member_projects = member_projects_result.scalars().all()

    all_projects = list(owned_projects) + list(member_projects)

    loop = asyncio.get_running_loop()
    search_results = []

    for project in all_projects:
        # Results are capped, so there is no point in scanning further
        # projects once the cap has been reached.
        if len(search_results) >= _MAX_RESULTS:
            break

        if _project_matches(project, keyword_lower):
            search_results.append({
                "type": "project",
                "project_id": project.id,
                "project_name": project.name,
                "project_description": project.description or "",
                "match_type": "项目",
            })

        remaining = _MAX_RESULTS - len(search_results)
        if remaining <= 0:
            break

        try:
            project_path = storage_service.get_secure_path(project.storage_key)
            # The recursive directory walk is blocking I/O; run it in the
            # default executor so the event loop stays responsive.
            file_matches = await loop.run_in_executor(
                None, _find_matching_files, project_path, keyword_lower, remaining
            )
        except Exception:
            # Best effort: a project whose directory cannot be resolved or
            # traversed is skipped, but the failure is no longer silent.
            logger.warning(
                "Failed to search files of project %s", project.id, exc_info=True
            )
            continue

        for relative_path, file_name in file_matches:
            search_results.append({
                "type": "file",
                "project_id": project.id,
                "project_name": project.name,
                "file_path": relative_path,
                "file_name": file_name,
                "match_type": "文件",
            })

    # Final safety net; the loop above already enforces the cap.
    search_results = search_results[:_MAX_RESULTS]

    return success_response(data=search_results, message=f"找到 {len(search_results)} 个结果")