v0.1.4-p1
parent
590eae9f0c
commit
6795fedbfe
|
|
@ -43,3 +43,15 @@ PANEL_ACCESS_PASSWORD=change_me_panel_password
|
|||
|
||||
# Max upload size for backend validation (MB)
|
||||
UPLOAD_MAX_MB=200
|
||||
|
||||
# Local speech-to-text (Whisper via whisper.cpp model file)
|
||||
STT_ENABLED=true
|
||||
STT_MODEL=ggml-small-q8_0.bin
|
||||
STT_MODEL_DIR=${HOST_DATA_ROOT}/model
|
||||
STT_DEVICE=cpu
|
||||
STT_MAX_AUDIO_SECONDS=20
|
||||
STT_DEFAULT_LANGUAGE=zh
|
||||
STT_FORCE_SIMPLIFIED=true
|
||||
STT_AUDIO_PREPROCESS=true
|
||||
STT_AUDIO_FILTER=highpass=f=120,lowpass=f=7600,afftdn=nf=-20
|
||||
STT_INITIAL_PROMPT=以下内容可能包含简体中文和英文术语。请优先输出简体中文,英文单词、缩写、品牌名和数字保持原文,不要翻译。
|
||||
|
|
|
|||
|
|
@ -104,6 +104,8 @@ graph TD
|
|||
- 配置绝对路径:
|
||||
- `HOST_DATA_ROOT`
|
||||
- `HOST_BOTS_WORKSPACE_ROOT`
|
||||
- 如启用本地语音识别,请将 Whisper `.bin` 模型文件放到 `${HOST_DATA_ROOT}/model/`
|
||||
并让 `STT_MODEL` 指向完整文件名,例如 `ggml-small-q8_0.bin`
|
||||
- 中国网络建议配置加速项:
|
||||
- `PIP_INDEX_URL`、`PIP_TRUSTED_HOST`
|
||||
- `NPM_REGISTRY`
|
||||
|
|
@ -120,3 +122,4 @@ graph TD
|
|||
- 必须挂载 `/var/run/docker.sock`,否则后端无法操作 Bot 镜像与容器。
|
||||
- `HOST_BOTS_WORKSPACE_ROOT` 必须是宿主机绝对路径,并且在 `docker-compose.prod.yml` 中以“同路径”挂载到后端容器。
|
||||
原因:后端通过 Docker API 创建 Bot 容器时,使用的是宿主机可见的 bind 路径。
|
||||
- 语音识别当前基于 `pywhispercpp==1.3.1` + Whisper `.bin` 模型文件,不使用 `faster-whisper`。
|
||||
|
|
|
|||
|
|
@ -27,6 +27,18 @@ PANEL_ACCESS_PASSWORD=
|
|||
# Max upload size for backend validation (MB)
|
||||
UPLOAD_MAX_MB=100
|
||||
|
||||
# Local speech-to-text (Whisper via whisper.cpp model file)
|
||||
STT_ENABLED=true
|
||||
STT_MODEL=ggml-small-q8_0.bin
|
||||
STT_MODEL_DIR=../data/model
|
||||
STT_DEVICE=cpu
|
||||
STT_MAX_AUDIO_SECONDS=20
|
||||
STT_DEFAULT_LANGUAGE=zh
|
||||
STT_FORCE_SIMPLIFIED=true
|
||||
STT_AUDIO_PREPROCESS=true
|
||||
STT_AUDIO_FILTER=highpass=f=120,lowpass=f=7600,afftdn=nf=-20
|
||||
STT_INITIAL_PROMPT=以下内容可能包含简体中文和英文术语。请优先输出简体中文,英文单词、缩写、品牌名和数字保持原文,不要翻译。
|
||||
|
||||
# Local backend server options (for `python3 main.py`)
|
||||
APP_HOST=0.0.0.0
|
||||
APP_PORT=8000
|
||||
|
|
|
|||
|
|
@ -13,6 +13,9 @@ ARG PIP_TRUSTED_HOST=
|
|||
COPY backend/requirements.txt ./requirements.txt
|
||||
RUN if [ -n "${PIP_INDEX_URL}" ]; then pip config set global.index-url "${PIP_INDEX_URL}"; fi \
|
||||
&& if [ -n "${PIP_TRUSTED_HOST}" ]; then pip config set global.trusted-host "${PIP_TRUSTED_HOST}"; fi \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y --no-install-recommends ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& pip install --upgrade pip \
|
||||
&& pip install -r requirements.txt
|
||||
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ def _normalize_dir_path(path_value: str) -> str:
|
|||
raw = str(path_value or "").strip()
|
||||
if not raw:
|
||||
return raw
|
||||
raw = os.path.expandvars(os.path.expanduser(raw))
|
||||
p = Path(raw)
|
||||
if p.is_absolute():
|
||||
return str(p)
|
||||
|
|
@ -117,6 +118,26 @@ DATABASE_ENGINE: Final[str] = _database_engine(DATABASE_URL)
|
|||
DATABASE_URL_DISPLAY: Final[str] = _mask_database_url(DATABASE_URL)
|
||||
DATABASE_ECHO: Final[bool] = _env_bool("DATABASE_ECHO", True)
|
||||
UPLOAD_MAX_MB: Final[int] = _env_int("UPLOAD_MAX_MB", 100, 1, 2048)
|
||||
STT_ENABLED: Final[bool] = _env_bool("STT_ENABLED", True)
|
||||
STT_MODEL: Final[str] = str(os.getenv("STT_MODEL") or "ggml-small-q8_0.bin").strip()
|
||||
_DEFAULT_STT_MODEL_DIR: Final[Path] = (Path(DATA_ROOT) / "model").resolve()
|
||||
_configured_stt_model_dir = _normalize_dir_path(os.getenv("STT_MODEL_DIR", str(_DEFAULT_STT_MODEL_DIR)))
|
||||
if _configured_stt_model_dir and not Path(_configured_stt_model_dir).exists() and _DEFAULT_STT_MODEL_DIR.exists():
|
||||
STT_MODEL_DIR: Final[str] = str(_DEFAULT_STT_MODEL_DIR)
|
||||
else:
|
||||
STT_MODEL_DIR: Final[str] = _configured_stt_model_dir
|
||||
STT_DEVICE: Final[str] = str(os.getenv("STT_DEVICE") or "cpu").strip().lower() or "cpu"
|
||||
STT_MAX_AUDIO_SECONDS: Final[int] = _env_int("STT_MAX_AUDIO_SECONDS", 20, 5, 600)
|
||||
STT_DEFAULT_LANGUAGE: Final[str] = str(os.getenv("STT_DEFAULT_LANGUAGE") or "zh").strip().lower() or "zh"
|
||||
STT_FORCE_SIMPLIFIED: Final[bool] = _env_bool("STT_FORCE_SIMPLIFIED", True)
|
||||
STT_AUDIO_PREPROCESS: Final[bool] = _env_bool("STT_AUDIO_PREPROCESS", True)
|
||||
STT_AUDIO_FILTER: Final[str] = str(
|
||||
os.getenv("STT_AUDIO_FILTER") or "highpass=f=120,lowpass=f=7600,afftdn=nf=-20"
|
||||
).strip()
|
||||
STT_INITIAL_PROMPT: Final[str] = str(
|
||||
os.getenv("STT_INITIAL_PROMPT")
|
||||
or "以下内容可能包含简体中文和英文术语。请优先输出简体中文,英文单词、缩写、品牌名和数字保持原文,不要翻译。"
|
||||
).strip()
|
||||
|
||||
REDIS_ENABLED: Final[bool] = _env_bool("REDIS_ENABLED", False)
|
||||
REDIS_URL: Final[str] = str(os.getenv("REDIS_URL") or "").strip()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,259 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from core.settings import (
|
||||
STT_AUDIO_FILTER,
|
||||
STT_AUDIO_PREPROCESS,
|
||||
STT_DEVICE,
|
||||
STT_ENABLED,
|
||||
STT_FORCE_SIMPLIFIED,
|
||||
STT_INITIAL_PROMPT,
|
||||
STT_MAX_AUDIO_SECONDS,
|
||||
STT_MODEL,
|
||||
STT_MODEL_DIR,
|
||||
)
|
||||
|
||||
|
||||
class SpeechServiceError(RuntimeError):
    """Base error for local speech-to-text failures (config, model, transcription)."""
|
||||
|
||||
|
||||
class SpeechDisabledError(SpeechServiceError):
    """Raised when transcription is requested while STT is disabled."""
|
||||
|
||||
|
||||
class SpeechDurationError(SpeechServiceError):
    """Raised when the audio exceeds the configured maximum duration."""
|
||||
|
||||
|
||||
class WhisperSpeechService:
    """Local speech-to-text service backed by pywhispercpp (whisper.cpp).

    Responsibilities:
      * Resolve the configured ggml ``.bin`` model file (strict filename mode
        or direct path mode) and load it lazily, thread-safely, caching by
        the resolved path so config changes trigger a reload.
      * Optionally pre-process audio with ffmpeg (mono 16 kHz wav plus the
        configured filter chain) before transcription.
      * Enforce ``STT_MAX_AUDIO_SECONDS`` both by probing the container
        duration up front and by checking segment end timestamps afterwards.
      * Optionally convert Traditional Chinese output to Simplified via
        opencc-purepy when ``STT_FORCE_SIMPLIFIED`` is on.
    """

    def __init__(self) -> None:
        self._model: Any = None          # cached pywhispercpp Model instance
        self._model_source: str = ""     # resolved model path the cache was built from
        self._backend: str = ""          # backend tag reported in results
        self._model_lock = threading.Lock()  # guards lazy model construction

    def _resolve_model_source(self) -> str:
        """Resolve STT_MODEL / STT_MODEL_DIR to an absolute ``.bin`` file path.

        Raises:
            SpeechServiceError: if the model name or directory is missing,
                the file does not exist, or it is not a ggml ``.bin`` file.
        """
        model = str(STT_MODEL or "").strip()
        model_dir = str(STT_MODEL_DIR or "").strip()

        if not model:
            # Fixed typo in the example filename ("samll" -> "small") so the
            # hint matches the actual default model name used elsewhere.
            raise SpeechServiceError(
                "STT_MODEL is empty. Please set the full model file name, e.g. ggml-small-q8_0.bin."
            )

        # If STT_MODEL itself is an absolute/relative path, use it directly.
        if any(sep in model for sep in ("/", "\\")):
            direct = Path(model).expanduser()
            if not direct.exists() or not direct.is_file():
                raise SpeechServiceError(f"STT model file not found: {direct}")
            if direct.suffix.lower() != ".bin":
                raise SpeechServiceError(
                    "STT_MODEL must point to a whisper.cpp ggml .bin model file."
                )
            return str(direct.resolve())

        # Strict mode: only exact filename, no alias/auto detection.
        if Path(model).suffix.lower() != ".bin":
            raise SpeechServiceError(
                "STT_MODEL must be the exact model file name (with .bin), e.g. ggml-small-q8_0.bin."
            )

        if not model_dir:
            raise SpeechServiceError("STT_MODEL_DIR is empty.")
        root = Path(model_dir).expanduser()
        if not root.exists() or not root.is_dir():
            raise SpeechServiceError(f"STT_MODEL_DIR does not exist: {root}")
        candidate = root / model
        if not candidate.exists() or not candidate.is_file():
            raise SpeechServiceError(
                f"STT model file not found under STT_MODEL_DIR: {candidate}"
            )
        return str(candidate.resolve())

    def _load_model(self) -> Any:
        """Return the cached Model, (re)loading under a lock when the resolved path changed.

        Raises:
            SpeechServiceError: if pywhispercpp is not importable or the model
                path cannot be resolved.
        """
        model_source = self._resolve_model_source()
        # Fast path without the lock; re-checked inside (double-checked locking).
        if self._model is not None and self._model_source == model_source:
            return self._model
        with self._model_lock:
            if self._model is not None and self._model_source == model_source:
                return self._model
            try:
                from pywhispercpp.model import Model  # type: ignore
            except Exception as exc:
                raise SpeechServiceError(
                    "pywhispercpp is not installed in the active backend environment. "
                    "Run pip install -r backend/requirements.txt or rebuild the backend image."
                ) from exc
            self._model = Model(
                model_source,
                print_realtime=False,
                print_progress=False,
            )
            self._backend = "pywhispercpp"
            self._model_source = model_source
            return self._model

    @staticmethod
    def _preprocess_audio(file_path: str) -> str:
        """Convert audio to a clean mono 16 kHz wav via ffmpeg; best-effort.

        Returns the path to a new temp wav on success, or the original path
        when preprocessing is disabled, ffmpeg is missing, or conversion fails.
        The caller is responsible for deleting a returned temp file.
        """
        target = str(file_path or "").strip()
        if not STT_AUDIO_PREPROCESS or not target or not os.path.isfile(target):
            return target
        if shutil.which("ffmpeg") is None:
            return target
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav", prefix=".speech_clean_")
        tmp_path = tmp.name
        tmp.close()
        # -vn: drop video; mono, 16 kHz — whisper.cpp's expected input format.
        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            target,
            "-vn",
            "-ac",
            "1",
            "-ar",
            "16000",
        ]
        audio_filter = str(STT_AUDIO_FILTER or "").strip()
        if audio_filter:
            cmd.extend(["-af", audio_filter])
        cmd.extend(["-c:a", "pcm_s16le", tmp_path])
        try:
            completed = subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=False,
            )
            if completed.returncode != 0 or not os.path.exists(tmp_path) or os.path.getsize(tmp_path) <= 0:
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
                return target
            return tmp_path
        except Exception:
            # Any failure falls back to the original file; never block transcription.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            return target

    @staticmethod
    def _probe_audio_duration_seconds(file_path: str) -> Optional[float]:
        """Best-effort duration probe via PyAV; returns None when unavailable."""
        try:
            import av  # type: ignore

            with av.open(file_path) as container:
                if container.duration is not None:
                    # container.duration is in av.time_base units.
                    return max(0.0, float(container.duration / av.time_base))
                for stream in container.streams:
                    if stream.type != "audio":
                        continue
                    if stream.duration is not None and stream.time_base is not None:
                        return max(0.0, float(stream.duration * stream.time_base))
        except Exception:
            # PyAV missing or file unreadable — duration is simply unknown.
            return None
        return None

    @staticmethod
    def _normalize_text(text: str) -> str:
        """Trim text; convert Traditional -> Simplified Chinese when enabled (best-effort)."""
        content = str(text or "").strip()
        if not content or not STT_FORCE_SIMPLIFIED:
            return content
        try:
            from opencc_purepy import OpenCC  # type: ignore

            return str(OpenCC("t2s").convert(content) or "").strip() or content
        except Exception:
            return content

    @staticmethod
    def _filter_supported_transcribe_kwargs(model: Any, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Drop kwargs the loaded model's parameter set does not support.

        If the model cannot report its params, pass kwargs through unchanged.
        """
        try:
            available = set(model.get_params().keys())
        except Exception:
            return kwargs
        return {key: value for key, value in kwargs.items() if key in available}

    def transcribe_file(self, file_path: str, language: Optional[str] = None) -> Dict[str, Any]:
        """Transcribe an audio file and return text plus metadata.

        Args:
            file_path: path to the uploaded audio file.
            language: optional language hint; "auto"/"null"/"none" mean autodetect.

        Returns:
            Dict with keys: text, language, duration_seconds, max_audio_seconds,
            model, device, backend.

        Raises:
            SpeechDisabledError: STT is globally disabled.
            SpeechDurationError: audio exceeds STT_MAX_AUDIO_SECONDS.
            SpeechServiceError: file missing, model/load errors, no speech.
        """
        if not STT_ENABLED:
            raise SpeechDisabledError("Speech-to-text is disabled")
        target = str(file_path or "").strip()
        if not target or not os.path.isfile(target):
            raise SpeechServiceError("Audio file not found")

        # Pre-check duration when probeable; 0.3s slack tolerates container rounding.
        duration_seconds = self._probe_audio_duration_seconds(target)
        if duration_seconds is not None and duration_seconds > float(STT_MAX_AUDIO_SECONDS) + 0.3:
            raise SpeechDurationError(f"Audio duration exceeds {STT_MAX_AUDIO_SECONDS} seconds")

        prepared_target = self._preprocess_audio(target)
        try:
            model = self._load_model()
            lang = str(language or "").strip().lower()
            normalized_lang: Optional[str] = None
            if lang and lang not in {"auto", "null", "none"}:
                normalized_lang = lang

            max_end = 0.0
            detected_language = ""
            texts = []
            kwargs: Dict[str, Any] = {
                "print_realtime": False,
                "print_progress": False,
                "no_context": True,
                "suppress_non_speech_tokens": True,
            }
            if normalized_lang:
                kwargs["language"] = normalized_lang
            initial_prompt = str(STT_INITIAL_PROMPT or "").strip()
            if initial_prompt:
                kwargs["initial_prompt"] = initial_prompt
            kwargs = self._filter_supported_transcribe_kwargs(model, kwargs)
            try:
                segments = model.transcribe(prepared_target, **kwargs)
            except Exception as exc:
                raise SpeechServiceError(
                    f"pywhispercpp transcription failed: {exc}. "
                    "If input is not wav, install ffmpeg in runtime image."
                ) from exc
            for segment in segments:
                txt = str(getattr(segment, "text", "") or "").strip()
                if txt:
                    texts.append(txt)
                if normalized_lang:
                    detected_language = normalized_lang
                try:
                    # segment.t1 is in centiseconds; track the latest end time.
                    max_end = max(max_end, float(getattr(segment, "t1", 0.0) or 0.0) / 100.0)
                except Exception:
                    pass
            # Post-check duration via segment timestamps when probing failed.
            if max_end > float(STT_MAX_AUDIO_SECONDS) + 0.3:
                raise SpeechDurationError(f"Audio duration exceeds {STT_MAX_AUDIO_SECONDS} seconds")

            text = self._normalize_text(" ".join(texts).strip())
            if not text:
                raise SpeechServiceError("No speech detected")

            if duration_seconds is None:
                duration_seconds = max_end if max_end > 0 else None

            return {
                "text": text,
                "language": detected_language or None,
                "duration_seconds": duration_seconds,
                "max_audio_seconds": STT_MAX_AUDIO_SECONDS,
                "model": STT_MODEL,
                "device": STT_DEVICE,
                "backend": self._backend or "unknown",
            }
        finally:
            # Remove the ffmpeg temp wav (if any); never the caller's file.
            if prepared_target != target and os.path.exists(prepared_target):
                try:
                    os.remove(prepared_target)
                except Exception:
                    pass
|
||||
119
backend/main.py
119
backend/main.py
|
|
@ -1,5 +1,6 @@
|
|||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
|
|
@ -12,7 +13,7 @@ from urllib.parse import unquote
|
|||
|
||||
import httpx
|
||||
from pydantic import BaseModel
|
||||
from fastapi import Depends, FastAPI, File, HTTPException, Request, UploadFile, WebSocket, WebSocketDisconnect
|
||||
from fastapi import Depends, FastAPI, File, Form, HTTPException, Request, UploadFile, WebSocket, WebSocketDisconnect
|
||||
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from sqlmodel import Session, select
|
||||
|
|
@ -21,6 +22,12 @@ from core.config_manager import BotConfigManager
|
|||
from core.cache import cache
|
||||
from core.database import engine, get_session, init_database
|
||||
from core.docker_manager import BotDockerManager
|
||||
from core.speech_service import (
|
||||
SpeechDisabledError,
|
||||
SpeechDurationError,
|
||||
SpeechServiceError,
|
||||
WhisperSpeechService,
|
||||
)
|
||||
from core.settings import (
|
||||
BOTS_WORKSPACE_ROOT,
|
||||
DATA_ROOT,
|
||||
|
|
@ -37,11 +44,17 @@ from core.settings import (
|
|||
REDIS_ENABLED,
|
||||
REDIS_PREFIX,
|
||||
REDIS_URL,
|
||||
STT_DEVICE,
|
||||
STT_DEFAULT_LANGUAGE,
|
||||
STT_ENABLED,
|
||||
STT_MAX_AUDIO_SECONDS,
|
||||
STT_MODEL,
|
||||
UPLOAD_MAX_MB,
|
||||
)
|
||||
from models.bot import BotInstance, BotMessage, NanobotImage
|
||||
|
||||
app = FastAPI(title="Dashboard Nanobot API")
|
||||
logger = logging.getLogger("dashboard.backend")
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
|
|
@ -55,6 +68,7 @@ os.makedirs(DATA_ROOT, exist_ok=True)
|
|||
|
||||
docker_manager = BotDockerManager(host_data_root=BOTS_WORKSPACE_ROOT)
|
||||
config_manager = BotConfigManager(host_data_root=BOTS_WORKSPACE_ROOT)
|
||||
speech_service = WhisperSpeechService()
|
||||
BOT_ID_PATTERN = re.compile(r"^[A-Za-z0-9_]+$")
|
||||
|
||||
|
||||
|
|
@ -501,6 +515,13 @@ def get_system_defaults():
|
|||
"limits": {
|
||||
"upload_max_mb": UPLOAD_MAX_MB,
|
||||
},
|
||||
"speech": {
|
||||
"enabled": STT_ENABLED,
|
||||
"model": STT_MODEL,
|
||||
"device": STT_DEVICE,
|
||||
"max_audio_seconds": STT_MAX_AUDIO_SECONDS,
|
||||
"default_language": STT_DEFAULT_LANGUAGE,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -3117,6 +3138,102 @@ async def upload_workspace_files(
|
|||
return {"bot_id": bot_id, "files": rows}
|
||||
|
||||
|
||||
@app.post("/api/bots/{bot_id}/speech/transcribe")
|
||||
async def transcribe_bot_speech(
|
||||
bot_id: str,
|
||||
file: UploadFile = File(...),
|
||||
language: Optional[str] = Form(None),
|
||||
session: Session = Depends(get_session),
|
||||
):
|
||||
bot = session.get(BotInstance, bot_id)
|
||||
if not bot:
|
||||
raise HTTPException(status_code=404, detail="Bot not found")
|
||||
if not STT_ENABLED:
|
||||
raise HTTPException(status_code=400, detail="Speech recognition is disabled")
|
||||
if not file:
|
||||
raise HTTPException(status_code=400, detail="no audio file uploaded")
|
||||
|
||||
original_name = str(file.filename or "audio.webm").strip() or "audio.webm"
|
||||
safe_name = os.path.basename(original_name).replace("\\", "_").replace("/", "_")
|
||||
ext = os.path.splitext(safe_name)[1].strip().lower() or ".webm"
|
||||
if len(ext) > 12:
|
||||
ext = ".webm"
|
||||
|
||||
tmp_path = ""
|
||||
try:
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=ext, prefix=".speech_", dir=DATA_ROOT) as tmp:
|
||||
tmp_path = tmp.name
|
||||
while True:
|
||||
chunk = await file.read(1024 * 1024)
|
||||
if not chunk:
|
||||
break
|
||||
tmp.write(chunk)
|
||||
|
||||
if not tmp_path or not os.path.exists(tmp_path) or os.path.getsize(tmp_path) <= 0:
|
||||
raise HTTPException(status_code=400, detail="audio payload is empty")
|
||||
|
||||
resolved_language = str(language or "").strip() or STT_DEFAULT_LANGUAGE
|
||||
result = await asyncio.to_thread(speech_service.transcribe_file, tmp_path, resolved_language)
|
||||
text = str(result.get("text") or "").strip()
|
||||
if not text:
|
||||
raise HTTPException(status_code=400, detail="No speech detected")
|
||||
return {
|
||||
"bot_id": bot_id,
|
||||
"text": text,
|
||||
"duration_seconds": result.get("duration_seconds"),
|
||||
"max_audio_seconds": STT_MAX_AUDIO_SECONDS,
|
||||
"model": STT_MODEL,
|
||||
"device": STT_DEVICE,
|
||||
"language": result.get("language") or resolved_language,
|
||||
}
|
||||
except SpeechDisabledError as exc:
|
||||
logger.warning(
|
||||
"speech transcribe disabled bot_id=%s file=%s language=%s detail=%s",
|
||||
bot_id,
|
||||
safe_name,
|
||||
language,
|
||||
exc,
|
||||
)
|
||||
raise HTTPException(status_code=400, detail=str(exc))
|
||||
except SpeechDurationError:
|
||||
logger.warning(
|
||||
"speech transcribe too long bot_id=%s file=%s language=%s max_seconds=%s",
|
||||
bot_id,
|
||||
safe_name,
|
||||
language,
|
||||
STT_MAX_AUDIO_SECONDS,
|
||||
)
|
||||
raise HTTPException(status_code=413, detail=f"Audio duration exceeds {STT_MAX_AUDIO_SECONDS} seconds")
|
||||
except SpeechServiceError as exc:
|
||||
logger.exception(
|
||||
"speech transcribe failed bot_id=%s file=%s language=%s",
|
||||
bot_id,
|
||||
safe_name,
|
||||
language,
|
||||
)
|
||||
raise HTTPException(status_code=400, detail=str(exc))
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.exception(
|
||||
"speech transcribe unexpected error bot_id=%s file=%s language=%s",
|
||||
bot_id,
|
||||
safe_name,
|
||||
language,
|
||||
)
|
||||
raise HTTPException(status_code=500, detail=f"speech transcription failed: {exc}")
|
||||
finally:
|
||||
try:
|
||||
await file.close()
|
||||
except Exception:
|
||||
pass
|
||||
if tmp_path and os.path.exists(tmp_path):
|
||||
try:
|
||||
os.remove(tmp_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@app.websocket("/ws/monitor/{bot_id}")
|
||||
async def websocket_endpoint(websocket: WebSocket, bot_id: str):
|
||||
with Session(engine) as session:
|
||||
|
|
|
|||
|
|
@ -15,3 +15,5 @@ watchfiles==0.21.0
|
|||
urllib3==1.26.18
|
||||
requests==2.31.0
|
||||
redis==5.0.8
|
||||
opencc-purepy==1.1.0
|
||||
pywhispercpp==1.3.1
|
||||
|
|
|
|||
|
|
@ -24,6 +24,16 @@ services:
|
|||
REDIS_PREFIX: ${REDIS_PREFIX:-dashboard_nanobot}
|
||||
REDIS_DEFAULT_TTL: ${REDIS_DEFAULT_TTL:-60}
|
||||
PANEL_ACCESS_PASSWORD: ${PANEL_ACCESS_PASSWORD:-}
|
||||
STT_ENABLED: ${STT_ENABLED:-true}
|
||||
STT_MODEL: ${STT_MODEL:-ggml-small-q8_0.bin}
|
||||
STT_MODEL_DIR: ${STT_MODEL_DIR:-${HOST_DATA_ROOT}/model}
|
||||
STT_DEVICE: ${STT_DEVICE:-cpu}
|
||||
STT_MAX_AUDIO_SECONDS: ${STT_MAX_AUDIO_SECONDS:-20}
|
||||
STT_DEFAULT_LANGUAGE: ${STT_DEFAULT_LANGUAGE:-zh}
|
||||
STT_FORCE_SIMPLIFIED: ${STT_FORCE_SIMPLIFIED:-true}
|
||||
STT_AUDIO_PREPROCESS: ${STT_AUDIO_PREPROCESS:-true}
|
||||
STT_AUDIO_FILTER: ${STT_AUDIO_FILTER:-highpass=f=120,lowpass=f=7600,afftdn=nf=-20}
|
||||
STT_INITIAL_PROMPT: ${STT_INITIAL_PROMPT:-以下内容可能包含简体中文和英文术语。请优先输出简体中文,英文单词、缩写、品牌名和数字保持原文,不要翻译。}
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- ${HOST_DATA_ROOT}:${HOST_DATA_ROOT}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,19 @@ export const dashboardEn = {
|
|||
copyPromptFail: 'Failed to copy prompt.',
|
||||
editPromptDone: 'Inserted into composer.',
|
||||
voiceInput: 'Voice input',
|
||||
voiceUnavailable: 'Voice input is not available yet.',
|
||||
textInput: 'Text input',
|
||||
voiceUnavailable: 'Speech recognition is disabled.',
|
||||
voiceUnsupported: 'Your browser does not support audio recording.',
|
||||
voicePermissionDenied: 'Microphone permission denied. Please allow access in browser settings.',
|
||||
voiceRecordFail: 'Audio recording failed. Please retry.',
|
||||
voiceReady: 'Click the mic to start recording',
|
||||
voiceRecording: 'Recording...',
|
||||
voiceTranscribing: 'Transcribing...',
|
||||
voiceStart: 'Start recording',
|
||||
voiceStop: 'Stop recording',
|
||||
voiceTranscribeDone: 'Voice converted to text.',
|
||||
voiceTranscribeEmpty: 'No valid speech detected.',
|
||||
voiceTranscribeFail: 'Speech transcription failed.',
|
||||
copyReply: 'Copy reply',
|
||||
copyReplyDone: 'Reply copied.',
|
||||
copyReplyFail: 'Failed to copy reply.',
|
||||
|
|
|
|||
|
|
@ -28,7 +28,19 @@ export const dashboardZhCn = {
|
|||
copyPromptFail: '复制指令失败。',
|
||||
editPromptDone: '已填入输入框。',
|
||||
voiceInput: '语音输入',
|
||||
voiceUnavailable: '语音输入暂未接入。',
|
||||
textInput: '文字输入',
|
||||
voiceUnavailable: '语音识别未启用。',
|
||||
voiceUnsupported: '当前浏览器不支持录音。',
|
||||
voicePermissionDenied: '麦克风权限被拒绝,请在浏览器设置中允许访问。',
|
||||
voiceRecordFail: '录音失败,请重试。',
|
||||
voiceReady: '点击麦克风开始录音',
|
||||
voiceRecording: '录音中...',
|
||||
voiceTranscribing: '语音识别中...',
|
||||
voiceStart: '开始录音',
|
||||
voiceStop: '停止录音',
|
||||
voiceTranscribeDone: '语音已转为文本。',
|
||||
voiceTranscribeEmpty: '未识别到有效语音内容。',
|
||||
voiceTranscribeFail: '语音识别失败。',
|
||||
copyReply: '复制回复',
|
||||
copyReplyDone: '回复已复制。',
|
||||
copyReplyFail: '复制回复失败。',
|
||||
|
|
|
|||
|
|
@ -1085,17 +1085,126 @@
|
|||
padding: 14px 120px 42px 14px;
|
||||
}
|
||||
|
||||
.ops-voice-panel {
|
||||
min-height: 96px;
|
||||
border: 1px dashed color-mix(in oklab, var(--line) 72%, var(--brand) 28%);
|
||||
border-radius: 12px;
|
||||
background: color-mix(in oklab, var(--panel) 78%, var(--panel-soft) 22%);
|
||||
padding: 12px 14px 12px 14px;
|
||||
display: grid;
|
||||
align-content: center;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.ops-voice-title {
|
||||
font-size: 13px;
|
||||
font-weight: 700;
|
||||
color: var(--muted);
|
||||
}
|
||||
|
||||
.ops-voice-wave {
|
||||
height: 28px;
|
||||
border-radius: 999px;
|
||||
border: 1px solid color-mix(in oklab, var(--line) 76%, transparent);
|
||||
background: color-mix(in oklab, var(--panel-soft) 78%, var(--panel) 22%);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
padding: 0 6px;
|
||||
overflow: hidden;
|
||||
flex: 1 1 auto;
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.ops-voice-wave-segment {
|
||||
height: 100%;
|
||||
min-width: 0;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 2px;
|
||||
padding: 0 6px;
|
||||
border-radius: 999px;
|
||||
background: color-mix(in oklab, var(--panel) 60%, rgba(255, 255, 255, 0.18) 40%);
|
||||
}
|
||||
|
||||
.ops-voice-wave.is-mobile .ops-voice-wave-segment {
|
||||
flex: 1 1 auto;
|
||||
}
|
||||
|
||||
.ops-voice-wave.is-desktop .ops-voice-wave-segment {
|
||||
flex: 1 1 0;
|
||||
}
|
||||
|
||||
.ops-voice-wave-segment i {
|
||||
display: inline-block;
|
||||
width: 3px;
|
||||
min-width: 3px;
|
||||
height: 10px;
|
||||
border-radius: 999px;
|
||||
background: color-mix(in oklab, var(--line) 72%, var(--text) 28%);
|
||||
opacity: 0.72;
|
||||
}
|
||||
|
||||
.ops-voice-wave-segment i:nth-child(3n) {
|
||||
height: 14px;
|
||||
}
|
||||
|
||||
.ops-voice-wave-segment i:nth-child(4n) {
|
||||
height: 18px;
|
||||
}
|
||||
|
||||
.ops-voice-wave-segment i:nth-child(5n) {
|
||||
height: 12px;
|
||||
}
|
||||
|
||||
.ops-voice-wave.is-live .ops-voice-wave-segment i {
|
||||
background: color-mix(in oklab, var(--brand) 60%, #8ec3ff 40%);
|
||||
animation: ops-voice-wave 1.05s ease-in-out infinite;
|
||||
}
|
||||
|
||||
.ops-voice-countdown {
|
||||
flex: 0 0 auto;
|
||||
font-size: 13px;
|
||||
font-weight: 700;
|
||||
color: var(--title);
|
||||
min-width: 44px;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
.ops-composer-tools-right {
|
||||
position: absolute;
|
||||
bottom: 14px;
|
||||
display: inline-flex;
|
||||
left: 12px;
|
||||
right: 12px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: flex-end;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
.ops-composer-tools-right {
|
||||
right: 12px;
|
||||
max-width: calc(100% - 24px);
|
||||
width: auto;
|
||||
}
|
||||
|
||||
.ops-voice-inline {
|
||||
min-width: 0;
|
||||
flex: 1 1 auto;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
margin-right: 4px;
|
||||
}
|
||||
|
||||
@media (max-width: 720px) {
|
||||
.ops-voice-wave {
|
||||
gap: 4px;
|
||||
padding: 0 4px;
|
||||
}
|
||||
|
||||
.ops-voice-wave-segment {
|
||||
padding: 0 4px;
|
||||
}
|
||||
}
|
||||
|
||||
.ops-composer-inline-btn {
|
||||
|
|
@ -1116,6 +1225,11 @@
|
|||
color: var(--icon);
|
||||
}
|
||||
|
||||
.ops-composer-inline-btn.is-active {
|
||||
background: color-mix(in oklab, var(--brand-soft) 42%, var(--panel) 58%);
|
||||
color: var(--brand);
|
||||
}
|
||||
|
||||
.ops-composer-submit-btn {
|
||||
width: 34px;
|
||||
height: 34px;
|
||||
|
|
@ -1224,6 +1338,17 @@
|
|||
100% { transform: translateX(430%); }
|
||||
}
|
||||
|
||||
@keyframes ops-voice-wave {
|
||||
0%, 100% {
|
||||
transform: scaleY(0.55);
|
||||
opacity: 0.35;
|
||||
}
|
||||
50% {
|
||||
transform: scaleY(1.95);
|
||||
opacity: 1;
|
||||
}
|
||||
}
|
||||
|
||||
.ops-pending-chip {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
|
|
|
|||
|
|
@ -249,6 +249,12 @@ interface SystemDefaultsResponse {
|
|||
limits?: {
|
||||
upload_max_mb?: number;
|
||||
};
|
||||
speech?: {
|
||||
enabled?: boolean;
|
||||
model?: string;
|
||||
device?: string;
|
||||
max_audio_seconds?: number;
|
||||
};
|
||||
}
|
||||
|
||||
type BotEnvParams = Record<string, string>;
|
||||
|
|
@ -719,6 +725,11 @@ export function BotDashboardModule({
|
|||
const fileNotPreviewableLabel = locale === 'zh' ? '当前文件类型不支持预览' : 'This file type is not previewable';
|
||||
const [selectedBotId, setSelectedBotId] = useState('');
|
||||
const [command, setCommand] = useState('');
|
||||
const [speechEnabled, setSpeechEnabled] = useState(true);
|
||||
const [voiceMaxSeconds, setVoiceMaxSeconds] = useState(20);
|
||||
const [isVoiceRecording, setIsVoiceRecording] = useState(false);
|
||||
const [isVoiceTranscribing, setIsVoiceTranscribing] = useState(false);
|
||||
const [voiceCountdown, setVoiceCountdown] = useState(20);
|
||||
const [isSaving, setIsSaving] = useState(false);
|
||||
const [showBaseModal, setShowBaseModal] = useState(false);
|
||||
const [showParamModal, setShowParamModal] = useState(false);
|
||||
|
|
@ -798,6 +809,10 @@ export function BotDashboardModule({
|
|||
const [feedbackSavingByMessageId, setFeedbackSavingByMessageId] = useState<Record<number, boolean>>({});
|
||||
const [showRuntimeActionModal, setShowRuntimeActionModal] = useState(false);
|
||||
const [workspaceHoverCard, setWorkspaceHoverCard] = useState<WorkspaceHoverCardState | null>(null);
|
||||
const voiceRecorderRef = useRef<MediaRecorder | null>(null);
|
||||
const voiceStreamRef = useRef<MediaStream | null>(null);
|
||||
const voiceChunksRef = useRef<BlobPart[]>([]);
|
||||
const voiceTimerRef = useRef<number | null>(null);
|
||||
const runtimeMenuRef = useRef<HTMLDivElement | null>(null);
|
||||
const botOrderRef = useRef<Record<string, number>>({});
|
||||
const nextBotOrderRef = useRef(1);
|
||||
|
|
@ -1544,16 +1559,36 @@ export function BotDashboardModule({
|
|||
persistComposerDraft(selectedBotId, command, pendingAttachments);
|
||||
}, [selectedBotId, composerDraftHydrated, command, pendingAttachments]);
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
clearVoiceTimer();
|
||||
try {
|
||||
if (voiceRecorderRef.current && voiceRecorderRef.current.state !== 'inactive') {
|
||||
voiceRecorderRef.current.stop();
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
releaseVoiceStream();
|
||||
};
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (!isVoiceRecording && !isVoiceTranscribing) {
|
||||
setVoiceCountdown(voiceMaxSeconds);
|
||||
}
|
||||
}, [voiceMaxSeconds, isVoiceRecording, isVoiceTranscribing]);
|
||||
|
||||
useEffect(() => {
|
||||
const hasDraft = Boolean(String(command || '').trim()) || pendingAttachments.length > 0 || Boolean(quotedReply);
|
||||
if (!hasDraft && !isUploadingAttachments) return;
|
||||
if (!hasDraft && !isUploadingAttachments && !isVoiceRecording && !isVoiceTranscribing) return;
|
||||
const onBeforeUnload = (event: BeforeUnloadEvent) => {
|
||||
event.preventDefault();
|
||||
event.returnValue = '';
|
||||
};
|
||||
window.addEventListener('beforeunload', onBeforeUnload);
|
||||
return () => window.removeEventListener('beforeunload', onBeforeUnload);
|
||||
}, [command, pendingAttachments.length, quotedReply, isUploadingAttachments]);
|
||||
}, [command, pendingAttachments.length, quotedReply, isUploadingAttachments, isVoiceRecording, isVoiceTranscribing]);
|
||||
|
||||
const syncChatScrollToBottom = useCallback((behavior: ScrollBehavior = 'auto') => {
|
||||
const box = chatScrollRef.current;
|
||||
|
|
@ -1580,6 +1615,9 @@ export function BotDashboardModule({
|
|||
|
||||
useEffect(() => {
|
||||
setQuotedReply(null);
|
||||
if (isVoiceRecording) {
|
||||
stopVoiceRecording();
|
||||
}
|
||||
}, [selectedBotId]);
|
||||
|
||||
useEffect(() => {
|
||||
|
|
@ -1637,9 +1675,21 @@ export function BotDashboardModule({
|
|||
const loadSystemDefaults = async () => {
|
||||
try {
|
||||
const res = await axios.get<SystemDefaultsResponse>(`${APP_ENDPOINTS.apiBase}/system/defaults`);
|
||||
if (!alive) return;
|
||||
const configured = Number(res.data?.limits?.upload_max_mb);
|
||||
if (!Number.isFinite(configured) || configured <= 0 || !alive) return;
|
||||
setUploadMaxMb(Math.max(1, Math.floor(configured)));
|
||||
if (Number.isFinite(configured) && configured > 0) {
|
||||
setUploadMaxMb(Math.max(1, Math.floor(configured)));
|
||||
}
|
||||
const speechEnabledRaw = res.data?.speech?.enabled;
|
||||
if (typeof speechEnabledRaw === 'boolean') {
|
||||
setSpeechEnabled(speechEnabledRaw);
|
||||
}
|
||||
const speechSeconds = Number(res.data?.speech?.max_audio_seconds);
|
||||
if (Number.isFinite(speechSeconds) && speechSeconds > 0) {
|
||||
const normalized = Math.max(5, Math.floor(speechSeconds));
|
||||
setVoiceMaxSeconds(normalized);
|
||||
setVoiceCountdown(normalized);
|
||||
}
|
||||
} catch {
|
||||
// keep default limit
|
||||
}
|
||||
|
|
@ -2642,8 +2692,155 @@ export function BotDashboardModule({
|
|||
filePickerRef.current?.click();
|
||||
};
|
||||
|
||||
const clearVoiceTimer = () => {
|
||||
if (voiceTimerRef.current) {
|
||||
window.clearInterval(voiceTimerRef.current);
|
||||
voiceTimerRef.current = null;
|
||||
}
|
||||
};
|
||||
|
||||
const releaseVoiceStream = () => {
|
||||
if (voiceStreamRef.current) {
|
||||
voiceStreamRef.current.getTracks().forEach((track) => {
|
||||
try {
|
||||
track.stop();
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
});
|
||||
voiceStreamRef.current = null;
|
||||
}
|
||||
};
|
||||
|
||||
const transcribeVoiceBlob = async (blob: Blob) => {
|
||||
if (!selectedBot || blob.size <= 0) return;
|
||||
setIsVoiceTranscribing(true);
|
||||
try {
|
||||
const mime = String(blob.type || '').toLowerCase();
|
||||
const ext = mime.includes('ogg') ? 'ogg' : mime.includes('mp4') ? 'mp4' : 'webm';
|
||||
const file = new File([blob], `voice-input-${Date.now()}.${ext}`, { type: blob.type || 'audio/webm' });
|
||||
const formData = new FormData();
|
||||
formData.append('file', file);
|
||||
formData.append('language', 'zh');
|
||||
const res = await axios.post<{ text?: string }>(
|
||||
`${APP_ENDPOINTS.apiBase}/bots/${selectedBot.id}/speech/transcribe`,
|
||||
formData,
|
||||
{ timeout: 120000 },
|
||||
);
|
||||
const text = normalizeUserMessageText(String(res.data?.text || ''));
|
||||
if (!text) {
|
||||
notify(t.voiceTranscribeEmpty, { tone: 'warning' });
|
||||
return;
|
||||
}
|
||||
setCommand((prev) => {
|
||||
const base = String(prev || '').trim();
|
||||
if (!base) return text;
|
||||
return `${base}\n${text}`;
|
||||
});
|
||||
window.requestAnimationFrame(() => composerTextareaRef.current?.focus());
|
||||
notify(t.voiceTranscribeDone, { tone: 'success' });
|
||||
} catch (error: any) {
|
||||
const msg = String(error?.response?.data?.detail || '').trim();
|
||||
console.error('Speech transcription failed', {
|
||||
botId: selectedBot.id,
|
||||
message: msg || t.voiceTranscribeFail,
|
||||
status: error?.response?.status,
|
||||
response: error?.response?.data,
|
||||
error,
|
||||
});
|
||||
notify(msg || t.voiceTranscribeFail, { tone: 'error' });
|
||||
} finally {
|
||||
setIsVoiceTranscribing(false);
|
||||
}
|
||||
};
|
||||
|
||||
const stopVoiceRecording = () => {
|
||||
const recorder = voiceRecorderRef.current;
|
||||
if (!recorder || recorder.state === 'inactive') return;
|
||||
try {
|
||||
recorder.stop();
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
};
|
||||
|
||||
const startVoiceRecording = async () => {
|
||||
if (!selectedBot || !canChat || isVoiceTranscribing) return;
|
||||
if (!speechEnabled) {
|
||||
notify(t.voiceUnavailable, { tone: 'warning' });
|
||||
return;
|
||||
}
|
||||
if (typeof window === 'undefined' || typeof navigator === 'undefined' || !navigator.mediaDevices?.getUserMedia) {
|
||||
notify(t.voiceUnsupported, { tone: 'error' });
|
||||
return;
|
||||
}
|
||||
if (typeof MediaRecorder === 'undefined') {
|
||||
notify(t.voiceUnsupported, { tone: 'error' });
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
const mimeCandidates = ['audio/webm;codecs=opus', 'audio/webm', 'audio/ogg;codecs=opus', 'audio/mp4'];
|
||||
const supportedMime = mimeCandidates.find((candidate) => MediaRecorder.isTypeSupported(candidate));
|
||||
const recorder = supportedMime
|
||||
? new MediaRecorder(stream, { mimeType: supportedMime })
|
||||
: new MediaRecorder(stream);
|
||||
voiceStreamRef.current = stream;
|
||||
voiceRecorderRef.current = recorder;
|
||||
voiceChunksRef.current = [];
|
||||
setVoiceCountdown(voiceMaxSeconds);
|
||||
setIsVoiceRecording(true);
|
||||
|
||||
recorder.ondataavailable = (event: BlobEvent) => {
|
||||
if (event.data && event.data.size > 0) {
|
||||
voiceChunksRef.current.push(event.data);
|
||||
}
|
||||
};
|
||||
recorder.onerror = () => {
|
||||
setIsVoiceRecording(false);
|
||||
clearVoiceTimer();
|
||||
releaseVoiceStream();
|
||||
notify(t.voiceRecordFail, { tone: 'error' });
|
||||
};
|
||||
recorder.onstop = () => {
|
||||
const blob = new Blob(voiceChunksRef.current, { type: supportedMime || recorder.mimeType || 'audio/webm' });
|
||||
voiceRecorderRef.current = null;
|
||||
voiceChunksRef.current = [];
|
||||
clearVoiceTimer();
|
||||
releaseVoiceStream();
|
||||
setIsVoiceRecording(false);
|
||||
setVoiceCountdown(voiceMaxSeconds);
|
||||
if (blob.size > 0) {
|
||||
void transcribeVoiceBlob(blob);
|
||||
}
|
||||
};
|
||||
|
||||
recorder.start(200);
|
||||
clearVoiceTimer();
|
||||
voiceTimerRef.current = window.setInterval(() => {
|
||||
setVoiceCountdown((prev) => {
|
||||
if (prev <= 1) {
|
||||
stopVoiceRecording();
|
||||
return 0;
|
||||
}
|
||||
return prev - 1;
|
||||
});
|
||||
}, 1000);
|
||||
} catch {
|
||||
releaseVoiceStream();
|
||||
setIsVoiceRecording(false);
|
||||
clearVoiceTimer();
|
||||
notify(t.voicePermissionDenied, { tone: 'error' });
|
||||
}
|
||||
};
|
||||
|
||||
const onVoiceInput = () => {
|
||||
notify(t.voiceUnavailable, { tone: 'warning' });
|
||||
if (isVoiceTranscribing) return;
|
||||
if (isVoiceRecording) {
|
||||
stopVoiceRecording();
|
||||
return;
|
||||
}
|
||||
void startVoiceRecording();
|
||||
};
|
||||
|
||||
const onPickAttachments = async (event: ChangeEvent<HTMLInputElement>) => {
|
||||
|
|
@ -3393,7 +3590,7 @@ export function BotDashboardModule({
|
|||
value={command}
|
||||
onChange={(e) => setCommand(e.target.value)}
|
||||
onKeyDown={onComposerKeyDown}
|
||||
disabled={!canChat}
|
||||
disabled={!canChat || isVoiceRecording || isVoiceTranscribing}
|
||||
placeholder={
|
||||
canChat
|
||||
? t.inputPlaceholder
|
||||
|
|
@ -3401,18 +3598,54 @@ export function BotDashboardModule({
|
|||
}
|
||||
/>
|
||||
<div className="ops-composer-tools-right">
|
||||
<LucentIconButton
|
||||
className="ops-composer-inline-btn"
|
||||
disabled={!canChat}
|
||||
{(isVoiceRecording || isVoiceTranscribing) ? (
|
||||
<div className="ops-voice-inline" aria-live="polite">
|
||||
<div className={`ops-voice-wave ${isVoiceRecording ? 'is-live' : ''} ${isCompactMobile ? 'is-mobile' : 'is-desktop'}`}>
|
||||
{Array.from({ length: isCompactMobile ? 1 : 5 }).map((_, segmentIdx) => (
|
||||
<div key={`vw-segment-${segmentIdx}`} className="ops-voice-wave-segment">
|
||||
{Array.from({ length: isCompactMobile ? 28 : 18 }).map((_, idx) => {
|
||||
const delayIndex = isCompactMobile
|
||||
? idx
|
||||
: (segmentIdx * 18) + idx;
|
||||
return (
|
||||
<i
|
||||
key={`vw-inline-${segmentIdx}-${idx}`}
|
||||
style={{ animationDelay: `${(delayIndex % 14) * 0.06}s` }}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
<div className="ops-voice-countdown mono">
|
||||
{isVoiceRecording ? `${voiceCountdown}s` : t.voiceTranscribing}
|
||||
</div>
|
||||
</div>
|
||||
) : null}
|
||||
<button
|
||||
className={`ops-composer-inline-btn ${isVoiceRecording ? 'is-recording' : ''}`}
|
||||
disabled={!canChat || isVoiceTranscribing || (!speechEnabled && !isVoiceRecording)}
|
||||
onClick={onVoiceInput}
|
||||
tooltip={t.voiceInput}
|
||||
aria-label={t.voiceInput}
|
||||
aria-label={isVoiceRecording ? t.voiceStop : t.voiceStart}
|
||||
title={
|
||||
isVoiceTranscribing
|
||||
? t.voiceTranscribing
|
||||
: isVoiceRecording
|
||||
? t.voiceStop
|
||||
: t.voiceStart
|
||||
}
|
||||
>
|
||||
<Mic size={16} />
|
||||
</LucentIconButton>
|
||||
{isVoiceTranscribing ? (
|
||||
<RefreshCw size={16} className="animate-spin" />
|
||||
) : isVoiceRecording ? (
|
||||
<Square size={16} />
|
||||
) : (
|
||||
<Mic size={16} />
|
||||
)}
|
||||
</button>
|
||||
<LucentIconButton
|
||||
className="ops-composer-inline-btn"
|
||||
disabled={!canChat || isUploadingAttachments}
|
||||
disabled={!canChat || isUploadingAttachments || isVoiceRecording || isVoiceTranscribing}
|
||||
onClick={triggerPickAttachments}
|
||||
tooltip={isUploadingAttachments ? t.uploadingFile : t.uploadFile}
|
||||
aria-label={isUploadingAttachments ? t.uploadingFile : t.uploadFile}
|
||||
|
|
@ -3424,7 +3657,12 @@ export function BotDashboardModule({
|
|||
disabled={
|
||||
isChatEnabled && (isThinking || isSending)
|
||||
? Boolean(interruptingByBot[selectedBot.id])
|
||||
: (!isChatEnabled || (!command.trim() && pendingAttachments.length === 0 && !quotedReply))
|
||||
: (
|
||||
!isChatEnabled
|
||||
|| isVoiceRecording
|
||||
|| isVoiceTranscribing
|
||||
|| (!command.trim() && pendingAttachments.length === 0 && !quotedReply)
|
||||
)
|
||||
}
|
||||
onClick={() => void (isChatEnabled && (isThinking || isSending) ? interruptExecution() : send())}
|
||||
aria-label={isChatEnabled && (isThinking || isSending) ? t.interrupt : t.send}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ if [[ ! -f "$ENV_FILE" ]]; then
|
|||
fi
|
||||
|
||||
echo "[deploy] using env: $ENV_FILE"
|
||||
docker compose --env-file "$ENV_FILE" -f "$ROOT_DIR/docker-compose.prod.yml" config -q
|
||||
docker compose --env-file "$ENV_FILE" -f "$ROOT_DIR/docker-compose.prod.yml" up -d --build
|
||||
|
||||
echo "[deploy] service status"
|
||||
|
|
|
|||
|
|
@ -4,4 +4,9 @@ set -euo pipefail
|
|||
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
||||
ENV_FILE="${1:-$ROOT_DIR/.env.prod}"
|
||||
|
||||
if [[ ! -f "$ENV_FILE" ]]; then
|
||||
echo "Missing env file: $ENV_FILE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
docker compose --env-file "$ENV_FILE" -f "$ROOT_DIR/docker-compose.prod.yml" down
|
||||
|
|
|
|||
Loading…
Reference in New Issue