UnisKB/apps/application/serializers/application_chat.py

306 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# coding=utf-8
"""
@project: MaxKB
@Author虎虎
@file application_chat.py
@date2025/6/10 11:06
@desc:
"""
import datetime
import os
import re
from io import BytesIO
from typing import Dict
import openpyxl
import pytz
from django.core import validators
from django.db import models
from django.db.models import QuerySet, Q
from django.http import StreamingHttpResponse
from django.utils import timezone
from django.utils.translation import gettext_lazy as _, gettext
from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
from rest_framework import serializers
from application.models import Chat, Application, ChatRecord
from common.db.search import get_dynamics_model, native_search, native_page_search, native_page_handler
from common.exception.app_exception import AppApiException
from common.utils.common import get_file_content
from maxkb.conf import PROJECT_DIR
from maxkb.settings import TIME_ZONE, edition
class ApplicationChatResponseSerializers(serializers.Serializer):
id = serializers.UUIDField(required=True, label=_("chat id"))
abstract = serializers.CharField(required=True, label=_("summary"))
chat_user_id = serializers.UUIDField(required=True, label=_("Chat User ID"))
chat_user_type = serializers.CharField(required=True, label=_("Chat User Type"))
is_deleted = serializers.BooleanField(required=True, label=_("Is delete"))
application_id = serializers.UUIDField(required=True, label=_("Application ID"))
chat_record_count = serializers.IntegerField(required=True, label=_("Number of conversations"))
star_num = serializers.IntegerField(required=True, label=_("Number of Likes"))
trample_num = serializers.IntegerField(required=True, label=_("Number of thumbs-downs"))
mark_sum = serializers.IntegerField(required=True, label=_("Number of tags"))
feedback_count = serializers.IntegerField(required=False, label=_("Feedback count"))
feedback_content = serializers.CharField(required=False, allow_null=True, allow_blank=True,
label=_("Feedback content"))
feedback_list = serializers.ListField(required=False, label=_("Feedback list"))
class ApplicationChatRecordExportRequest(serializers.Serializer):
select_ids = serializers.ListField(required=True, label=_("Chat ID List"),
child=serializers.UUIDField(required=True, label=_("Chat ID")))
class ApplicationChatQuerySerializers(serializers.Serializer):
workspace_id = serializers.CharField(required=False, allow_null=True, allow_blank=True, label=_("Workspace ID"))
abstract = serializers.CharField(required=False, allow_blank=True, allow_null=True, label=_("summary"))
username = serializers.CharField(required=False, allow_blank=True, allow_null=True, label=_("username"))
start_time = serializers.DateField(format='%Y-%m-%d', label=_("Start time"))
end_time = serializers.DateField(format='%Y-%m-%d', label=_("End time"))
application_id = serializers.UUIDField(required=True, label=_("Application ID"))
min_star = serializers.IntegerField(required=False, min_value=0,
label=_("Minimum number of likes"))
min_trample = serializers.IntegerField(required=False, min_value=0,
label=_("Minimum number of clicks"))
comparer = serializers.CharField(required=False, label=_("Comparator"), validators=[
validators.RegexValidator(regex=re.compile("^and|or$"),
message=_("Only supports and|or"), code=500)
])
def is_valid(self, *, raise_exception=False):
super().is_valid(raise_exception=True)
workspace_id = self.data.get('workspace_id')
query_set = QuerySet(Application).filter(id=self.data.get('application_id'))
if workspace_id:
query_set = query_set.filter(workspace_id=workspace_id)
if not query_set.exists():
raise AppApiException(500, _('Application id does not exist'))
def get_end_time(self):
d = datetime.datetime.strptime(self.data.get('end_time'), '%Y-%m-%d').date()
naive = datetime.datetime.combine(d, datetime.time.max)
return timezone.make_aware(naive, timezone.get_default_timezone())
def get_start_time(self):
d = datetime.datetime.strptime(self.data.get('start_time'), '%Y-%m-%d').date()
naive = datetime.datetime.combine(d, datetime.time.min)
return timezone.make_aware(naive, timezone.get_default_timezone())
def get_query_set(self, select_ids=None):
end_time = self.get_end_time()
start_time = self.get_start_time()
query_set = QuerySet(model=get_dynamics_model(
{'application_chat.application_id': models.CharField(),
'application_chat.abstract': models.CharField(),
'application_chat.asker': models.JSONField(),
"star_num": models.IntegerField(),
'trample_num': models.IntegerField(),
'comparer': models.CharField(),
'application_chat.update_time': models.DateTimeField(),
'application_chat.id': models.UUIDField(),
'application_chat_record_temp.id': models.UUIDField()}))
base_query_dict = {'application_chat.application_id': self.data.get("application_id"),
'application_chat.update_time__gte': start_time,
'application_chat.update_time__lte': end_time,
}
if 'abstract' in self.data and self.data.get('abstract') is not None:
base_query_dict['application_chat.abstract__icontains'] = self.data.get('abstract')
if 'username' in self.data and self.data.get('username') is not None:
base_query_dict['application_chat.asker__username__icontains'] = self.data.get('username')
if select_ids is not None and len(select_ids) > 0:
base_query_dict['application_chat.id__in'] = select_ids
base_condition = Q(**base_query_dict)
min_star_query = None
min_trample_query = None
if 'min_star' in self.data and self.data.get('min_star') is not None:
min_star_query = Q(star_num__gte=self.data.get('min_star'))
if 'min_trample' in self.data and self.data.get('min_trample') is not None:
min_trample_query = Q(trample_num__gte=self.data.get('min_trample'))
if min_star_query is not None and min_trample_query is not None:
if self.data.get(
'comparer') is not None and self.data.get('comparer') == 'or':
condition = base_condition & (min_star_query | min_trample_query)
else:
condition = base_condition & (min_star_query & min_trample_query)
elif min_star_query is not None:
condition = base_condition & min_star_query
elif min_trample_query is not None:
condition = base_condition & min_trample_query
else:
condition = base_condition
return {
'default_queryset': query_set.filter(condition).order_by("-application_chat.update_time")
}
def list(self, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)
result = native_search(self.get_query_set(), select_string=get_file_content(
os.path.join(PROJECT_DIR, "apps", "application", 'sql',
('list_application_chat_ee.sql' if ['PE', 'EE'].__contains__(
edition) else 'list_application_chat.sql'))),
with_table_name=False)
return self.append_feedback_content(result)
@staticmethod
def get_feedback_items(details):
if not isinstance(details, dict):
return []
feedback = details.get('feedback')
if isinstance(feedback, list):
return feedback
if isinstance(feedback, dict):
items = feedback.get('items')
return items if isinstance(items, list) else []
return []
@classmethod
def append_feedback_content(cls, records):
if not records:
return records
chat_id_list = [row.get('id') for row in records if row.get('id') is not None]
feedback_map = {}
chat_records = QuerySet(ChatRecord).filter(chat_id__in=chat_id_list).order_by('create_time')
for chat_record in chat_records:
feedback_items = cls.get_feedback_items(chat_record.details)
if not feedback_items:
continue
feedback_map[str(chat_record.chat_id)] = feedback_items
for row in records:
feedback_items = feedback_map.get(str(row.get('id')), [])
latest_feedback = feedback_items[-1] if feedback_items else {}
row['feedback_count'] = len(feedback_items)
row['feedback_content'] = latest_feedback.get('content') if isinstance(latest_feedback, dict) else ''
row['feedback_list'] = feedback_items
return records
@staticmethod
def paragraph_list_to_string(paragraph_list):
return "\n**********\n".join(
[f"{paragraph.get('title')}:\n{paragraph.get('content')}" for paragraph in
paragraph_list] if paragraph_list is not None else '')
@staticmethod
def to_row(row: Dict):
details = row.get('details') or {}
padding_problem_text = ' '.join((node.get("answer", "") or "") for key, node in details.items() if
node.get("type") == 'question-node')
search_dataset_node_list = [(key, node) for key, node in details.items() if
node.get("type") == 'search-dataset-node' or node.get(
"step_type") == 'search_step']
reference_paragraph_len = '\n'.join([str(len(node.get('paragraph_list',
[]))) if key == 'search_step' else node.get(
'name') + ':' + str(
len(node.get('paragraph_list', [])) if node.get('paragraph_list', []) is not None else '0') for
key, node in search_dataset_node_list])
reference_paragraph = '\n----------\n'.join(
[ApplicationChatQuerySerializers.paragraph_list_to_string(node.get('paragraph_list',
[])) if key == 'search_step' else node.get(
'name') + ':\n' + ApplicationChatQuerySerializers.paragraph_list_to_string(node.get('paragraph_list',
[])) for
key, node in search_dataset_node_list])
improve_paragraph_list = row.get('improve_paragraph_list') or []
vote_status_map = {'-1': '未投票', '0': '赞同', '1': '反对'}
return [str(row.get('chat_id')), row.get('abstract'), row.get('problem_text'), padding_problem_text,
row.get('answer_text'), vote_status_map.get(row.get('vote_status')), reference_paragraph_len,
reference_paragraph,
"\n".join([
f"{improve_paragraph_list[index].get('title')}\n{improve_paragraph_list[index].get('content')}"
for index in range(len(improve_paragraph_list))]),
row.get('asker').get('username'),
(row.get('message_tokens') or 0) + (row.get('answer_tokens') or 0), row.get('run_time'),
str(row.get('create_time').astimezone(pytz.timezone(TIME_ZONE)).strftime('%Y-%m-%d %H:%M:%S')
if row.get('create_time') is not None else None)]
@staticmethod
def reset_value(value):
if isinstance(value, str):
value = re.sub(ILLEGAL_CHARACTERS_RE, '', value)
if isinstance(value, datetime.datetime):
eastern = pytz.timezone(TIME_ZONE)
c = datetime.timezone(eastern._utcoffset)
value = value.astimezone(c)
return value
def export(self, data, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)
ApplicationChatRecordExportRequest(data=data).is_valid(raise_exception=True)
def stream_response():
workbook = openpyxl.Workbook(write_only=True)
worksheet = workbook.create_sheet(title='Sheet1')
current_page = 1
page_size = 500
headers = [gettext('Conversation ID'), gettext('summary'), gettext('User Questions'),
gettext('Problem after optimization'),
gettext('answer'), gettext('User feedback'),
gettext('Reference segment number'),
gettext('Section title + content'),
gettext('Annotation'), gettext('USER'), gettext('Consuming tokens'),
gettext('Time consumed (s)'),
gettext('Question Time')]
worksheet.append(headers)
for data_list in native_page_handler(page_size, self.get_query_set(data.get('select_ids')),
primary_key='application_chat_record_temp.id',
primary_queryset='default_queryset',
get_primary_value=lambda item: item.get('id'),
select_string=get_file_content(
os.path.join(PROJECT_DIR, "apps", "application", 'sql',
('export_application_chat_ee.sql' if ['PE',
'EE'].__contains__(
edition) else 'export_application_chat.sql'))),
with_table_name=False):
for item in data_list:
row = [self.reset_value(v) for v in self.to_row(item)]
worksheet.append(row)
current_page = current_page + 1
output = BytesIO()
workbook.save(output)
output.seek(0)
yield output.getvalue()
output.close()
workbook.close()
response = StreamingHttpResponse(stream_response(),
content_type='application/vnd.open.xmlformats-officedocument.spreadsheetml.sheet')
response['Content-Disposition'] = 'attachment; filename="data.xlsx"'
return response
def page(self, current_page: int, page_size: int, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)
page = native_page_search(current_page, page_size, self.get_query_set(), select_string=get_file_content(
os.path.join(PROJECT_DIR, "apps", "application", 'sql',
('list_application_chat_ee.sql' if ['PE', 'EE'].__contains__(
edition) else 'list_application_chat.sql'))),
with_table_name=False)
page['records'] = self.append_feedback_content(page.get('records'))
return page
class ChatCountSerializer(serializers.Serializer):
chat_id = serializers.UUIDField(required=True, label=_("Conversation ID"))
def get_query_set(self):
return QuerySet(ChatRecord).filter(chat_id=self.data.get('chat_id'))
def update_chat(self):
self.is_valid(raise_exception=True)
count_chat_record = native_search(self.get_query_set(), get_file_content(
os.path.join(PROJECT_DIR, "apps", "application", 'sql', 'count_chat_record.sql')), with_search_one=True)
QuerySet(Chat).filter(id=self.data.get('chat_id')).update(star_num=count_chat_record.get('star_num', 0) or 0,
trample_num=count_chat_record.get('trample_num',
0) or 0,
chat_record_count=count_chat_record.get(
'chat_record_count', 0) or 0,
mark_sum=count_chat_record.get('mark_sum', 0) or 0)
return True