// Source file: good-news/services/ocr_service.go (88 lines, 1.7 KiB, Go)

package services
import (
"github.com/otiai10/gosseract/v2"
"regexp"
"strconv"
"strings"
)
// OCRService provides OCR text-recognition services.
type OCRService struct {
// client is the gosseract client used for every OCR call made by this service.
client *gosseract.Client
}
// NewOCRService creates a new OCR service instance backed by a freshly
// created gosseract client.
//
// The returned error is currently always nil. Call Close on the returned
// service when it is no longer needed.
func NewOCRService() (*OCRService, error) {
	return &OCRService{client: gosseract.NewClient()}, nil
}
// Close shuts down the OCR service by closing the underlying gosseract client.
func (s *OCRService) Close() {
	// Close has no return value, so the client's close error is discarded.
	_ = s.client.Close()
}
// ExtractInfo runs OCR on the image at imagePath and extracts the good-news
// fields from the recognized text.
//
// It returns, in order: the project name, the points value, and the
// representative office. If configuring the client, loading the image, or
// recognizing the text fails, the zero value of every field is returned
// together with the underlying error.
func (s *OCRService) ExtractInfo(imagePath string) (string, int, string, error) {
	// Use the Chinese ("chi_sim") language data for recognition.
	if err := s.client.SetLanguage("chi_sim"); err != nil {
		return "", 0, "", err
	}

	// Point the client at the image to recognize.
	if err := s.client.SetImage(imagePath); err != nil {
		return "", 0, "", err
	}

	// Run recognition and collect the full text.
	recognized, err := s.client.Text()
	if err != nil {
		return "", 0, "", err
	}

	// Each field is parsed independently from the same recognized text.
	return extractProjectName(recognized),
		extractPoints(recognized),
		extractRepresentative(recognized),
		nil
}
// extractProjectName returns the first line of text that contains the
// keyword "项目" (project), with surrounding whitespace trimmed. It returns
// the empty string when no line contains the keyword.
func extractProjectName(text string) string {
	const keyword = "项目"

	for _, candidate := range strings.Split(text, "\n") {
		if !strings.Contains(candidate, keyword) {
			continue
		}
		return strings.TrimSpace(candidate)
	}
	return ""
}
// pointsPattern matches a run of digits followed by a points unit: the
// Chinese "点" or the English "point"/"points". The unit alternatives are
// wrapped in a non-capturing group so that the digits are captured no matter
// which unit follows; without the group, the alternation would split the
// whole pattern and a bare "points" would match with an empty capture.
var pointsPattern = regexp.MustCompile(`(\d+)\s*(?:点|points?)`)

// extractPoints returns the first number in text that is immediately
// followed (optionally after whitespace) by a points unit. It returns 0 when
// no such number is present or when the number does not fit in an int.
func extractPoints(text string) int {
	match := pointsPattern.FindStringSubmatch(text)
	if match == nil {
		return 0
	}

	points, err := strconv.Atoi(match[1])
	if err != nil {
		// The capture is digits only, so this can only be an out-of-range
		// value; treat it the same as "no points found".
		return 0
	}
	return points
}
// representativePattern matches a run of non-whitespace characters ending in
// "代表处" (representative office). It is compiled once at package scope
// instead of on every call.
var representativePattern = regexp.MustCompile(`([^\s]+代表处)`)

// extractRepresentative returns the first whitespace-delimited token in text
// that ends with "代表处" and has at least one character before it. It
// returns the empty string when there is no such token.
func extractRepresentative(text string) string {
	// The single capture group spans the whole pattern, so the full match is
	// exactly the captured office name; FindString yields "" on no match.
	return representativePattern.FindString(text)
}