| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134 |
- import base64
- import requests
- from typing import Dict, Any, Optional
- from utils.logger import chat_logger
- import aiohttp
- import asyncio
class PaddleOCRService:
    """Thin client around a PaddleOCR HTTP endpoint.

    The service posts a base64-encoded file as JSON and returns the
    ``"result"`` member of the JSON response.
    """

    def __init__(self, api_url: str, token: str, timeout: float = 30):
        """Store the endpoint configuration.

        Args:
            api_url: URL the OCR request is POSTed to.
            token: Credential sent as ``Authorization: token <token>``.
            timeout: Seconds passed to ``requests.post`` as its timeout.
        """
        self.api_url = api_url
        self.token = token
        self.timeout = timeout
        self.headers = {
            "Authorization": f"token {token}",
            "Content-Type": "application/json",
        }

    async def recognize_image_async(
        self,
        image_bytes: bytes,
        file_type: int = 1,  # 1: image, 0: PDF
        use_doc_orientation_classify: bool = False,
        use_doc_unwarping: bool = False,
        use_textline_orientation: bool = False,
    ) -> Dict[str, Any]:
        """Call the OCR API without blocking the running event loop.

        The blocking ``recognize_image_sync`` call is handed to the event
        loop's default executor, so other coroutines keep running while the
        HTTP request is in flight.

        Args and return value are the same as ``recognize_image_sync``;
        exceptions raised there propagate to the awaiting caller.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            lambda: self.recognize_image_sync(
                image_bytes=image_bytes,
                file_type=file_type,
                use_doc_orientation_classify=use_doc_orientation_classify,
                use_doc_unwarping=use_doc_unwarping,
                use_textline_orientation=use_textline_orientation,
            ),
        )

    def recognize_image_sync(
        self,
        image_bytes: bytes,
        file_type: int = 1,
        use_doc_orientation_classify: bool = False,
        use_doc_unwarping: bool = False,
        use_textline_orientation: bool = False,
    ) -> Dict[str, Any]:
        """Call the OCR API synchronously and return its ``result`` payload.

        Args:
            image_bytes: Raw file content; sent base64-encoded.
            file_type: Sent as ``fileType`` (1 = image, 0 = PDF).
            use_doc_orientation_classify: Sent as ``useDocOrientationClassify``.
            use_doc_unwarping: Sent as ``useDocUnwarping``.
            use_textline_orientation: Sent as ``useTextlineOrientation``.

        Returns:
            The value stored under ``"result"`` in the JSON response.

        Raises:
            Exception: On timeout, a non-200 status, or a response that is
                not a JSON object containing ``"result"``. Other transport
                errors from ``requests`` are logged and re-raised unchanged.
        """
        payload = {
            "file": base64.b64encode(image_bytes).decode("ascii"),
            "fileType": file_type,
            "useDocOrientationClassify": use_doc_orientation_classify,
            "useDocUnwarping": use_doc_unwarping,
            "useTextlineOrientation": use_textline_orientation,
        }

        # Only the network call lives in this try block, so errors raised
        # further down are logged exactly once instead of being re-logged
        # by a catch-all handler.
        try:
            response = requests.post(
                self.api_url,
                json=payload,
                headers=self.headers,
                timeout=self.timeout,
            )
        except requests.exceptions.Timeout as exc:
            chat_logger.error("OCR API调用超时")
            raise Exception("OCR识别超时") from exc
        except Exception as exc:
            chat_logger.error(f"OCR识别异常: {str(exc)}")
            raise

        if response.status_code != 200:
            chat_logger.error(
                f"OCR API调用失败: {response.status_code}, {response.text}"
            )
            raise Exception(f"OCR识别失败: {response.status_code}")

        try:
            result = response.json()
        except ValueError as exc:
            # The body was not valid JSON.
            chat_logger.error(f"OCR识别异常: {str(exc)}")
            raise

        # A non-dict body (list, null, ...) is a format error too, rather
        # than an unrelated TypeError from the membership test.
        if not isinstance(result, dict) or "result" not in result:
            chat_logger.error(f"OCR返回格式错误: {result}")
            raise Exception("OCR返回格式错误")
        return result["result"]

    def extract_text_from_result(
        self, json_data: Dict, min_score: float = 0.5
    ) -> str:
        """Format an OCR result as a ``text | box`` listing.

        Reads ``rec_texts``, ``rec_scores`` and ``rec_boxes`` from
        ``json_data["ocrResults"][0]["prunedResult"]`` and keeps the entries
        whose score is strictly greater than ``min_score``.

        Args:
            json_data: The OCR result structure described above.
            min_score: Confidence threshold; entries at or below it are
                dropped.

        Returns:
            A header line, a 50-dash separator, then one ``text | box`` line
            per kept entry, joined with ``\\r\\n``.

        Raises:
            Exception: Any error hit while reading the structure (missing
                key, empty ``ocrResults``, ...) is logged and re-raised.
        """
        try:
            ocr_result = json_data["ocrResults"][0]["prunedResult"]
            texts = ocr_result["rec_texts"]
            scores = ocr_result["rec_scores"]
            boxes = ocr_result["rec_boxes"]

            lines = ["识别文本 | 识别框坐标", "-" * 50]
            lines.extend(
                f"{text} | {box}"
                for text, score, box in zip(texts, scores, boxes)
                if score > min_score
            )
            return "\r\n".join(lines)
        except Exception as e:
            chat_logger.error(f"提取OCR文本失败: {str(e)}")
            raise
|