| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- from langchain_openai import ChatOpenAI
- from langchain.messages import HumanMessage, SystemMessage
- import json
- import re
- from typing import Dict, Any
- from utils.logger import chat_logger
- import os
- from config.settings import settings
class LLMParser:
    """Turn OCR text into a JSON-derived Python object by prompting a chat LLM.

    The prompt, result validation and post-processing are supplied by a
    ``template`` object passed to :meth:`parse_to_json`.
    """

    # The fence shape the parser has always accepted; tried first so replies
    # that already parsed keep producing exactly the same result.
    _STRICT_FENCE = re.compile(r"```json\n(.*?)\n```", re.DOTALL)

    # Fallback fences, tried only after the reply failed to parse as-is:
    # a "json" tag in any case with arbitrary whitespace (CRLF, spaces, none),
    # then an untagged fence.
    _LENIENT_FENCES = (
        re.compile(r"```json\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE),
        re.compile(r"```\s*(.*?)\s*```", re.DOTALL),
    )

    def __init__(self, llm_config: Dict = None):
        """Create the chat model client from the project ``settings``.

        Args:
            llm_config: Accepted for interface compatibility; it is currently
                not read, and every model option comes from ``settings``.
        """
        self.llm = ChatOpenAI(
            model=settings.LLM_MODEL,
            temperature=settings.LLM_TEMPERATURE,
            api_key=settings.DEEPSEEK_API_KEY,
            base_url=settings.DEEPSEEK_BASE_URL,
            max_tokens=settings.LLM_MAX_TOKENS,
        )

    @staticmethod
    def _parse_json_payload(content: str) -> Any:
        """Decode the JSON carried by an LLM reply, tolerating Markdown fences.

        Resolution order:
            1. A strict "```json\\n ... \\n```" fence: its body is decoded.
            2. The whole reply is decoded as JSON.
            3. If step 2 fails, the first lenient fence match is decoded.

        Args:
            content: Raw text of the model reply.

        Returns:
            The decoded JSON value.

        Raises:
            json.JSONDecodeError: If no candidate is valid JSON.
        """
        strict = LLMParser._STRICT_FENCE.search(content)
        if strict:
            return json.loads(strict.group(1))

        try:
            return json.loads(content)
        except json.JSONDecodeError:
            # Only now look for looser fences, so valid JSON whose string
            # values happen to contain backticks is never mis-split.
            for pattern in LLMParser._LENIENT_FENCES:
                fenced = pattern.search(content)
                if fenced:
                    return json.loads(fenced.group(1))
            raise

    async def parse_to_json(self, ocr_text: str, template) -> Dict[str, Any]:
        """Extract structured data from OCR text using ``template``.

        Args:
            ocr_text: Text to extract information from.
            template: Object providing ``system_prompt``,
                ``validate_result(result)`` and ``post_process(result)``.

        Returns:
            The value returned by ``template.post_process`` for the decoded
            reply.

        Raises:
            json.JSONDecodeError: If the reply does not contain valid JSON.
            ValueError: If ``template.validate_result`` rejects the result.
            Exception: Any other error raised while calling the model or the
                template is logged and re-raised unchanged.
        """
        # Debug-level log instead of printing the prompt to stdout.
        chat_logger.debug(f"template.system_prompt: {template.system_prompt}")

        messages = [
            SystemMessage(content=template.system_prompt),
            HumanMessage(content=f"请从以下文本中提取信息:\n\n{ocr_text}"),
        ]

        raw_content = None
        try:
            response = await self.llm.ainvoke(messages)
            raw_content = response.content

            result = self._parse_json_payload(raw_content)

            if not template.validate_result(result):
                raise ValueError("解析结果验证失败")
            return template.post_process(result)
        except json.JSONDecodeError as e:
            # Log the untouched model reply so a failure can be diagnosed.
            chat_logger.error(f"JSON解析失败: {e}, 原始内容: {raw_content}")
            raise
        except Exception as e:
            chat_logger.error(f"LLM解析失败: {e}")
            raise
|