import json
import os
import re
from typing import Any, Dict, Optional

from langchain.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI

from config.settings import settings
from utils.logger import chat_logger


class LLMParser:
    """Turn OCR text into a JSON-derived Python object using a chat LLM.

    The chat client is built once in ``__init__`` from the project
    ``settings`` object and reused by every ``parse_to_json`` call.
    """

    # Exact fence shape the model is expected to emit: "```json\n...\n```".
    _STRICT_JSON_FENCE = re.compile(r"```json\n(.*?)\n```", re.DOTALL)
    # Fallback for common variants: no language tag, upper-case "JSON",
    # CRLF line endings, or no newline around the payload.
    _LENIENT_JSON_FENCE = re.compile(
        r"```(?:json)?\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE
    )

    def __init__(self, llm_config: Optional[Dict] = None):
        """Create the underlying ``ChatOpenAI`` client.

        Args:
            llm_config: Accepted for interface compatibility; it is not read.
                Every client option is taken from ``settings``.
        """
        self.llm = ChatOpenAI(
            model=settings.LLM_MODEL,
            temperature=settings.LLM_TEMPERATURE,
            api_key=settings.DEEPSEEK_API_KEY,
            base_url=settings.DEEPSEEK_BASE_URL,
            max_tokens=settings.LLM_MAX_TOKENS,
        )

    @classmethod
    def _extract_json_text(cls, content: str) -> str:
        """Return the payload of the first Markdown code fence in ``content``.

        The strict "```json" fence is tried first, then the lenient pattern.
        When no fence is found the text is returned unchanged so that a bare
        JSON reply still parses.
        """
        for pattern in (cls._STRICT_JSON_FENCE, cls._LENIENT_JSON_FENCE):
            fenced = pattern.search(content)
            if fenced:
                return fenced.group(1)
        return content

    async def parse_to_json(self, ocr_text: str, template) -> Dict[str, Any]:
        """Parse OCR text into JSON using the given template.

        The template supplies the system prompt (``system_prompt``), a
        validator (``validate_result``) and a post-processing hook
        (``post_process``).

        Args:
            ocr_text: Text to extract information from; it is embedded in the
                human message sent to the model.
            template: Object exposing ``system_prompt``, ``validate_result``
                and ``post_process``.

        Returns:
            Whatever ``template.post_process`` returns for the validated,
            decoded model reply.

        Raises:
            json.JSONDecodeError: The model reply is not valid JSON.
            ValueError: ``template.validate_result`` rejected the decoded reply.
            Exception: Any other failure (for example from the LLM call) is
                logged and re-raised unchanged.
        """
        # Route the prompt dump through the logger instead of stdout.
        chat_logger.debug(f"template.system_prompt: {template.system_prompt}")

        messages = [
            SystemMessage(content=template.system_prompt),
            HumanMessage(content=f"请从以下文本中提取信息:\n\n{ocr_text}"),
        ]

        try:
            response = await self.llm.ainvoke(messages)
            # Strip an optional Markdown code fence around the JSON payload.
            content = self._extract_json_text(response.content)
            result = json.loads(content)

            # Validate before post-processing so the hook only sees
            # well-formed results.
            if not template.validate_result(result):
                raise ValueError("解析结果验证失败")
            return template.post_process(result)

        except json.JSONDecodeError as e:
            # `content` is always bound here: only json.loads raises this.
            chat_logger.error(f"JSON解析失败: {e}, 原始内容: {content}")
            raise
        except Exception as e:
            chat_logger.error(f"LLM解析失败: {e}")
            raise