from langchain_openai import ChatOpenAI
from langchain.messages import HumanMessage, SystemMessage
import json
import re
from typing import Dict, Any
from utils.logger import chat_logger
import os
from config.settings import settings


class LLMParser:
    """Parse OCR text into structured JSON using a chat LLM.

    The underlying client is a ``ChatOpenAI`` instance whose model,
    temperature, API key, base URL and token limit are all read from
    ``settings``.
    """

    # Markdown code fence, optionally tagged ``json`` (any case); captures the
    # fence body. ``\s*`` tolerates both LF and CRLF around the payload.
    _FENCE_RE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)

    def __init__(self, llm_config: Dict = None):
        """Create the chat client from ``settings``.

        Args:
            llm_config: Accepted for interface compatibility but currently
                ignored; every client option comes from ``settings``.
        """
        self.llm = ChatOpenAI(
            model=settings.LLM_MODEL,
            temperature=settings.LLM_TEMPERATURE,
            api_key=settings.DEEPSEEK_API_KEY,
            base_url=settings.DEEPSEEK_BASE_URL,
            max_tokens=settings.LLM_MAX_TOKENS,
        )

    @staticmethod
    def _content_to_text(content: Any) -> str:
        """Flatten a chat message ``content`` value into plain text.

        A string is returned unchanged. For a list of parts, string parts and
        the ``"text"`` value of dict parts are concatenated; anything else is
        skipped. ``None`` yields an empty string.
        """
        if isinstance(content, str):
            return content
        pieces = []
        for part in content or []:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        return "".join(pieces)

    @staticmethod
    def _extract_json_text(content: str) -> str:
        """Return the substring of ``content`` most likely to be the JSON payload.

        Order of preference:
        1. the body of the first Markdown code fence, if any;
        2. otherwise the stripped text itself;
        3. if that does not start with ``{`` or ``[``, the span from the first
           ``{`` to the last ``}`` (drops prose wrapped around the object).

        The result is not validated here; the caller runs ``json.loads`` on it.
        """
        text = content.strip()
        fenced = LLMParser._FENCE_RE.search(text)
        if fenced:
            text = fenced.group(1).strip()
        if text[:1] not in ("{", "["):
            start, end = text.find("{"), text.rfind("}")
            if 0 <= start < end:
                text = text[start:end + 1]
        return text

    async def parse_to_json(self, ocr_text: str, template) -> Dict[str, Any]:
        """Extract structured data from OCR text according to ``template``.

        Args:
            ocr_text: Raw OCR text to send to the model.
            template: Object providing ``system_prompt``, ``validate_result(result)``
                and ``post_process(result)``.

        Returns:
            The value returned by ``template.post_process`` for the parsed result.

        Raises:
            json.JSONDecodeError: The model reply did not contain parseable JSON.
            ValueError: ``template.validate_result`` rejected the parsed result.
            Exception: Any error raised by the LLM call or the template is logged
                and re-raised unchanged.
        """
        chat_logger.debug(f"template.system_prompt: {template.system_prompt}")
        messages = [
            SystemMessage(content=template.system_prompt),
            HumanMessage(content=f"请从以下文本中提取信息：\n\n{ocr_text}"),
        ]

        # Bound before the try so the error handlers can always reference it,
        # even when the failure happens before a reply is received.
        raw_text = ""
        try:
            response = await self.llm.ainvoke(messages)
            raw_text = self._content_to_text(response.content)

            # Models often wrap the JSON in a Markdown fence or add prose around it.
            result = json.loads(self._extract_json_text(raw_text))

            if not template.validate_result(result):
                raise ValueError("解析结果验证失败")
            return template.post_process(result)

        except json.JSONDecodeError as e:
            chat_logger.error(f"JSON解析失败: {e}, 原始内容: {raw_text}")
            raise
        except Exception as e:
            chat_logger.error(f"LLM解析失败: {e}")
            raise