"""Shared helpers: text cleanup, keyword relevance scoring, backend API calls
and tool-description assembly from a JSON configuration file."""

import csv
import html
import io
import json
import re
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional

try:
    import requests
except ImportError:  # pragma: no cover - depends on the runtime environment
    # Only call_csharp_api needs requests; keep the pure helpers importable
    # (and testable) when the package is missing.
    requests = None


# Compiled once instead of on every html_to_text call.
_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")

# Scoring weights used by calculate_relevance_score.
_TITLE_WEIGHT = 0.7  # share of the total contributed by title matches
_KEYWORD_WEIGHT = 0.3  # share contributed by matches in the keyword field
_EXACT_MATCH_BONUS = 0.5  # extra factor for a space-delimited title match

# Search terms that receive an additional 1.2x boost when they match.
_PRIORITY_KEYWORDS = ("交期", "审核", "修改", "终止", "失败", "错误", "明细", "功能")


def html_to_text(html_content: str) -> str:
    """Convert an HTML fragment to plain text.

    Tags are removed, HTML entities are unescaped and every run of whitespace
    is collapsed to a single space.

    Args:
        html_content: HTML markup; a falsy value yields an empty string.

    Returns:
        The stripped plain-text content.
    """
    if not html_content:
        return ""
    text = _TAG_RE.sub("", html_content)
    text = html.unescape(text)
    return _WHITESPACE_RE.sub(" ", text).strip()


def _normalize_keywords(keywords: Optional[Iterable[str]]) -> List[str]:
    """Lower-case keywords and drop empty/blank ones.

    An empty string is a substring of every text, so keeping it would make
    every document count as a match.
    """
    return [kw.lower() for kw in keywords or [] if kw and kw.strip()]


def get_unique_match_count(search_text: str, filter_words: List[str]) -> int:
    """Count how many filter words occur in the text without sharing characters.

    Words are tried longest first; each hit removes its first occurrence from
    the remaining text so a shorter word cannot match inside a longer word
    that was already counted. Matching is case-insensitive. Empty or blank
    filter words are ignored.

    Args:
        search_text: Text to search in.
        filter_words: Candidate words.

    Returns:
        Number of filter words that matched.
    """
    remaining_text = (search_text or "").lower()
    candidates = sorted(
        (kw for kw in filter_words or [] if kw and kw.strip()),
        key=len,
        reverse=True,
    )
    match_count = 0
    for keyword in candidates:
        needle = keyword.lower()
        if needle in remaining_text:
            match_count += 1
            remaining_text = remaining_text.replace(needle, "", 1)
    return match_count


def calculate_relevance_score(
    doc_name: str, doc_keywords: str, search_keywords: List[str]
) -> float:
    """Score how relevant a document is to a list of search keywords.

    Args:
        doc_name: Document title (``None`` is treated as empty).
        doc_keywords: Document keyword string (``None`` is treated as empty).
        search_keywords: Search keywords; empty/blank entries are ignored.

    Returns:
        A score capped at 100.0; 0.0 when no keyword matches at all.
    """
    doc_name = doc_name or ""
    doc_keywords = doc_keywords or ""
    title = doc_name.lower()
    tags = doc_keywords.lower()
    search_text = f"{doc_name} {doc_keywords}".lower()
    keywords = _normalize_keywords(search_keywords)

    # Without a single hit every partial score below would be zero anyway.
    matched_count = sum(1 for kw in keywords if kw in search_text)
    if matched_count == 0:
        return 0.0

    # 1. Title matches: 2 points per keyword character, at most 10.
    title_score = 0.0
    padded_title = f" {title} "
    for kw in keywords:
        if kw not in title:
            continue
        base_score = min(len(kw) * 2, 10)
        # Matches at the very start or end of the title weigh more.
        if title.startswith(kw):
            base_score *= 1.5
        elif title.endswith(kw):
            base_score *= 1.3
        # Bonus when the keyword appears as a space-delimited token.
        if f" {kw} " in padded_title:
            base_score *= 1 + _EXACT_MATCH_BONUS
        title_score += base_score

    # 2. Keyword-field matches: 1.5 points per character, at most 8.
    keyword_score = sum(min(len(kw) * 1.5, 8) for kw in keywords if kw in tags)

    # 3. Weighted total.
    total_score = title_score * _TITLE_WEIGHT + keyword_score * _KEYWORD_WEIGHT

    # 4. Coverage bonus: the larger the share of matched keywords, the
    #    larger the boost (up to 1.5x).
    total_score *= 1 + (matched_count / len(keywords)) * 0.5

    # 5. Each priority keyword that was searched for and found adds 1.2x.
    searched = set(keywords)
    for priority in _PRIORITY_KEYWORDS:
        if priority in searched and priority in search_text:
            total_score *= 1.2

    # 6. Mild penalty for long titles (more than 30 characters).
    if len(doc_name) > 30:
        total_score *= 0.9

    return min(total_score, 100.0)


def find_most_relevant_document(
    doc_list: List[dict], search_keywords: List[str], max_matches: int = 10
) -> List[dict]:
    """Return the documents that best match the search keywords.

    A document qualifies when at least one non-blank keyword occurs
    (case-insensitively) in its title or keyword string.

    Args:
        doc_list: Documents as dicts with ``DocID`` and ``DocName`` keys and
            an optional ``keyword`` key (a missing or null value is treated
            as an empty string).
        search_keywords: Search keywords.
        max_matches: Maximum number of results to return.

    Returns:
        Up to ``max_matches`` dicts with ``doc_id``, ``doc_name``,
        ``keywords``, ``relevance_score`` and ``match_count``, sorted by
        relevance score in descending order.
    """
    keywords = _normalize_keywords(search_keywords)
    scored_docs = []
    for doc in doc_list:
        doc_id = doc["DocID"]
        doc_name = doc["DocName"] or ""
        # .get("keyword", "") would still return None for an explicit null.
        doc_keywords = doc.get("keyword") or ""

        haystack = f"{doc_name} {doc_keywords}".lower()
        match_count = sum(1 for kw in keywords if kw in haystack)
        if match_count == 0:
            continue

        scored_docs.append(
            {
                "doc_id": doc_id,
                "doc_name": doc_name,
                "keywords": doc_keywords,
                "relevance_score": calculate_relevance_score(
                    doc_name, doc_keywords, search_keywords
                ),
                "match_count": match_count,
            }
        )

    scored_docs.sort(key=lambda item: item["relevance_score"], reverse=True)
    return scored_docs[:max_matches]


def call_csharp_api(
    backend_url: str, token: str, uoName: str, functionName: str, SParms: dict
) -> str:
    """Call the C# backend and return its processed response as text.

    The function never raises: configuration problems, non-200 responses and
    exceptions are all reported through the returned string. Debug
    information is printed to stdout; the token value itself is not printed.

    Args:
        backend_url: URL the request is POSTed to.
        token: Authentication token, sent as ``X-TOKEN`` header and in the
            payload.
        uoName: Value sent as ``uoName`` in the request payload.
        functionName: Value sent as ``functionName`` in the request payload.
        SParms: Value sent as ``SParms`` in the request payload.

    Returns:
        The result of ``process_api_response`` on success, otherwise an
        error message.
    """
    token_status = "已配置" if token else "未配置"
    print("🔧 API调用调试信息:")
    print(f" - 后端地址: {backend_url}")
    print(f" - Token: {token_status}")
    print(f" - 功能: {functionName}")
    print(f" - 参数: {SParms}")

    if not backend_url or not token:
        error_msg = f"错误:未配置后端地址或认证令牌。后端: {backend_url or '未配置'}, Token: {token_status}"
        print(f"❌ {error_msg}")
        return error_msg

    if requests is None:
        error_msg = "错误:未安装 requests 库,无法调用后端API"
        print(f"❌ {error_msg}")
        return error_msg

    headers = {
        "Accept": "application/json, text/plain, */*",
        "Content-Type": "application/json",
        "X-TOKEN": token,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    }
    payload = {
        "token": token,
        "CList": [
            {
                "uoName": uoName,
                "functionName": functionName,
                "SParms": SParms,
                "ifcommit": True,
                "returnStrList": [],
            }
        ],
        "language": "zh-cn",
    }

    try:
        print(f"🌐 发送API请求到: {backend_url}")
        response = requests.post(backend_url, headers=headers, json=payload, timeout=30)
        print(f"📡 响应状态码: {response.status_code}")

        if response.status_code == 200:
            return process_api_response(response.json())

        error_msg = f"API请求失败,状态码: {response.status_code}"
        print(f"❌ {error_msg}")
        return error_msg
    except Exception as e:
        # Deliberately broad: this is the boundary where network and
        # JSON-decoding failures are turned into a message for the caller.
        error_msg = f"API调用异常: {str(e)}"
        print(f"❌ {error_msg}")
        return error_msg


def _csv_line(values: Iterable[Any]) -> str:
    """Format one CSV record (without line terminator), quoting where needed."""
    buffer = io.StringIO()
    csv.writer(buffer, lineterminator="\n").writerow(values)
    return buffer.getvalue()[:-1]


def process_api_response(data: Dict[str, Any]) -> str:
    """Turn a backend response into text.

    The payload is read from ``data["reJob"]["0"]``, which is parsed as a
    JSON string.

    Args:
        data: Decoded JSON body of the backend response.

    Returns:
        * an error message when the payload contains ``err_msg``;
        * ``"没有数据"`` when ``data`` is present but empty;
        * a CSV table (header taken from the first row's keys) when ``data``
          is a list of dicts, followed by a ``# 警告: ...`` line if the
          payload contains ``warning_msg``;
        * otherwise the whole response dumped as JSON.

        Any exception is reported as an error message instead of being
        raised.
    """
    try:
        inner_json_str = data.get("reJob", {}).get("0", "{}")
        inner_data = json.loads(inner_json_str)

        if "err_msg" in inner_data:
            return f"API返回错误: {inner_data['err_msg']}"

        warning_msg = inner_data.get("warning_msg")

        if "data" in inner_data:
            data_list = inner_data["data"]
            if not data_list:
                return "没有数据"

            if isinstance(data_list, list) and isinstance(data_list[0], dict):
                headers = list(data_list[0].keys())
                # csv quoting keeps values that contain commas, quotes or
                # newlines from breaking the column layout.
                result = [_csv_line(headers)]
                result.extend(
                    _csv_line([str(row.get(h, "")) for h in headers])
                    for row in data_list
                )
                if warning_msg:
                    result.append(f"# 警告: {warning_msg}")
                print(result)
                return "\n".join(result)

        return json.dumps(data, ensure_ascii=False)
    except Exception as e:
        return f"响应处理错误: {str(e)}"


# Tool configuration helpers
def load_tool_config(
    config_path: Path, get_default_config: Optional[Callable] = None
) -> Dict[str, Any]:
    """Load the tool configuration from a JSON file.

    Args:
        config_path: Path of the configuration file (a ``str`` is accepted
            as well).
        get_default_config: Optional callback producing the fallback
            configuration.

    Returns:
        The parsed configuration. When the file is missing, unreadable, not
        valid JSON or its top level is not an object, a message is printed
        and the result of ``get_default_config()`` is returned, or ``{}`` if
        no callback was given.
    """

    def fallback() -> Dict[str, Any]:
        return get_default_config() if get_default_config else {}

    config_path = Path(config_path)
    if not config_path.exists():
        print(f"警告: 配置文件不存在: {config_path}")
        return fallback()

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except json.JSONDecodeError as e:
        print(f"错误: 配置文件格式不正确: {e}")
        return fallback()
    except Exception as e:
        # Best-effort by design: any read failure falls back to defaults.
        print(f"错误: 读取配置文件失败: {e}")
        return fallback()

    if not isinstance(loaded, dict):
        # Callers look tools up by name, so the top level must be an object.
        print(f"错误: 配置文件格式不正确: 顶层应为对象,实际为 {type(loaded).__name__}")
        return fallback()
    return loaded


def assemble_tool_description(tool_config: Dict[str, Any]) -> str:
    """Assemble a tool description string from its configuration entries.

    The recognised keys are emitted in a fixed order: base description,
    function summary, parameters, return values, output requirements and
    usage example. Dict and list values are expanded to one line per entry.

    Args:
        tool_config: Configuration of a single tool.

    Returns:
        The parts joined with newlines; an empty string for an empty config.
    """
    if not tool_config:
        return ""

    parts = []

    # Base description
    if "基础描述" in tool_config:
        parts.append(tool_config["基础描述"])

    # Function summary
    if "功能说明" in tool_config:
        parts.append(f"\n功能: {tool_config['功能说明']}")

    # Input parameters
    if "入参说明" in tool_config:
        params = tool_config["入参说明"]
        if isinstance(params, dict):
            parts.append("\n参数:")
            parts.extend(f" {param}: {desc}" for param, desc in params.items())
        else:
            parts.append(f"\n参数说明: {params}")

    # Return values
    if "返回值说明" in tool_config:
        returns = tool_config["返回值说明"]
        if isinstance(returns, dict):
            parts.append("\n返回:")
            for key, value in returns.items():
                if isinstance(value, list):
                    parts.append(f" {key}:")
                    parts.extend(f" - {item}" for item in value)
                else:
                    parts.append(f" {key}: {value}")
        else:
            parts.append(f"\n返回结果: {returns}")

    # Output format requirements
    if "输出格式要求" in tool_config:
        requirements = tool_config["输出格式要求"]
        if isinstance(requirements, list):
            parts.append("\n输出要求:")
            parts.extend(f" - {requirement}" for requirement in requirements)
        else:
            parts.append(f"\n注意: {requirements}")

    # Usage example
    if "使用示例" in tool_config:
        parts.append(f"\n示例: {tool_config['使用示例']}")

    return "\n".join(parts)


def get_tool_prompt(
    tool_name: str, default_config_func: Optional[Callable] = None
) -> str:
    """Build the full prompt text for a tool.

    The configuration is read from ``config/tool_config.json`` located two
    directory levels above this file.

    Args:
        tool_name: Name of the tool to look up in the configuration.
        default_config_func: Optional callback returning the default
            configuration, used when the file cannot be loaded or has no
            entry for the tool.

    Returns:
        The assembled description, or ``"执行 <tool_name> 功能"`` when no
        configuration is available for the tool.
    """
    config_path = Path(__file__).parent.parent / "config" / "tool_config.json"

    config = load_tool_config(config_path, default_config_func)
    if not isinstance(config, dict):
        # A default callback may hand back something that is not a mapping.
        config = {}

    tool_config = config.get(tool_name, {})

    # Nothing configured for this tool: try the defaults.
    if not tool_config and default_config_func:
        default_config = default_config_func()
        if isinstance(default_config, dict) and tool_name in default_config:
            tool_config = default_config[tool_name]
        else:
            # NOTE(review): the original had an extra branch here that used
            # default_config as the tool config only when it was an EMPTY
            # dict, which is equivalent to this fallback. It may have been
            # meant to accept a callback returning the tool's own config
            # directly - confirm the intent before changing behaviour.
            tool_config = {}

    if tool_config:
        return assemble_tool_description(tool_config)
    return f"执行 {tool_name} 功能"