import json
import os
from typing import List

import requests
from langchain.tools import tool

from .base_tool import get_unique_match_count, html_to_text

# Headers sent with every KMS request; both endpoints are called with the same set.
_KMS_HEADERS = {
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
}

# Seconds to wait for a KMS response before the request is abandoned.
_KMS_TIMEOUT_SECONDS = 10


def _format_doc_line(doc_id, doc_name, doc_keywords) -> str:
    """Render one article as "DocID:DocName|keyword", or "DocID:DocName" without keywords."""
    if doc_keywords:
        return f"{doc_id}:{doc_name}|{doc_keywords}"
    return f"{doc_id}:{doc_name}"


@tool
def get_knowledge_list(filter_words: List[str], match_limit: int = 3) -> str:
    """List knowledge-base articles, optionally filtered by keywords.

    Returns the DocID, title (DocName) and keywords of the knowledge-base
    articles. Use the titles and keywords to pick the articles relevant to the
    user's question, then fetch their content by DocID with the separate
    article-content tool. Ideally no more than about 10 articles should be
    selected in the end.

    Args:
        filter_words: Keywords used to filter the articles. They are compared
            case-insensitively against each article's title and keywords. An
            empty list returns every article.
        match_limit: Minimum number of keywords an article must match to be
            returned (default 3). If no article, or too few articles, are
            returned, call the tool again with match_limit reduced by 1.
            Values below 1 are treated as 1.

    Returns:
        One line per article, each terminated by a newline, in the format
        "DocID:DocName|keyword", or "DocID:DocName" when the article has no
        keywords. If the request fails, an error message string is returned
        instead.
    """
    print(f"正在查询知识库列表,筛选关键词:{filter_words} 匹配下限:{match_limit}")

    kms_list_url = os.getenv("KMS_LIST_URL")
    if not kms_list_url:
        # Fail with a clear message instead of letting requests reject a None URL.
        return "请求异常: 未配置环境变量 KMS_LIST_URL"

    payload = {
        "categorycodeList": [],
        "ignoreTypeSub": False,
        "ignoreStandardByTopic": True,
    }

    # The broad except is deliberate: this is a tool boundary, and any failure
    # is reported back to the caller as text rather than raised.
    try:
        # The searched text is lower-cased below, so the keywords must be too;
        # otherwise an upper-case keyword could never match.
        # NOTE(review): get_unique_match_count is defined in base_tool and is
        # assumed to count how many of the given words occur in the text.
        lowered_words = [word.lower() for word in filter_words or []]
        required_matches = max(1, match_limit)

        response = requests.post(
            kms_list_url,
            headers=_KMS_HEADERS,
            json=payload,
            timeout=_KMS_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            return f"请求失败,状态码: {response.status_code}"

        matched_lines = []
        for doc in response.json()["docList"]:
            doc_id = doc["DocID"]
            doc_name = doc["DocName"]
            # A missing or null keyword field means "no keywords"; it must not
            # fail the listing or leak the text "None" into the searched text.
            doc_keywords = doc.get("keyword") or ""

            if lowered_words:
                search_text = f"{doc_name} {doc_keywords}".lower()
                match_count = get_unique_match_count(search_text, lowered_words)
                if match_count < required_matches:
                    continue

            matched_lines.append(_format_doc_line(doc_id, doc_name, doc_keywords))

        return "".join(f"{line}\n" for line in matched_lines)
    except Exception as e:
        return f"请求异常: {e}"


@tool
def get_knowledge_content(docid: str) -> str:
    """Fetch the content of a knowledge-base article.

    Args:
        docid: DocID of the knowledge-base article.

    Returns:
        The article content: the "DocHtml" field of the response passed
        through html_to_text. If the request fails, an error message string
        is returned instead.
    """
    print(f"正在获取知识库文章内容,DocID: {docid}")

    kms_view_url = os.getenv("KMS_VIEW_URL")
    if not kms_view_url:
        # Fail with a clear message instead of letting requests reject a None URL.
        return "请求异常: 未配置环境变量 KMS_VIEW_URL"

    # The broad except is deliberate: this is a tool boundary, and any failure
    # is reported back to the caller as text rather than raised.
    try:
        response = requests.post(
            kms_view_url,
            headers=_KMS_HEADERS,
            json={"docid": docid},
            timeout=_KMS_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            return f"请求失败,状态码: {response.status_code}"

        # "or" also covers a DocHtml that is present but null.
        doc_html = response.json().get("DocHtml") or ""
        plain_text = html_to_text(doc_html)
        print(f"已获取到ID: {docid}的文章内容,长度{len(plain_text)}")
        return plain_text
    except Exception as e:
        return f"请求异常: {e}"