| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- from langchain.tools import tool
- from typing import List
- import requests
- import json
- import os
- from .base_tool import html_to_text, get_unique_match_count
- from config.settings import settings
@tool
def get_knowledge_list(filter_words: List[str], match_limit: int = 3) -> str:
    """根据关键词筛选知识库文章列表
    Args:
        filter_words: 关键词列表,匹配任一关键词即返回
        match_limit: 最小匹配数(默认3),无结果时可减少重试(最小1)
    Returns:
        文章列表,格式:每行"DocID:DocName|keyword",用于后续获取内容
    """
    # NOTE: the docstring above is intentionally left in its original wording.
    # LangChain's `@tool` publishes the function docstring as the tool
    # description given to the model, so it is runtime text, not just a comment.
    #
    # Summary (English): POST to the knowledge-base list endpoint and return
    # one line per article, "DocID:DocName|keyword" (or "DocID:DocName" when
    # the article has no keyword). With an empty `filter_words` every article
    # is listed; otherwise an article is listed only when
    # get_unique_match_count(name + keywords, filter_words) >= match_limit.
    # Failures are reported as a message string instead of being raised.
    print(f"正在查询知识库列表,筛选关键词:{filter_words} 匹配下限:{match_limit}")

    # Endpoint is taken from the project settings object.
    kms_list_url = settings.KMS_LIST_URL
    payload = {
        "categorycodeList": [],
        "ignoreTypeSub": False,
        "ignoreStandardByTopic": True,
    }
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    }

    try:
        response = requests.post(
            kms_list_url, headers=headers, json=payload, timeout=10
        )
        if response.status_code != 200:
            return f"请求失败,状态码: {response.status_code}"

        matched_lines = []
        for doc in response.json()["docList"]:
            doc_id = doc["DocID"]
            doc_name = doc["DocName"]
            doc_keywords = doc["keyword"]

            if filter_words:
                # A missing keyword must not leak into the searchable text as
                # the literal string "None" (it would match a "none" filter).
                keyword_text = doc_keywords or ""
                search_text = f"{doc_name} {keyword_text}".lower()
                if get_unique_match_count(search_text, filter_words) < match_limit:
                    continue

            # Only append the "|keyword" suffix when there is a keyword.
            if doc_keywords:
                matched_lines.append(f"{doc_id}:{doc_name}|{doc_keywords}")
            else:
                matched_lines.append(f"{doc_id}:{doc_name}")

        # Every entry is newline-terminated; no matches yields "".
        return "".join(f"{line}\n" for line in matched_lines)
    except Exception as e:
        # Deliberately broad: this is the tool boundary, and the caller gets a
        # readable error string (network, JSON, or schema problems) rather
        # than an exception.
        return f"请求异常: {e}"
@tool
def get_knowledge_content(docid: str) -> str:
    """获取知识库文章内容
    Args:
        docid: 知识库文章的DocID
    Returns:
        知识库文章内容
    """
    # NOTE: the docstring above is intentionally left in its original wording.
    # LangChain's `@tool` publishes the function docstring as the tool
    # description given to the model, so it is runtime text, not just a comment.
    #
    # Summary (English): POST the DocID to the knowledge-base view endpoint,
    # pass the response's "DocHtml" field through html_to_text, and return the
    # result. Failures are reported as a message string instead of being raised.
    print(f"正在获取知识库文章内容,DocID: {docid}")

    # Endpoint is taken from the project settings object.
    endpoint = settings.KMS_VIEW_URL
    request_headers = {
        "Accept": "application/json, text/plain, */*",
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    }

    try:
        reply = requests.post(
            endpoint,
            headers=request_headers,
            json={"docid": docid},
            timeout=10,
        )

        # Guard clause: anything other than HTTP 200 is reported as a failure.
        if reply.status_code != 200:
            return f"请求失败,状态码: {reply.status_code}"

        # A response without "DocHtml" is treated as an empty document.
        article_text = html_to_text(reply.json().get("DocHtml", ""))
        print(f"已获取到ID: {docid}的文章内容,长度{len(article_text)}")
        return article_text
    except Exception as err:
        # Broad on purpose: the tool reports any failure as text to its caller.
        return f"请求异常: {err}"
|