# knowledge_tools.py (3.5 KB)
  1. from langchain.tools import tool
  2. from typing import List
  3. import requests
  4. import json
  5. import os
  6. from .base_tool import html_to_text, get_unique_match_count
  7. from config.settings import settings
  8. @tool
  9. def get_knowledge_list(filter_words: List[str], match_limit: int = 3) -> str:
  10. """根据关键词筛选知识库文章列表
  11. Args:
  12. filter_words: 关键词列表,匹配任一关键词即返回
  13. match_limit: 最小匹配数(默认3),无结果时可减少重试(最小1)
  14. Returns:
  15. 文章列表,格式:每行"DocID:DocName|keyword",用于后续获取内容
  16. """
  17. print(f"正在查询知识库列表,筛选关键词:{filter_words} 匹配下限:{match_limit}")
  18. kms_list_url = settings.KMS_LIST_URL # os.getenv("KMS_LIST_URL")
  19. payload = {
  20. "categorycodeList": [],
  21. "ignoreTypeSub": False,
  22. "ignoreStandardByTopic": True,
  23. }
  24. headers = {
  25. "Accept": "application/json, text/plain, */*",
  26. "Content-Type": "application/json",
  27. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
  28. }
  29. try:
  30. response = requests.post(
  31. kms_list_url, headers=headers, json=payload, timeout=10
  32. )
  33. if response.status_code == 200:
  34. data = response.json()
  35. matched_lines = ""
  36. for doc in data["docList"]:
  37. doc_id = doc["DocID"]
  38. doc_name = doc["DocName"]
  39. doc_keywords = doc["keyword"]
  40. search_text = f"{doc_name} {doc_keywords}".lower()
  41. if not filter_words:
  42. line = (
  43. f"{doc_id}:{doc_name}|{doc_keywords}"
  44. if doc_keywords
  45. else f"{doc_id}:{doc_name}"
  46. )
  47. matched_lines += line + "\n"
  48. else:
  49. match_count = get_unique_match_count(search_text, filter_words)
  50. if match_count >= match_limit:
  51. line = (
  52. f"{doc_id}:{doc_name}|{doc_keywords}"
  53. if doc_keywords
  54. else f"{doc_id}:{doc_name}"
  55. )
  56. matched_lines += line + "\n"
  57. return matched_lines
  58. else:
  59. return f"请求失败,状态码: {response.status_code}"
  60. except Exception as e:
  61. return f"请求异常: {e}"
  62. @tool
  63. def get_knowledge_content(docid: str) -> str:
  64. """获取知识库文章内容
  65. Args:
  66. docid: 知识库文章的DocID
  67. Returns:
  68. 知识库文章内容
  69. """
  70. print(f"正在获取知识库文章内容,DocID: {docid}")
  71. kms_view_url = settings.KMS_VIEW_URL # os.getenv("KMS_VIEW_URL")
  72. headers = {
  73. "Accept": "application/json, text/plain, */*",
  74. "Content-Type": "application/json",
  75. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
  76. }
  77. try:
  78. payload = {"docid": docid}
  79. response = requests.post(
  80. kms_view_url, headers=headers, json=payload, timeout=10
  81. )
  82. if response.status_code == 200:
  83. data = response.json()
  84. doc_html = data.get("DocHtml", "")
  85. plain_text = html_to_text(doc_html)
  86. print(f"已获取到ID: {docid}的文章内容,长度{len(plain_text)}")
  87. return plain_text
  88. else:
  89. return f"请求失败,状态码: {response.status_code}"
  90. except Exception as e:
  91. return f"请求异常: {e}"