瀏覽代碼

定期同步

longjoedyy 2 周之前
父節點
當前提交
bb0b2c5ec4

+ 4 - 1
.env.example

@@ -1 +1,4 @@
-ENCRYPTED_DEEPSEEK_KEY=加密后的DEEPSEEK_API_KEY,生产环境使用
+ENCRYPTED_DEEPSEEK_KEY=加密后的DEEPSEEK_API_KEY,生产环境使用
+PADDLE_OCR_API_URL=文字识别API地址
+PADDLE_OCR_TOKEN=文字识别API密钥
+ECHARTS_ENABLED=false #是否启用图表输出功能(true/false)

+ 5 - 2
api/routes.py

@@ -29,11 +29,14 @@ from core.document_processor.document_service import DocumentProcessingService
 from core.async_chat_service import async_chat_service
 from core.chat_result_manager import chat_result_manager
 from typing import Dict, Any
+from config.settings import settings
 
 # 初始化服务
 ocr_service = PaddleOCRService(
-    api_url="https://a8l0g1qda8zd48nb.aistudio-app.com/ocr",
-    token="f97d214abf87d5ea3c156e21257732a3b19661cb",
+    # api_url="https://a8l0g1qda8zd48nb.aistudio-app.com/ocr",
+    # token="f97d214abf87d5ea3c156e21257732a3b19661cb",
+    api_url=settings.PADDLE_OCR_API_URL,
+    token=settings.PADDLE_OCR_TOKEN,
 )
 doc_service = DocumentProcessingService(ocr_service=ocr_service)
 

+ 9 - 0
config/settings.py

@@ -29,6 +29,14 @@ class Settings:
             "KMS_VIEW_URL", "http://kms.longjoe.com:82/KMS/api/common/DocViewAi"
         )
 
+        # PaddleOCR 配置
+        self.PADDLE_OCR_API_URL = os.getenv(
+            "PADDLE_OCR_API_URL", "https://a8l0g1qda8zd48nb.aistudio-app.com/ocr"
+        )
+        self.PADDLE_OCR_TOKEN = os.getenv(
+            "PADDLE_OCR_TOKEN", "f97d214abf87d5ea3c156e21257732a3b19661cb"
+        )
+
         # 其他配置
         self.LLM_MODEL = os.getenv("LLM_MODEL", "qwen-flash")
         self.LLM_TEMPERATURE = 0.2
@@ -36,6 +44,7 @@ class Settings:
         self.KNOWLEDGE_BASE_ENABLED = (
             os.getenv("KNOWLEDGE_BASE_ENABLED", "false").lower() == "true"
         )
+        self.ECHARTS_ENABLED = os.getenv("ECHARTS_ENABLED", "false").lower() == "true"
 
     def _load_env_file(self):
         """加载对应的.env文件"""

+ 35 - 7
config/template_config.json

@@ -3,25 +3,50 @@
         "cusamt": {
             "field_guidance": {
                 "cusname": [
-                    "一般是付款人字样旁边的户名,如果是农商行的回单,在左上角"
+                    "付款方,采购方",
+                    "如出现{cusname}{viewdate 格式M.DD}付款{acccode}{cusamt}元 类似格式的文字,优先取该文字中的{cusname}"
                 ],
                 "viewdate": [
                     "日期格式可能为:YYYY-MM-DD、YYYY/MM/DD、YYYY年MM月DD日"
                 ],
                 "cusamt": [
-                    "金额:注意区分大小写:大写金额和小写金额都要识别"
+                    "金额:注意区分大小写:大写金额和小写金额都要识别",
+                    "要求输出必须是纯数字"
                 ],
                 "accname": [
-                    "收款账户名称:格式可能: 工行, 广发行"
+                    "收款账户名称:格式可能: 工行, 广发行",
+                    "**禁止**将账号数字(如'4574')或金额数字(如'888')填入此字段。",
+                    "**若无明确对应信息,本字段输出空字符串。**"
                 ],
                 "acccode": [
-                    "收款帐号:格式可能:6228****1234"
+                    "优先匹配并应用下方“特殊文本格式”中的规则,提取'{帐号}'部分。",
+                    "账号通常是连续的数字串,可能包含星号(*)但不会包含'元'字。",
+                    "**重要**:账号数字(如'4574')和金额数字(如'888')必须严格区分。金额数字后必有'元'字,且不会作为账号。"
                 ],
                 "dscrp": [
-                    "备注:附言等信息,已在其他信息明确的无需重复"
+                    "备注:附言等信息,已在其他信息明确的无需重复",
+                    "一般是有具体含义的简短描述"
+                ],
+                "relcode": [
+                    "提取'送货单号'、'单号'、'发货单号'、'单据号'、'相关号'等类似含义的号码。",
+                    "**优先**识别图片截图中单据打印区域(通常是顶部、右上角)的独立单号栏位。",
+                    "若无明确对应的单号信息,本字段输出空字符串。"
+                ],
+                "cust_account": [
+                    "提取付款人(即客户)转出资金所用的**银行账号、卡号、第三方支付账号**。",
+                    "常见位置:'付款账号'、'转出账号'、'借方账号'等字样旁边。",
+                    "**重要**:严格区分`acccode`(收款方账号)、`cust_account`(付款方账号)、`relcode`(送货单号)。三者含义完全不同,严禁混淆。",
+                    "**若无明确的、与付款方对应的账号信息,本字段必须输出空字符串。** 禁止猜测或填入其他无关信息。"
                 ]
             },
-            "additional_rules": "如果识别到农商行回单,付款人信息通常在左上角区域"
+            "additional_rules": [
+                "如果识别到农商行回单,付款人信息通常在左上角区域",
+                "用户指令格式:{cusname}{viewdate 格式M.DD}付款{acccode}{cusamt}元",
+                "例子:伏特加3.13付款三楼54元, 客户:伏特加,付款日期:3.13,帐号:三楼,金额:54元",
+                "例子:奥方 3.14付款4574卡号888元,客户:奥方,付款日期:3.14,帐号:4574,金额:888元,'卡号'是描述4574,'元'是描述888",
+                "例子:缔造北欧3.15付款门市150元,客户:缔造北欧,付款日期:3.15,帐号:门市,金额:150元",
+                "复杂例子:万家达1000米诺曼-02定金3.13付款工行7703尾号4740元,客户:万家达,付款日期:3.13,帐号:工行7703,金额:4740元,备注:1000米诺曼-02定金,'尾号'是描述7703,'元'是描述4740"
+            ]
         },
         "saletask": {
             "field_guidance": {
@@ -53,7 +78,10 @@
                 "enprice": [],
                 "rebate": []
             },
-            "additional_rules": "如无法判断内容是客户还是产品,优先判断为产品,'-'号前后内容不拆开. 例如:'Domino多米诺-02 16米 备注 重庆照母山120',Domino多米诺-02 是产品,16米是数量,重庆照母山120是备注.如果出现 谁谁谁要什么之类的,谁谁谁就是客户."
+            "additional_rules": [
+                "如无法判断内容是客户还是产品,优先判断为产品,'-'号前后内容不拆开. 例如:'Domino多米诺-02 16米 备注 重庆照母山120',Domino多米诺-02 是产品,16米是数量,重庆照母山120是备注.如果出现 谁谁谁要什么之类的,谁谁谁就是客户.",
+                "用户指令格式可能:{cusname} {mtrlname} {saleqty}{unit} {dscrp}. 例如:'长利 泰迪 b-01 1米 带门市',cusname:长利,mtrlname:泰迪 b-01,saleqty:1,unit:米,dscrp:带门市"
+            ]
         }
     }
 }

File diff suppressed because it is too large
+ 10 - 4
config/tool_config.json


+ 66 - 1
core/agent.py

@@ -98,6 +98,71 @@ def create_system_prompt(
     | 数据1 | 数据2 | 数据3 |
     | 数据4 | 数据5 | 数据6 |
     """
+    if settings.ECHARTS_ENABLED:
+        system_prompt = (
+            system_prompt
+            + """
+    并且根据数据的格式,主动选择合适的图表输出,你可以输出柱状图、折线图、饼图。
+
+    饼图格式范例如下:
+    ```echarts
+    {{
+    "title": {{
+        "text": "浏览器份额", "left": "center" }}
+    "tooltip": {{
+        "trigger": "item" }},
+    "legend": {{
+        "orient": "vertical", "left": "left" }},
+    "series": [
+        {{
+        "name": "Share",
+        "type": "pie",
+        "radius": "55%",
+        "center": ["50%", "60%"],
+        "data": [
+            {{"value": 1048, "name": "Chrome" }}
+            {{"value": 735, "name": "Firefox" }}
+            {{"value": 580, "name": "Edge" }}
+        ]
+        }}
+    ]
+    }}
+    ```
+
+    柱状图格式范例如下:
+    ```echarts
+    {{
+    "title": {{"text": "每周销量" }}
+    "tooltip": {{}},
+    "xAxis": {{"type": "category", "data": ["Mon","Tue","Wed","Thu","Fri","Sat","Sun"] }}
+    "yAxis": {{"type": "value" }}
+    "series": [
+        {{"type": "bar", "data": [120, 200, 150, 80, 70, 110, 130] }}
+    ]
+    }}
+    ```
+
+    折线图,格式范例如下:
+    ```echarts
+    {{
+    "title": {{ "text": "温度趋势" }},
+    "tooltip": {{ "trigger": "axis" }},
+    "legend": {{ "data": ["最高", "最低"] }},
+    "xAxis": {{
+        "type": "category",
+        "boundaryGap": false,
+        "data": ["Mon","Tue","Wed","Thu","Fri","Sat","Sun"]
+    }},
+    "yAxis": {{"type": "value" }},
+    "series": [
+        {{"name": "最高", "type": "line", "data": [11, 11, 15, 13, 12, 13, 10], "smooth": true }}
+        {{"name": "最低", "type": "line", "data": [1, -2, 2, 5, 3, 2, 0], "smooth": true }}
+    ]
+    }}
+    ```
+    """
+        )
+
     return system_prompt
 
 
@@ -216,7 +281,7 @@ def create_langchain_agent(
 
     # 获取动态的system_prompt
     system_prompt = create_system_prompt(backend_url, token, username)
-
+    print(system_prompt)
     # def simple_turn_based_trim(
     #     messages: Sequence[BaseMessage],
     #     keep_turns: int = 3,

+ 21 - 7
core/document_processor/templates/base_template.py

@@ -103,8 +103,8 @@ class DocumentTemplate(ABC):
     def _build_extended_guidance(
         self,
         field_guidance: Dict[str, list],
-        hardcoded_rules: str,
-        configured_rules: str,
+        hardcoded_rules: Any,
+        configured_rules: Any,
     ) -> str:
         """构建扩展指导信息"""
         guidance_parts = []
@@ -117,12 +117,26 @@ class DocumentTemplate(ABC):
                     combined_hints = "; ".join(hints)
                     guidance_parts.append(f"- {field_name}: {combined_hints}")
 
-        # 合并额外规则(过滤空值
+        # 合并额外规则(支持字符串和数组格式)
         additional_rules = []
-        if hardcoded_rules and hardcoded_rules.strip():
-            additional_rules.append(hardcoded_rules)
-        if configured_rules and configured_rules.strip():
-            additional_rules.append(configured_rules)
+
+        # 处理硬编码规则
+        if hardcoded_rules:
+            if isinstance(hardcoded_rules, str) and hardcoded_rules.strip():
+                additional_rules.append(hardcoded_rules)
+            elif isinstance(hardcoded_rules, list):
+                additional_rules.extend(
+                    [rule for rule in hardcoded_rules if rule and rule.strip()]
+                )
+
+        # 处理配置规则
+        if configured_rules:
+            if isinstance(configured_rules, str) and configured_rules.strip():
+                additional_rules.append(configured_rules)
+            elif isinstance(configured_rules, list):
+                additional_rules.extend(
+                    [rule for rule in configured_rules if rule and rule.strip()]
+                )
 
         if additional_rules:
             guidance_parts.append(f"特殊规则: {'; '.join(additional_rules)}")

+ 2 - 0
core/document_processor/templates/cusamt_template.py

@@ -32,6 +32,8 @@ class CusAmtTemplate(DocumentTemplate):
             "accname": "收款账户名称",
             "acccode": "收款帐号",
             "dscrp": "备注",
+            "relcode": "相关号码",
+            "cust_account": "客户账户",
             "kindstr": "收款类型(规范为以下几类:余款,订金,预收款,其它,缺省值:余款)",
         }
 

+ 6 - 1
tools/base_tool.py

@@ -330,7 +330,12 @@ def assemble_tool_description(tool_config: Dict[str, Any]) -> str:
 
     # 使用示例
     if "使用示例" in tool_config:
-        description_parts.append(f"\n示例: {tool_config['使用示例']}")
+        if isinstance(tool_config["使用示例"], list):
+            description_parts.append("\n示例:")
+            for example in tool_config["使用示例"]:
+                description_parts.append(f"  - {example}")
+        else:
+            description_parts.append(f"\n示例: {tool_config['使用示例']}")
 
     return "\n".join(description_parts)