feat: 增强工作流引擎的错误处理和模型回退逻辑

- 添加标记以跟踪是否使用了默认模型,优化错误处理流程
- 在LLM调用失败时,如果未使用默认模型且错误为404,尝试回退到默认模型并重试
- 改进日志记录,提供更详细的错误信息和回退尝试的记录
This commit is contained in:
eason 2026-01-23 17:30:48 +08:00
parent a61e710ff1
commit 020a192542
3 changed files with 44 additions and 8 deletions

Binary file not shown.

View File

@@ -654,6 +654,9 @@ class WorkflowEngine:
         """执行LLM节点"""
         config = input_data.get('node_config', {})
+        # 标记是否已经使用了默认模型(用于错误时决定是否回退)
+        used_default_model = False
+
         # 获取LLM配置
         model_id = config.get('model_id')
         if not model_id:
@@ -669,9 +672,9 @@ class WorkflowEngine:
                 result = await self.session.execute(
                     select(LLMConfig).where(LLMConfig.model_name == model_value)
                 )
-                llm_config = result.scalar_one_or_none()
-                if llm_config:
-                    model_id = llm_config.id
+                llm_cfg = result.scalar_one_or_none()
+                if llm_cfg:
+                    model_id = llm_cfg.id

         # 如果还是没有,尝试从节点定义本身获取
         if not model_id:
@@ -687,9 +690,9 @@ class WorkflowEngine:
                 result = await self.session.execute(
                     select(LLMConfig).where(LLMConfig.model_name == model_value)
                 )
-                llm_config = result.scalar_one_or_none()
-                if llm_config:
-                    model_id = llm_config.id
+                llm_cfg = result.scalar_one_or_none()
+                if llm_cfg:
+                    model_id = llm_cfg.id

         # 如果还是没有尝试使用默认的LLM配置
         if not model_id:
@@ -698,6 +701,7 @@ class WorkflowEngine:
             default_config = await llm_config_service.get_default_chat_config(self.session)
             if default_config:
                 model_id = default_config.id
+                used_default_model = True
                 logger.info(f"LLM节点未指定模型配置使用默认模型: {default_config.model_name} (ID: {model_id})")
             else:
                 raise ValueError(
@@ -826,8 +830,40 @@ class WorkflowEngine:
             }
         except Exception as e:
-            logger.error(f"LLM调用失败: {str(e)}")
-            raise ValueError(f"LLM调用失败: {str(e)}")
+            error_msg = str(e)
+            logger.error(f"LLM调用失败: {error_msg}")
+            # 如果当前使用的不是默认模型,并且错误包含 Not Found / 404,则尝试回退到默认模型再调用一次
+            if (not used_default_model) and ("Not Found" in error_msg or "404" in error_msg):
+                try:
+                    from ..services.llm_config_service import LLMConfigService
+                    llm_config_service = LLMConfigService()
+                    default_config = await llm_config_service.get_default_chat_config(self.session)
+                    if default_config:
+                        logger.warning(
+                            f"LLM调用失败模型可能不存在或端点错误"
+                            f"尝试使用默认模型重试: {default_config.model_name} (ID: {default_config.id})"
+                        )
+                        fallback_response = await self.llm_service.chat_completion(
+                            model_config=default_config,
+                            messages=[{"role": "user", "content": prompt}],
+                            temperature=config.get('temperature', 0.7),
+                            max_tokens=config.get('max_tokens')
+                        )
+                        return {
+                            'success': True,
+                            'response': fallback_response,
+                            'prompt': prompt,
+                            'model': default_config.model_name,
+                            'tokens_used': getattr(fallback_response, 'usage', {}).get('total_tokens', 0)
+                                if hasattr(fallback_response, 'usage') else 0,
+                            'fallback_model_used': True
+                        }
+                except Exception as fallback_error:
+                    logger.error(f"使用默认模型重试LLM调用失败: {str(fallback_error)}")
+            # 继续向下抛出原始错误
+            raise ValueError(f"LLM调用失败: {error_msg}")

     def _substitute_variables(self, template: str, input_data: Dict[str, Any]) -> str:
         """变量替换函数"""