feat: 更新工作流引擎和数据库日志记录逻辑

- 在工作流执行过程中增强异常处理，使用 logger.exception 记录详细的错误信息和堆栈信息，便于调试
- 更新 DrSession 类的 desc 方法，添加更详细的日志记录，包含调用位置
- 在 LLM 节点执行中添加流式输出支持，改进提示词构建逻辑，确保更准确的用户查询响应
- 更新数据库文件和二进制数据，确保数据一致性
2026-01-28 20:14:29 +08:00 · 2026-01-28 20:14:29 +08:00 · e308e9d2f2
parent 643c2f90c4
commit e308e9d2f2
5 changed files with 366 additions and 162 deletions
--- a/data/chroma/kb_1/add53ead-7f8c-45e1-9851-b11e93ad0dfb/data_level0.bin
+++ b/data/chroma/kb_1/add53ead-7f8c-45e1-9851-b11e93ad0dfb/data_level0.bin
--- a/data/chroma/kb_1/chroma.sqlite3
+++ b/data/chroma/kb_1/chroma.sqlite3
--- a/th_agenter/api/endpoints/workflow.py
+++ b/th_agenter/api/endpoints/workflow.py
@ -424,7 +424,12 @@ async def execute_workflow_stream(
            yield f"data: {json.dumps({'type': 'workflow_complete', 'message': '工作流执行完成', 'timestamp': datetime.now().isoformat()}, ensure_ascii=False)}\n\n"
        except Exception as e:
-            logger.error(f"流式工作流执行异常: {e}", exc_info=True)
+            # 这里捕获到的通常是内部节点或引擎抛出的异常，比如 KeyError("'pk_1'")
            # 使用 exception 打印完整堆栈，并记录异常类型与 repr，方便排查
            logger.exception(
                f"流式工作流执行异常，type={type(e).__name__}, repr={repr(e)}"
            )
            # 将错误信息推送给前端
            yield f"data: {json.dumps({'type': 'error', 'message': f'工作流执行失败: {str(e)}'}, ensure_ascii=False)}\n\n"
    response = StreamingResponse(
--- a/th_agenter/db/database.py
+++ b/th_agenter/db/database.py
@ -50,7 +50,14 @@ class DrSession(AsyncSession):
    def desc(self, value: str) -> None:
        """Set work brief in session info."""
        self.stepIndex += 1
-        logger.info(value)
+        # 统一在这里打印更详细的 session 日志，方便排查问题
        try:
            # level 取 -3，可以拿到触发 desc 设置的上层业务代码位置
            pos = self.parse_source_pos(-3)
        except Exception:
            pos = "unknown"
        logger.info(f"{self.log_prefix()} STEP[{self.stepIndex}] {value} >>> @ {pos}")
    def log_prefix(self) -> str:
        """Get log prefix with session ID and desc."""
@ -118,7 +125,10 @@ async def get_session(request: Request = None):
        client_host = request.client.host
    else:
        client_host = "无request"
-    session = DrSession(bind=engine_async)
+
    # 使用 AsyncSessionFactory 创建会话，确保 async/greenlet 配置正确
    #（包括 expire_on_commit=False，避免在属性访问时触发隐式 IO，导致 MissingGreenlet / pk_1 参数异常）
    session: DrSession = AsyncSessionFactory()
    session.title = f"{url} - {client_host}"
@ -131,6 +141,9 @@ async def get_session(request: Request = None):
    except Exception as e:
        errMsg = f"数据库 session 异常 >>> {e}"
        # 先打带堆栈的异常日志
        session.log_exception(errMsg)
        # 再通过 desc 打一条结构化的 info 日志（含步骤、调用位置）
        session.desc = f"EXCEPTION: {errMsg}"
        await session.rollback()
        # 重新抛出原始异常，不转换为 HTTPException
--- a/th_agenter/services/workflow_engine.py
+++ b/th_agenter/services/workflow_engine.py
@ -177,7 +177,8 @@ class WorkflowEngine:
            }
        except Exception as e:
-            logger.error(f"工作流执行失败: {str(e)}")
+            # 打印完整堆栈，方便排查如 KeyError("'pk_1'") 之类的问题
            logger.exception(f"工作流执行失败: {str(e)}")
            execution.status = ExecutionStatus.FAILED
            execution.error_message = str(e)
            execution.completed_at = datetime.now().isoformat()
@ -306,6 +307,7 @@ class WorkflowEngine:
        node_info = node_graph[node_id]
        node = node_info['node']
        node_type = node.get('type', '')
        # 等待所有输入节点完成
        for input_node_id in node_info['inputs']:
@ -328,7 +330,36 @@ class WorkflowEngine:
        try:
            # 执行当前节点
-            output = await self._execute_single_node(execution, node, context)
+            if node_type == 'llm':
                # 对 LLM 节点使用真正的流式执行
                output = None
                async for event in self._execute_llm_node_stream(execution, node, context):
                    # event 统一为内部事件，包含 event_type 字段
                    if event.get('event_type') == 'delta':
                        # 向前端推送流式增量输出
                        yield {
                            'type': 'node_stream',
                            'execution_id': execution.id,
                            'node_id': node_id,
                            'status': 'streaming',
                            'data': {
                                'node_name': node.get('name', ''),
                                'node_type': node_type,
                                'delta': event.get('delta', ''),
                                'full_response': event.get('full_response', '')
                            },
                            'timestamp': datetime.now().isoformat()
                        }
                    elif event.get('event_type') == 'final':
                        # 最终完整输出，供后续节点使用
                        output = event.get('output', {})
                if output is None:
                    output = {}
            else:
                # 非 LLM 节点仍然走原来的单次执行逻辑
                output = await self._execute_single_node(execution, node, context)
            context['node_outputs'][node_id] = output
            # 发送节点完成的消息
@ -416,6 +447,12 @@ class WorkflowEngine:
        try:
            # 准备输入数据
            input_data = self._prepare_node_input(node, context)
            # 这里打印节点级别的输入数据，辅助定位 KeyError 等问题
            try:
                logger.info(f"执行节点 {node_id} ({node_type}) 输入数据: {json.dumps(input_data, ensure_ascii=False)[:2000]}")
            except Exception:
                # 有些数据不可序列化，退化为直接打印 repr
                logger.info(f"执行节点 {node_id} ({node_type}) 输入数据(非JSON): {repr(input_data)[:2000]}")
            # 为前端显示准备输入数据
            display_input_data = input_data.copy()
@ -476,7 +513,11 @@ class WorkflowEngine:
            return output_data
        except Exception as e:
-            logger.error(f"节点 {node_id} 执行失败: {str(e)}")
+            # 记录更详细的节点异常信息（包含堆栈）
            logger.exception(
                f"节点执行失败 - id={node_id}, type={node_type}, name={node_name}, "
                f"error_type={type(e).__name__}, error={str(e)}"
            )
            end_time = time.time()
            node_execution.status = ExecutionStatus.FAILED
            node_execution.error_message = str(e)
@ -650,24 +691,283 @@ class WorkflowEngine:
            'data': result_data
        }
-    async def _execute_llm_node(self, node: Dict[str, Any], input_data: Dict[str, Any]) -> Dict[str, Any]:
+    def _build_llm_prompt(self, node: Dict[str, Any], input_data: Dict[str, Any]) -> (str, str):
-        """执行LLM节点"""
+        """
        根据节点配置、知识库结果和工作流输入构建提示词。
        返回 (prompt, prompt_template)：
        - prompt: 变量替换后的最终提示词
        - prompt_template: 原始模板（未替换变量）
        """
        config = input_data.get('node_config', {})
-        
+        prompt_template = config.get('prompt', '')
        # 如果提示词为空，尝试自动构建提示词（RAG 或直接用用户输入）
        if not prompt_template:
            previous_outputs = input_data.get('previous_outputs', {})
            knowledge_base_results = None
            user_query = None
            # 查找知识库节点的输出
            for node_id, output in previous_outputs.items():
                if isinstance(output, dict) and output.get('knowledge_base_id'):
                    knowledge_base_results = output.get('results', [])
                    user_query = output.get('query', '')
                    break
            # 如果没有找到知识库结果，尝试从工作流输入中获取查询
            if not user_query:
                workflow_input = input_data.get('workflow_input', {})
                for key, value in workflow_input.items():
                    if isinstance(value, str) and value.strip():
                        user_query = value.strip()
                        break
            # 构建提示词
            if knowledge_base_results and len(knowledge_base_results) > 0:
                max_score = 0
                for result in knowledge_base_results:
                    score = result.get('normalized_score', result.get('similarity_score', 0))
                    if score > max_score:
                        max_score = score
                is_relevant = max_score >= 0.5
                if is_relevant:
                    # 有相关的知识库结果，构建 RAG 风格的提示词
                    context_parts = []
                    for i, result in enumerate(knowledge_base_results[:5], 1):
                        content = result.get('content', '').strip()
                        if content:
                            max_length = 1000
                            if len(content) > max_length:
                                content = content[:max_length] + "..."
                            context_parts.append(f"【参考文档{i}】\n{content}\n")
                    context = "\n\n".join(context_parts)
                    prompt_template = f"""你是一个专业的助手。请仔细阅读以下参考文档，然后回答用户的问题。
 {context}
 【用户问题】
 {user_query or '请回答上述问题'}
 【重要提示】
 - 参考文档中包含了与用户问题相关的信息
 - 请仔细阅读参考文档，提取相关信息来回答用户的问题
 - 即使文档没有直接定义，也要基于文档中的相关内容进行解释和说明
 - 如果文档中提到了相关概念、政策、法规等，请基于这些内容进行回答
 - 回答要准确、详细、有条理，尽量引用文档中的具体内容"""
                    logger.info(
                        f"自动构建RAG提示词，包含 {len(knowledge_base_results)} 个相关知识库结果（最高相似度: {max_score:.3f}），用户问题: {user_query}"
                    )
                else:
                    logger.warning(
                        f"知识库结果相似度较低（最高: {max_score:.3f}），认为不相关，将直接回答用户问题"
                    )
                    prompt_template = user_query or "请帮助我处理这个任务。"
            elif user_query:
                prompt_template = user_query
                logger.info(f"自动使用工作流输入作为提示词: {user_query}")
            else:
                prompt_template = "请帮助我处理这个任务。"
                logger.warning("LLM节点提示词为空，且无法从上下文获取，使用默认提示词")
        # 变量替换
        enable_variable_substitution = config.get('enable_variable_substitution', True)
        if enable_variable_substitution:
            prompt = self._substitute_variables(prompt_template, input_data)
        else:
            prompt = prompt_template
        return prompt, prompt_template
    async def _execute_llm_node(self, node: Dict[str, Any], input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Execute an LLM node with one non-streaming completion call.

        The model is resolved in this order: the prepared ``node_config``,
        the node definition's own ``config``, then the default chat
        configuration. If the call fails with a "Not Found"/404 error and the
        default model was not already in use, the call is retried once with
        the default model.

        Args:
            node: The node definition; its ``id`` and ``config`` are read.
            input_data: Prepared node input. ``processed_prompt`` and
                ``original_prompt`` are written back into it for display.

        Returns:
            A dict with ``success``, ``response``, ``prompt``, ``model`` and
            ``tokens_used``; ``fallback_model_used`` is added when the retry
            with the default model produced the answer.

        Raises:
            ValueError: If no usable model configuration exists, or the LLM
                call (including the optional retry) fails.
        """
        from sqlalchemy import select

        config = input_data.get('node_config') or {}

        async def resolve_model_id(cfg: Dict[str, Any]) -> Any:
            """Resolve a model id from one config dict, or None if absent."""
            explicit_id = cfg.get('model_id')
            if explicit_id:
                return explicit_id
            # `model` is the front-end field; it may hold an id or a name.
            # Use `or` so an empty `model_name` still falls back to `model`.
            model_value = cfg.get('model_name') or cfg.get('model')
            if not model_value:
                return None
            if isinstance(model_value, int):
                return model_value
            lookup = await self.session.execute(
                select(LLMConfig).where(LLMConfig.model_name == model_value)
            )
            named_cfg = lookup.scalar_one_or_none()
            return named_cfg.id if named_cfg else None

        async def load_default_config() -> Any:
            """Fetch the default chat LLM configuration (may be None)."""
            # Imported lazily, as in the original code.
            from ..services.llm_config_service import LLMConfigService
            return await LLMConfigService().get_default_chat_config(self.session)

        def total_tokens(response: Any) -> int:
            """Read total_tokens from a response whose usage may be a dict or an object."""
            usage = getattr(response, 'usage', None)
            if usage is None:
                return 0
            if isinstance(usage, dict):
                return usage.get('total_tokens', 0) or 0
            return getattr(usage, 'total_tokens', 0) or 0

        # Tracks whether the default model is already in use, so the error
        # path does not "fall back" to the model that just failed.
        used_default_model = False

        model_id = await resolve_model_id(config)
        if not model_id:
            model_id = await resolve_model_id(node.get('config') or {})
        if not model_id:
            default_config = await load_default_config()
            if default_config:
                model_id = default_config.id
                used_default_model = True
                logger.info(
                    f"LLM节点未指定模型配置，使用默认模型: {default_config.model_name} (ID: {model_id})"
                )
            else:
                raise ValueError(
                    "未指定有效的大模型配置，且未找到默认配置。\n"
                    "请在节点配置中添加模型ID或模型名称，例如：\n"
                    "  - config.model_id: 1\n"
                    "  - config.model_name: 'gpt-4'\n"
                    "  - config.model: 'gpt-4'\n"
                    "或者设置一个默认的LLM配置。"
                )

        result = await self.session.execute(
            select(LLMConfig).where(LLMConfig.id == model_id)
        )
        llm_config = result.scalar_one_or_none()
        if not llm_config:
            raise ValueError(f"大模型配置 {model_id} 不存在")

        # Shared prompt construction, so both LLM paths build it the same way.
        prompt, prompt_template = self._build_llm_prompt(node, input_data)
        logger.info(
            f"LLM 节点最终提示词（非流式）: node_id={node.get('id')}, "
            f"model_id={llm_config.id}, model_name={llm_config.model_name}, prompt={prompt}"
        )

        # Expose the processed and original prompt for front-end display.
        input_data['processed_prompt'] = prompt
        input_data['original_prompt'] = prompt_template

        async def complete(model_config: Any) -> Dict[str, Any]:
            """Run one chat completion and wrap it in the node output shape."""
            response = await self.llm_service.chat_completion(
                model_config=model_config,
                messages=[{"role": "user", "content": prompt}],
                temperature=config.get('temperature', 0.7),
                max_tokens=config.get('max_tokens'),
            )
            return {
                'success': True,
                'response': response,
                'prompt': prompt,
                'model': model_config.model_name,
                'tokens_used': total_tokens(response),
            }

        try:
            return await complete(llm_config)
        except Exception as e:
            error_msg = str(e)
            lowered = error_msg.lower()
            is_not_found = "Not Found" in error_msg or "404" in error_msg

            detailed_error = error_msg
            # Append model details only when the message lacks them already.
            if "使用的模型:" not in error_msg and "模型:" not in error_msg:
                model_info = (
                    f"使用的模型: {llm_config.model_name} (ID: {llm_config.id}), "
                    f"base_url: {llm_config.base_url}"
                )
                if is_not_found:
                    detailed_error = (
                        f"{detailed_error}。{model_info}。可能的原因：1) 模型名称格式不正确（SiliconFlow需要org/model格式）；"
                        "2) base_url配置错误；3) API端点不存在"
                    )
                elif "403" in error_msg or "account balance" in lowered or "insufficient" in lowered:
                    detailed_error = (
                        f"{detailed_error}。{model_info}。可能的原因：账户余额不足或API密钥权限不足"
                    )
                elif "401" in error_msg or "authentication" in lowered:
                    detailed_error = (
                        f"{detailed_error}。{model_info}。可能的原因：API密钥无效或已过期"
                    )
                else:
                    detailed_error = f"{detailed_error}。{model_info}"

            logger.error(f"LLM调用失败: {detailed_error}")

            # Retry once with the default model when the configured model or
            # endpoint could not be found.
            if (not used_default_model) and is_not_found:
                try:
                    default_config = await load_default_config()
                    if default_config:
                        logger.warning(
                            "LLM调用失败，模型可能不存在或端点错误，"
                            f"尝试使用默认模型重试: {default_config.model_name} (ID: {default_config.id})"
                        )
                        fallback_output = await complete(default_config)
                        fallback_output['fallback_model_used'] = True
                        return fallback_output
                except Exception as fallback_error:
                    # Best effort: report the retry failure, then surface the
                    # original error below.
                    logger.error(
                        f"使用默认模型重试LLM调用失败: {str(fallback_error)}"
                    )

            # Chain the cause so the original traceback stays attached.
            raise ValueError(f"LLM调用失败: {detailed_error}") from e
    async def _execute_llm_node_stream(self, execution: WorkflowExecution, node: Dict[str, Any], context: Dict[str, Any]):
        """执行LLM节点（流式版本），用于 /execute-stream 接口"""
        node_id = node['id']
        config = self._prepare_node_input(node, context).get('node_config', {})
        # 下面的逻辑与 _execute_llm_node 中获取模型配置和提示词的过程保持一致，
        # 以保证流式与非流式路径的行为一致。
        used_default_model = False
        # 获取LLM配置
        model_id = config.get('model_id')
        if not model_id:
            # 兼容前端的model字段（可能是ID或名称）
            model_value = config.get('model_name', config.get('model'))
            if model_value:
                # 如果是整数，直接作为ID使用
                if isinstance(model_value, int):
                    model_id = model_value
                else:
                    # 如果是字符串，按名称查询
                    from sqlalchemy import select
                    result = await self.session.execute(
                        select(LLMConfig).where(LLMConfig.model_name == model_value)
@ -676,7 +976,6 @@ class WorkflowEngine:
                    if llm_cfg:
                        model_id = llm_cfg.id
        # 如果还是没有，尝试从节点定义本身获取
        if not model_id:
            node_config = node.get('config', {})
            model_id = node_config.get('model_id')
@ -694,7 +993,6 @@ class WorkflowEngine:
                        if llm_cfg:
                            model_id = llm_cfg.id
        # 如果还是没有，尝试使用默认的LLM配置
        if not model_id:
            from ..services.llm_config_service import LLMConfigService
            llm_config_service = LLMConfigService()
@ -702,15 +1000,10 @@ class WorkflowEngine:
            if default_config:
                model_id = default_config.id
                used_default_model = True
-                logger.info(f"LLM节点未指定模型配置，使用默认模型: {default_config.model_name} (ID: {model_id})")
+                logger.info(f"[STREAM] LLM节点未指定模型配置，使用默认模型: {default_config.model_name} (ID: {model_id})")
            else:
                raise ValueError(
-                    "未指定有效的大模型配置，且未找到默认配置。\n"
+                    "未指定有效的大模型配置，且未找到默认配置。"
                    "请在节点配置中添加模型ID或模型名称，例如：\n"
                    "  - config.model_id: 1\n"
                    "  - config.model_name: 'gpt-4'\n"
                    "  - config.model: 'gpt-4'\n"
                    "或者设置一个默认的LLM配置。"
                )
        from sqlalchemy import select
@ -721,165 +1014,58 @@ class WorkflowEngine:
        if not llm_config:
            raise ValueError(f"大模型配置 {model_id} 不存在")
-        # 准备提示词
+        # 构造 prompt，使用与非流式路径相同的逻辑
-        prompt_template = config.get('prompt', '')
+        input_data = self._prepare_node_input(node, context)
        config = input_data.get('node_config', {})
        prompt, prompt_template = self._build_llm_prompt(node, input_data)
-        # 如果提示词为空，尝试自动构建提示词
+        # 打印流式路径下的提示词，确认实际发给大模型的内容
-        if not prompt_template:
+        logger.info(
-            # 检查是否有知识库搜索结果
+            f"LLM 节点最终提示词（流式）: node_id={node.get('id')}, "
-            previous_outputs = input_data.get('previous_outputs', {})
+            f"model_id={llm_config.id}, model_name={llm_config.model_name}, prompt={prompt}"
-            knowledge_base_results = None
+        )
            user_query = None
            # 查找知识库节点的输出
            for node_id, output in previous_outputs.items():
                if isinstance(output, dict) and output.get('knowledge_base_id'):
                    knowledge_base_results = output.get('results', [])
                    user_query = output.get('query', '')
                    break
            # 如果没有找到知识库结果，尝试从工作流输入中获取查询
            if not user_query:
                workflow_input = input_data.get('workflow_input', {})
                # 尝试获取第一个非空字符串值作为查询
                for key, value in workflow_input.items():
                    if isinstance(value, str) and value.strip():
                        user_query = value.strip()
                        break
            # 构建提示词
            if knowledge_base_results and len(knowledge_base_results) > 0:
                # 检查知识库结果的相似度，判断是否相关
                max_score = 0
                for result in knowledge_base_results:
                    score = result.get('normalized_score', result.get('similarity_score', 0))
                    if score > max_score:
                        max_score = score
                # 如果最高相似度分数很低（低于0.5），认为结果不相关
                is_relevant = max_score >= 0.5
                if is_relevant:
                    # 有相关的知识库结果，构建RAG风格的提示词
                    context_parts = []
                    for i, result in enumerate(knowledge_base_results[:5], 1):  # 取前5个结果
                        content = result.get('content', '').strip()
                        if content:
                            # 限制每个结果的长度，避免提示词过长
                            max_length = 1000
                            if len(content) > max_length:
                                content = content[:max_length] + "..."
                            context_parts.append(f"【参考文档{i}】\n{content}\n")
                    context = "\n\n".join(context_parts)
                    prompt_template = f"""你是一个专业的助手。请仔细阅读以下参考文档，然后回答用户的问题。
 {context}
 【用户问题】
 {user_query or '请回答上述问题'}
 【重要提示】
 - 参考文档中包含了与用户问题相关的信息
 - 请仔细阅读参考文档，提取相关信息来回答用户的问题
 - 即使文档没有直接定义，也要基于文档中的相关内容进行解释和说明
 - 如果文档中提到了相关概念、政策、法规等，请基于这些内容进行回答
 - 回答要准确、详细、有条理，尽量引用文档中的具体内容"""
                    logger.info(f"自动构建RAG提示词，包含 {len(knowledge_base_results)} 个相关知识库结果（最高相似度: {max_score:.3f}），用户问题: {user_query}")
                else:
                    # 知识库结果不相关，直接回答用户问题
                    logger.warning(f"知识库结果相似度较低（最高: {max_score:.3f}），认为不相关，将直接回答用户问题")
                    prompt_template = user_query or "请帮助我处理这个任务。"
            elif user_query:
                # 没有知识库结果，但有用户查询，构建简单提示词
                prompt_template = user_query
                logger.info(f"自动使用工作流输入作为提示词: {user_query}")
            else:
                # 既没有知识库结果，也没有用户查询
                prompt_template = "请帮助我处理这个任务。"
                logger.warning("LLM节点提示词为空，且无法从上下文获取，使用默认提示词")
-        # 检查是否启用变量替换
+        full_response = ""
        enable_variable_substitution = config.get('enable_variable_substitution', True)
        if enable_variable_substitution:
            # 使用增强的变量替换
            prompt = self._substitute_variables(prompt_template, input_data)
        else:
            prompt = prompt_template
        # 记录处理后的提示词到输入数据中，用于前端显示
        input_data['processed_prompt'] = prompt
        input_data['original_prompt'] = prompt_template
        # 调用LLM服务
        try:
-            response = await self.llm_service.chat_completion(
+            # 调用 LLMService 流式接口
            async for chunk in self.llm_service.chat_completion_stream(
                model_config=llm_config,
                messages=[{"role": "user", "content": prompt}],
                temperature=config.get('temperature', 0.7),
                max_tokens=config.get('max_tokens')
-            )
+            ):
                if not chunk:
                    continue
                full_response += chunk
                # 将增量结果向外层生成器抛出
                yield {
                    'event_type': 'delta',
                    'delta': chunk,
                    'full_response': full_response,
                }
-            return {
+            # 完成后抛出最终结果，供后续节点依赖
            final_output = {
                'success': True,
-                'response': response,
+                'response': full_response,
                'prompt': prompt,
                'model': llm_config.model_name,
-                'tokens_used': getattr(response, 'usage', {}).get('total_tokens', 0) if hasattr(response, 'usage') else 0
+                'tokens_used': 0  # 流式接口暂不提供 usage 统计
            }
-            
+            yield {
                'event_type': 'final',
                'output': final_output,
            }
        except Exception as e:
            error_msg = str(e)
            # LLMService 已经添加了详细的错误信息（包括处理后的模型名称和base_url），直接使用
            # 如果错误信息中已经包含了模型信息，就不再重复添加
            detailed_error = error_msg
            # 如果错误信息中还没有包含模型配置信息，则添加（使用原始配置作为补充）
            if "使用的模型:" not in error_msg and "模型:" not in error_msg:
                model_info = f"使用的模型: {llm_config.model_name} (ID: {llm_config.id}), base_url: {llm_config.base_url}"
-                if "Not Found" in error_msg or "404" in error_msg:
+                detailed_error = f"{detailed_error}。{model_info}"
-                    detailed_error = f"{detailed_error}。{model_info}。可能的原因：1) 模型名称格式不正确（SiliconFlow需要org/model格式）；2) base_url配置错误；3) API端点不存在"
+            logger.error(f"[STREAM] LLM流式调用失败: {detailed_error}")
-                elif "403" in error_msg or "account balance" in error_msg.lower() or "insufficient" in error_msg.lower():
+            raise ValueError(f"LLM流式调用失败: {detailed_error}")
                    detailed_error = f"{detailed_error}。{model_info}。可能的原因：账户余额不足或API密钥权限不足"
                elif "401" in error_msg or "authentication" in error_msg.lower():
                    detailed_error = f"{detailed_error}。{model_info}。可能的原因：API密钥无效或已过期"
                else:
                    detailed_error = f"{detailed_error}。{model_info}"
            logger.error(f"LLM调用失败: {detailed_error}")
            # 如果当前使用的不是默认模型，并且错误包含 Not Found / 404，则尝试回退到默认模型再调用一次
            if (not used_default_model) and ("Not Found" in error_msg or "404" in error_msg):
                try:
                    from ..services.llm_config_service import LLMConfigService
                    llm_config_service = LLMConfigService()
                    default_config = await llm_config_service.get_default_chat_config(self.session)
                    if default_config:
                        logger.warning(
                            f"LLM调用失败，模型可能不存在或端点错误，"
                            f"尝试使用默认模型重试: {default_config.model_name} (ID: {default_config.id})"
                        )
                        fallback_response = await self.llm_service.chat_completion(
                            model_config=default_config,
                            messages=[{"role": "user", "content": prompt}],
                            temperature=config.get('temperature', 0.7),
                            max_tokens=config.get('max_tokens')
                        )
                        return {
                            'success': True,
                            'response': fallback_response,
                            'prompt': prompt,
                            'model': default_config.model_name,
                            'tokens_used': getattr(fallback_response, 'usage', {}).get('total_tokens', 0)
                            if hasattr(fallback_response, 'usage') else 0,
                            'fallback_model_used': True
                        }
                except Exception as fallback_error:
                    logger.error(f"使用默认模型重试LLM调用失败: {str(fallback_error)}")
                    # 继续向下抛出原始错误
            raise ValueError(f"LLM调用失败: {detailed_error}")
    def _substitute_variables(self, template: str, input_data: Dict[str, Any]) -> str:
        """变量替换函数"""