增加stop标记检测与续传

2026-01-05 23:48:40 +08:00
parent a93ab74ce1
commit ea24f7db31
1 changed files with 216 additions and 5 deletions
--- a/docutranslate/agents/agent.py
+++ b/docutranslate/agents/agent.py
@@ -359,6 +359,94 @@ class Agent:
            data["response_format"] = {"type": "json_object"}
        return headers, data
    async def _continue_fetch_async(
            self,
            client: httpx.AsyncClient,
            prompt: str,
            system_prompt: str,
            force_json: bool,
            pre_send_handler: PreSendHandlerType,
            result_handler: ResultHandlerType,
            error_result_handler: ErrorResultHandlerType,
            retry_count: int,
            accumulated_result: str = "",
            max_continuations: int = 5,
    ) -> Any:
        """
        Keep requesting output while the API reports ``finish_reason == "length"``.

        A "please continue" notice is appended to ``prompt`` and the request is
        re-sent; the ``content`` of every response is appended to
        ``accumulated_result``. The loop ends when a response finishes with a
        reason other than ``"length"``, when a request fails, or after
        ``max_continuations`` rounds.

        Args:
            client: Async HTTP client used to POST to ``/chat/completions``.
            prompt: Original user prompt; also handed to the result/error handlers.
            system_prompt: System prompt for the request.
            force_json: Forwarded to ``_prepare_request_data`` as ``json_format``.
            pre_send_handler: Optional hook ``(system_prompt, prompt) -> (system_prompt, prompt)``
                applied once before the request payload is built.
            result_handler: Optional hook called as
                ``result_handler(text, prompt, logger)`` on the accumulated text.
            error_result_handler: Optional hook called as
                ``error_result_handler(prompt, logger)`` when a request fails and
                nothing has been accumulated.
            retry_count: Accepted for signature parity with the caller; not used here.
            accumulated_result: Text gathered so far; new content is appended to it.
            max_continuations: Upper bound on the number of continuation requests.

        Returns:
            The accumulated text (passed through ``result_handler`` when given).
            If a request fails before any text was accumulated, ``prompt``
            (passed through ``error_result_handler`` when given).
        """
        # NOTE(review): the request below carries only the original prompt plus a
        # notice - the already-generated text is not sent back, and the visible
        # caller does not pass the first truncated chunk as accumulated_result.
        # Confirm the model can really resume from where it stopped.
        continue_prompt = f"{prompt}\n\n[系统提示：之前的响应被截断，请继续输出剩余内容。]"
        request_system_prompt = system_prompt
        if pre_send_handler:
            # Run the hook exactly once; re-running it every round would feed it
            # its own output for the system prompt.
            request_system_prompt, continue_prompt = pre_send_handler(system_prompt, continue_prompt)

        # Every round sends the same payload, so build it (and its token estimate) once.
        estimated_tokens = (
            self._estimate_tokens(request_system_prompt) + self._estimate_tokens(continue_prompt)
        )
        headers, data = self._prepare_request_data(
            continue_prompt, request_system_prompt, json_format=force_json
        )

        request_failed = False
        # A bounded loop replaces the original unbounded recursion: a model that is
        # truncated on every round can no longer recurse until RecursionError.
        for _ in range(max_continuations):
            self.logger.info(f"继续获取剩余内容 (已累计 {len(accumulated_result)} 字符)...")
            # Rate-limit every request, not just the first one.
            await self.rate_limiter.acquire_async(tokens=estimated_tokens)
            try:
                response = await client.post(
                    f"{self.baseurl}/chat/completions",
                    json=data,
                    headers=headers,
                    timeout=self.timeout,
                )
                response.raise_for_status()
                response_data = response.json()
                choice = response_data["choices"][0]
                finish_reason = choice.get("finish_reason")
                # content may be null in the payload; treat that as "nothing new".
                additional_result = choice["message"]["content"] or ""
                input_tokens, cached_tokens, output_tokens, reasoning_tokens = (
                    extract_token_info(response_data)
                )
                self.token_counter.add(input_tokens, cached_tokens, output_tokens, reasoning_tokens)
            except (
                    httpx.HTTPStatusError,
                    httpx.RequestError,
                    KeyError,
                    IndexError,
                    TypeError,
                    ValueError,
            ) as e:
                self.logger.error(f"继续获取内容失败: {repr(e)}")
                request_failed = True
                break

            accumulated_result += additional_result
            if finish_reason != "length":
                break
        else:
            # Cap reached while the response was still being truncated.
            self.logger.warning(
                f"续传已达到上限 {max_continuations} 次，响应仍被截断，返回已累计的内容。"
            )

        # Handlers run outside the try block so that an exception raised by a
        # handler is not mistaken for a failed request (and the handler re-run).
        if request_failed and not accumulated_result:
            # Nothing usable was fetched: fall back to the error handler.
            if error_result_handler is None:
                return prompt
            return error_result_handler(prompt, self.logger)

        # Best effort: hand back whatever was accumulated, even after a failure.
        if result_handler is None:
            return accumulated_result
        return result_handler(accumulated_result, prompt, self.logger)
    async def send_async(
            self,
            client: httpx.AsyncClient,
@@ -398,9 +486,31 @@ class Agent:
                timeout=self.timeout,
            )
            response.raise_for_status()
            result = response.json()["choices"][0]["message"]["content"]
            response_data = response.json()
            # 检查 finish_reason
            finish_reason = response_data.get("choices", [{}])[0].get("finish_reason", None)
            if finish_reason != "stop":
                # 非正常结束，可能是 length (长度限制)、tool_calls、content_filter 等
                self.logger.warning(
                    f"finish_reason 为 '{finish_reason}'，非正常结束。prompt: {prompt[:100]}..."
                )
                # 如果是长度限制，尝试继续获取
                if finish_reason == "length":
                    return await self._continue_fetch_async(
                        client=client,
                        prompt=prompt,
                        system_prompt=system_prompt,
                        force_json=force_json,
                        pre_send_handler=pre_send_handler,
                        result_handler=result_handler,
                        error_result_handler=error_result_handler,
                        retry_count=retry_count,
                    )
            result = response_data["choices"][0]["message"]["content"]
            input_tokens, cached_tokens, output_tokens, reasoning_tokens = (
                extract_token_info(response_data)
            )
@@ -593,6 +703,86 @@ class Agent:
            return results
    def _continue_fetch(
            self,
            client: httpx.Client,
            prompt: str,
            system_prompt: str,
            force_json: bool,
            pre_send_handler,
            result_handler,
            error_result_handler,
            retry_count: int,
            accumulated_result: str = "",
            max_continuations: int = 5,
    ) -> Any:
        """
        Keep requesting output while the API reports ``finish_reason == "length"`` (sync).

        A "please continue" notice is appended to ``prompt`` and the request is
        re-sent; the ``content`` of every response is appended to
        ``accumulated_result``. The loop ends when a response finishes with a
        reason other than ``"length"``, when a request fails, or after
        ``max_continuations`` rounds.

        Args:
            client: Synchronous HTTP client used to POST to ``/chat/completions``.
            prompt: Original user prompt; also handed to the result/error handlers.
            system_prompt: System prompt for the request.
            force_json: Forwarded to ``_prepare_request_data`` as ``json_format``.
            pre_send_handler: Optional hook ``(system_prompt, prompt) -> (system_prompt, prompt)``
                applied once before the request payload is built.
            result_handler: Optional hook called as
                ``result_handler(text, prompt, logger)`` on the accumulated text.
            error_result_handler: Optional hook called as
                ``error_result_handler(prompt, logger)`` when a request fails and
                nothing has been accumulated.
            retry_count: Accepted for signature parity with the caller; not used here.
            accumulated_result: Text gathered so far; new content is appended to it.
            max_continuations: Upper bound on the number of continuation requests.

        Returns:
            The accumulated text (passed through ``result_handler`` when given).
            If a request fails before any text was accumulated, ``prompt``
            (passed through ``error_result_handler`` when given).
        """
        # NOTE(review): the request below carries only the original prompt plus a
        # notice - the already-generated text is not sent back, and the visible
        # caller does not pass the first truncated chunk as accumulated_result.
        # Confirm the model can really resume from where it stopped.
        continue_prompt = f"{prompt}\n\n[系统提示：之前的响应被截断，请继续输出剩余内容。]"
        request_system_prompt = system_prompt
        if pre_send_handler:
            # Run the hook exactly once; re-running it every round would feed it
            # its own output for the system prompt.
            request_system_prompt, continue_prompt = pre_send_handler(system_prompt, continue_prompt)

        # Every round sends the same payload, so build it (and its token estimate) once.
        estimated_tokens = (
            self._estimate_tokens(request_system_prompt) + self._estimate_tokens(continue_prompt)
        )
        headers, data = self._prepare_request_data(
            continue_prompt, request_system_prompt, json_format=force_json
        )

        request_failed = False
        # A bounded loop replaces the original unbounded recursion: a model that is
        # truncated on every round can no longer recurse until RecursionError.
        for _ in range(max_continuations):
            self.logger.info(f"继续获取剩余内容 (已累计 {len(accumulated_result)} 字符)...")
            # Rate-limit every request, not just the first one.
            self.rate_limiter.acquire_sync(tokens=estimated_tokens)
            try:
                response = client.post(
                    f"{self.baseurl}/chat/completions",
                    json=data,
                    headers=headers,
                    timeout=self.timeout,
                )
                response.raise_for_status()
                response_data = response.json()
                choice = response_data["choices"][0]
                finish_reason = choice.get("finish_reason")
                # content may be null in the payload; treat that as "nothing new".
                additional_result = choice["message"]["content"] or ""
                input_tokens, cached_tokens, output_tokens, reasoning_tokens = (
                    extract_token_info(response_data)
                )
                self.token_counter.add(input_tokens, cached_tokens, output_tokens, reasoning_tokens)
            except (
                    httpx.HTTPStatusError,
                    httpx.RequestError,
                    KeyError,
                    IndexError,
                    TypeError,
                    ValueError,
            ) as e:
                self.logger.error(f"继续获取内容失败: {repr(e)}")
                request_failed = True
                break

            accumulated_result += additional_result
            if finish_reason != "length":
                break
        else:
            # Cap reached while the response was still being truncated.
            self.logger.warning(
                f"续传已达到上限 {max_continuations} 次，响应仍被截断，返回已累计的内容。"
            )

        # Handlers run outside the try block so that an exception raised by a
        # handler is not mistaken for a failed request (and the handler re-run).
        if request_failed and not accumulated_result:
            # Nothing usable was fetched: fall back to the error handler.
            if error_result_handler is None:
                return prompt
            return error_result_handler(prompt, self.logger)

        # Best effort: hand back whatever was accumulated, even after a failure.
        if result_handler is None:
            return accumulated_result
        return result_handler(accumulated_result, prompt, self.logger)
    def send(
            self,
            client: httpx.Client,
@@ -630,10 +820,31 @@ class Agent:
                timeout=self.timeout,
            )
            response.raise_for_status()
            result = response.json()["choices"][0]["message"]["content"]
            response_data = response.json()
            # 检查 finish_reason
            finish_reason = response_data.get("choices", [{}])[0].get("finish_reason", None)
            if finish_reason != "stop":
                # 非正常结束，可能是 length (长度限制)、tool_calls、content_filter 等
                self.logger.warning(
                    f"finish_reason 为 '{finish_reason}'，非正常结束。prompt: {prompt[:100]}..."
                )
                # 如果是长度限制，尝试继续获取
                if finish_reason == "length":
                    return self._continue_fetch(
                        client=client,
                        prompt=prompt,
                        system_prompt=system_prompt,
                        force_json=force_json,
                        pre_send_handler=pre_send_handler,
                        result_handler=result_handler,
                        error_result_handler=error_result_handler,
                        retry_count=retry_count,
                    )
            result = response_data["choices"][0]["message"]["content"]
            input_tokens, cached_tokens, output_tokens, reasoning_tokens = (
                extract_token_info(response_data)
            )