From 2b6824c34b5497700d689b0f9fc0d130c54f099f Mon Sep 17 00:00:00 2001 From: xunbu Date: Mon, 18 Aug 2025 21:54:47 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0readme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- README_JP.md | 2 +- README_ZH.md | 2 +- docutranslate/agents/agent.py | 15 ++++++++------- docutranslate/converter/x2md/converter_mineru.py | 15 ++++++++------- docutranslate/exporter/xlsx/xlsx2xlsx_exporter.py | 1 - docutranslate/global_values/__init__.py | 6 ++++-- docutranslate/utils/utils.py | 12 ++++++++++++ 8 files changed, 35 insertions(+), 20 deletions(-) create mode 100644 docutranslate/utils/utils.py diff --git a/README.md b/README.md index 6d9468f..b4a1417 100644 --- a/README.md +++ b/README.md @@ -503,7 +503,7 @@ A: It is completely possible. The following two conditions need to be met: A: `MarkdownBasedWorkflow` automatically caches the results of document parsing (conversion from files to Markdown) to avoid wasting time and resources on repeated parsing. The cache is stored in memory by default and records the most recent 10 parsing operations. The number of cached items can be changed via the `DOCUTRANSLATE_CACHE_NUM` environment variable. **Q: How can I use the software via a proxy?** -A: The software does not use a proxy by default. Set the `DOCUTRANSLATE_USE_PROXY` environment variable to `true` to enable communication via a proxy. +A: The software does not use a proxy by default. Set the `DOCUTRANSLATE_PROXY_ENABLED` environment variable to `true` to enable communication via a proxy. ## Star History diff --git a/README_JP.md b/README_JP.md index 8abbeeb..6158650 100644 --- a/README_JP.md +++ b/README_JP.md @@ -503,7 +503,7 @@ A: 完全に可能です。以下の2つの条件を満たす必要がありま A: `MarkdownBasedWorkflow` はドキュメント解析(ファイルからMarkdownへの変換)の結果を自動的にキャッシュし、繰り返し解析による時間とリソースの浪費を回避します。キャッシュはデフォルトでメモリに保存され、直近の10回の解析が記録されます。環境変数 `DOCUTRANSLATE_CACHE_NUM` を通じてキャッシュ数を変更することができます。 **Q: ソフトウェアをプロキシ経由で使用するにはどうすればよいですか** -A: ソフトウェアはデフォルトでプロキシを使用しません。環境変数 `DOCUTRANSLATE_USE_PROXY` を `true` に設定することで、ソフトウェアがプロキシ経由で通信するようになります。 +A: ソフトウェアはデフォルトでプロキシを使用しません。環境変数 `DOCUTRANSLATE_PROXY_ENABLED` を `true` に設定することで、ソフトウェアがプロキシ経由で通信するようになります。 ## スター履歴 diff --git a/README_ZH.md b/README_ZH.md index 986a653..6b72627 100644 --- a/README_ZH.md +++ b/README_ZH.md @@ -490,7 +490,7 @@ A: `MarkdownBasedWorkflow` 会自动缓存文档解析(文件到Markdown的转 `DOCUTRANSLATE_CACHE_NUM` 环境变量来修改缓存数量。 **Q: 如何让软件可以经过代理** -A: 软件默认不使用代理,可以通过设置环境变量`DOCUTRANSLATE_USE_PROXY`为`true`让软件通过代理。 +A: 软件默认不使用代理,可以通过设置环境变量`DOCUTRANSLATE_PROXY_ENABLED`为`true`让软件通过代理。 ## Star History diff --git a/docutranslate/agents/agent.py b/docutranslate/agents/agent.py index f29d873..ec09e0c 100644 --- a/docutranslate/agents/agent.py +++ b/docutranslate/agents/agent.py @@ -11,6 +11,7 @@ import httpx from docutranslate.global_values import USE_PROXY from docutranslate.logger import global_logger +from docutranslate.utils.utils import get_httpx_proxies MAX_RETRY_COUNT = 2 MAX_TOTAL_ERROR_COUNT = 10 @@ -87,13 +88,13 @@ class Agent: self.model_id = config.model_id.strip() self.system_prompt = config.system_prompt or "" self.temperature = config.temperature - # self.client = httpx.Client(trust_env=False, proxy=None, verify=False) - # self.client_async = httpx.AsyncClient(trust_env=False, proxy=None, verify=False) - self.client = httpx.Client(verify=False) if USE_PROXY else httpx.Client(trust_env=False, proxy=None, - verify=False) - self.client_async = httpx.AsyncClient(verify=False) if USE_PROXY else httpx.AsyncClient(trust_env=False, - proxy=None, - verify=False) + if USE_PROXY: + self.client = httpx.Client(proxies=get_httpx_proxies(), verify=False) + self.client_async = httpx.AsyncClient(proxies=get_httpx_proxies(), verify=False) + else: + self.client = httpx.Client(trust_env=False, proxy=None, verify=False) + self.client_async = httpx.AsyncClient(trust_env=False, proxy=None, verify=False) + self.max_concurrent = config.max_concurrent self.timeout = config.timeout self.thinking = config.thinking diff --git a/docutranslate/converter/x2md/converter_mineru.py b/docutranslate/converter/x2md/converter_mineru.py index 658111b..58e1ac2 100644 --- a/docutranslate/converter/x2md/converter_mineru.py +++ b/docutranslate/converter/x2md/converter_mineru.py @@ -7,7 +7,6 @@ from typing import Hashable import httpx from docutranslate.converter.x2md.base import X2MarkdownConverter, X2MarkdownConverterConfig -from docutranslate.global_values import USE_PROXY from docutranslate.ir.document import Document from docutranslate.ir.markdown_document import MarkdownDocument from docutranslate.utils.markdown_utils import embed_inline_image_from_zip @@ -30,12 +29,14 @@ timeout = httpx.Timeout( write=200.0, # 写入超时 (发送数据的最长时间) pool=1.0 # 从连接池获取连接的超时时间 ) -if USE_PROXY: - client = httpx.Client(timeout=timeout, verify=False) - client_async = httpx.AsyncClient(timeout=timeout, verify=False) -else: - client = httpx.Client(trust_env=False, timeout=timeout, proxy=None, verify=False) - client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, verify=False) +# if USE_PROXY: +# client = httpx.Client(proxies=get_httpx_proxies(), timeout=timeout, verify=False) +# client_async = httpx.AsyncClient(proxies=get_httpx_proxies(), timeout=timeout, verify=False) +# else: +# client = httpx.Client(trust_env=False, timeout=timeout, proxy=None, verify=False) +# client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, verify=False) +client = httpx.Client(trust_env=False, timeout=timeout, proxy=None, verify=False) +client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, verify=False) class ConverterMineru(X2MarkdownConverter): diff --git a/docutranslate/exporter/xlsx/xlsx2xlsx_exporter.py b/docutranslate/exporter/xlsx/xlsx2xlsx_exporter.py index 20cc3af..e83f487 100644 --- a/docutranslate/exporter/xlsx/xlsx2xlsx_exporter.py +++ b/docutranslate/exporter/xlsx/xlsx2xlsx_exporter.py @@ -1,4 +1,3 @@ -from docutranslate.exporter.txt.base import TXTExporter from docutranslate.exporter.xlsx.base import XlsxExporter from docutranslate.ir.document import Document diff --git a/docutranslate/global_values/__init__.py b/docutranslate/global_values/__init__.py index 76fefb6..c97f7da 100644 --- a/docutranslate/global_values/__init__.py +++ b/docutranslate/global_values/__init__.py @@ -2,5 +2,7 @@ import os from .conditional_import import available_packages, conditional_import -USE_PROXY = True if (os.getenv("DOCUTRANSLATE_USE_PROXY") and os.getenv( - "DOCUTRANSLATE_USE_PROXY").lower() == "true") else False \ No newline at end of file +USE_PROXY = True if (os.getenv("DOCUTRANSLATE_PROXY_ENABLED") and os.getenv( + "DOCUTRANSLATE_PROXY_ENABLED").lower() == "true") else False + +print(f"USE_PROXY:{USE_PROXY}") diff --git a/docutranslate/utils/utils.py b/docutranslate/utils/utils.py new file mode 100644 index 0000000..1702227 --- /dev/null +++ b/docutranslate/utils/utils.py @@ -0,0 +1,12 @@ +from urllib.request import getproxies + + +def get_httpx_proxies(): + https_proxy = getproxies().get("https") + http_proxy = getproxies().get("http") + proxies = {} + if https_proxy: + proxies["https://"] = https_proxy + if http_proxy: + proxies["http://"] = http_proxy + return proxies