From 4bfdab5be21519cd1a8579671836069a2c484483 Mon Sep 17 00:00:00 2001 From: xunbu Date: Wed, 27 Aug 2025 23:59:02 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=9C=AF=E8=AF=AD=E8=A1=A8?= =?UTF-8?q?=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/app.py | 29 +++++++++++++++++---------- docutranslate/static/index.html | 2 +- docutranslate/static/papaparse.min.js | 7 +++++++ 3 files changed, 26 insertions(+), 12 deletions(-) create mode 100644 docutranslate/static/papaparse.min.js diff --git a/docutranslate/app.py b/docutranslate/app.py index 87178c4..e831669 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -232,6 +232,7 @@ class BaseWorkflowParams(BaseModel): thinking: ThinkingMode = Field(default=default_params["thinking"], description="是否启用深度思考", examples=["default", "enable", "disable"]) custom_prompt: Optional[str] = Field(None, description="用户自定义的翻译Prompt。", alias="custom_prompt") + glossary_dict: Optional[Dict[str, str]] = Field(None, description="术语表字典,key为原文,value为译文。") # 2. 为每个工作流创建独立的参数模型 @@ -401,7 +402,11 @@ class TranslateServiceRequest(BaseModel): "separator": " \n---翻译---\n ", "chunk_size": 2000, "concurrent": 5, - "translate_regions": ["Sheet1!A1:B10", "C:D"] + "translate_regions": ["Sheet1!A1:B10", "C:D"], + "glossary_dict": { + "OpenAI": "开放人工智能", + "LLM": "大语言模型" + } } } }, @@ -509,7 +514,7 @@ async def _perform_translation( translator_config = MDTranslatorConfig( **payload.model_dump(include={ 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', - 'temperature', 'thinking', 'chunk_size', 'concurrent' + 'temperature', 'thinking', 'chunk_size', 'concurrent', 'glossary_dict' }, exclude_none=True) ) converter_config = None @@ -533,7 +538,7 @@ async def _perform_translation( translator_config = TXTTranslatorConfig( **payload.model_dump(include={ 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', - 'temperature', 'thinking', 'chunk_size', 'concurrent' + 'temperature', 'thinking', 'chunk_size', 'concurrent', 'glossary_dict' }, exclude_none=True) ) html_exporter_config = TXT2HTMLExporterConfig(cdn=True) @@ -549,7 +554,7 @@ async def _perform_translation( json_paths=payload.json_paths, **payload.model_dump(include={ 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', - 'temperature', 'thinking', 'chunk_size', 'concurrent' + 'temperature', 'thinking', 'chunk_size', 'concurrent', 'glossary_dict' }, exclude_none=True) ) html_exporter_config = Json2HTMLExporterConfig(cdn=True) @@ -565,7 +570,7 @@ async def _perform_translation( **payload.model_dump(include={ 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', 'temperature', 'thinking', 'chunk_size', 'concurrent', - 'insert_mode', 'separator', 'translate_regions' + 'insert_mode', 'separator', 'translate_regions', 'glossary_dict' }, exclude_none=True) ) html_exporter_config = Xlsx2HTMLExporterConfig(cdn=True) @@ -582,7 +587,7 @@ async def _perform_translation( **payload.model_dump(include={ 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', 'temperature', 'thinking', 'chunk_size', 'concurrent', - 'insert_mode', 'separator' + 'insert_mode', 'separator', 'glossary_dict' }, exclude_none=True) ) html_exporter_config = Docx2HTMLExporterConfig(cdn=True) @@ -599,7 +604,7 @@ async def _perform_translation( **payload.model_dump(include={ 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', 'temperature', 'thinking', 'chunk_size', 'concurrent', - 'insert_mode', 'separator' + 'insert_mode', 'separator', 'glossary_dict' }, exclude_none=True) ) html_exporter_config = Srt2HTMLExporterConfig(cdn=True) @@ -616,7 +621,7 @@ async def _perform_translation( **payload.model_dump(include={ 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', 'temperature', 'thinking', 'chunk_size', 'concurrent', - 'insert_mode', 'separator' + 'insert_mode', 'separator', 'glossary_dict' }, exclude_none=True) ) html_exporter_config = Epub2HTMLExporterConfig(cdn=True) @@ -634,7 +639,7 @@ async def _perform_translation( **payload.model_dump(include={ 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', 'temperature', 'thinking', 'chunk_size', 'concurrent', - 'insert_mode', 'separator' + 'insert_mode', 'separator', 'glossary_dict' }, exclude_none=True) ) workflow_config = HtmlWorkflowConfig( @@ -1292,6 +1297,7 @@ async def temp_translate( thinking: ThinkingMode = Body(default_params["thinking"]), chunk_size: int = Body(default_params["chunk_size"]), custom_prompt: Optional[str] = Body(None), model_version: Literal["pipeline", "vlm"] = Body("vlm"), + glossary_dict: Optional[Dict[str, str]] = Body(None), ): file_name = Path(file_name) try: @@ -1304,7 +1310,8 @@ async def temp_translate( converter_config=ConverterMineruConfig(mineru_token=mineru_token, model_version=model_version), translator_config=MDTranslatorConfig(base_url=base_url, api_key=api_key, model_id=model_id, to_lang=to_lang, custom_prompt=custom_prompt, temperature=temperature, - thinking=thinking, chunk_size=chunk_size, concurrent=concurrent), + thinking=thinking, chunk_size=chunk_size, concurrent=concurrent, + glossary_dict=glossary_dict), html_exporter_config=MD2HTMLExporterConfig() ) workflow = MarkdownBasedWorkflow(workflow_config) @@ -1341,4 +1348,4 @@ def run_app(port: int | None = None): if __name__ == "__main__": - run_app() + run_app() \ No newline at end of file diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index 674472e..27eecf7 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file + DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file diff --git a/docutranslate/static/papaparse.min.js b/docutranslate/static/papaparse.min.js new file mode 100644 index 0000000..eeaf983 --- /dev/null +++ b/docutranslate/static/papaparse.min.js @@ -0,0 +1,7 @@ +/* @license +Papa Parse +v5.4.1 +https://github.com/mholt/PapaParse +License: MIT +*/ +!function(e,t){"function"==typeof define&&define.amd?define([],t):"object"==typeof module&&"undefined"!=typeof exports?module.exports=t():e.Papa=t()}(this,function s(){"use strict";var f="undefined"!=typeof self?self:"undefined"!=typeof window?window:void 0!==f?f:{};var n=!f.document&&!!f.postMessage,o=f.IS_PAPA_WORKER||!1,a={},u=0,b={parse:function(e,t){var r=(t=t||{}).dynamicTyping||!1;J(r)&&(t.dynamicTypingFunction=r,r={});if(t.dynamicTyping=r,t.transform=!!J(t.transform)&&t.transform,t.worker&&b.WORKERS_SUPPORTED){var i=function(){if(!b.WORKERS_SUPPORTED)return!1;var e=(r=f.URL||f.webkitURL||null,i=s.toString(),b.BLOB_URL||(b.BLOB_URL=r.createObjectURL(new Blob(["var global = (function() { if (typeof self !== 'undefined') { return self; } if (typeof window !== 'undefined') { return window; } if (typeof global !== 'undefined') { return global; } return {}; })(); global.IS_PAPA_WORKER=true; ","(",i,")();"],{type:"text/javascript"})))),t=new f.Worker(e);var r,i;return t.onmessage=_,t.id=u++,a[t.id]=t}();return i.userStep=t.step,i.userChunk=t.chunk,i.userComplete=t.complete,i.userError=t.error,t.step=J(t.step),t.chunk=J(t.chunk),t.complete=J(t.complete),t.error=J(t.error),delete t.worker,void i.postMessage({input:e,config:t,workerId:i.id})}var n=null;b.NODE_STREAM_INPUT,"string"==typeof e?(e=function(e){if(65279===e.charCodeAt(0))return e.slice(1);return e}(e),n=t.download?new l(t):new p(t)):!0===e.readable&&J(e.read)&&J(e.on)?n=new g(t):(f.File&&e instanceof File||e instanceof Object)&&(n=new c(t));return n.stream(e)},unparse:function(e,t){var n=!1,_=!0,m=",",y="\r\n",s='"',a=s+s,r=!1,i=null,o=!1;!function(){if("object"!=typeof t)return;"string"!=typeof t.delimiter||b.BAD_DELIMITERS.filter(function(e){return-1!==t.delimiter.indexOf(e)}).length||(m=t.delimiter);("boolean"==typeof t.quotes||"function"==typeof t.quotes||Array.isArray(t.quotes))&&(n=t.quotes);"boolean"!=typeof t.skipEmptyLines&&"string"!=typeof t.skipEmptyLines||(r=t.skipEmptyLines);"string"==typeof t.newline&&(y=t.newline);"string"==typeof t.quoteChar&&(s=t.quoteChar);"boolean"==typeof t.header&&(_=t.header);if(Array.isArray(t.columns)){if(0===t.columns.length)throw new Error("Option columns is empty");i=t.columns}void 0!==t.escapeChar&&(a=t.escapeChar+s);("boolean"==typeof t.escapeFormulae||t.escapeFormulae instanceof RegExp)&&(o=t.escapeFormulae instanceof RegExp?t.escapeFormulae:/^[=+\-@\t\r].*$/)}();var u=new RegExp(Q(s),"g");"string"==typeof e&&(e=JSON.parse(e));if(Array.isArray(e)){if(!e.length||Array.isArray(e[0]))return h(null,e,r);if("object"==typeof e[0])return h(i||Object.keys(e[0]),e,r)}else if("object"==typeof e)return"string"==typeof e.data&&(e.data=JSON.parse(e.data)),Array.isArray(e.data)&&(e.fields||(e.fields=e.meta&&e.meta.fields||i),e.fields||(e.fields=Array.isArray(e.data[0])?e.fields:"object"==typeof e.data[0]?Object.keys(e.data[0]):[]),Array.isArray(e.data[0])||"object"==typeof e.data[0]||(e.data=[e.data])),h(e.fields||[],e.data||[],r);throw new Error("Unable to serialize unrecognized input");function h(e,t,r){var i="";"string"==typeof e&&(e=JSON.parse(e)),"string"==typeof t&&(t=JSON.parse(t));var n=Array.isArray(e)&&0=this._config.preview;if(o)f.postMessage({results:n,workerId:b.WORKER_ID,finished:a});else if(J(this._config.chunk)&&!t){if(this._config.chunk(n,this._handle),this._handle.paused()||this._handle.aborted())return void(this._halted=!0);n=void 0,this._completeResults=void 0}return this._config.step||this._config.chunk||(this._completeResults.data=this._completeResults.data.concat(n.data),this._completeResults.errors=this._completeResults.errors.concat(n.errors),this._completeResults.meta=n.meta),this._completed||!a||!J(this._config.complete)||n&&n.meta.aborted||(this._config.complete(this._completeResults,this._input),this._completed=!0),a||n&&n.meta.paused||this._nextChunk(),n}this._halted=!0},this._sendError=function(e){J(this._config.error)?this._config.error(e):o&&this._config.error&&f.postMessage({workerId:b.WORKER_ID,error:e,finished:!1})}}function l(e){var i;(e=e||{}).chunkSize||(e.chunkSize=b.RemoteChunkSize),h.call(this,e),this._nextChunk=n?function(){this._readChunk(),this._chunkLoaded()}:function(){this._readChunk()},this.stream=function(e){this._input=e,this._nextChunk()},this._readChunk=function(){if(this._finished)this._chunkLoaded();else{if(i=new XMLHttpRequest,this._config.withCredentials&&(i.withCredentials=this._config.withCredentials),n||(i.onload=v(this._chunkLoaded,this),i.onerror=v(this._chunkError,this)),i.open(this._config.downloadRequestBody?"POST":"GET",this._input,!n),this._config.downloadRequestHeaders){var e=this._config.downloadRequestHeaders;for(var t in e)i.setRequestHeader(t,e[t])}if(this._config.chunkSize){var r=this._start+this._config.chunkSize-1;i.setRequestHeader("Range","bytes="+this._start+"-"+r)}try{i.send(this._config.downloadRequestBody)}catch(e){this._chunkError(e.message)}n&&0===i.status&&this._chunkError()}},this._chunkLoaded=function(){4===i.readyState&&(i.status<200||400<=i.status?this._chunkError():(this._start+=this._config.chunkSize?this._config.chunkSize:i.responseText.length,this._finished=!this._config.chunkSize||this._start>=function(e){var t=e.getResponseHeader("Content-Range");if(null===t)return-1;return parseInt(t.substring(t.lastIndexOf("/")+1))}(i),this.parseChunk(i.responseText)))},this._chunkError=function(e){var t=i.statusText||e;this._sendError(new Error(t))}}function c(e){var i,n;(e=e||{}).chunkSize||(e.chunkSize=b.LocalChunkSize),h.call(this,e);var s="undefined"!=typeof FileReader;this.stream=function(e){this._input=e,n=e.slice||e.webkitSlice||e.mozSlice,s?((i=new FileReader).onload=v(this._chunkLoaded,this),i.onerror=v(this._chunkError,this)):i=new FileReaderSync,this._nextChunk()},this._nextChunk=function(){this._finished||this._config.preview&&!(this._rowCount=this._input.size,this.parseChunk(e.target.result)},this._chunkError=function(){this._sendError(i.error)}}function p(e){var r;h.call(this,e=e||{}),this.stream=function(e){return r=e,this._nextChunk()},this._nextChunk=function(){if(!this._finished){var e,t=this._config.chunkSize;return t?(e=r.substring(0,t),r=r.substring(t)):(e=r,r=""),this._finished=!r,this.parseChunk(e)}}}function g(e){h.call(this,e=e||{});var t=[],r=!0,i=!1;this.pause=function(){h.prototype.pause.apply(this,arguments),this._input.pause()},this.resume=function(){h.prototype.resume.apply(this,arguments),this._input.resume()},this.stream=function(e){this._input=e,this._input.on("data",this._streamData),this._input.on("end",this._streamEnd),this._input.on("error",this._streamError)},this._checkIsFinished=function(){i&&1===t.length&&(this._finished=!0)},this._nextChunk=function(){this._checkIsFinished(),t.length?this.parseChunk(t.shift()):r=!0},this._streamData=v(function(e){try{t.push("string"==typeof e?e:e.toString(this._config.encoding)),r&&(r=!1,this._checkIsFinished(),this.parseChunk(t.shift()))}catch(e){this._streamError(e)}},this),this._streamError=v(function(e){this._streamCleanUp(),this._sendError(e)},this),this._streamEnd=v(function(){this._streamCleanUp(),i=!0,this._streamData("")},this),this._streamCleanUp=v(function(){this._input.removeListener("data",this._streamData),this._input.removeListener("end",this._streamEnd),this._input.removeListener("error",this._streamError)},this)}function r(m){var a,o,u,i=Math.pow(2,53),n=-i,s=/^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/,h=/^((\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)))$/,t=this,r=0,f=0,d=!1,e=!1,l=[],c={data:[],errors:[],meta:{}};if(J(m.step)){var p=m.step;m.step=function(e){if(c=e,_())g();else{if(g(),0===c.data.length)return;r+=e.data.length,m.preview&&r>m.preview?o.abort():(c.data=c.data[0],p(c,t))}}}function y(e){return"greedy"===m.skipEmptyLines?""===e.join("").trim():1===e.length&&0===e[0].length}function g(){return c&&u&&(k("Delimiter","UndetectableDelimiter","Unable to auto-detect delimiting character; defaulted to '"+b.DefaultDelimiter+"'"),u=!1),m.skipEmptyLines&&(c.data=c.data.filter(function(e){return!y(e)})),_()&&function(){if(!c)return;function e(e,t){J(m.transformHeader)&&(e=m.transformHeader(e,t)),l.push(e)}if(Array.isArray(c.data[0])){for(var t=0;_()&&t=l.length?"__parsed_extra":l[r]),m.transform&&(s=m.transform(s,n)),s=v(n,s),"__parsed_extra"===n?(i[n]=i[n]||[],i[n].push(s)):i[n]=s}return m.header&&(r>l.length?k("FieldMismatch","TooManyFields","Too many fields: expected "+l.length+" fields but parsed "+r,f+t):r=i.length/2?"\r\n":"\r"}(e,i)),u=!1,m.delimiter)J(m.delimiter)&&(m.delimiter=m.delimiter(e),c.meta.delimiter=m.delimiter);else{var n=function(e,t,r,i,n){var s,a,o,u;n=n||[",","\t","|",";",b.RECORD_SEP,b.UNIT_SEP];for(var h=0;h=N)return L(!0)}else for(S=W,W++;;){if(-1===(S=i.indexOf(z,S+1)))return r||h.push({type:"Quotes",code:"MissingQuotes",message:"Quoted field unterminated",row:u.length,index:W}),T();if(S===n-1)return T(i.substring(W,S).replace(C,z));if(z!==K||i[S+1]!==K){if(z===K||0===S||i[S-1]!==K){-1!==w&&w=N)return L(!0);break}h.push({type:"Quotes",code:"InvalidQuotes",message:"Trailing quote on quoted field is malformed",row:u.length,index:W}),S++}}else S++}return T();function I(e){u.push(e),d=W}function A(e){var t=0;if(-1!==e){var r=i.substring(S+1,e);r&&""===r.trim()&&(t=r.length)}return t}function T(e){return r||(void 0===e&&(e=i.substring(W)),f.push(e),W=n,I(f),o&&F()),L()}function D(e){W=e,I(f),f=[],R=i.indexOf(P,W)}function L(e){return{data:u,errors:h,meta:{delimiter:M,linebreak:P,aborted:H,truncated:!!e,cursor:d+(t||0)}}}function F(){q(L()),u=[],h=[]}},this.abort=function(){H=!0},this.getCharIndex=function(){return W}}function _(e){var t=e.data,r=a[t.workerId],i=!1;if(t.error)r.userError(t.error,t.file);else if(t.results&&t.results.data){var n={abort:function(){i=!0,m(t.workerId,{data:[],errors:[],meta:{aborted:!0}})},pause:y,resume:y};if(J(r.userStep)){for(var s=0;s