<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>gradio on Astroicers Blog</title><link>https://astroicers.link/tags/gradio.html</link><description>Recent content in gradio on Astroicers Blog</description><generator>Hugo -- gohugo.io</generator><language>en</language><lastBuildDate>Mon, 27 Apr 2026 00:00:00 +0000</lastBuildDate><atom:link href="https://astroicers.link/tags/gradio/index.xml" rel="self" type="application/rss+xml"/><item><title>Text Generation WebUI 本地 LLM 介面</title><link>https://astroicers.link/p/text-generation-webui-%E6%9C%AC%E5%9C%B0-llm-%E4%BB%8B%E9%9D%A2.html</link><pubDate>Mon, 27 Apr 2026 00:00:00 +0000</pubDate><guid>https://astroicers.link/p/text-generation-webui-%E6%9C%AC%E5%9C%B0-llm-%E4%BB%8B%E9%9D%A2.html</guid><description>&lt;h2 id="專案簡介">專案簡介&lt;/h2>
&lt;p>&lt;a class="link" href="https://github.com/oobabooga/text-generation-webui" target="_blank" rel="noopener"
>Text Generation WebUI&lt;/a> 是一個功能完整的 Gradio 網頁介面，用於運行各種大型語言模型。支援多種模型格式和載入器，是本地 LLM 實驗的最佳平台。&lt;/p>
&lt;p>&lt;strong>GitHub Stars&lt;/strong>: 46K+&lt;/p>
&lt;h2 id="主要功能">主要功能&lt;/h2>
&lt;ul>
&lt;li>&lt;strong>多格式支援&lt;/strong> - GGUF、GPTQ、AWQ、EXL2&lt;/li>
&lt;li>&lt;strong>多載入器&lt;/strong> - llama.cpp、ExLlamaV2、Transformers&lt;/li>
&lt;li>&lt;strong>擴充系統&lt;/strong> - 語音、圖片生成、RAG&lt;/li>
&lt;li>&lt;strong>角色扮演&lt;/strong> - 自訂角色和對話模式&lt;/li>
&lt;li>&lt;strong>API 相容&lt;/strong> - OpenAI API 格式&lt;/li>
&lt;/ul>
&lt;h2 id="安裝">安裝&lt;/h2>
&lt;h3 id="一鍵安裝">一鍵安裝&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># Linux/macOS&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">git clone https://github.com/oobabooga/text-generation-webui
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="nb">cd&lt;/span> text-generation-webui
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">./start_linux.sh
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># Windows&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">start_windows.bat
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="手動安裝">手動安裝&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">conda create -n textgen &lt;span class="nv">python&lt;/span>&lt;span class="o">=&lt;/span>3.11
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">conda activate textgen
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">pip install torch torchvision torchaudio
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">git clone https://github.com/oobabooga/text-generation-webui
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="nb">cd&lt;/span> text-generation-webui
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">pip install -r requirements.txt
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="啟動">啟動&lt;/h2>
&lt;h3 id="基本啟動">基本啟動&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">python server.py
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;p>訪問 &lt;code>http://localhost:7860&lt;/code>&lt;/p>
&lt;h3 id="常用參數">常用參數&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;span class="lnt">8
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 啟用 API&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">python server.py --api
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 指定監聽位址&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">python server.py --listen --listen-port &lt;span class="m">8080&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 啟用共享（公開存取）&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">python server.py --share
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="模型下載">模型下載&lt;/h2>
&lt;h3 id="透過-webui-下載">透過 WebUI 下載&lt;/h3>
&lt;ol>
&lt;li>前往「Model」頁籤&lt;/li>
&lt;li>輸入 HuggingFace 模型名稱&lt;/li>
&lt;li>點擊「Download」&lt;/li>
&lt;/ol>
&lt;h3 id="命令列下載">命令列下載&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">python download-model.py TheBloke/Llama-2-7B-GGUF
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="模型存放">模型存放&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-fallback" data-lang="fallback">&lt;span class="line">&lt;span class="cl">text-generation-webui/
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">└── models/
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> ├── llama-2-7b.Q4_K_M.gguf
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> └── mistral-7b-instruct/
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="載入器選擇">載入器選擇&lt;/h2>
&lt;table>
&lt;thead>
&lt;tr>
&lt;th>載入器&lt;/th>
&lt;th>格式&lt;/th>
&lt;th>特點&lt;/th>
&lt;/tr>
&lt;/thead>
&lt;tbody>
&lt;tr>
&lt;td>llama.cpp&lt;/td>
&lt;td>GGUF&lt;/td>
&lt;td>CPU 友好，量化支援&lt;/td>
&lt;/tr>
&lt;tr>
&lt;td>ExLlamaV2&lt;/td>
&lt;td>EXL2/GPTQ&lt;/td>
&lt;td>GPU 最佳效能&lt;/td>
&lt;/tr>
&lt;tr>
&lt;td>Transformers&lt;/td>
&lt;td>HF 原始&lt;/td>
&lt;td>完整功能支援&lt;/td>
&lt;/tr>
&lt;tr>
&lt;td>AutoGPTQ&lt;/td>
&lt;td>GPTQ&lt;/td>
&lt;td>GPU 量化推理&lt;/td>
&lt;/tr>
&lt;/tbody>
&lt;/table>
&lt;h3 id="載入器設定">載入器設定&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="c"># models/config.yaml&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">llama-2-7b&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">loader&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">llama.cpp&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">n_gpu_layers&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="m">35&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">n_ctx&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="m">4096&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="對話模式">對話模式&lt;/h2>
&lt;h3 id="chat-模式">Chat 模式&lt;/h3>
&lt;p>標準聊天介面，支援：&lt;/p>
&lt;ul>
&lt;li>系統提示詞&lt;/li>
&lt;li>對話歷史&lt;/li>
&lt;li>參數調整&lt;/li>
&lt;/ul>
&lt;h3 id="notebook-模式">Notebook 模式&lt;/h3>
&lt;p>自由文字編輯，適合：&lt;/p>
&lt;ul>
&lt;li>故事創作&lt;/li>
&lt;li>程式碼生成&lt;/li>
&lt;li>長文編輯&lt;/li>
&lt;/ul>
&lt;h3 id="instruct-模式">Instruct 模式&lt;/h3>
&lt;p>指令格式對話，使用模型特定模板：&lt;/p>
&lt;ul>
&lt;li>Alpaca&lt;/li>
&lt;li>Vicuna&lt;/li>
&lt;li>ChatML&lt;/li>
&lt;/ul>
&lt;h2 id="角色設定">角色設定&lt;/h2>
&lt;h3 id="建立角色">建立角色&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;span class="lnt">8
&lt;/span>&lt;span class="lnt">9
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="c"># characters/assistant.yaml&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">name&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">Security Expert&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">context&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="p">|&lt;/span>&lt;span class="sd">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="sd"> You are a cybersecurity expert with deep knowledge
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="sd"> of penetration testing, vulnerability assessment,
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="sd"> and secure coding practices.&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">greeting&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="p">|&lt;/span>&lt;span class="sd">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="sd"> Hello! I&amp;#39;m your security consultant. How can I help
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="sd"> you with your security concerns today?&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="使用角色">使用角色&lt;/h3>
&lt;ol>
&lt;li>前往「Parameters」→「Character」&lt;/li>
&lt;li>選擇角色檔案&lt;/li>
&lt;li>開始對話&lt;/li>
&lt;/ol>
&lt;h2 id="api-使用">API 使用&lt;/h2>
&lt;h3 id="啟用-api">啟用 API&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">python server.py --api --api-port &lt;span class="m">5000&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="openai-格式">OpenAI 格式&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt"> 1
&lt;/span>&lt;span class="lnt"> 2
&lt;/span>&lt;span class="lnt"> 3
&lt;/span>&lt;span class="lnt"> 4
&lt;/span>&lt;span class="lnt"> 5
&lt;/span>&lt;span class="lnt"> 6
&lt;/span>&lt;span class="lnt"> 7
&lt;/span>&lt;span class="lnt"> 8
&lt;/span>&lt;span class="lnt"> 9
&lt;/span>&lt;span class="lnt">10
&lt;/span>&lt;span class="lnt">11
&lt;/span>&lt;span class="lnt">12
&lt;/span>&lt;span class="lnt">13
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-python" data-lang="python">&lt;span class="line">&lt;span class="cl">&lt;span class="kn">from&lt;/span> &lt;span class="nn">openai&lt;/span> &lt;span class="kn">import&lt;/span> &lt;span class="n">OpenAI&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="n">client&lt;/span> &lt;span class="o">=&lt;/span> &lt;span class="n">OpenAI&lt;/span>&lt;span class="p">(&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">base_url&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="s2">&amp;#34;http://localhost:5000/v1&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">api_key&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="s2">&amp;#34;not-needed&amp;#34;&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="p">)&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="n">response&lt;/span> &lt;span class="o">=&lt;/span> &lt;span class="n">client&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">chat&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">completions&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">create&lt;/span>&lt;span class="p">(&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">model&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="s2">&amp;#34;llama-2-7b&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">messages&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="p">[&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="p">{&lt;/span>&lt;span class="s2">&amp;#34;role&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="s2">&amp;#34;user&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span> &lt;span class="s2">&amp;#34;content&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="s2">&amp;#34;What is SQL injection?&amp;#34;&lt;/span>&lt;span class="p">}&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="p">]&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="p">)&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="原生-api">原生 API&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt"> 1
&lt;/span>&lt;span class="lnt"> 2
&lt;/span>&lt;span class="lnt"> 3
&lt;/span>&lt;span class="lnt"> 4
&lt;/span>&lt;span class="lnt"> 5
&lt;/span>&lt;span class="lnt"> 6
&lt;/span>&lt;span class="lnt"> 7
&lt;/span>&lt;span class="lnt"> 8
&lt;/span>&lt;span class="lnt"> 9
&lt;/span>&lt;span class="lnt">10
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-python" data-lang="python">&lt;span class="line">&lt;span class="cl">&lt;span class="kn">import&lt;/span> &lt;span class="nn">requests&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="n">response&lt;/span> &lt;span class="o">=&lt;/span> &lt;span class="n">requests&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">post&lt;/span>&lt;span class="p">(&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="s2">&amp;#34;http://localhost:5000/api/v1/generate&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">json&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="p">{&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="s2">&amp;#34;prompt&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="s2">&amp;#34;Explain XSS attacks:&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="s2">&amp;#34;max_new_tokens&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="mi">200&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="s2">&amp;#34;temperature&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="mf">0.7&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="p">}&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="p">)&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="擴充功能">擴充功能&lt;/h2>
&lt;h3 id="常用擴充">常用擴充&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;span class="lnt">8
&lt;/span>&lt;span class="lnt">9
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 語音輸入/輸出&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">extensions/whisper_stt/
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">extensions/silero_tts/
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># RAG&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">extensions/superboogav2/
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 多模態&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">extensions/multimodal/
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="啟用擴充">啟用擴充&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">python server.py --extensions whisper_stt silero_tts
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="效能優化">效能優化&lt;/h2>
&lt;h3 id="gpu-設定">GPU 設定&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 調整 GPU 層數&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">--n-gpu-layers &lt;span class="m">35&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 設定 context 大小&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">--n_ctx &lt;span class="m">4096&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="記憶體優化">記憶體優化&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 使用 4-bit 量化&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">--load-in-4bit
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 減少 context&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">--n_ctx &lt;span class="m">2048&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="相關連結">相關連結&lt;/h2>
&lt;ul>
&lt;li>&lt;a class="link" href="https://github.com/oobabooga/text-generation-webui" target="_blank" rel="noopener"
>GitHub Repository&lt;/a>&lt;/li>
&lt;li>&lt;a class="link" href="https://github.com/oobabooga/text-generation-webui/wiki" target="_blank" rel="noopener"
>Wiki 文件&lt;/a>&lt;/li>
&lt;/ul>
&lt;h2 id="延伸閱讀">延伸閱讀&lt;/h2>
&lt;ul>
&lt;li>&lt;a class="link" href="https://astroicers.link/p/ollama-local-llm/" >Ollama 本地 LLM 部署&lt;/a>&lt;/li>
&lt;li>&lt;a class="link" href="https://astroicers.link/p/open-webui-local-ai/" >Open WebUI 本地 AI 介面&lt;/a>&lt;/li>
&lt;li>&lt;a class="link" href="https://astroicers.link/p/llama-cpp-inference/" >llama.cpp 高效能推理&lt;/a>&lt;/li>
&lt;/ul></description></item></channel></rss>