<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>local-ai on Astroicers Blog</title><link>https://astroicers.link/tags/local-ai.html</link><description>Recent content in local-ai on Astroicers Blog</description><generator>Hugo -- gohugo.io</generator><language>en</language><lastBuildDate>Sat, 09 May 2026 00:00:00 +0000</lastBuildDate><atom:link href="https://astroicers.link/tags/local-ai/index.xml" rel="self" type="application/rss+xml"/><item><title>PrivateGPT 本地私有 RAG 系統</title><link>https://astroicers.link/p/privategpt-%E6%9C%AC%E5%9C%B0%E7%A7%81%E6%9C%89-rag-%E7%B3%BB%E7%B5%B1.html</link><pubDate>Sat, 09 May 2026 00:00:00 +0000</pubDate><guid>https://astroicers.link/p/privategpt-%E6%9C%AC%E5%9C%B0%E7%A7%81%E6%9C%89-rag-%E7%B3%BB%E7%B5%B1.html</guid><description>&lt;h2 id="專案簡介">專案簡介&lt;/h2>
&lt;p>&lt;a class="link" href="https://github.com/imartinez/privateGPT" target="_blank" rel="noopener"
>PrivateGPT&lt;/a> 是一個完全私有的 RAG（檢索增強生成）系統，讓你與文件對話而不需要網路連線。所有資料處理都在本地進行，適合處理敏感或機密文件。&lt;/p>
&lt;p>&lt;strong>GitHub Stars&lt;/strong>: 57K+&lt;/p>
&lt;h2 id="主要功能">主要功能&lt;/h2>
&lt;ul>
&lt;li>&lt;strong>完全離線&lt;/strong> - 無需網路連線&lt;/li>
&lt;li>&lt;strong>隱私優先&lt;/strong> - 資料不離開本地&lt;/li>
&lt;li>&lt;strong>多格式支援&lt;/strong> - PDF、Word、Markdown、程式碼&lt;/li>
&lt;li>&lt;strong>多模型&lt;/strong> - Ollama、llama.cpp、HuggingFace&lt;/li>
&lt;li>&lt;strong>API 相容&lt;/strong> - OpenAI 格式 API&lt;/li>
&lt;/ul>
&lt;h2 id="安裝">安裝&lt;/h2>
&lt;h3 id="poetry-安裝">Poetry 安裝&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;span class="lnt">8
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">git clone https://github.com/zylon-ai/private-gpt
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="nb">cd&lt;/span> private-gpt
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 安裝依賴&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">poetry install --extras &lt;span class="s2">&amp;#34;ui llms-llama-cpp embeddings-huggingface vector-stores-qdrant&amp;#34;&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 下載模型&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">poetry run python scripts/setup
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="快速安裝">快速安裝&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 使用 Ollama&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">poetry install --extras &lt;span class="s2">&amp;#34;ui llms-ollama embeddings-ollama vector-stores-qdrant&amp;#34;&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="啟動">啟動&lt;/h2>
&lt;h3 id="啟動服務">啟動服務&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 啟動 API 和 UI&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="nv">PGPT_PROFILES&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="nb">local&lt;/span> make run
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 僅 API&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">poetry run python -m private_gpt
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;p>訪問 &lt;code>http://localhost:8001&lt;/code>&lt;/p>
&lt;h3 id="設定檔">設定檔&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt"> 1
&lt;/span>&lt;span class="lnt"> 2
&lt;/span>&lt;span class="lnt"> 3
&lt;/span>&lt;span class="lnt"> 4
&lt;/span>&lt;span class="lnt"> 5
&lt;/span>&lt;span class="lnt"> 6
&lt;/span>&lt;span class="lnt"> 7
&lt;/span>&lt;span class="lnt"> 8
&lt;/span>&lt;span class="lnt"> 9
&lt;/span>&lt;span class="lnt">10
&lt;/span>&lt;span class="lnt">11
&lt;/span>&lt;span class="lnt">12
&lt;/span>&lt;span class="lnt">13
&lt;/span>&lt;span class="lnt">14
&lt;/span>&lt;span class="lnt">15
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="c"># settings-local.yaml&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">server&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">env_name&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">local&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">port&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="m">8001&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">llm&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">mode&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">ollama&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">ollama&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">model&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">llama3.3&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">api_base&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">http://localhost:11434&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">embedding&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">mode&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">ollama&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">ollama&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">model&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">nomic-embed-text&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="上傳文件">上傳文件&lt;/h2>
&lt;h3 id="支援格式">支援格式&lt;/h3>
&lt;ul>
&lt;li>PDF&lt;/li>
&lt;li>Word (.docx)&lt;/li>
&lt;li>PowerPoint (.pptx)&lt;/li>
&lt;li>Markdown&lt;/li>
&lt;li>純文字&lt;/li>
&lt;li>程式碼檔案&lt;/li>
&lt;/ul>
&lt;h3 id="透過-ui-上傳">透過 UI 上傳&lt;/h3>
&lt;ol>
&lt;li>開啟網頁介面&lt;/li>
&lt;li>點擊「Upload」&lt;/li>
&lt;li>選擇檔案&lt;/li>
&lt;li>等待處理完成&lt;/li>
&lt;/ol>
&lt;h3 id="透過-api-上傳">透過 API 上傳&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">curl -X POST &lt;span class="s2">&amp;#34;http://localhost:8001/v1/ingest/file&amp;#34;&lt;/span> &lt;span class="se">\
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="se">&lt;/span> -H &lt;span class="s2">&amp;#34;Content-Type: multipart/form-data&amp;#34;&lt;/span> &lt;span class="se">\
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="se">&lt;/span> -F &lt;span class="s2">&amp;#34;file=@document.pdf&amp;#34;&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="python-上傳">Python 上傳&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-python" data-lang="python">&lt;span class="line">&lt;span class="cl">&lt;span class="kn">import&lt;/span> &lt;span class="nn">requests&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="k">with&lt;/span> &lt;span class="nb">open&lt;/span>&lt;span class="p">(&lt;/span>&lt;span class="s2">&amp;#34;document.pdf&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span> &lt;span class="s2">&amp;#34;rb&amp;#34;&lt;/span>&lt;span class="p">)&lt;/span> &lt;span class="k">as&lt;/span> &lt;span class="n">f&lt;/span>&lt;span class="p">:&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">response&lt;/span> &lt;span class="o">=&lt;/span> &lt;span class="n">requests&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">post&lt;/span>&lt;span class="p">(&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="s2">&amp;#34;http://localhost:8001/v1/ingest/file&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">files&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="p">{&lt;/span>&lt;span class="s2">&amp;#34;file&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="n">f&lt;/span>&lt;span class="p">}&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="p">)&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="對話查詢">對話查詢&lt;/h2>
&lt;h3 id="ui-查詢">UI 查詢&lt;/h3>
&lt;p>直接在網頁介面輸入問題。&lt;/p>
&lt;h3 id="api-查詢">API 查詢&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;span class="lnt">8
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">curl -X POST &lt;span class="s2">&amp;#34;http://localhost:8001/v1/chat/completions&amp;#34;&lt;/span> &lt;span class="se">\
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="se">&lt;/span> -H &lt;span class="s2">&amp;#34;Content-Type: application/json&amp;#34;&lt;/span> &lt;span class="se">\
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="se">&lt;/span> -d &lt;span class="s1">&amp;#39;{
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="s1"> &amp;#34;messages&amp;#34;: [
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="s1"> {&amp;#34;role&amp;#34;: &amp;#34;user&amp;#34;, &amp;#34;content&amp;#34;: &amp;#34;這份文件的主要內容是什麼？&amp;#34;}
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="s1"> ],
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="s1"> &amp;#34;use_context&amp;#34;: true
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="s1"> }&amp;#39;&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="python-整合">Python 整合&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt"> 1
&lt;/span>&lt;span class="lnt"> 2
&lt;/span>&lt;span class="lnt"> 3
&lt;/span>&lt;span class="lnt"> 4
&lt;/span>&lt;span class="lnt"> 5
&lt;/span>&lt;span class="lnt"> 6
&lt;/span>&lt;span class="lnt"> 7
&lt;/span>&lt;span class="lnt"> 8
&lt;/span>&lt;span class="lnt"> 9
&lt;/span>&lt;span class="lnt">10
&lt;/span>&lt;span class="lnt">11
&lt;/span>&lt;span class="lnt">12
&lt;/span>&lt;span class="lnt">13
&lt;/span>&lt;span class="lnt">14
&lt;/span>&lt;span class="lnt">15
&lt;/span>&lt;span class="lnt">16
&lt;/span>&lt;span class="lnt">17
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-python" data-lang="python">&lt;span class="line">&lt;span class="cl">&lt;span class="kn">from&lt;/span> &lt;span class="nn">openai&lt;/span> &lt;span class="kn">import&lt;/span> &lt;span class="n">OpenAI&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="n">client&lt;/span> &lt;span class="o">=&lt;/span> &lt;span class="n">OpenAI&lt;/span>&lt;span class="p">(&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">base_url&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="s2">&amp;#34;http://localhost:8001/v1&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">api_key&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="s2">&amp;#34;not-needed&amp;#34;&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="p">)&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 使用上傳的文件作為上下文&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="n">response&lt;/span> &lt;span class="o">=&lt;/span> &lt;span class="n">client&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">chat&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">completions&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">create&lt;/span>&lt;span class="p">(&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">model&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="s2">&amp;#34;private-gpt&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">messages&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="p">[&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="p">{&lt;/span>&lt;span class="s2">&amp;#34;role&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="s2">&amp;#34;user&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span> &lt;span class="s2">&amp;#34;content&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="s2">&amp;#34;摘要這份報告的重點&amp;#34;&lt;/span>&lt;span class="p">}&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="p">],&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">extra_body&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="p">{&lt;/span>&lt;span class="s2">&amp;#34;use_context&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="kc">True&lt;/span>&lt;span class="p">}&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="p">)&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="nb">print&lt;/span>&lt;span class="p">(&lt;/span>&lt;span class="n">response&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">choices&lt;/span>&lt;span class="p">[&lt;/span>&lt;span class="mi">0&lt;/span>&lt;span class="p">]&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">message&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">content&lt;/span>&lt;span class="p">)&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="模型設定">模型設定&lt;/h2>
&lt;h3 id="使用-ollama">使用 Ollama&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt"> 1
&lt;/span>&lt;span class="lnt"> 2
&lt;/span>&lt;span class="lnt"> 3
&lt;/span>&lt;span class="lnt"> 4
&lt;/span>&lt;span class="lnt"> 5
&lt;/span>&lt;span class="lnt"> 6
&lt;/span>&lt;span class="lnt"> 7
&lt;/span>&lt;span class="lnt"> 8
&lt;/span>&lt;span class="lnt"> 9
&lt;/span>&lt;span class="lnt">10
&lt;/span>&lt;span class="lnt">11
&lt;/span>&lt;span class="lnt">12
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="c"># settings-ollama.yaml&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">llm&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">mode&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">ollama&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">ollama&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">model&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">llama3.3&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">api_base&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">http://localhost:11434&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">request_timeout&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="m">120.0&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">embedding&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">mode&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">ollama&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">ollama&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">model&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">nomic-embed-text&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="使用-llamacpp">使用 llama.cpp&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt"> 1
&lt;/span>&lt;span class="lnt"> 2
&lt;/span>&lt;span class="lnt"> 3
&lt;/span>&lt;span class="lnt"> 4
&lt;/span>&lt;span class="lnt"> 5
&lt;/span>&lt;span class="lnt"> 6
&lt;/span>&lt;span class="lnt"> 7
&lt;/span>&lt;span class="lnt"> 8
&lt;/span>&lt;span class="lnt"> 9
&lt;/span>&lt;span class="lnt">10
&lt;/span>&lt;span class="lnt">11
&lt;/span>&lt;span class="lnt">12
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="c"># settings-local.yaml&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">llm&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">mode&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">llamacpp&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">llamacpp&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">model_path&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">models/llama-2-7b-chat.Q4_K_M.gguf&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">n_ctx&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="m">4096&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">n_gpu_layers&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="m">35&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">embedding&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">mode&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">huggingface&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">huggingface&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">model_name&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">BAAI/bge-small-en-v1.5&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="使用-openai">使用 OpenAI&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt"> 1
&lt;/span>&lt;span class="lnt"> 2
&lt;/span>&lt;span class="lnt"> 3
&lt;/span>&lt;span class="lnt"> 4
&lt;/span>&lt;span class="lnt"> 5
&lt;/span>&lt;span class="lnt"> 6
&lt;/span>&lt;span class="lnt"> 7
&lt;/span>&lt;span class="lnt"> 8
&lt;/span>&lt;span class="lnt"> 9
&lt;/span>&lt;span class="lnt">10
&lt;/span>&lt;span class="lnt">11
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="c"># settings-openai.yaml&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">llm&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">mode&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">openai&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">openai&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">model&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">gpt-4o&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">api_key&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">${OPENAI_API_KEY}&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">embedding&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">mode&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">openai&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">openai&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">model&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">text-embedding-3-small&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="向量資料庫">向量資料庫&lt;/h2>
&lt;h3 id="qdrant預設">Qdrant（預設）&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="nt">vectorstore&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">database&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">qdrant&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">qdrant&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">path&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">./local_data/qdrant&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="chroma">Chroma&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="nt">vectorstore&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">database&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">chroma&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">chroma&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">persist_directory&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">./local_data/chroma&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="docker-部署">Docker 部署&lt;/h2>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt"> 1
&lt;/span>&lt;span class="lnt"> 2
&lt;/span>&lt;span class="lnt"> 3
&lt;/span>&lt;span class="lnt"> 4
&lt;/span>&lt;span class="lnt"> 5
&lt;/span>&lt;span class="lnt"> 6
&lt;/span>&lt;span class="lnt"> 7
&lt;/span>&lt;span class="lnt"> 8
&lt;/span>&lt;span class="lnt"> 9
&lt;/span>&lt;span class="lnt">10
&lt;/span>&lt;span class="lnt">11
&lt;/span>&lt;span class="lnt">12
&lt;/span>&lt;span class="lnt">13
&lt;/span>&lt;span class="lnt">14
&lt;/span>&lt;span class="lnt">15
&lt;/span>&lt;span class="lnt">16
&lt;/span>&lt;span class="lnt">17
&lt;/span>&lt;span class="lnt">18
&lt;/span>&lt;span class="lnt">19
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="nt">version&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="s1">&amp;#39;3.8&amp;#39;&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">services&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">private-gpt&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">image&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">zylonai/private-gpt:latest&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">ports&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>- &lt;span class="s2">&amp;#34;8001:8001&amp;#34;&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">environment&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>- &lt;span class="l">PGPT_PROFILES=docker&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">volumes&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>- &lt;span class="l">./local_data:/app/local_data&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>- &lt;span class="l">./settings-docker.yaml:/app/settings-docker.yaml&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">ollama&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">image&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">ollama/ollama&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">volumes&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>- &lt;span class="l">ollama-data:/root/.ollama&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">volumes&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">ollama-data&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="安全考量">安全考量&lt;/h2>
&lt;h3 id="資料隔離">資料隔離&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="c"># 限制文件目錄&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">data&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">local_data_folder&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">./secure_data&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="存取控制">存取控制&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="nt">server&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">auth&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">enabled&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="kc">true&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">secret&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">your-secret-key&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="稽核日誌">稽核日誌&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="nt">logging&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">level&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">INFO&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">file&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">./logs/private-gpt.log&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="效能優化">效能優化&lt;/h2>
&lt;h3 id="gpu-加速">GPU 加速&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="nt">llm&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">llamacpp&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">n_gpu_layers&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="m">99&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="c"># 所有層到 GPU&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="embedding-快取">Embedding 快取&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="nt">embedding&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">cache&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">enabled&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="kc">true&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">type&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">file&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">path&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">./cache/embeddings&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="相關連結">相關連結&lt;/h2>
&lt;ul>
&lt;li>&lt;a class="link" href="https://github.com/imartinez/privateGPT" target="_blank" rel="noopener"
>GitHub Repository&lt;/a>&lt;/li>
&lt;li>&lt;a class="link" href="https://docs.privategpt.dev/" target="_blank" rel="noopener"
>官方文件&lt;/a>&lt;/li>
&lt;/ul>
&lt;h2 id="延伸閱讀">延伸閱讀&lt;/h2>
&lt;ul>
&lt;li>&lt;a class="link" href="https://astroicers.link/p/ollama-local-llm/" >Ollama 本地 LLM 部署&lt;/a>&lt;/li>
&lt;li>&lt;a class="link" href="https://astroicers.link/p/langchain-ai-framework/" >LangChain AI 應用開發框架&lt;/a>&lt;/li>
&lt;li>&lt;a class="link" href="https://astroicers.link/p/milvus-vector-database/" >Milvus 向量資料庫&lt;/a>&lt;/li>
&lt;/ul></description></item><item><title>Text Generation WebUI 本地 LLM 介面</title><link>https://astroicers.link/p/text-generation-webui-%E6%9C%AC%E5%9C%B0-llm-%E4%BB%8B%E9%9D%A2.html</link><pubDate>Mon, 27 Apr 2026 00:00:00 +0000</pubDate><guid>https://astroicers.link/p/text-generation-webui-%E6%9C%AC%E5%9C%B0-llm-%E4%BB%8B%E9%9D%A2.html</guid><description>&lt;h2 id="專案簡介">專案簡介&lt;/h2>
&lt;p>&lt;a class="link" href="https://github.com/oobabooga/text-generation-webui" target="_blank" rel="noopener"
>Text Generation WebUI&lt;/a> 是一個功能完整的 Gradio 網頁介面，用於運行各種大型語言模型。支援多種模型格式和載入器，是本地 LLM 實驗的最佳平台。&lt;/p>
&lt;p>&lt;strong>GitHub Stars&lt;/strong>: 46K+&lt;/p>
&lt;h2 id="主要功能">主要功能&lt;/h2>
&lt;ul>
&lt;li>&lt;strong>多格式支援&lt;/strong> - GGUF、GPTQ、AWQ、EXL2&lt;/li>
&lt;li>&lt;strong>多載入器&lt;/strong> - llama.cpp、ExLlamaV2、Transformers&lt;/li>
&lt;li>&lt;strong>擴充系統&lt;/strong> - 語音、圖片生成、RAG&lt;/li>
&lt;li>&lt;strong>角色扮演&lt;/strong> - 自訂角色和對話模式&lt;/li>
&lt;li>&lt;strong>API 相容&lt;/strong> - OpenAI API 格式&lt;/li>
&lt;/ul>
&lt;h2 id="安裝">安裝&lt;/h2>
&lt;h3 id="一鍵安裝">一鍵安裝&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># Linux/macOS&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">git clone https://github.com/oobabooga/text-generation-webui
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="nb">cd&lt;/span> text-generation-webui
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">./start_linux.sh
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># Windows&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">start_windows.bat
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="手動安裝">手動安裝&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">conda create -n textgen &lt;span class="nv">python&lt;/span>&lt;span class="o">=&lt;/span>3.11
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">conda activate textgen
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">pip install torch torchvision torchaudio
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">git clone https://github.com/oobabooga/text-generation-webui
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="nb">cd&lt;/span> text-generation-webui
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">pip install -r requirements.txt
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="啟動">啟動&lt;/h2>
&lt;h3 id="基本啟動">基本啟動&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">python server.py
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;p>訪問 &lt;code>http://localhost:7860&lt;/code>&lt;/p>
&lt;h3 id="常用參數">常用參數&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;span class="lnt">8
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 啟用 API&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">python server.py --api
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 指定監聽位址&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">python server.py --listen --listen-port &lt;span class="m">8080&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 啟用共享（公開存取）&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">python server.py --share
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="模型下載">模型下載&lt;/h2>
&lt;h3 id="透過-webui-下載">透過 WebUI 下載&lt;/h3>
&lt;ol>
&lt;li>前往「Model」頁籤&lt;/li>
&lt;li>輸入 HuggingFace 模型名稱&lt;/li>
&lt;li>點擊「Download」&lt;/li>
&lt;/ol>
&lt;h3 id="命令列下載">命令列下載&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">python download-model.py TheBloke/Llama-2-7B-GGUF
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="模型存放">模型存放&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-fallback" data-lang="fallback">&lt;span class="line">&lt;span class="cl">text-generation-webui/
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">└── models/
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> ├── llama-2-7b.Q4_K_M.gguf
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> └── mistral-7b-instruct/
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="載入器選擇">載入器選擇&lt;/h2>
&lt;table>
&lt;thead>
&lt;tr>
&lt;th>載入器&lt;/th>
&lt;th>格式&lt;/th>
&lt;th>特點&lt;/th>
&lt;/tr>
&lt;/thead>
&lt;tbody>
&lt;tr>
&lt;td>llama.cpp&lt;/td>
&lt;td>GGUF&lt;/td>
&lt;td>CPU 友好，量化支援&lt;/td>
&lt;/tr>
&lt;tr>
&lt;td>ExLlamaV2&lt;/td>
&lt;td>EXL2/GPTQ&lt;/td>
&lt;td>GPU 最佳效能&lt;/td>
&lt;/tr>
&lt;tr>
&lt;td>Transformers&lt;/td>
&lt;td>HF 原始&lt;/td>
&lt;td>完整功能支援&lt;/td>
&lt;/tr>
&lt;tr>
&lt;td>AutoGPTQ&lt;/td>
&lt;td>GPTQ&lt;/td>
&lt;td>GPU 量化推理&lt;/td>
&lt;/tr>
&lt;/tbody>
&lt;/table>
&lt;h3 id="載入器設定">載入器設定&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="c"># models/config.yaml&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">llama-2-7b&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">loader&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">llama.cpp&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">n_gpu_layers&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="m">35&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w"> &lt;/span>&lt;span class="nt">n_ctx&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="m">4096&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="對話模式">對話模式&lt;/h2>
&lt;h3 id="chat-模式">Chat 模式&lt;/h3>
&lt;p>標準聊天介面，支援：&lt;/p>
&lt;ul>
&lt;li>系統提示詞&lt;/li>
&lt;li>對話歷史&lt;/li>
&lt;li>參數調整&lt;/li>
&lt;/ul>
&lt;h3 id="notebook-模式">Notebook 模式&lt;/h3>
&lt;p>自由文字編輯，適合：&lt;/p>
&lt;ul>
&lt;li>故事創作&lt;/li>
&lt;li>程式碼生成&lt;/li>
&lt;li>長文編輯&lt;/li>
&lt;/ul>
&lt;h3 id="instruct-模式">Instruct 模式&lt;/h3>
&lt;p>指令格式對話，使用模型特定模板：&lt;/p>
&lt;ul>
&lt;li>Alpaca&lt;/li>
&lt;li>Vicuna&lt;/li>
&lt;li>ChatML&lt;/li>
&lt;/ul>
&lt;h2 id="角色設定">角色設定&lt;/h2>
&lt;h3 id="建立角色">建立角色&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;span class="lnt">8
&lt;/span>&lt;span class="lnt">9
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-yaml" data-lang="yaml">&lt;span class="line">&lt;span class="cl">&lt;span class="c"># characters/assistant.yaml&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">name&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="l">Security Expert&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">context&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="p">|&lt;/span>&lt;span class="sd">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="sd"> You are a cybersecurity expert with deep knowledge
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="sd"> of penetration testing, vulnerability assessment,
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="sd"> and secure coding practices.&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="w">&lt;/span>&lt;span class="nt">greeting&lt;/span>&lt;span class="p">:&lt;/span>&lt;span class="w"> &lt;/span>&lt;span class="p">|&lt;/span>&lt;span class="sd">
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="sd"> Hello! I&amp;#39;m your security consultant. How can I help
&lt;/span>&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="sd"> you with your security concerns today?&lt;/span>&lt;span class="w">
&lt;/span>&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="使用角色">使用角色&lt;/h3>
&lt;ol>
&lt;li>前往「Parameters」→「Character」&lt;/li>
&lt;li>選擇角色檔案&lt;/li>
&lt;li>開始對話&lt;/li>
&lt;/ol>
&lt;h2 id="api-使用">API 使用&lt;/h2>
&lt;h3 id="啟用-api">啟用 API&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">python server.py --api --api-port &lt;span class="m">5000&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="openai-格式">OpenAI 格式&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt"> 1
&lt;/span>&lt;span class="lnt"> 2
&lt;/span>&lt;span class="lnt"> 3
&lt;/span>&lt;span class="lnt"> 4
&lt;/span>&lt;span class="lnt"> 5
&lt;/span>&lt;span class="lnt"> 6
&lt;/span>&lt;span class="lnt"> 7
&lt;/span>&lt;span class="lnt"> 8
&lt;/span>&lt;span class="lnt"> 9
&lt;/span>&lt;span class="lnt">10
&lt;/span>&lt;span class="lnt">11
&lt;/span>&lt;span class="lnt">12
&lt;/span>&lt;span class="lnt">13
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-python" data-lang="python">&lt;span class="line">&lt;span class="cl">&lt;span class="kn">from&lt;/span> &lt;span class="nn">openai&lt;/span> &lt;span class="kn">import&lt;/span> &lt;span class="n">OpenAI&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="n">client&lt;/span> &lt;span class="o">=&lt;/span> &lt;span class="n">OpenAI&lt;/span>&lt;span class="p">(&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">base_url&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="s2">&amp;#34;http://localhost:5000/v1&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">api_key&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="s2">&amp;#34;not-needed&amp;#34;&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="p">)&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="n">response&lt;/span> &lt;span class="o">=&lt;/span> &lt;span class="n">client&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">chat&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">completions&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">create&lt;/span>&lt;span class="p">(&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">model&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="s2">&amp;#34;llama-2-7b&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">messages&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="p">[&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="p">{&lt;/span>&lt;span class="s2">&amp;#34;role&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="s2">&amp;#34;user&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span> &lt;span class="s2">&amp;#34;content&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="s2">&amp;#34;What is SQL injection?&amp;#34;&lt;/span>&lt;span class="p">}&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="p">]&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="p">)&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="原生-api">原生 API&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt"> 1
&lt;/span>&lt;span class="lnt"> 2
&lt;/span>&lt;span class="lnt"> 3
&lt;/span>&lt;span class="lnt"> 4
&lt;/span>&lt;span class="lnt"> 5
&lt;/span>&lt;span class="lnt"> 6
&lt;/span>&lt;span class="lnt"> 7
&lt;/span>&lt;span class="lnt"> 8
&lt;/span>&lt;span class="lnt"> 9
&lt;/span>&lt;span class="lnt">10
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-python" data-lang="python">&lt;span class="line">&lt;span class="cl">&lt;span class="kn">import&lt;/span> &lt;span class="nn">requests&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="n">response&lt;/span> &lt;span class="o">=&lt;/span> &lt;span class="n">requests&lt;/span>&lt;span class="o">.&lt;/span>&lt;span class="n">post&lt;/span>&lt;span class="p">(&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="s2">&amp;#34;http://localhost:5000/api/v1/generate&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="n">json&lt;/span>&lt;span class="o">=&lt;/span>&lt;span class="p">{&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="s2">&amp;#34;prompt&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="s2">&amp;#34;Explain XSS attacks:&amp;#34;&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="s2">&amp;#34;max_new_tokens&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="mi">200&lt;/span>&lt;span class="p">,&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="s2">&amp;#34;temperature&amp;#34;&lt;/span>&lt;span class="p">:&lt;/span> &lt;span class="mf">0.7&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl"> &lt;span class="p">}&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="p">)&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="擴充功能">擴充功能&lt;/h2>
&lt;h3 id="常用擴充">常用擴充&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;span class="lnt">6
&lt;/span>&lt;span class="lnt">7
&lt;/span>&lt;span class="lnt">8
&lt;/span>&lt;span class="lnt">9
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 語音輸入/輸出&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">extensions/whisper_stt/
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">extensions/silero_tts/
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># RAG&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">extensions/superboogav2/
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 多模態&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">extensions/multimodal/
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="啟用擴充">啟用擴充&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">python server.py --extensions whisper_stt silero_tts
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="效能優化">效能優化&lt;/h2>
&lt;h3 id="gpu-設定">GPU 設定&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 調整 GPU 層數&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">--n-gpu-layers &lt;span class="m">35&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 設定 context 大小&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">--n_ctx &lt;span class="m">4096&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h3 id="記憶體優化">記憶體優化&lt;/h3>
&lt;div class="highlight">&lt;div class="chroma">
&lt;table class="lntable">&lt;tr>&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code>&lt;span class="lnt">1
&lt;/span>&lt;span class="lnt">2
&lt;/span>&lt;span class="lnt">3
&lt;/span>&lt;span class="lnt">4
&lt;/span>&lt;span class="lnt">5
&lt;/span>&lt;/code>&lt;/pre>&lt;/td>
&lt;td class="lntd">
&lt;pre tabindex="0" class="chroma">&lt;code class="language-bash" data-lang="bash">&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 使用 4-bit 量化&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">--load-in-4bit
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">&lt;span class="c1"># 減少 context&lt;/span>
&lt;/span>&lt;/span>&lt;span class="line">&lt;span class="cl">--n_ctx &lt;span class="m">2048&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/td>&lt;/tr>&lt;/table>
&lt;/div>
&lt;/div>&lt;h2 id="相關連結">相關連結&lt;/h2>
&lt;ul>
&lt;li>&lt;a class="link" href="https://github.com/oobabooga/text-generation-webui" target="_blank" rel="noopener"
>GitHub Repository&lt;/a>&lt;/li>
&lt;li>&lt;a class="link" href="https://github.com/oobabooga/text-generation-webui/wiki" target="_blank" rel="noopener"
>Wiki 文件&lt;/a>&lt;/li>
&lt;/ul>
&lt;h2 id="延伸閱讀">延伸閱讀&lt;/h2>
&lt;ul>
&lt;li>&lt;a class="link" href="https://astroicers.link/p/ollama-local-llm/" >Ollama 本地 LLM 部署&lt;/a>&lt;/li>
&lt;li>&lt;a class="link" href="https://astroicers.link/p/open-webui-local-ai/" >Open WebUI 本地 AI 介面&lt;/a>&lt;/li>
&lt;li>&lt;a class="link" href="https://astroicers.link/p/llama-cpp-inference/" >llama.cpp 高效能推理&lt;/a>&lt;/li>
&lt;/ul></description></item></channel></rss>