Qdrant 高效能向量資料庫

使用 Qdrant 建構大規模向量搜尋應用,支援過濾、分片、雲端部署

專案簡介

Qdrant 是一個用 Rust 開發的高效能向量資料庫,專為生產環境設計,支援大規模向量搜尋、豐富的過濾功能、水平擴展等企業級特性。

GitHub Stars: 28K+

主要功能

  • 高效能 - Rust 實作,極致效能
  • 豐富過濾 - 向量 + 屬性聯合查詢
  • 水平擴展 - 分片和副本支援
  • 雲原生 - Kubernetes 部署
  • 多租戶 - 隔離的 Collection

安裝

Docker

1
2
3
docker run -p 6333:6333 -p 6334:6334 \
  -v $(pwd)/qdrant_storage:/qdrant/storage \
  qdrant/qdrant

Docker Compose

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
version: '3.8'
services:
  qdrant:
    image: qdrant/qdrant:latest
    ports:
      - "6333:6333"
      - "6334:6334"
    volumes:
      - ./qdrant_storage:/qdrant/storage
    environment:
      - QDRANT__SERVICE__GRPC_PORT=6334

Python SDK

1
pip install qdrant-client

快速開始

基本使用

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

# 連接
client = QdrantClient("localhost", port=6333)

# 建立 Collection
client.create_collection(
    collection_name="security_docs",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)

# 新增向量
client.upsert(
    collection_name="security_docs",
    points=[
        PointStruct(
            id=1,
            vector=[0.1, 0.2, ...],  # 384 維向量
            payload={"title": "SQL Injection", "severity": "high"}
        ),
        PointStruct(
            id=2,
            vector=[0.3, 0.4, ...],
            payload={"title": "XSS Attack", "severity": "medium"}
        )
    ]
)

# 搜尋
results = client.search(
    collection_name="security_docs",
    query_vector=[0.15, 0.25, ...],
    limit=5
)

Collection 管理

建立 Collection

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
from qdrant_client.models import Distance, VectorParams

# 單一向量欄位
client.create_collection(
    collection_name="docs",
    vectors_config=VectorParams(
        size=768,
        distance=Distance.COSINE
    )
)

# 多向量欄位
client.create_collection(
    collection_name="multimodal",
    vectors_config={
        "text": VectorParams(size=768, distance=Distance.COSINE),
        "image": VectorParams(size=512, distance=Distance.EUCLID)
    }
)

距離度量

類型 | 說明
COSINE | 餘弦相似度
EUCLID | 歐幾里得距離
DOT | 內積

Collection 操作

1
2
3
4
5
6
7
8
# 取得資訊
info = client.get_collection("docs")

# 列出所有
collections = client.get_collections()

# 刪除
client.delete_collection("docs")

向量操作

新增/更新

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from qdrant_client.models import PointStruct

# 新增點
client.upsert(
    collection_name="docs",
    points=[
        PointStruct(
            id=1,
            vector=[0.1, 0.2, 0.3, ...],
            payload={
                "title": "OWASP Top 10",
                "category": "security",
                "year": 2025
            }
        )
    ]
)

# 使用 UUID
import uuid
client.upsert(
    collection_name="docs",
    points=[
        PointStruct(
            id=str(uuid.uuid4()),
            vector=[0.1, 0.2, ...],
            payload={"title": "Document"}
        )
    ]
)

批次操作

1
2
3
4
5
6
7
8
# 批次上傳
client.upload_collection(
    collection_name="docs",
    vectors=vectors_list,
    payload=payloads_list,
    ids=ids_list,
    batch_size=100
)

刪除

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
# 依 ID 刪除
client.delete(
    collection_name="docs",
    points_selector=[1, 2, 3]
)

# 依條件刪除
from qdrant_client.models import Filter, FieldCondition, MatchValue

client.delete(
    collection_name="docs",
    points_selector=Filter(
        must=[
            FieldCondition(
                key="category",
                match=MatchValue(value="deprecated")
            )
        ]
    )
)

搜尋

基本搜尋

1
2
3
4
5
6
7
8
9
results = client.search(
    collection_name="docs",
    query_vector=[0.1, 0.2, ...],
    limit=10
)

for result in results:
    print(f"ID: {result.id}, Score: {result.score}")
    print(f"Payload: {result.payload}")

過濾搜尋

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
from qdrant_client.models import Filter, FieldCondition, MatchValue, Range

results = client.search(
    collection_name="docs",
    query_vector=query_vector,
    query_filter=Filter(
        must=[
            FieldCondition(
                key="category",
                match=MatchValue(value="security")
            ),
            FieldCondition(
                key="severity",
                match=MatchValue(value="high")
            )
        ]
    ),
    limit=10
)

範圍過濾

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
results = client.search(
    collection_name="docs",
    query_vector=query_vector,
    query_filter=Filter(
        must=[
            FieldCondition(
                key="year",
                range=Range(gte=2024, lte=2026)
            )
        ]
    ),
    limit=10
)

過濾運算子

比較運算

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
from qdrant_client.models import FieldCondition, MatchAny, MatchValue, Range

# 等於
FieldCondition(key="status", match=MatchValue(value="active"))

# 範圍
FieldCondition(key="score", range=Range(gt=0.5, lt=1.0))

# 在列表中
FieldCondition(key="tag", match=MatchAny(any=["web", "api"]))

邏輯運算

1
2
3
4
5
Filter(
    must=[...],      # AND
    should=[...],    # OR
    must_not=[...]   # NOT
)

索引優化

建立 Payload 索引

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
# 加速過濾查詢
client.create_payload_index(
    collection_name="docs",
    field_name="category",
    field_schema="keyword"
)

# 數值索引
client.create_payload_index(
    collection_name="docs",
    field_name="year",
    field_schema="integer"
)

HNSW 設定

1
2
3
4
5
6
7
8
9
from qdrant_client.models import HnswConfigDiff

client.update_collection(
    collection_name="docs",
    hnsw_config=HnswConfigDiff(
        m=16,
        ef_construct=100
    )
)

LangChain 整合

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
from langchain_qdrant import Qdrant
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

# 建立向量儲存
vectorstore = Qdrant.from_documents(
    documents=docs,
    embedding=embeddings,
    url="http://localhost:6333",
    collection_name="langchain_docs"
)

# 搜尋
results = vectorstore.similarity_search("security vulnerability", k=3)

分片與複製

建立分片 Collection

1
2
3
4
5
6
client.create_collection(
    collection_name="large_docs",
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),
    shard_number=6,          # 分片數
    replication_factor=2     # 副本數
)

雲端服務

Qdrant Cloud

1
2
3
4
client = QdrantClient(
    url="https://your-cluster.qdrant.io",
    api_key="your-api-key"
)

相關連結

延伸閱讀

comments powered by Disqus
Built with Hugo
Theme Stack designed by Jimmy