RAG / Retrieval
Embeddings quickstart
Single-text or batch vectorization in 3 lines. The embeddings endpoint is one of the few we DON'T auto-route — your vector DB locks in the dimension, so the model must stay pinned.
python
# ⚠ Embeddings differ from chat / image — DO NOT switch models!
# text-embedding-3-large = 3072 dim / bge-m3 = 1024 dim / voyage-3 = 1024 dim
# The vector DB schema fixes the dimension: indexing and querying MUST
# use the same model, otherwise cosine distance is meaningless.
from nexevo_ai import Nexevo

client = Nexevo()

# Which model to pick:
#   text-embedding-3-large → English RAG benchmark, 3072 dim
#   bge-m3                 → Chinese RAG / cheap default, 1024 dim
#   voyage-3-large         → high-quality English, 1024 dim
#   jina-embeddings-v3     → multilingual, 1024 dim

# Single text: one string in, one vector out.
response = client.embeddings.create(
    model="text-embedding-3-large",
    input="Nexevo.ai is an LLM gateway",
)
vector = response["data"][0]["embedding"]
print(f"Dimension: {len(vector)}")
print(f"Tokens used: {response['usage']['prompt_tokens']}")

# Batch: pass a list to embed several texts in one call — cheaper and
# faster than looping over single-text requests.
batch_response = client.embeddings.create(
    model="bge-m3",  # recommended for Chinese / multilingual; 1024 dim
    input=["doc 1", "doc 2", "doc 3"],
)
for item in batch_response["data"]:
    print(f"index={item['index']} dim={len(item['embedding'])}")