
LlamaIndex

The sie-llamaindex package (Python) and @sie/llamaindex package (TypeScript) provide drop-in components for LlamaIndex: SIEEmbedding for embedding documents and queries, and SIENodePostprocessor for reranking retrieved nodes.

Install the Python package:
pip install sie-llamaindex

This installs sie-sdk and llama-index-core as dependencies.

The integration needs a running SIE server:
# Docker (recommended)
docker run -p 8080:8080 ghcr.io/superlinked/sie:latest
# Or with GPU
docker run --gpus all -p 8080:8080 ghcr.io/superlinked/sie:latest

SIEEmbedding implements LlamaIndex’s BaseEmbedding interface. Set it as the default embed model or use it directly.

from llama_index.core import Settings
from sie_llamaindex import SIEEmbedding

# Set as the default embedding model
Settings.embed_model = SIEEmbedding(
    base_url="http://localhost:8080",
    model_name="BAAI/bge-m3",
)

# Or use it directly
embed_model = SIEEmbedding(model_name="BAAI/bge-m3")
embedding = embed_model.get_text_embedding("Your text here")
print(len(embedding))  # 1024
With the default set, indexes built with LlamaIndex embed documents and queries through SIE automatically:

from llama_index.core import Settings, VectorStoreIndex, Document
from sie_llamaindex import SIEEmbedding

Settings.embed_model = SIEEmbedding(model_name="BAAI/bge-m3")

documents = [
    Document(text="Machine learning uses algorithms to learn from data."),
    Document(text="The weather is sunny today."),
]

index = VectorStoreIndex.from_documents(documents)
results = index.as_query_engine().query("What is machine learning?")

Both sync and async methods are available:

# Sync
embedding = embed_model.get_text_embedding(text)
embeddings = embed_model.get_text_embedding_batch(texts)
# Async
embedding = await embed_model.aget_text_embedding(text)
query_embedding = await embed_model.aget_query_embedding(query)
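
The async variants are plain coroutines, so they compose with asyncio. A minimal runnable sketch, assuming the server from above at localhost:8080:

import asyncio

from sie_llamaindex import SIEEmbedding

async def main() -> None:
    embed_model = SIEEmbedding(model_name="BAAI/bge-m3")
    # Embed a document and a query concurrently
    doc_emb, query_emb = await asyncio.gather(
        embed_model.aget_text_embedding("Machine learning uses algorithms."),
        embed_model.aget_query_embedding("What is machine learning?"),
    )
    print(len(doc_emb), len(query_emb))  # 1024 1024 for BAAI/bge-m3

asyncio.run(main())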

SIENodePostprocessor implements BaseNodePostprocessor. Use it to rerank retrieved nodes.

from llama_index.core.schema import NodeWithScore, TextNode, QueryBundle
from sie_llamaindex import SIENodePostprocessor

reranker = SIENodePostprocessor(
    base_url="http://localhost:8080",
    model="jinaai/jina-reranker-v2-base-multilingual",
    top_n=3,
)

nodes = [
    NodeWithScore(node=TextNode(text="Machine learning is a subset of AI."), score=0.5),
    NodeWithScore(node=TextNode(text="The weather is sunny today."), score=0.6),
    NodeWithScore(node=TextNode(text="Deep learning uses neural networks."), score=0.4),
]

reranked = reranker.postprocess_nodes(nodes, QueryBundle(query_str="What is ML?"))
for node in reranked:
    print(f"{node.score:.3f}: {node.node.get_content()[:50]}")
In a retrieval pipeline, pass the reranker to the query engine so retrieved nodes are reranked before synthesis:

from llama_index.core import VectorStoreIndex
from sie_llamaindex import SIENodePostprocessor

reranker = SIENodePostprocessor(
    model="jinaai/jina-reranker-v2-base-multilingual",
    top_n=5,
)

# Create a query engine with reranking
query_engine = index.as_query_engine(
    node_postprocessors=[reranker],
    similarity_top_k=20,  # Retrieve 20, rerank to 5
)
response = query_engine.query("What is machine learning?")

Use SIESparseEmbeddingFunction with vector stores that support hybrid search.

from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from sie_llamaindex import SIEEmbedding, SIESparseEmbeddingFunction

# Create the sparse embedding function
sparse_embed_fn = SIESparseEmbeddingFunction(
    base_url="http://localhost:8080",
    model_name="BAAI/bge-m3",
)

# Create a hybrid-capable vector store
client = QdrantClient(":memory:")
vector_store = QdrantVectorStore(
    client=client,
    collection_name="hybrid_docs",
    enable_hybrid=True,
    sparse_embedding_function=sparse_embed_fn,
)
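
Continuing the snippet above, a sketch of building and querying a hybrid index on top of this store; vector_store_query_mode and sparse_top_k are standard LlamaIndex query options, not SIE-specific:

from llama_index.core import Document, Settings, StorageContext, VectorStoreIndex

Settings.embed_model = SIEEmbedding(model_name="BAAI/bge-m3")  # dense side

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    [
        Document(text="Machine learning uses algorithms to learn from data."),
        Document(text="The weather is sunny today."),
    ],
    storage_context=storage_context,
)

# Dense and sparse retrieval are fused at query time
query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid",
    sparse_top_k=10,
)
response = query_engine.query("What is machine learning?")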

Complete example combining embeddings, reranking, and LLM generation:

from llama_index.core import Settings, VectorStoreIndex, Document
from llama_index.llms.openai import OpenAI
from sie_llamaindex import SIEEmbedding, SIENodePostprocessor

# 1. Configure SIE embeddings
Settings.embed_model = SIEEmbedding(
    base_url="http://localhost:8080",
    model_name="BAAI/bge-m3",
)
Settings.llm = OpenAI(model="gpt-4o-mini")

# 2. Create documents and an index
documents = [
    Document(text="Machine learning is a branch of artificial intelligence."),
    Document(text="Neural networks are inspired by biological neurons."),
    Document(text="Deep learning uses multiple layers of neural networks."),
    Document(text="Python is popular for machine learning development."),
]
index = VectorStoreIndex.from_documents(documents)

# 3. Create the reranker
reranker = SIENodePostprocessor(
    base_url="http://localhost:8080",
    model="jinaai/jina-reranker-v2-base-multilingual",
    top_n=2,
)

# 4. Build a query engine with reranking
query_engine = index.as_query_engine(
    node_postprocessors=[reranker],
    similarity_top_k=10,  # Retrieve 10, rerank to 2
)

# 5. Query
response = query_engine.query("What is deep learning?")
print(response)
SIEEmbedding parameters:

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| base_url | str | http://localhost:8080 | SIE server URL |
| model_name | str | BAAI/bge-m3 | Model to use |
| instruction | str | None | Instruction prefix for encoding |
| output_dtype | str | None | Output dtype: float32, float16, int8, binary |
| gpu | str | None | Target GPU type for routing |
| timeout_s | float | 180.0 | Request timeout in seconds |
| embed_batch_size | int | 10 | Batch size for embedding multiple texts |
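
For example, a client that quantizes its outputs and batches more aggressively; the instruction string is an illustrative prefix, not one required by the model:

from sie_llamaindex import SIEEmbedding

embed_model = SIEEmbedding(
    base_url="http://localhost:8080",
    model_name="BAAI/bge-m3",
    instruction="Represent this sentence for retrieval:",  # illustrative prefix
    output_dtype="float16",  # one of: float32, float16, int8, binary
    timeout_s=60.0,
    embed_batch_size=32,
)
embeddings = embed_model.get_text_embedding_batch(["first text", "second text"])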
SIENodePostprocessor parameters:

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| base_url | str | http://localhost:8080 | SIE server URL |
| model | str | jinaai/jina-reranker-v2-base-multilingual | Reranker model |
| top_n | int | None | Number of nodes to return |
| gpu | str | None | Target GPU type for routing |
| timeout_s | float | 180.0 | Request timeout in seconds |
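
And a fully specified reranker for comparison; the gpu value here is a placeholder for whatever GPU types your SIE deployment routes on:

from sie_llamaindex import SIENodePostprocessor

reranker = SIENodePostprocessor(
    base_url="http://localhost:8080",
    model="jinaai/jina-reranker-v2-base-multilingual",
    top_n=5,
    gpu="a100",  # placeholder GPU type; deployment-specific
    timeout_s=60.0,
)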