Import all required packages
We start by importing all the required packages. Here is how you do that in Python:
import os
import getpass
from premai import Prem
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams
# Prompt for the PremAI API key only when the environment does not already
# provide one; getpass keeps the key from being echoed to the terminal.
if os.environ.get("PREMAI_API_KEY") is None:
    os.environ["PREMAI_API_KEY"] = getpass.getpass("PremAI API Key:")
Define all the constants
After this, we define our constants. For Prem, we are required to set our project_id and embedding_model. You can learn more about how to get your project_id and API Key from here.
From Qdrant's side, we also need to define our server URL, to which we will send requests to perform all sorts of CRUD operations on our vector database. We also need to define our collection name. You can learn more about these concepts from Qdrant's quick start guide and concepts.
Last but not least, we also need to get our documents. For simplicity, we define a small list of documents here. In real scenarios, however, this list should be derived from some source (such as a database or an API call).
# --- Prem settings ---
# Note: project_id: 123 is a dummy project id
# You need to have an actual project ID here. Otherwise, it will throw an error.
PROJECT_ID = 123
EMBEDDING_MODEL = "text-embedding-3-large"

# --- Qdrant settings ---
COLLECTION_NAME = "prem-collection-py"
QDRANT_SERVER_URL = "http://127.0.0.1:6333"

# --- Sample corpus to embed and index ---
DOCUMENTS = [
    "This is a sample python document",
    "We will be using qdrant and premai python sdk",
]
Setup PremAI and Qdrant clients
Once we have defined all our constants, it's time to instantiate the Prem AI client and the Qdrant client. Here's how you do it in both Python and JavaScript.
# Read the API key from the environment (raises KeyError if it is unset)
# and use it to build the Prem client.
api_key = os.environ["PREMAI_API_KEY"]
prem_client = Prem(api_key=api_key)
# Qdrant client pointed at the server URL configured in QDRANT_SERVER_URL.
qdrant_client = QdrantClient(url=QDRANT_SERVER_URL)
Writing a simple helper function to fetch embeddings from documents
Let's write a simple function to fetch embeddings for a document or a list of documents. This is done using the Prem SDK. We then use this function to embed all our documents before pushing them to Qdrant's vector database.
from typing import Union, List
def get_embeddings(
    project_id: int,
    embedding_model: str,
    documents: Union[str, List[str]]
) -> List[List[float]]:
    """
    Helper function to get the embeddings from premai sdk

    Uses the module-level ``prem_client``, which must be created before this
    function is called.

    Args:
        project_id (int): The project id from prem saas platform.
        embedding_model (str): The embedding model alias to choose
        documents (Union[str, List[str]]): Single text or list of texts to
            embed. A single string is treated as a one-element list.

    Returns:
        List[List[float]]: One embedding vector (a list of floats) per item
            in the response's ``data``; an empty list when ``documents`` is
            empty.
    """
    documents = [documents] if isinstance(documents, str) else documents
    if not documents:
        # Nothing to embed, so skip the API round trip.
        return []

    response = prem_client.embeddings.create(
        project_id=project_id,
        model=embedding_model,
        input=documents,
    )
    return [item.embedding for item in response.data]
Convert Embeddings to Qdrant Points
Once we have fetched our embedding vectors with our embedding function, we convert them to Qdrant points. After this, we will use these points to upsert into our Qdrant vector DB collection.
from qdrant_client.models import PointStruct
# Embed every sample document in a single call to the helper.
embeddings = get_embeddings(
    project_id=PROJECT_ID, embedding_model=EMBEDDING_MODEL, documents=DOCUMENTS
)

# Pair each vector with its source text; the running index becomes the point id
# and the original text is stored as the point's payload.
points = [
    PointStruct(id=point_id, vector=vector, payload={"text": source_text})
    for point_id, (vector, source_text) in enumerate(zip(embeddings, DOCUMENTS))
]
Setting up Qdrant Collection
A collection is a named set of points (vectors with a payload) among which you can search.
If you already have a collection, you can skip this step; otherwise, follow the code below to create a Qdrant collection. We will be upserting our points into this collection.
# Create the collection only when it is missing, so re-running this step does
# not fail on an already existing collection.
# The vector size must match the length of the vectors produced by
# EMBEDDING_MODEL (3072 here).
if not qdrant_client.collection_exists(COLLECTION_NAME):
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=3072, distance=Distance.DOT),
    )
Insert Documents to the Collection
Once we have created our collection, we upload all our document vectors to that collection. Here is how we do that:
# Each point already carries its own id, so no separate id list is needed.
qdrant_client.upsert(
    collection_name=COLLECTION_NAME,
    points=points,
)
Searching for documents from a query in a collection
Once our collection is indexed with all our documents, we are ready to query it and search for documents that are semantically similar to the query. Here's how we do this.
# The query goes through the same embedding helper as the documents.
query = "what is the extension of python document"
query_embedding = get_embeddings(
    project_id=PROJECT_ID, embedding_model=EMBEDDING_MODEL, documents=query
)

# A single string yields a one-element list, so the query vector is item 0.
qdrant_client.search(
    collection_name=COLLECTION_NAME,
    query_vector=query_embedding[0],
)
Full code
Congratulations, now you know how to use the Prem AI SDK with the Qdrant client to do nearest-neighbor search on your documents for your LLM RAG applications. Here is our starter boilerplate code for both Python and JavaScript.
import os
import getpass
from typing import Union, List
from premai import Prem
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
def get_embeddings(
    project_id: int,
    embedding_model: str,
    documents: Union[str, List[str]]
) -> List[List[float]]:
    """
    Helper function to get the embeddings from premai sdk

    Uses the module-level ``prem_client``, which must be created before this
    function is called.

    Args:
        project_id (int): The project id from prem saas platform.
        embedding_model (str): The embedding model alias to choose
        documents (Union[str, List[str]]): Single text or list of texts to
            embed. A single string is treated as a one-element list.

    Returns:
        List[List[float]]: One embedding vector (a list of floats) per item
            in the response's ``data``; an empty list when ``documents`` is
            empty.
    """
    documents = [documents] if isinstance(documents, str) else documents
    if not documents:
        # Nothing to embed, so skip the API round trip.
        return []

    response = prem_client.embeddings.create(
        project_id=project_id,
        model=embedding_model,
        input=documents,
    )
    return [item.embedding for item in response.data]
if __name__ == "__main__":
    # Prompt for the API key only when the environment does not provide one.
    if os.environ.get("PREMAI_API_KEY") is None:
        os.environ["PREMAI_API_KEY"] = getpass.getpass("PremAI API Key:")

    # Note: project_id: 123 is a dummy project id
    # You need to have an actual project ID here. Otherwise, it will throw an error.
    PROJECT_ID = 123
    EMBEDDING_MODEL = "text-embedding-3-large"
    COLLECTION_NAME = "prem-collection-py"
    QDRANT_SERVER_URL = "http://127.0.0.1:6333"
    DOCUMENTS = [
        "This is a sample python document",
        "We will be using qdrant and premai python sdk",
    ]

    # get_embeddings() reads prem_client as a module-level name, so it has to
    # be bound here before the first call.
    api_key = os.environ["PREMAI_API_KEY"]
    prem_client = Prem(api_key=api_key)
    qdrant_client = QdrantClient(url=QDRANT_SERVER_URL)

    # Get the embedding and create Qdrant points
    embeddings = get_embeddings(
        project_id=PROJECT_ID,
        embedding_model=EMBEDDING_MODEL,
        documents=DOCUMENTS,
    )
    points = [
        PointStruct(
            id=idx,
            vector=embedding,
            payload={"text": text},
        )
        for idx, (embedding, text) in enumerate(zip(embeddings, DOCUMENTS))
    ]

    # Create the collection only if it does not exist yet, so the script can
    # be re-run without editing it. The vector size must match the length of
    # the vectors produced by EMBEDDING_MODEL (3072 here).
    if not qdrant_client.collection_exists(COLLECTION_NAME):
        qdrant_client.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=VectorParams(size=3072, distance=Distance.DOT),
        )

    # Upload all the documents to the collection. Each point already carries
    # its own id, so no separate id list is needed.
    qdrant_client.upsert(
        collection_name=COLLECTION_NAME,
        points=points,
    )

    # Query your Collection
    query = "what is the extension of python document"
    query_embedding = get_embeddings(
        project_id=PROJECT_ID,
        embedding_model=EMBEDDING_MODEL,
        documents=query,
    )
    # A single string yields a one-element list, so the query vector is item 0.
    results = qdrant_client.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_embedding[0],
    )
    # Print the hits; as a script, a bare expression would discard them.
    print(results)