pip install premai

Import all required packages

We start by importing all the required packages. Here is how you do that in Python:

import os 
import getpass
from premai import Prem

from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

# Ask for the PremAI API key interactively when the environment does not
# already provide one, and store it in os.environ for the later steps.
if "PREMAI_API_KEY" not in os.environ:
    os.environ["PREMAI_API_KEY"] = getpass.getpass("PremAI API Key:")

Define all the constants

After this we define our constants. For Prem, we are required to set our project_id and embedding_model. You can learn more about how to get your project_id and API Key from here.

On the Qdrant side, we also need to define our server URL, to which we will send requests for all sorts of CRUD operations on our vector database. We also need to define our collection name. You can learn more about these concepts from Qdrant’s quick start guide and concepts.

Last but not least, we also need our documents. For simplicity, we define a small list of documents here. In real scenarios, this list would be derived from some source (such as a database or an API call).

# NOTE: 123 is only a placeholder project id. Replace it with the id of an
# actual project; otherwise, it will throw an error.
PROJECT_ID = 123

# Embedding model alias used for both the documents and the search query.
EMBEDDING_MODEL = "text-embedding-3-large"

# Name of the Qdrant collection and the URL of the Qdrant server.
COLLECTION_NAME = "prem-collection-py"
QDRANT_SERVER_URL = "http://127.0.0.1:6333"

# Small inline corpus standing in for a real document source.
DOCUMENTS = [
    "This is a sample python document",
    "We will be using qdrant and premai python sdk",
]

Setup PremAI and Qdrant clients

Once we have defined all our constants, it’s time to instantiate the Prem AI client and the Qdrant client. Here’s how you do it in both Python and JavaScript.

# Read the PremAI API key from the environment; this lookup raises KeyError
# when PREMAI_API_KEY is not set.
api_key = os.environ["PREMAI_API_KEY"]
# Prem client created with that key.
prem_client = Prem(api_key=api_key)
# Qdrant client pointed at the server configured in QDRANT_SERVER_URL.
qdrant_client = QdrantClient(url=QDRANT_SERVER_URL)

Writing a simple helper function to fetch embeddings from documents

Let’s write a simple function to fetch embeddings for a document or a list of documents. This is done using the Prem SDK. We then use this function to embed all our documents before pushing them to Qdrant’s vector database.

from typing import Union, List

def get_embeddings(
    project_id: int,
    embedding_model: str,
    documents: Union[str, List[str]]
) -> List[List[float]]:
    """
    Fetch embedding vectors for one or more texts through the Prem SDK.

    The request is sent with the module-level ``prem_client``, so that client
    must exist before this function is called.

    Args:
        project_id (int): The project id from prem saas platform.
        embedding_model (str): The embedding model alias to choose
        documents (Union[str, List[str]]): Single text or list of texts to embed

    Returns:
        List[List[float]]: One embedding vector (a list of floats) per entry
            of the response data. An empty list is returned, without calling
            the API, when ``documents`` is empty.
    """
    # A bare string is treated as a single document.
    texts = [documents] if isinstance(documents, str) else documents
    if not texts:
        # Nothing to embed, so skip the request entirely.
        return []

    response = prem_client.embeddings.create(
        project_id=project_id,
        model=embedding_model,
        input=texts
    )
    return [item.embedding for item in response.data]

Convert Embeddings to Qdrant Points

Once we have fetched our embedding vectors with our embedding function, we convert them to Qdrant points. After this, we will use these points to upsert into our Qdrant vector DB collection.

from qdrant_client.models import PointStruct

# Embed every document with a single call to the helper.
embeddings = get_embeddings(
    project_id=PROJECT_ID,
    embedding_model=EMBEDDING_MODEL,
    documents=DOCUMENTS,
)

# Pair each vector with its source text: the position in the list is used as
# the point id and the text is kept in the payload.
points = [
    PointStruct(id=point_id, vector=vector, payload={"text": document})
    for point_id, (vector, document) in enumerate(zip(embeddings, DOCUMENTS))
]

Setting up Qdrant Collection

A collection is a named set of points (vectors with a payload) among which you can search.

If you already have a collection, you can skip this step; otherwise, follow the code below to create a Qdrant collection. We will be upserting our points into this collection.

# Skip this call when the collection has already been created.
# NOTE(review): 3072 is presumably the vector length produced by
# EMBEDDING_MODEL -- confirm it before switching to another model.
vectors_config = VectorParams(size=3072, distance=Distance.DOT)
qdrant_client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=vectors_config,
)

Insert Documents to the Collection

Once we have created our collection, we upload all our document vectors to it. Here is how we do that:

# Write every point into the collection. Each point already carries its own
# id and payload, so no separate list of ids is needed.
qdrant_client.upsert(
    collection_name=COLLECTION_NAME,
    points=points,
)

Searching for documents from a query in a collection

Once our collection is indexed with all our documents, we are ready to query it and search for documents that are semantically similar to the query. Here’s how we do this.

# Natural-language question to look up in the collection.
query = "what is the extension of python document"

# get_embeddings returns a list of vectors even for a single string, so the
# query vector is taken from index 0 below.
query_embedding = get_embeddings(
    project_id=PROJECT_ID,
    embedding_model=EMBEDDING_MODEL,
    documents=query,
)

qdrant_client.search(
    collection_name=COLLECTION_NAME,
    query_vector=query_embedding[0],
)

Full code

Congratulations, now you know how to use the Prem AI SDK with the Qdrant client to do nearest neighbor search on your documents for your LLM RAG applications. Here is our starter boilerplate code for both Python and JavaScript.

import os 
import getpass
from typing import Union, List

from premai import Prem
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct

def get_embeddings(
    project_id: int,
    embedding_model: str,
    documents: Union[str, List[str]]
) -> List[List[float]]:
    """
    Fetch embedding vectors for one or more texts through the Prem SDK.

    The request is sent with the module-level ``prem_client``, so that client
    must exist before this function is called.

    Args:
        project_id (int): The project id from prem saas platform.
        embedding_model (str): The embedding model alias to choose
        documents (Union[str, List[str]]): Single text or list of texts to embed

    Returns:
        List[List[float]]: One embedding vector (a list of floats) per entry
            of the response data. An empty list is returned, without calling
            the API, when ``documents`` is empty.
    """
    # A bare string is treated as a single document.
    texts = [documents] if isinstance(documents, str) else documents
    if not texts:
        # Nothing to embed, so skip the request entirely.
        return []

    response = prem_client.embeddings.create(
        project_id=project_id,
        model=embedding_model,
        input=texts
    )
    return [item.embedding for item in response.data]

if __name__ == "__main__":
    # Prompt for the API key only when the environment does not provide it.
    if os.environ.get("PREMAI_API_KEY") is None:
        os.environ["PREMAI_API_KEY"] = getpass.getpass("PremAI API Key:")

    # Note: project_id: 123 is a dummy project id
    # You need to have an actual project ID here. Otherwise, it will throw an error.

    PROJECT_ID = 123
    EMBEDDING_MODEL = "text-embedding-3-large"
    COLLECTION_NAME = "prem-collection-py"
    QDRANT_SERVER_URL = "http://127.0.0.1:6333"
    DOCUMENTS = [
        "This is a sample python document",
        "We will be using qdrant and premai python sdk"
    ]

    # get_embeddings() looks up prem_client as a module-level name, so it has
    # to be bound here before the first call.
    api_key = os.environ["PREMAI_API_KEY"]
    prem_client = Prem(api_key=api_key)
    qdrant_client = QdrantClient(url=QDRANT_SERVER_URL)

    # Get the embedding and create Qdrant points
    embeddings = get_embeddings(
        project_id=PROJECT_ID,
        embedding_model=EMBEDDING_MODEL,
        documents=DOCUMENTS
    )

    # The list position is used as the point id; the text goes in the payload.
    points = [
        PointStruct(
            id=idx,
            vector=embedding,
            payload={"text": text},
        ) for idx, (embedding, text) in enumerate(zip(embeddings, DOCUMENTS))
    ]

    # Create a collection. Comment this if this is created already
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=3072, distance=Distance.DOT)
    )

    # Upload all the documents to the collection. Each point already carries
    # its own id, so no separate list of ids is needed.
    qdrant_client.upsert(
        collection_name=COLLECTION_NAME,
        points=points
    )

    # Query your Collection

    query = "what is the extension of python document"
    query_embedding = get_embeddings(
        project_id=PROJECT_ID,
        embedding_model=EMBEDDING_MODEL,
        documents=query
    )

    # Keep the hits and print them: when this file runs as a script, the value
    # of a bare expression would be discarded and nothing would be shown.
    search_results = qdrant_client.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_embedding[0]
    )
    for hit in search_results:
        print(hit)