User guide

The Bagel Python client offers a comprehensive interface for interacting with the BagelDB service, enabling users to perform a wide range of operations such as creating clusters, adding documents, querying clusters, managing images, and more.

Installation

To install the Bagel Python client, you can use pip:

pip install betabageldb

Getting Started

Begin by initializing a Bagel client with the appropriate server settings. You can specify the server host, API implementation, and other settings using the Settings object.

import bagel
from bagel.config import Settings

# Connection settings: use the REST API implementation against the hosted Bagel server
server_settings = Settings(
    bagel_server_host="api.bageldb.ai",
    bagel_api_impl="rest",
)

# Build the client from the settings above
client = bagel.Client(server_settings)

Ping the Bagel Server

Verify connectivity to the Bagel server by pinging it:

print("Ping:", client.ping())

Get Bagel Server Version

Retrieve the version of the Bagel server:

print("Version:", client.get_version())

Cluster Operations

Create and Delete a Cluster

Create a new cluster and then delete it:

def create_and_delete(api):
    """Create a cluster under a freshly generated UUID name, then delete it.

    Args:
        api: Bagel client used to create and delete the cluster.
    """
    # Imported here so the snippet runs on its own when copied out of the guide.
    import uuid

    name = str(uuid.uuid4())  # Generate a unique cluster name using UUID
    cluster = api.create_cluster(name)
    print("Cluster created:", cluster)

    # Delete by the same name that was just created
    api.delete_cluster(name)
    print("Cluster deleted:", name)

create_and_delete(client)

Create, Add, and Get Documents

Create a cluster, add documents to it, and then retrieve documents:

def create_add_get(api):
    """Add two sample documents to the "testing" cluster and read one back.

    Prints the cluster's document count and the result of fetching "id1".

    Args:
        api: Bagel client used to fetch or create the cluster.
    """
    cluster = api.get_or_create_cluster("testing")

    # Each record is (id, text, metadata); the columns are split out for add()
    records = [
        ("id1", "Document 1", {"source": "source1"}),
        ("id2", "Document 2", {"source": "source2"}),
    ]
    cluster.add(
        documents=[text for _, text, _ in records],
        metadatas=[meta for _, _, meta in records],
        ids=[doc_id for doc_id, _, _ in records],
    )

    total = cluster.count()
    print("Count of documents:", total)

    fetched = cluster.get(ids=["id1"])
    print("Retrieved document:", fetched)

create_add_get(client)

Create, Add, and Find Documents

Create a cluster, add documents to it, and then query the cluster to find similar documents:

def create_add_find(api):
    """Add two sample documents to the "testing" cluster and run a text query.

    The query searches for "Document", asks for up to 5 results, and filters
    on metadata {"source": "source1"}. The results are printed.

    Args:
        api: Bagel client used to fetch or create the cluster.
    """
    cluster = api.get_or_create_cluster("testing")

    sources = ["source1", "source2"]
    cluster.add(
        documents=["Document 1", "Document 2"],
        metadatas=[{"source": src} for src in sources],
        ids=["id1", "id2"],
    )

    # Query arguments gathered in one place, then expanded into find()
    query = {
        "query_texts": ["Document"],
        "n_results": 5,
        "where": {"source": "source1"},
    }
    matches = cluster.find(**query)

    print("Search results:", matches)

create_add_find(client)

Create, Add, and Find Documents with Embeddings

Create a cluster, add documents with embeddings, and then query the cluster to find similar documents based on embeddings:

Note: Because we are sending our own embeddings, we need to create the cluster with the custom embedding model and specify the embedding dimension beforehand.

def create_add_find_em(api):
    """Store two documents with caller-supplied embeddings, then search by vector.

    The cluster is requested with embedding_model="custom" and dimension=3,
    matching the three-component vectors added below. The results are printed.

    Args:
        api: Bagel client used to fetch or create the cluster.
    """
    cluster_options = {
        "name": "testing_embeddings",
        "embedding_model": "custom",
        "dimension": 3,
    }
    cluster = api.get_or_create_cluster(**cluster_options)

    payload = {
        "embeddings": [[1.1, 2.3, 3.2], [4.5, 6.9, 4.4]],
        "metadatas": [
            {"uri": "uri1", "style": "style1"},
            {"uri": "uri2", "style": "style2"},
        ],
        "documents": ["Document 1", "Document 2"],
        "ids": ["id1", "id2"],
    }
    cluster.add(**payload)

    # Query with the same vector values as the first document added above
    query_vector = [1.1, 2.3, 3.2]
    matches = cluster.find(query_embeddings=[query_vector], n_results=5)
    print("Search results:", matches)

create_add_find_em(client)

Create, Add, Modify, and Update Documents

Create a cluster, rename it, add documents to it, and then update a document's metadata:

def create_add_modify_update(api):
    """Rename the "testing" cluster, add documents, then update one document's metadata.

    Prints the cluster name before and after the rename, and the "id1"
    document before and after its metadata is updated.

    Args:
        api: Bagel client used to fetch or create the cluster.
    """
    old_name = "testing"
    cluster = api.get_or_create_cluster(old_name)

    def _show_id1(label):
        # Print a heading followed by the current state of document "id1"
        print(label)
        print(cluster.get(ids=["id1"]))

    # Rename the cluster and show its name on either side of the call
    print("Before:", cluster.name)
    cluster.modify(name=f"new_{old_name}")
    print("After:", cluster.name)

    cluster.add(
        documents=["Document 1", "Document 2"],
        metadatas=[{"source": src} for src in ("source1", "source2")],
        ids=["id1", "id2"],
    )

    _show_id1("Before update:")
    cluster.update(ids=["id1"], metadatas=[{"source": "source3"}])
    _show_id1("After update:")

create_add_modify_update(client)

Create and Upsert Documents

Create a cluster, add documents to it, and then upsert additional documents:

def create_upsert(api):
    """Add two documents to the "testing" cluster, then upsert two more.

    The upsert reuses "id1" (already added) and introduces "id3". The
    resulting document count is printed.

    Args:
        api: Bagel client used to fetch or create the cluster.
    """
    name = "testing"
    cluster = api.get_or_create_cluster(name)

    cluster.add(
        documents=["Document 1", "Document 2"],
        metadatas=[{"source": "source1"}, {"source": "source2"}],
        ids=["id1", "id2"]
    )

    # "id1" was added above; "id3" has not been seen before
    cluster.upsert(
        documents=["Document 1", "Document 3"],
        metadatas=[{"source": "source1"}, {"source": "source3"}],
        ids=["id1", "id3"]
    )

    print("Count of documents :", cluster.count())

create_upsert(client)

Image Operations

Add Images and Find Similar Images

Add images to a cluster and then find similar images based on embeddings:

def add_image_find(api):
    """Add image files to a multi-modal cluster, then search with a stored embedding.

    Each add_image() response is printed as JSON, followed by the cluster
    count. The embedding of the first peeked item is then used as the query
    for find(), and the results are printed.

    Args:
        api: Bagel client used to fetch or create the cluster.
    """
    name = "image_add_test"
    cluster = api.get_or_create_cluster(name=name, embedding_model="bagel-multi-modal")

    img_file_list = ["image1.jpg", "image2.jpg"]  # Replace with paths to your own images
    for filename in img_file_list:
        resp = cluster.add_image(filename)
        print("Response:", resp.json())

    print("Count of images:", cluster.count())

    # Take the embedding of the first item returned by peek() as the query
    first_item = cluster.peek(1)
    embeddings = first_item.get("embeddings")[0]

    results = cluster.find(query_embeddings=embeddings, n_results=5)
    print("Search results:", results)

add_image_find(client)

Add Images by URLs and Find Similar Images

Add images to a cluster by providing their URLs and then find similar images based on embeddings:

def add_image_urls_find(api):
    """Add images by URL to a multi-modal cluster, then search with a stored embedding.

    One UUID is generated per URL to serve as its id. The add_image_urls()
    response is printed as JSON, followed by the cluster count. The embedding
    of the first peeked item is then used as the query for find(), and the
    results are printed.

    Args:
        api: Bagel client used to fetch or create the cluster.
    """
    # Imported here so the snippet runs on its own when copied out of the guide.
    import uuid

    name = "image_add_urls_test"
    cluster = api.get_or_create_cluster(name=name, embedding_model="bagel-multi-modal")

    urls = [
        "https://example.com/image1.jpg",
        "https://example.com/image2.jpg",
    ]  # Replace with your own image URLs
    ids = [str(uuid.uuid4()) for _ in urls]  # One unique id per URL
    resp = cluster.add_image_urls(ids=ids, urls=urls)
    print("Response:", resp.json())

    print("Count of images:", cluster.count())

    # Take the embedding of the first item returned by peek() as the query
    first_item = cluster.peek(1)
    embeddings = first_item.get("embeddings")[0]

    results = cluster.find(query_embeddings=embeddings, n_results=5)
    print("Search results:", results)

add_image_urls_find(client)

With these functionalities documented, users can effectively utilize the Bagel Python client to interact with the BagelDB service for various tasks, including document and image management, similarity search, and more.

Last updated