Skip to content

ChromadbStore#

Info

To see an example of using ChromadbStore, visit: How-To: Use Chromadb to Store Similarity Index

dbally.similarity.ChromadbStore #

ChromadbStore(index_name: str, chroma_client: ClientAPI, embedding_function: Union[EmbeddingClient, EmbeddingFunction], max_distance: Optional[float] = None, distance_method: Literal['l2', 'ip', 'cosine'] = 'l2')

Bases: SimilarityStore

Class that stores text embeddings using Chroma

Source code in src/dbally/similarity/chroma_store.py
def __init__(
    self,
    index_name: str,
    chroma_client: chromadb.ClientAPI,
    embedding_function: Union[EmbeddingClient, chromadb.EmbeddingFunction],
    max_distance: Optional[float] = None,
    distance_method: Literal["l2", "ip", "cosine"] = "l2",
):
    """
    Constructs a similarity store backed by Chroma.

    Args:
        index_name: The name of the index (presumably used as the Chroma collection name;
            the lookup itself happens outside this method).
        chroma_client: The Chroma client used to reach the database.
        embedding_function: Either an `EmbeddingClient` or a native Chroma embedding function.
        max_distance: Optional distance limit kept on the instance as `self.max_distance`.
        distance_method: The distance method, recorded under the "hnsw:space" metadata key.
    """
    super().__init__()

    # Metadata kept for the collection; "hnsw:space" carries the chosen distance method.
    self._metadata = {"hnsw:space": distance_method}

    # Plain configuration, stored as given.
    self.max_distance = max_distance
    self.embedding_function = embedding_function
    self.chroma_client = chroma_client
    self.index_name = index_name

index_name instance-attribute #

index_name = index_name

chroma_client instance-attribute #

chroma_client = chroma_client

embedding_function instance-attribute #

embedding_function = embedding_function

max_distance instance-attribute #

max_distance = max_distance

store async #

store(data: List[str]) -> None

Fills the Chroma collection with embeddings of the provided strings. The SHA-256 hash of each string is used as its id.

PARAMETER DESCRIPTION
data

The data to store.

TYPE: List[str]

Source code in src/dbally/similarity/chroma_store.py
async def store(self, data: List[str]) -> None:
    """
    Fills the chroma collection with embeddings of the provided strings. The SHA-256 hex digest
    of each string is used as its id.

    Repeated strings are stored only once, and an empty `data` list is a no-op.

    Args:
        data: The data to store.
    """

    # Identical strings hash to the same id and Chroma rejects a batch that contains
    # repeated ids, so drop duplicates while keeping the first-seen order.
    unique_data = list(dict.fromkeys(data))

    # Chroma also rejects an empty batch; there is nothing to store in that case.
    if not unique_data:
        return

    ids = [sha256(x.encode("utf-8")).hexdigest() for x in unique_data]

    collection = self._get_chroma_collection()

    if isinstance(self.embedding_function, EmbeddingClient):
        # Embeddings are computed here and handed to Chroma together with the documents.
        embeddings = await self.embedding_function.get_embeddings(unique_data)

        collection.add(ids=ids, embeddings=embeddings, documents=unique_data)
    else:
        # No embeddings are passed; only ids and documents go to the collection.
        collection.add(ids=ids, documents=unique_data)

find_similar async #

find_similar(text: str) -> Optional[str]

Finds the most similar text in the Chroma collection, or returns None if the most similar text has a distance greater than self.max_distance.

PARAMETER DESCRIPTION
text

The text to find similar to.

TYPE: str

RETURNS DESCRIPTION
Optional[str]

The most similar text or None if no similar text is found.

Source code in src/dbally/similarity/chroma_store.py
async def find_similar(self, text: str) -> Optional[str]:
    """
    Looks up the single closest entry to `text` in the chroma collection.

    The raw query result is handed to `self._return_best_match`, which yields the value
    returned here; None is returned when the closest text is farther away than
    `self.max_distance`.

    Args:
        text: The text to find similar to.

    Returns:
        The most similar text or None if no similar text is found.
    """

    chroma_collection = self._get_chroma_collection()

    if not isinstance(self.embedding_function, EmbeddingClient):
        # Not an EmbeddingClient: pass the raw text and leave embedding to the collection.
        query_kwargs = {"query_texts": [text]}
    else:
        # EmbeddingClient: embed the query here and search by vector.
        query_kwargs = {"query_embeddings": await self.embedding_function.get_embeddings([text])}

    # Only the single nearest neighbour is requested.
    nearest = chroma_collection.query(n_results=1, **query_kwargs)

    return self._return_best_match(nearest)