API Reference
- Python
- JavaScript
Initialize client - Python
In-memory chroma
import chromadb
client = chromadb.Client()
In-memory chroma with saving/loading to disk
In this mode, Chroma persists data between sessions. On load, it reads the data from the directory you specify; on exit, it saves the data back to that directory.
import chromadb
from chromadb.config import Settings
client = chromadb.Client(Settings(chroma_db_impl="duckdb+parquet",
persist_directory="/path/to/persist/directory"
))
Run chroma just as a client to talk to a backend service
For production use cases, an in-memory database will not cut it. Run docker-compose up -d --build
to run a production backend in Docker on your local computer. Simply update your API initialization and then use the API the same way as before.
import chromadb
from chromadb.config import Settings
chroma_client = chromadb.Client(Settings(chroma_api_impl="rest",
chroma_server_host="localhost",
chroma_server_http_port="8000"
))
Methods on Client
Methods related to Collections
Collections are similar to AWS S3 buckets in their naming requirements because they are used in URLs in the REST API. Here's the full list.
# list all collections
client.list_collections()
# make a new collection
collection = client.create_collection("testname")
# get an existing collection
collection = client.get_collection("testname")
# get a collection or create if it doesn't exist already
collection = client.get_or_create_collection("testname")
# delete a collection
client.delete_collection("testname")
Utility methods
# resets entire database - this *can't* be undone!
client.reset()
# returns timestamp to check if service is up
client.heartbeat()
Methods on Collection
# change the name or metadata on a collection
collection.modify(name="testname2")
# get the number of items in a collection
collection.count()
# add new items to a collection
# either one at a time
collection.add(
embeddings=[1.5, 2.9, 3.4],
metadatas={"uri": "img9.png", "style": "style1"},
documents="doc1000101",
ids="uri9",
)
# or many, up to 100k+!
collection.add(
embeddings=[[1.5, 2.9, 3.4], [9.8, 2.3, 2.9]],
metadatas=[{"style": "style1"}, {"style": "style2"}],
ids=["uri9", "uri10"],
)
collection.add(
documents=["doc1000101", "doc288822"],
metadatas=[{"style": "style1"}, {"style": "style2"}],
ids=["uri9", "uri10"],
)
# update items in a collection
collection.update()
# upsert items. new items will be added, existing items will be updated.
collection.upsert(
ids=["id1", "id2", "id3", ...],
embeddings=[[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...],
metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...],
documents=["doc1", "doc2", "doc3", ...],
)
# get items from a collection
collection.get()
# convenience, get first 5 items from a collection
collection.peek()
# do nearest neighbor search to find similar embeddings or documents, supports filtering
collection.query(
query_embeddings=[[1.1, 2.3, 3.2], [5.1, 4.3, 2.2]],
n_results=2,
where={"style": "style2"}
)
# delete items
collection.delete()
# advanced: manually create the embedding search index
collection.create_index()
Run the backend
Run docker-compose up -d --build
to run a backend in Docker on your local computer.
Initialize client - JS
import { ChromaClient } from 'chromadb'
const client = new ChromaClient();
Methods on Client
Methods related to Collections
Collections are similar to AWS S3 buckets in their naming requirements because they are used in URLs in the REST API. Here's the full list.
// list all collections
await client.listCollections()
// make a new collection
const collection = await client.createCollection("testname")
// get an existing collection
const collection = await client.getCollection("testname")
// delete a collection
await client.deleteCollection("testname")
Utility methods
// resets entire database - this *can't* be undone!
await client.reset()
Methods on Collection
// get the number of items in a collection
await collection.count()
// add new items to a collection
// either one at a time
await collection.add(
"id1",
[1.5, 2.9, 3.4],
{"source": "my_source"},
"This is a document",
)
// or many, up to 100k+!
await collection.add(
["uri9", "uri10"],
[[1.5, 2.9, 3.4], [9.8, 2.3, 2.9]],
[{"style": "style1"}, {"style": "style2"}],
["This is a document", 'that is a document']
)
// including just documents
await collection.add(
["uri9", "uri10"],
undefined,
[{"style": "style1"}, {"style": "style2"}],
["doc1000101", "doc288822"],
)
// or use upsert, so records will be updated if they already exist
// (instead of throwing an error)
await collection.upsert(
"id1",
[1.5, 2.9, 3.4],
{"source": "my_source"},
"This is a document",
)
// get items from a collection
await collection.get()
// convenience, get first 5 items from a collection
await collection.peek()
// do nearest neighbor search to find similar embeddings or documents, supports filtering
await collection.query(
[[1.1, 2.3, 3.2], [5.1, 4.3, 2.2]], // query embeddings
2, // number of results
{"style": "style2"} // where filter
)
// delete items
await collection.delete()