Play with DingoDB using DingoClient

For better performance, DingoDB provides a comprehensive and powerful client API for operating on the database, covering both DDL and DML operations.

Examples

1. Document_index

from dingodb import SDKDocumentDingoDB, SDKClient
from dingodb.common.document_rep import DocumentType, DocumentColumn, DocumentSchema


# Comma-separated host:port endpoints handed to SDKClient
# (presumably the DingoDB coordinator addresses; adjust for your deployment).
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)

# Document-index client built on the SDK client; every call below goes through `x`.
x = SDKDocumentDingoDB(sdk_client)
print(x)

# Name of the index used throughout this example.
index_name = "document_index_test"

delete_index

# Drop the index by name first (presumably so the example can be re-run).
delete_index_out = x.delete_index(index_name)
print(delete_index_out)
# `time` is needed by the time.sleep(5) call in the create_index step.
import time

create_index

# Build the document schema: one column for each field used by the sample
# documents in the "make dataset" step.
scheme = DocumentSchema()
for column_name, column_type in (
    ("text", DocumentType.STRING),
    ("i64", DocumentType.INT64),
    ("f64", DocumentType.DOUBLE),
    ("bytes", DocumentType.BYTES),
    ("bool", DocumentType.BOOL),
    ("datetime", DocumentType.DATETIME),
):
    col = DocumentColumn(column_name, column_type)
    scheme.add_document_column(col)

# Create the index from the schema. The positional 3 and `operand` are
# presumably the replica count and region split points; confirm against the
# create_index signature.
create_index_out = x.create_index(index_name, scheme, 3, operand=[5, 10, 20])
# Alternative call without `operand`:
# create_index_out = x.create_index(index_name, scheme, 3)
print(create_index_out)
# Pause before using the index (presumably to let creation finish).
time.sleep(5)

make dataset

# Sample dataset: ten documents keyed by the odd ids 3..21. Each document has
# one value per schema column; i64 is 1000 + id and f64 is the same number as
# a float. (The last row previously had f64 = 10021.0, breaking that pattern.)
ids = [3, 5, 7, 9, 11, 13, 15, 17, 19, 21]
documents = [
    {"text": "Ancient empires rise and fall, shaping history's course.", "i64": 1003, "f64": 1003.0, "bytes": "bytes_data_3", "bool": True, "datetime": "2021-01-01T00:00:00Z"},
    {"text": "Artistic expressions reflect diverse cultural heritages.", "i64": 1005, "f64": 1005.0, "bytes": "bytes_data_5", "bool": False, "datetime": "2021-01-01T00:00:00Z"},
    {"text": "Social movements transform societies, forging new paths.", "i64": 1007, "f64": 1007.0, "bytes": "bytes_data_7", "bool": True, "datetime": "2022-01-01T00:00:00Z"},
    {"text": "Economies fluctuate, reflecting the complex interplay of global forces.", "i64": 1009, "f64": 1009.0, "bytes": "bytes_data_9", "bool": False, "datetime": "2022-01-01T00:00:00Z"},
    {"text": "Strategic military campaigns alter the balance of power.", "i64": 1011, "f64": 1011.0, "bytes": "bytes_data_11", "bool": True, "datetime": "2023-01-01T00:00:00Z"},
    {"text": "Quantum leaps redefine understanding of physical laws.", "i64": 1013, "f64": 1013.0, "bytes": "bytes_data_13", "bool": False, "datetime": "2023-01-01T00:00:00Z"},
    {"text": "Chemical reactions unlock mysteries of nature.", "i64": 1015, "f64": 1015.0, "bytes": "bytes_data_15", "bool": True, "datetime": "2024-01-01T00:00:00Z"},
    {"text": "Philosophical debates ponder the essence of existence.", "i64": 1017, "f64": 1017.0, "bytes": "bytes_data_17", "bool": False, "datetime": "2024-01-01T00:00:00Z"},
    {"text": "Marriages blend traditions, celebrating love's union.", "i64": 1019, "f64": 1019.0, "bytes": "bytes_data_19", "bool": True, "datetime": "2025-01-01T00:00:00Z"},
    {"text": "Explorers discover uncharted territories, expanding world maps.", "i64": 1021, "f64": 1021.0, "bytes": "bytes_data_21", "bool": False, "datetime": "2025-01-01T00:00:00Z"},
]

# Insert the sample documents together with their ids (both lists have ten
# entries; presumably documents[i] is stored under ids[i]).
document_add_out = x.document_add(index_name, documents, ids)
print(document_add_out)
# Also print the result's to_dict() form.
print(document_add_out.to_dict())

document_query

# Query documents by id. The positional True and the ["text", "i64"] list
# presumably request scalar data limited to those two fields; confirm against
# the document_query signature.
document_query_out = x.document_query(index_name, ids, True, ["text", "i64"])
print(document_query_out)

document_get_border

# Fetch the index border twice, once with the flag True and once with False
# (presumably the minimum vs. maximum document id; confirm which is which).
document_get_border_out = x.document_get_border(index_name, True)
print(document_get_border_out)
document_get_border_out = x.document_get_border(index_name, False)
print(document_get_border_out)

document_scan_query

# Three scans. The positional arguments after index_name are presumably
# start id, end id, reverse flag and max result count; confirm against the
# document_scan_query signature.
# 1) From the first id to the last id, flag False, limit 2.
document_scan_query_out = x.document_scan_query(index_name, ids[0], ids[-1], False, 2)
print(document_scan_query_out)
# 2) From the last id to the first id, flag True, limit 2.
document_scan_query_out = x.document_scan_query(index_name, ids[-1], ids[0], True, 2)
print(document_scan_query_out)
# 3) Past the last id (ids[-1] + 10), limit 100, with an extra True and the
#    ["text", "i64"] field list (presumably scalar data for those fields only).
document_scan_query_out = x.document_scan_query(index_name, ids[0], ids[-1] + 10, False, 100, True, ["text", "i64"])
print(document_scan_query_out)

document_index_metrics

# Fetch and print the metrics reported for the index.
document_metrics_out = x.document_index_metrics(index_name)
print(document_metrics_out)

document_count

# Count documents using the bounds 0 and 19 (presumably an id range; whether
# the upper bound is inclusive is not shown here, so confirm).
document_count_out = x.document_count(index_name, 0, 19)
print(document_count_out)

document_delete

# Delete the documents by passing the same id list used when adding them.
document_delete_out = x.document_delete(index_name, ids)
print(document_delete_out)

2. Document_regex_index

import

from dingodb import SDKDocumentDingoDB, SDKClient
from dingodb.common.document_rep import DocumentType, DocumentColumn, DocumentSchema


# Comma-separated host:port endpoints handed to SDKClient
# (presumably the DingoDB coordinator addresses; adjust for your deployment).
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)

# Document-index client built on the SDK client; every call below goes through `x`.
x = SDKDocumentDingoDB(sdk_client)
print(x)

# Name of the index used throughout this example.
index_name = "document_regex_index_test"

delete_index

# Drop the index by name first (presumably so the example can be re-run).
delete_index_out = x.delete_index(index_name)
print(delete_index_out)

create_index

import time

# Build a schema with two STRING columns, "title" and "text", matching the
# fields of the sample documents in the "make dataset" step.
scheme = DocumentSchema()
for column_name in ("title", "text"):
    col = DocumentColumn(column_name, DocumentType.STRING)
    scheme.add_document_column(col)

# Create the index from the schema. The positional 3 and `operand` are
# presumably the replica count and region split points; confirm against the
# create_index signature.
create_index_out = x.create_index(index_name, scheme, 3, operand=[5, 10, 20])
# Alternative call without `operand`:
# create_index_out = x.create_index(index_name, scheme, 3)
print(create_index_out)
# Pause before using the index (presumably to let creation finish).
time.sleep(5)

make dataset

# Three sample documents. Titles have lengths 1, 2 and 3, and two of the
# texts contain "Diary" while one contains "Dairy"; the regex searches below
# rely on exactly these differences.
ids = [1, 2, 3]
documents = [
    {"title" : "a", "text" : "The Diary of Muadib"},
    {"title" : "bb", "text" : "A Dairy Cow"},
    {"title" : "ccc", "text" : "The Diary of a Young Girl"}
]

# Insert the documents together with their ids.
document_add_out = x.document_add(index_name, documents, ids)
print(document_add_out)

DocumentRegexSearch

#  The regex Dia.* is passed base64-encoded: base64("Dia.*") == "RGlhLio="
#  Intended match: text contains "Dia"
document_search_out = x.document_search(index_name, "text:RE [RGlhLio=]", 5, False, with_scalar_data=True)
print(document_search_out)

DocumentSearchLength

#  The regex (.{0,2}) is passed base64-encoded: base64("(.{0,2})") == "KC57MCwyfSk="
#  Intended match: title length <= 2
document_search_out = x.document_search(index_name, "title:RE [KC57MCwyfSk=]", 5, False, with_scalar_data=True)
print(document_search_out)

DocumentSearchAnd

#  base64("(.{0,2})") == "KC57MCwyfSk="
#  base64("Dia.*")    == "RGlhLio="
#  Intended match: title length <= 2 AND text contains "Dia"
document_search_out = x.document_search(index_name, "title:RE [KC57MCwyfSk=] AND text:RE [RGlhLio=]", 5, False, with_scalar_data=True)
print(document_search_out)

3. Rawkv

import

import numpy as np
import os

from dingodb import SDKRawKVDingoDB, SDKClient

# Prerequisite: a region covering the key range (wa, wc) used by this example
# must be created before SDKRawKVDingoDB is used.

# Comma-separated host:port endpoints handed to SDKClient
# (presumably the DingoDB coordinator addresses; adjust for your deployment).
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)
# Raw key-value client; every call below goes through `x`.
x = SDKRawKVDingoDB(sdk_client)
print(x)

put

# Write a single key/value pair.
x.rawkv_put("wb01", "value1")

batch_put

# Write keys wb02..wb15 in one call. Each entry is a (key, value) tuple; the
# key suffix is zero-padded to two digits, the value suffix is not.
batch_pairs = [(f"wb{n:02d}", f"value{n}") for n in range(2, 16)]
x.rawkv_batch_put(batch_pairs)

get

# Read back the key written by the put example. The result is not printed
# here; wrap the call in print() when running this as a script.
x.rawkv_get("wb01")

batch_get and put_if_absent

# Read keys wb01..wb15 in one call and convert each returned item with to_dict().
lookup_keys = [f"wb{n:02d}" for n in range(1, 16)]
[kv.to_dict() for kv in x.rawkv_batch_get(lookup_keys)]
# Write wb16 through the put-if-absent call (wb16 has not been written earlier in this example).
x.rawkv_put_if_absent("wb16", "value16")

delete, batch_delete, batch_put_if_absent and scan

# Remove the single key added by put_if_absent, then two keys in one batch.
x.rawkv_delete("wb16")
x.rawkv_batch_delete(["wb01", "wb02"])
# Offer wb01..wb15 again through batch put-if-absent; at this point only wb01
# and wb02 have been deleted. Each returned item is converted with to_dict().
candidate_pairs = [(f"wb{n:02d}", f"value{n}") for n in range(1, 16)]
[kv.to_dict() for kv in x.rawkv_batch_put_if_absent(candidate_pairs)]
# Scan between "wb" and "wc"; 15 is presumably the maximum number of results.
[kv.to_dict() for kv in x.rawkv_scan("wb", "wc", 15)]

delete range

# Delete the key range given by "wa" and "wc", which contains every wb* key
# written in this example.
x.rawkv_delete_range("wa", "wc")

4. Region-creator

import

import numpy as np
import os

from dingodb import SDKRegionCreatorDingoDB, SDKClient

# Comma-separated host:port endpoints handed to SDKClient
# (presumably the DingoDB coordinator addresses; adjust for your deployment).
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)
# Region-creator client; every call below goes through `x`.
x = SDKRegionCreatorDingoDB(sdk_client)
print(x)

drop_region

# Drop a region; 80031 is presumably a region id, so substitute one that
# exists in your cluster.
x.drop_region(80031)

create_region

# NOTE(review): the meaning of 10 is not shown here (presumably the number of
# region ids to allocate); confirm against create_region_id.
x.create_region_id(10)
# Create a region named "test1" with the key bounds "wb00000000" and
# "wc000000"; the trailing 80033 is presumably the region id (confirm).
x.create_region("test1","wb00000000","wc000000",80033)

5. Vector_index

import numpy as np
import os

from dingodb import SDKVectorDingoDB, SDKClient
from dingodb.common.vector_rep import ScalarType, ScalarColumn, ScalarSchema

# Comma-separated host:port endpoints handed to SDKClient
# (presumably the DingoDB coordinator addresses; adjust for your deployment).
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)
# Vector-index client; every call below goes through `x`.
x = SDKVectorDingoDB(sdk_client)
print(x)

# Name of the index used throughout this example.
index_name = "test_index_grpc1"

delete_index

# Drop the index by name first (presumably so the example can be re-run).
x.delete_index(index_name)

create_index

# Tip: help(x.create_index) prints the method's documentation.
# Alternative without a scalar schema (binary_flat index, dimension 8):
# x.create_index(index_name, 8, "binary_flat", "hamming", 3, operand=[5,10,15,20])
# Scalar schema with a single DOUBLE column named "id". The trailing True is
# presumably a flag enabling faster filtering on this column (confirm against
# ScalarColumn).
col = ScalarColumn("id",ScalarType.DOUBLE,True)
sca = ScalarSchema()
sca.add_scalar_column(col)
# Create a dimension-16 "binary_ivf_flat" index with the "hamming" metric and
# the schema above; 3 and `operand` are presumably replica count and split points.
x.create_index_with_schema(index_name, 16,sca, "binary_ivf_flat", "hamming", 3, operand=[100,500,1500,3000,6000])

make dataset

# Dataset parameters.
d = 16   # dimension (same value passed to create_index_with_schema)
bd = 2   # binary dimension (presumably d / 8 bytes per vector; confirm)
nb = 4   # database size (number of vectors)

# Fixed seed so the example is reproducible.
np.random.seed(1234)
# Random integers in [0, 255): numpy's upper bound is exclusive, so 255 itself
# is never generated.
xb = np.random.randint(0, 255, size=(nb, bd))
print(xb.shape)
# Offset the first column of each row by its row index.
xb[:, 0] = xb[:, 0] + np.arange(nb)
print(xb)
print(xb.shape)

# One id and one scalar-data dict per vector.
ids = list(range(1, nb + 1))
datas = [{"id": scalar_value} for scalar_value in (50, 120, 130, 4.40)]
vectors = xb.tolist()

add

# Submit the same four vectors, scalar data and ids ten times in a row; the
# final argument marks the vector values as "binary".
for _ in range(10):
    x.vector_add(index_name, datas, vectors, ids, "binary")

delete

# Delete the vectors by passing the same id list used when adding them.
x.vector_delete(index_name,ids)

get_auto_increment_id

# Read the index's auto-increment id (per the method name). The result is not
# printed here; wrap the call in print() when running this as a script.
x.vector_get_auto_increment_id(index_name)

update_auto_increment_id

# Update the index's auto-increment id, passing 16 (presumably the new start id; confirm).
x.vector_update_auto_increment_id(index_name,16)

upsert

# Upsert the same four vectors, scalar data and ids; the final argument marks
# the vector values as "binary".
x.vector_upsert(index_name, datas, vectors, ids,"binary")

search with pre_filter or post_filter

# Search using the first dataset vector as the query. 10 is presumably the
# top-k, and the dict is a scalar filter on the "id" column; how pre- vs.
# post-filtering is selected is not shown here, so confirm against vector_search.
x.vector_search(index_name, vectors[0], 10, {"meta_expr": {"id": 1}},value_type="binary")

get vectors by id

# Fetch vectors by id. Note that id 6 is not among the ids [1, 2, 3, 4] added above.
x.vector_get(index_name, [1, 2, 6])

scan

# Reverse scan with end_id=0. The positional 20 and 60 are presumably the
# start id and the maximum result count; confirm against vector_scan.
x.vector_scan(index_name, 20, 60,is_reverse=True,end_id=0)

count

# Count the vectors in the index. The result is not printed here; wrap the
# call in print() when running this as a script.
x.vector_count(index_name)

metrics

# Fetch the metrics reported for the index (result not printed here).
x.vector_metrics(index_name)

get_max_index_row

# Query the index's maximum row value (per the method name; result not printed here).
x.get_max_index_row(index_name)

delete_index

# Clean up: drop the example index.
x.delete_index(index_name)

6. Vector_binary_index

import

import numpy as np
import os

from dingodb import SDKVectorDingoDB, SDKClient
from dingodb.common.vector_rep import ScalarType, ScalarColumn, ScalarSchema

# Comma-separated host:port endpoints handed to SDKClient
# (presumably the DingoDB coordinator addresses; adjust for your deployment).
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)
# Vector-index client; every call below goes through `x`.
x = SDKVectorDingoDB(sdk_client)
print(x)

# Name of the index used throughout this example.
index_name = "test_index_grpc1"

delete_index

# Drop the index by name first (presumably so the example can be re-run).
x.delete_index(index_name)

create_index

# Tip: help(x.create_index) prints the method's documentation.
# Alternative without a scalar schema (binary_flat index, dimension 8):
# x.create_index(index_name, 8, "binary_flat", "hamming", 3, operand=[5,10,15,20])
# Scalar schema with a single DOUBLE column named "id". The trailing True is
# presumably a flag enabling faster filtering on this column (confirm against
# ScalarColumn).
col = ScalarColumn("id",ScalarType.DOUBLE,True)
sca = ScalarSchema()
sca.add_scalar_column(col)
# Create a dimension-16 "binary_ivf_flat" index with the "hamming" metric and
# the schema above; 3 and `operand` are presumably replica count and split points.
x.create_index_with_schema(index_name, 16,sca, "binary_ivf_flat", "hamming", 3, operand=[100,500,1500,3000,6000])

make dataset

# Dataset parameters.
d = 16   # dimension (same value passed to create_index_with_schema)
bd = 2   # binary dimension (presumably d / 8 bytes per vector; confirm)
nb = 4   # database size (number of vectors)

# Fixed seed so the example is reproducible.
np.random.seed(1234)
# Random integers in [0, 255): numpy's upper bound is exclusive, so 255 itself
# is never generated.
xb = np.random.randint(0, 255, size=(nb, bd))
print(xb.shape)
# Offset the first column of each row by its row index.
xb[:, 0] = xb[:, 0] + np.arange(nb)
print(xb)
print(xb.shape)

# One id and one scalar-data dict per vector.
ids = list(range(1, nb + 1))
datas = [{"id": scalar_value} for scalar_value in (50, 120, 130, 4.40)]
vectors = xb.tolist()

vector_add

# Submit the same four vectors, scalar data and ids ten times in a row; the
# final argument marks the vector values as "binary".
for _ in range(10):
    x.vector_add(index_name, datas, vectors, ids, "binary")

vector_delete

# Delete the vectors by passing the same id list used when adding them.
x.vector_delete(index_name,ids)

vector_get_auto_increment

# Read the index's auto-increment id (per the method name). The result is not
# printed here; wrap the call in print() when running this as a script.
x.vector_get_auto_increment_id(index_name)

vector_update_auto_increment

# Update the index's auto-increment id, passing 16 (presumably the new start id; confirm).
x.vector_update_auto_increment_id(index_name,16)

vector_upsert

# Upsert the same four vectors, scalar data and ids; the final argument marks
# the vector values as "binary".
x.vector_upsert(index_name, datas, vectors, ids,"binary")

vector_get

# Fetch vectors by id. Note that id 6 is not among the ids [1, 2, 3, 4] added above.
x.vector_get(index_name, [1, 2, 6])

vector_scan

# Reverse scan with end_id=0. The positional 20 and 60 are presumably the
# start id and the maximum result count; confirm against vector_scan.
x.vector_scan(index_name, 20, 60,is_reverse=True,end_id=0)

vector_count

# Count the vectors in the index. The result is not printed here; wrap the
# call in print() when running this as a script.
x.vector_count(index_name)

vector_metrics

# Fetch the metrics reported for the index (result not printed here).
x.vector_metrics(index_name)

get_max_index_row

# Query the index's maximum row value (per the method name; result not printed here).
x.get_max_index_row(index_name)

delete_index

# Clean up: drop the example index.
x.delete_index(index_name)

7. Vector_diskann_index

import

import numpy as np
import os

from dingodb import SDKVectorDingoDB, SDKClient
from dingodb.common.vector_rep import ScalarType, ScalarColumn, ScalarSchema

# Comma-separated host:port endpoints handed to SDKClient. Uses the same
# local addresses as the other examples in this document (previously a
# private 172.30.14.11 address); replace with your cluster's endpoints.
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)
# Vector-index client; every call below goes through `x`.
x = SDKVectorDingoDB(sdk_client)
print(x)

# Name of the index used throughout this example.
index_name = "test_index_grpc"

delete_index

# Drop the index by name first (presumably so the example can be re-run).
x.delete_index(index_name)

create_index

# Tip: help(x.create_index) prints the method's documentation.
# Create a dimension-6 "diskann" index with the "euclidean" metric. The
# index_config keys are passed through as given; 3 and `operand` are
# presumably replica count and split points (confirm against create_index).
x.create_index(index_name, 6, "diskann", "euclidean", 3, index_config={"valueType": "float","searchListSize": 100,"maxDegree": 64}, operand=[5,10,15,20])

make dataset

# Dataset parameters.
d = 6    # dimension (same value passed to create_index)
nb = 4   # database size (number of vectors)

# Fixed seed so the example is reproducible.
np.random.seed(1234)
# nb x d uniform random values, stored as float32.
xb = np.random.random(size=(nb, d)).astype("float32")
print(xb)
print(xb.shape)
# Nudge the first component of each row by row_index / 1000.
xb[:, 0] += np.arange(nb) / 1000.0
print(xb)
print(xb.shape)

# One id and one scalar-data dict per vector: {"a1": "b1"}, ..., {"a4": "b4"}.
ids = list(range(1, nb + 1))
datas = [{f"a{n}": f"b{n}"} for n in ids]
vectors = xb.tolist()

vector_import_add

# Import the same four vectors, scalar data and ids four times in a row.
for _ in range(4):
    x.vector_import_add(index_name, datas, vectors, ids)

vector_build_by_index

# Trigger a build for the whole index (per the method name; presumably this
# must follow the import step above, so confirm the required ordering).
x.vector_build_by_index(index_name)

vector_status_by_index

# Query the status for the whole index and convert each returned item with to_dict().
[r.to_dict() for r in x.vector_status_by_index(index_name)]

vector_count_memory

# Count vectors via the in-memory counter (per the method name; result not printed here).
x.vector_count_memory(index_name)

vector_search

# Search using the first dataset vector as the query, leaving all other
# search parameters at their defaults.
x.vector_search(index_name, vectors[0])

vector_load_by_index, vector_reset_by_index and per-region operations

# Index-wide load and reset; each returned item is converted with to_dict().
[r.to_dict() for r in x.vector_load_by_index(index_name)]
[r.to_dict() for r in x.vector_reset_by_index(index_name)]

# Per-region variants of status/build/load/reset. They take a list of ids
# (presumably region ids; the values below are examples, so substitute ids
# that exist in your cluster). Named `region_ids` rather than `id` so the
# builtin id() is not shadowed.
region_ids = [1, 80001]
[r.to_dict() for r in x.vector_status_by_region(index_name, region_ids)]
[r.to_dict() for r in x.vector_build_by_region(index_name, region_ids)]
[r.to_dict() for r in x.vector_load_by_region(index_name, region_ids)]
[r.to_dict() for r in x.vector_reset_by_region(index_name, region_ids)]

delete

# Remove the imported vectors by id, then drop the example index.
x.vector_import_delete(index_name,ids)
x.delete_index(index_name)