Play with DingoDB using DingoClient
To make database operations faster and easier, DingoDB provides a comprehensive and powerful API for operating on the database, including DDL and DML operations.
Examples
For the functions provided by the Dingo SDK, see: SDK Documents
1. Document_index
from dingodb import SDKDocumentDingoDB, SDKClient
from dingodb.common.document_rep import DocumentType, DocumentColumn, DocumentSchema
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)
x = SDKDocumentDingoDB(sdk_client)
print(x)
index_name = "document_index_test"
delete_index
delete_index_out = x.delete_index(index_name)
print(delete_index_out)
import time
create_index
scheme = DocumentSchema()
col = DocumentColumn("text", DocumentType.STRING)
scheme.add_document_column(col)
col = DocumentColumn("i64", DocumentType.INT64)
scheme.add_document_column(col)
col = DocumentColumn("f64", DocumentType.DOUBLE)
scheme.add_document_column(col)
col = DocumentColumn("bytes", DocumentType.BYTES)
scheme.add_document_column(col)
col = DocumentColumn("bool", DocumentType.BOOL)
scheme.add_document_column(col)
col = DocumentColumn("datetime", DocumentType.DATETIME)
scheme.add_document_column(col)
create_index_out = x.create_index(index_name, scheme, 3, operand=[5, 10, 20])
# create_index_out = x.create_index(index_name, scheme, 3)
print(create_index_out)
time.sleep(5)
make dataset
ids = [3, 5, 7, 9, 11, 13, 15, 17, 19, 21]
documents = [
{"text" : "Ancient empires rise and fall, shaping history's course.", "i64" : 1003, "f64" : 1003.0, "bytes" : "bytes_data_3","bool":True, "datetime": "2021-01-01T00:00:00Z"},
{"text" : "Artistic expressions reflect diverse cultural heritages.", "i64" : 1005, "f64" : 1005.0, "bytes" : "bytes_data_5","bool":False, "datetime": "2021-01-01T00:00:00Z"},
{"text" : "Social movements transform societies, forging new paths.", "i64" : 1007, "f64" : 1007.0, "bytes" : "bytes_data_7","bool":True, "datetime": "2022-01-01T00:00:00Z"},
{"text" : "Economies fluctuate, reflecting the complex interplay of global forces.", "i64" : 1009, "f64" : 1009.0, "bytes" : "bytes_data_9","bool":False, "datetime": "2022-01-01T00:00:00Z"},
{"text" : "Strategic military campaigns alter the balance of power.", "i64" : 1011, "f64" : 1011.0, "bytes" : "bytes_data_11","bool":True, "datetime": "2023-01-01T00:00:00Z"},
{"text" : "Quantum leaps redefine understanding of physical laws.", "i64" : 1013, "f64" : 1013.0, "bytes" : "bytes_data_13","bool":False, "datetime": "2023-01-01T00:00:00Z"},
{"text" : "Chemical reactions unlock mysteries of nature.", "i64" : 1015, "f64" : 1015.0, "bytes" : "bytes_data_15","bool":True, "datetime": "2024-01-01T00:00:00Z"},
{"text" : "Philosophical debates ponder the essence of existence.", "i64" : 1017, "f64" : 1017.0, "bytes" : "bytes_data_17","bool":False, "datetime": "2024-01-01T00:00:00Z"},
{"text" : "Marriages blend traditions, celebrating love's union.", "i64" : 1019, "f64" : 1019.0, "bytes" : "bytes_data_19","bool":True, "datetime": "2025-01-01T00:00:00Z"},
{"text" : "Explorers discover uncharted territories, expanding world maps.", "i64" : 1021, "f64" : 10021.0, "bytes" : "bytes_data_21","bool":False, "datetime": "2025-01-01T00:00:00Z"}
]
document_add_out = x.document_add(index_name, documents, ids)
print(document_add_out)
print(document_add_out.to_dict())
document_search
document_search_out = x.document_search(index_name, "discover", 5, with_scalar_data=True)
print(document_search_out)
print(document_search_out.to_dict())
document_search_out = x.document_search(index_name, "of", 3, with_scalar_data=True)
print(document_search_out)
document_search_out = x.document_search(index_name, "of", 5,[13, 15], with_scalar_data=True)
print(document_search_out)
document_search_out = x.document_search_all(index_name, "of", with_scalar_data=True, query_limit=4096)
print(document_search_out)
document_search_out = x.document_search(index_name, r"(text:'of' AND i64: >= 1013)", 5, [9, 11, 13, 15], with_scalar_data=True)
print(document_search_out)
document_search_out = x.document_search(index_name, r"( bool:true)", 5, [9, 11, 13, 15], with_scalar_data=True)
print(document_search_out)
document_search_out = x.document_search(index_name, r"(datetime:'2023-01-01T00:00:00Z' )", 5, [3,5,7,9, 11, 13, 15], with_scalar_data=True)
print(document_search_out)
document_query
document_query_out = x.document_query(index_name, ids, True, ["text", "i64"])
print(document_query_out)
document_get_border
document_get_border_out = x.document_get_border(index_name, True)
print(document_get_border_out)
document_get_border_out = x.document_get_border(index_name, False)
print(document_get_border_out)
document_scan_query
document_scan_query_out = x.document_scan_query(index_name, ids[0], ids[-1], False, 2)
print(document_scan_query_out)
document_scan_query_out = x.document_scan_query(index_name, ids[-1], ids[0], True, 2)
print(document_scan_query_out)
document_scan_query_out = x.document_scan_query(index_name, ids[0], ids[-1] + 10, False, 100, True, ["text", "i64"])
print(document_scan_query_out)
document_index_metrics
document_metrics_out = x.document_index_metrics(index_name)
print(document_metrics_out)
document_count
document_count_out = x.document_count(index_name, 0, 19)
print(document_count_out)
document_delete
document_delete_out = x.document_delete(index_name, ids)
print(document_delete_out)
2. Document_regex_index
import
from dingodb import SDKDocumentDingoDB, SDKClient
from dingodb.common.document_rep import DocumentType, DocumentColumn, DocumentSchema
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)
x = SDKDocumentDingoDB(sdk_client)
print(x)
index_name = "document_regex_index_test"
delete_index
delete_index_out = x.delete_index(index_name)
print(delete_index_out)
create_index
import time
scheme = DocumentSchema()
col = DocumentColumn("title", DocumentType.STRING)
scheme.add_document_column(col)
col = DocumentColumn("text", DocumentType.STRING)
scheme.add_document_column(col)
create_index_out = x.create_index(index_name, scheme, 3, operand=[5, 10, 20])
# create_index_out = x.create_index(index_name, scheme, 3)
print(create_index_out)
time.sleep(5)
make dataset
ids = [1, 2, 3]
documents = [
{"title" : "a", "text" : "The Diary of Muadib"},
{"title" : "bb", "text" : "A Dairy Cow"},
{"title" : "ccc", "text" : "The Diary of a Young Girl"}
]
document_add_out = x.document_add(index_name, documents, ids)
print(document_add_out)
DocumentRegexSearch
# base64encode Dia.* to RGlhLio=
# text contains "Dia"
document_search_out = x.document_search(index_name, "text:RE [RGlhLio=]", 5, False, with_scalar_data=True)
print(document_search_out)
DocumentSearchLength
# base64encode (.{0,2}) to KC57MCwyfSk=
# title length <= 2
document_search_out = x.document_search(index_name, "title:RE [KC57MCwyfSk=]", 5, False, with_scalar_data=True)
print(document_search_out)
DocumentSearchAnd
# base64encode (.{0,2}) to KC57MCwyfSk=
# base64encode Dia.* to RGlhLio=
# title length <= 2 and text contains "Dia"
document_search_out = x.document_search(index_name, "title:RE [KC57MCwyfSk=] AND text:RE [RGlhLio=]", 5, False, with_scalar_data=True)
print(document_search_out)
3. Rawkv
import
import numpy as np
import os
from dingodb import SDKRawKVDingoDB, SDKClient
# need to create region ( range(wa,wc) for this example ) before using SDKRawKVDingoDB
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)
x = SDKRawKVDingoDB(sdk_client)
print(x)
put
x.rawkv_put("wb01", "value1")
batch_put
x.rawkv_batch_put([
("wb02", "value2"),
("wb03", "value3"),
("wb04", "value4"),
("wb05", "value5"),
("wb06", "value6"),
("wb07", "value7"),
("wb08", "value8"),
("wb09", "value9"),
("wb10", "value10"),
("wb11", "value11"),
("wb12", "value12"),
("wb13", "value13"),
("wb14", "value14"),
("wb15", "value15")])
get
x.rawkv_get("wb01")
batch_get
[kv.to_dict() for kv in x.rawkv_batch_get([
"wb01",
"wb02",
"wb03",
"wb04",
"wb05",
"wb06",
"wb07",
"wb08",
"wb09",
"wb10",
"wb11",
"wb12",
"wb13",
"wb14",
"wb15"
])]
x.rawkv_put_if_absent("wb16", "value16")
delete
x.rawkv_delete("wb16")
x.rawkv_batch_delete([
"wb01",
"wb02",
])
[kv.to_dict() for kv in x.rawkv_batch_put_if_absent([
("wb01", "value1"),
("wb02", "value2"),
("wb03", "value3"),
("wb04", "value4"),
("wb05", "value5"),
("wb06", "value6"),
("wb07", "value7"),
("wb08", "value8"),
("wb09", "value9"),
("wb10", "value10"),
("wb11", "value11"),
("wb12", "value12"),
("wb13", "value13"),
("wb14", "value14"),
("wb15", "value15")])]
[kv.to_dict() for kv in x.rawkv_scan("wb", "wc", 15)]
delete range
x.rawkv_delete_range("wa", "wc")
4. Region-creator
import
import numpy as np
import os
from dingodb import SDKRegionCreatorDingoDB, SDKClient
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)
x = SDKRegionCreatorDingoDB(sdk_client)
print(x)
drop_region
x.drop_region(80031)
create_region
x.create_region_id(10)
x.create_region("test1","wb00000000","wc000000",80033)
5. Vector_index
import numpy as np
import os
from dingodb import SDKVectorDingoDB, SDKClient
from dingodb.common.vector_rep import ScalarType, ScalarColumn, ScalarSchema
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)
x = SDKVectorDingoDB(sdk_client)
print(x)
index_name = "test_index_grpc1"
delete_index
x.delete_index(index_name)
create_index
# help(x.create_index)
#x.create_index(index_name, 8, "binary_flat", "hamming", 3, operand=[5,10,15,20])
col = ScalarColumn("id",ScalarType.DOUBLE,True)
sca = ScalarSchema()
sca.add_scalar_column(col)
x.create_index_with_schema(index_name, 16,sca, "binary_ivf_flat", "hamming", 3, operand=[100,500,1500,3000,6000])
make dataset
d = 16 # dimension
bd = 2 #binary dimension
nb = 4 # database size
np.random.seed(1234) # make reproducible
xb = np.random.randint(0, 255, (nb, bd)) # generate random integers in the range 0-254 (the upper bound 255 is exclusive)
print(xb.shape)
xb[:, 0] += np.arange(nb)
print(xb)
print(xb.shape)
ids = [1, 2, 3, 4]
datas = [{"id": 50}, {"id": 120}, {"id": 130}, {"id": 4.40}]
vectors = xb.tolist()
add
for i in range(10):
x.vector_add(index_name, datas, vectors, ids,"binary")
delete
x.vector_delete(index_name,ids)
get_auto_increment_id
x.vector_get_auto_increment_id(index_name)
update_auto_increment_id
x.vector_update_auto_increment_id(index_name,16)
upsert
x.vector_upsert(index_name, datas, vectors, ids,"binary")
search
# vector_search
x.vector_search(index_name, vectors[0],value_type="binary")
# return
# error RuntimeError
search with pre_filter or post_filter
x.vector_search(index_name, vectors[0], 10, {"meta_expr": {"id": 1}},value_type="binary")
get index with id
x.vector_get(index_name, [1, 2, 6])
vector_scan
x.vector_scan(index_name, 20, 60,is_reverse=True,end_id=0)
vector_count
x.vector_count(index_name)
metrics
x.vector_metrics(index_name)
get_max_index_row
x.get_max_index_row(index_name)
delete_index
# delete_index
x.delete_index(index_name)
6. Vector_binary_index
import
import numpy as np
import os
from dingodb import SDKVectorDingoDB, SDKClient
from dingodb.common.vector_rep import ScalarType, ScalarColumn, ScalarSchema
addrs = "127.0.0.1:22001,127.0.0.1:22002,127.0.0.1:22003"
sdk_client = SDKClient(addrs)
x = SDKVectorDingoDB(sdk_client)
print(x)
index_name = "test_index_grpc1"
delete_index
x.delete_index(index_name)
create_index
# help(x.create_index)
#x.create_index(index_name, 8, "binary_flat", "hamming", 3, operand=[5,10,15,20])
col = ScalarColumn("id",ScalarType.DOUBLE,True)
sca = ScalarSchema()
sca.add_scalar_column(col)
x.create_index_with_schema(index_name, 16,sca, "binary_ivf_flat", "hamming", 3, operand=[100,500,1500,3000,6000])
make dataset
d = 16 # dimension
bd = 2 #binary dimension
nb = 4 # database size
np.random.seed(1234) # make reproducible
xb = np.random.randint(0, 255, (nb, bd)) # generate random integers in the range 0-254 (the upper bound 255 is exclusive)
print(xb.shape)
xb[:, 0] += np.arange(nb)
print(xb)
print(xb.shape)
ids = [1, 2, 3, 4]
datas = [{"id": 50}, {"id": 120}, {"id": 130}, {"id": 4.40}]
vectors = xb.tolist()
vector_add
for i in range(10):
x.vector_add(index_name, datas, vectors, ids,"binary")
vector_delete
x.vector_delete(index_name,ids)
vector_get_auto_increment
x.vector_get_auto_increment_id(index_name)
vector_update_auto_increment
x.vector_update_auto_increment_id(index_name,16)
vector_upsert
x.vector_upsert(index_name, datas, vectors, ids,"binary")
vector_search
x.vector_search(index_name, vectors[0],value_type="binary")
# return
# error RuntimeError
# vector_search with pre_filter or post_filter
x.vector_search(index_name, vectors[0], 10, {"meta_expr": {"id": 1}},value_type="binary")
vector_get
x.vector_get(index_name, [1, 2, 6])
vector_scan
x.vector_scan(index_name, 20, 60,is_reverse=True,end_id=0)
vector_count
x.vector_count(index_name)
vector_metrics
x.vector_metrics(index_name)
get_max_index_row
x.get_max_index_row(index_name)
delete_index
x.delete_index(index_name)
7. Vector_diskann_index
import
import numpy as np
import os
from dingodb import SDKVectorDingoDB, SDKClient
from dingodb.common.vector_rep import ScalarType, ScalarColumn, ScalarSchema
addrs = "172.30.14.11:22001,172.30.14.11:22002,172.30.14.11:22003"
sdk_client = SDKClient(addrs)
x = SDKVectorDingoDB(sdk_client)
print(x)
index_name = "test_index_grpc"
delete_index
x.delete_index(index_name)
create_index
# help(x.create_index)
x.create_index(index_name, 6, "diskann", "euclidean", 3, index_config={"valueType": "float","searchListSize": 100,"maxDegree": 64}, operand=[5,10,15,20])
make dataset
d = 6 # dimension
nb = 4 # database size
np.random.seed(1234) # make reproducible
xb = np.random.random((nb, d)).astype('float32')
print(xb)
print(xb.shape)
xb[:, 0] += np.arange(nb) / 1000.
print(xb)
print(xb.shape)
ids = [1, 2, 3, 4]
datas = [{"a1": "b1"}, {"a2": "b2"}, {"a3": "b3"}, {"a4": "b4"}]
vectors = xb.tolist()
vector_import_add
x.vector_import_add(index_name, datas, vectors, ids)
x.vector_import_add(index_name, datas, vectors, ids)
x.vector_import_add(index_name, datas, vectors, ids)
x.vector_import_add(index_name, datas, vectors, ids)
vector_build_by_index
x.vector_build_by_index(index_name)
vector_status_by_index
[r.to_dict() for r in x.vector_status_by_index(index_name)]
vector_count_memory
x.vector_count_memory(index_name)
vector_search
x.vector_search(index_name, vectors[0])
vector_load_by_index
[r.to_dict() for r in x.vector_load_by_index(index_name)]
[r.to_dict() for r in x.vector_reset_by_index(index_name)]
id = [1,80001]
[r.to_dict() for r in x.vector_status_by_region(index_name,id)]
[r.to_dict() for r in x.vector_build_by_region(index_name,id)]
[r.to_dict() for r in x.vector_load_by_region(index_name,id)]
[r.to_dict() for r in x.vector_reset_by_region(index_name,id)]
delete
x.vector_import_delete(index_name,ids)
x.delete_index(index_name)