def main(dataset_name, index_file_size, nlist=16384, force=False):
    top_k = 10
    nprobes = [32, 128]

    dataset = get_dataset(dataset_name)
    table_name = get_table_name(dataset_name, index_file_size)
    m = MilvusClient(table_name)
    if m.exists_table():
        if force is True:
            logger.info("Re-create table: %s" % table_name)
            m.delete()
            time.sleep(10)
        else:
            logger.info("Table %s already exists" % table_name)
            return

    data_type, dimension, metric_type = parse_dataset_name(dataset_name)
    m.create_table(table_name, dimension, index_file_size, metric_type)
    print(m.describe())
    vectors = numpy.array(dataset["train"])
    query_vectors = numpy.array(dataset["test"])

    # m.insert(vectors)
    # Insert in fixed-size batches with explicit ids
    interval = 100000
    loops = len(vectors) // interval + 1
    for i in range(loops):
        start = i * interval
        end = min((i + 1) * interval, len(vectors))
        tmp_vectors = vectors[start:end]
        if start < end:
            m.insert(tmp_vectors, ids=[i for i in range(start, end)])
    time.sleep(60)
    print(m.count())

    true_ids = numpy.array(dataset["neighbors"])
    for index_type in ["ivf_flat", "ivf_sq8", "ivf_sq8h"]:
        m.create_index(index_type, nlist)
        print(m.describe_index())
        if m.count() != len(vectors):
            return
        m.preload_table()
        for nprobe in nprobes:
            print("nprobe: %s" % nprobe)
            sum_ratio = 0.0
            result_ids = m.query(query_vectors, top_k, nprobe)
            # print(result_ids[:10])
            for index, result_item in enumerate(result_ids):
                if len(set(true_ids[index][:top_k])) != len(set(result_item)):
                    logger.info("Error happened")
                    # logger.info(query_vectors[index])
                    # logger.info(true_ids[index][:top_k], result_item)
                tmp = set(true_ids[index][:top_k]).intersection(set(result_item))
                sum_ratio = sum_ratio + (len(tmp) / top_k)
            avg_ratio = round(sum_ratio / len(result_ids), 4)
            logger.info(avg_ratio)
        m.drop_index()
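# The loop above computes recall@k inline (the "avg_ratio" value): for each
# query, the fraction of ground-truth neighbors recovered in the top-k
# results, averaged over all queries. A minimal, self-contained sketch of the
# same metric; the helper name is illustrative and not used elsewhere here.
def recall_at_k(true_ids, result_ids, top_k):
    total = 0.0
    for truth, result in zip(true_ids, result_ids):
        # Fraction of the true top-k neighbors present in the returned ids
        total += len(set(truth[:top_k]) & set(result)) / top_k
    return round(total / len(result_ids), 4)

# Example: perfect recall on the first query, half on the second -> 0.75
# recall_at_k([[1, 2], [3, 4]], [[2, 1], [3, 9]], top_k=2)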
def run(self, definition, run_type=None):
    if run_type == "performance":
        for op_type, op_value in definition.items():
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            if op_type == "insert":
                for index, param in enumerate(run_params):
                    table_name = param["table_name"]  # e.g. random_1m_100_512
                    (data_type, table_size, index_file_size, dimension,
                     metric_type) = parser.table_parser(table_name)
                    milvus = MilvusClient(table_name, ip=self.ip, port=self.port)
                    # Drop the table if it already exists
                    if milvus.exists_table():
                        milvus.delete()
                        time.sleep(10)
                    milvus.create_table(table_name, dimension, index_file_size, metric_type)
                    res = self.do_insert(milvus, table_name, data_type, dimension,
                                         table_size, param["ni_per"])
                    logger.info(res)
            elif op_type == "query":
                for index, param in enumerate(run_params):
                    logger.info("Definition param: %s" % str(param))
                    table_name = param["dataset"]
                    (data_type, table_size, index_file_size, dimension,
                     metric_type) = parser.table_parser(table_name)
                    milvus = MilvusClient(table_name, ip=self.ip, port=self.port)
                    # Parse index info
                    index_types = param["index.index_types"]
                    nlists = param["index.nlists"]
                    # Parse top-k, nq, nprobe
                    top_ks, nqs, nprobes = parser.search_params_parser(param)
                    for index_type in index_types:
                        for nlist in nlists:
                            milvus.create_index(index_type, nlist)
                            # Preload index
                            milvus.preload_table()
                            # Run query test
                            for nprobe in nprobes:
                                logger.info("index_type: %s, nlist: %s, metric_type: %s, nprobe: %s"
                                            % (index_type, nlist, metric_type, nprobe))
                                res = self.do_query(milvus, table_name, top_ks, nqs, nprobe, run_count)
                                headers = [param["dataset"]]
                                headers.extend([str(top_k) for top_k in top_ks])
                                utils.print_table(headers, nqs, res)
    elif run_type == "stability":
        for op_type, op_value in definition.items():
            if op_type != "query":
                logger.warning("invalid operation: %s in stability test, only query is supported" % op_type)
                break
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            nq = 10000
            for index, param in enumerate(run_params):
                logger.info("Definition param: %s" % str(param))
                table_name = param["dataset"]
                (data_type, table_size, index_file_size, dimension,
                 metric_type) = parser.table_parser(table_name)
                # Set default test time (seconds)
                if "during_time" not in param:
                    during_time = 100
                else:
                    during_time = int(param["during_time"]) * 60
                # Set default query process num
                if "query_process_num" not in param:
                    query_process_num = 10
                else:
                    query_process_num = int(param["query_process_num"])
                milvus = MilvusClient(table_name)
                if not milvus.exists_table():
                    logger.warning("Table %s does not exist, continue with next params ..." % table_name)
                    continue
                start_time = time.time()
                insert_vectors = [[random.random() for _ in range(dimension)] for _ in range(nq)]
                while time.time() < start_time + during_time:
                    processes = []
                    # # do query with multiple processes
                    # for i in range(query_process_num):
                    #     milvus_instance = MilvusClient(table_name)
                    #     top_k = random.choice([x for x in range(1, 100)])
                    #     nq = random.choice([x for x in range(1, 1000)])
                    #     nprobe = random.choice([x for x in range(1, 500)])
                    #     logger.info(nprobe)
                    #     p = Process(target=self.do_query, args=(milvus_instance, table_name, [top_k], [nq], 64, run_count, ))
                    #     processes.append(p)
                    #     p.start()
                    #     time.sleep(0.1)
                    # for p in processes:
                    #     p.join()
                    milvus_instance = MilvusClient(table_name)
                    top_ks = random.sample([x for x in range(1, 100)], 4)
                    nqs = random.sample([x for x in range(1, 1000)], 3)
                    nprobe = random.choice([x for x in range(1, 500)])
                    res = self.do_query(milvus, table_name, top_ks, nqs, nprobe, run_count)
                    status, res = milvus_instance.insert(insert_vectors, ids=[x for x in range(len(insert_vectors))])
                    if not status.OK():
                        logger.error(status.message)
                    # Rebuild the index roughly every 300 seconds. The original
                    # float modulo test (time.time() - start_time) % 300 == 0
                    # essentially never fires; truncate to int first.
                    if int(time.time() - start_time) % 300 == 0:
                        status = milvus_instance.drop_index()
                        if not status.OK():
                            logger.error(status.message)
                        index_type = random.choice(["flat", "ivf_flat", "ivf_sq8"])
                        status = milvus_instance.create_index(index_type, 16384)
                        if not status.OK():
                            logger.error(status.message)
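# The stability loop above rebuilds the index "every 300 seconds" via a modulo
# test on elapsed time, which is fragile even after the int() fix. A sketch of
# a deadline-based alternative, under the assumption that `step` runs one
# query/insert iteration and `rebuild` wraps drop_index()/create_index();
# both callables are hypothetical stand-ins, not part of this module.
import time

def run_with_periodic_rebuild(duration_s, period_s, step, rebuild):
    start = time.time()
    next_rebuild = start + period_s
    while time.time() < start + duration_s:
        step()
        if time.time() >= next_rebuild:  # fires once per period, no modulo
            rebuild()
            next_rebuild += period_s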
def run(self, run_type, table):
    logger.debug(run_type)
    logger.debug(table)
    table_name = table["table_name"]
    milvus_instance = MilvusClient(table_name=table_name, ip=self.ip)
    self.env_value = milvus_instance.get_server_config()
    if run_type == "insert_performance":
        (data_type, table_size, index_file_size, dimension,
         metric_type) = parser.table_parser(table_name)
        ni_per = table["ni_per"]
        build_index = table["build_index"]
        if milvus_instance.exists_table():
            milvus_instance.delete()
            time.sleep(10)
        index_info = {}
        search_params = {}
        milvus_instance.create_table(table_name, dimension, index_file_size, metric_type)
        if build_index is True:
            index_type = table["index_type"]
            nlist = table["nlist"]
            index_info = {"index_type": index_type, "index_nlist": nlist}
            milvus_instance.create_index(index_type, nlist)
        res = self.do_insert(milvus_instance, table_name, data_type, dimension, table_size, ni_per)
        logger.info(res)
        table_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": table_name
        }
        metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname,
                                     table_info, index_info, search_params)
        metric.metrics = {
            "type": "insert_performance",
            "value": {
                "total_time": res["total_time"],
                "qps": res["qps"],
                "ni_time": res["ni_time"]
            }
        }
        report(metric)
        logger.debug("Wait for file merge")
        time.sleep(120)
    elif run_type == "build_performance":
        (data_type, table_size, index_file_size, dimension,
         metric_type) = parser.table_parser(table_name)
        index_type = table["index_type"]
        nlist = table["nlist"]
        table_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": table_name
        }
        index_info = {"index_type": index_type, "index_nlist": nlist}
        if not milvus_instance.exists_table():
            logger.error("Table %s does not exist" % table_name)
            return
        search_params = {}
        start_time = time.time()
        # Drop any existing index so the build is measured from scratch
        logger.debug("Drop index")
        milvus_instance.drop_index()
        start_mem_usage = milvus_instance.get_mem_info()["memory_used"]
        milvus_instance.create_index(index_type, nlist)
        logger.debug(milvus_instance.describe_index())
        end_time = time.time()
        end_mem_usage = milvus_instance.get_mem_info()["memory_used"]
        metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname,
                                     table_info, index_info, search_params)
        metric.metrics = {
            "type": "build_performance",
            "value": {
                "build_time": round(end_time - start_time, 1),
                "start_mem_usage": start_mem_usage,
                "end_mem_usage": end_mem_usage,
                "diff_mem": end_mem_usage - start_mem_usage
            }
        }
        report(metric)
    elif run_type == "search_performance":
        (data_type, table_size, index_file_size, dimension,
         metric_type) = parser.table_parser(table_name)
        run_count = table["run_count"]
        search_params = table["search_params"]
        table_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": table_name
        }
        if not milvus_instance.exists_table():
            logger.error("Table %s does not exist" % table_name)
            return
        logger.info(milvus_instance.count())
        result = milvus_instance.describe_index()
        index_info = {
            "index_type": result["index_type"],
            "index_nlist": result["nlist"]
        }
        logger.info(index_info)
        nprobes = search_params["nprobes"]
        top_ks = search_params["top_ks"]
        nqs = search_params["nqs"]
        milvus_instance.preload_table()
        logger.info("Start warm up query")
        res = self.do_query(milvus_instance, table_name, [1], [1], 1, 2)
        logger.info("End warm up query")
        for nprobe in nprobes:
            logger.info("Search nprobe: %s" % nprobe)
            res = self.do_query(milvus_instance, table_name, top_ks, nqs, nprobe, run_count)
            headers = ["Nq/Top-k"]
            headers.extend([str(top_k) for top_k in top_ks])
            utils.print_table(headers, nqs, res)
            for index_nq, nq in enumerate(nqs):
                for index_top_k, top_k in enumerate(top_ks):
                    search_param = {
                        "nprobe": nprobe,
                        "nq": nq,
                        "topk": top_k
                    }
                    search_time = res[index_nq][index_top_k]
                    metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname,
                                                 table_info, index_info, search_param)
                    metric.metrics = {
                        "type": "search_performance",
                        "value": {
                            "search_time": search_time
                        }
                    }
                    report(metric)
    # For sift/deep datasets
    elif run_type == "accuracy":
        (data_type, table_size, index_file_size, dimension,
         metric_type) = parser.table_parser(table_name)
        search_params = table["search_params"]
        table_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": table_name
        }
        if not milvus_instance.exists_table():
            logger.error("Table %s does not exist" % table_name)
            return
        logger.info(milvus_instance.count())
        result = milvus_instance.describe_index()
        index_info = {
            "index_type": result["index_type"],
            "index_nlist": result["nlist"]
        }
        logger.info(index_info)
        nprobes = search_params["nprobes"]
        top_ks = search_params["top_ks"]
        nqs = search_params["nqs"]
        milvus_instance.preload_table()
        true_ids_all = self.get_groundtruth_ids(table_size)
        for nprobe in nprobes:
            logger.info("Search nprobe: %s" % nprobe)
            for top_k in top_ks:
                for nq in nqs:
                    search_param = {
                        "nprobe": nprobe,
                        "nq": nq,
                        "topk": top_k
                    }
                    result_ids, result_distances = self.do_query_ids(
                        milvus_instance, table_name, top_k, nq, nprobe)
                    acc_value = self.get_recall_value(
                        true_ids_all[:nq, :top_k].tolist(), result_ids)
                    logger.info("Query accuracy: %s" % acc_value)
                    metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname,
                                                 table_info, index_info, search_param)
                    metric.metrics = {
                        "type": "accuracy",
                        "value": {
                            "acc": acc_value
                        }
                    }
                    report(metric)
    elif run_type == "ann_accuracy":
        hdf5_source_file = table["source_file"]
        table_name = table["table_name"]
        index_file_sizes = table["index_file_sizes"]
        index_types = table["index_types"]
        nlists = table["nlists"]
        search_params = table["search_params"]
        nprobes = search_params["nprobes"]
        top_ks = search_params["top_ks"]
        nqs = search_params["nqs"]
        data_type, dimension, metric_type = parser.parse_ann_table_name(table_name)
        table_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": table_name
        }
        dataset = utils.get_dataset(hdf5_source_file)
        if milvus_instance.exists_table(table_name):
            logger.info("Re-create table: %s" % table_name)
            milvus_instance.delete(table_name)
            time.sleep(DELETE_INTERVAL_TIME)
        true_ids = np.array(dataset["neighbors"])
        for index_file_size in index_file_sizes:
            milvus_instance.create_table(table_name, dimension, index_file_size, metric_type)
            logger.info(milvus_instance.describe())
            insert_vectors = self.normalize(metric_type, np.array(dataset["train"]))
            # Insert batch once
            # milvus_instance.insert(insert_vectors)
            # Insert in fixed-size batches with explicit ids
            loops = len(insert_vectors) // INSERT_INTERVAL + 1
            for i in range(loops):
                start = i * INSERT_INTERVAL
                end = min((i + 1) * INSERT_INTERVAL, len(insert_vectors))
                tmp_vectors = insert_vectors[start:end]
                if start < end:
                    if not isinstance(tmp_vectors, list):
                        milvus_instance.insert(tmp_vectors.tolist(), ids=[i for i in range(start, end)])
                    else:
                        milvus_instance.insert(tmp_vectors, ids=[i for i in range(start, end)])
            time.sleep(20)
            logger.info("Table: %s, row count: %s" % (table_name, milvus_instance.count()))
            if milvus_instance.count() != len(insert_vectors):
                logger.error("Table row count is not equal to insert vectors")
                return
            for index_type in index_types:
                for nlist in nlists:
                    milvus_instance.create_index(index_type, nlist)
                    # logger.info(milvus_instance.describe_index())
                    logger.info("Start preload table: %s, index_type: %s, nlist: %s"
                                % (table_name, index_type, nlist))
                    milvus_instance.preload_table()
                    index_info = {
                        "index_type": index_type,
                        "index_nlist": nlist
                    }
                    for nprobe in nprobes:
                        for nq in nqs:
                            query_vectors = self.normalize(metric_type, np.array(dataset["test"][:nq]))
                            for top_k in top_ks:
                                search_params = {
                                    "nq": len(query_vectors),
                                    "nprobe": nprobe,
                                    "topk": top_k
                                }
                                if not isinstance(query_vectors, list):
                                    result = milvus_instance.query(query_vectors.tolist(), top_k, nprobe)
                                else:
                                    result = milvus_instance.query(query_vectors, top_k, nprobe)
                                result_ids = result.id_array
                                acc_value = self.get_recall_value(
                                    true_ids[:nq, :top_k].tolist(), result_ids)
                                logger.info("Query ann_accuracy: %s" % acc_value)
                                metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname,
                                                             table_info, index_info, search_params)
                                metric.metrics = {
                                    "type": "ann_accuracy",
                                    "value": {
                                        "acc": acc_value
                                    }
                                }
                                report(metric)
            milvus_instance.delete()
    elif run_type == "search_stability":
        (data_type, table_size, index_file_size, dimension,
         metric_type) = parser.table_parser(table_name)
        search_params = table["search_params"]
        during_time = table["during_time"]
        table_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": table_name
        }
        if not milvus_instance.exists_table():
            logger.error("Table %s does not exist" % table_name)
            return
        logger.info(milvus_instance.count())
        result = milvus_instance.describe_index()
        index_info = {
            "index_type": result["index_type"],
            "index_nlist": result["nlist"]
        }
        search_param = {}
        logger.info(index_info)
        # Search params are given as "low-high" ranges to randomize over
        g_nprobe = int(search_params["nprobes"].split("-")[1])
        g_top_k = int(search_params["top_ks"].split("-")[1])
        g_nq = int(search_params["nqs"].split("-")[1])
        l_nprobe = int(search_params["nprobes"].split("-")[0])
        l_top_k = int(search_params["top_ks"].split("-")[0])
        l_nq = int(search_params["nqs"].split("-")[0])
        milvus_instance.preload_table()
        start_mem_usage = milvus_instance.get_mem_info()["memory_used"]
        logger.debug(start_mem_usage)
        logger.info("Start warm up query")
        res = self.do_query(milvus_instance, table_name, [1], [1], 1, 2)
        logger.info("End warm up query")
        start_time = time.time()
        while time.time() < start_time + during_time * 60:
            top_k = random.randint(l_top_k, g_top_k)
            nq = random.randint(l_nq, g_nq)
            nprobe = random.randint(l_nprobe, g_nprobe)
            query_vectors = [[random.random() for _ in range(dimension)] for _ in range(nq)]
            logger.debug("Query nprobe:%d, nq:%d, top-k:%d" % (nprobe, nq, top_k))
            result = milvus_instance.query(query_vectors, top_k, nprobe)
        end_mem_usage = milvus_instance.get_mem_info()["memory_used"]
        metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname,
                                     table_info, index_info, search_param)
        metric.metrics = {
            "type": "search_stability",
            "value": {
                "during_time": during_time,
                "start_mem_usage": start_mem_usage,
                "end_mem_usage": end_mem_usage,
                "diff_mem": end_mem_usage - start_mem_usage
            }
        }
        report(metric)
    elif run_type == "stability":
        (data_type, table_size, index_file_size, dimension,
         metric_type) = parser.table_parser(table_name)
        search_params = table["search_params"]
        insert_xb = table["insert_xb"]
        insert_interval = table["insert_interval"]
        during_time = table["during_time"]
        table_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": table_name
        }
        if not milvus_instance.exists_table():
            logger.error("Table %s does not exist" % table_name)
            return
        logger.info(milvus_instance.count())
        result = milvus_instance.describe_index()
        index_info = {
            "index_type": result["index_type"],
            "index_nlist": result["nlist"]
        }
        search_param = {}
        logger.info(index_info)
        g_nprobe = int(search_params["nprobes"].split("-")[1])
        g_top_k = int(search_params["top_ks"].split("-")[1])
        g_nq = int(search_params["nqs"].split("-")[1])
        l_nprobe = int(search_params["nprobes"].split("-")[0])
        l_top_k = int(search_params["top_ks"].split("-")[0])
        l_nq = int(search_params["nqs"].split("-")[0])
        milvus_instance.preload_table()
        logger.info("Start warm up query")
        res = self.do_query(milvus_instance, table_name, [1], [1], 1, 2)
        logger.info("End warm up query")
        start_mem_usage = milvus_instance.get_mem_info()["memory_used"]
        start_row_count = milvus_instance.count()
        start_time = time.time()
        i = 0
        while time.time() < start_time + during_time * 60:
            i = i + 1
            # Run a burst of randomized queries, then one insert
            for j in range(insert_interval):
                top_k = random.randint(l_top_k, g_top_k)
                nq = random.randint(l_nq, g_nq)
                nprobe = random.randint(l_nprobe, g_nprobe)
                query_vectors = [[random.random() for _ in range(dimension)] for _ in range(nq)]
                logger.debug("Query nprobe:%d, nq:%d, top-k:%d" % (nprobe, nq, top_k))
                result = milvus_instance.query(query_vectors, top_k, nprobe)
            insert_vectors = [[random.random() for _ in range(dimension)] for _ in range(insert_xb)]
            status, res = milvus_instance.insert(insert_vectors, ids=[x for x in range(len(insert_vectors))])
            logger.debug("%d, row_count: %d" % (i, milvus_instance.count()))
        end_mem_usage = milvus_instance.get_mem_info()["memory_used"]
        end_row_count = milvus_instance.count()
        metric = self.report_wrapper(milvus_instance, self.env_value, self.hostname,
                                     table_info, index_info, search_param)
        metric.metrics = {
            "type": "stability",
            "value": {
                "during_time": during_time,
                "start_mem_usage": start_mem_usage,
                "end_mem_usage": end_mem_usage,
                "diff_mem": end_mem_usage - start_mem_usage,
                "row_count_increments": end_row_count - start_row_count
            }
        }
        report(metric)
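# The ann_accuracy path above calls self.normalize(metric_type, vectors)
# before inserting and querying. A plausible sketch of that step, assuming
# inner-product ("ip") search expects unit-length vectors while L2 vectors
# pass through unchanged; the actual self.normalize implementation is not
# shown in this module, so treat this as illustrative only.
import numpy as np

def normalize(metric_type, vectors):
    if metric_type == "ip":
        # Scale each (assumed non-zero) row to unit L2 norm so that inner
        # product on the normalized vectors equals cosine similarity
        norms = np.linalg.norm(vectors, axis=1, keepdims=True)
        return vectors / norms
    return vectors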
def run(self, definition, run_type=None):
    if run_type == "performance":
        for op_type, op_value in definition.items():
            # Run in docker mode
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            container = None
            if op_type == "insert":
                if not run_params:
                    logger.debug("No run params")
                    continue
                for index, param in enumerate(run_params):
                    logger.info("Definition param: %s" % str(param))
                    table_name = param["table_name"]
                    volume_name = param["db_path_prefix"]
                    print(table_name)
                    (data_type, table_size, index_file_size, dimension,
                     metric_type) = parser.table_parser(table_name)
                    for k, v in param.items():
                        if k.startswith("server."):
                            # Update server config
                            utils.modify_config(k, v, type="server", db_slave=None)
                    container = utils.run_server(self.image, test_type="remote",
                                                 volume_name=volume_name, db_slave=None)
                    time.sleep(2)
                    milvus = MilvusClient(table_name)
                    # Drop the table if it already exists
                    if milvus.exists_table():
                        milvus.delete()
                        time.sleep(10)
                    milvus.create_table(table_name, dimension, index_file_size, metric_type)
                    # debug
                    # milvus.create_index("ivf_sq8", 16384)
                    res = self.do_insert(milvus, table_name, data_type, dimension,
                                         table_size, param["ni_per"])
                    logger.info(res)
                    # Wait for file merge
                    time.sleep(table_size * dimension / 5000000)
                    # Clean up
                    utils.remove_container(container)
            elif op_type == "query":
                for index, param in enumerate(run_params):
                    logger.info("Definition param: %s" % str(param))
                    table_name = param["dataset"]
                    volume_name = param["db_path_prefix"]
                    (data_type, table_size, index_file_size, dimension,
                     metric_type) = parser.table_parser(table_name)
                    for k, v in param.items():
                        if k.startswith("server."):
                            utils.modify_config(k, v, type="server")
                    container = utils.run_server(self.image, test_type="remote",
                                                 volume_name=volume_name, db_slave=None)
                    time.sleep(2)
                    milvus = MilvusClient(table_name)
                    logger.debug(milvus.show_tables())
                    if not milvus.exists_table():
                        logger.warning("Table %s does not exist, continue with next params ..." % table_name)
                        continue
                    # Parse index info
                    index_types = param["index.index_types"]
                    nlists = param["index.nlists"]
                    # Parse top-k, nq, nprobe
                    top_ks, nqs, nprobes = parser.search_params_parser(param)
                    for index_type in index_types:
                        for nlist in nlists:
                            result = milvus.describe_index()
                            logger.info(result)
                            # milvus.drop_index()
                            # milvus.create_index(index_type, nlist)
                            result = milvus.describe_index()
                            logger.info(result)
                            logger.info(milvus.count())
                            # Preload index
                            milvus.preload_table()
                            logger.info("Start warm up query")
                            res = self.do_query(milvus, table_name, [1], [1], 1, 1)
                            logger.info("End warm up query")
                            # Run query test
                            for nprobe in nprobes:
                                logger.info("index_type: %s, nlist: %s, metric_type: %s, nprobe: %s"
                                            % (index_type, nlist, metric_type, nprobe))
                                res = self.do_query(milvus, table_name, top_ks, nqs, nprobe, run_count)
                                headers = ["Nq/Top-k"]
                                headers.extend([str(top_k) for top_k in top_ks])
                                utils.print_table(headers, nqs, res)
                    utils.remove_container(container)
    elif run_type == "insert_performance":
        for op_type, op_value in definition.items():
            # Run in docker mode
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            container = None
            if not run_params:
                logger.debug("No run params")
                continue
            for index, param in enumerate(run_params):
                logger.info("Definition param: %s" % str(param))
                table_name = param["table_name"]
                volume_name = param["db_path_prefix"]
                print(table_name)
                (data_type, table_size, index_file_size, dimension,
                 metric_type) = parser.table_parser(table_name)
                for k, v in param.items():
                    if k.startswith("server."):
                        # Update server config
                        utils.modify_config(k, v, type="server", db_slave=None)
                container = utils.run_server(self.image, test_type="remote",
                                             volume_name=volume_name, db_slave=None)
                time.sleep(2)
                milvus = MilvusClient(table_name)
                # Drop the table if it already exists
                if milvus.exists_table():
                    milvus.delete()
                    time.sleep(10)
                milvus.create_table(table_name, dimension, index_file_size, metric_type)
                # debug
                # milvus.create_index("ivf_sq8", 16384)
                res = self.do_insert(milvus, table_name, data_type, dimension,
                                     table_size, param["ni_per"])
                logger.info(res)
                # Wait for file merge
                time.sleep(table_size * dimension / 5000000)
                # Clean up
                utils.remove_container(container)
    elif run_type == "search_performance":
        for op_type, op_value in definition.items():
            # Run in docker mode
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            container = None
            for index, param in enumerate(run_params):
                logger.info("Definition param: %s" % str(param))
                table_name = param["dataset"]
                volume_name = param["db_path_prefix"]
                (data_type, table_size, index_file_size, dimension,
                 metric_type) = parser.table_parser(table_name)
                for k, v in param.items():
                    if k.startswith("server."):
                        utils.modify_config(k, v, type="server")
                container = utils.run_server(self.image, test_type="remote",
                                             volume_name=volume_name, db_slave=None)
                time.sleep(2)
                milvus = MilvusClient(table_name)
                logger.debug(milvus.show_tables())
                if not milvus.exists_table():
                    logger.warning("Table %s does not exist, continue with next params ..." % table_name)
                    continue
                # Parse index info
                index_types = param["index.index_types"]
                nlists = param["index.nlists"]
                # Parse top-k, nq, nprobe
                top_ks, nqs, nprobes = parser.search_params_parser(param)
                for index_type in index_types:
                    for nlist in nlists:
                        result = milvus.describe_index()
                        logger.info(result)
                        # milvus.drop_index()
                        # milvus.create_index(index_type, nlist)
                        result = milvus.describe_index()
                        logger.info(result)
                        logger.info(milvus.count())
                        # Preload index
                        milvus.preload_table()
                        logger.info("Start warm up query")
                        res = self.do_query(milvus, table_name, [1], [1], 1, 1)
                        logger.info("End warm up query")
                        # Run query test
                        for nprobe in nprobes:
                            logger.info("index_type: %s, nlist: %s, metric_type: %s, nprobe: %s"
                                        % (index_type, nlist, metric_type, nprobe))
                            res = self.do_query(milvus, table_name, top_ks, nqs, nprobe, run_count)
                            headers = ["Nq/Top-k"]
                            headers.extend([str(top_k) for top_k in top_ks])
                            utils.print_table(headers, nqs, res)
                utils.remove_container(container)
    elif run_type == "accuracy":
        """
        Example definition:
        {
            "dataset": "random_50m_1024_512",
            "index.index_types": ["flat", "ivf_flat", "ivf_sq8"],
            "index.nlists": [16384],
            "nprobes": [1, 32, 128],
            "nqs": [100],
            "top_ks": [1, 64],
            "server.use_blas_threshold": 1100,
            "server.cpu_cache_capacity": 256
        }
        """
        for op_type, op_value in definition.items():
            if op_type != "query":
                logger.warning("invalid operation: %s in accuracy test, only query is supported" % op_type)
                break
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            container = None
            for index, param in enumerate(run_params):
                logger.info("Definition param: %s" % str(param))
                table_name = param["dataset"]
                sift_acc = False
                if "sift_acc" in param:
                    sift_acc = param["sift_acc"]
                (data_type, table_size, index_file_size, dimension,
                 metric_type) = parser.table_parser(table_name)
                for k, v in param.items():
                    if k.startswith("server."):
                        utils.modify_config(k, v, type="server")
                volume_name = param["db_path_prefix"]
                container = utils.run_server(self.image, test_type="remote",
                                             volume_name=volume_name, db_slave=None)
                time.sleep(2)
                milvus = MilvusClient(table_name)
                if not milvus.exists_table():
                    logger.warning("Table %s does not exist, continue with next params ..." % table_name)
                    continue
                # Parse index info
                index_types = param["index.index_types"]
                nlists = param["index.nlists"]
                # Parse top-k, nq, nprobe
                top_ks, nqs, nprobes = parser.search_params_parser(param)
                if sift_acc is True:
                    # Preload ground-truth data
                    true_ids_all = self.get_groundtruth_ids(table_size)
                acc_dict = {}
                for index_type in index_types:
                    for nlist in nlists:
                        result = milvus.describe_index()
                        logger.info(result)
                        milvus.create_index(index_type, nlist)
                        # Preload index
                        milvus.preload_table()
                        # Run query test
                        for nprobe in nprobes:
                            logger.info("index_type: %s, nlist: %s, metric_type: %s, nprobe: %s"
                                        % (index_type, nlist, metric_type, nprobe))
                            for top_k in top_ks:
                                for nq in nqs:
                                    result_ids = []
                                    id_prefix = "%s_index_%s_nlist_%s_metric_type_%s_nprobe_%s_top_k_%s_nq_%s" % \
                                        (table_name, index_type, nlist, metric_type, nprobe, top_k, nq)
                                    if sift_acc is False:
                                        self.do_query_acc(milvus, table_name, top_k, nq, nprobe, id_prefix)
                                        if index_type != "flat":
                                            # Compute accuracy against the flat baseline
                                            base_name = "%s_index_flat_nlist_%s_metric_type_%s_nprobe_%s_top_k_%s_nq_%s" % \
                                                (table_name, nlist, metric_type, nprobe, top_k, nq)
                                            avg_acc = self.compute_accuracy(base_name, id_prefix)
                                            logger.info("Query: <%s> accuracy: %s" % (id_prefix, avg_acc))
                                    else:
                                        result_ids, result_distances = self.do_query_ids(
                                            milvus, table_name, top_k, nq, nprobe)
                                        debug_file_ids = "0.5.3_result_ids"
                                        debug_file_distances = "0.5.3_result_distances"
                                        with open(debug_file_ids, "w+") as fd:
                                            total = 0
                                            for index, item in enumerate(result_ids):
                                                true_item = true_ids_all[:nq, :top_k].tolist()[index]
                                                tmp = set(item).intersection(set(true_item))
                                                total = total + len(tmp)
                                                fd.write("query: N-%d, intersection: %d, total: %d\n"
                                                         % (index, len(tmp), total))
                                                fd.write("%s\n" % str(item))
                                                fd.write("%s\n" % str(true_item))
                                        acc_value = self.get_recall_value(
                                            true_ids_all[:nq, :top_k].tolist(), result_ids)
                                        logger.info("Query: <%s> accuracy: %s" % (id_prefix, acc_value))
                # # print accuracy table
                # headers = [table_name]
                # headers.extend([str(top_k) for top_k in top_ks])
                # utils.print_table(headers, nqs, res)
                # Remove container, then run the next definition
                logger.info("remove container, and run next definition")
                utils.remove_container(container)
    elif run_type == "stability":
        for op_type, op_value in definition.items():
            if op_type != "query":
                logger.warning("invalid operation: %s in stability test, only query is supported" % op_type)
                break
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            container = None
            for index, param in enumerate(run_params):
                logger.info("Definition param: %s" % str(param))
                table_name = param["dataset"]
                index_type = param["index_type"]
                volume_name = param["db_path_prefix"]
                (data_type, table_size, index_file_size, dimension,
                 metric_type) = parser.table_parser(table_name)
                # Set default test time (seconds)
                if "during_time" not in param:
                    during_time = 100
                else:
                    during_time = int(param["during_time"]) * 60
                # Set default query process num
                if "query_process_num" not in param:
                    query_process_num = 10
                else:
                    query_process_num = int(param["query_process_num"])
                for k, v in param.items():
                    if k.startswith("server."):
                        utils.modify_config(k, v, type="server")
                container = utils.run_server(self.image, test_type="remote",
                                             volume_name=volume_name, db_slave=None)
                time.sleep(2)
                milvus = MilvusClient(table_name)
                if not milvus.exists_table():
                    logger.warning("Table %s does not exist, continue with next params ..." % table_name)
                    continue
                start_time = time.time()
                insert_vectors = [[random.random() for _ in range(dimension)] for _ in range(10000)]
                i = 0
                while time.time() < start_time + during_time:
                    i = i + 1
                    processes = []
                    # do query with multiple processes
                    # for i in range(query_process_num):
                    #     milvus_instance = MilvusClient(table_name)
                    #     top_k = random.choice([x for x in range(1, 100)])
                    #     nq = random.choice([x for x in range(1, 100)])
                    #     nprobe = random.choice([x for x in range(1, 1000)])
                    #     # logger.info("index_type: %s, nlist: %s, metric_type: %s, nprobe: %s" % (index_type, nlist, metric_type, nprobe))
                    #     p = Process(target=self.do_query, args=(milvus_instance, table_name, [top_k], [nq], [nprobe], run_count, ))
                    #     processes.append(p)
                    #     p.start()
                    #     time.sleep(0.1)
                    # for p in processes:
                    #     p.join()
                    milvus_instance = MilvusClient(table_name)
                    top_ks = random.sample([x for x in range(1, 100)], 3)
                    nqs = random.sample([x for x in range(1, 1000)], 3)
                    nprobe = random.choice([x for x in range(1, 500)])
                    res = self.do_query(milvus, table_name, top_ks, nqs, nprobe, run_count)
                    if i % 10 == 0:
                        status, res = milvus_instance.insert(insert_vectors, ids=[x for x in range(len(insert_vectors))])
                        if not status.OK():
                            logger.error(status)
                        # status = milvus_instance.drop_index()
                        # if not status.OK():
                        #     logger.error(status)
                        # index_type = random.choice(["flat", "ivf_flat", "ivf_sq8"])
                        milvus_instance.create_index(index_type, 16384)
                        result = milvus.describe_index()
                        logger.info(result)
                        # milvus_instance.create_index("ivf_sq8", 16384)
                utils.remove_container(container)
    else:
        logger.warning("Run type: %s not supported" % run_type)
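# Every branch above starts a server container with utils.run_server and tears
# it down with utils.remove_container, but an exception between the two leaks
# the container. A sketch of wrapping those existing calls in a context
# manager so cleanup always runs; `server_container` itself is a hypothetical
# helper, not part of this module.
from contextlib import contextmanager
import time

@contextmanager
def server_container(image, volume_name):
    container = utils.run_server(image, test_type="remote",
                                 volume_name=volume_name, db_slave=None)
    time.sleep(2)  # give the server a moment to come up, as the branches above do
    try:
        yield container
    finally:
        utils.remove_container(container)

# Usage sketch:
# with server_container(self.image, volume_name) as container:
#     milvus = MilvusClient(table_name)
#     ...  # run the test; the container is removed even on error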
def run(self, definition, run_type=None):
    if run_type == "performance":
        for op_type, op_value in definition.items():
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            if op_type == "insert":
                for index, param in enumerate(run_params):
                    table_name = param["table_name"]
                    (data_type, table_size, index_file_size, dimension,
                     metric_type) = parser.table_parser(table_name)
                    milvus = MilvusClient(table_name, ip=self.ip, port=self.port)
                    # Drop the table if it already exists
                    if milvus.exists_table():
                        milvus.delete()
                        time.sleep(10)
                    milvus.create_table(table_name, dimension, index_file_size, metric_type)
                    res = self.do_insert(milvus, table_name, data_type, dimension,
                                         table_size, param["ni_per"])
                    logger.info(res)
            elif op_type == "query":
                for index, param in enumerate(run_params):
                    logger.info("Definition param: %s" % str(param))
                    table_name = param["dataset"]
                    (data_type, table_size, index_file_size, dimension,
                     metric_type) = parser.table_parser(table_name)
                    milvus = MilvusClient(table_name, ip=self.ip, port=self.port)
                    logger.info(milvus.describe())
                    logger.info(milvus.describe_index())
                    logger.info(milvus.count())
                    logger.info(milvus.show_tables())
                    # Parse index info
                    index_types = param["index.index_types"]
                    nlists = param["index.nlists"]
                    # Parse top-k, nq, nprobe
                    top_ks, nqs, nprobes = parser.search_params_parser(param)
                    # milvus.drop_index()
                    for index_type in index_types:
                        for nlist in nlists:
                            # milvus.create_index(index_type, nlist)
                            # Preload index
                            logger.info("Start preloading table")
                            milvus.preload_table()
                            logger.info("End preloading table")
                            # Warm up before the timed queries
                            logger.info("Start warm up query")
                            res = self.do_query(milvus, table_name, [1], [1], 1, 2)
                            logger.info("End warm up query")
                            for nprobe in nprobes:
                                logger.info("index_type: %s, nlist: %s, metric_type: %s, nprobe: %s"
                                            % (index_type, nlist, metric_type, nprobe))
                                res = self.do_query(milvus, table_name, top_ks, nqs, nprobe, run_count)
                                headers = ["nq/topk"]
                                headers.extend([str(top_k) for top_k in top_ks])
                                utils.print_table(headers, nqs, res)
    elif run_type == "accuracy":
        for op_type, op_value in definition.items():
            if op_type != "query":
                logger.warning("invalid operation: %s in accuracy test, only query is supported" % op_type)
                break
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            for index, param in enumerate(run_params):
                logger.info("Definition param: %s" % str(param))
                table_name = param["dataset"]
                sift_acc = False
                if "sift_acc" in param:
                    sift_acc = param["sift_acc"]
                (data_type, table_size, index_file_size, dimension,
                 metric_type) = parser.table_parser(table_name)
                milvus = MilvusClient(table_name, ip=self.ip, port=self.port)
                logger.debug(milvus.show_tables())
                if not milvus.exists_table():
                    logger.warning("Table %s does not exist, continue with next params ..." % table_name)
                    continue
                # Parse index info
                index_types = param["index.index_types"]
                nlists = param["index.nlists"]
                # Parse top-k, nq, nprobe
                top_ks, nqs, nprobes = parser.search_params_parser(param)
                if sift_acc is True:
                    # Preload ground-truth data
                    true_ids_all = self.get_groundtruth_ids(table_size)
                acc_dict = {}
                for index_type in index_types:
                    for nlist in nlists:
                        result = milvus.describe_index()
                        logger.info(result)
                        # milvus.drop_index()
                        milvus.create_index(index_type, nlist)
                        # Preload index
                        milvus.preload_table()
                        # Run query test
                        for nprobe in nprobes:
                            logger.info("index_type: %s, nlist: %s, metric_type: %s, nprobe: %s"
                                        % (index_type, nlist, metric_type, nprobe))
                            for top_k in top_ks:
                                for nq in nqs:
                                    result_ids = []
                                    id_prefix = "%s_index_%s_nlist_%s_metric_type_%s_nprobe_%s_top_k_%s_nq_%s" % \
                                        (table_name, index_type, nlist, metric_type, nprobe, top_k, nq)
                                    if sift_acc is False:
                                        self.do_query_acc(milvus, table_name, top_k, nq, nprobe, id_prefix)
                                        if index_type != "flat":
                                            # Compute accuracy against the flat baseline
                                            base_name = "%s_index_flat_nlist_%s_metric_type_%s_nprobe_%s_top_k_%s_nq_%s" % \
                                                (table_name, nlist, metric_type, nprobe, top_k, nq)
                                            avg_acc = self.compute_accuracy(base_name, id_prefix)
                                            logger.info("Query: <%s> accuracy: %s" % (id_prefix, avg_acc))
                                    else:
                                        result_ids, result_distances = self.do_query_ids(
                                            milvus, table_name, top_k, nq, nprobe)
                                        debug_file_ids = "0.5.3_result_ids"
                                        debug_file_distances = "0.5.3_result_distances"
                                        with open(debug_file_ids, "w+") as fd:
                                            total = 0
                                            for index, item in enumerate(result_ids):
                                                true_item = true_ids_all[:nq, :top_k].tolist()[index]
                                                tmp = set(item).intersection(set(true_item))
                                                total = total + len(tmp)
                                                fd.write("query: N-%d, intersection: %d, total: %d\n"
                                                         % (index, len(tmp), total))
                                                fd.write("%s\n" % str(item))
                                                fd.write("%s\n" % str(true_item))
                                        acc_value = self.get_recall_value(
                                            true_ids_all[:nq, :top_k].tolist(), result_ids)
                                        logger.info("Query: <%s> accuracy: %s" % (id_prefix, acc_value))
                # # print accuracy table
                # headers = [table_name]
                # headers.extend([str(top_k) for top_k in top_ks])
                # utils.print_table(headers, nqs, res)
    elif run_type == "stability":
        for op_type, op_value in definition.items():
            if op_type != "query":
                logger.warning("invalid operation: %s in stability test, only query is supported" % op_type)
                break
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            nq = 100000
            for index, param in enumerate(run_params):
                logger.info("Definition param: %s" % str(param))
                table_name = param["dataset"]
                index_type = param["index_type"]
                (data_type, table_size, index_file_size, dimension,
                 metric_type) = parser.table_parser(table_name)
                # Set default test time (seconds)
                if "during_time" not in param:
                    during_time = 100
                else:
                    during_time = int(param["during_time"]) * 60
                # Set default query process num
                if "query_process_num" not in param:
                    query_process_num = 10
                else:
                    query_process_num = int(param["query_process_num"])
                milvus = MilvusClient(table_name, ip=self.ip, port=self.port)
                logger.debug(milvus.show_tables())
                logger.debug(milvus.describe_index())
                logger.debug(milvus.count())
                if not milvus.exists_table():
                    logger.warning("Table %s does not exist, continue with next params ..." % table_name)
                    continue
                start_time = time.time()
                insert_vectors = [[random.random() for _ in range(dimension)] for _ in range(nq)]
                i = 0
                while time.time() < start_time + during_time:
                    # processes = []
                    # # do query with multiple processes
                    # for i in range(query_process_num):
                    #     milvus_instance = MilvusClient(table_name)
                    #     top_k = random.choice([x for x in range(1, 100)])
                    #     nq = random.choice([x for x in range(1, 1000)])
                    #     nprobe = random.choice([x for x in range(1, 500)])
                    #     logger.info(nprobe)
                    #     p = Process(target=self.do_query, args=(milvus_instance, table_name, [top_k], [nq], 64, run_count, ))
                    #     processes.append(p)
                    #     p.start()
                    #     time.sleep(0.1)
                    # for p in processes:
                    #     p.join()
                    i = i + 1
                    milvus_instance = MilvusClient(table_name, ip=self.ip, port=self.port)
                    top_ks = random.sample([x for x in range(1, 100)], 1)
                    nqs = random.sample([x for x in range(1, 200)], 2)
                    nprobe = random.choice([x for x in range(1, 100)])
                    res = self.do_query(milvus_instance, table_name, top_ks, nqs, nprobe, run_count)
                    status, res = milvus_instance.insert(insert_vectors, ids=[x for x in range(len(insert_vectors))])
                    if not status.OK():
                        logger.error(status.message)
                    logger.debug(milvus.count())
                    # Query again after the insert
                    res = self.do_query(milvus_instance, table_name, top_ks, nqs, nprobe, run_count)
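# Several paths above (the interval loop in main(), the INSERT_INTERVAL loop
# in ann_accuracy) insert vectors in fixed-size batches with explicit ids.
# A standalone sketch of that batching pattern, assuming a client exposing
# insert(vectors, ids=...) as used above; the batch size is illustrative.
def insert_in_batches(client, vectors, batch_size=100000):
    for start in range(0, len(vectors), batch_size):
        end = min(start + batch_size, len(vectors))
        # Ids mirror the vector offsets, matching ids=[i for i in range(start, end)]
        client.insert(vectors[start:end], ids=list(range(start, end)))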