def main():
    """Train word2vec on the text8 corpus, store the vectors in Milvus and
    print the words whose vectors are closest to the vector of 'king'.

    The function connects to a Milvus server on localhost:19530, creates the
    'word2vec' collection (200 dimensions, L2 metric), inserts every word
    vector, builds an IVF_FLAT index and runs a top-10 search.
    """
    sentences = word2vec.Text8Corpus("text8")  # load the corpus
    model = word2vec.Word2Vec(sentences, size=200, window=5, min_count=5)  # train the model
    word_set = model.wv.index2word  # vocabulary
    word_vec = model.wv.vectors  # word2vec result vectors

    milvus = Milvus()
    milvus.connect(host='localhost', port='19530')
    param = {
        'collection_name': 'word2vec',
        'dimension': 200,
        'index_file_size': 1024,
        'metric_type': MetricType.L2
    }
    milvus.create_collection(param)
    status, ids = milvus.insert(collection_name='word2vec', records=word_vec)

    # Cluster the words
    ivf_param = {'nlist': 100}  # split into 100 clusters
    milvus.create_index('word2vec', IndexType.IVF_FLAT, ivf_param)  # add the index
    # Roughly equivalent to running a clustering algorithm that groups the
    # words into 100 classes.
    status, index = milvus.describe_index('word2vec')

    # Look up the most similar words
    res = milvus.search(collection_name='word2vec',
                        query_records=[list(word_vec[word_set.index('king')])],
                        top_k=10,
                        params={'nprobe': 16})
    hits = res[1][0]
    # The search may return fewer than 10 hits, so never index past the end.
    for i in range(min(10, len(hits))):
        hit_id = hits[i].id  # avoid shadowing the builtin `id`
        # Insert order matches word_set order, so the position of the id in
        # `ids` is the position of the word in `word_set`.
        print(word_set[ids.index(hit_id)])
    print(1)
def validate_insert(_table_name):
    """Check that the table holds exactly 10 * 10000 vectors.

    Opens its own connection using ``server_config`` and always closes it,
    even when the assertion fails.

    Raises:
        AssertionError: if the row count differs from 100000.
    """
    milvus = Milvus()
    milvus.connect(**server_config)
    try:
        status, count = milvus.count_table(_table_name)
        assert count == 10 * 10000, "Insert validate fail. Vectors num is not matched."
    finally:
        # Release the connection on both the success and the failure path.
        milvus.disconnect()
def _test_show_tables_multiprocessing(self, connect, args):
    '''
    target: check show_tables from several processes
    method: create a table, then call show_tables in 8 processes, each
        handed its own freshly connected client
    expected: every call returns an OK status and a result that contains
        the created table name
    '''
    server_uri = "tcp://%s:%s" % (args["ip"], args["port"])
    table_name = gen_unique_str("test_table")
    connect.create_table({
        'table_name': table_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.L2
    })

    def showtables(milvus):
        # Runs in the child process.
        status, result = milvus.show_tables()
        assert status.OK()
        assert table_name in result

    workers = []
    for _ in range(8):
        client = Milvus()
        client.connect(uri=server_uri)
        worker = Process(target=showtables, args=(client, ))
        workers.append(worker)
        worker.start()

    for worker in workers:
        worker.join()
def _test_delete_table_multiprocessing(self, args):
    '''
    target: call delete_table from several processes
    method: start 6 processes, each handed its own freshly connected
        client, and call delete_table in each of them
    expected: delete_table returns an OK status and assert_has_table
        returns a truthy value
    '''
    # NOTE(review): `table` is neither a parameter nor a local of this
    # function; it has to resolve from an outer scope -- confirm it is
    # what the test intends to delete.
    server_uri = "tcp://%s:%s" % (args["ip"], args["port"])

    def deletetable(milvus):
        # Runs in the child process.
        status = milvus.delete_table(table)
        # assert not status.code==0
        assert assert_has_table(milvus, table)
        assert status.OK()

    workers = []
    for _ in range(6):
        client = Milvus()
        client.connect(uri=server_uri)
        worker = Process(target=deletetable, args=(client, ))
        workers.append(worker)
        worker.start()

    for worker in workers:
        worker.join()
def _test_table_describe_table_name_multiprocessing(self, connect, args):
    '''
    target: check describe_table from several processes
    method: create a table, then call describe_table in 4 processes, each
        handed its own freshly connected client
    expected: the described table_name equals the name of the created table
    '''
    server_uri = "tcp://%s:%s" % (args["ip"], args["port"])
    table_name = gen_unique_str("test_table")
    connect.create_table({
        'table_name': table_name,
        'dimension': dim,
        'index_file_size': index_file_size,
        'metric_type': MetricType.L2
    })

    def describetable(milvus):
        # Runs in the child process.
        status, res = milvus.describe_table(table_name)
        assert res.table_name == table_name

    workers = []
    for _ in range(4):
        client = Milvus()
        client.connect(uri=server_uri)
        worker = Process(target=describetable, args=(client, ))
        workers.append(worker)
        worker.start()

    for worker in workers:
        worker.join()
def del_vectors(collection_name, ids):
    """Delete the vectors with the given ids from a collection.

    Args:
        collection_name: name of the collection to delete from.
        ids: ids passed to ``delete_by_id`` as ``id_array``.

    Raises:
        MilvusError: wrapping any exception raised while connecting or
            deleting.
    """
    client = Milvus()
    try:
        client.connect(MILVUS_ADDR, MILVUS_PORT)
        client.delete_by_id(collection_name=collection_name, id_array=ids)
    except Exception as err:
        raise MilvusError("There has some error when delete vectors", err)
def fit(table_name, X):
    """Add the vectors ``X`` to ``table_name`` and log how long it took.

    Passes the status returned by ``add_vectors`` and the elapsed time in
    seconds (rounded to two decimals) to ``logger``.
    """
    client = Milvus()
    client.connect(host=SERVER_HOST_DEFAULT, port=SERVER_PORT_DEFAULT)

    started_at = time.time()
    status, _ = client.add_vectors(table_name, X)
    finished_at = time.time()

    elapsed = round(finished_at - started_at, 2)
    logger(status, elapsed)
def create_table(_table_name):
    """Drop the table if it exists, create it again, then disconnect.

    NOTE(review): the table is created from the module-level ``param``, not
    from ``_table_name`` -- presumably ``param`` names the same table; verify.

    Raises:
        AssertionError: if delete_table does not return an OK status.
        Exception: if the table still exists 5 seconds after deletion.
    """
    client = Milvus()
    client.connect(host="localhost", port="19530")

    table_exists = client.has_table(_table_name)
    if table_exists:
        print(f"Table {_table_name} found, now going to delete it")
        drop_status = client.delete_table(_table_name)
        assert drop_status.OK(), "delete table {} failed".format(_table_name)

        # Give the server time to finish the deletion.
        time.sleep(5)
        if client.has_table(_table_name):
            raise Exception("Delete table error")
        print("delete table {} successfully!".format(_table_name))

    create_status = client.create_table(param)
    if not create_status.OK():
        print("Create table {} failed".format(_table_name))

    # In the main process the client must be closed before any subprocess
    # is started.
    client.disconnect()
    time.sleep(1)
def _test_create_index_multiprocessing(self, connect, table, args):
    '''
    target: call create_index on one table from several processes
    method: add vectors to the table, call create_index in 8 processes
        (each handed its own freshly connected client, started 0.2 s
        apart), then search for the first vector
    expected: every create_index status is OK and the search returns one
        row with top_k hits whose first distance is 0.0
    '''
    status, _ = connect.add_vectors(table, vectors)

    def build(client):
        # Runs in the child process.
        build_status = client.create_index(table)
        assert build_status.OK()

    server_uri = "tcp://%s:%s" % (args["ip"], args["port"])
    workers = []
    for _ in range(8):
        client = Milvus()
        client.connect(uri=server_uri)
        worker = Process(target=build, args=(client, ))
        workers.append(worker)
        worker.start()
        time.sleep(0.2)

    for worker in workers:
        worker.join()

    top_k = 1
    query_vec = [vectors[0]]
    status, result = connect.search_vectors(table, top_k, nprobe, query_vec)
    assert len(result) == 1
    assert len(result[0]) == top_k
    assert result[0][0].distance == 0.0
def connect(request):
    """Return a Milvus client connected to the server given on the command line.

    Reads the ``--ip`` and ``--port`` pytest options, connects, and retries
    once when the first status is not OK. If connecting raises, the whole
    pytest run is aborted. A finalizer that disconnects the client is
    registered on ``request``.
    """
    ip = request.config.getoption("--ip")
    port = request.config.getoption("--port")
    milvus = Milvus()
    try:
        status = milvus.connect(host=ip, port=port)
        logging.getLogger().info(status)
        if not status.OK():
            # try again
            logging.getLogger().info("------------------------------------")
            logging.getLogger().info("Try to connect again")
            logging.getLogger().info("------------------------------------")
            milvus.connect(host=ip, port=port)
    except Exception as e:
        logging.getLogger().error(str(e))
        pytest.exit("Milvus server can not connected, exit pytest ...")

    def fin():
        # Teardown is best-effort, but only ordinary errors are ignored:
        # a bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        try:
            milvus.disconnect()
        except Exception as e:
            logging.getLogger().debug("Ignored error on disconnect: %s", e)

    request.addfinalizer(fin)
    return milvus
def _test_table_rows_count_multiprocessing(self, connect, ip_table, args):
    '''
    target: check get_table_row_count from several processes
    method: add 2 generated vectors to the table, wait add_time_interval
        seconds, then call get_table_row_count in 8 processes, each handed
        its own freshly connected client
    expected: every process sees a row count equal to the number of added
        vectors
    '''
    nq = 2
    server_uri = "tcp://%s:%s" % (args["ip"], args["port"])
    connect.add_vectors(table_name=ip_table, records=gen_vectors(nq, dim))
    time.sleep(add_time_interval)

    def rows_count(milvus):
        # Runs in the child process.
        status, res = milvus.get_table_row_count(ip_table)
        logging.getLogger().info(status)
        assert res == nq

    workers = []
    for _ in range(8):
        client = Milvus()
        client.connect(uri=server_uri)
        worker = Process(target=rows_count, args=(client,))
        workers.append(worker)
        worker.start()
        logging.getLogger().info(worker)

    for worker in workers:
        worker.join()
def milvus_client():
    """Return a Milvus client connected to MILVUS_HOST:MILVUS_PORT.

    When creating or connecting the client raises, the error is printed,
    passed to ``write_log`` and ``None`` is returned.
    """
    try:
        client = Milvus()
        client.connect(host=MILVUS_HOST, port=MILVUS_PORT)
    except Exception as err:
        print("Milvus ERROR:", err)
        write_log(err, 1)
        return None
    return client
def _test_connect_ip_localhost(self, args):
    '''
    target: connect using the host name "localhost"
    method: connect with host='localhost' and the configured port
    expected: connected() is True
    '''
    client = Milvus()
    client.connect(host='localhost', port=args["port"])
    is_connected = client.connected()
    assert is_connected
def test_connect_correct_ip_port(self, args):
    '''
    target: connect with the configured ip and port
    method: pass args["ip"] and args["port"] as host and port
    expected: connected() is True
    '''
    client = Milvus()
    client.connect(host=args["ip"], port=args["port"])
    is_connected = client.connected()
    assert is_connected
def validate_insert(_table_name):
    """Check that the table holds ``vector_num * process_num`` vectors.

    Opens its own connection to localhost:19530 and always closes it, even
    when the assertion fails.

    Raises:
        AssertionError: if the row count differs from the expected total.
    """
    milvus = Milvus()
    milvus.connect(host="localhost", port="19530")
    try:
        status, count = milvus.get_table_row_count(_table_name)
        # The message is built from two adjacent f-string literals.
        assert count == vector_num * process_num, \
            f"Error: validate insert not pass: " \
            f"{vector_num * process_num} expected but {count} instead!"
    finally:
        # Release the connection on both the success and the failure path.
        milvus.disconnect()
def test_connect_param_priority_no_port(self, args):
    '''
    target: host, port and uri are all given, with port set to ""
    method: connect with host=args["ip"], port="" and a uri that points
        at port 19540 on the same ip
    expected: as written, the test asserts connected() is True
    NOTE(review): port 19540 looks like a deliberately wrong port, yet the
        test asserts a live connection -- confirm the intended expectation.
    '''
    client = Milvus()
    wrong_port_uri = "tcp://%s:19540" % args["ip"]
    client.connect(host=args["ip"], port="", uri=wrong_port_uri)
    is_connected = client.connected()
    assert is_connected
def test_connect_wrong_uri_wrong_port_null(self, args):
    '''
    target: connect with a uri whose port part is empty
    method: build "tcp://<ip>:" and connect with timeout=1
    expected: connect raises an exception
    '''
    client = Milvus()
    portless_uri = "tcp://%s:" % args["ip"]
    with pytest.raises(Exception):
        client.connect(uri=portless_uri, timeout=1)
def test_connect_uri(self, args):
    '''
    target: connect through a uri
    method: build "tcp://<ip>:<port>" from the configured ip and port
    expected: connected() is True
    '''
    client = Milvus()
    server_uri = "tcp://%s:%s" % (args["ip"], args["port"])
    client.connect(uri=server_uri)
    is_connected = client.connected()
    assert is_connected
def test_connect_connected(self, args):
    '''
    target: check connected() after a connect / disconnect pair
    method: connect with the configured ip and port, then disconnect
    expected: connected() is False
    '''
    client = Milvus()
    client.connect(host=args["ip"], port=args["port"])
    client.disconnect()
    still_connected = client.connected()
    assert not still_connected
def del_milvus_collection(name):
    """Drop the Milvus collection called ``name``.

    Raises:
        MilvusError: if connecting or dropping raises (the original
            exception is attached as the cause), or if drop_collection
            returns a status that is not OK.
    """
    milvus = Milvus()
    try:
        milvus.connect(MILVUS_ADDR, MILVUS_PORT)
        res = milvus.drop_collection(collection_name=name)
    except Exception as e:
        raise MilvusError(
            "There has some error when delete milvus collection", e) from e
    # Checked outside the try block so this error is not caught by the
    # handler above and wrapped a second time.
    if not res.OK():
        raise MilvusError(
            "There has some error when drop milvus collection", res)
def _test_create_index_multiprocessing_multitable(self, connect, args):
    '''
    target: call create_index on many tables from several processes
    method: create process_num * loop_num tables; in each process add
        vectors to loop_num of them, create an index and search for the
        first vector
    expected: every create_index status is OK and each search returns one
        row with top_k hits whose first distance is 0.0
    '''
    process_num = 8
    loop_num = 8
    processes = []

    table = []
    for _ in range(process_num * loop_num):
        table_name = gen_unique_str("test_create_index_multiprocessing")
        table.append(table_name)
        param = {
            'table_name': table_name,
            'dimension': dim,
            'index_type': IndexType.FLAT,
            'store_raw_vector': False
        }
        connect.create_table(param)

    def create_index(milvus, process_idx):
        # The worker takes the two values passed through Process(args=...).
        # The ids returned by add_vectors are deliberately not bound to
        # the name used for the process index.
        for i in range(loop_num):
            # Each process owns a contiguous slice of loop_num tables.
            name = table[process_idx * loop_num + i]
            # assert milvus.has_table(name)
            status, _ = milvus.add_vectors(name, vectors)
            status = milvus.create_index(name)
            assert status.OK()
            query_vec = [vectors[0]]
            top_k = 1
            status, result = milvus.search_vectors(
                name, top_k, nprobe, query_vec)
            assert len(result) == 1
            assert len(result[0]) == top_k
            assert result[0][0].distance == 0.0

    uri = "tcp://%s:%s" % (args["ip"], args["port"])
    for i in range(process_num):
        m = Milvus()
        m.connect(uri=uri)
        p = Process(target=create_index, args=(m, i))
        processes.append(p)
        p.start()
        time.sleep(0.2)
    for p in processes:
        p.join()
def test_connect_param_priority_uri(self, args):
    '''
    target: host, port and uri are all given, with host set to ""
    method: connect with host="", the configured port, a uri built from
        the configured ip and port, and timeout=1
    expected: connect raises an exception and connected() is False
    '''
    client = Milvus()
    server_uri = "tcp://%s:%s" % (args["ip"], args["port"])
    with pytest.raises(Exception):
        client.connect(host="", port=args["port"], uri=server_uri, timeout=1)
    still_connected = client.connected()
    assert not still_connected
def test_connect_wrong_ip_null(self, args):
    '''
    target: connect with an empty host
    method: connect with host="", the configured port and timeout=1
    expected: connect raises an exception and connected() is False
    '''
    client = Milvus()
    empty_host = ""
    with pytest.raises(Exception):
        client.connect(host=empty_host, port=args["port"], timeout=1)
    still_connected = client.connected()
    assert not still_connected
def test_connect_param_priority_both_hostip_uri(self, args):
    '''
    target: host, port and uri are all given and none of them is empty
    method: connect with the configured ip as host, port=19540, a uri
        built from the configured ip and port, and timeout=1
    expected: connect raises an exception and connected() is False
    '''
    client = Milvus()
    server_uri = "tcp://%s:%s" % (args["ip"], args["port"])
    with pytest.raises(Exception):
        client.connect(host=args["ip"], port=19540, uri=server_uri, timeout=1)
    still_connected = client.connected()
    assert not still_connected
def test_connect_with_invalid_uri(self, get_invalid_uri):
    '''
    target: connect with an invalid uri
    method: connect with the uri supplied by get_invalid_uri and timeout=1
    expected: connect raises an exception and connected() is False
    '''
    client = Milvus()
    bad_uri = get_invalid_uri
    with pytest.raises(Exception):
        client.connect(uri=bad_uri, timeout=1)
    still_connected = client.connected()
    assert not still_connected
def test_connect_with_invalid_port(self, args, get_invalid_port):
    '''
    target: connect with an invalid port
    method: connect with the configured ip, the port supplied by
        get_invalid_port and timeout=1
    expected: connect raises an exception and connected() is False
    '''
    client = Milvus()
    bad_port = get_invalid_port
    with pytest.raises(Exception):
        client.connect(host=args["ip"], port=bad_port, timeout=1)
    still_connected = client.connected()
    assert not still_connected
def test_connect_wrong_uri_wrong_ip_null(self, args):
    '''
    target: connect with a uri whose ip part is empty
    method: build "tcp://:<port>" and connect with timeout=1
    expected: connect raises an exception and connected() is False
    '''
    client = Milvus()
    hostless_uri = "tcp://:%s" % args["port"]
    with pytest.raises(Exception):
        client.connect(uri=hostless_uri, timeout=1)
    still_connected = client.connected()
    assert not still_connected
def test_connect_repeatedly(self, args):
    '''
    target: connect twice in a row
    method: call connect two times with the same uri
    expected: connected() is True after the second call
    '''
    client = Milvus()
    server_uri = "tcp://%s:%s" % (args["ip"], args["port"])
    for _ in range(2):
        client.connect(uri=server_uri)
    is_connected = client.connected()
    assert is_connected
def insert_vectors(name, vectors):
    """Insert ``vectors`` into the collection ``name`` and return their ids.

    Returns:
        The ids returned by ``Milvus.insert``.

    Raises:
        MilvusError: if connecting or inserting raises (the original
            exception is attached as the cause), or if insert returns a
            status that is not OK.
    """
    milvus = Milvus()
    try:
        milvus.connect(MILVUS_ADDR, MILVUS_PORT)
        res, ids = milvus.insert(collection_name=name, records=vectors)
    except Exception as e:
        logger.error("There has some error when insert vectors", exc_info=True)
        raise MilvusError("There has some error when insert vectors", e) from e
    # Checked outside the try block so this error is not caught by the
    # handler above and wrapped a second time.
    if not res.OK():
        logger.error("There has some error when insert vectors: %s", res)
        raise MilvusError("There has some error when insert vectors", res)
    return ids
def test_connect_disconnect_repeatedly_once(self, args):
    '''
    target: reconnect after a disconnect
    method: connect, disconnect, then connect again with the same uri
    expected: connected() is True after the second connect
    '''
    client = Milvus()
    server_uri = "tcp://%s:%s" % (args["ip"], args["port"])
    client.connect(uri=server_uri)
    client.disconnect()
    client.connect(uri=server_uri)
    is_connected = client.connected()
    assert is_connected