def test_scale_proxy(self): """ target: test milvus operation after proxy expand method: 1.deploy 1 proxy replicas 2.milvus e2e test in parallel 3.expand proxy pod from 1 to 5 4.milvus e2e test 5.shrink proxy from 5 to 2 expected: 1.verify data consistent and func work """ # deploy milvus cluster with one proxy release_name = "scale-proxy" image = f'{constants.IMAGE_REPOSITORY}:{constants.IMAGE_TAG}' data_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.proxy.replicas': 1, 'spec.components.dataNode.replicas': 2, 'spec.config.dataCoord.enableCompaction': True, 'spec.config.dataCoord.enableGarbageCollection': True } mic = MilvusOperator() mic.install(data_config) healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200) log.info(f"milvus healthy: {healthy}") host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] # host = "10.98.0.7" c_name = cf.gen_unique_str(prefix) self.e2e_milvus_parallel(5, host, c_name) log.info('Milvus test before expand') # expand proxy replicas from 1 to 5 mic.upgrade(release_name, {'spec.components.proxy.replicas': 5}, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") self.e2e_milvus_parallel(5, host, c_name) log.info('Milvus test after expand') # expand proxy replicas from 5 to 2 mic.upgrade(release_name, {'spec.components.proxy.replicas': 2}, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") self.e2e_milvus_parallel(2, host, c_name) log.info('Milvus test after shrink')
def scale_up_milvus(release_name): cus_configs = {'spec.components.queryNode.replicas': 2} milvus_op = MilvusOperator() log.info(f"scale up milvus with configs: {cus_configs}") milvus_op.upgrade(release_name, cus_configs, namespace=namespace) healthy = milvus_op.wait_for_healthy(release_name, namespace, timeout=1200) log.info(f"milvus healthy: {healthy}") if healthy: endpoint = milvus_op.endpoint(release_name, namespace).split(':') log.info(f"milvus endpoint: {endpoint}") host = endpoint[0] port = endpoint[1] return release_name, host, port else: return release_name, None, None
def install_milvus(release_name): cus_configs = { 'spec.components.image': 'milvusdb/milvus-dev:master-20211206-b20a238', 'metadata.namespace': namespace, 'metadata.name': release_name, 'spec.components.proxy.serviceType': 'LoadBalancer' } milvus_op = MilvusOperator() log.info(f"install milvus with configs: {cus_configs}") milvus_op.install(cus_configs) healthy = milvus_op.wait_for_healthy(release_name, namespace, timeout=1200) log.info(f"milvus healthy: {healthy}") if healthy: endpoint = milvus_op.endpoint(release_name, namespace).split(':') log.info(f"milvus endpoint: {endpoint}") host = endpoint[0] port = endpoint[1] return release_name, host, port else: return release_name, None, None
def deploy_default_milvus(release_name, image_tag=None): if image_tag is None: image = f'{constants.IMAGE_REPO}:{constants.IMAGE_TAG}' else: image = f'{constants.IMAGE_REPO}:{image_tag}' default_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer' } milvusOp = MilvusOperator() milvusOp.install(default_config) if milvusOp.wait_for_healthy(release_name, namespace=constants.NAMESPACE): endpoint = milvusOp.endpoint(release_name, constants.NAMESPACE) endpoint = endpoint.split(':') host = endpoint[0] port = int(endpoint[-1]) return milvusOp, host, port else: raise Exception(f"Failed to install {release_name}")
def test_shrink_index_node(self): """ target: test shrink indexNode from 2 to 1 method: 1.deploy two indexNode 2.create index with two indexNode 3.shrink indexNode from 2 to 1 4.create index with 1 indexNode expected: The cost of one indexNode is about twice that of two indexNodes """ release_name = "shrink-index" image_tag = get_latest_tag() image = f'{constants.IMAGE_REPOSITORY}:{image_tag}' data_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.indexNode.replicas': 2, 'spec.components.dataNode.replicas': 2, 'spec.config.dataCoord.enableCompaction': True, 'spec.config.dataCoord.enableGarbageCollection': True } mic = MilvusOperator() mic.install(data_config) if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800): host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] else: raise MilvusException(message=f'Milvus healthy timeout 1800s') try: # connect connections.add_connection(default={"host": host, "port": 19530}) connections.connect(alias='default') data = cf.gen_default_dataframe_data(nb) # create c_name = "index_scale_one" collection_w = ApiCollectionWrapper() # collection_w.init_collection(name=c_name) collection_w.init_collection( name=c_name, schema=cf.gen_default_collection_schema()) # insert loop = 10 for i in range(loop): collection_w.insert(data) assert collection_w.num_entities == nb * loop # create index on collection one and two start = datetime.datetime.now() collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] t0 = datetime.datetime.now() - start log.info(f'Create index on 2 indexNode cost t0: {t0}') collection_w.drop_index() assert not collection_w.has_index()[0] # shrink indexNode from 2 to 1 mic.upgrade(release_name, {'spec.components.indexNode.replicas': 1}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") start = datetime.datetime.now() collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] t1 = datetime.datetime.now() - start log.info(f'Create index on 1 indexNode cost t1: {t1}') collection_w.drop_index() start = datetime.datetime.now() collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] t2 = datetime.datetime.now() - start log.info(f'Create index on 1 indexNode cost t2: {t2}') log.debug(f'one indexNode: {t2}') log.debug(f't2 is {t2}, t0 is {t0}, t2/t0 is {t2 / t0}') # assert round(t2 / t0) == 2 except Exception as e: raise Exception(str(e)) finally: label = f"app.kubernetes.io/instance={release_name}" log.info('Start to export milvus pod logs') read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name) mic.uninstall(release_name, namespace=constants.NAMESPACE)
def test_scale_query_node(self): """ target: test scale queryNode method: 1.deploy milvus cluster with 1 queryNode 2.prepare work (connect, create, insert, index and load) 3.continuously search (daemon thread) 4.expand queryNode from 2 to 5 5.continuously insert new data (daemon thread) 6.shrink queryNode from 5 to 3 expected: Verify milvus remains healthy and search successfully during scale """ release_name = "scale-query" query_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.components.image': 'harbor.zilliz.cc/milvus/milvus:master-20211202-ed546d0', 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.queryNode.replicas': 1, 'spec.config.dataCoord.enableCompaction': True, 'spec.config.dataCoord.enableGarbageCollection': True } mic = MilvusOperator() mic.install(query_config) healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200) log.info(f"milvus healthy: {healthy}") host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] # host = "10.98.0.8" # connect connections.add_connection(default={"host": host, "port": 19530}) connections.connect(alias='default') # create c_name = cf.gen_unique_str("scale_query") # c_name = 'scale_query_DymS7kI4' collection_w = ApiCollectionWrapper() collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2) # insert two segments for i in range(3): df = cf.gen_default_dataframe_data(nb) collection_w.insert(df) log.debug(collection_w.num_entities) # create index collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params) # load collection_w.load() # scale queryNode to 5 mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE) # continuously search def do_search(): while True: search_res, _ = collection_w.search(cf.gen_vectors(1, ct.default_dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) log.debug(search_res[0].ids) assert len(search_res[0].ids) == ct.default_limit t_search = threading.Thread(target=do_search, args=(), daemon=True) t_search.start() # wait new QN running, continuously insert # time.sleep(10) healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200) log.info(f"milvus healthy after scale up: {healthy}") # wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") def do_insert(): while True: tmp_df = cf.gen_default_dataframe_data(1000) collection_w.insert(tmp_df) t_insert = threading.Thread(target=do_insert, args=(), daemon=True) t_insert.start() log.debug(collection_w.num_entities) time.sleep(20) log.debug("Expand querynode test finished") mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE) time.sleep(60) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") log.debug(collection_w.num_entities) time.sleep(60) log.debug("Shrink querynode test finished")
def test_expand_index_node(self): """ target: test expand indexNode from 1 to 2 method: 1.deploy two indexNode 2.create index with two indexNode 3.expand indexNode from 1 to 2 4.create index with one indexNode expected: The cost of one indexNode is about twice that of two indexNodes """ release_name = "scale-index" image = f'{constants.IMAGE_REPOSITORY}:{constants.IMAGE_TAG}' data_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.indexNode.replicas': 1, 'spec.components.dataNode.replicas': 2, 'spec.config.dataCoord.enableCompaction': True, 'spec.config.dataCoord.enableGarbageCollection': True } mic = MilvusOperator() mic.install(data_config) healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200) log.info(f"milvus healthy: {healthy}") host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] # host = '10.98.0.8' # connect connections.add_connection(default={"host": host, "port": 19530}) connections.connect(alias='default') data = cf.gen_default_dataframe_data(nb) # create c_name = "index_scale_one" collection_w = ApiCollectionWrapper() # collection_w.init_collection(name=c_name) collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema()) # insert loop = 100 for i in range(loop): collection_w.insert(data, timeout=60) assert collection_w.num_entities == nb * loop # create index on collection # note that the num of segments and the num of indexNode are related to indexing time collection_w.drop_index() start = datetime.datetime.now() collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] t0 = datetime.datetime.now() - start log.debug(f't0: {t0}') collection_w.drop_index() assert not collection_w.has_index()[0] # expand indexNode from 1 to 2 mic.upgrade(release_name, {'spec.components.indexNode.replicas': 2}, constants.NAMESPACE) time.sleep(60) mic.wait_for_healthy(release_name, constants.NAMESPACE) start = datetime.datetime.now() collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] t1 = datetime.datetime.now() - start log.debug(f't1: {t1}') assert round(t0 / t1) == 2
def test_scale_data_node(self): """ target: test scale dataNode method: 1.deploy milvus cluster with 2 dataNode 2.create collection with shards_num=5 3.continuously insert new data (daemon thread) 4.expand dataNode from 2 to 5 5.create new collection with shards_num=2 6.continuously insert new collection new data (daemon thread) 7.shrink dataNode from 5 to 3 expected: Verify milvus remains healthy, Insert and flush successfully during scale Average dataNode memory usage """ release_name = "scale-data" image = f'{constants.IMAGE_REPOSITORY}:{constants.IMAGE_TAG}' data_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.dataNode.replicas': 2, 'spec.config.dataCoord.enableCompaction': True, 'spec.config.dataCoord.enableGarbageCollection': True } mic = MilvusOperator() mic.install(data_config) healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200) log.info(f"milvus healthy: {healthy}") host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] # host = '10.98.0.4' # connect connections.add_connection(default={"host": host, "port": 19530}) connections.connect(alias='default') # create c_name = cf.gen_unique_str("scale_query") # c_name = 'scale_query_DymS7kI4' collection_w = ApiCollectionWrapper() collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=5) tmp_nb = 10000 def do_insert(): while True: tmp_df = cf.gen_default_dataframe_data(tmp_nb) collection_w.insert(tmp_df) log.debug(collection_w.num_entities) t_insert = threading.Thread(target=do_insert, args=(), daemon=True) t_insert.start() # scale dataNode to 5 mic.upgrade(release_name, {'spec.components.dataNode.replicas': 5}, constants.NAMESPACE) time.sleep(300) log.debug("Expand dataNode test finished") # create new collection and insert new_c_name = cf.gen_unique_str("scale_query") collection_w_new = ApiCollectionWrapper() collection_w_new.init_collection( name=new_c_name, schema=cf.gen_default_collection_schema(), shards_num=2) def do_new_insert(): while True: tmp_df = cf.gen_default_dataframe_data(tmp_nb) collection_w_new.insert(tmp_df) log.debug(collection_w_new.num_entities) t_insert_new = threading.Thread(target=do_new_insert, args=(), daemon=True) t_insert_new.start() # scale dataNode to 3 mic.upgrade(release_name, {'spec.components.dataNode.replicas': 3}, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") log.debug(collection_w.num_entities) time.sleep(300) log.debug("Shrink dataNode test finished")
def test_scale_proxy(self): """ target: test milvus operation after proxy expand method: 1.deploy 1 proxy replicas 2.milvus e2e test in parallel 3.expand proxy pod from 1 to 5 4.milvus e2e test 5.shrink proxy from 5 to 2 expected: 1.verify data consistent and func work """ # deploy milvus cluster with one proxy fail_count = 0 release_name = "scale-proxy" image_tag = get_latest_tag() image = f'{constants.IMAGE_REPOSITORY}:{image_tag}' data_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.mode': 'cluster', 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.proxy.replicas': 1, 'spec.components.dataNode.replicas': 2, 'spec.config.common.retentionDuration': 60 } mic = MilvusOperator() mic.install(data_config) if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800): host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] else: raise MilvusException(message=f'Milvus healthy timeout 1800s') try: c_name = cf.gen_unique_str("proxy_scale") e2e_milvus_parallel(2, host, c_name) log.info('Milvus test before expand') # expand proxy replicas from 1 to 5 mic.upgrade(release_name, {'spec.components.proxy.replicas': 5}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") e2e_milvus_parallel(5, host, c_name) log.info('Milvus test after expand') # expand proxy replicas from 5 to 2 mic.upgrade(release_name, {'spec.components.proxy.replicas': 2}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") e2e_milvus_parallel(2, host, c_name) log.info('Milvus test after shrink') connections.connect('default', host=host, port=19530) collection_w = ApiCollectionWrapper() collection_w.init_collection(name=c_name) """ total start 2+5+2 process to run e2e, each time insert default_nb data, But one of the 2 processes started for the first time did not insert due to collection creation exception. So actually insert eight times """ assert collection_w.num_entities == 8 * default_nb except Exception as e: log.error(str(e)) fail_count += 1 # raise Exception(str(e)) finally: log.info(f'Test finished with {fail_count} fail request') assert fail_count <= 1 label = f"app.kubernetes.io/instance={release_name}" log.info('Start to export milvus pod logs') read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name) mic.uninstall(release_name, namespace=constants.NAMESPACE)
def test_scale_query_node(self): """ target: test scale queryNode method: 1.deploy milvus cluster with 1 queryNode 2.prepare work (connect, create, insert, index and load) 3.continuously search (daemon thread) 4.expand queryNode from 2 to 5 5.continuously insert new data (daemon thread) 6.shrink queryNode from 5 to 3 expected: Verify milvus remains healthy and search successfully during scale """ fail_count = 0 release_name = "scale-query" image_tag = get_latest_tag() image = f'{constants.IMAGE_REPOSITORY}:{image_tag}' query_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.queryNode.replicas': 1, 'spec.config.dataCoord.enableCompaction': True, 'spec.config.dataCoord.enableGarbageCollection': True } mic = MilvusOperator() mic.install(query_config) if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200): host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] else: # log.warning(f'Deploy {release_name} timeout and ready to uninstall') # mic.uninstall(release_name, namespace=constants.NAMESPACE) raise BaseException(f'Milvus healthy timeout 1200s') try: # connect connections.add_connection(default={"host": host, "port": 19530}) connections.connect(alias='default') # create c_name = cf.gen_unique_str("scale_query") # c_name = 'scale_query_DymS7kI4' collection_w = ApiCollectionWrapper() collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2) # insert two segments for i in range(3): df = cf.gen_default_dataframe_data(nb) collection_w.insert(df) log.debug(collection_w.num_entities) # create index collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params) # load collection_w.load() # scale queryNode to 5 mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE) # continuously search def do_search(): while True: search_res, _ = collection_w.search(cf.gen_vectors(1, ct.default_dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) log.debug(search_res[0].ids) assert len(search_res[0].ids) == ct.default_limit t_search = threading.Thread(target=do_search, args=(), daemon=True) t_search.start() # wait new QN running, continuously insert mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") def do_insert(): while True: tmp_df = cf.gen_default_dataframe_data(1000) collection_w.insert(tmp_df) t_insert = threading.Thread(target=do_insert, args=(), daemon=True) t_insert.start() log.debug(collection_w.num_entities) time.sleep(20) log.debug("Expand querynode test finished") mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") log.debug(collection_w.num_entities) time.sleep(60) log.debug("Shrink querynode test finished") except Exception as e: log.error(str(e)) fail_count += 1 # raise Exception(str(e)) finally: log.info(f'Test finished with {fail_count} fail request') assert fail_count <= 1 label = f"app.kubernetes.io/instance={release_name}" log.info('Start to export milvus pod logs') read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name) mic.uninstall(release_name, namespace=constants.NAMESPACE)
def test_expand_data_node(self): """ target: test create and insert api after expand dataNode pod method: 1.create collection a and insert df 2.expand dataNode pod from 1 to 2 3.verify collection a property and verify create and insert of new collection expected: two collection create and insert op are both correctly """ # deploy all nodes one pod cluster milvus with helm release_name = "scale-data" # env = HelmEnv(release_name=release_name) # host = env.helm_install_cluster_milvus() # deploy cluster milvus with dataNode 1 replicas default_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.components.image': 'milvusdb/milvus-dev:master-20211020-b40513b', 'spec.components.proxy.serviceType': 'LoadBalancer', 'dependencies.etcd.inCluster.deletionPolicy': 'Delete', 'dependencies.etcd.inCluster.pvcDeletion': 'true', 'dependencies.pulsar.inCluster.deletionPolicy': 'Delete', 'dependencies.pulsar.inCluster.pvcDeletion': 'true', 'dependencies.storage.inCluster.deletionPolicy': 'Delete', 'dependencies.storage.inCluster.pvcDeletion': 'true', } milvusOp = MilvusOperator() milvusOp.install(default_config) if milvusOp.wait_for_healthy(release_name, namespace=constants.NAMESPACE): endpoint = milvusOp.endpoint(release_name, constants.NAMESPACE) endpoint = endpoint.split(':') host = endpoint[0] port = int(endpoint[-1]) else: raise Exception(f"Failed to install {release_name}") # connect connections.add_connection(default={"host": host, "port": port}) connections.connect(alias='default') # create c_name = cf.gen_unique_str(prefix) collection_w = ApiCollectionWrapper() collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema()) # # insert data = cf.gen_default_list_data(ct.default_nb) mutation_res, _ = collection_w.insert(data) assert mutation_res.insert_count == ct.default_nb # scale dataNode to 2 pods milvusOp.upgrade(release_name, {'spec.components.dataNode.replicas': 2}, constants.NAMESPACE) # env.helm_upgrade_cluster_milvus(dataNode=2) # after scale, assert data consistent assert utility.has_collection(c_name) assert collection_w.num_entities == ct.default_nb # assert new operations new_cname = cf.gen_unique_str(prefix) new_collection_w = ApiCollectionWrapper() new_collection_w.init_collection( name=new_cname, schema=cf.gen_default_collection_schema()) new_mutation_res, _ = new_collection_w.insert(data) assert new_mutation_res.insert_count == ct.default_nb assert new_collection_w.num_entities == ct.default_nb # assert old collection ddl mutation_res_2, _ = collection_w.insert(data) assert mutation_res.insert_count == ct.default_nb assert collection_w.num_entities == ct.default_nb * 2 collection_w.drop() new_collection_w.drop()
def test_scale_proxy(self): """ target: test milvus operation after proxy expand method: 1.deploy 1 proxy replicas 2.milvus e2e test in parallel 3.expand proxy pod from 1 to 5 4.milvus e2e test 5.shrink proxy from 5 to 2 expected: 1.verify data consistent and func work """ # deploy milvus cluster with one proxy fail_count = 0 release_name = "scale-proxy" image_tag = get_latest_tag() image = f'{constants.IMAGE_REPOSITORY}:{image_tag}' data_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.proxy.replicas': 1, 'spec.components.dataNode.replicas': 2, 'spec.config.dataCoord.enableCompaction': True, 'spec.config.dataCoord.enableGarbageCollection': True } mic = MilvusOperator() mic.install(data_config) if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200): host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] else: # log.warning(f'Deploy {release_name} timeout and ready to uninstall') # mic.uninstall(release_name, namespace=constants.NAMESPACE) raise BaseException(f'Milvus healthy timeout 1200s') try: c_name = cf.gen_unique_str(prefix) self.e2e_milvus_parallel(5, host, c_name) log.info('Milvus test before expand') # expand proxy replicas from 1 to 5 mic.upgrade(release_name, {'spec.components.proxy.replicas': 5}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") self.e2e_milvus_parallel(5, host, c_name) log.info('Milvus test after expand') # expand proxy replicas from 5 to 2 mic.upgrade(release_name, {'spec.components.proxy.replicas': 2}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") self.e2e_milvus_parallel(2, host, c_name) log.info('Milvus test after shrink') except Exception as e: log.error(str(e)) fail_count += 1 # raise Exception(str(e)) finally: log.info(f'Test finished with {fail_count} fail request') assert fail_count <= 1 label = f"app.kubernetes.io/instance={release_name}" log.info('Start to export milvus pod logs') read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name) mic.uninstall(release_name, namespace=constants.NAMESPACE)
def test_scale_query_node(self): """ target: test scale queryNode method: 1.deploy milvus cluster with 1 queryNode 2.prepare work (connect, create, insert, index and load) 3.continuously search (daemon thread) 4.expand queryNode from 2 to 5 5.continuously insert new data (daemon thread) 6.shrink queryNode from 5 to 3 expected: Verify milvus remains healthy and search successfully during scale """ release_name = "scale-query" image_tag = get_latest_tag() image = f'{constants.IMAGE_REPOSITORY}:{image_tag}' query_config = { 'metadata.namespace': constants.NAMESPACE, 'spec.mode': 'cluster', 'metadata.name': release_name, 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.queryNode.replicas': 1, 'spec.config.common.retentionDuration': 60 } mic = MilvusOperator() mic.install(query_config) if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800): host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] else: raise MilvusException(message=f'Milvus healthy timeout 1800s') try: # connect connections.add_connection(default={"host": host, "port": 19530}) connections.connect(alias='default') # create c_name = cf.gen_unique_str("scale_query") # c_name = 'scale_query_DymS7kI4' collection_w = ApiCollectionWrapper() collection_w.init_collection( name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2) # insert two segments for i in range(3): df = cf.gen_default_dataframe_data(nb) collection_w.insert(df) log.debug(collection_w.num_entities) # create index collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] assert collection_w.index()[0] == Index( collection_w.collection, ct.default_float_vec_field_name, default_index_params) # load collection_w.load() # scale queryNode to 5 mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE) @counter def do_search(): """ do search """ search_res, is_succ = collection_w.search( cf.gen_vectors(1, ct.default_dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit, check_task=CheckTasks.check_nothing) assert len(search_res) == 1 return search_res, is_succ def loop_search(): """ continuously search """ while True: do_search() threading.Thread(target=loop_search, args=(), daemon=True).start() # wait new QN running, continuously insert mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") @counter def do_insert(): """ do insert """ return collection_w.insert(cf.gen_default_dataframe_data(1000), check_task=CheckTasks.check_nothing) def loop_insert(): """ loop insert """ while True: do_insert() threading.Thread(target=loop_insert, args=(), daemon=True).start() log.debug(collection_w.num_entities) time.sleep(20) log.debug("Expand querynode test finished") mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") log.debug(collection_w.num_entities) time.sleep(60) scale_common.check_succ_rate(do_search) scale_common.check_succ_rate(do_insert) log.debug("Shrink querynode test finished") except Exception as e: raise Exception(str(e)) finally: label = f"app.kubernetes.io/instance={release_name}" log.info('Start to export milvus pod logs') read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name) mic.uninstall(release_name, namespace=constants.NAMESPACE)
def test_scale_in_query_node_less_than_replicas(self): """ target: test scale in cluster and querynode < replica method: 1.Deploy cluster with 3 querynodes 2.Create and insert data, flush 3.Load collection with 2 replica number 4.Scale in querynode from 3 to 1 and query 5.Scale out querynode from 1 back to 3 expected: Verify search successfully after scale out """ release_name = "scale-in-query" image_tag = get_latest_tag() image = f'{constants.IMAGE_REPOSITORY}:{image_tag}' query_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.mode': 'cluster', 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.queryNode.replicas': 2, 'spec.config.common.retentionDuration': 60 } mic = MilvusOperator() mic.install(query_config) if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800): host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] else: raise MilvusException(message=f'Milvus healthy timeout 1800s') try: # prepare collection connections.connect("scale-in", host=host, port=19530) utility_w = ApiUtilityWrapper() collection_w = ApiCollectionWrapper() collection_w.init_collection( name=cf.gen_unique_str("scale_in"), schema=cf.gen_default_collection_schema(), using="scale-in") collection_w.insert(cf.gen_default_dataframe_data()) assert collection_w.num_entities == ct.default_nb # load multi replicas and search success collection_w.load(replica_number=2) search_res, is_succ = collection_w.search( cf.gen_vectors(1, ct.default_dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) assert len(search_res[0].ids) == ct.default_limit log.info("Search successfully after load with 2 replicas") log.debug(collection_w.get_replicas()[0]) log.debug( utility_w.get_query_segment_info(collection_w.name, using="scale-in")) # scale in querynode from 2 to 1, less than replica number log.debug("Scale in querynode from 2 to 1") mic.upgrade(release_name, {'spec.components.queryNode.replicas': 1}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") # search and not assure success collection_w.search(cf.gen_vectors(1, ct.default_dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit, check_task=CheckTasks.check_nothing) log.debug( collection_w.get_replicas( check_task=CheckTasks.check_nothing)[0]) # scale querynode from 1 back to 2 mic.upgrade(release_name, {'spec.components.queryNode.replicas': 2}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") # verify search success collection_w.search(cf.gen_vectors(1, ct.default_dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) # Verify replica info is correct replicas = collection_w.get_replicas()[0] assert len(replicas.groups) == 2 for group in replicas.groups: assert len(group.group_nodes) == 1 # Verify loaded segment info is correct seg_info = utility_w.get_query_segment_info(collection_w.name, using="scale-in")[0] num_entities = 0 for seg in seg_info: assert len(seg.nodeIds) == 2 num_entities += seg.num_rows assert num_entities == ct.default_nb except Exception as e: raise Exception(str(e)) finally: label = f"app.kubernetes.io/instance={release_name}" log.info('Start to export milvus pod logs') read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name) mic.uninstall(release_name, namespace=constants.NAMESPACE)
def test_scale_query_node_replicas(self): """ target: test scale out querynode when load multi replicas method: 1.Deploy cluster with 5 querynodes 2.Create collection with 2 shards 3.Insert 10 segments and flushed 4.Load collection with 2 replicas 5.Scale out querynode from 5 to 6 while search and insert growing data expected: Verify search succ rate is 100% """ release_name = "scale-replica" image_tag = get_latest_tag() image = f'{constants.IMAGE_REPOSITORY}:{image_tag}' query_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.mode': 'cluster', 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.queryNode.replicas': 5, 'spec.config.common.retentionDuration': 60 } mic = MilvusOperator() mic.install(query_config) if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800): host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] else: raise MilvusException(message=f'Milvus healthy timeout 1800s') try: scale_querynode = random.choice([6, 7, 4, 3]) connections.connect("scale-replica", host=host, port=19530) collection_w = ApiCollectionWrapper() collection_w.init_collection( name=cf.gen_unique_str("scale_out"), schema=cf.gen_default_collection_schema(), using='scale-replica', shards_num=3) # insert 10 sealed segments for i in range(5): df = cf.gen_default_dataframe_data(nb=nb, start=i * nb) collection_w.insert(df) assert collection_w.num_entities == (i + 1) * nb collection_w.load(replica_number=2) @counter def do_search(): """ do search """ search_res, is_succ = collection_w.search( cf.gen_vectors(1, ct.default_dim), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit, check_task=CheckTasks.check_nothing) assert len(search_res) == 1 return search_res, is_succ def loop_search(): """ continuously search """ while True: do_search() threading.Thread(target=loop_search, args=(), daemon=True).start() # scale out mic.upgrade( release_name, {'spec.components.queryNode.replicas': scale_querynode}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") log.debug("Scale out querynode success") time.sleep(100) scale_common.check_succ_rate(do_search) log.debug("Scale out test finished") except Exception as e: raise Exception(str(e)) finally: label = f"app.kubernetes.io/instance={release_name}" log.info('Start to export milvus pod logs') read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name) mic.uninstall(release_name, namespace=constants.NAMESPACE)
def test_scale_data_node(self): """ target: test scale dataNode method: 1.deploy milvus cluster with 2 dataNode 2.create collection with shards_num=5 3.continuously insert new data (daemon thread) 4.expand dataNode from 2 to 5 5.create new collection with shards_num=2 6.continuously insert new collection new data (daemon thread) 7.shrink dataNode from 5 to 3 expected: Verify milvus remains healthy, Insert and flush successfully during scale Average dataNode memory usage """ release_name = "scale-data" image_tag = get_latest_tag() image = f'{constants.IMAGE_REPOSITORY}:{image_tag}' fail_count = 0 data_config = { 'metadata.namespace': constants.NAMESPACE, 'metadata.name': release_name, 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.dataNode.replicas': 2, 'spec.config.dataCoord.enableCompaction': True, 'spec.config.dataCoord.enableGarbageCollection': True } mic = MilvusOperator() mic.install(data_config) if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200): host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] else: # log.warning(f'Deploy {release_name} timeout and ready to uninstall') # mic.uninstall(release_name, namespace=constants.NAMESPACE) raise BaseException(f'Milvus healthy timeout 1200s') try: # connect connections.add_connection(default={"host": host, "port": 19530}) connections.connect(alias='default') # create c_name = cf.gen_unique_str("scale_query") # c_name = 'scale_query_DymS7kI4' collection_w = ApiCollectionWrapper() collection_w.init_collection( name=c_name, schema=cf.gen_default_collection_schema(), shards_num=5) tmp_nb = 10000 def do_insert(): while True: tmp_df = cf.gen_default_dataframe_data(tmp_nb) collection_w.insert(tmp_df) log.debug(collection_w.num_entities) t_insert = threading.Thread(target=do_insert, args=(), daemon=True) t_insert.start() # scale dataNode to 5 mic.upgrade(release_name, {'spec.components.dataNode.replicas': 5}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") log.debug("Expand dataNode test finished") # create new collection and insert new_c_name = cf.gen_unique_str("scale_query") collection_w_new = ApiCollectionWrapper() collection_w_new.init_collection( name=new_c_name, schema=cf.gen_default_collection_schema(), shards_num=2) def do_new_insert(): while True: tmp_df = cf.gen_default_dataframe_data(tmp_nb) collection_w_new.insert(tmp_df) log.debug(collection_w_new.num_entities) t_insert_new = threading.Thread(target=do_new_insert, args=(), daemon=True) t_insert_new.start() # scale dataNode to 3 mic.upgrade(release_name, {'spec.components.dataNode.replicas': 3}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") log.debug(collection_w.num_entities) time.sleep(300) log.debug("Shrink dataNode test finished") except Exception as e: log.error(str(e)) fail_count += 1 # raise Exception(str(e)) finally: log.info(f'Test finished with {fail_count} fail request') assert fail_count <= 1 label = f"app.kubernetes.io/instance={release_name}" log.info('Start to export milvus pod logs') read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name) mic.uninstall(release_name, namespace=constants.NAMESPACE)
def teardown_method(self): milvus_op = MilvusOperator() milvus_op.uninstall(self.release_name, namespace)
def test_expand_index_node(self): """ target: test expand indexNode from 1 to 2 method: 1.deploy two indexNode 2.create index with two indexNode 3.expand indexNode from 1 to 2 4.create index with one indexNode expected: The cost of one indexNode is about twice that of two indexNodes """ release_name = "expand-index" image_tag = get_latest_tag() image = f'{constants.IMAGE_REPOSITORY}:{image_tag}' init_replicas = 1 expand_replicas = 2 data_config = { 'metadata.namespace': constants.NAMESPACE, 'spec.mode': 'cluster', 'metadata.name': release_name, 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.indexNode.replicas': init_replicas, 'spec.components.dataNode.replicas': 2, 'spec.config.common.retentionDuration': 60 } mic = MilvusOperator() mic.install(data_config) if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800): host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] else: # If deploy failed and want to uninsatll mic # log.warning(f'Deploy {release_name} timeout and ready to uninstall') # mic.uninstall(release_name, namespace=constants.NAMESPACE) raise MilvusException(message=f'Milvus healthy timeout 1800s') try: # connect connections.add_connection(default={"host": host, "port": 19530}) connections.connect(alias='default') # create collection c_name = "index_scale_one" collection_w = ApiCollectionWrapper() collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema()) # insert data data = cf.gen_default_dataframe_data(nb) loop = 100 for i in range(loop): collection_w.insert(data, timeout=60) assert collection_w.num_entities == nb * loop # create index # Note that the num of segments and the num of indexNode are related to indexing time start = datetime.datetime.now() collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] t0 = datetime.datetime.now() - start log.info(f'Create index on {init_replicas} indexNode cost t0: {t0}') # drop index collection_w.drop_index() assert not collection_w.has_index()[0] # expand indexNode mic.upgrade(release_name, {'spec.components.indexNode.replicas': expand_replicas}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") # create index again start = datetime.datetime.now() collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] t1 = datetime.datetime.now() - start log.info(f'Create index on {expand_replicas} indexNode cost t1: {t1}') collection_w.drop_index() start = datetime.datetime.now() collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] t2 = datetime.datetime.now() - start log.info(f'Create index on {expand_replicas} indexNode cost t2: {t2}') log.debug(f't2 is {t2}, t0 is {t0}, t0/t2 is {t0 / t2}') # assert round(t0 / t2) == 2 except Exception as e: raise Exception(str(e)) finally: label = f"app.kubernetes.io/instance={release_name}" log.info('Start to export milvus pod logs') read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name) mic.uninstall(release_name, namespace=constants.NAMESPACE)
def test_scale_data_node(self): """ target: test scale dataNode method: 1.deploy milvus cluster with 2 dataNode 2.create collection with shards_num=5 3.continuously insert new data (daemon thread) 4.expand dataNode from 2 to 5 5.create new collection with shards_num=2 6.continuously insert new collection new data (daemon thread) 7.shrink dataNode from 5 to 3 expected: Verify milvus remains healthy, Insert and flush successfully during scale Average dataNode memory usage """ release_name = "scale-data" image_tag = get_latest_tag() image = f'{constants.IMAGE_REPOSITORY}:{image_tag}' data_config = { 'metadata.namespace': constants.NAMESPACE, 'spec.mode': 'cluster', 'metadata.name': release_name, 'spec.components.image': image, 'spec.components.proxy.serviceType': 'LoadBalancer', 'spec.components.dataNode.replicas': 2, 'spec.config.common.retentionDuration': 60 } mic = MilvusOperator() mic.install(data_config) if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800): host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0] else: raise MilvusException(message=f'Milvus healthy timeout 1800s') try: # connect connections.add_connection(default={"host": host, "port": 19530}) connections.connect(alias='default') # create c_name = cf.gen_unique_str("scale_data") collection_w = ApiCollectionWrapper() collection_w.init_collection( name=c_name, schema=cf.gen_default_collection_schema(), shards_num=4) tmp_nb = 10000 @counter def do_insert(): """ do insert and flush """ insert_res, is_succ = collection_w.insert( cf.gen_default_dataframe_data(tmp_nb)) log.debug(collection_w.num_entities) return insert_res, is_succ def loop_insert(): """ loop do insert """ while True: do_insert() threading.Thread(target=loop_insert, args=(), daemon=True).start() # scale dataNode to 5 mic.upgrade(release_name, {'spec.components.dataNode.replicas': 5}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") log.debug("Expand dataNode test finished") # create new collection and insert new_c_name = cf.gen_unique_str("scale_data") collection_w_new = ApiCollectionWrapper() collection_w_new.init_collection( name=new_c_name, schema=cf.gen_default_collection_schema(), shards_num=3) @counter def do_new_insert(): """ do new insert """ insert_res, is_succ = collection_w_new.insert( cf.gen_default_dataframe_data(tmp_nb)) log.debug(collection_w_new.num_entities) return insert_res, is_succ def loop_new_insert(): """ loop new insert """ while True: do_new_insert() threading.Thread(target=loop_new_insert, args=(), daemon=True).start() # scale dataNode to 3 mic.upgrade(release_name, {'spec.components.dataNode.replicas': 3}, constants.NAMESPACE) mic.wait_for_healthy(release_name, constants.NAMESPACE) wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}") log.debug(collection_w.num_entities) time.sleep(300) scale_common.check_succ_rate(do_insert) scale_common.check_succ_rate(do_new_insert) log.debug("Shrink dataNode test finished") except Exception as e: log.error(str(e)) # raise Exception(str(e)) finally: label = f"app.kubernetes.io/instance={release_name}" log.info('Start to export milvus pod logs') read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name) mic.uninstall(release_name, namespace=constants.NAMESPACE)