Example #1
    def test_expand_data_node(self):
        """
        target: test create and insert api after expand dataNode pod
        method: 1.create collection a and insert df
                2.expand dataNode pod from 1 to 2
                3.verify collection a property and verify create and insert of new collection
        expected: create and insert operations on both collections succeed
        """
        # deploy all nodes one pod cluster milvus with helm
        release_name = "scale-test"
        env = HelmEnv(release_name=release_name)
        env.helm_install_cluster_milvus()
        host = env.get_svc_external_ip()

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')
        # create
        c_name = cf.gen_unique_str(prefix)
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
        # insert
        data = cf.gen_default_list_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        # scale dataNode to 2 pods
        env.helm_upgrade_cluster_milvus(dataNode=2)
        # after scale, assert data consistent
        assert utility.has_collection(c_name)
        assert collection_w.num_entities == ct.default_nb
        # assert new operations
        new_cname = cf.gen_unique_str(prefix)
        new_collection_w = ApiCollectionWrapper()
        new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
        new_mutation_res, _ = new_collection_w.insert(data)
        assert new_mutation_res.insert_count == ct.default_nb
        assert new_collection_w.num_entities == ct.default_nb
        # assert old collection ddl
        mutation_res_2, _ = collection_w.insert(data)
        assert mutation_res_2.insert_count == ct.default_nb
        assert collection_w.num_entities == ct.default_nb*2

        collection_w.drop()
        new_collection_w.drop()
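
A note on the num_entities assertions used throughout these examples: in the pymilvus version these tests target, reading Collection.num_entities triggers a flush before counting, so the assertion doubles as a flush-and-verify step. A minimal sketch of the idiom, assuming collection_w and the cf/ct helpers from the test module are already set up:

    # num_entities is assumed to flush first, as in the pymilvus release
    # these tests were written against
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb   # server acknowledged the insert
    assert collection_w.num_entities == ct.default_nb   # flushed, persisted count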
Example #2
    def test_shrink_data_node(self):
        """
        target: test shrink dataNode from 2 to 1
        method: 1.create collection and insert df
                2.shrink dataNode from 2 to 1
                3.insert df again
        expected: verify the properties of the collections whose channels were on the shrunk pod
        """
        release_name = "scale-data"
        env = HelmEnv(release_name=release_name, dataNode=2)
        host = env.helm_install_cluster_milvus(image_pull_policy=constants.IF_NOT_PRESENT)

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        c_name = "data_scale_one"
        data = cf.gen_default_list_data(ct.default_nb)
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        assert collection_w.num_entities == ct.default_nb

        c_name_2 = "data_scale_two"
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(name=c_name_2, schema=cf.gen_default_collection_schema())
        mutation_res2, _ = collection_w2.insert(data)
        assert mutation_res2.insert_count == ct.default_nb
        assert collection_w2.num_entities == ct.default_nb

        env.helm_upgrade_cluster_milvus(dataNode=1)

        assert collection_w.num_entities == ct.default_nb
        mutation_res2, _ = collection_w2.insert(data)
        assert collection_w2.num_entities == ct.default_nb*2
        collection_w.drop()
        collection_w2.drop()
Example #3
    def test_simd_compat_e2e(self, request, simd):
        log.info(f"start to e2e verification: {simd}")
        # parse the connection alias from results cached by earlier install steps
        results = request.config.cache.get(simd, None)
        assert results is not None, f"no cached install results for simd {simd}"
        alias = results.get('alias', simd)
        conn = connections.connect(alias=alias)
        assert conn is not None
        log.info(f"simd_cache: {results}")
        # create
        name = cf.gen_unique_str("compat")
        t0 = time.time()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=name,
                                     schema=cf.gen_default_collection_schema(),
                                     using=alias,
                                     timeout=40)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        data = cf.gen_default_list_data()
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res

        # flush
        t0 = time.time()
        assert collection_w.num_entities == len(data[0]) + entities
        tt = time.time() - t0
        entities = collection_w.num_entities
        log.info(f"assert flush: {tt}, entities: {entities}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {
            "index_type": "IVF_SQ8",
            "params": {
                "nlist": 64
            },
            "metric_type": "L2"
        }
        t0 = time.time()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=_index_params,
            name=cf.gen_unique_str())
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")
Example #4
class Base:
    """ Initialize class object """
    connection_wrap = None
    collection_wrap = None
    partition_wrap = None
    index_wrap = None
    utility_wrap = None
    collection_schema_wrap = None
    field_schema_wrap = None
    collection_object_list = []

    def setup_class(self):
        log.info("[setup_class] Start setup class...")

    def teardown_class(self):
        log.info("[teardown_class] Start teardown class...")

    def setup_method(self, method):
        log.info(("*" * 35) + " setup " + ("*" * 35))
        log.info("[setup_method] Start setup test case %s..." % method.__name__)
        self.connection_wrap = ApiConnectionsWrapper()
        self.utility_wrap = ApiUtilityWrapper()
        self.collection_wrap = ApiCollectionWrapper()
        self.partition_wrap = ApiPartitionWrapper()
        self.index_wrap = ApiIndexWrapper()
        self.collection_schema_wrap = ApiCollectionSchemaWrapper()
        self.field_schema_wrap = ApiFieldSchemaWrapper()

    def teardown_method(self, method):
        log.info(("*" * 35) + " teardown " + ("*" * 35))
        log.info("[teardown_method] Start teardown test case %s..." % method.__name__)

        try:
            """ Drop collection before disconnect """
            if self.connection_wrap.get_connection(alias=DefaultConfig.DEFAULT_USING)[0] is None:
                self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING, host=param_info.param_host,
                                             port=param_info.param_port)

            if self.collection_wrap.collection is not None:
                self.collection_wrap.drop(check_task=ct.CheckTasks.check_nothing)

            for collection_object in self.collection_object_list:
                if collection_object.collection is not None \
                        and collection_object.name in self.utility_wrap.list_collections()[0]:
                    collection_object.drop(check_task=ct.CheckTasks.check_nothing)

        except Exception as e:
            log.debug(str(e))

        try:
            """ Delete connection and reset configuration"""
            res = self.connection_wrap.list_connections()
            for i in res[0]:
                self.connection_wrap.remove_connection(i[0])

            # because the connection is in singleton mode, it needs to be restored to the original state after teardown
            self.connection_wrap.add_connection(default={"host": DefaultConfig.DEFAULT_HOST,
                                                         "port": DefaultConfig.DEFAULT_PORT})
        except Exception as e:
            log.debug(str(e))

    @pytest.fixture(scope="module", autouse=True)
    def initialize_env(self, request):
        """ clean log before testing """
        host = request.config.getoption("--host")
        port = request.config.getoption("--port")
        handler = request.config.getoption("--handler")
        clean_log = request.config.getoption("--clean_log")

        """ params check """
        assert ip_check(host) and number_check(port)

        """ modify log files """
        cf.modify_file(file_path_list=[log_config.log_debug, log_config.log_info, log_config.log_err], is_modify=clean_log)

        log.info("#" * 80)
        log.info("[initialize_milvus] Log cleaned up, start testing...")
        param_info.prepare_param_info(host, port, handler)
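
For context, a minimal sketch of how a test class might build on this Base class; the wrappers come from setup_method, and the collection is appended to collection_object_list so teardown_method drops it (names here are illustrative):

    class TestCollectionSmoke(Base):
        def test_create_and_insert(self):
            c_name = cf.gen_unique_str("smoke")
            self.collection_wrap.init_collection(
                name=c_name, schema=cf.gen_default_collection_schema())
            # register the collection so Base.teardown_method cleans it up
            self.collection_object_list.append(self.collection_wrap)
            data = cf.gen_default_list_data(ct.default_nb)
            mutation_res, _ = self.collection_wrap.insert(data)
            assert mutation_res.insert_count == ct.default_nb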
Example #5
    def test_expand_data_node(self):
        """
        target: test create and insert api after expand dataNode pod
        method: 1.create collection a and insert df
                2.expand dataNode pod from 1 to 2
                3.verify collection a property and verify create and insert of new collection
        expected: create and insert operations on both collections succeed
        """
        # deploy all nodes one pod cluster milvus with helm
        release_name = "scale-data"
        # env = HelmEnv(release_name=release_name)
        # host = env.helm_install_cluster_milvus()

        # deploy cluster milvus with dataNode 1 replicas
        default_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.components.image':
            'milvusdb/milvus-dev:master-20211020-b40513b',
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'dependencies.etcd.inCluster.deletionPolicy': 'Delete',
            'dependencies.etcd.inCluster.pvcDeletion': 'true',
            'dependencies.pulsar.inCluster.deletionPolicy': 'Delete',
            'dependencies.pulsar.inCluster.pvcDeletion': 'true',
            'dependencies.storage.inCluster.deletionPolicy': 'Delete',
            'dependencies.storage.inCluster.pvcDeletion': 'true',
        }
        milvusOp = MilvusOperator()
        milvusOp.install(default_config)
        if milvusOp.wait_for_healthy(release_name,
                                     namespace=constants.NAMESPACE):
            endpoint = milvusOp.endpoint(release_name, constants.NAMESPACE)
            endpoint = endpoint.split(':')
            host = endpoint[0]
            port = int(endpoint[-1])
        else:
            raise Exception(f"Failed to install {release_name}")

        # connect
        connections.add_connection(default={"host": host, "port": port})
        connections.connect(alias='default')
        # create
        c_name = cf.gen_unique_str(prefix)
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        # insert
        data = cf.gen_default_list_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        # scale dataNode to 2 pods
        milvusOp.upgrade(release_name,
                         {'spec.components.dataNode.replicas': 2},
                         constants.NAMESPACE)
        # env.helm_upgrade_cluster_milvus(dataNode=2)
        # after scale, assert data consistent
        assert utility.has_collection(c_name)
        assert collection_w.num_entities == ct.default_nb
        # assert new operations
        new_cname = cf.gen_unique_str(prefix)
        new_collection_w = ApiCollectionWrapper()
        new_collection_w.init_collection(
            name=new_cname, schema=cf.gen_default_collection_schema())
        new_mutation_res, _ = new_collection_w.insert(data)
        assert new_mutation_res.insert_count == ct.default_nb
        assert new_collection_w.num_entities == ct.default_nb
        # assert old collection ddl
        mutation_res_2, _ = collection_w.insert(data)
        assert mutation_res_2.insert_count == ct.default_nb
        assert collection_w.num_entities == ct.default_nb * 2

        collection_w.drop()
        new_collection_w.drop()
Example #6
    def test_shrink_index_node(self):
        """
        target: test shrink indexNode from 2 to 1
        method: 1.deploy two indexNode
                2.create index with two indexNode
                3.shrink indexNode from 2 to 1
                4.create index with 1 indexNode
        expected: the index build time with one indexNode is roughly twice that with two indexNodes
        """
        release_name = "shrink-index"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        data_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.indexNode.replicas': 2,
            'spec.components.dataNode.replicas': 2,
            'spec.config.dataCoord.enableCompaction': True,
            'spec.config.dataCoord.enableGarbageCollection': True
        }
        mic = MilvusOperator()
        mic.install(data_config)
        if mic.wait_for_healthy(release_name,
                                constants.NAMESPACE,
                                timeout=1800):
            host = mic.endpoint(release_name,
                                constants.NAMESPACE).split(':')[0]
        else:
            raise MilvusException(message='Milvus healthy timeout 1800s')

        try:
            # connect
            connections.add_connection(default={"host": host, "port": 19530})
            connections.connect(alias='default')

            data = cf.gen_default_dataframe_data(nb)

            # create
            c_name = "index_scale_one"
            collection_w = ApiCollectionWrapper()
            # collection_w.init_collection(name=c_name)
            collection_w.init_collection(
                name=c_name, schema=cf.gen_default_collection_schema())
            # insert
            loop = 10
            for i in range(loop):
                collection_w.insert(data)
            assert collection_w.num_entities == nb * loop

            # create index with two indexNodes
            start = datetime.datetime.now()
            collection_w.create_index(ct.default_float_vec_field_name,
                                      default_index_params)
            assert collection_w.has_index()[0]
            t0 = datetime.datetime.now() - start

            log.info(f'Create index on 2 indexNode cost t0: {t0}')

            collection_w.drop_index()
            assert not collection_w.has_index()[0]

            # shrink indexNode from 2 to 1
            mic.upgrade(release_name,
                        {'spec.components.indexNode.replicas': 1},
                        constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            start = datetime.datetime.now()
            collection_w.create_index(ct.default_float_vec_field_name,
                                      default_index_params)
            assert collection_w.has_index()[0]
            t1 = datetime.datetime.now() - start
            log.info(f'Create index on 1 indexNode cost t1: {t1}')
            collection_w.drop_index()

            start = datetime.datetime.now()
            collection_w.create_index(ct.default_float_vec_field_name,
                                      default_index_params)
            assert collection_w.has_index()[0]
            t2 = datetime.datetime.now() - start
            log.info(f'Create index on 1 indexNode cost t2: {t2}')

            log.debug(f'one indexNode: {t2}')
            log.debug(f't2 is {t2}, t0 is {t0}, t2/t0 is {t2 / t0}')
            # assert round(t2 / t0) == 2

        except Exception:
            raise

        finally:
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE,
                         label_selector=label,
                         release_name=release_name)
            mic.uninstall(release_name, namespace=constants.NAMESPACE)
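
get_latest_tag() is used here and in later examples but not defined in this listing. A hedged sketch of one way it could work against the Docker Hub v2 tags endpoint; the repository name and ordering are assumptions, and the repo's real helper may query a different registry entirely:

    import requests

    def get_latest_tag(repository="milvusdb/milvus-dev"):
        """Sketch: return the most recently pushed tag of a Docker Hub repo."""
        url = f"https://hub.docker.com/v2/repositories/{repository}/tags"
        resp = requests.get(url, params={"page_size": 1, "ordering": "last_updated"},
                            timeout=30)
        resp.raise_for_status()
        results = resp.json()["results"]
        if not results:
            raise ValueError(f"no tags found for {repository}")
        return results[0]["name"]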
Example #7
    def test_shrink_index_node(self):
        """
        target: test shrink indexNode from 2 to 1
        method: 1.deploy two indexNode
                2.create index with two indexNode
                3.shrink indexNode from 2 to 1
                4.create index with 1 indexNode
        expected: the index build time with one indexNode is roughly twice that with two indexNodes
        """
        release_name = "scale-index"
        env = HelmEnv(release_name=release_name, indexNode=2)
        env.helm_install_cluster_milvus()

        # connect
        host = env.get_svc_external_ip()
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        data = cf.gen_default_dataframe_data(nb)

        # create
        c_name = "index_scale_one"
        collection_w = ApiCollectionWrapper()
        # collection_w.init_collection(name=c_name)
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        # insert
        loop = 10
        for i in range(loop):
            collection_w.insert(data)
        assert collection_w.num_entities == nb * loop

        # create index with two indexNodes
        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name,
                                  default_index_params)
        assert collection_w.has_index()[0]
        t0 = datetime.datetime.now() - start

        log.debug(f'two indexNodes: {t0}')

        collection_w.drop_index()
        assert not collection_w.has_index()[0]

        # shrink indexNode from 2 to 1
        env.helm_upgrade_cluster_milvus(indexNode=1)

        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name,
                                  default_index_params)
        assert collection_w.has_index()[0]
        t1 = datetime.datetime.now() - start

        log.debug(f'one indexNode: {t1}')
        log.debug(t1 / t0)
        assert round(t1 / t0) == 2
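
Dividing two timedeltas yields a float, so the assertion above is valid, but expecting the ratio to round to exactly 2 is flaky on shared CI hardware. A tolerance-based variant, as a sketch:

    ratio = t1 / t0  # timedelta / timedelta -> float
    log.debug(f"build-time ratio one-node/two-node: {ratio:.2f}")
    # allow generous slack instead of demanding round(ratio) == 2
    assert 1.5 <= ratio <= 2.5, f"unexpected index build-time ratio: {ratio:.2f}"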
Example #8
    def test_scale_proxy(self):
        """
        target: test milvus operation after proxy expand
        method: 1.deploy 1 proxy replicas
                2.milvus e2e test in parallel
                3.expand proxy pod from 1 to 5
                4.milvus e2e test
                5.shrink proxy from 5 to 2
        expected: verify data is consistent and all operations work correctly
        """
        # deploy milvus cluster with one proxy
        fail_count = 0
        release_name = "scale-proxy"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        data_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.mode': 'cluster',
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.proxy.replicas': 1,
            'spec.components.dataNode.replicas': 2,
            'spec.config.common.retentionDuration': 60
        }
        mic = MilvusOperator()
        mic.install(data_config)
        if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800):
            host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
        else:
            raise MilvusException(message='Milvus healthy timeout 1800s')

        try:
            c_name = cf.gen_unique_str("proxy_scale")
            e2e_milvus_parallel(2, host, c_name)
            log.info('Milvus test before expand')

            # expand proxy replicas from 1 to 5
            mic.upgrade(release_name, {'spec.components.proxy.replicas': 5}, constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

            e2e_milvus_parallel(5, host, c_name)
            log.info('Milvus test after expand')

            # shrink proxy replicas from 5 to 2
            mic.upgrade(release_name, {'spec.components.proxy.replicas': 2}, constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

            e2e_milvus_parallel(2, host, c_name)
            log.info('Milvus test after shrink')

            connections.connect('default', host=host, port=19530)
            collection_w = ApiCollectionWrapper()
            collection_w.init_collection(name=c_name)
            """
            total start 2+5+2 process to run e2e, each time insert default_nb data, But one of the 2 processes started
            for the first time did not insert due to collection creation exception. So actually insert eight times
            """
            assert collection_w.num_entities == 8 * default_nb

        except Exception as e:
            log.error(str(e))
            fail_count += 1
            # raise Exception(str(e))

        finally:
            log.info(f'Test finished with {fail_count} fail request')
            assert fail_count <= 1
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
            mic.uninstall(release_name, namespace=constants.NAMESPACE)
Example #9
    def test_customize_segment_size(self, seg_size, seg_count):
        """
        target: test milvus with a customized max segment size
        method: 1.install milvus with the given segment size
                2.insert 2 million entities and flush
                3.verify the number of flushed segments matches seg_count
        expected: e2e operations succeed and the segment count equals seg_count
        """
        log.info(f"start to install milvus with segment size {seg_size}")
        release_name, host, port = _install_milvus(seg_size)
        self.release_name = release_name
        assert host is not None
        conn = connections.connect("default", host=host, port=port)
        assert conn is not None
        mil = MilvusSys(alias="default")
        log.info(f"milvus build version: {mil.build_version}")

        log.info(f"start to e2e verification: {seg_size}")
        # create
        name = cf.gen_unique_str("segsiz")
        t0 = time.time()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=name,
                                     schema=cf.gen_default_collection_schema(),
                                     timeout=40)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        nb = 50000
        data = cf.gen_default_list_data(nb=nb)
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res
        # insert 2 million entities
        rounds = 40
        for _ in range(rounds - 1):
            _, res = collection_w.insert(data)
        entities = collection_w.num_entities
        assert entities == nb * rounds

        # load
        collection_w.load()
        utility_wrap = ApiUtilityWrapper()
        segs, _ = utility_wrap.get_query_segment_info(collection_w.name)
        log.info(f"assert segments: {len(segs)}")
        assert len(segs) == seg_count

        # search
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1,
            timeout=30)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {
            "index_type": "IVF_SQ8",
            "params": {
                "nlist": 64
            },
            "metric_type": "L2"
        }
        t0 = time.time()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=_index_params,
            name=cf.gen_unique_str(),
            timeout=120)
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1,
            timeout=30)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr, timeout=30)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")
Example #10
    def test_scale_query_node(self):
        """
        target: test scale queryNode
        method: 1.deploy milvus cluster with 1 queryNode
                2.prepare work (connect, create, insert, index and load)
                3.continuously search (daemon thread)
                4.expand queryNode from 1 to 5
                5.continuously insert new data (daemon thread)
                6.shrink queryNode from 5 to 3
        expected: Verify milvus remains healthy and search successfully during scale
        """
        release_name = "scale-query"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        query_config = {
            'metadata.namespace': constants.NAMESPACE,
            'spec.mode': 'cluster',
            'metadata.name': release_name,
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.queryNode.replicas': 1,
            'spec.config.common.retentionDuration': 60
        }
        mic = MilvusOperator()
        mic.install(query_config)
        if mic.wait_for_healthy(release_name,
                                constants.NAMESPACE,
                                timeout=1800):
            host = mic.endpoint(release_name,
                                constants.NAMESPACE).split(':')[0]
        else:
            raise MilvusException(message='Milvus healthy timeout 1800s')

        try:
            # connect
            connections.add_connection(default={"host": host, "port": 19530})
            connections.connect(alias='default')

            # create
            c_name = cf.gen_unique_str("scale_query")
            # c_name = 'scale_query_DymS7kI4'
            collection_w = ApiCollectionWrapper()
            collection_w.init_collection(
                name=c_name,
                schema=cf.gen_default_collection_schema(),
                shards_num=2)

            # insert three segments
            for i in range(3):
                df = cf.gen_default_dataframe_data(nb)
                collection_w.insert(df)
                log.debug(collection_w.num_entities)

            # create index
            collection_w.create_index(ct.default_float_vec_field_name,
                                      default_index_params)
            assert collection_w.has_index()[0]
            assert collection_w.index()[0] == Index(
                collection_w.collection, ct.default_float_vec_field_name,
                default_index_params)

            # load
            collection_w.load()

            # scale queryNode to 5
            mic.upgrade(release_name,
                        {'spec.components.queryNode.replicas': 5},
                        constants.NAMESPACE)

            @counter
            def do_search():
                """ do search """
                search_res, is_succ = collection_w.search(
                    cf.gen_vectors(1, ct.default_dim),
                    ct.default_float_vec_field_name,
                    ct.default_search_params,
                    ct.default_limit,
                    check_task=CheckTasks.check_nothing)
                assert len(search_res) == 1
                return search_res, is_succ

            def loop_search():
                """ continuously search """
                while True:
                    do_search()

            threading.Thread(target=loop_search, args=(), daemon=True).start()

            # wait new QN running, continuously insert
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            @counter
            def do_insert():
                """ do insert """
                return collection_w.insert(cf.gen_default_dataframe_data(1000),
                                           check_task=CheckTasks.check_nothing)

            def loop_insert():
                """ loop insert """
                while True:
                    do_insert()

            threading.Thread(target=loop_insert, args=(), daemon=True).start()

            log.debug(collection_w.num_entities)
            time.sleep(20)
            log.debug("Expand querynode test finished")

            mic.upgrade(release_name,
                        {'spec.components.queryNode.replicas': 3},
                        constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            log.debug(collection_w.num_entities)
            time.sleep(60)
            scale_common.check_succ_rate(do_search)
            scale_common.check_succ_rate(do_insert)
            log.debug("Shrink querynode test finished")

        except Exception:
            raise

        finally:
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE,
                         label_selector=label,
                         release_name=release_name)
            mic.uninstall(release_name, namespace=constants.NAMESPACE)
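
The @counter decorator and scale_common.check_succ_rate used above are not defined in this listing. A plausible sketch, assuming the decorator tracks total and successful calls of an operation that returns a (result, is_succ) tuple, as the wrapped do_search and do_insert do:

    import functools

    def counter(func):
        """Sketch: count total and successful calls of a wrapped test op."""
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            wrapper.total += 1
            result = func(*args, **kwargs)
            # convention assumed from the tests: ops return (res, is_succ)
            if isinstance(result, tuple) and len(result) == 2 and result[1]:
                wrapper.succ += 1
            return result
        wrapper.total = 0
        wrapper.succ = 0
        return wrapper

    def check_succ_rate(op, threshold=1.0):
        """Sketch: assert the success rate of a @counter-wrapped operation."""
        rate = op.succ / op.total if op.total else 0.0
        log.info(f"{op.__name__} succ rate {rate:.2%} ({op.succ}/{op.total})")
        assert rate >= threshold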
Example #11
 def __init__(self):
     super().__init__()
     self.c_wrapper = ApiCollectionWrapper()
Example #12
    def test_scale_in_query_node_less_than_replicas(self):
        """
        target: test scale in cluster and querynode < replica
        method: 1.Deploy cluster with 2 querynodes
                2.Create and insert data, flush
                3.Load collection with 2 replica number
                4.Scale in querynode from 2 to 1 and query
                5.Scale out querynode from 1 back to 2
        expected: Verify search successfully after scale out
        """
        release_name = "scale-in-query"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        query_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.mode': 'cluster',
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.queryNode.replicas': 2,
            'spec.config.common.retentionDuration': 60
        }
        mic = MilvusOperator()
        mic.install(query_config)
        if mic.wait_for_healthy(release_name,
                                constants.NAMESPACE,
                                timeout=1800):
            host = mic.endpoint(release_name,
                                constants.NAMESPACE).split(':')[0]
        else:
            raise MilvusException(message='Milvus healthy timeout 1800s')
        try:
            # prepare collection
            connections.connect("scale-in", host=host, port=19530)
            utility_w = ApiUtilityWrapper()
            collection_w = ApiCollectionWrapper()
            collection_w.init_collection(
                name=cf.gen_unique_str("scale_in"),
                schema=cf.gen_default_collection_schema(),
                using="scale-in")
            collection_w.insert(cf.gen_default_dataframe_data())
            assert collection_w.num_entities == ct.default_nb

            # load multi replicas and search success
            collection_w.load(replica_number=2)
            search_res, is_succ = collection_w.search(
                cf.gen_vectors(1, ct.default_dim),
                ct.default_float_vec_field_name, ct.default_search_params,
                ct.default_limit)
            assert len(search_res[0].ids) == ct.default_limit
            log.info("Search successfully after load with 2 replicas")
            log.debug(collection_w.get_replicas()[0])
            log.debug(
                utility_w.get_query_segment_info(collection_w.name,
                                                 using="scale-in"))

            # scale in querynode from 2 to 1, less than replica number
            log.debug("Scale in querynode from 2 to 1")
            mic.upgrade(release_name,
                        {'spec.components.queryNode.replicas': 1},
                        constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            # search and not assure success
            collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                ct.default_float_vec_field_name,
                                ct.default_search_params,
                                ct.default_limit,
                                check_task=CheckTasks.check_nothing)
            log.debug(
                collection_w.get_replicas(
                    check_task=CheckTasks.check_nothing)[0])

            # scale querynode from 1 back to 2
            mic.upgrade(release_name,
                        {'spec.components.queryNode.replicas': 2},
                        constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            # verify search success
            collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                ct.default_float_vec_field_name,
                                ct.default_search_params, ct.default_limit)
            # Verify replica info is correct
            replicas = collection_w.get_replicas()[0]
            assert len(replicas.groups) == 2
            for group in replicas.groups:
                assert len(group.group_nodes) == 1
            # Verify loaded segment info is correct
            seg_info = utility_w.get_query_segment_info(collection_w.name,
                                                        using="scale-in")[0]
            num_entities = 0
            for seg in seg_info:
                assert len(seg.nodeIds) == 2
                num_entities += seg.num_rows
            assert num_entities == ct.default_nb

        except Exception:
            raise

        finally:
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE,
                         label_selector=label,
                         release_name=release_name)
            mic.uninstall(release_name, namespace=constants.NAMESPACE)
Example #13
    def test_scale_query_node_replicas(self):
        """
        target: test scale out querynode when load multi replicas
        method: 1.Deploy cluster with 5 querynodes
                2.Create collection with 3 shards
                3.Insert 5 batches of data, flushing each into sealed segments
                4.Load collection with 2 replicas
                5.Scale querynode replicas (randomly to 3, 4, 6, or 7) while searching
        expected: Verify search succ rate is 100%
        """
        release_name = "scale-replica"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        query_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.mode': 'cluster',
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.queryNode.replicas': 5,
            'spec.config.common.retentionDuration': 60
        }
        mic = MilvusOperator()
        mic.install(query_config)
        if mic.wait_for_healthy(release_name,
                                constants.NAMESPACE,
                                timeout=1800):
            host = mic.endpoint(release_name,
                                constants.NAMESPACE).split(':')[0]
        else:
            raise MilvusException(message='Milvus healthy timeout 1800s')

        try:
            scale_querynode = random.choice([6, 7, 4, 3])
            connections.connect("scale-replica", host=host, port=19530)

            collection_w = ApiCollectionWrapper()
            collection_w.init_collection(
                name=cf.gen_unique_str("scale_out"),
                schema=cf.gen_default_collection_schema(),
                using='scale-replica',
                shards_num=3)

            # insert 5 batches of sealed segments
            for i in range(5):
                df = cf.gen_default_dataframe_data(nb=nb, start=i * nb)
                collection_w.insert(df)
                assert collection_w.num_entities == (i + 1) * nb

            collection_w.load(replica_number=2)

            @counter
            def do_search():
                """ do search """
                search_res, is_succ = collection_w.search(
                    cf.gen_vectors(1, ct.default_dim),
                    ct.default_float_vec_field_name,
                    ct.default_search_params,
                    ct.default_limit,
                    check_task=CheckTasks.check_nothing)
                assert len(search_res) == 1
                return search_res, is_succ

            def loop_search():
                """ continuously search """
                while True:
                    do_search()

            threading.Thread(target=loop_search, args=(), daemon=True).start()

            # scale out
            mic.upgrade(
                release_name,
                {'spec.components.queryNode.replicas': scale_querynode},
                constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")
            log.debug("Scale out querynode success")

            time.sleep(100)
            scale_common.check_succ_rate(do_search)
            log.debug("Scale out test finished")

        except Exception:
            raise

        finally:
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE,
                         label_selector=label,
                         release_name=release_name)
            mic.uninstall(release_name, namespace=constants.NAMESPACE)
Example #14
    def test_chaos_data_consist(self, connection, chaos_yaml):
        c_name = cf.gen_unique_str('chaos_collection_')
        nb = 5000
        i_name = cf.gen_unique_str('chaos_index_')
        index_params = {
            "index_type": "IVF_SQ8",
            "metric_type": "L2",
            "params": {
                "nlist": 64
            }
        }

        # create
        t0 = datetime.datetime.now()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        tt = datetime.datetime.now() - t0
        log.debug(f"assert create: {tt}")
        assert collection_w.name == c_name

        # insert
        data = cf.gen_default_list_data(nb=nb)
        t0 = datetime.datetime.now()
        _, res = collection_w.insert(data)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert insert: {tt}")
        assert res

        # flush
        t0 = datetime.datetime.now()
        assert collection_w.num_entities == nb
        tt = datetime.datetime.now() - t0
        log.debug(f"assert flush: {tt}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = datetime.datetime.now()
        search_res, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param={"nprobe": 16},
            limit=1)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert search: {tt}")
        assert len(search_res) == 1

        # index
        t0 = datetime.datetime.now()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=index_params,
            name=i_name)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # query
        term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]'
        t0 = datetime.datetime.now()
        query_res, _ = collection_w.query(term_expr)
        tt = datetime.datetime.now() - t0
        log.debug(f"assert query: {tt}")
        assert len(query_res) == 4

        # reboot a pod
        reboot_pod(chaos_yaml)

        # reconnect if needed
        sleep(constants.WAIT_PER_OP * 4)
        reconnect(connections, self.host, self.port)

        # verify collection persists
        assert utility.has_collection(c_name)
        log.debug("assert collection persists")
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(c_name)
        # verify data persist
        assert collection_w2.num_entities == nb
        log.debug("assert data persists")
        # verify index persists
        assert collection_w2.has_index(i_name)
        log.debug("assert index persists")
        # verify search results persist

        # verify query results persist
        query_res2, _ = collection_w2.query(term_expr)
        assert query_res2 == query_res
        log.debug("assert query result persists")
Example #15
    def test_chaos_memory_stress_indexnode(self, connection, chaos_yaml):
        """
        target: test inject memory stress into indexnode
        method: 1.Deploy milvus and limit indexnode memory resource 1Gi
                2.Create collection and insert some data
                3.Create index
                4.Inject memory stress chaos 512Mi
        expected:
        """
        # init collection and insert nb entities in two batches
        # nb = 50000  # vector size: 512*4*nb about 100Mi and create index need 600Mi memory
        nb = 256000
        dim = 512
        # c_name = cf.gen_unique_str('chaos_memory')
        c_name = 'chaos_memory_gKs8aSUu'
        index_params = {
            "index_type": "IVF_SQ8",
            "metric_type": "L2",
            "params": {
                "nlist": 128
            }
        }

        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(
            name=c_name,
            schema=cf.gen_default_collection_schema(dim=dim),
            shards_num=1)

        # insert 256000 512 dim entities, size 512Mi
        for i in range(2):
            t0_insert = datetime.datetime.now()
            df = cf.gen_default_dataframe_data(nb=nb // 2, dim=dim)
            res = collection_w.insert(df)[0]
            assert res.insert_count == nb // 2
            # log.info(f'After {i + 1} insert, num_entities: {collection_w.num_entities}')
            tt_insert = datetime.datetime.now() - t0_insert
            log.info(f"{i} insert data cost: {tt_insert}")

        # flush
        t0_flush = datetime.datetime.now()
        assert collection_w.num_entities == nb
        tt_flush = datetime.datetime.now() - t0_flush
        log.info(f'flush {nb} entities cost: {tt_flush}')

        # create index
        t0_index = datetime.datetime.now()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=index_params)
        tt_index = datetime.datetime.now() - t0_index

        log.info(f"create index cost: {tt_index}")
        log.info(collection_w.indexes)

        # indexNode start build index, inject chaos memory stress
        chaos_config = gen_experiment_config(chaos_yaml)
        log.debug(chaos_config)
        chaos_res = CusResource(kind=chaos_config['kind'],
                                group=constants.CHAOS_GROUP,
                                version=constants.CHAOS_VERSION,
                                namespace=constants.CHAOS_NAMESPACE)
        chaos_res.create(chaos_config)
        log.debug("inject chaos")
Example #16
 def collection_wrap_4_flush(self, connection):
     c_wrap = ApiCollectionWrapper()
     c_wrap.init_collection(name=cf.gen_unique_str("collection_4_insert"),
                            schema=cf.gen_default_collection_schema(),
                            check_task="check_nothing")
     return c_wrap
Example #17
    def test_scale_query_node(self):
        """
        target: test scale queryNode
        method: 1.deploy milvus cluster with 1 queryNode
                2.prepare work (connect, create, insert, index and load)
                3.continuously search (daemon thread)
                4.expand queryNode from 1 to 5
                5.continuously insert new data (daemon thread)
                6.shrink queryNode from 5 to 3
        expected: Verify milvus remains healthy and search successfully during scale
        """
        release_name = "scale-query"
        query_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.components.image': 'harbor.zilliz.cc/milvus/milvus:master-20211202-ed546d0',
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.queryNode.replicas': 1,
            'spec.config.dataCoord.enableCompaction': True,
            'spec.config.dataCoord.enableGarbageCollection': True
        }
        mic = MilvusOperator()
        mic.install(query_config)
        healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200)
        log.info(f"milvus healthy: {healthy}")
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
        # host = "10.98.0.8"

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create
        c_name = cf.gen_unique_str("scale_query")
        # c_name = 'scale_query_DymS7kI4'
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema(), shards_num=2)

        # insert three segments
        for i in range(3):
            df = cf.gen_default_dataframe_data(nb)
            collection_w.insert(df)
            log.debug(collection_w.num_entities)

        # create index
        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        assert collection_w.has_index()[0]
        assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
                                                default_index_params)

        # load
        collection_w.load()

        # scale queryNode to 5
        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 5}, constants.NAMESPACE)

        # continuously search
        def do_search():
            while True:
                search_res, _ = collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                                    ct.default_float_vec_field_name,
                                                    ct.default_search_params, ct.default_limit)
                log.debug(search_res[0].ids)
                assert len(search_res[0].ids) == ct.default_limit

        t_search = threading.Thread(target=do_search, args=(), daemon=True)
        t_search.start()

        # wait new QN running, continuously insert
        # time.sleep(10)
        healthy = mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200)
        log.info(f"milvus healthy after scale up: {healthy}")
        # wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        def do_insert():
            while True:
                tmp_df = cf.gen_default_dataframe_data(1000)
                collection_w.insert(tmp_df)

        t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
        t_insert.start()

        log.debug(collection_w.num_entities)
        time.sleep(20)
        log.debug("Expand querynode test finished")

        mic.upgrade(release_name, {'spec.components.queryNode.replicas': 3}, constants.NAMESPACE)
        time.sleep(60)
        wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

        log.debug(collection_w.num_entities)
        time.sleep(60)
        log.debug("Shrink querynode test finished")
Example #18
    def test_scale_data_node(self):
        """
        target: test scale dataNode
        method: 1.deploy milvus cluster with 2 dataNode
                2.create collection with shards_num=5
                3.continuously insert new data (daemon thread)
                4.expand dataNode from 2 to 5
                5.create new collection with shards_num=2
                6.continuously insert new collection new data (daemon thread)
                7.shrink dataNode from 5 to 3
        expected: Verify milvus remains healthy and insert and flush succeed during scale;
                  observe average dataNode memory usage
        """
        release_name = "scale-data"
        image = f'{constants.IMAGE_REPOSITORY}:{constants.IMAGE_TAG}'
        data_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.dataNode.replicas': 2,
            'spec.config.dataCoord.enableCompaction': True,
            'spec.config.dataCoord.enableGarbageCollection': True
        }
        mic = MilvusOperator()
        mic.install(data_config)
        healthy = mic.wait_for_healthy(release_name,
                                       constants.NAMESPACE,
                                       timeout=1200)
        log.info(f"milvus healthy: {healthy}")
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
        # host = '10.98.0.4'

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create
        c_name = cf.gen_unique_str("scale_query")
        # c_name = 'scale_query_DymS7kI4'
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema(),
                                     shards_num=5)

        tmp_nb = 10000

        def do_insert():
            while True:
                tmp_df = cf.gen_default_dataframe_data(tmp_nb)
                collection_w.insert(tmp_df)
                log.debug(collection_w.num_entities)

        t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
        t_insert.start()

        # scale dataNode to 5
        mic.upgrade(release_name, {'spec.components.dataNode.replicas': 5},
                    constants.NAMESPACE)
        time.sleep(300)
        log.debug("Expand dataNode test finished")

        # create new collection and insert
        new_c_name = cf.gen_unique_str("scale_query")
        collection_w_new = ApiCollectionWrapper()
        collection_w_new.init_collection(
            name=new_c_name,
            schema=cf.gen_default_collection_schema(),
            shards_num=2)

        def do_new_insert():
            while True:
                tmp_df = cf.gen_default_dataframe_data(tmp_nb)
                collection_w_new.insert(tmp_df)
                log.debug(collection_w_new.num_entities)

        t_insert_new = threading.Thread(target=do_new_insert,
                                        args=(),
                                        daemon=True)
        t_insert_new.start()

        # scale dataNode to 3
        mic.upgrade(release_name, {'spec.components.dataNode.replicas': 3},
                    constants.NAMESPACE)
        wait_pods_ready(constants.NAMESPACE,
                        f"app.kubernetes.io/instance={release_name}")

        log.debug(collection_w.num_entities)
        time.sleep(300)
        log.debug("Shrink dataNode test finished")
Example #19
    def test_simd_compat_e2e(self, simd_id):
        """
       steps
       1. [test_milvus_install]: set up milvus with customized simd configured
       2. [test_simd_compat_e2e]: verify milvus is working well
       4. [test_milvus_cleanup]: delete milvus instances in teardown
       """
        simd = supported_simd_types[simd_id]
        log.info(f"start to install milvus with simd {simd}")
        release_name, host, port = _install_milvus(simd)
        self.release_name = release_name
        assert host is not None
        conn = connections.connect("default", host=host, port=port)
        assert conn is not None
        mil = MilvusSys(alias="default")
        log.info(f"milvus build version: {mil.build_version}")
        log.info(f"milvus simdType: {mil.simd_type}")
        assert str(mil.simd_type).lower() in [
            simd_type.lower() for simd_type in supported_simd_types[simd_id:]
        ]

        log.info(f"start to e2e verification: {simd}")
        # create
        name = cf.gen_unique_str("compat")
        t0 = time.time()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=name,
                                     schema=cf.gen_default_collection_schema(),
                                     timeout=40)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        data = cf.gen_default_list_data()
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res

        # flush
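        # note: in this client, reading num_entities triggers a flush,
        # so the assertion below checks the post-flush count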
        t0 = time.time()
        assert collection_w.num_entities == len(data[0]) + entities
        tt = time.time() - t0
        entities = collection_w.num_entities
        log.info(f"assert flush: {tt}, entities: {entities}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {
            "index_type": "IVF_SQ8",
            "params": {
                "nlist": 64
            },
            "metric_type": "L2"
        }
        t0 = time.time()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=_index_params,
            name=cf.gen_unique_str())
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")
Exemplo n.º 20
0
    def test_expand_index_node(self):
        """
        target: test expand indexNode from 1 to 2
        method: 1.deploy milvus with one indexNode
                2.create index with one indexNode
                3.expand indexNode from 1 to 2
                4.create index with two indexNodes
        expected: the indexing cost with one indexNode is about twice the cost with two indexNodes
        """
        release_name = "scale-index"
        image = f'{constants.IMAGE_REPOSITORY}:{constants.IMAGE_TAG}'
        data_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.indexNode.replicas': 1,
            'spec.components.dataNode.replicas': 2,
            'spec.config.dataCoord.enableCompaction': True,
            'spec.config.dataCoord.enableGarbageCollection': True
        }
        mic = MilvusOperator()
        mic.install(data_config)
        healthy = mic.wait_for_healthy(release_name,
                                       constants.NAMESPACE,
                                       timeout=1200)
        log.info(f"milvus healthy: {healthy}")
        host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
        # host = '10.98.0.8'

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        data = cf.gen_default_dataframe_data(nb)

        # create
        c_name = "index_scale_one"
        collection_w = ApiCollectionWrapper()
        # collection_w.init_collection(name=c_name)
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())

        # insert
        loop = 100
        for i in range(loop):
            collection_w.insert(data, timeout=60)
        assert collection_w.num_entities == nb * loop

        # create index on collection
        # note that the num of segments and the num of indexNode are related to indexing time
        collection_w.drop_index()
        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name,
                                  default_index_params)
        assert collection_w.has_index()[0]
        t0 = datetime.datetime.now() - start

        log.debug(f't0: {t0}')

        collection_w.drop_index()
        assert not collection_w.has_index()[0]

        # expand indexNode from 1 to 2
        mic.upgrade(release_name, {'spec.components.indexNode.replicas': 2},
                    constants.NAMESPACE)
        time.sleep(60)
        mic.wait_for_healthy(release_name, constants.NAMESPACE)

        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name,
                                  default_index_params)
        assert collection_w.has_index()[0]
        t1 = datetime.datetime.now() - start

        log.debug(f't1: {t1}')
        assert round(t0 / t1) == 2
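
t0 and t1 are datetime.timedelta objects, so t0 / t1 is a plain float ratio in Python 3. An exact round(t0 / t1) == 2 is brittle on shared CI hardware; a tolerance-based check is one alternative (the threshold values are assumptions):

def assert_speedup(t_before, t_after, expected=2.0, tolerance=0.5):
    """Assert the index-build speedup is near `expected`, within `tolerance` (sketch)."""
    ratio = t_before.total_seconds() / t_after.total_seconds()
    assert abs(ratio - expected) <= tolerance, \
        f'speedup {ratio:.2f} outside {expected} +/- {tolerance}'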
Exemplo n.º 21
0
def e2e_milvus(host, c_name, collection_exist=False):
    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    collection_w = ApiCollectionWrapper()
    if collection_exist:
        collection_w.init_collection(name=c_name)
    else:
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())

    # insert
    data = cf.gen_default_list_data(ct.default_nb)
    mutation_res, _ = collection_w.insert(data)
    assert mutation_res.insert_count == ct.default_nb
    log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name,
                              ct.default_index)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection,
                                            ct.default_float_vec_field_name,
                                            ct.default_index)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(data[-1][:ct.default_nq],
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params,
                                        ct.default_limit)
    assert len(search_res[0]) == ct.default_limit
    log.debug(search_res[0][0].id)

    # query
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = collection_w.query(term_expr, output_fields=["*", "%"])
    assert query_res[0][ct.default_int64_field_name] == ids
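
The collection_exist flag lets the same helper run before and after a scale event: the first pass creates the collection, the second re-opens it by name and verifies DDL/DML still work. A hedged usage sketch (the helm scale step in the middle is borrowed from the other examples):

c_name = cf.gen_unique_str('e2e_scale')
e2e_milvus(host, c_name, collection_exist=False)  # first pass: create and verify
env.helm_upgrade_cluster_milvus(dataNode=2)       # assumed scale step between passes
e2e_milvus(host, c_name, collection_exist=True)   # second pass: reuse and verify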
Exemplo n.º 22
0
    def test_expand_index_node(self):
        """
        target: test expand indexNode from 1 to 2
        method: 1.deploy milvus with one indexNode
                2.create index with one indexNode
                3.expand indexNode from 1 to 2
                4.create index with two indexNodes
        expected: the indexing cost with one indexNode is about twice the cost with two indexNodes
        """
        release_name = "scale-index"
        milvusOp, host, port = scale_common.deploy_default_milvus(release_name)

        # connect
        connections.add_connection(default={"host": host, "port": port})
        connections.connect(alias='default')

        data = cf.gen_default_dataframe_data(nb)

        # create
        c_name = "index_scale_one"
        collection_w = ApiCollectionWrapper()
        # collection_w.init_collection(name=c_name)
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        # insert
        loop = 10
        for i in range(loop):
            collection_w.insert(data)
        assert collection_w.num_entities == nb * loop

        # create index on collection one and two
        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name,
                                  default_index_params)
        assert collection_w.has_index()[0]
        t0 = datetime.datetime.now() - start

        log.debug(f't0: {t0}')

        collection_w.drop_index()
        assert not collection_w.has_index()[0]

        # expand indexNode from 1 to 2
        milvusOp.upgrade(release_name,
                         {'spec.components.indexNode.replicas': 2},
                         constants.NAMESPACE)
        milvusOp.wait_for_healthy(release_name, constants.NAMESPACE)

        start = datetime.datetime.now()
        collection_w.create_index(ct.default_float_vec_field_name,
                                  default_index_params)
        assert collection_w.has_index()[0]
        t1 = datetime.datetime.now() - start

        log.debug(f't1: {t1}')
        assert round(t0 / t1) == 2
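
scale_common.deploy_default_milvus is assumed to wrap the MilvusOperator install used elsewhere in these examples and hand back the operator together with the endpoint; a minimal sketch under that assumption:

def deploy_default_milvus(release_name):
    """Install a default cluster Milvus and return (operator, host, port) (sketch)."""
    data_config = {
        'metadata.namespace': constants.NAMESPACE,
        'metadata.name': release_name,
        'spec.components.image': f'{constants.IMAGE_REPOSITORY}:{constants.IMAGE_TAG}',
        'spec.components.proxy.serviceType': 'LoadBalancer',
    }
    mic = MilvusOperator()
    mic.install(data_config)
    if not mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1200):
        raise Exception('Milvus healthy timeout 1200s')
    host, port = mic.endpoint(release_name, constants.NAMESPACE).split(':')
    return mic, host, int(port)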
Exemplo n.º 23
0
    def test_scale_data_node(self):
        """
        target: test scale dataNode
        method: 1.deploy milvus cluster with 2 dataNode
                2.create collection with shards_num=5
                3.continuously insert new data (daemon thread)
                4.expand dataNode from 2 to 5
                5.create new collection with shards_num=2
                6.continuously insert new data into the new collection (daemon thread)
                7.shrink dataNode from 5 to 3
        expected: milvus remains healthy, and insert and flush succeed during scale;
                  observe average dataNode memory usage
        """
        release_name = "scale-data"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        fail_count = 0

        data_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.dataNode.replicas': 2,
            'spec.config.dataCoord.enableCompaction': True,
            'spec.config.dataCoord.enableGarbageCollection': True
        }
        mic = MilvusOperator()
        mic.install(data_config)
        if mic.wait_for_healthy(release_name,
                                constants.NAMESPACE,
                                timeout=1200):
            host = mic.endpoint(release_name,
                                constants.NAMESPACE).split(':')[0]
        else:
            # log.warning(f'Deploy {release_name} timeout and ready to uninstall')
            # mic.uninstall(release_name, namespace=constants.NAMESPACE)
            raise Exception('Milvus healthy timeout 1200s')

        try:
            # connect
            connections.add_connection(default={"host": host, "port": 19530})
            connections.connect(alias='default')

            # create
            c_name = cf.gen_unique_str("scale_query")
            # c_name = 'scale_query_DymS7kI4'
            collection_w = ApiCollectionWrapper()
            collection_w.init_collection(
                name=c_name,
                schema=cf.gen_default_collection_schema(),
                shards_num=5)

            tmp_nb = 10000

            def do_insert():
                while True:
                    tmp_df = cf.gen_default_dataframe_data(tmp_nb)
                    collection_w.insert(tmp_df)
                    log.debug(collection_w.num_entities)

            t_insert = threading.Thread(target=do_insert, args=(), daemon=True)
            t_insert.start()

            # scale dataNode to 5
            mic.upgrade(release_name, {'spec.components.dataNode.replicas': 5},
                        constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")
            log.debug("Expand dataNode test finished")

            # create new collection and insert
            new_c_name = cf.gen_unique_str("scale_query")
            collection_w_new = ApiCollectionWrapper()
            collection_w_new.init_collection(
                name=new_c_name,
                schema=cf.gen_default_collection_schema(),
                shards_num=2)

            def do_new_insert():
                while True:
                    tmp_df = cf.gen_default_dataframe_data(tmp_nb)
                    collection_w_new.insert(tmp_df)
                    log.debug(collection_w_new.num_entities)

            t_insert_new = threading.Thread(target=do_new_insert,
                                            args=(),
                                            daemon=True)
            t_insert_new.start()

            # scale dataNode to 3
            mic.upgrade(release_name, {'spec.components.dataNode.replicas': 3},
                        constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            log.debug(collection_w.num_entities)
            time.sleep(300)
            log.debug("Shrink dataNode test finished")

        except Exception as e:
            log.error(str(e))
            fail_count += 1
            # raise Exception(str(e))

        finally:
            log.info(f'Test finished with {fail_count} failed requests')
            assert fail_count <= 1
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE,
                         label_selector=label,
                         release_name=release_name)

            mic.uninstall(release_name, namespace=constants.NAMESPACE)
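
read_pod_log is assumed to export pod logs for post-mortem analysis; with the kubernetes Python client a sketch could be (the output file naming is an assumption):

from kubernetes import client, config


def read_pod_log(namespace, label_selector, release_name):
    """Dump the logs of every matching pod to <release_name>_<pod>.log (sketch)."""
    config.load_kube_config()
    v1 = client.CoreV1Api()
    pods = v1.list_namespaced_pod(namespace, label_selector=label_selector).items
    for pod in pods:
        log_text = v1.read_namespaced_pod_log(name=pod.metadata.name,
                                              namespace=namespace)
        with open(f'{release_name}_{pod.metadata.name}.log', 'w') as f:
            f.write(log_text)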
Exemplo n.º 24
0
    def test_expand_query_node(self):
        release_name = "scale-query"
        env = HelmEnv(release_name=release_name)
        host = env.helm_install_cluster_milvus()

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # create
        c_name = "query_scale_one"
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        # insert
        data = cf.gen_default_list_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        # # create index
        # collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
        # assert collection_w.has_index()[0]
        # assert collection_w.index()[0] == Index(collection_w.collection, ct.default_float_vec_field_name,
        #                                         default_index_params)
        collection_w.load()
        # vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(5)]
        res1, _ = collection_w.search(data[-1][:5],
                                      ct.default_float_vec_field_name,
                                      ct.default_search_params,
                                      ct.default_limit)

        # scale queryNode pod
        env.helm_upgrade_cluster_milvus(queryNode=2)

        c_name_2 = "query_scale_two"
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(
            name=c_name_2, schema=cf.gen_default_collection_schema())
        collection_w2.insert(data)
        assert collection_w2.num_entities == ct.default_nb
        collection_w2.load()
        res2, _ = collection_w2.search(data[-1][:5],
                                       ct.default_float_vec_field_name,
                                       ct.default_search_params,
                                       ct.default_limit)

        assert res1[0].ids == res2[0].ids
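
HelmEnv.helm_upgrade_cluster_milvus(queryNode=2) presumably shells out to helm upgrade with --set overrides; a sketch with subprocess (the chart reference and value key layout are assumptions):

import subprocess


def helm_upgrade_cluster_milvus(release_name, **replicas):
    """Scale Milvus components via helm, e.g. queryNode=2 (sketch; value keys assumed)."""
    sets = [f'--set {component}.replicas={count}'
            for component, count in replicas.items()]
    cmd = f"helm upgrade --reuse-values {' '.join(sets)} {release_name} milvus/milvus"
    subprocess.run(cmd, shell=True, check=True)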
Exemplo n.º 25
0
    def test_expand_data_node(self):
        """
        target: test create and insert api after expand dataNode pod
        method: 1.create collection a and insert df
                2.expand dataNode pod from 1 to 2
                3.verify collection a's properties and verify create and insert on a new collection
        expected: create and insert operations on both collections work correctly
        """
        release_name = "scale-data"
        milvusOp, host, port = scale_common.deploy_default_milvus(release_name)

        # connect
        connections.add_connection(default={"host": host, "port": port})
        connections.connect(alias='default')
        # create
        c_name = cf.gen_unique_str(prefix)
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())
        # insert
        data = cf.gen_default_list_data()
        mutation_res, _ = collection_w.insert(data)
        assert mutation_res.insert_count == ct.default_nb
        # scale dataNode to 2 pods
        milvusOp.upgrade(release_name, {'spec.components.dataNode.replicas': 2}, constants.NAMESPACE)
        milvusOp.wait_for_healthy(release_name, constants.NAMESPACE)

        # after scale, assert data consistent
        assert utility.has_collection(c_name)
        assert collection_w.num_entities == ct.default_nb
        # assert new operations
        new_cname = cf.gen_unique_str(prefix)
        new_collection_w = ApiCollectionWrapper()
        new_collection_w.init_collection(name=new_cname, schema=cf.gen_default_collection_schema())
        new_mutation_res, _ = new_collection_w.insert(data)
        assert new_mutation_res.insert_count == ct.default_nb
        assert new_collection_w.num_entities == ct.default_nb
        # assert old collection ddl
        mutation_res_2, _ = collection_w.insert(data)
        assert mutation_res_2.insert_count == ct.default_nb
        assert collection_w.num_entities == ct.default_nb*2

        collection_w.drop()
        new_collection_w.drop()
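
This example builds its names with cf.gen_unique_str(prefix); names like scale_query_DymS7kI4 in the commented-out lines earlier suggest the helper appends a short random suffix to its prefix. A plausible sketch:

import random
import string


def gen_unique_str(prefix='test'):
    """Return prefix plus an 8-char random suffix, e.g. 'scale_query_DymS7kI4' (sketch)."""
    suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
    return f'{prefix}_{suffix}'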
Exemplo n.º 26
0
    def test_shrink_query_node(self):
        """
        target: test shrink queryNode from 2 to 1
        method: 1.deploy two queryNode
                2.search two collections in two queryNode
                3.upgrade queryNode from 2 to 1
                4.search second collection
        expected: search result is correct
        """
        # deploy
        release_name = "scale-query"
        env = HelmEnv(release_name=release_name, queryNode=2)
        host = env.helm_install_cluster_milvus(
            image_pull_policy=constants.IF_NOT_PRESENT)

        # connect
        connections.add_connection(default={"host": host, "port": 19530})
        connections.connect(alias='default')

        # collection one
        data = cf.gen_default_list_data(nb)
        c_name = "query_scale_one"
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        collection_w.insert(data)
        assert collection_w.num_entities == nb
        collection_w.load()
        res1, _ = collection_w.search(data[-1][:nq],
                                      ct.default_float_vec_field_name,
                                      ct.default_search_params,
                                      ct.default_limit)
        assert res1[0].ids[0] == data[0][0]

        # collection two
        c_name_2 = "query_scale_two"
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(
            name=c_name_2, schema=cf.gen_default_collection_schema())
        collection_w2.insert(data)
        assert collection_w2.num_entities == nb
        collection_w2.load()
        res2, _ = collection_w2.search(data[-1][:nq],
                                       ct.default_float_vec_field_name,
                                       ct.default_search_params,
                                       ct.default_limit)
        assert res2[0].ids[0] == data[0][0]

        # scale queryNode pod
        env.helm_upgrade_cluster_milvus(queryNode=1)

        # search
        res1, _ = collection_w.search(data[-1][:nq],
                                      ct.default_float_vec_field_name,
                                      ct.default_search_params,
                                      ct.default_limit)
        assert res1[0].ids[0] == data[0][0]
        res2, _ = collection_w2.search(data[-1][:nq],
                                       ct.default_float_vec_field_name,
                                       ct.default_search_params,
                                       ct.default_limit)
        assert res2[0].ids[0] == data[0][0]
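
The indexing above relies on the column-oriented layout of cf.gen_default_list_data: data[0] is the int64 primary-key column (so data[0][0] is the first id) and data[-1] is the float-vector column (so data[-1][:nq] yields nq query vectors). A sketch of that layout; the exact field set of the default schema is an assumption:

import random


def gen_default_list_data(nb=3000, dim=128):
    """Column-based data: [int64 pks, float scalars, float vectors] (sketch)."""
    int64_values = [i for i in range(nb)]
    float_values = [float(i) for i in range(nb)]
    float_vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
    return [int64_values, float_values, float_vectors]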
Exemplo n.º 27
0
class Base:
    """ Initialize class object """
    connection_wrap = None
    collection_wrap = None
    partition_wrap = None
    index_wrap = None
    utility_wrap = None
    collection_schema_wrap = None
    field_schema_wrap = None
    collection_object_list = []

    def setup_class(self):
        log.info("[setup_class] Start setup class...")

    def teardown_class(self):
        log.info("[teardown_class] Start teardown class...")

    def setup_method(self, method):
        log.info(("*" * 35) + " setup " + ("*" * 35))
        log.info("[setup_method] Start setup test case %s." % method.__name__)
        self.connection_wrap = ApiConnectionsWrapper()
        self.utility_wrap = ApiUtilityWrapper()
        self.collection_wrap = ApiCollectionWrapper()
        self.partition_wrap = ApiPartitionWrapper()
        self.index_wrap = ApiIndexWrapper()
        self.collection_schema_wrap = ApiCollectionSchemaWrapper()
        self.field_schema_wrap = ApiFieldSchemaWrapper()

    def teardown_method(self, method):
        log.info(("*" * 35) + " teardown " + ("*" * 35))
        log.info("[teardown_method] Start teardown test case %s..." %
                 method.__name__)

        try:
            """ Drop collection before disconnect """
            if self.connection_wrap.get_connection(
                    alias=DefaultConfig.DEFAULT_USING)[0] is None:
                self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING,
                                             host=param_info.param_host,
                                             port=param_info.param_port)

            if self.collection_wrap.collection is not None:
                self.collection_wrap.drop(
                    check_task=ct.CheckTasks.check_nothing)

            collection_list = self.utility_wrap.list_collections()[0]
            for collection_object in self.collection_object_list:
                if collection_object.collection is not None and collection_object.name in collection_list:
                    collection_object.drop(
                        check_task=ct.CheckTasks.check_nothing)

        except Exception as e:
            log.debug(str(e))

        try:
            """ Delete connection and reset configuration"""
            res = self.connection_wrap.list_connections()
            for i in res[0]:
                self.connection_wrap.remove_connection(i[0])

            # because the connection is in singleton mode, it needs to be restored to the original state after teardown
            self.connection_wrap.add_connection(
                default={
                    "host": DefaultConfig.DEFAULT_HOST,
                    "port": DefaultConfig.DEFAULT_PORT
                })
        except Exception as e:
            log.debug(str(e))
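
A test class only has to inherit Base to get fresh wrappers per test plus the automatic drop and disconnect in teardown; a minimal usage sketch (class and method names are illustrative):

class TestDemo(Base):

    def test_create_collection(self):
        c_name = cf.gen_unique_str('demo')
        self.collection_wrap.init_collection(
            name=c_name, schema=cf.gen_default_collection_schema())
        assert self.collection_wrap.name == c_name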
Exemplo n.º 28
0
    def test_chaos_data_consist(self, connection, chaos_yaml):
        """
        target: verify data consistency after chaos injected and recovered
        method: 1. create a collection, insert some data, search and query
                2. inject a chaos object
                3. reconnect to service
                4. verify a) data entities and index persist,
                          b) search and query results persist
        expected: collection data and results persist
        """
        c_name = cf.gen_unique_str('chaos_collection_')
        nb = 5000
        i_name = cf.gen_unique_str('chaos_index_')
        index_params = {
            "index_type": "IVF_SQ8",
            "metric_type": "L2",
            "params": {
                "nlist": 64
            }
        }

        # create
        t0 = datetime.datetime.now()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=c_name,
                                     schema=cf.gen_default_collection_schema())
        tt = datetime.datetime.now() - t0
        log.info(f"assert create: {tt}")
        assert collection_w.name == c_name

        # insert
        data = cf.gen_default_list_data(nb=nb)
        t0 = datetime.datetime.now()
        _, res = collection_w.insert(data)
        tt = datetime.datetime.now() - t0
        log.info(f"assert insert: {tt}")
        assert res

        # flush
        t0 = datetime.datetime.now()
        assert collection_w.num_entities == nb
        tt = datetime.datetime.now() - t0
        log.info(f"assert flush: {tt}")

        # search
        collection_w.load()
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = datetime.datetime.now()
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        search_res, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = datetime.datetime.now() - t0
        log.info(f"assert search: {tt}")
        assert len(search_res) == 1

        # index
        t0 = datetime.datetime.now()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=index_params,
            name=i_name)
        tt = datetime.datetime.now() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,999,99]'
        t0 = datetime.datetime.now()
        query_res, _ = collection_w.query(term_expr)
        tt = datetime.datetime.now() - t0
        log.info(f"assert query: {tt}")
        assert len(query_res) == 4

        # reboot a pod
        reboot_pod(chaos_yaml)

        # parse chaos object
        chaos_config = cc.gen_experiment_config(chaos_yaml)
        meta_name = chaos_config.get('metadata', None).get('name', None)

        # wait all pods ready
        log.info(
            f"wait for pods in namespace {constants.CHAOS_NAMESPACE} with label app.kubernetes.io/instance={meta_name}"
        )
        wait_pods_ready(constants.CHAOS_NAMESPACE,
                        f"app.kubernetes.io/instance={meta_name}")
        log.info(
            f"wait for pods in namespace {constants.CHAOS_NAMESPACE} with label release={meta_name}"
        )
        wait_pods_ready(constants.CHAOS_NAMESPACE, f"release={meta_name}")
        log.info("all pods are ready")

        # reconnect if needed
        sleep(constants.WAIT_PER_OP * 3)
        reconnect(connections, alias='default')

        # verify collection persists
        assert utility.has_collection(c_name)
        log.info("assert collection persists")
        collection_w2 = ApiCollectionWrapper()
        collection_w2.init_collection(c_name)
        # verify data persist
        assert collection_w2.num_entities == nb
        log.info("assert data persists")
        # verify index persists
        assert collection_w2.has_index(i_name)
        log.info("assert index persists")
        # verify search results persist
        collection_w2.load()
        t0 = datetime.datetime.now()
        search_res, _ = collection_w2.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1)
        tt = datetime.datetime.now() - t0
        log.info(f"assert search: {tt}")
        assert len(search_res) == 1
        # verify query results persist
        query_res2, _ = collection_w2.query(term_expr)
        assert len(query_res2) == len(query_res)
        log.info("assert query result persists")
Exemplo n.º 29
0
    def test_expand_index_node(self):
        """
        target: test expand indexNode from 1 to 2
        method: 1.deploy milvus with one indexNode
                2.create index with one indexNode
                3.expand indexNode from 1 to 2
                4.create index with two indexNodes
        expected: the indexing cost with one indexNode is about twice the cost with two indexNodes
        """
        release_name = "expand-index"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        init_replicas = 1
        expand_replicas = 2
        data_config = {
            'metadata.namespace': constants.NAMESPACE,
            'spec.mode': 'cluster',
            'metadata.name': release_name,
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.indexNode.replicas': init_replicas,
            'spec.components.dataNode.replicas': 2,
            'spec.config.common.retentionDuration': 60
        }
        mic = MilvusOperator()
        mic.install(data_config)
        if mic.wait_for_healthy(release_name, constants.NAMESPACE, timeout=1800):
            host = mic.endpoint(release_name, constants.NAMESPACE).split(':')[0]
        else:
            # If deploy failed and you want to uninstall mic
            # log.warning(f'Deploy {release_name} timeout and ready to uninstall')
            # mic.uninstall(release_name, namespace=constants.NAMESPACE)
            raise MilvusException(message='Milvus healthy timeout 1800s')

        try:
            # connect
            connections.add_connection(default={"host": host, "port": 19530})
            connections.connect(alias='default')

            # create collection
            c_name = "index_scale_one"
            collection_w = ApiCollectionWrapper()
            collection_w.init_collection(name=c_name, schema=cf.gen_default_collection_schema())

            # insert data
            data = cf.gen_default_dataframe_data(nb)
            loop = 100
            for i in range(loop):
                collection_w.insert(data, timeout=60)
            assert collection_w.num_entities == nb * loop

            # create index
            # Note that the num of segments and the num of indexNode are related to indexing time
            start = datetime.datetime.now()
            collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
            assert collection_w.has_index()[0]
            t0 = datetime.datetime.now() - start
            log.info(f'Create index on {init_replicas} indexNode cost t0: {t0}')

            # drop index
            collection_w.drop_index()
            assert not collection_w.has_index()[0]

            # expand indexNode
            mic.upgrade(release_name, {'spec.components.indexNode.replicas': expand_replicas}, constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE, f"app.kubernetes.io/instance={release_name}")

            # create index again
            start = datetime.datetime.now()
            collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
            assert collection_w.has_index()[0]
            t1 = datetime.datetime.now() - start
            log.info(f'Create index on {expand_replicas} indexNode cost t1: {t1}')
            collection_w.drop_index()

            start = datetime.datetime.now()
            collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
            assert collection_w.has_index()[0]
            t2 = datetime.datetime.now() - start
            log.info(f'Create index on {expand_replicas} indexNode cost t2: {t2}')

            log.debug(f't2 is {t2}, t0 is {t0}, t0/t2 is {t0 / t2}')
            # assert round(t0 / t2) == 2

        except Exception as e:
            raise Exception(str(e)) from e

        finally:
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE, label_selector=label, release_name=release_name)
            mic.uninstall(release_name, namespace=constants.NAMESPACE)
Exemplo n.º 30
0
def e2e_milvus(host, c_name):
    """ e2e milvus """
    log.debug(f'pid: {os.getpid()}')
    # connect
    connections.add_connection(default={"host": host, "port": 19530})
    connections.connect(alias='default')

    # create
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=c_name,
                                 schema=cf.gen_default_collection_schema())

    # insert
    df = cf.gen_default_dataframe_data()
    mutation_res, _ = collection_w.insert(df)
    assert mutation_res.insert_count == ct.default_nb
    log.debug(collection_w.num_entities)

    # create index
    collection_w.create_index(ct.default_float_vec_field_name,
                              ct.default_index)
    assert collection_w.has_index()[0]
    assert collection_w.index()[0] == Index(collection_w.collection,
                                            ct.default_float_vec_field_name,
                                            ct.default_index)

    # search
    collection_w.load()
    search_res, _ = collection_w.search(cf.gen_vectors(1, dim=ct.default_dim),
                                        ct.default_float_vec_field_name,
                                        ct.default_search_params,
                                        ct.default_limit)
    assert len(search_res[0]) == ct.default_limit
    log.debug(search_res[0].ids)

    # query
    ids = search_res[0].ids[0]
    term_expr = f'{ct.default_int64_field_name} in [{ids}]'
    query_res, _ = collection_w.query(term_expr, output_fields=["*", "%"])
    assert query_res[0][ct.default_int64_field_name] == ids
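
The os.getpid() log hints that this helper is meant to run from several processes at once, for example to keep traffic flowing while the cluster is scaled; a usage sketch:

from multiprocessing import Process

procs = [Process(target=e2e_milvus, args=(host, cf.gen_unique_str('e2e')))
         for _ in range(4)]
for p in procs:
    p.start()
for p in procs:
    p.join()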