Example #1
    def test_memory_stress_replicas_group_load_balance(self, prepare_collection):
        """
        target: test apply memory stress on replicas and load balance inside group
        method: 1.Deploy milvus and limit querynode memory 6Gi
                2.Insert 1,000,000 entities (500MB), load 2 replicas (memory usage 1.5GB)
                3.Apply memory stress 4Gi on querynode
        expected: Verify that load balancing occurs
        """
        collection_w = prepare_collection
        utility_w = ApiUtilityWrapper()
        release_name = "mic-memory"

        # load and search
        collection_w.load(replica_number=2)
        progress, _ = utility_w.loading_progress(collection_w.name)
        assert progress["loading_progress"] == "100%"

        # get the replica and random chaos querynode
        replicas, _ = collection_w.get_replicas()
        chaos_querynode_id = replicas.groups[0].group_nodes[0]
        label = f"app.kubernetes.io/instance={release_name}, app.kubernetes.io/component=querynode"
        querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", label)
        chaos_querynode_pod = querynode_id_pod_pair[chaos_querynode_id]

        # get the segment num before chaos
        seg_info_before, _ = utility_w.get_query_segment_info(collection_w.name)
        seg_distribution_before = cf.get_segment_distribution(seg_info_before)
        segments_num_before = len(seg_distribution_before[chaos_querynode_id]["sealed"])
        log.debug(segments_num_before)
        log.debug(seg_distribution_before[chaos_querynode_id]["sealed"])

        # apply memory stress
        chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_replicas_memory_stress_pods.yaml")
        chaos_config['spec']['selector']['pods']['chaos-testing'] = [chaos_querynode_pod]
        log.debug(chaos_config)
        chaos_res = CusResource(kind=chaos_config['kind'],
                                group=constants.CHAOS_GROUP,
                                version=constants.CHAOS_VERSION,
                                namespace=constants.CHAOS_NAMESPACE)
        chaos_res.create(chaos_config)
        log.debug(f"Apply memory stress on querynode {chaos_querynode_id}, pod {chaos_querynode_pod}")

        duration = chaos_config.get('spec').get('duration')
        duration = duration.replace('h', '*3600+').replace('m', '*60+').replace('s', '*1+') + '+0'
        sleep(eval(duration))

        chaos_res.delete(metadata_name=chaos_config.get('metadata', None).get('name', None))

        # Verify automatic load balance
        seg_info_after, _ = utility_w.get_query_segment_info(collection_w.name)
        seg_distribution_after = cf.get_segment_distribution(seg_info_after)
        segments_num_after = len(seg_distribution_after[chaos_querynode_id]["sealed"])
        log.debug(segments_num_after)
        log.debug(seg_distribution_after[chaos_querynode_id]["sealed"])

        assert segments_num_after < segments_num_before
        search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim),
                                            ct.default_float_vec_field_name, ct.default_search_params,
                                            ct.default_limit, timeout=120)
        assert 1 == len(search_res) and ct.default_limit == len(search_res[0])
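Note: the test above turns the chaos duration string (e.g. "3m") into seconds by rewriting it as an arithmetic expression and calling eval. A minimal eval-free sketch of the same conversion, assuming only h/m/s units appear (the helper name chaos_duration_to_seconds is made up for illustration):

import re

def chaos_duration_to_seconds(duration: str) -> int:
    """Convert a chaos duration such as '1h30m' or '3m' into seconds."""
    units = {"h": 3600, "m": 60, "s": 1}
    return sum(int(value) * units[unit]
               for value, unit in re.findall(r"(\d+)([hms])", duration))

# chaos_duration_to_seconds("3m") == 180, chaos_duration_to_seconds("1h30m") == 5400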
Example #2
class LoadBalanceChecker(Checker):
    """check loadbalance operations in a dependent thread"""
    def __init__(self, collection_name=None):
        super().__init__(collection_name=collection_name)
        self.utility_wrap = ApiUtilityWrapper()
        self.c_wrap.load(enable_traceback=enable_traceback)

    def keep_running(self):
        while True:
            c_name = self.c_wrap.name
            res, _ = self.c_wrap.get_replicas()
            # prepare load balance params
            # find a group which has multi nodes
            group_nodes = []
            for g in res.groups:
                if len(g.group_nodes) >= 2:
                    group_nodes = list(g.group_nodes)
                    break
            src_node_id = group_nodes[0]
            dst_node_ids = group_nodes[1:]
            res, _ = self.utility_wrap.get_query_segment_info(c_name)
            segment_distribution = cf.get_segment_distribution(res)
            sealed_segment_ids = segment_distribution[src_node_id]["sealed"]
            # load balance
            t0 = time.time()
            _, result = self.utility_wrap.load_balance(c_name, src_node_id,
                                                       dst_node_ids,
                                                       sealed_segment_ids)
            t1 = time.time()
            # get segments distribution after load balance
            time.sleep(3)
            res, _ = self.utility_wrap.get_query_segment_info(c_name)
            segment_distribution = cf.get_segment_distribution(res)
            sealed_segment_ids_after_load_balance = segment_distribution[
                src_node_id]["sealed"]
            check_1 = len(
                set(sealed_segment_ids)
                & set(sealed_segment_ids_after_load_balance)) == 0
            des_sealed_segment_ids = []
            for des_node_id in dst_node_ids:
                des_sealed_segment_ids += segment_distribution[des_node_id][
                    "sealed"]
            # assert sealed_segment_ids is subset of des_sealed_segment_ids
            check_2 = set(sealed_segment_ids).issubset(
                set(des_sealed_segment_ids))

            if result and (check_1 and check_2):
                self.rsp_times.append(t1 - t0)
                self.average_time = (
                    (t1 - t0) + self.average_time * self._succ) / (self._succ +
                                                                   1)
                self._succ += 1
                log.debug(
                    f"load balance success, time: {t1 - t0:.4f}, average_time: {self.average_time:.4f}"
                )
            else:
                self._fail += 1
            sleep(10)
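Note: the two checks above reduce to simple set algebra; a self-contained sketch with made-up segment IDs:

# Illustrative IDs only: after a successful load balance the source node keeps none of
# the moved segments (check_1) and the destination nodes hold all of them (check_2).
sealed_segment_ids = [101, 102, 103]                # on the source node before balancing
sealed_segment_ids_after = []                       # left on the source node afterwards
des_sealed_segment_ids = [101, 102, 103, 104, 105]  # union over the destination nodes
check_1 = len(set(sealed_segment_ids) & set(sealed_segment_ids_after)) == 0
check_2 = set(sealed_segment_ids).issubset(set(des_sealed_segment_ids))
assert check_1 and check_2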
Example #3
File: checker.py  Project: avmi/milvus
 def __init__(self, collection_name=None):
     if collection_name is None:
         collection_name = cf.gen_unique_str("LoadBalanceChecker_")
     super().__init__(collection_name=collection_name)
     self.utility_wrap = ApiUtilityWrapper()
     self.c_wrap.load()
     self.sealed_segment_ids = None
     self.dst_node_ids = None
     self.src_node_id = None
Example #4
 def __init__(self, flush=False):
     super().__init__()
     self.utility_wrap = ApiUtilityWrapper()
     self.schema = cf.gen_default_collection_schema()
     self.flush = flush
     self.files = ["bulk_load_data_source.json"]
     self.row_based = True
     self.recheck_failed_task = False
     self.failed_tasks = []
Example #5
 def setup(self):
     log.info(("*" * 35) + " setup " + ("*" * 35))
     self.connection_wrap = ApiConnectionsWrapper()
     self.utility_wrap = ApiUtilityWrapper()
     self.collection_wrap = ApiCollectionWrapper()
     self.partition_wrap = ApiPartitionWrapper()
     self.index_wrap = ApiIndexWrapper()
     self.collection_schema_wrap = ApiCollectionSchemaWrapper()
     self.field_schema_wrap = ApiFieldSchemaWrapper()
Example #6
 def setup_method(self, method):
     log.info(("*" * 35) + " setup " + ("*" * 35))
     log.info("[setup_method] Start setup test case %s." % method.__name__)
     self.connection_wrap = ApiConnectionsWrapper()
     self.utility_wrap = ApiUtilityWrapper()
     self.collection_wrap = ApiCollectionWrapper()
     self.partition_wrap = ApiPartitionWrapper()
     self.index_wrap = ApiIndexWrapper()
     self.collection_schema_wrap = ApiCollectionSchemaWrapper()
     self.field_schema_wrap = ApiFieldSchemaWrapper()
Example #7
    def test_memory_stress_replicas_cross_group_load_balance(self, prepare_collection):
        """
        target: test applying memory stress on one group and verify that no load balance occurs across replica groups
        method: 1.Limit all querynodes memory 6Gi
                2.Create and insert 1,000,000 entities
                3.Load collection with two replicas
                4.Apply 80% memory stress on one group
        expected: Verify that load balancing does not occur across groups
        """
        collection_w = prepare_collection
        utility_w = ApiUtilityWrapper()
        release_name = "mic-memory"

        # load and search
        collection_w.load(replica_number=2)
        progress, _ = utility_w.loading_progress(collection_w.name)
        assert progress["loading_progress"] == "100%"
        seg_info_before, _ = utility_w.get_query_segment_info(collection_w.name)

        # get the replica and random chaos querynode
        replicas, _ = collection_w.get_replicas()
        group_nodes = list(replicas.groups[0].group_nodes)
        label = f"app.kubernetes.io/instance={release_name}, app.kubernetes.io/component=querynode"
        querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", label)
        group_nodes_pod = [querynode_id_pod_pair[node_id] for node_id in group_nodes]

        # apply memory stress
        chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_replicas_memory_stress_pods.yaml")
        chaos_config['spec']['selector']['pods']['chaos-testing'] = group_nodes_pod
        log.debug(chaos_config)
        chaos_res = CusResource(kind=chaos_config['kind'],
                                group=constants.CHAOS_GROUP,
                                version=constants.CHAOS_VERSION,
                                namespace=constants.CHAOS_NAMESPACE)
        chaos_res.create(chaos_config)
        log.debug(f"Apply memory stress on querynode {group_nodes}, pod {group_nodes_pod}")

        duration = chaos_config.get('spec').get('duration')
        duration = duration.replace('h', '*3600+').replace('m', '*60+').replace('s', '*1+') + '+0'
        sleep(eval(duration))

        chaos_res.delete(metadata_name=chaos_config.get('metadata', None).get('name', None))

        # Verify that no load balance occurred across groups
        seg_info_after, _ = utility_w.get_query_segment_info(collection_w.name)
        seg_distribution_before = cf.get_segment_distribution(seg_info_before)
        seg_distribution_after = cf.get_segment_distribution(seg_info_after)
        for node_id in group_nodes:
            assert len(seg_distribution_before[node_id]) == len(seg_distribution_after[node_id])

        search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim),
                                            ct.default_float_vec_field_name, ct.default_search_params,
                                            ct.default_limit, timeout=120)
        assert 1 == len(search_res) and ct.default_limit == len(search_res[0])
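Note: get_querynode_id_pod_pairs maps querynode IDs to pod names. The pod-discovery half can be sketched with the official kubernetes Python client; the node-ID half comes from Milvus itself, so this is only an assumption about how such a helper is built:

from kubernetes import client, config

def list_pods_by_label(namespace: str, label_selector: str):
    """Return the pod names matching a label selector, e.g. the querynode pods."""
    config.load_kube_config()
    v1 = client.CoreV1Api()
    pods = v1.list_namespaced_pod(namespace, label_selector=label_selector)
    return [p.metadata.name for p in pods.items]

# list_pods_by_label("chaos-testing",
#                    "app.kubernetes.io/instance=mic-memory,app.kubernetes.io/component=querynode")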
Example #8
    def test_memory_stress_replicas_group_insufficient(self, prepare_collection, mode):
        """
        target: test applying memory stress on different numbers of querynodes so that the group fails to load
                because the memory is insufficient
        method: 1.Limit querynodes memory 5Gi
                2.Create collection and insert 1,000,000 entities
                3.Apply memory stress on querynodes so that their memory is not enough to load the replicas
        expected: Verify load raises an exception, and after deleting the chaos, load and search succeed
        """
        collection_w = prepare_collection
        utility_w = ApiUtilityWrapper()
        chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_querynode_memory_stress.yaml")

        # Update config
        chaos_config['spec']['mode'] = mode
        chaos_config['spec']['stressors']['memory']['size'] = '5Gi'
        log.debug(chaos_config)
        chaos_res = CusResource(kind=chaos_config['kind'],
                                group=constants.CHAOS_GROUP,
                                version=constants.CHAOS_VERSION,
                                namespace=constants.CHAOS_NAMESPACE)
        chaos_res.create(chaos_config)
        # chaos_start = time.time()
        log.debug("chaos injected")
        sleep(10)

        try:
            # load failed
            err = {"err_code": 1, "err_msg": "shuffleSegmentsToQueryNodeV2: insufficient memory of available node"}
            collection_w.load(replica_number=5, timeout=60, check_task=CheckTasks.err_res, check_items=err)

            # query failed because not loaded
            err = {"err_code": 1, "err_msg": "not loaded into memory"}
            collection_w.query("int64 in [0]", check_task=CheckTasks.err_res, check_items=err)

            # delete chaos
            meta_name = chaos_config.get('metadata', None).get('name', None)
            chaos_res.delete(metadata_name=meta_name)
            sleep(10)

            # after delete chaos load and query successfully
            collection_w.load(replica_number=5, timeout=60)
            progress, _ = utility_w.loading_progress(collection_w.name)
            # assert progress["loading_progress"] == "100%"
            query_res, _ = collection_w.query("int64 in [0]")
            assert len(query_res) != 0

            collection_w.release()

        except Exception as e:
            raise Exception(str(e))

        finally:
            log.debug("Test finished")
Example #9
File: checker.py  Project: avmi/milvus
 def __init__(self, collection_name=None, files=[]):
     if collection_name is None:
         collection_name = cf.gen_unique_str("BulkLoadChecker_")
     super().__init__(collection_name=collection_name)
     self.utility_wrap = ApiUtilityWrapper()
     self.schema = cf.gen_default_collection_schema()
     self.files = files
     self.row_based = True
     self.recheck_failed_task = False
     self.failed_tasks = []
     self.c_name = None
Example #10
 def test_wait_index_invalid_name(self, get_invalid_collection_name):
     """
     target: test wait_index
     method: input invalid name
     expected: raise exception
     """
     self._connect()
     c_name = get_invalid_collection_name
     ut = ApiUtilityWrapper()
     ex, _ = ut.wait_for_index_building_complete(c_name)
     log.error(str(ex))
     assert "invalid" or "illegal" in str(ex)
Example #11
 def test_list_collections_using_invalid(self):
     """
     target: test list_collections with invalid using
     method: input invalid name
     expected: raise exception
     """
     self._connect()
     using = "empty"
     ut = ApiUtilityWrapper(using=using)
     ex, _ = ut.list_collections()
     log.error(str(ex))
     assert "invalid" or "illegal" in str(ex)
Example #12
 def test_index_process_invalid_name(self, get_invalid_collection_name):
     """
     target: test building_process
     method: input invalid name
     expected: raise exception
     """
     self._connect()
     c_name = get_invalid_collection_name
     ut = ApiUtilityWrapper()
     ex, _ = ut.index_building_progress(c_name)
     log.error(str(ex))
     assert "invalid" or "illegal" in str(ex)
Example #13
 def test_has_partition_name_invalid(self, get_invalid_partition_name):
     """
     target: test has_partition with error partition name
     method: input invalid name
     expected: raise exception
     """
     self._connect()
     ut = ApiUtilityWrapper()
     c_name = cf.gen_unique_str(prefix)
     p_name = get_invalid_partition_name
     ex, _ = ut.has_partition(c_name, p_name)
     log.error(str(ex))
     assert "invalid" or "illegal" in str(ex)
Example #14
 def _test_list_collections_using_invalid(self):
     """
     target: test list_collections with invalid using
     method: input invalid name
     expected: raise exception
     """
     self._connect()
     using = "empty"
     ut = ApiUtilityWrapper()
     ex, _ = ut.list_collections(using=using,
                                 check_items={
                                     ct.err_code: 0,
                                     ct.err_msg: "should create connect"
                                 })
Example #15
File: checker.py  Project: avmi/milvus
class BulkLoadChecker(Checker):
    """check bulk load operations in a dependent thread"""

    def __init__(self, collection_name=None, files=[]):
        if collection_name is None:
            collection_name = cf.gen_unique_str("BulkLoadChecker_")
        super().__init__(collection_name=collection_name)
        self.utility_wrap = ApiUtilityWrapper()
        self.schema = cf.gen_default_collection_schema()
        self.files = files
        self.row_based = True
        self.recheck_failed_task = False
        self.failed_tasks = []
        self.c_name = None

    def update(self, files=None, schema=None, row_based=None):
        if files is not None:
            self.files = files
        if schema is not None:
            self.schema = schema
        if row_based is not None:
            self.row_based = row_based

    @trace()
    def bulk_load(self):
        task_ids, result = self.utility_wrap.bulk_load(collection_name=self.c_name,
                                                       row_based=self.row_based,
                                                       files=self.files)
        completed, result = self.utility_wrap.wait_for_bulk_load_tasks_completed(task_ids=task_ids, timeout=30)
        return task_ids, completed

    @exception_handler()
    def run_task(self):
        if self.recheck_failed_task and self.failed_tasks:
            self.c_name = self.failed_tasks.pop(0)
            log.debug(f"check failed task: {self.c_name}")
        else:
            self.c_name = cf.gen_unique_str("BulkLoadChecker_")
        self.c_wrap.init_collection(name=self.c_name, schema=self.schema)
        # import data
        task_ids, completed = self.bulk_load()
        if not completed:
            self.failed_tasks.append(self.c_name)
        return task_ids, completed

    def keep_running(self):
        while self._keep_running:
            self.run_task()
            sleep(constants.WAIT_PER_OP / 10)
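Note: the checker docstring says it runs "in a dependent thread"; a hedged usage sketch of driving BulkLoadChecker from a plain thread, stopping it via the _keep_running flag that keep_running polls (a real harness may expose a nicer terminate API):

import threading
import time

checker = BulkLoadChecker(files=["bulk_load_data_source.json"])
t = threading.Thread(target=checker.keep_running, daemon=True)
t.start()

time.sleep(60)                   # let the checker run while chaos is injected elsewhere
checker._keep_running = False    # keep_running() checks this flag on every iteration
t.join(timeout=30)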
Example #16
class Base:
    """ Initialize class object """
    connection_wrap = None
    collection_wrap = None
    partition_wrap = None
    index_wrap = None
    utility_wrap = None
    collection_schema_wrap = None
    field_schema_wrap = None
    collection_object_list = []

    def setup_class(self):
        log.info("[setup_class] Start setup class...")

    def teardown_class(self):
        log.info("[teardown_class] Start teardown class...")

    def setup_method(self, method):
        log.info(("*" * 35) + " setup " + ("*" * 35))
        log.info("[setup_method] Start setup test case %s." % method.__name__)
        self.connection_wrap = ApiConnectionsWrapper()
        self.utility_wrap = ApiUtilityWrapper()
        self.collection_wrap = ApiCollectionWrapper()
        self.partition_wrap = ApiPartitionWrapper()
        self.index_wrap = ApiIndexWrapper()
        self.collection_schema_wrap = ApiCollectionSchemaWrapper()
        self.field_schema_wrap = ApiFieldSchemaWrapper()

    def teardown_method(self, method):
        log.info(("*" * 35) + " teardown " + ("*" * 35))
        log.info("[teardown_method] Start teardown test case %s..." % method.__name__)

        try:
            """ Drop collection before disconnect """
            if self.connection_wrap.get_connection(alias=DefaultConfig.DEFAULT_USING)[0] is None:
                self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING, host=param_info.param_host,
                                             port=param_info.param_port)

            if self.collection_wrap.collection is not None:
                self.collection_wrap.drop(check_task=ct.CheckTasks.check_nothing)

            collection_list = self.utility_wrap.list_collections()[0]
            for collection_object in self.collection_object_list:
                if collection_object.collection is not None and collection_object.name in collection_list:
                    collection_object.drop(check_task=ct.CheckTasks.check_nothing)

        except Exception as e:
            log.debug(str(e))

        try:
            """ Delete connection and reset configuration"""
            res = self.connection_wrap.list_connections()
            for i in res[0]:
                self.connection_wrap.remove_connection(i[0])

            # because the connection is in singleton mode, it needs to be restored to the original state after teardown
            self.connection_wrap.add_connection(default={"host": DefaultConfig.DEFAULT_HOST,
                                                         "port": DefaultConfig.DEFAULT_PORT})
        except Exception as e:
            log.debug(str(e))
Example #17
class CompactChecker(Checker):
    """check compact operations in a dependent thread"""
    def __init__(self, collection_name=None):
        super().__init__(collection_name=collection_name)
        self.ut = ApiUtilityWrapper()
        self.c_wrap.load(
            enable_traceback=enable_traceback)  # load before compact

    def keep_running(self):
        while True:
            seg_info = self.ut.get_query_segment_info(self.c_wrap.name)
            t0 = time.time()
            res, result = self.c_wrap.compact(timeout=timeout)
            print(f"compact done: res {res}")
            self.c_wrap.wait_for_compaction_completed()
            self.c_wrap.get_compaction_plans()
            t1 = time.time()
            if result:
                self.rsp_times.append(t1 - t0)
                self.average_time = (
                    (t1 - t0) + self.average_time * self._succ) / (self._succ +
                                                                   1)
                self._succ += 1
                log.debug(
                    f"compact success, time: {t1 - t0:.4f}, average_time: {self.average_time:.4f}"
                )
            else:
                self._fail += 1
            sleep(constants.WAIT_PER_OP / 10)
Example #18
 def test_memory_stress_replicas_before_load(self, prepare_collection):
     """
     target: test querynode group load with insufficient memory
     method: 1.Limit querynode memory ? 2Gi
             2.Load sealed data (needed memory > memory limit)
     expected: Raise an exception
     """
     collection_w = prepare_collection
     utility_w = ApiUtilityWrapper()
     err = {"err_code": 1, "err_msg": "xxxxxxxxx"}
     # collection_w.load(replica_number=2, timeout=60, check_task=CheckTasks.err_res, check_items=err)
     collection_w.load(replica_number=5)
     utility_w.loading_progress(collection_w.name)
     search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim),
                                         ct.default_float_vec_field_name, ct.default_search_params,
                                         ct.default_limit, timeout=60)
Example #19
    def test_memory_stress_replicas_load_balance_single_node(self, prepare_collection):
        """
        target: test applying memory stress on a single-node replica until it is OOMKilled
        method: 1.Deploy 2 querynodes and limit memory 6Gi
                2.Load 1,000,000 entities (data_size=500MB) with 2 replicas (memory_usage=1.5GB)
                3.Apply memory stress on one querynode and make it OOMKilled
        expected: After deleting the chaos, the querynode returns to running and search succeeds
        """
        collection_w = prepare_collection
        utility_w = ApiUtilityWrapper()

        # load and search
        collection_w.load(replica_number=2)
        progress, _ = utility_w.loading_progress(collection_w.name)
        assert progress["loading_progress"] == "100%"
        query_res, _ = collection_w.query("int64 in [0]")
        assert len(query_res) != 0

        # apply memory stress
        chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_querynode_memory_stress.yaml")

        # Update config
        chaos_config['spec']['mode'] = "one"
        chaos_config['spec']['stressors']['memory']['size'] = '6Gi'
        chaos_config['spec']['duration'] = "1m"
        log.debug(chaos_config)
        duration = chaos_config.get('spec').get('duration')
        duration = duration.replace('h', '*3600+').replace('m', '*60+').replace('s', '*1+') + '+0'
        chaos_res = CusResource(kind=chaos_config['kind'],
                                group=constants.CHAOS_GROUP,
                                version=constants.CHAOS_VERSION,
                                namespace=constants.CHAOS_NAMESPACE)
        chaos_res.create(chaos_config)

        sleep(eval(duration))
        chaos_res.delete(metadata_name=chaos_config.get('metadata', None).get('name', None))

        # release and load again
        collection_w.release()
        collection_w.load(replica_number=2)
        progress, _ = utility_w.loading_progress(collection_w.name)
        assert progress["loading_progress"] == "100%"
        search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim),
                                            ct.default_float_vec_field_name, ct.default_search_params,
                                            ct.default_limit, timeout=120)
        assert 1 == len(search_res) and ct.default_limit == len(search_res[0])
Example #20
 def test_has_partition_name_invalid(self, get_invalid_partition_name):
     """
     target: test has_partition with error partition name
     method: input invalid name
     expected: raise exception
     """
     self._connect()
     ut = ApiUtilityWrapper()
     c_name = cf.gen_unique_str(prefix)
     p_name = get_invalid_partition_name
     if isinstance(p_name, str) and p_name:
         ex, _ = ut.has_partition(c_name,
                                  p_name,
                                  check_task=CheckTasks.err_res,
                                  check_items={
                                      ct.err_code: 1,
                                      ct.err_msg: "Invalid"
                                  })
Example #21
    def test_chaos_memory_stress_replicas_OOM(self, prepare_collection, mode):
        """
        target: test applying memory stress during loading so that the querynode is OOMKilled
        method: 1.Deploy and limit querynode memory to 6Gi
                2.Create collection and insert 1,000,000 entities
                3.Apply memory stress so the querynode is OOMKilled while loading replicas
        expected: Verify that mic is still available to load and search after the querynode restarts
        """
        collection_w = prepare_collection
        utility_w = ApiUtilityWrapper()

        chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_querynode_memory_stress.yaml")
        chaos_config['spec']['mode'] = mode
        chaos_config['spec']['duration'] = '3m'
        chaos_config['spec']['stressors']['memory']['size'] = '6Gi'
        log.debug(chaos_config)
        chaos_res = CusResource(kind=chaos_config['kind'],
                                group=constants.CHAOS_GROUP,
                                version=constants.CHAOS_VERSION,
                                namespace=constants.CHAOS_NAMESPACE)

        chaos_res.create(chaos_config)
        log.debug("chaos injected")
        collection_w.load(replica_number=2, timeout=60, _async=True)

        utility_w.wait_for_loading_complete(collection_w.name)
        progress, _ = utility_w.loading_progress(collection_w.name)
        assert progress["loading_progress"] == '100%'

        sleep(180)
        chaos_res.delete(metadata_name=chaos_config.get('metadata', None).get('name', None))

        # TODO search failed
        search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim),
                                            ct.default_float_vec_field_name, ct.default_search_params,
                                            ct.default_limit, timeout=120)
        assert 1 == len(search_res) and ct.default_limit == len(search_res[0])

        collection_w.release()
        collection_w.load(replica_number=2)
        search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim),
                                            ct.default_float_vec_field_name, ct.default_search_params,
                                            ct.default_limit, timeout=120)
        assert 1 == len(search_res) and ct.default_limit == len(search_res[0])
Example #22
File: checker.py  Project: avmi/milvus
class LoadBalanceChecker(Checker):
    """check loadbalance operations in a dependent thread"""

    def __init__(self, collection_name=None):
        if collection_name is None:
            collection_name = cf.gen_unique_str("LoadBalanceChecker_")
        super().__init__(collection_name=collection_name)
        self.utility_wrap = ApiUtilityWrapper()
        self.c_wrap.load()
        self.sealed_segment_ids = None
        self.dst_node_ids = None
        self.src_node_id = None

    @trace()
    def load_balance(self):
        res, result = self.utility_wrap.load_balance(
            self.c_wrap.name, self.src_node_id, self.dst_node_ids, self.sealed_segment_ids)
        return res, result

    def prepare(self):
        """prepare load balance params"""
        res, _ = self.c_wrap.get_replicas()
        # find a group which has multi nodes
        group_nodes = []
        for g in res.groups:
            if len(g.group_nodes) >= 2:
                group_nodes = list(g.group_nodes)
                break
        self.src_node_id = group_nodes[0]
        self.dst_node_ids = group_nodes[1:]
        res, _ = self.utility_wrap.get_query_segment_info(self.c_wrap.name)
        segment_distribution = cf.get_segment_distribution(res)
        self.sealed_segment_ids = segment_distribution[self.src_node_id]["sealed"]

    @exception_handler()
    def run_task(self):
        self.prepare()
        res, result = self.load_balance()
        return res, result

    def keep_running(self):
        while self._keep_running:
            self.run_task()
            sleep(constants.WAIT_PER_OP / 10)
Example #23
    def test_memory_stress_replicas_group_sufficient(self, prepare_collection, mode):
        """
        target: test applying memory stress on one querynode while the memory remains sufficient to load replicas
        method: 1.Limit all querynodes memory 6Gi
                2.Apply 3Gi memory stress on different numbers of querynodes (loading the whole collection needs about 1.5GB)
        expected: Verify load succeeds and search results are correct
        """
        collection_w = prepare_collection
        utility_w = ApiUtilityWrapper()

        # apply memory stress chaos
        chaos_config = gen_experiment_config("./chaos_objects/memory_stress/chaos_querynode_memory_stress.yaml")
        chaos_config['spec']['mode'] = mode
        chaos_config['spec']['duration'] = '3m'
        chaos_config['spec']['stressors']['memory']['size'] = '3Gi'
        log.debug(chaos_config)
        chaos_res = CusResource(kind=chaos_config['kind'],
                                group=constants.CHAOS_GROUP,
                                version=constants.CHAOS_VERSION,
                                namespace=constants.CHAOS_NAMESPACE)
        chaos_res.create(chaos_config)
        log.debug("chaos injected")
        sleep(20)
        #
        try:
            collection_w.load(replica_number=2, timeout=60)
            utility_w.loading_progress(collection_w.name)
            replicas, _ = collection_w.get_replicas()
            log.debug(replicas)
            search_res, _ = collection_w.search(cf.gen_vectors(1, dim=self.dim),
                                                ct.default_float_vec_field_name, ct.default_search_params,
                                                ct.default_limit, timeout=120)
            assert 1 == len(search_res) and ct.default_limit == len(search_res[0])
            collection_w.release()

        except Exception as e:
            raise Exception(str(e))

        finally:
            # delete chaos
            meta_name = chaos_config.get('metadata', None).get('name', None)
            chaos_res.delete(metadata_name=meta_name)
            log.debug("Test finished")
Example #24
class Base:
    """ Initialize class object """
    connection_wrap = None
    collection_wrap = None
    partition_wrap = None
    index_wrap = None
    utility_wrap = None
    collection_schema_wrap = None
    field_schema_wrap = None
    collection_object_list = []

    def setup_class(self):
        log.info("[setup_class] Start setup class...")

    def teardown_class(self):
        log.info("[teardown_class] Start teardown class...")
        pass

    def setup_method(self, method):
        log.info(("*" * 35) + " setup " + ("*" * 35))
        log.info("[setup_method] Start setup test case %s..." % method.__name__)
        self.connection_wrap = ApiConnectionsWrapper()
        self.utility_wrap = ApiUtilityWrapper()
        self.collection_wrap = ApiCollectionWrapper()
        self.partition_wrap = ApiPartitionWrapper()
        self.index_wrap = ApiIndexWrapper()
        self.collection_schema_wrap = ApiCollectionSchemaWrapper()
        self.field_schema_wrap = ApiFieldSchemaWrapper()

    def teardown_method(self, method):
        log.info(("*" * 35) + " teardown " + ("*" * 35))
        log.info("[teardown_method] Start teardown test case %s..." % method.__name__)

        try:
            """ Drop collection before disconnect """
            if self.connection_wrap.get_connection(alias=DefaultConfig.DEFAULT_USING)[0] is None:
                self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING, host=param_info.param_host,
                                             port=param_info.param_port)

            if self.collection_wrap.collection is not None:
                self.collection_wrap.drop(check_task=ct.CheckTasks.check_nothing)

            for collection_object in self.collection_object_list:
                if collection_object.collection is not None \
                        and collection_object.name in self.utility_wrap.list_collections()[0]:
                    collection_object.drop(check_task=ct.CheckTasks.check_nothing)

        except Exception as e:
            log.debug(str(e))

        try:
            """ Delete connection and reset configuration"""
            res = self.connection_wrap.list_connections()
            for i in res[0]:
                self.connection_wrap.remove_connection(i[0])

            # because the connection is in singleton mode, it needs to be restored to the original state after teardown
            self.connection_wrap.add_connection(default={"host": DefaultConfig.DEFAULT_HOST,
                                                         "port": DefaultConfig.DEFAULT_PORT})
        except Exception as e:
            log.debug(str(e))

    @pytest.fixture(scope="module", autouse=True)
    def initialize_env(self, request):
        """ clean log before testing """
        host = request.config.getoption("--host")
        port = request.config.getoption("--port")
        handler = request.config.getoption("--handler")
        clean_log = request.config.getoption("--clean_log")

        """ params check """
        assert ip_check(host) and number_check(port)

        """ modify log files """
        cf.modify_file(file_path_list=[log_config.log_debug, log_config.log_info, log_config.log_err], is_modify=clean_log)

        log.info("#" * 80)
        log.info("[initialize_milvus] Log cleaned up, start testing...")
        param_info.prepare_param_info(host, port, handler)
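Note: a hedged sketch of how a test case might build on Base: setup_method wires up all the wrappers, so a concrete test only needs to connect and use them (the class and test names below are illustrative, not from the repository):

class TestUtilitySmoke(Base):
    def test_list_collections_smoke(self):
        # connect with the same defaults that teardown_method relies on
        self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING,
                                     host=param_info.param_host,
                                     port=param_info.param_port)
        collections, _ = self.utility_wrap.list_collections()
        assert isinstance(collections, list)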
Example #25
 def __init__(self, collection_name=None):
     super().__init__(collection_name=collection_name)
     self.utility_wrap = ApiUtilityWrapper()
     self.c_wrap.load(enable_traceback=enable_traceback)
Example #26
class BulkLoadChecker(Checker):
    """check bulk load operations in a dependent thread"""
    def __init__(self, flush=False):
        super().__init__()
        self.utility_wrap = ApiUtilityWrapper()
        self.schema = cf.gen_default_collection_schema()
        self.flush = flush
        self.files = ["bulk_load_data_source.json"]
        self.row_based = True
        self.recheck_failed_task = False
        self.failed_tasks = []

    def update(self, files=None, schema=None, row_based=None):
        if files is not None:
            self.files = files
        if schema is not None:
            self.schema = schema
        if row_based is not None:
            self.row_based = row_based

    def keep_running(self):
        while True:
            if self.recheck_failed_task and self.failed_tasks:
                c_name = self.failed_tasks.pop(0)
                log.info(f"check failed task: {c_name}")
            else:
                c_name = cf.gen_unique_str("BulkLoadChecker_")
            self.c_wrap.init_collection(name=c_name, schema=self.schema)
            if self.flush:
                t0 = time.time()
                pre_entities_num = self.c_wrap.num_entities
                tt = time.time() - t0
                log.info(f"flush before bulk load, cost time: {tt:.4f}")
            # import data
            t0 = time.time()
            task_ids, res_1 = self.utility_wrap.bulk_load(
                collection_name=c_name,
                row_based=self.row_based,
                files=self.files)
            log.info(f"bulk load task ids:{task_ids}")
            completed, res_2 = self.utility_wrap.wait_for_bulk_load_tasks_completed(
                task_ids=task_ids, timeout=30)
            tt = time.time() - t0
            # added_num = sum(res_2[task_id].row_count for task_id in task_ids)
            if completed:
                self.rsp_times.append(tt)
                self.average_time = (tt + self.average_time * self._succ) / (
                    self._succ + 1)
                self._succ += 1
                log.info(
                    f"bulk load success for collection {c_name}, time: {tt:.4f}, average_time: {self.average_time:4f}"
                )
                if self.flush:
                    t0 = time.time()
                    cur_entities_num = self.c_wrap.num_entities
                    tt = time.time() - t0
                    log.info(f"flush after bulk load, cost time: {tt:.4f}")
            else:
                self._fail += 1
                # if the task failed, store the failed collection name for further checking after chaos
                self.failed_tasks.append(c_name)
                log.info(
                    f"bulk load failed for collection {c_name} time: {tt:.4f}, average_time: {self.average_time:4f}"
                )
                sleep(constants.WAIT_PER_OP / 10)
Example #27
    def test_scale_in_query_node_less_than_replicas(self):
        """
        target: test scaling in the cluster so that querynode count < replica number
        method: 1.Deploy cluster with 2 querynodes
                2.Create and insert data, flush
                3.Load collection with replica number 2
                4.Scale in querynode from 2 to 1 and query
                5.Scale out querynode from 1 back to 2
        expected: Verify search succeeds after scale out
        """
        release_name = "scale-in-query"
        image_tag = get_latest_tag()
        image = f'{constants.IMAGE_REPOSITORY}:{image_tag}'
        query_config = {
            'metadata.namespace': constants.NAMESPACE,
            'metadata.name': release_name,
            'spec.mode': 'cluster',
            'spec.components.image': image,
            'spec.components.proxy.serviceType': 'LoadBalancer',
            'spec.components.queryNode.replicas': 2,
            'spec.config.common.retentionDuration': 60
        }
        mic = MilvusOperator()
        mic.install(query_config)
        if mic.wait_for_healthy(release_name,
                                constants.NAMESPACE,
                                timeout=1800):
            host = mic.endpoint(release_name,
                                constants.NAMESPACE).split(':')[0]
        else:
            raise MilvusException(message=f'Milvus healthy timeout 1800s')
        try:
            # prepare collection
            connections.connect("scale-in", host=host, port=19530)
            utility_w = ApiUtilityWrapper()
            collection_w = ApiCollectionWrapper()
            collection_w.init_collection(
                name=cf.gen_unique_str("scale_in"),
                schema=cf.gen_default_collection_schema(),
                using="scale-in")
            collection_w.insert(cf.gen_default_dataframe_data())
            assert collection_w.num_entities == ct.default_nb

            # load multi replicas and search success
            collection_w.load(replica_number=2)
            search_res, is_succ = collection_w.search(
                cf.gen_vectors(1, ct.default_dim),
                ct.default_float_vec_field_name, ct.default_search_params,
                ct.default_limit)
            assert len(search_res[0].ids) == ct.default_limit
            log.info("Search successfully after load with 2 replicas")
            log.debug(collection_w.get_replicas()[0])
            log.debug(
                utility_w.get_query_segment_info(collection_w.name,
                                                 using="scale-in"))

            # scale in querynode from 2 to 1, less than replica number
            log.debug("Scale in querynode from 2 to 1")
            mic.upgrade(release_name,
                        {'spec.components.queryNode.replicas': 1},
                        constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            # search and not assure success
            collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                ct.default_float_vec_field_name,
                                ct.default_search_params,
                                ct.default_limit,
                                check_task=CheckTasks.check_nothing)
            log.debug(
                collection_w.get_replicas(
                    check_task=CheckTasks.check_nothing)[0])

            # scale querynode from 1 back to 2
            mic.upgrade(release_name,
                        {'spec.components.queryNode.replicas': 2},
                        constants.NAMESPACE)
            mic.wait_for_healthy(release_name, constants.NAMESPACE)
            wait_pods_ready(constants.NAMESPACE,
                            f"app.kubernetes.io/instance={release_name}")

            # verify search success
            collection_w.search(cf.gen_vectors(1, ct.default_dim),
                                ct.default_float_vec_field_name,
                                ct.default_search_params, ct.default_limit)
            # Verify replica info is correct
            replicas = collection_w.get_replicas()[0]
            assert len(replicas.groups) == 2
            for group in replicas.groups:
                assert len(group.group_nodes) == 1
            # Verify loaded segment info is correct
            seg_info = utility_w.get_query_segment_info(collection_w.name,
                                                        using="scale-in")[0]
            num_entities = 0
            for seg in seg_info:
                assert len(seg.nodeIds) == 2
                num_entities += seg.num_rows
            assert num_entities == ct.default_nb

        except Exception as e:
            raise Exception(str(e))

        finally:
            label = f"app.kubernetes.io/instance={release_name}"
            log.info('Start to export milvus pod logs')
            read_pod_log(namespace=constants.NAMESPACE,
                         label_selector=label,
                         release_name=release_name)
            mic.uninstall(release_name, namespace=constants.NAMESPACE)
Example #28
    def test_customize_segment_size(self, seg_size, seg_count):
        """
        target: test customized segment size configuration
        method: install milvus with the given segment size, insert 2,000,000 entities, then load,
                search, build an index, and query
        expected: the number of loaded segments equals seg_count and all operations succeed
        """
        log.info(f"start to install milvus with segment size {seg_size}")
        release_name, host, port = _install_milvus(seg_size)
        self.release_name = release_name
        assert host is not None
        conn = connections.connect("default", host=host, port=port)
        assert conn is not None
        mil = MilvusSys(alias="default")
        log.info(f"milvus build version: {mil.build_version}")

        log.info(f"start to e2e verification: {seg_size}")
        # create
        name = cf.gen_unique_str("segsiz")
        t0 = time.time()
        collection_w = ApiCollectionWrapper()
        collection_w.init_collection(name=name,
                                     schema=cf.gen_default_collection_schema(),
                                     timeout=40)
        tt = time.time() - t0
        assert collection_w.name == name
        entities = collection_w.num_entities
        log.info(f"assert create collection: {tt}, init_entities: {entities}")

        # insert
        nb = 50000
        data = cf.gen_default_list_data(nb=nb)
        t0 = time.time()
        _, res = collection_w.insert(data)
        tt = time.time() - t0
        log.info(f"assert insert: {tt}")
        assert res
        # insert 2 million entities
        rounds = 40
        for _ in range(rounds - 1):
            _, res = collection_w.insert(data)
        entities = collection_w.num_entities
        assert entities == nb * rounds

        # load
        collection_w.load()
        utility_wrap = ApiUtilityWrapper()
        segs, _ = utility_wrap.get_query_segment_info(collection_w.name)
        log.info(f"assert segments: {len(segs)}")
        assert len(segs) == seg_count

        # search
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        search_params = {"metric_type": "L2", "params": {"nprobe": 16}}
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1,
            timeout=30)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")
        assert len(res_1) == 1
        collection_w.release()

        # index
        d = cf.gen_default_list_data()
        collection_w.insert(d)
        log.info(f"assert index entities: {collection_w.num_entities}")
        _index_params = {
            "index_type": "IVF_SQ8",
            "params": {
                "nlist": 64
            },
            "metric_type": "L2"
        }
        t0 = time.time()
        index, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=_index_params,
            name=cf.gen_unique_str(),
            timeout=120)
        tt = time.time() - t0
        log.info(f"assert index: {tt}")
        assert len(collection_w.indexes) == 1

        # search
        t0 = time.time()
        collection_w.load()
        tt = time.time() - t0
        log.info(f"assert load: {tt}")
        search_vectors = cf.gen_vectors(1, ct.default_dim)
        t0 = time.time()
        res_1, _ = collection_w.search(
            data=search_vectors,
            anns_field=ct.default_float_vec_field_name,
            param=search_params,
            limit=1,
            timeout=30)
        tt = time.time() - t0
        log.info(f"assert search: {tt}")

        # query
        term_expr = f'{ct.default_int64_field_name} in [1001,1201,4999,2999]'
        t0 = time.time()
        res, _ = collection_w.query(term_expr, timeout=30)
        tt = time.time() - t0
        log.info(f"assert query result {len(res)}: {tt}")
Example #29
File: checker.py  Project: avmi/milvus
 def __init__(self, collection_name=None):
     if collection_name is None:
         collection_name = cf.gen_unique_str("CompactChecker_")
     super().__init__(collection_name=collection_name)
     self.ut = ApiUtilityWrapper()
     self.c_wrap.load()  # load before compact