Пример #1
0
    def extract_cases(self, collection):
        """Expand one collection config into a flat list of search cases.

        One case (and one matching metric object) is produced for every
        combination of index type, index parameter set, search parameter
        set, filter, nq and top_k.

        Args:
            collection: Mapping describing the run. The keys
                ``source_file``, ``index_types``, ``index_params``,
                ``top_ks``, ``nqs`` and ``search_params`` are required;
                ``collection_name``, ``guarantee_timestamp`` and
                ``filters`` are optional. The mapping is not modified.

        Returns:
            A ``(cases, case_metrics)`` tuple of two equally long lists.
            ``case_metrics[i]`` is a deep copy of ``self.metric`` carrying
            the index and search settings of ``cases[i]``.

        Raises:
            KeyError: If ``collection`` is a dict lacking a required key.
        """
        collection_name = collection.get("collection_name")
        data_type, dimension, metric_type = parser.parse_ann_collection_name(
            collection_name)
        # hdf5_source_file: The path of the source data file saved on the NAS
        hdf5_source_file = collection["source_file"]
        index_types = collection["index_types"]
        index_params = collection["index_params"]
        top_ks = collection["top_ks"]
        nqs = collection["nqs"]
        guarantee_timestamp = collection.get("guarantee_timestamp")
        search_params = collection["search_params"]
        vector_type = utils.get_vector_type(data_type)
        index_field_name = utils.get_default_field_name(vector_type)
        dataset = utils.get_dataset(hdf5_source_file)
        collection_info = {
            "dimension": dimension,
            "metric_type": metric_type,
            "dataset_name": collection_name,
        }
        # A missing, None or empty filter list means "one unfiltered run".
        # Build that placeholder locally instead of appending to the list
        # owned by the caller's config.
        filters = collection.get("filters") or [None]
        # Convert list data into a set of dictionary data
        search_params = utils.generate_combinations(search_params)
        index_params = utils.generate_combinations(index_params)
        cases = []
        case_metrics = []
        # self.metric is deep-copied for every case below; presumably this
        # call is what prepares it -- confirm against init_metric.
        self.init_metric(self.name, collection_info, {}, search_info=None)

        # true_ids: The data set used to verify the results returned by query
        true_ids = np.array(dataset["neighbors"])
        for index_type in index_types:
            for index_param in index_params:
                index_info = {
                    "index_type": index_type,
                    "index_param": index_param,
                }
                for search_param in search_params:
                    for filter_spec in filters:
                        # Start from empty lists for every filter: a single
                        # list shared by all cases would accumulate the
                        # expressions of every filter seen so far.
                        filter_param = []
                        filter_query = []
                        if isinstance(filter_spec, dict):
                            for clause in ("range", "term"):
                                if clause in filter_spec:
                                    # NOTE(security): eval() executes the
                                    # config string as Python code; only
                                    # feed this trusted suite files.
                                    filter_query.append(
                                        eval(filter_spec[clause]))
                                    filter_param.append(filter_spec[clause])
                        for nq in nqs:
                            query_vectors = utils.normalize(
                                metric_type, np.array(dataset["test"][:nq]))
                            for top_k in top_ks:
                                search_info = {
                                    "topk": top_k,
                                    "query": query_vectors,
                                    "metric_type":
                                    utils.metric_type_trans(metric_type),
                                    "params": search_param,
                                }
                                # TODO: only update search_info
                                case_metric = copy.deepcopy(self.metric)
                                # set metric type as case
                                case_metric.set_case_metric_type()
                                case_metric.index = index_info
                                case_metric.search = {
                                    "nq": nq,
                                    "topk": top_k,
                                    "search_param": search_param,
                                    "filter": filter_param,
                                    "guarantee_timestamp": guarantee_timestamp,
                                }
                                vector_query = {
                                    "vector": {
                                        index_field_name: search_info,
                                    },
                                }
                                # Obtain the parameters of the use case to be tested
                                cases.append({
                                    "collection_name": collection_name,
                                    "dataset": dataset,
                                    "index_field_name": index_field_name,
                                    "dimension": dimension,
                                    "data_type": data_type,
                                    "metric_type": metric_type,
                                    "vector_type": vector_type,
                                    "index_type": index_type,
                                    "index_param": index_param,
                                    # Own copy, so changing one case's
                                    # filters cannot leak into another.
                                    "filter_query": list(filter_query),
                                    "vector_query": vector_query,
                                    "true_ids": true_ids,
                                    "guarantee_timestamp": guarantee_timestamp,
                                })
                                case_metrics.append(case_metric)
        return cases, case_metrics
Пример #2
0
 def extract_cases(self, collection):
     """Expand one collection config into a flat list of search cases.

     One case (and one matching metric object) is produced for every
     combination of index type, index parameter set, search parameter
     set, filter, nq and top_k.

     Args:
         collection: Mapping describing the run. The keys
             ``source_file``, ``index_types``, ``index_params``,
             ``top_ks``, ``nqs`` and ``search_params`` are required;
             ``collection_name`` and ``filters`` are optional. The
             mapping is not modified.

     Returns:
         A ``(cases, case_metrics)`` tuple of two equally long lists.
         ``case_metrics[i]`` is a deep copy of ``self.metric`` carrying
         the index and search settings of ``cases[i]``.

     Raises:
         KeyError: If ``collection`` is a dict lacking a required key.
     """
     collection_name = collection.get("collection_name")
     data_type, dimension, metric_type = parser.parse_ann_collection_name(
         collection_name)
     hdf5_source_file = collection["source_file"]
     index_types = collection["index_types"]
     index_params = collection["index_params"]
     top_ks = collection["top_ks"]
     nqs = collection["nqs"]
     search_params = collection["search_params"]
     vector_type = utils.get_vector_type(data_type)
     index_field_name = utils.get_default_field_name(vector_type)
     dataset = utils.get_dataset(hdf5_source_file)
     collection_info = {
         "dimension": dimension,
         "metric_type": metric_type,
         "dataset_name": collection_name,
     }
     # A missing, None or empty filter list means "one unfiltered run".
     # Build that placeholder locally instead of appending to the list
     # owned by the caller's config.
     filters = collection.get("filters") or [None]
     # Both parameter specs are replaced by the result of
     # utils.generate_combinations and iterated below.
     search_params = utils.generate_combinations(search_params)
     index_params = utils.generate_combinations(index_params)
     cases = []
     case_metrics = []
     # self.metric is deep-copied for every case below; presumably this
     # call is what prepares it -- confirm against init_metric.
     self.init_metric(self.name, collection_info, {}, search_info=None)
     # Stored in every case as "true_ids"; presumably the expected
     # neighbour ids used to check search results -- confirm with caller.
     true_ids = np.array(dataset["neighbors"])
     for index_type in index_types:
         for index_param in index_params:
             index_info = {
                 "index_type": index_type,
                 "index_param": index_param,
             }
             for search_param in search_params:
                 for filter_spec in filters:
                     # Start from empty lists for every filter: a single
                     # list shared by all cases would accumulate the
                     # expressions of every filter seen so far.
                     filter_param = []
                     filter_query = []
                     if isinstance(filter_spec, dict):
                         for clause in ("range", "term"):
                             if clause in filter_spec:
                                 # NOTE(security): eval() executes the
                                 # config string as Python code; only
                                 # feed this trusted suite files.
                                 filter_query.append(
                                     eval(filter_spec[clause]))
                                 filter_param.append(filter_spec[clause])
                     for nq in nqs:
                         # The first nq rows of dataset["test"] are the
                         # query vectors for this case.
                         query_vectors = utils.normalize(
                             metric_type, np.array(dataset["test"][:nq]))
                         for top_k in top_ks:
                             search_info = {
                                 "topk": top_k,
                                 "query": query_vectors,
                                 "metric_type":
                                 utils.metric_type_trans(metric_type),
                                 "params": search_param,
                             }
                             # TODO: only update search_info
                             case_metric = copy.deepcopy(self.metric)
                             case_metric.index = index_info
                             case_metric.search = {
                                 "nq": nq,
                                 "topk": top_k,
                                 "search_param": search_param,
                                 "filter": filter_param,
                             }
                             vector_query = {
                                 "vector": {
                                     index_field_name: search_info,
                                 },
                             }
                             cases.append({
                                 "collection_name": collection_name,
                                 "dataset": dataset,
                                 "index_field_name": index_field_name,
                                 "dimension": dimension,
                                 "data_type": data_type,
                                 "metric_type": metric_type,
                                 "vector_type": vector_type,
                                 "index_type": index_type,
                                 "index_param": index_param,
                                 # Own copy, so changing one case's
                                 # filters cannot leak into another.
                                 "filter_query": list(filter_query),
                                 "vector_query": vector_query,
                                 "true_ids": true_ids,
                             })
                             case_metrics.append(case_metric)
     return cases, case_metrics