Пример #1
0
    def init_collection_general(self,
                                prefix="test",
                                insert_data=False,
                                nb=ct.default_nb,
                                partition_num=0,
                                is_binary=False,
                                is_all_data_type=False,
                                auto_id=False,
                                dim=ct.default_dim,
                                is_index=False,
                                primary_field=ct.default_int64_field_name,
                                is_flush=True,
                                name=None,
                                **kwargs):
        """
        target: build a ready-to-use collection for a test case
        method: 1. connect and create the collection with the default schema, or
                   with the binary / all-data-type schema when the matching flag
                   is set (all-data-type wins when both flags are set)
                2. add `partition_num` extra partitions when it is positive
                3. when `insert_data` is set, call cf.insert_data with `nb`,
                   check the entity count if `is_flush`, and load the
                   collection unless `is_index` is True
        expected: return (collection wrapper, vectors, binary raw vectors,
                  insert ids, time stamp); without insertion the three data
                  slots are empty lists and the time stamp is 0
        """
        log.info("Test case of search interface: initialize before test case")
        self._connect()
        # An explicit `name` takes precedence over the generated unique name.
        generated_name = cf.gen_unique_str(prefix)
        collection_name = generated_name if name is None else name

        # 1 create collection: each later flag overrides the schema chosen before it
        schema_args = dict(auto_id=auto_id, dim=dim, primary_field=primary_field)
        schema = cf.gen_default_collection_schema(**schema_args)
        if is_binary:
            schema = cf.gen_default_binary_collection_schema(**schema_args)
        if is_all_data_type:
            schema = cf.gen_collection_schema_all_datatype(**schema_args)
        log.info("init_collection_general: collection creation")
        collection_w = self.init_collection_wrap(name=collection_name, schema=schema, **kwargs)

        # 2 add extra partitions if specified (default is 1 partition named "_default")
        if partition_num > 0:
            cf.gen_partitions(collection_w, partition_num)

        # 3 nothing to insert: hand back empty placeholders
        if not insert_data:
            return collection_w, [], [], [], 0

        inserted = cf.insert_data(collection_w, nb, is_binary, is_all_data_type,
                                  auto_id=auto_id, dim=dim)
        collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp = inserted
        if is_flush:
            assert collection_w.is_empty is False
            assert collection_w.num_entities == nb
        # This condition will be removed after auto index feature
        if not is_index:
            collection_w.load()

        return collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp
Пример #2
0
 def test_collection_binary_created_by_dataframe(self):
     """
     target: verify a collection can be initialized from a binary dataframe
     method: pass a default binary dataframe as `data` to init_collection,
             then flush the collection
     expected: the name and schema checks against the default binary schema pass
     """
     connection = self._connect()
     collection_name = cf.gen_unique_str(prefix)
     binary_df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
     expected_schema = cf.gen_default_binary_collection_schema()
     # Properties handed to the check task for verification.
     expected_props = {exp_name: collection_name, exp_schema: expected_schema}
     self.collection_wrap.init_collection(
         name=collection_name,
         data=binary_df,
         check_task=CheckTasks.check_collection_property,
         check_items=expected_props)
     connection.flush([collection_name])
Пример #3
0
    def init_collection_general(self,
                                prefix,
                                insert_data=False,
                                nb=ct.default_nb,
                                partition_num=0,
                                is_binary=False,
                                is_all_data_type=False,
                                auto_id=False,
                                dim=ct.default_dim,
                                is_index=False):
        """
        target: build a ready-to-use collection for a test case
        method: 1. connect and create the collection with the default schema, or
                   with the binary / all-data-type schema when the matching flag
                   is set (all-data-type wins when both flags are set)
                2. add `partition_num` extra partitions when it is positive
                3. when `insert_data` is set, call cf.insert_data with `nb`,
                   check the entity count, and load the collection unless
                   `is_index` is True
        expected: return (collection wrapper, vectors, binary raw vectors,
                  insert ids); without insertion the three data slots are
                  empty lists
        """
        log.info("Test case of search interface: initialize before test case")
        self._connect()
        collection_name = cf.gen_unique_str(prefix)

        # 1 create collection: each later flag overrides the schema chosen before it
        schema_args = dict(auto_id=auto_id, dim=dim)
        schema = cf.gen_default_collection_schema(**schema_args)
        if is_binary:
            schema = cf.gen_default_binary_collection_schema(**schema_args)
        if is_all_data_type:
            schema = cf.gen_collection_schema_all_datatype(**schema_args)
        log.info("init_collection_general: collection creation")
        collection_w = self.init_collection_wrap(name=collection_name, schema=schema)

        # 2 add extra partitions if specified (default is 1 partition named "_default")
        if partition_num > 0:
            cf.gen_partitions(collection_w, partition_num)

        # 3 nothing to insert: hand back empty placeholders
        if not insert_data:
            return collection_w, [], [], []

        inserted = cf.insert_data(collection_w, nb, is_binary, is_all_data_type,
                                  auto_id=auto_id, dim=dim)
        collection_w, vectors, binary_raw_vectors, insert_ids = inserted
        assert collection_w.is_empty is False
        assert collection_w.num_entities == nb
        # This condition will be removed after auto index feature
        if not is_index:
            collection_w.load()

        return collection_w, vectors, binary_raw_vectors, insert_ids
Пример #4
0
    def test_compact_after_binary_index(self):
        """
        target: test compact after create index
        method: 1.insert binary data into two segments
                2.create binary index
                3.load and search
                4.compact and wait until the compacted segment is handed off
                5.search again
        expected: the compaction target segment becomes queryable, and the
                  searches before and after compaction each return default_nq
                  results with default_limit hits
        """
        # create collection with 1 shard and insert 2 segments
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1,
                                                 schema=cf.gen_default_binary_collection_schema())
        for i in range(2):
            df, _ = cf.gen_default_binary_dataframe_data()
            collection_w.insert(data=df)
            # NOTE(review): reading num_entities after each insert presumably seals
            # one segment per batch - confirm against the pymilvus flush semantics.
            assert collection_w.num_entities == (i + 1) * ct.default_nb

        # create index
        collection_w.create_index(ct.default_binary_vec_field_name, ct.default_binary_index)
        log.debug(collection_w.index())

        # load and search; the query vectors come from the last inserted batch
        collection_w.load()
        search_params = {"metric_type": "JACCARD", "params": {"nprobe": 32}}
        search_vectors = df[ct.default_binary_vec_field_name][:ct.default_nq].to_list()
        search_res_one, _ = collection_w.search(search_vectors,
                                                ct.default_binary_vec_field_name, search_params, ct.default_limit)

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans(check_task=CheckTasks.check_merge_compact)[0]

        # waiting for handoff completed and search: poll every 5s until the first
        # queryable segment is the compaction target, giving up after `cost` seconds
        cost = 30
        start = time()
        while True:
            sleep(5)
            segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
            if len(segment_info) != 0 and segment_info[0].segmentID == c_plans.plans[0].target:
                log.debug(segment_info)
                break
            if time() - start > cost:
                raise MilvusException(1, f"Handoff after compact and index cost more than {cost}s")

        # verify search result: the post-compaction result must be checked as well,
        # otherwise the second search is never validated
        search_res_two, _ = collection_w.search(search_vectors,
                                                ct.default_binary_vec_field_name, search_params, ct.default_limit)
        for search_res in (search_res_one, search_res_two):
            assert len(search_res) == ct.default_nq
            for hits in search_res:
                assert len(hits) == ct.default_limit
Пример #5
0
 def test_insert_binary_after_index(self):
     """
     target: verify binary data can be inserted once an index already exists
     method: 1.create a binary index on a new collection 2.insert binary data
     expected: 1.the collection reports the created index
               2.num entities equals default_nb
     """
     binary_schema = cf.gen_default_binary_collection_schema()
     collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix),
                                              schema=binary_schema)

     # index first, while the collection holds no data
     collection_w.create_index(ct.default_binary_vec_field_name,
                               default_binary_index_params)
     assert collection_w.has_index()[0]
     created_index, _ = collection_w.index()
     assert created_index == Index(collection_w.collection,
                                   ct.default_binary_vec_field_name,
                                   default_binary_index_params)
     assert collection_w.indexes[0] == created_index

     # then insert and confirm the row count
     binary_df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
     collection_w.insert(data=binary_df)
     assert collection_w.num_entities == ct.default_nb
Пример #6
0
    def init_collection_general(self,
                                prefix,
                                insert_data=False,
                                nb=ct.default_nb,
                                partition_num=0,
                                is_binary=False):
        """
        target: create specified collections
        method: 1. create collections (binary/non-binary)
                2. create partitions if specified
                3. insert specified binary/non-binary data
                   into each partition if any; when nb <= 32000 also flush and
                   verify the entity count; then load the collection
        expected: return collection and raw data as
                  (collection wrapper, vectors, binary raw vectors); both data
                  lists are empty when insert_data is False
        """
        log.info("Test case of search interface: initialize before test case")
        conn = self._connect()
        collection_name = cf.gen_unique_str(prefix)
        vectors = []
        binary_raw_vectors = []
        # 1 create collection
        if is_binary:
            default_schema = cf.gen_default_binary_collection_schema()
        else:
            default_schema = cf.gen_default_collection_schema()
        log.info("init_collection_general: collection creation")
        collection_w = self.init_collection_wrap(name=collection_name,
                                                 schema=default_schema)
        # 2 add extra partitions if specified (default is 1 partition named "_default")
        if partition_num > 0:
            cf.gen_partitions(collection_w, partition_num)
        # 3 insert data if specified
        if insert_data:
            collection_w, vectors, binary_raw_vectors = cf.insert_data(
                collection_w, nb, is_binary)
            # NOTE(review): 32000 is presumably a size limit above which the
            # flush-and-count check is skipped - confirm the origin of this value.
            if nb <= 32000:
                conn.flush([collection_w.name])
                # identity comparison with the singleton, as in the other
                # init_collection_general variants
                assert collection_w.is_empty is False
                assert collection_w.num_entities == nb
            collection_w.load()

        return collection_w, vectors, binary_raw_vectors
Пример #7
0
import pytest
from pymilvus import Index

from base.client_base import TestcaseBase
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks

# Name prefix for this module's collections
# (presumably passed to cf.gen_unique_str - confirm against usage).
prefix = "insert"

# Property-name strings; presumably keys of check_items dictionaries - confirm.
exp_name = "name"
exp_schema = "schema"
exp_num = "num_entities"
exp_primary = "primary"

# Schemas built once at import time from the common helpers.
default_schema = cf.gen_default_collection_schema()
default_binary_schema = cf.gen_default_binary_collection_schema()

# Index parameters for float-vector and binary-vector fields respectively.
default_index_params = {
    "index_type": "IVF_SQ8",
    "metric_type": "L2",
    "params": {"nlist": 64},
}
default_binary_index_params = {
    "index_type": "BIN_IVF_FLAT",
    "metric_type": "JACCARD",
    "params": {"nlist": 64},
}


class TestInsertParams(TestcaseBase):
    """ Test case of Insert interface """

    @pytest.fixture(scope="function", params=ct.get_invalid_strs)
    def get_non_data_type(self, request):
        """Yield each ct.get_invalid_strs entry, skipping list and None values."""
        candidate = request.param
        # list and None count as valid data types, so they are not useful here
        if candidate is None or isinstance(candidate, list):
            pytest.skip("list and None type is valid data type")
        yield candidate

    @pytest.fixture(scope="module", params=ct.get_invalid_strs)
    def get_invalid_field_name(self, request):
Пример #8
0
    def test_compact_after_binary_index(self):
        """
        target: check that a binary-indexed collection stays searchable after compact
        method: 1.insert two batches of binary data into a one-shard collection
                2.create the binary index, load, and search with JACCARD
                3.compact and wait for completion
                4.search with the L1 metric, then with JACCARD again
        expected: the L1 search is checked as an error
                  ("metric type not found: (L1)") and the JACCARD search returns
                  the same ids as before compaction
        """
        vec_field = ct.default_binary_vec_field_name

        def build_search_params(metric_type):
            # A fresh dict per search, as each search gets its own params object.
            return {"metric_type": metric_type, "params": {"nprobe": 10}}

        binary_schema = cf.gen_default_binary_collection_schema()
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix),
                                                 shards_num=1,
                                                 schema=binary_schema)
        for batch_idx in range(2):
            batch_df, _ = cf.gen_default_binary_dataframe_data()
            collection_w.insert(data=batch_df)
            assert collection_w.num_entities == (batch_idx + 1) * ct.default_nb

        # create index
        collection_w.create_index(vec_field, ct.default_binary_index)
        log.debug(collection_w.index())

        collection_w.load()

        # baseline search before compaction
        vectors = cf.gen_binary_vectors(ct.default_nq, ct.default_dim)[1]
        search_res_one, _ = collection_w.search(vectors, vec_field,
                                                build_search_params("JACCARD"),
                                                ct.default_limit)
        assert len(search_res_one) == ct.default_nq
        for hits in search_res_one:
            assert len(hits) == ct.default_limit

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans(check_task=CheckTasks.check_merge_compact)

        # verify index re-build and re-load: L1 is expected to be reported as an error
        expected_error = {ct.err_code: 1,
                          ct.err_msg: "metric type not found: (L1)"}
        _error_res, _ = collection_w.search(vectors, vec_field,
                                            build_search_params("L1"),
                                            ct.default_limit,
                                            check_task=CheckTasks.err_res,
                                            check_items=expected_error)

        # verify search result: ids must match the pre-compaction search hit by hit
        search_res_two, _ = collection_w.search(vectors, vec_field,
                                                build_search_params("JACCARD"),
                                                ct.default_limit)
        for query_idx in range(ct.default_nq):
            for rank in range(ct.default_limit):
                assert search_res_two[query_idx][rank].id == search_res_one[query_idx][rank].id