示例#1
0
    def test_read_bulk_smaller_than_number_of_docs_and_multiple_clients(self):
        """A client that reads a 5-doc slice with bulk size 3 gets bulks of 3 and 2 docs."""
        corpus = [
            '{"key": "value1"}', '{"key": "value2"}', '{"key": "value3"}',
            '{"key": "value4"}', '{"key": "value5"}', '{"key": "value6"}',
            '{"key": "value7"}',
        ]
        batch = 3

        # this client indexes only the first five documents of the corpus
        doc_slice = params.Slice(io.StringAsFileSource, 0, 5)
        metadata = params.GenerateActionMetaData("test_index",
                                                 "test_type",
                                                 conflicting_ids=None)

        reader = params.IndexDataReader(corpus,
                                        batch_size=batch,
                                        bulk_size=batch,
                                        file_source=doc_slice,
                                        action_metadata=metadata,
                                        index_name="test_index",
                                        type_name="test_type")

        # 5 docs at bulk size 3 -> bulks of 3 and 2 docs; line counts are doubled
        # because every document is preceded by its meta-data line
        self.assert_bulks_sized(reader, [3, 2], [6, 4])
示例#2
0
 def test_generate_action_meta_data_without_id_conflicts(self):
     """Without conflicting ids the generator emits a plain index action line."""
     generator = params.GenerateActionMetaData("test_index",
                                               "test_type",
                                               conflicting_ids=None)
     self.assertEqual(
         '{"index": {"_index": "test_index", "_type": "test_type"}}',
         next(generator))
示例#3
0
    def test_read_bulk_with_offset(self):
        """Reading from an offset yields one bulk holding only the remaining docs."""
        corpus = [
            '{"key": "value1"}',
            '{"key": "value2"}',
            '{"key": "value3"}',
            '{"key": "value4"}',
            '{"key": "value5"}',
        ]
        offset = 3
        batch = 50

        doc_slice = params.Slice(io.StringAsFileSource, offset, len(corpus))
        metadata = params.GenerateActionMetaData("test_index",
                                                 "test_type",
                                                 conflicting_ids=None)

        reader = params.IndexDataReader(corpus,
                                        batch_size=batch,
                                        bulk_size=batch,
                                        file_source=doc_slice,
                                        action_metadata=metadata,
                                        index_name="test_index",
                                        type_name="test_type")

        remaining = len(corpus) - offset
        # one bulk with all remaining docs; line count doubles for the meta-data lines
        self.assert_bulks_sized(reader, [remaining], [remaining * 2])
示例#4
0
    def test_read_bulk_smaller_than_number_of_docs_and_multiple_clients(self):
        """A client that reads a 5-doc slice with bulk size 3 gets bulks of 6 and 4 lines."""
        corpus = [
            '{"key": "value1"}', '{"key": "value2"}', '{"key": "value3"}',
            '{"key": "value4"}', '{"key": "value5"}', '{"key": "value6"}',
            '{"key": "value7"}',
        ]
        batch = 3

        # this client indexes only the first five documents of the corpus
        doc_slice = params.Slice(StringAsFileSource, 0, 5)
        metadata = params.GenerateActionMetaData("test_index",
                                                 "test_type",
                                                 conflicting_ids=None)

        reader = params.IndexDataReader(corpus,
                                        batch_size=batch,
                                        bulk_size=batch,
                                        file_source=doc_slice,
                                        action_metadata=metadata,
                                        index_name="test_index",
                                        type_name="test_type")

        # always double the amount as one line contains the data and one line contains the index command
        self.assert_bulks_sized(reader, [6, 4])
示例#5
0
def create_reader(bulk_size):
    """Build a MetadataIndexDataReader over the full static test source.

    The reader targets index "test-idx" with no type, using *bulk_size* for
    both batching and bulk sizing.
    """
    action_metadata = params.GenerateActionMetaData(index_name="test-idx",
                                                    type_name=None)
    # cover the whole source: from document 0 up to an effectively unbounded end
    full_slice = params.Slice(StaticSource, 0, sys.maxsize)
    return params.MetadataIndexDataReader(data_file="bogus",
                                          batch_size=bulk_size,
                                          bulk_size=bulk_size,
                                          file_source=full_slice,
                                          action_metadata=action_metadata,
                                          index_name="test-idx",
                                          type_name=None)
示例#6
0
    def test_read_bulk_larger_than_number_of_docs(self):
        """When the bulk size exceeds the corpus, a single bulk holds every line."""
        corpus = [
            '{"key": "value1"}', '{"key": "value2"}', '{"key": "value3"}',
            '{"key": "value4"}', '{"key": "value5"}',
        ]
        batch = 50

        doc_slice = params.Slice(io.StringAsFileSource, 0, len(corpus))
        metadata = params.GenerateActionMetaData("test_index", "test_type",
                                                 conflicting_ids=None)

        reader = params.IndexDataReader(corpus,
                                        batch_size=batch,
                                        bulk_size=batch,
                                        file_source=doc_slice,
                                        action_metadata=metadata,
                                        index_name="test_index",
                                        type_name="test_type")

        # one bulk: every doc contributes its data line plus a meta-data line
        self.assert_bulks_sized(reader, [len(corpus) * 2])
示例#7
0
    def test_generate_action_meta_data_with_id_conflicts(self):
        """With conflicting ids, the generator alternates sequential and "random" ids.

        The rand callable is scripted: first value == 3 means "draw a random id",
        and the following value is the index of that "random" id.
        """
        # first column == 3 -> we'll draw a "random" id, second column == "random" id
        scripted_draws = iter([3, 1, 3, 3, 3, 2, 0, 3, 0])

        generator = params.GenerateActionMetaData(
            "test_index",
            "test_type",
            conflicting_ids=[100, 200, 300, 400],
            rand=lambda x, y: next(scripted_draws))

        expected_ids = [
            # first one is always not drawn from a random index
            "100",
            # now we start using random ids, i.e. look in the first line of the pseudo-random sequence
            "200",
            "400",
            "300",
            # "random" returns 0 instead of 3 -> we draw the next sequential one, which is 200
            "200",
            # and we're back to random
            "100",
        ]
        for doc_id in expected_ids:
            self.assertEqual(
                '{"index": {"_index": "test_index", "_type": "test_type", "_id": "%s"}}' % doc_id,
                next(generator))