def test_read_bulk_smaller_than_number_of_docs_and_multiple_clients(self):
    data = [
        '{"key": "value1"}',
        '{"key": "value2"}',
        '{"key": "value3"}',
        '{"key": "value4"}',
        '{"key": "value5"}',
        '{"key": "value6"}',
        '{"key": "value7"}',
    ]
    bulk_size = 3
    # only 5 documents to index for this client
    source = params.Slice(io.StringAsFileSource, 0, 5)
    am_handler = params.GenerateActionMetaData("test_index", "test_type", conflicting_ids=None)

    reader = params.IndexDataReader(data, batch_size=bulk_size, bulk_size=bulk_size, file_source=source,
                                    action_metadata=am_handler, index_name="test_index", type_name="test_type")

    expected_bulk_sizes = [3, 2]
    # lines should include meta-data
    expected_line_sizes = [6, 4]
    self.assert_bulks_sized(reader, expected_bulk_sizes, expected_line_sizes)
def test_read_bulks_and_assume_metadata_line_in_source_file(self):
    data = [
        '{"index": {"_index": "test_index", "_type": "test_type"}}',
        '{"key": "value1"}',
        '{"index": {"_index": "test_index", "_type": "test_type"}}',
        '{"key": "value2"}',
        '{"index": {"_index": "test_index", "_type": "test_type"}}',
        '{"key": "value3"}',
        '{"index": {"_index": "test_index", "_type": "test_type"}}',
        '{"key": "value4"}',
        '{"index": {"_index": "test_index", "_type": "test_type"}}',
        '{"key": "value5"}',
        '{"index": {"_index": "test_index", "_type": "test_type"}}',
        '{"key": "value6"}',
        '{"index": {"_index": "test_index", "_type": "test_type"}}',
        '{"key": "value7"}'
    ]
    bulk_size = 3
    source = params.Slice(io.StringAsFileSource, 0, len(data))
    am_handler = params.SourceActionMetaData(source)

    reader = params.IndexDataReader(data, batch_size=bulk_size, bulk_size=bulk_size, file_source=source,
                                    action_metadata=am_handler, index_name="test_index", type_name="test_type")

    expected_bulk_sizes = [3, 3, 1]
    # lines should include meta-data
    expected_line_sizes = [6, 6, 2]
    self.assert_bulks_sized(reader, expected_bulk_sizes, expected_line_sizes)
def test_read_bulk_with_offset(self):
    data = [
        '{"key": "value1"}',
        '{"key": "value2"}',
        '{"key": "value3"}',
        '{"key": "value4"}',
        '{"key": "value5"}'
    ]
    bulk_size = 50
    source = params.Slice(io.StringAsFileSource, 3, len(data))
    am_handler = params.GenerateActionMetaData("test_index", "test_type", conflicting_ids=None)

    reader = params.IndexDataReader(data, batch_size=bulk_size, bulk_size=bulk_size, file_source=source,
                                    action_metadata=am_handler, index_name="test_index", type_name="test_type")

    expected_bulk_sizes = [(len(data) - 3)]
    # lines should include meta-data
    expected_line_sizes = [(len(data) - 3) * 2]
    self.assert_bulks_sized(reader, expected_bulk_sizes, expected_line_sizes)
def test_slice_with_slice_larger_than_source(self):
    source = params.Slice(io.StringAsFileSource, 0, 5)
    data = [
        '{"key": "value1"}',
        '{"key": "value2"}',
        '{"key": "value3"}',
    ]

    source.open(data, "r")
    self.assertEqual(data, list(source))
    source.close()
def test_slice_with_source_larger_than_slice(self):
    source = params.Slice(io.StringAsFileSource, 2, 5)
    data = [
        '{"key": "value1"}',
        '{"key": "value2"}',
        '{"key": "value3"}',
        '{"key": "value4"}',
        '{"key": "value5"}',
        '{"key": "value6"}',
        '{"key": "value7"}',
        '{"key": "value8"}',
        '{"key": "value9"}',
        '{"key": "value10"}'
    ]

    source.open(data, "r")
    self.assertEqual(data[2:7], list(source))
    source.close()
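# The slice tests above hand a plain list of strings to params.Slice via io.StringAsFileSource so
# that the list can be consumed like a file. That helper is not shown in this section; the class
# below is only a hypothetical, list-backed stand-in written for illustration. Its name, constructor
# signature and the readline/seek/close contract are assumptions, not the actual implementation.
class ListAsFileSource:
    def __init__(self, contents, mode, encoding="utf-8"):
        # keep the in-memory "file" and remember the current read position
        self.contents = contents
        self.current_index = 0

    def seek(self, offset):
        # jump to the given line number
        self.current_index = offset

    def readline(self):
        # return one line per call; an empty string signals end of "file"
        if self.current_index >= len(self.contents):
            return ""
        line = self.contents[self.current_index]
        self.current_index += 1
        return line

    def close(self):
        self.contents = None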
def create_reader(bulk_size):
    metadata = params.GenerateActionMetaData(index_name="test-idx", type_name=None)

    source = params.Slice(StaticSource, 0, sys.maxsize)
    reader = params.MetadataIndexDataReader(data_file="bogus", batch_size=bulk_size, bulk_size=bulk_size,
                                            file_source=source, action_metadata=metadata,
                                            index_name="test-idx", type_name=None)
    return reader
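# create_reader above relies on a StaticSource class that is not shown in this section. Judging from
# its use with params.Slice(StaticSource, 0, sys.maxsize) and a bogus data file, it presumably serves
# synthetic documents without touching disk. The class below is a hypothetical stand-in sketched only
# for illustration; the real StaticSource may differ in name, signature and behavior.
class EndlessStaticSource:
    def __init__(self, contents, mode, doc='{"key": "value"}'):
        # ignore the (bogus) file contents and always hand out the same synthetic document
        self.doc = doc

    def seek(self, offset):
        # position is irrelevant for an endless synthetic source
        pass

    def readline(self):
        return self.doc

    def close(self):
        pass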
def test_source_file_action_meta_data(self):
    source = params.Slice(io.StringAsFileSource, 0, 5)
    generator = params.SourceActionMetaData(source)

    data = [
        '{"index": {"_index": "test_index", "_type": "test_type", "_id": "1"}}',
        '{"index": {"_index": "test_index", "_type": "test_type", "_id": "2"}}',
        '{"index": {"_index": "test_index", "_type": "test_type", "_id": "3"}}',
        '{"index": {"_index": "test_index", "_type": "test_type", "_id": "4"}}',
        '{"index": {"_index": "test_index", "_type": "test_type", "_id": "5"}}',
    ]

    source.open(data, "r")
    self.assertEqual(data, list(generator))
    source.close()
def test_read_bulk_larger_than_number_of_docs(self):
    data = [
        '{"key": "value1"}',
        '{"key": "value2"}',
        '{"key": "value3"}',
        '{"key": "value4"}',
        '{"key": "value5"}'
    ]
    bulk_size = 50
    source = params.Slice(io.StringAsFileSource, 0, len(data))
    am_handler = params.GenerateActionMetaData("test_index", "test_type", conflicting_ids=None)

    reader = params.IndexDataReader(data, batch_size=bulk_size, bulk_size=bulk_size, file_source=source,
                                    action_metadata=am_handler, index_name="test_index", type_name="test_type")

    # always double the amount as one line contains the data and one line contains the index command
    expected_bulk_sizes = [len(data) * 2]
    self.assert_bulks_sized(reader, expected_bulk_sizes)
def test_read_bulks_and_assume_no_metadata(self):
    data = [
        '{"key": "value1"}',
        '{"key": "value2"}',
        '{"key": "value3"}',
        '{"key": "value4"}',
        '{"key": "value5"}',
        '{"key": "value6"}',
        '{"key": "value7"}'
    ]
    bulk_size = 3
    source = params.Slice(io.StringAsFileSource, 0, len(data))
    am_handler = params.NoneActionMetaData()

    reader = params.IndexDataReader(data, batch_size=bulk_size, bulk_size=bulk_size, file_source=source,
                                    action_metadata=am_handler, index_name="test_index", type_name="test_type")

    # no meta-data, hence line numbers and bulk sizes need to be identical
    expected_bulk_sizes = [3, 3, 1]
    self.assert_bulks_sized(reader, expected_bulk_sizes, expected_bulk_sizes)
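# All of the reader tests above funnel through an assert_bulks_sized helper that is not shown in this
# section. A minimal sketch of such a helper follows; it assumes the reader is used as a context
# manager and that iterating it yields (bulk_size, bulk_lines) pairs. That yield structure and the
# optional expected_line_sizes parameter are assumptions for illustration, not the actual helper.
def assert_bulks_sized(self, reader, expected_bulk_sizes, expected_line_sizes=None):
    with reader:
        bulk_index = 0
        for bulk_size, bulk_lines in reader:
            # every produced bulk must contain the expected number of documents ...
            self.assertEqual(expected_bulk_sizes[bulk_index], bulk_size)
            # ... and, if given, the expected number of lines (meta-data plus document lines)
            if expected_line_sizes is not None:
                self.assertEqual(expected_line_sizes[bulk_index], len(bulk_lines))
            bulk_index += 1
        # the reader must produce exactly as many bulks as expected
        self.assertEqual(len(expected_bulk_sizes), bulk_index)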