def test_read_all_many_file_patterns(self):
    """Check that ReadAllFromText reads the records of several patterns.

    Three patterns are obtained from ``write_pattern`` and passed to the
    pipeline as elements of a ``Create``. The output of ``ReadAllFromText``
    is compared against the combined expected records of all three.
    """
    # Fixture sizes are verified with unittest assertions rather than bare
    # ``assert`` so the checks still run under ``python -O`` and report
    # both values on failure, like the other tests in this file.
    pattern1, expected_data1 = write_pattern([5, 3, 12, 8, 8, 4])
    self.assertEqual(len(expected_data1), 40)
    pattern2, expected_data2 = write_pattern([3, 7, 9])
    self.assertEqual(len(expected_data2), 19)
    pattern3, expected_data3 = write_pattern([11, 20, 5, 5])
    self.assertEqual(len(expected_data3), 41)

    expected_data = [*expected_data1, *expected_data2, *expected_data3]

    pipeline = TestPipeline()
    # Parenthesised to make the grouping explicit: ``>>`` binds tighter
    # than ``|``, so each label is attached to its transform first.
    pcoll = (
        pipeline
        | 'Create' >> Create([pattern1, pattern2, pattern3])
        | 'ReadAll' >> ReadAllFromText())
    assert_that(pcoll, equal_to(expected_data))
    pipeline.run()
def test_read_all_many_file_patterns(self):
    """Check that ReadAllFromText reads the records of several patterns.

    Three patterns are obtained from ``write_pattern`` and passed to the
    pipeline as elements of a ``Create``. The output of ``ReadAllFromText``
    is compared against the combined expected records of all three.
    """
    # Fixture sizes are verified with unittest assertions rather than bare
    # ``assert`` so the checks still run under ``python -O`` and report
    # both values on failure, like the other tests in this file.
    pattern1, expected_data1 = write_pattern([5, 3, 12, 8, 8, 4])
    self.assertEqual(len(expected_data1), 40)
    pattern2, expected_data2 = write_pattern([3, 7, 9])
    self.assertEqual(len(expected_data2), 19)
    pattern3, expected_data3 = write_pattern([11, 20, 5, 5])
    self.assertEqual(len(expected_data3), 41)

    expected_data = [*expected_data1, *expected_data2, *expected_data3]

    pipeline = TestPipeline()
    # Parenthesised to make the grouping explicit: ``>>`` binds tighter
    # than ``|``, so each label is attached to its transform first.
    pcoll = (
        pipeline
        | 'Create' >> Create([pattern1, pattern2, pattern3])
        | 'ReadAll' >> ReadAllFromText())
    assert_that(pcoll, equal_to(expected_data))
    pipeline.run()
def test_read_from_text_file_pattern(self):
    """Check that ReadFromText yields the expected records for a pattern.

    The pattern and its expected records come from ``write_pattern``; the
    pipeline output is compared against those records with ``assert_that``.
    """
    pattern, expected_data = write_pattern([5, 3, 12, 8, 8, 4])
    # unittest assertion instead of a bare ``assert``: it is not removed
    # under ``python -O`` and matches the style of the sibling tests.
    self.assertEqual(len(expected_data), 40)

    pipeline = TestPipeline()
    pcoll = pipeline | 'Read' >> ReadFromText(pattern)
    assert_that(pcoll, equal_to(expected_data))
    pipeline.run()
def test_read_from_text_file_pattern(self):
    """Check that ReadFromText yields the expected records for a pattern.

    The pattern and its expected records come from ``write_pattern``; the
    pipeline output is compared against those records with ``assert_that``.
    """
    pattern, expected_data = write_pattern([5, 3, 12, 8, 8, 4])
    # unittest assertion instead of a bare ``assert``: it is not removed
    # under ``python -O`` and matches the style of the sibling tests.
    self.assertEqual(len(expected_data), 40)

    pipeline = TestPipeline()
    pcoll = pipeline | 'Read' >> ReadFromText(pattern)
    assert_that(pcoll, equal_to(expected_data))
    pipeline.run()
def test_read_all_file_pattern(self):
    """Check that ReadAllFromText reads the records of a single pattern.

    The pattern from ``write_pattern`` is supplied as the only element of a
    ``Create`` and the output of ``ReadAllFromText`` is compared against the
    expected records.
    """
    pattern, expected_data = write_pattern([5, 3, 12, 8, 8, 4])
    # unittest assertion instead of a bare ``assert``: it is not removed
    # under ``python -O`` and matches the style of the sibling tests.
    self.assertEqual(len(expected_data), 40)

    pipeline = TestPipeline()
    pcoll = (
        pipeline
        | 'Create' >> Create([pattern])
        | 'ReadAll' >> ReadAllFromText())
    assert_that(pcoll, equal_to(expected_data))
    pipeline.run()
def test_read_all_file_pattern(self):
    """Check that ReadAllFromText reads the records of a single pattern.

    The pattern from ``write_pattern`` is supplied as the only element of a
    ``Create`` and the output of ``ReadAllFromText`` is compared against the
    expected records.
    """
    pattern, expected_data = write_pattern([5, 3, 12, 8, 8, 4])
    # unittest assertion instead of a bare ``assert``: it is not removed
    # under ``python -O`` and matches the style of the sibling tests.
    self.assertEqual(len(expected_data), 40)

    pipeline = TestPipeline()
    pcoll = (
        pipeline
        | 'Create' >> Create([pattern])
        | 'ReadAll' >> ReadAllFromText())
    assert_that(pcoll, equal_to(expected_data))
    pipeline.run()
def test_read_file_pattern(self):
    """Run the shared read test against a pattern from ``write_pattern``.

    The six counts passed to ``write_pattern`` are multiples (5, 3, 12, 8,
    8, 4) of ``TextSourceTest.DEFAULT_NUM_RECORDS``; the returned data is
    expected to hold 40 times that many records in total.
    """
    num_records = TextSourceTest.DEFAULT_NUM_RECORDS
    pattern, expected_data = write_pattern(
        [num_records * factor for factor in (5, 3, 12, 8, 8, 4)])
    # unittest assertion instead of a bare ``assert``: it is not removed
    # under ``python -O`` and matches the style of the sibling tests.
    self.assertEqual(len(expected_data), num_records * 40)
    self._run_read_test(pattern, expected_data)
def test_read_file_pattern(self):
    """Run the shared read test against a pattern from ``write_pattern``.

    The six counts passed to ``write_pattern`` are multiples (5, 3, 12, 8,
    8, 4) of ``TextSourceTest.DEFAULT_NUM_RECORDS``; the returned data is
    expected to hold 40 times that many records in total.
    """
    num_records = TextSourceTest.DEFAULT_NUM_RECORDS
    pattern, expected_data = write_pattern(
        [num_records * factor for factor in (5, 3, 12, 8, 8, 4)])
    # unittest assertion instead of a bare ``assert``: it is not removed
    # under ``python -O`` and matches the style of the sibling tests.
    self.assertEqual(len(expected_data), num_records * 40)
    self._run_read_test(pattern, expected_data)
def test_read_skip_header_pattern_insufficient_lines(self):
    """Skip more header lines than one of the line counts provides.

    The expected records are derived from the written data with
    ``self._remove_lines`` and compared, ignoring order, with the result of
    ``self._read_skip_header_lines`` for the same pattern and skip count.
    """
    header_size = 4
    lines_per_file = [
        5,
        3,  # Fewer lines in file than we want to skip
        12,
        8,
        8,
        4,
    ]

    pattern, written = write_pattern(lines_per_file)
    expected = self._remove_lines(written, lines_per_file, header_size)
    actual = self._read_skip_header_lines(pattern, header_size)

    self.assertEqual(len(expected), len(actual))
    self.assertCountEqual(expected, actual)
def test_read_file_pattern_with_empty_files(self):
    """Run the shared read test on a pattern written with ``no_data=True``.

    The six counts passed to ``write_pattern`` are multiples (5, 3, 12, 8,
    8, 4) of ``TextSourceTest.DEFAULT_NUM_RECORDS``. The returned data is
    expected to hold 40 times that many records, the first of which must be
    falsy.
    """
    num_records = TextSourceTest.DEFAULT_NUM_RECORDS
    pattern, expected_data = write_pattern(
        [factor * num_records for factor in (5, 3, 12, 8, 8, 4)],
        no_data=True)
    # unittest assertions instead of bare ``assert``: they are not removed
    # under ``python -O`` and match the style of the sibling tests.
    self.assertEqual(len(expected_data), 40 * num_records)
    self.assertFalse(expected_data[0])
    self._run_read_test(pattern, expected_data)
def test_read_skip_header_pattern_insufficient_lines(self):
    """Skip more header lines than one of the line counts provides.

    The expected records are derived from the written data with
    ``self._remove_lines`` and compared, ignoring order, with the result of
    ``self._read_skip_header_lines`` for the same pattern and skip count.
    """
    header_size = 4
    lines_per_file = [
        5,
        3,  # Fewer lines in file than we want to skip
        12,
        8,
        8,
        4,
    ]

    pattern, written = write_pattern(lines_per_file)
    expected = self._remove_lines(written, lines_per_file, header_size)
    actual = self._read_skip_header_lines(pattern, header_size)

    self.assertEqual(len(expected), len(actual))
    self.assertCountEqual(expected, actual)
def test_read_file_pattern_with_empty_files(self):
    """Run the shared read test on a pattern written with ``no_data=True``.

    The six counts passed to ``write_pattern`` are multiples (5, 3, 12, 8,
    8, 4) of ``TextSourceTest.DEFAULT_NUM_RECORDS``. The returned data is
    expected to hold 40 times that many records, the first of which must be
    falsy.
    """
    num_records = TextSourceTest.DEFAULT_NUM_RECORDS
    pattern, expected_data = write_pattern(
        [factor * num_records for factor in (5, 3, 12, 8, 8, 4)],
        no_data=True)
    # unittest assertions instead of bare ``assert``: they are not removed
    # under ``python -O`` and match the style of the sibling tests.
    self.assertEqual(len(expected_data), 40 * num_records)
    self.assertFalse(expected_data[0])
    self._run_read_test(pattern, expected_data)
def test_read_skip_header_pattern(self):
    """Read a pattern while skipping two header lines.

    The line counts are multiples (5, 3, 12, 8, 8, 4) of
    ``TextSourceTest.DEFAULT_NUM_RECORDS``. The expected records are derived
    from the written data with ``self._remove_lines`` and compared, ignoring
    order, with the result of ``self._read_skip_header_lines``.
    """
    header_size = 2
    lines_per_file = [
        TextSourceTest.DEFAULT_NUM_RECORDS * factor
        for factor in (5, 3, 12, 8, 8, 4)
    ]

    pattern, written = write_pattern(lines_per_file)
    expected = self._remove_lines(written, lines_per_file, header_size)
    actual = self._read_skip_header_lines(pattern, header_size)

    self.assertEqual(len(expected), len(actual))
    self.assertCountEqual(expected, actual)
def test_read_skip_header_pattern(self):
    """Read a pattern while skipping two header lines.

    The line counts are multiples (5, 3, 12, 8, 8, 4) of
    ``TextSourceTest.DEFAULT_NUM_RECORDS``. The expected records are derived
    from the written data with ``self._remove_lines`` and compared, ignoring
    order, with the result of ``self._read_skip_header_lines``.
    """
    header_size = 2
    lines_per_file = [
        TextSourceTest.DEFAULT_NUM_RECORDS * factor
        for factor in (5, 3, 12, 8, 8, 4)
    ]

    pattern, written = write_pattern(lines_per_file)
    expected = self._remove_lines(written, lines_per_file, header_size)
    actual = self._read_skip_header_lines(pattern, header_size)

    self.assertEqual(len(expected), len(actual))
    self.assertCountEqual(expected, actual)