Example #1
    def test_get_file_number(self):
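        # After a single flush the writer should register its first gzip-compressed
        # output file, numbered 0000, in written_files.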
        writer_config = self.get_writer_config()
        writer = FSWriter(writer_config, meta())
        try:
            writer.write_batch(self.get_batch())
            writer.flush()
        finally:
            writer.close()
        expected_file = '{}/exporter_test0000.jl.gz'.format(self.tmp_dir)
        self.assertTrue(expected_file in writer.written_files)
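
All of these snippets are methods of a test case that supplies get_writer_config(), get_batch(), tmp_dir and the meta() helper. A minimal sketch of such a fixture is shown below, assuming the 'exporter_test' filebase implied by the expected file names; the import path, the 'name' key and the batch contents are illustrative and may differ from the actual exporters test suite:

    import os
    import shutil
    import tempfile
    import unittest

    from exporters.records.base_record import BaseRecord  # assumed import path

    class FSWriterTestBase(unittest.TestCase):
        # Hypothetical base class providing the helpers used in the examples.

        def setUp(self):
            # Every test writes into its own temporary directory.
            self.tmp_dir = tempfile.mkdtemp()

        def tearDown(self):
            shutil.rmtree(self.tmp_dir, ignore_errors=True)

        def get_writer_config(self):
            # 'exporter_test' matches the expected_file names asserted in the
            # examples; the 'name' key mirrors the usual exporters config layout.
            return {
                'name': 'exporters.writers.fs_writer.FSWriter',
                'options': {
                    'filebase': os.path.join(self.tmp_dir, 'exporter_test'),
                },
            }

        def get_batch(self):
            # A small, arbitrary batch of records to export.
            return [
                BaseRecord(city=u'Madrid', country=u'ES'),
                BaseRecord(city=u'Paris', country=u'FR'),
            ]

    # meta() builds the writer metadata object; its construction is
    # library-specific and is provided by the suite's own test utilities.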
Example #2
    def test_writer_md5_generation(self):
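        # With generate_md5 enabled the writer should leave an md5checksum.md5
        # file next to the exported data.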
        # given
        options = self.get_writer_config()
        options['options']['generate_md5'] = True

        # when:
        writer = FSWriter(options, meta())
        with closing(writer) as w:
            w.write_batch(self.get_batch())
            w.flush()
            w.finish_writing()

        self.assertTrue(
            os.path.isfile(os.path.join(self.tmp_dir, 'md5checksum.md5')),
            "Didn't found an expected md5checksum.md5 file")
Example #3
    def test_no_compression(self):
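        # With compression disabled the output is a plain .jl file whose JSON
        # lines should round-trip back to the original batch.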
        writer_config = self.get_writer_config()
        writer_config['options'].update({'compression': 'none'})
        writer = FSWriter(writer_config, meta())
        try:
            writer.write_batch(self.get_batch())
            writer.flush()
        finally:
            writer.close()
        expected_file = '{}/exporter_test0000.jl'.format(self.tmp_dir)
        self.assertTrue(expected_file in writer.written_files)

        written = []
        with open(expected_file, 'r') as fin:
            for line in fin:
                written.append(json.loads(line))
        self.assertEqual(written, self.get_batch())
Example #4
    def test_writer_with_grouped_data(self):
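        # Records are grouped by country and city via python_expressions; with
        # items_per_buffer_write=2 the three Paris records end up in two files.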
        # given:
        batch = [
            BaseRecord(city=u'Madrid', country=u'ES', monument='Royal Palace'),
            BaseRecord(city=u'Valencia',
                       country=u'ES',
                       monument='Torres de Serranos'),
            BaseRecord(city=u'Paris', country=u'FR', monument='Eiffel Tour'),
            BaseRecord(city=u'Paris', country=u'FR', monument='Champ de Mars'),
            BaseRecord(city=u'Paris',
                       country=u'FR',
                       monument='Arc de Triomphe'),
        ]
        grouped_batch = self._build_grouped_batch(
            batch, python_expressions=["item['country']", "item['city']"])

        options = self.get_writer_config()
        options['options']['filebase'] = os.path.join(
            self.tmp_dir, '{groups[0]}/{groups[1]}/file')
        options['options']['items_per_buffer_write'] = 2
        writer = FSWriter(options=options, metadata=meta())

        # when:
        with closing(writer) as w:
            w.write_batch(grouped_batch)
            w.flush()
            w.finish_writing()

        # then:
        expected_files = [
            'ES/Madrid/file0000.jl.gz',
            'ES/Valencia/file0000.jl.gz',
            'FR/Paris/file0000.jl.gz',
            'FR/Paris/file0001.jl.gz',
        ]
        expected = [os.path.join(self.tmp_dir, f) for f in expected_files]

        def listdir_recursive(path):
            return [
                os.path.join(d, f) for d, _, fnames in os.walk(path)
                for f in fnames
            ]

        self.assertEqual(sorted(expected),
                         sorted(listdir_recursive(self.tmp_dir)))
Example #5
    def test_compression_zip_format(self):
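        # With zip compression the writer should produce a .zip archive that
        # contains the uncompressed exporter_test0000.jl member.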
        writer_config = self.get_writer_config()
        writer_config['options'].update({'compression': 'zip'})
        writer = FSWriter(writer_config, meta())
        try:
            writer.write_batch(self.get_batch())
            writer.flush()
        finally:
            writer.close()
        expected_file = '{}/exporter_test0000.jl.zip'.format(self.tmp_dir)
        self.assertTrue(expected_file in writer.written_files)

        import zipfile
        written = []
        with zipfile.ZipFile(expected_file) as z:
            with z.open('exporter_test0000.jl') as f:
                for line in f:
                    written.append(json.loads(line))
        self.assertEqual(written, self.get_batch())
Example #6
    def test_get_file_number_with_date(self):
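        # strftime tokens in the filebase are expanded with the current date and
        # {file_number} with start_file_count when the output file is named.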
        file_path = '/tmp/%Y%m%d/'
        file_name = '{file_number}_exporter_test_%m%d%y'
        start_file_count = 1
        writer_config = self.get_writer_config()
        writer_config.update({
            'options': {
                'filebase': file_path + file_name,
                'start_file_count': start_file_count
            }
        })
        writer = FSWriter(writer_config, meta())
        try:
            writer.write_batch(self.get_batch())
            writer.flush()
        finally:
            writer.close()
        file_path = datetime.datetime.now().strftime(file_path).format(
            file_number=start_file_count)
        file_name = datetime.datetime.now().strftime(file_name).format(
            file_number=start_file_count)
        self.assertIn(file_path + file_name + '.jl.gz', writer.written_files)
Example #7
    def test_check_writer_consistency(self):
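        # With check_consistency enabled, finish_writing() verifies the size and
        # presence of every written file, raising InconsistentWriteState otherwise.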
        # given
        options = self.get_writer_config()
        options['options']['check_consistency'] = True

        # when:
        writer = FSWriter(options, meta())
        try:
            writer.write_batch(self.get_batch())
            writer.flush()
        finally:
            writer.close()

        # Consistency check passes
        writer.finish_writing()

        with open(os.path.join(self.tmp_dir, 'exporter_test0000.jl.gz'), 'w'):
            with self.assertRaisesRegexp(InconsistentWriteState,
                                         'Wrong size for file'):
                writer.finish_writing()

        os.remove(os.path.join(self.tmp_dir, 'exporter_test0000.jl.gz'))
        with self.assertRaisesRegexp(InconsistentWriteState,
                                     'file is not present at destination'):
            writer.finish_writing()