Exemplo n.º 1
0
    def test_s3_cp_download_corrupted_raise_transient_error_after_retried_once(self, ensure_integrity_mock):
        '''
        s3.cp must raise the transient error class when the integrity check
        (replaced here by `ensure_integrity_mock`) fails with that error.
        '''
        from baiji.exceptions import get_transient_error_class

        transient_error_class = get_transient_error_class()

        # Assumes ensure_integrity_mock is a mock object: an exception
        # instance as side_effect is raised on every call, so any retry
        # fails the same way.
        ensure_integrity_mock.side_effect = transient_error_class('etag does not match')

        with self.assertRaises(transient_error_class):
            s3.cp(self.existing_remote_file, self.tmp_dir, force=True)
Exemplo n.º 2
0
    def test_s3_cp_download_corrupted_raise_transient_error_after_retried_once(
            self, ensure_integrity_mock):
        '''
        Check that a download whose integrity check keeps failing ends with
        the transient error being raised out of s3.cp.
        '''
        from baiji.exceptions import get_transient_error_class

        # Build the failure up front; the (presumably patched-in) mock
        # raises it each time the integrity check is invoked.
        corrupted_download = get_transient_error_class()('etag does not match')
        ensure_integrity_mock.side_effect = corrupted_download

        expected_error = get_transient_error_class()
        with self.assertRaises(expected_error):
            s3.cp(self.existing_remote_file, self.tmp_dir, force=True)
Exemplo n.º 3
0
 def test_s3_cp_download_corrupted_recover_in_one_retry(
         self, ensure_integrity_mock):
     '''
     s3.cp must complete without raising when the integrity check fails
     on its first call and passes on its second.
     '''
     from baiji.exceptions import get_transient_error_class
     first_attempt_failure = get_transient_error_class()('etag does not match')
     # Assumes a mock object: a list side_effect is consumed one item per
     # call, so call 1 raises the error and call 2 returns None.
     ensure_integrity_mock.side_effect = [first_attempt_failure, None]
     s3.cp(self.existing_remote_file, self.tmp_dir, force=True)
Exemplo n.º 4
0
 def ensure_integrity(self, filename):
     '''
     Check a downloaded file against the etag of the source.

     filename: path of the local file to check.

     Returns None when `self.connection.etag_matches` accepts the file;
     otherwise raises the class returned by `get_transient_error_class()`
     with a message naming the source uri and the current retry count.
     '''
     if self.connection.etag_matches(filename, self.src.etag()):
         return

     error_class = get_transient_error_class()
     message = 'Destinaton file for ({}) is corrupted, retry count {}'.format(
         self.src.uri, self.retries_made)
     raise error_class(message)
Exemplo n.º 5
0
 def test_downloads_from_s3_are_atomic_under_truncation(self, download_mock):
     '''
     A truncated download must raise the transient error and must not
     leave a file at the destination path.

     `download_mock` stands in for the real download call and writes only
     a few bytes to whatever file object it is handed.
     '''
     from baiji.exceptions import get_transient_error_class
     destination = os.path.join(self.tmp_dir, 'truncated.foo')
     def write_fake_truncated_file(fp, **kwargs): # just capturing whatever is thrown at us: pylint: disable=unused-argument
         # Write bytes, not str: the download target is presumably a
         # binary-mode temp file, where a str write raises TypeError on
         # Python 3. A bytes literal is the same value on Python 2.
         fp.write(b"12345")
     download_mock.side_effect = write_fake_truncated_file
     # Now when the call to download the file is made, the etags won't match
     with self.assertRaises(get_transient_error_class()):
         s3.cp(self.existing_remote_file, destination, validate=True)
     self.assertFalse(os.path.exists(destination))
Exemplo n.º 6
0
    def test_downloads_from_s3_are_atomic_under_truncation(
            self, download_mock):
        '''
        Simulate a truncated download and check that s3.cp raises the
        transient error without leaving anything at the destination path.
        '''
        from baiji.exceptions import get_transient_error_class

        truncated_path = os.path.join(self.tmp_dir, 'truncated.foo')

        def write_fake_truncated_file(fp, **kwargs):  # just capturing whatever is thrown at us: pylint: disable=unused-argument
            truncated_payload = "12345".encode('utf-8')
            fp.write(truncated_payload)

        # The fake writes only five bytes, so the etags won't match once
        # the download is validated.
        download_mock.side_effect = write_fake_truncated_file
        with self.assertRaises(get_transient_error_class()):
            s3.cp(self.existing_remote_file, truncated_path, validate=True)

        destination_exists = os.path.exists(truncated_path)
        self.assertFalse(destination_exists)
Exemplo n.º 7
0
    def download(self):
        '''
        Download to local file

        The key is fetched into a temporary file, which is copied to
        `self.dst.path` only after the transfer (and, if `validate` is set,
        the etag check in `ensure_integrity`) has succeeded.

        A transient error or `KeyNotFound` is printed and the download is
        retried while `retries_made` is below `retries_allowed`; once that
        limit is reached the error is re-raised.

        The temporary file is closed and removed on every exit path.
        '''
        import shutil
        from baiji.util import tempfile
        from baiji.util.with_progressbar import FileTransferProgressbar
        # We create, close, and delete explicitly rather than using
        # a `with` block, since on windows we can't have a file open
        # twice by the same process.
        tf = tempfile.NamedTemporaryFile(delete=False)
        try:
            key = self.src.lookup(version_id=self.version_id)

            with FileTransferProgressbar(supress=(not self.progress)) as cb:
                key.get_contents_to_file(tf, cb=cb)
            # Close before validating and copying so the file is not open
            # twice when it is re-read by name.
            tf.close()

            if self.validate:
                self.ensure_integrity(tf.name)

            # We only actually write to dst.path if the download succeeds and
            # if necessary is validated. This avoids leaving partially
            # downloaded files if something goes wrong.
            shutil.copy(tf.name, self.dst.path)

        except (get_transient_error_class(), KeyNotFound) as retryable_error:
            # Printed here so that papertrail can alert us when this occurs
            print(retryable_error)

            # retry once or raise
            if self.retries_made < self.retries_allowed:
                self._retries += 1
                self.download()
            else:
                raise
        finally:
            try:
                # A failed lookup or transfer skips the close above, leaving
                # the handle open; closing here releases it before removal
                # (an open file cannot be removed on windows). Closing an
                # already-closed file is expected to be a no-op.
                tf.close()
            finally:
                self.connection.rm(tf.name)
Exemplo n.º 8
0
    def download(self):
        '''
        Download to local file

        The key is fetched into a temporary file, which is copied to
        `self.dst.path` only after the transfer (and, if `validate` is set,
        the etag check in `ensure_integrity`) has succeeded.

        A transient error or `KeyNotFound` is printed and the download is
        retried while `retries_made` is below `retries_allowed`; once that
        limit is reached the error is re-raised.

        The temporary file is closed and removed on every exit path.
        '''
        import shutil
        from baiji.util import tempfile
        from baiji.util.with_progressbar import FileTransferProgressbar
        # We create, close, and delete explicitly rather than using
        # a `with` block, since on windows we can't have a file open
        # twice by the same process.
        tf = tempfile.NamedTemporaryFile(delete=False)
        try:
            key = self.src.lookup(version_id=self.version_id)

            with FileTransferProgressbar(supress=(not self.progress)) as cb:
                key.get_contents_to_file(tf, cb=cb)
            # Close before validating and copying so the file is not open
            # twice when it is re-read by name.
            tf.close()

            if self.validate:
                self.ensure_integrity(tf.name)

            # We only actually write to dst.path if the download succeeds and
            # if necessary is validated. This avoids leaving partially
            # downloaded files if something goes wrong.
            shutil.copy(tf.name, self.dst.path)

        except (get_transient_error_class(), KeyNotFound) as retryable_error:
            # Printed here so that papertrail can alert us when this occurs.
            # Parenthesised form: identical output under the Python 2 print
            # statement, and valid syntax on Python 3.
            print(retryable_error)

            # retry once or raise
            if self.retries_made < self.retries_allowed:
                self._retries += 1
                self.download()
            else:
                raise
        finally:
            try:
                # A failed lookup or transfer skips the close above, leaving
                # the handle open; closing here releases it before removal
                # (an open file cannot be removed on windows). Closing an
                # already-closed file is expected to be a no-op.
                tf.close()
            finally:
                self.connection.rm(tf.name)
Exemplo n.º 9
0
 def ensure_integrity(self, filename):
     '''
     Verify that the local file `filename` matches the source's etag.

     Does nothing when `self.connection.etag_matches` reports a match.
     On a mismatch, raises the class returned by
     `get_transient_error_class()`; the message carries the source uri
     and the number of retries made so far.
     '''
     etags_agree = self.connection.etag_matches(filename, self.src.etag())
     if not etags_agree:
         error_class = get_transient_error_class()
         details = 'Destinaton file for ({}) is corrupted, retry count {}'.format(
             self.src.uri,
             self.retries_made,
         )
         raise error_class(details)
Exemplo n.º 10
0
 def test_s3_cp_download_corrupted_recover_in_one_retry(self, ensure_integrity_mock):
     '''
     The copy must succeed when the integrity check raises the transient
     error on its first call only.
     '''
     from baiji.exceptions import get_transient_error_class
     etag_mismatch = get_transient_error_class()('etag does not match')
     # Outcomes in call order (assuming a mock object): raise the
     # mismatch first, then return None.
     check_outcomes = [etag_mismatch, None]
     ensure_integrity_mock.side_effect = check_outcomes
     s3.cp(self.existing_remote_file, self.tmp_dir, force=True)