Example #1
    def reset_files_extension(self, storage_name, prefix):
        """Strip the '-done' suffix from keys under `prefix` by copying and deleting."""
        bucket = self.__s3.get_bucket(storage_name)
        for key in bucket.list(prefix=prefix):
            if key.name.endswith('-done'):
                # S3 cannot rename in place, so copy to the new name and delete the original
                new_key_name = key.name.replace('-done', '')
                bucket.copy_key(new_key_name=new_key_name, src_bucket_name=storage_name, src_key_name=key.name)
                bucket.delete_key(key.name)
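The copy-then-delete pattern above is how a key is effectively renamed, since S3 has no rename operation. Below is a minimal standalone sketch of the same idea with boto 2; the bucket name and the helper name are illustrative, not part of the example above:

import boto

def rename_key(bucket_name, old_name, new_name):
    # Copy the object to its new name, then remove the original (S3 cannot rename in place).
    conn = boto.connect_s3()  # credentials come from the environment or boto config
    bucket = conn.get_bucket(bucket_name)
    bucket.copy_key(new_key_name=new_name,
                    src_bucket_name=bucket_name,
                    src_key_name=old_name)
    bucket.delete_key(old_name)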
Example #2
    def test_write_s3_check_consistency_wrong_items_count(self):
        # given
        items_to_write = self.get_batch()
        options = self.get_writer_config()
        options['options']['check_consistency'] = True

        # when:
        writer = S3Writer(options, ExportMeta(options))
        try:
            writer.write_batch(items_to_write)
            writer.flush()
        finally:
            writer.close()
        # re-upload the same payload with a wrong 'total' metadata value so the
        # record-count consistency check fails
        bucket = self.s3_conn.get_bucket('fake_bucket')
        key = bucket.get_key('tests/0.jl.gz')
        content = key.get_contents_as_string()
        bucket.delete_key('tests/0.jl.gz')
        new_key = bucket.new_key('tests/0.jl.gz')
        new_key.update_metadata({'total': 999})
        new_key.set_contents_from_string(content)

        # then:
        with self.assertRaisesRegexp(InconsistentWriteState,
                                     'Unexpected number of records'):
            writer.finish_writing()
Example #3
import logging
import boto
import boto.s3.bucket

def s3_delete(bucket_name, s3_filename):
    # Bucket() builds a local handle without checking that the bucket exists
    conn = boto.connect_s3()
    bucket = boto.s3.bucket.Bucket(conn, bucket_name)
    try:
        bucket.delete_key(s3_filename)
    except Exception:
        logging.warning('Failed to delete ' + s3_filename)
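A possible call site for the helper above, purely illustrative; the bucket and key names are made up, and logging must be configured for the warning to show up. Deleting a key that does not exist is normally not an error in S3, so the except branch mostly covers connection or permission problems:

import logging

logging.basicConfig(level=logging.WARNING)

# hypothetical names, for illustration only
s3_delete('my-backup-bucket', 'exports/2020-07-01.json.gz')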
Example #4
def __get_file_contents_list_from_bucket(bucket, prefix, bucket_name):
    json_files_list = []
    for key in bucket.list(prefix=prefix):
        # skip folder placeholders and keys already marked as processed
        if key.name.endswith('/') or key.name.endswith('-done'):
            continue
        try:
            # mark the key as processed by renaming it (copy + delete), then read its contents
            new_key_name = "{}-done".format(key.name)
            bucket.copy_key(new_key_name=new_key_name, src_bucket_name=bucket_name, src_key_name=key.name)
            bucket.delete_key(key.name)
            new_key = bucket.get_key(new_key_name)
            new_key.get_contents_to_filename(filename="tmp.json.gz")
            f = gzip.open('tmp.json.gz', 'rb')
            json_files_list.append(f.read())
            f.close()
        except Exception as ex:
            Logger.log("warning", "{} FAILED: {}".format(key.name, ex.message))
    return json_files_list
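The temporary-file round trip above could also be done in memory. A small sketch assuming the same boto 2 key object; read_gzipped_key is an illustrative helper, not part of the original code:

import gzip
import io

def read_gzipped_key(key):
    # Download the .gz object as bytes and decompress it without touching disk.
    compressed = key.get_contents_as_string()
    with gzip.GzipFile(fileobj=io.BytesIO(compressed)) as gz:
        return gz.read()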
Example #5
    def files_delete(self, files_saved, dry_run=False, ):
        """workhorse for deletion

            `files_saved`
                `dict`
                format =
                    files_saved[size] = (target_filename, bucket_name)

            `dry_run`
                default = `False`
                should we just pretend to delete?
        """

        # setup the s3 connection
        s3_buckets = self.s3_buckets

        # convert to a list, because we delete the items from the dict
        for size in list(files_saved.keys()):

            # grab the stash
            (target_filename, bucket_name) = files_saved[size]

            # active bucket
            bucket = s3_buckets[bucket_name]

            # delete it
            log.debug("going to delete %s from %s" %
                      (target_filename, bucket_name))

            if not dry_run:
                bucket.delete_key(target_filename)

                # external logging
                if self._saverLogger:
                    self._saverLogger.log_delete(
                        bucket_name=bucket_name,
                        key=target_filename,
                    )

            # internal cleanup
            del files_saved[size]

        return files_saved
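A sketch of the data shape the docstring describes and a dry-run call; the sizes, filenames, bucket names, and the `saver` instance are made up for illustration:

files_saved = {
    'thumb': ('images/123-thumb.jpg', 'media-thumbs'),
    'full': ('images/123-full.jpg', 'media-originals'),
}

# With dry_run=True the S3 delete and the external logging are skipped,
# but the dict is still emptied, so an empty dict comes back.
remaining = saver.files_delete(files_saved, dry_run=True)
assert remaining == {}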
Example #6
    def test_write_s3_check_consistency_key_not_present(self):
        # given
        items_to_write = self.get_batch()
        options = self.get_writer_config()
        options['options']['check_consistency'] = True

        # when:
        writer = S3Writer(options, ExportMeta(options))
        try:
            writer.write_batch(items_to_write)
            writer.flush()
        finally:
            writer.close()
        bucket = self.s3_conn.get_bucket('fake_bucket')
        # delete the uploaded key so the consistency check finds it missing
        bucket.delete_key('tests/0.jl.gz')

        # then:
        with self.assertRaisesRegexp(InconsistentWriteState, 'not found in bucket'):
            writer.finish_writing()
Example #7
    def files_delete(self, files_saved, dry_run=False, ):
        """workhorse for deletion

            `files_saved`
                `dict`
                format =
                    files_saved[size] = (target_filename, bucket_name)

            `dry_run`
                default = `False`
                should we just pretend to delete?
        """

        # setup the s3 connection
        s3_buckets = self.s3_buckets

        # convert to a list, because we delete the items from the dict
        for size in list(files_saved.keys()):

            # grab the stash
            (target_filename, bucket_name) = files_saved[size]

            # active bucket
            bucket = s3_buckets[bucket_name]

            # delete it
            log.debug("going to delete %s from %s" %
                      (target_filename, bucket_name))

            if not dry_run:
                bucket.delete_key(target_filename)

                # external logging
                if self._saverLogger:
                    self._saverLogger.log_delete(
                        bucket_name=bucket_name,
                        key=target_filename,
                    )

            # internal cleanup
            del files_saved[size]

        return files_saved
Example #8
    def test_write_s3_check_consistency_key_not_present(self):
        # given
        items_to_write = self.get_batch()
        options = self.get_writer_config()
        options['options']['check_consistency'] = True

        # when:
        writer = S3Writer(options, ExportMeta(options))
        try:
            writer.write_batch(items_to_write)
            writer.flush()
        finally:
            writer.close()
        bucket = self.s3_conn.get_bucket('fake_bucket')
        # delete the uploaded key so the consistency check finds it missing
        bucket.delete_key('tests/0.jl.gz')

        # then:
        with self.assertRaisesRegexp(InconsistentWriteState,
                                     'not found in bucket'):
            writer.finish_writing()
Example #9
def delete_from_s3(bucket_name, file_name, prefix=None, access_key=None, secret_key=None, dry_run=False):
    valid = True
    result_string = ""
    s3_conn = _get_s3_connection(access_key, secret_key)
    if s3_conn.lookup(bucket_name):
        bucket = s3_conn.get_bucket(bucket_name)
        k = Key(bucket)
        if prefix:
            k.key = os.path.join(prefix, file_name)
        else:
            k.key = file_name
        if dry_run:
            result_string += "Skipping actual delete from S3 due to dry run.\n"
        else:
            # delete_key expects the key name, not the Key object
            bucket.delete_key(k.key)
            result_string += "Deleted file " + file_name + " from S3 bucket " + bucket_name + "\n"
    else:
        result_string += "Cannot find S3 bucket with name " + bucket_name + "\n"
        valid = False
    return {"valid": valid, "result_string": result_string}
Example #10
def delete_from_s3(bucket_name, file_name, prefix=None, access_key=None, secret_key=None, dry_run=False):
    valid = True
    result_string = ""
    s3_conn = _get_s3_connection(access_key, secret_key)
    if s3_conn.lookup(bucket_name):
        bucket = s3_conn.get_bucket(bucket_name)
        k = Key(bucket)
        if prefix:
            k.key = os.path.join(prefix, file_name)
        else:
            k.key = file_name
        if dry_run:
            result_string += "Skipping actual delete from S3 due to dry run.\n"
        else:
            # delete_key expects the key name, not the Key object
            bucket.delete_key(k.key)
            result_string += "Deleted file " + file_name + " from S3 bucket " + bucket_name + "\n"
    else:
        result_string += "Cannot find S3 bucket with name " + bucket_name + "\n"
        valid = False
    return {"valid": valid, "result_string": result_string}
Example #11
    def test_write_s3_check_consistency_wrong_items_count(self):
        # given
        items_to_write = self.get_batch()
        options = self.get_writer_config()
        options['options']['check_consistency'] = True

        # when:
        writer = S3Writer(options, ExportMeta(options))
        try:
            writer.write_batch(items_to_write)
            writer.flush()
        finally:
            writer.close()
        # re-upload the same payload with a wrong 'total' metadata value so the
        # record-count consistency check fails
        bucket = self.s3_conn.get_bucket('fake_bucket')
        key = bucket.get_key('tests/0.jl.gz')
        content = key.get_contents_as_string()
        bucket.delete_key('tests/0.jl.gz')
        new_key = bucket.new_key('tests/0.jl.gz')
        new_key.update_metadata({'total': 999})
        new_key.set_contents_from_string(content)

        # then:
        with self.assertRaisesRegexp(InconsistentWriteState, 'Unexpected number of records'):
            writer.finish_writing()