示例#1
0
def get_s3_string(bucketname, bucket_filename):
    '''
    Fetches an S3 object and returns its contents via the key's
    `get_contents_as_string()`.

    `bucketname` is passed through `psi_ops_s3.split_bucket_id`, which yields
    the actual bucket name and a key prefix; the object looked up is
    `<key_prefix>/<bucket_filename>`.

    NOTE(review): no check is made that `get_key` found the object --
    presumably a missing key surfaces as an error on the final call; confirm
    against callers.
    '''
    # Connection is created with no explicit arguments.
    connection = S3Connection()

    # Separate the real bucket name from the key prefix and build the full
    # key name from the prefix and the requested file name.
    real_bucket_name, prefix = psi_ops_s3.split_bucket_id(bucketname)
    full_key_name = '%s/%s' % (prefix, bucket_filename)

    s3_bucket = connection.get_bucket(real_bucket_name, validate=False)
    return s3_bucket.get_key(full_key_name).get_contents_as_string()
示例#2
0
def get_s3_cached_file(cache_dir, bucketname, bucket_filename):
    '''
    Returns a tuple of the file-type object for the data and a boolean indicating
    if this data is new (not from the cache).

    This function checks if the file has already been downloaded. If it has,
    it checks that the MD5 checksum of the cached copy still matches the ETag
    of the file in S3. If the file doesn't exist, or if the checksum doesn't
    match, the file is downloaded and cached to disk.

    Parameters:
      cache_dir: passed to `get_s3_cached_filepath` to locate the cache file.
      bucketname: a bucket name, or a bucket_name+key_prefix combo; it is
        split with `psi_ops_s3.split_bucket_id`.
      bucket_filename: name of the file under the key prefix.

    Returns:
      `(file_object, is_new)`. The file object is opened in binary read mode
      and positioned at the start; the caller is responsible for closing it.

    NOTE(review): the ETag is compared against a plain MD5 of the cached
    bytes. This presumably assumes the object was not uploaded via multipart
    upload (where the ETag is not a plain MD5) -- confirm. In that case the
    file would simply be re-downloaded on every call.
    '''

    cache_path = get_s3_cached_filepath(cache_dir, bucketname, bucket_filename)

    # Make the connection using the credentials in the boto config file.
    conn = S3Connection()

    # `bucketname` may be just a bucket name, or it may be a
    # bucket_name+key_prefix combo. We'll split it up.
    bucketname, key_prefix = psi_ops_s3.split_bucket_id(bucketname)
    bucket_filename = '%s/%s' % (key_prefix, bucket_filename)

    # If we don't specify `validate=False`, then this call will attempt to
    # list all keys, which might not be permitted by the bucket (and isn't).
    bucket = conn.get_bucket(bucketname, validate=False)
    key = bucket.get_key(bucket_filename)
    etag = key.etag.strip('"').lower()

    # Check if the file exists. If so, check if it's stale.
    if os.path.isfile(cache_path):
        cache_file = open(cache_path, 'rb')
        try:
            # Hash in chunks so a large cached file is never held in memory
            # all at once.
            md5 = hashlib.md5()
            for chunk in iter(lambda: cache_file.read(65536), b''):
                md5.update(chunk)

            # Do the hashes match?
            if etag == md5.hexdigest().lower():
                # Hand the still-open handle back, rewound to the start.
                cache_file.seek(0)
                return (cache_file, False)
        except Exception:
            # Don't leak the handle if reading/hashing/seeking fails.
            cache_file.close()
            raise

        cache_file.close()

    # The cached file either doesn't exist or is stale. The download is
    # written in binary mode: the cache is hashed and read back as bytes, and
    # text mode would corrupt the data on platforms that translate newlines.
    with open(cache_path, 'wb') as cache_file:
        key.get_file(cache_file)

    # Re-open for read-only now that the download has been flushed and closed.
    return (open(cache_path, 'rb'), True)