def get_bucket():
    """Return the shared S3 bucket, creating and caching it on ``g`` if absent.

    Assumes ``g`` (presumably flask.g — confirm) and ``s3`` are provided by
    the enclosing module.

    Returns:
        The cached ``s3.Bucket`` instance stored on ``g.bucket``.
    """
    # 'is None' instead of '== None' (PEP 8); both branches returned
    # g.bucket, so the if/else collapses to a single return.
    if g.bucket is None:
        g.bucket = s3.Bucket('yelp-data-shared-labs18')
    return g.bucket
示例#2
0
def download_model():
    """Download checkpoint and base-model files from S3 into the cwd.

    No-op when a ``checkpoint`` directory already exists (assumed warm
    start). Relies on module-level globals ``run_name`` and
    ``BUCKET_NAME`` defined elsewhere in the file — TODO confirm.
    """
    s3 = boto3.resource('s3')
    # BUG FIX: original called os.cwd(), which does not exist
    # (AttributeError); os.getcwd() is the correct function.
    if os.path.exists(os.path.join(os.getcwd(), 'checkpoint')):
        # assume warm: files were fetched on a previous invocation
        return
    # makedirs creates parent + child in one call; exist_ok tolerates reruns.
    os.makedirs(f'checkpoint/{run_name}', exist_ok=True)
    bucket = s3.Bucket(BUCKET_NAME)
    # BUG FIX: the original prefix lacked the f-prefix, so the literal
    # text '{run_name}' was sent to S3 instead of the interpolated value.
    for obj in bucket.objects.filter(Prefix=f'checkpoint/{run_name}'):
        bucket.download_file(obj.key, obj.key)
    os.makedirs('models/117M', exist_ok=True)
    for obj in bucket.objects.filter(Prefix='models/117M'):
        bucket.download_file(obj.key, obj.key)
示例#3
0
    # Highlight any cell whose text contains "N" in green (#b7e1cd).
    green = workbook.add_format()
    green.set_bg_color("#b7e1cd")
    worksheet.conditional_format(2, 2, i, 11, {
        "type": "text",
        "criteria": "containing",
        "value": "N",
        "format": green,
    })
    # Same cell range: cells containing "Y" get the 'red' format,
    # which is defined earlier in this function (outside this view).
    worksheet.conditional_format(2, 2, i, 11, {
        "type": "text",
        "criteria": "containing",
        "value": "Y",
        "format": red,
    })


def build(name, buckets):
    """Build ``<name>.xlsx`` containing one worksheet named *name*.

    Writes the header row, then the data rows derived from *buckets*,
    and closes the workbook so the file is flushed to disk.
    """
    book = xlsxwriter.Workbook(f"{name}.xlsx")
    sheet = book.add_worksheet(name)
    sheet.hide_gridlines(2)  # option 2: hide on screen and in print

    write_header(book, sheet)
    write_data(book, sheet, buckets)

    book.close()


if __name__ == "__main__":
    import s3

    # BUG FIX: build() appends ".xlsx" itself, so passing "test.xlsx"
    # produced a file named "test.xlsx.xlsx"; pass the bare stem instead.
    build("test", [s3.Bucket("hey") for _ in range(10)])
def get_bucket(bucket_name='yelp-data-shared-labs18'):
    """Return the bucket cached on ``g``, creating it from *bucket_name* if absent.

    NOTE(review): once cached, the same bucket is returned even when a
    different *bucket_name* is passed on a later call — confirm intended.
    """
    # 'is None' instead of '== None' (PEP 8); the duplicated return in
    # the original if/else collapses to a single statement.
    if g.bucket is None:
        g.bucket = s3.Bucket(bucket_name)
    return g.bucket
示例#5
0
            else:
                # Neither -k nor the filesystem/level positional args given.
                raise ValueError(
                    'must supply either -k or filesystem/level args')

    # Any argument-parsing failure exits via the usage message.
    except (getopt.GetoptError, ValueError, IndexError) as e:
        usage(str(e))

    # load config
    try:
        config = s3.AWSConfig(config_file)
    except s3.AWSConfigError as e:
        sys.stderr.write('Error in config file %s: %s' % (config_file, e))
        sys.exit(1)

    # Module-level so signal handlers and other functions can reach it.
    global bucket
    bucket = s3.Bucket(config)
    bucket.ratelimit = ratelimit
    # -i: use the infrequent-access storage class for uploads.
    if '-i' in opts: bucket.set_storage_class(s3.STORAGE_IA)
    bucket_stdout = sys.stdout
    if '-q' in opts: bucket_stdout = None  # quiet mode: suppress output

    # SIGUSR1/SIGUSR2 adjust the transfer rate limit while running.
    signal.signal(signal.SIGUSR1, ChangeRatelimit)
    signal.signal(signal.SIGUSR2, ChangeRatelimit)

    if cmd == 'init' or cmd == 'initialize':
        # initialize dumps bucket, then smoke-tests basic S3 operations
        print('Creating bucket %s' % config.bucket_name)
        print(bucket.create_bucket().reason)
        print('Testing ability to write, read, and delete:')
        print(bucket.put('testkey', s3.S3Object('this is a test')).reason)
        print(bucket.get('testkey').reason)
示例#6
0
import os
import shutil
import tempfile
import subprocess
import time
import sys

import json

import config
import s3

# Create a representation of the S3 bucket.
bucket = s3.Bucket(config.BUCKET_NAME, config.AWS_ACCESS_KEY_ID,
                   config.AWS_SECRET_ACCESS_KEY)

# Retrieve 'notes.json' from the bucket, and write it if it doesn't exist.
print('Loading JSON data from file.')
# FIX: use a context manager; the original bare open() leaked the handle.
with open('notes.json') as notes_file:
    note_data = json.load(notes_file)
try:
    note_data = json.loads(bucket.getFile('notes.json'))
except Exception:
    # FIX: narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
    # are no longer swallowed. Remote copy missing/unreadable -> keep the
    # local data loaded above and seed the bucket with it.
    print('Failed to download notes.json from the S3 bucket.')
    bucket.uploadFile('notes.json')


def processPayload(payload):
    """Process the JSON payload of a commit and advance the pending job list.

    NOTE(review): reads the globals ``jobs`` and ``new_jobs``, which are
    defined elsewhere at runtime — confirm they are in scope when called.
    """
    print('Processing the JSON payload.')

    print('Loading the added/modified/etc. tex files in the commit.')
    courses = {}
    # A single remaining job means nothing is left after this one;
    # otherwise everything past the head of the list is still pending.
    new_jobs.job_list = None if len(jobs) == 1 else jobs[1:]


def generate_job(savepath, job_type):
    """Write a one-entry job descriptor to a temp file and upload it to S3.

    Args:
        savepath: S3 key the job refers to; its basename becomes part of
            the job file's name.
        job_type: tag prepended to the job file name (e.g. its category).

    Uses the module-level ``bucket`` object; uploads under the ``Jobs/``
    prefix.
    """
    job_data = {'Key': savepath}
    job_name = ''.join([job_type, '_', savepath.split('/')[-1], '_job.json'])
    temp_job_path = '/tmp/' + job_name
    with open(temp_job_path, 'w') as file:
        json.dump(job_data, file)
    try:
        bucket.save(temp_job_path, 'Jobs/{}'.format(job_name))
    finally:
        # FIX: remove the temp file even when the upload raises;
        # the original leaked it on failure.
        os.remove(temp_job_path)


if __name__ == "__main__":

    new_jobs = job_list()
    bucket = s3.Bucket('yelp-data-shared-labs18')
    print('connected to bucket')

    # Main while loop: drain the NLP job queue one job at a time.
    # FIX: 'while not ...' instead of the '== False' anti-idiom.
    while not is_nlp_jobs_empty(bucket):
        path = read_next_job(bucket)
        df = get_df(path)
        processed_df = process(df)
        put_in_processed(processed_df, path)
        delete_last_job(bucket)
        break  # Remove break to run all jobs.  For Testing/Timing Purposes only.
示例#8
0
 def setUp(self):
     """Create the S3 bucket under test; runs before each test method."""
     # Initialize bucket
     # Silence noisy unclosed-SSLSocket ResourceWarnings raised by the
     # S3 client's connections during tests.
     warnings.filterwarnings("ignore", category=ResourceWarning, message="unclosed.*<ssl.SSLSocket.*>")
     self.bucket = s3.Bucket('yelp-data-shared-labs18')