Example #1
def upload():
    if request.method == "POST":
        f = request.files['file']
        f.save(f.filename)
        upload_file(f.filename, BUCKET)
        # TODO Change to url for storage on ec2/container
        return redirect(url_for('storage'))
Example #2
def images():
    if request.method == "POST":
        if request.files['file']:
            f = request.files['file']
            f.save(f.filename)
            upload_file(f"{f.filename}", BUCKET)
            return redirect("/storage")
Example #3
def process_file():
    """Process the file. Download from S3. Parse and upload newly formatted
       file.
    """
    local_file = s3.download_file(BUCKET_NAME, S3_OBJECT)
    logging.info('File downloaded: %s', local_file)
    try:
        if local_file is not None:
            with open(local_file) as fp:
                with open(NEW_LOG_FILE, "w") as nf:
                    logging.info('Creating new file')
                    line = fp.readline()
                    while line:
                        if line.startswith('['):
                            trim_line = line[29:]
                            t = re.findall(r"[\S]+\S+\S", trim_line)
                            res = lc.parse_line(t)
                            new_data = lc.clean_date(res)
                            nf.write(lc.format_new_line(line, new_data) + '\n')
                        else:
                            # readline() keeps the trailing newline, so write
                            # the line through unchanged
                            nf.write(line)
                        line = fp.readline()

    except Exception as e:
        logging.error(e)

    finally:
        # Clean up: upload the new file to S3 and delete the temporary files.
        # The "with" blocks above have already closed both file handles.
        if local_file is not None:
            logging.info('New log file completed')
            s3.upload_file(NEW_LOG_FILE, BUCKET_NAME)
            os.remove(NEW_LOG_FILE)
            os.remove(local_file)
Example #4
def upload_file(file_path):
    LOGGER.info("Uploading {} to s3://{}/{}".format(file_path,
                                                    os.environ['S3_BUCKET'],
                                                    os.environ['S3_PREFIX']))

    s3.upload_file(file_path)

    basename = os.path.basename(file_path)
    return s3.http_url(basename)
Example #5
def put_in_processed(df, path):
    # getting the original file name from the path
    filename = path.split('/')[-1]
    # creating temporary local parquet file
    df.to_parquet('temp_parquet_file.parquet')
    processed_file_path = 'Processed/' + filename
    s3.upload_file(file_path='temp_parquet_file.parquet',
                   bucket='yelp-data-shared-labs18',
                   object_name=processed_file_path)
    generate_job(savepath=processed_file_path, job_type="POST")
Example #6
def test_s3():
    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    app_args = "--readUrl {} --writeUrl {}".format(
        s3.s3n_url('linecount.txt'),
        s3.s3n_url("linecount-out"))

    args = ["--conf",
            "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
                os.environ["AWS_ACCESS_KEY_ID"]),
            "--conf",
            "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
                os.environ["AWS_SECRET_ACCESS_KEY"]),
            "--class", "S3Job"]
    utils.run_tests(app_url=_scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    app_name="/spark",
                    args=args)

    assert len(list(s3.list("linecount-out"))) > 0

    app_args = "--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt'))

    args = ["--conf",
            "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
                os.environ["AWS_ACCESS_KEY_ID"]),
            "--conf",
            "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
                os.environ["AWS_SECRET_ACCESS_KEY"]),
            "--class", "S3Job"]
    utils.run_tests(app_url=_scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    app_name="/spark",
                    args=args)

    app_args = "--countOnly --readUrl {}".format(s3.s3n_url('linecount.txt'))

    args = ["--conf",
            "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
                os.environ["AWS_ACCESS_KEY_ID"]),
            "--conf",
            "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
                os.environ["AWS_SECRET_ACCESS_KEY"]),
            "--class", "S3Job"]
    utils.run_tests(app_url=_scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    app_name="/spark",
                    args=args)
Example #7
 def log_to_s3(self, file_name, time_label):
     """send a small timing log to s3 - used in timing stats"""
     lines = []
     line = '[time]'
     lines.append(line)
     line = time_label + '=' + str(datetime.datetime.now())
     lines.append(line)
     utility.list_to_file(file_name, lines)
     s3.upload_file(
         self.s3_bucket, self.aws_region, file_name,
         self.s3_results + '/' + self.rescue_id + '/' + file_name)
     # now delete the file because we don't need it
     os.remove(file_name)
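utility.list_to_file is another helper the listing leaves out; judging from the call site it writes each entry of lines to file_name, one per line. A possible sketch (the exact behaviour is an assumption):

def list_to_file(file_name, lines):
    # Hypothetical helper: write each list entry to the file, one per line.
    with open(file_name, 'w') as f:
        for line in lines:
            f.write(line + '\n')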
Example #8
def upload():
    # validate file
    if "file" not in request.files:
        return "no file", 400
    f = request.files["file"]
    if f.filename == "":
        return "no file", 400
    if not filetype("png", f.filename):
        return "wrong file type", 400
    image = Image(remove_ext(f.filename), f, current_user)
    db.session.add(image)
    duplicate = Image.query.filter_by(digest=image.digest).first()
    if duplicate.id == image.id:
        upload_file(f, image.digest, config.S3_BUCKET)
    db.session.commit()
    return jsonify(image.dict)
Example #9
File: render.py  Project: phi-line/goboto
    def save(self):
        with GameStateRenderer(state=self.state, *self.config) as frame:
            timestamp = datetime.datetime.now().isoformat()
            full_path = f'{self.state.assets_directory}/{timestamp}.jpg'
            frame.save(full_path, quality=80)

        s3.flush_directory(self.state.assets_directory)
        return s3.upload_file(full_path)
Example #10
def test_s3():
    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    app_args = "{} {}".format(s3.s3n_url('linecount.txt'),
                              s3.s3n_url("linecount-out"))

    args = [
        "--conf", "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
            os.environ["AWS_ACCESS_KEY_ID"]), "--conf",
        "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
            os.environ["AWS_SECRET_ACCESS_KEY"]), "--class", "S3Job"
    ]
    utils.run_tests(_upload_file(os.environ["SCALA_TEST_JAR_PATH"]), app_args,
                    "", args)

    assert len(list(s3.list("linecount-out"))) > 0
Example #11
def upload_model_data(task, username):
    files = [
        f'checkpoints/{username}_model.pt',
        f'{username}_accuracy_change.jpg',
    ]
    if task == 'text':
        files += [f'{username}_tokenizer.pkl']
    else:
        files += [
            f'{username}_correct_predictions.jpg',
            f'{username}_incorrect_predictions.jpg'
        ]

    for f in files:
        source = f
        if source[:10] == 'checkpoint':
            target = source[12:]
        else:
            target = f
        upload_file(os.path.join(DATA_PATH, source), f'inference/{target}')
Example #12
def setup_inference(token, task_type, accuracy, model_path, acc_plot_path, metadata):
    inference_config = fetch_json(INFERENCE_CONFIG)

    # Upload model
    s3_model_path = os.path.join(task_type, os.path.basename(model_path))
    upload_file(model_path, s3_model_path)

    if task_type == 'sentimentanalysis':
        s3_meta_path = os.path.join(task_type, os.path.basename(metadata['metadata_filename']))
        upload_file(metadata['metadata_filename'], s3_meta_path)
        metadata['metadata_filename'] = s3_meta_path

    # Upload new inference config to S3
    inference_config[token] = {
        'task_type': task_type,
        'model_filename': s3_model_path,
        **metadata,
        'accuracy': accuracy,
        'accuracy_plot': image_to_base64(acc_plot_path),
        'created': datetime.now().strftime('%d-%m-%y %H:%M')
    }
    put_object(INFERENCE_CONFIG, inference_config)
Example #13
def success():
    if isLoggedIn():
        # netid = "jyxu"
        user_obj = add_get_user(session['username'])
        non_valid = []
        if request.method == 'POST':
            album = request.form['a_name']
            album_obj = add_get_album(album, user_obj)

            files = request.files.getlist("file")
            file_tag = {}

            for f in files:
                # upload file to aws
                f.save(f.filename)
                if check_file_bytes(f.filename) == 0:
                    non_valid.append(f.filename)
                    continue
                upload_file(f"{f.filename}", BUCKET)
                link = "https://iw-spring.s3.amazonaws.com/uploads/%s" % f.filename

                f.seek(0)
                content = f.read()
                tags, d_types = annotate_img_bytestream(content)

                # gtags, d_types = annotate_img_bytestream(content)
                # custom_tags = custom_tagger(content)
                # tags, d_types = combine_tags(gtags, d_types, custom_tags)
                img_obj = add_image(album_obj, link, tags, d_types)

                type_tags = img_tags_all_category(img_obj)
                file_tag[f] = type_tags

                os.remove(f.filename)
            return render_template("success.html",
                                   album=album,
                                   file_tag=file_tag,
                                   non_valid=non_valid)
    return render_template("signin.html")
Example #14
def send_squawk():
    sender = myphone()
    util.log("sent squawk from %s to %s" %
             (sender, ' '.join(args()['recipients'])))
    if args()['recipients'] == [robot.ROBOT_PHONE]:
        robot.send_robot_message(sender)
    if sender:
        duration = args().get('duration', -1)
        data = flask.request.data
        filename = s3.generate_unique_filename_with_ext('m4a')
        audio_url = s3.upload_file(filename, data)
        success = deliver_squawk(args()['recipients'], sender, audio_url,
                                 duration)
        return json.dumps({"success": success})
    else:
        return json.dumps({"success": False, "error": "bad_token"})
Example #15
File: test_s3.py  Project: gabrielvv/boom
 def test_upload_file(self, client):
     res = upload_file('file_name', 'bucket')
     self.assertEqual(res, True)
     fake_client.upload_file.assert_called_once_with(
         'file_name', 'bucket', 'file_name')
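The fake_client fixture behind this assertion is not shown. One way to wire such a test with unittest.mock, sketched here as a self-contained script (the module-level client stand-in and the helper body are assumptions based on the call signature the test asserts):

from unittest import mock

# Stand-in for the module-level boto3 client the real helper would use.
client = None  # replaced by a MagicMock below


def upload_file(file_name, bucket, object_name=None):
    # Hypothetical helper body matching the asserted call signature.
    client.upload_file(file_name, bucket, object_name or file_name)
    return True


fake_client = mock.MagicMock()
fake_client.upload_file.return_value = None  # boto3's upload_file returns None

with mock.patch(__name__ + '.client', fake_client):
    assert upload_file('file_name', 'bucket') is True

fake_client.upload_file.assert_called_once_with(
    'file_name', 'bucket', 'file_name')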
Example #16
File: insta.py  Project: korniichuk/insta
           "fWCm9MsvzJDeZcszmFhntItkoFb3Y3bldiqnQy"

# Get a list of all the photos in the Google Photos album
photo_ids = get_photos_by_album_id(album_id)

# Download photos from Google Photos by ids
filenames = download_photos_by_ids(photo_ids)

# Upload all photos to Amazon S3 bucket
bucket_name = 'photos.insta'
length = len(filenames)
for i, filename in enumerate(filenames):
    sys.stdout.write('\r')
    sys.stdout.write('uploading: %s/%s' % (i+1, length))
    sys.stdout.flush()
    s3.upload_file(filename, bucket_name)
sys.stdout.write('\n')
sys.stdout.flush()

# Make photo collage
make_collage(filenames, 'insta.png', 600, 300)

# Send photo collage to grandma
path = '~/.key/insta.enc'
token = getpassword(path)
data = {'file':('insta.png', open('insta.png', 'rb'), 'png')}
params = {'initial_comment':'Hello, World!', 'title':'insta.png',
          'filename':'insta.png', 'token':token, 'channels':['#family']}
r = requests.post("https://slack.com/api/files.upload", params=params,
                  files=data)
Example #17
    print('Cluster status: {}'.format(status))
    if status == 'available':
        break
    time.sleep(30)

REDSHIFT_ENDPOINT = redshift.describe_clusters(ClusterIdentifier=REDSHIFT_CLUSTER_IDENTIFIER).get('Clusters')[0]['Endpoint']['Address']
REDSHIFT_VPC_ID = redshift.describe_clusters(ClusterIdentifier=REDSHIFT_CLUSTER_IDENTIFIER).get('Clusters')[0]['VpcId']

set_external_access(ec2_conn=ec2, vpc_id=REDSHIFT_VPC_ID, redshift_port=REDSHIFT_PORT)

print('---- Creating a S3 Bucket ----')
S3_BUCKET_NAME = create_bucket(s3, 'mark')
time.sleep(5)

print('---- Uploading a file ----')
upload_file(s3, './data', S3_BUCKET_NAME)
time.sleep(5)

print('---- Creating table ----')
script = open("./sql/1.create_table.sql", "r").read()
execute_single_sql(dbname=REDSHIFT_DB,
                   host=REDSHIFT_ENDPOINT,
                   port=REDSHIFT_PORT,
                   user=REDSHIFT_USER,
                   password=REDSHIFT_PASSWORD,
                   sql=script)

print('---- Transferring data from S3 to Redshift ----')
script = open("./sql/2.fill_tables.sql", "r").read()
script = script\
    .replace('{%aws_ian%}', REDSHIFT_ROLE_ARN)\
Example #18
def backup():
    if os.getenv('Disabled') == 'yes':
        return

    logging.info('Contents of host volumes directory...')
    logging.info(os.listdir('/HostVolumeData'))

    session = boto3.session.Session()
    s3_client = session.client(
        service_name='s3',
        aws_access_key_id=config.get_s3_access_key(),
        aws_secret_access_key=config.get_s3_secret_key(),
        region_name=config.get_s3_region(),
        endpoint_url=config.get_s3_endpoint(),
    )

    if s3_client is not None:
        volumes_to_backup = config.get_volumes_to_backup()
        if volumes_to_backup != '':

            # Generating temporary directory
            if not os.path.exists('temp'):
                os.makedirs('temp')

            # Check that each volume listed in the environment variable exists
            # in the host filesystem; iterate over a copy so that removing an
            # entry does not skip the following element.
            arr_volumes = [x.strip() for x in volumes_to_backup.split(',')]
            for vol in list(arr_volumes):
                if vol not in os.listdir('/HostVolumeData'):
                    arr_volumes.remove(vol)
                    logging.error(
                        'Volume \'%s\' is not in host\'s Docker filesystem.' %
                        vol)

            # # # S3 directory structure
            #
            # env_bucket_name/
            #     env_directory_name/
            #         volume_name/
            #             BACKUP_<date-time>.tar.gz
            #             BACKUP_<date-time>.tar.gz
            #             ...
            #             SNAPSHOT_<snapshot-number>.tar.gz
            for vol_name in arr_volumes:

                # Open archive file to pack backup data into
                tar = tarfile.open('./temp/archive_build.tar.gz', 'w:gz')
                for file_name in os.listdir('/HostVolumeData/%s/_data/' %
                                            vol_name):
                    # File name to archive
                    file_path = '/HostVolumeData/%s/_data/%s' % (vol_name,
                                                                 file_name)
                    tar.add(file_path, arcname=file_name)
                tar.close()

                latest_snapshot_path = s3.get_key_from_prefix(
                    '%s/%s/SNAPSHOT-' %
                    (config.get_directory_name(), vol_name), s3_client)
                if latest_snapshot_path is not False:
                    latest_snapshot_number = latest_snapshot_path[
                        latest_snapshot_path.index('SNAPSHOT-') +
                        len('SNAPSHOT-'):-7]
                else:
                    latest_snapshot_number = 0

                if int(latest_snapshot_number) > 0:
                    if not int(latest_snapshot_number
                               ) + 1 > config.get_backup_interval():
                        response = s3.delete_objects_by_prefix(
                            '%s/SNAPSHOT-' % vol_name, s3_client)
                    else:
                        # Copy file to new object name and then delete old version
                        s3_client.copy(  # From here
                                       {
                                           'Bucket': config.get_bucket_name(),
                                           'Key': s3.get_key_from_prefix('%s/%s/SNAPSHOT-' % (
                                                          config.get_directory_name(), vol_name
                                                      ), s3_client
                                                  )
                                       },
                                       # To here
                                       config.get_bucket_name(),
                                       '%s/%s/BACKUP-%s.tar.gz' % (
                                           config.get_directory_name(),
                                           vol_name,
                                           datetime.now().strftime('%Y%m%d-%H%M%S')
                                       )
                        )
                        s3.delete_objects_by_prefix('%s/SNAPSHOT-' % vol_name,
                                                    s3_client)
                        latest_snapshot_number = '0'

                response = s3.upload_file(
                    './temp/archive_build.tar.gz', s3_client,
                    '%s/SNAPSHOT-%s.tar.gz' %
                    (vol_name, str(int(latest_snapshot_number) + 1)))
        else:
            logging.critical('No volumes were specified.')
            sys.exit(1)
    else:
        logging.critical('Client failed to be instantiated.')
        sys.exit(1)
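Example #18 also leans on a custom s3 module (get_key_from_prefix, delete_objects_by_prefix, upload_file) that is not part of the listing. For orientation, a minimal sketch of get_key_from_prefix consistent with how backup() calls it, assuming boto3's list_objects_v2 and the same config module (the helper body is an assumption):

def get_key_from_prefix(prefix, s3_client):
    # Hypothetical helper: return the key of the first object whose key starts
    # with `prefix`, or False when nothing matches (as backup() expects).
    response = s3_client.list_objects_v2(
        Bucket=config.get_bucket_name(), Prefix=prefix)
    contents = response.get('Contents', [])
    if not contents:
        return False
    return contents[0]['Key']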