def upload():
    if request.method == "POST":
        f = request.files['file']
        f.save(f.filename)
        upload_file(f.filename, BUCKET)
        # TODO: change to url_for storage on EC2/container
        return redirect(url_for('storage'))
def images():
    if request.method == "POST":
        if request.files['file']:
            f = request.files['file']
            f.save(f.filename)
            upload_file(f"{f.filename}", BUCKET)
    return redirect("/storage")
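# A minimal, hypothetical wiring for the two Flask views above, assuming a standard
# Flask app. The route paths, the BUCKET value, and the upload_file helper are
# assumptions for illustration; the original project's decorators and configuration
# are not shown in these snippets.
from flask import Flask, redirect, request, url_for

app = Flask(__name__)
BUCKET = "example-bucket"  # assumed placeholder, not from the original code

app.add_url_rule("/upload", view_func=upload, methods=["GET", "POST"])
app.add_url_rule("/images", view_func=images, methods=["GET", "POST"])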
def process_file():
    """Process the file.

    Download from S3, then parse and upload the newly formatted file.
    """
    local_file = s3.download_file(BUCKET_NAME, S3_OBJECT)
    logging.info('File downloaded: ' + local_file)
    try:
        if local_file is not None:
            with open(local_file) as fp, open(NEW_LOG_FILE, "w") as nf:
                logging.info('Creating new file')
                for line in fp:
                    if line.startswith('['):
                        trim_line = line[29:]
                        t = re.findall(r"[\S]+\S+\S", trim_line)
                        res = lc.parse_line(t)
                        new_data = lc.clean_date(res)
                        nf.write(lc.format_new_line(line, new_data) + '\n')
                    else:
                        # readline/iteration keeps the trailing newline, so write the line as-is.
                        nf.write(line)
    except Exception as e:
        logging.error(e)
    finally:
        # Clean up: the with blocks close the files; upload the new log to S3 and
        # delete the temporary files.
        logging.info('New log file completed')
        s3.upload_file(NEW_LOG_FILE, BUCKET_NAME)
        os.remove(NEW_LOG_FILE)
        if local_file is not None:
            os.remove(local_file)
def upload_file(file_path):
    LOGGER.info("Uploading {} to s3://{}/{}".format(
        file_path, os.environ['S3_BUCKET'], os.environ['S3_PREFIX']))
    s3.upload_file(file_path)
    basename = os.path.basename(file_path)
    return s3.http_url(basename)
def put_in_processed(df, path):
    # Get the original file name from the path.
    filename = path.split('/')[-1]
    # Create a temporary local parquet file.
    df.to_parquet('temp_parquet_file.parquet')
    processed_file_path = 'Processed/' + filename
    s3.upload_file(file_path='temp_parquet_file.parquet',
                   bucket='yelp-data-shared-labs18',
                   object_name=processed_file_path)
    generate_job(savepath=processed_file_path, job_type="POST")
def test_s3():
    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    app_args = "--readUrl {} --writeUrl {}".format(
        s3.s3n_url('linecount.txt'),
        s3.s3n_url("linecount-out"))
    args = ["--conf",
            "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
                os.environ["AWS_ACCESS_KEY_ID"]),
            "--conf",
            "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
                os.environ["AWS_SECRET_ACCESS_KEY"]),
            "--class", "S3Job"]
    utils.run_tests(app_url=_scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    app_name="/spark",
                    args=args)

    assert len(list(s3.list("linecount-out"))) > 0

    app_args = "--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt'))
    args = ["--conf",
            "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
                os.environ["AWS_ACCESS_KEY_ID"]),
            "--conf",
            "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
                os.environ["AWS_SECRET_ACCESS_KEY"]),
            "--class", "S3Job"]
    utils.run_tests(app_url=_scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    app_name="/spark",
                    args=args)

    app_args = "--countOnly --readUrl {}".format(s3.s3n_url('linecount.txt'))
    args = ["--conf",
            "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
                os.environ["AWS_ACCESS_KEY_ID"]),
            "--conf",
            "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
                os.environ["AWS_SECRET_ACCESS_KEY"]),
            "--class", "S3Job"]
    utils.run_tests(app_url=_scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    app_name="/spark",
                    args=args)
def log_to_s3(self, file_name, time_label):
    """Send a small timing log to S3 - used in timing stats."""
    lines = []
    line = '[time]'
    lines.append(line)
    line = time_label + '=' + str(datetime.datetime.now())
    lines.append(line)
    utility.list_to_file(file_name, lines)
    s3.upload_file(
        self.s3_bucket, self.aws_region, file_name,
        self.s3_results + '/' + self.rescue_id + '/' + file_name)
    # Now delete the local file because we don't need it.
    os.remove(file_name)
def upload():
    # Validate the uploaded file.
    if "file" not in request.files:
        return "no file", 400
    f = request.files["file"]
    if f.filename == "":
        return "no file", 400
    if not filetype("png", f.filename):
        return "wrong file type", 400

    image = Image(remove_ext(f.filename), f, current_user)
    db.session.add(image)

    # Only push the bytes to S3 if no other image already has this digest.
    duplicate = Image.query.filter_by(digest=image.digest).first()
    if duplicate.id == image.id:
        upload_file(f, image.digest, config.S3_BUCKET)

    db.session.commit()
    return jsonify(image.dict)
def save(self):
    with GameStateRenderer(state=self.state, *self.config) as frame:
        timestamp = datetime.datetime.now().isoformat()
        full_path = f'{self.state.assets_directory}/{timestamp}.jpg'
        frame.save(full_path, quality=80)
        s3.flush_directory(self.state.assets_directory)
        return s3.upload_file(full_path)
def test_s3():
    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    app_args = "{} {}".format(
        s3.s3n_url('linecount.txt'),
        s3.s3n_url("linecount-out"))
    args = [
        "--conf",
        "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
            os.environ["AWS_ACCESS_KEY_ID"]),
        "--conf",
        "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
            os.environ["AWS_SECRET_ACCESS_KEY"]),
        "--class", "S3Job"
    ]
    utils.run_tests(_upload_file(os.environ["SCALA_TEST_JAR_PATH"]),
                    app_args,
                    "",
                    args)

    assert len(list(s3.list("linecount-out"))) > 0
def upload_model_data(task, username):
    files = [
        f'checkpoints/{username}_model.pt',
        f'{username}_accuracy_change.jpg',
    ]
    if task == 'text':
        files += [f'{username}_tokenizer.pkl']
    else:
        files += [
            f'{username}_correct_predictions.jpg',
            f'{username}_incorrect_predictions.jpg'
        ]

    for f in files:
        source = f
        # Strip the 'checkpoints/' prefix so the target key is just the file name.
        if source.startswith('checkpoints/'):
            target = source[len('checkpoints/'):]
        else:
            target = source
        upload_file(os.path.join(DATA_PATH, source), f'inference/{target}')
def setup_inference(token, task_type, accuracy, model_path, acc_plot_path, metadata):
    inference_config = fetch_json(INFERENCE_CONFIG)

    # Upload model
    s3_model_path = os.path.join(task_type, os.path.basename(model_path))
    upload_file(model_path, s3_model_path)

    if task_type == 'sentimentanalysis':
        s3_meta_path = os.path.join(task_type,
                                    os.path.basename(metadata['metadata_filename']))
        upload_file(metadata['metadata_filename'], s3_meta_path)
        metadata['metadata_filename'] = s3_meta_path

    # Upload new inference config to S3
    inference_config[token] = {
        'task_type': task_type,
        'model_filename': s3_model_path,
        **metadata,
        'accuracy': accuracy,
        'accuracy_plot': image_to_base64(acc_plot_path),
        'created': datetime.now().strftime('%d-%m-%y %H:%M')
    }
    put_object(INFERENCE_CONFIG, inference_config)
def success():
    if isLoggedIn():
        # netid = "jyxu"
        user_obj = add_get_user(session['username'])
        non_valid = []
        if request.method == 'POST':
            album = request.form['a_name']
            album_obj = add_get_album(album, user_obj)
            files = request.files.getlist("file")
            file_tag = {}
            for f in files:
                # Upload the file to AWS.
                f.save(f.filename)
                if check_file_bytes(f.filename) == 0:
                    non_valid.append(f.filename)
                    continue
                upload_file(f"{f.filename}", BUCKET)
                link = "https://iw-spring.s3.amazonaws.com/uploads/%s" % f.filename
                f.seek(0)
                content = f.read()
                tags, d_types = annotate_img_bytestream(content)
                # gtags, d_types = annotate_img_bytestream(content)
                # custom_tags = custom_tagger(content)
                # tags, d_types = combine_tags(gtags, d_types, custom_tags)
                img_obj = add_image(album_obj, link, tags, d_types)
                type_tags = img_tags_all_category(img_obj)
                file_tag[f] = type_tags
                os.remove(f.filename)
            return render_template("success.html", album=album,
                                   file_tag=file_tag, non_valid=non_valid)
    return render_template("signin.html")
def send_squawk():
    sender = myphone()
    util.log("sent squawk from %s to %s" % (sender, ' '.join(args()['recipients'])))
    if args()['recipients'] == [robot.ROBOT_PHONE]:
        robot.send_robot_message(sender)
    if sender:
        duration = args().get('duration', -1)
        data = flask.request.data
        filename = s3.generate_unique_filename_with_ext('m4a')
        audio_url = s3.upload_file(filename, data)
        success = deliver_squawk(args()['recipients'], sender, audio_url, duration)
        return json.dumps({"success": success})
    else:
        return json.dumps({"success": False, "error": "bad_token"})
def test_upload_file(self, client):
    res = upload_file('file_name', 'bucket')
    self.assertEqual(res, True)
    fake_client.upload_file.assert_called_once_with(
        'file_name', 'bucket', 'file_name')
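# For context: a minimal sketch of an upload_file helper consistent with the test above,
# assuming the common boto3 client pattern (returns True on success, object key defaults
# to the file name). The real module under test may be implemented differently.
import logging

import boto3
from botocore.exceptions import ClientError


def upload_file(file_name, bucket, object_name=None):
    """Upload a local file to S3; the object key defaults to the file name."""
    if object_name is None:
        object_name = file_name
    client = boto3.client('s3')
    try:
        client.upload_file(file_name, bucket, object_name)
    except ClientError as e:
        logging.error(e)
        return False
    return True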
"fWCm9MsvzJDeZcszmFhntItkoFb3Y3bldiqnQy" # Get list all of the photos in Google Photos album ptoto_ids = get_photos_by_album_id(album_id) # Download photos from Google Photos by ids filenames = download_photos_by_ids(ptoto_ids) # Upload all photos to Amazon S3 bucket bucket_name = 'photos.insta' length = len(filenames) for i, filename in enumerate(filenames): sys.stdout.write('\r') sys.stdout.write('uploading: %s/%s' % (i+1, length)) sys.stdout.flush() s3.upload_file(filename, bucket_name) sys.stdout.write('\n') sys.stdout.flush() # Make photo collage make_collage(filenames, 'insta.png', 600, 300) # Send photo collage to grandma path = '~/.key/insta.enc' token = getpassword(path) data = {'file':('insta.png', open('insta.png', 'rb'), 'png')} params = {'initial_comment':'Hello, World!', 'title':'insta.png', 'filename':'insta.png', 'token':token, 'channels':['#family']} r = requests.post("https://slack.com/api/files.upload", params=params, files=data)
    print('Cluster status: {}'.format(status))
    if status == 'available':
        break
    time.sleep(30)

REDSHIFT_ENDPOINT = redshift.describe_clusters(
    ClusterIdentifier=REDSHIFT_CLUSTER_IDENTIFIER).get('Clusters')[0]['Endpoint']['Address']
REDSHIFT_VPC_ID = redshift.describe_clusters(
    ClusterIdentifier=REDSHIFT_CLUSTER_IDENTIFIER).get('Clusters')[0]['VpcId']
set_external_access(ec2_conn=ec2, vpc_id=REDSHIFT_VPC_ID, redshift_port=REDSHIFT_PORT)

print('---- Creating a S3 Bucket ----')
S3_BUCKET_NAME = create_bucket(s3, 'mark')
time.sleep(5)

print('---- Uploading a file ----')
upload_file(s3, './data', S3_BUCKET_NAME)
time.sleep(5)

print('---- Creating table ----')
script = open("./sql/1.create_table.sql", "r").read()
execute_single_sql(dbname=REDSHIFT_DB, host=REDSHIFT_ENDPOINT, port=REDSHIFT_PORT,
                   user=REDSHIFT_USER, password=REDSHIFT_PASSWORD, sql=script)

print('---- Transferring data from S3 to Redshift ----')
script = open("./sql/2.fill_tables.sql", "r").read()
script = script\
    .replace('{%aws_ian%}', REDSHIFT_ROLE_ARN)\
def backup():
    if os.getenv('Disabled') == 'yes':
        return

    logging.info('Contents of host volumes directory...')
    logging.info(os.listdir('/HostVolumeData'))

    session = boto3.session.Session()
    s3_client = session.client(
        service_name='s3',
        aws_access_key_id=config.get_s3_access_key(),
        aws_secret_access_key=config.get_s3_secret_key(),
        region_name=config.get_s3_region(),
        endpoint_url=config.get_s3_endpoint(),
    )
    if s3_client is None:
        logging.critical('Client failed to be instantiated.')
        sys.exit(1)

    volumes_to_backup = config.get_volumes_to_backup()
    if volumes_to_backup == '':
        logging.critical('No volumes were specified.')
        sys.exit(1)

    # Generate a temporary directory.
    if not os.path.exists('temp'):
        os.makedirs('temp')

    # Check that each volume listed in the environment variables exists in the host
    # filesystem. Iterate over a copy so removals do not skip elements.
    arr_volumes = [x.strip() for x in volumes_to_backup.split(',')]
    for vol in list(arr_volumes):
        if vol not in os.listdir('/HostVolumeData'):
            arr_volumes.remove(vol)
            logging.error(
                'Volume \'%s\' is not in host\'s Docker filesystem.' % vol)

    # S3 directory structure:
    #
    #   env_bucket_name/
    #       env_directory_name/
    #           volume_name/
    #               BACKUP_<date-time>.tar.gz
    #               BACKUP_<date-time>.tar.gz
    #               ...
    #               SNAPSHOT_<snapshot-number>.tar.gz
    for vol_name in arr_volumes:
        # Open an archive file to pack the backup data into.
        tar = tarfile.open('./temp/archive_build.tar.gz', 'w:gz')
        for file_name in os.listdir('/HostVolumeData/%s/_data/' % vol_name):
            # Add each file in the volume's data directory to the archive.
            file_path = '/HostVolumeData/%s/_data/%s' % (vol_name, file_name)
            tar.add(file_path, arcname=file_name)
        tar.close()

        latest_snapshot_path = s3.get_key_from_prefix(
            '%s/%s/SNAPSHOT-' % (config.get_directory_name(), vol_name), s3_client)
        if latest_snapshot_path is not False:
            latest_snapshot_number = latest_snapshot_path[
                latest_snapshot_path.index('SNAPSHOT-') + len('SNAPSHOT-'):-7]
        else:
            latest_snapshot_number = 0

        if int(latest_snapshot_number) > 0:
            if not int(latest_snapshot_number) + 1 > config.get_backup_interval():
                response = s3.delete_objects_by_prefix(
                    '%s/SNAPSHOT-' % vol_name, s3_client)
            else:
                # Copy the snapshot to a new object name and then delete the old version.
                s3_client.copy(
                    # From here
                    {
                        'Bucket': config.get_bucket_name(),
                        'Key': s3.get_key_from_prefix(
                            '%s/%s/SNAPSHOT-' % (config.get_directory_name(), vol_name),
                            s3_client)
                    },
                    # To here
                    config.get_bucket_name(),
                    '%s/%s/BACKUP-%s.tar.gz' % (
                        config.get_directory_name(),
                        vol_name,
                        datetime.now().strftime('%Y%m%d-%H%M%S')
                    )
                )
                s3.delete_objects_by_prefix('%s/SNAPSHOT-' % vol_name, s3_client)
                latest_snapshot_number = '0'

        response = s3.upload_file(
            './temp/archive_build.tar.gz', s3_client,
            '%s/SNAPSHOT-%s.tar.gz' % (vol_name, str(int(latest_snapshot_number) + 1)))