def backup(self):
    for d in DATABASES:
        d = d.strip()
        backup_file = '{0}/{1}-{2}.sql.gz'.format(BACKUP_DIR, d, today)
        print 'Creating archive for ' + backup_file
        os.popen(mysql_dump_path +
                 " -u {0} -p{1} -h {2} -e --opt -c {3}|gzip -c > {4}".format(
                     mysql_username, mysql_password, mysql_hostname, d, backup_file))
        print 'Uploading database dump to S3 ' + backup_file + '...'
        k = Key(self.b)
        k.key = backup_file  # the key name is set via the lowercase 'key' attribute
        k.set_contents_from_filename(backup_file)
        k.set_acl("public-read")
def getandprocesstask(task_queue, process_queue, table, s3_bucket, no_of_images):
    is_dup = 0
    if no_of_images < 60:
        try:
            msg = task_queue.get_messages()
            task_msg = msg[0].get_body()
            json_msg = json.loads(task_msg)
            task_queue.delete_message(msg[0])
            key = str(json_msg["task_id"])
            try:
                element = table.get_item(hash_key=key)
                is_dup = 1
            except Exception:
                is_dup = 0
            if is_dup == 0:
                element_data = {'Body': 'True'}
                element = table.new_item(hash_key=key, attrs=element_data)
                element.put()
                exe = 'wget ' + str(json_msg["task"])
                os.system(exe)
            else:
                print 'Duplicate task: ' + key
        except Exception as e:
            # No message available yet; wait briefly and poll again.
            print "Listening"
            time.sleep(1)
            getandprocesstask(task_queue, process_queue, table, s3_bucket, no_of_images)
    if no_of_images == 60:
        os.system("./setup.sh")
        tempcommand = 'mv output.mpg 0.mpg'
        os.system(tempcommand)
        k = Key(s3_bucket)
        k.key = "0.mpg"
        k.set_contents_from_filename("0.mpg")
        k.set_canned_acl('public-read')
        video_url = k.generate_url(0, query_auth=False, force_http=True)
        processtask(0, process_queue, video_url)
        tempcommand = 'rm -rf 0.mpg'
        os.system(tempcommand)
def imageProcess(cID, jID):
    global imageCount
    try:
        os.system("./image.sh")
        file_name = str(cID) + str(jID) + ".mpg"
        tempcommand = 'mv output.mpg ' + file_name
        os.system(tempcommand)
        print 'Uploading File'
        k = Key(bucket)
        k.key = file_name
        k.set_contents_from_filename(file_name)
        k.set_canned_acl('public-read')
        url = k.generate_url(0, query_auth=False, force_http=True)
        print url
        m1 = Message()
        j = responseMessage(cID, jID, url)
        m1.set_body(j)
        processed_queue.write(m1)
        tempcommand = 'rm -rf ' + file_name
        os.system(tempcommand)
        imageCount = 0
    except Exception as e:
        print 'Interrupted ' + str(e)
        imageCount = 0
def copy_to_other_s3(self, filename, other_s3, to_filename):
    key = Key(self.bucket)
    key.key = self.prefix + filename
    key.copy(other_s3.bucket, to_filename)
def enviar(self, caminho_arquivo_local, caminho_s3):
    # Upload a local file to the given S3 path and mark it private.
    chave_s3 = Key(self.balde)
    chave_s3.key = caminho_s3
    chave_s3.set_contents_from_filename(caminho_arquivo_local)
    chave_s3.set_acl('private')
def put_object(self, name, size):
    buf = 'A' * size
    k = Key(self.bucket, name)
    start_time = time.time()
    k.set_contents_from_string(buf)
def delete_video(self, filename):
    bucket = self.connection.get_bucket(self.app.config['AWS_BUCKET'])
    key = Key(bucket)
    key.key = filename
    bucket.delete_key(key)
def update_product(product_id):
    products = db.products
    if session["user"] != "manager":
        return render_template("home.html", error="You are not manager")
    if request.method == "POST":
        product_info = products.find_one({"code": product_id})
        cur_images_count = len(product_info["product_images"]) + 1
        title = request.form["title"]
        description = request.form["description"]
        price = request.form["price"]
        quantity = request.form["quantity"]
        product_image = request.files.getlist("product_image[]")
        product_images = []
        bucket = get_s3_bucket()
        for each_image in product_image:
            if each_image.filename == "":
                break
            file_contents = each_image.read()
            file_name = secure_filename(each_image.filename)
            filetype = file_name.split(".")[1]
            if filetype == "jpg" or filetype == "png" or filetype == "jpeg":
                actual_filename = product_id + "_" + str(cur_images_count) + "." + filetype
                k = Key(bucket)
                k.key = actual_filename
                k.set_contents_from_string(file_contents)
                cur_images_count += 1
                product_images.append(actual_filename)
        # Append newly uploaded images to any existing ones.
        all_images = product_info["product_images"] + product_images
        products.update_one({"code": product_id}, {
            "$set": {
                "code": product_id,
                "title": title,
                "description": description,
                "price": price,
                "quantity": quantity,
                "product_images": all_images,
            }
        })
        return redirect(url_for("manager.update_product", product_id=product_id))
    else:
        product_info = products.find_one({"code": product_id})
        if product_info is None:
            return render_template("update_product.html", error="Product Not Found")
        product_images = []
        count_images = 0
        bucket = get_s3_bucket()
        for each in product_info["product_images"]:
            count_images += 1
            bucket_key = bucket.get_key(each)
            bucket_url = bucket_key.generate_url(3600, query_auth=True, force_http=True)
            product_images.append(bucket_url)
        title = product_info["title"]
        description = product_info["description"]
        price = product_info["price"]
        quantity = product_info["quantity"]
        return render_template("update_product.html",
                               product_images=product_images,
                               count_images=count_images,
                               price=price,
                               description=description,
                               title=title,
                               quantity=quantity,
                               product_id=product_id)
from boto.s3.key import Key

tournament = 'RBC Heritage'
tournament_string = 'rbc-heritage'
year = 2015
hours_offset = 4

# create connection to S3
c = S3Connection('AKIAIQQ36BOSTXH3YEBA', 'cXNBbLttQnB9NB3wiEzOWLF13Xw8jKujvoFxmv3L')

# create connection to bucket
b = c.get_bucket('public.tenthtee')

# get tee times
k1 = Key(b)
k1.key = 'sportsData/' + str(year) + '/' + tournament + '/rounds/1/teetimes.json'
rd1_tee_times = k1.get_contents_as_string()
rd1_tee_times = json.loads(rd1_tee_times)

k2 = Key(b)
k2.key = 'sportsData/' + str(year) + '/' + tournament + '/rounds/2/teetimes.json'
rd2_tee_times = k2.get_contents_as_string()
rd2_tee_times = json.loads(rd2_tee_times)


def find_windspeed_average(day, rd, time, forecast):
    return 1
conn_u2 = boto.connect_s3(
    aws_access_key_id=u2_access_key,
    aws_secret_access_key=u2_secret_key,
    host=RADOSHOST,
    port=RADOSPORT,
    is_secure=False,
    calling_format=boto.s3.connection.OrdinaryCallingFormat())

####################################################
################## TEST CASE #######################
print "\nCreating and populating the bucket for user1..."
b1 = conn_u1.create_bucket(BUCKETNAME)
k = Key(b1)
for i in range(1, 11):
    print "\tCreating obj %d" % (i)
    keyv = 'keynum' + str(i)
    valv = 'Contents of object ' + str(i)
    k.key = keyv
    k.set_contents_from_string(valv)

print "\nSetting ACL..."
b1.set_acl('authenticated-read')
##b1.set_acl('private')

urlname = b1.generate_url(100)
print "The URL is: " + str(urlname)
####################################################
(type, path, branch, revision, build_url) = sys.argv[1:]

cfg = utils.get_build_config()
if not cfg.verify_aws():
    print "Error: Need both AWS_KEY and AWS_SECRET in the environment or config.json"
    sys.exit(1)

bucket = cfg.open_bucket()

sha1 = utils.shasum(path)
filename = os.path.basename(path)
filesize = os.path.getsize(path)

print 'uploading %s (branch %s / revision %s)...' % (filename, branch, revision)

key = Key(bucket)
key.key = '%s/%s/%s' % (type, branch, filename)
key.set_metadata('git_revision', revision)
key.set_metadata('git_branch', branch)
key.set_metadata('build_url', build_url)
key.set_metadata('build_type', type)
key.set_metadata('sha1', sha1)

max_retries = 5
uploaded = False
for i in range(1, max_retries + 1):
    try:
        key.set_contents_from_filename(path)
        print "-> successfully uploaded on attempt #%d" % i
        uploaded = True
        break
def deploy_file(src, dst, headers={}):
    """
    Deploy a single file to S3, if the local version is different.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    k = bucket.get_key(dst)
    s3_md5 = None

    if k:
        s3_md5 = k.etag.strip('"')
    else:
        k = Key(bucket)
        k.key = dst

    file_headers = copy.copy(headers)

    if app_config.S3_BUCKET == app_config.STAGING_S3_BUCKET:
        policy = 'private'
    else:
        policy = 'public-read'

    if 'Content-Type' not in headers:
        file_headers['Content-Type'] = mimetypes.guess_type(src)[0]

        if file_headers['Content-Type'] == 'text/html':
            # Force character encoding header
            file_headers['Content-Type'] = '; '.join(
                [file_headers['Content-Type'], 'charset=utf-8'])

    # Gzip file
    if os.path.splitext(src)[1].lower() in GZIP_FILE_TYPES:
        file_headers['Content-Encoding'] = 'gzip'

        with open(src, 'rb') as f_in:
            contents = f_in.read()

        output = StringIO()
        f_out = gzip.GzipFile(filename=dst, mode='wb', fileobj=output, mtime=0)
        f_out.write(contents)
        f_out.close()

        local_md5 = hashlib.md5()
        local_md5.update(output.getvalue())
        local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s (gzipped)' % (src, dst)
            k.set_contents_from_string(output.getvalue(), file_headers, policy=policy)
    # Non-gzip file
    else:
        with open(src, 'rb') as f:
            local_md5 = hashlib.md5()
            local_md5.update(f.read())
            local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s' % (src, dst)
            k.set_contents_from_filename(src, file_headers, policy=policy)
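# A minimal usage sketch, not from the original source: deploying every file under a
# hypothetical local directory with deploy_file() above. The helper name deploy_folder
# and the src_root/dst_prefix arguments are illustrative assumptions; only deploy_file
# itself comes from the snippet above.
def deploy_folder(src_root, dst_prefix, headers={}):
    for local_path, dirnames, filenames in os.walk(src_root):
        for name in filenames:
            src = os.path.join(local_path, name)
            rel = os.path.relpath(src, src_root)
            # deploy_file() skips the upload when the local MD5 matches the S3 ETag
            deploy_file(src, '%s/%s' % (dst_prefix, rel), headers)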
if __name__ == '__main__':
    # Uses environment variables:
    #   AWS_ACCESS_KEY_ID -- AWS Access Key ID
    #   AWS_SECRET_ACCESS_KEY -- AWS Secret Access Key
    argv.pop(0)
    if len(argv) != 4:
        raise SystemExit(
            "USAGE: node_modules_cache.py <download | upload> <friendly name> <dependencies file> <directory>"
        )
    mode, friendly_name, dependencies_file, directory = argv

    conn = S3Connection()
    bucket = conn.lookup(BUCKET_NAME)
    if bucket is None:
        raise SystemExit("Could not access bucket!")

    dependencies_file_hash = _sha256_of_file(dependencies_file)

    key = Key(bucket, dependencies_file_hash)
    key.storage_class = 'REDUCED_REDUNDANCY'

    if mode == 'download':
        download(directory)
    elif mode == 'upload':
        if isfile(NEED_TO_UPLOAD_MARKER):  # FIXME
            upload(directory)
        else:
            print("No need to upload anything.")
    else:
        raise SystemExit("Unrecognized mode {!r}".format(mode))
def delete(self, docKey):
    k = Key(self.__getBucket())
    k.key = docKey
    k.delete()
def uploadResultToS3(bucket, game_folder_name, srcDir):
    """ GETOPT """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hanl:v",
                                   ["help", "all", "new", "language"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err)  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    """ PARAMS """
    output = None
    verbose = False
    upload_all = False

    """ PARSE OPTS """
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-n", "--new"):
            upload_all = False
            print "upload all set False"
        elif o in ("-a", "--all"):
            upload_all = True
            print "upload all set True"
        elif o in ("-l", "--language"):
            print "language chosen: " + a
            LANGUAGE_CODE = a
        else:
            assert False, "unhandled option"

    """ BOTO """
    b = conn.get_bucket(bucket)
    k = Key(b)

    """ PATTERN MATCHING """
    file_pattern = re.compile(
        r'.*\.(md$|aif$|tiff$|au$|psd$|xcf$|sh$|py$|pyc$|php$|bat$|git$|gitignore$|gitkeep$|tm_properties$|txt$|jar$|DS_Store$)'
    )
    folder_pattern = re.compile(
        r'.*(/node_modules/|/node_modules|/glue/|/glue|/doc/|/doc|/config/|/config|/lib/|/lib|/tools/|/tools|/git/|/git|/.git/|/.git)'
    )
    folder_pattern_windows = re.compile(
        r'.*(\\node_modules\\|\\node_modules|\\glue\\|\\glue|\\doc\\|\\doc|\\config\\|\\config|\\lib\\|\\lib|\\tools\\|\\tools|\\git\\|\\git|\\.git\\|\\.git)'
    )

    """ UPLOAD SETTINGS """
    day_freshness = 1
    seconds_freshness = 86400 / 2
    if upload_all:
        print 'uploading ALL files in folders ...'
    else:
        print 'uploading files < ' + str(day_freshness) + ' days' + ' and < ' + str(
            seconds_freshness / 3600) + ' hours old ...'

    """ WALKING THE BUCKET """
    print 'preparing to walk the bucket named ' + b.name + '...'
    for path, dir, files in os.walk(srcDir):
        for file in files:
            """ filter out unwanted file extensions (eg: xcf,sh,py) """
            if not re.match(file_pattern, file) and not re.match(
                    folder_pattern, path) and not re.match(
                    folder_pattern_windows, path):
                """ get freshness """
                last_modified_time_epoch_seconds = os.path.getmtime(
                    os.path.join(path, file))
                last_modified_time = datetime.fromtimestamp(
                    last_modified_time_epoch_seconds)
                delta = datetime.now() - last_modified_time
                if upload_all:
                    upload(k, b, game_folder_name, path, file.decode('utf8'),
                           srcDir, LANGUAGE_CODE)
                else:
                    if delta.days < day_freshness and delta.seconds < seconds_freshness:
                        upload(k, b, game_folder_name, path, file.decode('utf8'),
                               srcDir, LANGUAGE_CODE)
# create connection to bucket
b = c.get_bucket('public.tenthtee')

link = 'http://www.pgatour.com/tournaments/' + tournament_link + '/field.html'

field = []
r = requests.get(link)
soup = BeautifulSoup(r.text)
player_table = soup.find(class_='field-table-content')
players = player_table.find_all("p")
for player in players:
    raw_name = player.text
    clean_name = raw_name.split(',')
    clean_name = clean_name[1][1:] + ' ' + clean_name[0]
    field.append(clean_name)

print field
print len(field)

field = json.dumps(field)

k = Key(b)
k.key = 'field'
k.set_contents_from_string(field)
k.make_public()

k1 = Key(b)
k1.key = 'field/' + str(year) + '/' + tournament + '/field'
k1.set_contents_from_string(field)
def perform(args):
    performStart = datetime.now()
    md5 = None
    replayDB = None

    try:
        sc2reader_to_esdb = SC2ReaderToEsdb()

        #
        # at this point the 'hash' may actually be an S3 key like '/uploads/1234-5667-1234234/filename.sc2replay'
        # or simply '{md5}'
        #
        # not to worry, in a few lines, we'll rename the S3 key to be md5.sc2replay
        #
        filename = args['hash']
        if re.search('.sc2replay', filename, re.IGNORECASE) is None:
            filename = filename + ".SC2Replay"

        bucket = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)\
                     .get_bucket(settings.REPLAY_BUCKET_NAME)
        # logging.getLogger("jobs").info("trying to get key {}".format(filename))
        k = bucket.get_key(filename)
        replaystring = k.get_contents_as_string()
        md5 = hashlib.md5(replaystring).hexdigest()

        #
        # rename the S3 key to simply be md5.SC2Replay, so it's easier for us to find it
        # when we need it.
        #
        # http://stackoverflow.com/questions/2481685/amazon-s3-boto-how-to-rename-a-file-in-a-bucket
        k.copy(settings.REPLAY_BUCKET_NAME, md5 + ".SC2Replay",
               metadata=None, preserve_acl=False)

        replayDB, blob = sc2reader_to_esdb.processReplay(
            StringIO(replaystring), args['channel'])

        if len(blob) > 0:
            blobbucket = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)\
                             .get_bucket(settings.BLOB_BUCKET_NAME)
            k = Key(blobbucket)
            k.key = "%i" % (replayDB.match.id)
            blobdump = json.dumps(blob)
            k.set_contents_from_string(blobdump)

    except Exception as e:
        tb = traceback.format_exc()
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logging.getLogger("jobs").info(
            "parsing failed for replay {}. oh well. exception={}. {} {} {} {}"
            .format(args['hash'].encode('ascii'), e, exc_type, fname,
                    exc_tb.tb_lineno, tb))
        pass

    finally:
        alldone = datetime.now()

        # Enqueue ruby PostParse job, always.
        ResQ(server=settings.REDIS_SERVER).enqueue_from_string(
            'ESDB::Jobs::Sc2::Replay::PostParse', 'replays-high', {
                'uuid': args['uuid'],
                'hash': md5,
                'provider_id': str(args['provider_id']),
                'ggtracker_received_at': args['ggtracker_received_at'],
                'esdb_received_at': args['esdb_received_at'],
                'preparse_received_at': args['preparse_received_at'],
                'jobspy_received_at': performStart.strftime('%s.%f'),
                'jobspy_done_at': alldone.strftime('%s.%f'),
            })
        # regarding converting times to floating point seconds since the
        # epoch, using %s above is dangerous because its not python, it
        # calls the underlying OS. i tried using the solution here:
        # http://stackoverflow.com/questions/6999726/python-getting-millis-since-epoch-from-datetime/11111177#11111177
        # but i ran into timezone issues and did the lazy thing instead.

        matchId = 0
        if replayDB and hasattr(replayDB, "match") and replayDB.match.id:
            matchId = replayDB.match.id

        logging.getLogger("jobs").info(
            "all done with match {}. total time in ParseReplay.perform() = {}"
            .format(matchId, alldone - performStart))
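# A minimal sketch of the S3 "rename" pattern referenced in the comments above, not from
# the original source: boto 2 has no rename call, so the usual workaround is a server-side
# copy to the new key followed by deleting the old key. The helper name rename_key and its
# arguments are illustrative assumptions.
def rename_key(bucket, old_name, new_name):
    old_key = bucket.get_key(old_name)
    # copy within the same bucket, keeping metadata and ACL
    old_key.copy(bucket.name, new_name, metadata=None, preserve_acl=True)
    bucket.delete_key(old_name)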
    if len(all_header) == 0:
        filename = folder_name + "-" + str(count)
    else:
        filename = all_header.pop()
    csv_name = filename + ".csv"
    csv_path = path + "/" + csv_name
    final_path = path
    with open(csv_path, 'w', encoding='utf-8-sig', newline='') as f:
        logging.debug("Writing tables to CSV")
        writer = csv.writer(f)
        writer.writerows(newTable)
    logging.debug("Creating Zip file")
    shutil.make_archive("out", 'zip', final_path)
    bucketname = accesskey.lower() + "nuadsgroup3" + accNumber
    bucket = s3_connection.create_bucket(bucketname)
    logging.debug("Creating AWS S3 bucket " + bucketname)
    upload_to = Key(bucket)
    upload_to.key = 'problem1'
    logging.debug("Zip File & Log File Uploaded to AWS S3 bucket " + bucketname)
    # Note: both uploads reuse the same key ('problem1'), so the second call
    # overwrites the zip with the log file.
    upload_to.set_contents_from_filename(str("out" + ".zip"))
    upload_to.set_contents_from_filename(str(logName))
else:
    # The matching 'if' for this 'else' (the check that the requested page exists)
    # precedes this excerpt.
    logging.debug("Error 404: CIK or Accession Number Does not exist!")
    print('Web site does not exist')
command = ["python", "/home/fortytwo/eddie/eddie-self-deploy.py"]

assets = S3Assets(args.serviceType, "fortytwo-builds")

if args.version == 'latest':
    last_build = requests.get(
        'http://localhost:8080/job/all-quick-s3/lastStableBuild/api/json').json()
    log("Uploading assets for build %s/%s" %
        (last_build['fullDisplayName'], args.serviceType))
    latest_asset = None
    for artifact in last_build['artifacts']:
        relative_path = artifact['relativePath']
        potential_key = Key(assets.bucket, os.path.basename(relative_path))
        potential_asset = S3Asset(potential_key)
        if potential_asset.serviceType == args.serviceType:
            if potential_key.exists():
                log('Build asset %s already exists, continuing with deploy' % relative_path)
            else:
                tmp_uuid = str(uuid.uuid4())
                source_dir = 'deploy-tmp/%s' % (tmp_uuid)
                source_path = '%s/%s' % (source_dir, relative_path)
                os.makedirs(os.path.dirname(source_path))
                jenkins_file = requests.get(
                    'http://localhost:8080/job/all-quick-s3/lastStableBuild/artifact/%s'
                    % relative_path)
                with open(source_path, 'wb') as handle:
                    for chunk in jenkins_file.iter_content(1024):
def s3_save(files_to_write, the_record):
    """
    Write a sketch, scrape, and html file to S3
    """
    db.session.add(the_record)
    # These are the content-types for the files S3 will be serving up
    response_types = {
        'sketch': 'image/png',
        'scrape': 'text/plain',
        'html': 'text/html'
    }

    # Iterate through each file we need to write to s3
    for capture_type, file_name in files_to_write.items():
        # Connect to S3, generate Key, set path based on capture_type, write file to S3
        conn = boto.s3.connect_to_region(
            region_name=app.config.get('S3_BUCKET_REGION_NAME'),
            calling_format=boto.s3.connection.OrdinaryCallingFormat())
        key = Key(conn.get_bucket(app.config.get('S3_BUCKET_PREFIX')))
        path = "sketchy/{}/{}".format(capture_type, file_name)
        key.key = path
        key.set_contents_from_filename(
            app.config['LOCAL_STORAGE_FOLDER'] + '/' + file_name)

        # Generate a URL for downloading the files
        url = conn.generate_url(
            app.config.get('S3_LINK_EXPIRATION'),
            'GET',
            bucket=app.config.get('S3_BUCKET_PREFIX'),
            key=key.key,
            response_headers={
                'response-content-type': response_types[capture_type],
                'response-content-disposition': 'attachment; filename=' + file_name
            })

        # Generate appropriate url based on capture_type
        if capture_type == 'sketch':
            the_record.sketch_url = str(url)
        if capture_type == 'scrape':
            the_record.scrape_url = str(url)
        if capture_type == 'html':
            the_record.html_url = str(url)

    # Remove local files if we are saving to S3
    os.remove(os.path.join(app.config['LOCAL_STORAGE_FOLDER'],
                           files_to_write['sketch']))
    os.remove(os.path.join(app.config['LOCAL_STORAGE_FOLDER'],
                           files_to_write['scrape']))
    os.remove(os.path.join(app.config['LOCAL_STORAGE_FOLDER'],
                           files_to_write['html']))

    # If we don't have a finisher, the task is complete
    if the_record.callback:
        the_record.capture_status = 'S3_ITEMS_SAVED'
    else:
        the_record.capture_status = 'S3_ITEMS_SAVED'
        the_record.job_status = 'COMPLETED'

    db.session.commit()
def insert_object(bucket_name, upload_type, conn):
    """ Stores an object or starts a resumable upload.

    Args:
        bucket_name: A string specifying a bucket name.
        upload_type: A string specifying the upload type ('media', 'resumable', or 'multipart').
        conn: An S3Connection instance.
    Returns:
        A JSON string representing an object.
    """
    bucket = conn.get_bucket(bucket_name)
    object_name = request.args.get('name', default=None)
    upload_id = request.args.get('upload_id', default=None)

    if upload_type == 'media':
        if object_name is None:
            return error('Object name is required.', HTTP_BAD_REQUEST)

        # Decompress content if necessary.
        if 'Content-Encoding' in request.headers:
            if request.headers['Content-Encoding'] == 'gzip':
                content = gzip.decompress(request.data)
            else:
                return error('Unrecognized Content-Encoding.', HTTP_NOT_IMPLEMENTED)
        else:
            content = request.data

        key = Key(bucket, object_name)
        key.set_contents_from_string(content)
        obj = object_info(
            key, last_modified=datetime.datetime.now(datetime.timezone.utc))
        return Response(json.dumps(obj), mimetype='application/json')

    if upload_type == 'resumable' and upload_id is None:
        if object_name is None:
            request_data = request.get_json()
            try:
                object_name = request_data['name']
            except KeyError:
                return error('Object name is required.', HTTP_BAD_REQUEST)

        new_upload_id = ''.join(
            random.choice(current_app.config['RESUMABLE_ID_CHARS'])
            for _ in range(current_app.config['RESUMABLE_ID_LENGTH']))
        current_app.logger.debug('new upload_id: {}, object: {}'.format(
            new_upload_id, object_name))
        state = {'object': object_name, 'status': UploadStates.NEW}
        upsert_upload_state(new_upload_id, state)

        upload_url = url_for('insert_object', bucket_name=bucket_name)
        redirect = request.url_root[:-1] + upload_url + \
            '?uploadType=resumable&upload_id={}'.format(new_upload_id)
        response = Response('')
        response.headers['Location'] = redirect
        return response

    if upload_type == 'multipart':
        try:
            match = re.match(r"^multipart/related; boundary='(.*)'$",
                             request.headers['Content-Type'])
            boundary = match.group(1)
        except (KeyError, AttributeError):
            return error('Invalid Content-Type.', HTTP_BAD_REQUEST)

        parts = request.data.split(b'--' + boundary.encode())
        metadata = json.loads(parts[1].decode().splitlines()[-1])
        file_data = parts[2].split(b'\n\n', maxsplit=1)[-1]
        if file_data.endswith(b'\n'):
            file_data = file_data[:-1]

        current_app.logger.debug('metadata: {}'.format(metadata))
        object_name = metadata['name']
        key = Key(bucket, object_name)
        if 'contentType' in metadata:
            key.set_metadata('Content-Type', metadata['contentType'])
        key.set_contents_from_string(file_data)
        obj = object_info(
            key, last_modified=datetime.datetime.now(datetime.timezone.utc))
        return Response(json.dumps(obj), mimetype='application/json')

    return error('Invalid uploadType.', HTTP_BAD_REQUEST)
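# A hypothetical client-side sketch, not part of the original handler: exercising the
# 'media' branch of insert_object() with the requests library. The host, port, and route
# are assumptions; only the uploadType/name query parameters mirror the handler's logic.
import requests

resp = requests.post(
    'http://localhost:8080/storage/v1/b/my-bucket/o',   # assumed route for the handler
    params={'uploadType': 'media', 'name': 'example.txt'},
    data=b'hello world')
print(resp.json())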
if __name__ == "__main__":
    from boto.s3.connection import S3Connection
    from boto.s3.key import Key
    from datetime import datetime
    from lots.local_settings import AWS_SECRET_ACCESS_KEY, AWS_ACCESS_KEY_ID

    conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    bucket = conn.get_bucket('large-lots-data')
    k = Key(bucket)
    k.key = 'largelots_%s.sqlite' % datetime.now().isoformat()
    k.set_contents_from_filename('db.sqlite3')
def search_bucket(conn, b_name):
    b = conn.get_bucket(b_name)
    print b
    k = Key(b)
    print k, k.name, k.metadata
def upload_video(self, data, filename):
    bucket = self.connection.get_bucket(self.app.config['AWS_BUCKET'])
    key = Key(bucket)
    key.key = filename
    key.set_contents_from_string(data)
    key.set_acl('public-read')
def put_file(conn, b_name, src_file, remote_file):
    b = conn.get_bucket(b_name)
    k = Key(b)
    k.key = remote_file
    k.set_contents_from_filename(src_file)
from settings import *

l = {}

conn = S3Connection(AWS_ACCESS_KEY, AWS_SECRET_KEY)
bucket = conn.get_bucket(BUCKET)

# Retrieve the list of existing files
rs = bucket.list()
for key in rs:
    l[key.name] = '[s3]'

prefix_length = len(LOCAL_DIR) + 1

# Walk the local directory, upload any files not in the list of existing files
for root, dirs, files in os.walk(LOCAL_DIR):
    d = root[prefix_length:]
    m = re.match(r"(\d\d\d\d)_(\d\d)_(\d\d)", d)
    if m:
        for f in files:
            k = "pictures/%s/%s/%s/%s" % (m.group(1), m.group(2), m.group(3), f)
            if k in l:
                l[k] = "[ok]"
            else:
                print k,
                key = Key(bucket)
                key.key = k
                # upload via the Key object, not the string path
                key.set_contents_from_filename(os.path.join(root, f))
                print "done"
                l[k] = "[ ]"
from boto.emr.connection import EmrConnection
import boto.emr.step as step
import boto.emr

# <codecell>

### Create connection to own bucket
s3con = S3Connection('AKIAJRV3RN6NXQTSSTBA', '3e212d6rs99xtiPgwKnfN1QD30WZk2hJwCWjMcGc')

# <codecell>

b = s3con.get_bucket('wambia660fall2013')

# <codecell>

k = Key(b)
k.key = 'fullNgramNamesBoto.hql'
k.set_contents_from_filename('/Users/winteram/Documents/Teaching/BIA_Fall2013/fullNgramNamesBoto.hql')
k.close()

# <codecell>

### Will run Hive via EMR
emrcon = EmrConnection('AKIAJRV3RN6NXQTSSTBA', '3e212d6rs99xtiPgwKnfN1QD30WZk2hJwCWjMcGc')

# <codecell>

install_hive_step = step.InstallHiveStep(hive_versions='0.11.0.1')

# <codecell>
fileName = options.file[0]
bucket = options.bucket[0]

if config.USING_AWS_S3_FOR_STORAGE:
    aws_access_key = os.environ.get('AWS_ACCESSKEYID')
    aws_secret_access_key = os.environ.get('AWS_SECRETACCESSKEY')
    conn = S3Connection(aws_access_key, aws_secret_access_key)
    # conn = boto.connect_s3()
    mybucket = conn.get_bucket(bucket)
    fname = os.path.basename(fileName)

    from boto.s3.key import Key
    k = Key(mybucket)
    if options.folder:
        k.key = options.folder[0] + "/" + fname
    else:
        k.key = fname

    # Check if it already exists
    possible_key = mybucket.get_key(k.key)

    if force or not possible_key:
        if verbose:
            print "storing to s3:", bucket, k.key
        k.set_contents_from_filename(fileName)
        mybucket.set_acl('public-read', k.key)
def read(self, filename):
    key = Key(self.bucket)
    key.key = self.prefix + filename
    return key.get_contents_as_string()
def import_uploads(import_dir: Path, processing_avatars: bool=False,
                   processing_emojis: bool=False) -> None:
    if processing_avatars and processing_emojis:
        raise AssertionError("Cannot import avatars and emojis at the same time!")
    if processing_avatars:
        logging.info("Importing avatars")
    elif processing_emojis:
        logging.info("Importing emojis")
    else:
        logging.info("Importing uploaded files")

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())  # type: List[Dict[str, Any]]
    timestamp = datetime_to_timestamp(timezone_now())

    re_map_foreign_keys_internal(records, 'records', 'realm_id',
                                 related_table="realm", id_field=True)
    if not processing_emojis:
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)

    s3_uploads = settings.LOCAL_UPLOADS_DIR is None

    if s3_uploads:
        if processing_avatars or processing_emojis:
            bucket_name = settings.S3_AVATAR_BUCKET
        else:
            bucket_name = settings.S3_AUTH_UPLOADS_BUCKET
        conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
        bucket = conn.get_bucket(bucket_name, validate=True)

    count = 0
    for record in records:
        count += 1
        if count % 1000 == 0:
            logging.info("Processed %s/%s uploads" % (count, len(records)))

        if processing_avatars:
            # For avatars, we need to rehash the user ID with the
            # new server's avatar salt
            relative_path = user_avatar_path_from_ids(record['user_profile_id'],
                                                      record['realm_id'])
            if record['s3_path'].endswith('.original'):
                relative_path += '.original'
            else:
                relative_path += '.png'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            relative_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            record['last_modified'] = timestamp
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_file'
            relative_path = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            path_maps['attachment_path'][record['s3_path']] = relative_path

        if s3_uploads:
            key = Key(bucket)
            key.key = relative_path
            # Exported custom emoji from tools like Slack don't have
            # the data for what user uploaded them in `user_profile_id`.
            if not processing_emojis:
                user_profile_id = int(record['user_profile_id'])
                # Support email gateway bot and other cross-realm messages
                if user_profile_id in ID_MAP["user_profile"]:
                    logging.info("Uploaded by ID mapped user: %s!" % (user_profile_id,))
                    user_profile_id = ID_MAP["user_profile"][user_profile_id]
                user_profile = get_user_profile_by_id(user_profile_id)
                key.set_metadata("user_profile_id", str(user_profile.id))

            if 'last_modified' in record:
                key.set_metadata("orig_last_modified", record['last_modified'])
            key.set_metadata("realm_id", str(record['realm_id']))

            # Zulip exports will always have a content-type, but third-party
            # exports might not.
            content_type = record.get("content_type")
            if content_type is None:
                content_type = guess_type(record['s3_path'])[0]
            headers = {'Content-Type': content_type}  # type: Dict[str, Any]

            key.set_contents_from_filename(os.path.join(import_dir, record['path']),
                                           headers=headers)
        else:
            if processing_avatars or processing_emojis:
                file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
                                         relative_path)
            else:
                file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "files",
                                         relative_path)
            orig_file_path = os.path.join(import_dir, record['path'])
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            shutil.copy(orig_file_path, file_path)

    if processing_avatars:
        from zerver.lib.upload import upload_backend
        # Ensure that we have medium-size avatar images for every
        # avatar. TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(record['user_profile_id'])
                if settings.LOCAL_UPLOADS_DIR is not None:
                    avatar_path = user_avatar_path_from_ids(user_profile.id,
                                                            record['realm_id'])
                    medium_file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
                                                    avatar_path) + '-medium.png'
                    if os.path.exists(medium_file_path):
                        # We remove the image here primarily to deal with
                        # issues when running the import script multiple
                        # times in development (where one might reuse the
                        # same realm ID from a previous iteration).
                        os.remove(medium_file_path)
                upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
def read_to_fp(self, filename, fp):
    key = Key(self.bucket)
    key.key = self.prefix + filename
    return key.get_contents_to_file(fp)
try:
    output_bucket = conn.get_bucket(thumbnails_bucket_name)
except boto.exception.S3ResponseError:
    print "Ooops, thumbnail bucket " + thumbnails_bucket_name + " doesn't exist?"

for b in input_bucket_names:
    try:
        input_bucket = conn.get_bucket(b)
        keys = input_bucket.list()
        for i in keys:
            print "Processing: " + i.key
            prefix, ext = os.path.splitext(i.key)
            i.get_contents_to_filename(tempfilename)
            print "Downloaded"
            im = Image.open(tempfilename)
            im.thumbnail(thumbnail_size, Image.ANTIALIAS)
            print "Thumbnailed"
            im.save(tempfilename + "_small", "JPEG")
            print "Saved"
            tk = Key(output_bucket)
            tk.key = prefix + '_small' + ext
            print "Key Created"
            tk.set_contents_from_filename(tempfilename + "_small")
            print "Uploaded"
    except boto.exception.S3ResponseError as e:
        print "Ooops, bucket " + b + " doesn't exist?"
        print e
def put_backup(self, backup_id, infile):
    """ Upload the backup file to the expected path """
    k = Key(self.bucket)
    k.key = backup_id
    k.set_contents_from_filename(infile)
    return '{}/{}'.format(self.bucket, backup_id)
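# A minimal usage sketch, not from the original source: BackupStore is a hypothetical
# wrapper class that holds self.bucket (a boto Bucket) and exposes put_backup() above;
# the bucket name and paths are placeholders.
conn = S3Connection()                                  # credentials taken from the environment
store = BackupStore(conn.get_bucket('my-backups'))     # hypothetical constructor
location = store.put_backup('2015-04-01-full', '/tmp/dump.sql.gz')  # returns "<bucket>/<backup_id>"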