def get_file_from_s3(self, s3key):
    print "retrieving key = %s" % s3key
    k = Key(self.replaybucket)
    k.key = s3key
    stringio = StringIO.StringIO()
    k.get_contents_to_file(stringio)
    return stringio
def move_object(bucket_from, bucket_to, file):
    conn = connection.connect()
    bucket_from_name = bucket_from.name
    #self.list_object()
    #src_uri = boto.storage_uri(bucket_from_name + '/' + file.name, 'gs')
    # Create a file-like object for holding the object contents.
    object_contents = StringIO.StringIO()
    try:
        from boto.s3.key import Key
        k = Key(bucket_from)
        k.key = file.name
        #k.key.get_file(object_contents)
        k.get_contents_to_file(object_contents)
        dst_uri = boto.storage_uri(bucket_to.name + '/' + file.name, 'gs')
        object_contents.seek(0)
        dst_uri.new_key().set_contents_from_file(object_contents)
        # bucket = conn.get_bucket(bucket_to.name)
        # key = bucket.new_key(file.name)
        # key.set_contents_from_file(object_contents)
        object_contents.close()
    except:
        msg = 'Sorry, failed to move file: "%s".' % file.name
        return msg
    else:
        msg = 'Successfully moved "%s"!' % file.name
        return msg
def read_file(file_name, outfile, **kw):
    context = kw['context']
    log = context.log
    config = context.config
    secrets = context.secrets
    log.debug("Reading File: " + file_name)
    assert config != None, "No config given"
    assert secrets != None, "No AWS API tokens given"
    assert config.has_key("BUCKET"), "No bucket name given"
    bucket_name = config['BUCKET']
    bucket = get_bucket(context, bucket_name)
    if bucket == None:
        raise Exception("Failed to get bucket")
    from boto.s3.key import Key
    k = Key(bucket)
    k.key = file_name
    rc = 500
    try:
        k.get_contents_to_file(outfile)
        rc = 200
        log.debug("Read data from s3")
    except Exception, e:
        log.error("Failed to read %s" % file_name)
        log.exception(e)
    # report the status code computed above
    return rc
def download(bucket, src, dest, **aws_config):
    conn = boto.connect_s3(aws_config['access'], aws_config['secret'])
    key = Key(conn.get_bucket(bucket), src)
    f = open(dest, 'wb')
    key.get_contents_to_file(f)
    f.close()
def install(app, version):
    print "INSTALL " + app, version
    stop_webserver()
    local_dir = os.path.join(settings.APPS_LOCALDIR, app)
    try:
        os.makedirs(local_dir)
    except:
        pass
    localfile = os.path.join(local_dir, app + '.tar.bz2')
    """ Download file from S3 """
    bucket = get_bucket()
    k = Key(bucket)
    k.key = 'applications/' + app + '_' + version + '.tar.bz2'
    # download the archive in binary mode and close it before extracting
    with open(localfile, 'wb') as f:
        k.get_contents_to_file(f)
    """ Extract file """
    os.system('cd ' + local_dir + ' && tar -jxvf %s' % app + '.tar.bz2')
    os.unlink(localfile)
    """ Run install script """
    os.system('cd ' + local_dir + ' && /bin/bash mwpackage/install.sh')
    """ Write new version to config """
    write_localinfo_new(app, version)
def index(pin):
    s3_conn = S3Connection(AWS_KEY, AWS_SECRET)
    bucket = s3_conn.get_bucket('property-image-cache')
    s3_key = Key(bucket)
    s3_key.key = '{0}.jpg'.format(pin)
    if s3_key.exists():
        output = BytesIO()
        s3_key.get_contents_to_file(output)
    else:
        image_viewer = 'http://www.cookcountyassessor.com/PropertyImage.aspx?pin={0}'
        image_url = image_viewer.format(pin)
        image = requests.get(image_url)
        print(image.headers)
        if 'image/jpeg' in image.headers['Content-Type']:
            output = BytesIO(image.content)
            s3_key.set_metadata('Content-Type', 'image/jpg')
            s3_key.set_contents_from_file(output)
            s3_key.set_acl('public-read')
        else:
            sentry.captureMessage('Could not find image for PIN %s' % pin)
            abort(404)
    output.seek(0)
    response = make_response(output.read())
    response.headers['Content-Type'] = 'image/jpg'
    return response
def _download_manifest(self, bucket, download_dir, manifest_name):
    k = Key(bucket)
    k.key = manifest_name
    manifest_local_file = os.path.join(download_dir, manifest_name)
    with open(manifest_local_file, 'wb') as man_file:
        k.get_contents_to_file(man_file)
    return manifest_local_file
def read_chunk(chunk_path, outfile, config, secrets):
    log.debug("Reading File: " + chunk_path)
    assert config is not None, "No config given"
    assert secrets is not None, "No AWS API tokens given"
    assert config.has_key("BUCKET"), "No bucket name given"
    bucket_name = config["BUCKET"]
    bucket = get_bucket(context, bucket_name)
    if bucket == None:
        raise Exception("Failed to get bucket")
    k = Key(bucket)
    k.key = chunk_path
    rc = 0
    try:
        k.get_contents_to_file(outfile)
        log.debug("Read data from s3")
    except Exception, e:
        log.error("Failed to read %s" % chunk_path)
        log.exception(e)
        rc = -errno.REMOTEIO
    # report the status code computed above
    return rc
def test_simple_multipart(self):
    """ test a simple multipart upload """
    log = logging.getLogger("test_simple_multipart")
    key_name = "test_key"
    part_count = 2
    path_template = os.path.join(test_dir_path, "test_simple_multipart_{0:02}")
    test_file_paths = [path_template.format(n + 1) for n in range(part_count)]
    retrieve_path = os.path.join(test_dir_path, "retrieve_multipart")
    # 5mb is the minimum size s3 will take
    test_file_size = 1024 ** 2 * 5
    test_blobs = [os.urandom(test_file_size) for _ in range(part_count)]
    for test_file_path, test_blob in zip(test_file_paths, test_blobs):
        with open(test_file_path, "wb") as output_file:
            output_file.write(test_blob)

    # create the bucket
    bucket = self._s3_connection.create_unique_bucket()
    self.assertTrue(bucket is not None)

    # assert that we have no uploads in progress
    upload_list = bucket.get_all_multipart_uploads()
    self.assertEqual(len(upload_list), 0)

    # start the multipart upload
    multipart_upload = bucket.initiate_multipart_upload(key_name)

    # assert that our upload is in progress
    upload_list = bucket.get_all_multipart_uploads()
    self.assertEqual(len(upload_list), 1)
    self.assertEqual(upload_list[0].id, multipart_upload.id)

    # upload a file in pieces
    for index, test_file_path in enumerate(test_file_paths):
        with open(test_file_path, "rb") as input_file:
            multipart_upload.upload_part_from_file(input_file, index + 1)

    # complete the upload
    completed_upload = multipart_upload.complete_upload()

    key = Key(bucket, key_name)
    with open(retrieve_path, "wb") as output_file:
        key.get_contents_to_file(output_file)

    # compare files
    with open(retrieve_path, "rb") as input_file:
        for test_blob in test_blobs:
            retrieve_blob = input_file.read(test_file_size)
            self.assertEqual(retrieve_blob, test_blob, "compare files")

    # delete the key
    key.delete()

    # delete the bucket
    self._s3_connection.delete_bucket(bucket.name)
def pull_from_s3_and_submit_to_pcp(operation, params):
    statsd.incr("pull_from_s3_and_submit_to_pcp")
    print "pulling from S3"
    params = loads(operation.params)
    video_id = params['video_id']
    workflow = params['workflow']
    video = Video.objects.get(id=video_id)
    ouuid = operation.uuid
    filename = video.filename()
    suffix = video.extension()
    conn = boto.connect_s3(
        settings.AWS_ACCESS_KEY,
        settings.AWS_SECRET_KEY)
    bucket = conn.get_bucket(settings.AWS_S3_UPLOAD_BUCKET)
    k = Key(bucket)
    k.key = video.s3_key()
    t = tempfile.NamedTemporaryFile(suffix=suffix)
    k.get_contents_to_file(t)
    t.seek(0)
    operation.log(info="downloaded from S3")
    # TODO: figure out how to re-use submit_to_pcp()
    print "submitting to PCP"
    pcp = PCP(settings.PCP_BASE_URL, settings.PCP_USERNAME,
              settings.PCP_PASSWORD)
    filename = str(ouuid) + suffix
    print "submitted with filename %s" % filename
    title = "%s-%s" % (str(ouuid), strip_special_characters(video.title))
    print "submitted with title %s" % title
    pcp.upload_file(t, filename, workflow, title, video.description)
    return ("submitted", "submitted to PCP")
def getFile(self, file):
    key = Key(self.bucket)
    key.key = file
    if not key:
        return
    local = '%s/%s' % (self.target.rstrip('/'), file)
    dir = os.path.dirname(local)
    if not os.path.isdir(dir):
        os.makedirs(dir)
    fp = open(local, 'w')
    key.get_contents_to_file(fp)
    fp.close()
    est = pytz.timezone('Australia/Sydney')
    gmt = pytz.timezone('GMT')
    tzf = '%Y-%m-%d %H:%M:%S %Z%z'
    #print 'last_modified=', key.last_modified
    dt = datetime.strptime(key.last_modified, "%a, %d %b %Y %H:%M:%S %Z")
    #print 'dt=', dt.strftime(tzf)
    dt = gmt.localize(dt)
    adt = est.normalize(dt.astimezone(est))
    #print 'adt=', adt.strftime(tzf)
    t = time.mktime(adt.timetuple())
    os.utime(local, (t, t))
    sys.stdout.write('%s %s\n' % (adt.strftime(tzf), local))
    return
def read_file(self, filepath):
    """Read the specified file and return its handle."""
    key = Key(self.bucket)
    key.key = filepath
    filehandle = SpooledTemporaryFile(max_size=10 * 1024 * 1024)
    key.get_contents_to_file(filehandle)
    return filehandle
def get_from_s3_compressed(bucket, keyname, filename):
    key = Key(bucket, keyname)
    with io.BytesIO() as b:
        key.get_contents_to_file(b)
        b.seek(0)
        with gzip.GzipFile(mode='rb', fileobj=b) as g, \
                open(filename, 'wb') as f:
            shutil.copyfileobj(g, f)
def download_file_from_bucket(self, bucket, bucket_folder_name, filename):
    key_name = bucket_folder_name + '/' + filename
    key = Key(bucket)
    key.key = key_name
    local_file = self.open_file_from_tmp_dir(filename, mode='wb')
    key.get_contents_to_file(local_file)
    local_file.close()
def _process_email(self, **kwargs):
    try:
        send_to = kwargs.get('send_to')
        if settings.DEBUG:
            send_to = settings.DEFAULT_EMAIL_ID
        msg = MIMEMultipart()
        msg['Subject'] = kwargs.get('subject')
        msg['From'] = kwargs.get('send_from')
        msg['To'] = send_to
        msg['cc'] = [] if kwargs.get('cc') is None else kwargs.get('cc')
        msg.attach(MIMEText(kwargs.get('body'), 'html', 'utf-8'))
        attachments = kwargs.get('attachments')
        if settings.EMAIL_QUEUE['aws'].get('attachment_bucket') is not None and attachments is not None:
            if self._s3_conn is None:
                self._s3_conn = S3Connection(settings.EMAIL_QUEUE['aws']['key'],
                                             settings.EMAIL_QUEUE['aws']['secret'])
            bucket = self._s3_conn.get_bucket(settings.EMAIL_QUEUE['aws']['attachment_bucket'])
            for attachment in attachments:
                url = attachment.get('url')
                contenttype = attachment.get('type')
                if contenttype is not None:
                    with TemporaryFile() as f:
                        k = Key(bucket)
                        k.key = os.path.basename(urlparse.urlsplit(url).path)
                        k.get_contents_to_file(f)
                        f.seek(0)
                        # if contenttype == 'image':
                        #     img = open(url, 'rb').read()
                        #     imgMsg = MIMEImage(img)
                        #     imgMsg.add_header('Content-ID', '<image1>')
                        #     filename = os.path.basename(urlparse.urlsplit(url).path)
                        #     imgMsg.add_header('Content-Disposition', 'attachment', filename=filename)
                        #     msg.attach(imgMsg)
                        content = f.read()
                        applicationMsg = MIMEApplication(content, contenttype)
                        applicationMsg.add_header('Content-ID', '<' + contenttype + '1' + '>')
                        filename = os.path.basename(urlparse.urlsplit(url).path)
                        applicationMsg.add_header('Content-Disposition', 'attachment', filename=filename)
                        msg.attach(applicationMsg)
        smtp_server = settings.EMAIL_HOST
        smtp_port = settings.EMAIL_PORT
        user_name = settings.EMAIL_HOST_USER
        password = settings.EMAIL_HOST_PASSWORD
        server = smtplib.SMTP(smtp_server, smtp_port)
        server.starttls()
        server.ehlo()
        server.login(user_name, password)
        server.sendmail(kwargs.get('send_from'), send_to, msg.as_string())
        server.quit()
    except Exception as e:
        error_logger.error("", exc_info=True, extra={})
        return False
    return True
def test_key_with_files_and_callback(self):
    """ test simple key 'from_file' and 'to_file' functions """
    def _archive_callback(bytes_sent, total_bytes):
        print("archived {0} out of {1}".format(bytes_sent, total_bytes))

    def _retrieve_callback(bytes_sent, total_bytes):
        print("retrieved {0} out of {1}".format(bytes_sent, total_bytes))

    log = logging.getLogger("test_key_with_files")
    key_name = "A" * 1024
    test_file_path = os.path.join(test_dir_path, "test_key_with_files-original")
    test_file_size = 1024 ** 2
    buffer_size = 1024
    log.debug("writing {0} bytes to {1}".format(test_file_size, test_file_path))
    bytes_written = 0
    with open(test_file_path, "wb") as output_file:
        while bytes_written < test_file_size:
            output_file.write(os.urandom(buffer_size))
            bytes_written += buffer_size

    # create the bucket
    bucket = self._s3_connection.create_unique_bucket()
    self.assertTrue(bucket is not None)

    # create an empty key
    write_key = Key(bucket)

    # set the name
    write_key.name = key_name
    self.assertFalse(write_key.exists())

    # upload some data
    with open(test_file_path, "rb") as archive_file:
        write_key.set_contents_from_file(archive_file, cb=_archive_callback)
    self.assertTrue(write_key.exists())

    # create another key with the same name
    read_key = Key(bucket, key_name)

    # read back the data into a separate file so the comparison is meaningful
    retrieve_file_path = os.path.join(test_dir_path, "test_key_with_files-retrieve")
    # 2011-08-08 dougfort boto aborts if you don't tell it the size
    read_key.size = test_file_size
    with open(retrieve_file_path, "wb") as retrieve_file:
        read_key.get_contents_to_file(retrieve_file, cb=_retrieve_callback)
    self.assertTrue(filecmp.cmp(test_file_path, retrieve_file_path, shallow=False))

    # delete the key
    read_key.delete()
    self.assertFalse(write_key.exists())

    # delete the bucket
    self._s3_connection.delete_bucket(bucket.name)
def download(self, keyname):
    k = Key(self.bucket)
    k.key = keyname
    encrypted_out = tempfile.TemporaryFile()
    k.get_contents_to_file(encrypted_out)
    encrypted_out.seek(0)
    return encrypted_out
def _load_np(self, truth_s3_file):
    k = Key(self.results_bucket)
    k.key = truth_s3_file
    with tempfile.SpooledTemporaryFile() as temp:
        k.get_contents_to_file(temp)
        temp.seek(0)
        accuracy = np.load(temp)
    self.logger.debug("Loaded %s" % truth_s3_file)
    return accuracy
def get_remote_dbs():
    s3 = boto.connect_s3()
    sync = s3.get_bucket('cityscan-resources')
    dumps = sync.list('dbdump')
    for dump in dumps:
        filename = os.path.basename(dump.name)
        # download each listed key to the current directory
        k = Key(bucket=sync)
        k.key = dump.name
        with open(filename, 'wb') as f:
            k.get_contents_to_file(f)
def getFile(self, path, file, localfile):
    if self._conn == None:
        raise Exception("Must connect first.")
    k = Key(self._bconn)
    k.name = self.buildPath(path, file)
    if k.exists():
        print ("Key name:%s" % k.name)
        k.get_contents_to_file(localfile)
def get(self, obj):
    k = Key(self.bucket)
    k.key = obj
    if not self.args.out_file:
        out = sys.stdout
    else:
        out = open(self.args.out_file, 'wb')
    k.get_contents_to_file(out, version_id=self.args.version_id)
def restore_file(self, deis_bucket_name, key_name, deis_bucket=None, remote_bucket=None):
    key = Key(remote_bucket, key_name)
    deis_key = Key(deis_bucket, self.get_deis_key_name(deis_bucket_name, key.key))
    temp = tempfile.SpooledTemporaryFile(self._max_spool_bytes)
    key.get_contents_to_file(temp)
    self.set_contents_from_file(deis_key, temp)
    temp.close()
    key.close()
    deis_key.close()
def deprecated__load_np(self, bucket, s3_file):
    conn = boto.connect_s3()
    b = conn.get_bucket(bucket)
    k = Key(b)
    k.key = s3_file
    with tempfile.SpooledTemporaryFile() as temp:
        k.get_contents_to_file(temp)
        temp.seek(0)
        accuracy = np.load(temp)
    return accuracy
def read(self, fnm):
    b = self.conn.get_bucket(self.cdmi_bucket_name)
    k = Key(b)
    k.key = fnm
    # read in the content to a temporary file on disk
    fp = NamedTemporaryFile(prefix="aws_s3", suffix=".buffer", delete=True)
    k.get_contents_to_file(fp)
    fp.seek(0)  # roll back to start
    return fp
def download_manifests(self, bucket, manifests, directory):
    if len(manifests) > 0:
        if not os.path.exists(directory):
            os.makedirs(directory)
    for manifest in manifests:
        k = Key(bucket)
        k.key = manifest
        print 'Downloading', manifest
        manifest_file = open(os.path.join(directory, manifest), 'wb')
        k.get_contents_to_file(manifest_file)
        manifest_file.close()
def download_parts(self, bucket, manifests, directory):
    for manifest in manifests:
        manifest_filename = os.path.join(directory, manifest)
        parts = self.get_parts(manifest_filename)
        for part in parts:
            k = Key(bucket)
            k.key = part
            print 'Downloading', part
            part_file = open(os.path.join(directory, part), 'wb')
            k.get_contents_to_file(part_file)
            part_file.close()
def read_gzip(self):
    conn = boto.connect_s3(
        settings.AWS_ACCESS_KEY,
        settings.AWS_SECRET_KEY)
    bucket = conn.get_bucket(self.obj.s3_bucket_name())
    k = Key(bucket)
    k.key = self.obj.s3_key()
    sio = StringIO()
    k.get_contents_to_file(sio)
    sio.seek(0)
    gzf = GzipFile(fileobj=sio, mode="rb")
    return gzf.read()
def download_string_from_s3(upload_file_name, bucket_name=None):
    conn = boto.connect_s3()
    # conn = boto.s3.connect_to_region(region_name='the_region')
    # conn = S3Connection('aws_key', 'aws_secret')
    bucket_name = __get_bucket_name(bucket_name)
    bucket = conn.get_bucket(bucket_name)
    k = Key(bucket)
    k.key = upload_file_name
    downloaded_file = StringIO()
    k.get_contents_to_file(downloaded_file)
    downloaded_file.seek(0)
    return downloaded_file
def process_serve_chestct():
    # Init
    imgfile = request.args.get('imgfile')
    print "Process/Serving Image: " + imgfile
    fnamesplit = imgfile.rsplit('.', 1)
    ext = fnamesplit[1]
    imgprefix = fnamesplit[0]
    prfxsplt = imgprefix.rsplit('-', 2)
    prefix = prfxsplt[0]
    nfiles = int(prfxsplt[1])
    idx = int(prfxsplt[2])
    matrix3D = np.array([])
    initFile = True
    imgarr = []
    conn = S3Connection(ACCESS_KEY, SECRET_KEY)
    bkt = conn.get_bucket('chestcad')
    k = Key(bkt)
    print prefix, str(nfiles)
    try:
        for x in range(0, nfiles):
            mykey = prefix + '-' + str(nfiles) + '-' + str(x)
            k.key = mykey
            # NEW: Process file here...
            fout = cStringIO.StringIO()
            k.get_contents_to_file(fout)
            if initFile:
                matrix3D = readChestCTFile(fout)
            else:
                matrix3D = np.concatenate((matrix3D, readChestCTFile(fout)), axis=2)
            initFile = False
        result = 1
    except:
        result = 0
    dimension = matrix3D.shape[0]
    panel1, panel2, panel3, panel4 = processMatrix(matrix3D, dimension)
    print 'prior to printing matrix'
    imgarr, data_encode = printMatrix(panel1, panel2, panel3, panel4, prefix, nfiles)
    return jsonify({
        "success": result,
        "imagefile": data_encode,
        "imgarr": imgarr
    })
def test_key_with_files(self):
    """ test simple key 'from_file' and 'to_file' functions """
    log = logging.getLogger("test_key_with_files")
    key_name = "A" * 1024
    test_file_path = os.path.join(test_dir_path, "test_key_with_files-original")
    test_file_size = 1024 ** 2
    buffer_size = 1024
    log.debug("writing {0} bytes to {1}".format(test_file_size, test_file_path))
    bytes_written = 0
    with open(test_file_path, "wb") as output_file:
        while bytes_written < test_file_size:
            output_file.write(os.urandom(buffer_size))
            bytes_written += buffer_size

    # create the bucket
    bucket = self._s3_connection.create_unique_bucket()
    self.assertTrue(bucket is not None)

    # create an empty key
    write_key = Key(bucket)

    # set the name
    write_key.name = key_name
    self.assertFalse(write_key.exists())

    # upload some data
    with open(test_file_path, "rb") as archive_file:
        write_key.set_contents_from_file(archive_file)
    self.assertTrue(write_key.exists())

    # create another key with the same name
    read_key = Key(bucket, key_name)

    # read back the data into a separate file so the comparison is meaningful
    retrieve_file_path = os.path.join(test_dir_path, "test_key_with_files-retrieve")
    with open(retrieve_file_path, "wb") as retrieve_file:
        read_key.get_contents_to_file(retrieve_file)
    self.assertTrue(
        filecmp.cmp(test_file_path, retrieve_file_path, shallow=False))

    # delete the key
    read_key.delete()
    self.assertFalse(write_key.exists())

    # delete the bucket
    self._s3_connection.delete_bucket(bucket.name)
def get_from_s3(file_name, bucket_name):
    if file_name in os.listdir(cache):
        with open(cache + file_name) as file:
            return file.read()
    bucket = conn.get_bucket(bucket_name)
    k = Key(bucket)
    k.key = file_name
    string = StringIO.StringIO()
    k.get_contents_to_file(string)
    contents = string.getvalue()
    with open(cache + file_name, "w") as file:
        file.write(contents)
    return contents
def get(self, key):
    """Get the file associated with key"""
    try:
        get_key = Key(self.s3_bucket)
        get_key.key = key
        get_file = tempfile.NamedTemporaryFile(mode='w+t', delete=False)
        #get_file = tempfile.mkstemp()
        get_key.get_contents_to_file(get_file)
        get_file.seek(0)
        return get_file
    except Exception, e:
        raise exceptions.KeyDoesNotExist(get_key)
def download(self, key_name):
    """
    Download a file

    @param string key_name - Key of the file to download
    """
    key = Key(self.bucket, key_name)
    encrypted_out = TemporaryFile()
    log.debug("Saving contents of key %s to file %s" % (key_name, encrypted_out))
    key.get_contents_to_file(encrypted_out)
    encrypted_out.seek(0)
    return encrypted_out
def run(self):
    self.logger.info("START: %s sift" % self.entry["_id"])
    kin = Key(self.bucket)
    kin.key = self.entry["filename"]
    kout = Key(self.bucket)
    splitext = os.path.splitext(self.entry["filename"])
    kout.key = "%s.key.gz" % splitext[0]
    with tempfile.NamedTemporaryFile(suffix=splitext[1], delete=False) as f:
        kin.get_contents_to_file(f)
        f.close()
    try:
        im = Image.open(f.name)
    except IOError, e:
        self.logger.info("ERROR: %s %s sift" % (self.entry["_id"], e))
        return False
def get(self, obj):
    k = Key(self.bucket)
    k.key = obj
    if not self.args.out_file:
        out = sys.stdout
    else:
        out = open(self.args.out_file, 'wb')
    headers = {}
    if self.args.if_modified_since:
        headers['If-Modified-Since'] = self.args.if_modified_since
    if self.args.if_unmodified_since:
        headers['If-Unmodified-Since'] = self.args.if_unmodified_since
    k.get_contents_to_file(out, version_id=self.args.version_id, headers=headers)
def test_S3Sink(self):
    with closing(RollingSink('/foo/bar/{0}', 2,
                             RecordSink(S3Sink(SinkTest.conn, 'nkrishna-mids205')))) as f:
        f.open()
        f.write('"Record 1"')
        f.write('"Record 2"')
        f.write('"Record 3"')
        f.write('"Record 4"')
    b = SinkTest.conn.lookup('nkrishna-mids205')
    k = Key(b, '/foo/bar/0')
    f = BytesIO()
    k.get_contents_to_file(f)
    self.assertEqual(f.getvalue(), '[\n"Record 1",\n"Record 2"\n]\n')
    k = Key(b, '/foo/bar/1')
    f = BytesIO()
    k.get_contents_to_file(f)
    self.assertEqual(f.getvalue(), '[\n"Record 3",\n"Record 4"\n]\n')
def move_to_tre_server(self, fnm):
    b = self.conn.get_bucket(self.cdmi_bucket_name)
    k = Key(b)
    k.key = fnm
    # read in the content to a temporary file on disk
    fp = NamedTemporaryFile(prefix="aws_s3", suffix=".buffer", delete=False)
    k.get_contents_to_file(fp)
    source = os.path.join(c(self.backend_name, 'blob.datadir'), fp.name)
    target = os.path.join(c('general', 'tre_data_folder'), fnm)
    try:
        os.symlink(os.path.abspath(source), os.path.abspath(target))
    except OSError, ex:
        if ex.errno == 17:
            pass
def open_url(url, offset=None, length=None):
    p = urlparse(url)
    bucket_name = p.netloc
    key = p.path[1:]
    conn = S3Connection()
    b = conn.get_bucket(bucket_name)
    k = Key(b)
    k.key = key
    s = StringIO()
    if offset is not None and length is not None:
        # HTTP Range is inclusive, so the last byte is offset + length - 1
        headers = {'Range': 'bytes=%d-%d' % (offset, offset + length - 1)}
    elif offset is not None:
        headers = {'Range': 'bytes=%d-' % (offset)}
    else:
        headers = {}
    k.get_contents_to_file(s, headers=headers)
    s.seek(0)
    return s
def process_record(connection, record):
    if record['eventName'] == u"ObjectCreated:Put":
        application.logger.info('Processing Event [%s] for s3://%s/%s',
                                record['eventName'],
                                record['s3']['bucket']['name'],
                                record['s3']['object']['key'])
        src_bucket = connection.get_bucket(record['s3']['bucket']['name'])
        src_key = Key(src_bucket)
        src_key.key = urllib.unquote_plus(urllib.unquote(record['s3']['object']['key'])).decode('utf8')
        img_io = StringIO()
        src_key.get_contents_to_file(img_io)
        img_io.seek(0)
        im = Image.open(img_io)
        dest_bucket = connection.get_bucket(AWS_BUCKET_NAME)
        filename = src_key.key.split('/')[-1]
        resize_and_save_image(dest_bucket, im, image_sizes, filename)
def _download_parts(self, bucket, download_dir, manifest_name):
    man_file_loc = os.path.join(download_dir, manifest_name)
    parts = self._get_parts(man_file_loc)
    logger.debug("%d parts to be downloaded" % len(parts))
    part_files = []
    for idx, part in enumerate(parts):
        part_loc = os.path.join(download_dir, part)
        part_files.append(part_loc)
        if os.path.exists(part_loc):
            # Do not re-download an existing part file
            continue
        part_file = open(part_loc, 'wb')
        k = Key(bucket)
        k.key = part
        if idx % 5 == 0:
            logger.debug("Downloading part %d/%d.." % (idx + 1, len(parts)))
        k.get_contents_to_file(part_file)
        part_file.close()
    return part_files
def test_file_error(self):
    key = Key()

    class CustomException(Exception):
        pass

    key.get_contents_to_file = mock.Mock(
        side_effect=CustomException('File blew up!'))

    # Ensure our exception gets raised instead of a file or IO error
    with self.assertRaises(CustomException):
        key.get_contents_to_filename('foo.txt')
def download_manifests(self, bucket, manifests, directory):
    if len(manifests) > 0:
        if not os.path.exists(directory):
            os.makedirs(directory)
    for manifest in manifests:
        k = Key(bucket)
        k.key = manifest
        manifest_filename = os.path.join(directory, manifest)
        manifest_file = open(manifest_filename, 'wb')
        try:
            k.get_contents_to_file(manifest_file)
        except S3ResponseError, s3error:
            s3error_string = '%s' % s3error
            if s3error_string.find('200') < 0:
                print s3error_string
                print 'unable to download manifest %s' % manifest
                if os.path.exists(manifest_filename):
                    os.remove(manifest_filename)
                return False
        manifest_file.close()
    # report success so callers can distinguish it from the False above
    return True
def restore(filename, **kwargs):
    log = kwargs.get("logger", app_logger)
    conf = kwargs.get("conf", None)
    bucket = get_bucket(conf)
    if not bucket:
        return
    if not filename:
        log.error("No file to restore, use -f to specify one.")
        return
    keys = [key.name for key in bucket.get_all_keys()
            if key.name.startswith(filename)]
    if not keys:
        log.error("No file matched.")
        return
    key_name = sorted(keys, reverse=True)[0]
    log.info("Restoring " + key_name)
    k = Key(bucket)
    k.key = key_name
    encrypted_out = StringIO()
    k.get_contents_to_file(encrypted_out)
    encrypted_out.seek(0)
    password = kwargs.get("password")
    if not password:
        password = getpass()
    out = StringIO()
    decrypt(encrypted_out, out, password)
    out.seek(0)
    tar = tarfile.open(fileobj=out)
    tar.extractall()
    tar.close()
    return True
def connect_s3(aws_access_key_id, aws_secret_access_key, bucket, filename):
    s3 = S3Connection(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )
    bucket = s3.get_bucket(bucket)
    k = Key(bucket)
    k.key = filename
    k.set_contents_from_filename(filename)
    fp = BytesIO()
    # get_contents_to_file() returns None; the downloaded bytes live in fp
    k.get_contents_to_file(fp)
    fp.seek(0)
    # text = img2digit(fp)
    return fp
def copy_files_to_local(self, bucket_name, prefix, output_dir):
    """
    Copies files from a specific S3 bucket to a local directory.

    A prefix should be supplied; otherwise the entire content of the S3
    bucket will be downloaded. S3 has a flat structure, so there is no
    real directory abstraction.

    Example:
    We need to download all events for the requested report date from an
    S3 bucket. The files at the URLs below are in the bucket:

        s3://roqad-logs/dsp/events/2017-03-10/events_2017-03-10-1489104106289.json
        s3://roqad-logs/dsp/events/2017-03-10/events_2017-03-10-1489104106539.json

    To download all those files we invoke the method like:

        client.copy_files_to_local("roqad-logs", "dsp/events/2017-03-10/", "./result")

    After that, all files are available in the local directory "result",
    named without the prefix part of the URL:

        ./result/events_2017-03-10-1489104106289.json
        ./result/events_2017-03-10-1489104106539.json

    :param bucket_name: s3 bucket name
    :param prefix: 'directory' on s3 to download files from
    :param output_dir: local directory where downloaded files will be saved
    """
    logging.info("Downloading files from s3://%s/%s to directory %s",
                 bucket_name, prefix, output_dir)
    self.__ensure_output_dir_exists(output_dir)
    conn = boto.connect_s3(self.aws_access_key_id, self.aws_secret_access_key,
                           host=self.host)
    bucket = conn.get_bucket(bucket_name)
    events_files = bucket.list(prefix)
    for s3_object in events_files:
        logging.info("\tDownloading file: '%s'", s3_object.key)
        file_key = Key(bucket=bucket, name=s3_object.key)
        output_file_path = os.path.join(output_dir, os.path.basename(s3_object.key))
        # write in binary mode so non-text content is not mangled
        with open(output_file_path, 'wb') as result_file:
            file_key.get_contents_to_file(result_file)
    logging.info("All files copied to local directory")
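A brief usage sketch for the method above, restating the docstring's example; the wrapper class name (S3LogClient) and the credential values are assumptions for illustration, not part of the original source:

# Usage sketch (hypothetical wiring): S3LogClient is assumed to define
# copy_files_to_local as above and to store aws_access_key_id,
# aws_secret_access_key, and host on self.
#
#     client = S3LogClient(aws_access_key_id='AKIA...',
#                          aws_secret_access_key='...',
#                          host='s3.amazonaws.com')
#     client.copy_files_to_local('roqad-logs', 'dsp/events/2017-03-10/', './result')
#
# Expected local output, per the docstring:
#     ./result/events_2017-03-10-1489104106289.json
#     ./result/events_2017-03-10-1489104106539.json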
def download(bucket, filename):
    key = Key(bucket, filename)
    headers = {}
    mode = 'wb'
    updating = False
    if os.path.isfile(filename):
        mode = 'r+b'
        updating = True
        print "File exists, adding If-Modified-Since header"
        modified_since = os.path.getmtime(filename)
        timestamp = datetime.datetime.utcfromtimestamp(modified_since)
        headers['If-Modified-Since'] = timestamp.strftime(
            "%a, %d %b %Y %H:%M:%S GMT")
    try:
        with open(filename, mode) as f:
            key.get_contents_to_file(f, headers)
            f.truncate()
    except boto.exception.S3ResponseError as e:
        if not updating:
            # got an error and we are not updating an existing file
            # delete the file that was created due to mode = 'wb'
            os.remove(filename)
        return e.status
    return 200
def read_to_fp(self, filename, fp):
    key = Key(self.bucket)
    key.key = self.prefix + filename
    return key.get_contents_to_file(fp)