def download_aws_billing_file(self, cred, bucket_name, date=None):
    if date is None:
        date = datetime.datetime.utcnow().date()
    conn = get_s3_conn(cred)
    bucket = conn.get_bucket(bucket_name)
    account_id = cryptotool.decrypt_scalr(app.crypto_key, cred['account_id'])
    file_name = get_aws_csv_file_name(account_id, date)
    key = bucket.get_key(file_name)
    if not key:
        msg = "AWS detailed billing CSV file {0} wasn't found in bucket {1}"
        msg = msg.format(file_name, bucket_name)
        if datetime.datetime.utcnow().day == 1:
            LOG.warning(msg)
            return None
        else:
            raise Exception(msg)
    last_modified_dt = datetime.datetime.strptime(
        key.last_modified, self.last_modified_format)
    update_interval = self.config['interval']
    utcnow = datetime.datetime.utcnow()
    delta = datetime.timedelta(seconds=update_interval)
    condition1 = (utcnow > last_modified_dt and
                  utcnow < last_modified_dt + delta)
    condition2 = ((utcnow - last_modified_dt).seconds / 3600) % 8 == 0
    if condition1 or condition2:
        local_file_path = os.path.join(self.tmp_dir, file_name)
        LOG.debug('Downloading {0}'.format(file_name))
        key.get_contents_to_filename(local_file_path)
        return local_file_path
    else:
        return None
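# Hedged sketch (not part of the original): how the freshness window in
# download_aws_billing_file() behaves. condition1 is true while the key was
# modified within the last `interval` seconds; note that timedelta.seconds
# only covers the within-day remainder, so condition2's hour arithmetic
# wraps around every 24 hours.
import datetime

interval = 300  # assumed config['interval'] of 5 minutes
last_modified = datetime.datetime(2015, 6, 1, 12, 0, 0)
utcnow = datetime.datetime(2015, 6, 1, 12, 2, 30)
delta = datetime.timedelta(seconds=interval)
print(last_modified < utcnow < last_modified + delta)        # True: inside the window
print(((utcnow - last_modified).seconds // 3600) % 8 == 0)   # True: 0 full hours elapsed (Py2-style integer division)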
def command_install(bucket, requirements, verbose=False, requirements_py3=None):
    with utility.temp_dir() as archives_dir:
        for key in bucket.list(prefix=PREFIX):
            archive = key.name[len(PREFIX):]
            key.get_contents_to_filename(os.path.join(archives_dir, archive))
        archives_url = "file://" + archives_dir
        pip_args = get_pip_args(archives_url)
        run_pip_uninstall(OBSOLETE)
        run_pip_install(pip_args, requirements, verbose=verbose)
        run_pip3_install(pip_args, requirements_py3, verbose=verbose)
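# Hedged usage sketch (the bucket name is an assumption): mirror the archives
# stored under PREFIX out of S3, then pip-install from the local file:// index.
import boto

conn = boto.connect_s3()  # credentials from the environment / boto config
bucket = conn.get_bucket('example-pip-archives')  # hypothetical bucket
command_install(bucket, 'requirements.txt', verbose=True,
                requirements_py3='requirements-py3.txt')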
def get_file(self, key_name, file_name):
    bucket = self.connection.get_bucket(self.bucket_name)
    if bucket is None:
        return False
    key = bucket.get_key(key_name)
    if key is None:
        return False
    key.get_contents_to_filename(file_name)
    return True
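# Hedged usage sketch (class wiring is an assumption): the boto calls that
# get_file() relies on. bucket.get_key() issues a HEAD request and returns
# None for a missing key, which is what makes the False return path work.
import boto

conn = boto.connect_s3()
bucket = conn.get_bucket('example-bucket')     # hypothetical bucket
key = bucket.get_key('reports/2015-06.csv')    # None if the object is absent
if key is not None:
    key.get_contents_to_filename('/tmp/2015-06.csv')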
def download_file(self, destination, bucket_name, key_name):
    """ Downloads a file to the local filesystem from Amazon S3.

    Args:
      destination: A str containing the name of the file on the local
        filesystem that we should download our file to.
      bucket_name: A str containing the name of the bucket that the file
        should be downloaded from.
      key_name: A str containing the name of the key that the file should
        be downloaded from.
    """
    bucket = self.connection.lookup(bucket_name)
    key = boto.s3.key.Key(bucket)
    key.key = key_name
    key.get_contents_to_filename(destination)
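# Hedged note (not from the original): unlike get_file() above, this builds a
# Key without an existence check, so boto raises S3ResponseError (404) from
# get_contents_to_filename() if the key is absent. A caller might guard it
# like this (`downloader` is a hypothetical instance):
import boto.exception

try:
    downloader.download_file('/tmp/app.tar.gz', 'my-bucket', 'backups/app.tar.gz')
except boto.exception.S3ResponseError as e:
    if e.status == 404:
        raise ValueError('no such key: backups/app.tar.gz')
    raise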
def worker(base_path):
    while 1:
        try:
            op, queued_path = queue.get()
            path = queued_path
            if path is None:
                return
            fspath = os.path.join(base_path, path)
            if op is DELETE:
                try:
                    os.remove(fspath)
                except Exception:
                    raise
            else:
                # download
                try:
                    key = boto.s3.key.Key(bucket)
                    key.key = bucket_prefix + path
                    try:
                        parent = os.path.dirname(fspath)
                        if not os.path.exists(parent):
                            try:
                                os.makedirs(parent)
                            except OSError:
                                # another worker may have created it first
                                if not os.path.exists(parent):
                                    raise
                        key.get_contents_to_filename(
                            fspath.encode(encoding))
                    except Exception:
                        logger.exception('downloading %r, retrying' % fspath)
                        time.sleep(9)
                        key.get_contents_to_filename(
                            fspath.encode(encoding))
                except Exception:
                    raise
        except Exception:
            logger.exception('processing %r %r' % (op, path))
        finally:
            queue.task_done()
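# Hedged driver sketch (queue, bucket, bucket_prefix, encoding, logger and
# DELETE are module-level names assumed by worker() above): a small thread
# pool fed with (op, path) tuples; a None path is the shutdown sentinel.
import threading
try:
    import queue as queue_mod    # Python 3
except ImportError:
    import Queue as queue_mod    # Python 2

queue = queue_mod.Queue()
DELETE, DOWNLOAD = 'delete', 'download'

threads = [threading.Thread(target=worker, args=('/var/cache/mirror',))
           for _ in range(4)]
for t in threads:
    t.daemon = True
    t.start()

queue.put((DOWNLOAD, u'docs/readme.txt'))
queue.put((DELETE, u'docs/stale.txt'))
queue.join()                 # blocks until task_done() ran for every item
for _ in threads:
    queue.put((None, None))  # each worker exits when it sees a None path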
def __getattribute__(self, name):
    value = _BaseImage.__getattribute__(self, name)
    if name == 'files' and value is None:
        # source is 's3://bucket/path/to/object'
        (bucket_name, object_name) = self.source[5:].split('/', 1)
        s3 = S3Connection(self._s3_access_key, self._s3_secret_key,
                          calling_format=OrdinaryCallingFormat())
        bucket = s3.get_bucket(bucket_name)
        key = boto.s3.key.Key(bucket)
        key.key = object_name
        self._source_base = os.path.basename(self.source)
        self._temp_source = '%s/%s' % (self._tempdir, self._source_base)
        key.get_contents_to_filename(self._temp_source)
        s3.close()
        self._unpack()
        return self.files
    return value
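# Hedged sketch (standalone): how the source string above is parsed.
# [5:] drops the 's3://' scheme; the first '/' then separates the bucket
# name from the object key.
source = 's3://my-bucket/images/base.tar.gz'
bucket_name, object_name = source[5:].split('/', 1)
print((bucket_name, object_name))  # ('my-bucket', 'images/base.tar.gz')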
def _kick_off_downloads(self, s3url, bucket, source, target):
    '''Kick off download tasks, or directly download the file if the
    file is small.'''
    key = bucket.get_key(s3url.path)

    # optional checks
    if self.opt.dry_run:
        message('%s => %s', source, target)
        return
    elif self.opt.sync_check and self.sync_check(target, key):
        message('%s => %s (synced)', source, target)
        return
    elif not self.opt.force and os.path.exists(target):
        raise Failure('File already exists: %s' % target)

    if key is None:
        raise Failure('The key "%s" does not exist.' % (s3url.path,))

    resp = self.s3.make_request('HEAD', bucket=bucket, key=key)
    fsize = int(resp.getheader('content-length'))

    # Small file optimization.
    if fsize < self.opt.max_singlepart_download_size:
        tempfile = tempfile_get(target)
        try:
            # Atomically download the file via a temp file.
            if self.opt.recursive:
                self.mkdirs(tempfile)
            key.get_contents_to_filename(tempfile)
            self.update_privilege(source, tempfile)
            self._verify_file_size(key, tempfile)
            tempfile_set(tempfile, target)
            message('%s => %s', source, target)
        except Exception as e:
            tempfile_set(tempfile, None)
            raise RetryFailure('Download Failure: %s, Source: %s.'
                               % (e.message, source))
        return

    # Here we use the temp filename as the id of the mpi.
    for args in self.get_file_splits(tempfile_get(target), source, target,
                                     fsize, self.opt.multipart_split_size):
        self.pool.download(*args)
    return
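# Hedged sketch (not the original helper): the byte-range math that
# get_file_splits() above presumably performs for files at or above
# max_singlepart_download_size -- cutting fsize bytes into inclusive
# ranges of at most split_size for parallel ranged GETs.
def byte_ranges(fsize, split_size):
    pos = 0
    while pos < fsize:
        end = min(pos + split_size, fsize) - 1
        yield (pos, end)  # maps to a 'Range: bytes=pos-end' request header
        pos = end + 1

print(list(byte_ranges(10, 4)))  # [(0, 3), (4, 7), (8, 9)]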
def main():
    BASE_PATH = sys.argv[1]
    start = int(sys.argv[2])
    end = int(sys.argv[3])
    conn = S3Connection(os.environ['AWS_ACCESS_KEY'],
                        os.environ['AWS_SECRET_KEY'])
    bucket = conn.get_bucket('fcp-indi')
    pairs = get_key_name_pairs(bucket)
    if not os.path.exists(BASE_PATH):
        os.mkdir(BASE_PATH)
        print 'Creating directory at', BASE_PATH
    for pair in pairs[start:end]:
        key, f_name = pair
        print 'Downloading file', f_name
        key.get_contents_to_filename(BASE_PATH + f_name, cb=call, num_cb=100)
        floc = BASE_PATH + str(f_name)
        print 'file location is', floc
        subj = f_name.split('.')[0]
        print 'unzipping', f_name
        os.system('tar -zxf ' + floc + ' -C ' + BASE_PATH)
        print 'removing old zip of', floc
        os.system('rm ' + floc)
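# Hedged usage note (the script name is hypothetical): invoked as
#   AWS_ACCESS_KEY=... AWS_SECRET_KEY=... python download.py /data/ 0 10
# which downloads and unpacks archives 0..9 from the fcp-indi bucket into
# /data/. BASE_PATH is concatenated directly with file names above, so the
# trailing slash on /data/ matters.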
def download_billing_file(self, env, date=None, force=False):
    date = date or datetime.datetime.utcnow().date()
    bucket_name = env['ec2.detailed_billing.bucket']
    if env.get('ec2.detailed_billing.payer_account'):
        envs = self.analytics.load_aws_accounts_ids_envs(
            [env['ec2.detailed_billing.payer_account']])
        self.analytics.load_env_credentials(envs, platform='ec2')
        for e in envs:
            if e['client_id'] == env['client_id']:
                credentials_env = e
                break
        else:
            msg = 'Cannot find AWS credentials for PayerAccount {}'
            msg = msg.format(env['ec2.detailed_billing.payer_account'])
            raise Exception(msg)
    else:
        credentials_env = env.copy()
    kwds = {
        'aws_access_key_id': cryptotool.decrypt_scalr(
            self.config['crypto_key'], credentials_env['ec2.access_key']),
        'aws_secret_access_key': cryptotool.decrypt_scalr(
            self.config['crypto_key'], credentials_env['ec2.secret_key']),
        'proxy': self.config['aws_proxy'].get('host'),
        'proxy_port': self.config['aws_proxy'].get('port'),
        'proxy_user': self.config['aws_proxy'].get('user'),
        'proxy_pass': self.config['aws_proxy'].get('pass'),
    }
    default_region_map = {
        'regular': 'us-east-1',
        'gov-cloud': 'us-gov-west-1',
        'cn-cloud': 'cn-north-1',
    }
    default_region = default_region_map[env.get('account_type', 'regular')]
    region = env.get('ec2.detailed_billing.region', default_region)
    conn = boto.s3.connect_to_region(region, **kwds)
    bucket = conn.get_bucket(bucket_name)
    default_file_name = get_default_aws_csv_file_name(
        credentials_env['ec2.account_id'], date)
    file_name_tmplate = get_aws_csv_file_name_tmplate(
        credentials_env['ec2.account_id'], date)
    files_in_bucket = [key.name for key in bucket.list()
                       if re.match(file_name_tmplate, key.name)]
    if not files_in_bucket:
        utcnow = datetime.datetime.utcnow()
        if date.month == utcnow.month and utcnow.day < 2:
            return None
        else:
            msg = "No valid files ({}, {}) found in bucket '{}'"
            msg = msg.format(default_file_name, file_name_tmplate, bucket_name)
            raise exceptions.FileNotFoundError(msg)
    if default_file_name not in files_in_bucket:
        file_name = files_in_bucket[0]  # use first valid file
        msg = ("Default AWS detailed billing statistics file '{}' not found "
               "in bucket '{}', available {}, using '{}'")
        msg = msg.format(default_file_name, bucket_name, files_in_bucket,
                         file_name)
        LOG.warning(msg)
    else:
        file_name = default_file_name
    try:
        key = bucket.get_key(file_name)
        last_modified_dt = datetime.datetime.strptime(
            key.last_modified, self.last_modified_format)
        utcnow = datetime.datetime.utcnow()
        seconds_from_last_modified_dt = (utcnow - last_modified_dt).seconds
        if hasattr(self, 'task_info') and self.task_info.get('period') > 300:
            delta = datetime.timedelta(seconds=self.task_info['period'] - 90)
        else:
            delta = datetime.timedelta(seconds=210)
        condition1 = (utcnow > last_modified_dt and
                      utcnow < last_modified_dt + delta)
        condition2 = seconds_from_last_modified_dt > 3600
        condition3 = (seconds_from_last_modified_dt / 3600) % 24 == 0
        if force or condition1 or (condition2 and condition3):
            with self._downloading_lock:
                self.downloading_locks.setdefault(file_name,
                                                  gevent.lock.RLock())
            with self.downloading_locks[file_name]:
                csv_zip_file = os.path.join(self.cache_dir, file_name)
                # str.rstrip removes a *set of characters*, not a suffix;
                # strip the literal '.zip' extension instead
                csv_file = csv_zip_file[:-len('.zip')]
                if os.path.exists(csv_file):
                    msg = "'{}' already exists in cache directory, using it"
                    msg = msg.format(os.path.basename(csv_file))
                    LOG.debug(msg)
                    return csv_file
                while key.size * 3 > helper.get_free_space(self.cache_dir):
                    LOG.error('Disk is full, waiting 60 sec')
                    gevent.sleep(60)
                LOG.debug("Downloading '{}' for environment {}".format(
                    file_name, env['id']))
                attempts = 2
                downloading_start_time = time.time()
                while True:
                    try:
                        key.get_contents_to_filename(csv_zip_file)
                        assert os.path.isfile(csv_zip_file), \
                            os.listdir(self.cache_dir)
                        downloading_end_time = time.time()
                        break
                    except:
                        attempts -= 1
                        if not attempts:
                            raise
                downloading_time = downloading_end_time - downloading_start_time
                msg = "Downloading '{0}' done in {1:.1f} seconds".format(
                    file_name, downloading_time)
                LOG.info(msg)
                LOG.debug('Unzipping to {}'.format(csv_file))
                while os.path.getsize(csv_zip_file) > helper.get_free_space(self.cache_dir):
                    LOG.error('Disk is full, waiting 60 sec')
                    gevent.sleep(60)
                with zipfile.ZipFile(csv_zip_file, 'r') as f:
                    f.extract(f.infolist()[0], self.cache_dir)
                os.remove(csv_zip_file)
                return csv_file
        else:
            msg = "Skipping AWS billing file '{}' for environment {}"
            msg = msg.format(file_name, env['id'])
            LOG.debug(msg)
            return None
    except:
        msg = "File '{}', bucket '{}', reason: {}"
        msg = msg.format(file_name, bucket_name, helper.exc_info())
        # Python 2 three-argument raise: re-raise with the original traceback
        raise Exception, Exception(msg), sys.exc_info()[2]
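# Hedged sketch of the assumed helper.get_free_space() used above: bytes
# available on the filesystem holding `path`, via POSIX statvfs.
import os

def get_free_space(path):
    st = os.statvfs(path)
    return st.f_bavail * st.f_frsize  # bytes usable by unprivileged processes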
def download_from_s3(
    package_directory,
    directory,
    settings,
    region='us-east-1',
    log=logging.getLogger(__name__),
):
    """Clones the S3 Package Index into the provided directory"""
    new_index_files = set()
    s3_conn = None
    try:
        s3_conn = boto.s3.connect_to_region(region)
    except (
        boto.exception.NoAuthHandlerFound,
    ) as error:
        log.critical(error)
    else:
        s3_prefix = ensure_ends_with_slash(settings.get('s3.prefix'))
        s3_bucket = s3_conn.get_bucket(settings.get('s3.bucket'))
        if not s3_bucket.get_key('/'.join((
            s3_prefix,
            'index.html',
        ))):
            log.debug(
                'Package index "%s" in "%s" is not initialised',
                s3_prefix,
                s3_bucket.name,
            )
            new_index_files.add('*')
            return new_index_files
        download_files = set()
        for postfix in os.listdir(package_directory):
            simple_package_directory = postfix.split('-')[0].lower()
            safe_simple_package_directory = (
                setuptools.package_index.safe_name(simple_package_directory)
            )
            if not s3_bucket.get_key('/'.join((
                s3_prefix,
                simple_package_directory,
                'index.html',
            ))):
                log.debug(
                    'Package "%s" does not exist in package index',
                    simple_package_directory,
                )
                new_index_files.add('index.html')
                new_index_files.add(
                    '{}/index.html'.format(simple_package_directory),
                )
                download_files.add('index.html')
            elif not s3_bucket.get_key('/'.join((
                s3_prefix,
                simple_package_directory,
                postfix,
            ))):
                new_index_files.add(
                    '{}/index.html'.format(simple_package_directory),
                )
                download_files.add(
                    '{}/index.html'.format(simple_package_directory),
                )
            if simple_package_directory != safe_simple_package_directory:
                # Repeat the checks for the sanitised (safe) package name.
                if not s3_bucket.get_key('/'.join((
                    s3_prefix,
                    safe_simple_package_directory,
                    'index.html',
                ))):
                    log.debug(
                        'Package "%s" does not exist in package index',
                        safe_simple_package_directory,
                    )
                    new_index_files.add('index.html')
                    new_index_files.add(
                        '{}/index.html'.format(safe_simple_package_directory),
                    )
                    download_files.add('index.html')
                elif not s3_bucket.get_key('/'.join((
                    s3_prefix,
                    safe_simple_package_directory,
                    postfix,
                ))):
                    new_index_files.add(
                        '{}/index.html'.format(safe_simple_package_directory),
                    )
                    download_files.add(
                        '{}/index.html'.format(safe_simple_package_directory),
                    )
        if not download_files:
            return new_index_files
        log.info(
            'Downloading files from "%s" in "%s" to "%s"',
            s3_prefix,
            s3_bucket.name,
            directory,
        )
        for postfix in download_files:
            key = boto.s3.key.Key(
                bucket=s3_bucket,
                name=(s3_prefix + postfix),
            )
            local_file = os.path.join(
                directory,
                key.name[len(s3_prefix):],
            )
            log.debug(
                'Downloading "%s" from "%s" in "%s"',
                local_file,
                key.name,
                key.bucket.name,
            )
            os.makedirs(
                os.path.dirname(local_file),
                exist_ok=True,
            )
            key.get_contents_to_filename(
                local_file,
            )
        if 'index.html' in download_files:
            recreate_root_folders(
                directory,
            )
        return new_index_files
    finally:
        if s3_conn:
            s3_conn.close()
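# Hedged sketch (standalone): what safe_name() used above does -- it replaces
# runs of characters outside [A-Za-z0-9.] with a single '-', which is why the
# lowered directory name can differ from its safe form.
from setuptools.package_index import safe_name

print(safe_name('my_package'))  # 'my-package'
print(safe_name('requests'))    # 'requests' (unchanged)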