Example #1
    def download_aws_billing_file(self, cred, bucket_name, date=None):
        if date is None:
            date = datetime.datetime.utcnow().date()

        conn = get_s3_conn(cred)
        bucket = conn.get_bucket(bucket_name)
        account_id = cryptotool.decrypt_scalr(app.crypto_key,
                                              cred['account_id'])
        file_name = get_aws_csv_file_name(account_id, date)
        key = bucket.get_key(file_name)

        if not key:
            msg = "AWS detailed billing CSV file {0} wasn't found in bucket {1}"
            msg = msg.format(file_name, bucket_name)
            if datetime.datetime.utcnow().day == 1:
                LOG.warning(msg)
                return None
            else:
                raise Exception(msg)

        last_modified_dt = datetime.datetime.strptime(
            key.last_modified, self.last_modified_format)
        update_interval = self.config['interval']
        utcnow = datetime.datetime.utcnow()
        delta = datetime.timedelta(seconds=update_interval)
        condition1 = utcnow > last_modified_dt and utcnow < last_modified_dt + delta
        condition2 = ((utcnow - last_modified_dt).seconds / 3600) % 8 == 0
        if condition1 or condition2:
            local_file_path = os.path.join(self.tmp_dir, file_name)
            LOG.debug('Downloading {0}'.format(file_name))
            key.get_contents_to_filename(local_file_path)
            return local_file_path
        else:
            return None
Example #2
    def download_aws_billing_file(self, cred, bucket_name, date=None):
        if date is None:
            date = datetime.datetime.utcnow().date()

        conn = get_s3_conn(cred)
        bucket = conn.get_bucket(bucket_name)
        account_id = cryptotool.decrypt_scalr(app.crypto_key, cred['account_id'])
        file_name = get_aws_csv_file_name(account_id, date)
        key = bucket.get_key(file_name)

        if not key:
            msg = "AWS detailed billing CSV file {0} wasn't found in bucket {1}"
            msg = msg.format(file_name, bucket_name)
            if datetime.datetime.utcnow().day == 1:
                LOG.warning(msg)
                return None
            else:
                raise Exception(msg)

        last_modified_dt = datetime.datetime.strptime(key.last_modified, self.last_modified_format)
        update_interval = self.config['interval']
        utcnow = datetime.datetime.utcnow()
        delta = datetime.timedelta(seconds=update_interval)
        condition1 = utcnow > last_modified_dt and utcnow < last_modified_dt + delta
        condition2 = ((utcnow - last_modified_dt).seconds / 3600) % 8 == 0
        if condition1 or condition2:
            local_file_path = os.path.join(self.tmp_dir, file_name)
            LOG.debug('Downloading {0}'.format(file_name))
            key.get_contents_to_filename(local_file_path)
            return local_file_path
        else:
            return None
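
The examples on this page all follow the same boto 2 call chain: open an S3 connection, resolve the bucket, look up the key, and call get_contents_to_filename() to write the object body to a local file. A minimal, self-contained sketch of that shared pattern (the bucket name, key name, and local path below are placeholders; credentials are assumed to come from the usual boto 2 sources such as the environment or ~/.boto):

import os
import boto  # boto 2.x

def download_object(bucket_name, key_name, local_path):
    """Download a single S3 object to local_path, or return None if it does not exist."""
    conn = boto.connect_s3()                      # credentials resolved by boto 2 itself
    bucket = conn.get_bucket(bucket_name)
    key = bucket.get_key(key_name)                # None if the object is missing
    if key is None:
        return None
    key.get_contents_to_filename(local_path)      # stream the object body to the file
    return local_path

# download_object('my-bucket', 'reports/2015-01.csv', os.path.join('/tmp', 'report.csv'))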
Example #3
File: pipdeps.py Project: mjs/juju
def command_install(bucket, requirements, verbose=False,
                    requirements_py3=None):
    with utility.temp_dir() as archives_dir:
        for key in bucket.list(prefix=PREFIX):
            archive = key.name[len(PREFIX):]
            key.get_contents_to_filename(os.path.join(archives_dir, archive))
        archives_url = "file://" + archives_dir
        pip_args = get_pip_args(archives_url)
        run_pip_uninstall(OBSOLETE)
        run_pip_install(pip_args, requirements, verbose=verbose)
        run_pip3_install(pip_args, requirements_py3, verbose=verbose)
Example #4
File: s3.py Project: sdayu/nokkhum-cloud
    def get_file(self, key_name, file_name):
        bucket = self.connection.get_bucket(self.bucket_name)

        if bucket is None:
            return False

        key = bucket.get_key(key_name)
        if key is None:
            return False

        key.get_contents_to_filename(file_name)
        return True
Example #5
def command_install(bucket,
                    requirements,
                    verbose=False,
                    requirements_py3=None):
    with utility.temp_dir() as archives_dir:
        for key in bucket.list(prefix=PREFIX):
            archive = key.name[len(PREFIX):]
            key.get_contents_to_filename(os.path.join(archives_dir, archive))
        archives_url = "file://" + archives_dir
        pip_args = get_pip_args(archives_url)
        run_pip_uninstall(OBSOLETE)
        run_pip_install(pip_args, requirements, verbose=verbose)
        run_pip3_install(pip_args, requirements_py3, verbose=verbose)
Example #6
  def download_file(self, destination, bucket_name, key_name):
    """ Downloads a file to the local filesystem from Amazon S3.

    Args:
      destination: A str containing the name of the file on the local filesystem
        that we should download our file to.
      bucket_name: A str containing the name of the bucket that the file should
        be downloaded from.
      key_name: A str containing the name of the key that the file should be
        downloaded from.
    """
    bucket = self.connection.lookup(bucket_name)
    key = boto.s3.key.Key(bucket)
    key.key = key_name
    key.get_contents_to_filename(destination)
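
The docstring above describes the arguments but not the object the method hangs off; a hedged sketch of how it might be wired up (the S3Backend wrapper below is an assumption — the method only relies on self.connection being a boto 2 S3Connection):

import boto
import boto.s3.key

class S3Backend(object):
    """Hypothetical wrapper around a boto 2 S3Connection for the method in Example #6."""

    def __init__(self):
        # boto 2.x connection; credentials are picked up from the environment or ~/.boto
        self.connection = boto.connect_s3()

    def download_file(self, destination, bucket_name, key_name):
        # Same body as Example #6: build a Key by name and stream it to `destination`.
        bucket = self.connection.lookup(bucket_name)
        key = boto.s3.key.Key(bucket)
        key.key = key_name
        key.get_contents_to_filename(destination)

# Hypothetical call:
# S3Backend().download_file('/tmp/app.tar.gz', 'some-bucket', 'backups/app.tar.gz')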
Example #7
    def worker(base_path):
        while 1:
            try:
                op, queued_path = queue.get()

                path = queued_path
                if path is None:
                    return

                fspath = os.path.join(base_path, path)

                if op is DELETE:
                    try:
                        os.remove(fspath)
                    except Exception:
                        raise

                else:  # download
                    try:
                        key = boto.s3.key.Key(bucket)
                        key.key = bucket_prefix + path
                        try:
                            parent = os.path.dirname(fspath)
                            if not os.path.exists(parent):
                                try:
                                    os.makedirs(parent)
                                except OSError:
                                    if not os.path.exists(parent):
                                        raise

                            key.get_contents_to_filename(
                                fspath.encode(encoding))
                        except Exception:
                            logger.exception('downloading %r, retrying' %
                                             fspath)
                            time.sleep(9)
                            key.get_contents_to_filename(
                                fspath.encode(encoding))

                    except Exception:
                        raise

            except Exception:
                logger.exception('processing %r %r' % (op, path))
            finally:
                queue.task_done()
Example #8
 def __getattribute__(self, name):
     value = _BaseImage.__getattribute__(self, name)
     if name == 'files' and value is None:
         # source is 's3://bucket/path/to/object'
         (bucket_name, object_name) = self.source[5:].split('/', 1)
         s3 = S3Connection(self._s3_access_key, 
                           self._s3_secret_key, 
                           calling_format=OrdinaryCallingFormat())
         bucket = s3.get_bucket(bucket_name)
         key = boto.s3.key.Key(bucket)
         key.key = object_name
         self._source_base = os.path.basename(self.source)
         self._temp_source = '%s/%s' % (self._tempdir, self._source_base)
         key.get_contents_to_filename(self._temp_source)
         s3.close()
         self._unpack()
         return self.files
     return value
Example #9
 def __getattribute__(self, name):
     value = _BaseImage.__getattribute__(self, name)
     if name == 'files' and value is None:
         # source is 's3://bucket/path/to/object'
         (bucket_name, object_name) = self.source[5:].split('/', 1)
         s3 = S3Connection(self._s3_access_key,
                           self._s3_secret_key,
                           calling_format=OrdinaryCallingFormat())
         bucket = s3.get_bucket(bucket_name)
         key = boto.s3.key.Key(bucket)
         key.key = object_name
         self._source_base = os.path.basename(self.source)
         self._temp_source = '%s/%s' % (self._tempdir, self._source_base)
         key.get_contents_to_filename(self._temp_source)
         s3.close()
         self._unpack()
         return self.files
     return value
Example #10
  def _kick_off_downloads(self, s3url, bucket, source, target):
    '''Kick off download tasks, or directly download the file if the file is small.'''
    key = bucket.get_key(s3url.path)

    # optional checks
    if self.opt.dry_run:
      message('%s => %s', source, target)
      return
    elif self.opt.sync_check and self.sync_check(target, key):
      message('%s => %s (synced)', source, target)
      return
    elif not self.opt.force and os.path.exists(target):
      raise Failure('File already exists: %s' % target)

    if key is None:
      raise Failure('The key "%s" does not exists.' % (s3url.path,))

    resp = self.s3.make_request('HEAD', bucket = bucket, key = key)
    fsize = int(resp.getheader('content-length'))

    # Small file optimization.
    if fsize < self.opt.max_singlepart_download_size:
      tempfile = tempfile_get(target)
      try: # Atomically download file with
        if self.opt.recursive:
          self.mkdirs(tempfile)
        key.get_contents_to_filename(tempfile)
        self.update_privilege(source, tempfile)
        self._verify_file_size(key, tempfile)
        tempfile_set(tempfile, target)
        message('%s => %s', source, target)
      except Exception as e:
        tempfile_set(tempfile, None)
        raise RetryFailure('Download Failure: %s, Source: %s.' % (e.message, source))

      return

    # Here we use temp filename as the id of mpi.
    for args in self.get_file_splits(tempfile_get(target), source, target, fsize, self.opt.multipart_split_size):
      self.pool.download(*args)
    return
Example #11
  def _kick_off_downloads(self, s3url, bucket, source, target):
    '''Kick off download tasks, or directly download the file if the file is small.'''
    key = bucket.get_key(s3url.path)

    # optional checks
    if self.opt.dry_run:
      message('%s => %s', source, target)
      return
    elif self.opt.sync_check and self.sync_check(target, key):
      message('%s => %s (synced)', source, target)
      return
    elif not self.opt.force and os.path.exists(target):
      raise Failure('File already exists: %s' % target)

    if key is None:
      raise Failure('The key "%s" does not exists.' % (s3url.path,))

    resp = self.s3.make_request('HEAD', bucket = bucket, key = key)
    fsize = int(resp.getheader('content-length'))

    # Small file optimization.
    if fsize < self.opt.max_singlepart_download_size:
      tempfile = tempfile_get(target)
      try: # Atomically download file with
        if self.opt.recursive:
          self.mkdirs(tempfile)
        key.get_contents_to_filename(tempfile)
        self.update_privilege(source, tempfile)
        self._verify_file_size(key, tempfile)
        tempfile_set(tempfile, target)
        message('%s => %s', source, target)
      except Exception as e:
        tempfile_set(tempfile, None)
        raise RetryFailure('Download Failure: %s, Source: %s.' % (e.message, source))

      return

    # Here we use temp filename as the id of mpi.
    for args in self.get_file_splits(tempfile_get(target), source, target, fsize, self.opt.multipart_split_size):
      self.pool.download(*args)
    return
Example #12
def main():
    BASE_PATH = sys.argv[1]
    start = int(sys.argv[2])
    end = int(sys.argv[3])
    conn = S3Connection(os.environ['AWS_ACCESS_KEY'], os.environ['AWS_SECRET_KEY'])
    bucket = conn.get_bucket('fcp-indi')
    pairs = get_key_name_pairs(bucket)
    if not os.path.exists(BASE_PATH):
        os.mkdir(BASE_PATH)
        print 'Creating directory at', BASE_PATH
    for pair in pairs[start:end]:
        key, f_name = pair
        print 'Downloading file', f_name
        key.get_contents_to_filename(BASE_PATH + f_name,
                                    cb = call, num_cb = 100)
        floc = BASE_PATH + str(f_name)
        print 'file location is', floc
        subj = f_name.split('.')[0]
        print 'unzipping', f_name
        os.system('tar -zxf ' + floc + ' -C ' + BASE_PATH)
        print 'removing old zip of', floc
        os.system('rm ' + floc)
Example #13
File: billing.py Project: complues/scalr
    def download_billing_file(self, env, date=None, force=False):
        date = date or datetime.datetime.utcnow().date()
        bucket_name = env['ec2.detailed_billing.bucket']
        if env.get('ec2.detailed_billing.payer_account'):
            envs = self.analytics.load_aws_accounts_ids_envs([env['ec2.detailed_billing.payer_account']])
            self.analytics.load_env_credentials(envs, platform='ec2')
            for e in envs:
                if e['client_id'] == env['client_id']:
                    credentials_env = e
                    break
            else:
                msg = 'Can not found AWS credentials for PayerAccount {}'
                msg = msg.format(env['ec2.detailed_billing.payer_account'])
                raise Exception(msg)
        else:
            credentials_env = env.copy()
        kwds = {
            'aws_access_key_id': cryptotool.decrypt_scalr(self.config['crypto_key'],
                                                          credentials_env['ec2.access_key']),
            'aws_secret_access_key': cryptotool.decrypt_scalr(self.config['crypto_key'],
                                                              credentials_env['ec2.secret_key']),
            'proxy': self.config['aws_proxy'].get('host'),
            'proxy_port': self.config['aws_proxy'].get('port'),
            'proxy_user': self.config['aws_proxy'].get('user'),
            'proxy_pass': self.config['aws_proxy'].get('pass'),
        }
        default_region_map = {
            'regular': 'us-east-1',
            'gov-cloud': 'us-gov-west-1',
            'cn-cloud': 'cn-north-1',
        }
        default_region = default_region_map[env.get('account_type', 'regular')]
        region = env.get('ec2.detailed_billing.region', default_region)
        conn = boto.s3.connect_to_region(region, **kwds)
        bucket = conn.get_bucket(bucket_name)
        default_file_name = get_default_aws_csv_file_name(credentials_env['ec2.account_id'], date)
        file_name_tmplate = get_aws_csv_file_name_tmplate(credentials_env['ec2.account_id'], date)
        files_in_bucket = [key.name for key in bucket.list()
                           if re.match(file_name_tmplate, key.name)]
        if not files_in_bucket:
            utcnow = datetime.datetime.utcnow()
            if date.month == utcnow.month and utcnow.day < 2:
                return None
            else:
                msg = "Not found any valid files({}, {}) in bucket '{}'"
                msg = msg.format(default_file_name, file_name_tmplate, bucket_name)
                raise exceptions.FileNotFoundError(msg)
        if default_file_name not in files_in_bucket:
            file_name = files_in_bucket[0]  # use first valid file
            msg = "Default AWS detailed billing statistics file '{}' not found in bucket '{}', available {}, use '{}'"
            msg = msg.format(default_file_name, bucket_name, files_in_bucket, file_name)
            LOG.warning(msg)
        else:
            file_name = default_file_name
        try:
            key = bucket.get_key(file_name)
            last_modified_dt = datetime.datetime.strptime(key.last_modified, self.last_modified_format)
            utcnow = datetime.datetime.utcnow()

            seconds_from_last_modified_dt = (utcnow - last_modified_dt).seconds
            if hasattr(self, 'task_info') and self.task_info.get('period') > 300:
                delta = datetime.timedelta(seconds=self.task_info['period'] - 90)
            else:
                delta = datetime.timedelta(seconds=210)
            condition1 = utcnow > last_modified_dt and utcnow < last_modified_dt + delta
            condition2 = seconds_from_last_modified_dt > 3600
            condition3 = (seconds_from_last_modified_dt / 3600) % 24 == 0

            if force or condition1 or (condition2 and condition3):
                with self._downloading_lock:
                    self.downloading_locks.setdefault(file_name, gevent.lock.RLock())
                with self.downloading_locks[file_name]:
                    csv_zip_file = os.path.join(self.cache_dir, file_name)
                    csv_file = csv_zip_file.rstrip('.zip')
                    if os.path.exists(csv_file):
                        msg = "'{}' already exists in cache directory, use it"
                        msg = msg.format(os.path.basename(csv_file))
                        LOG.debug(msg)
                        return csv_file
                    while key.size * 3 > helper.get_free_space(self.cache_dir):
                        LOG.error('Disk is full, waiting 60 sec')
                        gevent.sleep(60)
                    LOG.debug("Downloading '{}' for environment {}".format(file_name, env['id']))
                    attempts = 2
                    downloading_start_time = time.time()
                    while True:
                        try:
                            key.get_contents_to_filename(csv_zip_file)
                            assert os.path.isfile(csv_zip_file), os.listdir(self.cache_dir)
                            downloading_end_time = time.time()
                            break
                        except:
                            attempts -= 1
                            if not attempts:
                                raise
                    downloading_time = downloading_end_time - downloading_start_time
                    msg = "Downloading '{0}' done in {1:.1f} seconds".format(file_name, downloading_time)
                    LOG.info(msg)
                    LOG.debug('Unzipping to {}'.format(csv_file))
                    while os.path.getsize(csv_zip_file) * 1 > helper.get_free_space(self.cache_dir):
                        LOG.error('Disk is full, waiting 60 sec')
                        gevent.sleep(60)
                    with zipfile.ZipFile(csv_zip_file, 'r') as f:
                        f.extract(f.infolist()[0], self.cache_dir)
                    os.remove(csv_zip_file)
                    return csv_file
            else:
                msg = "Skipping AWS billing file '{}' for environment {}"
                msg = msg.format(file_name, env['id'])
                LOG.debug(msg)
                return None
        except:
            msg = "File '{}', bucket '{}', reason: {}"
            msg = msg.format(file_name, bucket_name, helper.exc_info())
            raise Exception, Exception(msg), sys.exc_info()[2]
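
Example #13 wraps the get_contents_to_filename() call in a small retry loop and then unpacks the zipped CSV it fetched. A stripped-down sketch of just that download-and-extract step (the function name and attempt count are placeholders; `key` is assumed to be an already-resolved boto 2 Key):

import os
import zipfile

def download_and_unzip(key, csv_zip_file, cache_dir, attempts=2):
    """Fetch a zipped CSV with a simple retry, then extract its first member into cache_dir."""
    while True:
        try:
            key.get_contents_to_filename(csv_zip_file)
            break
        except Exception:
            attempts -= 1
            if not attempts:
                raise
    with zipfile.ZipFile(csv_zip_file, 'r') as archive:
        member = archive.infolist()[0]
        extracted_path = archive.extract(member, cache_dir)
    os.remove(csv_zip_file)
    return extracted_path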
Example #14
File: __init__.py Project: Kazzer/s3pi
def download_from_s3(
        package_directory,
        directory,
        settings,
        region='us-east-1',
        log=logging.getLogger(__name__),
):
    """Clones the S3 Package Index into the provided directory"""
    new_index_files = set()
    s3_conn = None
    try:
        s3_conn = boto.s3.connect_to_region(region)
    except (
            boto.exception.NoAuthHandlerFound,
    ) as error:
        log.critical(error)
    else:
        s3_prefix = ensure_ends_with_slash(settings.get('s3.prefix'))

        s3_bucket = s3_conn.get_bucket(settings.get('s3.bucket'))

        if not s3_bucket.get_key('/'.join((
                s3_prefix,
                'index.html',
        ))):
            log.debug(
                'Package index "%s" in "%s" is not initialised',
                s3_prefix,
                s3_bucket.name,
            )
            new_index_files.add('*')
            return new_index_files

        download_files = set()
        for postfix in os.listdir(package_directory):
            simple_package_directory = postfix.split('-')[0].lower()
            safe_simple_package_directory = (
                setuptools.package_index.safe_name(simple_package_directory)
            )
            if not s3_bucket.get_key('/'.join((
                    s3_prefix,
                    simple_package_directory,
                    'index.html',
            ))):
                log.debug(
                    'Package "%s" does not exist in package index',
                    simple_package_directory,
                )
                new_index_files.add('index.html')
                new_index_files.add(
                    '{}/index.html'.format(simple_package_directory),
                )
                download_files.add('index.html')
            elif not s3_bucket.get_key('/'.join((
                    s3_prefix,
                    simple_package_directory,
                    postfix,
            ))):
                new_index_files.add(
                    '{}/index.html'.format(simple_package_directory),
                )
                download_files.add(
                    '{}/index.html'.format(simple_package_directory),
                )
            if simple_package_directory != safe_simple_package_directory:
                if not s3_bucket.get_key('/'.join((
                        s3_prefix,
                        simple_package_directory,
                        'index.html',
                ))):
                    log.debug(
                        'Package "%s" does not exist in package index',
                        simple_package_directory,
                    )
                    new_index_files.add('index.html')
                    new_index_files.add(
                        '{}/index.html'.format(simple_package_directory),
                    )
                    download_files.add('index.html')
                elif not s3_bucket.get_key('/'.join((
                        s3_prefix,
                        simple_package_directory,
                        postfix,
                ))):
                    new_index_files.add(
                        '{}/index.html'.format(simple_package_directory),
                    )
                    download_files.add(
                        '{}/index.html'.format(simple_package_directory),
                    )

        if not download_files:
            return new_index_files

        log.info(
            'Downloading files from "%s" in "%s" to "%s"',
            s3_prefix,
            s3_bucket.name,
            directory,
        )
        for postfix in download_files:
            key = boto.s3.key.Key(
                bucket=s3_bucket,
                name=(s3_prefix + postfix),
            )
            local_file = os.path.join(
                directory,
                key.name[len(s3_prefix):],
            )
            log.debug(
                'Downloading "%s" from "%s" in "%s"',
                local_file,
                key.name,
                key.bucket.name,
            )
            os.makedirs(
                os.path.dirname(local_file),
                exist_ok=True,
            )
            key.get_contents_to_filename(
                local_file,
            )

        if 'index.html' in download_files:
            recreate_root_folders(
                directory,
            )

        return new_index_files
    finally:
        if s3_conn:
            s3_conn.close()