Example #1
def ecr_create_fq_respository_name(repository_name, policy_resource_package=None, policy_resource_name=None):
    ecr_client = b3.client('ecr', region_name=profile_get_region())
    # Create or fetch the repository in AWS (to store the image)
    try:
        response = ecr_client.create_repository(
            repositoryName=repository_name
        )
        repository_metadata = response['repository']
        # Set the policy on the repository
        if policy_resource_package is not None and policy_resource_name is not None:
            policy = pkg_resources.resource_string(policy_resource_package.__name__, policy_resource_name)
            _ = ecr_client.set_repository_policy(
                registryId=repository_metadata['registryId'],
                repositoryName=repository_name,
                policyText=policy,
                force=True
            )
    except ClientError as e:
        if e.response['Error']['Code'] == 'RepositoryAlreadyExistsException':
            response = ecr_client.describe_repositories(
                repositoryNames=[repository_name]
            )
            repository_metadata = response['repositories'][0]
        elif e.response['Error']['Code'] == 'AccessDeniedException':
            # Log and continue; note that repository_metadata stays unset if the
            # repository could not be created or described, so the return below will fail.
            _logger.warning("Error [AccessDeniedException] when creating repo {}, trying to continue...".format(repository_name))
        else:
            raise e
    return repository_metadata['repositoryUri']
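A minimal usage sketch of the function above (the repository name is illustrative; assumes AWS credentials and a default region are configured, plus the module-level imports used above, e.g. boto3 imported as b3):

# Create (or fetch) the ECR repository and print its URI.
repo_uri = ecr_create_fq_respository_name('disdat-example-repo')
print(repo_uri)  # e.g. '<account-id>.dkr.ecr.<region>.amazonaws.com/disdat-example-repo'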
Example #2
def get_s3_key(bucket, key, filename=None):
    """

    Args:
        bucket:
        key:
        file_name:
        s3: A boto3.resource('s3')

    Returns:

    """

    #print "PID({}) START bkt[] key[{}] file[{}]".format(multiprocessing.current_process(),key,filename)

    dl_retry = 3

    s3 = b3.resource('s3')

    if filename is None:
        filename = os.path.basename(key)
    else:
        path = os.path.dirname(filename)
        if not os.path.exists(path):
            try:
                os.makedirs(path)
            except os.error as ose:
                # swallow error -- likely directory already exists from other process
                _logger.debug("aws_s3.get_s3_key: Error code {}".format(
                    os.strerror(ose.errno)))

    while dl_retry > 0:
        try:
            s3.Bucket(bucket).download_file(key, filename)
            dl_retry = -1
        except Exception as e:
            _logger.warning(
                "aws_s3.get_s3_key Retry Count [{}] on download_file raised exception {}"
                .format(dl_retry, e))
            dl_retry -= 1
            if dl_retry <= 0:
                _logger.warning(
                    "aws_s3.get_s3_key Fail on downloading file after 3 retries with exception {}"
                    .format(e))
                raise

    #print "PID({}) STOP bkt[] key[{}] file[{}]".format(multiprocessing.current_process(),key,filename)

    return filename
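A minimal usage sketch (bucket, key, and destination path are illustrative; assumes AWS credentials are configured):

# Download s3://my-bucket/path/to/data.csv to /tmp/data/data.csv,
# creating the destination directory if it does not exist.
local_path = get_s3_key('my-bucket', 'path/to/data.csv', filename='/tmp/data/data.csv')
print(local_path)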
Example #3
    def add_tags(self, tags):
        """ Add tag to our set of input tags

        Args:
            k,v (dict): string:string dictionary

        Returns:
            self
        """
        if not self.open:
            _logger.warning("Open the bundle to modify tags.")
            return
        if self.closed:
            _logger.warning("Unable to modify tags in a closed bundle.")
            return
        super(Bundle, self).add_tags(tags)
        return self
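A minimal usage sketch (assumes b is an open disdat Bundle, as the checks above require; tag keys and values are illustrative):

b.add_tags({'stage': 'training', 'owner': 'data-team'})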
Example #4
def ls_s3_url_objects(s3_url):
    """
    Return aws boto3 ObjectSummary's

    Note: There is no current way in boto3 to do globs -- you filter on the client side.

    Returns:
        list:str: list of ObjectSummary's under this path
    """
    result = []

    if s3_url[-1] != '/':
        s3_url += '/'

    bucket, s3_path = split_s3_url(s3_url)

    #if not s3_bucket_exists(bucket):
    #    return result

    if False:
        client = b3.client('s3')
        paginator = client.get_paginator('list_objects_v2')
        # use delimiter to groupby, which means, list things only at this level.
        #page_iterator = paginator.paginate(Bucket=bucket, Delimiter='/', Prefix=s3_path)
        page_iterator = paginator.paginate(Bucket=bucket, Prefix=s3_path)
        for page in page_iterator:
            result += [obj['Key'] for obj in page['Contents']]
    else:
        s3 = b3.resource('s3')
        try:
            s3_b = s3.Bucket(bucket)
            for i in s3_b.objects.filter(Prefix=s3_path, MaxKeys=1024):
                result.append(i)
            if len(result) == 1024:
                _logger.warning(
                    "ls_s3_url_objects: hit MaxKeys 1024 limit in result set.")
        except Exception as e:
            _logger.error(
                "ls_s3_url_objects: failed with exception {}".format(e))
            raise

    return result
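A minimal usage sketch (bucket and prefix are illustrative; assumes AWS credentials are configured). Each returned item is a boto3 ObjectSummary, so attributes such as key and size are available:

for obj in ls_s3_url_objects('s3://my-bucket/some/prefix/'):
    print(obj.key, obj.size)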
Example #5
File: api.py  Project: pombredanne/disdat
    def _open(self):
        """ Add the bundle to this local context

        Note, we don't need to checkout this context, we just need the relevant context object.

        Args:

        Returns:
            self, or None if the bundle is already open
        """
        if self.open:
            _logger.warning("Bundle is already open.")
            return
        elif not self.closed:
            self.local_dir, self.pb.uuid, self.remote_dir = self.data_context.make_managed_path()
            self.open = True
        else:
            _logger.warning("Bundle is closed -- unable to re-open.")
        return self
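A rough sketch of the lifecycle this method supports (construction of the Bundle is omitted; b is assumed to be a Bundle instance, and add_tags is the method from Example #3):

b._open()                      # allocate a managed path and mark the bundle open
b.add_tags({'stage': 'dev'})   # modify tags while the bundle is open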
Example #6
def apply(output_bundle, pipe_params, pipe_cls, input_tags, output_tags, force,
          output_bundle_uuid=None, central_scheduler=False, workers=1, data_context=None,
          incremental_push=False, incremental_pull=False):
    """
    Given an input bundle, run the pipesline on the bundle.
    Note, we first make a copy of all tasks that are parameterized identically to the tasks we will run.
    This is so we can figure out what we will need to re-run.
    This is why we make a single uuid for the output bundle of apply (for the driver).

    Args:
        output_bundle: The new bundle to be created
        pipe_params (str):   Luigi Task parameters string
        pipe_cls:      String <module.ClassName>
        force:         force recomputation of dependencies
        input_tags (dict):  Tags used to find the input bundle
        output_tags (dict):  Tags that need to be placed on the output bundle
        force (bool): whether to re-run this pipe
        output_bundle_uuid (str):  Optionally specify exactly the UUID of the output bundle IFF we actually need to produce it
        central_scheduler: Use a centralized Luigi scheduler (default False, i.e., --local-scheduler is used)
        workers: The number of luigi workers to use for this workflow (default 1)
        data_context: Actual context object or None and read current context.
        incremental_push (bool): Whether this job should push tasks as they complete to the remote (if configured)
        incremental_pull (bool): Whether this job should localize bundles as needed from the remote (if configured)

    Returns:
        bool: True if tasks needed to be run, False if no tasks (beyond wrapper task) executed.
    """

    _logger.debug("driver {}".format(driver.DriverTask))
    _logger.debug("pipe_cls {}".format(pipe_cls))
    _logger.debug("pipe params: {}".format(pipe_params))
    _logger.debug("force: {}".format(force))
    _logger.debug("input tags: {}".format(input_tags))
    _logger.debug("output tags: {}".format(output_tags))
    _logger.debug("sys.path {}".format(sys.path))
    _logger.debug("central_scheduler {}".format(central_scheduler))
    _logger.debug("workers {}".format(workers))
    _logger.debug("incremental_push {}".format(incremental_push))
    _logger.debug("incremental_pull {}".format(incremental_pull))

    if incremental_push:
        _logger.warning("incremental_push {}".format(incremental_push))

    if incremental_pull:
        _logger.warning("incremental_pull {}".format(incremental_pull))

    pfs = fs.DisdatFS()

    if data_context is None:
        if not pfs.in_context():
            _logger.warning('Not in a data context')
            return None
        data_context = pfs.curr_context

    # Re-execute logic -- make copy of task DAG
    # Creates a cache of {pipe:path_cache_entry} in the pipesFS object.
    # This "task_path_cache" is used throughout execution to find output bundles.
    reexecute_dag = driver.DriverTask(output_bundle, pipe_params,
                                      pipe_cls, input_tags, output_tags, force,
                                      data_context, incremental_push, incremental_pull)

    # Get version information for pipeline
    users_root_task = reexecute_dag.deps()[0]
    pipeline_path = os.path.dirname(sys.modules[users_root_task.__module__].__file__)
    fs.DisdatFS().get_pipe_version(pipeline_path)

    did_work = resolve_workflow_bundles(reexecute_dag, data_context)

    # At this point the path cache should be full of existing or new UUIDs.
    # we are going to replace the final pipe's UUID if the user has passed one in.
    # this happens when we run the docker container.
    # TODO: don't replace if it already exists.
    if output_bundle_uuid is not None:
        users_root_task = reexecute_dag.deps()[0]
        pce = pfs.get_path_cache(users_root_task)
        if pce.rerun: # if we have to re-run then replace it with our UUID
            # TODO: this is the same code as new_output_hframe, FIX!!!
            dir, uuid, _ = data_context.make_managed_path(output_bundle_uuid)
            fs.DisdatFS.put_path_cache(users_root_task,
                                       uuid,
                                       dir,
                                       pce.rerun,
                                       pce.is_left_edge_task,
                                       overwrite=True)

    success = build([reexecute_dag], local_scheduler=not central_scheduler, workers=workers)

    # After running a pipeline, blow away our path cache and git hash. Needed if we're run twice in the same process.
    fs.DisdatFS().clear_pipe_version()
    fs.DisdatFS().clear_path_cache()

    return {'success': success, 'did_work': did_work}
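A minimal usage sketch of apply (the bundle name, pipe class path, and parameter string are illustrative; assumes a disdat data context is configured):

result = apply('my.output.bundle',
               '{}',                               # Luigi task parameters string
               'my_package.my_module.MyPipeTask',  # <module.ClassName>
               input_tags={},
               output_tags={},
               force=False)
print(result['success'], result['did_work'])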