Example #1
    def run(self,
            job_status,
            expand_arrays,
            job_queue=None,
            job_ids=None,
            show_details=False):
        """
        Print the list of jobs, filtering by queue or by job ids.
        """
        if job_ids:
            self.__populate_output_by_job_ids(
                job_status, job_ids, show_details or len(job_ids) == 1)
            # explicitly asking for job details,
            # or asking for a single job that is not an array (the output is not a list of jobs)
            details_required = show_details or (len(job_ids) == 1
                                                and self.output.length() == 1)
        elif job_queue:
            self.__populate_output_by_queue(job_queue, job_status,
                                            expand_arrays, show_details)
            details_required = show_details
        else:
            fail(
                "Error listing jobs from AWS Batch. job_ids or job_queue must be defined"
            )

        if details_required:
            self.output.show()
        else:
            self.output.show_table([
                'jobId', 'jobName', 'status', 'startedAt', 'stoppedAt',
                'exitCode'
            ])
Example #2
    def __get_ecs_clusters(self, compute_environments):
        """
        Get Compute Environments from AWS Batch and create a list of ECS Cluster ARNs.

        :param compute_environments: compute environments to query
        :return: a list of ECS clusters
        """
        ecs_clusters = []
        try:
            # connect to batch and ask for compute environments
            batch_client = self.boto3_factory.get_client("batch")
            next_token = ""
            while next_token is not None:
                response = batch_client.describe_compute_environments(
                    computeEnvironments=compute_environments,
                    nextToken=next_token)
                ecs_clusters.extend(
                    self.__get_clusters(response["computeEnvironments"]))
                next_token = response.get("nextToken")
        except Exception as e:
            fail(
                "Error listing compute environments from AWS Batch. Failed with exception: %s"
                % e)

        return ecs_clusters
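The nextToken loop above is the standard boto3 pagination idiom. A minimal standalone sketch of the same pattern, written as a generator (iter_compute_environments is an illustrative name, not part of this CLI):

import boto3

def iter_compute_environments(names):
    """Yield compute environments one by one, following nextToken until it disappears."""
    client = boto3.client("batch")
    kwargs = {"computeEnvironments": names}
    while True:
        response = client.describe_compute_environments(**kwargs)
        yield from response["computeEnvironments"]
        if "nextToken" not in response:
            break
        kwargs["nextToken"] = response["nextToken"]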
Example #3
def main():
    """Command entrypoint."""
    try:
        # parse input parameters and config file
        args = _get_parser().parse_args()
        log = config_logger(args.log_level)
        log.info("Input parameters: %s" % args)
        config = AWSBatchCliConfig(log, args.cluster)
        boto3_factory = Boto3ClientFactory(
            region=config.region,
            proxy=config.proxy,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key,
        )

        AWSBhostsCommand(log, boto3_factory).run(
            compute_environments=[config.compute_environment],
            instance_ids=args.instance_ids,
            show_details=args.details)

    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
Example #4
def main():
    """Command entrypoint."""
    try:
        # parse input parameters and config file
        args = _get_parser().parse_args()
        _validate_parameters(args)
        log = config_logger(args.log_level)
        log.info("Input parameters: %s" % args)
        config = AWSBatchCliConfig(log=log, cluster=args.cluster)
        boto3_factory = Boto3ClientFactory(
            region=config.region,
            proxy=config.proxy,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key,
        )

        AWSBoutCommand(log, boto3_factory).run(
            job_id=args.job_id, head=args.head, tail=args.tail, stream=args.stream, stream_period=args.stream_period
        )

    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
Example #5
    def __populate_output_by_job_ids(self, job_status, job_ids, details):
        """
        Add Job item or job array children to the output.

        :param job_status: list of job statuses to query
        :param job_ids: job ids or ARNs
        :param details: ask for job details
        """
        try:
            if job_ids:
                self.log.info("Describing jobs (%s), details (%s)" %
                              (job_ids, details))
                single_jobs = []
                job_array_ids = []
                jobs = self.batch_client.describe_jobs(jobs=job_ids)['jobs']
                for job in jobs:
                    if is_job_array(job):
                        job_array_ids.append(job['jobId'])
                    else:
                        single_jobs.append(job)

                # create output items for job array children
                self.__populate_output_by_array_ids(job_status, job_array_ids,
                                                    details)

                # add single jobs to the output
                self.__add_jobs(single_jobs, details)
        except Exception as e:
            fail(
                "Error describing jobs from AWS Batch. Failed with exception: %s"
                % e)
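is_job_array comes from the CLI's utils module. Judging from the keys the other examples read (arrayProperties in Example #16), a plausible sketch — the real helper may differ — is:

def is_job_array(job):
    # an array parent carries an "arrayProperties" dict with a "size" key;
    # array children expose an "index" there instead, so they are excluded
    return "arrayProperties" in job and "size" in job["arrayProperties"]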
Example #6
def _validate_parameters(args):
    """
    Validate input parameters
    :param args: args variable
    """
    if args.command_file:
        if not isinstance(args.command, str):
            fail("The command parameter is required with --command-file option")
        elif not os.path.isfile(args.command):
            fail("The command parameter (%s) must be an existing file" % args.command)
    elif not sys.stdin.isatty():
        # stdin
        if args.arguments or isinstance(args.command, str):
            fail("Error: command and arguments cannot be specified when submitting by stdin.")
    elif not isinstance(args.command, str):
        fail("Parameters validation error: command parameter is required.")

    if args.depends_on and not re.match(
            r'(jobId|type)=[^\s,]+([\s,]?(jobId|type)=[^\s]+)*',
            args.depends_on):
        fail(
            "Parameters validation error: please double check --depends-on parameter syntax."
        )
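For reference, a few strings the --depends-on pattern accepts and rejects (illustrative values only):

import re

DEPENDS_ON = r'(jobId|type)=[^\s,]+([\s,]?(jobId|type)=[^\s]+)*'

assert re.match(DEPENDS_ON, "jobId=123")                  # single dependency
assert re.match(DEPENDS_ON, "jobId=123,type=SEQUENTIAL")  # comma-separated pairs
assert not re.match(DEPENDS_ON, "queue=foo")              # unknown keys are rejected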
Example #7
def main():
    """Command entrypoint."""
    try:
        # parse input parameters and config file
        args = _get_parser().parse_args()
        _validate_parameters(args)
        log = config_logger(args.log_level)
        log.info("Input parameters: %s" % args)
        config = AWSBatchCliConfig(log=log, cluster=args.cluster)
        boto3_factory = Boto3ClientFactory(
            region=config.region,
            proxy=config.proxy,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key,
        )

        AWSBoutCommand(log,
                       boto3_factory).run(job_id=args.job_id,
                                          head=args.head,
                                          tail=args.tail,
                                          stream=args.stream,
                                          stream_period=args.stream_period)

    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
Example #8
    def __populate_output_by_queue(self, job_queue, job_status, expand_children, details):
        """
        Add Job items to the output asking for given queue and status.

        :param job_queue: job queue name or ARN
        :param job_status: list of job statuses to query
        :param expand_children: if True, the job with children will be expanded by creating a row for each child
        :param details: ask for job details
        """
        try:
            single_jobs = []
            jobs_with_children = []
            for status in job_status:
                next_token = ""  # nosec
                while next_token is not None:
                    response = self.batch_client.list_jobs(jobStatus=status, jobQueue=job_queue, nextToken=next_token)

                    for job in response["jobSummaryList"]:
                        if get_job_type(job) != "SIMPLE" and expand_children is True:
                            jobs_with_children.append(job["jobId"])
                        else:
                            single_jobs.append(job)
                    next_token = response.get("nextToken")

            # create output items for job array children
            self.__populate_output_by_job_ids(jobs_with_children, details)

            # add single jobs to the output
            self.__add_jobs(single_jobs, details)

        except Exception as e:
            fail("Error listing jobs from AWS Batch. Failed with exception: %s" % e)
Example #9
    def __add_jobs(self, jobs, details=False):
        """
        Get job info from AWS Batch and add to the output.

        :param jobs: list of job items (output of the list_jobs function)
        :param details: ask for job details
        """
        try:
            if jobs:
                self.log.debug("Adding jobs to the output (%s)" % jobs)
                if details:
                    self.log.info("Asking for jobs details")
                    jobs_to_show = self.__chunked_describe_jobs([job["jobId"] for job in jobs])
                else:
                    jobs_to_show = jobs

                for job in jobs_to_show:
                    self.log.debug("Adding job to the output (%s)", job)

                    job_converter = self.__JOB_CONVERTERS[get_job_type(job)]

                    self.output.add(job_converter.convert(job))
        except KeyError as e:
            fail("Error building Job item. Key (%s) not found." % e)
        except Exception as e:
            fail("Error adding jobs to the output. Failed with exception: %s" % e)
Example #10
def main(argv=None):
    """Command entrypoint."""
    try:
        # parse input parameters and config file
        args = _get_parser().parse_args(argv)
        log = config_logger(args.log_level)
        log.info("Input parameters: %s", args)
        config = AWSBatchCliConfig(log=log, cluster=args.cluster)
        boto3_factory = Boto3ClientFactory(region=config.region, proxy=config.proxy)

        job_status_set = OrderedDict((status.strip().upper(), "") for status in args.status.split(","))
        if "ALL" in job_status_set:
            # add all the statuses in the list
            job_status_set = OrderedDict((status, "") for status in AWS_BATCH_JOB_STATUS)
        job_status = list(job_status_set)

        AWSBstatCommand(log, boto3_factory).run(
            job_status=job_status,
            expand_children=args.expand_children,
            job_ids=args.job_ids,
            job_queue=config.job_queue,
            show_details=args.details,
        )

    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
Example #11
def config_logger(log_level):
    """
    Define a logger for aws-parallelcluster-awsbatch-cli.

    :param log_level: logging level
    :return: the logger
    """
    try:
        logfile = os.path.expanduser(os.path.join("~", ".parallelcluster", "awsbatch-cli.log"))
        logdir = os.path.dirname(logfile)
        os.makedirs(logdir)
    except OSError as e:
        if e.errno == errno.EEXIST and os.path.isdir(logdir):
            pass
        else:
            fail("Cannot create log file (%s). Failed with exception: %s" % (logfile, e))

    formatter = logging.Formatter("%(asctime)s %(levelname)s [%(module)s:%(funcName)s] %(message)s")

    logfile_handler = RotatingFileHandler(logfile, maxBytes=5 * 1024 * 1024, backupCount=1)
    logfile_handler.setFormatter(formatter)

    logger = logging.getLogger("awsbatch-cli")
    logger.addHandler(logfile_handler)
    try:
        logger.setLevel(log_level.upper())
    except (TypeError, ValueError) as e:
        fail("Error setting log level. Failed with exception: %s" % e)

    return logger
Example #12
    def __init__(self, log, cluster):
        """
        Constructor.

        Search for the [cluster cluster-name] section in the ~/.parallelcluster/awsbatch-cli.cfg
        configuration file; if it is not there, fall back to the cluster stack.

        :param log: log
        :param cluster: cluster name
        """
        # Check if credentials and region have been provided in parallelcluster config
        self.aws_access_key_id = None
        self.aws_secret_access_key = None
        self.region = None
        self.env_blacklist = None
        parallelcluster_config_file = os.path.expanduser(
            os.path.join("~", ".parallelcluster", "config"))
        if os.path.isfile(parallelcluster_config_file):
            self.__init_from_parallelcluster_config(
                parallelcluster_config_file, log)

        # search for awsbatch-cli config
        cli_config_file = os.path.expanduser(
            os.path.join("~", ".parallelcluster", "awsbatch-cli.cfg"))
        if os.path.isfile(cli_config_file):
            self.__init_from_config(cli_config_file, cluster, log)
        elif cluster:
            self.__init_from_stack(cluster, log)
        else:
            fail("Error: cluster parameter is required")

        self.__verify_initialization(log)
Example #13
def config_logger(log_level):
    """
    Define a logger for aws-parallelcluster-awsbatch-cli.

    :param log_level: logging level
    :return: the logger
    """
    try:
        logfile = os.path.expanduser(
            os.path.join("~", ".parallelcluster", "awsbatch-cli.log"))
        logdir = os.path.dirname(logfile)
        os.makedirs(logdir)
    except OSError as e:
        if e.errno == errno.EEXIST and os.path.isdir(logdir):
            pass
        else:
            fail("Cannot create log file (%s). Failed with exception: %s" %
                 (logfile, e))

    formatter = logging.Formatter(
        "%(asctime)s %(levelname)s [%(module)s:%(funcName)s] %(message)s")

    logfile_handler = RotatingFileHandler(logfile,
                                          maxBytes=5 * 1024 * 1024,
                                          backupCount=1)
    logfile_handler.setFormatter(formatter)

    logger = logging.getLogger("awsbatch-cli")
    logger.addHandler(logfile_handler)
    try:
        logger.setLevel(log_level.upper())
    except (TypeError, ValueError) as e:
        fail("Error setting log level. Failed with exception: %s" % e)

    return logger
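Typical usage, as in the main() entrypoints above, is to build the logger once at startup and reuse it; messages land in ~/.parallelcluster/awsbatch-cli.log, rotated at 5 MB with one backup (values below are illustrative):

log = config_logger("INFO")
log.info("Input parameters: %s", {"cluster": "mycluster"})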
Example #14
    def __init__(self, log, cluster):
        """
        Constructor.

        Search for the [cluster cluster-name] section in the ~/.parallelcluster/awsbatch-cli.cfg
        configuration file; if it is not there, fall back to the cluster stack.

        :param log: log
        :param cluster: cluster name
        """
        # Check if credentials and region have been provided in parallelcluster config
        self.aws_access_key_id = None
        self.aws_secret_access_key = None
        self.region = None
        self.env_blacklist = None
        parallelcluster_config_file = os.path.expanduser(os.path.join("~", ".parallelcluster", "config"))
        if os.path.isfile(parallelcluster_config_file):
            self.__init_from_parallelcluster_config(parallelcluster_config_file, log)

        # search for awsbatch-cli config
        cli_config_file = os.path.expanduser(os.path.join("~", ".parallelcluster", "awsbatch-cli.cfg"))
        if os.path.isfile(cli_config_file):
            self.__init_from_config(cli_config_file, cluster, log)
        elif cluster:
            self.__init_from_stack(cluster, log)
        else:
            fail("Error: cluster parameter is required")

        self.__verify_initialization(log)
Example #15
def main():
    """Command entrypoint."""
    try:
        # parse input parameters and config file
        args = _get_parser().parse_args()
        log = config_logger(args.log_level)
        log.info("Input parameters: %s" % args)
        config = AWSBatchCliConfig(log=log, cluster=args.cluster)
        boto3_factory = Boto3ClientFactory(
            region=config.region,
            proxy=config.proxy,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key,
        )

        if args.job_queues:
            job_queues = args.job_queues
            show_details = True
        else:
            job_queues = [config.job_queue]
            show_details = args.details
        AWSBqueuesCommand(log, boto3_factory).run(job_queues=job_queues, show_details=show_details)

    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
Example #16
    def __populate_output_by_job_ids(self, job_ids, details, include_parents=False):
        """
        Add Job item or jobs array children to the output.

        :param job_ids: job ids or ARNs
        :param details: ask for job details
        :param include_parents: if True, also add the array/MNP parent jobs themselves to the output
        """
        try:
            if job_ids:
                self.log.info("Describing jobs (%s), details (%s)" % (job_ids, details))
                parent_jobs = []
                jobs_with_children = []
                jobs = self.__chunked_describe_jobs(job_ids)
                for job in jobs:
                    # add the job itself when it is a simple job, or when parents were requested
                    if include_parents or get_job_type(job) == "SIMPLE":
                        parent_jobs.append(job)
                    if is_job_array(job):
                        jobs_with_children.append((job["jobId"], ":", job["arrayProperties"]["size"]))
                    elif is_mnp_job(job):
                        jobs_with_children.append((job["jobId"], "#", job["nodeProperties"]["numNodes"]))

                # add parent jobs to the output
                self.__add_jobs(parent_jobs)

                # create output items for jobs' children
                self.__populate_output_by_parent_ids(jobs_with_children)
        except Exception as e:
            fail("Error describing jobs from AWS Batch. Failed with exception: %s" % e)
Example #17
    def __search_for_job_definition(self, base_job_definition, nodes):
        """
        Search for an existing job definition with the same name as the base_job_definition and the same number of nodes.

        :param base_job_definition: job definition arn
        :param nodes: number of nodes
        :return: the found jobDefinition object or None
        """
        job_definition_found = None
        base_job_definition_name = get_job_definition_name_by_arn(
            base_job_definition)
        try:
            next_token = ""
            while next_token is not None:
                response = self.batch_client.describe_job_definitions(
                    jobDefinitionName=base_job_definition_name,
                    status="ACTIVE",
                    nextToken=next_token)
                for job_definition in response["jobDefinitions"]:
                    if job_definition["nodeProperties"]["numNodes"] == nodes:
                        job_definition_found = job_definition
                        break
                next_token = response.get("nextToken")
        except Exception as e:
            fail("Error listing job definition. Failed with exception: %s" % e)

        return job_definition_found
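get_job_definition_name_by_arn comes from the CLI's utils module. Given the standard Batch job definition ARN layout (arn:aws:batch:region:account:job-definition/name:revision), a plausible sketch — the real helper may differ — is:

def get_job_definition_name_by_arn(arn):
    # "arn:aws:batch:us-east-1:123456789012:job-definition/my-def:3" -> "my-def"
    return arn.split("/")[-1].split(":")[0]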
Example #18
    def __populate_output_by_array_ids(self, job_status, job_array_ids,
                                       details):
        """
        Add job array children to the output.

        :param job_status: list of job statuses to query
        :param job_array_ids: job array ids to ask
        :param details: ask for job details
        """
        try:
            for job_array_id in job_array_ids:
                for status in job_status:
                    self.log.info(
                        "Listing job array children for job (%s) in status (%s)"
                        % (job_array_id, status))
                    next_token = ''
                    while next_token is not None:
                        response = self.batch_client.list_jobs(
                            jobStatus=status,
                            arrayJobId=job_array_id,
                            nextToken=next_token)
                        # add single jobs to the output
                        self.__add_jobs(response['jobSummaryList'], details)
                        next_token = response.get('nextToken')
        except Exception as e:
            fail(
                "Error listing job array children for job (%s). Failed with exception: %s"
                % (job_array_id, e))
Example #19
    def __register_new_job_definition(self, base_job_definition_arn, nodes):
        """
        Register a new job definition, using base_job_definition_arn as the starting point for the nodeRangeProperties.

        :param base_job_definition_arn: job definition arn to use as starting point
        :param nodes: number of nodes to set in the job definition
        :return: the ARN of the created job definition
        """
        try:
            # get base job definition and reuse its nodeRangeProperties
            response = self.batch_client.describe_job_definitions(
                jobDefinitions=[base_job_definition_arn], status='ACTIVE')
            job_definition = response['jobDefinitions'][0]

            # create new job definition
            response = self.batch_client.register_job_definition(
                jobDefinitionName=job_definition['jobDefinitionName'],
                type='multinode',
                nodeProperties={
                    'numNodes': nodes,
                    'mainNode': 0,
                    'nodeRangeProperties': [{
                        'targetNodes': '0:%d' % (nodes - 1),
                        'container': job_definition['nodeProperties']
                        ['nodeRangeProperties'][0]['container'],
                    }],
                })
            job_definition_arn = response['jobDefinitionArn']
        except Exception as e:
            fail("Error listing job definition. Failed with exception: %s" % e)

        return job_definition_arn
Example #20
    def __init__(self, log, cluster):
        """
        Initialize the object.

        Search for the [cluster cluster-name] section in the ~/.parallelcluster/awsbatch-cli.cfg
        configuration file; if it is not there, fall back to the cluster stack.

        :param log: log
        :param cluster: cluster name
        """
        self.region = None
        self.env_blacklist = None

        # search for awsbatch-cli config
        cli_config_file = os.path.expanduser(
            os.path.join("~", ".parallelcluster", "awsbatch-cli.cfg"))
        if os.path.isfile(cli_config_file):
            self.__init_from_config(cli_config_file, cluster, log)
        elif cluster:
            self.__init_from_stack(cluster, log)
        else:
            fail("Error: cluster parameter is required")

        self.__verify_initialization(log)
Example #21
def main():
    """Command entrypoint."""
    try:
        # parse input parameters and config file
        args = _get_parser().parse_args()
        log = config_logger(args.log_level)
        log.info("Input parameters: %s" % args)
        config = AWSBatchCliConfig(log=log, cluster=args.cluster)
        boto3_factory = Boto3ClientFactory(
            region=config.region,
            proxy=config.proxy,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key,
        )

        if args.job_queues:
            job_queues = args.job_queues
            show_details = True
        else:
            job_queues = [config.job_queue]
            show_details = args.details
        AWSBqueuesCommand(log, boto3_factory).run(job_queues=job_queues,
                                                  show_details=show_details)

    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
Example #22
    def __create_host_item(container_instance, ec2_instance):
        """
        Merge container instance and ec2 instance information and create a Host item.

        :param container_instance: the containerInstance object to parse
        :param ec2_instance: the ec2Instance object to parse
        :return: the Host item
        """
        try:
            instance_type = '-'
            for attr in container_instance['attributes']:
                if attr['name'] == 'ecs.instance-type':
                    instance_type = attr['value']
                    break

            return Host(
                container_instance_arn=container_instance['containerInstanceArn'],
                status=container_instance['status'],
                ec2_instance=container_instance['ec2InstanceId'],
                instance_type=instance_type,
                private_ip_address=ec2_instance['PrivateIpAddress'],
                # the key is missing or empty when the instance has no public address
                public_ip_address=ec2_instance.get('PublicIpAddress') or '-',
                private_dns_name=ec2_instance['PrivateDnsName'],
                public_dns_name=ec2_instance.get('PublicDnsName') or '-',
                running_jobs=container_instance['runningTasksCount'],
                pending_jobs=container_instance['pendingTasksCount'])
        except KeyError as e:
            fail("Error building Host item. Key (%s) not found." % e)
Example #23
    def __init_from_stack(self, cluster, log):  # noqa: C901 FIXME
        """
        Initialize object attributes by querying the stack.

        :param cluster: cluster name
        :param log: log
        """
        try:
            self.stack_name = _get_stack_name(cluster)
            log.info("Describing stack (%s)" % self.stack_name)
            # get required values from the output of the describe-stack command
            # don't use proxy because we are in the client and use default region
            boto3_factory = Boto3ClientFactory(
                region=self.region,
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_secret_access_key,
            )
            cfn_client = boto3_factory.get_client("cloudformation")
            stack = cfn_client.describe_stacks(
                StackName=self.stack_name).get("Stacks")[0]
            log.debug(stack)
            if self.region is None:
                self.region = get_region_by_stack_id(stack.get("StackId"))
            self.proxy = "NONE"

            stack_status = stack.get("StackStatus")
            if stack_status in ["CREATE_COMPLETE", "UPDATE_COMPLETE"]:
                for output in stack.get("Outputs", []):
                    output_key = output.get("OutputKey")
                    output_value = output.get("OutputValue")
                    if output_key == "ResourcesS3Bucket":
                        self.s3_bucket = output_value
                    elif output_key == "ArtifactS3RootDirectory":
                        self.artifact_directory = output_value
                    elif output_key == "BatchComputeEnvironmentArn":
                        self.compute_environment = output_value
                    elif output_key == "BatchJobQueueArn":
                        self.job_queue = output_value
                    elif output_key == "BatchJobDefinitionArn":
                        self.job_definition = output_value
                    elif output_key == "MasterPrivateIP":
                        self.head_node_ip = output_value
                    elif output_key == "BatchJobDefinitionMnpArn":
                        self.job_definition_mnp = output_value

                for parameter in stack.get("Parameters", []):
                    if parameter.get("OutputKey") == "ProxyServer":
                        self.proxy = parameter.get("OutputValue")
                        if not self.proxy == "NONE":
                            log.info("Configured proxy is: %s" % self.proxy)
                        break
            else:
                fail("The cluster is in the (%s) status." % stack_status)

        except (ClientError, ParamValidationError) as e:
            fail(
                "Error getting cluster information from AWS CloudFormation. Failed with exception: %s"
                % e)
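get_region_by_stack_id only needs to slice the stack ARN, whose fourth colon-separated field is the region. A sketch consistent with its use above (the real helper may differ):

def get_region_by_stack_id(stack_id):
    # "arn:aws:cloudformation:eu-west-1:123456789012:stack/name/uuid" -> "eu-west-1"
    return stack_id.split(":")[3]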
Example #24
    def get_client(self, service):
        try:
            return boto3.client(
                service,
                region_name=self.region,
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_secret_access_key,
                config=self.proxy_config)
        except ClientError as e:
            fail("AWS %s service failed with exception: %s" % (service, e))
Example #25
    def __print_log_stream(self, log_stream, head=None, tail=None, stream=None, stream_period=None):  # noqa: C901 FIXME
        """
        Ask for log stream and print it.

        :param log_stream: job log stream
        :param head: print only the first N lines
        :param tail: print only the last N lines
        :param stream: keep polling for new events
        :param stream_period: polling period in seconds when streaming
        """
        logs_client = self.boto3_factory.get_client("logs")
        try:
            # The maximum number of log events returned by the get_log_events function is as many log events
            # as can fit in a response size of 1 MB, up to 10,000 log events
            max_limit = 10000
            if head:
                limit = head
                start_from_head = True
            elif tail:
                limit = tail
                start_from_head = False
            else:
                limit = max_limit
                start_from_head = False

            response = logs_client.get_log_events(
                logGroupName="/aws/batch/job", logStreamName=log_stream, limit=limit, startFromHead=start_from_head
            )
            events = response["events"]
            self.log.debug(response)
            if not events:
                print("No events found.")

            self.__print_events(events)
            if limit == max_limit or stream:
                # get paginated items
                next_token = response["nextForwardToken"]
                while next_token is not None or stream:
                    self.log.info("Next Forward Token is (%s)" % next_token)
                    if stream:
                        period = stream_period if stream_period else 5
                        self.log.info("Waiting other %s seconds..." % period)
                        time.sleep(period)
                    response = logs_client.get_log_events(
                        logGroupName="/aws/batch/job", logStreamName=log_stream, nextToken=next_token
                    )
                    self.__print_events(response["events"])
                    if stream:
                        next_token = response["nextForwardToken"]
                    else:
                        # if nextForwardToken equals the one we passed in, we reached the end of the stream
                        next_token = (
                            response["nextForwardToken"] if response["nextForwardToken"] != next_token else None
                        )
        except KeyboardInterrupt:
            self.log.info("Interrupted by the user")
            exit(0)
        except Exception as e:
            fail("Error listing jobs from AWS Batch. Failed with exception: %s" % e)
Example #26
def _upload_and_get_command(boto3_factory, args, job_s3_folder, job_name, config, log):
    """
    Get command by parsing args and config.

    The function will also perform an s3 upload, if needed.
    :param boto3_factory: initialized Boto3ClientFactory object
    :param args: input arguments
    :param job_s3_folder: S3 folder for the job files
    :param job_name: job name
    :param config: config object
    :param log: log
    :return: command to submit
    """
    # create S3 folder for the job
    s3_uploader = S3Uploader(boto3_factory, config.s3_bucket, job_s3_folder)

    # upload input files, if any
    if args.input_file:
        for file in args.input_file:
            s3_uploader.put_file(file, os.path.basename(file))

    # upload command, if needed
    if args.command_file or not sys.stdin.isatty() or args.env:
        # define job script name
        job_script = job_name + ".sh"
        log.info("Using command-file option or stdin. Job script name: %s" % job_script)

        env_file = None
        if args.env:
            env_file = job_name + ".env.sh"
            # get environment variables and upload file used to extend the submission environment
            env_blacklist = args.env_blacklist if args.env_blacklist else config.env_blacklist
            _get_env_and_upload(s3_uploader, args.env, env_blacklist, env_file, log)

        # upload job script
        if args.command_file:
            # existing script file
            try:
                s3_uploader.put_file(args.command, job_script)
            except Exception as e:
                fail("Error creating job script. Failed with exception: %s" % e)
        elif not sys.stdin.isatty():
            # stdin
            _get_stdin_and_upload(s3_uploader, job_script)

        # define command to execute
        bash_command = _compose_bash_command(args, config.s3_bucket, config.region, job_s3_folder, job_script, env_file)
        command = ["/bin/bash", "-c", bash_command]
    elif isinstance(args.command, str):
        log.info("Using command parameter")
        command = [args.command] + args.arguments
    else:
        fail("Unexpected error. Command cannot be empty.")
    log.info("Command: %s" % shell_join(command))
    return command
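shell_join is only used here to log the command in a copy-pasteable form. A sketch based on shlex, assuming that is all it does:

import shlex

def shell_join(command):
    # ["/bin/bash", "-c", "echo hi"] -> "/bin/bash -c 'echo hi'"
    return " ".join(shlex.quote(arg) for arg in command)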
Example #27
    def check(self):
        """Verify if CLI requirements are satisfied."""
        for req in self.requirements:
            if not self.COMPARISON_OPERATORS[req.operator](
                    packaging.version.parse(get_installed_version(req.package)),
                    packaging.version.parse(req.version),
            ):
                fail(f"The cluster requires {req.package}{req.operator}{req.version}")
Example #28
    def __init_from_stack(self, cluster, log):  # noqa: C901 FIXME
        """
        Initialize object attributes by querying the stack.

        :param cluster: cluster name
        :param log: log
        """
        try:
            self.stack_name = "parallelcluster-" + cluster
            log.info("Describing stack (%s)" % self.stack_name)
            # get required values from the output of the describe-stack command
            # don't use proxy because we are in the client and use default region
            boto3_factory = Boto3ClientFactory(
                region=self.region,
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_secret_access_key,
            )
            cfn_client = boto3_factory.get_client("cloudformation")
            stack = cfn_client.describe_stacks(StackName=self.stack_name).get("Stacks")[0]
            log.debug(stack)
            if self.region is None:
                self.region = get_region_by_stack_id(stack.get("StackId"))
            self.proxy = "NONE"

            stack_status = stack.get("StackStatus")
            if stack_status in ["CREATE_COMPLETE", "UPDATE_COMPLETE"]:
                for output in stack.get("Outputs", []):
                    output_key = output.get("OutputKey")
                    output_value = output.get("OutputValue")
                    if output_key == "ResourcesS3Bucket":
                        self.s3_bucket = output_value
                    elif output_key == "BatchComputeEnvironmentArn":
                        self.compute_environment = output_value
                    elif output_key == "BatchJobQueueArn":
                        self.job_queue = output_value
                    elif output_key == "BatchJobDefinitionArn":
                        self.job_definition = output_value
                    elif output_key == "MasterPrivateIP":
                        self.master_ip = output_value
                    elif output_key == "BatchJobDefinitionMnpArn":
                        self.job_definition_mnp = output_value

                for parameter in stack.get("Parameters", []):
                    if parameter.get("OutputKey") == "ProxyServer":
                        self.proxy = parameter.get("OutputValue")
                        if not self.proxy == "NONE":
                            log.info("Configured proxy is: %s" % self.proxy)
                        break
            else:
                fail("The cluster is in the (%s) status." % stack_status)

        except (ClientError, ParamValidationError) as e:
            fail("Error getting cluster information from AWS CloudFormation. Failed with exception: %s" % e)
Example #29
def main():
    """Command entrypoint."""
    try:
        # parse input parameters and config file
        args = _get_parser().parse_args()
        _validate_parameters(args)
        log = config_logger(args.log_level)
        log.info("Input parameters: %s" % args)
        config = AWSBatchCliConfig(log=log, cluster=args.cluster)
        boto3_factory = Boto3ClientFactory(
            region=config.region,
            proxy=config.proxy,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key)

        # define job name
        if args.job_name:
            job_name = args.job_name
        else:
            # set a default job name if not specified
            if not sys.stdin.isatty():
                # stdin
                job_name = 'STDIN'
            else:
                # normalize name
                job_name = re.sub(r'\W+', '_', os.path.basename(args.command))
            log.info("Job name not specified, setting it to (%s)" % job_name)

        # upload script, if needed, and get related command
        command = _upload_and_get_command(boto3_factory, args, job_name,
                                          config.region, config.s3_bucket, log)
        # parse and validate depends_on parameter
        depends_on = _get_depends_on(args)

        job_definition = config.job_definition

        AWSBsubCommand(log,
                       boto3_factory).run(job_definition=job_definition,
                                          job_name=job_name,
                                          job_queue=config.job_queue,
                                          command=command,
                                          vcpus=args.vcpus,
                                          memory=args.memory,
                                          array_size=args.array_size,
                                          dependencies=depends_on,
                                          retry_attempts=args.retry_attempts,
                                          timeout=args.timeout,
                                          master_ip=config.master_ip)
    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
Example #30
    def __init_from_stack(self, cluster, log):
        """
        init object attributes by asking to the stack
        :param cluster: cluster name
        :param log: log
        """
        try:
            self.stack_name = 'parallelcluster-' + cluster
            log.info("Describing stack (%s)" % self.stack_name)
            # get required values from the output of the describe-stack command
            # don't use proxy because we are in the client and use default region
            boto3_factory = Boto3ClientFactory(
                region=self.region,
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_secret_access_key)
            cfn_client = boto3_factory.get_client('cloudformation')
            stack = cfn_client.describe_stacks(
                StackName=self.stack_name).get("Stacks")[0]
            log.debug(stack)
            if self.region is None:
                self.region = get_region_by_stack_id(stack.get('StackId'))
            self.proxy = 'NONE'

            stack_status = stack.get('StackStatus')
            if stack_status in ['CREATE_COMPLETE', 'UPDATE_COMPLETE']:
                for output in stack.get('Outputs', []):
                    output_key = output.get('OutputKey')
                    output_value = output.get('OutputValue')
                    if output_key == 'ResourcesS3Bucket':
                        self.s3_bucket = output_value
                    elif output_key == 'BatchComputeEnvironmentArn':
                        self.compute_environment = output_value
                    elif output_key == 'BatchJobQueueArn':
                        self.job_queue = output_value
                    elif output_key == 'BatchJobDefinitionArn':
                        self.job_definition = output_value
                    elif output_key == 'MasterPrivateIP':
                        self.master_ip = output_value

                for parameter in stack.get('Parameters', []):
                    if parameter.get('ParameterKey') == 'ProxyServer':
                        self.proxy = parameter.get('ParameterValue')
                        if not self.proxy == "NONE":
                            log.info("Configured proxy is: %s" % self.proxy)
                        break
            else:
                fail("The cluster is in the (%s) status." % stack_status)

        except (ClientError, ParamValidationError) as e:
            fail(
                "Error getting cluster information from AWS CloudFormation. Failed with exception: %s"
                % e)
Example #31
    def get_client(self, service):
        """
        Initialize the boto3 client for a given service.

        :param service: boto3 service.
        :return: the boto3 client
        """
        try:
            return boto3.client(service,
                                region_name=self.region,
                                config=self.proxy_config)
        except ClientError as e:
            fail("AWS %s service failed with exception: %s" % (service, e))
Example #32
    def __verify_initialization(self, log):
        try:
            log.debug("stack_name = %s", self.stack_name)
            log.debug("region = %s", self.region)
            log.debug("s3_bucket = %s", self.s3_bucket)
            log.debug("compute_environment = %s", self.compute_environment)
            log.debug("job_queue = %s", self.job_queue)
            log.debug("job_definition = %s", self.job_definition)
            log.debug("master_ip = %s", self.master_ip)
            log.info(self)
        except AttributeError as e:
            fail("Error getting cluster information from AWS CloudFormation. "
                 "Missing attribute (%s) from the output CloudFormation stack."
                 % e)
Example #33
    def __new_queue(queue):
        """
        Parse jobQueue and return a Queue object.

        :param queue: the jobQueue object to parse
        :return: a Queue object
        """
        try:
            return Queue(arn=queue['jobQueueArn'],
                         name=queue['jobQueueName'],
                         priority=queue['priority'],
                         status=queue['status'],
                         status_reason=queue['statusReason'])
        except KeyError as e:
            fail("Error building Queue item. Key (%s) not found." % e)
Example #34
    def __init__(self, requirements_string):
        try:
            self.requirements = []
            for requirement_string in requirements_string.split(","):
                match = re.search(r"([\w+_-]+)([<>=]+)([\d.]+)",
                                  requirement_string)
                self.requirements.append(
                    CliRequirement(package=match.group(1),
                                   operator=match.group(2),
                                   version=match.group(3)))
        except (AttributeError, IndexError):
            # AttributeError covers re.search returning None for a malformed string
            fail(
                f"Unable to parse ParallelCluster AWS Batch CLI requirements: '{requirements_string}'"
            )
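The requirements string the regex expects is a comma-separated list of package/operator/version triples. A quick check with illustrative values:

import re

match = re.search(r"([\w+_-]+)([<>=]+)([\d.]+)", "aws-parallelcluster>=2.10.0")
print(match.group(1), match.group(2), match.group(3))  # aws-parallelcluster >= 2.10.0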
Example #35
    def _add_host_items(self,
                        ecs_cluster_arn,
                        container_instances_arns,
                        instance_ids=None):
        """
        Add a list of Hosts to the output.

        :param ecs_cluster_arn: ECS Cluster arn
        :param container_instances_arns: container ids
        :param instance_ids: hosts requested
        """
        self.log.info("Container ARNs = %s" % container_instances_arns)
        if container_instances_arns:
            response = self.ecs_client.describe_container_instances(
                cluster=ecs_cluster_arn,
                containerInstances=container_instances_arns)
            container_instances = response["containerInstances"]
            self.log.debug("Container Instances = %s" % container_instances)
            # get ec2_instance_ids
            ec2_instances_ids = []
            for container_instance in container_instances:
                ec2_instances_ids.append(container_instance["ec2InstanceId"])

            # get ec2 instances information
            ec2_instances = {}
            try:
                ec2_client = self.boto3_factory.get_client("ec2")
                paginator = ec2_client.get_paginator("describe_instances")
                for page in paginator.paginate(InstanceIds=ec2_instances_ids):
                    for reservation in page["Reservations"]:
                        for instance in reservation["Instances"]:
                            ec2_instances[instance["InstanceId"]] = instance
            except Exception as e:
                fail(
                    "Error listing EC2 instances from AWS EC2. Failed with exception: %s"
                    % e)

            # merge ec2 and container information
            for container_instance in container_instances:
                ec2_instance_id = container_instance["ec2InstanceId"]
                # filter by instance_id, if given
                if not instance_ids or ec2_instance_id in instance_ids:
                    self.log.debug("Container Instance = %s" %
                                   container_instance)
                    self.log.debug("EC2 Instance = %s" %
                                   ec2_instances[ec2_instance_id])
                    self.output.add(
                        self.__create_host_item(
                            container_instance,
                            ec2_instances[ec2_instance_id]))
Example #36
    def __verify_initialization(self, log):
        try:
            log.debug("stack_name = %s", self.stack_name)
            log.debug("region = %s", self.region)
            log.debug("s3_bucket = %s", self.s3_bucket)
            log.debug("compute_environment = %s", self.compute_environment)
            log.debug("job_queue = %s", self.job_queue)
            log.debug("job_definition = %s", self.job_definition)
            log.debug("master_ip = %s", self.master_ip)
            log.info(self)
        except AttributeError as e:
            fail(
                "Error getting cluster information from AWS CloudFormation. "
                "Missing attribute (%s) from the output CloudFormation stack." % e
            )
Example #37
def _get_stdin_and_upload(s3_uploader, job_script):
    """
    Create file from STDIN and upload to S3.

    :param s3_uploader: S3Uploader object
    :param job_script: job script name
    """
    try:
        # copy stdin to temporary file and upload
        with os.fdopen(sys.stdin.fileno(), "rb") as src:
            with tempfile.NamedTemporaryFile() as dst:
                shutil.copyfileobj(src, dst)
                dst.flush()
                s3_uploader.put_file(dst.name, job_script)
    except Exception as e:
        fail("Error creating job script. Failed with exception: %s" % e)
Example #38
    def get_client(self, service):
        """
        Initialize the boto3 client for a given service.

        :param service: boto3 service.
        :return: the boto3 client
        """
        try:
            return boto3.client(
                service,
                region_name=self.region,
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_secret_access_key,
                config=self.proxy_config,
            )
        except ClientError as e:
            fail("AWS %s service failed with exception: %s" % (service, e))
Example #39
    def __new_queue(queue):
        """
        Parse jobQueue and return a Queue object.

        :param queue: the jobQueue object to parse
        :return: a Queue object
        """
        try:
            return Queue(
                arn=queue["jobQueueArn"],
                name=queue["jobQueueName"],
                priority=queue["priority"],
                status=queue["status"],
                status_reason=queue["statusReason"],
            )
        except KeyError as e:
            fail("Error building Queue item. Key (%s) not found." % e)
Example #40
    def __init_output(self, job_queues):
        """
        Initialize queues output by asking for given queues.

        :param job_queues: a list of job queues
        """
        try:
            # connect to batch and get queues
            batch_client = self.boto3_factory.get_client("batch")
            queues = batch_client.describe_job_queues(jobQueues=job_queues)["jobQueues"]
            self.log.info("Job Queues: %s" % job_queues)
            self.log.debug(queues)

            for queue in queues:
                self.output.add(self.__new_queue(queue=queue))

        except Exception as e:
            fail("Error listing queues from AWS Batch. Failed with exception: %s" % e)
Example #41
    def __get_log_stream(self, job_id):
        """
        Get log stream for the given job.

        :param job_id: job id (ARN)
        :return: the log_stream if present, or None
        """
        log_stream = None
        try:
            batch_client = self.boto3_factory.get_client("batch")
            jobs = batch_client.describe_jobs(jobs=[job_id])["jobs"]
            if len(jobs) == 1:
                job = jobs[0]
                self.log.debug(job)

                if "nodeProperties" in job:
                    # MNP job
                    container = job["nodeProperties"]["nodeRangeProperties"][0]["container"]
                elif "container" in job:
                    container = job["container"]
                else:
                    container = {}

                if get_job_type(job) != "SIMPLE":
                    fail("No output available for the Job (%s). Please ask for its children." % job["jobId"])
                else:
                    if "logStreamName" in container:
                        log_stream = container.get("logStreamName")
                    else:
                        print("No log stream found for job (%s) in the status (%s)" % (job_id, job["status"]))
            else:
                fail("Error asking job output for job (%s). Job not found." % job_id)
        except Exception as e:
            fail("Error listing jobs from AWS Batch. Failed with exception: %s" % e)
        return log_stream
Example #42
def _validate_parameters(args):
    """
    Validate input parameters.

    :param args: args variable
    """
    if args.head:
        if args.tail:
            fail("Parameters validation error: --tail and --head option cannot be set at the same time")
        if args.stream:
            fail("Parameters validation error: --stream and --head option cannot be set at the same time")

    if args.stream_period and not args.stream:
        fail("Parameters validation error: --stream-period can be used only with --stream option")
Example #43
    def __init_from_config(self, cli_config_file, cluster, log):  # noqa: C901 FIXME
        """
        Init object attributes from awsbatch-cli configuration file.

        :param cli_config_file: awsbatch-cli config
        :param cluster: cluster name
        :param log: log
        """
        with open(cli_config_file) as config_file:
            log.info("Searching for configuration file %s" % cli_config_file)
            config = ConfigParser()
            config.read_file(config_file)

            # use the given cluster, or fall back to the default cluster_name in the [main] section
            try:
                cluster_name = cluster if cluster else config.get("main", "cluster_name")
            except NoSectionError as e:
                fail("Error getting the section [%s] from the configuration file (%s)" % (e.section, cli_config_file))
            except NoOptionError as e:
                fail(
                    "Error getting the option (%s) from the section [%s] of the configuration file (%s)"
                    % (e.option, e.section, cli_config_file)
                )
            cluster_section = "cluster {0}".format(cluster_name)
            try:
                self.region = config.get("main", "region")
            except NoOptionError:
                pass
            try:
                self.env_blacklist = config.get("main", "env_blacklist")
            except NoOptionError:
                pass

            try:
                self.stack_name = "parallelcluster-" + cluster_name
                log.info("Stack name is (%s)" % self.stack_name)
                # if region is set for the current stack, override the region from the AWS ParallelCluster config file
                # or the region from the [main] section
                self.region = config.get(cluster_section, "region")
                self.s3_bucket = config.get(cluster_section, "s3_bucket")
                self.compute_environment = config.get(cluster_section, "compute_environment")
                self.job_queue = config.get(cluster_section, "job_queue")
                self.job_definition = config.get(cluster_section, "job_definition")
                try:
                    self.job_definition_mnp = config.get(cluster_section, "job_definition_mnp")
                except NoOptionError:
                    pass
                self.master_ip = config.get(cluster_section, "master_ip")

                # get proxy
                self.proxy = config.get(cluster_section, "proxy")
                if not self.proxy == "NONE":
                    log.info("Configured proxy is: %s" % self.proxy)
            except NoSectionError:
                # initialize by getting stack info
                self.__init_from_stack(cluster_name, log)
            except NoOptionError as e:
                fail(
                    "Error getting the option (%s) from the section [%s] of the configuration file (%s)"
                    % (e.option, e.section, cli_config_file)
                )
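For reference, a minimal ~/.parallelcluster/awsbatch-cli.cfg consistent with the options read above could look like this (all values illustrative):

[main]
cluster_name = mycluster

[cluster mycluster]
region = us-east-1
s3_bucket = mycluster-batch-resources
compute_environment = arn:aws:batch:us-east-1:123456789012:compute-environment/mycluster-ce
job_queue = arn:aws:batch:us-east-1:123456789012:job-queue/mycluster-jq
job_definition = arn:aws:batch:us-east-1:123456789012:job-definition/mycluster-jd:1
master_ip = 10.0.0.10
proxy = NONE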