def exec_qconf_command(hosts, qhost_command):
    """Run one ``qconf`` operation for a batch of hosts and report which ones succeeded.

    The executed command line is ``qconf <command_flags> <comma-separated hostnames>``.
    The command is invoked with ``raise_on_error=False``; success is decided per host
    by matching the command output against ``qhost_command.successful_messages``.

    :param hosts: sequence of objects exposing a ``hostname`` attribute
    :param qhost_command: object exposing ``description``, ``command_flags`` and
        ``successful_messages`` (an iterable of regex patterns)
    :return: the hosts whose paired output line matched at least one success pattern;
        an empty list when ``hosts`` is empty or when any exception is raised while
        running the command or evaluating its output
    """
    if not hosts:
        return []

    joined_names = ",".join(node.hostname for node in hosts)
    try:
        logging.info("Executing operation '%s' for hosts %s",
                     qhost_command.description, joined_names)
        qconf_line = "qconf {flags} {hostnames}".format(
            hostnames=joined_names, flags=qhost_command.command_flags)
        # Errors are not raised by the helper: the output text itself is inspected
        # to decide which hosts were processed successfully.
        raw_output = check_sge_command_output(qconf_line, raise_on_error=False)
        # Hosts and output lines are paired positionally, i.e. this relies on the
        # command printing one message line per host, in the order they were passed.
        return [
            node
            for node, line in zip(hosts, raw_output.split("\n"))
            if any(
                re.match(pattern, line) is not None
                for pattern in qhost_command.successful_messages)
        ]
    except Exception as e:
        # Best effort: a failure is logged and reported as "no host succeeded".
        logging.error(
            "Unable to execute operation '%s' for hosts %s. Failed with exception %s",
            qhost_command.description,
            joined_names,
            e,
        )
        return []
Exemplo n.º 2
0
def _is_host_configured(command, hostname):
    """Tell whether ``hostname`` appears in the host list printed by an SGE command.

    ``command`` is executed through ``check_sge_command_output`` and is expected to
    print one host name per line, for example::

        ip-172-31-66-16.ec2.internal
        ip-172-31-74-69.ec2.internal

    :param command: command line to execute
    :param hostname: host name to look for; compared against the part of each output
        line that precedes the first dot
    :return: True if the first label of some non-blank output line equals ``hostname``,
        False otherwise
    """
    output = check_sge_command_output(command, log)
    # Compare for equality rather than substring containment: with a substring test
    # "ip-10-0-0-1" would be reported as configured when only "ip-10-0-0-12" is listed.
    return any(
        line.strip().split(".")[0] == hostname
        for line in output.split("\n")
        if line.strip()
    )
Exemplo n.º 3
0
def _is_host_configured(command, hostname):
    """Check whether ``hostname`` is listed in the output of the given SGE command.

    The command is run via ``check_sge_command_output``; its output is expected to
    contain one host name per line, e.g.::

        ip-172-31-66-16.ec2.internal
        ip-172-31-74-69.ec2.internal

    :param command: command line to execute
    :param hostname: host name to search for; matched against the text before the
        first dot of each output line
    :return: True when a non-blank output line has ``hostname`` as its first label,
        False otherwise
    """
    output = check_sge_command_output(command, log)
    for raw_line in output.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        # Exact match on the first label: a substring test would wrongly accept
        # "ip-10-0-0-1" when only "ip-10-0-0-12" is present in the list.
        if line.split(".")[0] == hostname:
            return True
    return False
def _run_qstat(full_format=False, hostname_filter=None, job_state_filter=None):
    """Build a ``qstat -xml -g dt -u '*'`` command line, run it and return its output.

    :param full_format: when truthy, ``-f`` is appended to the command
    :param hostname_filter: when truthy, ``-l hostname=<value>`` is appended
    :param job_state_filter: when truthy, ``-s <value>`` is appended
    :return: whatever ``check_sge_command_output`` returns for the assembled command
    """
    # Each optional fragment carries its own leading space, so plain concatenation
    # yields the final command line.
    fragments = ["qstat -xml -g dt -u '*'"]
    if full_format:
        fragments.append(" -f")
    if hostname_filter:
        fragments.append(" -l hostname={0}".format(hostname_filter))
    if job_state_filter:
        fragments.append(" -s {0}".format(job_state_filter))
    return check_sge_command_output("".join(fragments))
Exemplo n.º 5
0
def get_required_nodes(instance_properties):
    """Compute how many nodes are needed to hold the slots reported by ``qstat -g d -s p``.

    The first two lines of the command output are skipped; on every remaining line
    with at least eight whitespace-separated fields, the eighth field is read as an
    integer slot count and added to the total.

    :param instance_properties: mapping whose ``'slots'`` entry is used as the
        per-node slot capacity
    :return: total slots divided by the per-node capacity, rounded up
    """
    raw_output = check_sge_command_output("qstat -g d -s p -u '*'", log)
    # Drop the two leading lines (presumably column header and separator).
    data_rows = (row.split() for row in raw_output.split("\n")[2:])
    requested_slots = sum(int(fields[7]) for fields in data_rows if len(fields) >= 8)
    slots_per_node = instance_properties.get('slots')
    # Negating before and after the floor division turns it into a ceiling division.
    return -(-requested_slots // slots_per_node)
Exemplo n.º 6
0
def get_busy_nodes(instance_properties):
    """Count the ``qstat -f`` output rows that report used or reserved slots.

    The first two output lines are skipped. Only rows made of exactly five
    whitespace-separated fields are considered; their third field is parsed as
    ``resv/used/tot.`` and the row counts as busy when ``used`` or ``resv`` is positive.

    :param instance_properties: not used by this function
    :return: number of rows with at least one used or reserved slot
    """
    raw_output = check_sge_command_output("qstat -f", log)
    busy_count = 0
    for row in raw_output.split("\n")[2:]:
        fields = row.split()
        if len(fields) != 5:
            continue
        # Third column has the form resv/used/tot.
        reserved, in_use, _capacity = fields[2].split('/')
        if int(in_use) > 0 or int(reserved) > 0:
            busy_count += 1
    return busy_count
Exemplo n.º 7
0
def hasPendingJobs():
    """Check whether the scheduler reports pending jobs.

    :return: a ``(has_pending, error)`` tuple. ``has_pending`` is True when the
        command output contains more than one non-empty line. ``error`` is True
        (and ``has_pending`` False) when running the command raises
        ``subprocess.CalledProcessError``.
    """
    command = "qstat -g d -s p -u '*'"

    # Command outputs the pending jobs in the queue in the following format
    # job-ID  prior   name       user         state submit/start at     queue                          slots ja-task-ID
    # -----------------------------------------------------------------------------------------------------------------
    #      70 0.55500 job.sh     ec2-user     qw    08/08/2018 22:37:24                                    1
    #      71 0.55500 job.sh     ec2-user     qw    08/08/2018 22:37:24                                    1
    #      72 0.55500 job.sh     ec2-user     qw    08/08/2018 22:37:25                                    1
    #      73 0.55500 job.sh     ec2-user     qw    08/08/2018 22:37:25                                    1

    try:
        output = check_sge_command_output(command, log)
    except subprocess.CalledProcessError:
        return False, True

    # Build a real list: on Python 3 ``filter`` returns an iterator, which has no
    # len() and would raise a TypeError that the handler above does not catch.
    non_empty_lines = [line for line in output.split("\n") if line]
    return len(non_empty_lines) > 1, False
Exemplo n.º 8
0
def hasJobs(hostname):
    """Tell whether ``qstat`` lists any job for the given host.

    :param hostname: value used for the ``-l hostname=`` filter
    :return: True when the command output is not the empty string; False when it is
        empty or when the command raises ``subprocess.CalledProcessError``
    """
    # -g t expands the parallel job view, so every task placed on the node is listed.
    # When jobs are present the output is a header line, a separator line and one row
    # per job/task (job-ID, prior, name, user, state, submit/start time, queue@host,
    # MASTER/SLAVE, ja-task-ID).
    qstat_line = "qstat -g t -l hostname={0} -u '*'".format(hostname)

    try:
        return check_sge_command_output(qstat_line, log) != ""
    except subprocess.CalledProcessError:
        return False
Exemplo n.º 9
0
def hasPendingJobs():
    """Report whether there are pending jobs according to ``qstat``.

    :return: a ``(has_pending, error)`` tuple. ``has_pending`` is True when more than
        one non-empty line is present in the command output. When the command raises
        ``subprocess.CalledProcessError`` the result is ``(False, True)``.
    """
    command = "qstat -g d -s p -u '*'"

    # Command outputs the pending jobs in the queue in the following format
    # job-ID  prior   name       user         state submit/start at     queue                          slots ja-task-ID
    # -----------------------------------------------------------------------------------------------------------------
    #      70 0.55500 job.sh     ec2-user     qw    08/08/2018 22:37:24                                    1
    #      71 0.55500 job.sh     ec2-user     qw    08/08/2018 22:37:24                                    1
    #      72 0.55500 job.sh     ec2-user     qw    08/08/2018 22:37:25                                    1
    #      73 0.55500 job.sh     ec2-user     qw    08/08/2018 22:37:25                                    1

    try:
        output = check_sge_command_output(command, log)
        # A list comprehension is used instead of filter(): on Python 3 filter()
        # yields an iterator and len() on it raises an uncaught TypeError.
        lines = [line for line in output.split("\n") if line]
        has_pending = len(lines) > 1
        error = False
    except subprocess.CalledProcessError:
        error = True
        has_pending = False

    return has_pending, error
Exemplo n.º 10
0
def hasJobs(hostname):
    """Check whether ``qstat`` reports at least one job on ``hostname``.

    :param hostname: host name substituted into the ``-l hostname=`` filter
    :return: False when the command output is an empty string or the command raises
        ``subprocess.CalledProcessError``; True otherwise
    """
    # The parallel job view is expanded (-g t) so that each task running on the
    # node produces its own output row; any output at all therefore means the node
    # has jobs. A populated listing consists of a header, a separator and rows with
    # job-ID, prior, name, user, state, submit/start time, queue@host, MASTER/SLAVE
    # and ja-task-ID columns.
    job_query = "qstat -g t -l hostname={0} -u '*'".format(hostname)

    try:
        listing = check_sge_command_output(job_query, log)
    except subprocess.CalledProcessError:
        return False
    return listing != ""