Пример #1
0
def submit_federated(clusters, jobs, group, pool):
    """
    Tries each cluster in turn, posting the given jobs to it, and stops at the first
    cluster whose response has status code 201 (created).

    Returns 0 as soon as a cluster answers 201. Returns 1 if a read timeout occurs,
    without trying the remaining clusters, because the submission may have gone through.
    A connection failure (IOError) is recorded and the next cluster is tried; a response
    with any other status code also moves on to the next cluster. If no cluster answers
    201, the recorded failure messages are printed with print_error and an Exception
    is raised.
    """
    failure_messages = []
    for cluster in clusters:
        cluster_name = cluster['name']
        cluster_url = cluster['url']
        try:
            print_info('Attempting to submit on %s cluster...' % terminal.bold(cluster_name))

            # group and pool are optional; they are only sent when truthy
            json_body = {'jobs': jobs}
            if group:
                json_body['groups'] = [group]
            if pool:
                json_body['pool'] = pool

            resp = http.post(cluster, 'jobs', json_body)
            print_submit_result(cluster, resp)
            created = resp.status_code == 201
            if created:
                metrics.inc('command.submit.jobs', len(jobs))
                return 0
        except requests.exceptions.ReadTimeout as rt:
            # This clause is listed ahead of the IOError clause so that timeouts
            # get their own message and stop the loop instead of falling through
            logging.exception(rt)
            timeout_text = terminal.failed(
                f'Encountered read timeout with {cluster_name} ({cluster_url}). Your submission may have completed.')
            print_info(timeout_text)
            return 1
        except IOError as ioe:
            logging.exception(ioe)
            reason = f'Cannot connect to {cluster_name} ({cluster_url})'
            failure_messages.append(submit_failed_message(cluster_name, reason))

    # Reaching this point means no cluster reported a 201
    print_error(''.join(failure_messages))
    raise Exception(terminal.failed('Job submission failed on all of your configured clusters.'))
Пример #2
0
def print_no_data(clusters, states, user):
    """
    Prints a message indicating that no matching jobs were found in the given clusters.

    clusters is an iterable of dicts, each with a 'name' entry. states is a collection
    of state names: if it contains 'all', the message lists waiting / running / completed;
    otherwise, if it contains 'success', the first 'success' entry is dropped and
    'successful' is added at the end for display. user is interpolated into the message.

    The states argument is never modified; display adjustments are made on a copy.
    """
    clusters_text = ' / '.join(c['name'] for c in clusters)
    if 'all' in states:
        shown_states = ['waiting', 'running', 'completed']
    elif 'success' in states:
        # Copy first so the caller's collection keeps its 'success' entry
        shown_states = list(states)
        shown_states.remove('success')
        shown_states.append('successful')
    else:
        shown_states = states
    states_text = ' / '.join(shown_states)
    print(
        terminal.failed(
            f'No matching {states_text} jobs for {user} found in {clusters_text}.'
        ))
Пример #3
0
def format_state(state):
    """
    Returns the given state capitalized and, for Running / Waiting / Failed / Success,
    passed through the matching terminal color function. Any other state is returned
    capitalized with no coloring applied.
    """
    label = state.capitalize()
    if label == 'Running':
        return terminal.running(label)
    if label == 'Waiting':
        return terminal.waiting(label)
    if label == 'Failed':
        return terminal.failed(label)
    if label == 'Success':
        return terminal.success(label)
    return label
Пример #4
0
Файл: kill.py Проект: yueri/Cook
def kill_entities(query_result, clusters):
    """
    Attempts to kill the jobs / instances / groups found in query_result.

    query_result['clusters'] maps a cluster name to a dict that may contain 'jobs',
    'instances' and 'groups' entries; clusters is the list of cluster dicts, looked up
    by their 'name'. UUIDs are sent to the kill functions in batches of 100, and each
    batch is recorded as succeeded or failed as a whole based on the kill function's
    return value. Prints one line per entity plus a summary, and returns the number
    of entities that failed to be killed.
    """
    kill_batch_size = 100
    succeeded = []
    failed = []
    clusters_by_name = {}
    for known_cluster in clusters:
        clusters_by_name[known_cluster['name']] = known_cluster

    def kill_in_batches(cluster, uuids, kill_fn, entity_type):
        # Nothing to do for an entity type with no UUIDs
        if not uuids:
            return
        for uuid_batch in partition(uuids, kill_batch_size):
            killed = kill_fn(cluster, uuid_batch)
            outcome = succeeded if killed else failed
            outcome.extend(
                {'cluster': cluster, 'type': entity_type, 'uuid': u}
                for u in uuid_batch
            )

    for cluster_name, entities in query_result['clusters'].items():
        cluster = clusters_by_name[cluster_name]
        # All three UUID lists are built before any kill request is issued
        job_uuids = [job['uuid'] for job in entities.get('jobs', [])]
        # Each 'instances' item is a pair; only the first element's task_id is used
        instance_uuids = [instance['task_id'] for instance, _ in entities.get('instances', [])]
        group_uuids = [group['uuid'] for group in entities.get('groups', [])]
        kill_in_batches(cluster, job_uuids, kill_jobs, 'job')
        kill_in_batches(cluster, instance_uuids, kill_instances, 'job instance')
        kill_in_batches(cluster, group_uuids, kill_groups, 'job group')

    for item in succeeded:
        print_info(
            f'Killed {item["type"]} {terminal.bold(item["uuid"])} on {terminal.bold(item["cluster"]["name"])}.'
        )
    for item in failed:
        failure_line = terminal.failed(
            f'Failed to kill {item["type"]} {item["uuid"]} on {item["cluster"]["name"]}.'
        )
        print(failure_line)

    num_succeeded = len(succeeded)
    num_failed = len(failed)
    print_info(f'Successful: {num_succeeded}, Failed: {num_failed}')
    return num_failed
Пример #5
0
def no_data_message(clusters):
    """
    Builds the "no matching data" message for the given clusters: a line naming the
    clusters (passed through terminal.failed), followed by a plain second line
    suggesting that another cluster may need to be configured.
    """
    cluster_names = [cluster['name'] for cluster in clusters]
    clusters_text = ' / '.join(cluster_names)
    message = terminal.failed(f'No matching data found in {clusters_text}.')
    return f'{message}\nDo you need to add another cluster to your configuration?'
Пример #6
0
def submit_failed_message(cluster_name, reason):
    """
    Builds the two-line failed-submission message for a cluster: a header naming the
    cluster, then the reason on the next line (passed through terminal.reason).
    """
    failed_word = terminal.failed('failed')
    styled_reason = terminal.reason(reason)
    return 'Job submission %s on %s:\n%s' % (failed_word, cluster_name, styled_reason)
Пример #7
0
def print_error(text):
    """Writes text to stderr after passing it through terminal.failed"""
    styled_text = terminal.failed(text)
    print(styled_text, file=sys.stderr)