Пример #1
0
def submit_federated(clusters, jobs, group, pool):
    """
    Walks through the given clusters in order, POSTing the jobs to each one's 'jobs'
    endpoint, and stops at the first cluster that answers with 201 (created).

    The request body always contains the jobs; the group (wrapped in a 'groups' list)
    and the pool are included only when they are truthy.

    Returns 0 as soon as a cluster reports 201. A read timeout returns 1 immediately
    without trying the remaining clusters, since the submission may have completed.
    Any other IOError is recorded and the next cluster is tried. If the loop finishes
    without a 201, the recorded failure messages are printed and an Exception is raised.
    """
    failure_report = ""
    for target in clusters:
        name, url = target['name'], target['url']
        try:
            print_info(f'Attempting to submit on {terminal.bold(name)} cluster...')

            payload = {'jobs': jobs}
            if group:
                payload['groups'] = [group]
            if pool:
                payload['pool'] = pool

            response = http.post(target, 'jobs', payload)
            print_submit_result(target, response)
            if response.status_code == 201:
                metrics.inc('command.submit.jobs', len(jobs))
                return 0
        except requests.exceptions.ReadTimeout as timeout_error:
            # Stop here instead of moving on: the cluster may have accepted the jobs,
            # and retrying elsewhere could submit them twice.
            logging.exception(timeout_error)
            timeout_notice = (f'Encountered read timeout with {name} ({url}). '
                              f'Your submission may have completed.')
            print_info(terminal.failed(timeout_notice))
            return 1
        except IOError as io_error:
            # Remember the failure and fall through to the next cluster.
            logging.exception(io_error)
            failure_report += submit_failed_message(name, f'Cannot connect to {name} ({url})')
    print_error(failure_report)
    raise Exception(terminal.failed('Job submission failed on all of your configured clusters.'))
Пример #2
0
def submit_federated(clusters, jobs):
    """
    Tries the given clusters one after another, POSTing the jobs to each one's
    'rawscheduler' endpoint, and returns 0 at the first 201 (created) response.

    An IOError while talking to a cluster is logged at info level, reported to the
    user, and the next cluster is tried. If no cluster answers with 201, an
    Exception is raised.
    """
    for target in clusters:
        name = target['name']
        try:
            print_info(f'Attempting to submit on {colors.bold(name)} cluster...')
            response = http.post(target, 'rawscheduler', {'jobs': jobs})
            print_submit_result(target, response)
            if response.status_code == 201:
                metrics.inc('command.submit.jobs', len(jobs))
                return 0
        except IOError as io_error:
            logging.info(io_error)
            # The URL is only looked up here, when it is needed for the failure message.
            url = target['url']
            failure_text = submit_failed_message(name, f'Cannot connect to {name} ({url})')
            print_info(f'{failure_text}\n')
    failure_summary = colors.failed('Job submission failed on all of your configured clusters.')
    raise Exception(failure_summary)
Пример #3
0
def copy_limits(args, config_path):
    """
    Copies limits (share and quota) for a particular user from one cluster to another cluster.

    For every pool of the from-cluster that also exists on the to-cluster and is not
    'inactive' there, the user's limits on both clusters are printed and the operator
    is asked for confirmation. On a positive answer, the share and then the quota read
    from the from-cluster are POSTed to the to-cluster, and the outcome of each request
    is reported.

    :param args: mapping of parsed arguments; the keys 'user', 'from', 'from_url',
        'to' and 'to_url' are read
    :param config_path: passed to configuration.load_config_with_defaults
    :raises Exception: if neither a from-cluster name nor URL was provided, or neither
        a to-cluster name nor URL was provided
    :raises AssertionError: if the from or to target does not resolve to exactly one cluster
    """
    user = args.get('user')

    from_cluster = args.get('from')
    from_url = args.get('from_url')
    if not from_cluster and not from_url:
        copy_limits_parser.print_help()
        print()
        raise Exception('You must provide either a from-cluster name (--from) or URL (--from-url).')

    to_cluster = args.get('to')
    to_url = args.get('to_url')
    if not to_cluster and not to_url:
        copy_limits_parser.print_help()
        print()
        raise Exception('You must provide either a to-cluster name (--to) or URL (--to-url).')

    _, config_map = configuration.load_config_with_defaults(config_path)
    from_clusters = load_target_clusters(config_map, from_url, from_cluster)
    to_clusters = load_target_clusters(config_map, to_url, to_cluster)
    # Raised explicitly instead of using `assert`, which is stripped under `python -O`;
    # AssertionError is kept so the exception type seen by callers does not change.
    if len(from_clusters) != 1:
        raise AssertionError('Only a single from-cluster is supported.')
    if len(to_clusters) != 1:
        raise AssertionError('Only a single to-cluster is supported.')
    from_cluster = from_clusters[0]
    to_cluster = to_clusters[0]
    from_cluster_name = from_cluster['name']
    to_cluster_name = to_cluster['name']
    print(f'Copying limits for {terminal.bold(user)} user '
          f'from {terminal.bold(from_cluster_name)} '
          f'to {terminal.bold(to_cluster_name)}:')
    from_pools = http.make_data_request(from_cluster, lambda: http.get(from_cluster, 'pools', params={}))
    to_pools = http.make_data_request(to_cluster, lambda: http.get(to_cluster, 'pools', params={}))
    from_pools_dict = {pool['name']: pool for pool in from_pools}
    to_pools_dict = {pool['name']: pool for pool in to_pools}
    for pool_name in from_pools_dict:
        # Only pools that exist on the to-cluster and are not inactive there are considered.
        if pool_name not in to_pools_dict or to_pools_dict[pool_name]['state'] == 'inactive':
            continue

        print(f'\n=== Pool: {pool_name} ===')
        # Queried per pool (not once up front) so the table shown to the operator
        # is fetched right before each confirmation prompt.
        query_result = query([from_cluster, to_cluster], user)
        query_result = filter_query_result_by_pools(query_result, [pool_name])
        print_formatted(query_result)
        answer = input(f'Copy limits for {terminal.bold(pool_name)} pool '
                       f'from {terminal.bold(from_cluster_name)} '
                       f'to {terminal.bold(to_cluster_name)}? ')
        if not str2bool(answer):
            continue

        from_dict = query_result['clusters'][from_cluster_name]['pools'][pool_name]
        reason = f'Copying limits for {user} user from {from_cluster_name} to {to_cluster_name}'

        # Share and quota are handled identically: the limit type is the endpoint name,
        # the key in the source data, and the key in the request body. A failure of one
        # is reported but does not prevent the other from being attempted.
        for limit_type in ('share', 'quota'):
            limit_value = from_dict[limit_type]
            resp = http.post(to_cluster,
                             limit_type,
                             {'pool': pool_name,
                              'user': user,
                              'reason': reason,
                              limit_type: limit_value})
            if resp.status_code != 201:
                print_error(f'Setting {limit_type} for {pool_name} on {to_cluster_name} '
                            f'failed with status code {resp.status_code}: {resp.text}')
            else:
                print(terminal.success(f'Copied {limit_type} for {pool_name} pool '
                                       f'from {from_cluster_name} '
                                       f'to {to_cluster_name}.'))