Exemplo n.º 1
0
def show_experiment_group(group):
    """Print a detailed view of an experiment group (displayed as a "Set").

    Output, in order: the common header, an optional Tensorboard line (only
    for tensorflow groups whose framework config enables tensorboard), a
    table of the group's experiments and, when the group has jobs, the job
    table.

    :param group: experiment group object; this function reads its
        ``framework``, ``framework_config`` and ``jobs`` attributes and
        forwards the object to the header/row helpers.
    """
    show_common_header(group, 'Set')

    tensorboard_enabled = (
        group.framework == 'tensorflow'
        and group.framework_config.get('tensorboard', False))
    if tensorboard_enabled:
        # Supply a default so that a group without a tensorboard job is
        # reported as offline instead of raising StopIteration; tolerate a
        # missing job list for the same reason.
        tensorboard_job = next(
            (job for job in (group.jobs or [])
             if job.role == 'tensorboard'),
            None)
        if (tensorboard_job is not None
                and tensorboard_job.state in ['RUNNING']):
            print("Tensorboard: {}".format(
                util.tensorboard_job_url(tensorboard_job)))
        else:
            print("Tensorboard: OFFLINE")

    print()
    util.print_table(
        header=['EXP ID', 'STATE', 'STARTED', 'DURATION', 'PARAMS', 'RESULT'],
        min_widths=(6, 9, 11, 9, 14, 14),
        rows=get_experiments_rows(group,
                                  with_project=False,
                                  with_type=False,
                                  indent=False,
                                  use_started_at=True,
                                  with_duration=True))

    if group.jobs:
        print()
        show_job_table(group.jobs)
Exemplo n.º 2
0
def display_gpus(nodes):
    """Print a table with one row per GPU of every schedulable node.

    Nodes are skipped when they are not schedulable, when they have no GPUs,
    or when ``gpus_allocatable`` disagrees with the number of entries in
    ``gpus``. Hostname and driver are printed only on a node's first row.

    :param nodes: iterable of node objects exposing ``schedulable``,
        ``gpus_allocatable``, ``gpus``, ``hostname`` and ``nvidia_driver``.
    """
    device_prefix = '/dev/nvidia'

    def get_device_id(device_name):
        """Return the integer suffix of a ``/dev/nvidia<N>`` name, else None."""
        if device_name.startswith(device_prefix):
            return int(device_name[len(device_prefix):])
        return None

    def sort_key(entry):
        # Device ids may be None for unrecognised device names; None cannot
        # be ordered against ints on Python 3, so sort those entries last.
        device_id = entry[0]
        return (device_id is None, device_id or 0)

    rows = []
    for n in nodes:
        if not n.schedulable:
            continue
        # consistent reporting of GPUs
        gpu_count = len(n.gpus)
        if gpu_count == 0 or n.gpus_allocatable != gpu_count:
            continue

        # Pair every GPU with its device id once, so the id is parsed a
        # single time for both ordering and display.
        ordered = sorted(
            ((get_device_id(gpu.device), gpu) for gpu in n.gpus),
            key=sort_key)
        for i, (device_id, gpu) in enumerate(ordered):
            rows.append([
                n.hostname if i == 0 else '',
                n.nvidia_driver if i == 0 else '', gpu.name,
                device_id,
                '%.1f' % bytes_to_gib(gpu.mem), gpu.serial
            ])

    print_table(
        header=['NODE', 'DRIVER', 'NAME', 'ID', 'MEM', 'SERIAL'],
        min_widths=[2, 3, 3, 2, 3, 3],
        rows=rows,
        column_spaces=2,
    )
Exemplo n.º 3
0
def get_summary_infos(project_name, jobs_stats):
    """Render a resource-usage summary table for a project's jobs.

    Jobs in the running or serving state get CPU, memory, GPU and GPU-memory
    columns; every other job gets empty resource columns.

    :param project_name: value shown in the PROJECT column of every row.
    :param jobs_stats: iterable of per-job stats objects. Each must expose a
        ``job`` attribute and a ``get`` accessor.
        NOTE(review): ``get`` is called both as ``get(key)`` and as
        ``get(key, fmt, converter)``; it is assumed to return an
        already-formatted string in the latter form - confirm against the
        stats class.
    :return: the table as a string.
    """
    def format_cpu(job_stats):
        """Format CPU as ``used/available |requested``, or '-' if unknown."""
        cpu_percent = job_stats.get('cpu_percent')
        if cpu_percent is None:
            return '-'
        # float() keeps the division fractional for integer percentages,
        # matching the GPU column.
        used = '%.1f' % (cpu_percent / float(100))
        requested = '%.1f' % job_stats.job.cpus
        available = '-'
        percpu_percent = job_stats.get('percpu_percent')
        if percpu_percent:
            available = '%d' % len(percpu_percent)
        return '{:>3}/{} |{}'.format(used, available,
                                     requested.rstrip('0').rstrip('.'))

    def format_mem(job_stats):
        """Format memory as ``used/limit |requested``."""
        available = job_stats.get('memory_limit', '%.1f', bytes_to_gib)
        used = job_stats.get('memory_used', '%.1f', bytes_to_gib)
        requested = '%.1f' % mib_to_gib(job_stats.job.mem)
        return '{:>3}/{} |{}'.format(used,
                                     available.rstrip('0').rstrip('.'),
                                     requested.rstrip('0').rstrip('.'))

    def format_gpu(job_stats):
        """Format GPU as ``used/requested``, or ' -' when not applicable."""
        requested = job_stats.job.gpus
        gpu_percent = job_stats.get('gpu_percent')
        # Test the numeric request: a formatted string never equals 0, so
        # the "no GPUs requested" case would otherwise never be detected.
        if not requested or gpu_percent is None:
            return ' -'
        used = '%.1f' % (gpu_percent / float(100))
        return '{:>3}/{}'.format(used, '%d' % requested)

    def format_gpu_mem(job_stats):
        """Format GPU memory as ``used/total``, or '   -' when unknown."""
        if job_stats.get('gpu_memory_total') is None:
            return '   -'
        available = job_stats.get('gpu_memory_total', '%.1f', bytes_to_gib)
        used = job_stats.get('gpu_memory_used', '%.1f', bytes_to_gib)
        return '{:>3}/{}'.format(used, available.rstrip('0').rstrip('.'))

    rows = []
    output = StringIO()
    for job_stats in jobs_stats:
        job = job_stats.job
        identity = [
            job.short_id, project_name,
            '%s%s' % (get_state_symbol(job.state), job.state),
        ]
        if job.state in (JobState.running, JobState.serving):
            usage = [
                format_cpu(job_stats),
                format_mem(job_stats),
                format_gpu(job_stats),
                format_gpu_mem(job_stats),
            ]
        else:
            # Only active jobs have live usage figures.
            usage = ['', '', '', '']
        rows.append(identity + usage)

    print_table(
        header=['ID', 'PROJECT', 'STATE', 'CPU', 'MEM', 'GPU', 'GPU MEM'],
        min_widths=[4, 8, 6, 10, 10, 3, 10],
        rows=rows,
        file=output,
        column_spaces=2)
    return output.getvalue()
Exemplo n.º 4
0
def show_experiments(experiments, all=False, collapsed=True, users=False):
    """Print the experiment status table.

    The column titles and widths come from ``_get_status_headers`` and the
    rows from ``_get_experiment_rows``; ``all``, ``collapsed`` and ``users``
    are passed through to those helpers unchanged.
    """
    column_titles, column_widths = _get_status_headers(collapsed, users)
    table_rows = _get_experiment_rows(experiments, all, collapsed, users)
    util.print_table(header=column_titles,
                     min_widths=column_widths,
                     rows=table_rows)
Exemplo n.º 5
0
def run_list(args):
    """Fetch all users through the admin API and print them as a table.

    :param args: not used by this function.
    """
    admin = AdminApi(ApiClient())
    fetched_users = call_api(lambda: admin.get_users())

    # One row per user; the enabled flag is rendered as text.
    table_rows = [
        [user.username, user.email, str(user.is_enabled)]
        for user in fetched_users
    ]

    print_table(
        header=['Username', 'Email', 'Enabled'],
        min_widths=[12, 6, 9],
        column_spaces=2,
        rows=table_rows,
    )
Exemplo n.º 6
0
def display_long(nodes):
    """Print the detailed per-node resource table with a totals row.

    Only schedulable nodes are listed. A node's GPUs (and their memory) are
    counted only when ``gpus_allocatable`` equals the number of entries in
    ``gpus``; otherwise the node is shown with zero GPUs. A driver value of
    'NOT FOUND' is displayed as '-'.

    :param nodes: iterable of node objects.
    """
    table_rows = []
    sum_cpus = 0
    sum_mem = 0
    sum_gpus = 0
    sum_gpu_mem = 0

    for node in nodes:
        if not node.schedulable:
            continue

        # consistent reporting of GPUs
        if node.gpus_allocatable == len(node.gpus):
            gpu_count = len(node.gpus)
            gpu_memory = sum([gpu.mem for gpu in node.gpus])
        else:
            gpu_count = 0
            gpu_memory = 0

        sum_cpus += node.cpus
        sum_mem += node.mem
        sum_gpus += gpu_count
        sum_gpu_mem += gpu_memory

        if node.nvidia_driver != 'NOT FOUND':
            driver = node.nvidia_driver
        else:
            driver = '-'

        table_rows.append([
            node.hostname,
            node.cpus,
            format_float(bytes_to_gib(node.mem)),
            gpu_count,
            format_float(bytes_to_gib(gpu_memory)),
            driver,
            node.kubelet_version.lstrip('v'),
            node.docker_version,
        ])

    # Separator line between the node rows and the totals.
    table_rows.append(TableRowDelimiter('-'))
    table_rows.append([
        'Total',
        sum_cpus,
        format_float(bytes_to_gib(sum_mem)),
        sum_gpus,
        format_float(bytes_to_gib(sum_gpu_mem)),
        '', '', '',
    ])

    print_table(
        header=[
            'NODE', 'CPU', 'MEM', 'GPU', 'GPU MEM', 'NVIDIA DRIVER',
            'KUBELET VERSION', 'DOCKER VERSION'
        ],
        min_widths=[18, 3, 3, 3, 7, 3, 3, 3],
        rows=table_rows,
        column_spaces=2,
    )
Exemplo n.º 7
0
def show_job_table(jobs):
    """Print one table row per job with state, timing and resource columns.

    Messages longer than 20 characters are cut to their first 17 characters
    followed by '...'; a missing reason, message or exit code is shown as an
    empty cell.

    :param jobs: iterable of job objects.
    """
    table_rows = []
    for job in jobs:
        state_cell = '%s%s' % (util.get_state_symbol(job.state), job.state)
        started_cell = util.get_since_str(job.started_at)
        finished_cell = util.get_since_str(job.finished_at)

        message_cell = job.message or ''
        if len(message_cell) > 20:
            message_cell = message_cell[:17] + '...'

        # An exit code of 0 is a real value, so test against None explicitly.
        if job.exit_code is None:
            exit_cell = ''
        else:
            exit_cell = job.exit_code

        table_rows.append([
            job.short_id,
            state_cell,
            started_cell,
            finished_cell,
            job.reason or '',
            message_cell,
            exit_cell,
            '%d' % job.gpus,
            '%.1f' % job.cpus,
            '%d' % job.mem,
        ])

    util.print_table(
        header=['JOB ID', 'STATE', 'STARTED', 'FINISHED', 'REASON',
                'MESSAGE', 'EXIT CODE', 'GPU', 'CPU', 'MEM'],
        min_widths=[13, 13, 13, 13, 13, 20, 10, 6, 6, 6],
        rows=table_rows,
    )
Exemplo n.º 8
0
def get_gpu_table(job_stats):
    """Render the per-GPU statistics table of a job as a string.

    One row is produced for each device in ``job_stats.gpus``; when the job
    requested more GPUs than are listed, the difference is filled with
    'N/A' placeholder rows. With no rows at all, nothing is rendered and an
    empty string is returned.

    :param job_stats: stats object exposing ``gpus``, ``gpu_stats`` (indexed
        by the entries of ``gpus``) and ``job.gpus``.
    :return: the table text with surrounding whitespace stripped.
    """
    def memory_cell(stats):
        # Without a known total there is nothing meaningful to show.
        if stats.get('memory_total') is None:
            return '  -'
        total = stats.get('memory_total', '%.1f', bytes_to_gib)
        in_use = stats.get('memory_used', '%.1f', bytes_to_gib)
        return '{:>3}/{}'.format(in_use, total.rstrip('0').rstrip('.'))

    def power_cell(stats):
        if stats.get('power_limit') is None:
            return '  -'
        cap = stats.get('power_limit', '%d')
        draw = stats.get('power_draw', '%d')
        return '{:>3}/{}W'.format(draw, cap)

    table_rows = []
    buffer = StringIO()

    for position, device in enumerate(job_stats.gpus):
        stats = job_stats.gpu_stats[device]
        table_rows.append([
            position,
            stats.get('name', '%s'),
            stats.get('gpu_utilization', '%d%%'),
            memory_cell(stats),
            power_cell(stats),
            stats.get('temperature', '%dC'),
            stats.get('device_bus_id', '%s'),
        ])

    # Placeholder rows for requested GPUs that have no stats entry.
    missing = job_stats.job.gpus - len(job_stats.gpus)
    table_rows.extend(['N/A'] + [''] * 6 for _ in range(missing))

    if table_rows:
        print_table(
            header=['ID', 'NAME', 'UTIL', 'MEM', 'POWER', 'TEMP', 'BUS ID'],
            min_widths=[3, 8, 4, 6, 3, 3, 3],
            rows=table_rows,
            bold_header=False,
            column_spaces=2,
            file=buffer)
    return buffer.getvalue().strip()
Exemplo n.º 9
0
def display_short(nodes):
    """Print the compact per-node resource table with a totals row.

    Only schedulable nodes are listed. A node's GPUs (and their memory) are
    counted only when ``gpus_allocatable`` equals the number of entries in
    ``gpus``; otherwise the node is shown with zero GPUs.

    :param nodes: iterable of node objects.
    """
    table_rows = []
    sum_cpus = 0
    sum_mem = 0
    sum_gpus = 0
    sum_gpu_mem = 0

    for node in nodes:
        if not node.schedulable:
            continue

        # consistent reporting of GPUs
        if node.gpus_allocatable == len(node.gpus):
            gpu_count = len(node.gpus)
            gpu_memory = sum([gpu.mem for gpu in node.gpus])
        else:
            gpu_count = 0
            gpu_memory = 0

        sum_cpus += node.cpus
        sum_mem += node.mem
        sum_gpus += gpu_count
        sum_gpu_mem += gpu_memory

        # NOTE(review): node rows and the totals row mix '%.1f' and
        # format_float for the two memory columns; kept exactly as found -
        # confirm whether a single formatter was intended.
        table_rows.append([
            node.hostname,
            node.cpus,
            '%.1f' % bytes_to_gib(node.mem),
            gpu_count,
            format_float(bytes_to_gib(gpu_memory)),
        ])

    # Separator line between the node rows and the totals.
    table_rows.append(TableRowDelimiter('-'))
    table_rows.append([
        'Total',
        sum_cpus,
        format_float(bytes_to_gib(sum_mem)),
        sum_gpus,
        '%.1f' % bytes_to_gib(sum_gpu_mem),
    ])

    print_table(
        header=['NODE', 'CPU', 'MEM', 'GPU', 'GPU MEM'],
        min_widths=[18, 3, 3, 3, 7],
        rows=table_rows,
        column_spaces=2,
    )
Exemplo n.º 10
0
def show_experiment_group(group):
    """Print a detailed view of an experiment group (displayed as a "Set").

    Output, in order: id, type, state and project lines, an optional
    Tensorboard line (only for tensorflow groups whose framework config
    enables tensorboard), a table of the group's experiments and, when the
    group has jobs, the job table.

    :param group: experiment group object; this function reads its
        ``short_id``, ``state``, ``project.name``, ``framework``,
        ``framework_config`` and ``jobs`` attributes.
    """
    print("ID: {}".format(group.short_id))
    print("Type: Set")
    print("State: {}{}".format(util.get_state_symbol(group.state),
                               group.state))
    print("Project: {}".format(group.project.name))

    tensorboard_enabled = (
        group.framework == 'tensorflow'
        and group.framework_config.get('tensorboard', False))
    if tensorboard_enabled:
        # Supply a default so that a group without a tensorboard job is
        # reported as offline instead of raising StopIteration; tolerate a
        # missing job list for the same reason.
        tensorboard_job = next(
            (job for job in (group.jobs or [])
             if job.role == 'tensorboard'),
            None)
        if (tensorboard_job is not None
                and tensorboard_job.state in ['RUNNING']):
            print("Tensorboard: {}".format(
                util.tensorboard_job_url(tensorboard_job)))
        else:
            print("Tensorboard: OFFLINE")

    print()
    util.print_table(
        header=['EXP ID', 'STATE', 'AGE', 'PARAMS', 'RESULT'],
        min_widths=(6, 9, 13, 14, 14),
        rows=get_experiments_rows(group, with_project=False,
                                  with_type=False, indent=False)
    )

    if group.jobs:
        print()
        show_job_table(group.jobs)