示例#1
0
def run_sweep_doodad(target,
                     params,
                     run_mode,
                     mounts,
                     test_one=False,
                     docker_image='python:3',
                     is_docker_interactive=False,
                     return_output=False,
                     verbose=False,
                     postprocess_config_and_run_mode=(
                         lambda config, run_mode, idx: (config, run_mode)),
                     default_params=None):
    """Launch one job per configuration produced by sweeping ``params``.

    The directory containing ``target`` is added as an extra local mount, a
    single archive is built around the python command for ``target``, and
    that archive is then run once per configuration with the configuration
    rendered as ``--key value`` command-line flags.

    Args:
        target: Path of the script to run.
        params: Sweep specification handed to ``Sweeper``.
        run_mode: Launch mode; must provide ``use_gpu``, ``run_script`` and
            ``print_launch_message``.
        mounts: Iterable of mounts to include in the archive.
        test_one: If true, stop after the first job that is launched.
        docker_image: Forwarded to ``archive_builder.build_archive``.
        is_docker_interactive: Forwarded to
            ``archive_builder.build_archive``.
        return_output: Forwarded to ``run_script``; when true, the stripped
            output of every job is collected and returned.
        verbose: Forwarded to ``archive_builder.build_archive`` only;
            ``run_script`` is always called with ``verbose=False``.
        postprocess_config_and_run_mode: Callable taking
            ``(config, run_mode, idx)`` and returning ``(config, run_mode)``.
            ``idx`` is the number of jobs launched so far. A returned
            config of ``None`` skips that configuration. The returned run
            mode replaces the current one for this and later iterations.
        default_params: Forwarded to ``Sweeper`` as its second argument.

    Returns:
        A tuple with one stripped output per launched job when
        ``return_output`` is true, otherwise an empty tuple.
    """
    # Expose the script's own directory to the job under 'target/<dirname>'.
    script_dir, script_name = os.path.split(target)
    script_mount = mount.MountLocal(
        local_dir=script_dir,
        mount_point=os.path.join('target', os.path.basename(script_dir)))
    mounts = list(mounts) + [script_mount]
    # Use the mount object's own mount_point for the in-job script path.
    command = launch_api.make_python_command(
        os.path.join(script_mount.mount_point, script_name))

    print('Launching jobs with mode %s' % run_mode)
    outputs = []
    njobs = 0
    with archive_builder.temp_archive_file() as archive_file:
        # The archive is built once, using the run mode passed in by the
        # caller, and reused for every configuration.
        archive = archive_builder.build_archive(
            archive_filename=archive_file,
            payload_script=command,
            verbose=verbose,
            docker_image=docker_image,
            is_docker_interactive=is_docker_interactive,
            use_nvidia_docker=run_mode.use_gpu,
            mounts=mounts)

        for config in Sweeper(params, default_params):
            config, run_mode = postprocess_config_and_run_mode(
                config, run_mode, njobs)
            if config is None:
                # The hook vetoed this configuration; it is not counted.
                continue
            njobs += 1

            flags = []
            for key in config:
                flags.append('--%s %s' % (key, config[key]))
            job_cmd = archive + ' -- ' + ' '.join(flags)

            job_output = run_mode.run_script(job_cmd,
                                             return_output=return_output,
                                             verbose=False)
            if return_output:
                outputs.append(archive_builder._strip_stdout(job_output))
            if test_one:
                break
    print('Launching completed for %d jobs' % njobs)
    run_mode.print_launch_message()
    return tuple(outputs)
示例#2
0
def run_sweep_doodad_chunked(target,
                             params,
                             run_mode,
                             mounts,
                             num_chunks=10,
                             docker_image='python:3',
                             return_output=False,
                             test_one=False,
                             confirm=True,
                             verbose=False):
    """Launch a parameter sweep with several jobs batched per launch.

    The directory containing ``target`` is added as an extra local mount and
    a single archive is built around the python command for ``target``. The
    swept configurations are grouped by ``chunker``; for every chunk the
    per-configuration commands are concatenated (each followed by ``;``)
    and submitted through a single ``run_mode.run_script`` call.

    Args:
        target: Path of the script to run.
        params: Sweep specification handed to ``Sweeper``.
        run_mode: Launch mode; must provide ``use_gpu``, ``run_script`` and
            ``print_launch_message``.
        mounts: Iterable of mounts to include in the archive.
        num_chunks: Forwarded to ``chunker`` (presumably the number of
            groups to split the sweep into -- confirm against ``chunker``).
        docker_image: Forwarded to ``archive_builder.build_archive``.
        return_output: Forwarded to ``run_script``; when true, the stripped
            output of every chunk is collected and returned.
        test_one: If true, stop after the first chunk has been submitted.
        confirm: Forwarded to ``chunker``.
        verbose: Forwarded to ``archive_builder.build_archive`` only;
            ``run_script`` is always called with ``verbose=False``.

    Returns:
        A tuple with one stripped output per submitted chunk when
        ``return_output`` is true, otherwise an empty tuple.
    """
    # build archive
    target_dir = os.path.dirname(target)
    target_mount_dir = os.path.join('target', os.path.basename(target_dir))
    target_mount = mount.MountLocal(local_dir=target_dir,
                                    mount_point=target_mount_dir)
    mounts = list(mounts) + [target_mount]
    target_full_path = os.path.join(target_mount.mount_point,
                                    os.path.basename(target))
    payload_command = launch_api.make_python_command(target_full_path)

    print('Launching jobs with mode %s' % run_mode)
    results = []
    njobs = 0
    # Number of chunks actually submitted. This can be smaller than
    # num_chunks (e.g. when test_one stops after the first chunk), so it is
    # what the completion message reports.
    nlaunched = 0
    with archive_builder.temp_archive_file() as archive_file:
        archive = archive_builder.build_archive(
            archive_filename=archive_file,
            payload_script=payload_command,
            verbose=verbose,
            docker_image=docker_image,
            use_nvidia_docker=run_mode.use_gpu,
            mounts=mounts)

        sweeper = Sweeper(params)
        chunks = chunker(sweeper, num_chunks, confirm=confirm)
        for chunk in chunks:
            chunk_commands = []
            for config in chunk:
                cli_args = ' '.join(
                    ['--%s %s' % (key, config[key]) for key in config])
                chunk_commands.append(archive + ' -- ' + cli_args)
            njobs += len(chunk_commands)

            # Every command, including the last, is terminated with ';' so
            # the whole chunk runs sequentially as one script.
            chunk_script = ''.join(
                '%s;' % single_command for single_command in chunk_commands)

            result = run_mode.run_script(chunk_script,
                                         return_output=return_output,
                                         verbose=False)
            nlaunched += 1
            if return_output:
                result = archive_builder._strip_stdout(result)
                results.append(result)
            if test_one:
                break
    print('Launching completed for %d jobs on %d machines' %
          (njobs, nlaunched))
    run_mode.print_launch_message()
    return tuple(results)