Example #1
import multiprocessing

# Imports inferred from the call sites; the module paths follow the
# coil_core/coilutils layout shown in Example #9 and are assumptions here.
from coil_core import validate
from coilutils.general import (create_log_folder, create_exp_path,
                               erase_wrong_plotting_summaries,
                               erase_validations)


def execute_validation(gpus,
                       exp_folder,
                       exp_alias,
                       validation_datasets,
                       erase_bad_validations,
                       restart_validations,
                       suppress_output=True):
    """
    Args:
        gpus: The gpu being used for this execution.
        exp_folder: The folder this driving experiment is being executed
        exp_alias: The experiment alias, file name, to be executed.
        validation_datasets: Validation datasets to be deleted/restarted
        erase_bad_validations:
        restart_validations:
        suppress_output:
    Returns:
    """
    # Turn the comma-separated string into a list of dataset names.
    validation_datasets = validation_datasets.split(',')
    create_log_folder(exp_folder)
    create_exp_path(exp_folder, exp_alias)
    if erase_bad_validations:
        erase_wrong_plotting_summaries(exp_folder, validation_datasets)
    if restart_validations:
        erase_validations(exp_folder, validation_datasets)
    # Unlike train, the validate process also receives the dataset to
    # evaluate; note that only the first dataset in the list is validated
    # by this process.
    p = multiprocessing.Process(target=validate.execute,
                                args=(gpus, exp_folder, exp_alias,
                                      validation_datasets[0], suppress_output))
    p.start()
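
A minimal usage sketch of execute_validation follows; the folder and alias mirror Example #2, while the dataset names and the GPU id are illustrative assumptions:

# Hedged usage sketch: 'CoILVal1'/'CoILVal2' are hypothetical dataset names;
# the GPU id is a string, as in Example #9.
execute_validation(gpus='0',
                   exp_folder='sample',
                   exp_alias='coil_icra',
                   validation_datasets='CoILVal1,CoILVal2',
                   erase_bad_validations=False,
                   restart_validations=False)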
Example #2
    def test_basic_data(self):
        # Try to load the town2-town01 data.
        g_conf.immutable(False)
        g_conf.EXPERIMENT_NAME = 'coil_icra'
        create_log_folder('sample')
        create_exp_path('sample', 'coil_icra')
        merge_with_yaml('configs/sample/coil_icra.yaml')

        set_type_of_process('train')

        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    'CoILTrain')

        dataset = CoILDataset(full_dataset,
                              transform=None,
                              preload_name=str(g_conf.NUMBER_OF_HOURS) +
                              'hours_' + g_conf.TRAIN_DATASET_NAME)
Example #3
    def test_town3_data(self):
        # The Town03 data uses different names and has no pedestrian or
        # vehicle stop indications.
        g_conf.immutable(False)
        g_conf.EXPERIMENT_NAME = 'resnet34imnet'
        create_log_folder('town03')
        create_exp_path('town03', 'resnet34imnet')
        merge_with_yaml('configs/town03/resnet34imnet.yaml')

        set_type_of_process('train')

        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    'CoILTrainTown03')

        dataset = CoILDataset(full_dataset,
                              transform=None,
                              preload_name=str(g_conf.NUMBER_OF_HOURS) +
                              'hours_' + g_conf.TRAIN_DATASET_NAME)
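
Both tests stop after constructing the dataset. For context, a hedged sketch of how such a dataset might be consumed, assuming CoILDataset is a standard torch.utils.data.Dataset (the batch size and worker count are illustrative, not taken from the snippets):

from torch.utils.data import DataLoader

# Illustrative only: iterate the dataset built in the tests above.
data_loader = DataLoader(dataset, batch_size=120, shuffle=True, num_workers=12)
for data in data_loader:
    break  # each 'data' is one batch of training samples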
Example #4
import multiprocessing

# Imports inferred from the call sites; the module paths follow the
# coil_core/coilutils layout shown in Example #9 and are assumptions here.
from coil_core import train
from coilutils.general import create_log_folder, create_exp_path


def execute_train(gpus,
                  exp_folder,
                  exp_alias,
                  suppress_output=True,
                  number_of_workers=12):
    """
    Args:
        gpus: The gpu being used for this execution.
        exp_folder: Folder name in configs
        exp_alias: The experiment alias (yaml file)
        suppress_output:
        number_of_workers:
    Returns:
    """
    create_log_folder(exp_folder)
    create_exp_path(exp_folder, exp_alias)
    p = multiprocessing.Process(target=train.execute,
                                args=(gpus, exp_folder, exp_alias,
                                      suppress_output, number_of_workers))
    p.start()
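
Note that execute_train does not return the process handle, so a caller cannot wait on it. A hedged sketch of a hypothetical blocking variant, mirroring the body above under the same assumed imports:

def execute_train_blocking(gpus, exp_folder, exp_alias,
                           suppress_output=True, number_of_workers=12):
    # Hypothetical helper, not part of the snippets: identical to
    # execute_train, but waits for the training process to finish.
    create_log_folder(exp_folder)
    create_exp_path(exp_folder, exp_alias)
    p = multiprocessing.Process(target=train.execute,
                                args=(gpus, exp_folder, exp_alias,
                                      suppress_output, number_of_workers))
    p.start()
    p.join()  # block until training ends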
Example #5
import multiprocessing

# Imports inferred from the call sites; the module paths follow the
# coil_core/coilutils layout shown in Example #9 and are assumptions here.
from coil_core import run_drive
from coilutils.general import create_log_folder, create_exp_path


def execute_drive(gpus, exp_folder, exp_alias, exp_set_name, suppress_output,
                  docker, record_collisions, no_screen):
    """
    Args:
        gpus: The gpu being used for this execution.
        exp_folder: The folder this driving experiment is being executed
        exp_alias: The experiment alias, file name, to be executed.
        exp_set_name:
        suppress_output:
        docker:
        record_collisions:
        no_screen:
    Returns:
    """
    create_log_folder(exp_folder)
    create_exp_path(exp_folder, exp_alias)
    p = multiprocessing.Process(target=run_drive.execute,
                                args=(gpus, exp_folder, exp_alias,
                                      exp_set_name, suppress_output, docker,
                                      record_collisions, no_screen))
    p.start()
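
A hedged invocation sketch: the folder and alias mirror Example #2, the environment follows the Suite_Town format checked in Example #6, and the Docker image name is an illustrative assumption:

execute_drive('0', 'sample', 'coil_icra', 'NocrashTraining_Town01',
              suppress_output=True, docker='carlasim/carla:0.8.4',
              record_collisions=True, no_screen=True)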
Example #6
    # Check if the mandatory folder argument is passed
    if args.folder is None:
        raise ValueError(
            "You should set a folder name where the experiments are placed")

    # Check if the driving parameters are passed in a correct way
    if args.driving_environments is not None:
        for de in list(args.driving_environments):
            if len(de.split('_')) < 2:
                raise ValueError(
                    "Invalid format for the driving environments; "
                    "it should be Suite_Town")

    # Create the folder where the logs are kept and erase any old logs.
    create_log_folder(args.folder)
    erase_logs(args.folder)
    if args.erase_bad_validations:
        erase_wrong_plotting_summaries(args.folder,
                                       list(args.validation_datasets))
    if args.restart_validations:
        erase_validations(args.folder, list(args.validation_datasets))

    # The definition of parameters for driving
    drive_params = {
        "suppress_output": True,
        "no_screen": args.no_screen,
        "docker": args.docker,
        "record_collisions": args.record_collisions
    }
    # There are two modes of execution
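
The snippet ends just as it is about to branch on the execution mode. For illustration, a hedged sketch of how drive_params might be forwarded to execute_drive from Example #5; the keyword expansion and the use of args.exp and args.gpus are assumptions, not shown in the snippets:

# Hypothetical forwarding: one drive process per Suite_Town environment.
for de in list(args.driving_environments):
    execute_drive(args.gpus[0], args.folder, args.exp, de, **drive_params)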
Example #7
    for gpu in args.gpus:
        try:
            int(gpu)
        except ValueError:  # Re-raise with a meaningful message.
            raise ValueError("GPU '%s' is not a valid integer" % gpu)

    # There are two modes of execution
    if args.single_process is not None:
        if args.single_process in ['train', 'validation']:
            # Check if the mandatory folder argument is passed
            if args.folder is None:
                raise ValueError(
                    "You should set a folder name where the experiments are placed"
                )
            # This is the folder creation of the logs
            create_log_folder(args.folder)
            if args.exp is None:
                raise ValueError("You should set the exp alias")
            # Define the pre-trained encoder model used for training
            # affordances, if all the encoder arguments were provided.
            if args.encoder_checkpoint and args.encoder_folder and args.encoder_exp:
                encoder_params = {
                    'encoder_checkpoint': args.encoder_checkpoint,
                    'encoder_folder': args.encoder_folder,
                    'encoder_exp': args.encoder_exp
                }
            elif all(v is None for v in [
                    args.encoder_checkpoint, args.encoder_folder,
                    args.encoder_exp
            ]):
                encoder_params = None
            else:
                # Assumed completion; the snippet is truncated here. Setting
                # only some of the encoder arguments is treated as an error.
                raise ValueError(
                    "You should set either all of encoder_checkpoint, "
                    "encoder_folder and encoder_exp, or none of them")
Example #8
import heapq
import os
import time

# The scheduling helpers used below (get_gpu_resources,
# allocate_gpu_resources, mount_experiment_heap, monitorer, printer,
# execute_*) come from the surrounding project; their module paths are
# not shown in these snippets.


def folder_execute(exp_folder, exp_set_name, gpus, validation_datasets,
                   driving_environments, is_training, number_of_workers,
                   suppress_output, docker, record_collisions, no_screen,
                   erase_bad_validations, restart_validations):
    """
    Execute a folder of experiments. It will execute trainings and
    all the selected evaluations for each of the models present on the folder.

    Args
        params: a dictionary containing:
            gpus: the gpu numbers that are going  to be allocated for the experiment
            gpu_value: the "value" of each gpu, depending on the value more or less experiments
                        will be allocated per GPU
            folder: the folder where all the experiment configuration files are
            validation_datasets: the validation datasets that are going to be validated
                                 per experiment
            driving_environments: The driving environments where the models are going to be tested.
            record_collisions:
            no_screen:
            erase_bad_validations:
            restart_validations:

    """
    # By default each GPU has a capacity of 3.5, enough for one training
    # (1.5) plus one driving (1.5) or one validation (1.0) process.
    allocation_parameters = {
        'gpu_value': 3.5,
        'train_cost': 1.5,
        'validation_cost': 1.0,
        'drive_cost': 1.5
    }

    create_log_folder(exp_folder)
    experiments_list = os.listdir(os.path.join('configs', exp_folder))
    # Strip the extension from each configuration file name.
    experiments_list = [
        experiment.split('.')[-2] for experiment in experiments_list
    ]

    allocated_gpus = {gpu: allocation_parameters['gpu_value'] for gpu in gpus}

    executing_processes = []

    free_gpus, resources_on_most_free_gpu, executing_processes = get_gpu_resources(
        allocated_gpus, executing_processes, allocation_parameters)

    # A priority heap of tasks to be executed. The priority is always train
    # first, then driving (test), then validation.
    tasks_queue = mount_experiment_heap(exp_folder, experiments_list,
                                        is_training, [], [],
                                        validation_datasets,
                                        driving_environments)

    # No process is executing right now.

    while True:
        # Keep scheduling until the task queue is empty and no process is
        # still executing. While the freest GPU can still fit the cheapest
        # task type, allocate more processes.

        while resources_on_most_free_gpu >= min([allocation_parameters['train_cost'],
                                                 allocation_parameters['validation_cost'],
                                                 allocation_parameters['drive_cost']]) \
                and tasks_queue != []:
            # Pop the highest-priority task from the heap.
            popped_thing = heapq.heappop(tasks_queue)
            # Heap entries are tuples; index 2 holds the process-spec dict.
            process_specs = popped_thing[2]

            # Get the training status; it gates whether a validation or
            # drive process may be scheduled for this experiment.
            train_status = monitorer.get_status(exp_folder,
                                                process_specs['experiment'],
                                                'train')[0]
            # ADD TRAIN TO EXECUTE
            if process_specs['type'] == 'train' and resources_on_most_free_gpu >= \
                    allocation_parameters['train_cost']:
                free_gpus, resources_on_most_free_gpu, gpu_number = allocate_gpu_resources(
                    free_gpus, allocation_parameters['train_cost'])

                # number_of_workers must be passed by keyword; positionally it
                # would land in execute_train's suppress_output parameter.
                execute_train(gpu_number, process_specs['folder'],
                              process_specs['experiment'],
                              suppress_output=suppress_output,
                              number_of_workers=number_of_workers)
                process_specs.update({'gpu': gpu_number})

                executing_processes.append(process_specs)
            # ADD DRIVE TO EXECUTE
            elif process_specs['type'] == 'drive' and resources_on_most_free_gpu >= \
                    allocation_parameters['drive_cost'] \
                    and train_status in ('Iterating', 'Loading', 'Finished'):
                free_gpus, resources_on_most_free_gpu, gpu_number = allocate_gpu_resources(
                    free_gpus, allocation_parameters['drive_cost'])

                execute_drive(gpu_number, process_specs['folder'],
                              process_specs['experiment'],
                              process_specs['environment'], suppress_output,
                              docker, record_collisions, no_screen)
                process_specs.update({'gpu': gpu_number})
                executing_processes.append(process_specs)
            # ADD VALIDATION TO EXECUTE
            elif process_specs['type'] == 'validation' and resources_on_most_free_gpu >= \
                    allocation_parameters['validation_cost'] \
                    and train_status in ('Iterating', 'Loading', 'Finished'):
                free_gpus, resources_on_most_free_gpu, gpu_number = allocate_gpu_resources(
                    free_gpus, allocation_parameters['validation_cost'])
                execute_validation(gpu_number, process_specs['folder'],
                                   process_specs['experiment'],
                                   # The dataset for this task; the key name
                                   # is assumed from the process-spec layout.
                                   process_specs['dataset'],
                                   erase_bad_validations, restart_validations,
                                   suppress_output)
                process_specs.update({'gpu': gpu_number})
                executing_processes.append(process_specs)

        tasks_queue = mount_experiment_heap(exp_folder, experiments_list,
                                            is_training, executing_processes,
                                            tasks_queue, validation_datasets,
                                            driving_environments, False)

        printer.plot_folder_summaries(exp_folder, is_training,
                                      validation_datasets,
                                      driving_environments)
        # Check the allocated processes and free the resources of those
        # that have finished.

        if len(tasks_queue) == 0 and len(executing_processes) == 0:
            break

        free_gpus, resources_on_most_free_gpu, executing_processes = get_gpu_resources(
            allocated_gpus, executing_processes, allocation_parameters)

        time.sleep(10)

    print("ALL EXPERIMENTS EXECUTED")
Example #9
from coil_core import execute_train
from coilutils.general import create_log_folder, create_exp_path, erase_logs


if __name__ == '__main__':
    folder = 'cvpr'
    exp = 'img_gtseg_camv_control'

    create_log_folder(folder)
    erase_logs(folder)
    create_exp_path(folder, exp)

    execute_train('0', folder, exp)
    # execute_train only spawns the training process, so this prints as soon
    # as the process has been launched.
    print("SUCCESSFULLY STARTED TRAINING")