Example 1
def run(config_file, tunable_id, local_dir):
    register_trainable(tunable_id, TrainTunable)
    lm_config = config_util.load(config_file)

    def easydict_to_dict(config):
        """Recursively convert an EasyDict (and any nested EasyDicts) into plain dicts."""
        if isinstance(config, EasyDict):
            config = dict(config)

        for key, value in config.items():
            if isinstance(value, EasyDict):
                value = easydict_to_dict(dict(value))
            config[key] = value
        return config

    tune_space = easydict_to_dict(lm_config['TUNE_SPACE'])
    tune_spec = easydict_to_dict(lm_config['TUNE_SPEC'])
    tune_spec['run'] = tunable_id
    tune_spec['config'] = {'lm_config': os.path.join(os.getcwd(), config_file)}
    tune_spec['local_dir'] = local_dir
    tune_spec['trial_name_creator'] = ray.tune.function(trial_str_creator)

    # Expect GPUs to be available for the parameter search
    ray.init(num_cpus=multiprocessing.cpu_count() // 2, num_gpus=max(get_num_gpu(), 1))
    algo = HyperOptSearch(tune_space, max_concurrent=4, reward_attr="mean_accuracy")
    scheduler = AsyncHyperBandScheduler(time_attr="training_iteration", reward_attr="mean_accuracy", max_t=200)
    trials = run_experiments(experiments={'exp_tune': tune_spec},
                             search_alg=algo,
                             scheduler=scheduler)
    print("The best result is", get_best_result(trials, metric="mean_accuracy", param='config'))
Example 2
    reward_list = []
    searched_dict = {}
    searched_dict['scores'] = []
    searched_dict['policies'] = []
    for _ in range(1):  # increase the range to run the search multiple times.
        for cv_fold in range(cv_num):
            name = "search_%s_%s_fold%d_ratio%.1f" % (C.get()['dataset'],
                                                      C.get()['model']['type'],
                                                      cv_fold, args.cv_ratio)
            print(name)
            register_trainable(
                name,
                lambda augs, rpt: eval_tta(copy.deepcopy(copied_c), augs, rpt))
            # register_trainable(name, eval_tta)
            algo = HyperOptSearch(space,
                                  max_concurrent=20,
                                  reward_attr=reward_attr)

            exp_config = {
                paths[cv_fold]: {
                    'run': name,
                    'num_samples': 4 if args.smoke_test else args.num_search,
                    'resources_per_trial': {
                        'gpu': 1
                    },
                    'stop': {
                        'training_iteration': args.num_policy
                    },
                    'config': {
                        'dataroot': os.path.abspath(args.dataroot),
                        'save_path': paths[cv_fold],
Example 3
    import argparse
    from hyperopt import hp

    parser = argparse.ArgumentParser()
    parser.add_argument("--smoke-test",
                        action="store_true",
                        help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init(redirect_output=True)

    register_trainable("exp", easy_objective)

    space = {
        'width': hp.uniform('width', 0, 20),
        'height': hp.uniform('height', -100, 100),
        'activation': hp.choice("activation", ["relu", "tanh"])
    }

    config = {
        "my_exp": {
            "run": "exp",
            "num_samples": 10 if args.smoke_test else 1000,
            "stop": {
                "training_iteration": 100
            },
        }
    }
    algo = HyperOptSearch(space, max_concurrent=4, reward_attr="neg_mean_loss")
    scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
    run_experiments(config, search_alg=algo, scheduler=scheduler)
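
easy_objective is registered above but not shown. The following is a stand-in sketch of a function-based trainable that fits the old (config, reporter) signature and the neg_mean_loss reward used by the search algorithm and scheduler; it is not the original definition.

# Hypothetical stand-in for the easy_objective trainable registered above.
# Tune calls the function with (config, reporter); each reporter call counts
# as one training iteration against the stop condition above.
def easy_objective(config, reporter):
    width = config["width"]
    height = config["height"]
    for step in range(100):
        # report the quantity to maximize; the key matches reward_attr above
        loss = (height - 14) ** 2 + abs(width - 3)
        reporter(timesteps_total=step, neg_mean_loss=-loss)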
Example 4
        hp.uniform("momentum", 0, 0.99),
        'weight_decay':
        hp.choice(
            "weight_decay",
            np.concatenate((10**-np.random.uniform(1, 5, size=100), [0])))
    }

    current_best_params = [{
        "factor": 0.1,
        "lr": 0.01,
        "momentum": 0.9,
        "weight_decay": 0
    }]

    algo2 = HyperOptSearch(space2,
                           max_concurrent=4,
                           reward_attr="neg_mean_loss",
                           points_to_evaluate=current_best_params)

    algo1 = BayesOptSearch(
        space1,
        max_concurrent=10,
        reward_attr="neg_mean_loss",
        #reward_attr="mean_accuracy",
        utility_kwargs={
            #             "kind": "ucb",
            #             "kappa": 2.5,
            #             "xi": 0.0
            "kind": "ei",
            "kappa": 2.5,
            "xi": 0.01
        },
Example 5
def search(conf):
    sw = StopWatch.get()

    # region conf vars
    conf_dataset     = conf['dataset']
    dataroot    = conf['dataroot']
    redis_ip    = conf['redis']
    conf_loader = conf['autoaug']['loader']
    conf_model  = conf['autoaug']['model']
    model_type  = conf_model['type']
    ds_name     = conf_dataset['name']
    aug         = conf_loader['aug']
    val_ratio   = conf_loader['val_ratio']
    epochs      = conf_loader['epochs']
    val_fold    = conf_loader['val_fold']
    cv_num      = conf_loader['cv_num']
    num_policy = conf['autoaug']['num_policy']
    num_op = conf['autoaug']['num_op']
    num_search = conf['autoaug']['num_search']
    num_result_per_cv = conf['autoaug']['num_result_per_cv']
    smoke_test = conf['smoke_test']
    resume = conf['resume']
    # endregion

    ray.init(redis_address=redis_ip,
        # allocate all GPUs on local node if cluster is not specified
        num_gpus=torch.cuda.device_count() if not redis_ip else None)

    # first train with no aug
    _train_no_aug(conf)

    # get values from config
    num_samples = 4 if smoke_test else num_search

    logger.info('----- Search Test-Time Augmentation Policies -----')
    sw.start(tag='search')

    save_paths = [_get_model_filepath(ds_name,
        model_type, 'ratio%.1f_fold%d' %
            (val_ratio, i)) for i in range(cv_num)]

    copied_c = copy.deepcopy(conf)
    ops = augment_list(False)
    space = {}
    for i in range(num_policy):
        for j in range(num_op):
            space['policy_%d_%d' % (i, j)] = hp.choice('policy_%d_%d' %
                (i, j), list(range(0, len(ops))))
            space['prob_%d_%d' % (i, j)] = hp.uniform('prob_%d_%d' %
                (i, j), 0.0, 1.0)
            space['level_%d_%d' % (i, j)] = hp.uniform('level_%d_%d' %
                (i, j), 0.0, 1.0)

    final_policy_set = []
    total_computation = 0
    reward_attr = 'top1_valid'      # top1_valid or minus_loss
    for _ in range(1):  # increase the range to run the search multiple times.
        for val_fold in range(cv_num):
            name = "search_%s_%s_fold%d_ratio%.1f" % (ds_name,
                model_type, val_fold, val_ratio)
            #logger.info(name)
            register_trainable(name, (lambda augs,
                rpt: _eval_tta(copy.deepcopy(copied_c), augs, rpt)))
            algo = HyperOptSearch(space, max_concurrent=4*20,
                reward_attr=reward_attr)

            exp_config = {
                name: {
                    'run': name,
                    'num_samples': num_samples,
                    'resources_per_trial': {'gpu': 1},
                    'stop': {'training_iteration': num_policy},
                    'config': {
                        'dataroot': dataroot, 'save_path': save_paths[val_fold],
                        'val_ratio': val_ratio, 'val_fold': val_fold,
                        'num_op': num_op, 'num_policy': num_policy
                    },
                }
            }
            results = run_experiments(exp_config, search_alg=algo,
                scheduler=None, verbose=0, queue_trials=True,
                resume=resume, raise_on_failed_trial=False)

            results = [x for x in results if x.last_result is not None]
            results = sorted(results, key=lambda x: x.last_result[reward_attr],
                reverse=True)

            # calculate computation usage
            for result in results:
                total_computation += result.last_result['elapsed_time']

            for result in results[:num_result_per_cv]:
                final_policy = policy_decoder(result.config, num_policy, num_op)
                logger.info('loss=%.12f top1_valid=%.4f %s' %
                    (result.last_result['minus_loss'],
                        result.last_result['top1_valid'], final_policy))

                final_policy = remove_deplicates(final_policy)
                final_policy_set.extend(final_policy)

    logger.info(json.dumps(final_policy_set))
    logger.info('final_policy=%d' % len(final_policy_set))
    logger.info('processed in %.4f secs, gpu hours=%.4f' % (sw.pause('search'), total_computation / 3600.))
    logger.info('----- Train with Augmentations model=%s dataset=%s aug=%s ratio(test)=%.1f -----' \
        % (model_type, ds_name, aug, val_ratio))
    sw.start(tag='train_aug')

    num_experiments = 5
    default_path = [_get_model_filepath(ds_name, model_type, 'ratio%.1f_default%d'  \
        % (val_ratio, _)) for _ in range(num_experiments)]
    augment_path = [_get_model_filepath(ds_name, model_type, 'ratio%.1f_augment%d'  \
        % (val_ratio, _)) for _ in range(num_experiments)]
    reqs = [_train_model.remote(copy.deepcopy(copied_c), dataroot, aug, 0.0, 0, save_path=default_path[_], only_eval=True) \
        for _ in range(num_experiments)] + \
        [_train_model.remote(copy.deepcopy(copied_c), dataroot, final_policy_set, 0.0, 0, save_path=augment_path[_]) \
            for _ in range(num_experiments)]

    tqdm_epoch = tqdm(range(epochs))
    is_done = False
    for epoch in tqdm_epoch:
        while True:
            # progress of each background training job, read from its checkpoint
            epochs_per_exp = OrderedDict()
            for exp_idx in range(num_experiments):
                try:
                    if os.path.exists(default_path[exp_idx]):
                        latest_ckpt = torch.load(default_path[exp_idx])
                        epochs_per_exp['default_exp%d' % (exp_idx + 1)] = latest_ckpt['epoch']
                except:
                    pass
                try:
                    if os.path.exists(augment_path[exp_idx]):
                        latest_ckpt = torch.load(augment_path[exp_idx])
                        epochs_per_exp['augment_exp%d' % (exp_idx + 1)] = latest_ckpt['epoch']
                except:
                    pass

            tqdm_epoch.set_postfix(epochs_per_exp)
            # done when every job has reached the total epoch count; the inner dict is
            # named epochs_per_exp so it no longer shadows the `epochs` config value
            if len(epochs_per_exp) == num_experiments*2 and min(epochs_per_exp.values()) >= epochs:
                is_done = True
            if len(epochs_per_exp) == num_experiments*2 and min(epochs_per_exp.values()) >= epoch:
                break
            time.sleep(10)
        if is_done:
            break

    logger.info('getting results...')
    final_results = ray.get(reqs)

    for train_mode in ['default', 'augment']:
        avg = 0.
        for _ in range(num_experiments):
            r_model, r_cv, r_dict = final_results.pop(0)
            logger.info('[%s] top1_train=%.4f top1_test=%.4f' % (train_mode, r_dict['top1_train'], r_dict['top1_test']))
            avg += r_dict['top1_test']
        avg /= num_experiments
        logger.info('[%s] top1_test average=%.4f (#experiments=%d)' % (train_mode, avg, num_experiments))
    logger.info('processed in %.4f secs' % sw.pause('train_aug'))

    logger.info(sw)
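
policy_decoder is called above without being defined here. A rough sketch of what such a decoder could do follows, assuming it maps the policy_%d_%d / prob_%d_%d / level_%d_%d keys built into the search space back into per-policy (op, prob, level) tuples, and that augment_list(False) returns entries whose first element is the augmentation function; both are assumptions about the surrounding codebase.

# Hypothetical sketch of a policy decoder consistent with the space built above.
def policy_decoder(config, num_policy, num_op):
    ops = augment_list(False)  # assumed: list of (fn, low, high) tuples
    policies = []
    for i in range(num_policy):
        sub_policy = []
        for j in range(num_op):
            op_idx = config['policy_%d_%d' % (i, j)]
            prob = config['prob_%d_%d' % (i, j)]
            level = config['level_%d_%d' % (i, j)]
            sub_policy.append((ops[op_idx][0].__name__, prob, level))
        policies.append(sub_policy)
    return policies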
Example 6
    "nc": 3,
    "nz": 100,
    "ngf": 64,
    "ndf": 64,
    "lrD": hp.loguniform('lrD', -8, -1),
    "lrG": hp.loguniform('lrG', -8, -1),
    "beta1": hp.uniform('beta1', 0, 1),
    "beta2": hp.uniform('beta2', 0, 1),
    "Diters": 5,
    "noBN": False,
    "type": hp.choice('type', ["dcgan", "mlp", "resnet"]),
}

ray.init()

algo = HyperOptSearch(space, max_concurrent=4, reward_attr="inception")

sched = AsyncHyperBandScheduler(time_attr="training_iteration",
                                reward_attr="inception",
                                max_t=8,
                                grace_period=2)


def train(config, reporter):
    args.update(config)
    main(args, reporter)


tune.register_trainable("main", train)

tune.run_experiments(
Example 7
def get_best_model(x_train, y_train, **kwargs):

    y_pred = kwargs['primal_data']['y_pred']
    model_name = kwargs['primal_data']['model_name']
    fn_name, param_name = get_model_design(model_name)

    mapping_instance = create_model(fn_name=fn_name, param_name=param_name)

    def train_model(config, reporter):
        '''
        Used by Tune to train the model once for each sampled configuration.

        Args:
            config (dict): The search params sampled by Tune for this trial,
                similar in shape to the JSON config we already have.
            reporter: A callable provided by Tune for reporting the metric
                that the trials are optimized against.
        '''

        model = mapping_instance(x_train=x_train, params=config)
        model.fit(x_train, y_pred)
        last_checkpoint = "weights_tune_{}.h5".format(config)
        model.save_weights(last_checkpoint)
        accuracy = model.evaluate(x_train, y_pred)[1]
        reporter(mean_accuracy=accuracy, checkpoint=last_checkpoint)

    # Define experiment configuration
    configuration = tune.Experiment("experiment_name",
                                    run=train_model,
                                    resources_per_trial={"cpu": 4},
                                    stop={"mean_accuracy": 95},
                                    config=kwargs['params'])

    # Check whether the user opted for the hyperopt search method
    if kwargs['space']:
        print('hyperopt chosen-------')
        space = kwargs['space']
        hyperopt_search = HyperOptSearch(space, reward_attr="mean_accuracy")
        # TODO
        # Should this wrapper be avoided (instead letting the user pass the HyperOptSearch)?
        # Add other args for hyperopt search.
        # Add the remaining search_algos if necessary.
        trials = tune.run_experiments(configuration,
                                      search_alg=hyperopt_search,
                                      verbose=2)

    else:
        trials = tune.run_experiments(configuration, verbose=2)

    metric = "mean_accuracy"
    """Restore a model from the best trial."""
    sorted_trials = get_sorted_trials(trials, metric)
    for best_trial in sorted_trials:
        try:
            print("Creating model...")
            best_model = mapping_instance(
                x_train=x_train,
                params=best_trial.config)  # TODO Pass config as argument
            # best_model = make_model(None)
            weights = os.path.join(best_trial.logdir,
                                   best_trial.last_result["checkpoint"])
            print("Loading from", weights)
            best_model.load_weights(
                weights)  # TODO Validate this loaded model.
            break
        except Exception as e:
            print(e)
            print("Loading failed. Trying next model")

    return best_model
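
get_sorted_trials is used above but defined elsewhere; assuming it simply orders trials by their last reported value of the metric, best first, a sketch would be:

# Hypothetical sketch of the get_sorted_trials helper used above.
def get_sorted_trials(trial_list, metric):
    return sorted(
        trial_list,
        key=lambda trial: (trial.last_result or {}).get(metric, 0),
        reverse=True)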
Example 8
    # stop condition for VOT and OTB
    if args.dataset.startswith('VOT'):
        stop = {
            "EAO": 0.50,  # if EAO >= 0.50, this procedures will stop
            # "timesteps_total": 100, # iteration times
        }
        tune_spec['zp_tune']['stop'] = stop

        scheduler = AsyncHyperBandScheduler(
            # time_attr="timesteps_total",
            reward_attr="EAO",
            max_t=400,
            grace_period=20)
        algo = HyperOptSearch(
            params, max_concurrent=args.gpu_nums * 2 + 1,
            reward_attr="EAO")  # max_concurrent: the max running task

    elif args.dataset.startswith('OTB'):
        stop = {
            # "timesteps_total": 100, # iteration times
            "AUC": 0.80
        }
        tune_spec['zp_tune']['stop'] = stop
        scheduler = AsyncHyperBandScheduler(
            # time_attr="timesteps_total",
            reward_attr="AUC",
            max_t=400,
            grace_period=20)
        algo = HyperOptSearch(params,
                              max_concurrent=args.gpu_nums * 2 + 1,
Example 9
if __name__ == "__main__":
    from hyperopt import hp

    tune.register_trainable("my_class", memLstm)
    ray.init(redis_address="192.168.1.153:9023")

    space = {
        'learning_rate': hp.uniform('learning_rate', 0.0001, 0.0005),
        'memn2n_rnn_dim': hp.uniform('memn2n_rnn_dim', 128, 257),
        'hops': hp.choice('hops', [3, 4, 5, 6]),
        'amp': hp.choice('amp', [1, 2, 3, 4, 5])
    }

    config = {
        'my_exp': {
            'run': memLstm,
            'trial_resources': {
                'gpu': 1
            },
            'stop': {
                "training_iteration": 5
            },
            'num_samples': 8
        }
    }

    algo = HyperOptSearch(space, max_concurrent=4, reward_attr="mean_accuracy")
    scheduler = AsyncHyperBandScheduler(reward_attr="mean_accuracy")

    run_experiments(config, search_alg=algo, scheduler=scheduler)
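
Note that hp.uniform samples floats, so memn2n_rnn_dim above arrives as a float. If an integer dimension is wanted, a quantized draw over the same range is one option; this is a suggestion, not part of the original snippet.

# Sketch: integer-valued alternative for the rnn dimension using hyperopt's
# quantized uniform, so samples land on whole numbers in [128, 256]
# (the trainable may still need an int() cast).
space['memn2n_rnn_dim'] = hp.quniform('memn2n_rnn_dim', 128, 256, 1)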
Example 10
register_trainable("exp", create_model)

# Hyperparameter space
space = {
    #'_features':hp.choice('_features',allowed_indices)
    #'_layers' : hp.choice('_layers',[1,2]),
    '_l1nn': hp.randint('_l1nn', 9),
    '_l2nn': hp.randint('_l2nn', 9),
    '_act': hp.choice('_act', ['relu', 'tanh']),
    '_lr': hp.uniform('_lr', 0.001, 0.05)
}

config = {"my_exp": {"run": "exp", "num_samples": 1000}}

start = time.time()
algo = HyperOptSearch(space, max_concurrent=10, reward_attr="mean_acc")
scheduler = AsyncHyperBandScheduler(reward_attr="mean_acc")
train_results = run_experiments(config, search_alg=algo, scheduler=scheduler)
end = time.time()

results = [item.last_result['mean_acc'] for item in train_results]
configs = [item.config for item in train_results]

rdf = pd.DataFrame(results)
cdf = pd.DataFrame(configs)

cdf['_acc'] = rdf
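
As a small follow-up, the merged frame can be ranked to surface the best configurations; the column name follows the code above.

# Sketch: list the evaluated configurations ordered by accuracy, best first.
best = cdf.sort_values('_acc', ascending=False)
print(best.head(10))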
Example 11
                                        reward_attr="mean_accuracy",
                                        max_t=200,
                                        grace_period=15)

    space = {
        'L2': hp.loguniform('L2', -2.3 * 9, -2.3 * 5),
        'mixing_ratio': hp.uniform('mixing_ratio', 0.9, 1)
    }
    exp = {
        'run': "my_class",
        'num_samples': 50,
        'stop': {
            "training_iteration": 200
        },
        'trial_resources': {
            "gpu": 1,
            "cpu": 1
        },
        'config': {
            "L2": lambda spec: 10**(3 * random.random() - 8),
            "mixing_ratio": lambda spec: random.random()
        }
    }
    algo = HyperOptSearch(space, reward_attr="mean_accuracy")

    tune.run_experiments({"GRU_teach": exp},
                         search_alg=algo,
                         scheduler=hyperband)

    print("Finished with the simulations")
Example 12
def main(args):

    ray.init(num_cpus=args.rayNumCpu, num_gpus=args.rayNumGpu)

    t_loader, v_loader = get_loaders(train_batch_size=16,
                                     num_workers=1,
                                     data_folder=args.dataFolder,
                                     cuda_available=torch.cuda.is_available())
    pinned_obj_dict['data_loader_train'] = pin_in_object_store(t_loader)
    pinned_obj_dict['data_loader_valid'] = pin_in_object_store(v_loader)
    pinned_obj_dict['args'] = pin_in_object_store(args)

    trainable_name = 'hyp_search_train'
    register_trainable(trainable_name, TrainerClass)

    reward_attr = "acc"

    #############################
    # Define hyperband scheduler
    #############################
    hpb = AsyncHyperBandScheduler(time_attr="training_iteration",
                                  reward_attr=reward_attr,
                                  grace_period=40,
                                  max_t=300)

    ##############################
    # Define hyperopt search algo
    ##############################
    space = {
        'lr': hp.uniform('lr', 0.001, 0.1),
        # 'Adadelta' was dropped from the optimizer choices; it gave the worst results
        'optimizer': hp.choice("optimizer", ['SGD', 'Adam']),
        'batch_accumulation': hp.choice("batch_accumulation", [4, 8, 16])
    }
    hos = HyperOptSearch(space, max_concurrent=4, reward_attr=reward_attr)

    #####################
    # Define experiments
    #####################
    exp_name = "resnet152_hyp_search_hyperband_hyperopt_{}".format(
        time.strftime("%Y-%m-%d_%H.%M.%S"))
    exp = Experiment(
        name=exp_name,
        run=trainable_name,
        num_samples=args.numSamples,  # the number of experiments
        resources_per_trial={
            "cpu": args.trialNumCpu,
            "gpu": args.trialNumGpu
        },
        checkpoint_freq=args.checkpointFreq,
        checkpoint_at_end=True,
        stop={
            reward_attr: 0.95,
            # how many times a specific config will be trained
            "training_iteration": args.trainingIteration,
        })

    ##################
    # Run tensorboard
    ##################
    if args.runTensorBoard:
        # launch TensorBoard in a background thread so it does not block run_experiments
        thread = threading.Thread(target=launch_tensorboard, args=[exp_name])
        thread.start()

    ##################
    # Run experiments
    ##################
    run_experiments(exp, search_alg=hos, scheduler=hpb, verbose=False)
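
TrainerClass is registered above but not shown. Here is a sketch of how such a class-based Trainable might read the pinned loaders back with get_pinned_object, using the old _setup/_train method names; the pinned_obj_dict keys mirror the snippet above, everything else is an assumption.

# Hypothetical sketch of the TrainerClass registered above.
from ray.tune import Trainable
from ray.tune.util import get_pinned_object

class TrainerClass(Trainable):
    def _setup(self, config):
        # fetch the objects pinned in main() from the Ray object store
        self.train_loader = get_pinned_object(pinned_obj_dict['data_loader_train'])
        self.valid_loader = get_pinned_object(pinned_obj_dict['data_loader_valid'])
        self.args = get_pinned_object(pinned_obj_dict['args'])
        self.lr = config['lr']

    def _train(self):
        # one epoch of training/validation would go here
        acc = 0.0  # placeholder
        return {"acc": acc}  # "acc" matches reward_attr above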