Exemplo n.º 1
0
    def fit(self, objective):
        """Run HPO trials until the optimizer is exhausted.

        Each suggested configuration is used to build and train a fresh
        task; the metric named by ``objective`` is reported back to the
        optimizer. Returns the optimizer's best trial/parameters, or None
        if this task was stopped before training started.
        """
        if self.stopped:
            return

        self.metrics.start_train()
        while not self.hpo.is_done():
            for trial_config in self.hpo.suggest():
                show_dict(trial_config)

                # 'epoch' is a fidelity value, not a hyper-parameter:
                # strip it out before handing the config to the task
                # trial_uid = trial_config.pop('uid')
                trial_epoch = trial_config.pop('epoch')

                trial_task = self.task_maker()
                trial_task.init(**trial_config)
                trial_task.fit(trial_epoch)

                score = trial_task.metrics.value()[objective]

                # trial_config['uid'] = trial_uid
                self.hpo.observe(trial_config, score)

        self.metrics.end_train()
        return self.hpo.result()
Exemplo n.º 2
0
def main(C=1,
         l1_ratio=0.5,
         random_state=1,
         bootstrap_seed=1,
         epoch=0,
         uid=None,
         experiment_name=None,
         client=None):
    """Train a logistic-regression baseline on the breast-cancer dataset.

    Parameters
    ----------
    C: float
        inverse regularization strength; clamped to >= 1e-10
    l1_ratio: float
        elastic-net mixing parameter, forwarded via the task configuration
    random_state: int
        seed for the model's internal randomness
    bootstrap_seed: int
        seed controlling the bootstrap train/valid/test resampling
    epoch: int
        unused here; kept for interface compatibility with other baselines
    uid: str, optional
        trial identifier forwarded to ``task.init``
    experiment_name: str, optional
        experiment tag used when logging metrics to a database
    client: optional
        metric-logger backend; nothing is persisted when None

    Returns
    -------
    float
        the task's ``validation_error_rate`` metric
    """
    # C must stay strictly positive for the solver
    C = max(C, 1e-10)

    # Load Dataset
    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    dataset_splits = bootstrap(data, target, bootstrap_seed)

    # BUG FIX: the seed was previously passed positionally, which bound it
    # to LogisticRegression's first parameter (`penalty`), not `random_state`.
    model = LogisticRegression(random_state=random_state)

    # Compute validation and test accuracy
    metrics = [
        Accuracy(name='validation', loader=[dataset_splits['valid']]),
        Accuracy(name='test', loader=[dataset_splits['test']])
    ]

    # Setup the task
    task = SklearnTask(model, metrics)

    # Save the result of your experiment inside a db
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    hyper_parameters = dict(model=dict(C=C, l1_ratio=l1_ratio))

    show_dict(hyper_parameters)

    # initialize the task with your configuration
    task.init(uid=uid, **hyper_parameters)

    # Train
    x, y = dataset_splits['train']
    # TODO: make sure that we fit on whole train and validate on whole valid and test
    task.fit(x, y)

    # Get the stats about this task setup
    stats = task.metrics.value()
    show_dict(stats)

    return float(stats['validation_error_rate'])
Exemplo n.º 3
0
def main(**kwargs):
    """Search hyper-parameters for the detection baseline, then run the final fit.

    All keyword arguments are forwarded to ``detection_baseline``; they must
    at least provide ``verbose``, ``experiment_name``, ``min_epochs`` and
    ``epochs``.
    """
    args = Namespace(**kwargs)
    set_verbose_level(args.verbose)

    device = fetch_device()
    experiment_name = args.experiment_name.format(**kwargs)

    # save partial results here
    checkpoints = StateStorage(
        folder=option('state.storage', f'{base}/detection'))

    def build_task():
        return detection_baseline(device=device, storage=checkpoints, **kwargs)

    # A throwaway task instance tells us which hyper-parameters are missing
    space = build_task().get_space()

    params = {}
    if not space:
        print('No hyper parameter missing, running the experiment...')
    else:
        show_dict(space)
        fidelity = Fidelity(args.min_epochs, args.epochs).to_dict()
        hpo = HPOptimizer('hyperband', space=space, fidelity=fidelity)

        search = HPO(hpo, build_task)
        search.metrics.append(ElapsedRealTime())

        trial = search.fit(objective='validation_loss')
        print(f'HPO is done, objective: {trial.objective}')
        params = trial.params
    # ------

    # Run the experiment with the best hyper parameters
    # -------------------------------------------------
    if params is not None:
        # Train using train + valid for the final result
        final_task = detection_baseline(device=device, **kwargs, hpo_done=True)
        final_task.init(**params)
        final_task.fit(epochs=args.epochs)

        print('=' * 40)
        print('Final Trial Results')
        show_dict(flatten(params))
        final_task.report(pprint=True, print_fun=print)
        print('=' * 40)
Exemplo n.º 4
0
def main(bootstrap_seed,
         random_state,
         hidden_layer_sizes=150,
         alpha=0.001,
         data_path='.',
         epoch=0,
         uid=None,
         experiment_name=None,
         client=None):
    """Train an MLP regressor baseline on the pan-allele dataset.

    Parameters
    ----------
    bootstrap_seed: int
        seed for controlling which data-points are selected for
        training/testing splits
    random_state: int
        seed for the generation of weights
    hidden_layer_sizes: int
        size of the single hidden layer; coerced to int and wrapped into a
        one-element tuple before being handed to the model
    alpha: float
        L2 penalty (regularization term) parameter.
    data_path: str
        fallback dataset folder used when the 'data.path' option is unset
    epoch: int
        unused here; kept for interface compatibility with other baselines
    uid: str, optional
        trial identifier forwarded to ``task.init``
    experiment_name: str, optional
        experiment tag used when logging metrics to a database
    client: optional
        metric-logger backend; nothing is persisted when None

    Returns
    -------
    float
        the task's ``validation_aac`` metric
    """
    # HPO may hand the layer size over as a float; the model needs an int
    hidden_layer_sizes = int(hidden_layer_sizes)
    # Load Dataset
    train_data = get_train_dataset(folder=option('data.path', data_path),
                                   task='pan_allele',
                                   min_nb_examples=1000)
    valid_data = get_valid_dataset(option('data.path', data_path))
    test_data = get_test_dataset(option('data.path', data_path))

    # one bootstrap seed for all 3 datasets
    rng = numpy.random.RandomState(bootstrap_seed)
    train_data = bootstrap(train_data, rng)
    valid_data = bootstrap(valid_data, rng)
    test_data = bootstrap(test_data, rng)

    # Compute validation and test AUC; each row is [features..., label]
    additional_metrics = [
        AUC(name='validation',
            loader=[([valid_data[:, :-1]], valid_data[:, -1])]),
        AUC(name='test', loader=[([test_data[:, :-1]], test_data[:, -1])])
    ]

    # Setup the task; the solver is fixed to lbfgs on purpose
    task = SklearnTask(MLPRegressor(solver='lbfgs', random_state=random_state),
                       metrics=additional_metrics)

    # Save the result of your experiment inside a db
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))
    hyper_parameters = dict(
        model=dict(hidden_layer_sizes=(hidden_layer_sizes, ), alpha=alpha))
    show_dict(hyper_parameters)

    # initialize the task with your configuration
    task.init(uid=uid, **hyper_parameters)

    # Train
    task.fit(train_data[:, :-1], train_data[:, -1])
    stats = task.metrics.value()
    show_dict(stats)

    return float(stats['validation_aac'])
Exemplo n.º 5
0
def main(bootstrapping_seed=1,
         sampler_seed=1,
         init_seed=1,
         batch_size=16,
         learning_rate=0.001,
         momentum=0.9,
         weight_decay=1e-4,
         epoch=240,
         half=False,
         hpo_done=False,
         uid=None,
         experiment_name=None,
         client=None,
         clean_on_exit=True,
         _interrupt=0):
    """Train the fcn_resnet18 segmentation baseline on voc-segmentation.

    Parameters
    ----------
    bootstrapping_seed: int
        seed for the bootstrap train/valid/test split
    sampler_seed: int
        seed for the data-loader sampling order
    init_seed: int
        seed for weight initialization
    batch_size: int
        mini-batch size
    learning_rate, momentum, weight_decay: float
        SGD hyper-parameters
    epoch: int
        number of epochs to fit
    half: bool
        train in half precision when True
    hpo_done: bool
        forwarded to the baseline builder (final-run mode)
    uid: str, optional
        trial id, also used to locate the checkpoint file removed on exit
    experiment_name: str, optional
        experiment tag used when logging metrics through `client`
    client: optional
        metric-logger backend; nothing is persisted when None
    clean_on_exit: bool
        delete this trial's checkpoint once training finished
    _interrupt: int
        testing hook: raise an interrupt every `_interrupt` epochs

    Returns
    -------
    float
        final ``validation_mean_jaccard_distance``
    """

    # NOTE(review): sibling entry points in this file call `option(...)`
    # (singular) for configuration lookups — confirm `options` exists too.
    base_folder = options('state.storage', '/tmp')
    storage = StateStorage(folder=base_folder)

    split_method = {
        'split_method': 'bootstrap',
        'ratio': 0.25,  # This means 50% training, 25% valid, 25% test
        'seed': bootstrapping_seed,
        'balanced': False
    }

    task = segmentation_baseline('fcn_resnet18',
                                 'self_init',
                                 'SGD',
                                 dataset='voc-segmentation',
                                 batch_size=batch_size,
                                 device=fetch_device(),
                                 split_method=split_method,
                                 sampler_seed=sampler_seed,
                                 init_seed=init_seed,
                                 storage=storage,
                                 half=half,
                                 hpo_done=hpo_done,
                                 verbose=False,
                                 validate=True)

    hyperparameters = {
        'model': {
            'initializer': {
                'gain': 1.0
            }
        },
        'optimizer': {
            'lr': learning_rate,
            'momentum': momentum,
            'weight_decay': weight_decay
        }
    }
    show_dict(hyperparameters)

    # Stream metrics to the experiment database when a client is given
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    if _interrupt:
        from studies import InterruptingMetric
        # Will raise interrupt every `_interrupt` epochs
        task.metrics.append(InterruptingMetric(frequency_epoch=_interrupt))
        # presumably disables time-based checkpoint buffering so every
        # interrupt is persisted immediately — TODO confirm
        storage.time_buffer = 0

    task.init(uid=uid, **hyperparameters)
    task.fit(epochs=epoch)

    # Remove checkpoint
    if clean_on_exit:
        # NOTE(review): relies on the private StateStorage._file API
        file_path = storage._file(uid)
        try:
            os.remove(file_path)
            print('Removed checkpoint at', file_path)
        except FileNotFoundError:
            print('No checkpoint at ', file_path)

    show_dict(task.metrics.value())

    return float(task.metrics.value()['validation_mean_jaccard_distance'])
Exemplo n.º 6
0
from olympus.datasets import Dataset, DataLoader, SplitDataset
from olympus.models import Model
from olympus.optimizers import Optimizer
from olympus.utils.stat import StatStream
from olympus.utils import show_dict, fetch_device

# Benchmark-style setup: parse CLI options, build the data pipeline and
# instantiate the model.
# NOTE(review): `argparse` is used below but not imported in this chunk —
# presumably imported earlier in the file.
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='cifar10', type=str)
parser.add_argument('--model', default='vgg11', type=str)
# --caching / --no-caching share one dest: the last flag on the CLI wins
parser.add_argument('--caching', action='store_true', dest='caching')
parser.add_argument('--no-caching', action='store_false', dest='caching')
parser.add_argument('--batch-size', default=128, type=int)
parser.add_argument('--warmup', default=4, type=int)
parser.add_argument('--repeat', default=10, type=int)
args = parser.parse_args()
show_dict(vars(args))

device = fetch_device()
# When caching is requested, the boolean flag is repurposed to carry the
# device the dataset cache should live on
if args.caching:
    args.caching = device

dataset = SplitDataset(Dataset(args.dataset,
                               cache=args.caching,
                               transform=False),
                       split_method='original')
loaders = DataLoader(dataset, batch_size=args.batch_size, sampler_seed=0)
input_size, target_size = loaders.get_shapes()

# target_size[0] becomes the model's output size — presumably the number of
# classes; confirm against DataLoader.get_shapes
model = Model(args.model, input_size=input_size,
              output_size=target_size[0]).init()
Exemplo n.º 7
0
                batch = self.preprocessor(self.postprocessor(batch))

        noises = self.get_noise(batch, original_image)
        return batch, noises


# Registry mapping adversary names to their builder classes
builders = {'gradient_ascent': GradientAscentAdversary}

if __name__ == '__main__':
    # Demo: craft an adversarial example from a single image
    import torchvision.models as models
    from PIL import Image
    from olympus.dashboard.plots.saliency import imagenet_preprocess, imagenet_postprocessor

    # NOTE(review): hard-coded local path — only works on the author's machine
    path = '/home/setepenre/work/olympus/docs/_static/images/cat.jpg'
    img = Image.open(path)

    # img = torch.randn((1, 3, 224, 224))
    model = models.vgg19(pretrained=True)

    # target_class=283 is an ImageNet class id (presumably a cat class,
    # matching the input image) — TODO confirm against the ImageNet label map
    adversary = GradientAscentAdversary(imagenet_preprocess,
                                        imagenet_postprocessor,
                                        model,
                                        target_class=283)

    # Push the image toward the target class until 90% confidence is reached
    samples, noise = adversary.generate([img], min_confidence=0.90, lr=1)
    for s, n in zip(samples, noise):
        n.save('noise.jpg')
        s.save('adversary.jpg')

    show_dict(adversary.report())
Exemplo n.º 8
0
    def on_end_batch(self, task, step, input=None, context=None):
        """Refresh the progress display after each batch and, when batch-level
        metric reporting is enabled, dump the task's current metric values."""
        self.show_progress()

        if task is None:
            return
        if self.show_metrics == 'batch':
            show_dict(task.metrics.value(), print_fun=self.print_fun)
Exemplo n.º 9
0
    def on_end_epoch(self, task, epoch, context):
        """Reset the display throttle at the end of an epoch and, when
        epoch-level metric reporting is enabled, dump the metric values."""
        self.reset_throttle()
        self.show_progress('', '\n')

        wants_epoch_metrics = task is not None and self.show_metrics == 'epoch'
        if wants_epoch_metrics:
            show_dict(task.metrics.value(), print_fun=self.print_fun)
Exemplo n.º 10
0
    def on_end_train(self, task, step=None):
        """Announce the end of training and dump the final metric values."""
        self.print_fun('Completed training')

        if not task:
            return
        show_dict(task.metrics.value())
Exemplo n.º 11
0
    def on_resume_train(self, task, epoch):
        """Announce that training resumed at *epoch* and dump current metrics."""
        self.print_fun('Resuming at epoch', epoch)

        if not task:
            return
        show_dict(task.metrics.value(), print_fun=self.print_fun)
Exemplo n.º 12
0
    def on_start_train(self, task, step=None):
        """Announce the start of training and dump the initial metric values."""
        self.print_fun('Starting')

        if task:
            initial_metrics = task.metrics.value()
            show_dict(initial_metrics, print_fun=self.print_fun)
Exemplo n.º 13
0
def main(task='rte',
         bootstrapping_seed=1,
         sampler_seed=1,
         init_seed=1,
         global_seed=1,
         learning_rate=0.00002,
         beta1=0.9,
         beta2=0.999,
         weight_decay=0.0,
         attention_probs_dropout_prob=0.1,
         hidden_dropout_prob=0.1,
         batch_size=32,
         weight_init='normal',
         warmup=0,
         ratio=0.1,
         init_std=0.2,
         epoch=3,
         half=False,
         hpo_done=False,
         uid=None,
         experiment_name=None,
         client=None,
         clean_on_exit=True,
         _interrupt=0):
    """Fine-tune a BERT baseline on a GLUE task; return its validation error rate.

    ``task`` names the GLUE task (e.g. 'rte') and selects both the
    "bert-{task}" model and the "glue-{task}" dataset. The various seeds
    control the bootstrap split, data sampling, weight init and the global
    RNG state. ``_interrupt`` is a testing hook that raises an interrupt
    every ``_interrupt`` epochs. Returns ``validation_error_rate`` or None
    when the metric is absent.
    """

    print('seeds: init {} / global {} / sampler {} / bootstrapping {}'.format(
        init_seed, global_seed, sampler_seed, bootstrapping_seed))

    # NOTE(review): sibling entry points call `option(...)` (singular) —
    # confirm `options` exists too.
    base_folder = options('state.storage', '/tmp/storage')
    storage = StateStorage(folder=base_folder)

    split_method = {
        'split_method': 'bootstrap',
        'ratio': ratio,
        'seed': bootstrapping_seed,
        'balanced': False
    }

    # NOTE: the string parameter `task` is rebound here to the task object;
    # its original value was already consumed by the format calls above.
    task = classification_baseline("bert-{}".format(task),
                                   'normal',
                                   'adam',
                                   schedule='warmup',
                                   dataset="glue-{}".format(task),
                                   split_method=split_method,
                                   sampler_seed=sampler_seed,
                                   init_seed=init_seed,
                                   batch_size=batch_size,
                                   device=fetch_device(),
                                   storage=storage,
                                   half=half,
                                   hpo_done=hpo_done,
                                   verbose=False,
                                   validate=True)

    hyperparameters = dict(model={
        'initializer': {
            'mean': 0.0,
            'std': init_std
        },
        'attention_probs_dropout_prob': attention_probs_dropout_prob,
        'hidden_dropout_prob': hidden_dropout_prob
    },
                           optimizer={
                               'lr': learning_rate,
                               'beta1': beta1,
                               'beta2': beta2,
                               'weight_decay': weight_decay
                           },
                           lr_schedule={
                               'warmup_steps': warmup,
                               # total optimisation steps for the warmup schedule
                               'max_steps': epoch * len(task.dataloader),
                               'iterations': 'step'
                           })

    show_dict(hyperparameters)

    # Stream metrics to the experiment database when a client is given
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    if _interrupt:
        from studies import InterruptingMetric
        # Will raise interrupt every `_interrupt` epochs
        task.metrics.append(InterruptingMetric(frequency_epoch=_interrupt))

    task.init(uid=uid, **hyperparameters)

    # NOTE: Seed global once all special inits are done.
    set_seeds(global_seed)

    task.fit(epochs=epoch)

    # Remove checkpoint
    if clean_on_exit:
        # NOTE(review): relies on the private StateStorage._file API
        file_path = storage._file(uid)
        try:
            os.remove(file_path)
            print('Removed checkpoint at', file_path)
        except FileNotFoundError:
            print('No checkpoint at ', file_path)

    # .get: may return None when the metric was never recorded
    return task.metrics.value().get('validation_error_rate', None)
Exemplo n.º 14
0
    loader = DataLoader(splits, sampler_seed=1, batch_size=32)

    main_task = Classification(
        classifier=model,
        optimizer=optimizer,
        lr_scheduler=lr_schedule,
        dataloader=loader.train(),
        device=device,
        storage=StateStorage(folder=f'{base}/hpo_simple'))

    main_task.metrics.append(
        Accuracy(name='validation', loader=loader.valid(batch_size=64)))

    return main_task


# Build the search space from a freshly constructed task
space = make_task().get_space()

# Hyperband search over that space, with budgets between 1 and 30 epochs
hp_optimizer = HPOptimizer('hyperband',
                           fidelity=Fidelity(1, 30).to_dict(),
                           space=space)

hpo_task = HPO(hp_optimizer, make_task)

# Optimize validation accuracy; `result` is the best trial found
result = hpo_task.fit(objective='validation_accuracy')

print('Best Params:')
print('-' * 40)
print(f'validation_accuracy: {result.objective}')
show_dict(result.params)
Exemplo n.º 15
0
def main(bootstrapping_seed=1,
         sampler_seed=1,
         transform_seed=1,
         init_seed=1,
         learning_rate=0.1,
         momentum=0.9,
         weight_decay=5e-4,
         gamma=0.99,
         weight_init='glorot_uniform',
         epoch=120,
         half=False,
         hpo_done=False,
         uid=None,
         experiment_name=None,
         client=None,
         clean_on_exit=True,
         _interrupt=0):
    """Train a VGG11 classification baseline on CIFAR-10.

    Uses a bootstrap split seeded by ``bootstrapping_seed``, SGD with an
    exponential LR schedule (decay ``gamma``), and checkpoints under the
    'state.storage' folder (buffered to one write per 5 minutes). The
    trial's checkpoint is removed on exit unless ``clean_on_exit`` is
    False. ``_interrupt`` is a testing hook raising an interrupt every
    ``_interrupt`` epochs. Returns ``validation_error_rate`` as a float.
    """

    # NOTE(review): sibling entry points call `option(...)` (singular) —
    # confirm `options` exists too.
    base_folder = options('state.storage', '/tmp')
    storage = StateStorage(folder=base_folder, time_buffer=5 * 60)
    print(base_folder)

    sampling_method = {
        'split_method': 'bootstrap',
        'ratio': 0.1666,
        'seed': bootstrapping_seed,
        'balanced': True
    }

    batch_size = 128

    task = classification_baseline('vgg11',
                                   'glorot_uniform',
                                   'sgd',
                                   schedule='exponential',
                                   dataset='cifar10',
                                   batch_size=batch_size,
                                   device=fetch_device(),
                                   data_augment=True,
                                   split_method=sampling_method,
                                   sampler_seed=sampler_seed,
                                   transform_seed=transform_seed,
                                   init_seed=init_seed,
                                   storage=storage,
                                   half=half,
                                   hpo_done=hpo_done,
                                   verbose=False,
                                   validate=True)

    hyperparameters = dict(model={'initializer': {
        'gain': 1.0
    }},
                           optimizer=dict(lr=learning_rate,
                                          momentum=momentum,
                                          weight_decay=weight_decay),
                           lr_schedule=dict(gamma=gamma))

    show_dict(hyperparameters)

    # Stream metrics to the experiment database when a client is given
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    if _interrupt:
        from studies import InterruptingMetric
        # Will raise interrupt every `_interrupt` epochs
        task.metrics.append(InterruptingMetric(frequency_epoch=_interrupt))
        # presumably disables time-based checkpoint buffering so every
        # interrupt is persisted immediately — TODO confirm
        storage.time_buffer = 0

    task.init(uid=uid, **hyperparameters)

    task.fit(epochs=epoch)

    # Remove checkpoint
    if clean_on_exit:
        # NOTE(review): relies on the private StateStorage._file API
        file_path = storage._file(uid)
        try:
            os.remove(file_path)
            print('Removed checkpoint at', file_path)
        except FileNotFoundError:
            print('No checkpoint at ', file_path)

    show_dict(task.metrics.value())

    return float(task.metrics.value()['validation_error_rate'])