Example #1
def system_check():
    import traceback

    try:
        import torch
        from olympus.models import Model
        from olympus.optimizers import Optimizer

        batch = torch.randn((32, 3, 64, 64)).cuda()
        model = Model('resnet18', input_size=(3, 64, 64),
                      output_size=(10, )).cuda()

        model.init()

        optimizer = Optimizer('sgd', params=model.parameters())

        optimizer.init(**optimizer.defaults)

        optimizer.zero_grad()
        loss = model(batch).sum()

        optimizer.backward(loss)
        optimizer.step()

        return True
    except Exception:
        # `error` is assumed to be olympus's logging helper (olympus.utils)
        error(traceback.format_exc())
        return False
Example #2
def make_detection_task(client=None):
    dataset = SplitDataset(Dataset('test_pennfudan', path=f'{base}/data'),
                           split_method='original')

    loader = DataLoader(dataset, sampler_seed=0, batch_size=1)

    input_size, target_size = loader.get_shapes()

    model = Model('fasterrcnn_resnet18_fpn',
                  input_size=input_size,
                  output_size=dataset.dataset.dataset.num_classes,
                  weight_init='glorot_uniform')

    optimizer = Optimizer('sgd', lr=0.01, momentum=0.99, weight_decay=1e-3)

    lr_schedule = LRSchedule('exponential', gamma=0.97)

    main_task = ObjectDetection(
        detector=model,
        optimizer=optimizer,
        lr_scheduler=lr_schedule,
        dataloader=loader.train(),
        device=device,
        criterion=reduce_loss,
        storage=StateStorage(folder=f'{base}/detection_short'),
        logger=client)

    return main_task
Example #3
def make_task():
    model = Model('logreg', input_size=(1, 28, 28), output_size=(10, ))

    optimizer = Optimizer('sgd')

    lr_schedule = LRSchedule('exponential')

    data = Dataset('test-mnist', path=f'{base}/data')

    splits = SplitDataset(data, split_method='original')

    loader = DataLoader(splits, sampler_seed=1, batch_size=32)

    main_task = Classification(
        classifier=model,
        optimizer=optimizer,
        lr_scheduler=lr_schedule,
        dataloader=loader.train(),
        device=device,
        storage=StateStorage(folder=f'{base}/hpo_simple'))

    main_task.metrics.append(
        Accuracy(name='validation', loader=loader.valid(batch_size=64)))

    return main_task
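
A hypothetical driver for the task built above; `fit(epochs=...)` is an assumption about the Task API, not something shown on this page:

# Hedged usage sketch: drive the classification task for a couple of epochs.
# `fit(epochs=...)` is an assumed Task entry point; the appended Accuracy
# metric would then report under the 'validation' name used above.
task = make_task()
task.fit(epochs=2)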
Example #4
def test_model_with_parameter_and_hp_2():
    m: Model = Model('TestModel', b=0, input_size=(1, 28, 28), output_size=10)

    # Missing a
    with pytest.raises(MissingParameters):
        m.init()

    m.init(a=0)
Example #5
def test_model_new_factory_init_fixed():
    # set HP in the constructor
    m: Model = Model('resnet18',
                     input_size=(1, 28, 28),
                     output_size=10,
                     weight_init='normal',
                     initializer=dict(mean=0, std=1))
    assert dict(m.get_space()) == {}
Example #6
def detection_baseline(model, weight_init,
                       optimizer, lr_scheduler,
                       dataset, batch_size, device,
                       split_method='original',
                       sampler_seed=0, model_seed=0,
                       storage=None, half=False,
                       hpo_done=False, logger=None,
                       **config):

    dataset = SplitDataset(
        Dataset(dataset, path=f'{base}/data'),
        split_method=split_method
    )

    loader = DataLoader(
        dataset,
        sampler_seed=sampler_seed,
        batch_size=batch_size
    )

    input_size, target_size = loader.get_shapes()

    init = Initializer(
        weight_init,
        seed=model_seed,
        gain=1.0
    )

    model = Model(
        model,
        input_size=input_size,
        output_size=dataset.dataset.dataset.num_classes,
        weight_init=init,
        half=half)

    optimizer = Optimizer(optimizer, half=half)

    lr_schedule = LRSchedule(lr_scheduler)

    train, valid, test = loader.get_loaders(hpo_done=hpo_done)

    main_task = ObjectDetection(
        detector=model,
        optimizer=optimizer,
        lr_scheduler=lr_schedule,
        dataloader=train,
        device=device,
        storage=storage,
        criterion=reduce_loss)

    name = 'validation'
    if hpo_done:
        name = 'test'

    main_task.metrics.append(
        Loss(name=name, loader=test)
    )

    return main_task
Example #7
def test_model_factory_init():
    # set init using its name
    m: Model = Model('resnet18',
                     input_size=(1, 28, 28),
                     output_size=10,
                     weight_init='normal')

    assert dict(m.get_space()) == dict(
        initializer=dict(mean='normal(0, 1)', std='normal(1, 1)'))
    m.init(initializer=dict(mean=0, std=1))
Example #8
def test_model_new_object_init_hp_set():
    init = Initializer('normal')

    # set HP using init
    m: Model = Model('resnet18',
                     input_size=(1, 28, 28),
                     output_size=10,
                     weight_init=init)

    assert dict(m.get_space()) == dict(
        initializer=dict(mean='normal(0, 1)', std='normal(1, 1)'))
    m.init(initializer=dict(mean=0, std=1))
Example #9
def finance_baseline(tickers,
                     start,
                     end,
                     optimizer,
                     batch_size,
                     device,
                     window=70,
                     sampler_seed=0,
                     hpo_done=False):
    dataset = Dataset('stockmarket',
                      path=f'{base}/data',
                      tickers=tickers,
                      start_date=start,
                      end_date=end)

    dataset = WindowedDataset(dataset,
                              window=window,
                              transforms=lambda x: x.transpose(1, 0),
                              overlaps=True)

    dataset = SplitDataset(dataset, split_method='original')

    loader = DataLoader(dataset,
                        sampler_seed=sampler_seed,
                        batch_size=batch_size)

    model = Model('MinVarianceReturnMomentEstimator',
                  weight_init='noinit',
                  input_size=(len(tickers), window),
                  lags=2).to(device=device)

    optimizer = Optimizer(optimizer)

    train, valid, test = loader.get_loaders(hpo_done=hpo_done)

    main_task = Finance(model=model,
                        optimizer=optimizer,
                        oracle=oracle,
                        dataset=train,
                        device=device,
                        criterion=SharpeRatioCriterion())

    name = 'validation'
    if hpo_done:
        name = 'test'

    main_task.metrics.append(Loss(name=name, loader=test))
    main_task.metrics.append(Loss(name='train', loader=train))

    return main_task
Example #10
def a2c_baseline(env_name, parallel_sim, weight_init, model, model_seed, optimizer,
                 lr_scheduler, num_steps, half, device, storage, **config):
    def to_nchw(states):
        return states.permute(0, 3, 1, 2)

    env = Environment(
        env_name,
        parallel_env=parallel_sim,
        transforms=to_nchw
    )

    init = Initializer(
        weight_init,
        seed=model_seed,
        gain=1.0
    )

    model = Model(
        model,
        input_size=env.input_size,
        output_size=env.target_size[0],
        weight_init=init,
        half=half)

    loader = RLDataLoader(
        env,
        replay=simple_replay_vector(num_steps=num_steps),
        actor=model.act,
        critic=model.critic
    )

    optimizer = Optimizer(optimizer, half=half)

    lr_schedule = LRSchedule(lr_scheduler)

    task = A2C(
        model=model,
        optimizer=optimizer,
        dataloader=loader.train(),
        lr_scheduler=lr_schedule,
        device=device,
        storage=storage
    )

    return task
Example #11
import torch.nn.functional as F
from olympus.datasets import Dataset, SplitDataset, DataLoader
from olympus.optimizers import Optimizer, LRSchedule

from olympus.models import Model
from olympus.observers import ObserverList, ProgressView, Speed
from olympus.utils import fetch_device, option

epochs = 2
device = fetch_device()
base = option('base_path', '/tmp/olympus')

# Model
model = Model('resnet18', input_size=(1, 28, 28), output_size=(10, ))

# Optimizer
optimizer = Optimizer('sgd',
                      params=model.parameters(),
                      weight_decay=0.001,
                      lr=1e-5,
                      momentum=1e-5)

# Schedule
lr_schedule = LRSchedule('exponential', optimizer=optimizer, gamma=0.99)

data = Dataset('fake_mnist', path=f'{base}/data')

splits = SplitDataset(data, split_method='original')

# Dataloader
loader = DataLoader(splits, sampler_seed=1, batch_size=32)
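
The fragment stops after building the loader; below is a minimal training-loop sketch wiring the pieces together. The `optimizer.backward` call and the `x, *_, y` batch layout follow Examples #1 and #15 on this page; the `lr_schedule.step()` call and the use of `F.cross_entropy` as criterion are assumptions:

# Hedged continuation (not part of the original snippet): one training run
# composed from the objects built above.
model = model.to(device=device)

for _ in range(epochs):
    for batch in loader.train():
        x, *_, y = batch                     # batch layout as in Example #15
        x, y = x.to(device=device), y.to(device=device)

        optimizer.zero_grad()
        loss = F.cross_entropy(model(x), y)  # criterion choice is an assumption
        optimizer.backward(loss)             # wrapper-level backward, as in Example #1
        optimizer.step()

    lr_schedule.step()  # assumes a torch-style step() on the schedule wrapper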
Example #12
import torch
from torch.optim.lr_scheduler import _LRScheduler

from olympus.models import Model
from olympus.optimizers import Optimizer, LRSchedule


# NOTE: the original fragment starts mid-class. The imports above and the
# class header / __init__ / `def step` below are an assumed reconstruction:
# a torch-style _LRScheduler subclass, inferred from the base_lrs,
# last_epoch and gamma usage in this fragment.
class MyExponentialLR(_LRScheduler):
    def __init__(self, optimizer, gamma=0.97, last_epoch=-1):
        self.gamma = gamma
        super(MyExponentialLR, self).__init__(optimizer, last_epoch)

    def step(self, epoch=None):
        self.last_epoch = self.last_epoch + 1 if epoch is None else epoch
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

    def get_lr(self):
        return [base_lr * self.gamma ** self.last_epoch
                for base_lr in self.base_lrs]

    @staticmethod
    def get_space():
        return {'gamma': 'loguniform(0.97, 1)'}


if __name__ == '__main__':
    model = Model(
        'logreg',
        input_size=(290,),
        output_size=(10,)
    )

    optimizer = Optimizer('sgd', params=model.parameters())

    # If you use a hyper-parameter optimizer, it will generate this for you
    optimizer.init(lr=1e-4, momentum=0.02, weight_decay=1e-3)

    schedule = LRSchedule(schedule=MyExponentialLR)
    schedule.init(optimizer=optimizer, gamma=0.97)

    optimizer.zero_grad()

    input = torch.randn((10, 290))
    output = model(input)
Example #13
def setup():
    model = Model('logreg', input_size=(28, ), output_size=(10, ))

    optimizer = Optimizer('sgd', params=model.parameters())
    optimizer.init(**optimizer.defaults)
    return model, optimizer
Example #14
def segmentation_baseline(model,
                          initializer,
                          optimizer,
                          dataset,
                          batch_size,
                          device,
                          split_method='original',
                          sampler_seed=0,
                          init_seed=0,
                          global_seed=0,
                          storage=None,
                          half=False,
                          hpo_done=False,
                          data_path='/tmp/olympus',
                          validate=True,
                          hyper_parameters=None,
                          uri_metric=None,
                          valid_batch_size=None,
                          **config):
    set_seeds(global_seed)

    # dataset size: 2913
    dataset = SplitDataset(
        Dataset(dataset,
                path=option('data.path', data_path),
                cache=torch.device('cpu')),
        split_method=split_method,
    )

    loader = DataLoader(
        dataset,
        sampler_seed=sampler_seed,
        batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        pin_memory=True,
        num_workers=0,
    )

    input_size, target_size = loader.get_shapes()

    init = Initializer(initializer,
                       seed=init_seed,
                       **get_parameters('initializer', hyper_parameters))

    model = Model(model,
                  input_size=input_size,
                  output_size=target_size[0],
                  weight_init=init,
                  half=half)

    optimizer = Optimizer(optimizer,
                          half=half,
                          **get_parameters('optimizer', hyper_parameters))

    lr_schedule = LRSchedule('none',
                             **get_parameters('schedule', hyper_parameters))

    train, valid, test = loader.get_loaders(hpo_done=hpo_done)

    additional_metrics = []

    if validate and valid:
        additional_metrics.append(MeanIoU(name='validation', loader=valid))

    if validate and test:
        additional_metrics.append(MeanIoU(name='test', loader=test))

    def get_label_counts(dataloader):
        # Accumulate per-class pixel counts over the whole loader.
        cumulative_counts = {}
        print('get_label_counts(): ', end='')
        step = max(len(dataloader) // 10, 1)  # avoid modulo-by-zero on tiny loaders
        for i, (_, labels) in enumerate(dataloader, 1):
            if labels.device.type == 'cuda':
                labels = labels.cpu()
            unique, counts = np.unique(labels.numpy(), return_counts=True)
            for u, c in zip(unique, counts):
                if u not in cumulative_counts:
                    cumulative_counts[u] = 0
                cumulative_counts[u] += c
            if i % step == 0:
                print('{}%... '.format(100 * i // len(dataloader)), end='')
        print()
        return cumulative_counts

    def get_criterion_weight(counts, ignore_index=255):
        # Inverse-frequency class weights, normalized by the number of classes.
        counts = counts.copy()
        if ignore_index in counts:
            del counts[ignore_index]
        total_count = sum([counts[unique] for unique in sorted(counts)])
        weight = np.array(
            [total_count / counts[unique] for unique in sorted(counts)],
            dtype=np.float32)
        weight /= weight.size
        return weight

    nclasses = 21  # hardcoded in the original; 21 matches Pascal VOC segmentation (presumably)
    counts = get_label_counts(train)
    weight = get_criterion_weight(counts)
    weight = torch.tensor(weight)
    if half:
        weight = weight.half()
    criterion = nn.CrossEntropyLoss(weight=weight, ignore_index=255)

    main_task = Segmentation(model,
                             optimizer,
                             lr_schedule,
                             train,
                             criterion,
                             nclasses,
                             device=device,
                             storage=storage,
                             metrics=additional_metrics)

    return main_task
Example #15
import argparse

from torch.nn import CrossEntropyLoss

from olympus.datasets import Dataset, SplitDataset, DataLoader
from olympus.models import Model
from olympus.optimizers import Optimizer
from olympus.utils import fetch_device, show_dict  # show_dict's location is an assumption

# NOTE: the original fragment is truncated; the imports above, the parser
# construction, and every argument other than --repeat are assumed from the
# args.* attributes used below.
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str)
parser.add_argument('--model', type=str)
parser.add_argument('--batch-size', default=32, type=int)
parser.add_argument('--caching', action='store_true')
parser.add_argument('--repeat', default=10, type=int)
args = parser.parse_args()
show_dict(vars(args))

device = fetch_device()
if args.caching:
    args.caching = device

dataset = SplitDataset(Dataset(args.dataset,
                               cache=args.caching,
                               transform=False),
                       split_method='original')
loaders = DataLoader(dataset, batch_size=args.batch_size, sampler_seed=0)
input_size, target_size = loaders.get_shapes()

model = Model(args.model, input_size=input_size,
              output_size=target_size[0]).init()

optimizer = Optimizer('sgd',
                      params=model.parameters(),
                      lr=0.01,
                      momentum=0.9,
                      weight_decay=0.001)

criterion = CrossEntropyLoss()
model = model.to(device=device)
train_loader = loaders.train()


def epoch():
    # The original fragment ends mid-function; the loss/step lines below are
    # an assumed completion following the pattern of Example #1.
    for batch in train_loader:
        x, *_, y = batch
        optimizer.zero_grad()
        loss = criterion(model(x.to(device=device)), y.to(device=device))
        optimizer.backward(loss)
        optimizer.step()
Example #16
def test_model_fixed_init():
    # Uses the default init; the model is not initialized yet
    m: Model = Model('resnet18')
    assert dict(m.get_space()) == dict()
Example #17
    def load_model(self, uid, keys):
        state = self.storage.load(uid)
        # Walk down the nested state dict; e.g. keys=['task', 'model']
        # (a hypothetical key path) yields state['task']['model'],
        # with an empty dict as the fallback at each level.
        for k in keys:
            state = state.get(k, dict())

        return Model.from_state(state)
Example #18
def new_model():
    return Model('logreg', input_size=(28,), output_size=10)
Example #19
import torch
import torch.nn as nn

from olympus.models import Model


class MyCustomModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(MyCustomModel, self).__init__()
        self.main = nn.Linear(input_size[0], output_size[0])

    def forward(self, x):
        return self.main(x)


# Register my model
builders = {'my_model': MyCustomModel}

if __name__ == '__main__':
    model = Model(model=MyCustomModel, input_size=(290, ), output_size=(10, ))

    input = torch.randn((10, 290))
    out = model(input)

    print(out)
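
Since MyCustomModel was registered in builders under 'my_model', constructing it by name should presumably also work (an assumption about how Model resolves string names against the registry):

# Hedged variant: build the registered model by its name instead of by class.
model = Model('my_model', input_size=(290, ), output_size=(10, ))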
Example #20
def test_build_model(model, batch_size=1):
    model = Model(model, input_size=(1, 28, 28), output_size=(10, ))

    input = torch.randn((batch_size, 1, 28, 28))
    model(input)
Example #21
    # Truncated in the original: the enclosing MyCustomNASModel (an nn.Module
    # whose constructor accepts the l1..l4 widths below) is defined above
    # this fragment.
    @staticmethod
    def get_space():
        return {
            'l1': 'uniform(32, 64, discrete=True)',
            'l2': 'uniform(32, 64, discrete=True)',
            'l3': 'uniform(32, 64, discrete=True)',
            'l4': 'uniform(32, 64, discrete=True)'
        }


# Register my model
builders = {'my_model': MyCustomNASModel}


if __name__ == '__main__':
    model = Model(
        model=MyCustomNASModel,
        input_size=(290,),
        output_size=(10,),
        # Fix this hyper-parameter right away
        l1=21
    )

    # If you use a hyper-parameter optimizer, it will generate this for you
    model.init(l2=33, l3=33, l4=32)

    input = torch.randn((10, 290))
    out = model(input)

    print(out)
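
Because l1 is fixed in the constructor, only the remaining dimensions should appear in the search space before init(); Examples #5 and #16 show that fully-fixed models report an empty space. A hedged check:

# Hedged check (an assumption extrapolated from Examples #5 and #16):
fresh = Model(model=MyCustomNASModel, input_size=(290,), output_size=(10,), l1=21)
print(dict(fresh.get_space()))  # expected to list l2, l3 and l4 only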
Example #22
def classification_baseline(model,
                            initializer,
                            optimizer,
                            schedule,
                            dataset,
                            batch_size,
                            device,
                            split_method='original',
                            sampler_seed=0,
                            init_seed=0,
                            transform_seed=0,
                            global_seed=0,
                            transform=True,
                            storage=None,
                            half=False,
                            hpo_done=False,
                            data_path='/tmp/olympus',
                            validate=True,
                            hyper_parameters=None,
                            uri_metric=None,
                            valid_batch_size=None,
                            cache=None,
                            **config):

    set_seeds(global_seed)

    dataset = SplitDataset(Dataset(dataset,
                                   path=option('data.path', data_path),
                                   transform=transform,
                                   transform_seed=transform_seed,
                                   cache=cache),
                           split_method=split_method)

    loader = DataLoader(dataset,
                        sampler_seed=sampler_seed,
                        batch_size=batch_size,
                        valid_batch_size=valid_batch_size)

    input_size, target_size = loader.get_shapes()

    init = Initializer(initializer,
                       seed=init_seed,
                       **get_parameters('initializer', hyper_parameters))

    model = Model(model,
                  input_size=input_size,
                  output_size=target_size[0],
                  weight_init=init,
                  half=half)

    optimizer = Optimizer(optimizer,
                          half=half,
                          **get_parameters('optimizer', hyper_parameters))

    lr_schedule = LRSchedule(schedule,
                             **get_parameters('schedule', hyper_parameters))

    train, valid, test = loader.get_loaders(hpo_done=hpo_done)

    additional_metrics = []

    if validate and valid:
        additional_metrics.append(Accuracy(name='validation', loader=valid))

    if validate and test:
        additional_metrics.append(Accuracy(name='test', loader=test))

    main_task = Classification(classifier=model,
                               optimizer=optimizer,
                               lr_scheduler=lr_schedule,
                               dataloader=train,
                               device=device,
                               storage=storage,
                               metrics=additional_metrics)

    return main_task