def test_init_with_objects():
    dataset = Dataset(kind='hplc', test_frac=0.3)
    model = BayesNeuralNet(max_epochs=1000)
    _ = Emulator(dataset=dataset, model=model,
                 feature_transform='standardize', target_transform='mean')
def make_detection_task(client=None):
    dataset = SplitDataset(
        Dataset('test_pennfudan', path=f'{base}/data'),
        split_method='original')

    loader = DataLoader(dataset, sampler_seed=0, batch_size=1)

    input_size, target_size = loader.get_shapes()

    model = Model(
        'fasterrcnn_resnet18_fpn',
        input_size=input_size,
        output_size=dataset.dataset.dataset.num_classes,
        weight_init='glorot_uniform')

    optimizer = Optimizer('sgd', lr=0.01, momentum=0.99, weight_decay=1e-3)

    lr_schedule = LRSchedule('exponential', gamma=0.97)

    main_task = ObjectDetection(
        detector=model,
        optimizer=optimizer,
        lr_scheduler=lr_schedule,
        dataloader=loader.train(),
        device=device,
        criterion=reduce_loss,
        storage=StateStorage(folder=f'{base}/detection_short'),
        logger=client)

    return main_task
def make_task():
    model = Model('logreg', input_size=(1, 28, 28), output_size=(10,))

    optimizer = Optimizer('sgd')

    lr_schedule = LRSchedule('exponential')

    data = Dataset('test-mnist', path=f'{base}/data')
    splits = SplitDataset(data, split_method='original')
    loader = DataLoader(splits, sampler_seed=1, batch_size=32)

    main_task = Classification(
        classifier=model,
        optimizer=optimizer,
        lr_scheduler=lr_schedule,
        dataloader=loader.train(),
        device=device,
        storage=StateStorage(folder=f'{base}/hpo_simple'))

    main_task.metrics.append(
        Accuracy(name='validation', loader=loader.valid(batch_size=64)))

    return main_task
def test_build_dataset(dataset):
    data = Dataset(dataset, path='/tmp/olympus')
    splits = SplitDataset(data, split_method='original')
    loader = DataLoader(splits, sampler_seed=1, batch_size=1)

    # only iterate over a few batches to keep the test fast
    for i, b in enumerate(loader.train()):
        if i > 10:
            break
def test_cross_validate():
    # only tests if the code runs
    dataset = Dataset(kind='hplc', num_folds=5)
    model = BayesNeuralNet(scope='hplc', max_epochs=2)
    emulator = Emulator(dataset=dataset, model=model)
    emulator.cross_validate()
    emulator.save('emulator_test')
    shutil.rmtree('emulator_test')
def test_train_nn():
    # only tests if the code runs
    dataset = Dataset(kind='hplc', num_folds=5)
    model = NeuralNet(scope='hplc', max_epochs=2)
    emulator = Emulator(dataset=dataset, model=model)
    emulator.train()
    emulator.save('emulator_test')
    shutil.rmtree('emulator_test')
def detection_baseline(model, weight_init, optimizer, lr_scheduler, dataset,
                       batch_size, device, split_method='original',
                       sampler_seed=0, model_seed=0, storage=None, half=False,
                       hpo_done=False, logger=None, **config):
    dataset = SplitDataset(
        Dataset(dataset, path=f'{base}/data'),
        split_method=split_method
    )

    loader = DataLoader(
        dataset,
        sampler_seed=sampler_seed,
        batch_size=batch_size
    )

    input_size, target_size = loader.get_shapes()

    init = Initializer(
        weight_init,
        seed=model_seed,
        gain=1.0
    )

    model = Model(
        model,
        input_size=input_size,
        output_size=dataset.dataset.dataset.num_classes,
        weight_init=init,
        half=half)

    optimizer = Optimizer(optimizer, half=half)

    lr_schedule = LRSchedule(lr_scheduler)

    train, valid, test = loader.get_loaders(hpo_done=hpo_done)

    main_task = ObjectDetection(
        detector=model,
        optimizer=optimizer,
        lr_scheduler=lr_schedule,
        dataloader=train,
        device=device,
        storage=storage,
        criterion=reduce_loss)

    name = 'validation'
    if hpo_done:
        name = 'test'

    main_task.metrics.append(
        Loss(name=name, loader=test)
    )

    return main_task
def make_loader_batch_sampler(dataset):
    from olympus.datasets.sampling import RandomSampler

    data = Dataset(dataset, path='/tmp/olympus')
    splits = SplitDataset(data, split_method='original')

    sampler = lambda dataset, seed: BatchSampler(
        sampler=RandomSampler(dataset, seed),
        batch_size=8,
        drop_last=True)

    return DataLoader(splits, sampler_seed=1, batch_sampler=sampler).train()
def finance_baseline(tickers, start, end, optimizer, batch_size, device,
                     window=70, sampler_seed=0, hpo_done=False):
    dataset = Dataset('stockmarket', path=f'{base}/data', tickers=tickers,
                      start_date=start, end_date=end)

    dataset = WindowedDataset(dataset, window=window,
                              transforms=lambda x: x.transpose(1, 0),
                              overlaps=True)

    dataset = SplitDataset(dataset, split_method='original')

    loader = DataLoader(dataset, sampler_seed=sampler_seed,
                        batch_size=batch_size)

    model = Model('MinVarianceReturnMomentEstimator', weight_init='noinit',
                  input_size=(len(tickers), window),
                  lags=2).to(device=device)

    optimizer = Optimizer(optimizer)

    train, valid, test = loader.get_loaders(hpo_done=hpo_done)

    main_task = Finance(model=model, optimizer=optimizer, oracle=oracle,
                        dataset=train, device=device,
                        criterion=SharpeRatioCriterion())

    name = 'validation'
    if hpo_done:
        name = 'test'

    main_task.metrics.append(Loss(name=name, loader=test))
    main_task.metrics.append(Loss(name='train', loader=train))

    return main_task
base = option('base_path', '/tmp/olympus')

# Model
model = Model('resnet18', input_size=(1, 28, 28), output_size=(10,))

# Optimizer
optimizer = Optimizer('sgd', params=model.parameters(), weight_decay=0.001,
                      lr=1e-5, momentum=1e-5)

# Schedule
lr_schedule = LRSchedule('exponential', optimizer=optimizer, gamma=0.99)

data = Dataset('fake_mnist', path=f'{base}/data')
splits = SplitDataset(data, split_method='original')

# Dataloader
loader = DataLoader(splits, sampler_seed=1, batch_size=32)

# Event handler
event_handler = ObserverList()
speed = Speed()
event_handler.append(
    ProgressView(speed, max_epochs=epochs,
                 max_steps=len(loader.train())).every(epoch=1, batch=1))

model = model.to(device=device)
loss = 0
def make_loader(dataset):
    data = Dataset(dataset, path='/tmp/olympus')
    splits = SplitDataset(data, split_method='original')
    return DataLoader(splits, sampler_seed=1, batch_size=8).train()
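A minimal usage sketch for the loader builder above; the dataset name 'fake_mnist' and the iteration pattern are borrowed from the other snippets in this collection and are only assumptions for illustration:

# Hypothetical usage sketch: iterate a few training batches from the
# built loader (assumes the 'fake_mnist' dataset is available under
# /tmp/olympus, as in the other examples).
train_loader = make_loader('fake_mnist')
for step, batch in enumerate(train_loader):
    print('\rTrain:', step, len(batch), end='')
    if step > 10:  # only look at a few batches
        break
print()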
def segmentation_baseline(model, initializer, optimizer, dataset, batch_size,
                          device, split_method='original', sampler_seed=0,
                          init_seed=0, global_seed=0, storage=None, half=False,
                          hpo_done=False, data_path='/tmp/olympus',
                          validate=True, hyper_parameters=None,
                          uri_metric=None, valid_batch_size=None, **config):
    set_seeds(global_seed)

    # dataset size: 2913
    dataset = SplitDataset(
        Dataset(dataset, path=option('data.path', data_path),
                cache=torch.device('cpu')),
        split_method=split_method,
    )

    loader = DataLoader(
        dataset,
        sampler_seed=sampler_seed,
        batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        pin_memory=True,
        num_workers=0,
    )

    input_size, target_size = loader.get_shapes()

    init = Initializer(initializer, seed=init_seed,
                       **get_parameters('initializer', hyper_parameters))

    model = Model(model, input_size=input_size, output_size=target_size[0],
                  weight_init=init, half=half)

    optimizer = Optimizer(optimizer, half=half,
                          **get_parameters('optimizer', hyper_parameters))

    lr_schedule = LRSchedule('none', **get_parameters('schedule', hyper_parameters))

    train, valid, test = loader.get_loaders(hpo_done=hpo_done)

    additional_metrics = []
    if validate and valid:
        additional_metrics.append(MeanIoU(name='validation', loader=valid))
    if validate and test:
        additional_metrics.append(MeanIoU(name='test', loader=test))

    def get_label_counts(dataloader):
        cumulative_counts = {}
        print('get_label_counts(): ', end='')
        for i, (_, labels) in enumerate(dataloader, 1):
            if labels.device.type == 'cuda':
                labels = labels.cpu()
            unique, counts = np.unique(labels.numpy(), return_counts=True)
            for u, c in zip(unique, counts):
                if u not in cumulative_counts:
                    cumulative_counts[u] = 0
                cumulative_counts[u] += c
            if i % (len(dataloader) // 10) == 0:
                print('{}%... '.format(100 * i // len(dataloader)), end='')
        print()
        return cumulative_counts

    def get_criterion_weight(counts, ignore_index=255):
        counts = counts.copy()
        if ignore_index in counts:
            del counts[ignore_index]
        total_count = sum([counts[unique] for unique in sorted(counts)])
        weight = np.array(
            [total_count / counts[unique] for unique in sorted(counts)],
            dtype=np.float32)
        weight /= weight.size
        return weight

    nclasses = 21

    counts = get_label_counts(train)
    weight = get_criterion_weight(counts)
    weight = torch.tensor(weight)
    if half:
        weight = weight.half()
    criterion = nn.CrossEntropyLoss(weight=weight, ignore_index=255)

    main_task = Segmentation(model, optimizer, lr_schedule, train, criterion,
                             nclasses, device=device, storage=storage,
                             metrics=additional_metrics)

    return main_task
parser.add_argument('--dataset', default='cifar10', type=str)
parser.add_argument('--model', default='vgg11', type=str)
parser.add_argument('--caching', action='store_true', dest='caching')
parser.add_argument('--no-caching', action='store_false', dest='caching')
parser.add_argument('--batch-size', default=128, type=int)
parser.add_argument('--warmup', default=4, type=int)
parser.add_argument('--repeat', default=10, type=int)
args = parser.parse_args()

show_dict(vars(args))

device = fetch_device()

if args.caching:
    args.caching = device

dataset = SplitDataset(
    Dataset(args.dataset, cache=args.caching, transform=False),
    split_method='original')

loaders = DataLoader(dataset, batch_size=args.batch_size, sampler_seed=0)

input_size, target_size = loaders.get_shapes()

model = Model(args.model, input_size=input_size,
              output_size=target_size[0]).init()

optimizer = Optimizer('sgd', params=model.parameters(), lr=0.01, momentum=0.9,
                      weight_decay=0.001)

criterion = CrossEntropyLoss()
def test_dataset_init(test_frac, num_folds):
    for kind in datasets_list:
        _ = Dataset(kind=kind, test_frac=test_frac, num_folds=num_folds)
def classification_baseline(model, initializer, optimizer, schedule, dataset,
                            batch_size, device, split_method='original',
                            sampler_seed=0, init_seed=0, transform_seed=0,
                            global_seed=0, transform=True, storage=None,
                            half=False, hpo_done=False,
                            data_path='/tmp/olympus', validate=True,
                            hyper_parameters=None, uri_metric=None,
                            valid_batch_size=None, cache=None, **config):
    set_seeds(global_seed)

    dataset = SplitDataset(
        Dataset(dataset, path=option('data.path', data_path),
                transform=transform, transform_seed=transform_seed,
                cache=cache),
        split_method=split_method)

    loader = DataLoader(dataset, sampler_seed=sampler_seed,
                        batch_size=batch_size,
                        valid_batch_size=valid_batch_size)

    input_size, target_size = loader.get_shapes()

    init = Initializer(initializer, seed=init_seed,
                       **get_parameters('initializer', hyper_parameters))

    model = Model(model, input_size=input_size, output_size=target_size[0],
                  weight_init=init, half=half)

    optimizer = Optimizer(optimizer, half=half,
                          **get_parameters('optimizer', hyper_parameters))

    lr_schedule = LRSchedule(schedule,
                             **get_parameters('schedule', hyper_parameters))

    train, valid, test = loader.get_loaders(hpo_done=hpo_done)

    additional_metrics = []
    if validate and valid:
        additional_metrics.append(Accuracy(name='validation', loader=valid))
    if validate and test:
        additional_metrics.append(Accuracy(name='test', loader=test))

    main_task = Classification(classifier=model,
                               optimizer=optimizer,
                               lr_scheduler=lr_schedule,
                               dataloader=train,
                               device=device,
                               storage=storage,
                               metrics=additional_metrics)

    return main_task
from olympus.datasets import Dataset, SplitDataset, DataLoader
from olympus.utils import option, new_seed, get_seeds
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument('dataset', type=str, help='name of the dataset to load')
args = parser.parse_args()

# can be customized using OLYMPUS_BASE_PATH or the Olympus configuration file if found
base = option('data.path', '/tmp/olympus')

# get the dataset
dataset = Dataset(args.dataset, path=f'{base}/data')

# how to split the dataset
splits = SplitDataset(dataset, split_method='original')

# DataLoader builder
loader = DataLoader(splits, sampler_seed=new_seed(sampler=1), batch_size=32)

# train my model
for step, batch in enumerate(loader.train()):
    print('\rTrain:', step, len(batch), end='')
print()

# use a bigger batch size when the gradient is not computed
for step, batch in enumerate(loader.valid(batch_size=1024)):
    print('\rValid:', step, len(batch), end='')
print()

for step, batch in enumerate(loader.test(batch_size=1024)):
    print('\rTest:', step, len(batch), end='')
print()