from torch import Generator, randperm


def get_train_val_indices(n, val_part=0.2, seed=None):
    # Use `is not None` so that seed=0 still yields a deterministic split
    # (the original `if seed:` silently ignored a zero seed).
    if seed is not None:
        generator = Generator().manual_seed(seed)
    else:
        generator = None
    mixed_indices = randperm(n, generator=generator)
    train_count = round((1. - val_part) * len(mixed_indices))
    train_indices, val_indices = mixed_indices[:train_count], mixed_indices[train_count:]
    return train_indices, val_indices
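# Usage sketch (not from the original source): the returned index tensors can
# be fed to torch.utils.data.Subset to materialise the splits. `my_dataset` is
# a placeholder for any map-style Dataset.
from torch.utils.data import Subset

train_idx, val_idx = get_train_val_indices(len(my_dataset), val_part=0.2, seed=42)
train_set = Subset(my_dataset, train_idx.tolist())
val_set = Subset(my_dataset, val_idx.tolist())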
import torch as th

# `Device`, `as_device`, and `_SEED_MASK` are module-level helpers from the
# original source.


def derive_rand(rand: th.Generator, device: Device) -> th.Generator:
    device = as_device(device)
    # Return the existing random number generator if it already lives on the
    # requested device.
    if rand.device == device:
        return rand
    # Create and seed a new random number generator for the target device.
    # `initial_seed()` reads the seed without mutating `rand`; the original
    # `rand.seed()` would re-seed `rand` non-deterministically on every call.
    rand_new = th.Generator(device)
    rand_new.manual_seed(rand.initial_seed() & _SEED_MASK)
    return rand_new
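# Usage sketch, under the assumption that `as_device` accepts a device string
# and that `_SEED_MASK` clamps seeds into torch's accepted range:
cpu_rand = th.Generator()
cpu_rand.manual_seed(1234)
cuda_rand = derive_rand(cpu_rand, "cuda:0")  # new generator seeded from cpu_rand
same_rand = derive_rand(cpu_rand, "cpu")     # same object returned unchanged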
from torch import Generator
from torch.utils.data import DataLoader, random_split


def dataset_loader(loaded_dataset):
    length = len(loaded_dataset)
    # Lowering values to sane levels to help run tests on GitHub runners.
    # Ideally, on a local GPU (4GB) an 80/20 split with a batch size of 30
    # works well.
    split_set = random_split(
        loaded_dataset,
        [round(0.995 * length), round(0.005 * length)],
        generator=Generator().manual_seed(42))
    # Both loaders deliberately use the tiny 0.5% split (split_set[1]) so the
    # tests stay fast on CI.
    train_loader = DataLoader(split_set[1], batch_size=5)
    test_loader = DataLoader(split_set[1], batch_size=5)
    yield (train_loader, test_loader)
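# Usage sketch (assumption: in the original project this is likely a pytest
# fixture; consumed directly, it is just a generator). `my_dataset` is a
# placeholder.
train_loader, test_loader = next(dataset_loader(my_dataset))
for batch in test_loader:
    ...  # run assertions against the tiny CI split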
from typing import Union

from torch import Generator


def default_rng(seed: Union[None, int, Generator] = None) -> Generator:
    """Mirrors numpy's `default_rng` to produce RNGs for PyTorch.

    Args:
        seed: a seed to initialize the generator. If passed a Generator,
            will return it unaltered. Otherwise, creates a new one.
            If passed an integer, will use it as the manual seed for the
            created generator.

    Returns:
        A PyTorch Generator instance.
    """
    if isinstance(seed, Generator):
        return seed
    rng = Generator()
    if isinstance(seed, int):
        rng.manual_seed(seed)
    return rng
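# Usage sketch: the three accepted seed types, mirroring numpy.random.default_rng.
rng_fresh = default_rng()          # new, unseeded generator
rng_fixed = default_rng(42)        # new generator, deterministically seeded
rng_same = default_rng(rng_fixed)  # existing generator returned unaltered
assert rng_same is rng_fixed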
import torch
from torch import Generator
from torch.utils.data import DataLoader

# `THINGSLoader` and `THINGSTriplets` are project-local classes from the
# original repository.


def get_things(batch_size=64, seed=0, num_workers=8):
    """Returns train and test DataLoaders for the THINGS triplet dataset."""
    things_loader = THINGSLoader(shape=(128, 128))
    data = THINGSTriplets(things_loader=things_loader)
    # train_data, test_data = train_test_split(data, test_size=15000, random_state=seed)
    train_data, test_data = torch.utils.data.random_split(
        data, [1446680, 15000], generator=Generator().manual_seed(seed))
    train_data = DataLoader(
        train_data, batch_size=batch_size, shuffle=True,
        num_workers=num_workers)  # , pin_memory=True, num_workers=16
    test_data = DataLoader(
        test_data, batch_size=batch_size, shuffle=True,
        num_workers=num_workers)  # , pin_memory=True, num_workers=16
    return train_data, test_data
from itertools import accumulate as _accumulate

from torch import Generator, randperm
from torch.utils.data import Subset


def pseudo_random_split(dataset, lengths, seed=42):
    r"""Randomly split a dataset into non-overlapping new datasets of given lengths.

    The generator is seeded from ``seed``, so the split is reproducible, e.g.:

    >>> pseudo_random_split(range(10), [3, 7], seed=42)

    Arguments:
        dataset (Dataset): Dataset to be split
        lengths (sequence): lengths of splits to be produced
        seed (int): seed for the generator used for the random permutation
    """
    generator = Generator().manual_seed(seed)
    # Cannot verify that dataset is Sized
    if sum(lengths) != len(dataset):  # type: ignore
        raise ValueError(
            "Sum of input lengths does not equal the length of the input dataset!"
        )
    indices = randperm(sum(lengths), generator=generator).tolist()
    return [
        Subset(dataset, indices[offset - length:offset])
        for offset, length in zip(_accumulate(lengths), lengths)
    ]
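# Usage sketch: same seed, same split, run after run.
train_subset, val_subset = pseudo_random_split(list(range(100)), [80, 20], seed=7)
assert len(train_subset) == 80 and len(val_subset) == 20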
from torch import Generator
from torch.utils.data import DataLoader, random_split

# `rpm`, `ColourDSpritesTriplets`, `batch_sample_latent_triplets`, and
# `predict_triplets` are project-local helpers from the original repository.


def calculate_triplet_score(model, train_size=10000, test_size=5000,
                            batch_size=16, dataset="dsprites", seed=0):
    print("Dataset", dataset)
    if dataset == "dsprites":
        train_data, test_data = rpm.get_dsprites(
            train_size=train_size, test_size=test_size,
            dataset=ColourDSpritesTriplets, batch_size=batch_size, k=None)
    else:
        # Note: in this branch `dataset` must be an actual Dataset object,
        # not a name string, for random_split to work.
        train_data, test_data = random_split(
            dataset, [train_size, test_size],
            generator=Generator().manual_seed(seed))
        train_data = DataLoader(train_data, batch_size=batch_size)
        test_data = DataLoader(test_data, batch_size=batch_size)
    train_loc, train_y = batch_sample_latent_triplets(
        model, train_data, train_size, batch_size=batch_size)
    assert train_loc.shape[0] == train_size
    assert train_y.shape[0] == train_size
    test_loc, test_y = batch_sample_latent_triplets(
        model, test_data, test_size, batch_size=batch_size)
    assert test_loc.shape[0] == test_size
    assert test_y.shape[0] == test_size
    train_acc, test_acc = predict_triplets(train_loc, train_y, test_loc, test_y)
    scores = {}
    scores['mean_train_k'] = train_acc
    # scores['std_train_k'] = np.std(train_acc)
    scores['triplet_10k'] = test_acc
    # scores['std_test_k'] = np.std(test_acc)
    return scores
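# Usage sketch (hypothetical `model`): the returned dict exposes the train
# accuracy under 'mean_train_k' and the test accuracy under 'triplet_10k'.
scores = calculate_triplet_score(model, train_size=10000, test_size=5000)
print(scores['mean_train_k'], scores['triplet_10k'])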
from os.path import join

from matplotlib.pyplot import figure, legend, savefig, show, title, xticks
from torch import Generator
from torch.utils.data import random_split
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, ToTensor

# `parse_args` and `save_plot_dataset` are project-local helpers from the
# original repository.

args = parse_args()
transform = Compose([ToTensor()])
train_dataset = MNIST(root=args.root, train=True, transform=transform, download=True)
test_dataset = MNIST(root=args.root, train=False, transform=transform, download=True)
len_validation = int(args.validation * len(train_dataset))
# Compare against None with `is`, so that seed=0 is still honoured.
train, validation = random_split(
    train_dataset,
    [len(train_dataset) - len_validation, len_validation],
    generator=None if args.seed is None else Generator().manual_seed(args.seed))
figure(figsize=(10, 10))
save_plot_dataset(train, 'train.pt', path=args.data)
save_plot_dataset(validation, 'validation.pt', path=args.data, colour='xkcd:blue')
xticks(range(-1, 10))
legend()
title('Frequencies of Classes')
savefig(join(args.figs, 'freqs'))
if args.show:
    show()
import math
from typing import Optional

import torch
import torch.nn.functional as F
from torch import Generator
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10
from torchvision.transforms import (Compose, Normalize, RandomCrop,
                                    RandomHorizontalFlip, ToTensor)

# `resnet_builder`, `interlocking_backprop`, and `_compute_accuracy` are
# project-local modules/helpers from the original repository.


def main(dataset_root: str, mode: str):
    normalize_transform = Compose([
        ToTensor(),
        Normalize(mean=(0.4914, 0.4822, 0.4465),
                  std=(0.2460, 0.2411, 0.2576)),
    ])
    augment_transform = Compose(
        [RandomHorizontalFlip(), RandomCrop(32, padding=4)])
    train_dataset = CIFAR10(
        dataset_root,
        train=True,
        transform=Compose([augment_transform, normalize_transform]),
    )
    validation_dataset = CIFAR10(dataset_root,
                                 train=True,
                                 transform=normalize_transform)
    validation_length = int(math.floor(len(train_dataset) * 0.10))
    train_length = len(train_dataset) - validation_length
    # Split twice with the same fixed seed so the train and validation subsets
    # are complementary even though they come from two dataset instances with
    # different transforms.
    train_dataset, _ = random_split(
        train_dataset,
        lengths=[train_length, validation_length],
        generator=Generator().manual_seed(0),
    )
    _, validation_dataset = random_split(
        validation_dataset,
        lengths=[train_length, validation_length],
        generator=Generator().manual_seed(0),
    )
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=128,
                                  shuffle=True,
                                  num_workers=4,
                                  pin_memory=True)
    validation_dataloader = DataLoader(
        validation_dataset,
        batch_size=256,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )
    if torch.cuda.device_count() == 0:
        device: Optional[torch.device] = torch.device("cpu")
        blocks_per_component = ["rest"]
    elif torch.cuda.device_count() == 1:
        device = torch.device("cuda")
        blocks_per_component = ["rest"]
    else:
        device = None
        # For the demo, just put one block on each device, and all remaining
        # blocks on the last device.
        blocks_per_component = ["1"] * (torch.cuda.device_count() - 1) + ["rest"]
    # block_type and architecture correspond to ResNet-32.
    main_nets, aux_nets = resnet_builder.resnet(
        block_type="basic",
        architecture="64,3/128,4/256,6/512,3",
        aux_net_architecture="conv128_bn_conv64_bn_gbpl_fc",
        blocks_per_component=blocks_per_component,
        dataset="cifar10",
        n_classes=10,
    )
    optimizer_constructor = lambda params: SGD(
        params, lr=0.1, momentum=0.9, weight_decay=2e-4)
    # Learning rate schedule for ResNet-50ish on CIFAR10, taken from:
    # https://github.com/tensorflow/models/blob/master/official/r1/resnet/cifar10_main.py#L217
    lr_scheduler_constructor = lambda optimizer: MultiStepLR(
        optimizer, milestones=[91, 136, 182], gamma=0.1)
    loss_function = F.cross_entropy
    if mode == "e2e":
        model = interlocking_backprop.build_e2e_model(
            main_nets, optimizer_constructor, lr_scheduler_constructor,
            loss_function)
    elif mode == "local":
        model = interlocking_backprop.build_local_model(
            main_nets,
            aux_nets,
            optimizer_constructor,
            lr_scheduler_constructor,
            loss_function,
        )
    elif mode == "pairwise":
        model = interlocking_backprop.build_pairwise_model(
            main_nets,
            aux_nets,
            optimizer_constructor,
            lr_scheduler_constructor,
            loss_function,
        )
    elif mode == "3wise":
        model = interlocking_backprop.build_nwise_model(
            main_nets,
            aux_nets,
            optimizer_constructor,
            lr_scheduler_constructor,
            loss_function,
            nwise_communication_distance=3 - 1,
        )
    else:
        raise ValueError(f"Unknown mode {mode}")
    if torch.cuda.device_count() > 1:
        model.enable_model_parallel()
    else:
        model = model.to(device)
    print(f"Epoch 0: "
          f"validation accuracy = {_compute_accuracy(validation_dataloader, model):.2f}")
    for epoch in range(100):
        model.train()
        losses = []
        for inputs, targets in train_dataloader:
            loss = model.training_step(inputs, targets)
            losses.append(loss)
        train_loss = (torch.stack([loss.result() for loss in losses],
                                  dim=0).mean().item())
        validation_accuracy = _compute_accuracy(validation_dataloader, model)
        print(f"Epoch {epoch + 1}: "
              f"training loss = {train_loss:.3f} "
              f"validation accuracy = {validation_accuracy:.2f}")
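# Usage sketch (hypothetical entry point, not from the original source): run
# one of the four supported modes against a local dataset root.
if __name__ == "__main__":
    main(dataset_root="./data", mode="pairwise")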
from contextlib import contextmanager
from typing import Any

import torch as th


@contextmanager  # assumed decorator: the `yield` below only makes sense in a context manager
def use_rand(rand: th.Generator, **kwargs: Any):
    # Fork the current global random state and seed it from `rand`.
    # `initial_seed()` reads the seed without mutating `rand`; the original
    # `rand.seed()` would re-seed `rand` non-deterministically on every call.
    with th.random.fork_rng(devices=(rand.device,), **kwargs):
        th.random.manual_seed(rand.initial_seed() & _SEED_MASK)
        yield
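# Usage sketch, assuming `_SEED_MASK` is the module's seed mask:
rand = th.Generator()
rand.manual_seed(0)
with use_rand(rand):
    sample = th.rand(3)  # drawn from the temporarily re-seeded global RNG
# the previous global RNG state is restored on exit by fork_rng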
import os

import numpy as np
import tonic
from torch import Generator
from torch.utils.data import SubsetRandomSampler


def load(self, dataset, trainset=True, jitonic=[None, None],
         subset_size=None, kfold=None, kfold_ind=None):
    # Build the jitter transform. Note: the original code overwrote the
    # spatial-jitter transform when a time jitter was also given; here the
    # two are composed so both apply.
    jitter_transforms = []
    if jitonic[1] is not None:
        print(f'spatial jitter -> var = {jitonic[1]}')
        jitter_transforms.append(tonic.transforms.SpatialJitter(
            variance_x=jitonic[1], variance_y=jitonic[1], sigma_x_y=0,
            integer_coordinates=True, clip_outliers=True))
    if jitonic[0] is not None:
        print(f'time jitter -> var = {jitonic[0]}')
        jitter_transforms.append(tonic.transforms.TimeJitter(
            variance=jitonic[0], integer_timestamps=False,
            clip_negative=True, sort_timestamps=True))
    if jitter_transforms:
        transform = tonic.transforms.Compose(jitter_transforms)
    else:
        print('no jitter')
        transform = None

    download = False
    path = '../Data/'
    if dataset == 'nmnist':
        path += 'Train/' if trainset else 'Test/'
        if not os.path.exists(path):
            download = True
        eventset = tonic.datasets.NMNIST(save_to='../Data/', train=trainset,
                                         download=download, transform=transform)
    elif dataset == 'poker':
        path += 'pips_train/' if trainset else 'pips_test/'
        if not os.path.exists(path):
            download = True
        eventset = tonic.datasets.POKERDVS(save_to='../Data/', train=trainset,
                                           download=download, transform=transform)
    elif dataset == 'gesture':
        path += 'ibmGestureTrain/' if trainset else 'ibmGestureTest/'
        if not os.path.exists(path):
            download = True
        eventset = tonic.datasets.DVSGesture(save_to='../Data/', train=trainset,
                                             download=download, transform=transform)
    elif dataset == 'cars':
        path += 'ncars-train/' if trainset else 'ncars-test/'
        if not os.path.exists(path):
            download = True
        eventset = tonic.datasets.NCARS(save_to='../Data/', train=trainset,
                                        download=download, transform=transform)
    elif dataset == 'ncaltech':
        eventset = tonic.datasets.NCALTECH101(save_to='../Data/', train=trainset,
                                              download=download, transform=transform)
    else:
        # Fail loudly instead of printing and then using an undefined eventset.
        raise ValueError(f'incorrect dataset: {dataset}')

    if subset_size is not None:
        # Draw a class-balanced subset of the requested size.
        subset_indices = []
        for i in range(len(eventset.classes)):
            all_ind = np.where(np.array(eventset.targets) == i)[0]
            subset_indices += all_ind[:subset_size // len(eventset.classes)].tolist()
        g_cpu = Generator()
        subsampler = SubsetRandomSampler(subset_indices, g_cpu)
        loader = tonic.datasets.DataLoader(eventset, batch_size=1,
                                           shuffle=False, sampler=subsampler)
    elif kfold is not None:
        # Take the kfold_ind-th class-balanced fold. (The original code
        # referred to an undefined `testset` here; `eventset` is what was
        # meant.)
        subset_indices = []
        subset_size = len(eventset) // kfold
        for i in range(len(eventset.classes)):
            all_ind = np.where(np.array(eventset.targets) == i)[0]
            subset_indices += all_ind[
                kfold_ind * subset_size // len(eventset.classes):
                min((kfold_ind + 1) * subset_size // len(eventset.classes),
                    len(eventset) - 1)].tolist()
        g_cpu = Generator()
        subsampler = SubsetRandomSampler(subset_indices, g_cpu)
        loader = tonic.datasets.DataLoader(eventset, batch_size=1,
                                           shuffle=False, sampler=subsampler)
    else:
        loader = tonic.datasets.DataLoader(eventset, shuffle=True)

    if eventset.sensor_size != self.TS[0].camsize:
        print('sensor formatting...')
        self.sensformat(eventset.sensor_size)

    return loader, eventset.ordering, eventset.classes
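# Usage sketch, with a hypothetical host object `net` that provides the
# self.TS / self.sensformat attributes this method relies on:
loader, ordering, classes = net.load('nmnist', trainset=True, jitonic=[None, None])
for events, target in loader:
    ...  # events follow `ordering`; labels index into `classes`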