Example #1
from torch import Generator, randperm

def get_train_val_indices(n, val_part=0.2, seed=None):
    # Check against None explicitly: `if seed:` would silently ignore seed=0.
    if seed is not None:
        generator = Generator().manual_seed(seed)
    else:
        generator = None
    mixed_indices = randperm(n, generator=generator)
    train_count = round((1. - val_part) * len(mixed_indices))
    train_indices, val_indices = mixed_indices[:train_count], mixed_indices[train_count:]
    return train_indices, val_indices
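For context, a minimal usage sketch (the toy tensor and the assert are assumptions for illustration; 0.2 is the snippet's default split):

import torch

features = torch.arange(100).float().unsqueeze(1)  # hypothetical toy data
train_idx, val_idx = get_train_val_indices(len(features), val_part=0.2, seed=0)
train_x, val_x = features[train_idx], features[val_idx]
assert len(train_x) == 80 and len(val_x) == 20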
Example #2
File: torch.py Project: lqf96/mltk
import torch as th

# `as_device`, `Device`, and `_SEED_MASK` are helpers defined elsewhere in mltk.
def derive_rand(rand: th.Generator, device: Device) -> th.Generator:
    device = as_device(device)
    # Return existing random number generator for the same device
    if rand.device == device:
        return rand

    # Create and seed a new random number generator. Note that
    # Generator.seed() draws a fresh non-deterministic seed (re-seeding
    # `rand` as a side effect) and returns it.
    rand_new = th.Generator(device)
    rand_new.manual_seed(rand.seed() & _SEED_MASK)

    return rand_new
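A minimal usage sketch, assuming a CUDA device is available and `_SEED_MASK` is a bit mask along the lines of `(1 << 63) - 1`:

import torch as th

cpu_rand = th.Generator()
cpu_rand.manual_seed(1234)
if th.cuda.is_available():
    cuda_rand = derive_rand(cpu_rand, "cuda:0")  # new generator living on the GPU
    assert cuda_rand.device.type == "cuda"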
Example #3
from torch import Generator
from torch.utils.data import DataLoader, random_split

def dataset_loader(loaded_dataset):
    # Lowering values to sane levels to help run tests on GitHub runners;
    # ideally, on a local GPU (4 GB), an 80/20 split with a batch size of 30
    # works well.
    length = len(loaded_dataset)
    split_set = random_split(
        loaded_dataset,
        [round(0.995 * length), round(0.005 * length)],
        generator=Generator().manual_seed(42))
    # Both loaders apparently use the small 0.5% split (`split_set[1]`) on
    # purpose, so that the tests stay fast.
    train_loader = DataLoader(split_set[1], batch_size=5)
    test_loader = DataLoader(split_set[1], batch_size=5)
    yield (train_loader, test_loader)
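Since the function yields rather than returns, it reads like a pytest fixture body; a hedged sketch of how it might be wired up (the decorator and the `loaded_dataset` fixture are assumptions):

import pytest

@pytest.fixture
def loaders(loaded_dataset):  # `loaded_dataset` would be another fixture
    yield from dataset_loader(loaded_dataset)

def test_batch_size(loaders):
    train_loader, test_loader = loaders
    assert train_loader.batch_size == 5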
Example #4
from typing import Union

from torch import Generator

def default_rng(seed: Union[None, int, Generator] = None) -> Generator:
    """Mirrors numpy's `default_rng` to produce RNGs for PyTorch.

    Args:
        seed: a seed to initialize the generator. If passed a Generator,
            will return it unaltered. Otherwise, creates a new one. If
            passed an integer, will use it as the manual seed for the
            created generator.

    Returns:
        A PyTorch Generator instance
    """
    if isinstance(seed, Generator):
        return seed

    rng = Generator()
    if isinstance(seed, int):
        rng.manual_seed(seed)
    return rng
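A quick usage sketch covering the three accepted input kinds:

rng = default_rng(42)      # integer: seeded generator
same = default_rng(rng)    # Generator: returned unaltered
assert same is rng
fresh = default_rng()      # None: fresh, unseeded generator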
Example #5
import torch
from torch import Generator
from torch.utils.data import DataLoader

def get_things(batch_size=64, seed=0, num_workers=8):
    """
    Returns train and test THINGS triplet data loaders.
    """
    # `THINGSLoader` and `THINGSTriplets` are project-specific classes.
    things_loader = THINGSLoader(shape=(128, 128))

    data = THINGSTriplets(things_loader=things_loader)
    # train_data, test_data = train_test_split(data, test_size=15000,  random_state=seed)
    # The hard-coded lengths must sum to len(data).
    train_data, test_data = torch.utils.data.random_split(
        data, [1446680, 15000], generator=Generator().manual_seed(seed))
    train_data = DataLoader(
        train_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers)  #, pin_memory=True, num_workers=16)
    test_data = DataLoader(
        test_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers)  #, pin_memory=True, num_workers=16)

    return train_data, test_data
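On recent PyTorch versions (1.13 or newer, where fractional lengths are supported), random_split can take fractions instead of hard-coded counts; a minimal sketch with a toy dataset:

import torch
from torch import Generator
from torch.utils.data import TensorDataset, random_split

data = TensorDataset(torch.arange(100).float())  # hypothetical toy data
train, test = random_split(data, [0.8, 0.2], generator=Generator().manual_seed(0))
assert len(train) == 80 and len(test) == 20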
Example #6
from itertools import accumulate as _accumulate

from torch import Generator, randperm
from torch.utils.data import Subset

def pseudo_random_split(dataset, lengths, seed=42):
    r"""
    Randomly split a dataset into non-overlapping new datasets of given lengths.
    The generator is seeded with ``seed`` for reproducible results, e.g.:
    >>> pseudo_random_split(range(10), [3, 7], seed=42)
    Arguments:
        dataset (Dataset): Dataset to be split
        lengths (sequence): lengths of splits to be produced
        seed (int): seed for the Generator used for the random permutation
    """
    generator = Generator().manual_seed(seed)
    # Cannot verify that dataset is Sized
    if sum(lengths) != len(dataset):  # type: ignore
        raise ValueError(
            "Sum of input lengths does not equal the length of the input dataset!"
        )

    indices = randperm(sum(lengths), generator=generator).tolist()
    return [
        Subset(dataset, indices[offset - length:offset])
        for offset, length in zip(_accumulate(lengths), lengths)
    ]
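A quick usage sketch with a toy dataset:

splits = pseudo_random_split(list(range(10)), [3, 7], seed=42)
assert [len(s) for s in splits] == [3, 7]
# The subsets partition the indices 0..9 without overlap.
assert sorted(i for s in splits for i in s) == list(range(10))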
Example #7
from torch import Generator
from torch.utils.data import DataLoader, random_split

def calculate_triplet_score(model, train_size=10000, test_size=5000, batch_size=16, dataset="dsprites", seed=0):
    print("Dataset", dataset)
    if dataset == "dsprites":
        train_data, test_data = rpm.get_dsprites(train_size=train_size, test_size=test_size,
                                                 dataset=ColourDSpritesTriplets, batch_size=batch_size, k=None)
    else:
        # Note: at this point `dataset` is still the name string, but
        # random_split expects a Dataset object; presumably a dataset
        # instance should be constructed or passed in here.
        train_data, test_data = random_split(dataset, [train_size, test_size], generator=Generator().manual_seed(seed))
        train_data = DataLoader(train_data, batch_size=batch_size)
        test_data = DataLoader(test_data, batch_size=batch_size)
    train_loc, train_y = batch_sample_latent_triplets(model, train_data, train_size, batch_size=batch_size)
    assert train_loc.shape[0] == train_size
    assert train_y.shape[0] == train_size
    test_loc, test_y = batch_sample_latent_triplets(model, test_data, test_size, batch_size=batch_size)
    assert test_loc.shape[0] == test_size
    assert test_y.shape[0] == test_size
    train_acc, test_acc = predict_triplets(train_loc, train_y, test_loc, test_y)
    scores = {}
    scores['mean_train_k'] = train_acc
    # scores['std_train_k'] = np.std(train_acc)
    scores['triplet_10k'] = test_acc
    # scores['std_test_k'] = np.std(test_acc)
    return scores
Example #8
    # Fragment of a script's entry point; it relies on imports such as:
    #   from torch import Generator
    #   from torch.utils.data import random_split
    #   from torchvision.datasets import MNIST
    #   from torchvision.transforms import Compose, ToTensor
    #   from matplotlib.pyplot import figure, legend, savefig, show, title, xticks
    #   from os.path import join
    args              = parse_args()

    transform         = Compose([ToTensor()])

    train_dataset     = MNIST(root      = args.root,
                              train     = True,
                              transform = transform,
                              download  = True)
    test_dataset      = MNIST(root      = args.root,
                              train     = False,
                              transform = transform,
                              download  = True)

    len_validation    = int(args.validation * len(train_dataset))
    train,validation  = random_split(train_dataset,[len(train_dataset)-len_validation,len_validation],
                                     generator = None if args.seed is None else Generator().manual_seed(args.seed))

    figure(figsize=(10,10))
    save_plot_dataset(train,'train.pt',
                      path = args.data)
    save_plot_dataset(validation,'validation.pt',
                      path   = args.data,
                      colour = 'xkcd:blue')

    xticks(range(-1,10))
    legend()
    title ('Frequencies of Classes')
    savefig(join(args.figs,'freqs'))
    if args.show:
        show()
Example #9
import math
from typing import Optional

import torch
import torch.nn.functional as F
from torch import Generator
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10
from torchvision.transforms import (Compose, Normalize, RandomCrop,
                                    RandomHorizontalFlip, ToTensor)

# `interlocking_backprop`, `resnet_builder`, and `_compute_accuracy` are
# project-specific and assumed importable.

def main(dataset_root: str, mode: str):
    normalize_transform = Compose([
        ToTensor(),
        Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2460, 0.2411, 0.2576)),
    ])
    augment_transform = Compose(
        [RandomHorizontalFlip(),
         RandomCrop(32, padding=4)])
    train_dataset = CIFAR10(
        dataset_root,
        train=True,
        transform=Compose([augment_transform, normalize_transform]),
    )
    validation_dataset = CIFAR10(dataset_root,
                                 train=True,
                                 transform=normalize_transform)
    validation_length = int(math.floor(len(train_dataset) * 0.10))
    train_length = len(train_dataset) - validation_length
    # Splitting twice with the same seed produces the same permutation, so the
    # train subset below and the validation subset (taken from the copy with
    # eval-time transforms) are disjoint.
    train_dataset, _ = random_split(
        train_dataset,
        lengths=[train_length, validation_length],
        generator=Generator().manual_seed(0),
    )
    _, validation_dataset = random_split(
        validation_dataset,
        lengths=[train_length, validation_length],
        generator=Generator().manual_seed(0),
    )

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=128,
                                  shuffle=True,
                                  num_workers=4,
                                  pin_memory=True)
    validation_dataloader = DataLoader(
        validation_dataset,
        batch_size=256,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )

    if torch.cuda.device_count() == 0:
        device: Optional[torch.device] = torch.device("cpu")
        blocks_per_component = ["rest"]
    elif torch.cuda.device_count() == 1:
        device = torch.device("cuda")
        blocks_per_component = ["rest"]
    else:
        device = None
        # For the demo, just put one block on each device, and all remaining
        # blocks on the last device.
        blocks_per_component = ["1"] * (torch.cuda.device_count() - 1) + [
            "rest"
        ]

    # block_type and architecture correspond to ResNet-32.
    main_nets, aux_nets = resnet_builder.resnet(
        block_type="basic",
        architecture="64,3/128,4/256,6/512,3",
        aux_net_architecture="conv128_bn_conv64_bn_gbpl_fc",
        blocks_per_component=blocks_per_component,
        dataset="cifar10",
        n_classes=10,
    )

    optimizer_constructor = lambda params: SGD(
        params, lr=0.1, momentum=0.9, weight_decay=2e-4)
    # Learning rate schedule for ResNet-50ish on CIFAR10, taken from:
    # https://github.com/tensorflow/models/blob/master/official/r1/resnet/cifar10_main.py#L217
    lr_scheduler_constructor = lambda optimizer: MultiStepLR(
        optimizer, milestones=[91, 136, 182], gamma=0.1)
    loss_function = F.cross_entropy

    if mode == "e2e":
        model = interlocking_backprop.build_e2e_model(
            main_nets, optimizer_constructor, lr_scheduler_constructor,
            loss_function)
    elif mode == "local":
        model = interlocking_backprop.build_local_model(
            main_nets,
            aux_nets,
            optimizer_constructor,
            lr_scheduler_constructor,
            loss_function,
        )
    elif mode == "pairwise":
        model = interlocking_backprop.build_pairwise_model(
            main_nets,
            aux_nets,
            optimizer_constructor,
            lr_scheduler_constructor,
            loss_function,
        )
    elif mode == "3wise":
        model = interlocking_backprop.build_nwise_model(
            main_nets,
            aux_nets,
            optimizer_constructor,
            lr_scheduler_constructor,
            loss_function,
            nwise_communication_distance=3 - 1,
        )
    else:
        raise ValueError(f"Unknown mode {mode}")

    if torch.cuda.device_count() > 1:
        model.enable_model_parallel()
    else:
        model = model.to(device)

    print(
        f"Epoch 0: "
        f"validation accuracy = {_compute_accuracy(validation_dataloader, model):.2f}"
    )

    for epoch in range(100):
        model.train()
        losses = []
        for inputs, targets in train_dataloader:
            loss = model.training_step(inputs, targets)
            losses.append(loss)
        train_loss = (torch.stack([loss.result() for loss in losses],
                                  dim=0).mean().item())
        validation_accuracy = _compute_accuracy(validation_dataloader, model)
        print(f"Epoch {epoch + 1}: "
              f"training loss = {train_loss:.3f} "
              f"validation accuracy = {validation_accuracy:.2f}")
Example #10
File: torch.py Project: lqf96/mltk
from contextlib import contextmanager
from typing import Any

import torch as th

# `_SEED_MASK` is defined elsewhere in mltk. The decorator is an assumption:
# the function yields inside a `with` block, so it is evidently meant to be
# used as a context manager.
@contextmanager
def use_rand(rand: th.Generator, **kwargs: Any):
    # Fork and seed current global random state
    with th.random.fork_rng(devices=(rand.device, ), **kwargs):
        th.random.manual_seed(rand.seed() & _SEED_MASK)
        yield
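A minimal usage sketch, assuming a CUDA generator so that fork_rng can snapshot the matching device state:

import torch as th

if th.cuda.is_available():
    rand = th.Generator("cuda")
    rand.manual_seed(7)
    with use_rand(rand):
        x = th.randn(3, device="cuda")  # drawn from the temporarily seeded global RNG
    # The previous global RNG state is restored on exit.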
Example #11
    # Method from a larger class; it relies on imports such as:
    #   import os
    #   import numpy as np
    #   import tonic
    #   from torch import Generator
    #   from torch.utils.data import SubsetRandomSampler
    def load(self, dataset, trainset=True, jitonic=[None, None], subset_size=None, kfold=None, kfold_ind=None):

        # Note: if both entries of `jitonic` are set, the time-jitter
        # transform below replaces the spatial-jitter one.
        if jitonic[1] is not None:
            print(f'spatial jitter -> var = {jitonic[1]}')
            transform = tonic.transforms.Compose([tonic.transforms.SpatialJitter(variance_x=jitonic[1], variance_y=jitonic[1], sigma_x_y=0, integer_coordinates=True, clip_outliers=True)])

        if jitonic[0] is not None:
            print(f'time jitter -> var = {jitonic[0]}')
            transform = tonic.transforms.Compose([tonic.transforms.TimeJitter(variance=jitonic[0], integer_timestamps=False, clip_negative=True, sort_timestamps=True)])

        if jitonic == [None, None]:
            print('no jitter')
            transform = None

        download=False
        path = '../Data/'
        if dataset == 'nmnist':
            if trainset:
                path+='Train/'
            else:
                path+='Test/'
            if not os.path.exists(path):
                download=True

            eventset = tonic.datasets.NMNIST(save_to='../Data/',
                                train=trainset, download=download,
                                transform=transform)
        elif dataset == 'poker':
            if trainset:
                path+='pips_train/'
            else:
                path+='pips_test/'
            if not os.path.exists(path):
                download=True
            eventset = tonic.datasets.POKERDVS(save_to='../Data/',
                                train=trainset, download=download,
                                transform=transform)
        elif dataset == 'gesture':
            if trainset:
                path+='ibmGestureTrain/'
            else:
                path+='ibmGestureTest/'
            if not os.path.exists(path):
                download=True
            eventset = tonic.datasets.DVSGesture(save_to='../Data/',
                                train=trainset, download=download,
                                transform=transform)
        elif dataset == 'cars':
            if trainset:
                path+='ncars-train/'
            else:
                path+='ncars-test/'
            if not os.path.exists(path):
                download=True
            eventset = tonic.datasets.NCARS(save_to='../Data/',
                                train=trainset, download=download,
                                transform=transform)
        elif dataset == 'ncaltech':
            eventset = tonic.datasets.NCALTECH101(save_to='../Data/',
                                train=trainset, download=download,
                                transform=transform)
        else:
            # Raising here avoids falling through with `eventset` undefined.
            raise ValueError(f'incorrect dataset: {dataset}')

        if subset_size is not None:
            subset_indices = []
            for i in range(len(eventset.classes)):
                all_ind = np.where(np.array(eventset.targets)==i)[0]
                subset_indices += all_ind[:subset_size//len(eventset.classes)].tolist()
            # Seeding g_cpu (e.g. Generator().manual_seed(42)) would make the
            # sampling order reproducible.
            g_cpu = Generator()
            subsampler = SubsetRandomSampler(subset_indices, g_cpu)
            loader = tonic.datasets.DataLoader(eventset, batch_size=1, shuffle=False, sampler=subsampler)
        elif kfold is not None:
            subset_indices = []
            # `testset` was undefined in the original snippet; `eventset` is
            # presumably what was meant.
            subset_size = len(eventset)//kfold
            for i in range(len(eventset.classes)):
                all_ind = np.where(np.array(eventset.targets)==i)[0]
                subset_indices += all_ind[kfold_ind*subset_size//len(eventset.classes):
                            min((kfold_ind+1)*subset_size//len(eventset.classes), len(eventset)-1)].tolist()
            g_cpu = Generator()
            subsampler = SubsetRandomSampler(subset_indices, g_cpu)
            loader = tonic.datasets.DataLoader(eventset, batch_size=1, shuffle=False, sampler=subsampler)
        else:
            loader = tonic.datasets.DataLoader(eventset, shuffle=True)

        if eventset.sensor_size != self.TS[0].camsize:
            print('sensor formatting...')
            self.sensformat(eventset.sensor_size)

        return loader, eventset.ordering, eventset.classes
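The sampler pattern above also works standalone; a minimal sketch with a toy dataset (the data and indices are assumptions for illustration):

import torch
from torch import Generator
from torch.utils.data import DataLoader, SubsetRandomSampler, TensorDataset

data = TensorDataset(torch.arange(10).float())  # hypothetical toy data
sampler = SubsetRandomSampler([0, 2, 4, 6, 8], generator=Generator().manual_seed(0))
loader = DataLoader(data, batch_size=1, sampler=sampler)
# Iterating `loader` visits only the even indices, in a reproducible random order.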