def __init__(self,
             transform=None,
             first_time_multiplier=1,
             name=None,
             joking=False):
        """Build unbalanced CIFAR-10 datasets and train/validation/test loaders.

        Args:
            transform: torchvision transform applied to every sample.
            first_time_multiplier: scales the initial labelled pool; the pool
                holds tslp * first_time_multiplier samples.
            name: filename forwarded to UnbalancedCIFAR10.
            joking: when True, skip initialisation entirely and leave the
                object empty.
        """
        if joking:
            # Escape hatch: construct the object without touching any data.
            return

        # Train+validation source; percentage=.1 is forwarded to the dataset,
        # whose _val_indices attribute is read back below.
        self._train_val_set = customcifar.UnbalancedCIFAR10(root="./cifar",
                                                            train=True,
                                                            download=True,
                                                            transform=transform,
                                                            filename=name,
                                                            percentage=.1)

        # CIFAR-10 test split (10000 images).
        self._test_set = customcifar.UnbalancedCIFAR10(root="./cifar",
                                                       train=False,
                                                       download=True,
                                                       transform=transform)

        self.validation_indices = self._train_val_set._val_indices
        self.train_indices = [idx for idx in self._train_val_set.indices
                              if idx not in self.validation_indices]

        # Seed the labelled pool with a uniform random draw, no repeats.
        self.already_selected_indices = numpy.random.choice(
            self.train_indices,
            size=tslp * first_time_multiplier,
            replace=False).tolist()

        labelled_sampler = customcifar.CustomRandomSampler(self.already_selected_indices)
        self._train = tud.DataLoader(self._train_val_set,
                                     batch_size=train_batch_size,
                                     shuffle=False,
                                     num_workers=2,
                                     sampler=labelled_sampler)

        validation_sampler = customcifar.CustomRandomSampler(self.validation_indices)
        self._v = tud.DataLoader(self._train_val_set,
                                 batch_size=100,
                                 shuffle=False,
                                 num_workers=2,
                                 sampler=validation_sampler)

        test_sampler = customcifar.CustomSampler(list(range(len(self._test_set))))
        self._t = torch.utils.data.DataLoader(self._test_set,
                                              batch_size=100,
                                              shuffle=False,
                                              num_workers=2,
                                              sampler=test_sampler)
 def select_for_train(self, indices):
     """Add `indices` to the labelled pool and return a loader over just them."""
     self.already_selected_indices.extend(indices)
     new_sampler = customcifar.CustomRandomSampler(indices)
     return tud.DataLoader(self._train_val_set,
                           batch_size=train_batch_size,
                           shuffle=False,
                           num_workers=2,
                           sampler=new_sampler)
 def validate(self):
     """Return a fresh DataLoader over the held-out validation indices."""
     val_sampler = customcifar.CustomRandomSampler(self.validation_indices)
     return tud.DataLoader(self._train_val_set,
                           batch_size=100,
                           shuffle=False,
                           num_workers=2,
                           sampler=val_sampler)
    def restore(self, all, selected, validation, transform=None, name=None):
        """Rebuild datasets and loaders from previously saved index lists.

        NOTE(review): parameter `all` shadows the builtin; the name is kept
        because it is part of the caller-visible signature.

        Args:
            all: every train/validation index previously in use.
            selected: indices already in the labelled pool.
            validation: held-out validation indices.
            transform: torchvision transform applied to every sample.
            name: filename forwarded to UnbalancedCIFAR10.

        Returns:
            self, so the call can be chained.
        """
        # Recreate the train+validation dataset with the saved split.
        self._train_val_set = customcifar.UnbalancedCIFAR10(
            root="./cifar", train=True, download=True, transform=transform,
            filename=name, percentage=.1, provided_indices=(all, validation))
        # CIFAR-10 test split (10000 images).
        self._test_set = customcifar.UnbalancedCIFAR10(
            root="./cifar", train=False, download=True, transform=transform)

        self.validation_indices = validation
        self.train_indices = [idx for idx in all
                              if idx not in self.validation_indices]
        self.already_selected_indices = selected

        self._train = tud.DataLoader(
            self._train_val_set, batch_size=train_batch_size, shuffle=False,
            num_workers=2,
            sampler=customcifar.CustomRandomSampler(self.already_selected_indices))
        self._v = tud.DataLoader(
            self._train_val_set, batch_size=100, shuffle=False, num_workers=2,
            sampler=customcifar.CustomRandomSampler(self.validation_indices))
        self._t = torch.utils.data.DataLoader(
            self._test_set, batch_size=100, shuffle=False, num_workers=2,
            sampler=customcifar.CustomSampler(list(range(len(self._test_set)))))
        return self
# Exemplo n.º 5
# 0
    def __init__(self):
        """Load balanced CIFAR-10 and carve out validation / initial train indices.

        Relies on module-level globals: transform, num_of_classes,
        val_percentage, initial_percentage.
        """
        self.dataset = customcifar.CustomCIFAR10(root="./cifar", train=True,
                                                 download=True, transform=transform)
        self.testset = customcifar.CustomCIFAR10(root="./cifar", train=False,
                                                 download=True, transform=transform)

        # One pass over the training set to bucket sample indices by label.
        every_index = customcifar.CustomRandomSampler(list(range(len(self.dataset))))
        loader = tud.DataLoader(self.dataset, batch_size=64, shuffle=False,
                                num_workers=2, sampler=every_index)
        el_for_class = [[] for _ in range(num_of_classes)]
        for inputs, targets, index in loader:
            for t in range(len(targets)):
                el_for_class[targets[t]].append(index[t].item())

        # Draw an equal number of validation samples from every class.
        val_els_per_class = int((len(self.dataset) * val_percentage) / num_of_classes)
        self.validation_indices = [
            el
            for xl in el_for_class
            for el in numpy.random.choice(xl, size=val_els_per_class, replace=False)
        ]
        self.remaining_indices = [x for x in range(len(self.dataset))
                                  if x not in self.validation_indices]

        # Initial labelled pool: a random slice of whatever is left over.
        initial_size = int(len(self.remaining_indices) * initial_percentage)
        self.train_indices = numpy.random.choice(self.remaining_indices,
                                                 size=initial_size,
                                                 replace=False)

        print("Dataset loaded: train length {0}/{3} | validation length {1} | test length {2}".format(
            len(self.train_indices), len(self.validation_indices),
            len(self.testset), len(self.remaining_indices)))
    def __init__(self,
                 transform=None,
                 first_time_multiplier=1,
                 name=None,
                 unbal=True):
        """Build unbalanced CIFAR-10 loaders with an unbalanced or balanced seed pool.

        Args:
            transform: torchvision transform applied to every sample.
            first_time_multiplier: multiplies the module-level pool size tslp
                when drawing the unbalanced initial selection.
            name: filename forwarded to UnbalancedCIFAR10.
            unbal: if True the initial labelled pool is drawn uniformly at
                random (so it inherits the dataset imbalance); if False it is
                drawn evenly across the 10 classes.
        """
        # Train+validation source; 10% reserved for validation by the dataset.
        self._train_val_set = customcifar.UnbalancedCIFAR10(
            root="./cifar",
            train=True,
            download=True,
            transform=transform,
            filename=name,
            percentage=.1)

        # CIFAR-10 test split (10000 images).
        self._test_set = customcifar.UnbalancedCIFAR10(
            root="./cifar", train=False, download=True,
            transform=transform)

        self.validation_indices = self._train_val_set._val_indices

        self.train_indices = [
            x for x in self._train_val_set.indices
            if x not in self.validation_indices
        ]

        # Log the per-class composition of the candidate training pool.
        print([
            len([
                x for x in self.train_indices
                if x in self._train_val_set.el_for_class[i]
            ]) for i in range(10)
        ])

        if unbal:
            # Uniform draw: class proportions follow the (unbalanced) dataset.
            self.already_selected_indices = numpy.random.choice(
                self.train_indices,
                size=tslp * first_time_multiplier,
                replace=False).tolist()
        else:
            # Balanced draw: split tslp as evenly as possible over 10 classes,
            # giving the first (tslp % 10) classes one extra element.
            # BUGFIX: was `1 if i < tslp % int(tslp / 10)`, which mis-sizes the
            # split for tslp < 100 and raises ZeroDivisionError for tslp < 10;
            # `tslp % 10` always sums to exactly tslp.
            lenel = [
                tslp // 10 + (1 if i < tslp % 10 else 0)
                for i in range(10)
            ]
            self.already_selected_indices = [
                x for i in range(10)
                for x in numpy.random.choice([
                    xx for xx in self._train_val_set.el_for_class[i]
                    if xx not in self.validation_indices
                ],
                                             size=lenel[i],
                                             replace=False).tolist()
            ]

        # Log the per-class composition of the initial labelled pool.
        print("Selected: {}".format([
            len([
                x for x in self.already_selected_indices
                if x in self._train_val_set.el_for_class[i]
            ]) for i in range(10)
        ]))

        self._train = tud.DataLoader(self._train_val_set,
                                     batch_size=train_batch_size,
                                     shuffle=False,
                                     num_workers=2,
                                     sampler=customcifar.CustomRandomSampler(
                                         self.already_selected_indices))

        self._v = tud.DataLoader(self._train_val_set,
                                 batch_size=100,
                                 shuffle=False,
                                 num_workers=2,
                                 sampler=customcifar.CustomRandomSampler(
                                     self.validation_indices))
        self._t = torch.utils.data.DataLoader(
            self._test_set,
            batch_size=100,
            shuffle=False,
            num_workers=2,
            sampler=customcifar.CustomSampler(
                list(range(len(self._test_set)))))
# Exemplo n.º 7
# 0
    def distance_and_varratio(self, ds, indices, howmany, train_indices, n=5):
        """Greedily pick `howmany` unlabelled indices by combined score.

        Score per candidate = distance_weight * (min feature-space distance to
        the labelled set, normalized by the max such distance)
        + varratio_weight * (1 - variation-ratio confidence), where features
        and predictions are averaged over `n` forward passes. After each pick,
        distances are updated against the newly selected point (k-centre
        greedy style).

        Args:
            ds: dataset wrapper exposing `_train_val_set`.
            indices: candidate (unlabelled) sample indices.
            howmany: number of indices to return.
            train_indices: currently labelled sample indices.
            n: number of forward passes per sample.

        Returns:
            List of `howmany` dataset indices, best score first.
        """
        # Distance term is heavily down-weighted relative to var-ratio.
        distance_weight = 1e-5
        varratio_weight = 1

        self.net.eval()
        N = torch.Tensor().to("cuda:0")  # labelled feature matrix
        S = torch.Tensor().to("cuda:0")  # unlabelled feature matrix
        # [0]: per-candidate (1 - confidence) scores; [1]: their dataset indices.
        normalized_confidence = [torch.Tensor().to("cuda:0"), torch.Tensor().long()]

        # Shuffle the candidate pool once; all n loaders share this list.
        randomized_list = numpy.random.choice([x for x in indices], len(indices), replace=False)

        # n loaders per pool so zip() yields n versions of each batch.
        # NOTE(review): per-sample averaging below assumes all n samplers
        # iterate the indices in the same order — confirm in customcifar.
        trainloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                       sampler=customcifar.CustomRandomSampler(train_indices)) for i in range(n)]
        dataloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                      sampler=customcifar.CustomSampler(randomized_list)) for i in range(n)]
        with torch.no_grad():
            for batch_index, element in enumerate(zip(*trainloaders)):  # labelled samples
                els = [x for x in element]
                o = torch.Tensor().to("cuda:0")
                for input in els:
                    input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                    # net's second output, reshaped to (batch, 512, 1); the n
                    # passes are stacked along dim 2 and averaged later.
                    o = torch.cat((o, self.net(input[0])[1].reshape(len(input[0]), 512, 1)), 2)
                N = torch.cat((N, o), 0)
                print("\r N: {0} ".format(N.size()), end="")
            print("")

            for batch_index, element in enumerate(zip(*dataloaders)):  # unlabelled samples
                # Remember which dataset indices this batch covers.
                normalized_confidence[1] = torch.cat((normalized_confidence[1], element[0][2]), 0)

                els = [x for x in element]
                o = torch.Tensor().to("cuda:0")
                predictions = torch.Tensor().long()

                for input in els:
                    input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                    output = self.net(input[0])
                    out = output[1].reshape(len(input[0]), 512, 1)

                    o = torch.cat((o, out), 2)
                    # Collect argmax class of each pass (one column per pass).
                    predictions = torch.cat((predictions, output[0].max(1)[1].reshape(len(output[0]), 1).cpu()), 1)

                # 1 - confidence/n: variation-ratio style disagreement score.
                normalized_confidence[0] = torch.cat((normalized_confidence[0].cpu(), 1 - torch.Tensor(
                    acquisition_functions.confidence(predictions.transpose(0,1))).cpu() / n), 0).cpu()

                S = torch.cat((S, o), 0)
                print("\r S: {0} ".format(S.size()), end="")
            print("")
            # Average features over the n passes.
            S = (torch.sum(S, 2)) / n
            N = (torch.sum(N, 2)) / n

            # Pairwise Euclidean distances unlabelled->labelled, in chunks of
            # 25 rows to bound memory.
            S_batches = torch.split(S, 25, dim =0)
            dist_S_N = torch.Tensor()
            for el in S_batches:
                partial_dist = el.unsqueeze(1) - N.unsqueeze(0)
                partial_dist = torch.sum(partial_dist * partial_dist, -1)
                partial_dist = torch.sqrt(partial_dist)
                dist_S_N = torch.cat((dist_S_N, partial_dist.cpu()), 0)

            # Distance from each candidate to its nearest labelled point.
            mindist = torch.min(dist_S_N, 1)[0].to("cuda:0")

            normalizing_factor = torch.max(mindist, -1)[0]
            print("NF : " + str(normalizing_factor))

            # Combined score (confidence part already computed above).
            mindist_confidence = (distance_weight*(mindist / normalizing_factor)) + (varratio_weight * normalized_confidence[0].to("cuda:0"))

            erlist_indexes = normalized_confidence[1]
            new_N = []

            for i in range(howmany):
                # Take the current best-scoring candidate.
                maxx = torch.max(mindist_confidence, -1)[1]
                print("Max: {0:.3f} = ({1:.3f} * {3}) + ({2:.3f} * {4})".format(mindist_confidence[maxx], mindist[maxx]/normalizing_factor, normalized_confidence[0][maxx], distance_weight, varratio_weight))

                if erlist_indexes[maxx].item() in new_N:
                    print("Error: Duplicate")

                new_N.append(erlist_indexes[maxx].item())
                # -inf so this candidate can never be selected again.
                mindist[maxx] = float("-inf")
                mindist_confidence[maxx] = float("-inf")

                # k-centre update: shrink each candidate's min distance using
                # the point just added to the labelled set.
                newdists = S - S[maxx].reshape(1, len(S[maxx]))
                newdists = torch.sum(newdists * newdists, -1)
                newdists = torch.sqrt(newdists)
                mindist = torch.min(mindist, newdists)
                mindist_confidence = (distance_weight*(mindist / normalizing_factor)) + (varratio_weight * normalized_confidence[0].to("cuda:0"))
            return new_N
# Exemplo n.º 8
# 0
    def kl_divergence(self, ds, indices, howmany, train_indices, n=5):
        """Select `howmany` unlabelled indices by KL divergence to the labelled set.

        Builds average softmax outputs (over `n` forward passes) for labelled
        and unlabelled samples, computes a KL-style divergence (log base 2)
        from each candidate to every labelled sample, weights each candidate's
        minimum divergence by its (1.1 - confidence) score, and returns the
        `howmany` candidates with the largest weighted value.

        Args:
            ds: dataset wrapper exposing `_train_val_set`.
            indices: candidate (unlabelled) sample indices.
            howmany: number of indices to return.
            train_indices: currently labelled sample indices.
            n: number of forward passes per sample.

        Returns:
            List of `howmany` dataset indices, highest score first.
        """
        self.net.eval()
        N = torch.Tensor().to("cuda:0")  # labelled softmax outputs (stacked per pass)
        S = torch.Tensor().to("cuda:0")  # unlabelled softmax outputs (stacked per pass)
        # [0]: per-candidate (1.1 - confidence) weights; [1]: dataset indices.
        normalized_confidence = [torch.Tensor().to("cuda:0"), torch.Tensor().long()]


        # Shuffle the candidate pool once; all n loaders share this list.
        randomized_list = numpy.random.choice([x for x in indices], len(indices), replace=False)

        # n loaders per pool so zip() yields n versions of each batch.
        # NOTE(review): per-sample averaging below assumes all n samplers
        # iterate the indices in the same order — confirm in customcifar.
        trainloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                    sampler=customcifar.CustomRandomSampler(train_indices)) for i in range(n)]
        dataloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                      sampler=customcifar.CustomSampler(randomized_list)) for i in range(n)]
        with torch.no_grad():
            for batch_index, element in enumerate(zip(*trainloaders)):  # labelled samples
                els = [x for x in element]
                o = torch.Tensor().to("cuda:0")
                for input in els:
                    input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                    # net's first output (10 logits), one slab per pass on dim 2.
                    o = torch.cat((o, self.net(input[0])[0].reshape(len(input[0]),10, 1)), 2)
                N = torch.cat((N, o), 0)
                print("\r N: {0} ".format(N.size()), end="")
            print("")

            for batch_index, element in enumerate(zip(*dataloaders)):  # unlabelled samples
                # Remember which dataset indices this batch covers.
                normalized_confidence[1] = torch.cat((normalized_confidence[1], element[0][2]), 0)

                els = [x for x in element]
                o = torch.Tensor().to("cuda:0")
                predictions = torch.Tensor().long().to("cuda:0")
                for input in els:
                    input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                    out = self.net(input[0])[0].reshape(len(input[0]), 10, 1)
                    o = torch.cat((o, out), 2)
                    # Argmax class of each pass (one column per pass).
                    predictions = torch.cat((predictions, out.max(1)[1]), 1).to("cuda:0")
                # 1.1 - confidence/n keeps the weight strictly positive.
                normalized_confidence[0] = torch.cat((normalized_confidence[0].cpu(), 1.1 - torch.Tensor(acquisition_functions.confidence(predictions.transpose(1, 0))).cpu() / n), 0).cpu()

                S = torch.cat((S, o), 0)
                print("\r S: {0} ".format(S.size()), end="")
            print("")

            # Average softmax distributions over the n passes.
            S = (torch.sum(F.softmax(S, dim=1), 2)) /n
            N = (torch.sum(F.softmax(N, dim=1), 2)) /n

            # Elementwise ratio S_i / N_j for every candidate/labelled pair,
            # then log2 (KL divergence measured in bits).
            S_on_N = S.to("cpu").unsqueeze(1) / N.to("cpu").unsqueeze(0)
            ln_S_on_N = numpy.log2(S_on_N).reshape(len(N), len(S), 10).transpose(0,1)

            # Chunk to bound memory during the batched matmul below.
            ln_S_on_N_batches = torch.split(ln_S_on_N, 300, dim=0)
            S_batches = torch.split(S, 300, dim=0)




            kldiv = torch.Tensor()
            for i in range(len(ln_S_on_N_batches)):
                # sum_c S_i(c) * log2(S_i(c)/N_j(c)) via batch matrix multiply.
                partial_kldiv = torch.bmm(ln_S_on_N_batches[i].to("cuda:0"), S_batches[i].reshape(len(S_batches[i]), 10, 1)).cpu()
                kldiv = torch.cat((partial_kldiv, kldiv), 0)
                print(kldiv.size())
            kldiv = kldiv.reshape(len(S), len(N))

            # Min divergence to any labelled sample, weighted by confidence.
            mindiv = torch.min(kldiv, 1)[0]* normalized_confidence[0]
            errorlist = [[mindiv[i].item(), normalized_confidence[1][i].item() ]for i in range(len(normalized_confidence[0]))]
            sorlist = sorted(errorlist, key=lambda xp: xp[0], reverse=True)

            return [x[1] for x in sorlist[:howmany]]
# Exemplo n.º 9
# 0
 def get_test_loader(self):
     """Return a DataLoader covering every sample of the test set."""
     all_test_indices = list(range(len(self.testset)))
     return tud.DataLoader(self.testset, batch_size=64, shuffle=False,
                           num_workers=2,
                           sampler=customcifar.CustomRandomSampler(all_test_indices))
# Exemplo n.º 10
# 0
 def get_validation_loader(self):
     """Return a DataLoader over the held-out validation indices."""
     val_sampler = customcifar.CustomRandomSampler(self.validation_indices)
     return tud.DataLoader(self.dataset, batch_size=64, shuffle=False,
                           num_workers=2, sampler=val_sampler)