def __init__(self, transform=None, first_time_multiplier=1, name=None, joking=False):
    """Build an active-learning split over an unbalanced CIFAR-10 dataset.

    Args:
        transform: torchvision-style transform applied to every sample.
        first_time_multiplier: multiplies the module-level ``tslp`` to set
            the size of the initial labelled pool.
        name: forwarded to ``UnbalancedCIFAR10`` as ``filename``.
        joking: when True, skip all initialisation and leave the instance
            empty (dry-run escape hatch; callers then populate state via
            ``restore``).
    """
    if joking:
        return
    self._train_val_set = customcifar.UnbalancedCIFAR10(
        root="./cifar", train=True, download=True, transform=transform,
        filename=name, percentage=.1)
    # 10000 test images
    self._test_set = customcifar.UnbalancedCIFAR10(
        root="./cifar", train=False, download=True, transform=transform)
    self.validation_indices = self._train_val_set._val_indices
    # Set-based lookup: the original `x not in <list>` scanned the whole
    # validation list per training index, O(|train| * |val|).
    val_lookup = set(self.validation_indices)
    self.train_indices = [
        x for x in self._train_val_set.indices if x not in val_lookup
    ]
    # Initial labelled pool: tslp * first_time_multiplier random samples.
    self.already_selected_indices = numpy.random.choice(
        self.train_indices, size=tslp * first_time_multiplier,
        replace=False).tolist()
    self._train = tud.DataLoader(
        self._train_val_set, batch_size=train_batch_size, shuffle=False,
        num_workers=2,
        sampler=customcifar.CustomRandomSampler(self.already_selected_indices))
    self._v = tud.DataLoader(
        self._train_val_set, batch_size=100, shuffle=False, num_workers=2,
        sampler=customcifar.CustomRandomSampler(self.validation_indices))
    self._t = torch.utils.data.DataLoader(
        self._test_set, batch_size=100, shuffle=False, num_workers=2,
        sampler=customcifar.CustomSampler(list(range(len(self._test_set)))))
def select_for_train(self, indices):
    """Record *indices* as newly labelled and return a loader over them only.

    The indices are appended to ``already_selected_indices``; the returned
    DataLoader samples just the freshly selected batch, not the whole pool.
    """
    self.already_selected_indices.extend(indices)
    new_sampler = customcifar.CustomRandomSampler(indices)
    return tud.DataLoader(
        self._train_val_set,
        batch_size=train_batch_size,
        shuffle=False,
        num_workers=2,
        sampler=new_sampler,
    )
def validate(self):
    """Return a fresh DataLoader over the held-out validation indices."""
    val_sampler = customcifar.CustomRandomSampler(self.validation_indices)
    return tud.DataLoader(
        self._train_val_set,
        batch_size=100,
        shuffle=False,
        num_workers=2,
        sampler=val_sampler,
    )
def restore(self, all, selected, validation, transform=None, name=None):
    """Rebuild dataset state from a previously saved split.

    Args:
        all: every usable training index from the saved run.
        selected: indices that were already labelled.
        validation: indices reserved for validation.
        transform: torchvision-style transform applied to every sample.
        name: forwarded to ``UnbalancedCIFAR10`` as ``filename``.

    Returns:
        self, so the call can be chained.
    """
    self._train_val_set = customcifar.UnbalancedCIFAR10(
        root="./cifar", train=True, download=True, transform=transform,
        filename=name, percentage=.1, provided_indices=(all, validation))
    # 10000 test images
    self._test_set = customcifar.UnbalancedCIFAR10(
        root="./cifar", train=False, download=True, transform=transform)
    self.validation_indices = validation
    self.already_selected_indices = selected
    self.train_indices = [
        idx for idx in all if idx not in self.validation_indices
    ]
    train_sampler = customcifar.CustomRandomSampler(self.already_selected_indices)
    val_sampler = customcifar.CustomRandomSampler(self.validation_indices)
    test_sampler = customcifar.CustomSampler(list(range(len(self._test_set))))
    self._train = tud.DataLoader(
        self._train_val_set, batch_size=train_batch_size, shuffle=False,
        num_workers=2, sampler=train_sampler)
    self._v = tud.DataLoader(
        self._train_val_set, batch_size=100, shuffle=False, num_workers=2,
        sampler=val_sampler)
    self._t = torch.utils.data.DataLoader(
        self._test_set, batch_size=100, shuffle=False, num_workers=2,
        sampler=test_sampler)
    return self
def __init__(self):
    """Load CIFAR-10 and carve out a stratified validation split plus a
    random initial training pool.

    Uses module-level globals: ``transform``, ``num_of_classes``,
    ``val_percentage`` and ``initial_percentage``.
    """
    self.dataset = customcifar.CustomCIFAR10(
        root="./cifar", train=True, download=True, transform=transform)
    self.testset = customcifar.CustomCIFAR10(
        root="./cifar", train=False, download=True, transform=transform)
    dataloader = tud.DataLoader(
        self.dataset, batch_size=64, shuffle=False, num_workers=2,
        sampler=customcifar.CustomRandomSampler(list(range(len(self.dataset)))))
    # Bucket every sample index by its class label (one pass over the data).
    el_for_class = [[] for _ in range(num_of_classes)]
    for inputs, targets, index in dataloader:
        for t in range(len(targets)):
            el_for_class[targets[t]].append(index[t].item())
    # Stratified validation split: the same number of samples per class.
    val_els_per_class = int((len(self.dataset) * val_percentage) / num_of_classes)
    self.validation_indices = [
        el for xl in el_for_class
        for el in numpy.random.choice(xl, size=val_els_per_class, replace=False)
    ]
    # Set-based lookup: the original `x not in <list>` scanned the whole
    # validation list once per dataset index, O(n * |val|).
    val_lookup = set(int(v) for v in self.validation_indices)
    self.remaining_indices = [
        x for x in range(len(self.dataset)) if x not in val_lookup
    ]
    self.train_indices = numpy.random.choice(
        self.remaining_indices,
        size=int(len(self.remaining_indices) * initial_percentage),
        replace=False)
    print("Dataset loaded: train length {0}/{3} | validation length {1} | test length {2}".format(
        len(self.train_indices), len(self.validation_indices),
        len(self.testset), len(self.remaining_indices)))
def __init__(self, transform=None, first_time_multiplier=1, name=None, unbal=True):
    """Build an active-learning split over an unbalanced CIFAR-10 dataset.

    Args:
        transform: torchvision-style transform applied to every sample.
        first_time_multiplier: multiplies the module-level ``tslp`` to set
            the size of the initial labelled pool (only used when
            ``unbal`` is True — NOTE(review): the balanced branch ignores
            it, as in the original; confirm that is intended).
        name: forwarded to ``UnbalancedCIFAR10`` as ``filename``.
        unbal: when True the initial pool is drawn uniformly at random
            (so it inherits the dataset's class imbalance); when False it
            is drawn class-balanced, ``tslp`` samples split over 10 classes.
    """
    self._train_val_set = customcifar.UnbalancedCIFAR10(
        root="./cifar", train=True, download=True, transform=transform,
        filename=name, percentage=.1)
    # 10000 test images
    self._test_set = customcifar.UnbalancedCIFAR10(
        root="./cifar", train=False, download=True, transform=transform)
    self.validation_indices = self._train_val_set._val_indices
    # Set-based lookups: the original list scans were O(n * m) per check.
    val_lookup = set(self.validation_indices)
    class_lookup = [set(c) for c in self._train_val_set.el_for_class]
    self.train_indices = [
        x for x in self._train_val_set.indices if x not in val_lookup
    ]
    # Debug: per-class population of the remaining training pool.
    print([
        sum(1 for x in self.train_indices if x in class_lookup[i])
        for i in range(10)
    ])
    if unbal:
        # Uniform draw: the labelled pool mirrors the dataset imbalance.
        self.already_selected_indices = numpy.random.choice(
            self.train_indices, size=tslp * first_time_multiplier,
            replace=False).tolist()
    else:
        # Spread tslp samples over the 10 classes; the first (tslp % 10)
        # classes take one extra sample. BUGFIX: the original computed the
        # remainder as `tslp % int(tslp / 10)`, which equals `tslp % 10`
        # only when tslp // 10 > tslp % 10, and divides by zero for
        # tslp < 10.
        per_class = int(tslp / 10)
        lenel = [per_class + (1 if i < tslp % 10 else 0) for i in range(10)]
        self.already_selected_indices = [
            x for i in range(10)
            for x in numpy.random.choice(
                [xx for xx in self._train_val_set.el_for_class[i]
                 if xx not in val_lookup],
                size=lenel[i], replace=False).tolist()
        ]
    # Debug: per-class composition of the initial labelled pool.
    print("Selected: {}".format([
        sum(1 for x in self.already_selected_indices if x in class_lookup[i])
        for i in range(10)
    ]))
    self._train = tud.DataLoader(
        self._train_val_set, batch_size=train_batch_size, shuffle=False,
        num_workers=2,
        sampler=customcifar.CustomRandomSampler(self.already_selected_indices))
    self._v = tud.DataLoader(
        self._train_val_set, batch_size=100, shuffle=False, num_workers=2,
        sampler=customcifar.CustomRandomSampler(self.validation_indices))
    self._t = torch.utils.data.DataLoader(
        self._test_set, batch_size=100, shuffle=False, num_workers=2,
        sampler=customcifar.CustomSampler(list(range(len(self._test_set)))))
def distance_and_varratio(self, ds, indices, howmany, train_indices, n=5):
    """Greedily pick `howmany` unlabelled samples scoring a weighted mix of
    feature-space distance to the labelled set (core-set style) and a
    variation-ratio uncertainty term.

    Args:
        ds: dataset wrapper exposing `_train_val_set`.
        indices: candidate (unlabelled) dataset indices.
        howmany: number of indices to select.
        train_indices: dataset indices already labelled.
        n: number of forward passes averaged per sample.

    Returns:
        List of `howmany` selected dataset indices.
    """
    # With these weights the varratio term dominates; distance is a tiny
    # tie-breaker (1e-5 of the score).
    distance_weight = 1e-5
    varratio_weight = 1
    self.net.eval()
    N = torch.Tensor().to("cuda:0")  # features of labelled samples
    S = torch.Tensor().to("cuda:0")  # features of unlabelled samples
    # [0] accumulates (1 - confidence) per unlabelled sample,
    # [1] the matching dataset index of each sample.
    normalized_confidence = [torch.Tensor().to("cuda:0"), torch.Tensor().long()]
    randomized_list = numpy.random.choice([x for x in indices], len(indices), replace=False)
    # n loaders over the same samples -> n forward passes per sample.
    # NOTE(review): with net.eval() the passes look deterministic unless the
    # model keeps dropout active at eval time — confirm with the model code.
    trainloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                   sampler=customcifar.CustomRandomSampler(train_indices))
                    for i in range(n)]
    dataloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                  sampler=customcifar.CustomSampler(randomized_list))
                   for i in range(n)]
    with torch.no_grad():
        for batch_index, element in enumerate(zip(*trainloaders)):  # labelled samples
            els = [x for x in element]
            o = torch.Tensor().to("cuda:0")
            for input in els:
                input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                # self.net(...)[1] is taken as a 512-dim feature vector;
                # the n passes are stacked along dim 2.
                o = torch.cat((o, self.net(input[0])[1].reshape(len(input[0]), 512, 1)), 2)
            N = torch.cat((N, o), 0)
            print("\r N: {0} ".format(N.size()), end="")
        print("")
        for batch_index, element in enumerate(zip(*dataloaders)):  # unlabelled samples
            # element[0][2] carries the dataset indices of this batch.
            normalized_confidence[1] = torch.cat((normalized_confidence[1], element[0][2]), 0)
            els = [x for x in element]
            o = torch.Tensor().to("cuda:0")
            predictions = torch.Tensor().long()
            for input in els:
                input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                output = self.net(input[0])
                out = output[1].reshape(len(input[0]), 512, 1)
                o = torch.cat((o, out), 2)
                # Predicted class of each pass, collected as (batch, n).
                predictions = torch.cat((predictions, output[0].max(1)[1].reshape(len(output[0]), 1).cpu()), 1)
            # Variation-ratio style score: 1 - agreement over the n passes.
            normalized_confidence[0] = torch.cat((normalized_confidence[0].cpu(),
                                                  1 - torch.Tensor(acquisition_functions.confidence(predictions.transpose(0, 1))).cpu() / n),
                                                 0).cpu()
            S = torch.cat((S, o), 0)
            print("\r S: {0} ".format(S.size()), end="")
        print("")
    # Average the n feature passes per sample.
    S = (torch.sum(S, 2)) / n
    N = (torch.sum(N, 2)) / n
    # Pairwise Euclidean distances, computed in chunks of 25 unlabelled
    # rows to bound memory.
    S_batches = torch.split(S, 25, dim=0)
    dist_S_N = torch.Tensor()
    for el in S_batches:
        partial_dist = el.unsqueeze(1) - N.unsqueeze(0)
        partial_dist = torch.sum(partial_dist * partial_dist, -1)
        partial_dist = torch.sqrt(partial_dist)
        dist_S_N = torch.cat((dist_S_N, partial_dist.cpu()), 0)
    # Distance from each unlabelled sample to its nearest labelled sample.
    mindist = torch.min(dist_S_N, 1)[0].to("cuda:0")
    normalizing_factor = torch.max(mindist, -1)[0]
    print("NF : " + str(normalizing_factor))
    mindist_confidence = (distance_weight * (mindist / normalizing_factor)) + (varratio_weight * normalized_confidence[0].to("cuda:0"))
    # still need to recompute the confidence
    erlist_indexes = normalized_confidence[1]
    new_N = []
    for i in range(howmany):
        # maxx = torch.max(mindist, -1)[1]
        maxx = torch.max(mindist_confidence, -1)[1]
        print("Max: {0:.3f} = ({1:.3f} * {3}) + ({2:.3f} * {4})".format(mindist_confidence[maxx], mindist[maxx] / normalizing_factor, normalized_confidence[0][maxx], distance_weight, varratio_weight))
        if erlist_indexes[maxx].item() in new_N:
            print("Error: Duplicate")
        new_N.append(erlist_indexes[maxx].item())
        # Mask the chosen sample with -inf so it is never picked again
        # (torch.min below keeps the -inf in place).
        mindist[maxx] = float("-inf")
        mindist_confidence[maxx] = float("-inf")
        # Core-set update: the chosen sample now counts as labelled, so
        # every remaining min-distance can only shrink toward it.
        newdists = S - S[maxx].reshape(1, len(S[maxx]))
        newdists = torch.sum(newdists * newdists, -1)
        newdists = torch.sqrt(newdists)
        mindist = torch.min(mindist, newdists)
        mindist_confidence = (distance_weight * (mindist / normalizing_factor)) + (varratio_weight * normalized_confidence[0].to("cuda:0"))
    return new_N
def kl_divergence(self, ds, indices, howmany, train_indices, n=5):
    """Select `howmany` unlabelled samples ranking them by the minimum
    KL-style divergence between their averaged softmax and those of the
    labelled samples, scaled by a variation-ratio uncertainty term.

    Args:
        ds: dataset wrapper exposing `_train_val_set`.
        indices: candidate (unlabelled) dataset indices.
        howmany: number of indices to select.
        train_indices: dataset indices already labelled.
        n: number of forward passes averaged per sample.

    Returns:
        List of `howmany` selected dataset indices.
    """
    self.net.eval()
    N = torch.Tensor().to("cuda:0")  # logits of labelled samples
    S = torch.Tensor().to("cuda:0")  # logits of unlabelled samples
    # [0] accumulates (1.1 - confidence) per unlabelled sample,
    # [1] the matching dataset index of each sample.
    normalized_confidence = [torch.Tensor().to("cuda:0"), torch.Tensor().long()]
    randomized_list = numpy.random.choice([x for x in indices], len(indices), replace=False)
    # n loaders over the same samples -> n forward passes per sample.
    trainloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                   sampler=customcifar.CustomRandomSampler(train_indices))
                    for i in range(n)]
    dataloaders = [tud.DataLoader(ds._train_val_set, batch_size=500, shuffle=False, num_workers=4,
                                  sampler=customcifar.CustomSampler(randomized_list))
                   for i in range(n)]
    with torch.no_grad():
        for batch_index, element in enumerate(zip(*trainloaders)):  # labelled samples
            els = [x for x in element]
            o = torch.Tensor().to("cuda:0")
            for input in els:
                input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                # self.net(...)[0] is taken as the 10-class logits; the n
                # passes are stacked along dim 2.
                o = torch.cat((o, self.net(input[0])[0].reshape(len(input[0]), 10, 1)), 2)
            N = torch.cat((N, o), 0)
            print("\r N: {0} ".format(N.size()), end="")
        print("")
        for batch_index, element in enumerate(zip(*dataloaders)):  # unlabelled samples
            # element[0][2] carries the dataset indices of this batch.
            normalized_confidence[1] = torch.cat((normalized_confidence[1], element[0][2]), 0)
            els = [x for x in element]
            o = torch.Tensor().to("cuda:0")
            predictions = torch.Tensor().long().to("cuda:0")
            for input in els:
                input[0], input[1] = input[0].to("cuda:0"), input[1].to("cuda:0")
                out = self.net(input[0])[0].reshape(len(input[0]), 10, 1)
                o = torch.cat((o, out), 2)
                predictions = torch.cat((predictions, out.max(1)[1]), 1).to("cuda:0")
            # 1.1 (not 1.0) keeps the factor strictly positive even at full
            # agreement — presumably so the divergence ranking below is
            # never zeroed out; confirm intent.
            normalized_confidence[0] = torch.cat((normalized_confidence[0].cpu(),
                                                  1.1 - torch.Tensor(acquisition_functions.confidence(predictions.transpose(1, 0))).cpu() / n),
                                                 0).cpu()
            S = torch.cat((S, o), 0)
            print("\r S: {0} ".format(S.size()), end="")
        print("")
    # calc KL divergence: average the n softmax distributions per sample.
    S = (torch.sum(F.softmax(S, dim=1), 2)) / n
    N = (torch.sum(F.softmax(N, dim=1), 2)) / n
    # Broadcast ratio: S_on_N[s, n, k] = S[s, k] / N[n, k].
    S_on_N = S.to("cpu").unsqueeze(1) / N.to("cpu").unsqueeze(0)
    # numpy.log2 on a CPU torch tensor returns a torch tensor (array-wrap),
    # so the subsequent torch ops work; divergence is therefore in bits.
    # NOTE(review): S_on_N is already (len(S), len(N), 10); reshaping it to
    # (len(N), len(S), 10) and transposing back scrambles the pairing
    # unless len(S) == len(N) — verify against the caller.
    ln_S_on_N = numpy.log2(S_on_N).reshape(len(N), len(S), 10).transpose(0, 1)
    ln_S_on_N_batches = torch.split(ln_S_on_N, 300, dim=0)
    S_batches = torch.split(S, 300, dim=0)
    kldiv = torch.Tensor()
    for i in range(len(ln_S_on_N_batches)):
        # Batched matmul: sum_k S[s, k] * log2(S[s, k] / N[n, k]).
        partial_kldiv = torch.bmm(ln_S_on_N_batches[i].to("cuda:0"), S_batches[i].reshape(len(S_batches[i]), 10, 1)).cpu()
        # NOTE(review): this prepends, so batches end up in reverse order
        # before the reshape below — confirm row/index alignment.
        kldiv = torch.cat((partial_kldiv, kldiv), 0)
        print(kldiv.size())
    kldiv = kldiv.reshape(len(S), len(N))
    # Nearest labelled distribution per unlabelled sample, scaled by the
    # uncertainty factor; highest scores are selected.
    mindiv = torch.min(kldiv, 1)[0] * normalized_confidence[0]
    errorlist = [[mindiv[i].item(), normalized_confidence[1][i].item()] for i in range(len(normalized_confidence[0]))]
    sorlist = sorted(errorlist, key=lambda xp: xp[0], reverse=True)
    return [x[1] for x in sorlist[:howmany]]
def get_test_loader(self):
    """Return a DataLoader covering the whole test set in random order."""
    every_test_index = list(range(len(self.testset)))
    return tud.DataLoader(
        self.testset,
        batch_size=64,
        shuffle=False,
        num_workers=2,
        sampler=customcifar.CustomRandomSampler(every_test_index),
    )
def get_validation_loader(self):
    """Return a DataLoader over the validation indices of the train set."""
    val_sampler = customcifar.CustomRandomSampler(self.validation_indices)
    return tud.DataLoader(
        self.dataset,
        batch_size=64,
        shuffle=False,
        num_workers=2,
        sampler=val_sampler,
    )