Example #1
    def _make_dataset(self, root, transform, semantic):
        domain_names = os.listdir(root)
        datas = []
        labels = []
        domains = []
        # Remap ImageFolder's alphabetical class indices to the ordering
        # used by the pretrained semantic classifier.
        maps = [1, 0, 4, 2, 3]
        # maps = [2, 3, 0, 1, 4]
        for idx, domain in enumerate(sorted(domain_names)):
            correct = 0
            total = 0
            path = os.path.join(root, domain)
            dataset = datasets.ImageFolder(path, transform)
            if semantic:
                for data, gt in dataset:
                    data = data.cuda()
                    # Rescale from [-1, 1] to [0, 1] and add a batch dimension
                    data = (data.unsqueeze(0) + 1) * 0.5
                    label = semantic(data).argmax(1)
                    labels.append(label)
                    correct += int(maps[gt] == label)
                    total += 1
                print(f'Accuracy for {domain}: {correct / total}')
            else:
                labels += [0] * len(dataset)
            datas.append(dataset)
            domains += [idx] * len(dataset)
        return torch.utils.data.ConcatDataset(datas), torch.LongTensor(
            labels), domains
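A note on the `maps` list above: datasets.ImageFolder numbers classes in alphabetical folder order, while the pretrained semantic classifier uses its own ordering, so maps[gt] translates a folder index into the classifier's label space before comparison. A minimal sketch of that bookkeeping (the mapping is the one hard-coded above; the sample values are illustrative):

import torch

maps = [1, 0, 4, 2, 3]         # ImageFolder index -> semantic classifier index

gt = 2                         # ImageFolder's label for the third folder alphabetically
label = torch.tensor([4])      # classifier argmax for the same image
print(int(maps[gt] == label))  # 1 -> counted as correct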
Example #2
def make_db(path):
    with torch.no_grad():
        rows = []
        files = glob.glob(path)
        for f in files:
            # Load the image as (C, H, W) in [0, 1]
            img = cv2.imread(f)
            img = img.transpose((2, 0, 1)) / 255.
            data = torch.from_numpy(img.astype(np.float32)).clone().to(device)

            data = data.unsqueeze(0)  # add a batch dimension

            # mu, logvar = model.encode(data.contiguous().view(-1, 784 * 3))
            mu, logvar = model.encode(data.contiguous().view(
                -1, image_size * image_size * chn_num))
            z = model.reparameterize(mu, logvar).cpu().detach().numpy().copy()
            z = z.tolist()
            z[0].append(f)  # keep the source path alongside the latent vector
            rows.append(np.array(z[0]))
    df = pd.DataFrame(rows,
                      columns=[
                          'z1', 'z2', 'z3', 'z4', 'z5', 'z6', 'z7', 'z8', 'z9',
                          'z10', 'path'
                      ])

    return df
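The view(-1, image_size * image_size * chn_num) call flattens each image into a single row, since this VAE's encoder appears to be fully connected; the commented-out line suggests the code was first written for 28x28 RGB inputs (784 * 3). A quick shape check under that assumption:

import torch

image_size, chn_num = 28, 3
data = torch.rand(1, chn_num, image_size, image_size)  # (batch, C, H, W)
flat = data.contiguous().view(-1, image_size * image_size * chn_num)
print(flat.shape)  # torch.Size([1, 2352]) -- one row of 784 * 3 values per image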
Example #3
def train(model, device, is_binary, train_generator, optimizer, loss_fn, batch_size, loss_meter, train_stats):
    """
     Train the network and collect accuracy and loss in dataframes. A different
     loss function is used depending on whether the prediction is binary or multiclass.
    """
    model.train() # Set model to train mode (default mode)

    total_items = 0
    acc = 0.0
    loss = 0.0
    for data, target in train_generator:
        data = data.unsqueeze(1).float()  # add a channel dimension
        data, target = data.to(device), target.to(device)
        total_items += target.shape[0]
        optimizer.zero_grad() # Zero out the gradients
        prediction = model(data)
        if is_binary:
            target = target.unsqueeze(1).float()
            acc += utils.bin_accuracy(target, prediction)
            loss = loss_fn(prediction, target.float())
        else:
            acc += utils.multi_accuracy(target, prediction)
            loss = loss_fn(prediction, target.long())
        loss.backward() # Compute gradients
        optimizer.step() # Update weights
        loss_meter.update(loss.item(), batch_size) # Track per-batch loss so .avg is the epoch average

    # Record average accuracy and loss for the epoch
    acc_avg = acc/total_items
    train_stats = pd.concat([train_stats, pd.DataFrame([[acc_avg, loss_meter.avg]], columns=['accuracy', 'loss'])], ignore_index=True)

    return train_stats
Example #4
def test(model, device, is_binary, test_generator, loss_fn, epoch, batch_size, loss_meter, test_stats, train_stats, logger):
    """
     Test the model with the test dataset. Only forward passes are done; backpropagation is not applied.
    """
    model.eval() # Set model to eval mode - required for dropout and norm layers

    total_items = 0
    acc = 0.0
    loss = 0.0
    loss_meter.reset()
    with torch.no_grad():
        for data, target in test_generator:
            data = data.unsqueeze(1).float()  # add a channel dimension
            data, target = data.to(device), target.to(device)
            total_items += target.shape[0]
            prediction = model(data)
            if is_binary:
                target = target.unsqueeze(1).float()
                acc += utils.bin_accuracy(target, prediction)
                loss = loss_fn(prediction, target.float())
            else:
                acc += utils.multi_accuracy(target, prediction)
                loss = loss_fn(prediction, target.long())
            loss_meter.update(loss.item(), batch_size)

    acc_avg = acc/total_items
    test_stats = pd.concat([test_stats, pd.DataFrame([[acc_avg, loss_meter.avg]], columns=['accuracy', 'loss'])], ignore_index=True)

    # write the test log to the log file
    logger.info('Epoch: %d Training Loss: %2.5f Test Accuracy: %2.3f Accurate Count: %d Total Items: %d' % (epoch, train_stats.iloc[epoch]['loss'], acc_avg, acc, total_items))
    loss_meter.reset()

    return test_stats
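loss_meter is passed into both train() and test() above but never defined in these examples; its update/avg/reset usage matches the common AverageMeter pattern from the PyTorch ImageNet example. A minimal sketch assuming that convention (the class name and fields are assumptions):

class AverageMeter:
    """Running average of a metric, weighted by batch size (assumed helper)."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count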
Example #5
def prediction(epoch):
    """n-step prediction"""
    model.eval()
    loss = np.zeros((2, prediction_steps))
    output = np.zeros((prediction_steps, train_data.shape[1], train_data.shape[2]))

    data = Variable(torch.from_numpy(train_data[:train_steps-1].squeeze()))
    data = data.unsqueeze(1).unsqueeze(1).to(device) # T x bs(=1) x c(=1) x h x w
    outputs, last_state_list = model(data)
    #prev_state = outputs[-1].view(1,1,1,height,width) # T(=1) x bs(=1) x c(=1) x h x w
    prev_state = Variable(torch.from_numpy(train_data[train_steps])).unsqueeze(0).unsqueeze(0).unsqueeze(0).to(device)

    for i in range(prediction_steps):
        prev_state, last_state_list = model(prev_state, last_state_list)
        loss[0, i] = mean_squared_error(prev_state.squeeze().cpu().detach().numpy(), train_data[train_steps+i])
        loss[1, i] = mean_squared_error(prev_state.squeeze().cpu().detach().numpy(), true_data[train_steps+i])
        output[i] = prev_state.squeeze().cpu().detach().numpy()

    if epoch % display_steps == 0:
        print_contents = "===> Prediction loss:\n"
        for i in range(prediction_steps):
            print_contents += "{} step forecast {}: {}\n".format(i+1, loss_name, loss[0, i])
        print(print_contents)

    #print("output", output.shape, output.min(), output.max())
    return loss, output
Example #6
    def __getitem__(self, index):
        data = self.features[:, index]
        label = self.labels[:, index]
        data = torch.from_numpy(data).float()
        label = torch.from_numpy(label).float()

        return data.unsqueeze(1), label  # add a trailing feature dimension
Example #7
    def __getitem__(self, i):
        x, y = self.indices[i]
        x1, y1 = x - self.patch_size // 2, y - self.patch_size // 2
        x2, y2 = x1 + self.patch_size, y1 + self.patch_size

        data = self.data[x1:x2, y1:y2]
        label = self.label[x, y]

        if self.data_aug and self.patch_size > 1:
            # Perform data augmentation (only on 2D patches)
            data = self.flip(data)

        # Copy the data into numpy arrays (PyTorch doesn't like numpy views)
        data = np.asarray(np.copy(data).transpose((2, 0, 1)), dtype='float32')
        label = np.asarray(np.copy(label), dtype='int64')

        # Load the data into PyTorch tensors
        data = torch.from_numpy(data)
        label = torch.from_numpy(label)
        # Remove unused dimensions when we work with individual spectra
        if self.patch_size == 1:
            data = data[:, 0, 0]

        # Add a fourth dimension for 3D CNN
        if self.patch_size > 1:
            # Make 4D data ((Batch x) Planes x Channels x Width x Height)
            data = data.unsqueeze(0)
        return data, label
Example #8
def attack(model, dataset, n_samples, method):

    model.eval()
    adversarial_attacks = []

    for data, target in tqdm(dataset):
        data = data.unsqueeze(0)
        target = torch.tensor(target).unsqueeze(0)

        if torch.cuda.is_available():
            data, target = data.cuda(), target.cuda()
            device = 'cuda'
        else:
            data, target = data.cpu(), target.cpu()
            device = 'cpu'

        samples_attacks = []

        for idx in range(n_samples):
            random.seed(idx)
            perturbed_image = run_attack(net=model,
                                         image=data,
                                         label=target,
                                         method=method,
                                         device=device,
                                         hyperparams=None).squeeze()
            perturbed_image = torch.clamp(perturbed_image, 0., 1.)
            samples_attacks.append(perturbed_image)

        adversarial_attacks.append(torch.stack(samples_attacks).mean(0))

    return torch.stack(adversarial_attacks)
Example #9
def report_collate(batch):
	
	#print('batch info : ', batch[0][0].shape)
	
	batch.sort(key=lambda x: x[0].shape[0], reverse=True)
	reports, targets = zip(*batch)

	N = len(batch) 
		
	report_lens    = torch.LongTensor([report.shape[0] for report in reports])
	max_report_len = int(max(report_lens))  # plain int, so it can size a tensor below
	
	if _use_shared_memory:
		# Allocate the padded batch in shared memory (legacy collate pattern)
		storage = torch.LongStorage._new_shared(max_report_len * N)
		data = torch.LongTensor(storage).view(max_report_len, N).zero_()
	else:
		data = torch.LongTensor(max_report_len, N).zero_()
	labels = torch.LongTensor(list(targets))
	
	for i, report in enumerate(reports):
		data[:report.shape[0], i]  = report
		
	dict_ = {
			'reports':data.unsqueeze(2),
			'seq_lens':report_lens,
			'labels':labels
			}
	return dict_
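The manual zero-padding loop in report_collate is equivalent to torch.nn.utils.rnn.pad_sequence with its default batch_first=False and padding_value=0. A simplified sketch of the same collation, leaving out the shared-memory branch:

import torch
from torch.nn.utils.rnn import pad_sequence

def report_collate_simple(batch):
    # Sort by report length (longest first), then pad with zeros to a
    # (max_report_len, batch_size) tensor, as report_collate does above.
    batch.sort(key=lambda x: x[0].shape[0], reverse=True)
    reports, targets = zip(*batch)
    data = pad_sequence(reports)  # (max_len, N)
    report_lens = torch.LongTensor([r.shape[0] for r in reports])
    return {'reports': data.unsqueeze(2),
            'seq_lens': report_lens,
            'labels': torch.LongTensor(list(targets))}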
Example #10
def train(epoch):
    model.train()
    train_loss = 0
    for batch_idx, data in enumerate(train_loader):
        data = data.unsqueeze(1)
        #print (data.shape)
        data = data.cuda()
        data = Variable(data)
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data)
        #print (recon_batch.min(),recon_batch.max() )

        loss = loss_function(recon_batch, data, mu, logvar)
        #print (loss)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / len(data)))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader.dataset)))
Example #11
def geo_tag_region(dataloader, args):
    # Maps a region name to an array whose value at index i is the count of
    # category i.
    region_tags = {}
    tag_to_region_features = {}
    categories = dataloader.dataset.categories

    if not os.path.exists("results/{}/geo_ctr.pkl".format(args.folder)):
        print('running geo_ctr_region() first to get necessary info...')
        geo_ctr_region(dataloader, args)

    counts = pickle.load(open("results/{}/geo_ctr.pkl".format(args.folder), "rb"))
    id_to_region = counts['id_to_region']

    # get name of regions
    unique_regions = list(set(id_to_region.values()))

    # Extracts features from model pretrained on ImageNet
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = models.alexnet(pretrained=True).to(device)
    new_classifier = nn.Sequential(*list(model.classifier.children())[:-1])
    model.classifier = new_classifier
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    region_features = {}
    for region in unique_regions:
        region_features[region] = []

    for cat in range(len(categories)):
        tag_to_region_features[cat] = copy.deepcopy(region_features)

    for i, (data, target) in enumerate(tqdm(dataloader)):
        if data is None:
            continue
        region_name = id_to_region[target[3]]
        anns = target[0]
        filepath = target[3]
        this_categories = list(set([categories.index(ann['label']) for ann in anns]))

        if region_name not in region_tags.keys():
            region_tags[region_name] = np.zeros(len(categories))
        this_features = None
        for cat in this_categories:
            if len(tag_to_region_features[cat][region_name]) < 500:
                data = normalize(data).to(device)
                big_data = F.interpolate(data.unsqueeze(0), size=224, mode='bilinear').to(device)
                this_features = model.forward(big_data)
                break
        for cat in this_categories:
            if this_features is not None and len(tag_to_region_features[cat][region_name]) < 500:
                tag_to_region_features[cat][region_name].append((this_features.data.cpu().numpy(), filepath))
        for ann in anns:
            region_tags[region_name][categories.index(ann['label'])] += 1
    info_stats = {}
    info_stats['region_tags'] = region_tags
    info_stats['tag_to_region_features'] = tag_to_region_features
    pickle.dump(info_stats, open("results/{}/geo_tag.pkl".format(args.folder), "wb"))
Example #12
    def __getitem__(self, index):
        pil_img = self.dataset[index]  # read a 3x32x32 item by index
        # print(np.array(pil_img).shape)
        data = self.transforms(pil_img, self.length)
        data = data.unsqueeze(0)  # for 1-channel input, add a leading dimension so x has 3 dimensions
        # print(data.shape)
        label = self.label[index]

        return data, label
Example #13
def chew(data):
    # Pair each feature with its complement, then split along the feature axis.
    not_data = 1. - data
    data = data.unsqueeze(1)
    not_data = not_data.unsqueeze(1)
    sets = torch.cat([data, not_data], dim=1)  # (batch, 2, features)
    rets = []
    for i in range(sets.size(2)):
        rets.append(sets[:, :, i])
    return rets
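A quick shape check for chew, assuming a batch of 8 items with 3 features: each returned element pairs a feature with its complement.

import torch

data = torch.rand(8, 3)          # (batch, features)
rets = chew(data)
print(len(rets), rets[0].shape)  # 3 torch.Size([8, 2]) -- one (value, 1-value) pair per feature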
Example #14
def geo_tag(dataloader, args):
    # redirect to geo_tag_gps if dataset is of gps form:
    if (dataloader.dataset.geography_info_type == "GPS_LABEL"):
        print("redirecting to geo_tag_gps()...")
        return geo_tag_gps(dataloader, args)
    elif (dataloader.dataset.geography_info_type == "STRING_FORMATTED_LABEL" and dataloader.dataset.geography_label_string_type == "REGION_LABEL"):
        print("redirecting to geo_tag_region()...")
        return geo_tag_region(dataloader, args)
    country_tags = {}
    tag_to_subregion_features = {}
    categories = dataloader.dataset.categories
    iso3_to_subregion = pickle.load(open('util_files/iso3_to_subregion_mappings.pkl', 'rb'))
    unique_subregions = set(list(iso3_to_subregion.values()))

    # Extracts features from model pretrained on ImageNet
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = models.alexnet(pretrained=True).to(device)
    new_classifier = nn.Sequential(*list(model.classifier.children())[:-1])
    model.classifier = new_classifier
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    subregion_features = {}
    for subregion in unique_subregions:
        subregion_features[subregion] = []
    for cat in range(len(categories)):
        tag_to_subregion_features[cat] = copy.deepcopy(subregion_features)
    for i, (data, target) in enumerate(tqdm(dataloader)):
        if data is None:
            continue
        country = target[2][0]
        anns = target[0]
        filepath = target[3]
        this_categories = list(set([categories.index(ann['label']) for ann in anns]))
        subregion = iso3_to_subregion[country_to_iso3(country)]
        if country not in country_tags.keys():
            country_tags[country] = np.zeros(len(categories))
        this_features = None
        for cat in this_categories:
            if len(tag_to_subregion_features[cat][subregion]) < 500:
                data = normalize(data).to(device)
                big_data = F.interpolate(data.unsqueeze(0), size=224, mode='bilinear').to(device)
                this_features = model.forward(big_data)
                break
        for cat in this_categories:
            country_tags[country][cat] += 1
            if this_features is not None and len(tag_to_subregion_features[cat][subregion]) < 500:
                tag_to_subregion_features[cat][subregion].append((this_features.data.cpu().numpy(), filepath))

    info_stats = {}
    info_stats['country_tags'] = country_tags
    info_stats['tag_to_subregion_features'] = tag_to_subregion_features
    pickle.dump(info_stats, open("results/{}/geo_tag.pkl".format(args.folder), "wb"))
Example #15
def one_hot_encode(batch, depth, use_gpu):
    data, batch_sizes = torch.nn.utils.rnn.pad_packed_sequence(
        torch.nn.utils.rnn.pack_sequence(batch))

    # one-hot encoding
    prot_aa_list = data.unsqueeze(1)
    embed_tensor = torch.zeros(prot_aa_list.size(0), depth, prot_aa_list.size(2))  # depth classes (e.g. 21 amino acids)
    if use_gpu:
        prot_aa_list = prot_aa_list.cuda()
        embed_tensor = embed_tensor.cuda()
    input_sequences = embed_tensor.scatter_(1, prot_aa_list.data, 1).transpose(1,2)
    return input_sequences, batch_sizes
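The scatter_ call in one_hot_encode writes a 1 at each class index along dim 1; on recent PyTorch versions torch.nn.functional.one_hot produces the same encoding. A minimal equivalence check with integer class indices:

import torch
import torch.nn.functional as F

depth = 21                              # e.g. number of amino-acid classes
data = torch.randint(0, depth, (8, 4))  # (seq_len, batch) of class indices

# scatter_ version, as in one_hot_encode above
idx = data.unsqueeze(1)                                    # (seq_len, 1, batch)
out = torch.zeros(idx.size(0), depth, idx.size(2))
scatter_version = out.scatter_(1, idx, 1).transpose(1, 2)  # (seq_len, batch, depth)

# F.one_hot version
one_hot_version = F.one_hot(data, num_classes=depth).float()

print(torch.equal(scatter_version, one_hot_version))  # True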
Example #16
def test(net, img, args):
    """
    Test a model on a specific image
    """
    net.eval()
    patch_size = args.patch_size
    center_pixel = args.center_pixel
    batch_size, device = args.batch_size, torch.device(args.device)
    n_classes = args.n_classes

    kwargs = {
        'step': args.test_stride,
        'window_size': (patch_size, patch_size)
    }
    probs = np.zeros(img.shape[:2] + (n_classes, ))

    iterations = utils.count_sliding_window(img, **kwargs) // batch_size
    for batch in tqdm(utils.grouper(batch_size,
                                    utils.sliding_window(img, **kwargs)),
                      total=iterations,
                      desc="Inference on the image"):
        with torch.no_grad():
            if patch_size == 1:
                data = [b[0][0, 0] for b in batch]
                data = np.copy(data)
                data = torch.from_numpy(data)
            else:
                data = [b[0] for b in batch]
                data = np.copy(data)
                data = data.transpose(0, 3, 1, 2)
                data = torch.from_numpy(data)
                data = data.unsqueeze(1)

            indices = [b[1:] for b in batch]
            data = data.to(device)
            output = net(data)
            if isinstance(output, tuple):
                output = output[0]
            output = output.to('cpu')

            if patch_size == 1 or center_pixel:
                output = output.numpy()
            else:
                output = np.transpose(output.numpy(), (0, 2, 3, 1))
            for (x, y, w, h), out in zip(indices, output):
                if center_pixel:
                    probs[x + w // 2, y + h // 2] += out
                else:
                    probs[x:x + w, y:y + h] += out
    return probs
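utils.grouper is not shown in Example #16; it presumably batches the sliding-window iterator into fixed-size chunks, in the spirit of the classic itertools grouper recipe. A minimal sketch under that assumption:

import itertools

def grouper(n, iterable):
    """Yield successive chunks of n items from iterable (assumed helper).

    The final chunk may be shorter if the iterable runs out.
    """
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, n))
        if not chunk:
            return
        yield chunk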
Example #17
    def _preprocess_data(self, data, metadata):
        if data.isna().sum().sum():
            # apply returns a new frame; keep the result so the imputation sticks
            data = data.apply(self.sample_column, axis=0, result_type='broadcast')

        # TODO: Find more sophisticated way to deal with string/int/categorical columns
        cat_cols = []
        targets = None
        for col in metadata['dataResources'][0]['columns']:
            col_name = col['colName']

            if col_name == 'd3mIndex':
                data = data.drop(col_name, axis=1)
                continue

            n_unique_vals = len(data[col_name].unique())
            if n_unique_vals == 1:
                data = data.drop(col_name, axis=1)
            elif col['role'][0] == 'suggestedTarget':
                targets = data[col_name]
                data = data.drop(col_name, axis=1)
            elif col['colType'] in ['categorical', 'string']:
                if n_unique_vals > 25:
                    data = data.drop(col_name, axis=1)
                else:
                    cat_cols.append(col_name)

        # One-hot encode the categorical features and normalize the real features
        data = pd.get_dummies(data, columns=cat_cols)
        features = data.to_numpy()
        features = self._normalize_data(features)

        # Ordinal encode and normalize the targets
        assert targets is not None
        targets, _ = pd.factorize(targets)
        targets = np.expand_dims(targets, axis=-1)
        targets = self._normalize_data(targets)

        # Recombine the targets and features
        data = np.concatenate((features, targets), axis=-1)

        # Convert the dataset to a tensor and permute the dimensions such that the columns come first
        data = torch.from_numpy(data).to(torch.float)
        # TODO: add more info to the 3rd dim (ie cat vs num)
        data = data.unsqueeze(dim=-1).permute(1, 0,
                                              2)  # [seq_len, batch_size, dim]
        return data
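A small demonstration of the two encodings used in _preprocess_data: pd.get_dummies one-hot encodes the categorical columns, and pd.factorize maps target labels to ordinal integers (toy values for illustration):

import pandas as pd

df = pd.DataFrame({'color': ['red', 'blue', 'red'], 'size': [1, 2, 3]})
print(pd.get_dummies(df, columns=['color']))  # adds color_blue / color_red indicator columns

codes, uniques = pd.factorize(['cat', 'dog', 'cat'])
print(codes)    # [0 1 0]
print(uniques)  # ['cat' 'dog']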
Example #18
    def __getitem__(self, index):
        ID = self.m_dataIDs[index]
        filename = self.m_dataPartitions.m_inputFilesList[ID]
        data = np.load(filename).astype(np.float32)

        patientID = getStemName(filename, self.m_dataPartitions.m_inputSuffix)
        if self.m_dataPartitions.m_inputLabelDir is not None:
            labelFile = os.path.join(self.m_dataPartitions.m_inputLabelDir, patientID+self.m_dataPartitions.m_inputSuffix)
            label = np.load(labelFile).astype(np.float32)

            if self.m_transform:
                data, label = self.m_transform(data, label)
            else:
                data, label = torch.from_numpy(data), torch.from_numpy(label)

            return data.unsqueeze(dim=0), label, patientID  # the 3D data filter needs an unsqueezed feature dim
        else:
            return torch.from_numpy(data), patientID
Example #19
def compute(data):
    global W, B

    if data.dim() == 2:
        buffer = data
    elif data.dim() == 1:
        buffer = data.unsqueeze(1)  # promote a vector to a single-column matrix

    for i in range(0, len(net_shape)-1):
        buffer = torch.matmul(buffer, W[i]) + B[i]

        if net_f[i] != 0:
            buffer = net_f[i](buffer)

    if buffer.shape[1] == 1:
        result = buffer[:, 0]  # squeeze a single-column output back to a vector
    else:
        result = buffer

    return result
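The loop in compute is a hand-rolled fully connected forward pass over the global weight list W and bias list B. An equivalent built with torch.nn, assuming an illustrative net_shape and activation:

import torch
import torch.nn as nn

net_shape = [4, 16, 1]  # assumed layer sizes
model = nn.Sequential(
    nn.Linear(net_shape[0], net_shape[1]),
    nn.Tanh(),          # plays the role of net_f[0]
    nn.Linear(net_shape[1], net_shape[2]),
)

x = torch.rand(8, net_shape[0])
print(model(x).shape)   # torch.Size([8, 1])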
Example #20
    def processor(sample):
        data, labels, training = sample

        data = augmentation(data.unsqueeze(1).float() / 255.0)
        labels = torch.ByteTensor(labels)
        labels = labels.long()

        labels = torch.eye(NUM_CLASSES).index_select(dim=0, index=labels)  # one-hot encode the labels

        data = Variable(data)
        labels = Variable(labels)

        if training:
            classes, reconstructions = model(data, labels)
        else:
            classes, reconstructions = model(data)

        loss = capsule_loss(data, labels, classes, reconstructions)

        return loss, classes
Example #21
def attack(model, dataset, method):

    model.eval()
    adversarial_attacks = []

    for data, target in tqdm(dataset):
        data = data.unsqueeze(0)
        target = torch.tensor(target).unsqueeze(0)

        if torch.cuda.is_available():
            data, target = data.cuda(), target.cuda()
            device = 'cuda'
        else:
            data, target = data.cpu(), target.cpu()
            device = 'cpu'

        perturbed_image = run_attack(net=model, image=data, label=target, method=method, 
                                     device=device, hyperparams=None).squeeze()
        perturbed_image = torch.clamp(perturbed_image, 0., 1.)
        adversarial_attacks.append(perturbed_image)

    return torch.stack(adversarial_attacks)
Example #22
    def __getitem__(self, i):
        x, y = self.indices[i]  # (x, y) is a coordinate pair in the padded image
        x1, y1 = x - self.patch_size // 2, y - self.patch_size // 2  # (x1, y1) is the patch's top-left corner
        x2, y2 = x1 + self.patch_size, y1 + self.patch_size  # (x2, y2) is the patch's bottom-right corner

        data = self.data[x1:x2, y1:y2]  # raw data patch
        label = self.label[x1:x2, y1:y2]  # label patch matching the data patch (not a single value)

        if self.flip_augmentation and self.patch_size > 1:
            # Perform data augmentation (only on 2D patches)
            data, label = self.flip(data, label)
        if self.radiation_augmentation and np.random.random() < 0.1:
            data = self.radiation_noise(data)
        if self.mixture_augmentation and np.random.random() < 0.2:
            data = self.mixture_noise(data, label)

        # Copy the data into numpy arrays (PyTorch doesn't like numpy views)
        data = np.asarray(np.copy(data).transpose((2, 0, 1)),
                          dtype='float32')  # data becomes [spectral bands x rows x cols]
        label = np.asarray(np.copy(label), dtype='int64')

        # Load the data into PyTorch tensors
        data = torch.from_numpy(data)
        label = torch.from_numpy(label)
        # Extract the center label if needed
        if self.center_pixel and self.patch_size > 1:
            label = label[self.patch_size // 2,
                          self.patch_size // 2]  # use the center of the label patch as the final label
        # Remove unused dimensions when we work with individual spectra
        elif self.patch_size == 1:  # for a single pixel, reduce to one spectrum and one label
            data = data[:, 0, 0]
            label = label[0, 0]

        # Add a fourth dimension for 3D CNN
        if self.patch_size > 1:
            # Make 4D data ((Batch x) Planes x Channels x Width x Height)
            data = data.unsqueeze(0)
        return data, label
Example #23
    def loadData(self, path, regen=False):
        """Check if data exists, if so load it, else create new dataset."""
        if regen: # If forced regeneration
            data, labels = self.generate()
        else:
            try: # Try to load the files
                data = torch.load(path + "data.pt")
                labels = torch.load(path + "labels.pt")
            except FileNotFoundError: # If no files, generate new files
                print("No files found, creating new dataset")
                makedir(path) # make sure that there's a directory
                data, labels = self.generate()

        # --- save data for next time ---
        arrays = [data, labels]
        tensors = self.arraysToTensors(arrays, "FL")
        data, labels = tensors
        if len(data.shape) < 3:
            data = data.unsqueeze(2)
        outputs = [data, labels]
        names = ["data", "labels"]
        self.saveTensors(outputs, names, self._data_path)
        return data, labels
Example #24
    def __getitem__(self, i):
        x, y = self.indices[i]
        x1, y1 = x - self.patch_size // 2, y - self.patch_size // 2
        x2, y2 = x1 + self.patch_size, y1 + self.patch_size

        data = self.data[x1:x2, y1:y2]
        label = self.label[x1:x2, y1:y2]

        if self.flip_augmentation and self.patch_size > 1:
            # Perform data augmentation (only on 2D patches)
            data, label = self.flip(data, label)
        if self.radiation_augmentation and np.random.random() < 0.1:
            data = self.radiation_noise(data)
        if self.mixture_augmentation and np.random.random() < 0.2:
            data = self.mixture_noise(data, label)

        # Copy the data into numpy arrays (PyTorch doesn't like numpy views)
        data = np.asarray(np.copy(data).transpose((2, 0, 1)), dtype="float32")
        label = np.asarray(np.copy(label), dtype="int64")

        # Load the data into PyTorch tensors
        data = torch.from_numpy(data)
        label = torch.from_numpy(label)
        # Extract the center label if needed
        if self.center_pixel and self.patch_size > 1:
            label = label[self.patch_size // 2, self.patch_size // 2]
        # Remove unused dimensions when we work with individual spectra
        elif self.patch_size == 1:
            data = data[:, 0, 0]
            label = label[0, 0]

        # Add a fourth dimension for 3D CNN
        if self.patch_size > 1:
            # Make 4D data ((Batch x) Planes x Channels x Width x Height)
            data = data.unsqueeze(0)
        return data, label
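The final unsqueeze(0) in these hyperspectral __getitem__ methods turns each (bands, H, W) patch into the 4D layout a 3D CNN expects; the DataLoader then prepends the batch dimension. A shape sketch, assuming a 103-band sensor and 7x7 patches:

import torch

patch = torch.rand(103, 7, 7)  # (spectral bands, H, W)
patch = patch.unsqueeze(0)     # (planes=1, channels, H, W) for a 3D CNN
print(patch.shape)             # torch.Size([1, 103, 7, 7])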
Example #25
def forward(model, device, is_binary, test_generator, predict_list, target_list):
    """
     One forward pass through the model. Mostly used to get confusion matrix values.
    """
    with torch.no_grad():
        for data, target in test_generator:
            data = data.unsqueeze(1).float()  # add a channel dimension
            data, target = data.to(device), target.to(device)
            prediction = model(data)
            if is_binary:
                target = target.unsqueeze(1).float()
                # Threshold the sigmoid output at 0.5 to get predicted labels
                pred_labels_sigmoid = torch.sigmoid(prediction)
                prediction_tags = (pred_labels_sigmoid >= 0.5).float()
            else:
                prediction_softmax = torch.softmax(prediction, dim=1)
                _, prediction_tags = torch.max(prediction_softmax, dim=1)

            predict_list.append(prediction_tags.to('cpu'))
            target_list.append(target.to('cpu'))

    predict_list = [j for val in predict_list for j in val]
    target_list = [j for val in target_list for j in val]

    return predict_list, target_list
Example #26
        paths = [os.path.join('../../tmp/images/1', lin) for lin in lines]

    print(len(paths))
    random.shuffle(paths)

    pool = nn.MaxPool2d(4, 4, return_indices=True)
    up = nn.MaxUnpool2d(4, 4)

    with torch.no_grad():
        for i, path in enumerate(paths):
            print(i, path)
            im = Image.open(path).resize((cfg.width, cfg.height))
            draw = ImageDraw.Draw(im)
            data = (np.array(im) / 255. - cfg.mean) / cfg.std
            data = transforms.ToTensor()(data)
            data = data.unsqueeze(0).to(device, dtype=torch.float32)

            output = mm(data)
            heatmap = output['hm'].sigmoid()

            _hm, _idx = pool(heatmap)
            heatmap = up(_hm, _idx)

            hm = heatmap.cpu().data.numpy()[0, 0]
            _im = np.floor(hm * 255).astype(np.uint8)  # PIL needs uint8, not float64
            _im = Image.fromarray(_im).convert('L')
            _im.save(f'./tmp/{i}_hm.jpg')

            for jj, ii in zip(*np.where(hm > 0.9)):
                cx = cfg.stride * (ii + output['off'][0, 0, jj, ii]).item()
                cy = cfg.stride * (jj + output['off'][0, 1, jj, ii]).item()
Example #27
==================================== TRAINING =================================
"""
for epoch in range(EPOCHS):
    torch.autograd.set_detect_anomaly(False)  # Disable autograd anomaly detection (it slows training)
    #Reset loss
    train_loss = 0
    test_loss = 0
    for i, (data, noisy_data) in enumerate(train_loader):
        cnn.train()

        hidden = None
        #Obtain inputs (noisy data), output (ground truth), prediction
        inp = Variable(noisy_data.unsqueeze(1).cuda())

        out = Variable(data.unsqueeze(1).cuda())
        pred = cnn(inp).cuda()

        #Compute L1 penalty on the prediction (currently left out of the loss below)
        l1_loss = torch.sum(torch.abs(pred))
        #Obtain loss
        loss = criterion(pred, out)  # + lmbda*l1_loss
        train_loss += loss.item()
        loss.backward()  #Backpropagate
        optimizer.step()  #Step optimizer
        optimizer.zero_grad()  #Zero-out gradient
    #Test dataset...ignore for now. Or add into image pre-processing and run
    if False:  #change to True if want to run
        with torch.no_grad():  #Do not train when running test set
            for data, noisy_data in test_loader:
                hidden = None
Example #28
def train(batch_size, num_epochs, lr, manualSeed, image_size, data_path):
    """
    Main processes responsible for training Protein Gan.

    Inputs (hyperparameters):

    - batch_size: Number of maps shown to the generator and discriminator per step.
    - num_epochs: Number of epochs to train the GAN for.
    - lr:         Learning rate.
    - manualSeed: Set random seed for reproducibility. Default is 666.
    - image_size: NxN dimensions of images in data_path.
    - data_path:  Target hdf5 file containing datasets 'train_16', 'train_64' and 'train_128'

    Outputs:

    - model:      pt file containing discriminator and generator states, as well as optimizer states.
    - logs:       Logs are saved as tensorboard logs in 'runs/'.
    """

    # Setup
    Path("models").mkdir(exist_ok=True)
    models = Path("models")
    model_file = models / f"{image_size}_{batch_size}_{num_epochs}_{lr}_{manualSeed}.pt"

    print(f"Loading dataset from {data_path}")
    # Loading dataset
    with h5py.File(data_path, "r") as data_file:
        x = data_file[f"train_{image_size}"][:]
    data_len = len(x)

    # Scale down values by a factor of 100
    x = x / 100

    # Number of GPUs available. Use 0 for CPU mode.
    ngpu = 1 if torch.cuda.is_available() else 0

    # Set random seed for reproducibility
    # manualSeed = random.randint(1, 10000) # use if you want new results
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)

    # Tensorboard for tracking progress
    writer = SummaryWriter()

    # Number of workers for dataloader
    workers = multiprocessing.cpu_count()

    # Size of z latent vector (i.e. size of generator input)
    nz = 100

    # Size of feature maps in generator
    ngf = image_size

    # Size of feature maps in discriminator
    ndf = image_size

    # Beta1 hyperparam for Adam optimizers
    beta1 = 0.5

    dataloader = torch.utils.data.DataLoader(x,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=workers)

    # Decide which device we want to run on
    device = torch.device("cuda:0" if (
        torch.cuda.is_available() and ngpu > 0) else "cpu")

    # custom weights initialization called on netG and netD
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find("Conv") != -1:
            nn.init.normal_(m.weight.data, 0.0, 0.02)
        elif classname.find("BatchNorm") != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0)

    # Create the generator
    if ngf == 16:
        netG = Generator16(ngpu).to(device)

    elif ngf == 64:
        netG = Generator64(ngpu).to(device)

    elif ngf == 128:
        netG = Generator128(ngpu).to(device)

    # Apply the weights_init function to randomly initialize all weights
    #  to mean=0, stdev=0.2.
    netG.apply(weights_init)

    # Create the Discriminator
    if ndf == 16:
        netD = Discriminator16(ngpu).to(device)

    elif ndf == 64:
        netD = Discriminator64(ngpu).to(device)

    elif ndf == 128:
        netD = Discriminator128(ngpu).to(device)

    # Apply the weights_init function to randomly initialize all weights
    #  to mean=0, stdev=0.2.
    netD.apply(weights_init)

    # Initialize BCELoss function
    criterion = nn.BCELoss()

    # Create batch of latent vectors that we will use to visualize
    #  the progression of the generator
    fixed_noise = torch.randn(image_size, nz, 1, 1, device=device)

    # Establish convention for real and fake labels during training
    real_label = 0.9
    fake_label = 0

    # Setup Adam optimizers for both G and D
    optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

    # Training Loop
    # Lists to keep track of progress
    iters = 0
    print("Starting Training")
    # For each epoch
    for epoch in tqdm(range(num_epochs), desc="Training Epochs"):
        # For each batch in the dataloader
        for i, data in tqdm(enumerate(dataloader, 0),
                            total=int(data_len / batch_size),
                            desc="Steps"):
            ############################
            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            ###########################
            # Train with all-real batch
            netD.zero_grad()
            # Format batch
            # Unsqueezed dim 1 to convert [batch_size, image_size, image_size] to [batch_size, 1, image_size, image_size] to conform to D architecture
            real_cpu = (data.unsqueeze(dim=1).type(
                torch.FloatTensor)).to(device)
            b_size = real_cpu.size(0)
            label = torch.full((b_size, ), real_label, device=device)
            # Forward pass real batch through D
            output = netD(real_cpu).view(-1)
            # Calculate loss on all-real batch
            errD_real = criterion(output, label)
            # Calculate gradients for D in backward pass
            errD_real.backward()
            D_x = output.mean().item()

            # Train with all-fake batch
            # Generate batch of latent vectors
            noise = torch.randn(b_size, nz, 1, 1, device=device)
            # Generate fake image batch with G
            fake = netG(noise)
            label.fill_(fake_label)
            # Make Symmetric
            sym_fake = (fake.clamp(min=0) +
                        fake.clamp(min=0).permute(0, 1, 3, 2)) / 2
            # Classify all fake batch with D
            output = netD(sym_fake.detach()).view(-1)
            # Calculate D's loss on the all-fake batch
            errD_fake = criterion(output, label)
            # Calculate the gradients for this batch
            errD_fake.backward()
            # Add the gradients from the all-real and all-fake batches
            errD = errD_real + errD_fake
            # Update D
            optimizerD.step()

            ############################
            # (2) Update G network: maximize log(D(G(z)))
            ###########################
            netG.zero_grad()
            label.fill_(real_label)  # fake labels are real for generator cost
            # Since we just updated D, perform another forward pass of all-fake batch through D
            output = netD(sym_fake).view(-1)
            # Calculate G's loss based on this output
            errG = criterion(output, label)
            # Calculate gradients for G
            errG.backward()
            # Update G
            optimizerG.step()

            # Log to Tensorboard
            writer.add_scalars(
                "Discriminator Loss vs Generator Loss",
                {
                    "Discriminator Loss": errD.item(),
                    "Generator Loss": errG.item()
                },
                iters,
            )
            writer.add_scalar("Discriminator Accuracy", D_x, iters)

            # Check how the generator is doing by saving G's output on fixed_noise
            if (iters % int((data_len / batch_size) * 0.05)
                    == 0) or ((epoch == num_epochs - 1) and
                              (i == len(dataloader) - 1)):
                with torch.no_grad():
                    fake = netG(fixed_noise).detach().cpu()
                writer.add_image(
                    "Generator Output",
                    vutils.make_grid(
                        (fake.clamp(min=0) +
                         fake.clamp(min=0).permute(0, 1, 3, 2)) / 2,
                        padding=2,
                        normalize=True,
                    ),
                    iters,
                )

            iters += 1

    # Save our model
    torch.save(
        {
            "netG_state_dict": netG.state_dict(),
            "netD_state_dict": netD.state_dict(),
            "optimizerG_state_dict": optimizerG.state_dict(),
            "optimizerD_state_dict": optimizerD.state_dict(),
        },
        model_file,
    )
    print(f"Training successful! saving to {model_file}")
Example #29
    def getz(self, data):
        return self.Encoder(data.unsqueeze(1)).view(data.shape[0], -1)
Example #30
def att_clu(dataloader, args):
    use_cuda = not args.ngpu and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Extracts scene features from the entire image
    arch = 'resnet18'
    model_file = 'util_files/%s_places365.pth.tar' % arch
    model = models.__dict__[arch](num_classes=365).to(device)
    checkpoint = torch.load(model_file,
                            map_location=lambda storage, loc: storage)
    state_dict = {
        str.replace(k, 'module.', ''): v
        for k, v in checkpoint['state_dict'].items()
    }
    model.load_state_dict(state_dict)
    model.eval()

    scene_classifier = model.fc
    new_classifier = nn.Sequential()
    model.fc = new_classifier

    categories = dataloader.dataset.categories
    attr_names = dataloader.dataset.attribute_names
    num_attrs = len(attr_names)
    scene_features = [[[] for j in range(num_attrs)]
                      for i in range(len(categories))]
    instance_features = [[[] for j in range(num_attrs)]
                         for i in range(len(categories))]
    scene_filepaths = [[[] for j in range(num_attrs)]
                       for i in range(len(categories))]

    # Extracts features of just the cropped object
    model_file = 'util_files/cifar_resnet110.th'
    small_model = resnet110()
    checkpoint = torch.load(model_file,
                            map_location=lambda storage, loc: storage)
    state_dict = {
        str.replace(k, 'module.', ''): v
        for k, v in checkpoint['state_dict'].items()
    }
    small_model.load_state_dict(state_dict)
    small_model.to(device)
    small_model.eval()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    for i, (data, target) in enumerate(tqdm(dataloader)):
        attr = target[1]
        anns = target[0]
        if len(attr) > 1:
            data = data.to(device)
            data = normalize(data)
            big_data = F.interpolate(data.unsqueeze(0),
                                     size=224,
                                     mode='bilinear').to(device)
            this_features = model.forward(big_data)
            logit = scene_classifier.forward(this_features)
            h_x = F.softmax(logit, 1).data.squeeze()
            probs, idx = h_x.sort(0, True)
            pred = idx[0]

            size = list(data.size())[1:]
            scene_added = []

            for ann in anns:
                index = categories.index(ann['label'])
                bbox = np.array([
                    ann['bbox'][0] * size[1], ann['bbox'][1] * size[1],
                    ann['bbox'][2] * size[0], ann['bbox'][3] * size[0]
                ]).astype(int)
                instance = data[:, bbox[2]:bbox[3], bbox[0]:bbox[1]]
                if 0 in list(instance.size()):
                    continue
                small_data = F.interpolate(instance.unsqueeze(0),
                                           size=32,
                                           mode='bilinear').to(device)
                this_small_features = small_model.features(small_data)
                for att in attr[0]:
                    if len(scene_features[index]
                           [att]) < 500 and index not in scene_added:
                        scene_added.append(index)
                        scene_features[index][att].extend(
                            this_features.data.cpu().numpy())
                        scene_filepaths[index][att].append((target[3], pred))
                    if len(instance_features[index][att]) < 500:
                        instance_features[index][att].extend(
                            this_small_features.data.cpu().numpy())
    stats = {}
    stats['instance'] = instance_features
    stats['scene'] = scene_features
    stats['scene_filepaths'] = scene_filepaths
    pickle.dump(stats, open("results/{}/att_clu.pkl".format(args.folder),
                            "wb"))