Example #1
def compute_loss_and_accuracy(
        dataloader: torch.utils.data.DataLoader,
        model: torch.nn.Module,
        loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g. torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    average_loss = 0
    accuracy = 0
    num_batches = 0

    with torch.no_grad():
        for (X_batch, Y_batch) in dataloader:
            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)
            # Forward pass the images through our model
            output_probs = model(X_batch)

            # Compute Loss and Accuracy
            average_loss += loss_criterion(output_probs, Y_batch).item()
            accuracy += (output_probs.argmax(dim=1) == Y_batch).float().mean().item()
            num_batches += 1

    # Average the per-batch values (assumes roughly equal batch sizes)
    average_loss /= num_batches
    accuracy /= num_batches
    return average_loss, accuracy
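The examples on this page all rely on a to_cuda helper (often imported as utils.to_cuda) that is not shown here. A minimal sketch of the single-argument form, assuming it simply moves a tensor or module to the GPU when one is available:

import torch

def to_cuda(elements):
    # Move a tensor/module to GPU VRAM if CUDA is available; otherwise a no-op
    if torch.cuda.is_available():
        return elements.cuda()
    return elements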
Example #2
 def comb_mats_no_which_dataset(self, rel_tensor, in_batch=True, cpu=False):
     stack_mats = to_cuda(
         torch.zeros(rel_tensor.shape[-1], 2, self.embedding_size))
     i = 0
     for rel_tent in rel_tensor:
         rel = rel_tent.item()
         if rel in self.equiv_flip_dict:
             rel = self.equiv_flip_dict[rel]
         p_rel = self.r_id2_vrr[rel][0]
         #no need to stack
         if p_rel == rel or (self.rank_start[rel] == self.rank_start[p_rel]
                             and self.rank_end[rel]
                             == self.rank_end[p_rel]):
             if self.rank_end[rel] == 0:
                 result = to_cuda(torch.zeros(2, self.embedding_size))
                 result[0] = self.dict_of_random_mats[str(p_rel)]
             else:
                 result = self.dict_of_random_mats[str(p_rel)]
             if cpu:
                 stack_mats[i] = result.cpu()
             else:
                 stack_mats[i] = result
         else:
             #stack everything from above and return
             result = torch.mm(self.dict_of_linenar_comb[str(rel)],
                               self.dict_of_random_mats[str(p_rel)])
             result_num_rank = result.shape[0]
             if cpu:
                 stack_mats[i][:result_num_rank] = result.cpu()
             else:
                 stack_mats[i][:result_num_rank] = result
         i += 1
     return stack_mats
Example #3
def compute_loss_and_accuracy(dataloader: torch.utils.data.DataLoader,
                              model: torch.nn.Module,
                              loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g. torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    average_loss = 0
    accuracy = 0
    i = 0
    # TODO: Implement this function (Task  2a)
    with torch.no_grad():
        for (X_batch, Y_batch) in dataloader:
            i += 1
            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)
            # Forward pass the images through our model
            output_probs = model(X_batch)

            # Compute Loss and Accuracy
            average_loss += loss_criterion(output_probs, Y_batch)
            _, predicted = torch.max(
                output_probs.data, 1)  # use dim=1, since batch_size is dim = 0
            accuracy += (predicted == Y_batch).sum() / (Y_batch.shape[0])

    # Dividing by the batch count assumes (roughly) equal batch sizes
    average_loss = average_loss / i
    accuracy = accuracy / i
    return average_loss.detach().cpu().float(), accuracy.detach().cpu().float()
Example #4
def compute_loss_and_accuracy(dataloader: torch.utils.data.DataLoader,
                              model: torch.nn.Module,
                              loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g. torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    average_loss = []
    accuracy = []

    with torch.no_grad():
        for (X_batch, Y_batch) in dataloader:
            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)
            # Forward pass the images through our model
            output_probs = model(X_batch)

            # Compute Loss and Accuracy
            average_loss.append(loss_criterion(output_probs, Y_batch))
            accuracy.append(
                (output_probs.argmax(dim=-1) == Y_batch).float().mean())

        return sum(average_loss) / len(average_loss), sum(accuracy) / len(
            accuracy)
Example #5
def compute_class_accuracy(testloader, model, name, use_cuda=True):

    class_correct, class_total = [0] * 10, [0] * 10
    confusion_matrix = np.zeros((10, 10), dtype=np.int32)

    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = to_cuda(images, use_cuda)
            labels = to_cuda(labels, use_cuda)

            outputs = F.softmax(model(images), dim=1)

            _, predicted = torch.max(outputs, dim=1)

            for i in range(predicted.shape[0]):
                class_total[labels[i].item()] += 1
                if labels[i].item() == predicted[i].item():
                    class_correct[predicted[i].item()] += 1
                confusion_matrix[predicted[i].item(), labels[i].item()] += 1

    save_confusion_matrix(confusion_matrix, name)

    print(sum(class_correct))
    return class_correct, confusion_matrix
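Examples #5, #13 and #14 pass an explicit flag as the second argument to to_cuda. A hedged sketch of what that variant presumably looks like:

import torch

def to_cuda(elements, use_cuda=True):
    # Move to the GPU only when requested and when CUDA is actually available
    if use_cuda and torch.cuda.is_available():
        return elements.cuda()
    return elements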
Example #6
 def get_candidate_labels(self, candidate_starts, candidate_ends, labeled_starts, labeled_ends, labels):
     same_start = torch.eq(to_cuda(labeled_starts.view(-1, 1)), to_cuda(candidate_starts.view(1, -1)))  # [num_labeled, num_candidates]
     same_end = torch.eq(to_cuda(labeled_ends.view(-1, 1)), to_cuda(candidate_ends.view(1, -1)))  # [num_labeled, num_candidates]
     same_span = same_start & same_end                                       # [num_labeled, num_candidates]
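     # Each candidate matches at most one labeled span, so this matmul picks out
     # that span's label (and yields 0 for candidates with no matching label)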
     candidate_labels = torch.matmul(to_cuda(labels.view(1, -1).float()), same_span.float())  # [1, num_candidates]
     candidate_labels = candidate_labels.squeeze(0)  # [num_candidates]
     return candidate_labels
Example #7
def compute_loss_and_accuracy(dataloader: torch.utils.data.DataLoader,
                              model: torch.nn.Module,
                              loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g. torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    average_loss = 0
    accuracy = 0
    total_loss = 0
    total_correct = 0
    total_images = 0
    # TODO: Implement this function (Task  2a)
    with torch.no_grad():
        for (X_batch, Y_batch) in dataloader:
            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)
            # Forward pass the images through our model
            output_probs = model(X_batch)
            # Compute Loss and Accuracy
            total_loss += loss_criterion(output_probs, Y_batch)
            total_correct += num_correct_preds(output_probs, Y_batch)
            total_images += X_batch.shape[0]
    average_loss = total_loss / len(dataloader)
    accuracy = total_correct / total_images

    return average_loss, accuracy
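Example #7 calls a num_correct_preds helper that is not shown on this page. A minimal sketch of what it presumably does, assuming output_probs has shape [batch_size, num_classes]:

def num_correct_preds(output_probs, targets):
    # Count how many argmax predictions match the ground-truth labels
    return (output_probs.argmax(dim=1) == targets).sum().item()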
Example #8
File: trainer.py Project: mariusud/Datasyn
    def train_step(self, X_batch, Y_batch):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X_batch: one batch of images
            Y_batch: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # X_batch is the CIFAR10 images. Shape: [batch_size, 3, 32, 32]
        # Y_batch is the CIFAR10 image label. Shape: [batch_size]
        # Transfer images / labels to GPU VRAM, if possible
        X_batch = utils.to_cuda(X_batch)
        Y_batch = utils.to_cuda(Y_batch)

        # Perform the forward pass
        predictions = self.model(X_batch)
        # Compute the cross entropy loss for the batch
        loss = self.loss_criterion(predictions, Y_batch)
        # Backpropagation
        loss.backward()
        # Gradient descent step
        self.optimizer.step()
        # Reset all computed gradients to 0
        self.optimizer.zero_grad()

        return loss.detach().cpu().item()
Example #9
File: trainer.py Project: mariusud/Datasyn
def compute_loss_and_accuracy(dataloader: torch.utils.data.DataLoader,
                              model: torch.nn.Module,
                              loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g. torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    average_loss = 0
    correct_predictions = 0
    num_imgs = 0
    with torch.no_grad():
        for i, (X_batch, Y_batch) in enumerate(dataloader):
            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)

            # Forward pass the images through our model
            output_probs = model(X_batch)

            # Compute Loss and Accuracy
            _, prediction = torch.max(output_probs, 1, keepdim=False)
            correct_predictions += (prediction == Y_batch).sum().item()
            num_imgs += Y_batch.size(0)

            average_loss += loss_criterion(output_probs, Y_batch).item()
    accuracy = correct_predictions / num_imgs
    return average_loss / (i + 1), accuracy  # enumerate starts at 0, so i + 1 batches were seen
Example #10
    def set_forward_adaptation(self, x, is_feature = True): # further adaptation: fix the features and train a new softmax classifier
        assert is_feature == True, 'Feature is fixed in further adaptation'
        z_support, z_query  = self.parse_feature(x,is_feature)

        z_support   = z_support.contiguous().view(self.n_way* self.n_support, -1 )
        z_query     = z_query.contiguous().view(self.n_way* self.n_query, -1 )

        y_support = torch.from_numpy(np.repeat(range( self.n_way ), self.n_support ))
        y_support = Variable(to_cuda(y_support))

        linear_clf = nn.Linear(self.feat_dim, self.n_way)
        linear_clf = to_cuda(linear_clf)

        set_optimizer = torch.optim.SGD(linear_clf.parameters(), lr = 0.01, momentum=0.9, dampening=0.9, weight_decay=0.001)

        loss_function = nn.CrossEntropyLoss()
        loss_function = to_cuda(loss_function)
        
        batch_size = 4
        support_size = self.n_way* self.n_support
        for epoch in range(100):
            rand_id = np.random.permutation(support_size)
            for i in range(0, support_size , batch_size):
                set_optimizer.zero_grad()
                selected_id = to_cuda(torch.from_numpy( rand_id[i: min(i+batch_size, support_size) ]))
                z_batch = z_support[selected_id]
                y_batch = y_support[selected_id] 
                scores = linear_clf(z_batch)
                loss = loss_function(scores,y_batch)
                loss.backward()
                set_optimizer.step()

        scores = linear_clf(z_query)
        return scores
Example #11
def run():
    parser = create_arg_parser()
    args = parser.parse_args()

    save_dir = create_model_id(args)

    generator = utils.to_cuda(Generator())
    discriminator = utils.to_cuda(Discriminator())

    # optimizer
    optim_gen = optim.Adam(generator.parameters(),
                           lr=args.lr,
                           betas=(0.5, 0.999))
    optim_dis = optim.Adam(discriminator.parameters(),
                           lr=args.lr,
                           betas=(0.5, 0.999))

    # loss
    criterion = nn.BCELoss()

    # dataset loader
    transform = transforms.Compose([transforms.ToTensor()])
    dataset = datasets.MNIST('datasets/mnist',
                             train=True,
                             download=True,
                             transform=transform)
    data_loader = DataLoader(dataset, batch_size=args.batch, shuffle=True)

    train_loop(args, discriminator, generator, criterion, optim_dis, optim_gen,
               data_loader, save_dir)

    plot_loss(save_dir)
Example #12
    def train(self):
        """
        Trains the model for [self.epochs] epochs.
        """
        # Track initial loss/accuracy
        self.validation_epoch()
        for epoch in range(self.epochs):
            # Perform a full pass through all the training samples
            for batch_it, (X_batch, Y_batch) in enumerate(self.dataloader_train):
                # X_batch is the CIFAR10 images. Shape: [batch_size, 3, 32, 32]
                # Y_batch is the CIFAR10 image label. Shape: [batch_size]
                # Transfer images / labels to GPU VRAM, if possible
                X_batch = to_cuda(X_batch)
                Y_batch = to_cuda(Y_batch)

                # Perform the forward pass
                predictions = self.model(X_batch)
                # Compute the cross entropy loss for the batch
                loss = self.loss_criterion(predictions, Y_batch)

                # Backpropagation
                loss.backward()

                # Gradient descent step
                self.optimizer.step()
                
                # Reset all computed gradients to 0
                self.optimizer.zero_grad()
                # Compute loss/accuracy for all three datasets.
                if batch_it % self.validation_check == 0:
                    self.validation_epoch()
                    # Check early stopping criteria.
                    if self.should_early_stop():
                        print("Early stopping.")
                        return
Example #13
def train():
    if args.resume:
        model.load_state_dict(torch.load(args.checkpoint))

    for epoch in range(args.start_epoch,
                       1000):  # loop over the dataset multiple times
        print("=== Epoch", epoch, "===")
        scheduler.step()

        running_loss, epoch_avg = 0.0, 0.0

        for i, data in enumerate(trainloader, start=1):
            # get the inputs
            images, volumes, landmarks = data

            images = to_cuda(images, True)
            volumes = to_cuda(volumes, True)
            # landmarks = to_cuda(landmarks, True)

            # l_shape = landmarks.shape
            # m = landmarks.view(l_shape[0], l_shape[1], l_shape[2] * l_shape[3]).argmax(2)
            # idx_gt = to_cuda(torch.stack((m // 128, m % 128), dim=2), True) / 128.0

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            # out_volumes, landmarks_predictions = net(images, landmarks)

            out_volumes = torch.sigmoid(model(images))  # F.sigmoid is deprecated in favor of torch.sigmoid

            loss = F.binary_cross_entropy(out_volumes, volumes)
            # loss2 = F.mse_loss(landmarks_predictions, idx_gt)
            # loss = loss1 + 0.5 * loss2

            loss.backward()

            torch.nn.utils.clip_grad_value_(model.parameters(), 5)
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_avg += loss.item()

            if i % 1 == 0:
                print("[%2d, %5d/%5d] loss: %.8f lr %.8f" % (
                    epoch,
                    i,
                    len(trainloader),
                    running_loss / 1,
                    scheduler.get_lr()[0],
                ))
                running_loss = 0.0

        print("EPOCH AVG", epoch_avg / len(trainloader))

        if epoch % 5 == 0:
            torch.save(model.state_dict(),
                       "../checkpoints/2hourglass_%d_schd_with_aug" % epoch)
Example #14
def train_student_normal(model, args, trainloader, testloader, seed):

    if torch.cuda.is_available() and args.use_cuda:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    model.to(device)
    model.set_train_mode()

    #get loss function
    criterion = nn.CrossEntropyLoss(reduction='mean')

    optimizer = get_optimizer(model, args)

    loss_values = []
    total_accuracy = []
    epoch_eval = []

    #train the student network
    for epoch in range(args.nr_epochs):
        loss_epoch = 0.0
        for i, data in enumerate(trainloader, 0):
            samples, labels = data
            samples = to_cuda(samples, args.use_cuda)
            labels = to_cuda(labels, args.use_cuda)

            #zero the gradients of network params
            optimizer.zero_grad()

            #define loss
            output_logits = model(samples)
            loss = criterion(output_logits, labels)

            loss.backward()
            optimizer.step()
            loss_epoch += loss.item()

        loss_epoch /= (i + 1)  # enumerate starts at 0, so i + 1 batches were processed
        loss_values.append(loss_epoch)
        print("Loss at epoch {} is {}".format(epoch, loss_epoch))

        if epoch % args.eval_interval == 0:
            model.eval()
            acc = compute_overall_accuracy(testloader, model, args.use_cuda)
            total_accuracy.append(acc)
            epoch_eval.append(epoch)
            model.train()
            print("Accuracy at epoch {} is {}".format(epoch, acc))

        if epoch % args.save_interval == 0:
            print("Saving model at {} epoch".format(epoch))
            with open(
                    args.dataset + "_student_network_simple" +
                    args.student_model + str(seed) + "_" + str(args.id),
                    "wb") as f:
                torch.save(model.state_dict(), f)

    return epoch_eval, loss_values, total_accuracy
Example #15
    def triple_classification(self, pos_h, pos_t, pos_r):
        pos_h_e = self.ent_embeddings(pos_h)
        pos_t_e = self.ent_embeddings(pos_t)
        #pos_r_e = self.rel_embeddings(pos_r) #\vv{r}
        pos_r_e = self.vvrel_embedding_func(pos_r)
        pos_sub = pos_h_e + pos_r_e - pos_t_e

        num_ent = pos_h_e.shape[0]
        #unique_time = time.time()
        unique_rels_2_unique_proj_idx = self.unique_rels(pos_r)
        #print("unique time:", unique_time-time.time())
        unique_rel_tensor = to_cuda(
            longTensor([int(key) for key in unique_rels_2_unique_proj_idx]))
        #comb  = time.time()
        #This has shape [#unique_rel x 2 x emb_dim]
        unique_bases = self.comb_mats(
            unique_rel_tensor)  #this is AT, A is unique_bases.t()
        #this will have shape [#unique_rel x 2]
        ####THIS HERE IS WRONG
        #print("comb time :", time.time()-comb)
        #inv_time = time.time()
        ATA_inverse = self.inverse(
            torch.bmm(unique_bases, unique_bases.transpose(1, 2)))
        #print("ATA inverse time :", time.time()-inv_time)
        #proj_time = time.time()
        unique_projmat = to_cuda(
            torch.cat(
                [torch.eye(self.embedding_size)] *
                len(unique_rels_2_unique_proj_idx)).view(
                    len(unique_rels_2_unique_proj_idx), self.embedding_size,
                    self.embedding_size)) - torch.bmm(
                        torch.bmm(unique_bases.transpose(1, 2), ATA_inverse),
                        unique_bases)
        #print("Multiplying for projection :", time.time()-proj_time)
        #Projection Matrices
        #Make in the shape of num_ent x emb_dim x emb_dim  (every proj matrix at every num_ent)
        #assign_time = time.time()

        unique_proj_idx = [
            unique_rels_2_unique_proj_idx[pos_r[i].item()]
            for i in range(num_ent)
        ]
        P_pos_r = unique_projmat[unique_proj_idx]
        #need to fix here
        bmmed = torch.bmm(
            P_pos_r,
            torch.cat(pos_sub).view(num_ent, self.embedding_size,
                                    1)).squeeze(2)
        #print("Time for bmm-ing:", time.time()-bmm_time)

        #print("time for bmm:", time.time() - bmm_tim)
        pos_sub = bmmed[:num_ent]

        if self.L1_flag:
            pos = torch.sum(torch.abs(pos_sub), 1)
        else:
            pos = torch.sum((pos_sub)**2, 1)
        return pos  # only the positive-triple score is computed in this method
Example #16
def select_vectors_from_pairs(x_src, y_tgt, pairs, gpuid):
    n = len(pairs)
    d = x_src.shape[1]
    x = to_cuda(torch.zeros([n, d]), gpuid)
    y = to_cuda(torch.zeros([n, d]), gpuid)
    for k, ij in enumerate(pairs):
        i, j = ij
        x[k, :] = x_src[i, :]
        y[k, :] = y_tgt[j, :]
    return x, y
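The per-pair copy loop above can also be written with advanced indexing. A sketch that should produce the same x and y, assuming pairs is a sequence of (i, j) index tuples:

import torch

def select_vectors_from_pairs_vectorized(x_src, y_tgt, pairs, gpuid):
    idx = torch.tensor(pairs, dtype=torch.long)
    # Gather all source/target rows at once instead of row by row
    x = to_cuda(x_src[idx[:, 0]], gpuid)
    y = to_cuda(y_tgt[idx[:, 1]], gpuid)
    return x, y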
Example #17
def batch_projection_colvec_transINT(column_vectors,
                                     list_of_basis_of_H_in_rows):
    list_proj = projection_matrix_transINT(list_of_basis_of_H_in_rows)
    return_tensor = floatTensor(column_vectors.shape[1],
                                column_vectors.shape[0])
    for i in range(column_vectors.shape[1]):
        single_col_vec = column_vectors[:, i].view(-1, 1)
        return_tensor[i] = torch.mm(to_cuda(list_proj[i]),
                                    to_cuda(single_col_vec)).t()
    return return_tensor  #return tensor of dim [num_ent x embed_dim] (embed vectors in row, not column)
Example #18
    def run_iteration(self,
                      data_generator,
                      do_backprop=True,
                      run_online_evaluation=False):
        data_dict = next(data_generator)
        data = data_dict['data']
        target = data_dict['target']

        data = maybe_to_torch(data)
        target = maybe_to_torch(target)

        if torch.cuda.is_available():
            data = to_cuda(data)
            target = to_cuda(target)

        self.optimizer.zero_grad()

        if self.fp16:
            with autocast():
                ret = self.network(data,
                                   target,
                                   return_hard_tp_fp_fn=run_online_evaluation)
                if run_online_evaluation:
                    ces, tps, fps, fns, tp_hard, fp_hard, fn_hard = ret
                    self.run_online_evaluation(tp_hard, fp_hard, fn_hard)
                else:
                    ces, tps, fps, fns = ret
                del data, target
                l = self.compute_loss(ces, tps, fps, fns)

            if do_backprop:
                self.amp_grad_scaler.scale(l).backward()
                self.amp_grad_scaler.unscale_(self.optimizer)
                clip_grad_norm_(self.network.parameters(), 12)
                self.amp_grad_scaler.step(self.optimizer)
                self.amp_grad_scaler.update()
        else:
            ret = self.network(data,
                               target,
                               return_hard_tp_fp_fn=run_online_evaluation)
            if run_online_evaluation:
                ces, tps, fps, fns, tp_hard, fp_hard, fn_hard = ret
                self.run_online_evaluation(tp_hard, fp_hard, fn_hard)
            else:
                ces, tps, fps, fns = ret
            del data, target
            l = self.compute_loss(ces, tps, fps, fns)

            if do_backprop:
                l.backward()
                clip_grad_norm_(self.network.parameters(), 12)
                self.optimizer.step()

        return l.detach().cpu().numpy()
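Example #18 follows the nnU-Net training-loop pattern; its maybe_to_torch and to_cuda helpers convert numpy arrays (or lists of them) to tensors and move them to the GPU. A simplified sketch of what these two helpers typically do:

import torch

def maybe_to_torch(d):
    # Convert numpy arrays (or lists of arrays) to float tensors; pass tensors through
    if isinstance(d, list):
        return [maybe_to_torch(i) for i in d]
    if not isinstance(d, torch.Tensor):
        d = torch.from_numpy(d).float()
    return d

def to_cuda(data, non_blocking=True):
    # Move a tensor or a list of tensors to the GPU
    if isinstance(data, list):
        return [i.cuda(non_blocking=non_blocking) for i in data]
    return data.cuda(non_blocking=non_blocking)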
Example #19
    def get_masked_mention_word_scores(self, encoded_doc, span_starts, span_ends):
        num_words = encoded_doc.shape[0]
        num_c = span_starts.shape[0]

        doc_range = torch.arange(num_words).view(1, -1).repeat(num_c, 1)
        mention_mask = (doc_range >= (span_starts.view(-1, 1))) & (doc_range <= span_ends.view(-1, 1))

        word_attn = self.masked_mention_score(encoded_doc)
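        # log(mask) is 0 where mask == 1 and -inf where mask == 0, so adding it to
        # the word scores removes out-of-span words from the softmax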
        mention_word_attn = F.softmax(torch.log(to_cuda(mention_mask.float())) + to_cuda(word_attn.view(1, -1)), dim=-1)

        return mention_word_attn
Example #20
    def set_forward_loss(self, x):
        y = torch.from_numpy(np.repeat(range(self.n_way), self.n_query))

        scores = self.set_forward(x)
        if self.loss_type == 'mse':
            y_oh = utils.one_hot(y, self.n_way)
            y_oh = Variable(to_cuda(y_oh))

            return self.loss_fn(scores, y_oh)
        else:
            y = Variable(to_cuda(y))
            return self.loss_fn(scores, y)
Example #21
def batch_projection_colvec_transINT_given_proj_mat_list(
        column_vectors, list_proj):
    return_tensor = floatTensor(column_vectors.shape[1],
                                column_vectors.shape[0])
    #print("return_tensor:", return_tensor)
    for i in range(column_vectors.shape[1]):
        single_col_vec = column_vectors[:, i].view(-1, 1)
        #print("single_col_vec :", single_col_vec)
        #print("list_proj[i] :", list_proj[i])
        return_tensor[i] = torch.mm(to_cuda(list_proj[i]),
                                    to_cuda(single_col_vec)).t()
    return return_tensor  #return tensor of dim [num_ent x embed_dim] (embed vectors in row, not column)
Example #22
 def beamstep(self, decoder, ifcuda = False, ifAttn = False, *args, **kwargs):      # one step of beam search
     tmp1 = to_cuda(torch.Tensor(1), ifcuda)
     tmp2 = to_cuda(torch.LongTensor(1), ifcuda)
     tmp3 = torch.LongTensor(1)
     m = []
     att = []
     hid = []
     for k in range(self.K):
         if self.beamseq[k][-1].self_id != self.eos_id:  # value comparison, not identity
             m.append(k)
             word_id = Variable(torch.LongTensor([self.beamseq[k][-1].self_id]))
             word_id = to_cuda(word_id, ifcuda)
             hidden = self.beamseq[k][-1].hidden_next
             if ifAttn:
                 output, hidden, attn_dist = decoder.predict(word_id, hidden = hidden, *args, **kwargs)
                 att.append(attn_dist)
             else:
                 output, hidden = decoder.predict(word_id, hidden = hidden, *args, **kwargs)
                 att.append([])
             hid.append(hidden)
             
             scores, inds = output.data.topk(self.K)
         
             tmp1 = torch.cat([tmp1, scores + self.beamseq[k][-1].score])
             tmp2 = torch.cat([tmp2, inds])
             tmp3 = torch.cat([tmp3, torch.LongTensor([k] * self.K)])
     
     if len(m) == 0:
         print('All beams have met <EOS>!')
         return
     
     tmp1 = tmp1[1:]
     tmp2 = tmp2[1:]
     tmp3 = tmp3[1:]
     
     order = tmp1.topk(len(m))[1]
     
     for k, d in enumerate(m):
         score = tmp1[order[k]]
         self_id = tmp2[order[k]]
         pre_loc = tmp3[order[k]]
         if ifAttn:
             self.beamseq[d].append(BeamUnit(self_id, pre_loc, score,
                                    hid[tmp3[order[k]]], att[tmp3[order[k]]]))
         else:
             self.beamseq[d].append(BeamUnit(self_id, pre_loc, score,
                                    hid[tmp3[order[k]]]))
         self.Kscores[d] = score
     
     self.step += 1
     return
Example #23
def compute_loss_and_accuracy(
        dataloader: torch.utils.data.DataLoader,
        model: torch.nn.Module,
        loss_criterion: torch.nn.modules.loss._Loss):
    """
    Computes the average loss and the accuracy over the whole dataset
    in dataloader.
    Args:
        dataloader: Validation/Test dataloader
        model: torch.nn.Module
        loss_criterion: The loss criterion, e.g. torch.nn.CrossEntropyLoss()
    Returns:
        [average_loss, accuracy]: both scalar.
    """
    average_loss = 0
    accuracy = 0
    N = 0  # number of samples seen; used as the divisor at the end
    iterations = 0
    # TODO: Implement this function (Task  2a)
    with torch.no_grad():
        for (X_batch, Y_batch) in dataloader:

            # Transfer images/labels to GPU VRAM, if possible
            X_batch = utils.to_cuda(X_batch)
            Y_batch = utils.to_cuda(Y_batch)
            
            # Forward pass the images through our model
            pred = model(X_batch)
            _, predicted = torch.max(pred, dim=1)
            accuracy += (predicted == Y_batch).sum().item()
            N += X_batch.size(0)
            iterations += 1
            loss = loss_criterion(pred, Y_batch)
            average_loss += loss.item()

    try:
        accuracy /= N
        average_loss /= iterations
    except ZeroDivisionError:
        print("Dividing by 0")

    return average_loss, accuracy
Example #24
File: task2.py Project: ulrikah/cvdl
    def train(self):
        """
        Trains the model for [self.epochs] epochs.
        """

        # Track initial loss/accuracy
        def should_validate_model():
            return self.global_step % self.num_steps_per_val == 0

        #model 2
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, 20)

        for epoch in range(self.epochs):
            self.epoch = epoch
            # Perform a full pass through all the training samples
            for X_batch, Y_batch in self.dataloader_train:
                # X_batch is the CIFAR10 images. Shape: [batch_size, 3, 32, 32]
                # Y_batch is the CIFAR10 image label. Shape: [batch_size]
                # Transfer images / labels to GPU VRAM, if possible
                X_batch = utils.to_cuda(X_batch)
                Y_batch = utils.to_cuda(Y_batch)

                # Perform the forward pass
                predictions = self.model(X_batch)
                # Compute the cross entropy loss for the batch
                loss = self.loss_criterion(predictions, Y_batch)
                self.TRAIN_LOSS[self.global_step] = loss.detach().cpu().item()

                # Backpropagation
                loss.backward()

                # Gradient descent step

                self.optimizer.step()

                # Reset all computed gradients to 0
                self.optimizer.zero_grad()
                self.global_step += 1
                # Compute loss/accuracy for all three datasets.
                if should_validate_model():
                    self.validation_epoch()
                    self.save_model()
                    if self.should_early_stop():
                        print("Early stopping.")
                        return
            # model 2: advance the cosine-annealing schedule once per epoch
            scheduler.step()
Example #25
def smoothTransformer2D(inp):
    if len(inp) == 3:
        [im, defgrad, affine] = inp  # defgrad in range [-1,1]
    else:
        [im, defgrad] = inp  # defgrad in range [-1,1]

    defgrad = logisticGrowth(defgrad, 2.0)

    base_grid = U.to_cuda(
        integralImage(
            torch.ones((defgrad.shape[0], defgrad.shape[1], defgrad.shape[2],
                        defgrad.shape[3]))))
    sampling_grid = integralImage(defgrad)

    samples = im.shape[0]
    channels = im.shape[1]
    height = im.shape[2]
    width = im.shape[3]

    try:
        identity = U.to_cuda(
            torch.cat(samples * [torch.Tensor([[1, 0, 0, 0, 1, 0, 0, 0, 1]])]))
        affine = affine + identity
        affine = torch.reshape(affine, (samples, 3, 3))
        sampling_grid = torch.cat(
            (sampling_grid, U.to_cuda(torch.ones(
                (samples, 1, height, width)))), 1)
        sampling_grid = sampling_grid.permute(0, 2, 3, 1)
        sampling_grid = torch.matmul(sampling_grid.view(samples, -1, 3),
                                     torch.transpose(affine, 1, 2))  #********
        sampling_grid = sampling_grid.view(samples, height, width,
                                           3)  #********
        sampling_grid = sampling_grid.permute(0, 3, 1, 2)
        sampling_grid = sampling_grid[:, 0:2, :, :]

    except NameError:
        # no affine component was provided (len(inp) == 2), so skip the affine warp
        pass

    sampling_grid_norm = normalize(sampling_grid, height, width)

    sampling_grid_inverse = 2 * base_grid - sampling_grid_norm
    mov_def = resample2D(im, sampling_grid_norm, height, width, samples,
                         channels)

    ref_def = resample2D(mov_def, sampling_grid_inverse, height, width,
                         samples, channels)

    return mov_def, ref_def, sampling_grid_norm, sampling_grid_inverse
Example #26
def triple_classification(mod, pos_h, pos_t, orig_r, proj_r):
    pos_h_e = mod.ent_embeddings(pos_h)
    pos_t_e = mod.ent_embeddings(pos_t)
    #pos_r_e = self.rel_embeddings(pos_r) #\vv{r}
    pos_sub = pos_h_e - pos_t_e
    num_ent = pos_h_e.shape[0]
    #unique_time = time.time()
    #print("unique time:", unique_time-time.time())
    #comb  = time.time()
    #This has shape [#unique_rel x 2 x emb_dim]
    unique_bases = torch.tensor(
        null_spaces[proj_r]).cuda().t()  #this is AT, A is unique_bases.t()
    orig_bases = torch.tensor(null_spaces[orig_r]).cuda().t()
    #this will have shape [#unique_rel x 2]
    ####THIS HERE IS WRONG
    #print("comb time :", time.time()-comb)
    #inv_time = time.time()
    ATA_inverse = torch.inverse(torch.mm(unique_bases,
                                         unique_bases.t())).cuda()
    orig_ATA_inverse = torch.inverse(torch.mm(orig_bases,
                                              orig_bases.t())).cuda()
    #print("ATA inverse time :", time.time()-inv_time)
    #proj_time = time.time()
    unique_projmat = to_cuda(torch.eye(
        mod.embedding_size).type(floatTensor)) - torch.mm(
            torch.mm(unique_bases.t(), ATA_inverse),
            unique_bases).type(floatTensor)
    orig_unique_projmat = to_cuda(
        torch.eye(mod.embedding_size).type(floatTensor)) - torch.mm(
            torch.mm(orig_bases.t(), orig_ATA_inverse),
            orig_bases).type(floatTensor)
    orig_P_pos_r = torch.cat([orig_unique_projmat.unsqueeze(0)] *
                             pos_h_e.shape[0],
                             dim=0)
    P_pos_r = torch.cat([unique_projmat.unsqueeze(0)] * pos_h_e.shape[0],
                        dim=0)
    #need to fix here
    pos_sub = torch.bmm(orig_P_pos_r,
                        pos_sub.view(num_ent, mod.embedding_size,
                                     1)).squeeze(2)
    bmmed = torch.bmm(P_pos_r, pos_sub.view(num_ent, mod.embedding_size,
                                            1)).squeeze(2)
    #print("Time for bmm-ing:", time.time()-bmm_time)
    #print("time for bmm:", time.time() - bmm_tim)
    proj_sub = bmmed[:num_ent]
    dist_l1 = torch.sum(torch.abs(proj_sub - pos_sub), 1)
    dist_l2 = torch.sum((proj_sub - pos_sub)**2, 1)
    return dist_l1, dist_l2
Example #27
    def set_forward_loss(self, x):
        y_query = torch.from_numpy(np.repeat(range(self.n_way), self.n_query))
        y_query = Variable(to_cuda(y_query))

        scores = self.set_forward(x)

        return self.loss_fn(scores, y_query)
Example #28
    def enc_dec_step(self):
        """
        Encoding / decoding step.
        """
        transformer = self.model
        transformer.train()
        task = self.config.task
        # batch
        (x, len_x), (y, len_y), _ = next(self.dataloader)

        # target words to predict
        alen = torch.arange(len_y.max(), dtype=torch.long, device=len_y.device)
        pred_mask = alen[:, None] < len_y[None] - 1  # do not predict anything given the last target word
        t = y[1:].masked_select(pred_mask[:-1])
        assert len(t) == (len_y - 1).sum().item()

        # cuda
        x, len_x, y, len_y, t = to_cuda(self.config, x, len_x, y, len_y, t)

        # forward / loss
        encoded = transformer(mode = 'encode', x = x, len_x = len_x)
        decoded = transformer(mode = 'decode', y = y, len_y = len_y, encoded = encoded.transpose(0,1), len_enc = len_x)
        _, loss = transformer(mode = 'predict', tensor = decoded, pred_mask = pred_mask, y = t, get_scores = False)
        self.stats[task].append(loss.item())

        # optimize
        self.optimize(loss)

        # number of processed sequences / words
        self.n_equations += self.config.batch_size
        self.stats['processed_e'] += len_x.size(0)
        self.stats['processed_w'] += (len_x + len_y - 2).sum().item()
        # Deletes data on CUDA to free its memory
        del x, len_x, y, len_y, t, alen, pred_mask
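Example #28 uses yet another to_cuda signature: it takes the config first plus a variable number of tensors and returns them all. A hedged sketch of that form, where the cpu field on the config is an assumption:

import torch

def to_cuda(config, *tensors):
    # Return the tensors unchanged when running on CPU; otherwise move each to the GPU
    if getattr(config, "cpu", False) or not torch.cuda.is_available():
        return tensors
    return tuple(t.cuda() for t in tensors)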
Example #29
def create_model():
    """
        Initializes the model. Edit the code below if you would like to change the model.
    """
    model = nn.Sequential(
        # First convolution
        nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),

        # Second convolution
        nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),

        # Third convolution
        nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),

        # Neural network
        # Flattens the image from shape (batch_size, C, height, width) to (batch_size, C*height*width)
        nn.Flatten(),
        # nn.Linear(4194304, 64),
        nn.Linear(32 * 32 * 2, 64),
        nn.ReLU(),
        nn.Linear(64, 10)
        # No need to include softmax, as this is already combined in the loss function
    )
    # Transfer model to GPU memory if a GPU is available
    model = utils.to_cuda(model)
    return model
Example #30
    def visualize(self):
        image = plt.imread("horse.jpg")
        image = to_tensor(image)
        image = normalize(image.data, mean, std)
        image = image.view(1, *image.shape)
        image = nn.functional.interpolate(image, size=(256, 256))
        image = to_cuda(image)

        # print(self.model.model)
        # print(self.model.model.children())

        #Save weights visualization
        weights = self.model.model.conv1.weight.data
        #print(weights.shape)
        torchvision.utils.save_image(weights, "weights_first_layer.png")

        #Save First Layer Activations visualization
        first_layer_out = self.model.model.conv1(image)
        #print(first_layer_out.shape)
        to_visualize = first_layer_out.view(first_layer_out.shape[1], 1,
                                            *first_layer_out.shape[2:])
        #print(to_visualize.shape)
        torchvision.utils.save_image(to_visualize, "filters_first_layer.png")

        #Pass image trought all layers but the last 2
        for name, child in self.model.model.named_children():
            if name not in ['avgpool', 'fc']:
                #print("Passing image through layer ", name)
                image = child(image)

        #Save Last Conv. Layer Activations visualization
        to_visualize = image.view(image.shape[1], 1, *image.shape[2:])[:64]
        torchvision.utils.save_image(to_visualize, "filters_last_layer.png")

        return