Exemplo n.º 1
0
    def _test_n_k_q_combination(self, n, k, q):
        """Check matching-network predictions for one (n, k, q) configuration.

        Draws a single task from ``self.dataset`` through ``NShotTaskSampler``,
        builds an attention matrix from cosine distances between the query and
        support rows, and asserts that ``matching_net_predictions`` returns a
        ``(q * k, k)`` tensor whose rows each sum to 1.

        # Arguments
            n: Number of support samples per class
            k: Number of classes in the task
            q: Number of query samples per class

        NOTE: the attention tensor is moved with ``.cuda()``, so this check
        needs a CUDA device to run.
        """
        # The literal 100 is forwarded to NShotTaskSampler unchanged
        # (presumably the number of episodes per epoch - confirm against the
        # sampler's signature).
        task_sampler = NShotTaskSampler(self.dataset, 100, n, k, q)
        task_loader = DataLoader(self.dataset, batch_sampler=task_sampler)

        # Only the first task produced by the loader is needed
        for x, y in task_loader:
            break

        # Drop column 0 and perturb the remaining features in place with
        # uniform noise so that no pairwise distance is exactly 0.
        n_support = n * k
        support = x[:n_support, 1:]
        queries = x[n_support:, 1:]
        support.add_(torch.rand_like(support))
        queries.add_(torch.rand_like(queries))

        # "Attention" is a softmax over the negated query/support distances
        cosine_distances = pairwise_distances(queries, support, 'cosine')
        attention = torch.softmax(-cosine_distances, dim=1).cuda()

        y_pred = matching_net_predictions(attention, n, k, q)

        expected_shape = (q * k, k)
        self.assertEqual(
            y_pred.shape, expected_shape,
            'Matching Network predictions must have shape (q * k, k).')

        # Every query's predicted class distribution has to be normalised
        row_totals = y_pred.sum(dim=1)
        unit_totals = torch.ones_like(row_totals).double()
        self.assertTrue(
            torch.all(torch.isclose(row_totals, unit_totals)),
            'Matching Network predictions probabilities must sum to 1 for each '
            'query sample.')
Exemplo n.º 2
0
def proto_net_episode(model: Module, optimiser: Optimizer, loss_fn: Callable,
                      input_ids: torch.Tensor, attention_mask: torch.Tensor,
                      y: torch.Tensor, n_shot: int, k_way: int, q_queries: int,
                      distance: str, train: bool):
    """Runs one Prototypical Network episode on a single few-shot task.

    # Arguments
        model: Prototypical Network; called as `model(input_ids, attention_mask)`
            to embed every sample of the task
        optimiser: Optimiser used for the gradient step (only touched when
            `train` is True)
        loss_fn: Callable invoked as `loss_fn(log_probabilities, y)`
        input_ids: First model input for all samples; support samples come
            first, followed by the query samples
        attention_mask: Second model input, passed alongside `input_ids`
        y: Labels handed to `loss_fn` together with the query log-probabilities
        n_shot: Number of examples per class in the support set
        k_way: Number of classes in the few shot classification task
        q_queries: Number of examples per class in the query set (not read by
            this function)
        distance: Distance metric name forwarded to `pairwise_distances`
        train: Whether (True) or not (False) to perform a parameter update

    # Returns
        loss: Value returned by `loss_fn` for this task
        y_pred: Softmax over negated query-to-prototype distances
    """
    # Put the model in the right mode; gradients are only reset when training
    if train:
        model.train()
        optimiser.zero_grad()
    else:
        model.eval()

    embedded = model(input_ids, attention_mask)

    # The first n_shot * k_way embeddings form the support set, the remainder
    # are the queries.
    n_support = n_shot * k_way
    support_set = embedded[:n_support]
    query_set = embedded[n_support:]
    prototypes = compute_prototypes(support_set, k_way, n_shot)

    # One row per query, one column per prototype
    distances = pairwise_distances(query_set, prototypes, distance)

    # Smaller distance -> larger logit
    logits = -distances
    loss = loss_fn(logits.log_softmax(dim=1), y)
    y_pred = logits.softmax(dim=1)

    if train:
        # Backpropagate and update the parameters
        loss.backward()
        optimiser.step()

    return loss, y_pred
 def evaluate_batch(self, x, y):
     """Score one few-shot batch with the prototypical-network rule.

     Embeds `x` with `self.model`, treats the first
     `self.n_shot * self.k_way` embeddings as the support set and the rest
     as queries, and compares the queries with the class prototypes using
     `self.distance_metric`.

     # Returns
         Tuple `(loss, y_pred, distances, prototypes, support, queries)`,
         where `loss` is `self.loss_fn` applied to the log-softmax of the
         negated distances and `y_pred` is the matching softmax.
     """
     embedded = self.model(x)

     # Support samples come first, queries afterwards
     n_support = self.n_shot * self.k_way
     support, queries = embedded[:n_support], embedded[n_support:]

     prototypes = compute_prototypes(support, self.k_way, self.n_shot)
     distances = pairwise_distances(
         queries, prototypes, self.distance_metric)

     # Smaller distance -> larger logit
     logits = -distances
     loss = self.loss_fn(logits.log_softmax(dim=1), y)
     y_pred = logits.softmax(dim=1)

     return loss, y_pred, distances, prototypes, support, queries
def get_class_fit(support, prototypes, k_way, n_shot, distance_metric):
    """Fit per-class statistics of the support-to-own-prototype scores.

    Each support sample is scored against every prototype as
    `sigmoid(-distance)`. The scores are grouped as
    `(k_way, n_shot, <prototype axis>)`, and for class `i` the scores of its
    own support samples against prototype `i` are passed to `fit`.

    # Arguments
        support: Support-set embeddings, ordered class by class
        prototypes: Class prototypes
        k_way: Number of classes
        n_shot: Number of support samples per class
        distance_metric: Metric name forwarded to `pairwise_distances`

    # Returns
        List of `k_way` two-element lists holding the pair returned by `fit`
        for each class (presumably a mean and a standard deviation - confirm
        against `fit`).
    """
    squash = nn.Sigmoid()
    dist_to_prototypes = pairwise_distances(support, prototypes,
                                            distance_metric)

    # Group the scores by class and move them to NumPy for `fit`
    scores = squash(-dist_to_prototypes).reshape(k_way, n_shot, -1)
    scores = scores.cpu().detach().numpy()

    class_fits = []
    for cls in range(k_way):
        # Column `cls` of block `cls`: a class's samples vs. its own prototype
        mu, std = fit(scores[cls, :, cls])
        class_fits.append([mu, std])

    return class_fits
Exemplo n.º 5
0
def autoencoder_episode(model: Module, optimiser: Optimizer, loss_fn: Callable,
                        x: torch.Tensor, y: torch.Tensor, n_shot: int,
                        k_way: int, q_queries: int, distance: str,
                        train: bool):
    """Performs a single episode for the baseline nearest neighbour model.

    Every sample is flattened to shape `(batch, 1, -1)`, embedded by `model`,
    and each query is compared with the support samples. For `n_shot > 1` the
    distances to the `n_shot` support samples of each class are reduced to the
    distance of the nearest one, so the scores always have one column per
    class and line up with the class labels in `y`.

    # Arguments
        model: Model used to embed the samples
        optimiser: Optimiser used for the gradient step (only touched when
            `train` is True)
        loss_fn: Callable invoked as `loss_fn(log_probabilities, y)`
        x: Input samples of the few shot task; support samples first, ordered
            class by class, followed by the query samples
        y: Input labels of few shot classification task
        n_shot: Number of examples per class in the support set
        k_way: Number of classes in the few shot classification task
        q_queries: Number of examples per class in the query set (not read by
            this function)
        distance: Distance metric name forwarded to `pairwise_distances`
        train: Whether (True) or not (False) to perform a parameter update

    # Returns
        loss: Value returned by `loss_fn` for this task
        y_pred: Predicted class probabilities for the query set, one column
            per class
    """
    if train:
        # Zero gradients
        model.train()
        optimiser.zero_grad()
    else:
        model.eval()

    # Flatten every sample to a single-channel vector before embedding
    x = x.view(x.size(0), 1, -1)

    # Embed all samples
    embeddings = model(x)

    # Samples are ordered as follows:
    # k lots of n support samples from a particular class
    # k lots of q query samples from those classes
    n_support = n_shot * k_way
    support = embeddings[:n_support]
    queries = embeddings[n_support:]

    # Distances between every query and every support sample
    distances = pairwise_distances(queries, support, distance)

    if n_shot > 1:
        # The columns are support samples grouped class by class. Without this
        # reduction the softmax would run over n_shot * k_way columns while
        # the labels index k_way classes, so column j would only coincide
        # with class j in the 1-shot case. Keep the nearest support sample of
        # each class (nearest-neighbour rule) to get one column per class.
        distances = distances.reshape(
            distances.size(0), k_way, n_shot).min(dim=2)[0]

    # Calculate log p_{phi} (y = k | x)
    log_p_y = (-distances).log_softmax(dim=1)
    loss = loss_fn(log_p_y, y)

    # Prediction probabilities are softmax over distances
    y_pred = (-distances).softmax(dim=1)

    if train:
        # Take gradient step
        loss.backward()
        optimiser.step()

    return loss, y_pred
Exemplo n.º 6
0
def matching_net_episode(model: Module,
                         optimiser,
                         loss_fn: Loss,
                         x: torch.Tensor,
                         y: torch.Tensor,
                         n_shot: int,
                         k_way: int,
                         q_queries: int,
                         distance: str,
                         fce: bool,
                         train: bool):
    """Runs one Matching Network episode on a single few-shot task.

    # Arguments
        model: Matching Network; must expose `encoder` and, when `fce` is
            True, the `g` and `f` sub-modules
        optimiser: Optimiser used for the gradient step (only touched when
            `train` is True)
        loss_fn: Loss applied to the log of the clipped predictions and `y`
        x: Input samples of few shot classification task
        y: Input labels of few shot classification task
        n_shot: Number of examples per class in the support set
        k_way: Number of classes in the few shot classification task
        q_queries: Number of examples per class in the query set
        distance: Distance metric to use when calculating distance between
            support and query set samples
        fce: Whether or not to use fully conditional embeddings
        train: Whether (True) or not (False) to perform a parameter update

    # Returns
        loss: Loss of the Matching Network on this task
        y_pred: Predicted class probabilities for the query set on this task
            (unclipped)
    """
    # Select train/eval mode; gradients are only reset when training
    if train:
        model.train()
        optimiser.zero_grad()
    else:
        model.eval()

    embedded = model.encoder(x)

    # Samples arrive as k lots of n support samples followed by k lots of
    # q query samples, so the first n_shot * k_way rows are the support set.
    n_support = n_shot * k_way
    support = embedded[:n_support]
    queries = embedded[n_support:]

    if fce:
        # Fully conditional embedding of the support set (g, appendix A.2 of
        # the paper). The support set is fed to `model.g` as a sequence with a
        # singleton batch axis at position 1, which is removed again
        # afterwards. Only the first of the three returned values is used.
        conditioned, _, _ = model.g(support.unsqueeze(1))
        support = conditioned.squeeze(1)

        # Fully conditional embedding of the queries (f, appendix A.1),
        # conditioned on the re-embedded support set
        queries = model.f(support, queries)

    # Distances between every query and every support sample (presumably one
    # row per query and one column per support sample - confirm against
    # `pairwise_distances`)
    distances = pairwise_distances(queries, support, distance)

    # "Attention" is a softmax over the negated query/support distances
    attention = torch.softmax(-distances, dim=1)

    # Equation (1) of Matching Networks:
    # y_hat = \sum_{i=1}^{k} a(x_hat, x_i) y_i
    y_pred = matching_net_predictions(attention, n_shot, k_way, q_queries)

    # Negative log likelihood on probabilities clipped away from 0 and 1 for
    # numerical stability
    log_probabilities = torch.clamp(y_pred, EPSILON, 1 - EPSILON).log()
    loss = loss_fn(log_probabilities, y)

    if train:
        loss.backward()
        # Training was observed to be unstable, so the gradient norm is
        # clipped to at most 1 before the update
        clip_grad_norm_(model.parameters(), 1)
        optimiser.step()

    return loss, y_pred
Exemplo n.º 7
0
def proto_net_episode(model: Module,
                      optimiser: Optimizer,
                      loss_fn: Callable,
                      x: torch.Tensor,
                      y: torch.Tensor,
                      n_shot: int,
                      k_way: int,
                      q_queries: int,
                      distance: str,
                      train: bool,
                      stnmodel = None,
                      stnoptim = None,
                      args = None,):

    """Performs a single episode for a Prototypical Network, optionally with an STN.

    Without `stnmodel` this is a plain prototypical-network episode. With
    `stnmodel`, the samples are first transformed by it. When training, the
    episode then runs two updates in order:
      1. `stnoptim` steps on `-loss + args.stn_reg_coeff * stnidentityloss(theta)`,
         i.e. the STN is pushed to increase the classification loss, subject
         to the regulariser.
      2. The classifier is re-run on the detached transformed samples and
         `optimiser` steps on the ordinary classification loss.

    # Arguments
        model: Prototypical Network to be trained.
        optimiser: Optimiser to calculate gradient step
        loss_fn: Loss function to calculate between predictions and outputs. Should be cross-entropy
        x: Input samples of few shot classification task
        y: Input labels of few shot classification task
        n_shot: Number of examples per class in the support set
        k_way: Number of classes in the few shot classification task
        q_queries: Number of examples per class in the query set (not read by this function)
        distance: Distance metric to use when calculating distance between class prototypes and queries
        train: Whether (True) or not (False) to perform a parameter update
        stnmodel: Optional module that transforms `x`; it is called and must return
            a 3-tuple `(transformed_x, theta, info)`
        stnoptim: Optimiser for `stnmodel`; required when `stnmodel` is given and `train` is True
        args: Object providing `targetonly` and `stn_reg_coeff`; required when `stnmodel` is given

    # Returns
        loss: Loss of the Prototypical Network on this task
        y_pred: Predicted class probabilities for the query set on this task
        x: The (possibly STN-transformed) input samples, detached from the graph
    """
    if train:
        # Zero gradients
        model.train()
        optimiser.zero_grad()
        if stnmodel:
            stnmodel.train()
            stnoptim.zero_grad()
    else:
        model.eval()
        if stnmodel:
            stnmodel.eval()

    # If there is an STN, then modify some of the samples
    theta = None
    # `info` is assigned below but never read in this function
    info = None
    if stnmodel:
        if args.targetonly:
            # Support and query samples go through the STN separately with a
            # different second argument (1 for support, 0 for queries).
            # NOTE(review): the meaning of that flag is defined by stnmodel -
            # presumably it switches the transformation off for the support
            # set; confirm against the STN implementation.
            supnum = n_shot*k_way
            xsup, thetasup, info = stnmodel(x[:supnum], 1)
            xtar, thetatar, info = stnmodel(x[supnum:], 0)
            x = torch.cat([xsup, xtar], 0)
            theta = torch.cat([thetasup, thetatar], 0)
        else:
            x, theta, info = stnmodel(x)

    # Embed all samples
    embeddings = model(x)

    # Samples are ordered by the NShotWrapper class as follows:
    # k lots of n support samples from a particular class
    # k lots of q query samples from those classes
    support = embeddings[:n_shot*k_way]
    queries = embeddings[n_shot*k_way:]
    prototypes = compute_prototypes(support, k_way, n_shot)

    # Calculate squared distances between all queries and all prototypes
    # Output should have shape (q_queries * k_way, k_way) = (num_queries, k_way)
    # NOTE(review): this call passes `model` as a fourth argument while the
    # second call further down does not - confirm which form
    # `pairwise_distances` expects.
    distances = pairwise_distances(queries, prototypes, distance, model)

    # Calculate log p_{phi} (y = k | x)
    log_p_y = (-distances).log_softmax(dim=1)
    loss = loss_fn(log_p_y, y)

    # Calculate the stn loss
    if stnmodel and train:
        # The classification loss is negated, so the STN step increases it;
        # stnidentityloss(theta) is added as a regulariser weighted by
        # args.stn_reg_coeff (presumably penalising deviation from the
        # identity transform - confirm against stnidentityloss).
        loss = -loss + args.stn_reg_coeff * stnidentityloss(theta)
        loss.backward()
        stnoptim.step()

        # Reset optimizers
        # The backward pass above also accumulated gradients in the
        # classifier; discard them before the classifier's own update.
        optimiser.zero_grad()

        # Second forward pass on the transformed samples. `x` is detached so
        # the classifier loss does not backpropagate into the STN.
        # Embed all samples
        embeddings = model(x.detach())

        # Samples are ordered by the NShotWrapper class as follows:
        # k lots of n support samples from a particular class
        # k lots of q query samples from those classes
        support = embeddings[:n_shot*k_way]
        queries = embeddings[n_shot*k_way:]
        prototypes = compute_prototypes(support, k_way, n_shot)

        # Calculate squared distances between all queries and all prototypes
        # Output should have shape (q_queries * k_way, k_way) = (num_queries, k_way)
        distances = pairwise_distances(queries, prototypes, distance)

        # Calculate log p_{phi} (y = k | x)
        log_p_y = (-distances).log_softmax(dim=1)
        loss = loss_fn(log_p_y, y)

    # Prediction probabilities are softmax over distances (taken from the
    # second forward pass when the STN branch above ran)
    y_pred = (-distances).softmax(dim=1)

    if train:
        # Take gradient step
        loss.backward()
        optimiser.step()
    else:
        pass

    return loss, y_pred, x.detach()