def loss_consistency(self, sample):
    # x_augment = [(A, [A_1, A_2, ..., A_n]), (B, [B_1, B_2, ..., B_m]), ...]
    x_augment = sample["x_augment"]
    n_samples = len(x_augment)

    # Each block holds one original sentence followed by its augmentations
    lengths = [1 + len(augments) for sentence, augments in x_augment]
    x = list()
    for sentence, augs in x_augment:
        x.append(sentence)
        x += augs

    z = self.encoder.embed_sentences(x)
    assert len(z) == sum(lengths)

    # Split the embeddings back into originals and their augmentations
    i = 0
    original_embeddings = list()
    augmented_embeddings = list()
    for length in lengths:
        original_embeddings.append(z[i])
        augmented_embeddings.append(z[i + 1:i + length])
        i += length

    original_embeddings = torch.stack(original_embeddings)
    # One vector per original sentence: the mean of its augmentation embeddings
    augmented_embeddings = torch.stack([a.mean(0) for a in augmented_embeddings])

    if self.metric == "euclidean":
        dists = euclidean_dist(original_embeddings, augmented_embeddings)
    elif self.metric == "cosine":
        dists = (-cosine_similarity(original_embeddings, augmented_embeddings) + 1) * 5
    else:
        raise NotImplementedError

    log_p_y = torch_functional.log_softmax(-dists, dim=1).view(n_samples, n_samples, -1)
def loss(self, sample):
    """
    :param sample: {
        "xs": [
            [support_A_1, support_A_2, ...],
            [support_B_1, support_B_2, ...],
            [support_C_1, support_C_2, ...],
            ...
        ],
        "xq": [
            [query_A_1, query_A_2, ...],
            [query_B_1, query_B_2, ...],
            [query_C_1, query_C_2, ...],
            ...
        ]
    }
    :return:
    """
    xs = sample['xs']  # support
    xq = sample['xq']  # query

    n_class = len(xs)
    assert len(xq) == n_class
    n_support = len(xs[0])
    n_query = len(xq[0])

    target_inds = torch.arange(0, n_class).view(n_class, 1, 1).expand(n_class, n_query, 1).long()
    target_inds = Variable(target_inds, requires_grad=False).to(device)

    x = [item for xs_ in xs for item in xs_] + [item for xq_ in xq for item in xq_]
    z = self.encoder.forward(x)
    z_dim = z.size(-1)

    z_proto = z[:n_class * n_support].view(n_class, n_support, z_dim).mean(1)
    zq = z[n_class * n_support:]

    if self.metric == "euclidean":
        dists = euclidean_dist(zq, z_proto)
    elif self.metric == "cosine":
        dists = (-cosine_similarity(zq, z_proto) + 1) * 5
    else:
        raise NotImplementedError

    log_p_y = torch_functional.log_softmax(-dists, dim=1).view(n_class, n_query, -1)

    loss_val = -log_p_y.gather(2, target_inds).squeeze().view(-1).mean()
    _, y_hat = log_p_y.max(2)
    acc_val = torch.eq(y_hat, target_inds.squeeze()).float().mean()

    return loss_val, {
        'loss': loss_val.item(),
        'acc': acc_val.item(),
        'dists': dists,
        'target': target_inds
    }
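# ----------------------------------------------------------------------------------------------
# Note: the losses in this file call pairwise `euclidean_dist` / `cosine_similarity` helpers that
# are defined elsewhere in the repository. The sketch below only illustrates the behaviour they
# are assumed to have, based on how they are called here ((n, d) x (m, d) -> (n, m)); it is not
# necessarily the repository's actual implementation, hence the `_sketch` suffixes.
# ----------------------------------------------------------------------------------------------
import torch


def euclidean_dist_sketch(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # x: (n, d), y: (m, d) -> (n, m) matrix of squared Euclidean distances
    n, m = x.size(0), y.size(0)
    x = x.unsqueeze(1).expand(n, m, -1)
    y = y.unsqueeze(0).expand(n, m, -1)
    return torch.pow(x - y, 2).sum(2)


def cosine_similarity_sketch(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # x: (n, d), y: (m, d) -> (n, m) matrix of cosine similarities in [-1, 1];
    # callers rescale it (e.g. "* 5" or "(-sim + 1) * 5") to sharpen the softmax.
    x_norm = torch.nn.functional.normalize(x, p=2, dim=1)
    y_norm = torch.nn.functional.normalize(y, p=2, dim=1)
    return x_norm @ y_norm.t()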
def neightbours_search(data, target, n, cosine=True):
    # Score every "real" entry of `data` against the target embedding
    neighbours = []
    for idx in range(len(data)):
        if data[idx][1] == 'real':
            if cosine:
                neighbours.append((data[idx][0], cosine_similarity(target[2], data[idx][2]), idx))
            else:
                neighbours.append((data[idx][0], euclidean_distance(target[2], data[idx][2]), idx))

    # Cosine: higher is closer (sort descending); Euclidean: lower is closer (sort ascending)
    neighbours = sorted(neighbours, key=lambda x: x[1], reverse=cosine)[:(n + 1)]

    # Skip the first hit (the target itself) and return the n closest entries
    neighbours_values = []
    for neighbour in neighbours[1:]:
        neighbours_values.append(data[neighbour[2]])
    return neighbours_values
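# Hypothetical usage sketch for `neightbours_search`. It assumes `data` is a list of
# (text, source_tag, embedding) triples where only entries tagged 'real' are searched, and that
# `target` is one of those triples (hence the n + 1 cut-off and the [1:] skip of the target
# itself). `euclidean_distance` is a scalar helper defined elsewhere in the repository; a
# stand-in is defined here only so this sketch is self-contained. All values are made up.
if __name__ == "__main__":
    import numpy as np

    def euclidean_distance(u, v):
        # Stand-in for the repository's scalar distance helper (assumption)
        return float(np.linalg.norm(np.asarray(u) - np.asarray(v)))

    rng = np.random.default_rng(0)
    toy_data = [
        ("book a table for two", "real", rng.standard_normal(8)),
        ("reserve a restaurant tonight", "real", rng.standard_normal(8)),
        ("play some jazz music", "real", rng.standard_normal(8)),
        ("book a table, please", "augmented", rng.standard_normal(8)),  # ignored: not 'real'
    ]
    target = toy_data[0]
    closest = neightbours_search(toy_data, target, n=2, cosine=False)
    print([text for text, _, _ in closest])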
def loss(self, sample, supervised_loss_share: float = 0):
    """
    :param supervised_loss_share: share of supervised loss in total loss
    :param sample: {
        "xs": [
            [support_A_1, support_A_2, ...],
            [support_B_1, support_B_2, ...],
            [support_C_1, support_C_2, ...],
            ...
        ],
        "xq": [
            [query_A_1, query_A_2, ...],
            [query_B_1, query_B_2, ...],
            [query_C_1, query_C_2, ...],
            ...
        ]
    }
    :return:
    """
    xs = sample['xs']  # support
    xq = sample['xq']  # query

    n_class = len(xs)
    assert len(xq) == n_class
    n_support = len(xs[0])
    n_query = len(xq[0])

    target_inds = torch.arange(0, n_class).view(n_class, 1, 1).expand(n_class, n_query, 1).long()
    target_inds = Variable(target_inds, requires_grad=False).to(device)

    has_augment = "x_augment" in sample
    if has_augment:
        augmentations = sample["x_augment"]
        n_augmentations_samples = len(sample["x_augment"])
        n_augmentations_per_sample = [len(item['tgt_texts']) for item in augmentations]
        # All augmented samples must carry the same number of paraphrases
        assert len(set(n_augmentations_per_sample)) == 1
        n_augmentations_per_sample = n_augmentations_per_sample[0]

        supports = [item["sentence"] for xs_ in xs for item in xs_]
        queries = [item["sentence"] for xq_ in xq for item in xq_]
        augmentations_supports = [[item2 for item2 in item["tgt_texts"]] for item in sample["x_augment"]]
        augmentation_queries = [item["src_text"] for item in sample["x_augment"]]

        # Encode
        x = supports + queries + [item2 for item1 in augmentations_supports for item2 in item1] + augmentation_queries
        z = self.encoder.embed_sentences(x)
        z_dim = z.size(-1)

        # Dispatch
        z_support = z[:len(supports)].view(n_class, n_support, z_dim).mean(dim=[1])
        z_query = z[len(supports):len(supports) + len(queries)]
        z_aug_support = (
            z[len(supports) + len(queries):
              len(supports) + len(queries) + n_augmentations_per_sample * n_augmentations_samples]
            .view(n_augmentations_samples, n_augmentations_per_sample, z_dim)
            .mean(dim=[1]))
        z_aug_query = z[-len(augmentation_queries):]
    else:
        # When not using augmentations
        supports = [item["sentence"] for xs_ in xs for item in xs_]
        queries = [item["sentence"] for xq_ in xq for item in xq_]

        # Encode
        x = supports + queries
        z = self.encoder.embed_sentences(x)
        z_dim = z.size(-1)

        # Dispatch
        z_support = z[:len(supports)].view(n_class, n_support, z_dim).mean(dim=[1])
        z_query = z[len(supports):len(supports) + len(queries)]

    if self.metric == "euclidean":
        supervised_dists = euclidean_dist(z_query, z_support)
        if has_augment:
            unsupervised_dists = euclidean_dist(z_aug_query, z_aug_support)
    elif self.metric == "cosine":
        supervised_dists = (-cosine_similarity(z_query, z_support) + 1) * 5
        if has_augment:
            unsupervised_dists = (-cosine_similarity(z_aug_query, z_aug_support) + 1) * 5
    else:
        raise NotImplementedError

    from torch.nn import CrossEntropyLoss
    supervised_loss = CrossEntropyLoss()(-supervised_dists, target_inds.reshape(-1))
    _, y_hat_supervised = (-supervised_dists).max(1)
    acc_val_supervised = torch.eq(y_hat_supervised, target_inds.reshape(-1)).float().mean()

    if has_augment:
        # Unsupervised loss: each augmentation query should match its own augmentation prototype
        unsupervised_target_inds = torch.arange(0, n_augmentations_samples).to(device).long()
        unsupervised_loss = CrossEntropyLoss()(-unsupervised_dists, unsupervised_target_inds)
        _, y_hat_unsupervised = (-unsupervised_dists).max(1)
        acc_val_unsupervised = torch.eq(y_hat_unsupervised, unsupervised_target_inds.reshape(-1)).float().mean()

        # Final loss: convex combination of the supervised and unsupervised terms
        assert 0 <= supervised_loss_share <= 1
        final_loss = supervised_loss_share * supervised_loss + (1 - supervised_loss_share) * unsupervised_loss

        return final_loss, {
            "metrics": {
                "supervised_acc": acc_val_supervised.item(),
                "unsupervised_acc": acc_val_unsupervised.item(),
                "supervised_loss": supervised_loss.item(),
                "unsupervised_loss": unsupervised_loss.item(),
                "supervised_loss_share": supervised_loss_share,
                "final_loss": final_loss.item(),
            },
            "supervised_dists": supervised_dists,
            "unsupervised_dists": unsupervised_dists,
            "target": target_inds
        }

    return supervised_loss, {
        "metrics": {
            "acc": acc_val_supervised.item(),
            "loss": supervised_loss.item(),
        },
        "dists": supervised_dists,
        "target": target_inds
    }
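# `supervised_loss_share` above is expected in [0, 1] and is supplied by the training loop.
# A hypothetical annealing schedule (an illustration, not necessarily the one used in this
# repository) would ramp the supervised share up over training, so early steps lean on the
# unsupervised consistency term and later steps on the labeled episodes:
def supervised_loss_share_fn(current_step: int, max_steps: int, gamma: float = 1.0) -> float:
    # Starts near 0 (mostly unsupervised) and reaches 1 (fully supervised) at max_steps
    progress = min(max(current_step / max_steps, 0.0), 1.0)
    return progress ** gamma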
def loss_softkmeans(self, sample):
    xs = sample['xs']  # support
    xq = sample['xq']  # query
    xu = sample['xu']  # unlabeled

    n_class = len(xs)
    assert len(xq) == n_class
    n_support = len(xs[0])
    n_query = len(xq[0])

    target_inds = torch.arange(0, n_class).view(n_class, 1, 1).expand(n_class, n_query, 1).long()
    target_inds = Variable(target_inds, requires_grad=False).to(device)

    x = ([item["sentence"] for xs_ in xs for item in xs_]
         + [item["sentence"] for xq_ in xq for item in xq_]
         + [item["sentence"] for item in xu])
    z = self.encoder.embed_sentences(x)
    z_dim = z.size(-1)

    zs = z[:n_class * n_support]
    z_proto = z[:n_class * n_support].view(n_class, n_support, z_dim).mean(1)
    zq = z[n_class * n_support:(n_class * n_support) + (n_class * n_query)]
    zu = z[(n_class * n_support) + (n_class * n_query):]

    # Soft-assign support and unlabeled points to the initial prototypes
    distances_to_proto = euclidean_dist(torch.cat((zs, zu)), z_proto)
    distances_to_proto_normed = torch.nn.Softmax(dim=-1)(-distances_to_proto)

    # Refine each prototype as the weighted mean of its support embeddings (weight 1)
    # and all unlabeled embeddings (weighted by their soft assignment to that class)
    refined_protos = list()
    for class_ix in range(n_class):
        class_points = torch.cat((zs[class_ix * n_support:(class_ix + 1) * n_support], zu))
        d = torch.cat((torch.ones(n_support).to(device),
                       distances_to_proto_normed[(n_class * n_support):, class_ix]))
        refined_proto = (class_points.t() * d).sum(1) / d.sum()
        refined_protos.append(refined_proto.view(1, -1))
    refined_protos = torch.cat(refined_protos)

    if self.metric == "euclidean":
        dists = euclidean_dist(zq, refined_protos)
    elif self.metric == "cosine":
        dists = (-cosine_similarity(zq, refined_protos) + 1) * 5
    else:
        raise NotImplementedError

    log_p_y = torch_functional.log_softmax(-dists, dim=1).view(n_class, n_query, -1)

    loss_val = -log_p_y.gather(2, target_inds).squeeze().view(-1).mean()
    _, y_hat = log_p_y.max(2)
    acc_val = torch.eq(y_hat, target_inds.squeeze()).float().mean()

    return loss_val, {
        'loss': loss_val.item(),
        "metrics": {
            "acc": acc_val.item(),
            "loss": loss_val.item(),
        },
        'dists': dists,
        'target': target_inds
    }
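# The prototype refinement in `loss_softkmeans` is a single soft k-means step: each refined
# prototype is the weighted mean of its labeled support embeddings (weight 1) and every
# unlabeled embedding (weighted by its softmax assignment to that class). A minimal standalone
# check of the weighted-mean expression used above, with made-up tensors:
if __name__ == "__main__":
    import torch

    points = torch.randn(6, 4)  # 6 embeddings of dimension 4
    weights = torch.tensor([1.0, 1.0, 0.3, 0.1, 0.5, 0.05])  # support weights = 1, soft assignments < 1
    refined = (points.t() * weights).sum(1) / weights.sum()
    expected = (points * weights.unsqueeze(1)).sum(0) / weights.sum()
    assert torch.allclose(refined, expected)  # same weighted mean, written two ways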
def train_ARSC_one_episode(
        self,
        data_path: str,
        n_iter: int = 100,
):
    self.train()

    episode = create_ARSC_train_episode(prefix=data_path, n_support=5, n_query=0, n_unlabeled=0)
    n_episode_classes = len(episode["xs"])
    loss_fn = nn.CrossEntropyLoss()

    episode_matrix = None
    episode_classifier = None
    if self.is_pp:
        # Prototype-projection head: initialise the class matrix with mean support embeddings
        with torch.no_grad():
            init_matrix = np.array([[
                self.encoder.forward([sentence]).squeeze().cpu().detach().numpy()
                for sentence in episode["xs"][c]
            ] for c in range(n_episode_classes)]).mean(1)
        episode_matrix = torch.Tensor(init_matrix).to(device)
        episode_matrix.requires_grad = True
        optimizer = torch.optim.Adam(list(self.parameters()) + [episode_matrix], lr=2e-5)
    else:
        # Linear classification head on top of the encoder
        episode_classifier = nn.Linear(in_features=self.hidden_dim, out_features=n_episode_classes).to(device)
        optimizer = torch.optim.Adam(list(self.parameters()) + list(episode_classifier.parameters()), lr=2e-5)

    # Train on support
    iter_bar = tqdm.tqdm(range(n_iter))
    losses = list()
    accuracies = list()

    for _ in iter_bar:
        optimizer.zero_grad()
        sentences = [sentence for sentence_list in episode["xs"] for sentence in sentence_list]
        labels = torch.Tensor([ix for ix, sl in enumerate(episode["xs"]) for _ in sl]).long().to(device)

        z = self.encoder(sentences)
        if self.is_pp:
            if self.metric == "cosine":
                z = cosine_similarity(z, episode_matrix) * 5
            elif self.metric == "euclidean":
                z = -euclidean_dist(z, episode_matrix)
            else:
                raise NotImplementedError
        else:
            z = self.dropout(z)
            z = episode_classifier(z)

        loss = loss_fn(input=z, target=labels)
        acc = (z.argmax(1) == labels).float().mean()
        loss.backward()
        optimizer.step()

        iter_bar.set_description(f"{loss.item():.3f} | {acc.item():.3f}")
        losses.append(loss.item())
        accuracies.append(acc.item())

    return {"loss": np.mean(losses), "acc": np.mean(accuracies)}
def test_one_episode(
        self,
        support_data_dict: Dict[str, List[str]],
        query_data_dict: Dict[str, List[str]],
        sentence_to_embedding_dict: Dict,
        batch_size: int = 4,
        n_iter: int = 1000,
        summary_writer: SummaryWriter = None,
        summary_tag_prefix: str = None,
):
    # Check data integrity
    assert set(support_data_dict.keys()) == set(query_data_dict.keys())

    # Freeze encoder
    self.encoder.eval()

    episode_classes = sorted(set(support_data_dict.keys()))
    n_episode_classes = len(episode_classes)
    class_to_ix = {c: ix for ix, c in enumerate(episode_classes)}
    ix_to_class = {ix: c for ix, c in enumerate(episode_classes)}

    support_data_list = [{
        "sentence": sentence,
        "label": label
    } for label, sentences in support_data_dict.items() for sentence in sentences]
    # Repeat the support set so that every iteration has a full batch
    support_data_list = (support_data_list * batch_size * n_iter)[:(batch_size * n_iter)]

    loss_fn = nn.CrossEntropyLoss()

    episode_matrix = None
    episode_classifier = None
    if self.is_pp:
        # Initialise the class matrix with the mean pre-computed support embeddings
        init_matrix = np.array([[
            sentence_to_embedding_dict[sentence].ravel()
            for sentence in support_data_dict[ix_to_class[c]]
        ] for c in range(n_episode_classes)]).mean(1)
        episode_matrix = torch.Tensor(init_matrix).to(device)
        episode_matrix.requires_grad = True
        optimizer = torch.optim.Adam([episode_matrix], lr=1e-3)
    else:
        episode_classifier = nn.Linear(in_features=self.hidden_dim, out_features=n_episode_classes).to(device)
        optimizer = torch.optim.Adam(list(episode_classifier.parameters()), lr=1e-3)

    # Train on support
    iter_bar = tqdm.tqdm(range(n_iter))
    for iteration in iter_bar:
        optimizer.zero_grad()
        batch = support_data_list[iteration * batch_size:iteration * batch_size + batch_size]
        batch_sentences = [d['sentence'] for d in batch]
        batch_embeddings = torch.Tensor([sentence_to_embedding_dict[s] for s in batch_sentences]).to(device)
        batch_labels = torch.Tensor([class_to_ix[d['label']] for d in batch]).long().to(device)

        # Use pre-computed embeddings instead of re-encoding the sentences
        # z = self.encoder(batch_sentences)
        z = batch_embeddings
        if self.is_pp:
            if self.metric == "cosine":
                z = cosine_similarity(z, episode_matrix) * 5
            elif self.metric == "euclidean":
                z = -euclidean_dist(z, episode_matrix)
            else:
                raise NotImplementedError
        else:
            z = self.dropout(z)
            z = episode_classifier(z)

        loss = loss_fn(input=z, target=batch_labels)
        acc = (z.argmax(1) == batch_labels).float().mean()
        loss.backward()
        optimizer.step()

        iter_bar.set_description(f"{loss.item():.3f} | {acc.item():.3f}")
        if summary_writer:
            summary_writer.add_scalar(tag=f'{summary_tag_prefix}_loss', global_step=iteration, scalar_value=loss.item())
            summary_writer.add_scalar(tag=f'{summary_tag_prefix}_acc', global_step=iteration, scalar_value=acc.item())

    # Predict on query
    self.eval()
    if not self.is_pp:
        episode_classifier.eval()

    query_data_list = [{
        "sentence": sentence,
        "label": label
    } for label, sentences in query_data_dict.items() for sentence in sentences]
    query_labels = torch.Tensor([class_to_ix[d['label']] for d in query_data_list]).long().to(device)

    logits = list()
    with torch.no_grad():
        for ix in range(0, len(query_data_list), 16):
            batch = query_data_list[ix:ix + 16]
            batch_sentences = [d['sentence'] for d in batch]
            batch_embeddings = torch.Tensor([sentence_to_embedding_dict[s] for s in batch_sentences]).to(device)

            # z = self.encoder(batch_sentences)
            z = batch_embeddings
            if self.is_pp:
                if self.metric == "cosine":
                    z = cosine_similarity(z, episode_matrix) * 5
                elif self.metric == "euclidean":
                    z = -euclidean_dist(z, episode_matrix)
                else:
                    raise NotImplementedError
            else:
                z = episode_classifier(z)
            logits.append(z)
    logits = torch.cat(logits, dim=0)
    y_hat = logits.argmax(1)

    y_pred = logits.argmax(1).cpu().detach().numpy()
    probas_pred = logits.cpu().detach().numpy()
    # Softmax over the logits to obtain class probabilities
    probas_pred = np.exp(probas_pred) / np.exp(probas_pred).sum(1)[:, None]
    y_true = query_labels.cpu().detach().numpy()

    # Log one qualitative example of a correct and an incorrect prediction
    if summary_writer:
        import uuid
        tag = str(uuid.uuid4())
        summary_writer.add_text(tag=tag,
                                text_string=json.dumps(ix_to_class, ensure_ascii=False),
                                global_step=0)

        where_ok = np.where(y_pred == y_true)[0]
        if len(where_ok):
            # Looking for OK but with less confidence (not too easy)
            ok_idx = sorted(where_ok, key=lambda x: probas_pred[x][y_pred[x]])[0]
            ok_sentence = query_data_list[ok_idx]['sentence']
            ok_prediction = ix_to_class[y_pred[ok_idx]]
            ok_label = query_data_list[ok_idx]['label']
            summary_writer.add_text(tag=tag,
                                    text_string=json.dumps({
                                        "sentence": ok_sentence,
                                        "true_label": ok_label,
                                        "predicted_label": ok_prediction,
                                        "p": probas_pred[ok_idx].tolist(),
                                    }),
                                    global_step=1)

        where_ko = np.where(y_pred != y_true)[0]
        if len(where_ko):
            # Looking for KO but with most confidence
            ko_idx = sorted(where_ko, key=lambda x: probas_pred[x][y_pred[x]], reverse=True)[0]
            ko_sentence = query_data_list[ko_idx]['sentence']
            ko_prediction = ix_to_class[y_pred[ko_idx]]
            ko_label = query_data_list[ko_idx]['label']
            summary_writer.add_text(tag=tag,
                                    text_string=json.dumps({
                                        "sentence": ko_sentence,
                                        "true_label": ko_label,
                                        "predicted_label": ko_prediction,
                                        "p": probas_pred[ko_idx].tolist()
                                    }),
                                    global_step=2)

    loss = loss_fn(input=logits, target=query_labels)
    acc = (y_hat == query_labels).float().mean()

    return {"loss": loss.item(), "acc": acc.item()}
def train_model(self,
                data_dict: Dict[str, List[str]],
                summary_writer: SummaryWriter = None,
                n_epoch: int = 400,
                batch_size: int = 16,
                log_every: int = 10):
    self.train()

    training_classes = sorted(set(data_dict.keys()))
    n_training_classes = len(training_classes)
    class_to_ix = {c: ix for ix, c in enumerate(training_classes)}

    training_data_list = [{
        "sentence": sentence,
        "label": label
    } for label, sentences in data_dict.items() for sentence in sentences]

    training_matrix = None
    training_classifier = None
    if self.is_pp:
        training_matrix = torch.randn(n_training_classes, self.hidden_dim, requires_grad=True, device=device)
        optimizer = torch.optim.Adam(list(self.parameters()) + [training_matrix], lr=2e-5)
    else:
        training_classifier = nn.Linear(in_features=self.hidden_dim, out_features=n_training_classes).to(device)
        optimizer = torch.optim.Adam(list(self.parameters()) + list(training_classifier.parameters()), lr=2e-5)

    n_samples = len(training_data_list)
    loss_fn = nn.CrossEntropyLoss()
    global_step = 0

    # Metrics
    training_losses = list()
    training_accuracies = list()

    for _ in tqdm.tqdm(range(n_epoch)):
        random.shuffle(training_data_list)
        for ix in tqdm.tqdm(range(0, n_samples, batch_size)):
            optimizer.zero_grad()
            torch.cuda.empty_cache()
            batch_items = training_data_list[ix:ix + batch_size]
            batch_sentences = [d['sentence'] for d in batch_items]
            batch_labels = torch.Tensor([class_to_ix[d['label']] for d in batch_items]).long().to(device)

            z = self.encoder(batch_sentences)
            if self.is_pp:
                if self.metric == "cosine":
                    z = cosine_similarity(z, training_matrix) * 5
                elif self.metric == "euclidean":
                    z = -euclidean_dist(z, training_matrix)
                else:
                    raise NotImplementedError
            else:
                z = self.dropout(z)
                z = training_classifier(z)

            loss = loss_fn(input=z, target=batch_labels)
            acc = (z.argmax(1) == batch_labels).float().mean()
            loss.backward()
            optimizer.step()
            global_step += 1

            training_losses.append(loss.item())
            training_accuracies.append(acc.item())

            if (global_step % log_every) == 0:
                if summary_writer:
                    summary_writer.add_scalar(tag="loss",
                                              global_step=global_step,
                                              scalar_value=np.mean(training_losses))
                    summary_writer.add_scalar(tag="acc",
                                              global_step=global_step,
                                              scalar_value=np.mean(training_accuracies))
                # Empty metrics
                training_losses = list()
                training_accuracies = list()
def test_model_ARSC(self,
                    data_path: str,
                    n_iter: int = 1000,
                    valid_summary_writer: SummaryWriter = None,
                    test_summary_writer: SummaryWriter = None,
                    eval_every: int = 100):
    self.eval()
    tasks = get_ARSC_test_tasks(prefix=data_path)
    metrics = list()

    logger.info("Embedding sentences...")
    sentences_to_embed = [
        s for task in tasks
        for sentences_lists in task['xs'] + task['x_test'] + task['x_valid']
        for s in sentences_lists
    ]
    # Pre-compute one embedding per sentence with the frozen encoder
    # sentence_to_embedding_dict = {s: np.random.randn(768) for s in tqdm.tqdm(sentences_to_embed)}
    sentence_to_embedding_dict = {
        s: self.encoder.forward([s]).cpu().detach().numpy().squeeze()
        for s in tqdm.tqdm(sentences_to_embed)
    }

    for ix_task, task in enumerate(tasks):
        task_metrics = list()
        n_episode_classes = 2
        loss_fn = nn.CrossEntropyLoss()

        episode_matrix = None
        episode_classifier = None
        if self.is_pp:
            # Initialise the class matrix with the mean pre-computed support embeddings
            with torch.no_grad():
                init_matrix = np.array([[
                    sentence_to_embedding_dict[sentence]
                    for sentence in task["xs"][c]
                ] for c in range(n_episode_classes)]).mean(1)
            episode_matrix = torch.Tensor(init_matrix).to(device)
            episode_matrix.requires_grad = True
            optimizer = torch.optim.Adam([episode_matrix], lr=2e-5)
        else:
            episode_classifier = nn.Linear(in_features=self.hidden_dim, out_features=n_episode_classes).to(device)
            optimizer = torch.optim.Adam(list(episode_classifier.parameters()), lr=2e-5)

        # Train on support
        iter_bar = tqdm.tqdm(range(n_iter))
        losses = list()
        accuracies = list()

        for iteration in iter_bar:
            optimizer.zero_grad()
            sentences = [sentence for sentence_list in task["xs"] for sentence in sentence_list]
            labels = torch.Tensor([ix for ix, sl in enumerate(task["xs"]) for _ in sl]).long().to(device)
            batch_embeddings = torch.Tensor([sentence_to_embedding_dict[s] for s in sentences]).to(device)

            # Use pre-computed embeddings instead of re-encoding the sentences
            # z = self.encoder(sentences)
            z = batch_embeddings
            if self.is_pp:
                if self.metric == "cosine":
                    z = cosine_similarity(z, episode_matrix) * 5
                elif self.metric == "euclidean":
                    z = -euclidean_dist(z, episode_matrix)
                else:
                    raise NotImplementedError
            else:
                z = self.dropout(z)
                z = episode_classifier(z)

            loss = loss_fn(input=z, target=labels)
            acc = (z.argmax(1) == labels).float().mean()
            loss.backward()
            optimizer.step()

            iter_bar.set_description(f"{loss.item():.3f} | {acc.item():.3f}")
            losses.append(loss.item())
            accuracies.append(acc.item())

            if (eval_every and (iteration + 1) % eval_every == 0) or (not eval_every and iteration + 1 == n_iter):
                self.eval()
                if not self.is_pp:
                    episode_classifier.eval()

                # --------------
                # VALIDATION
                # --------------
                valid_query_data_list = [{
                    "sentence": sentence,
                    "label": label
                } for label, sentences in enumerate(task["x_valid"]) for sentence in sentences]
                valid_query_labels = torch.Tensor([d['label'] for d in valid_query_data_list]).long().to(device)

                logits = list()
                with torch.no_grad():
                    for ix in range(0, len(valid_query_data_list), 16):
                        batch = valid_query_data_list[ix:ix + 16]
                        batch_sentences = [d['sentence'] for d in batch]
                        batch_embeddings = torch.Tensor([sentence_to_embedding_dict[s] for s in batch_sentences]).to(device)

                        # z = self.encoder(batch_sentences)
                        z = batch_embeddings
                        if self.is_pp:
                            if self.metric == "cosine":
                                z = cosine_similarity(z, episode_matrix) * 5
                            elif self.metric == "euclidean":
                                z = -euclidean_dist(z, episode_matrix)
                            else:
                                raise NotImplementedError
                        else:
                            z = episode_classifier(z)
                        logits.append(z)
                logits = torch.cat(logits, dim=0)
                y_hat = logits.argmax(1)

                valid_loss = loss_fn(input=logits, target=valid_query_labels)
                valid_acc = (y_hat == valid_query_labels).float().mean()

                # --------------
                # TEST
                # --------------
                test_query_data_list = [{
                    "sentence": sentence,
                    "label": label
                } for label, sentences in enumerate(task["x_test"]) for sentence in sentences]
                test_query_labels = torch.Tensor([d['label'] for d in test_query_data_list]).long().to(device)

                logits = list()
                with torch.no_grad():
                    for ix in range(0, len(test_query_data_list), 16):
                        batch = test_query_data_list[ix:ix + 16]
                        batch_sentences = [d['sentence'] for d in batch]
                        batch_embeddings = torch.Tensor([sentence_to_embedding_dict[s] for s in batch_sentences]).to(device)

                        # z = self.encoder(batch_sentences)
                        z = batch_embeddings
                        if self.is_pp:
                            if self.metric == "cosine":
                                z = cosine_similarity(z, episode_matrix) * 5
                            elif self.metric == "euclidean":
                                z = -euclidean_dist(z, episode_matrix)
                            else:
                                raise NotImplementedError
                        else:
                            z = episode_classifier(z)
                        logits.append(z)
                logits = torch.cat(logits, dim=0)
                y_hat = logits.argmax(1)

                test_loss = loss_fn(input=logits, target=test_query_labels)
                test_acc = (y_hat == test_query_labels).float().mean()

                # -- RETURN METRICS
                task_metrics.append({
                    "test": {
                        "loss": test_loss.item(),
                        "acc": test_acc.item()
                    },
                    "valid": {
                        "loss": valid_loss.item(),
                        "acc": valid_acc.item()
                    },
                    "step": iteration + 1
                })

                # if valid_summary_writer:
                #     valid_summary_writer.add_scalar(tag=f'loss', global_step=ix_task, scalar_value=valid_loss.item())
                #     valid_summary_writer.add_scalar(tag=f'acc', global_step=ix_task, scalar_value=valid_acc.item())
                # if test_summary_writer:
                #     test_summary_writer.add_scalar(tag=f'loss', global_step=ix_task, scalar_value=test_loss.item())
                #     test_summary_writer.add_scalar(tag=f'acc', global_step=ix_task, scalar_value=test_acc.item())

        metrics.append(task_metrics)
    return metrics
def loss(self, sample):
    """
    :param sample: {
        "xs": [
            [support_A_1, support_A_2, ...],
            [support_B_1, support_B_2, ...],
            [support_C_1, support_C_2, ...],
            ...
        ],
        "xq": [
            [query_A_1, query_A_2, ...],
            [query_B_1, query_B_2, ...],
            [query_C_1, query_C_2, ...],
            ...
        ]
    }
    :return:
    """
    xs = sample["xs"]  # support
    xq = sample["xq"]  # query

    n_class = len(xs)
    assert len(xq) == n_class
    n_support = len(xs[0])
    n_query = len(xq[0])

    x = [item for xs_ in xs for item in xs_] + [item for xq_ in xq for item in xq_]
    z = self.encoder.forward(x)
    z_support = z[:n_class * n_support]
    z_query = z[n_class * n_support:]

    if self.metric == "euclidean":
        similarities = -euclidean_dist(z_query, z_support)
    elif self.metric == "cosine":
        similarities = cosine_similarity(z_query, z_support) * 5
    else:
        raise NotImplementedError

    # Average over support samples
    distances_from_query_to_classes = torch.cat([
        similarities[:, c * n_support:(c + 1) * n_support].mean(1).view(1, -1)
        for c in range(n_class)
    ]).T

    true_labels = torch.zeros_like(distances_from_query_to_classes)
    for ix_class, class_query_sentences in enumerate(xq):
        for ix_sentence, sentence in enumerate(class_query_sentences):
            true_labels[ix_class * n_query + ix_sentence, ix_class] = 1

    loss_fn = nn.CrossEntropyLoss()
    loss_val = loss_fn(distances_from_query_to_classes, true_labels.argmax(1))
    acc_val = (true_labels.argmax(1) == distances_from_query_to_classes.argmax(1)).float().mean()

    return loss_val, {
        "loss": loss_val.item(),
        "metrics": {
            "acc": acc_val.item(),
            "loss": loss_val.item(),
        },
        "y_hat": distances_from_query_to_classes.argmax(1).cpu().detach().numpy()
    }