def trainer():
    return train.Trainer(dataset=DummyDataset(),
                         train_sampler=BaseSampler(),
                         val_sampler=BaseSampler(),
                         model=DummyModel(),
                         loss_fn=NLLLoss(),
                         metric_fn=NLLLoss(),
                         optimizer=Adam,
                         extra_validation_metrics=[NLLLoss()] * 3)
def __init__(self, name: Optional[str] = None):
    """
    Constructor.

    Args:
        name: Name of the module (DEFAULT: None)
    """
    # Call the base constructors.
    # Serialization.__init__(self, name=name)
    torch_NLLLoss.__init__(self)
def fit_model(self, train_csv, valid_csv, test_csv, save_path=None,
              save_model='dialog_retrieval_model.pth', save_final_model=False):
    train_loader, valid_loader, test_loader = loading_retrieval_data(
        train_csv, valid_csv, test_csv,
        max_len=self.seq_len,
        word_sequence=self.word_sequence,
        batch_size=self.batch_size)
    model = SANNetwork(vocab_size=len(self.word_sequence.word_dict),
                       embed_size=self.embed_size,
                       rnn_hidden_size=self.rnn_hidden_size,
                       rnn_model=self.rnn_model,
                       output_size=self.output_size,
                       use_bidirectional=self.use_bidirectional,
                       dropout=self.drop_out)
    summary(model)
    self.model = model
    optimizer = Adam(self.model.parameters(), lr=self.lr,
                     weight_decay=self.weight_decay)
    criterion = NLLLoss()
    self.model.fit(train_loader=train_loader,
                   valid_loader=valid_loader,
                   optimizer=optimizer,
                   criterion=criterion,
                   device=self.device,
                   epochs=self.epochs,
                   save_dir=save_path,
                   model_file=save_model,
                   save_final_model=save_final_model)
    self.model.test(test_loader=test_loader, criterion=criterion,
                    device=self.device)
def __init__(self, args, encoder, decoder, vocab):
    self.encoder = encoder
    self.decoder = decoder
    self.enc_optimizer = self.add_optimizers(self.encoder, args)
    self.dec_optimizer = self.add_optimizers(self.decoder, args)
    self.vocab = vocab
    self.create_embeddings()
    self.start_token = vocab.word_to_ind['<s>']
    self.end_token = vocab.word_to_ind['</s>']
    # self.evaluator = evaluator
    self.train_data = DialogueBatcher(vocab, "train")
    self.val_data = DialogueBatcher(vocab, "valid")
    # self.test_data = DialogueBatcher(vocab, "test")
    self.summary_dir = args.summary_dir
    self.verbose = args.verbose
    self.criterion = NLLLoss()
    self.teach_ratio = args.teacher_forcing_ratio
    self.grad_clip = args.grad_clip
    self.train_iterations = self.train_data.num_per_epoch * args.min_epochs
    self.val_iterations = self.val_data.num_per_epoch * args.min_epochs
    self.print_every = args.print_every
    self.val_every = args.val_every
def __init__(self, in_dim, channels, kernel_size, layers, filters,
             dist_size, masked_conv_class):
    super().__init__()
    self.in_dim = in_dim
    self.channels = channels
    self.kernel_size = kernel_size
    self.filters = filters
    self.layers = layers
    self.dist_size = dist_size
    self.mconv = masked_conv_class
    # Padding that preserves the spatial size for an odd kernel.
    p = int((self.kernel_size - 1) / 2)
    self.net = ModuleList()
    self.net.append(
        self.mconv('A', self.channels, self.filters, self.kernel_size, p))
    self.net.append(ReLU())
    for _ in range(self.layers - 1):
        self.net.append(
            ResBlock('B', self.filters, self.filters, self.kernel_size,
                     self.mconv))
    self.net.append(self.mconv('B', self.filters, self.filters, 1, 0))
    self.net.append(ReLU())
    self.net.append(
        self.mconv('B', self.filters, self.dist_size * self.channels, 1, 0))
    self.log_softmax = LogSoftmax(dim=2)
    self.loss = NLLLoss(reduction='sum')
    print(self)
def hyper_parameters_grid_search(base_folder: str, file_base_name: str,
                                 num_to_class: Dict):
    batches = [32, 64, 128, 256]
    lrs = [0.01, 0.001, 0.0001]
    epoch_options = [10, 15, 20]  # renamed so the loop variable below does not shadow it
    loss_funcs = [NLLLoss(), CrossEntropyLoss()]
    best_values = {'loss': 0, 'accuracy': 0, 'predictions': None, 'index': 0}
    # 4 * 3 * 3 * 2 = 72 runs in total.
    for i, (batch_size, lr, epochs, loss_func) in enumerate(
            itertools.product(batches, lrs, epoch_options, loss_funcs)):
        print(f'Run #{i + 1} --> batch: {batch_size}, lr: {lr}, '
              f'epochs: {epochs}, loss_func: {loss_func}')
        loss, accuracy, preds = main(batch_size, lr, epochs, loss_func,
                                     base_folder)
        write_predictions_to_file(preds, f'{file_base_name}_{i + 1}',
                                  num_to_class)
        if accuracy > best_values['accuracy']:
            best_values['index'] = i + 1  # match the 1-based run numbers printed above
            best_values['loss'] = loss
            best_values['accuracy'] = accuracy
            best_values['predictions'] = preds
    print(f"Best: run #{best_values['index']} --> "
          f"loss: {best_values['loss']}, accuracy: {best_values['accuracy']}")
    return best_values
def show_dialogues(val_data, encoder, decoder, task):
    encoder.eval()
    decoder.eval()
    dialogues = data_io.select_consecutive_pairs(val_data, 5)
    for i, dialog in enumerate(dialogues):
        print("Dialogue Sample {} ------------".format(i))
        for j, turn in enumerate(dialog):
            input_variable, output_variable = turn
            _, predictions, _ = run_inference(encoder, decoder,
                                              input_variable, output_variable,
                                              criterion=NLLLoss(),
                                              teach_ratio=0)
            sources = input_variable.data.tolist()
            targets = output_variable.data.tolist()
            source_tokens = [vocab.index_to_word(s[0], task) for s in sources]
            target_tokens = [vocab.index_to_word(t[0], task) for t in targets]
            pred_tokens = [vocab.index_to_word(p, task) for p in predictions]
            source = " ".join(source_tokens[:-1])  # Remove the <EOS>
            target = " ".join(target_tokens[:-1])
            pred = " ".join(pred_tokens[:-1])
            print("User Query: {0}".format(source))
            print("Target Response: {0}".format(target))
            print("Predicted Response: {0}".format(pred))
            print('')
def PN_train(train_loader, model, optimizer, writer, iter_counter, alpha):
    test_shot = model.shots[-1]
    way = model.way
    # Episode labels: each consecutive block of `test_shot` queries shares a class.
    target = torch.LongTensor(
        [i // test_shot for i in range(test_shot * way)]).cuda()
    criterion = NLLLoss().cuda()
    criterion_part = BCEWithLogitsLoss().cuda()
    lr = optimizer.param_groups[0]['lr']
    writer.add_scalar('lr', lr, iter_counter)
    avg_proto_loss = 0
    avg_heatmap_loss = 0
    avg_total_loss = 0
    avg_acc = 0
    for i, ((inp, mask), _) in enumerate(train_loader):
        iter_counter += 1
        inp = inp.cuda()
        mask = mask.cuda()
        if iter_counter % 1000 == 0:
            model.eval()
            util.visualize(model, writer, iter_counter, inp[:9], mask[:9])
            model.train()
        log_prediction, heatmap_logits = model(inp, mask)
        loss_heatmap = criterion_part(heatmap_logits, mask)
        loss_proto = criterion(log_prediction, target)
        loss = alpha * loss_heatmap + loss_proto
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        _, max_index = torch.max(log_prediction, 1)
        acc = 100 * torch.sum(torch.eq(max_index, target)).item() / test_shot / way
        avg_acc += acc
        avg_total_loss += loss.item()
        avg_proto_loss += loss_proto.item()
        avg_heatmap_loss += loss_heatmap.item()
    avg_total_loss = avg_total_loss / (i + 1)
    avg_proto_loss = avg_proto_loss / (i + 1)
    avg_heatmap_loss = avg_heatmap_loss / (i + 1)
    avg_acc = avg_acc / (i + 1)
    writer.add_scalar('total_loss', avg_total_loss, iter_counter)
    writer.add_scalar('proto_loss', avg_proto_loss, iter_counter)
    writer.add_scalar('heatmap_loss', avg_heatmap_loss, iter_counter)
    writer.add_scalar('train_acc', avg_acc, iter_counter)
    return iter_counter, avg_acc
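# A minimal sketch of the episode target layout built above (illustrative
# values, not the author's code): with way=3 and test_shot=2 the query labels
# come out as [0, 0, 1, 1, 2, 2].
import torch

way, test_shot = 3, 2
target = torch.LongTensor([i // test_shot for i in range(test_shot * way)])
print(target.tolist())  # [0, 0, 1, 1, 2, 2]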
def __init__(self, input_size, hidden_size, n_layers, batch_size,
             learning_rate):
    self.batch_size = batch_size
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.n_layers = n_layers
    self.learning_rate = learning_rate
    self.device = 'cuda:0' if torch.cuda.is_available() else "cpu"
    dataset = TranslateDataset()
    self.dataloader = DataLoader(dataset=dataset,
                                 batch_size=self.batch_size,
                                 shuffle=True)
    self.char2index, self.index2char = dataset.char2index, dataset.index2char
    self.vocab_size = dataset.len
    self.encoder = EncoderRNN(self.input_size, self.hidden_size,
                              self.vocab_size, self.n_layers, self.batch_size)
    self.decoder = DecoderRNN(self.input_size, self.hidden_size,
                              self.vocab_size, self.n_layers, self.batch_size)
    self.encoder.to(self.device)
    self.decoder.to(self.device)
    self.loss = NLLLoss()
    self.encoder_optim = torch.optim.SGD(self.encoder.parameters(),
                                         lr=self.learning_rate)
    # The decoder optimizer must own the decoder's parameters, not the
    # encoder's (the original also misspelled the attribute as `deoder_optim`).
    self.decoder_optim = torch.optim.SGD(self.decoder.parameters(),
                                         lr=self.learning_rate)
def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor,
            token_type_ids: torch.Tensor, intent_label: torch.Tensor,
            example_input: torch.Tensor, example_mask: torch.Tensor,
            example_token_types: torch.Tensor,
            example_intents: torch.Tensor):
    example_pooled_output = self.encode(input_ids=example_input,
                                        attention_mask=example_mask,
                                        token_type_ids=example_token_types)
    pooled_output = self.encode(input_ids=input_ids,
                                attention_mask=attention_mask,
                                token_type_ids=token_type_ids)
    pooled_output = self.dropout(pooled_output)
    # Similarity of each query to each example, normalized over the examples.
    probs = torch.softmax(pooled_output.mm(example_pooled_output.t()), dim=-1)
    # Pool example-level probabilities into per-intent probabilities.
    intent_probs = 1e-6 + torch.zeros(
        probs.size(0), self.num_intent_labels).cuda().scatter_add(
            -1, example_intents.unsqueeze(0).repeat(probs.size(0), 1), probs)

    # Compute losses if labels provided
    if intent_label is not None:
        loss_fct = NLLLoss()
        intent_lp = torch.log(intent_probs)
        intent_loss = loss_fct(intent_lp.view(-1, self.num_intent_labels),
                               intent_label.type(torch.long))
    else:
        intent_loss = torch.tensor(0)

    return intent_probs, intent_loss
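# Minimal standalone sketch of the scatter_add pooling used above, with
# hypothetical sizes: similarities over 4 examples are summed into 3 intent
# buckets per query.
import torch

probs = torch.softmax(torch.randn(2, 4), dim=-1)   # (queries, examples)
example_intents = torch.tensor([0, 2, 1, 2])       # intent id of each example
intent_probs = 1e-6 + torch.zeros(2, 3).scatter_add(
    -1, example_intents.unsqueeze(0).repeat(2, 1), probs)
print(intent_probs.sum(dim=-1))  # ~1 per query, up to the 1e-6 smoothing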
def child_adience_ldl_loss(model_out, gt):
    dist, minor, adience = model_out
    ldl, minor_gt, adience_gt = gt
    lf = NLLLoss(reduction='mean')
    kl = KLDivLoss(reduction='batchmean')
    return kl(dist, ldl) + lf(minor, minor_gt) + lf(adience, adience_gt)
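# A hedged smoke test for child_adience_ldl_loss, assuming all three heads
# emit log-probabilities (as KLDivLoss and NLLLoss both expect); the shapes
# (8 samples, 5 distribution bins, 3 and 8 classes) are hypothetical.
import torch

dist = torch.log_softmax(torch.randn(8, 5), dim=-1)
minor = torch.log_softmax(torch.randn(8, 3), dim=-1)
adience = torch.log_softmax(torch.randn(8, 8), dim=-1)
ldl = torch.softmax(torch.randn(8, 5), dim=-1)   # target distribution (probabilities)
minor_gt = torch.randint(0, 3, (8,))
adience_gt = torch.randint(0, 8, (8,))
print(child_adience_ldl_loss((dist, minor, adience), (ldl, minor_gt, adience_gt)))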
def create_model(word_sequence, output_size):
    # Build the LSTM model.
    if config.model.lower() == 'lstm':
        model = LSTM_Model(len(word_sequence),
                           embed_size=config.lstm_embed_size,
                           hidden_size=config.lstm_hidden_size,
                           output_size=output_size,
                           num_layers=config.lstm_num_layers,
                           drop_out=config.dropout)
    # Build the FNN model.
    elif config.model.lower() == 'fnn':
        model = FNN_Model(len(word_sequence.word_dict),
                          output_size=output_size,
                          hidden_num=config.fnn_hidden,
                          dropout=config.dropout)
    else:
        # Without this branch, `model` would be unbound for unknown types.
        raise ValueError(f"Unknown model type: {config.model}")

    print("---------------------------- model summary ----------------------------------")
    print(model)
    print("-----------------------------------------------------------------------------")
    print()

    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    criterion = NLLLoss()
    return (model, optimizer, criterion)
def forward(self, sample: Dict[str, Any]) -> Dict[str, Any]:  # type: ignore
    """Forward pass of an embedder, encoder and decoder."""
    if "forward" in sample:
        raise RuntimeError("Forward already computed.")
    if "loss" in sample:
        raise RuntimeError("Loss already computed.")
    graph, etypes = sample[self.graph_field_name]
    features = [
        sample[field_name] for field_name in self.feature_field_names
    ]
    formatting_indexes = sample[self.indexes_field_name].indexes
    graph = self.graph_embedder(graph=graph, features=features)
    encodings = self.graph_encoder(graph=graph,
                                   feat=graph.ndata["x"],
                                   etypes=etypes)
    label_encodings = self.selector(tensor=encodings,
                                    indexes=formatting_indexes)
    projections = self.class_projection(label_encodings)
    softmaxed = self.softmax(projections)
    labels = sample[self.label_field_name]
    sample["forward"] = softmaxed
    if labels is not None:
        # Per-class weights derived from the batch composition.
        sample["loss"] = NLLLoss(weight=softmaxed.new([
            graph.batch_size,
            formatting_indexes.numel() - graph.batch_size
        ]))(softmaxed, labels)
    return sample
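# The `weight` argument of NLLLoss used above is a per-class weight vector,
# as in this hedged standalone sketch (the weights [2.0, 4.0] and shapes are
# hypothetical):
import torch
from torch.nn import NLLLoss

log_probs = torch.log_softmax(torch.randn(6, 2), dim=-1)
labels = torch.randint(0, 2, (6,))
print(NLLLoss(weight=torch.tensor([2.0, 4.0]))(log_probs, labels))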
def __init__(self, input_size, hidden_size, batch_size, learning_rate,
             num_epoch, method):
    dataset = Seq2SeqDataset()
    self.vocab = sorted(set(dataset.full_text))
    self.vocab_size = len(self.vocab)
    self.char2ind, self.ind2char = self.get_vocab()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = self.vocab_size
    self.method = method
    self.learning_rate = learning_rate
    self.batch_size = batch_size
    self.num_epoch = num_epoch
    self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
    self.dataloader = DataLoader(dataset=dataset,
                                 batch_size=batch_size,
                                 shuffle=True)
    self.encoder = Encoder(input_size, hidden_size, self.vocab_size)
    self.decoder = Decoder(hidden_size, self.output_size, method)
    self.encoder = self.encoder.to(self.device)
    self.decoder = self.decoder.to(self.device)
    self.loss_function = NLLLoss()
    self.encoder_optim = optim.Adam(self.encoder.parameters(),
                                    lr=self.learning_rate)
    self.decoder_optim = optim.Adam(self.decoder.parameters(),
                                    lr=self.learning_rate)
def train_PN_stage_1(train_loader, model, optimizer, writer, iter_counter,
                     alpha):
    lr = optimizer.param_groups[0]['lr']
    writer.add_scalar('lr', lr, iter_counter)
    criterion = NLLLoss().cuda()
    criterion_part = BCEWithLogitsLoss().cuda()
    avg_dynamic_loss = 0
    avg_heatmap_loss = 0
    avg_total_loss = 0
    avg_acc = 0
    for i, ((inp, mask), target) in enumerate(train_loader):
        iter_counter += 1
        batch_size = target.size(0)
        inp = inp.cuda()
        mask = mask.cuda()
        target = target.cuda()
        if iter_counter % 1000 == 0:
            model.eval()
            util.visualize(model, writer, iter_counter, inp[:9], mask[:9])
            model.train()
        log_prediction, heatmap_logits = model.forward_stage_1(inp, mask)
        loss_heatmap = criterion_part(heatmap_logits, mask)
        loss_dynamic = criterion(log_prediction, target)
        loss = alpha * loss_heatmap + loss_dynamic
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        _, max_index = torch.max(log_prediction, 1)
        acc = 100 * (torch.sum(torch.eq(max_index, target)).float()
                     / batch_size).item()
        avg_acc += acc
        avg_total_loss += loss.item()
        avg_dynamic_loss += loss_dynamic.item()
        avg_heatmap_loss += loss_heatmap.item()
    avg_total_loss = avg_total_loss / (i + 1)
    avg_dynamic_loss = avg_dynamic_loss / (i + 1)
    avg_heatmap_loss = avg_heatmap_loss / (i + 1)
    avg_acc = avg_acc / (i + 1)
    writer.add_scalar('total_loss', avg_total_loss, iter_counter)
    writer.add_scalar('dynamic_loss', avg_dynamic_loss, iter_counter)
    writer.add_scalar('heatmap_loss', avg_heatmap_loss, iter_counter)
    writer.add_scalar('train_acc', avg_acc, iter_counter)
    return iter_counter, avg_acc
def train(net, train_loader, test_loader, path='models/', epochs=10,
          plot_train=False):
    optimizer = optim.Adam(net.parameters(), lr=0.01)
    accumulate_grad_steps = 50
    nllloss = NLLLoss(ignore_index=-1)
    loss_func = partial(nll_loss_func, nllloss=nllloss)
    # klloss = KLDivLoss(reduction='batchmean')
    # loss_func = partial(kl_loss_func, klloss=klloss)
    net.train()
    device = net.device
    if net.use_cuda:  # the original misspelled this attribute as `use_coda`
        net.cuda()
    print("Training Started")
    test_loss_lst, test_acc_lst, train_loss_lst, train_acc_lst, time_lst = [], [], [], [], []
    best_acc = 0
    for epoch in range(epochs):
        t0 = time.time()
        weights = 0
        for i, sentence in enumerate(train_loader):
            headers = sentence[2].to(device)
            sentence_len = sentence[3][0]
            scores = net(sentence)
            # Weight each sentence's loss by its length so the accumulated
            # gradient averages over tokens rather than sentences.
            loss = loss_func(scores, headers)
            loss = loss * sentence_len
            weights += sentence_len
            if i % accumulate_grad_steps == 0:
                loss = loss / weights
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(net.parameters(), 0.5)
                optimizer.step()
                net.zero_grad()
                weights = 0
            else:
                loss.backward()
        test_acc, test_loss = predict(net, device, test_loader, loss_func)
        test_loss_lst.append(test_loss)
        test_acc_lst.append(test_acc)
        if plot_train:
            train_acc, train_loss = predict(net, device, train_loader,
                                            loss_func)
            train_loss_lst.append(train_loss)
            train_acc_lst.append(train_acc)
        if best_acc < test_acc and epoch > 5 and test_acc > 0.88:
            tmp_path = (path + '_epoch_' + str(epoch) + '_acc_'
                        + str(np.round(test_acc, 4)).replace('.', '') + '.pt')
            net.save(tmp_path)
            best_acc = test_acc
        ctime = (time.time() - t0) / 60
        time_lst.append(ctime)
        print(f"Epoch [{epoch + 1}/{epochs}] Completed \t "
              f"Test Loss: {test_loss:.3f} \t "
              f"Test Accuracy: {test_acc:.3f} \t Time: {ctime:.2f}")
    plot(test_acc_lst, test_loss_lst, time_lst, path + '_test_plot.png')
    if plot_train:
        plot(train_acc_lst, train_loss_lst, time_lst,
             path + '_train_plot.png')
def __init__(self, n_features, n_hidden_units, n_classes, lr=0.01,
             n_hidden_layers=1):
    super(GraphSAGE, self).__init__()
    self.convs = [SAGEConv(n_features, n_hidden_units)] + [
        SAGEConv(n_hidden_units, n_hidden_units)
        for _ in range(n_hidden_layers - 1)
    ]
    self.convs = torch.nn.Sequential(*self.convs)
    self.output = SAGEConv(n_hidden_units, n_classes)
    self.loss = NLLLoss()
    self.optimizer = Adam(self.parameters(), lr=lr, weight_decay=5e-4)
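# Hypothetical instantiation of the GraphSAGE model above; the Cora-sized
# dimensions (1433 features, 7 classes) are purely illustrative.
model = GraphSAGE(n_features=1433, n_hidden_units=16, n_classes=7,
                  n_hidden_layers=2, lr=0.01)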
def __init__(self, config, num_labels=2, num_splices=None):
    super(BertForSplicedSequenceClassification, self).__init__(
        config=config, num_labels=num_labels)
    self.num_splices = num_splices
    self.softmax = nn.Softmax(dim=1)
    self.loc_softmax = nn.Softmax(dim=0)
    # self.loc_weights = nn.Parameter(torch.ones(num_splices))
    self.loss_fct = NLLLoss()
def epoch_train(self):
    model = Seq2Seq()
    optimizer = torch.optim.SGD(model.parameters(), lr=self.learning_rate)
    loss_func = NLLLoss()
    for i in range(self.n_epoch):
        loss = self.train(model, optimizer, loss_func)
        self.eval('G')  # the original bound this to an unused variable
        print(loss)
def pow_loss(self, logits, label):
    # Normalize the squared logits into a distribution and take its log,
    # then score it with NLLLoss.
    result = torch.log(
        torch.pow(logits, 2)
        / torch.sum(torch.pow(logits, 2), dim=-1, keepdim=True))
    # piecewise = MyReLU.apply
    # result = piecewise(result) * -1
    loss_function = NLLLoss()
    result = loss_function(result, label)
    return result
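# Standalone sketch of the same computation with hypothetical inputs; the
# logits are kept strictly positive so the log stays finite.
import torch
from torch.nn import NLLLoss

logits = torch.rand(4, 10) + 1e-6
label = torch.randint(0, 10, (4,))
log_dist = torch.log(logits.pow(2) / logits.pow(2).sum(dim=-1, keepdim=True))
print(NLLLoss()(log_dist, label))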
def _create_xor_model():
    mlp = MLP(2, 10, 2, 2, False)
    loss_model = CompareModel(mlp, NLLLoss())
    data_model = DataModel(loss_model, {
        "train": XORDataset(train=True),
        "test": XORDataset(train=False)
    })
    model = ModelWrapper(data_model)
    return model
def consistency_loss(model, batch, loss_weight=1e-2, max_loss=5.0, mode="kl",
                     min_prob=0.75, **kwargs):
    min_prob = torch.Tensor([min_prob])[0].to(model.device)
    max_loss = torch.Tensor([max_loss])[0].to(model.device)
    x = batch["x"].reshape((-1,) + batch["x"].shape[2:]).to(model.device)
    letters = batch["letters"].reshape(
        (-1,) + batch["letters"].shape[2:]).to(model.device)
    output = model(letters, inputs=x)
    output = output.reshape(batch["x"].shape[:2] + output.shape[1:])
    n_paradigms, n_classes = output.shape[1], output.shape[-1]
    indexes = batch["lcs"].unsqueeze(dim=-1).repeat(
        1, 1, 1, n_classes).to(model.device)
    mask = batch["lcs_mask"].unsqueeze(dim=-1).repeat(
        1, 1, 1, n_classes).to(model.device)
    lcs_output = torch.gather(output, 2, indexes)
    if mode == "majority":
        # Use the majority vote over paradigms as the consistency target.
        _, lcs_labels = torch.max(lcs_output, dim=-1)
        lcs_labels_one_hot = torch.nn.functional.one_hot(
            lcs_labels, n_classes).int()
        _, lcs_votes = torch.max(torch.sum(lcs_labels_one_hot, dim=1),
                                 dim=-1)  # B * L
        target_labels = lcs_votes.unsqueeze(dim=1).repeat(
            1, n_paradigms, 1)  # B * Z * L
        loss = NLLLoss(reduction="none")(lcs_output.permute(0, 3, 1, 2),
                                         target_labels)
        loss = loss * batch["lcs_mask"]
        loss = nn.functional.relu(loss + torch.log(min_prob))
    else:
        # KL mode: pull each paradigm's distribution towards the mean
        # distribution, with the loss clamped to [-max_loss, max_loss].
        lcs_output_probs = torch.exp(lcs_output)
        mean_probs = lcs_output_probs.mean(dim=1)
        target_probs = mean_probs.unsqueeze(dim=1).repeat(
            1, n_paradigms, 1, 1)
        loss = torch.min(
            torch.max(-max_loss,
                      target_probs * (torch.log(target_probs) - lcs_output)
                      * mask),
            max_loss).mean()
    loss = loss_weight * loss.mean()
    return {"consistency_loss": loss}
def __init__(self, parameters):
    self.params = parameters

    # Transform applied to each image
    transform = transforms.Compose(
        [transforms.ToTensor(), ImageTransform(self.params)])

    # Initialize datasets
    self.trainset = MNIST(root=self.params.dataset_dir, train=True,
                          download=True, transform=transform)
    self.testset = MNIST(root=self.params.dataset_dir, train=False,
                         download=True, transform=transform)

    # Initialize loaders
    self.trainloader = DataLoader(
        self.trainset,
        batch_size=self.params.batch_size,
        shuffle=False,
        num_workers=self.params.num_workers,
        sampler=RandomSampler(self.trainset),
    )
    self.testloader = DataLoader(
        self.testset,
        batch_size=self.params.batch_size,
        shuffle=False,
        num_workers=self.params.num_workers,
    )

    # Checking for GPU
    self.use_gpu = self.params.use_gpu and torch.cuda.is_available()
    self.device = torch.device("cuda:0" if self.use_gpu else "cpu")

    # Initialize model
    self.model = MNIST_Network(self.params)
    self.model.to(self.device)
    print(self.model)
    print("Number of parameters = {}".format(self.model.num_parameters()))

    # Setup optimizer
    self.optimizer = self.optimizer_select()

    # Criterion
    self.criterion = NLLLoss()
def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor,
            token_type_ids: torch.Tensor, slot_labels: torch.Tensor,
            example_word_inds: torch.Tensor, example_input: torch.Tensor,
            example_mask: torch.Tensor, example_token_types: torch.Tensor,
            example_slots: torch.Tensor):
    example_hidden_states = self.encode(input_ids=example_input,
                                        attention_mask=example_mask,
                                        token_type_ids=example_token_types)
    hidden_states = self.encode(input_ids=input_ids,
                                attention_mask=attention_mask,
                                token_type_ids=token_type_ids)
    # The original discarded the dropout output; it must be reassigned.
    hidden_states = self.dropout(hidden_states)

    # relevant example states
    example_hidden = example_hidden_states[
        torch.arange(example_hidden_states.size(0)), example_word_inds]

    # Compute probabilities by copying from examples
    probs = torch.softmax(hidden_states.bmm(
        example_hidden.t().unsqueeze(0).repeat(hidden_states.size(0), 1, 1)),
        dim=-1)
    example_slots = example_slots.view(1, 1, example_slots.size(0)).repeat(
        probs.size(0), probs.size(1), 1)
    slot_probs = 1e-6 + torch.zeros(
        probs.size(0), probs.size(1),
        self.num_slot_labels).cuda().scatter_add(-1, example_slots, probs)
    # Computed unconditionally so the return below is always defined.
    slot_logits = torch.log(slot_probs)

    # Compute losses if labels provided
    if slot_labels is not None:
        loss_fct = NLLLoss()
        # Only keep active parts of the loss
        if attention_mask is not None:
            active_loss = attention_mask.view(-1) == 1
            active_logits = slot_logits.view(
                -1, self.num_slot_labels)[active_loss]
            active_labels = slot_labels.view(-1)[active_loss]
            slot_loss = loss_fct(active_logits,
                                 active_labels.type(torch.long))
        else:
            slot_loss = loss_fct(slot_logits.view(-1, self.num_slot_labels),
                                 slot_labels.view(-1).type(torch.long))
    else:
        slot_loss = (torch.tensor(0).cuda()
                     if torch.cuda.is_available() else torch.tensor(0))
    return slot_logits, slot_loss
def __init__(self, transformer: OpenaiTransformer,
             metrics: Dict[str, Any] = None,
             accuracy_top_k: List = None):
    super(BaseLMHead, self).__init__()
    self.transformer = transformer
    self._metrics = metrics
    self._accuracy_top_k = accuracy_top_k
    self._decoder = self.transformer.decoder
    self.log_softmax = nn.LogSoftmax(dim=1)
    # ignore_index=0 skips padding positions in the LM loss.
    self.loss = NLLLoss(ignore_index=0)
def __init__(self, input_len, out_length, loss=None):
    super().__init__()
    self.linear = Linear(input_len, input_len)
    self.activation = ReLU()
    self.linear_2 = Linear(input_len, out_length)
    if not loss:
        # Per-sample losses; callers can reduce them as needed.
        self.loss = NLLLoss(reduction='none')
    else:
        self.loss = loss
    # An explicit dim avoids the deprecation warning raised by LogSoftmax().
    self.softmax = LogSoftmax(dim=-1)
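# Hypothetical instantiation of the head above; the enclosing class name
# ClassificationHead is not shown in the snippet and is assumed here.
head = ClassificationHead(input_len=128, out_length=10)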
def __init__(self, model, lr, criterion_num=0, optimizer_num=0):
    self.model = model
    criterions = [CrossEntropyLoss(), NLLLoss()]
    self.criterion = criterions[criterion_num]
    optimizers = [Adam(self.model.parameters(), lr=lr),
                  SGD(self.model.parameters(), lr=lr)]
    self.optimizer = optimizers[optimizer_num]
    cuda = torch.cuda.is_available()
    self.device = torch.device("cuda:0" if cuda else "cpu")
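# Hypothetical usage, assuming the enclosing class is named Trainer (not shown
# in the snippet) and `net` is any nn.Module: index 1 selects NLLLoss and SGD.
trainer = Trainer(model=net, lr=1e-3, criterion_num=1, optimizer_num=1)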
def criterion(self, predict, target, ignore_index=-1, size_average=True):
    """
    criterion for FCN, the predict is (n, c, w, h), c is classes

    :param predict:
    :param target:
    :param ignore_index:
    :param size_average:
    :return:
    """
    # log_p holds log-probabilities (values <= 0); target holds class
    # indices in 0..n_classes-1.
    log_p = F.log_softmax(predict, dim=1)
    nll_loss = NLLLoss(ignore_index=ignore_index)
    loss = nll_loss(log_p, target)
    return loss
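# Hedged usage sketch: NLLLoss accepts (N, C, H, W) log-probabilities with
# (N, H, W) integer targets, so the criterion above works directly on FCN
# score maps. The shapes below (21 classes, 8x8 maps) are hypothetical.
import torch
import torch.nn.functional as F
from torch.nn import NLLLoss

scores = torch.randn(2, 21, 8, 8)
target = torch.randint(0, 21, (2, 8, 8))
print(NLLLoss(ignore_index=-1)(F.log_softmax(scores, dim=1), target))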
def init_losses(
    task_dict: dict[str, TaskType],
    loss_dict: dict[str, Literal["L1", "L2", "CSE"]],
    robust: bool = False,
) -> dict[str, tuple[str, type[torch.nn.Module]]]:
    """Build a loss criterion for each task.

    Args:
        task_dict (dict[str, TaskType]): Map of target names to "regression"
            or "classification".
        loss_dict (dict[str, "L1" | "L2" | "CSE"]): Map of target names to
            loss functions.
        robust (bool, optional): Whether to use an uncertainty adjusted loss.
            Defaults to False.

    Returns:
        dict[str, tuple[str, type[torch.nn.Module]]]: Dictionary of losses
        for each task.
    """
    criterion_dict: dict[str, tuple[str, type[torch.nn.Module]]] = {}
    for name, task in task_dict.items():
        # Select Task and Loss Function
        if task == "classification":
            if loss_dict[name] != "CSE":
                raise NameError("Only CSE loss allowed for classification tasks")
            if robust:
                criterion_dict[name] = (task, NLLLoss())
            else:
                criterion_dict[name] = (task, CrossEntropyLoss())
        elif task == "regression":
            if robust:
                if loss_dict[name] == "L1":
                    criterion_dict[name] = (task, RobustL1Loss)
                elif loss_dict[name] == "L2":
                    criterion_dict[name] = (task, RobustL2Loss)
                else:
                    raise NameError(
                        "Only L1 or L2 losses are allowed for robust regression tasks")
            else:
                if loss_dict[name] == "L1":
                    criterion_dict[name] = (task, L1Loss())
                elif loss_dict[name] == "L2":
                    criterion_dict[name] = (task, MSELoss())
                else:
                    raise NameError(
                        "Only L1 or L2 losses are allowed for regression tasks")
    return criterion_dict
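# Hedged usage sketch with hypothetical target names: the classification task
# gets CrossEntropyLoss and the regression task gets L1Loss when robust=False.
criteria = init_losses(
    task_dict={"phase": "classification", "energy": "regression"},
    loss_dict={"phase": "CSE", "energy": "L1"},
    robust=False,
)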
def default_train(train_loader, model, optimizer, writer, iter_counter):
    way = model.way
    test_shot = model.shots[-1]
    target = torch.LongTensor(
        [i // test_shot for i in range(test_shot * way)]).cuda()
    criterion = NLLLoss().cuda()
    lr = optimizer.param_groups[0]['lr']
    writer.add_scalar('lr', lr, iter_counter)
    avg_loss = 0
    avg_acc = 0
    for i, (inp, _) in enumerate(train_loader):
        iter_counter += 1
        if isinstance(inp, list):
            (image_inp, mask) = inp
            image_inp = image_inp.cuda()
            mask = mask.cuda()
            log_prediction = model(image_inp, mask)
        elif isinstance(inp, torch.Tensor):
            inp = inp.cuda()
            log_prediction = model(inp)
        loss = criterion(log_prediction, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_value = loss.item()
        _, max_index = torch.max(log_prediction, 1)
        acc = 100 * torch.sum(torch.eq(max_index, target)).item() / test_shot / way
        avg_acc += acc
        avg_loss += loss_value
    avg_acc = avg_acc / (i + 1)
    avg_loss = avg_loss / (i + 1)
    writer.add_scalar('proto_loss', avg_loss, iter_counter)
    writer.add_scalar('train_acc', avg_acc, iter_counter)
    return iter_counter, avg_acc