def __init__(self, args):
    # parameters
    self.batch_size = 128  # args.batch_size
    self.epoch = 300  # args.epoch
    self.save_dir = '../models'  # args.save_dir
    self.result_dir = '../results'  # args.result_dir
    self.dataset = "ImageNet"  # args.dataset
    self.dataroot_dir = '../../ImageNet/ILSVRC/Data/DET'  # args.dataroot_dir
    '''
    self.log_dir = args.log_dir
    self.multi_gpu = args.multi_gpu
    '''
    self.model_name = args.gan_type + args.comment
    self.sample_num = 128
    self.gpu_mode = True  # args.gpu_mode
    self.num_workers = 0  # args.num_workers
    self.beta1 = args.beta1
    self.beta2 = args.beta2
    self.lrG = args.lrG
    self.lrD = args.lrD
    self.type = "train"
    self.lambda_ = 0.25
    self.n_critic = args.n_critic
    self.enc_dim = 300
    self.num_cls = 10

    # load dataset
    self.data_loader = DataLoader(
        utils.ImageNet(root_dir='../../ImageNet/ILSVRC/Data/DET',
                       transform=transforms.Compose([
                           transforms.Scale(100),
                           transforms.RandomCrop(64),
                           transforms.ToTensor()
                       ]),
                       _type=self.type),
        batch_size=self.batch_size,
        shuffle=True,
        num_workers=self.num_workers)
    # self.num_cls = self.data_loader.dataset.num_cls  # number of ImageNet classes

    # networks init
    self.G = Generator()
    self.D = Discriminator(num_cls=self.num_cls)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=self.lrG,
                                  betas=(self.beta1, self.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=self.lrD,
                                  betas=(self.beta1, self.beta2))

    if self.gpu_mode:
        self.G = self.G.cuda()
        self.D = self.D.cuda()
        self.CE_loss = nn.CrossEntropyLoss().cuda()
        self.BCE_loss = nn.BCELoss().cuda()
        self.MSE_loss = nn.MSELoss().cuda()
        self.L1_loss = nn.L1Loss().cuda()
        self.ML_loss = nn.MultiLabelMarginLoss().cuda()
        self.sample_z_ = Variable(torch.rand((self.batch_size, self.enc_dim)).cuda(),
                                  volatile=True)
    else:
        self.CE_loss = nn.CrossEntropyLoss()
        self.BCE_loss = nn.BCELoss()
        self.MSE_loss = nn.MSELoss()
        self.L1_loss = nn.L1Loss()
        self.ML_loss = nn.MultiLabelMarginLoss()
        self.sample_z_ = Variable(torch.rand((self.batch_size, self.enc_dim)),
                                  volatile=True)
def forward(self, log_prob, target, mask, top_pred, top_true, reason_weight):
    # truncate to the same size; input: (80, 16, 9488), target: (80, 17)
    batch_size = log_prob.size(0)
    # print('batch_size in ReviewNetCriterion' + str(batch_size))
    target = target[:, :log_prob.size(1)]
    mask = mask[:, :log_prob.size(1)]

    if self.use_label_smoothing:
        K = log_prob.size(2)
        step_length = log_prob.size(1)
        target_ = torch.unsqueeze(target, 2)
        one_hot = torch.FloatTensor(batch_size, step_length, K).zero_()
        if self.use_cuda:
            one_hot = one_hot.cuda()
        one_hot.scatter_(2, target_.data, 1.0)
        # keep 1 - epsilon on the true class, spread epsilon uniformly over K classes
        one_hot = one_hot * (1.0 - self.label_smoothing_epsilon) \
            + self.label_smoothing_epsilon / K
        output = -torch.sum(log_prob * Variable(one_hot), 2) * mask
        output = torch.sum(output) / batch_size
    else:
        input = to_contiguous(log_prob).view(-1, log_prob.size(2))  # (800, 9488)
        target = to_contiguous(target).view(-1, 1)  # (800, 1)
        mask = to_contiguous(mask).view(-1, 1)  # (800, 1)
        output = -input.gather(1, target) * mask
        # output = torch.sum(output) / torch.sum(mask)
        output = torch.sum(output) / batch_size

    discriminative_loss = nn.MultiLabelMarginLoss()(top_pred, top_true)
    output = output + discriminative_loss * reason_weight
    return output
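# A minimal, self-contained sketch (my addition, not part of the criterion
# above) of the label-smoothing target construction used in its smoothing
# branch: each one-hot row keeps 1 - epsilon on the true class and spreads
# epsilon / K over all K classes. Shapes and epsilon are illustrative.
import torch

batch_size, step_length, K, epsilon = 2, 3, 5, 0.1
target = torch.randint(0, K, (batch_size, step_length))  # gold token ids
one_hot = torch.zeros(batch_size, step_length, K)
one_hot.scatter_(2, target.unsqueeze(2), 1.0)            # hard one-hot
smoothed = one_hot * (1.0 - epsilon) + epsilon / K       # soft targets
# each row still sums to 1
assert torch.allclose(smoothed.sum(dim=2), torch.ones(batch_size, step_length))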
def forward(self, log_prob, target, mask, top_pred, top_true, reason_weight):
    if isinstance(top_pred, list):
        top_pred = top_pred[-1]
    output = nn.MultiLabelMarginLoss()(top_pred, top_true)
    return output
def forward(self, input_ids=None, attention_mask=None, labels=None, pos_weight=None):
    bert_outputs = self.bert(input_ids, attention_mask=attention_mask)
    pooled_output = bert_outputs[0][:, 0]
    pooled_output = self.dropout(pooled_output)  # shape (batch_size, hidden_size)
    if self.agents_extended > 0:
        ext_output = nn.GELU()(self.extend_adapter(pooled_output.unsqueeze(1)))
        ext_output = self.dropout(ext_output)
    pooled_output = nn.GELU()(self.adapter(pooled_output.unsqueeze(1)))
    pooled_output = self.dropout(pooled_output)  # shape (batch_size, class_size)
    logits = self.classifier(pooled_output).squeeze(dim=2)  # shape (batch_size, num_labels)
    if self.agents_extended > 0:
        ext_logits = self.extend_classifier(ext_output).squeeze(dim=2)
        logits = torch.cat((logits, ext_logits), dim=1)
    outputs = (logits, )
    if labels is not None:
        loss_fct = nn.MultiLabelMarginLoss()
        loss = loss_fct(logits, labels)
        outputs = outputs + (loss, )
    return outputs  # logits, (loss) -- note: no sigmoid is applied to the logits
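# Note (my addition): nn.MultiLabelMarginLoss expects LongTensor targets that
# list the positive class *indices*, left-aligned and padded with -1, with the
# same shape as `logits` -- not a multi-hot 0/1 matrix. A hedged helper sketch;
# the function name and shapes are illustrative, not from the model above.
import torch

def multihot_to_margin_target(multihot):
    """Convert a (batch, num_labels) 0/1 matrix to MultiLabelMarginLoss format."""
    batch, num_labels = multihot.shape
    target = torch.full((batch, num_labels), -1, dtype=torch.long)
    for b in range(batch):
        idx = multihot[b].nonzero(as_tuple=True)[0]
        target[b, :idx.numel()] = idx
    return target

multihot = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 0]])
print(multihot_to_margin_target(multihot))
# tensor([[ 0,  2, -1, -1],
#         [ 1, -1, -1, -1]])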
def test_okutama(data_loader, model, device, epoch):
    model.eval()
    actions_meter = AverageMeter()
    # activities_meter = AverageMeter()
    loss_meter = AverageMeter()
    num_boxes = 12
    B = 2
    T = 5
    epoch_timer = Timer()
    with torch.no_grad():
        for batch_data in data_loader:
            # prepare batch data
            batch_data = [b.to(device=device) for b in batch_data]
            batch_size = batch_data[0].shape[0]
            num_frames = batch_data[0].shape[1]

            actions_in = batch_data[2].reshape((batch_size, num_frames, num_boxes))
            # activities_in = batch_data[3].reshape((batch_size, num_frames))
            bboxes_num = batch_data[3].reshape(batch_size, num_frames)

            # forward
            actions_scores = model((batch_data[0], batch_data[1], batch_data[3]))
            actions_scores = torch.reshape(actions_scores,
                                           (B * T, num_boxes)).to(device=device)

            actions_in_nopad = []
            actions_in = actions_in.reshape((batch_size * num_frames, num_boxes))
            bboxes_num = bboxes_num.reshape(batch_size * num_frames)
            for bt in range(batch_size * num_frames):
                N = bboxes_num[bt]
                actions_in_nopad.append(actions_in[bt, :N])

            loss = nn.MultiLabelMarginLoss()
            actions_loss = loss(actions_scores, actions_in)
            actions_loss = Variable(actions_loss, requires_grad=True)

            actions_correct = torch.sum(
                torch.eq(actions_scores.int(), actions_in.int()).float())

            # Get accuracy
            actions_accuracy = actions_correct.item() / (actions_scores.shape[0] * num_boxes)
            actions_meter.update(actions_accuracy, actions_scores.shape[0])

            # Total loss (this update had been swallowed into the comment above it)
            loss_meter.update(actions_loss.item(), batch_size)

    test_info = {
        'time': epoch_timer.timeit(),
        'epoch': epoch,
        'loss': loss_meter.avg,
        'actions_acc': actions_meter.avg * 100
    }
    return test_info
def compute_loss(pred, true, fn='ce'):
    if fn == 'mse':
        loss_fn = nn.MSELoss(reduce=True)
    elif fn == 'ce':
        loss_fn = nn.CrossEntropyLoss()
    elif fn == 'bce':
        loss_fn = nn.BCEWithLogitsLoss()
    elif fn == 'hinge':
        loss_fn = nn.MultiLabelMarginLoss()
    else:
        # without this branch an unknown fn would raise UnboundLocalError below
        raise ValueError('unknown loss fn: %s' % fn)
    return loss_fn(pred, true)
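# The four branches above expect different target formats. A quick, hedged
# sketch of what each expects; shapes and values are chosen for illustration.
import torch
import torch.nn as nn

pred = torch.randn(2, 4)
# 'ce'   : class indices, shape (batch,)
ce = nn.CrossEntropyLoss()(pred, torch.tensor([1, 3]))
# 'bce'  : float 0/1 matrix, same shape as pred
bce = nn.BCEWithLogitsLoss()(pred, torch.tensor([[0., 1., 0., 1.],
                                                 [1., 0., 0., 0.]]))
# 'mse'  : float targets, same shape as pred
mse = nn.MSELoss()(pred, torch.randn(2, 4))
# 'hinge': LongTensor of positive class indices padded with -1, same shape as pred
hinge = nn.MultiLabelMarginLoss()(pred, torch.tensor([[1, 3, -1, -1],
                                                      [0, -1, -1, -1]]))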
def forward(self, input, seq, reward, logprobs_all, entropy_reg, top_pred,
            top_true, reason_weight, sample_logprobs_old, opt):
    batch_size = input.size(0)
    input_length = input.size(1)
    input = to_contiguous(input).view(-1)
    reward = to_contiguous(reward).view(-1)
    mask_0 = (seq > 0).float()
    # shift the mask right by one step so the first token is always counted
    mask = Variable(to_contiguous(
        torch.cat([mask_0.new(mask_0.size(0), 1).fill_(1), mask_0[:, :-1]], 1)))
    mask = mask.view(-1)
    logprobs_all = logprobs_all[:, :input_length, :]
    # negative entropy of the policy at each step
    temp = torch.sum(logprobs_all * torch.exp(logprobs_all), 2).squeeze()
    entropy_minus = temp * Variable(mask_0)

    if opt.use_ppo:
        probs = torch.exp(input)
        probs_old = torch.exp(sample_logprobs_old)
        ratio = probs / (1e-5 + probs_old)
        # clipped surrogate objective: the clamp belongs on the ratio, not on
        # the already reward-weighted term (the original clamped surr1 and
        # multiplied by reward a second time)
        surr1 = ratio * reward  # surrogate from conservative policy iteration
        surr2 = ratio.clamp(1 - opt.ppo_clip, 1 + opt.ppo_clip) * reward
        output = -torch.min(surr1, surr2) * mask
    else:
        output = -input * reward * mask

    output = torch.sum(output) / batch_size \
        + entropy_reg * torch.sum(entropy_minus) / batch_size

    if not isinstance(top_pred, list):
        discriminative_loss = nn.MultiLabelMarginLoss()(top_pred, top_true)
        output = output + discriminative_loss * reason_weight
    else:
        discriminative_loss = []
        for i in range(len(top_pred)):
            discriminative_loss.append(
                nn.MultiLabelMarginLoss()(top_pred[i], top_true))
        output = output + sum(discriminative_loss) * reason_weight / len(top_pred)
    return output
def train(model, device, train_loader, optimizer, epoch):
    print("train_loader", len(train_loader))
    model.train()
    sum_num_correct = 0
    sum_loss = 0
    num_batches_since_log = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        # print("batch_idx", batch_idx, data, target)
        data, target = data.to(device), target.to(device)
        target = torch.LongTensor(np.array(target.numpy(), np.long))
        data_var = torch.autograd.Variable(data)
        target_var = torch.autograd.Variable(target)
        optimizer.zero_grad()
        output = model(data_var)
        for batch in range(len(target)):
            print("target")
            print([listIngredients[i] for i in range(len(target[batch]))
                   if target[batch][i] == 1])
            print("output")
            print([i for i in torch.topk(output[batch], 10, largest=True)])
            print([listIngredients[i]
                   for i in torch.topk(output[batch], 10, largest=True)[1]])
            print(len([i for i in output[batch] if i != 0]))
        loss = nn.MultiLabelMarginLoss()(output, target_var)
        # loss = F.nll_loss(output, target)
        # pred = output.max(1, keepdim=True)[1]  # index of the max log-probability
        # correct = pred.eq(target.view_as(pred)).sum().item()
        # sum_num_correct += correct
        sum_loss += loss.item()
        num_batches_since_log += 1
        loss.backward()
        optimizer.step()
        if batch_idx > -1:  # % 100 == 0:
            print('Train Epoch: {} [{:05d}/{} ({:02.0f}%)]\tLoss: {:.6f}\tAccuracy: {:02.0f}%'
                  .format(epoch, batch_idx * len(data), len(train_loader.dataset),
                          100. * batch_idx / len(train_loader),
                          sum_loss / num_batches_since_log,
                          100. * sum_num_correct /
                          (num_batches_since_log * train_loader.batch_size)))
            sum_num_correct = 0
            sum_loss = 0
            num_batches_since_log = 0
def valid_epoch(epoch, args, model, data_loader):
    model.train(False)
    model.eval()

    # Loss value accumulators
    valid_dloss = 0
    valid_closs = 0

    # Batch counter
    bcnt = 0

    # Validation minibatch iteration
    for i, data in enumerate(tqdm(data_loader)):
        # Initialize data variables
        image = Variable(data[0], requires_grad=False)
        body = Variable(data[1], requires_grad=False)
        disc = Variable(data[2].type(torch.LongTensor), requires_grad=False)
        cont = Variable(data[3], requires_grad=False)

        # Utilize CUDA
        if params.USE_CUDA:
            body, image, disc, cont = body.cuda(), image.cuda(), disc.cuda(), cont.cuda()

        # Generate predictions
        disc_pred, cont_pred = model(body, image)

        # Initialize loss functions
        disc_loss = nn.MultiLabelMarginLoss()
        # cont_loss = nn.MSELoss()

        # Compute validation loss
        d_loss = disc_loss(disc_pred, disc)
        # c_loss = cont_loss(cont_pred, cont.float())

        # Record loss
        valid_dloss += torch.Tensor.item(d_loss)
        # valid_closs += c_loss.data[0]

        # Update batch counter
        bcnt += 1

    # Report loss
    print('[VALID LOSS]\tD_LOSS: ' + str(valid_dloss / float(bcnt)))

    # Persist loss to log
    util.loss_log.write(params.LOSS_LOG_DIR + '/emotic_attention_valid_loss.csv',
                        str(epoch) + ',' + str(valid_dloss / float(bcnt)))
def __init__(self, *args, **kwargs):
    # super(ANN, self).__init__()  # python 2.x
    super().__init__()  # python 3.x
    self.epochs = kwargs['epochs']
    self.batch_size = kwargs['BATCH_SIZE']
    self.first_n_pkts = kwargs['first_n_pkts']
    self.out_size = kwargs['num_class']

    first_n_pkts = 10  # note: overrides kwargs['first_n_pkts'] for the sub-networks
    self.small_in_size = first_n_pkts
    self.small_h_size = 5
    self.small_out_size = 2
    self.pkts_ann = nn.Sequential(
        nn.Linear(self.small_in_size, self.small_h_size * 2), nn.Tanh(),
        nn.Linear(self.small_h_size * 2, self.small_h_size), nn.Tanh(),
        nn.Linear(self.small_h_size, self.small_out_size))
    self.intr_tm_ann = nn.Sequential(
        nn.Linear(self.small_in_size, self.small_h_size * 2), nn.Tanh(),
        nn.Linear(self.small_h_size * 2, self.small_h_size), nn.Tanh(),
        nn.Linear(self.small_h_size, self.small_out_size))

    self.in_size = 2 * self.small_out_size + 1  # first_n_pkts_list, flow_duration, intr_time_list
    self.h_size = 5
    # self.out_size = 1  # number of labels, one-hot coding
    # bug fix: nn.Softmax() was being passed as the third (bias) argument of the
    # last nn.Linear; it belongs in the Sequential as its own layer
    self.classify_ann = nn.Sequential(
        nn.Linear(self.in_size, self.h_size * 2), nn.Tanh(),
        nn.Linear(self.h_size * 2, self.h_size), nn.Tanh(),
        nn.Linear(self.h_size, self.out_size), nn.Softmax(dim=-1))

    print('---------- Networks architecture -------------')
    print_network('pkts_ann:', self.pkts_ann)
    print_network('intr_tm_ann:', self.intr_tm_ann)
    print_network('classify_ann:', self.classify_ann)
    print('-----------------------------------------------')

    # self.criterion = nn.MSELoss(size_average=False)
    self.criterion = nn.MultiLabelMarginLoss()
    self.d_learning_rate = 1e-4
    self.g_learning_rate = 1e-4
    params = list(self.pkts_ann.parameters()) \
        + list(self.intr_tm_ann.parameters()) \
        + list(self.classify_ann.parameters())
    self.optimizer = optim.Adam(params, lr=self.g_learning_rate, betas=(0.5, 0.9))
def initialize(self, opt):
    BaseModel.initialize(self, opt)
    self.isTrain = opt.isTrain

    if opt.using_multi_labels:
        self.label = self.Tensor(opt.batchSize, opt.L)
    else:
        self.label = self.Tensor(opt.batchSize)
    self.bases = self.Tensor(opt.batchSize, opt.F, opt.num_of_bases)

    self.BasesNet = networks.BasesNet(opt)
    self.sub_concept_pooling = nn.modules.MaxPool2d((opt.K, 1), stride=(1, 1))
    self.instance_pooling = nn.modules.MaxPool2d((opt.num_of_bases, 1), stride=(1, 1))
    self.softmax = nn.Softmax(dim=-1)
    self.sigmoid = nn.Sigmoid()

    if opt.using_multi_labels:
        self.loss = nn.MultiLabelMarginLoss()
    else:
        self.loss = nn.CrossEntropyLoss()

    if len(opt.gpu_ids) > 0:
        self.BasesNet.cuda(opt.gpu_ids[0])
        self.sub_concept_pooling.cuda(opt.gpu_ids[0])
        self.instance_pooling.cuda(opt.gpu_ids[0])
        self.softmax.cuda(opt.gpu_ids[0])
        self.sigmoid.cuda(opt.gpu_ids[0])
        self.loss.cuda(opt.gpu_ids[0])

    networks.init_weights(self.BasesNet, self.opt.init_type)

    if self.isTrain:
        # the same optimizer is used with and without multi-label training
        self.optimizer = optim.Adam(list(self.BasesNet.parameters()),
                                    lr=opt.learning_rate, weight_decay=0.00001)
    else:
        self.BasesNet.eval()

    self.batch_loss = []
    self.batch_accuracy = []
    self.batch_ap = []
def test_grad():
    input = tensor(([1, 2, 3], [4, 5, 6], [7, 8, 9]), dtype=torch.float)
    # weight = tensor(([0.1, 0.2, 0.3, 0.4], [0.1, 0.2, 0.3, 0.4],
    #                  [0.1, 0.2, 0.3, 0.4]), requires_grad=True)
    weight = tensor(torch.rand(3, 4), requires_grad=True)
    # input = input.unsqueeze(0)
    print(input, weight)
    pre = torch.mm(input, weight)
    loss2 = nn.MultiLabelMarginLoss()
    lable1 = tensor(([0, 1, 1, 0], ), dtype=torch.float)
    lable2 = tensor(([0, 1, 1, 0], [1, 0, 0, 0], [1, 0, 1, 1]), dtype=torch.long)
    print(pre, lable1)
    loss1 = f.multilabel_soft_margin_loss(pre, lable1, reduction='sum')
    loss1.backward()
    print('weight.grad.data1:', weight.grad.data)
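# The margin-loss object `loss2` (and `lable2`) above is created but never
# applied. A hedged continuation -- my addition, not part of the original test
# -- that exercises it. Note the target format differs from the soft-margin
# case: each row holds positive class *indices* padded with -1, as a LongTensor.
import torch
import torch.nn as nn

input = torch.tensor([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])
weight = torch.rand(3, 4, requires_grad=True)
pre = torch.mm(input, weight)
target = torch.tensor([[1, 2, -1, -1], [0, -1, -1, -1], [0, 2, 3, -1]])
loss2 = nn.MultiLabelMarginLoss()(pre, target)
loss2.backward()
print('weight.grad via MultiLabelMarginLoss:', weight.grad)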
def __init__(self, cfg, in_channels):
    super(ClassifierModule, self).__init__()
    self.cfg = cfg.clone()
    self.GlobalAvgPool = nn.AdaptiveAvgPool2d(1)
    # TODO
    # self.fc = nn.Linear(in_channels, cfg.MODEL.CLASSIFIER.NUM_CLASSES)
    self.fc = nn.Linear(2048, cfg.MODEL.CLASSIFIER.NUM_CLASSES)
    self.sigmoid = nn.Sigmoid()

    if cfg.MODEL.CLASSIFIER.LOSS == "BCE":
        self.loss = nn.BCELoss()
    elif cfg.MODEL.CLASSIFIER.LOSS == "MultiLabelMarginLoss":
        self.loss = nn.MultiLabelMarginLoss()
    else:
        raise ValueError('Wrong loss type %s' % cfg.MODEL.CLASSIFIER.LOSS)
def train(model, train_dataset, test_dataset, output_dir):
    print("Training Model: #TRN=%d , #VLD=%d, #CLS=%d" %
          (len(train_dataset), len(test_dataset), model.n_classes))

    # Hyperparameters
    batch_size = 32
    num_epochs = 5
    learning_rate = 0.003

    # Loss and optimizer
    criterion = nn.MultiLabelMarginLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        # pbar = tqdm(range(len(train_dataset)), "training ... ")
        loader = DataLoader(train_dataset, batch_size)
        for i, (seqs, lbls) in enumerate(loader):
            model.train()
            sequences = Variable(torch.from_numpy(seqs).float())
            labels = Variable(torch.from_numpy(lbls).long())

            # Forward + backward + optimize
            optimizer.zero_grad()
            outputs = model(sequences)
            train_loss = criterion(outputs, labels)
            train_loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                test_loss = eval(model, criterion, test_dataset)
                print('Epoch [%d/%d], Step [%d/%d], Train Loss: %.4f, Test Loss: %.4f'
                      % (epoch + 1, num_epochs, i + 1,
                         len(train_dataset) // batch_size,
                         train_loss.data[0], test_loss))
            # pbar.update(batch_size)
        # pbar.close()

    # Save the trained model
    torch.save(model.state_dict(), '%s/cnn.pkl' % output_dir)
def __init__(self, dim_feat, dim_latent=10, hash_bit=64, label=10):
    super(DAN, self).__init__()
    self.in_feature_1 = dim_feat
    self.out_feature_1 = dim_latent
    self.in_feature_2 = dim_latent
    self.out_feature_2 = dim_feat
    self.fc1_1 = nn.Linear(self.in_feature_1, self.out_feature_1)
    self.fc1_2 = nn.Linear(self.in_feature_2, self.out_feature_2)
    self.fc2_1 = nn.Linear(self.in_feature_1, self.out_feature_1)
    self.fc2_2 = nn.Linear(self.in_feature_2, self.out_feature_2)
    self.fc_to_cla = nn.Linear(self.out_feature_2, label)
    self.fc_to_lsh = nn.Linear(self.out_feature_2, hash_bit)
    # note: despite the attribute name, this is a multi-label margin loss,
    # not a cross-entropy loss
    self.CrossEntropyLoss = nn.MultiLabelMarginLoss()
    self.MSELoss_lsh = nn.MSELoss()
    self.MSELoss_recon = nn.MSELoss()
    self.kl = nn.KLDivLoss()
def train_model(embedding_dim=8, hidden_dim=8, epochs=12):
    train, valid, test, class_ix = load_data()
    # Number of all possible trigrams
    vocab_size = 8001
    class_size = len(class_ix)
    model = LSTMTagger(embedding_dim, hidden_dim, vocab_size, class_size)
    model = model.cuda()
    loss_function = nn.MultiLabelMarginLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=0.01)
    train_data = train[['ngrams', 'interpros']].values
    for epoch in range(epochs):
        print('Epoch %d/%d' % (epoch + 1, epochs))
        with ck.progressbar(train_data) as data:
            train_loss = 0.0
            for item in data:
                # Clear gradients
                model.zero_grad()
                # Clear hidden state for each instance
                model.hidden = model.init_hidden()
                inputs_tensor = torch.LongTensor(item[0]).cuda()
                inputs = autograd.Variable(inputs_tensor).cuda()
                labels = []
                for ipro in item[1]:
                    if ipro in class_ix:
                        labels.append(class_ix[ipro])
                if len(labels) == 0:
                    continue
                labels = autograd.Variable(torch.LongTensor(labels)).cuda()
                scores = model(inputs)
                loss = loss_function(scores, labels)
                train_loss += loss
                loss.backward()
                optimizer.step()
        print('Training Loss: ', train_loss / len(train_data))
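# Caveat (my note, not from the original): nn.MultiLabelMarginLoss expects the
# target to have the same shape as the scores, with the positive class indices
# left-aligned and the remainder padded with -1. A hedged padding sketch for
# the variable-length `labels` list built above; names are illustrative.
import torch

def pad_labels(label_indices, class_size):
    """Pad a Python list of class indices to MultiLabelMarginLoss format."""
    target = torch.full((class_size,), -1, dtype=torch.long)
    target[:len(label_indices)] = torch.tensor(label_indices, dtype=torch.long)
    return target

print(pad_labels([2, 5], 8))  # tensor([ 2,  5, -1, -1, -1, -1, -1, -1])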
def forward(self, log_prob, target, mask, gv, top_true, ltg_weight, gv_l1_penality):
    # truncate to the same size; input: (80, 16, 9488), target: (80, 17)
    batch_size = log_prob.size(0)
    target = target[:, :log_prob.size(1)]
    mask = mask[:, :log_prob.size(1)]
    log_prob = to_contiguous(log_prob).view(-1, log_prob.size(2))
    target = to_contiguous(target).view(-1, 1)
    mask = to_contiguous(mask).view(-1, 1)
    output = -log_prob.gather(1, target) * mask
    output = torch.sum(output) / batch_size

    gv_loss = nn.MultiLabelMarginLoss()(gv, top_true)
    zero_tensor = Variable(torch.zeros(gv.size()))
    zero_tensor = zero_tensor.cuda()
    # gv_l1_loss = nn.L1Loss(size_average=False)(gv, zero_tensor)
    gv_l1_loss = nn.SmoothL1Loss(size_average=False)(gv, zero_tensor)
    print('loss: ' + str(output.data[0]) + ', guiding loss: ' + str(gv_loss.data[0])
          + ', l1 loss: ' + str(gv_l1_loss.data[0]))
    output = output + gv_loss * ltg_weight + gv_l1_loss * gv_l1_penality
    return output
def parse_loss(loss):
    loss, kwargs = parse_str(loss)
    if loss == 'l1':
        return nn.L1Loss(**kwargs)
    if loss == 'mse':
        return nn.MSELoss(**kwargs)
    if loss == 'cross_entropy':
        return nn.CrossEntropyLoss(**kwargs)
    if loss == 'nll':
        return nn.NLLLoss(**kwargs)
    if loss == 'poisson':
        return nn.PoissonNLLLoss(**kwargs)  # fixed: nn.PoissonLoss does not exist
    if loss == 'nll2d':
        return nn.NLLLoss2d(**kwargs)
    if loss == 'kl_div':
        return nn.KLDivLoss(**kwargs)
    if loss == 'bce':
        return nn.BCELoss(**kwargs)
    if loss == 'bce_with_logits':
        return nn.BCEWithLogitsLoss(**kwargs)
    if loss == 'margin_ranking':
        return nn.MarginRankingLoss(**kwargs)
    if loss == 'hinge_embedding':
        return nn.HingeEmbeddingLoss(**kwargs)
    if loss == 'multilabel_margin':
        return nn.MultiLabelMarginLoss(**kwargs)
    if loss == 'smooth_l1':
        return nn.SmoothL1Loss(**kwargs)
    if loss == 'multilabel_softmargin':
        return nn.MultiLabelSoftMarginLoss(**kwargs)
    if loss == 'cosine_embedding':
        return nn.CosineEmbeddingLoss(**kwargs)
    if loss == 'multi_margin':
        return nn.MultiMarginLoss(**kwargs)
    if loss == 'triplet_margin':
        return nn.TripletMarginLoss(**kwargs)
    # fall back to a functional loss; losses take (input, target), so the
    # wrapper must forward all positional arguments, not just one
    loss = getattr(nn.functional, loss)
    return lambda *args: loss(*args, **kwargs)
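# Hedged usage sketch (my addition). `parse_str` is not shown in this snippet;
# the example assumes it maps 'multilabel_margin' to ('multilabel_margin', {}).
import torch

criterion = parse_loss('multilabel_margin')  # nn.MultiLabelMarginLoss()
scores = torch.randn(2, 4)
target = torch.tensor([[0, 3, -1, -1], [2, -1, -1, -1]])
print(criterion(scores, target))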
def test(model, device, test_loader, dataset_name="Test set"):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            target = torch.LongTensor(np.array(target.numpy(), np.long))
            data_var = torch.autograd.Variable(data)
            target_var = torch.autograd.Variable(target)
            output = model(data_var)
            # sum up batch loss (the stray optimizer.zero_grad() is gone: no
            # optimizer exists in this scope, and none is needed at test time)
            test_loss += nn.MultiLabelMarginLoss()(output, target_var).item()
            # test_loss += F.nll_loss(output, target, reduction='sum').item()
            # pred = output.max(1, keepdim=True)[1]  # index of the max log-probability
            # correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\n{}: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        dataset_name, test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
def forward(self, input_ids, token_type_ids, valid_mask, position_ids, labels=None):
    attention_mask = _mask_both_directions(valid_mask, token_type_ids)

    sequence_output = self.bert(input_ids,
                                attention_mask=attention_mask,
                                token_type_ids=token_type_ids,
                                position_ids=position_ids)[0]

    query_embeddings, doc_embeddings = _average_query_doc_embeddings(
        sequence_output, token_type_ids, valid_mask)

    # similarity of every query to every document in the batch
    similarities = torch.matmul(query_embeddings, doc_embeddings.T)

    output = (similarities, query_embeddings, doc_embeddings)
    if labels is not None:
        loss_fct = nn.MultiLabelMarginLoss()
        loss = loss_fct(similarities, labels)
        output = loss, *output
    return output
def main():
    """
    Training and validation.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, data_name, word_map

    # Read word map
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adamax(
            params=filter(lambda p: p.requires_grad, decoder.parameters()))
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']

    # Move to GPU, if available
    decoder = decoder.to(device)

    # Loss functions
    criterion_ce = nn.CrossEntropyLoss().to(device)
    criterion_dis = nn.MultiLabelMarginLoss().to(device)

    # Custom dataloaders
    train_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'TRAIN'),
        batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'VAL'),
        batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):
        # Decay learning rate if there is no improvement for 8 consecutive
        # epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)

        # One epoch's training
        train(train_loader=train_loader,
              decoder=decoder,
              criterion_ce=criterion_ce,
              criterion_dis=criterion_dis,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                decoder=decoder,
                                criterion_ce=criterion_ce,
                                criterion_dis=criterion_dis)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(data_name, epoch, epochs_since_improvement, decoder,
                        decoder_optimizer, recent_bleu4, is_best)
    ['softmin', nn.Softmin()],
    ['tanhshrink', nn.Tanhshrink()],
    ['rrelu', nn.RReLU()],
    ['glu', nn.GLU()],
])

loss = nn.ModuleDict([
    ['l1', nn.L1Loss()],
    ['nll', nn.NLLLoss()],
    ['kldiv', nn.KLDivLoss()],
    ['mse', nn.MSELoss()],
    ['bce', nn.BCELoss()],
    ['bce_with_logits', nn.BCEWithLogitsLoss()],
    ['cosine_embedding', nn.CosineEmbeddingLoss()],
    ['ctc', nn.CTCLoss()],
    ['hinge_embedding', nn.HingeEmbeddingLoss()],
    ['margin_ranking', nn.MarginRankingLoss()],
    ['multi_label_margin', nn.MultiLabelMarginLoss()],
    ['multi_label_soft_margin', nn.MultiLabelSoftMarginLoss()],
    ['multi_margin', nn.MultiMarginLoss()],
    ['smooth_l1', nn.SmoothL1Loss()],
    ['soft_margin', nn.SoftMarginLoss()],
    ['cross_entropy', nn.CrossEntropyLoss()],
    ['triplet_margin', nn.TripletMarginLoss()],
    ['poisson_nll', nn.PoissonNLLLoss()],
])

optimizer = dict({
    'adadelta': optim.Adadelta,
    'adagrad': optim.Adagrad,
    'adam': optim.Adam,
    'sparse_adam': optim.SparseAdam,
    'adamax': optim.Adamax,
def train_okutama(data_loader, model, device, optimizer, epoch):
    actions_meter = AverageMeter()
    # activities_meter = AverageMeter()
    loss_meter = AverageMeter()
    epoch_timer = Timer()

    # parameters
    B = 2
    T = 5
    num_boxes = 12

    for batch_data in data_loader:
        model.train()
        model.apply(set_bn_eval)

        # prepare batch data
        batch_data = [b.to(device=device) for b in batch_data]
        batch_size = batch_data[0].shape[0]
        num_frames = batch_data[0].shape[1]

        # forward
        actions_scores = model((batch_data[0], batch_data[1], batch_data[3]))
        actions_scores = torch.reshape(actions_scores,
                                       (B * T, num_boxes)).to(device=device)

        actions_in = batch_data[2].reshape((batch_size, num_frames, num_boxes))
        bboxes_num = batch_data[3].reshape(batch_size, num_frames)

        actions_in_nopad = []
        actions_in = actions_in.reshape((batch_size * num_frames, num_boxes))
        bboxes_num = bboxes_num.reshape(batch_size * num_frames)
        for bt in range(batch_size * num_frames):
            N = bboxes_num[bt]
            actions_in_nopad.append(actions_in[bt, :N])

        # Predict actions
        loss = nn.MultiLabelMarginLoss()
        actions_loss = loss(actions_scores, actions_in)
        # caution: re-wrapping the loss in a fresh Variable detaches it from
        # the model's graph, so this backward pass cannot reach the model weights
        actions_loss = Variable(actions_loss, requires_grad=True)

        actions_correct = torch.sum(
            torch.eq(actions_scores.int(), actions_in.int()).float())

        # Get accuracy
        actions_accuracy = actions_correct.item() / (actions_scores.shape[0] * num_boxes)
        actions_meter.update(actions_accuracy, actions_scores.shape[0])

        # Total loss
        loss_meter.update(actions_loss.item(), batch_size)

        # Optim
        optimizer.zero_grad()
        actions_loss.backward()
        optimizer.step()

    train_info = {
        'time': epoch_timer.timeit(),
        'epoch': epoch,
        'loss': loss_meter.avg,
        'actions_acc': actions_meter.avg * 100
    }
    return train_info
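# Minimal demonstration (my addition) of why the re-wrapping flagged above is
# a problem: a tensor rebuilt from a loss *value* no longer carries the
# autograd graph, so backward() leaves upstream parameter gradients untouched.
import torch

w = torch.randn(3, requires_grad=True)
loss = (w * w).sum()
# analogous to Variable(actions_loss, requires_grad=True)
detached = torch.tensor(loss.item(), requires_grad=True)
detached.backward()
print(w.grad)  # None -- no gradient reached w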
in_size = [x.size() for x in in_params]
out_size = [x.size() for x in out_params]
in_sum = sum([np.prod(x) for x in in_size])
out_sum = sum([np.prod(x) for x in out_size])

print("IN    : {} params".format(in_sum))
# print(print_params(in_names, in_size))
print("OUT   : {} params".format(out_sum))
# print(print_params(out_names, out_size))
print("TOTAL : {} params".format(in_sum + out_sum))

loss_fn = {
    'xent': nn.CrossEntropyLoss(),
    'mse': nn.MSELoss(),
    'mrl': nn.MarginRankingLoss(),
    'mlml': nn.MultiLabelMarginLoss(),
    'mml': nn.MultiMarginLoss()
}
tt = torch
if not args.cpu:
    loss_fn = {k: v.cuda() for (k, v) in loss_fn.items()}
    tt = torch.cuda

optimizer = torch.optim.Adam(in_params, lr=args.lr)

out_data = {'train': {'x': [], 'y': []},
            'valid': {'x': [], 'y': []},
            'bleu': {'x': [], 'y': []},
            'best_valid': {'x': [], 'y': []}}

best_epoch = -1
def runNet(sXpXa, drugResist2, samples, positions, epoch):
    trainNum = int(math.floor(0.7 * samples))
    testNum = int(samples - trainNum)
    availableSamples = list(range(0, samples))
    testSamp = []
    trainSamp = []
    for i in range(0, testNum):
        samp = random.randint(0, len(availableSamples) - 1)
        testSamp.append(availableSamples[samp])
        availableSamples.pop(samp)
    for i in range(0, trainNum):
        samp = random.randint(0, len(availableSamples) - 1)
        trainSamp.append(availableSamples[samp])
        availableSamples.pop(samp)

    # N is batch size; D_in is input dimension;
    # H is hidden dimension; D_out is output dimension.
    N, D_in, H, D_out = samples, positions, 100, 10

    # Create Tensors to hold inputs and outputs
    x = torch.reshape(sXpXa[0], (-1, ))
    y = drugResist2
    # x = torch.randn(N, D_in)
    # y = torch.randn(N, D_out)

    lossDict = {}
    global confusionMat
    global confusionMatTest

    # Construct our model by instantiating the class defined above
    model = TwoLayerNet(D_in, H, D_out)
    acc = np.zeros((1, 500))
    prec = np.zeros((1, 500))
    recall = np.zeros((1, 500))
    fOut = np.zeros((1, 500))
    accTest = np.zeros((1, 500))
    precTest = np.zeros((1, 500))
    recallTest = np.zeros((1, 500))
    fOutTest = np.zeros((1, 500))

    # Construct our loss function and an Optimizer. The call to model.parameters()
    # in the SGD constructor will contain the learnable parameters of the two
    # nn.Linear modules which are members of the model.
    # criterion = torch.nn.L1Loss()
    criterion = nn.MultiLabelMarginLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

    ##### Train #####
    for w in range(epoch):
        for t in range(samples // 10):
            # Forward pass: compute predicted y by passing x to the model
            xs = torch.zeros([10, positions])
            ys = torch.zeros([10, 10])

            # Randomly select a sample, recording negative and positive labels.
            # If a sample exceeds the 50/50 balance ratio, choose another sample;
            # only select samples which produce a 50/50 negative/positive balance.
            samp = 0
            positiveCount = 0
            negativeCount = 0
            while samp < 10:
                while positiveCount < 50 and negativeCount < 50:
                    rnd = random.randint(0, len(trainSamp) - 1)
                    xs[samp, ] = torch.reshape(sXpXa[trainSamp[rnd], ...], (-1, ))
                    ys[samp, ] = torch.reshape(y[trainSamp[rnd], ...], (-1, ))
                    for entry in ys[samp, ]:
                        if entry == 1:
                            positiveCount += 1
                        else:
                            negativeCount += 1
                    samp += 1
                diff = positiveCount - negativeCount
                diffMatch = False  # was lowercase `false`, a NameError
                s = 0
                pCount = 0
                nCount = 0
                sample = torch.reshape(y[trainSamp[s], ...], (-1, ))
                while not diffMatch and s < len(y):
                    for entry in sample:
                        if entry == 1:
                            pCount += 1
                        else:
                            nCount += 1
                    if nCount - pCount == diff:
                        diffMatch = True  # was lowercase `true`
                        positiveCount += pCount
                        negativeCount += nCount
                    else:
                        s += 1
                        sample = torch.reshape(y[trainSamp[s], ...], (-1, ))
                xs[samp, ] = torch.reshape(sXpXa[trainSamp[s], ...], (-1, ))
                ys[samp, ] = sample
                samp += 1

            # 1985 for all, 1916 for just type B
            y_pred = model(xs)
            lossRecorder(y_pred, ys, t, lossDict)
            conMat(y_pred, ys, w, confusionMat)

            for i in range(0, 10):
                rnd = random.randint(0, len(testSamp) - 1)
                xs[i, ] = torch.reshape(sXpXa[testSamp[rnd], ...], (-1, ))
                ys[i, ] = torch.reshape(y[testSamp[rnd], ...], (-1, ))
            y_pred = model(xs)
            conMat(y_pred, ys, w, confusionMatTest)

            # Compute the loss
            loss = criterion(y_pred, ys.type(torch.LongTensor))

            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        acc[0, w] = (confusionMat[w, 0, 0] + confusionMat[w, 1, 1]) / (
            confusionMat[w, 0, 0] + confusionMat[w, 1, 0] +
            confusionMat[w, 0, 1] + confusionMat[w, 1, 1])
        prec[0, w] = confusionMat[w, 0, 0] / (confusionMat[w, 0, 0] + confusionMat[w, 1, 0])
        recall[0, w] = confusionMat[w, 0, 0] / (confusionMat[w, 0, 0] + confusionMat[w, 0, 1])
        fOut[0, w] = confusionMat[w, 1, 0] / (confusionMat[w, 1, 0] + confusionMat[w, 1, 1])
        accTest[0, w] = (confusionMatTest[w, 0, 0] + confusionMatTest[w, 1, 1]) / (
            confusionMatTest[w, 0, 0] + confusionMatTest[w, 1, 0] +
            confusionMatTest[w, 0, 1] + confusionMatTest[w, 1, 1])
        precTest[0, w] = confusionMatTest[w, 0, 0] / (
            confusionMatTest[w, 0, 0] + confusionMatTest[w, 1, 0])
        recallTest[0, w] = confusionMatTest[w, 0, 0] / (
            confusionMatTest[w, 0, 0] + confusionMatTest[w, 0, 1])
        fOutTest[0, w] = confusionMatTest[w, 1, 0] / (
            confusionMatTest[w, 1, 0] + confusionMatTest[w, 1, 1])

    drugList = ["3TC", "ABC", "AZT", "D4T", "DDI", "TDF", "EFV", "NVP", "ETR", "RPV"]

    accuracy = (confusionMat[..., 0, 0] + confusionMat[..., 1, 1]) / torch.sum(confusionMat)
    precision = confusionMat[..., 0, 0] / (confusionMat[..., 0, 0] + confusionMat[..., 1, 0])
    Recall = confusionMat[..., 0, 0] / (confusionMat[..., 0, 0] + confusionMat[..., 0, 1])
    fallOut = confusionMat[..., 1, 0] / (confusionMat[..., 1, 0] + confusionMat[..., 1, 1])

    t = np.arange(0., 10., 1)
    plt.plot(t, acc[0, 0:10], 'r--', t, prec[0, 0:10], 'bs',
             t, recall[0, 0:10], 'g^', t, fOut[0, 0:10], 'm+')
    plt.savefig("/data/hibbslab/cluikart/MultiDrug_train.pdf")
    plt.close()
    plt.plot(t, accTest[0, 0:10], 'r--', t, precTest[0, 0:10], 'bs',
             t, recallTest[0, 0:10], 'g^', t, fOutTest[0, 0:10], 'm+')
    plt.savefig("/data/hibbslab/cluikart/MultiDrug_test.pdf")
    plt.close()

    torch.save(confusionMat, 'conMat.pth')
    torch.save(confusionMatTest, 'conMatTest.pth')
    confusionMat = torch.zeros(500, 2, 2)
    wrongPred = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

    ##### Test #####
    for w in range(epoch):
        for t in range(samples // 10):
            # Forward pass: compute predicted y by passing x to the model
            xs = torch.zeros([10, positions])
            ys = torch.zeros([10, 10])
            for i in range(0, 10):
                rnd = random.randint(0, len(testSamp) - 1)
                xs[i, ] = torch.reshape(sXpXa[testSamp[rnd], ...], (-1, ))
                ys[i, ] = torch.reshape(y[testSamp[rnd], ...], (-1, ))
            y_pred = model(xs)
            lossRecorder(y_pred, ys, t, lossDict)
            # conMat(y_pred, ys, w)
            for entry in range(0, len(ys)):
                for drug in range(0, len(ys[1, ])):
                    if abs(ys[entry, drug] - y_pred[entry, drug]) > .5:
                        wrongPred[drug] += 1

            # Compute the loss
            loss = criterion(y_pred, ys.type(torch.LongTensor))

            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        acc[0, w] = (confusionMat[w, 0, 0] + confusionMat[w, 1, 1]) / (
            confusionMat[w, 0, 0] + confusionMat[w, 1, 0] +
            confusionMat[w, 0, 1] + confusionMat[w, 1, 1])
        prec[0, w] = confusionMat[w, 0, 0] / (confusionMat[w, 0, 0] + confusionMat[w, 1, 0])
        recall[0, w] = confusionMat[w, 0, 0] / (confusionMat[w, 0, 0] + confusionMat[w, 0, 1])
        fOut[0, w] = confusionMat[w, 1, 0] / (confusionMat[w, 1, 0] + confusionMat[w, 1, 1])

    print(confusionMat)
    accuracy = (confusionMat[..., 0, 0] + confusionMat[..., 1, 1]) / torch.sum(confusionMat)
    precision = confusionMat[..., 0, 0] / (confusionMat[..., 0, 0] + confusionMat[..., 1, 0])
    Recall = confusionMat[..., 0, 0] / (confusionMat[..., 0, 0] + confusionMat[..., 0, 1])
    fallOut = confusionMat[..., 1, 0] / (confusionMat[..., 1, 0] + confusionMat[..., 1, 1])

    t = np.arange(0., 10., 1)
    plt.plot(t, acc[0, 0:10], 'r--', t, prec[0, 0:10], 'bs',
             t, recall[0, 0:10], 'g^', t, fOut[0, 0:10], 'm+')
    plt.close()

    ret = [float(x) / (len(drugResist2) * epoch) for x in wrongPred]
    for item in range(0, len(wrongPred)):
        print(drugList[item] + " " +
              str((float(wrongPred[item]) / (len(drugResist2) * epoch)) * 100) + "%")

    TPR_List = []
    FPR_List = []
    yList = []
    target = []
    i = 0
    confusionMat = torch.zeros(500, 2, 2)
    for t in range(samples // 10):
        # Forward pass: compute predicted y by passing x to the model
        xs = torch.zeros([10, positions])
        ys = torch.zeros([10, 10])
        for i in range(0, 10):
            rnd = random.randint(1, samples - 1)
            xs[i, ] = torch.reshape(sXpXa[rnd, ...], (-1, ))
            ys[i, ] = torch.reshape(y[rnd, ...], (-1, ))
        y_pred = model(xs)
        y_temp = y_pred.detach().numpy().tolist()
        yList = yList + [item for sublist in y_temp for item in sublist]
        ys_temp = ys.numpy().tolist()
        target = target + [item for sublist in ys_temp for item in sublist]
        lossRecorder(y_pred, ys, t, lossDict)
        conMat2(y_pred, ys, w, confusionMat)

        # Compute the loss (no optimizer step during this evaluation pass)
        loss = criterion(y_pred, ys.type(torch.LongTensor))
        i += 1

        TPR = confusionMat[w, 0, 0] / (confusionMat[w, 0, 0] + confusionMat[w, 0, 1])
        TPR_List.append(TPR.item())
        FPR = confusionMat[w, 1, 0] / (confusionMat[w, 1, 0] + confusionMat[w, 1, 1])
        FPR_List.append(FPR.item())

    # plt.plot(FPR_List, TPR_List)
    fpr, tpr, thres = metrics.roc_curve(target, yList)
    roc_auc = metrics.auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=1, alpha=0.3,
             label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc))
    plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
             label='Chance', alpha=.8)
    plt.savefig("/data/hibbslab/cluikart/MultiDrug_auc.pdf")
    plt.close()
from torch import nn

loss_functions = {
    "MSE": nn.MSELoss(),
    "L1": nn.L1Loss(),
    "CrossEntropy": nn.CrossEntropyLoss(),
    "CTC": nn.CTCLoss(),
    "NLL": nn.NLLLoss(),
    "PoissonNLL": nn.PoissonNLLLoss(),
    "KLDiv": nn.KLDivLoss(),
    "BCE": nn.BCELoss(),
    "BCEWithLogits": nn.BCEWithLogitsLoss(),
    "MarginRanking": nn.MarginRankingLoss(),
    "HingeEmbedding": nn.HingeEmbeddingLoss(),
    "MultiLabelMargin": nn.MultiLabelMarginLoss(),
    "SmoothL1": nn.SmoothL1Loss(),
    "SoftMargin": nn.SoftMarginLoss(),
    "MultiLabelSoftMargin": nn.MultiLabelSoftMarginLoss(),
    "CosineEmbedding": nn.CosineEmbeddingLoss(),
    "MultiMargin": nn.MultiMarginLoss(),
    "TripletMargin": nn.TripletMarginLoss()
}
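# Hedged usage sketch (my addition) for the registry above: look a criterion
# up by name and apply it. The tensors are illustrative.
import torch

criterion = loss_functions["MultiLabelMargin"]
scores = torch.randn(2, 4)
# positive class indices per row, padded with -1
target = torch.tensor([[0, 2, -1, -1], [3, -1, -1, -1]])
print(criterion(scores, target))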
def Hingeloss(output, target):
    return nn.MultiLabelMarginLoss()(output, target)
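# A minimal, self-contained sketch (my addition) of what the wrapper above
# computes: nn.MultiLabelMarginLoss is a multi-class multi-label hinge loss.
# The target lists the positive class indices, padded with -1; for each pair
# of (positive j, non-positive i) it accumulates max(0, 1 - (x[j] - x[i])) / C.
import torch
import torch.nn as nn

output = torch.tensor([[0.1, 0.2, 0.4, 0.8]])
target = torch.tensor([[3, 0, -1, -1]])  # classes 3 and 0 are positive
print(Hingeloss(output, target))  # tensor(0.8500)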
loss_f_none = nn.MarginRankingLoss(margin=0, reduction='none')
loss = loss_f_none(x1, x2, target)

print(loss)

# ---------------------- 11 Multi Label Margin Loss ----------------------
flag = 0
# flag = 1
if flag:
    x = torch.tensor([[0.1, 0.2, 0.4, 0.8]])
    y = torch.tensor([[0, 3, -1, -1]], dtype=torch.long)  # classes 0 and 3 are positive

    loss_f = nn.MultiLabelMarginLoss(reduction='none')
    loss = loss_f(x, y)

    print(loss)

# --------------------------------- compute by hand
flag = 0
# flag = 1
if flag:
    x = x[0]
    # each term is really max(0, 1 - (x[j] - x[i])); the clamp is omitted here
    # only because every term happens to be positive for these values
    item_1 = (1 - (x[0] - x[1])) + (1 - (x[0] - x[2]))  # positive class [0]
    item_2 = (1 - (x[3] - x[1])) + (1 - (x[3] - x[2]))  # positive class [3]

    loss_h = (item_1 + item_2) / x.shape[0]
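# Hedged check (my addition): with flag = 1 in both blocks above, the hand
# computation matches the module output exactly.
import torch
import torch.nn as nn

x = torch.tensor([[0.1, 0.2, 0.4, 0.8]])
y = torch.tensor([[0, 3, -1, -1]], dtype=torch.long)
loss_module = nn.MultiLabelMarginLoss(reduction='none')(x, y)

xr = x[0]
item_1 = (1 - (xr[0] - xr[1])) + (1 - (xr[0] - xr[2]))
item_2 = (1 - (xr[3] - xr[1])) + (1 - (xr[3] - xr[2]))
loss_hand = (item_1 + item_2) / xr.shape[0]
print(loss_module, loss_hand)  # both 0.8500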
args.num_relations = train_data_layer._num_relations
args.num_classes = train_data_layer._num_classes

# load net
net = Vrd_Model(args)
network.weights_normal_init(net, dev=0.01)
pretrained_model = '../data/VGG_imagenet.npy'
network.load_pretrained_npy(net, pretrained_model)
# Initialize object embedding with word2vec
# with open('../data/vrd/params_emb.pkl') as f:
#     emb_init = cPickle.load(f)
# net.state_dict()['emb.weight'][1::].copy_(torch.from_numpy(emb_init))
net.cuda()

params = list(net.parameters())
momentum = 0.9
weight_decay = 0.0005
args.criterion = nn.MultiLabelMarginLoss().cuda()

opt_params = [
    {'params': net.fc8.parameters(), 'lr': args.lr * 10},
    {'params': net.fc_fusion.parameters(), 'lr': args.lr * 10},
    {'params': net.fc_rel.parameters(), 'lr': args.lr * 10},
]
if (args.use_so):
## load models
image_model = torch.nn.DataParallel(ImageEmbedding().cuda(), device_ids=device)
recipe_model = torch.nn.DataParallel(TextEmbedding().cuda(), device_ids=device)
netG = torch.nn.DataParallel(G_NET().cuda(), device_ids=device)
multi_label_net = torch.nn.DataParallel(MultiLabelNet().cuda(), device_ids=device)
cm_discriminator = torch.nn.DataParallel(cross_modal_discriminator().cuda(),
                                         device_ids=device)
text_discriminator = torch.nn.DataParallel(text_emb_discriminator().cuda(),
                                           device_ids=device)
netsD = torch.nn.DataParallel(D_NET128().cuda(), device_ids=device)

## load loss functions
triplet_loss = TripletLoss(device, margin=0.3)
img2text_criterion = nn.MultiLabelMarginLoss().cuda()
weights_class = torch.Tensor(opts.numClasses).fill_(1)
weights_class[0] = 0  # zero weight on class 0, so the loss ignores it
class_criterion = nn.CrossEntropyLoss(weight=weights_class).cuda()
GAN_criterion = nn.BCELoss().cuda()

nz = opts.Z_DIM
noise = Variable(torch.FloatTensor(opts.batch_size, nz)).cuda()
fixed_noise = Variable(torch.FloatTensor(opts.batch_size, nz).normal_(0, 1)).cuda()
real_labels = Variable(torch.FloatTensor(opts.batch_size).fill_(1)).cuda()
fake_labels = Variable(torch.FloatTensor(opts.batch_size).fill_(0)).cuda()

fc_sia = nn.Sequential(