def ensure_model(model):
    """Place *model* on the available device(s).

    On a CUDA host the model is moved to GPU, and when more than one GPU
    is visible it is additionally wrapped in ``nn.DataParallel``. On a
    CPU-only host the model is returned untouched.

    Parameters
    ----------
    model : nn.Module
        The model to place.

    Returns
    -------
    nn.Module
        The (possibly GPU-resident) model, or a ``DataParallel`` wrapper
        around it on multi-GPU hosts.
    """
    if torch.cuda.is_available():
        model.cuda()
        # Hoisted: the original queried device_count() twice.
        n_gpus = torch.cuda.device_count()
        if n_gpus > 1:
            # Lazy %-args: the message is only formatted when this log
            # level is actually emitted.
            logging.info('%d GPUs are used', n_gpus)
            model = nn.DataParallel(model).cuda()
    return model
# --- Tail of an AverageMeter-style running-average update; the enclosing
# method's `def` line falls outside this chunk (presumably
# `update(self, val, n=1)` — confirm against the full file).
self.sum += val * n
self.count += n
self.avg = self.sum / self.count

# print corpus.train.size()
# Vocabulary size is hard-coded here rather than derived from the corpus.
ntokens = 605
###############################################################################
# Build the model
###############################################################################
# NOTE(review): this assignment makes the name `model` shadow the imported
# `model` module, so `model.RNNModel` is unreachable afterwards.
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.nConv1Out, args.dropout)
if args.cuda:
    model.cuda()
# Class-weighted two-class loss (weights 0.3 vs 1.0).
# NOTE(review): `size_average=False` is deprecated in modern PyTorch in
# favour of `reduction='sum'`.
criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor([0.3 ,1.0]), size_average=False)
###############################################################################
# Training code
###############################################################################
def accuracy(output, target, topk=(1,), ori_label=(1,)):
    """Computes the precision@k for the specified values of k"""
    # `output` is expected to be (batch, classes); take the top-k class
    # indices per row. The function body continues past this chunk.
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) if args.continue_train: model = torch.load(os.path.join(args.save, 'model.pt')) else: model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nhidlast, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied, args.dropoutl, args.n_experts) if args.cuda: if args.single_gpu: parallel_model = model.cuda() else: parallel_model = nn.DataParallel(model, dim=1).cuda() else: parallel_model = model total_params = sum(x.data.nelement() for x in model.parameters()) logging('Args: {}'.format(args)) logging('Model total parameters: {}'.format(total_params)) criterion = nn.CrossEntropyLoss() ############################################################################### # Training code ###############################################################################
### if not criterion: splits = [] if ntokens > 500000: # One Billion # This produces fairly even matrix mults for the buckets: # 0: 11723136, 1: 10854630, 2: 11270961, 3: 11219422 splits = [4200, 35000, 180000] elif ntokens > 75000: # WikiText-103 splits = [2800, 20000, 76000] print('Using', splits) criterion = SplitCrossEntropyLoss(args.emsize, splits=splits, verbose=False) ### if args.cuda: model = model.cuda() criterion = criterion.cuda() ### params = list(model.parameters()) + list(criterion.parameters()) total_params = sum(x.size()[0] * x.size()[1] if len(x.size()) > 1 else x.size()[0] for x in params if x.size()) print('Args:', args) print('Model total parameters:', total_params) ############################################################################### # Training code ############################################################################### def evaluate(data_source, batch_size=10): # Turn on evaluation mode which disables dropout. model.eval() if args.model == 'QRNN': model.reset()
ntokens = len(corpus.dictionary)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied)
# Load checkpoint
if args.checkpoint != '':
    if args.cuda:
        model = torch.load(args.checkpoint)
    else:
        # Load GPU model on CPU
        # map_location keeps every tensor on CPU regardless of where it
        # was saved.
        model = torch.load(args.checkpoint, map_location=lambda storage, loc: storage)
if args.cuda:
    model.cuda()
else:
    model.cpu()
print(model)
#quit()
criterion = nn.CrossEntropyLoss()
if args.cuda:
    criterion.cuda()
###############################################################################
# Training code
###############################################################################
def repackage_hidden(h):
    """Detach hidden state `h` from its autograd history. (Body falls outside this chunk.)"""
if args.continue_train:
    # Resume training from the checkpoint saved in `args.save`.
    model = torch.load(os.path.join(args.save, 'model.pt'))
else:
    # maddie - get vocab and put it in RNN model as a parameter
    print("Loading Vocab")
    vocab = Vocab.load(args.vocab)
    # vocab_hony
    # NOTE(review): `model` the variable shadows `model` the module after
    # this assignment, so the constructor is only reachable once.
    model = model.RNNModel(args.model, vocab, ntokens, args.emsize, args.nhid, args.nhidlast, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied, args.dropoutl, args.n_experts, args.num4embed, args.num4first, args.num4second)
if args.cuda:
    if args.single_gpu:
        parallel_model = model.cuda()
    else:
        # dim=1: scatter along the batch axis of sequence-first tensors.
        parallel_model = nn.DataParallel(model, dim=1).cuda()
else:
    parallel_model = model
total_params = sum(x.data.nelement() for x in model.parameters())
# `logging` is used as a callable — presumably a project-local helper
# shadowing the stdlib module; confirm against the full file.
logging('Args: {}'.format(args))
logging('Model total parameters: {}'.format(total_params))
criterion = nn.CrossEntropyLoss()
###############################################################################
# Training code
###############################################################################
# Adaptive-softmax-style vocabulary splits for very large vocabularies.
splits = []
if ntokens > 500000:
    # One Billion
    # This produces fairly even matrix mults for the buckets:
    # 0: 11723136, 1: 10854630, 2: 11270961, 3: 11219422
    splits = [4200, 35000, 180000]
elif ntokens > 75000:
    # WikiText-103
    splits = [2800, 20000, 76000]
print('Using', splits)
criterion = SplitCrossEntropyLoss(args.emsize, splits=splits, verbose=False)
###
if args.cuda:
    model = model.cuda()
    criterion = criterion.cuda()
###
params = list(model.parameters()) + list(criterion.parameters())
# Matrices contribute rows*cols, vectors their length.
# NOTE(review): rank>2 tensors are undercounted (only the first two
# dimensions are multiplied).
total_params = sum(x.size()[0] * x.size()[1] if len(x.size()) > 1 else x.size()[0] for x in params if x.size())
print('Args:', args)
print('Model total parameters:', total_params)
if args.distributed:
    #model.para sync custom
    # Broadcast every parameter (model + criterion) from rank 0 so all
    # workers start from identical weights.
    for parameter in params: #model.parameters():
        dist.broadcast(parameter.data, 0, group = 0)
    # NOTE(review): chunk ends mid-statement — the body of this `if`
    # continues beyond the visible source.
    if dist.get_rank() == 0:
# --- Tail of a `get_optim(lr)` factory: the leading `if` branches for the
# other optimizer choices fall outside this chunk, hence the bare `elif`.
    optimizer = optim.Adagrad(model.parameters(), lr=lr)
elif args.optim == 'adadelta':
    optimizer = optim.Adadelta(model.parameters(), lr=lr)
return optimizer


optimizer = get_optim(args.lr)
# Wrap the model for Decoupled Neural Interfaces (synthetic gradients);
# presumably λ mixes synthetic and true gradients — TODO confirm against
# the DNI library's documentation.
model = DNI(model, hidden_size=args.nhid, optim=optimizer, dni_network=LinearDNI, λ=0.5)
if args.cuda:
    model.cuda(0)
criterion = nn.CrossEntropyLoss()
###############################################################################
# Training code
###############################################################################
def repackage_hidden(h):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if h is None:
        return None
    if type(h) == Variable:
        return Variable(h.data)
    # Chunk ends here; the list-handling branch continues past the
    # visible source.
    elif type(h) == list:
def train_model(model, train_loader, epoch, num_epochs, optimizer, writer, current_lr, counting, log_every=100):
    """Run one training epoch; return (mean loss, AUC), each rounded to 4 dp.

    Parameters are the usual training-loop plumbing: the model, its data
    loader and optimizer, a TensorBoard-style `writer`, the current learning
    rate (used for logging only), and `counting`, which names the
    attention-score CSV written at the end. Assumes an effective batch size
    of 1 — element [0] of label/weight is taken and predictions are indexed
    as [0][1] — TODO confirm against the DataLoader configuration.
    """
    _ = model.train()
    if torch.cuda.is_available():
        model.cuda()
    y_preds = []
    y_trues = []
    losses = []
    # Constant zero column used below to build the two-column label frame.
    a = np.zeros([1, 1])
    att_scores = []
    inds = []
    for i, (image, label, weight) in enumerate(train_loader):
        optimizer.zero_grad()
        if torch.cuda.is_available():
            image = image.cuda()
            label = label.cuda()
            weight = weight.cuda()
        label = label[0]
        weight = weight[0]
        prediction, att = model.forward(image.float())
        att_scores.append(att)
        inds.append(i)
        # NOTE(review): on a CUDA host `label` was moved to the GPU above,
        # and `.numpy()` raises on CUDA tensors — this presumably only ran
        # on CPU, or should be `label.cpu().numpy()`; confirm.
        b = label.numpy()
        # Build a two-column one-hot-style frame: 'dud' = 1 - 'target'.
        c = pd.concat([pd.DataFrame(a), pd.DataFrame(b)], axis=1)
        c.columns = ['dud', 'target']
        c['dud'] = 1 - c['target']
        # NOTE(review): the rebuilt label is a CPU float64 tensor; if
        # `prediction` is float32/CUDA the loss call below would mismatch
        # in dtype/device — verify against the actual runtime setup.
        label = torch.from_numpy(np.asarray(c))
        # A fresh loss module is constructed every step because `weight`
        # changes per batch.
        loss = torch.nn.BCEWithLogitsLoss(weight=weight)(prediction, label)
        loss.backward()
        optimizer.step()
        loss_value = loss.item()
        losses.append(loss_value)
        probas = torch.sigmoid(prediction)
        y_trues.append(int(label[0][1]))
        y_preds.append(probas[0][1].item())
        try:
            auc = metrics.roc_auc_score(y_trues, y_preds)
        # NOTE(review): bare except — presumably meant to cover the
        # single-class ValueError early in the epoch, but it also hides
        # every other failure; narrow to `except ValueError`.
        except:
            auc = 0.5
        if epoch % 50 == 0:
            print(auc)
        writer.add_scalar('Train/Loss', loss_value, epoch * len(train_loader) + i)
        writer.add_scalar('Train/AUC', auc, epoch * len(train_loader) + i)
        # `&` works here because both operands are bools, though `and`
        # would be the conventional spelling.
        if (i % log_every == 0) & (i > 0):
            print(
                '''[Epoch: {0} / {1} |Single batch number : {2} / {3} ]| avg train loss {4} | train auc : {5} | lr : {6}'''
                .format(epoch + 1, num_epochs, i, len(train_loader), np.round(np.mean(losses), 4), np.round(auc, 4), current_lr))
    # `auc` and `i` leak out of the loop: this records the last batch's
    # values (and raises NameError on an empty loader).
    writer.add_scalar('Train/AUC_epoch', auc, epoch + i)
    train_loss_epoch = np.round(np.mean(losses), 4)
    train_auc_epoch = np.round(auc, 4)
    # Dump per-batch attention scores alongside their batch indices.
    scores = pd.DataFrame(
        pd.concat([pd.DataFrame(att_scores), pd.DataFrame(inds)], axis=1))
    scores.to_csv('scores2_train{}'.format(counting))
    return train_loss_epoch, train_auc_epoch