def main():
    """Train the convolutional autoencoder on unlabeled contact matrices.

    Relies on module-level names visible elsewhere in this project:
    ``args`` (use_cuda, data_dir, batch_size, log_interval),
    ``UnlabeledContact``, ``AutoEncoder`` and ``AverageMeter``.
    Side effect: writes timing stats via ``clock.save`` at the end.
    """
    use_cuda = args.use_cuda
    train_data = UnlabeledContact(data=args.data_dir)
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441
    img_height = 21
    img_width = 21

    vae = AutoEncoder(code_size=20, imgsize=input_size, height=img_height,
                      width=img_width)
    # BCEWithLogitsLoss: the decoder output is treated as logits against the
    # (presumably binary) contact-matrix targets — TODO confirm targets in [0, 1].
    criterion = nn.BCEWithLogitsLoss()
    if use_cuda:
        #vae = nn.DataParallel(vae)
        vae = vae.cuda()  #.half()
        criterion = criterion.cuda()

    optimizer = optim.SGD(vae.parameters(), lr=0.01)

    clock = AverageMeter(name='clock32single', rank=0)
    epoch_loss = 0
    total_loss = 0
    end = time.time()
    for epoch in range(15):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            # BUG FIX: the original `inputs.resize_(args.batch_size, 1, 21, 21)`
            # fabricates uninitialized rows whenever the final batch holds fewer
            # than batch_size samples; a view over the real batch dim is safe.
            inputs = inputs.view(-1, 1, 21, 21).float()
            if use_cuda:
                inputs = inputs.cuda()  #.half()

            optimizer.zero_grad()
            output, code = vae(inputs)
            loss = criterion(output, inputs)
            loss.backward()
            optimizer.step()

            # loss.item() replaces the deprecated loss.data[0], which raises
            # IndexError on 0-dim loss tensors in PyTorch >= 0.4.
            epoch_loss += loss.item()

            clock.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_interval == 0:
                # BUG FIX: `len(data)` was the number of dict keys, not the
                # batch size; use the actual batch dimension for progress.
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * inputs.size(0),
                    len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.item()))

    clock.save(path='/home/ygx/libraries/mds/molecules/molecules/conv_autoencoder/runtimes')
def main():
    """Train the linear VAE in half precision on unlabeled contact matrices.

    Relies on module-level names visible elsewhere in this project:
    ``args``, ``UnlabeledContact``, ``Encoder``, ``Decoder``, ``VAE``,
    ``latent_loss`` and ``AverageMeter``.
    """
    use_cuda = args.use_cuda
    train_data = UnlabeledContact(data=args.data_dir)
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21, flattened to 441 for the linear layers.
    input_size = 441
    encoder = Encoder(input_size=input_size, latent_size=3)
    decoder = Decoder(latent_size=3, output_size=input_size)
    vae = VAE(encoder, decoder, use_cuda=use_cuda)
    criterion = nn.MSELoss()
    if use_cuda:
        # BUG FIX: the original rebound the locals `encoder`/`decoder` to
        # fresh nn.DataParallel wrappers that were never used (the VAE already
        # holds the raw modules), and additionally wrapped `vae` itself in
        # DataParallel — which hides the `z_mean`/`z_sigma` attributes that
        # latent_loss() reads below (AttributeError on the wrapper). Moving
        # the unwrapped VAE to the GPU in fp16 keeps the intended numerics
        # and makes the latent statistics reachable.
        vae = vae.cuda().half()
        criterion = criterion.cuda().half()

    optimizer = optim.SGD(vae.parameters(), lr=0.01)

    clock = AverageMeter(name='clock16', rank=0)
    epoch_loss = 0
    total_loss = 0
    end = time.time()
    for epoch in range(15):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            # inputs = inputs.resize_(args.batch_size, 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda().half()

            optimizer.zero_grad()
            dec = vae(inputs)
            # KL term computed from the statistics stored on the VAE during
            # the forward pass.
            ll = latent_loss(vae.z_mean, vae.z_sigma)
            loss = criterion(dec, inputs) + ll
            loss.backward()
            optimizer.step()

            # loss.item() replaces the deprecated loss.data[0] indexing.
            epoch_loss += loss.item()

            clock.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_interval == 0:
                # BUG FIX: `len(data)` counted dict keys, not samples.
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * inputs.size(0),
                    len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.item()))

    clock.save(path='/home/ygx/libraries/mds/molecules/molecules/linear_vae')
if b_i == 0: joint_matrix = MICriterion._p_i_j.detach().cpu().numpy() # noqa fig = plt.figure() plt.imshow(joint_matrix) plt.colorbar() writer.add_figure( tag=f"mi_head_{head}_{head_i_epoch}", figure=plt.gcf(), global_step=e_i, close=True ) plt.close(plt.gcf()) scaler.scale(loss).backward() scaler.step(optimiser) scaler.update() avg_loss_meter.update(loss.item()) mi_meter.update(avg_mi_batch.item()) state_dict = dict(zip( ("Model ind", "epoch", "avg_loss", "inst_loss", "avg_mi", "inst_mi"), ( config.model_ind, e_i, avg_loss_meter.summary(), loss.item(), mi_meter.summary(), avg_mi_batch.item(), ) )) indicator.set_postfix(state_dict) avg_loss = avg_loss_meter.summary()
def run_one_epoch(self, training):
    """Run a single epoch of training or validation over the multi-head model.

    Args:
        training: if True, iterate ``self.train_loader`` and update weights;
            otherwise iterate ``self.val_loader`` with no optimizer steps.

    Returns:
        (avg_loss, avg_accuracy) for the epoch; accuracy stays 0 for the
        non-classification (segmentation) path, which never updates ``accs``.
    """
    tic = time.time()
    batch_time = AverageMeter()
    losses = AverageMeter()
    accs = AverageMeter()
    if training:
        amnt = self.num_train
        dataset = self.train_loader
    else:
        dataset = self.val_loader
        amnt = self.num_valid
    with tqdm(total=amnt) as pbar:
        for i, data in enumerate(dataset):
            x, y = data
            # Replicate the target once per head so each head sees the same
            # labels: (num_heads, N) for classification, (num_heads, N, 1, H, W)
            # for the segmentation task.
            if self.classification:
                # assuming one-hot
                y = y.view(1, -1).expand(self.model.num_heads, -1)
            else:
                y = y.view(1, -1, 1, x.shape[-2], x.shape[-1]).expand(
                    self.model.num_heads, -1, -1, -1, -1)
            if self.config.use_gpu:
                x, y = x.cuda(), y.cuda()
            output = self.model(x)
            if training:
                self.optimizer.zero_grad()
            # Average the per-head criterion across all heads.
            loss = None
            for head in range(self.model.num_heads):
                if loss is None:
                    loss = self.criterion(output[head], y[head])
                else:
                    loss = loss + self.criterion(output[head], y[head])
            loss = loss / self.model.num_heads
            if training:
                loss.backward()
                self.optimizer.step()
            # Compatibility shim: old PyTorch exposed scalar losses via
            # loss.data[0]; >= 0.4 raises IndexError there, so fall back to
            # .item() on the 0-dim tensor.
            try:
                loss_data = loss.data[0]
            except IndexError:
                loss_data = loss.data.item()
            losses.update(loss_data)
            # measure elapsed time (cumulative since epoch start, not per batch)
            toc = time.time()
            batch_time.update(toc - tic)
            if self.classification:
                _, predicted = torch.max(output.data, -1)
                total = self.batch_size * self.model.num_heads
                correct = (predicted == y).sum().item()
                acc = correct / total
                accs.update(acc)
                pbar.set_description(
                    f"{(toc - tic):.1f}s - loss: {loss_data:.3f} acc {accs.avg:.3f}")
            else:
                pbar.set_description(
                    f"{(toc - tic):.1f}s - loss: {loss_data:.3f}")
            pbar.update(self.batch_size)
            # Log illumination every other training batch.
            if training and i % 2 == 0:
                self.model.log_illumination(self.curr_epoch, i)
            # For segmentation validation, push one sample image pair to wandb.
            # NOTE(review): 256x256 output size is hard-coded here — confirm
            # it matches the model's actual output resolution.
            if not training and i == 0 and not self.classification:
                y_sample = y[0, 0].view(256, 256).detach().cpu().numpy()
                p_sample = output[0, 0].view(256, 256).detach().cpu().numpy()
                wandb.log({f"images_epoch{self.curr_epoch}": [
                    wandb.Image(np.round(p_sample * 255), caption="prediction"),
                    wandb.Image(np.round(y_sample * 255), caption="label")]},
                    step=self.curr_epoch)
    return losses.avg, accs.avg
def train(self, epoch, data_loader, opt_sn, opt_vn, mode, writer=None, print_freq=1):
    """Run one training epoch for either the segmentation net ('sn') or the
    value net ('vn').

    Args:
        epoch: current epoch index (used for logging).
        data_loader: iterable of batches understood by ``self._parse_data``.
        opt_sn, opt_vn: optimizers for the two networks.
        mode: 'sn' to step the segmentation net, 'vn' to step the value net.
        writer: optional summary writer; receives one prediction/label
            visualization per epoch (only meaningful in 'vn' mode).
        print_freq: print running stats every ``print_freq`` batches.
    """
    self.sn.train()
    self.vn.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses_sn = AverageMeter()
    losses_vn = AverageMeter()
    ious = AverageMeter()

    end = time.time()
    # BUG FIX: seg_pred was only assigned on the 'vn' path but referenced
    # unconditionally in the writer branch below, raising NameError whenever
    # mode == 'sn' and a writer was supplied. Initialize it and guard.
    seg_pred = None
    for i, inputs in enumerate(data_loader):
        data_time.update(time.time() - end)
        img, lbl = self._parse_data(inputs)

        # The sn forward pass (loss + IoU + heat map) runs in both modes; its
        # loss is only stepped in 'sn' mode.
        loss_sn, iou_, heat_map = self._forward_sn(img, lbl)
        # .item() replaces the deprecated loss.data[0] indexing.
        losses_sn.update(loss_sn.item(), lbl.size(0))
        ious.update(iou_, lbl.size(0))
        if mode == 'sn':
            self.step(opt_sn, loss_sn)
        elif mode == 'vn':
            # Train the value net to predict the IoU of the sn's heat map.
            _, seg_pred = torch.max(heat_map, dim=1, keepdim=True)
            target_iou = iou(heat_map.data, lbl.data, average=False)
            loss_vn, iou_pred = self._forward_vn(img, heat_map, target_iou)
            losses_vn.update(loss_vn.item(), lbl.size(0))
            self.step(opt_vn, loss_vn)

        batch_time.update(time.time() - end)
        end = time.time()

        if (i + 1) % print_freq == 0:
            print('Epoch: [{}][{}/{}]\t'
                  'Time {:.3f} ({:.3f})\t'
                  'Data {:.3f} ({:.3f})\t'
                  'Loss_sn {:.3f} ({:.3f})\t'
                  'Loss_vn {:.3f} ({:.3f})\t'
                  'Prec {:.2%} ({:.2%})\t'.format(
                      epoch, i + 1, len(data_loader),
                      batch_time.val, batch_time.avg,
                      data_time.val, data_time.avg,
                      losses_sn.val, losses_sn.avg,
                      losses_vn.val, losses_vn.avg,
                      ious.val, ious.avg))

    # Log one visualization per epoch from the final batch. Skipped when no
    # 'vn' prediction was produced (seg_pred is None in 'sn' mode / empty loader).
    if writer is not None and seg_pred is not None:
        summary_output_lbl(seg_pred.data, lbl.data, writer, epoch)
def forward(data_loader, model, criterion, epoch, training, model_type,
            optimizer=None, writer=None):
    """Run one epoch of the quantized-training loop over ``data_loader``.

    Args:
        data_loader: yields (inputs, target) batches; moved to cuda:0.
        model: the network; for model_type 'int' its forward returns
            (output, output_exp) and backward is driven via model.backward().
        criterion: loss function applied to (possibly rescaled) outputs.
        epoch: current epoch, used for optimizer schedule and logging steps.
        training: True enables backward passes and periodic logging.
        model_type: 'int' (integer forward+backward), 'hybrid' (float
            backward then int8 backward), or anything else for plain float.
        optimizer: required when training with non-'int' model types; must
            expose update(epoch, step) in addition to the usual API.
        writer: summary writer used for gradient histograms when
            args.grad_hist is set.

    Returns:
        (losses.avg, top1.avg, top5.avg) for the epoch.
    """
    if training:
        model.train()
    else:
        model.eval()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    total_steps = len(data_loader)
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        inputs = inputs.to('cuda:0')
        target = target.to('cuda:0')

        # compute output
        output = model(inputs)
        if model_type == 'int':
            # omit the output exponent: the int model returns a mantissa
            # tensor plus a shared exponent; rescale by 2**exp for the loss.
            output, output_exp = output
            output = output.float()
            loss = criterion(output * (2**output_exp.float()), target)
        else:
            output_exp = 0
            loss = criterion(output, target)

        # measure accuracy and record loss
        losses.update(float(loss), inputs.size(0))
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))

        if training:
            if model_type == 'int':
                # Fully integer path: the model handles its own backward.
                model.backward(target)
            elif model_type == 'hybrid':
                # float backward
                optimizer.update(epoch, epoch * len(data_loader) + i)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                #int8 backward
                model.backward()
            else:
                optimizer.update(epoch, epoch * len(data_loader) + i)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.log_interval == 0 and training:
            # NOTE(review): 'e {output_exp:d}' expects an int; in the 'int'
            # branch output_exp is a tensor — confirm :d formatting works for
            # the actual exponent type.
            logging.info('{model_type} [{0}][{1}/{2}] '
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                         'Data {data_time.val:.2f} '
                         'loss {loss.val:.3f} ({loss.avg:.3f}) '
                         'e {output_exp:d} '
                         '@1 {top1.val:.3f} ({top1.avg:.3f}) '
                         '@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             model_type=model_type,
                             batch_time=batch_time,
                             data_time=data_time,
                             loss=losses,
                             output_exp=output_exp,
                             top1=top1,
                             top5=top5))
            # Optionally dump gradient histograms at the same logging cadence.
            if args.grad_hist:
                if args.model_type == 'int':
                    for idx, l in enumerate(model.forward_layers):
                        if hasattr(l, 'weight'):
                            grad = l.grad_int32acc
                            writer.add_histogram(
                                'Grad/' + l.__class__.__name__ + '_' + str(idx),
                                grad, epoch * total_steps + i)
                elif args.model_type == 'float':
                    for idx, l in enumerate(model.layers):
                        if hasattr(l, 'weight'):
                            writer.add_histogram(
                                'Grad/' + l.__class__.__name__ + '_' + str(idx),
                                l.weight.grad, epoch * total_steps + i)
                    for idx, l in enumerate(model.classifier):
                        if hasattr(l, 'weight'):
                            writer.add_histogram(
                                'Grad/' + l.__class__.__name__ + '_' + str(idx),
                                l.weight.grad, epoch * total_steps + i)
    return losses.avg, top1.avg, top5.avg
def train_model(output_path, model, dataloaders, dataset_sizes, criterion,
                optimizer, num_epochs=5, scheduler=None):
    """Train `model`, validate each epoch, evaluate on the test split, and
    keep only the checkpoint from the best-validation-AUC epoch.

    Args:
        output_path: subdirectory name under ./iterations/ for checkpoints.
        model: network; `model.module` is saved, which presumably means the
            model is wrapped in DataParallel — TODO confirm at the call site.
        dataloaders: dict with 'train', 'val' and 'test' loaders yielding
            (inputs, labels) with binary labels (num_classes is fixed to 2).
        dataset_sizes: accepted but unused in this function.
        criterion: BCE-style loss applied to (logits.cpu(), one_hot targets).
        optimizer: optimizer stepped during BOTH train and val phases (see
            NOTE below).
        num_epochs: number of epochs (1-based in logs and filenames).
        scheduler: accepted but unused in this function.

    Returns:
        (best_epoch, best_model, TPFPFN0, TPFPFN1, test_acc_best,
        test_auc_best) where the TP/FP/FN lists come from the best epoch.

    NOTE(review): formatting reconstructed (original line structure was
    collapsed). The meters `losses`/`accuracies` and the lists
    `all_preds`/`all_labels` are shared across phases and epochs without being
    reset, so the per-phase AUC/loss/acc figures are cumulative; the 'val'
    phase also runs backward() and optimizer.step(), so validation updates
    the weights. Flagged here but deliberately left unchanged in this
    documentation pass.
    """
    if not os.path.exists('iterations/' + str(output_path) + '/saved'):
        os.makedirs('iterations/' + str(output_path) + '/saved')
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    losses = AverageMeter()
    accuracies = AverageMeter()
    all_preds = []
    all_labels = []
    val_auc_all = []
    val_acc_all = []
    test_auc_all = []
    test_acc_all = []
    TPFPFN0_all = []
    TPFPFN1_all = []
    best_val_auc = 0.0
    best_epoch = 0

    for epoch in range(1, num_epochs + 1):
        print('-' * 50)
        print('Epoch {}/{}'.format(epoch, num_epochs))

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            # tqdm_loader = tqdm(dataloaders[phase])
            # for data in tqdm_loader:
            #     inputs, labels = data
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                # with torch.set_grad_enabled(True):
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                # Two-class one-hot targets (as CPU floats) for the BCE-style
                # criterion, which is applied to outputs moved to the CPU.
                labels_onehot = torch.nn.functional.one_hot(labels,
                                                            num_classes=2)
                labels_onehot = labels_onehot.type(torch.FloatTensor)
                # BCEloss = torch.nn.functional.binary_cross_entropy_with_logits(outputs.cpu(), labels_onehot, torch.FloatTensor([1.0, 1.0]))
                BCEloss = criterion(outputs.cpu(), labels_onehot)
                # print("BCEloss", BCEloss)
                BCEloss_rank = binary_crossentropy_with_ranking(
                    outputs, labels_onehot)
                # print("BCEloss_rank", BCEloss_rank)
                # BCEloss_rank.requires_grad = True
                # The ranking term is multiplied by 0 — it is currently
                # disabled and only BCEloss drives the gradients.
                loss = BCEloss + 0 * BCEloss_rank
                # print("BCEloss, BCEloss_rank", BCEloss, BCEloss_rank)
                # loss = (BCEloss_rank + 1) * BCEloss
                loss.backward()
                optimizer.step()

                losses.update(loss.item(), inputs.size(0))
                acc = float(torch.sum(preds == labels.data)) / preds.shape[0]
                accuracies.update(acc)
                # Collect class-1 probabilities and labels for AUC.
                all_preds += list(
                    torch.nn.functional.softmax(
                        outputs, dim=1)[:, 1].cpu().data.numpy())
                all_labels += list(labels.cpu().data.numpy())
                # tqdm_loader.set_postfix(loss=losses.avg, acc=accuracies.avg)

            auc = roc_auc_score(all_labels, all_preds)
            if phase == 'train':
                auc_t = auc
                loss_t = losses.avg
                acc_t = accuracies.avg
            if phase == 'val':
                auc_v = auc
                loss_v = losses.avg
                acc_v = accuracies.avg
                val_acc_all.append(acc_v)
                val_auc_all.append(auc_v)

        print('Train AUC: {:.8f} Loss: {:.8f} ACC: {:.8f} '.format(
            auc_t, loss_t, acc_t))
        print('Val AUC: {:.8f} Loss: {:.8f} ACC: {:.8f} '.format(
            auc_v, loss_v, acc_v))

        # Track the best validation AUC and checkpoint that epoch's weights.
        if auc_v > best_val_auc:
            best_val_auc = auc_v
            best_epoch = epoch
            # print(auc_v, best_val_auc)
            # print(best_epoch)
            best_model = copy.deepcopy(model)
            torch.save(
                model.module, './iterations/' + str(output_path) +
                '/saved/model_{}_epoch.pt'.format(epoch))

        # ############################################################################################################# Test
        for phase in ['test']:
            model.eval()  # Set model to evaluate mode
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(False):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs.data, 1)
                acc = float(torch.sum(preds == labels.data)) / preds.shape[0]
                accuracies.update(acc)
                all_preds += list(
                    torch.nn.functional.softmax(
                        outputs, dim=1)[:, 1].cpu().data.numpy())
                all_labels += list(labels.cpu().data.numpy())
                # tqdm_loader.set_postfix(loss=losses.avg, acc=accuracies.avg)

            auc = roc_auc_score(all_labels, all_preds)
            auc_test = auc
            loss_test = losses.avg
            acc_test = accuracies.avg
            test_acc_all.append(acc_test)
            test_auc_all.append(auc_test)
            print('Test AUC: {:.8f} Loss: {:.8f} ACC: {:.8f} '.format(
                auc_test, loss_test, acc_test))

            # Accumulate per-class TP/FP/FN over the test set from per-batch
            # confusion matrices.
            nb_classes = 2
            confusion_matrix = torch.zeros(nb_classes, nb_classes)
            with torch.no_grad():
                TrueP0 = 0
                FalseP0 = 0
                FalseN0 = 0
                TrueP1 = 0
                FalseP1 = 0
                FalseN1 = 0
                for i, (inputs, classes) in enumerate(dataloaders[phase]):
                    # Re-zeroed each batch; running totals live in TrueP*/... .
                    confusion_matrix = torch.zeros(nb_classes, nb_classes)
                    input = inputs.to(device)
                    target = classes.to(device)
                    outputs = model(input)
                    _, preds = torch.max(outputs, 1)
                    for t, p in zip(target.view(-1), preds.view(-1)):
                        confusion_matrix[t, p] += 1

                    # Class 0: row = actual, column = predicted.
                    this_class = 0
                    col = confusion_matrix[:, this_class]
                    row = confusion_matrix[this_class, :]
                    TP = row[this_class]
                    FN = sum(row) - TP
                    FP = sum(col) - TP
                    # print("TP, FP, FN: ", TP, FP, FN)
                    TrueP0 = TrueP0 + TP
                    FalseP0 = FalseP0 + FP
                    FalseN0 = FalseN0 + FN

                    # Class 1.
                    this_class = 1
                    col = confusion_matrix[:, this_class]
                    row = confusion_matrix[this_class, :]
                    TP = row[this_class]
                    FN = sum(row) - TP
                    FP = sum(col) - TP
                    # print("TP, FP, FN: ", TP, FP, FN)
                    TrueP1 = TrueP1 + TP
                    FalseP1 = FalseP1 + FP
                    FalseN1 = FalseN1 + FN

                TPFPFN0 = [TrueP0, FalseP0, FalseN0]
                TPFPFN1 = [TrueP1, FalseP1, FalseN1]
                TPFPFN0_all.append(TPFPFN0)
                TPFPFN1_all.append(TPFPFN1)
                print("overall_TP, FP, FN for 0: ", TrueP0, FalseP0, FalseN0)
                print("overall_TP, FP, FN for 1: ", TrueP1, FalseP1, FalseN1)

    print("best_ValidationEpoch:", best_epoch)
    # print(TPFPFN0_all, val_auc_all, test_auc_all)
    # Index best-epoch stats (epoch numbers are 1-based, lists 0-based).
    TPFPFN0_best = TPFPFN0_all[best_epoch - 1][0]
    TPFPFN1_best = TPFPFN1_all[best_epoch - 1][0]
    val_auc_best = val_auc_all[best_epoch - 1]
    val_acc_best = val_acc_all[best_epoch - 1]
    test_auc_best = test_auc_all[best_epoch - 1]
    test_acc_best = test_acc_all[best_epoch - 1]

    # #################### save only the best, delete others
    file_path = './iterations/' + str(output_path) + '/saved/model_' + str(
        best_epoch) + '_epoch.pt'
    if os.path.isfile(file_path):
        for CleanUp in glob.glob('./iterations/' + str(output_path) +
                                 '/saved/*.pt'):
            if 'model_' + str(best_epoch) + '_epoch.pt' not in CleanUp:
                os.remove(CleanUp)
    # # ######################################################
    return best_epoch, best_model, TPFPFN0_all[best_epoch - 1], TPFPFN1_all[
        best_epoch - 1], test_acc_best, test_auc_best


# def binary_crossentropy_with_ranking(y_true, y_pred):
#     """ Trying to combine ranking loss with numeric precision"""
#     # first get the log loss like normal
#     logloss = K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1)
#
#     # next, build a rank loss
#
#     # clip the probabilities to keep stability
#     y_pred_clipped = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
#
#     # translate into the raw scores before the logit
#     y_pred_score = K.log(y_pred_clipped / (1 - y_pred_clipped))
#
#     # determine what the maximum score for a zero outcome is
#     y_pred_score_zerooutcome_max = K.max(y_pred_score * (y_true < 1))
#
#     # determine how much each score is above or below it
#     rankloss = y_pred_score - y_pred_score_zerooutcome_max
#
#     # only keep losses for positive outcomes
#     rankloss = rankloss * y_true
#
#     # only keep losses where the score is below the max
#     rankloss = K.square(K.clip(rankloss, -100, 0))
#
#     # average the loss for just the positive outcomes
#     rankloss = K.sum(rankloss, axis=-1) / (K.sum(y_true > 0) + 1)
#
#     # return (rankloss + 1) * logloss - an alternative to try
#     return rankloss + logloss