def __init__(self, config, bert_hidden_states=1, dropout=0.1, update_bert=False):
    config = deepcopy(config)
    config.output_hidden_states = True
    config.dropout = dropout
    super(DistilBertForToxic, self).__init__(config)
    self.bert_hidden_states = bert_hidden_states
    self.num_labels = 1
    self.update_bert = update_bert
    # bert = DistilBertModel(DistilBertConfig())
    bert = DistilBertForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",  # The 6-layer DistilBERT model with an uncased vocab.
        num_labels=2,               # Number of output labels: 2 for binary classification.
                                    # You can increase this for multi-class tasks.
        output_attentions=False,    # Whether the model returns attention weights.
        output_hidden_states=True,  # Whether the model returns all hidden states.
    ).distilbert
    bert.config = config
    device = get_device()
    bert = bert.to(device)
    self.bert = bert
    self.qa_outputs = nn.Sequential(
        nn.Dropout(dropout),
        nn.Linear(config.hidden_size * bert_hidden_states, 1),
        nn.Sigmoid())
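
# Illustrative sketch (not part of the original model): the Linear head above
# takes config.hidden_size * bert_hidden_states input features, which implies
# the forward pass concatenates the last `bert_hidden_states` hidden-state
# tensors and scores one token position. Demonstrated on dummy tensors; the
# shapes and the [CLS]-position pooling here are assumptions.
def _example_stacked_hidden_head():
    import torch
    import torch.nn as nn
    hidden_size, k, batch, seq_len = 768, 4, 2, 16
    # Stand-ins for the 7 tensors (embeddings + 6 layers) DistilBERT returns
    # when output_hidden_states=True.
    hidden_states = [torch.randn(batch, seq_len, hidden_size) for _ in range(7)]
    head = nn.Sequential(nn.Dropout(0.1),
                         nn.Linear(hidden_size * k, 1),
                         nn.Sigmoid())
    x = torch.cat(hidden_states[-k:], dim=-1)  # [batch, seq_len, hidden_size * k]
    prob = head(x[:, 0, :])                    # probability at the [CLS] position
    assert prob.shape == (batch, 1)
    return prob
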
def main_train(path_trn: str, path_val: str, crop_size: int, upscale_factor: int,
               num_epochs: int, num_workers: int, to_device: str = 'cuda:0',
               in_memory_trn: bool = False, in_memory_val: bool = False,
               batch_size: int = 64, step_val: int = 5):
    out_dir = path_trn + '_results_c{}_s{}'.format(crop_size, upscale_factor)
    out_dir_states = out_dir + '_states'
    out_dir_statistics = out_dir + '_statistics'
    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(out_dir_states, exist_ok=True)
    os.makedirs(out_dir_statistics, exist_ok=True)
    path_results_csv = os.path.join(
        out_dir, 'statistics_x{}_train_results.csv'.format(upscale_factor))
    # to_device = get_device(to_device)
    train_set = DatasetExtTrn(path_idx=path_trn,
                              crop_lr=crop_size,
                              scale=upscale_factor,
                              in_memory=in_memory_trn).build()
    val_set = DatasetExtVal(path_idx=path_val,
                            crop_lr=crop_size,
                            scale=upscale_factor,
                            in_memory=in_memory_val).build()
    #
    train_loader = DataLoader(dataset=train_set, num_workers=num_workers,
                              batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(dataset=val_set, num_workers=num_workers,
                            batch_size=1, shuffle=False)
    #
    netG = Generator(upscale_factor).to(to_device)
    print('# generator parameters:',
          sum(param.numel() for param in netG.parameters()))
    netD = Discriminator().to(to_device)
    print('# discriminator parameters:',
          sum(param.numel() for param in netD.parameters()))
    generator_criterion = GeneratorLoss().to(to_device)
    #
    optimizerG = optim.Adam(netG.parameters())
    optimizerD = optim.Adam(netD.parameters())
    # results = {'d_loss': [], 'g_loss': [], 'd_score': [], 'g_score': [], 'psnr': [], 'ssim': []}
    for epoch in range(1, num_epochs + 1):
        results_train = train_step(train_loader, netD, netG, optimizerD,
                                   optimizerG, generator_criterion,
                                   epoch, num_epochs)
        # FIXME: separate function for epoch training
        if (epoch % step_val) == 0:
            results_validation = validation_step(val_loader, netG, out_dir,
                                                 epoch, num_epochs)
            results_save = {**results_train, **results_validation}
            results_save['epoch'] = epoch
            export_results(results_save, path_results_csv)
            # export model
            # save model parameters
            path_state_G = os.path.join(
                out_dir_states,
                'netG_epoch_x{}_{:05d}.pth'.format(upscale_factor, epoch))
            path_state_D = os.path.join(
                out_dir_states,
                'netD_epoch_x{}_{:05d}.pth'.format(upscale_factor, epoch))
            t1 = time.time()
            torch.save(netG.state_dict(), path_state_G)
            torch.save(netD.state_dict(), path_state_D)
            dt = time.time() - t1
            print('\t\t:: dump:generator-model to [{}], dt ~ {:0.2f} (s)'.format(
                path_state_G, dt))
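
# Quick illustrative check of the checkpoint naming scheme used above: epochs
# are zero-padded to five digits so files sort correctly. The directory value
# below is an assumed example, not taken from the original source.
def _example_state_path():
    import os
    out_dir_states = 'data/train_idx_results_c64_s4_states'  # assumed example value
    upscale_factor, epoch = 4, 5
    path = os.path.join(out_dir_states,
                        'netG_epoch_x{}_{:05d}.pth'.format(upscale_factor, epoch))
    assert path.endswith('netG_epoch_x4_00005.pth')
    return path
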
parser.add_argument('--threads',
                    type=int, help='#workers for parallel processing')
parser.add_argument('--batch_size', default=32, type=int,
                    help='batch-size')
parser.add_argument('--device', default='cuda:0', type=str,
                    help='device, default "cuda:0"')
parser.add_argument('--in_memory_trn', action='store_true',
                    help='Load train dataset into memory')
parser.add_argument('--in_memory_val', action='store_true',
                    help='Load validation dataset into memory')
args = parser.parse_args()
print('args:\n\t{}'.format(args))
#
to_device = get_device(args.device)
main_train(path_trn=args.trn,
           path_val=args.val,
           crop_size=args.crop_size,
           upscale_factor=args.upscale_factor,
           num_epochs=args.num_epochs,
           num_workers=args.threads,
           batch_size=args.batch_size,
           to_device=to_device,
           in_memory_trn=args.in_memory_trn,
           in_memory_val=args.in_memory_val)
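
# Example shell invocation (sketch; the script name and the dataset/crop/epoch
# flag names are assumptions, since those definitions sit above this snippet;
# only --threads, --batch_size, --device and the --in_memory_* flags are
# confirmed by the parser code here):
#   python train.py --trn data/train_idx --val data/val_idx \
#       --threads 4 --batch_size 32 --device cuda:0 --in_memory_val
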
def run_model(pos_train_file, neg_train_file, pos_dev_file, neg_dev_file,
              nrows_train, nrows_dev, epochs, out_dir, dropout=0.2,
              model='bert', batch_size=16,
              test_file='../data/test_data_clean.csv', lr=2e-5, lmda=10.0,
              stnc_emb='last'):
    device = get_device()
    bert_hidden_states = 4
    if model == 'bert':
        config = BertConfig()
        config.output_hidden_states = True
        model = BertForToxic(
            config,
            bert_hidden_states=bert_hidden_states,
            dropout=dropout,
            update_bert=True,
        )
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                                  do_lower_case=True)
    elif model == 'distilbert':
        # config = DistilBertConfig()
        config = BertConfig()
        config.output_hidden_states = True
        model = DistilBertForToxic(config,
                                   bert_hidden_states=bert_hidden_states,
                                   dropout=dropout,
                                   update_bert=True,
                                   lmda=lmda,
                                   stnc_emb=stnc_emb)
        # tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased', do_lower_case=True)
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                                  do_lower_case=True)
    train_dataloader = get_data_loader_bal(pos_train_file,
                                           neg_train_file,
                                           batch_size=batch_size,
                                           nrows_pos=nrows_train,
                                           nrows_neg=nrows_train * 10,
                                           mode='train',
                                           tokenizer=tokenizer)
    dev_dataloader = get_data_loader_bal(pos_dev_file,
                                         neg_dev_file,
                                         batch_size=batch_size,
                                         nrows_pos=nrows_dev,
                                         nrows_neg=nrows_dev,
                                         mode='dev',
                                         tokenizer=tokenizer)
    model.to(device)
    optimizer = AdamW(
        model.parameters(),
        lr=lr,    # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=1e-8  # args.adam_epsilon - default is 1e-8.
    )
    total_steps = len(train_dataloader) * epochs
    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,  # Default value in run_glue.py
        num_training_steps=total_steps)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    best_score = -np.inf
    stats_vec = []
    dev_pred_vec = []
    for epoch in range(epochs):
        stats, dev_pred = train_epoch(model, train_dataloader, dev_dataloader,
                                      optimizer, scheduler)
        print(epoch, stats)
        if stats['accuracy'] > best_score:
            best_score = stats['accuracy']
            f = out_dir + '/' + 'best_model_ch.pt'
            torch.save({
                'epoch': epoch,
                'model': model,
                'stats': stats,
            }, f)
        stats_vec.append(stats)
        dev_pred_vec.append(dev_pred)
    stats_vec = pd.DataFrame(stats_vec)
    dev_pred_vec = pd.concat(dev_pred_vec, axis=0)
    f = out_dir + '/' + 'last_model_ch.pt'
    torch.save({
        'epoch': epoch,
        'model': model,
        'stats': stats,
    }, f)
    print(stats_vec)
    stats_vec.to_csv(out_dir + '/' + 'stats.csv')
    out_file = out_dir + '/train_pred.csv'
    df = get_data_pred(train_dataloader, model, out_file)
    out_file = out_dir + '/dev_pred.csv'
    df = get_data_pred(dev_dataloader, model, out_file)
    test_dataloader = get_data_loader_pred(test_file, tokenizer, nrows=None)
    out_file = out_dir + '/test_pred.csv'
    df = get_data_pred(test_dataloader, model, out_file)
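
# Hypothetical usage sketch (file paths and row counts below are placeholders,
# not values from the original source). Note that the train loader above draws
# ten negatives per positive (nrows_neg=nrows_train * 10), while the dev loader
# is balanced 1:1.
# run_model(pos_train_file='../data/train_pos.csv',
#           neg_train_file='../data/train_neg.csv',
#           pos_dev_file='../data/dev_pos.csv',
#           neg_dev_file='../data/dev_neg.csv',
#           nrows_train=1000, nrows_dev=500, epochs=3,
#           out_dir='out_distilbert', model='distilbert',
#           lmda=10.0, stnc_emb='last')
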
def run_model(pos_train_file, neg_train_file, pos_dev_file, neg_dev_file,
              nrows_train, nrows_dev, epochs, out_dir):
    batch_size = 16
    # x_train = _read_data('../data/train_bal.csv', nrows_train)
    # x_dev = _read_data('../data/dev_bal.csv', nrows_dev)
    # train_data = list(zip(x_train['comment_text'].values, x_train['target'].values))
    # train_dataloader = DataLoader(train_data,
    #                               collate_fn=my_collate,
    #                               batch_size=batch_size, shuffle=True)
    #
    # dev_data = list(zip(x_dev['comment_text'].values, x_dev['target'].values))
    # dev_dataloader = DataLoader(dev_data,
    #                             collate_fn=my_collate,
    #                             batch_size=batch_size, shuffle=False)
    train_dataloader = get_data_loader_bal(pos_train_file,
                                           neg_train_file,
                                           batch_size=batch_size,
                                           nrows_pos=nrows_train,
                                           nrows_neg=nrows_train,
                                           mode='train')
    dev_dataloader = get_data_loader_bal(pos_dev_file,
                                         neg_dev_file,
                                         batch_size=batch_size,
                                         nrows_pos=nrows_dev,
                                         nrows_neg=nrows_dev,
                                         mode='dev')
    device = get_device()
    bert_hidden_states = 4
    config = DistilBertConfig()
    config.output_hidden_states = True
    model = DistilBertForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",   # The 6-layer DistilBERT model with an uncased vocab.
        num_labels=2,                # Number of output labels: 2 for binary classification.
                                     # You can increase this for multi-class tasks.
        output_attentions=False,     # Whether the model returns attention weights.
        output_hidden_states=False,  # Whether the model returns all hidden states.
    )
    model = model.to(device)
    optimizer = AdamW(
        model.parameters(),
        lr=2e-5,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=1e-8  # args.adam_epsilon - default is 1e-8.
    )
    total_steps = len(train_dataloader) * epochs
    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,  # Default value in run_glue.py
        num_training_steps=total_steps)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    best_score = -np.inf
    stats_vec = []
    for epoch in range(epochs):
        stats = train_epoch(model, train_dataloader, dev_dataloader,
                            optimizer, scheduler)
        print(stats)
        if stats['accuracy'] > best_score:
            best_score = stats['accuracy']
            f = out_dir + '/' + 'best_model_ch.pt'
            torch.save({
                'epoch': epoch,
                'model': model,
                'stats': stats,
            }, f)
        stats_vec.append(stats)
    stats_vec = pd.DataFrame(stats_vec)
    f = out_dir + '/' + 'last_model_ch.pt'
    torch.save({
        'epoch': epoch,
        'model': model,
        'stats': stats,
    }, f)
    print(stats_vec)
    stats_vec.to_csv(out_dir + '/' + 'stats.csv')
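
# Sketch of the schedule both run_model variants rely on (assumes transformers'
# get_linear_schedule_with_warmup): with num_warmup_steps=0 the learning rate
# decays linearly from its initial value to 0 over num_training_steps, with
# scheduler.step() called once per batch after optimizer.step(). Runnable on a
# dummy parameter.
def _example_linear_schedule():
    import torch
    from transformers import get_linear_schedule_with_warmup
    param = torch.nn.Parameter(torch.zeros(1))
    optimizer = torch.optim.AdamW([param], lr=2e-5, eps=1e-8)
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=10)
    lrs = []
    for _ in range(10):
        optimizer.step()
        scheduler.step()
        lrs.append(scheduler.get_last_lr()[0])
    # lrs decreases linearly toward 0 by the final step.
    return lrs
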
def main_train(path_trn: str, path_val: str, crop_size: int, upscale_factor: int,
               num_epochs: int, num_workers: int, to_device: str = 'cuda:0',
               batch_size: int = 64):
    to_device = get_device(to_device)
    train_set = TrainDatasetFromFolder(path_trn, crop_size=crop_size,
                                       upscale_factor=upscale_factor)
    val_set = ValDatasetFromFolder(path_val, upscale_factor=upscale_factor)
    # train_set = TrainDatasetFromFolder('data/VOC2012/train', crop_size=crop_size, upscale_factor=upscale_factor)
    # val_set = ValDatasetFromFolder('data/VOC2012/val', upscale_factor=upscale_factor)
    #
    train_loader = DataLoader(dataset=train_set, num_workers=num_workers,
                              batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(dataset=val_set, num_workers=num_workers,
                            batch_size=1, shuffle=False)

    netG = Generator(upscale_factor)
    print('# generator parameters:',
          sum(param.numel() for param in netG.parameters()))
    netD = Discriminator()
    print('# discriminator parameters:',
          sum(param.numel() for param in netD.parameters()))

    generator_criterion = GeneratorLoss()

    if torch.cuda.is_available():
        netG.cuda()
        netD.cuda()
        generator_criterion.cuda()

    optimizerG = optim.Adam(netG.parameters())
    optimizerD = optim.Adam(netD.parameters())

    results = {'d_loss': [], 'g_loss': [], 'd_score': [], 'g_score': [],
               'psnr': [], 'ssim': []}

    for epoch in range(1, num_epochs + 1):
        train_bar = tqdm(train_loader)
        running_results = {'batch_sizes': 0, 'd_loss': 0, 'g_loss': 0,
                           'd_score': 0, 'g_score': 0}

        netG.train()
        netD.train()
        # FIXME: separate function for epoch training
        for data, target in train_bar:
            g_update_first = True
            batch_size = data.size(0)
            #
            # img_hr = target.numpy().transpose((0, 2, 3, 1))[0]
            # img_lr = data.numpy().transpose((0, 2, 3, 1))[0]
            # img_lr_x4 = cv2.resize(img_lr, img_hr.shape[:2], interpolation=cv2.INTER_CUBIC)
            #
            # plt.subplot(1, 3, 1)
            # plt.imshow(img_hr)
            # plt.subplot(1, 3, 2)
            # plt.imshow(img_lr)
            # plt.subplot(1, 3, 3)
            # plt.imshow(img_lr_x4)
            # plt.show()
            running_results['batch_sizes'] += batch_size

            ############################
            # (1) Update D network: maximize D(x)-1-D(G(z))
            ###########################
            # real_img = Variable(target)
            # if torch.cuda.is_available():
            #     real_img = real_img.cuda()
            # z = Variable(data)
            # if torch.cuda.is_available():
            #     z = z.cuda()
            z = data.to(to_device)
            real_img = target.to(to_device)

            fake_img = netG(z)
            netD.zero_grad()
            real_out = netD(real_img).mean()
            fake_out = netD(fake_img).mean()
            d_loss = 1 - real_out + fake_out
            d_loss.backward(retain_graph=True)
            optimizerD.step()

            ############################
            # (2) Update G network: minimize 1-D(G(z)) + Perception Loss + Image Loss + TV Loss
            ###########################
            netG.zero_grad()
            g_loss = generator_criterion(fake_out, fake_img, real_img)
            g_loss.backward()
            optimizerG.step()

            fake_img = netG(z)
            fake_out = netD(fake_img).mean()
            g_loss = generator_criterion(fake_out, fake_img, real_img)

            running_results['g_loss'] += float(g_loss) * batch_size
            d_loss = 1 - real_out + fake_out
            running_results['d_loss'] += float(d_loss) * batch_size
            running_results['d_score'] += float(real_out) * batch_size
            running_results['g_score'] += float(fake_out) * batch_size

            train_bar.set_description(
                desc='[%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f' % (
                    epoch, num_epochs,
                    running_results['d_loss'] / running_results['batch_sizes'],
                    running_results['g_loss'] / running_results['batch_sizes'],
                    running_results['d_score'] / running_results['batch_sizes'],
                    running_results['g_score'] / running_results['batch_sizes']))

        netG.eval()
        # FIXME: separate function for epoch validation
        with torch.no_grad():
            out_path = 'training_results/SRF_' + str(upscale_factor) + '/'
            if not os.path.exists(out_path):
                os.makedirs(out_path)
            val_bar = tqdm(val_loader)
            valing_results = {'mse': 0, 'ssims': 0, 'psnr': 0, 'ssim': 0,
                              'batch_sizes': 0}
            val_images = []
            for val_lr, val_hr_restore, val_hr in val_bar:
                batch_size = val_lr.size(0)
                valing_results['batch_sizes'] += batch_size
                # lr = Variable(val_lr, volatile=True)
                # hr = Variable(val_hr, volatile=True)
                # if torch.cuda.is_available():
                #     lr = lr.cuda()
                #     hr = hr.cuda()
                lr = val_lr.to(to_device)
                hr = val_hr.to(to_device)
                sr = netG(lr)

                batch_mse = ((sr - hr) ** 2).mean()
                valing_results['mse'] += float(batch_mse) * batch_size
                batch_ssim = float(pytorch_ssim.ssim(sr, hr))  # .data[0]
                valing_results['ssims'] += batch_ssim * batch_size
                valing_results['psnr'] = 10 * log10(
                    1 / (valing_results['mse'] / valing_results['batch_sizes']))
                valing_results['ssim'] = (valing_results['ssims'] /
                                          valing_results['batch_sizes'])
                val_bar.set_description(
                    desc='[converting LR images to SR images] PSNR: %.4f dB SSIM: %.4f' % (
                        valing_results['psnr'], valing_results['ssim']))

                val_images.extend(
                    [display_transform()(val_hr_restore.squeeze(0)),
                     display_transform()(hr.data.cpu().squeeze(0)),
                     display_transform()(sr.data.cpu().squeeze(0))])
            val_images = torch.stack(val_images)
            val_images = torch.chunk(val_images, val_images.size(0) // 15)
            val_save_bar = tqdm(val_images, desc='[saving training results]')
            index = 1
            for image in val_save_bar:
                image = utils.make_grid(image, nrow=3, padding=5)
                utils.save_image(image,
                                 out_path + 'epoch_%d_index_%d.png' % (epoch, index),
                                 padding=5)
                index += 1

        # save model parameters
        torch.save(netG.state_dict(),
                   'epochs/netG_epoch_%d_%d.pth' % (upscale_factor, epoch))
        torch.save(netD.state_dict(),
                   'epochs/netD_epoch_%d_%d.pth' % (upscale_factor, epoch))
        # save loss / scores / psnr / ssim
        results['d_loss'].append(running_results['d_loss'] / running_results['batch_sizes'])
        results['g_loss'].append(running_results['g_loss'] / running_results['batch_sizes'])
        results['d_score'].append(running_results['d_score'] / running_results['batch_sizes'])
        results['g_score'].append(running_results['g_score'] / running_results['batch_sizes'])
        results['psnr'].append(valing_results['psnr'])
        results['ssim'].append(valing_results['ssim'])

        if epoch % 10 == 0 and epoch != 0:
            out_path = 'statistics/'
            data_frame = pd.DataFrame(
                data={'Loss_D': results['d_loss'], 'Loss_G': results['g_loss'],
                      'Score_D': results['d_score'], 'Score_G': results['g_score'],
                      'PSNR': results['psnr'], 'SSIM': results['ssim']},
                index=range(1, epoch + 1))
            data_frame.to_csv(out_path + 'srf_' + str(upscale_factor) + '_train_results.csv',
                              index_label='Epoch')
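
# Sanity-check sketch of the PSNR computation used in the validation loop
# above: for images scaled to [0, 1], PSNR = 10 * log10(1 / MSE). Dummy tensors
# only; not part of the training code.
def _example_psnr():
    import torch
    from math import log10
    sr = torch.rand(1, 3, 64, 64)  # stand-in super-resolved batch
    hr = torch.rand(1, 3, 64, 64)  # stand-in high-resolution batch
    mse = float(((sr - hr) ** 2).mean())
    psnr = 10 * log10(1 / mse)
    return psnr  # in dB
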