def validate(tr_epoch, attention, encoder, decoder, captions, objects,
             optical_flow, resnet, batch_size, dec_max_time_step):
    """Compute the MSE validation loss over the whole validation split.

    Runs the attention / encoder / decoder pipeline over every video in
    the module-level ``video_ids_vl`` list and logs the accumulated loss
    to tensorboard under 'Validation Loss' at epoch ``tr_epoch``.  The
    decoder is always fed its own previous prediction (no teacher
    forcing during validation).

    Args:
        tr_epoch: training epoch number used as the tensorboard step.
        attention, encoder, decoder: the model components (already in
            eval mode — the caller switches modes).
        captions, objects, optical_flow, resnet: feature providers with
            ``get_tensor``/``video_instances`` accessors.
        batch_size: videos per batch.
        dec_max_time_step: number of decoder steps per caption.
    """
    criterion = nn.MSELoss()
    data_iters = math.ceil(len(video_ids_vl) / batch_size)
    loss = 0
    # Fix: validation must not build autograd graphs.  Without no_grad()
    # the tensor `loss` accumulated every batch's graph and exhausted
    # memory on large validation sets; .item() keeps it a plain float.
    with torch.no_grad():
        for batch_num in tqdm(range(data_iters)):
            start = batch_num * batch_size
            end = min((batch_num + 1) * batch_size, len(video_ids_vl))
            vids = video_ids_vl[start:end]
            caption_tensor = captions.get_tensor(vids).to(device)
            video_inst = captions.video_instances(vids)
            object_tensor = objects.get_tensor(vids, video_inst).to(device)
            optical_tensor = optical_flow.get_tensor(vids, video_inst).to(device)
            resnet_tensor = resnet.get_tensor(vids, video_inst).to(device)
            video_attended, _, _, _ = attention(video_inst, resnet_tensor,
                                                optical_tensor, object_tensor)
            for i in range(sum(video_inst)):
                encoder_hidden = encoder.init_hidden()
                for frame_num in range(max_frame):  # Run Encoder for one video.
                    frame = video_attended[i, frame_num].view(1, 1, resnet_dim)
                    encoder_hidden = encoder(frame, encoder_hidden)
                word_tensor = torch.zeros((1, 1, word_dim)).to(device)  # SOS
                # Decoder input is previous predicted word
                for t in range(dec_max_time_step):
                    decoder_out = decoder(word_tensor, encoder_hidden)
                    word_ground_truth = caption_tensor[i, t].unsqueeze(0).unsqueeze(0)
                    loss += criterion(decoder_out, word_ground_truth).item()
                    word_tensor = decoder_out
    log_value('Validation Loss', loss, tr_epoch)
def main():
    """Entry point for ImageNet training/evaluation of bL-Net backbones.

    Builds the requested backbone (bLResNeXt / bLResNet / bLSEResNeXt),
    optionally runs evaluation only (``--evaluate``), otherwise trains
    with per-epoch validation, checkpointing of the best top-1 model,
    and tensorboard logging.  Supports resuming from a checkpoint.
    """
    global args
    args = parser.parse_args()
    cudnn.benchmark = True
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    strong_augmentor = False
    if args.backbone_net == 'blresnext':
        backbone = blresnext_model
        arch_name = "ImageNet-bLResNeXt-{}-{}x{}d-a{}-b{}".format(
            args.depth, args.cardinality, args.basewidth, args.alpha, args.beta)
        backbone_setting = [
            args.depth, args.basewidth, args.cardinality, args.alpha, args.beta
        ]
    elif args.backbone_net == 'blresnet':  # resnet->blresnet
        backbone = blresnet_model
        arch_name = "ImageNet-bLResNet-{}-a{}-b{}".format(
            args.depth, args.alpha, args.beta)
        backbone_setting = [args.depth, args.alpha, args.beta]
    elif args.backbone_net == 'blseresnext':
        backbone = blseresnext_model
        arch_name = "ImageNet-bLSEResNeXt-{}-{}x{}d-a{}-b{}".format(
            args.depth, args.cardinality, args.basewidth, args.alpha, args.beta)
        backbone_setting = [
            args.depth, args.basewidth, args.cardinality, args.alpha, args.beta
        ]
        strong_augmentor = True  # this augmentation policy is used for blseresnext
    else:
        raise ValueError("Unsupported backbone.")

    # add class number and whether or not load pretrained model
    backbone_setting += [1000, args.pretrained]
    # create model
    model = backbone(*backbone_setting)
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(arch_name))
    else:
        print("=> creating model '{}'".format(arch_name))
    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    train_criterion = nn.CrossEntropyLoss().cuda()
    val_criterion = nn.CrossEntropyLoss().cuda()

    # Data loading code
    valdir = os.path.join(args.data, 'val')
    val_loader = get_imagenet_dataflow(False, valdir, args.batch_size,
                                       get_augmentor(False, args.input_shape,
                                                     strong_augmentor),
                                       workers=args.workers)

    log_folder = os.path.join(args.logdir, arch_name)
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)

    if args.evaluate:
        val_top1, val_top5, val_losses, val_speed = validate(
            val_loader, model, val_criterion)
        print('Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\t'
              'Speed: {:.2f} ms/batch\t'.format(args.input_shape, val_losses,
                                                val_top1, val_top5,
                                                val_speed * 1000.0),
              flush=True)
        return

    traindir = os.path.join(args.data, 'train')
    train_loader = get_imagenet_dataflow(True, traindir, args.batch_size,
                                         get_augmentor(True, args.input_shape,
                                                       strong_augmentor),
                                         workers=args.workers)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    if args.lr_scheduler == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, 30, gamma=0.1)
    elif args.lr_scheduler == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, args.epochs,
                                                   eta_min=0)
    else:
        raise ValueError("Unsupported scheduler.")

    tensorboard_logger.configure(os.path.join(log_folder))

    # optionally resume from a checkpoint
    best_top1 = 0.0
    if args.resume:
        logfile = open(os.path.join(log_folder, 'log.log'), 'a')
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_top1 = checkpoint['best_top1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logfile = open(os.path.join(log_folder, 'log.log'), 'w')

    print(args, flush=True)
    print(model, flush=True)
    print(args, file=logfile, flush=True)
    print(model, file=logfile, flush=True)

    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step(epoch)
        try:
            # get_lr returns all lrs for every param group of the current
            # epoch; assume the lr for all layers is identical.
            lr = scheduler.get_lr()[0]
        except Exception:
            lr = None

        # train for one epoch
        train_top1, train_top5, train_losses, train_speed, speed_data_loader, train_steps = \
            train(train_loader, model, train_criterion, optimizer, epoch + 1)

        # evaluate on validation set
        val_top1, val_top5, val_losses, val_speed = validate(
            val_loader, model, val_criterion)

        print(
            'Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\t'
            'Data loading: {:.2f} ms/batch'.format(epoch + 1, args.epochs,
                                                   train_losses, train_top1,
                                                   train_top5,
                                                   train_speed * 1000.0,
                                                   speed_data_loader * 1000.0),
            file=logfile, flush=True)
        print(
            'Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'
            .format(epoch + 1, args.epochs, val_losses, val_top1, val_top5,
                    val_speed * 1000.0),
            file=logfile, flush=True)
        print(
            'Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\t'
            'Data loading: {:.2f} ms/batch'.format(epoch + 1, args.epochs,
                                                   train_losses, train_top1,
                                                   train_top5,
                                                   train_speed * 1000.0,
                                                   speed_data_loader * 1000.0),
            flush=True)
        print(
            'Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'
            .format(epoch + 1, args.epochs, val_losses, val_top1, val_top5,
                    val_speed * 1000.0),
            flush=True)

        # remember best prec@1 and save checkpoint
        is_best = val_top1 > best_top1
        best_top1 = max(val_top1, best_top1)
        save_dict = {
            'epoch': epoch + 1,
            'arch': arch_name,
            'state_dict': model.state_dict(),
            'best_top1': best_top1,
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(save_dict, is_best, filepath=log_folder)
        if lr is not None:
            # Fix: tag was misspelled 'learnnig-rate' in the previous revision.
            tensorboard_logger.log_value('learning-rate', lr, epoch + 1)
        tensorboard_logger.log_value('val-top1', val_top1, epoch + 1)
        tensorboard_logger.log_value('val-loss', val_losses, epoch + 1)
        tensorboard_logger.log_value('train-top1', train_top1, epoch + 1)
        tensorboard_logger.log_value('train-loss', train_losses, epoch + 1)
        tensorboard_logger.log_value('best-val-top1', best_top1, epoch + 1)

    logfile.close()
def train(attention, encoder, decoder, captions, objects, optical_flow, resnet,
          objects_vl, resnet_vl, optical_vl, captions_vl, n_iters, lr_rate,
          batch_size, dec_max_time_step):
    """Train the attention/encoder/decoder captioning model.

    Each epoch iterates over ``video_ids_tr`` in batches, encodes the
    attended video frames, decodes a caption with scheduled teacher
    forcing (probability ``teacher_force_ratio``), updates all three
    modules, saves a checkpoint, and runs :func:`validate`.

    Fixes vs. the previous revision:
      * the optimizers now honour the ``lr_rate`` argument (previously a
        module-level ``learning_rate`` was used and the parameter was
        silently ignored);
      * gradients are zeroed before each batch update (there was no
        ``zero_grad()`` at all, so gradients accumulated forever);
      * the loss is rebuilt per batch — calling ``backward()`` each
        batch on a loss accumulated across batches re-traverses graphs
        that were already freed;
      * the inner per-video index no longer shadows the batch index.
    """
    attention_optimizer = optim.Adam(attention.parameters(), lr=lr_rate)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=lr_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=lr_rate)
    criterion = nn.MSELoss()
    for epoch in tqdm(range(n_iters)):
        # Train mode
        attention = attention.train()
        encoder = encoder.train()
        decoder = decoder.train()
        epoch_loss = 0.0  # scalar running total for logging
        data_iters = math.ceil(len(video_ids_tr) / batch_size)
        for batch_num in range(data_iters):
            start = batch_num * batch_size
            end = min((batch_num + 1) * batch_size, len(video_ids_tr))
            vids = video_ids_tr[start:end]
            caption_tensor = captions.get_tensor(vids).to(device)
            video_inst = captions.video_instances(vids)
            object_tensor = objects.get_tensor(vids, video_inst).to(device)
            optical_tensor = optical_flow.get_tensor(vids, video_inst).to(device)
            resnet_tensor = resnet.get_tensor(vids, video_inst).to(device)
            video_attended, _, _, _ = attention(video_inst, resnet_tensor,
                                                optical_tensor, object_tensor)
            loss = 0  # per-batch loss (autograd tensor once accumulated)
            for vid in range(sum(video_inst)):
                encoder_hidden = encoder.init_hidden()
                for frame_num in range(max_frame):  # Run Encoder for one video.
                    frame = video_attended[vid, frame_num].view(1, 1, resnet_dim)
                    encoder_hidden = encoder(frame, encoder_hidden)
                # Run Decoder for one sentence
                use_teacher_forcing = random.random() < teacher_force_ratio
                word_tensor = torch.zeros((1, 1, word_dim)).to(device)  # SOS
                if use_teacher_forcing:
                    # Decoder input is ground truth
                    for t in range(dec_max_time_step):
                        decoder_out = decoder(word_tensor, encoder_hidden)
                        word_ground_truth = caption_tensor[vid, t].unsqueeze(0).unsqueeze(0)
                        loss += criterion(decoder_out, word_ground_truth)
                        word_tensor = word_ground_truth
                else:
                    # Decoder input is previous predicted word
                    for t in range(dec_max_time_step):
                        decoder_out = decoder(word_tensor, encoder_hidden)
                        word_ground_truth = caption_tensor[vid, t].unsqueeze(0).unsqueeze(0)
                        loss += criterion(decoder_out, word_ground_truth)
                        word_tensor = decoder_out
            # Reset gradients, then update from this batch only.
            attention_optimizer.zero_grad()
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            loss.backward()
            attention_optimizer.step()
            encoder_optimizer.step()
            decoder_optimizer.step()
            epoch_loss += loss.item()
        log_value('Training Loss', epoch_loss, epoch)
        # Save model parameters
        params = {'attention': attention.state_dict(),
                  'encoder': encoder.state_dict(),
                  'decoder': decoder.state_dict()}
        torch.save(params, model_params_path + str(epoch) + '.pt')
        # Validation Loss, Bleu scores etc. after each epoch
        attention = attention.eval()
        encoder = encoder.eval()
        decoder = decoder.eval()
        validate(epoch, attention, encoder, decoder, captions_vl, objects_vl,
                 optical_vl, resnet_vl, batch_size, dec_max_time_step)
if args.cuda: net, criterion = net.cuda(), criterion.cuda() # early stopping parameters patience = args.patience best_loss = 1e4 # Print model to logfile print(net, file=logfile) # Change optimizer for finetuning if args.model == 'AmazonSimpleNet': optimizer = optim.Adam(net.parameters()) else: optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) for e in range(args.epochs): start = time.time() train_loss, train_acc = train(net, train_loader, criterion, optimizer, args.v) end = time.time() # print stats stats = """Epoch: {}\t train loss: {:.3f}, train acc: {:.3f}\t time: {:.1f}s""".format(e, train_loss, train_acc, end - start) print(stats) print(stats, file=logfile) log_value('train_loss', train_loss, e) utils.save_model({ 'arch': args.model, 'state_dict': net.state_dict() }, 'saved-models/{}-run-{}.pth.tar'.format(args.model, run))
def train_validate_united(train_dataset, val_dataset, train_device, val_device,
                          use_mat, use_mord, opt_type, n_epoch, batch_size,
                          metrics, hash_code, lr, fold):
    """Train UnitedNet on one CV fold and validate after every epoch.

    Trains with BCE loss, logs per-epoch train/val loss and every metric
    in ``metrics`` to tensorboard and stdout (JSON-ish lines), saves the
    best model (lowest validation loss) to data/trained_models, and
    early-stops after 30 epochs without improvement.

    Args:
        metrics: dict name -> fn(labels_tensor, outputs_tensor) -> float.

    Returns:
        (train_metrics, val_metrics): metric dicts from the last epoch run.
    """
    train_loader = dataloader.DataLoader(dataset=train_dataset,
                                         batch_size=batch_size,
                                         collate_fn=utils.custom_collate,
                                         shuffle=False)
    val_loader = dataloader.DataLoader(dataset=val_dataset,
                                       batch_size=batch_size,
                                       collate_fn=utils.custom_collate,
                                       shuffle=False)
    criterion = nn.BCELoss()
    united_net = UnitedNet(dense_dim=train_dataset.get_dim('mord'),
                           use_mat=use_mat,
                           use_mord=use_mord).to(train_device)
    if opt_type == 'sgd':
        opt = optim.SGD(united_net.parameters(), lr=lr, momentum=0.99)
    elif opt_type == 'adam':
        opt = optim.Adam(united_net.parameters(), lr=lr)
    min_loss = 100  # arbitrary large number
    early_stop_count = 0
    for e in range(n_epoch):
        train_losses = []
        val_losses = []
        train_outputs = []
        val_outputs = []
        train_labels = []
        val_labels = []
        print('FOLD', fold, '-- EPOCH', e, '-- TRAINING')
        united_net.train()  # hoisted out of the batch loop (was re-set per batch)
        for i, (mord_ft, non_mord_ft, label) in enumerate(train_loader):
            mord_ft = mord_ft.float().to(train_device)
            non_mord_ft = non_mord_ft.view(
                (-1, 1, 150, 42)).float().to(train_device)
            mat_ft = non_mord_ft.squeeze(1).float().to(train_device)
            label = label.float().to(train_device)
            # Forward
            opt.zero_grad()
            outputs = united_net(non_mord_ft, mord_ft, mat_ft)
            loss = criterion(outputs, label)
            train_losses.append(float(loss.item()))
            train_outputs.extend(outputs)
            train_labels.extend(label)
            # Parameters update
            loss.backward()
            opt.step()

        # Validate after each epoch
        print('FOLD', fold, 'EPOCH', e, '--', 'VALIDATING')
        united_net.eval()
        for i, (mord_ft, non_mord_ft, label) in enumerate(val_loader):
            mord_ft = mord_ft.float().to(val_device)
            non_mord_ft = non_mord_ft.view(
                (-1, 1, 150, 42)).float().to(val_device)
            # Fix: was .to(train_device) — on multi-device setups the matrix
            # features ended up on the wrong device during validation.
            mat_ft = non_mord_ft.squeeze(1).float().to(val_device)
            label = label.float().to(val_device)
            with torch.no_grad():
                outputs = united_net(non_mord_ft, mord_ft, mat_ft)
                loss = criterion(outputs, label)
            val_losses.append(float(loss.item()))
            val_outputs.extend(outputs)
            val_labels.extend(label)

        train_outputs = torch.stack(train_outputs)
        val_outputs = torch.stack(val_outputs)
        train_labels = torch.stack(train_labels)
        val_labels = torch.stack(val_labels)
        tensorboard_logger.log_value('train_loss',
                                     sum(train_losses) / len(train_losses),
                                     e + 1)
        tensorboard_logger.log_value('val_loss',
                                     sum(val_losses) / len(val_losses), e + 1)
        print('{"metric": "train_loss", "value": %f, "epoch": %d}' %
              (sum(train_losses) / len(train_losses), e + 1))
        print('{"metric": "val_loss", "value": %f, "epoch": %d}' %
              (sum(val_losses) / len(val_losses), e + 1))
        for key in metrics.keys():
            train_metric = metrics[key](train_labels, train_outputs)
            val_metric = metrics[key](val_labels, val_outputs)
            print('{"metric": "%s", "value": %f, "epoch": %d}' %
                  ('train_' + key, train_metric, e + 1))
            print('{"metric": "%s", "value": %f, "epoch": %d}' %
                  ('val_' + key, val_metric, e + 1))
            tensorboard_logger.log_value('train_{}'.format(key), train_metric,
                                         e + 1)
            tensorboard_logger.log_value('val_{}'.format(key), val_metric,
                                         e + 1)
        loss_epoch = sum(val_losses) / len(val_losses)
        if loss_epoch < min_loss:
            early_stop_count = 0
            min_loss = loss_epoch
            utils.save_model(united_net, "data/trained_models",
                             hash_code + "_" + str(fold))
        else:
            early_stop_count += 1
            if early_stop_count > 30:
                print('Training can not improve from epoch {}\tBest loss: {}'.
                      format(e, min_loss))
                break
    train_metrics = {}
    val_metrics = {}
    for key in metrics.keys():
        train_metrics[key] = metrics[key](train_labels, train_outputs)
        val_metrics[key] = metrics[key](val_labels, val_outputs)
    return train_metrics, val_metrics
def train(model, data_loader, optimizer, init_lr=0.002,
          checkpoint_dir=None, checkpoint_interval=None, nepochs=None,
          clip_thresh=1.0):
    """Tacotron-style training loop driven by module-level step counters.

    Runs until the module-level ``global_epoch`` reaches ``nepochs``,
    advancing ``global_step`` per batch.  L1 loss is computed on the mel
    outputs plus a linear-spectrogram loss that gives extra weight (0.5)
    to the bands below ~3 kHz.  The learning rate is decayed every step
    via ``_learning_rate_decay``; states/checkpoints are written every
    ``checkpoint_interval`` steps and scalars are logged to tensorboard.

    Args:
        model: seq2seq model returning (mel_outputs, linear_outputs, attn).
        data_loader: yields (x, input_lengths, mel, y) batches.
        init_lr: base learning rate before decay.
        checkpoint_dir / checkpoint_interval: checkpointing destination/cadence.
        nepochs: stop once global_epoch reaches this count.
        clip_thresh: gradient-norm clipping threshold.
    """
    model.train()
    print('cuda:', use_cuda)
    # if use_cuda:
    #     model = model.cuda()
    #     optimizer = optimizer.to_device('cuda')
    linear_dim = model.linear_dim
    criterion = nn.L1Loss()
    global global_step, global_epoch
    while global_epoch < nepochs:
        running_loss = 0.
        for step, (x, input_lengths, mel, y) in tqdm(enumerate(data_loader)):
            # Decay learning rate
            current_lr = _learning_rate_decay(init_lr, global_step)
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr
            optimizer.zero_grad()

            # Sort by length (descending) — presumably required by packed
            # sequences inside the model; TODO confirm.
            sorted_lengths, indices = torch.sort(input_lengths.view(-1),
                                                 dim=0,
                                                 descending=True)
            sorted_lengths = sorted_lengths.long().numpy()
            x, mel, y = x[indices], mel[indices], y[indices]

            # Feed data
            x, mel, y = Variable(x), Variable(mel), Variable(y)
            if use_cuda:
                x, mel, y = x.cuda(), mel.cuda(), y.cuda()
            mel_outputs, linear_outputs, attn = model(
                x, mel, input_lengths=sorted_lengths)

            # Loss: mel L1 plus linear L1 with extra weight on the
            # low-frequency "priority" bins below 3 kHz.
            mel_loss = criterion(mel_outputs, mel)
            n_priority_freq = int(3000 / (fs * 0.5) * linear_dim)
            linear_loss = 0.5 * criterion(linear_outputs, y) \
                + 0.5 * criterion(linear_outputs[:, :, :n_priority_freq],
                                  y[:, :, :n_priority_freq])
            loss = mel_loss + linear_loss

            if global_step > 0 and global_step % checkpoint_interval == 0:
                save_states(global_step, mel_outputs, linear_outputs, attn, y,
                            sorted_lengths, checkpoint_dir)
                save_checkpoint(model, optimizer, global_step, checkpoint_dir,
                                global_epoch)

            # Update (clip_grad_norm is the pre-1.0 spelling; returns the norm)
            loss.backward()
            grad_norm = torch.nn.utils.clip_grad_norm(model.parameters(),
                                                      clip_thresh)
            optimizer.step()

            # Logs
            log_value("loss", float(loss.item()), global_step)
            log_value("mel loss", float(mel_loss.item()), global_step)
            log_value("linear loss", float(linear_loss.item()), global_step)
            log_value("gradient norm", grad_norm, global_step)
            log_value("learning rate", current_lr, global_step)

            global_step += 1
            running_loss += loss.item()

        averaged_loss = running_loss / (len(data_loader))
        log_value("loss (per epoch)", averaged_loss, global_epoch)
        print("Loss: {}".format(running_loss / (len(data_loader))))
        global_epoch += 1
def tb_log(self, tb_logger, prefix='', step=None):
    """Log the current value of every tracked meter to tensorboard.

    Args:
        tb_logger: logger exposing ``log_value(name, value, step=...)``.
        prefix: string prepended to every meter name.
        step: global step forwarded to the logger.
    """
    # Fix: dict.iteritems() is Python 2 only and raises AttributeError on
    # Python 3 (which the rest of this file targets); items() works on both.
    for k, v in self.meters.items():
        tb_logger.log_value(prefix + k, v.val, step=step)
# Make it a tiny bit faster for p in D.parameters(): p.requires_grad = False G.zero_grad() # Generator wants to fool discriminator so it wants to minimize loss of discriminator assuming label is True y.data.resize_(current_batch_size).fill_(1) y_pred_fake = D(x_fake) errG = criterion(y_pred_fake, y) errG.backward(retain_graph=True) D_G = y_pred_fake.data.mean() optimizerG.step() current_step = i + epoch*len(dataset) # Log results so we can see them in TensorBoard after log_value('errD', errD.data[0], current_step) log_value('errG', errG.data[0], current_step) if i % 50 == 0: end = time.time() fmt = '[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f time:%.4f' s = fmt % (epoch, param.n_epoch, i, len(dataset), errD.data[0], errG.data[0], D_real, D_fake, D_G, end - start) print(s) print(s, file=log_output) # Save every epoch fmt = '%s/run-%d/models/%s_epoch_%d.pth' if epoch % 25 == 0: torch.save(G.state_dict(), fmt % (param.output_folder, run, 'G', epoch)) torch.save(D.state_dict(), fmt % (param.output_folder, run, 'D', epoch))
def test(model, device, test_loader, loss_fun, epoch, optimizer, exp):
    """Evaluate the emotion-recognition model on the test split.

    Computes overall (weighted) accuracy and per-class (unweighted)
    accuracy over four emotion classes — the branch order below implies
    0=happy, 1=angry, 2=sad, else=neutral; TODO confirm the label map.
    Logs step-wise validation loss, records per-epoch accuracies in the
    module-level ``all_acc_nums_*`` dicts, and checkpoints the model
    whenever the unweighted accuracy meets or beats the global
    ``best_acc``.

    Returns:
        (acc, mean_loss): weighted accuracy and mean test loss.
    """
    model.eval()  # set evaluation mode
    gt_labels = []
    pred_labels = []
    total_loss = []
    happy_pred = []
    happy_label = []
    angry_pred = []
    angry_label = []
    sad_pred = []
    sad_label = []
    neu_pred = []
    neu_label = []
    global best_acc
    all_preds = []
    all_gts = []
    with torch.no_grad():
        for batch_idx, sampled_batch in enumerate(test_loader):
            global global_step_test
            audio_feats = torch.stack(sampled_batch[0]).unsqueeze(1)
            emo_labels = torch.LongTensor(sampled_batch[2])
            gen_labels = torch.LongTensor(sampled_batch[3])
            seq_lengths = torch.LongTensor(sampled_batch[4])
            audio_feats, emo_labels, gen_labels, seq_lengths = audio_feats.to(
                device), emo_labels.to(device), gen_labels.to(
                    device), seq_lengths.to(device)
            prediction_logits = model(audio_feats, seq_lengths.cpu())
            loss = loss_fun(prediction_logits, emo_labels)
            predictions = np.argmax(prediction_logits.detach().cpu().numpy(),
                                    axis=1)
            for pred in predictions:
                pred_labels.append(pred)
            for lab in emo_labels.detach().cpu().numpy():
                gt_labels.append(lab)
            total_loss.append(loss.item())
            # NOTE(review): identical to `predictions` above; all_preds/all_gts
            # duplicate pred_labels/gt_labels, so `acc` and `accuracy_emotion`
            # below are the same number.
            predictions_emotion = np.argmax(
                prediction_logits.detach().cpu().numpy(), axis=1)
            for pred in predictions_emotion:
                all_preds.append(pred)
            for lab in emo_labels.detach().cpu().numpy():
                all_gts.append(lab)
            ########## Unweighted accuracy
            for k in range(len((emo_labels))):
                lab_emo = emo_labels[k]
                pred_emo = predictions_emotion[k]
                if lab_emo == 0:
                    happy_label.append(lab_emo.detach().cpu().numpy().item())
                    happy_pred.append(pred_emo)
                elif lab_emo == 1:
                    angry_label.append(lab_emo.detach().cpu().numpy().item())
                    angry_pred.append(pred_emo)
                elif lab_emo == 2:
                    sad_label.append(lab_emo.detach().cpu().numpy().item())
                    sad_pred.append(pred_emo)
                else:
                    neu_label.append(lab_emo.detach().cpu().numpy().item())
                    neu_pred.append(pred_emo)
            log_value("validation loss (step-wise)", float(loss.item()),
                      global_step_test)
            global_step_test = global_step_test + 1
    print(f'Total testing loss {np.mean(np.asarray(total_loss))} after {epoch}')
    acc = accuracy_score(gt_labels, pred_labels)
    print(f'Total testing accuracy {acc} after {epoch}')
    accuracy_happy = accuracy_score(happy_label, happy_pred)
    accuracy_angry = accuracy_score(angry_label, angry_pred)
    accuracy_sad = accuracy_score(sad_label, sad_pred)
    accuracy_neu = accuracy_score(neu_label, neu_pred)
    # unweighted = mean of the four per-class accuracies
    average = np.mean(
        [accuracy_happy, accuracy_angry, accuracy_sad, accuracy_neu])
    #print('Happy {} , Angry {}, Sad {}, Neutral {}'.format(accuracy_happy,accuracy_angry,accuracy_sad,accuracy_neu))
    print('Unweighted / class accuracy {}'.format(average))
    all_acc_nums_class_specific[epoch] = average
    accuracy_emotion = accuracy_score(all_gts, all_preds)
    print('Final Weighted test accuracy {} after {} '.format(
        accuracy_emotion, epoch))
    all_acc_nums_overall[epoch] = accuracy_emotion
    print('Maximum acc so far UNWEIGHTED {} -------'.format(
        max(all_acc_nums_class_specific.values())))
    print('Maximum acc so far WEIGHTED {} -------'.format(
        max(all_acc_nums_overall.values())))
    print('**************************')
    print('**************************')
    # Checkpoint when the unweighted accuracy ties or improves the best seen.
    if average >= best_acc:
        best_acc = average
        model_save_path = os.path.join('unimodal/',
                                       exp + '_check_point_' + str(average))
        state_dict = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch
        }
        torch.save(state_dict, model_save_path)
    return acc, np.mean(np.asarray(total_loss))
def train_rnn_forward_epoch(epoch, args, rnn, output, data_loader):
    """One forward-only epoch of the two-level GraphRNN model.

    Feeds the node-level ``rnn`` hidden states into the edge-level
    ``output`` RNN and computes the weighted binary-cross-entropy loss.
    Although the modules are put in ``.train()`` mode and gradients are
    zeroed, no backward/optimizer step is performed here — this measures
    the training loss only.

    Returns:
        Mean per-batch loss, each batch's loss scaled by
        feature_dim / batch_size.
    """
    rnn.train()
    output.train()
    loss_sum = 0
    for batch_idx, data in enumerate(data_loader):
        rnn.zero_grad()
        output.zero_grad()
        x_unsorted = data['x'].float()
        y_unsorted = data['y'].float()
        y_len_unsorted = data['len']
        y_len_max = max(y_len_unsorted)
        # trim padding beyond the longest sequence in this batch
        x_unsorted = x_unsorted[:, 0:y_len_max, :]
        y_unsorted = y_unsorted[:, 0:y_len_max, :]
        # initialize lstm hidden state according to batch size
        rnn.hidden = rnn.init_hidden(batch_size=x_unsorted.size(0))
        # output.hidden = output.init_hidden(batch_size=x_unsorted.size(0)*x_unsorted.size(1))

        # sort input (descending length, required by pack_padded_sequence)
        y_len, sort_index = torch.sort(y_len_unsorted, 0, descending=True)
        y_len = y_len.numpy().tolist()
        x = torch.index_select(x_unsorted, 0, sort_index)
        y = torch.index_select(y_unsorted, 0, sort_index)

        # input, output for output rnn module
        # a smart use of pytorch builtin function: pack variable--b1_l1,b2_l1,...,b1_l2,b2_l2,...
        y_reshape = pack_padded_sequence(y, y_len, batch_first=True).data
        # reverse y_reshape, so that their lengths are sorted, add dimension
        idx = [i for i in range(y_reshape.size(0) - 1, -1, -1)]
        idx = torch.LongTensor(idx)
        y_reshape = y_reshape.index_select(0, idx)
        y_reshape = y_reshape.view(y_reshape.size(0), y_reshape.size(1), 1)

        # edge-level input: SOS row of ones followed by shifted targets
        output_x = torch.cat(
            (torch.ones(y_reshape.size(0), 1, 1), y_reshape[:, 0:-1, 0:1]),
            dim=1)
        output_y = y_reshape
        # batch size for output module: sum(y_len)
        output_y_len = []
        output_y_len_bin = np.bincount(np.array(y_len))
        for i in range(len(output_y_len_bin) - 1, 0, -1):
            count_temp = np.sum(
                output_y_len_bin[i:])  # count how many y_len is above i
            output_y_len.extend(
                [min(i, y.size(2))] * count_temp
            )  # put them in output_y_len; max value should not exceed y.size(2)
        # pack into variable
        x = Variable(x).cuda()
        y = Variable(y).cuda()
        output_x = Variable(output_x).cuda()
        output_y = Variable(output_y).cuda()
        # print(output_y_len)
        # print('len',len(output_y_len))
        # print('y',y.size())
        # print('output_y',output_y.size())

        # if using ground truth to train
        h = rnn(x, pack=True, input_len=y_len)
        h = pack_padded_sequence(
            h, y_len, batch_first=True).data  # get packed hidden vector
        # reverse h
        idx = [i for i in range(h.size(0) - 1, -1, -1)]
        idx = Variable(torch.LongTensor(idx)).cuda()
        h = h.index_select(0, idx)
        hidden_null = Variable(
            torch.zeros(args.num_layers - 1, h.size(0), h.size(1))).cuda()
        output.hidden = torch.cat(
            (h.view(1, h.size(0), h.size(1)), hidden_null),
            dim=0)  # num_layers, batch_size, hidden_size
        y_pred = output(output_x, pack=True, input_len=output_y_len)
        y_pred = F.sigmoid(y_pred)
        # clean: re-pack/pad both prediction and target so padding positions
        # are zeroed identically before the loss
        y_pred = pack_padded_sequence(y_pred, output_y_len, batch_first=True)
        y_pred = pad_packed_sequence(y_pred, batch_first=True)[0]
        output_y = pack_padded_sequence(output_y,
                                        output_y_len,
                                        batch_first=True)
        output_y = pad_packed_sequence(output_y, batch_first=True)[0]
        # use cross entropy loss
        loss = binary_cross_entropy_weight(y_pred, output_y)

        if epoch % args.epochs_log == 0 and batch_idx == 0:  # only output first batch's statistics
            print(
                'Epoch: {}/{}, train loss: {:.6f}, graph type: {}, num_layer: {}, hidden: {}'
                .format(epoch, args.epochs, loss.data[0], args.graph_type,
                        args.num_layers, args.hidden_size_rnn))

        # logging
        log_value('loss_' + args.fname, loss.data[0],
                  epoch * args.batch_ratio + batch_idx)
        # print(y_pred.size())
        feature_dim = y_pred.size(0) * y_pred.size(1)
        loss_sum += loss.data[0] * feature_dim / y.size(0)
    return loss_sum / (batch_idx + 1)
def train_vae_epoch(epoch, args, rnn, output, data_loader, optimizer_rnn,
                    optimizer_output, scheduler_rnn, scheduler_output):
    """One training epoch of the GraphRNN-VAE variant.

    The node-level ``rnn`` encodes the input sequence; the VAE ``output``
    head produces edge predictions plus the latent mean/log-sigma
    (z_mu, z_lsgms).  Loss is weighted BCE plus the KL divergence to the
    unit Gaussian, normalized by the total sequence volume.  Both
    optimizers and both schedulers are stepped per batch; latent
    statistics are logged to tensorboard on logging epochs.

    Returns:
        Mean per-batch total loss.
    """
    rnn.train()
    output.train()
    loss_sum = 0
    for batch_idx, data in enumerate(data_loader):
        rnn.zero_grad()
        output.zero_grad()
        x_unsorted = data['x'].float()
        y_unsorted = data['y'].float()
        y_len_unsorted = data['len']
        y_len_max = max(y_len_unsorted)
        # trim padding beyond the longest sequence in this batch
        x_unsorted = x_unsorted[:, 0:y_len_max, :]
        y_unsorted = y_unsorted[:, 0:y_len_max, :]
        # initialize lstm hidden state according to batch size
        rnn.hidden = rnn.init_hidden(batch_size=x_unsorted.size(0))

        # sort input (descending length, required by pack_padded_sequence)
        y_len, sort_index = torch.sort(y_len_unsorted, 0, descending=True)
        y_len = y_len.numpy().tolist()
        x = torch.index_select(x_unsorted, 0, sort_index)
        y = torch.index_select(y_unsorted, 0, sort_index)
        x = Variable(x).cuda()
        y = Variable(y).cuda()

        # if using ground truth to train
        h = rnn(x, pack=True, input_len=y_len)
        y_pred, z_mu, z_lsgms = output(h)
        y_pred = F.sigmoid(y_pred)
        # clean: re-pack/pad predictions and latents so padded positions
        # are zeroed consistently before computing the losses
        y_pred = pack_padded_sequence(y_pred, y_len, batch_first=True)
        y_pred = pad_packed_sequence(y_pred, batch_first=True)[0]
        z_mu = pack_padded_sequence(z_mu, y_len, batch_first=True)
        z_mu = pad_packed_sequence(z_mu, batch_first=True)[0]
        z_lsgms = pack_padded_sequence(z_lsgms, y_len, batch_first=True)
        z_lsgms = pad_packed_sequence(z_lsgms, batch_first=True)[0]
        # use cross entropy loss
        loss_bce = binary_cross_entropy_weight(y_pred, y)
        # KL(q(z|x) || N(0, I)), closed form for a diagonal Gaussian
        loss_kl = -0.5 * torch.sum(1 + z_lsgms - z_mu.pow(2) - z_lsgms.exp())
        loss_kl /= y.size(0) * y.size(1) * sum(y_len)  # normalize
        loss = loss_bce + loss_kl
        loss.backward()

        # update deterministic and lstm
        optimizer_output.step()
        optimizer_rnn.step()
        # NOTE(review): schedulers are stepped once per batch here, not per
        # epoch — so the lr decays args.batch_ratio times faster than the
        # epoch count suggests; confirm this is intended.
        scheduler_output.step()
        scheduler_rnn.step()

        # latent-space statistics for monitoring posterior collapse
        z_mu_mean = torch.mean(z_mu.data)
        z_sgm_mean = torch.mean(z_lsgms.mul(0.5).exp_().data)
        z_mu_min = torch.min(z_mu.data)
        z_sgm_min = torch.min(z_lsgms.mul(0.5).exp_().data)
        z_mu_max = torch.max(z_mu.data)
        z_sgm_max = torch.max(z_lsgms.mul(0.5).exp_().data)

        if epoch % args.epochs_log == 0 and batch_idx == 0:  # only output first batch's statistics
            print(
                'Epoch: {}/{}, train bce loss: {:.6f}, train kl loss: {:.6f}, graph type: {}, num_layer: {}, hidden: {}'
                .format(epoch, args.epochs, loss_bce.data[0], loss_kl.data[0],
                        args.graph_type, args.num_layers,
                        args.hidden_size_rnn))
            print('z_mu_mean', z_mu_mean, 'z_mu_min', z_mu_min, 'z_mu_max',
                  z_mu_max, 'z_sgm_mean', z_sgm_mean, 'z_sgm_min', z_sgm_min,
                  'z_sgm_max', z_sgm_max)

        # logging
        log_value('bce_loss_' + args.fname, loss_bce.data[0],
                  epoch * args.batch_ratio + batch_idx)
        log_value('kl_loss_' + args.fname, loss_kl.data[0],
                  epoch * args.batch_ratio + batch_idx)
        log_value('z_mu_mean_' + args.fname, z_mu_mean,
                  epoch * args.batch_ratio + batch_idx)
        log_value('z_mu_min_' + args.fname, z_mu_min,
                  epoch * args.batch_ratio + batch_idx)
        log_value('z_mu_max_' + args.fname, z_mu_max,
                  epoch * args.batch_ratio + batch_idx)
        log_value('z_sgm_mean_' + args.fname, z_sgm_mean,
                  epoch * args.batch_ratio + batch_idx)
        log_value('z_sgm_min_' + args.fname, z_sgm_min,
                  epoch * args.batch_ratio + batch_idx)
        log_value('z_sgm_max_' + args.fname, z_sgm_max,
                  epoch * args.batch_ratio + batch_idx)

        loss_sum += loss.data[0]
    return loss_sum / (batch_idx + 1)
def main(args):
    """Pretrain a part-segmentation backbone with self-supervision (ACD).

    Builds the experiment directory from `args`, loads ShapeNet part-seg
    data plus a self-supervised dataset (dummy split or ACD), trains the
    backbone with a contrastive pairwise loss, validates on the held-out
    self-sup split (optionally also an SVM probe on ModelNet40), and
    checkpoints periodically plus whenever validation loss improves.

    Fixes vs. previous revision:
    - removed the mid-function `args = parse_args()` which silently clobbered
      the caller-supplied `args` after the experiment dir had already been
      named from the original values;
    - the self-sup validation loop's DEBUG early-exit now tests its own loop
      index (`batch_id`) instead of the stale training-loop index `i`.
    """
    def log_string(str):
        # Mirror every message to the run's log file and stdout.
        logger.info(str)
        print(str)

    '''CUDA ENV SETTINGS'''
    if args.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    if args.cudnn_off:
        torch.backends.cudnn.enabled = False  # needed on gypsum!

    # --------------------------------------------------------------------------
    '''CREATE DIR'''
    # --------------------------------------------------------------------------
    experiment_dir = Path('./log/')
    experiment_dir.mkdir(exist_ok=True)
    experiment_dir = experiment_dir.joinpath('pretrain_part_seg')
    experiment_dir.mkdir(exist_ok=True)
    # Encode the hyper-parameters in the directory name so runs are unique.
    dir_name = args.model + '_ShapeNet' + \
        '_k-%d_seed-%d_lr-%.6f_lr-step-%d_lr-decay-%.2f_wt-decay-%.6f_l2norm-%d' \
        % (args.k_shot, args.seed, args.learning_rate, args.step_size,
           args.lr_decay, args.decay_rate, int(args.l2_norm))
    if args.normal:
        dir_name = dir_name + '_normals'
    if args.selfsup:
        dir_name = dir_name + 'selfsup-%s_selfsup_margin-%.2f_lambda-%.2f' \
            % (args.ss_dataset, args.margin, args.lmbda)
    if args.rotation_z:
        dir_name = dir_name + '_rotation-z'
    if args.rotation_z_45:
        dir_name = dir_name + '_rotation-z-45'
    if args.random_anisotropic_scale:
        dir_name = dir_name + '_aniso-scale'
    experiment_dir = experiment_dir.joinpath(dir_name)
    experiment_dir.mkdir(exist_ok=True)
    checkpoints_dir = experiment_dir.joinpath('checkpoints/')
    checkpoints_dir.mkdir(exist_ok=True)
    log_dir = experiment_dir.joinpath('logs/')
    log_dir.mkdir(exist_ok=True)

    # --------------------------------------------------------------------------
    '''LOG'''
    # --------------------------------------------------------------------------
    # BUGFIX: previously `args = parse_args()` re-parsed the command line here,
    # discarding the caller's `args` after it was already used above.
    logger = logging.getLogger("Model")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler('%s/%s.txt' % (log_dir, args.model))
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    log_string('PARAMETER ...')
    log_string(args)
    configure(log_dir)  # tensorboard logdir
    log_string('OUTPUT DIR: %s' % experiment_dir)

    # --------------------------------------------------------------------------
    '''DATA LOADERS'''
    # --------------------------------------------------------------------------
    root = 'data/shapenetcore_partanno_segmentation_benchmark_v0_normal/'
    TRAIN_DATASET = PartNormalDataset(root=root,
                                      npoints=args.npoint,
                                      split='trainval',
                                      normal_channel=args.normal,
                                      k_shot=args.k_shot)
    trainDataLoader = torch.utils.data.DataLoader(TRAIN_DATASET,
                                                  batch_size=args.batch_size,
                                                  shuffle=True,
                                                  num_workers=4)
    trainDataIterator = iter(trainDataLoader)
    TEST_DATASET = PartNormalDataset(root=root,
                                     npoints=args.npoint,
                                     split='test',
                                     normal_channel=args.normal)
    testDataLoader = torch.utils.data.DataLoader(TEST_DATASET,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=4)
    log_string("The number of training data is: %d" % len(TRAIN_DATASET))
    log_string("The number of test data is: %d" % len(TEST_DATASET))
    num_classes = 16  # ShapeNet object categories
    num_part = 50     # part labels across all categories

    if args.selfsup:
        log_string('Use self-supervision - alternate batches')
        if not args.retain_overlaps:
            log_string('\tRemove overlaps between labeled and self-sup datasets')
            # Exclude every file the labeled splits already use.
            labeled_fns = list(itertools.chain(*TEST_DATASET.meta.values())) \
                + list(itertools.chain(*TRAIN_DATASET.meta.values()))
        else:
            log_string('\tUse all files in self-sup dataset')
            labeled_fns = []
        if args.ss_dataset == 'dummy':
            log_string(
                'Using "dummy" self-supervision dataset (rest of labeled ShapeNetSeg)'
            )
            SELFSUP_DATASET = SelfSupPartNormalDataset(
                root=root,
                npoints=args.npoint,
                split='trainval',
                normal_channel=args.normal,
                k_shot=args.n_cls_selfsup,
                labeled_fns=labeled_fns)
        elif args.ss_dataset == 'acd':
            log_string('Using "ACD" self-supervision dataset (ShapeNet Seg)')
            ACD_ROOT = args.ss_path
            SELFSUP_DATASET = ACDSelfSupDataset(root=ACD_ROOT,
                                                npoints=args.npoint,
                                                normal_channel=args.normal,
                                                k_shot=args.n_cls_selfsup,
                                                exclude_fns=labeled_fns,
                                                use_val=True)
        log_string('\t %d samples' % len(SELFSUP_DATASET))
        selfsup_train_fns = list(
            itertools.chain(*SELFSUP_DATASET.meta.values()))
        log_string('Val dataset for self-sup')
        SELFSUP_VAL = ACDSelfSupDataset(root=ACD_ROOT,
                                        npoints=args.npoint,
                                        normal_channel=args.normal,
                                        class_choice='Airplane',
                                        k_shot=args.n_cls_selfsup,
                                        use_val=False,
                                        exclude_fns=selfsup_train_fns + labeled_fns)
        log_string('\t %d samples' % len(SELFSUP_VAL))
        selfsupDataLoader = torch.utils.data.DataLoader(
            SELFSUP_DATASET,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=4)
        selfsupIterator = iter(selfsupDataLoader)
        selfsupValLoader = torch.utils.data.DataLoader(
            SELFSUP_VAL,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=4)

    log_string('Load ModelNet dataset for validation')
    DATA_PATH = 'data/modelnet40_normal_resampled/'
    MN_DATASET = ModelNetDataLoader(root=DATA_PATH,
                                    npoint=args.npoint,
                                    split='train',
                                    normal_channel=args.normal)
    modelnetLoader = torch.utils.data.DataLoader(MN_DATASET,
                                                 batch_size=args.batch_size,
                                                 shuffle=True,
                                                 num_workers=4)

    # --------------------------------------------------------------------------
    '''MODEL LOADING'''
    # --------------------------------------------------------------------------
    MODEL = importlib.import_module(args.model)
    # Snapshot the model sources into the experiment dir for reproducibility.
    shutil.copy('models/%s.py' % args.model, str(experiment_dir))
    shutil.copy('models/pointnet_util.py', str(experiment_dir))
    if args.model == 'dgcnn':
        classifier = MODEL.get_model(num_part,
                                     normal_channel=args.normal,
                                     k=args.dgcnn_k).cuda()
    else:
        classifier = MODEL.get_model(num_part,
                                     normal_channel=args.normal).cuda()
    criterion = MODEL.get_loss().cuda()
    if args.selfsup:
        selfsupCriterion = MODEL.get_selfsup_loss(margin=args.margin).cuda()
        log_string("The number of self-sup data is: %d" % len(SELFSUP_DATASET))

    def weights_init(m):
        # Xavier init for conv/linear layers when no checkpoint is found.
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            torch.nn.init.xavier_normal_(m.weight.data)
            torch.nn.init.constant_(m.bias.data, 0.0)
        elif classname.find('Linear') != -1:
            torch.nn.init.xavier_normal_(m.weight.data)
            torch.nn.init.constant_(m.bias.data, 0.0)

    try:
        checkpoint = torch.load(
            str(experiment_dir) + '/checkpoints/best_model.pth')
        start_epoch = checkpoint['epoch']
        classifier.load_state_dict(checkpoint['model_state_dict'])
        log_string('Use pretrain model')
    except:
        log_string('No existing model, starting training from scratch...')
        start_epoch = 0
        classifier = classifier.apply(weights_init)

    # --------------------------------------------------------------------------
    '''OPTIMIZER SETTINGS'''
    # --------------------------------------------------------------------------
    if args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(classifier.parameters(),
                                     lr=args.learning_rate,
                                     betas=(0.9, 0.999),
                                     eps=1e-08,
                                     weight_decay=args.decay_rate)
    else:
        optimizer = torch.optim.SGD(classifier.parameters(),
                                    lr=args.learning_rate,
                                    momentum=0.9)

    def bn_momentum_adjust(m, momentum):
        if isinstance(m, torch.nn.BatchNorm2d) or isinstance(
                m, torch.nn.BatchNorm1d):
            m.momentum = momentum

    # LEARNING_RATE_CLIP = 1e-5
    LEARNING_RATE_CLIP = args.lr_clip
    MOMENTUM_ORIGINAL = 0.1
    MOMENTUM_DECAY = 0.5
    MOMENTUM_DECAY_STEP = args.step_size

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        classifier = nn.DataParallel(classifier)

    # --------------------------------------------------------------------------
    '''TRAINING LOOP'''
    # --------------------------------------------------------------------------
    best_val_loss = 99999
    global_epoch = 0
    for epoch in range(start_epoch, args.epoch):
        log_string('Epoch %d (%d/%s):' %
                   (global_epoch + 1, epoch + 1, args.epoch))
        '''Adjust learning rate and BN momentum'''
        lr = max(
            args.learning_rate * (args.lr_decay**(epoch // args.step_size)),
            LEARNING_RATE_CLIP)
        log_string('Learning rate:%f' % lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        mean_loss = []
        momentum = MOMENTUM_ORIGINAL * (MOMENTUM_DECAY
                                        **(epoch // MOMENTUM_DECAY_STEP))
        if momentum < 0.01:
            momentum = 0.01
        print('BN momentum updated to: %f' % momentum)
        classifier = classifier.apply(
            lambda x: bn_momentum_adjust(x, momentum))

        '''learning one epoch'''
        num_iters = len(
            selfsupDataLoader)  # calc an epoch based on self-sup dataset
        for i in tqdm(list(range(num_iters)), total=num_iters, smoothing=0.9):
            '''applying self-supervised constrastive (pairwise) loss'''
            try:
                data_ss = next(selfsupIterator)
            except StopIteration:  # reached end of this dataloader
                selfsupIterator = iter(selfsupDataLoader)
                data_ss = next(selfsupIterator)
            # DEBUG
            if DEBUG and i > 10:
                break
            # points: bs x 3 x n_pts, label: bs x 1, target: bs x n_pts
            points, label, target = data_ss
            points = points.data.numpy()
            points[:, :, 0:3] = provider.random_scale_point_cloud(points[:, :, 0:3])
            points[:, :, 0:3] = provider.shift_point_cloud(points[:, :, 0:3])
            if args.random_anisotropic_scale:
                points[:, :, 0:3] = provider.random_anisotropic_scale_point_cloud(
                    points[:, :, 0:3], scale_low=0.8, scale_high=1.25)
            if args.rotation_z:
                points[:, :, 0:3] = provider.rotate_point_cloud_y(points[:, :, 0:3])
            if args.rotation_z_45:
                points[:, :, 0:3] = provider.rotate_point_cloud_y_pi4(points[:, :, 0:3])
            points = torch.Tensor(points)
            points, label, target = points.float().cuda(), label.long().cuda(
            ), target.long().cuda()
            points = points.transpose(2, 1)
            # for self-sup category label is always unknown, so always zeros:
            category_label = torch.zeros([label.shape[0], 1, num_classes]).cuda()
            optimizer.zero_grad()
            classifier = classifier.train()
            # feat: [bs x ndim x npts]
            _, _, feat = classifier(points, category_label)
            ss_loss = selfsupCriterion(feat, target) * args.lmbda
            ss_loss.backward()
            optimizer.step()
            mean_loss.append(ss_loss.item())
            log_value('selfsup_loss_iter', ss_loss.data,
                      epoch * num_iters + i + 1)
        train_loss_epoch = np.mean(mean_loss)
        log_string('Self-sup loss is: %.5f' % train_loss_epoch)
        log_value('selfsup_loss_epoch', train_loss_epoch, epoch)

        '''validation after one epoch'''
        log_string('Validation: ACD on ShapeNet')
        with torch.no_grad():
            total_val_loss = 0
            for batch_id, (points, label, target) in tqdm(
                    enumerate(selfsupValLoader),
                    total=len(selfsupValLoader),
                    smoothing=0.9):
                # BUGFIX: early-exit on this loop's own index, not the stale
                # training-loop index `i`.
                if DEBUG and batch_id > 10:
                    break
                cur_batch_size, NUM_POINT, _ = points.size()
                points, label, target = points.float().cuda(), label.long(
                ).cuda(), target.long().cuda()
                points = points.transpose(2, 1)
                category_label = torch.zeros([label.shape[0], 1, num_classes]).cuda()
                classifier = classifier.eval()
                _, _, feat = classifier(points, category_label)
                val_loss = selfsupCriterion(feat, target)
                total_val_loss += val_loss.data.cpu().item()
            avg_val_loss = total_val_loss / len(selfsupValLoader)
        log_value('selfsup_loss_val', avg_val_loss, epoch)

        '''(optional) validation on ModelNet40'''
        if args.modelnet_val:
            log_string('Validation: SVM on ModelNet40')
            with torch.no_grad():
                log_string('Extract features on ModelNet40')
                if args.model == 'pointnet_part_seg':
                    feat_train, label_train = extract_feats_pointnet(
                        classifier, modelnetLoader, subset=0.5)
                elif args.model == 'pointnet2_part_seg_msg':
                    feat_train, label_train = extract_feats(classifier,
                                                            modelnetLoader,
                                                            subset=0.5)
                else:
                    raise ValueError
                log_string('Training data: %d samples, %d features' %
                           feat_train.shape)
                start_time = time.time()
                log_string('Training SVM on ModelNet40')
                svm, best_C, best_score = cross_val_svm(feat_train,
                                                        label_train,
                                                        c_min=100,
                                                        c_max=501,
                                                        c_step=20,
                                                        verbose=False)
                elapsed_time = time.time() - start_time
                log_string('ModelNet val Accuracy: %f (elapsed: %f seconds)' %
                           (best_score, elapsed_time))
            log_value('modelnet_val', best_score, epoch)

        # save every 5th epoch
        if epoch % 5 == 0:
            savepath = str(checkpoints_dir) + ('/model_%03d.pth' % epoch)
            log_string('Saving model at %s' % savepath)
            state = {
                'epoch': epoch,
                'selfsup_loss': ss_loss.data,
                'val_loss': avg_val_loss,
                'model_state_dict': classifier.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }
            torch.save(state, savepath)
            log_string('Saved model.')

        # save best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            savepath = str(checkpoints_dir) + '/best_model.pth'
            log_string('Saving best model at %s' % savepath)
            state = {
                'epoch': epoch,
                'selfsup_loss': ss_loss.data,
                'val_loss': avg_val_loss,
                'model_state_dict': classifier.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }
            torch.save(state, savepath)
            log_string('Saved model.')

        log_value('train_lr', lr, epoch)
        log_value('train_bn_momentum', momentum, epoch)
        log_string('Epoch %d Self-sup train loss: %f Val loss: %f ' %
                   (epoch + 1, train_loss_epoch, avg_val_loss))
        global_epoch += 1
def update(self, epoch): log_value('albedo_siMSE', self.A_siMSE / self.count, epoch) log_value('shading_siMSE', self.S_siMSE / self.count, epoch) log_value('albedo_siLMSE', self.A_siLMSE / self.count, epoch) log_value('shading_siLMSE', self.S_siLMSE / self.count, epoch) log_value('albedo_DSSIM', self.A_DSSIM / self.count, epoch) log_value('shading_DSSIM', self.S_DSSIM / self.count, epoch)
def update(self, epoch): log_value('loss', self.loss / self.count, epoch) log_value('albedo_loss', self.a_loss / self.count, epoch) log_value('shading_loss', self.s_loss / self.count, epoch)
def main():
    """Train the ATAE-LSTM aspect-based sentiment model.

    Loads the training corpus, builds the word vocabulary and embedding
    weights, splits 80/20 into train/validation, trains for 100 epochs with
    a class-weighted random sampler, logs losses/accuracies to TensorBoard,
    and saves the whole model after every epoch plus a final copy.

    Relies on file-level globals: `words`, `word2idx`, `pd_data`,
    `restaurant_test_file`, and helper functions (`file_stats`,
    `create_word_vocab`, `create_weights`, `to_tensor`, `vocabDataset`).
    """
    global words
    global word2idx
    print("ABSA ATAE-LSTM")
    #train_file = restaurant_train_file
    train_file = pd_data
    test_file = restaurant_test_file
    print("Overall Train Data :")
    vocab = file_stats(train_file, pd_data=True)  # Return data without 'conflict' and print stats
    # Shuffle the data (NOTE: no fixed seed, so the split is not reproducible)
    p = np.random.permutation(len(vocab))
    vocab = np.array(vocab)[p]
    #test_vocab = file_stats(test_file, pd_data=False)
    # Create word vocab and weights for embedding
    words, word2idx = create_word_vocab(vocab, words, word2idx)
    #words, word2idx = create_word_vocab(test_vocab, words, word2idx)
    #################################################################################
    ######################### Train/ Val Split ######################################
    #################################################################################
    print("Splitting training data in 80/20 for train/valid")
    train_len = int(0.8 * len(vocab))
    valid_len = int(1 * len(vocab))  # i.e. the full corpus; validation = last 20%
    train_vocab = vocab[:train_len]
    valid_vocab = vocab[train_len:valid_len]
    #test_vocab = vocab[valid_len:]
    """ with open("train_pd_data.pickle", "wb") as f: pickle.dump(train_vocab, f) with open("valid_pd_data.pickle", "wb") as f: pickle.dump(valid_vocab, f) with open("test_pd_data.pickle", "wb") as f: pickle.dump(test_vocab, f)"""
    print("\nTraining set :")
    file_stats(train_vocab, return_vocab=False)
    print("\nValidation set :")
    file_stats(valid_vocab, return_vocab=False)
    # Weight matrix for the embedding layer (pretrained vectors per vocab word)
    weights_matrix = create_weights(words, word2idx)
    #print(weights_matrix.shape)
    #train_vocab = vocab
    #valid_vocab = test_vocab
    train_vocab = to_tensor(train_vocab, word2idx)
    valid_vocab = to_tensor(valid_vocab, word2idx)
    train_set = vocabDataset(train_vocab)
    valid_set = vocabDataset(valid_vocab)
    # Collect per-sample sentiment labels to build the class-balancing sampler.
    labels = []
    for data in train_set:
        labels.append(data['sentiment'])
    labels = torch.stack(labels)
    valid_batch = DataLoader(valid_set, batch_size=32, shuffle=True)
    #b_sampler = ATAE_BatchSampler(labels, batch_size=32)
    # Per-class sampling weights, indexed by label id.
    # NOTE(review): magic constants — presumably inverse class frequencies
    # for this corpus; verify against the dataset's label distribution.
    weights = np.array([1.5e-05, 0.0002, 1.8e-05])
    label_weights = weights[labels.numpy()]
    sampler = torch.utils.data.WeightedRandomSampler(weights=label_weights,
                                                     num_samples=len(label_weights),
                                                     replacement=True)
    # shuffle must be False when a sampler is supplied.
    train_batch = DataLoader(train_set, batch_size=32, shuffle=False,
                             sampler=sampler, batch_sampler=None)
    #train_batch = DataLoader(train_set, batch_sampler=b_sampler)
    hidden_dim = 300
    output_dim = 3  # positive, negative, neutral
    dropout = 0.5
    model = ATAE_LSTM(weights_matrix, hidden_dim, output_dim, dropout, words,
                      word2idx)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    # Only optimize parameters that require gradients (frozen embeddings stay fixed).
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=0.01,
                           weight_decay=1e-3)
    #optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr = 0.1, momentum=0.9, weight_decay=1e-5)
    ##########################################################################################
    #####################  L2 only on weights  ##############################################
    ##########################################################################################
    """ L2_params_id = [id(p) for p in model.Wp] + [id(p) for p in model.Wx] + [id(p) for p in model.fc.weight] + [id(p) for p in model.attn.weight] L2_params = [p for p in model.parameters() if id(p) in L2_params_id and p.requires_grad] base_params = [p for p in model.parameters() if id(p) not in L2_params_id and p.requires_grad] optimizer = optim.Adam([ {'params': base_params}, {'params': L2_params, 'weight_decay': 1e-3}, ], lr=0.01) """
    # Uniform class weights in the loss (balancing handled by the sampler).
    weighted_loss = torch.tensor([1, 1, 1]).float()
    criterion = nn.CrossEntropyLoss(weight=weighted_loss.cuda())
    for epoch in range(100):
        t_loss, t_acc, cm1 = model.train_(train_batch, optimizer, criterion)
        v_loss, v_acc, cm2 = model.eval_(valid_batch, criterion)
        log_value('loss/train_loss', t_loss, epoch)
        log_value('loss/valid_loss', v_loss, epoch)
        log_value('acc/train_acc', t_acc, epoch)
        log_value('acc/valid_acc', v_acc, epoch)
        # Confusion matrices for train and validation.
        print(cm1)
        print(cm2)
        print(f'Epoch: {epoch+1:02}, Train Loss: {t_loss:.8f}, Train Acc: {t_acc*100:.2f}%, \
 Val. Loss: {v_loss:.8f}, Val. Acc: {v_acc*100:.2f}%')
        # Save a full-model snapshot every epoch.
        path = "Saved_Models/PD8/10epoch_" + str(epoch)
        torch.save(model, path)
    path = "Saved_Models/pd_10"
    torch.save(model, path)
    print("Training done")
    print("Model saved at ", path)
def train(train_loader, model, optimizer, start_iter, num_iters):
    """Train the RPN+ODN detector for `num_iters` iterations.

    Images in a minibatch are forwarded one at a time (batch mode not yet
    supported); gradients accumulate across the images before a single
    optimizer step.  Returns the average total loss over the window.

    NOTE(review): uses pre-0.4 PyTorch idioms (`Variable`, `.data[0]`) —
    will not run unmodified on modern PyTorch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    total_losses = AverageMeter()
    rpn_losses = AverageMeter()
    odn_losses = AverageMeter()
    rpn_ce_losses = AverageMeter()
    rpn_box_losses = AverageMeter()
    odn_ce_losses = AverageMeter()
    odn_box_losses = AverageMeter()
    # switch to train mode
    end_iter = start_iter + num_iters - 1
    model.train()
    end = time.time()
    # for i in range(start_iter, start_iter + num_iters):
    for i, (inputs, anns) in enumerate(train_loader):
        # Offset the loop index so `i` is the global iteration number.
        i += start_iter
        # get minibatch
        # inputs, anns = next(train_loader)
        lr = adjust_learning_rate(optimizer, args.lr, args.decay_rate, i,
                                  args.niters)  # TODO: add custom
        # measure data loading time
        data_time.update(time.time() - end)
        optimizer.zero_grad()
        # forward images one by one (TODO: support batch mode later, or
        # multiprocess)
        for j, input in enumerate(inputs):
            input_anns = anns[j]  # anns of this input
            if len(input_anns) == 0:
                continue
            # Ground-truth boxes: [x, y, w, h, ordered_id] per annotation.
            gt_bbox = np.vstack(
                [ann['bbox'] + [ann['ordered_id']] for ann in input_anns])
            im_info = [[input.size(1), input.size(2),
                        input_anns[0]['scale_ratio']]]
            input_var = torch.autograd.Variable(input.unsqueeze(0).cuda(),
                                                requires_grad=False)
            cls_prob, bbox_pred, rois = model(input_var, im_info, gt_bbox)
            loss = model.loss
            # Gradients accumulate over all images in the minibatch.
            loss.backward()
            # record loss
            total_losses.update(loss.data[0], input_var.size(0))
            rpn_losses.update(model.rpn.loss.data[0], input_var.size(0))
            rpn_ce_losses.update(model.rpn.cross_entropy.data[0],
                                 input_var.size(0))
            rpn_box_losses.update(model.rpn.loss_box.data[0],
                                  input_var.size(0))
            odn_losses.update(model.odn.loss.data[0], input_var.size(0))
            odn_ce_losses.update(model.odn.cross_entropy.data[0],
                                 input_var.size(0))
            odn_box_losses.update(model.odn.loss_box.data[0],
                                  input_var.size(0))
        # do SGD step (one step per minibatch, after accumulating)
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if args.print_freq > 0 and (i + 1) % args.print_freq == 0:
            print('iter: [{0}] '
                  'Time {batch_time.val:.3f} '
                  'Data {data_time.val:.3f} '
                  'Loss {total_losses.val:.4f} '
                  'RPN {rpn_losses.val:.4f} '
                  '{rpn_ce_losses.val:.4f} '
                  '{rpn_box_losses.val:.4f} '
                  'ODN {odn_losses.val:.4f} '
                  '{odn_ce_losses.val:.4f} '
                  '{odn_box_losses.val:.4f} '
                  .format(i, batch_time=batch_time,
                          data_time=data_time, total_losses=total_losses,
                          rpn_losses=rpn_losses, rpn_ce_losses=rpn_ce_losses,
                          rpn_box_losses=rpn_box_losses,
                          odn_losses=odn_losses,
                          odn_ce_losses=odn_ce_losses,
                          odn_box_losses=odn_box_losses))
        # Free references so the loader's tensors can be collected promptly.
        del inputs
        del anns
        if i == end_iter:
            break
    # Summary line for the whole window of iterations.
    print('iter: [{0}-{1}] '
          'Time {batch_time.avg:.3f} '
          'Data {data_time.avg:.3f} '
          'Loss {total_losses.avg:.4f} '
          'RPN {rpn_losses.avg:.4f} '
          '{rpn_ce_losses.avg:.4f} '
          '{rpn_box_losses.avg:.4f} '
          'ODN {odn_losses.avg:.4f} '
          '{odn_ce_losses.avg:.4f} '
          '{odn_box_losses.avg:.4f} '
          .format(start_iter, end_iter, batch_time=batch_time,
                  data_time=data_time, total_losses=total_losses,
                  rpn_losses=rpn_losses, rpn_ce_losses=rpn_ce_losses,
                  rpn_box_losses=rpn_box_losses,
                  odn_losses=odn_losses, odn_ce_losses=odn_ce_losses,
                  odn_box_losses=odn_box_losses))
    if args.tensorboard:
        log_value('train_total_loss', total_losses.avg, end_iter)
        log_value('train_rpn_loss', rpn_losses.avg, end_iter)
        log_value('train_rpn_ce_loss', rpn_ce_losses.avg, end_iter)
        log_value('train_rpn_box_loss', rpn_box_losses.avg, end_iter)
        log_value('train_odn_loss', odn_losses.avg, end_iter)
        log_value('train_odn_ce_loss', odn_ce_losses.avg, end_iter)
        log_value('train_odn_box_loss', odn_box_losses.avg, end_iter)
    return total_losses.avg
# Make it a tiny bit faster for p in D.parameters(): p.requires_grad = False G.zero_grad() # Generator wants to fool discriminator so it wants to minimize loss of discriminator assuming label is True y.data.resize_(current_batch_size).fill_(1) y_pred_fake = D(x_fake) errG = criterion(y_pred_fake, y) errG.backward(retain_graph=True) D_G = y_pred_fake.data.mean() optimizerG.step() current_step = i + epoch * len(dataset) # Log results so we can see them in TensorBoard after log_value('errD', errD.item(), current_step) log_value('errG', errG.item(), current_step) if i % 50 == 0: end = time.time() fmt = '[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f time:%.4f' s = fmt % (epoch, param.n_epoch, i, len(dataset), errD.item(), errG.item(), D_real, D_fake, D_G, end - start) print(s) print(s, file=log_output) # Save every epoch fmt = '%s/run-%d/models/%s_epoch_%d.pth' if epoch % 25 == 0: torch.save(G.state_dict(), fmt % (param.output_folder, run, 'G', epoch)) torch.save(D.state_dict(),
    def fit(self):
        """Adversarially train the talking-face generator.

        Per batch: (1) update the discriminator on real/wrong-audio/fake
        images; (2) update the generator with a GAN loss plus cosine losses
        tying pooled audio features to pooled image features (optionally also
        to the fake image's features).  Periodically logs losses, dumps
        sample image grids, and saves all module weights each epoch.

        NOTE(review): `.cuda(async=True)` and `loss.data[0]` are pre-0.4
        PyTorch idioms; `async` is a reserved word in Python >= 3.7, so this
        method only parses on older interpreters — confirm target runtime
        before modernizing.
        """
        config = self.config
        configure("{}".format(config.log_dir), flush_secs=5)  # tensorboard logdir
        num_steps_per_epoch = len(self.data_loader)
        cc = 0  # running index for saved sample-image grids
        for epoch in range(self.start_epoch, config.max_epochs):
            for step, (example, real_im, landmarks, right_audio,
                       wrong_audio) in enumerate(self.data_loader):
                t1 = time.time()
                if config.cuda:
                    example = Variable(example).cuda(async=True)
                    real_im = Variable(real_im).cuda(async=True)
                    right_audio = Variable(right_audio).cuda(async=True)
                    wrong_audio = Variable(wrong_audio).cuda(async=True)
                else:
                    example = Variable(example)
                    real_im = Variable(real_im)
                    right_audio = Variable(right_audio)
                    wrong_audio = Variable(wrong_audio)
                fake_im, _ = self.generator(example, right_audio)
                # Train the discriminator: real+right audio -> 1,
                # real+wrong audio -> 0, fake (detached) -> 0.
                D_real = self.discriminator(real_im, right_audio)
                D_wrong = self.discriminator(real_im, wrong_audio)
                D_fake = self.discriminator(fake_im.detach(), right_audio)
                loss_real = self.bce_loss_fn(D_real, self.ones)
                loss_wrong = self.bce_loss_fn(D_wrong, self.zeros)
                loss_fake = self.bce_loss_fn(D_fake, self.zeros)
                loss_disc = loss_real + 0.5 * (loss_fake + loss_wrong)
                loss_disc.backward()
                self.opt_d.step()
                self._reset_gradients()
                # Train the generator: fool D, plus audio/image feature
                # correlation (cosine) losses.
                fake_im, audio_feature = self.generator(example, right_audio)
                D_fake = self.discriminator(fake_im, right_audio)
                f_audio = self.audio_encoder(audio_feature)
                f_images = self.images_encoder(real_im)
                # Pool encoder outputs down to 16-dim summaries per sample.
                avg_f_audio = F.avg_pool1d(f_audio, 16, 16).view(-1, 16)
                avg_f_images = F.avg_pool1d(f_images, 16, 16).view(-1, 16)
                loss_gen = self.bce_loss_fn(D_fake, self.ones)
                loss_cosine = self.cosine_loss_fn(avg_f_audio, avg_f_images,
                                                  self.one_corr)
                loss_g = loss_gen + 0.5 * loss_cosine
                if config.fake_corr:
                    # Also correlate audio features with the *generated* image.
                    fake_f_images = self.images_encoder(fake_im)
                    fake_avg_f_images = F.avg_pool1d(fake_f_images, 16,
                                                     16).view(-1, 16)
                    loss_fake_cosine = self.cosine_loss_fn(
                        avg_f_audio, fake_avg_f_images, self.one_corr)
                    loss_g += 0.5 * loss_fake_cosine
                loss_g.backward()
                self.opt_g.step()
                self.opt_corr.step()
                self._reset_gradients()
                t2 = time.time()
                if (step + 1) % 1 == 0 or (step + 1) == num_steps_per_epoch:
                    # ETA from this batch's wall time and remaining steps.
                    steps_remain = num_steps_per_epoch - step + 1 + \
                        (config.max_epochs - epoch + 1) * num_steps_per_epoch
                    eta = int((t2 - t1) * steps_remain)
                    if not config.fake_corr:
                        print(
                            "[{}/{}][{}/{}] Loss_D: {:.4f} Loss_G: {:.4f}, cosine_loss: {}, ETA: {} second"
                            .format(epoch + 1, config.max_epochs, step + 1,
                                    num_steps_per_epoch, loss_disc.data[0],
                                    loss_gen.data[0], loss_cosine.data[0],
                                    eta))
                    else:
                        print(
                            "[{}/{}][{}/{}] Loss_D: {:.4f} Loss_G: {:.4f}, cosine_loss: {}, fake_cosine_loss: {}, ETA: {} second"
                            .format(epoch + 1, config.max_epochs, step + 1,
                                    num_steps_per_epoch, loss_disc.data[0],
                                    loss_gen.data[0], loss_cosine.data[0],
                                    loss_fake_cosine.data[0], eta))
                    log_value('discriminator_loss', loss_disc.data[0],
                              step + num_steps_per_epoch * epoch)
                    log_value('generator_loss', loss_gen.data[0],
                              step + num_steps_per_epoch * epoch)
                    log_value('cosine_loss', loss_cosine.data[0],
                              step + num_steps_per_epoch * epoch)
                    if config.fake_corr:
                        log_value('fake_cosine_loss', loss_fake_cosine.data[0],
                                  step + num_steps_per_epoch * epoch)
                # Dump sample image grids ~10 times per epoch.
                if (step) % (num_steps_per_epoch / 10) == 0:
                    # Reshape (bs, C, T, H, W) video tensors into bs*16 frames.
                    fake_store = fake_im.data.permute(
                        0, 2, 1, 3, 4).contiguous().view(config.batch_size * 16,
                                                         3, 64, 64)
                    torchvision.utils.save_image(fake_store,
                                                 "{}fake_{}.png".format(
                                                     config.sample_dir, cc),
                                                 nrow=16,
                                                 normalize=True)
                    real_store = real_im.data.permute(
                        0, 2, 1, 3, 4).contiguous().view(config.batch_size * 16,
                                                         3, 64, 64)
                    torchvision.utils.save_image(real_store,
                                                 "{}real_{}.png".format(
                                                     config.sample_dir, cc),
                                                 nrow=16,
                                                 normalize=True)
                    cc += 1
            # Checkpoint all modules every epoch.
            if epoch % 1 == 0:
                torch.save(
                    self.generator.state_dict(),
                    "{}/generator_{}.pth".format(config.model_dir, epoch))
                torch.save(
                    self.discriminator.state_dict(),
                    "{}/discriminator_{}.pth".format(config.model_dir, epoch))
                torch.save(
                    self.audio_encoder.state_dict(),
                    "{}/audio_deri_encoder_{}.pth".format(
                        config.model_dir, epoch))
                torch.save(
                    self.images_encoder.state_dict(),
                    "{}/flow_encoder_{}.pth".format(config.model_dir, epoch))
def train(epoch, loader):
    """Train the spiking network for one epoch.

    Steps the LIF dynamics in chunks of `update_interval` timesteps up to
    `timesteps`, backpropagating the cross-entropy loss after each chunk
    (gradients accumulate; one optimizer step per batch).  Applies step
    learning-rate decay at the top of the epoch, logs progress to file `f`,
    and records the epoch loss to TensorBoard.

    Relies on file-level globals: `model`, `optimizer`, `f`, `timesteps`,
    `update_interval`, `lr_decay_factor`, `lr_adjust_interval`, `use_cuda`.
    """
    global learning_rate, start_time, batch_size
    # Step decay of the learning rate every `lr_adjust_interval` epochs.
    learning_rate_use = learning_rate * (lr_decay_factor**((epoch)//lr_adjust_interval))
    for param_group in optimizer.param_groups:
        param_group['lr'] = learning_rate_use
    f.write('Epoch: {} Learning Rate: {:.2e}'.format(epoch, learning_rate_use))
    total_loss = 0.0
    total_correct = 0
    num_train = 50000  # assumes CIFAR-sized training set — TODO confirm
    train_loss = AverageMeter()
    model.train()
    current_time = start_time
    # Reset membrane/spike state buffers for this update interval.
    model.module.network_init(update_interval)
    for batch_idx, (data, target) in enumerate(loader):
        if torch.cuda.is_available() and use_cuda:
            data, target = data.cuda(), target.cuda()
        #data=m(data)
        #print("Epoch: {}/{};".format(epoch, 20), "Training batch:{}/{};".format(batch_idx+1, math.ceil(num_train/batch_size)))
        # Carry membrane potential / spike state across chunks of timesteps.
        t = 0
        mem = 0
        spike = 0
        mask = 0
        spike_count = 0
        optimizer.zero_grad()
        while t < timesteps:
            output, mem, spike, mask, spike_count = model(data, t, mem, spike,
                                                          mask, spike_count)
            # Average accumulated logits over elapsed timesteps.
            output = output/(t+update_interval)
            #loss = criterion(output, target)
            loss = F.cross_entropy(output, target)
            train_loss.update(loss.item(), target.size(0))
            # Gradients accumulate across chunks; single step below.
            loss.backward()
            t = t + update_interval
            total_loss += loss.item()
        optimizer.step()
        # Accuracy from the final chunk's averaged output.
        pred = output.max(1, keepdim=True)[1]
        correct = pred.eq(target.data.view_as(pred)).cpu().sum()
        total_correct += correct.item()
        if (batch_idx+1) % 10 == 0:
            f.write('\nEpoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f} Current:[{}/{} ({:.2f}%)] Total:[{}/{} ({:.2f}%)] Time: {}({})'.format(
                epoch,
                (batch_idx+1) * len(data),
                len(loader.dataset),
                100. * (batch_idx+1) / len(loader),
                total_loss/(batch_idx+1),
                correct.item(),
                data.size(0),
                100. * correct.item()/data.size(0),
                total_correct,
                data.size(0)*(batch_idx+1),
                100. * total_correct/(data.size(0)*(batch_idx+1)),
                datetime.timedelta(seconds=(datetime.datetime.now() - start_time).seconds),
                datetime.timedelta(seconds=(datetime.datetime.now() - current_time).seconds)
                )
            )
            current_time = datetime.datetime.now()
    train_loss_per_epoch = train_loss.avg
    print("Epoch: {}/{};".format(epoch, 20),
          "########## Training loss: {}".format(train_loss_per_epoch))
    log_value('train_loss', train_loss_per_epoch, epoch)
def val(epoch):
    """Validate the superpixel model and visualize the last batch's result.

    Computes the average SLIC-style color+position loss over `val_loader`,
    logs the averages to TensorBoard, renders the superpixel boundaries of
    the last processed image, and saves a `model_best.pth` checkpoint when
    the (summed) total loss improves.

    Relies on file-level globals: `model`, `val_loader`, `opt`,
    `train_XY_feat_stack`, `val_spixelID`, `LOSS_best`.
    """
    avg_total_loss = 0.0
    avg_sem_loss = 0.0
    avg_pos_loss = 0.0
    frame_count = 0
    epoch_start = time.time()
    model.module.eval()
    for batch in val_loader:
        image_rgb, depth_target, depth_mask = Variable(batch[0]), Variable(
            batch[1]), Variable(batch[2])
        image_lab = rgb2Lab_torch(image_rgb.cuda())  # image in lab color space
        LABXY_feat_tensor = build_LABXY_feat(image_lab, train_XY_feat_stack)  # B* (3+2)
        if torch.cuda.is_available():
            image_rgb = image_rgb.cuda()
            LABXY_feat_tensor = LABXY_feat_tensor.cuda()
        # Synchronize around the forward pass (presumably for timing — the
        # elapsed time below spans the whole loop).
        torch.cuda.synchronize()
        with torch.no_grad():
            output = model(image_rgb)  # white output
        torch.cuda.synchronize()
        slic_loss, loss_sem, loss_pos = compute_color_pos_loss(
            output, LABXY_feat_tensor, pos_weight=opt.pos_weight,
            kernel_size=opt.downsize)
        avg_total_loss += slic_loss.data.item()
        avg_sem_loss += loss_sem.data.item()
        avg_pos_loss += loss_pos.data.item()
    epoch_end = time.time()
    print(
        "===> Epoch {} Validation: Avg. total_loss: {:.4f}, Avg. sem_loss: {:.4f}, Avg. epoch_pos_loss: {:.4f}, Time: {:.4f}"
        .format(epoch, avg_total_loss / len(val_loader),
                avg_sem_loss / len(val_loader), avg_pos_loss / len(val_loader),
                (epoch_end - epoch_start)))
    log_value('val_total_loss', avg_total_loss / len(val_loader), epoch)
    log_value('val_sem_loss', avg_sem_loss / len(val_loader), epoch)
    log_value('val_pos_loss', avg_pos_loss / len(val_loader), epoch)
    # Draw the last image result (uses `output`/`image_rgb` leaked from the
    # final loop iteration).
    spixl_map = update_spixl_map(val_spixelID[[-1], :, :, :],
                                 output[[-1], :, :, :])  # 1x1x240x960
    ori_sz_spixel_map = F.interpolate(
        spixl_map.type(torch.float),
        size=(opt.input_img_height, opt.input_img_width),
        mode='nearest').type(torch.int)  # 1x1x240x960
    spix_index_np = ori_sz_spixel_map.squeeze().detach().cpu().numpy(
    ).transpose(0, 1)  # 240x960
    spix_index_np = spix_index_np.astype(np.int64)  # 240x960, 1% here
    image_rgb_np = image_rgb[[-1], :, :, :].squeeze().clamp(
        0, 1).detach().cpu().numpy().transpose(1, 2, 0)
    spixel_bd_image = mark_boundaries(image_rgb_np,
                                      spix_index_np.astype(int),
                                      color=(0, 1, 1))
    # HWC float images -> 1xCxHxW arrays for the image logger.
    spixel_viz = spixel_bd_image.astype(np.float32).transpose(2, 0, 1)
    spixel_viz = np.expand_dims(spixel_viz, axis=0)
    image_rgb_np_viz = image_rgb_np.astype(np.float32).transpose(2, 0, 1)
    image_rgb_np_viz = np.expand_dims(image_rgb_np_viz, axis=0)
    log_images('spixel', reshape_4D_array(spixel_viz, 1), step=1)
    log_images('image_rgb', reshape_4D_array(image_rgb_np_viz, 1), step=1)
    global LOSS_best
    # NOTE(review): compares the *summed* loss against LOSS_best while the
    # printed/logged values are averages — consistent only if the loader
    # length never changes; verify intent.
    if avg_total_loss < LOSS_best:
        LOSS_best = avg_total_loss
        model_out_path = opt.path_to_save + "/model_best.pth".format(epoch)
        torch.save(model.module.state_dict(), model_out_path)
        print("Checkpoint saved to {}".format(model_out_path))
def test(epoch, loader):
    """Evaluate the spiking network on *loader* and checkpoint its state.

    Runs the model without gradients, accumulates cross-entropy loss and
    top-1 accuracy, logs the average test loss to tensorboard, writes
    per-batch accuracy to the log file handle `f`, saves a resumable
    snapshot every call, and copies it to `best_<filename>` whenever the
    correct-count improves.

    Relies on module-level globals: model, f, use_cuda, timesteps,
    optimizer, ann_thresholds, scaling_threshold, activation, architecture,
    dataset, test_loader, AverageMeter.
    """
    global learning_rate, start_time, batch_size_test, leak_mem
    with torch.no_grad():
        model.eval()
        total_loss = 0
        correct = 0
        is_best = False
        print_accuracy_every_batch = True
        global max_correct, batch_size, update_interval
        test_loss = AverageMeter()
        num_test = 10000  # NOTE(review): hard-coded test-set size — confirm vs dataset
        for batch_idx, (data, target) in enumerate(loader):
            #print("Epoch: {}/{};".format(epoch, 20), "Test batch: {}/{}".format(batch_idx+1, math.ceil(num_test/batch_size_test)))
            if torch.cuda.is_available() and use_cuda:
                data, target = data.cuda(), target.cuda()
            #data=m(data)
            # reset the network's temporal state before each forward pass
            model.module.network_init(timesteps)
            output, _, _, _, spike_count = model(data, 0)
            # average accumulated output over the simulation window
            output = output/update_interval
            #for key in spike_count.keys():
            #    print('Key: {}, Average: {:.3f}'.format(key, (spike_count[key].sum()/spike_count[key].numel())))
            loss = F.cross_entropy(output,target)
            test_loss.update(loss.item(), target.size(0))
            total_loss += loss.item()
            pred = output.max(1, keepdim=True)[1]  # predicted class index
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
            # q = samples seen so far, clamped to num_test on the last batch
            q=(batch_idx+1)*data.size(0)
            if((batch_idx+1)==math.ceil(num_test/batch_size_test)):
                q=num_test
            if print_accuracy_every_batch:
                f.write('\nAccuracy: {}/{}({:.2f}%)'.format(
                    correct.item(), q, 100. * correct.item() / (q)))
        test_loss_per_epoch = test_loss.avg
        print("Epoch: {}/{};".format(epoch, 20),
              "########## Test loss: {}".format(test_loss_per_epoch))
        log_value('test_loss', test_loss_per_epoch, epoch)
        if correct>max_correct:
            max_correct = correct
            is_best = True
        # full resumable snapshot (saved every call; best copy kept separately)
        state = {
            'accuracy' : max_correct.item()/len(test_loader.dataset),
            'epoch' : epoch,
            'model_state_dict' : model.state_dict(),
            'optimizer' : optimizer.state_dict(),
            'thresholds' : ann_thresholds,
            'timesteps' : timesteps,
            'leak_mem' : leak_mem,
            'scaling_threshold' : scaling_threshold,
            'activation' : activation
        }
        filename = 'snn_'+architecture.lower()+'_'+dataset.lower()+'_'+str(timesteps)+'_lr'+str(learning_rate)+'_'+str(batch_size)+'_cf16_28'+'.pth'
        torch.save(state,filename)
        if is_best:
            shutil.copyfile(filename, 'best_'+filename)
        f.write('\nTest set: Loss: {:.6f}, Current: {:.2f}%, Best: {:.2f}%\n'.
                format(
                    total_loss/(batch_idx+1),
                    100. * correct.item() / len(test_loader.dataset),
                    100. * max_correct.item() / len(test_loader.dataset)))
# --- fragment: tail of a GAN training iteration; the enclosing loop and the
# computation of entropy1_fake/entropy2_*/D_cost are not in this chunk ---
G_cost = entropy2_fake + entropy1_fake
optimizerG.step()
# detach scalars to numpy for plotting/logging
D_cost = D_cost.cpu().data.numpy()
G_cost = G_cost.cpu().data.numpy()
entropy2_fake = entropy2_fake.cpu().data.numpy()
entropy2_real = entropy2_real.cpu().data.numpy()
# monitor the loss
plot('errD', D_cost, iter_idx)
plot('errG', G_cost, iter_idx)
plot('errD_real', entropy2_real, iter_idx)
plot('errD_fake', entropy2_fake, iter_idx)
log_value('errD', D_cost, iter_idx)
log_value('errG', G_cost, iter_idx)
log_value('errD_real', entropy2_real, iter_idx)
log_value('errD_fake', entropy2_fake, iter_idx)
# CSV row mirrors the tensorboard scalars; [0] indexing implies the numpy
# arrays hold a single element — TODO confirm upstream shapes
log_writer.writerow([D_cost[0], G_cost[0], entropy2_real[0], entropy2_fake[0]])
print('iter %d[epoch %d]\t %s %.4f \t %s %.4f \t %s %.4f \t %s %.4f' %
      (iter_idx, epoch, 'errD', D_cost, 'errG', G_cost, 'errD_real',
       entropy2_real, 'errD_fake', entropy2_fake))
# checkpointing save
if iter_idx % 500 == 0:
    torch.save(netG.state_dict(), '%s/netG_lastest.pth' %
               (os.path.join(opt.checkpoints_dir, opt.exp_name)))
    torch.save(netD.state_dict(), '%s/netD_lastest.pth' %
               (os.path.join(opt.checkpoints_dir, opt.exp_name)))
def train(opt, train_loader, model, epoch, val_loader, best_rsum):
    """Train *model* for one epoch over *train_loader*.

    Logs timing statistics every ``opt.log_step`` iterations, pushes all
    per-step metrics to tensorboard, and every ``opt.val_step`` iterations
    runs validation and checkpoints the model, flagging the best rsum.

    Returns the (possibly updated) best recall sum.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()

    tick = time.time()
    for step_idx, batch in enumerate(train_loader):
        # Unconditionally switch back to train mode every iteration —
        # the validation call below may have flipped the model to eval.
        model.train_start()

        # measure data loading time
        data_time.update(time.time() - tick)

        # route this iteration's metrics into the training logger
        model.logger = train_logger

        # update the model on this mini-batch
        model.train_emb(*batch)

        # measure elapsed time
        batch_time.update(time.time() - tick)
        tick = time.time()

        # periodic console log
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch, step_idx, len(train_loader),
                    batch_time=batch_time, data_time=data_time,
                    e_log=str(model.logger)))

        # per-step tensorboard metrics
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', step_idx, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)

        # periodic validation + checkpointing
        if model.Eiters % opt.val_step == 0:
            rsum = validate(opt, val_loader, model)
            is_best = rsum > best_rsum
            best_rsum = max(rsum, best_rsum)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': model.state_dict(),
                    'best_rsum': best_rsum,
                    'opt': opt,
                    'Eiters': model.Eiters,
                },
                is_best,
                prefix=opt.logger_name + '/')

    return best_rsum
def log_value(self, name, value, step=-1):
    """Log scalar *value* under *name*.

    A *step* of -1 (the default) means "use the tracked global step".
    Returns self so calls can be chained.
    """
    chosen_step = self.global_step if step == -1 else step
    log_value(name, value, chosen_step)
    return self
def validate(opt, val_loader, model):
    """Evaluate retrieval performance on the validation set.

    Encodes all validation images and captions, computes caption-retrieval
    (i2t) and image-retrieval (t2i) recall metrics, logs everything to
    tensorboard, and returns the early-stopping score r1 + r5 + r1i + r5i.
    """
    # compute the encoding for all the validation images and captions
    img_embs, cap_embs = encode_data(model, val_loader, opt.log_step,
                                     logging.info)
    # caption retrieval
    (r1, r5, r10, medr, meanr) = i2t(img_embs, cap_embs, measure=opt.measure)
    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1, r5, r10, medr, meanr))
    # image retrieval — BUG FIX: the original unpacked the t2i mean rank
    # into `meanr`, clobbering the i2t value before it was logged (and then
    # logged the 'meanr' tag twice with the t2i value). Use `meanri`.
    (r1i, r5i, r10i, medri, meanri) = t2i(img_embs, cap_embs,
                                          measure=opt.measure)
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
                 (r1i, r5i, r10i, medri, meanri))
    # sum of recalls to be used for early stopping (unchanged)
    currscore = r1 + r5 + r1i + r5i
    # record metrics in tensorboard
    tb_logger.log_value('r1', r1, step=model.Eiters)
    tb_logger.log_value('r5', r5, step=model.Eiters)
    tb_logger.log_value('r10', r10, step=model.Eiters)
    tb_logger.log_value('medr', medr, step=model.Eiters)
    tb_logger.log_value('meanr', meanr, step=model.Eiters)
    tb_logger.log_value('r1i', r1i, step=model.Eiters)
    tb_logger.log_value('r5i', r5i, step=model.Eiters)
    tb_logger.log_value('r10i', r10i, step=model.Eiters)
    tb_logger.log_value('medri', medri, step=model.Eiters)
    tb_logger.log_value('meanri', meanri, step=model.Eiters)
    tb_logger.log_value('rsum', currscore, step=model.Eiters)
    return currscore
def train_fashion_recognition(conf):
    """Train the occasion/category/attribute classifier end-to-end.

    Builds the dataset and model from *conf*, sets up a per-module SGD
    optimizer with a StepLR schedule, then runs the training loop. When
    conf["noise_cancel_method"] == "forward" each batch carries a clean and
    a noisy view (stacked along dim 1) and the loss combines a clean term
    with a noise-transition-corrected ("modified") term weighted by
    conf["noise_loss_beta"]; otherwise plain masked losses are used.
    Periodically logs scalars to tensorboard, prints running means, and
    evaluates via test_fashion_recognition, saving the model when occasion,
    category AND attribute accuracy all improve simultaneously.

    Args:
        conf: configuration dict; mutated in place with dataset-derived
            sizes (num_cat, num_attr, attr_class_num, device, ...).
    """
    dataset = FashionData(conf)
    train_dataloader = dataset.train_dataloader
    if conf["noise_cancel_method"] == "forward":
        # clean+noise loader yields paired (clean, noisy) views per sample
        train_dataloader = dataset.train_dataloader_clean_noise
    conf["num_occasion"] = 10
    conf["num_cat"] = len(dataset.cat_code)
    conf["num_attr"] = len(dataset.attr_code)
    conf["num_country"] = len(dataset.country_code) + 1
    conf["attr_class_num"] = [0] * conf["num_attr"]
    conf["device"] = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    # per-attribute number of value classes
    for attr, code in dataset.attr_code.items():
        conf["attr_class_num"][code] = len(dataset.attr_val_code[attr])
    if not os.path.isdir(conf["checkpoint"]):
        os.mkdir(conf["checkpoint"])
    if not os.path.isdir(conf["model_save_path"]):
        os.mkdir(conf["model_save_path"])
    model = OccCatAttrClassifier(conf, dataset.word_embedding,
                                 dataset.meta_embed,
                                 dataset.cat_noise_estimate,
                                 dataset.attr_noise_estimate_list)
    model.to(device=conf["device"])
    start_time = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    # run name encodes every hyperparameter for the tensorboard directory
    log_file_name = "Loss_%s__NCM_%s__LR_%.2f__LDI_%d__NR_%.2f__Beta_%.2f__Ctx_%s__Text_%d__%s__%s" % (
        conf["loss"], conf["noise_cancel_method"], conf["lr"],
        conf["lr_decay_interval"], conf["noise_ratio"],
        conf["noise_loss_beta"], conf["context"], conf["text"],
        conf["info"], start_time)
    configure(os.path.join(conf["checkpoint"], log_file_name), flush_secs=5)
    # init optimizer
    lr = conf["lr"]
    weight_decay = conf["weight_decay"]  # NOTE(review): read but never passed to SGD
    # per-module learning rates: image CNN at half LR, noise transitions at 1/1000 LR
    params = [{
        'params': model.imageCNN.parameters(), 'lr': 0.5 * lr
    }, {
        'params': model.catW.parameters(), 'lr': lr
    }, {
        'params': model.occW.parameters(), 'lr': lr
    }, {
        'params': model.attrWs.parameters(), 'lr': lr
    }, {
        'params': model.attrW1s.parameters(), 'lr': lr
    }, {
        'params': model.occ_classifier.parameters(), 'lr': lr
    }, {
        'params': model.cat_classifier.parameters(), 'lr': lr
    }, {
        'params': model.attr_classifiers.parameters(), 'lr': lr
    }, {
        'params': model.convs1.parameters(), 'lr': lr
    }, {
        'params': model.textW.parameters(), 'lr': lr
    }, {
        'params': model.attr_context_rnn.parameters(), 'lr': lr
    }, {
        'params': model.visual_context_rnn.parameters(), 'lr': lr
    }, {
        'params': model.attr_noise_transitions.parameters(), 'lr': 0.001 * lr
    }, {
        'params': model.cat_noise_transition.parameters(), 'lr': 0.001 * lr
    }]
    optimizer = torch.optim.SGD(params, lr=lr, momentum=conf["momentum"])
    exp_lr_scheduler = lr_scheduler.StepLR(
        optimizer,
        step_size=int(conf["lr_decay_interval"] * len(train_dataloader)),
        gamma=conf["lr_decay_gamma"])
    best_occ_acc = 0.0
    best_cat_acc = 0.0
    best_attr_val_acc = 0.0
    # running-mean buffers, flushed every 10 batches
    loss_print, occ_loss_print, attr_ttl_loss_print, cat_loss_print = [
        [] for i in range(4)
    ]
    attr_loss_print = [[] for i in range(len(dataset.attr_code))]
    for epoch in range(conf["num_epoches"]):
        for batch_cnt, batch in enumerate(train_dataloader):
            step = int(batch_cnt + epoch * len(train_dataloader) + 1)
            model.to(device=conf["device"])
            model.train(True)
            exp_lr_scheduler.step()  # adjust learning rate
            optimizer.zero_grad()
            if conf["noise_cancel_method"] == "forward":
                # Each tensor carries a clean view at index 0 and a noisy view
                # at index 1 along dim 1; concatenate them along the batch dim.
                # [batch_cnt, 2, 3, 224, 224]
                whole_img = Variable(
                    torch.cat(
                        [batch[0][:, 0, :, :, :], batch[0][:, 1, :, :, :]],
                        dim=0)).to(device=conf["device"])
                # [batch_cnt, 2, max_num_cloth, 3, 224, 224]
                imgs = Variable(
                    torch.cat([
                        batch[1][:, 0, :, :, :, :], batch[1][:, 1, :, :, :, :]
                    ], dim=0)).to(device=conf["device"])
                # [batch_cnt, 2]
                occ, season, country = [
                    Variable(
                        torch.cat([each[:, 0], each[:, 1]],
                                  dim=0).squeeze(-1)).to(device=conf["device"])
                    for each in [batch[2], batch[8], batch[11]]
                ]
                # [batch_cnt, 2, max_num_cloth, attr_num]
                attr_val, attr_val_masks = [
                    Variable(
                        torch.cat([each[:, 0, :, :], each[:, 1, :, :]],
                                  dim=0)).to(device=conf["device"])
                    for each in [batch[3], batch[5]]
                ]
                # [batch_cnt, 2, max_num_cloth]
                cats, cat_masks, age, gender = [
                    Variable(torch.cat([each[:, 0, :], each[:, 1, :]],
                                       dim=0)).to(device=conf["device"])
                    for each in [batch[4], batch[6], batch[9], batch[10]]
                ]
                # [batch_cnt, 2, sent_len(16)]
                text = Variable(
                    torch.cat([batch[12][:, 0, :], batch[12][:, 1, :]],
                              dim=0)).to(device=conf["device"])
            else:
                whole_img = Variable(batch[0]).to(device=conf["device"])
                imgs = Variable(batch[1]).to(device=conf["device"])
                occ, season, country = [
                    Variable(each.squeeze(-1)).to(device=conf["device"])
                    for each in [batch[2], batch[8], batch[11]]
                ]
                attr_val, attr_val_masks = [
                    Variable(each).to(device=conf["device"])
                    for each in [batch[3], batch[5]]
                ]
                cats, cat_masks, age, gender = [
                    Variable(each).to(device=conf["device"])
                    for each in [batch[4], batch[6], batch[9], batch[10]]
                ]
                text = Variable(batch[12]).to(device=conf["device"])
            occ_loss, cat_losses, attr_losses = model(whole_img, imgs, occ,
                                                      attr_val, cats, season,
                                                      age, gender, country,
                                                      text)
            occ_loss /= conf["batch_size"]
            if conf["noise_cancel_method"] == "forward":
                # model returns (original, noise-corrected) loss pairs
                ori_cat_losses, modified_cat_losses = cat_losses
                ori_attr_losses, modified_attr_losses = attr_losses
                clean_noise_cat_loss = ori_cat_losses * cat_masks
                # first batch_size_clean rows are the clean samples
                clean_cat_loss = torch.sum(
                    clean_noise_cat_loss[:conf["batch_size_clean"], :]
                ) / torch.sum(cat_masks[:conf["batch_size_clean"], :])
                modified_cat_losses = modified_cat_losses * cat_masks
                modified_cat_loss = torch.sum(modified_cat_losses[
                    conf["batch_size_clean"]:, :]) / torch.sum(
                        cat_masks[conf["batch_size_clean"]:, :])
                cat_loss = clean_cat_loss + conf[
                    "noise_loss_beta"] * modified_cat_loss
                # attr_losses, attr_val_masks: [batch, num_cloth, num_attrs] [20, 5, 10]
                per_attr_losses = []
                ori_attr_losses = ori_attr_losses * attr_val_masks
                modified_attr_losses = modified_attr_losses * attr_val_masks
                num_valid_attr = 0
                for attr, code in sorted(dataset.attr_code.items(),
                                         key=lambda i: i[1]):
                    # guard each normalization against an all-zero mask
                    denorm = torch.sum(
                        attr_val_masks[:conf["batch_size_clean"], :, code])
                    if denorm == 0:
                        clean_per_attr_loss = torch.sum(
                            ori_attr_losses[:conf["batch_size_clean"], :,
                                            code])
                    else:
                        clean_per_attr_loss = torch.sum(
                            ori_attr_losses[:conf["batch_size_clean"], :,
                                            code]) / denorm
                    denorm = torch.sum(
                        attr_val_masks[conf["batch_size_clean"]:, :, code])
                    if denorm == 0:
                        modified_attr_loss = torch.sum(
                            modified_attr_losses[conf["batch_size_clean"]:, :,
                                                 code])
                    else:
                        modified_attr_loss = torch.sum(
                            modified_attr_losses[conf["batch_size_clean"]:, :,
                                                 code]) / denorm
                        # NOTE(review): increment placement inside this else
                        # inferred from the parallel non-forward branch — confirm
                        num_valid_attr += 1
                    per_attr_loss = clean_per_attr_loss + conf[
                        "noise_loss_beta"] * modified_attr_loss
                    per_attr_losses.append(per_attr_loss)
                # NOTE(review): divides by num_valid_attr with no zero guard
                attr_ttl_loss = torch.sum(torch.stack(per_attr_losses,
                                                      dim=0)) / num_valid_attr
                if conf["loss"] == "cat":
                    loss = cat_loss
                if conf["loss"] == "attr":
                    loss = attr_ttl_loss
                if conf["loss"] == "all":
                    loss = torch.sum(
                        torch.stack([occ_loss, cat_loss] + per_attr_losses,
                                    dim=0)) / (num_valid_attr + 2)
            else:
                cat_loss = torch.sum(cat_losses * cat_masks)
                cat_loss = cat_loss / torch.sum(cat_masks)
                per_attr_losses = []
                attr_losses = attr_losses * attr_val_masks
                num_valid_attr = 0
                for attr, code in sorted(dataset.attr_code.items(),
                                         key=lambda i: i[1]):
                    denorm = torch.sum(attr_val_masks[:, :, code])
                    if denorm == 0:
                        per_attr_losses.append(
                            torch.sum(attr_losses[:, :, code]))
                    else:
                        num_valid_attr += 1
                        per_attr_losses.append(
                            torch.sum(attr_losses[:, :, code]) / denorm)
                attr_ttl_loss = torch.sum(torch.stack(per_attr_losses,
                                                      dim=0)) / num_valid_attr
                if conf["loss"] == "cat":
                    loss = cat_loss
                if conf["loss"] == "attr":
                    loss = attr_ttl_loss
                if conf["loss"] == "all":
                    loss = torch.sum(
                        torch.stack([occ_loss, cat_loss] + per_attr_losses,
                                    dim=0)) / (num_valid_attr + 2)
            # scalar logging for this step
            log_value("occ_loss", occ_loss.item(), step)
            log_value("cat_loss", cat_loss.item(), step)
            log_value("loss", loss.item(), step)
            occ_loss_print.append(occ_loss.item())
            loss_print.append(loss.item())
            log_value("attr_ttl_loss", attr_ttl_loss.item(), step)
            for attr, code in sorted(dataset.attr_code.items(),
                                     key=lambda i: i[1]):
                log_value("%s_loss" % (attr), per_attr_losses[code], step)
            attr_ttl_loss_print.append(attr_ttl_loss.item())
            for i, each_attr_loss in enumerate(per_attr_losses):
                attr_loss_print[i].append(each_attr_loss)
            cat_loss_print.append(cat_loss.item())
            # print running means every 10 batches, then reset the buffers
            if (batch_cnt + 1) % 10 == 0:
                each_attr_loss = []
                for attr, code in sorted(dataset.attr_code.items(),
                                         key=lambda i: i[1]):
                    each_attr_loss.append(
                        "%s:%f.4" % (attr, mean(attr_loss_print[code])))
                print(
                    "epoch/batch/total:%d/%d/%d,loss:%f.4,cat_loss:%f.4,occ_loss:%f.4,attr_loss:%f.4"
                    % (epoch, batch_cnt, len(train_dataloader),
                       mean(loss_print), mean(cat_loss_print),
                       mean(occ_loss_print), mean(attr_ttl_loss_print)))
                loss_print, occ_loss_print, attr_ttl_loss_print, cat_loss_print = [
                    [] for i in range(4)
                ]
                attr_loss_print = [[] for i in range(len(dataset.attr_code))]
            loss.backward()
            optimizer.step()
            # periodic evaluation (test_interval is a fraction of an epoch)
            if (batch_cnt + 1) % int(
                    conf["test_interval"] * len(train_dataloader)) == 0:
                #import ipdb
                #ipdb.set_trace()
                print("\n\nstart to test, context: %s, loss: %s" %
                      (conf["context"], conf["loss"]))
                model.eval()
                occ_acc, cat_acc, attr_val_acc = test_fashion_recognition(
                    model, dataset, conf)
                attr_val_ttl_acc = sum(attr_val_acc) / len(attr_val_acc)
                log_value("occ_acc", occ_acc, step)
                log_value("cat_acc", cat_acc, step)
                log_value("attr_val_acc", attr_val_ttl_acc, step)
                each_attr_acc = []
                for attr, code in sorted(dataset.attr_code.items(),
                                         key=lambda i: i[1]):
                    log_value("%s_acc" % (attr), attr_val_acc[code], step)
                    each_attr_acc.append("%s:%f" % (attr, attr_val_acc[code]))
                print("occ_acc:%f,cat_acc:%f,attr_val_tll_acc:%f" %
                      (occ_acc, cat_acc, attr_val_ttl_acc))
                # save only when ALL three accuracies improve together
                if occ_acc > best_occ_acc and cat_acc > best_cat_acc and attr_val_ttl_acc > best_attr_val_acc:
                    best_occ_acc = occ_acc
                    best_cat_acc = cat_acc
                    best_attr_val_acc = attr_val_ttl_acc
                    print("achieve best performance, save model.")
                    print("best_occ: %f, best_cat: %f, best_attr: %f" %
                          (best_occ_acc, best_cat_acc, best_attr_val_acc))
                    model_save_path = os.path.join(conf["model_save_path"],
                                                   log_file_name)
                    torch.save(model.state_dict(), model_save_path)
def train(model, args):
    """Train the post-title/image match discriminator.

    Builds a dataloader over (image, title) posts, then augments every batch
    with two kinds of negatives: mismatched image/title pairs and "mangled"
    titles whose word embeddings are replaced by random noise. Optimizes the
    model with BCE on the real/fake score, halves all learning rates when
    the epoch loss plateaus (< 0.3% relative improvement, after epoch 2),
    and pickles the state_dict every 10000 batches.

    Relies on module-level constants MISM_PROP, MANG_PROP, NUM_DIMS.
    """
    #set up logger
    timestring = str(date.today()) + '_' + time.strftime(
        "%Hh-%Mm-%Ss", time.localtime(time.time()))
    run_name = 'embedd_discrim_training_r1' + '_' + timestring
    configure("logs/" + run_name, flush_secs=5)
    posts_json = json.load(
        open(args.posts_json, 'r', encoding='utf-8', errors='replace'))
    print(len(posts_json))
    # normalizing function to play nicely with the pretrained feature extractor
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # specify the dataloader and dataset generator
    train_loader = data.DataLoader(
        PostFolder(posts_json, args.img_dir, transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=4, pin_memory=True)
    image_feature_extractor = FeatureExtractor(args.dtype)
    learning_rate = 0.001
    optimizer = optim.Adam([{'params': model.title_feat_extractor.parameters()},
                            {'params': model.lin1.parameters()},
                            {'params': model.lin2.parameters()},
                            {'params': model.attn_lin1.parameters()},
                            {'params': model.attn_conv1.parameters()}],
                           lr=learning_rate)
    # Correctly paired title and image is labeled as 1
    # Mangled titles and mismatched titles are labeled as 0
    bce_loss = nn.BCELoss()
    batch_ctr = 0
    epoch_loss = 0
    for epoch in range(args.epochs):
        if epoch > 0:
            last_epoch_loss = epoch_loss
        epoch_loss = 0
        for i, (images, titles, title_lens, score) in enumerate(train_loader):
            # force positive labels to exactly 1 (x/x)
            # NOTE(review): produces NaN if any score is 0 — confirm scores are nonzero
            score = score/score
            # swapping random images and post titles
            num_mism = int(titles.size(0) * MISM_PROP)
            mism_ind = np.random.choice(titles.size(0), num_mism, replace = False)
            # shifting indices by same amount so that a mismatch is ensured
            mism_map = (mism_ind + randint(0, titles.size(0) - 1)) % titles.size(0)
            mism_imgs = images[torch.from_numpy(mism_ind)]
            mism_titles = titles.clone()[torch.from_numpy(mism_map)]
            mism_lens = title_lens[torch.from_numpy(mism_map)]
            # mangling titles
            num_mang = int(titles.size(0) * MANG_PROP)
            mang_ind = np.random.choice(titles.size(0), num_mang, replace = False)
            title_copy = titles.clone()
            num_noise = 100
            # NOTE(review): only consumed by the commented-out variant below
            noise_tensor = torch.randn(num_noise, NUM_DIMS) * 2
            for ind in mang_ind:
                if title_lens[ind] > 1:
                    # replace 50-100% of the words with random noise vectors
                    num_words_mang = randint(int(np.ceil(title_lens[ind]*.5)),
                                             title_lens[ind] - 1)
                    mang_words = np.random.choice(title_lens[ind] - 1,
                                                  num_words_mang, replace = False)
                    # if randint(0, 1) > 0:
                    # set mangled word to random noise vector
                    # title_copy[ind][torch.from_numpy(mang_words)] = noise_tensor[torch.from_numpy(np.random.choice(num_noise - 1, num_words_mang))]
                    title_copy[ind][torch.from_numpy(mang_words)] = torch.randn(num_words_mang, NUM_DIMS) * 2
                    # else: # randomly change words to other words within title
                    # title_copy[ind][torch.from_numpy(mang_words)] = title_copy[ind][torch.from_numpy(np.random.choice(title_lens[ind] - 1, num_words_mang))]
            mang_imgs = images[torch.from_numpy(mang_ind)]
            mang_titles = title_copy[torch.from_numpy(mang_ind)]
            mang_lens = title_lens[torch.from_numpy(mang_ind)]
            # append negatives after the positives; labels are 1s then 0s
            images = torch.cat((images, mism_imgs, mang_imgs), 0)
            titles = torch.cat((titles, mism_titles, mang_titles), 0)
            title_lens = torch.cat((title_lens, mism_lens, mang_lens), 0)
            score = torch.cat((score.type(torch.FloatTensor),
                               torch.zeros(num_mism), torch.zeros(num_mang)), 0)
            images = image_feature_extractor.make_features(
                Variable(images).type(args.dtype))
            pred_score = model.forward(images,
                                       Variable(titles).type(args.dtype),
                                       title_lens)
            optimizer.zero_grad()
            score_var = Variable(score).type(args.dtype)
            batch_loss = bce_loss(pred_score, score_var)
            log_value('BCE loss', batch_loss.data[0], batch_ctr)
            log_value('Learning rate', optimizer.param_groups[0]['lr'], batch_ctr)
            epoch_loss += batch_loss.data[0]
            batch_ctr += 1
            batch_loss.backward()
            optimizer.step()
            if batch_ctr % 10000 == 0:
                pickle.dump(model.state_dict(), open(args.save_name + '.p', 'wb'))
        if epoch > 2:  #arbitrary epoch choice
            # halve all learning rates once relative improvement drops below 0.3%
            # NOTE(review): end-of-epoch placement inferred from context — confirm
            if (last_epoch_loss - epoch_loss)/epoch_loss < .003:
                for param in range(len(optimizer.param_groups)):
                    optimizer.param_groups[param]['lr'] = optimizer.param_groups[param]['lr']/2
# --- fragment: tail of a domain-adaptation (two-classifier discrepancy)
# training epoch; the enclosing loops and loss setup are not in this chunk,
# and the save_dic literal is cut off at the end ---
outputs2 = model_f2(outputs)
# discrepancy between the two classifier heads, scaled by a CLI weight
loss += criterion_d(outputs1, outputs2) * args.num_multiply_d_loss
loss.backward()
optimizer_g.step()
d_loss += loss.data[0] / args.num_k
d_loss_per_epoch += d_loss
if ind % 100 == 0:
    print("iter [%d] DLoss: %.6f CLoss: %.4f" % (ind, d_loss, c_loss))
if ind > args.max_iter:
    break
# per-epoch summary + tensorboard scalars
print("Epoch [%d] DLoss: %.4f CLoss: %.4f" %
      (epoch, d_loss_per_epoch, c_loss_per_epoch))
log_value('c_loss', c_loss_per_epoch, epoch)
log_value('d_loss', d_loss_per_epoch, epoch)
log_value('lr', args.lr, epoch)
if args.adjust_lr:
    # args.lr is updated in place so both optimizers see the decayed rate
    args.lr = adjust_learning_rate(optimizer_g, args.lr, args.weight_decay,
                                   epoch, args.epochs)
    args.lr = adjust_learning_rate(optimizer_f, args.lr, args.weight_decay,
                                   epoch, args.epochs)
checkpoint_fn = os.path.join(pth_dir, "%s-%s.pth.tar" % (model_name, epoch + 1))
args.start_epoch = epoch + 1
# resumable checkpoint payload (dict literal continues past this chunk)
save_dic = {
    'epoch': epoch + 1,
    'args': args,
    'g_state_dict': model_g.state_dict(),
    'f1_state_dict': model_f1.state_dict(),
    'optimizer_g': optimizer_g.state_dict(),
def train():
    """Train the QA sentence-matching network and evaluate MRR on the dev set.

    Loads train/dev .npz datasets, builds a class-weighted sampler, resumes
    from a hard-coded checkpoint if present, accumulates gradients over
    `batch_size` single-sample steps (the DataLoader has batch size 1), and
    after every epoch saves the model and computes dev accuracy plus MRR
    grouped by question id.
    """
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info("running %s" % ' '.join(sys.argv))
    n_epochs = 100
    batch_size = 50  # gradient-accumulation interval (loader yields 1 sample)
    train_interval = 1000
    #test_interval = 500
    #test_steps = 100
    cuda.set_device(1)
    configure("logs", flush_secs=5)
    train_data = np.load('./data/train/data.npz')
    dev_data = np.load('./data/dev/data.npz')
    train_dataset = Dataset(train_data)  #, train=True)
    dev_dataset = Dataset(dev_data)  #, train=False)
    # two-class weighted sampling; equal weights as written
    class_sample_count = [1, 1]
    weight_per_class = 1 / torch.Tensor(class_sample_count).double()
    weights = [weight_per_class[label] for label in train_data['labels']]
    sampler = data.sampler.WeightedRandomSampler(weights, len(weights))
    train_dataloader = data.DataLoader(
        train_dataset, sampler=sampler)  #, batch_size=batch_size, sampler=sampler)
    dev_dataloader = data.DataLoader(dev_dataset)  #, shuffle=True)
    net = Net().cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    # NOTE(review): ignored_params/base_params split the conv params out, but
    # every group below uses the same lr — the split has no effect as written
    ignored_params = list(map(id, net.sentence.conv_q.parameters())) + list(
        map(id, net.sentence.conv_a.parameters()))
    #ignored_params = list(map(id, net.sentence.conv.parameters()))
    base_params = filter(lambda p: id(p) not in ignored_params,
                         net.parameters())
    optimizer = optim.Adam(
        [
            {
                'params': net.sentence.conv_q.parameters()
            },
            {
                'params': net.sentence.conv_a.parameters()
            },
            #{'params': net.sentence.conv.parameters()},
            {
                'params': base_params
            }
        ],
        lr=0.000003)
    # resume from a hard-coded checkpoint if it exists
    latest_epoch_num = 5
    model_path = './model/epoch_' + str(
        latest_epoch_num) + '_2017-05-24#20:07:39.params'
    if os.path.exists(model_path):
        net.load_state_dict(torch.load(model_path))
        logger.info('Successfully loaded model: %s' % (model_path))
    else:
        logger.info('Could not find model: %s' % (model_path))
    for epoch in range(n_epochs):
        net.train()
        epoch += latest_epoch_num  # shift epoch numbering past the resume point
        running_loss = 0.0
        correct = 0
        for i, train_data in enumerate(train_dataloader, 0):
            train_qids, train_questions, train_answers, train_overlap_feats, train_labels = train_data
            train_questions = Variable(train_questions.cuda())
            train_answers = Variable(train_answers.cuda())
            train_overlap_feats = Variable(train_overlap_feats.cuda())
            train_labels = Variable(train_labels.long().cuda())
            prob = net(train_questions, train_answers, train_overlap_feats)
            loss = criterion(prob, train_labels)
            loss.backward()
            # step/zero only every batch_size samples (gradient accumulation)
            if (i + 1) % batch_size == 0:
                optimizer.step()
                optimizer.zero_grad()
            #optimizer.zero_grad()
            #optimizer.step()
            running_loss += loss.data[0]
            _, predicted = torch.max(prob.data, 1)
            correct += (predicted == train_labels.data).sum()
            if (i + 1) % train_interval == 0:
                logger.info(
                    '[%d, %5d] train_loss: %.3f, train_accuracy: %.3f' %
                    (epoch + 1, i + 1, running_loss / train_interval,
                     correct / train_interval))
                log_value('train_loss', running_loss / train_interval)
                log_value('train_accuracy', correct / train_interval)
                running_loss = 0.0
                correct = 0
        logger.info("Finished %s epoch" % (epoch + 1))
        torch.save(
            net.state_dict(),
            './model/epoch_%s_%s.params' %
            (epoch + 1,
             datetime.datetime.now().strftime("%Y-%m-%d#%H:%M:%S")))
        logger.info(
            'Saved model: ./model/epoch_%s_%s.params' %
            (epoch + 1,
             datetime.datetime.now().strftime("%Y-%m-%d#%H:%M:%S")))
        # test on dev set
        net.eval()
        accurate = 0
        test_nums = 0
        unique_qid_nums = 0
        probs, labels = [], []
        qid_prev = 1
        rank_score = 0.0
        for j, test_data in enumerate(dev_dataloader, 0):
            test_qids, test_questions, test_answers, test_overlap_feats, test_labels = test_data
            test_questions = Variable(test_questions.cuda(), volatile=True)
            test_answers = Variable(test_answers.cuda(), volatile=True)
            test_overlap_feats = Variable(test_overlap_feats.cuda(),
                                          volatile=True)
            test_labels = Variable(test_labels.long().cuda(), volatile=True)
            # question id changed: score the previous question's candidate
            # ranking (reciprocal rank of the positive answer)
            if test_qids[0] != qid_prev:
                unique_qid_nums += 1
                probs = torch.Tensor(probs)
                labels = torch.from_numpy(np.array(labels))
                _, accurate_idx = torch.max(labels, 0)
                _, rank_idx = torch.sort(probs, 0, descending=True)
                _, rank = torch.max(rank_idx == accurate_idx[0], 0)
                rank_score += 1 / (rank[0] + 1)
                probs, labels = [], []
                qid_prev = test_qids[0]
            test_nums += test_questions.size()[0]
            prob = net(test_questions, test_answers, test_overlap_feats)
            _, predicted = torch.max(prob.data, 1)
            accurate += (predicted == test_labels.data).sum()
            probs.append(prob.data[0][1])  # positive-class probability
            labels.append(test_labels.data[0])
            #_, predicted = torch.max(prob.data, 1)
            #right += (predicted == test_labels.data).sum()
            #_, prediction = torch.max(prob.data[:, 1], 0)
            #_, accurate_idx = torch.max(test_labels.data, 0)
            #accurate += (prediction == accurate_idx)[0]
            #_, rank_idx = torch.sort(prob.data[:, 1], 0, descending=True)
            #_, rank = torch.max(rank_idx == accurate_idx[0], 0)
            #rank_score += 1/(rank[0]+1)
            #if (j + 1) == test_steps:
            #    break
        #logger.info('[%d, %5d] test_accuracy: %.3f, MAP: %.3f, MRR: %.3f' % (epoch+1, i+1, right / (test_nums), accurate / test_steps, rank_score / test_steps))
        # flush the final question group (no qid change after the last batch)
        unique_qid_nums += 1
        probs = torch.Tensor(probs)
        labels = torch.from_numpy(np.array(labels))
        _, accurate_idx = torch.max(labels, 0)
        _, rank_idx = torch.sort(probs, 0, descending=True)
        _, rank = torch.max(rank_idx == accurate_idx[0], 0)
        rank_score += 1 / (rank[0] + 1)
        logger.info(
            '[%d] test_accuracy: %.3f, MRR: %.3f' %
            (epoch + 1, accurate / test_nums, rank_score / unique_qid_nums))
        log_value('test_accuracy', accurate / test_nums)
        #log_value('MAP', accurate / test_steps)
        log_value('MRR', rank_score / unique_qid_nums)
    logger.info("Finished training")
def val(epoch):
    """Validate the mask-and-restore pipeline (netM + netE) for one epoch.

    For each batch: netM predicts a continuous corruption mask, which is
    binarized by Bernoulli sampling and applied to the image; netE restores
    the masked image. Accumulates MSE/PSNR (in de-normalized pixel space)
    and mask sparsity, logs averages and sample images to tensorboard, and
    checkpoints the model whenever accumulated PSNR beats PSNR_best.

    Relies on module-level globals: modelME, val_loader, criterion, PSNR_best.
    """
    avg_psnr = 0
    avg_mse = 0
    avg_sparsity = 0
    modelME.eval()
    modelME.netM.eval()
    modelME.netE.eval()
    for batch in val_loader:
        target = batch
        image = target.clone()
        image_clone = image.clone()
        # constant per-channel mean/std (0.5) images used to undo normalization
        mean_image = torch.zeros(image.shape[0], image.shape[1],
                                 image.shape[2], image.shape[3])
        mean_image[:,0,:,:] = 0.5
        mean_image[:,1,:,:] = 0.5
        mean_image[:,2,:,:] = 0.5
        std_image = torch.zeros(image.shape[0], image.shape[1],
                                image.shape[2], image.shape[3])
        std_image[:,0,:,:] = 0.5
        std_image[:,1,:,:] = 0.5
        std_image[:,2,:,:] = 0.5
        if torch.cuda.is_available():
            image = image.cuda()
            image_clone = image_clone.cuda()
            target = target.cuda()
            mean_image = mean_image.cuda()
            std_image = std_image.cuda()
        # Generate the corruption mask and reconstructed image
        corrupt_mask_conti, image_recon = modelME(image)
        # Binarize the corruption mask using Bernoulli distribution, then feed into modelE
        corrupt_mask = corrupt_mask_conti.bernoulli()
        # fraction of kept pixels in the binary mask
        mask_sparsity = corrupt_mask.sum() / (
            corrupt_mask.shape[0] * corrupt_mask.shape[1] *
            corrupt_mask.shape[2] * corrupt_mask.shape[3])
        # broadcast the single-channel mask across the 3 RGB channels
        corrupt_mask = corrupt_mask.expand(corrupt_mask.shape[0], 3,
                                           corrupt_mask.shape[2],
                                           corrupt_mask.shape[3])
        # Generate the corrupted image
        mask_image = corrupt_mask * image_clone
        restored_image = modelME.netE(mask_image)
        # MSE/PSNR measured after de-normalizing back to [0, 1] pixel space
        mse = criterion((restored_image*std_image)+mean_image,
                        (target*std_image)+mean_image)
        psnr = 10 * log10(1 / mse.data[0])
        avg_psnr += psnr
        avg_mse += mse.data[0]
        avg_sparsity += mask_sparsity
    print("===> Epoch {} Validation: Avg. Loss: {:.4f}, Avg.PSNR: {:.4f} dB, Mask Sparsity: {:.4f}".format(
        epoch, avg_mse / len(val_loader), avg_psnr / len(val_loader),
        avg_sparsity / len(val_loader)))
    log_value('val_loss', avg_mse / len(val_loader), epoch)
    log_value('val_psnr', avg_psnr / len(val_loader), epoch)
    log_value('val_sparsity', avg_sparsity / len(val_loader), epoch)
    # visualize tensors from the last batch only
    corrupt_mask_conti = corrupt_mask_conti.expand(
        corrupt_mask_conti.shape[0], 3, corrupt_mask_conti.shape[2],
        corrupt_mask_conti.shape[3])
    log_images('original_image', reshape_4D_array(
        (image*std_image+mean_image).cpu().numpy(), 10), step=1)
    log_images('conti_mask', reshape_4D_array(
        corrupt_mask_conti.data.cpu().numpy(), 10), step=1)
    log_images('binar_mask', reshape_4D_array(
        corrupt_mask.data.cpu().numpy(), 10), step=1)
    log_images('restored_image', reshape_4D_array(
        (restored_image*std_image+mean_image).data.cpu().numpy(), 10), step=1)
    global PSNR_best
    # NOTE(review): compares the SUM of PSNR over batches, not the average
    if avg_psnr > PSNR_best:
        PSNR_best = avg_psnr
        model_out_path = "epochs_NetME/" + "model_best.pth"
        torch.save(modelME.state_dict(), model_out_path)
        print("Checkpoint saved to {}".format(model_out_path))
def fit(self):
    """Train the optical-flow network frame-pair by frame-pair, then test.

    For every epoch: iterates the training clips, and for each adjacent
    frame pair regresses the predicted flow against the proxy flow with the
    configured criterion, logging loss and a scaled end-point-error (EPE)
    per step. After each epoch it decays the LR, checkpoints the network,
    and measures average EPE on the test set.

    NOTE(review): `.cuda(async=True)` and `.data[0]` are legacy
    PyTorch (<0.4) / Python (<3.7) constructs; on modern stacks `async`
    is a reserved keyword and must become `non_blocking=True`.
    """
    config = self.config
    configure("{}".format(config.log_dir), flush_secs=5)
    depth = 15  # initial guess for clip length; overwritten per batch below
    num_iter = config.max_epochs * len(self.train_dataloader) * depth
    for epoch in range(self.start_epoch, config.max_epochs):
        train_losses = AverageMeter()
        train_flow2_EPEs = AverageMeter()
        self.net.train()
        for step, (real_im, real_flow) in enumerate(self.train_dataloader):
            if config.cuda:
                real_im = Variable(real_im).cuda(async=True)
                real_flow = Variable(real_flow).cuda(async=True)
            else:
                real_im = Variable(real_im)
                real_flow = Variable(real_flow)
            depth = real_im.size()[2]  # temporal length of this clip
            for d in range(depth - 1):
                # consecutive frame pair + the proxy flow between them
                prev_frm = real_im[:, :, d, :, :]
                frm = real_im[:, :, d + 1, :, :]
                proxy_flow = real_flow[:, :, d, :, :]
                gen_flows = self.net(torch.cat([prev_frm, frm], 1))
                loss = self.criterion(gen_flows, proxy_flow)
                # EPE scaled by 20; gen_flows[0] is the highest-resolution output
                flow2_EPE = 20 * self.high_res_EPE(gen_flows[0], proxy_flow)
                train_losses.update(loss.data[0], real_flow.size(0))
                train_flow2_EPEs.update(flow2_EPE.data[0], real_flow.size(0))
                # compute gradient and do SGD step
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                iter_idx = epoch * (len(self.train_dataloader) *
                                    depth) + step * depth + d
                print('epoch: {}/{} iter: {}/{}, loss: {}, EPE: {}'.format(
                    epoch, config.max_epochs, iter_idx, num_iter,
                    loss.data[0], flow2_EPE.data[0]))
                log_value('loss', loss.data[0], iter_idx)
                log_value('EPE', flow2_EPE.data[0], iter_idx)
        log_value('loss_epoch', train_losses.avg, epoch)
        log_value('EPE_epoch', train_flow2_EPEs.avg, epoch)
        self.adjust_learning_rate(self.optimizer, epoch * depth)
        torch.save(
            self.net.state_dict(),
            os.path.join(config.model_dir, 'flownet_{}.pth'.format(epoch)))
        # testing
        test_flow2_EPEs = AverageMeter()
        self.net.eval()
        for step, (real_im, real_flow) in enumerate(self.test_dataloader):
            if config.cuda:
                real_im = Variable(real_im).cuda(async=True)
                real_flow = Variable(real_flow).cuda(async=True)
            else:
                real_im = Variable(real_im)
                real_flow = Variable(real_flow)
            depth = real_im.size()[2]
            for d in range(depth - 1):
                prev_frm = real_im[:, :, d, :, :]
                frm = real_im[:, :, d + 1, :, :]
                proxy_flow = real_flow[:, :, d, :, :]
                gen_flow = self.net(torch.cat([prev_frm, frm], 1))
                flow2_EPE = 20 * self.high_res_EPE(gen_flow, proxy_flow)
                test_flow2_EPEs.update(flow2_EPE.data[0])
        print('epoch: {}/{} avg_EPE: {}'.format(epoch, config.max_epochs,
                                                test_flow2_EPEs.avg))
        log_value('test_EPE_epoch', test_flow2_EPEs.avg, epoch)
######################### # (2) Update G network: # ######################### for p in D.parameters(): p.requires_grad = False G.zero_grad() # Sample fake data z.data.normal_(0, 1) x_fake = G(z) # Generator Loss errG = D(x_fake) errG = errG.mean() #print(errG) errG.backward(one_neg) optimizerG.step() # Log results so we can see them in TensorBoard after log_value('errD', errD.data[0], i) log_value('errD_penalty', errD_penalty.data[0], i) log_value('errG', errG.data[0], i) if i % 50 == 0: print('[i=%d] W_distance: %.4f W_distance_penalty: %.4f Loss_G: %.4f' % (i, errD.data[0], errD_penalty.data[0], errG.data[0])) print('[i=%d] W_distance: %.4f W_distance_penalty: %.4f Loss_G: %.4f' % (i, errD.data[0], errD_penalty.data[0], errG.data[0]), file=log_output) # Save models if i % 500 == 0: torch.save(G.state_dict(), '%s/run-%d/models/G_%d.pth' % (param.output_folder, run, i)) torch.save(D.state_dict(), '%s/run-%d/models/D_%d.pth' % (param.output_folder, run, i))
def log_value(self, name, value): tensorboard_logger.log_value(name, value, self.global_step) return self
# 初始化数据加载器 train_loader = get_train_loader(train_caption_pkl_path, feature_h5_path, batch_size) total_step = len(train_loader) # 准备一下验证用的ground-truth reference_json_path = '{0}.json'.format(test_reference_txt_path) reference = COCO(reference_json_path) # 开始训练模型 best_meteor = 0 loss_count = 0 for epoch in range(num_epochs): epsilon = max(0.6, ss_factor / (ss_factor + np.exp(epoch / ss_factor))) print('epoch:%d\tepsilon:%.8f' % (epoch, epsilon)) log_value('epsilon', epsilon, epoch) for i, (videos, captions, cap_lens, video_ids) in enumerate(train_loader, start=1): # 构造mini batch的Variable videos = Variable(videos) targets = Variable(captions) if use_cuda: videos = videos.cuda() targets = targets.cuda() optimizer.zero_grad() outputs, video_encoded = banet(videos, targets, epsilon) # 因为在一个epoch快要结束的时候,有可能采不到一个刚好的batch # 所以要重新计算一下batch size bsz = len(captions) # 把output压缩(剔除pad的部分)之后拉直 outputs = torch.cat([outputs[j][:cap_lens[j]] for j in range(bsz)], 0)
) torch.save( optimizer.state_dict(), "logs/trained_models/{}_{}_optimizer.pth".format((train_b_id // 2000) * (1 + e), model_name), ) torch.cuda.empty_cache() train_iou.append(ious) train_seg_iou.append(seg_ious) train_losses.append(losses) train_prim_losses.append(p_losses) train_emb_losses.append(embed_losses) train_res_losses.append(res_losses) train_res_geom_losses.append(res_g_losses) train_res_spline_losses.append(res_s_losses) log_value("iou", iou, train_b_id + e * (config.num_train // config.batch_size // num_iter)) log_value( "embed_loss", embed_losses, train_b_id + e * (config.num_train // config.batch_size // num_iter), ) log_value( "res_loss", res_losses, train_b_id + e * (config.num_train // config.batch_size // num_iter), ) log_value( "res_g_loss", res_g_losses, train_b_id + e * (config.num_train // config.batch_size // num_iter), )
def train(attention, encoder, decoder, captions, objects, optical_flow, resnet, \
          objects_vl, resnet_vl, optical_vl, captions_vl, n_iters, lr_rate, \
          batch_size, dec_max_time_step):
    """Train the attention + encoder + decoder captioning pipeline.

    For each epoch: batches video ids from the global ``video_ids_tr``,
    attends over resnet/optical-flow/object features, encodes each attended
    video frame-by-frame, decodes a caption with optional teacher forcing,
    and optimizes all three modules with Adam.  After every epoch the model
    is checkpointed and ``validate`` is run on the held-out data.

    Relies on module-level globals: video_ids_tr, device, max_frame,
    resnet_dim, word_dim, teacher_force_ratio, model_params_path, validate.
    """
    # BUG FIX: the original passed the global `learning_rate` and silently
    # ignored the `lr_rate` parameter; honor the argument instead.
    attention_optimizer = optim.Adam(attention.parameters(), lr=lr_rate)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=lr_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=lr_rate)
    criterion = nn.MSELoss()

    for epoch in tqdm(range(n_iters)):
        # Train mode
        attention = attention.train()
        encoder = encoder.train()
        decoder = decoder.train()

        epoch_loss = 0.0  # plain-float accumulator for epoch-level logging
        data_iters = math.ceil(len(video_ids_tr) / batch_size)
        for batch_num in range(data_iters):
            start = batch_num * batch_size
            end = min((batch_num + 1) * batch_size, len(video_ids_tr))
            vids = video_ids_tr[start:end]

            caption_tensor = captions.get_tensor(vids).to(device)
            video_inst = captions.video_instances(vids)
            object_tensor = objects.get_tensor(vids, video_inst).to(device)
            optical_tensor = optical_flow.get_tensor(vids, video_inst).to(device)
            resnet_tensor = resnet.get_tensor(vids, video_inst).to(device)

            video_attended, _, _, _ = attention(video_inst, resnet_tensor,
                                                optical_tensor, object_tensor)

            # BUG FIX: zero gradients every batch; the original never called
            # zero_grad(), so gradients accumulated across batches and epochs.
            attention_optimizer.zero_grad()
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            # BUG FIX: reset the loss per batch.  The original accumulated
            # `loss` across batches and called backward() on it each batch,
            # which re-backprops through graphs already freed by the previous
            # backward() call.
            loss = 0
            # Renamed from `i`, which shadowed the batch index in the original.
            for vid_idx in range(sum(video_inst)):
                encoder_hidden = encoder.init_hidden()
                for frame_num in range(max_frame):  # Run Encoder for one video.
                    frame = video_attended[vid_idx, frame_num].view(1, 1, resnet_dim)
                    encoder_hidden = encoder(frame, encoder_hidden)

                # Run Decoder for one sentence
                use_teacher_forcing = random.random() < teacher_force_ratio
                word_tensor = torch.zeros((1, 1, word_dim)).to(device)  # SOS
                for t in range(dec_max_time_step):
                    decoder_out = decoder(word_tensor, encoder_hidden)
                    word_ground_truth = caption_tensor[vid_idx, t].unsqueeze(0).unsqueeze(0)
                    loss += criterion(decoder_out, word_ground_truth)
                    # Teacher forcing feeds ground truth; otherwise feed the
                    # previous prediction back in.
                    word_tensor = word_ground_truth if use_teacher_forcing else decoder_out

            if torch.is_tensor(loss):  # skip degenerate empty batches
                loss.backward()
                attention_optimizer.step()
                encoder_optimizer.step()
                decoder_optimizer.step()
                epoch_loss += loss.item()

        log_value('Training Loss', epoch_loss, epoch)

        # Save model parameters
        params = {'attention': attention.state_dict(),
                  'encoder': encoder.state_dict(),
                  'decoder': decoder.state_dict()}
        torch.save(params, model_params_path + str(epoch) + '.pt')

        # Validation Loss, Bleu scores etc. after each epoch
        attention = attention.eval()
        encoder = encoder.eval()
        decoder = decoder.eval()
        validate(epoch, attention, encoder, decoder, captions_vl, objects_vl,
                 optical_vl, resnet_vl, batch_size, dec_max_time_step)
def forward(self, outputs, mu, sigma, pi, k , z, targets, kl_weight=1, baseline=None, step=None):
    """Variational loss with a REINFORCE term over a discrete length choice.

    Averages over ``self.sample`` Monte-Carlo samples: a reconstruction
    reward ``p_theta_y`` minus the KL of the latent sequence, plus a KL over
    the length distribution ``pi``; then adds a score-function (REINFORCE)
    estimator for the non-differentiable length ``k`` and a squared-error
    loss for the learned baseline.  Legacy PyTorch style (.data[0], Variable).

    Returns (elbo, loss_report) in eval mode, otherwise
    (loss, loss_bl, loss_report, num_correct).
    """
    batch_size = pi.size(0)
    # KL of the length distribution pi (opaque helper; per-batch scalar assumed)
    kld_len = self.kld_length(pi)
    loss = kl_weight * kld_len.div(batch_size)
    loss_report = kl_weight * kld_len
    pty = 0.0          # accumulates mean log p(y|z) over samples
    kld = 0.0          # accumulates mean latent-KL over samples
    num_correct = 0.0
    rs = []            # per-sample rewards, reused by the REINFORCE pass
    kls = []           # per-sample KL terms, reused for logging
    ### Loss
    for i in range(self.sample):
        k_i = k[i]
        ### Compute KLD of Latent Sequence
        # Analytic KL( N(mu, sigma^2) || N(0, 1) ) per element
        kld_ = 0.5*(torch.exp(2*sigma[i]) + mu[i]**2) - sigma[i] - 0.5
        # zero out timesteps beyond each example's sampled length k_i
        kld_ = mask_tensor(k_i, kld_, 0)
        kld_ = kld_.view(kld_.size(0), kld_.size(1) * kld_.size(2))
        kld_ = torch.sum(kld_, 1)
        kld += kld_.mean().div(self.sample)
        pty_, corr_ = self.p_theta_y(outputs[i], targets)
        pty += pty_.mean().div(self.sample)
        num_correct += corr_ / self.sample
        # per-example reward: reconstruction minus weighted KL
        r_i = (pty_ - kl_weight*kld_)
        loss -= r_i.mean().div(self.sample)
        loss_report -= r_i.sum().clone().detach().div(self.sample)
        rs.append(r_i)
        kls.append(kld_)
    elbo = -loss.clone().detach().data[0]
    ### Return if in Eval Mode
    if not self.training:
        return elbo, loss_report.data[0]
    ### Reinforcements
    # detach: the baseline net is trained only through loss_bl below
    bl = baseline.clone().detach()
    rein_loss = 0.0
    # NOTE(review): rein_loss is never updated, so `loss += rein_loss` below
    # is a no-op; the reinforcement terms are subtracted from loss directly.
    for i in range(self.sample):
        indices = k[i] - 1  # k appears 1-based; shift to index into pi — TODO confirm
        # Copy Prevents Backprop Through Rewards
        r = rs[i].clone().detach()
        # log pi(k): the score-function gradient path
        RE_grad = torch.log(torch.gather(pi, 1, indices.long()))
        # center the reward with the running mean and the learned baseline
        reward_adjusted = r - self.r_mean - bl
        reinforcement = self.lam * reward_adjusted * RE_grad
        loss -= reinforcement.mean().div(self.sample)
    loss += rein_loss
    ### Baseline Loss
    # regress the baseline toward the (detached) average reward
    r_avg = torch.stack(rs).mean(0).clone().detach()
    loss_bl = torch.pow(r_avg - baseline - self.r_mean, 2).mean()
    ### Update Running Average of Rewards
    # NOTE(review): `if self.r_mean:` also takes the init branch when the
    # running mean is exactly 0.0 — probably meant `is not None`; confirm.
    if self.r_mean:
        self.r_mean = 0.95*self.r_mean + 0.05 * r_avg.mean().data[0]
    else:
        self.r_mean = r_avg.mean().data[0]
    ### Logging
    if self.sample > 1:
        klvar = torch.var(torch.stack(kls))
        log_value('STD KL Divergence', torch.sqrt(klvar).data[0], step)
    range_ = Variable(torch.arange(1, self.max_len + 1)).unsqueeze(0)
    if self.gpu:
        range_ = range_.cuda()
    # expected sequence length under pi
    E_pi = (pi * range_.expand_as(pi)).sum(1).mean()
    # mean sigma over the valid timesteps of the first sample
    mean_sig = mask_tensor(k[0], torch.exp(sigma[0]), 0)
    mean_sig = mean_sig / k[0].unsqueeze(1).expand_as(mean_sig)
    mean_sig = mean_sig.sum(1).mean()
    log_value('BaseLine', baseline.mean().data[0], step)
    log_value('Expected Length', E_pi.data[0], step)
    log_value('Loss', loss.data[0], step)
    log_value('KLD', kld.data[0] , step)
    log_value('KLD_LEN', kld_len.data[0], step)
    log_value('p_y_given_z', pty.data[0], step)
    log_value('r_mean_step', r_avg.mean().data[0], step)
    log_value('r_moving_avg', self.r_mean, step)
    log_value('loss BL', loss_bl.data[0], step)
    log_value('ELBO', elbo, step)
    log_value('kl_weight', kl_weight, step)
    log_value('mean_sigma', mean_sig.data[0], step)
    return loss, loss_bl, loss_report.data[0], num_correct
######################## for p in D.parameters(): p.requires_grad = False G.zero_grad() # Sample fake data z.data.resize_(param.batch_size, param.z_size, 1, 1).normal_(0, 1) x_fake = G(z) # Generator Loss errG = D(x_fake) errG.backward(one) optimizerG.step() # Log results so we can see them in TensorBoard after log_value('errD', -errD.data[0], gen_iterations) log_value('errG', errG.data[0], gen_iterations) gen_iterations = gen_iterations + 1 if gen_iterations % 50 == 0: end = time.time() print('[%d] W_distance: %.4f Loss_G: %.4f time:%.4f' % (gen_iterations, -errD.data[0], errG.data[0], end - start)) print('[%d] W_distance: %.4f Loss_G: %.4f time:%.4f' % (gen_iterations, -errD.data[0], errG.data[0], end - start), file=log_output) # Fake images saved fake_test = G(z_test) vutils.save_image(fake_test.data, '%s/run-%d/images/fake_samples_iter%05d.png' % (param.output_folder, run, gen_iterations/50), normalize=True) # Save models if gen_iterations % 500 == 0: torch.save(G.state_dict(), '%s/run-%d/models/G_%d.pth' % (param.output_folder, run, gen_iterations/50)) torch.save(D.state_dict(), '%s/run-%d/models/D_%d.pth' % (param.output_folder, run, gen_iterations/50))
# https://arxiv.org/pdf/1412.0035.pdf says to sum for each color x_transf_size = x_transf.size() x_i_diff = (x_transf[:, :, :(x_transf_size[2] - 1), :(x_transf_size[3] - 1)] - x[:, :, :(x_transf_size[2] - 1), 1:]) ** 2 x_j_diff = (x_transf[:, :, :(x_transf_size[2] - 1), :(x_transf_size[3] - 1)] - x[:, :, 1:, :(x_transf_size[3] - 1)]) ** 2 # Sum over n_colors, weidth, height and average over batch_size Loss_tv = param.total_variation_weight*((x_i_diff + x_j_diff).sum(3).sum(2).sum(1).mean()) ## Total Loss Loss = Loss_content + Loss_style + Loss_tv Loss.backward() optimizerTNet.step() current_step = i + epoch*len(dataset) # Log results so we can see them in TensorBoard after log_value('errContent', Loss_content.data[0], current_step) log_value('errStyle', Loss_style.data[0], current_step) log_value('errTV', Loss_tv.data[0], current_step) log_value('errTotal', Loss.data[0], current_step) if current_step % 100 == 0: end = time.time() fmt = '[%d/%d][%d/%d] Loss_Total %.4f Loss_Content: %.4f Loss_Style: %.4f Loss_TV: %.4f time:%.4f' s = fmt % (epoch, param.n_epoch, i, len(dataset), Loss.data[0], Loss_content.data[0], Loss_style.data[0], Loss_tv.data[0], end - start) print(s) print(s, file=log_output) #x = RGB_to_BGR(x) #vutils.save_image(x.data, '%s/run-%d/images/real_samples_%03d.png' % (param.output_folder, run, current_step/50), normalize=True) x_transf = RGB_to_BGR(x_transf) vutils.save_image(x_transf.data, '%s/run-%d/images/stylized_samples_%03d.png' % (param.output_folder, run, current_step/50), normalize=True) if current_step % 1000 == 0: