def update(config):
    """Unsupervised adaptation of a feedforward acoustic model.

    Uses a frozen RNN auto-encoder performance monitor (PM): posteriors of the
    acoustic model are fed through the PM and the model is updated to minimize
    the PM reconstruction (MSE) loss. A contrastive (time-shifted) negative
    loss is computed for logging. Dev loss/FER are tracked every epoch and
    dumped to ``<store_path>/<experiment_name>.dir``.

    Args:
        config: argparse-style namespace; reads model, pm, cmvn, scp,
            egs_config, dev_egs, store_path, experiment_name, use_gpu,
            optimizer, learning_rate, lr_factor, batch_size, epochs,
            max_seq_len, time_shifts.
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID (renamed from `id`, which
        # shadowed the builtin)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id
        model = model.cuda()

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir, config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    logging.info('Hidden Dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Time shifts: %s' % (config.time_shifts))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    # Cross-entropy on the dev set; the adaptation loss itself is
    # samplewise_mse below (nn.MSELoss was constructed here but never used).
    dev_criterion = nn.CrossEntropyLoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    lr = config.learning_rate

    # Contrastive shift offsets, e.g. "1,2,3"
    ts_list = [int(t) for t in config.time_shifts.split(',')]
    max_ts = max(ts_list)

    # Figure out all feature stuff
    shell_cmd = "cat {:s} | shuf > temp".format(config.scp)
    subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)

    feats_config = pickle.load(open(config.egs_config, 'rb'))

    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, 'temp')
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, 'temp')
    else:
        cmd = 'temp'

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Load performance monitoring model
    pm_model = torch.load(config.pm, map_location=lambda storage, loc: storage)
    ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                              pm_model['bn_dim'],
                              pm_model['encoder_num_layers'],
                              pm_model['decoder_num_layers'],
                              pm_model['hidden_dim'])
    ae_model.load_state_dict(pm_model['model_state_dict'])

    if config.use_gpu:
        ae_model.cuda()

    for p in ae_model.parameters():  # Do not update performance monitoring block
        p.requires_grad = False

    mean, _ = get_cmvn(config.cmvn)

    ep_loss_dev = []
    ep_fer_dev = []

    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=config.batch_size,
                                              shuffle=True)

    init_fer = True
    if init_fer:
        # Compute initial performance on dev set
        val_losses = []
        val_fer = []
        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)

            # model returns (hidden_outputs, logits); logits feed the CE loss
            _, batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())

            if config.use_gpu:
                val_fer.append(compute_fer(batch_x.cpu().data.numpy(),
                                           batch_l.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(batch_x.data.numpy(),
                                           batch_l.data.numpy()))

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
            np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

    for epoch in range(config.epochs):
        # Accumulator for one mini-batch of padded posterior sequences
        if config.use_gpu:
            batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim']).cuda()
        else:
            batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim'])

        lens = []
        utt_count = 0
        update_num = 0
        val_losses = []
        val_fer = []
        train_losses_pos = []
        train_losses_neg = []

        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            # NOTE(review): model kept in eval() mode while its parameters are
            # updated below — presumably deliberate (no dropout during
            # adaptation); confirm.
            model.eval()
            if config.use_gpu:
                out = model(Variable(torch.FloatTensor(mat)).cuda())
            else:
                out = model(Variable(torch.FloatTensor(mat)))

            # Mean-normalized posteriors (out[1]) are the PM input
            if config.use_gpu:
                post = out[1] - torch.FloatTensor(mean).cuda()
            else:
                post = out[1] - torch.FloatTensor(mean)

            # True length must be recorded before padding to max_seq_len
            lens.append(min(post.shape[0], config.max_seq_len))
            post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
            batch = torch.cat([batch, post[None, :, :]], 0)
            utt_count += 1
            sys.stdout.flush()

            if utt_count == config.batch_size:
                update_num += 1

                #### DO THE ADAPTATION

                # Sort by length (descending) for packed-sequence RNN input
                lens = torch.IntTensor(lens)
                _, indices = torch.sort(lens, descending=True)
                batch_x = batch[indices]
                batch_l = lens[indices]

                outputs = ae_model(batch_x, batch_l)

                # First positive loss (central frames only, so the shifted
                # negative slices below stay in range)
                mse_pos = samplewise_mse(outputs[:, max_ts:-max_ts - 1, :],
                                         batch_x[:, max_ts:-max_ts - 1, :])

                # Now find negative loss
                s = outputs[:, max_ts:-max_ts - 1, :].size()
                if config.use_gpu:
                    mse_neg = torch.zeros(s[0], s[1]).cuda()
                else:
                    mse_neg = torch.zeros(s[0], s[1])

                count = 0
                for t in ts_list:
                    count += 1
                    mse_neg += samplewise_mse(
                        outputs[:, max_ts:-max_ts - 1, :],
                        batch_x[:, max_ts + t:-max_ts - 1 + t, :])
                    mse_neg += samplewise_mse(
                        outputs[:, max_ts:-max_ts - 1, :],
                        batch_x[:, max_ts - t:-max_ts - 1 - t, :])

                mse_neg = mse_neg / (2 * count)
                # Only the positive loss is optimized; mse_neg is logged.
                loss = mse_pos.mean()  # (mse_pos / mse_neg).mean()
                train_losses_pos.append(mse_pos.mean().item())
                train_losses_neg.append(mse_neg.mean().item())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Reset accumulators for the next mini-batch
                if config.use_gpu:
                    batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim']).cuda()
                else:
                    batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim'])
                lens = []
                utt_count = 0

        # CHECK IF ADAPTATION IS WORKING AT ALL
        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)

            _, batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())

            if config.use_gpu:
                val_fer.append(compute_fer(batch_x.cpu().data.numpy(),
                                           batch_l.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(batch_x.data.numpy(),
                                           batch_l.data.numpy()))

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} update, Tr +ve MSE Loss: {:.3f} :: Tr -ve MSE Loss: {:.3f} :: Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
            epoch, np.mean(train_losses_pos), np.mean(train_losses_neg),
            np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

        torch.save(
            ep_loss_dev,
            open(os.path.join(model_dir, "dev_epoch{:d}.loss".format(epoch + 1)), 'wb'))
        torch.save(
            ep_fer_dev,
            open(os.path.join(model_dir, "dev_epoch{:d}.fer".format(epoch + 1)), 'wb'))

        # Change learning rate to half
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
def update(config):
    """Semi-supervised adaptation driven by an RNN-AE performance monitor.

    Each epoch: (1) optionally run an unsupervised update of the acoustic
    model through the frozen RNN auto-encoder reconstruction loss; (2) score
    every utterance with the RNN-AE and, for utterances under
    ``config.score_threshold``, take the model's own argmax predictions as
    pseudo-labels; (3) run a supervised cross-entropy update on those
    pseudo-labeled utterances. Dev loss/FER are tracked and dumped per epoch.

    Args:
        config: argparse-style namespace; reads model, pm, cmvn, scp,
            egs_config, dev_egs, store_path, experiment_name, use_gpu,
            optimizer, learning_rate, lr_factor, batch_size, epochs,
            max_seq_len, time_shift, score_threshold.
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir, config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    logging.info('Hidden Dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.MSELoss()          # RNN-AE reconstruction loss
    dev_criterion = nn.CrossEntropyLoss()  # dev-set / pseudo-label loss

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    lr = config.learning_rate

    # Figure out all feature stuff
    shell_cmd = "cat {:s} | shuf > temp".format(config.scp)
    r = subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)

    feats_config = pickle.load(open(config.egs_config, 'rb'))

    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, 'temp')
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, 'temp')
    else:
        cmd = 'temp'

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Load performance monitoring model
    pm_model = torch.load(config.pm, map_location=lambda storage, loc: storage)
    ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                              pm_model['bn_dim'],
                              pm_model['encoder_num_layers'],
                              pm_model['decoder_num_layers'],
                              pm_model['hidden_dim'])
    ae_model.load_state_dict(pm_model['model_state_dict'])

    if config.use_gpu:
        ae_model.cuda()

    for p in ae_model.parameters():  # Do not update performance monitoring block
        p.requires_grad = False

    mean, _ = get_cmvn(config.cmvn)

    ep_loss_dev = []
    ep_fer_dev = []

    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader_check = torch.utils.data.DataLoader(dataset, batch_size=5000,
                                                    shuffle=True)

    # Compute initial performance on dev set
    val_losses = []
    val_fer = []
    for batch_x, batch_l in data_loader_check:
        if config.use_gpu:
            batch_x = Variable(batch_x).cuda()
            batch_l = Variable(batch_l).cuda()
        else:
            batch_x = Variable(batch_x)
            batch_l = Variable(batch_l)

        batch_x = model(batch_x)
        val_loss = dev_criterion(batch_x, batch_l)
        val_losses.append(val_loss.item())

        if config.use_gpu:
            val_fer.append(compute_fer(batch_x.cpu().data.numpy(),
                                       batch_l.cpu().data.numpy()))
        else:
            val_fer.append(compute_fer(batch_x.data.numpy(),
                                       batch_l.data.numpy()))

    ep_loss_dev.append(np.mean(val_losses))
    ep_fer_dev.append(np.mean(val_fer))

    print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
        np.mean(val_losses), np.mean(val_fer))
    logging.info(print_log)

    # unsup_up: run the unsupervised RNN-AE pass until the first supervised
    # update happens, then stick to supervised updates.
    unsup_up = True
    cc = 0
    for epoch in range(config.epochs):
        if unsup_up:
            # First lets do an unsupervised update with RNN-AE
            if config.use_gpu:
                batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim']).cuda()
            else:
                batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim'])
            utt_count = 0
            update_num = 0
            ae_loss = []
            lens = []
            model.train()
            for utt_id, mat in kaldi_io.read_mat_ark(cmd):
                if config.use_gpu:
                    post = model(Variable(torch.FloatTensor(mat)).cuda()) - Variable(
                        torch.FloatTensor(mean)).cuda()
                else:
                    post = model(Variable(torch.FloatTensor(mat))) - Variable(
                        torch.FloatTensor(mean))

                # Record true length before padding to max_seq_len
                lens.append(min(post.shape[0], config.max_seq_len))
                post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
                batch = torch.cat([batch, post[None, :, :]], 0)
                utt_count += 1
                sys.stdout.flush()

                if utt_count == config.batch_size:
                    update_num += 1

                    #### DO THE ADAPTATION

                    lens = torch.IntTensor(lens)
                    _, indices = torch.sort(lens, descending=True)
                    batch_x = batch[indices]
                    batch_l = lens[indices]

                    # time_shift > 0 trains the AE to predict shifted frames
                    if config.time_shift == 0:
                        outputs = ae_model(batch_x, batch_l)
                    else:
                        outputs = ae_model(batch_x[:, :-config.time_shift, :],
                                           batch_l - config.time_shift)
                    optimizer.zero_grad()
                    if config.time_shift == 0:
                        loss = criterion(outputs, batch_x)
                    else:
                        loss = criterion(outputs, batch_x[:, config.time_shift:, :])
                    ae_loss.append(loss.item() / (config.max_seq_len * config.batch_size))
                    loss.backward()
                    optimizer.step()

                    if config.use_gpu:
                        batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim']).cuda()
                    else:
                        batch = torch.empty(0, config.max_seq_len, pm_model['feature_dim'])
                    lens = []
                    utt_count = 0
            logging.info('Finished unsupervised update of nnet')
        else:
            logging.info('Skipping unsupervised update of nnet')

        # Check if any utterance has a good RNN-AE score
        new_egs = torch.empty(0, nnet['feature_dim'] * nnet['num_frames'] + 1)
        new_utt_count = 0
        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            if config.use_gpu:
                post = model(Variable(torch.FloatTensor(mat)).cuda()) - Variable(
                    torch.FloatTensor(mean)).cuda()
            else:
                post = model(Variable(torch.FloatTensor(mat))) - Variable(
                    torch.FloatTensor(mean))

            lens = []
            lens.append(post.shape[0])
            post = post[None, :, :]
            if config.time_shift == 0:
                outputs = ae_model(post, lens)
            else:
                # BUGFIX: original did `lens - config.time_shift` on a Python
                # list, which raises TypeError whenever time_shift != 0.
                outputs = ae_model(post[:, :-config.time_shift, :],
                                   [l - config.time_shift for l in lens])
            if config.time_shift == 0:
                loss = criterion(outputs, post).item() / config.max_seq_len
            else:
                loss = criterion(outputs,
                                 post[:, config.time_shift:, :]).item() / config.max_seq_len

            # Add the utterance for supervised update
            if loss < config.score_threshold:
                new_utt_count += 1
                # Use the model's own argmax outputs as pseudo-labels
                if config.use_gpu:
                    labs = np.argmax(
                        (model(Variable(torch.FloatTensor(mat)).cuda())).cpu().data.numpy(),
                        axis=1)
                else:
                    labs = np.argmax(
                        (model(Variable(torch.FloatTensor(mat)))).data.numpy(),
                        axis=1)
                add_egs = np.hstack((mat, labs[:, np.newaxis]))
                new_egs = torch.cat([new_egs, torch.FloatTensor(add_egs)])

        logging.info('Added {:d} utterances from new domain to training set'.format(
            new_utt_count))

        ## Update with these new utterances
        if new_utt_count == 0:
            logging.info('No supervised updates with zero utterances, skipping to next epoch... ')
        else:
            cc += 1
            if cc == 20:
                # Periodically loosen the acceptance threshold
                config.score_threshold = config.score_threshold * 1.1
                cc = 0
            unsup_up = False
            train_data = new_egs[:, 0:-1]
            train_labels = new_egs[:, -1].long()
            dataset = nnetDataset(train_data, train_labels)
            data_loader = torch.utils.data.DataLoader(dataset, batch_size=5000,
                                                      shuffle=True)
            model.train()
            train_losses = []
            tr_fer = []
            for batch_x, batch_l in data_loader:
                if config.use_gpu:
                    batch_x = Variable(batch_x).cuda()
                    batch_l = Variable(batch_l).cuda()
                else:
                    batch_x = Variable(batch_x)
                    batch_l = Variable(batch_l)

                batch_x = model(batch_x)
                optimizer.zero_grad()
                loss = dev_criterion(batch_x, batch_l)
                train_losses.append(loss.item())

                if config.use_gpu:
                    tr_fer.append(compute_fer(batch_x.cpu().data.numpy(),
                                              batch_l.cpu().data.numpy()))
                else:
                    tr_fer.append(compute_fer(batch_x.data.numpy(),
                                              batch_l.data.numpy()))

                loss.backward()
                optimizer.step()

        ## CHECK IF ADAPTATION IS WORKING AT ALL
        model.eval()
        val_losses = []
        val_fer = []
        for batch_x, batch_l in data_loader_check:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)

            batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())

            if config.use_gpu:
                val_fer.append(compute_fer(batch_x.cpu().data.numpy(),
                                           batch_l.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(batch_x.data.numpy(),
                                           batch_l.data.numpy()))

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} AE Loss: {:.3f} update, Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
            epoch, np.mean(ae_loss), np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

        torch.save(
            ep_loss_dev,
            open(os.path.join(model_dir, "dev_epoch{:d}.loss".format(epoch + 1)), 'wb'))
        torch.save(
            ep_fer_dev,
            open(os.path.join(model_dir, "dev_epoch{:d}.fer".format(epoch + 1)), 'wb'))

        # Change learning rate to half
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
def update(config):
    """Multi-layer performance-monitor adaptation of the acoustic model.

    Loads one RNN auto-encoder PM per monitored layer (``config.pms``,
    comma-separated), each with its own CMVN mean (``config.cmvns``), and
    adapts the acoustic model by minimizing the summed per-layer AE
    reconstruction losses (one backward per PM, graph retained between them).
    Dev loss/FER are tracked per epoch and dumped to
    ``<store_path>/<experiment_name>.dir``.

    Args:
        config: argparse-style namespace; reads model, pms, cmvns, scp,
            egs_config, dev_egs, store_path, experiment_name, use_gpu,
            optimizer, learning_rate, lr_factor, batch_size, epochs,
            max_seq_len, time_shift.
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    logging.info('Hidden Dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.MSELoss()
    dev_criterion = nn.CrossEntropyLoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    lr = config.learning_rate

    # Figure out all feature stuff
    shell_cmd = "cat {:s} | shuf > temp".format(config.scp)
    r = subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)

    feats_config = pickle.load(open(config.egs_config, 'rb'))

    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, 'temp')
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, 'temp')
    else:
        cmd = 'temp'

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])

    # Load performance monitoring models (one frozen RNN-AE per layer)
    pm_paths = config.pms.split(',')
    pm_models = []
    feat_dims = []
    for path in pm_paths:
        pm_model = torch.load(path, map_location=lambda storage, loc: storage)
        ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                                  pm_model['bn_dim'],
                                  pm_model['encoder_num_layers'],
                                  pm_model['decoder_num_layers'],
                                  pm_model['hidden_dim'])
        ae_model.load_state_dict(pm_model['model_state_dict'])
        feat_dims.append(pm_model['feature_dim'])
        if config.use_gpu:
            ae_model.cuda()
        for p in ae_model.parameters():  # Do not update performance monitoring block
            p.requires_grad = False
        pm_models.append(ae_model)

    cmvn_paths = config.cmvns.split(',')
    means = []
    for path in cmvn_paths:
        mean, _ = get_cmvn(path)
        means.append(mean)

    if len(cmvn_paths) != len(pm_paths):
        logging.error("Number of cmvn paths not equal to number of model paths, exiting training!")
        sys.exit(1)
    else:
        num_pm_models = len(pm_paths)

    ep_loss_dev = []
    ep_fer_dev = []

    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=50000, shuffle=True)

    # Compute initial performance on dev set
    val_losses = []
    val_fer = []
    for batch_x, batch_l in data_loader:
        if config.use_gpu:
            batch_x = Variable(batch_x).cuda()
            batch_l = Variable(batch_l).cuda()
        else:
            batch_x = Variable(batch_x)
            batch_l = Variable(batch_l)

        _, batch_x = model(batch_x)
        val_loss = dev_criterion(batch_x, batch_l)
        val_losses.append(val_loss.item())

        if config.use_gpu:
            val_fer.append(compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
        else:
            val_fer.append(compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))

    ep_loss_dev.append(np.mean(val_losses))
    ep_fer_dev.append(np.mean(val_fer))

    print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
        np.mean(val_losses), np.mean(val_fer))
    logging.info(print_log)

    for epoch in range(config.epochs):
        # One padded-sequence accumulator per PM stream
        batches = []
        for idx in range(num_pm_models):
            if config.use_gpu:
                batch = torch.empty(0, config.max_seq_len, feat_dims[idx]).cuda()
            else:
                batch = torch.empty(0, config.max_seq_len, feat_dims[idx])
            batches.append(batch)

        lens = []
        utt_count = 0
        update_num = 0
        val_losses = []
        val_fer = []
        tr_losses = []
        for idx in range(num_pm_models):
            tr_losses.append([])

        # I want to dump all the posteriors first
        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            if config.use_gpu:
                out = model(Variable(torch.FloatTensor(mat)).cuda())
            else:
                out = model(Variable(torch.FloatTensor(mat)))

            # Stream 0: mean-normalized posteriors
            if config.use_gpu:
                post = out[1] - Variable(torch.FloatTensor(means[0])).cuda()
            else:
                post = out[1] - Variable(torch.FloatTensor(means[0]))

            # BUGFIX: record the true (pre-padding) length here; original
            # appended after F.pad, so every length equaled max_seq_len and
            # length-sorted packing was meaningless.
            lens.append(min(post.size(0), config.max_seq_len))

            post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
            batch = batches[0]
            batch = torch.cat([batch, post[None, :, :]], 0)
            batches[0] = batch

            # Remaining streams: mean-normalized hidden-layer outputs
            for idx in range(1, num_pm_models):
                if config.use_gpu:
                    post = out[0][idx] - Variable(torch.FloatTensor(means[idx])).cuda()
                else:
                    post = out[0][idx] - Variable(torch.FloatTensor(means[idx]))
                post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
                batch = batches[idx]
                batch = torch.cat([batch, post[None, :, :]], 0)
                batches[idx] = batch

            utt_count += 1
            sys.stdout.flush()

            if utt_count == config.batch_size:
                update_num += 1

                ## DO THE ADAPTATION

                lens = torch.IntTensor(lens)
                _, indices = torch.sort(lens, descending=True)

                for idx in range(num_pm_models):
                    batch_x = batches[idx][indices]
                    ae_model = pm_models[idx]
                    batch_l = lens[indices]
                    # (debug prints of batch_x/batch_l sizes removed)

                    if config.time_shift == 0:
                        outputs = ae_model(batch_x, batch_l)
                    else:
                        outputs = ae_model(batch_x[:, :-config.time_shift, :],
                                           batch_l - config.time_shift)
                    optimizer.zero_grad()
                    if config.time_shift == 0:
                        loss = criterion(outputs, batch_x)
                    else:
                        loss = criterion(outputs, batch_x[:, config.time_shift:, :])

                    tl = tr_losses[idx]
                    tl.append(loss.item() / (config.max_seq_len * config.batch_size))
                    tr_losses[idx] = tl

                    # Retain the shared forward graph until the last PM's backward
                    if idx < num_pm_models - 1:
                        loss.backward(retain_graph=True)
                    else:
                        loss.backward()
                    optimizer.step()

                # Reset accumulators for the next mini-batch
                batches = []
                for idx in range(num_pm_models):
                    if config.use_gpu:
                        batch = torch.empty(0, config.max_seq_len, feat_dims[idx]).cuda()
                    else:
                        batch = torch.empty(0, config.max_seq_len, feat_dims[idx])
                    batches.append(batch)
                lens = []
                utt_count = 0

        logging.info("Finished unsupervised adaptation for epoch {:d} with multi-layer RNN-AE Loss".format(epoch))

        # CHECK IF ADAPTATION IS WORKING AT ALL
        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)

            _, batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())

            if config.use_gpu:
                val_fer.append(compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} update ".format(epoch)
        for idx in range(num_pm_models):
            print_log = print_log + "Tr loss layer {:d} = {:.3f} | ".format(idx, np.mean(tr_losses[idx]))
        print_log = print_log + "Dev loss: {:.3f} | Dev FER: {:.2f}".format(np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

        torch.save(ep_loss_dev,
                   open(os.path.join(model_dir, "dev_epoch{:d}.loss".format(epoch + 1)), 'wb'))
        torch.save(ep_fer_dev,
                   open(os.path.join(model_dir, "dev_epoch{:d}.fer".format(epoch + 1)), 'wb'))

        # Change learning rate to half
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
def update(config):
    """Score every utterance with the per-layer performance monitors.

    For each utterance, runs the acoustic model, feeds each monitored stream
    (posteriors plus hidden-layer outputs) through its frozen RNN auto-encoder,
    and records a per-utterance score: the contrastive ratio
    ``mean(pos_loss / weighted_neg_loss)`` when ``config.time_shifts`` is set,
    or the plain mean reconstruction loss otherwise. Scores are pickled to
    ``config.out_file`` as ``{pm_index: {utt_id: score}}``.

    Args:
        config: argparse-style namespace; reads model, pms, cmvns, scp,
            egs_config, log_file, out_file, use_gpu, loss ("MSE"/"L1"),
            neg_weight, time_shifts, override_trans_path.
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=config.log_file, filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    logging.info('Hidden Dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Negative loss weight: %f' % (config.neg_weight))
    logging.info('Contrastive time shifts: %s' % (config.time_shifts))

    if config.loss == "MSE":
        criterion = samplewise_mse
    elif config.loss == "L1":
        criterion = samplewise_abs
    else:
        logging.info('Loss function {:s} is not supported'.format(config.loss))
        sys.exit(1)

    # time_shifts may legitimately be unset -> no contrastive scoring
    if config.time_shifts:
        ts_list = [int(t) for t in config.time_shifts.split(',')]
        max_ts = max(ts_list)
    else:
        ts_list = None
        max_ts = None

    # Figure out all feature stuff
    shuff_file = config.scp

    feats_config = pickle.load(open(config.egs_config, 'rb'))

    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]
        if config.override_trans_path is not None:
            trans_path = config.override_trans_path

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, shuff_file)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, shuff_file)
    else:
        cmd = shuff_file

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Load performance monitoring models (one frozen RNN-AE per stream)
    pm_paths = config.pms.split(',')
    pm_models = []
    feat_dims = []
    for path in pm_paths:
        pm_model = torch.load(path, map_location=lambda storage, loc: storage)
        ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                                  pm_model['bn_dim'],
                                  pm_model['encoder_num_layers'],
                                  pm_model['decoder_num_layers'],
                                  pm_model['hidden_dim'])
        ae_model.load_state_dict(pm_model['model_state_dict'])
        feat_dims.append(pm_model['feature_dim'])
        if config.use_gpu:
            ae_model.cuda()
        for p in ae_model.parameters():  # Do not update performance monitoring block
            p.requires_grad = False
        pm_models.append(ae_model)

    cmvn_paths = config.cmvns.split(',')
    means = []
    for path in cmvn_paths:
        mean, _ = get_cmvn(path)
        means.append(mean)

    if len(cmvn_paths) != len(pm_paths):
        logging.error("Number of cmvn paths not equal to number of model paths, exiting training!")
        sys.exit(1)
    else:
        num_pm_models = len(pm_paths)

    # pm_scores[pm_index][utt_id] = score
    pm_scores = {}
    for idx in range(num_pm_models):
        pm_scores[idx] = {}

    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        batches = []
        lens = mat.shape[0]

        if config.use_gpu:
            out = model(Variable(torch.FloatTensor(mat)).cuda())
        else:
            out = model(Variable(torch.FloatTensor(mat)))

        # Stream 0: mean-normalized posteriors
        if config.use_gpu:
            post = out[1] - Variable(torch.FloatTensor(means[0])).cuda()
        else:
            post = out[1] - Variable(torch.FloatTensor(means[0]))
        batches.append(post)

        # Remaining streams: mean-normalized hidden-layer outputs
        # NOTE(review): indexes out[0][-idx] (from the end), unlike the
        # training script which uses out[0][idx] — confirm intended.
        for idx in range(1, num_pm_models):
            if config.use_gpu:
                post = out[0][-idx] - Variable(torch.FloatTensor(means[idx])).cuda()
            else:
                post = out[0][-idx] - Variable(torch.FloatTensor(means[idx]))
            batches.append(post)

        ## Get the PM scores
        lens = torch.IntTensor([lens])
        for idx in range(num_pm_models):
            batch_x = batches[idx]
            batch_x = batch_x[None, :, :]
            ae_model = pm_models[idx]
            batch_l = lens
            outputs = ae_model(batch_x, batch_l)

            # First positive loss (trim edges so shifted slices stay in range)
            if max_ts:
                mse_pos = criterion(outputs[:, max_ts:-max_ts - 1, :],
                                    batch_x[:, max_ts:-max_ts - 1, :])
            else:
                mse_pos = criterion(outputs, batch_x)

            # Now find negative loss.
            # BUGFIX: original computed the negative loss unconditionally and
            # crashed (slicing with max_ts=None, iterating ts_list=None) when
            # config.time_shifts was unset; guard the contrastive section.
            if ts_list:
                s = outputs[:, max_ts:-max_ts - 1, :].size()
                if config.use_gpu:
                    mse_neg = torch.zeros(s[0], s[1]).cuda()
                else:
                    mse_neg = torch.zeros(s[0], s[1])

                count = 0
                for t in ts_list:
                    count += 1
                    mse_neg += criterion(outputs[:, max_ts:-max_ts - 1, :],
                                         batch_x[:, max_ts + t:-max_ts - 1 + t, :])
                    mse_neg += criterion(outputs[:, max_ts:-max_ts - 1, :],
                                         batch_x[:, max_ts - t:-max_ts - 1 - t, :])

                mse_neg = (mse_neg * config.neg_weight) / (2 * count)
                loss = (mse_pos / mse_neg).mean()
            else:
                # No contrastive shifts: score is the plain reconstruction loss
                loss = mse_pos.mean()

            pk = pm_scores[idx]
            pk[utt_id] = loss.item()
            pm_scores[idx] = pk

    pickle.dump(pm_scores, open(os.path.join(config.out_file), "wb"))