def test_optim2(self):
    elbo_fn = beer.EvidenceLowerBound(len(self.data))
    for i, model in enumerate([self.ppca, self.plda]):
        with self.subTest(i=i):
            optim = beer.BayesianModelCoordinateAscentOptimizer(
                *model.grouped_parameters, lrate=1.)
            previous = -float('inf')
            for _ in range(100):
                optim.zero_grad()
                elbo = elbo_fn(model, self.data)
                elbo.natural_backward()
                optim.step()
                elbo = round(float(elbo) / (len(self.data) * self.dim), 3)
                self.assertGreaterEqual(elbo - previous, -TOLERANCE)
                previous = elbo
def test_optim(self):
    for i, model in enumerate(self.models):
        with self.subTest(model=self.conf_files[i]):
            optim = beer.BayesianModelCoordinateAscentOptimizer(
                model.mean_field_groups, lrate=1.)
            previous = -float('inf')
            for _ in range(N_ITER):
                self.seed(1)
                optim.zero_grad()
                elbo = beer.evidence_lower_bound(model, self.data)
                elbo.natural_backward()
                optim.step()
                elbo = round(float(elbo) / (len(self.data) * self.dim), 3)
                self.assertGreaterEqual(elbo - previous, -TOLERANCE)
                previous = elbo
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('lm', help='unigram language model to train')
    parser.add_argument('data', help='data')
    parser.add_argument('outlm', help='output model')
    args = parser.parse_args()

    # Load the model.
    with open(args.lm, 'rb') as fh:
        model = pickle.load(fh)

    # Load the data for the training.
    data = np.load(args.data)

    # Count the total number of data points in the training data.
    tot_counts = 0
    for utt in data:
        tot_counts += len(data[utt])

    # Prepare the optimizer for the training.
    params = model.mean_field_factorization()
    optimizer = beer.BayesianModelCoordinateAscentOptimizer(params, lrate=1.)
    optimizer.zero_grad()

    # Initialize the objective function.
    elbo = beer.evidence_lower_bound(datasize=tot_counts)

    # Re-estimate the LM.
    for utt in data:
        ft = torch.from_numpy(data[utt])
        elbo += beer.evidence_lower_bound(model, ft, datasize=tot_counts)
    elbo.backward()
    optimizer.step()

    # Save the model.
    with open(args.outlm, 'wb') as fh:
        pickle.dump(model, fh)
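# A minimal sketch of how the 'data' archive read by the LM script above
# could be prepared. The script only assumes an .npz file keyed by utterance
# id, where each entry is an array usable with torch.from_numpy; the
# utterance ids and token-id arrays below are purely illustrative.
import numpy as np

utterances = {
    'utt1': np.array([3, 17, 42, 8], dtype=np.int64),   # hypothetical token ids
    'utt2': np.array([5, 3, 99], dtype=np.int64),
}
np.savez('lm_data.npz', **utterances)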
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', type=int, default=-1,
                        help='number of utterances in each batch')
    parser.add_argument('--epochs', type=int, default=1,
                        help='number of epochs to train')
    parser.add_argument('--fast-eval', action='store_true')
    parser.add_argument('--kl-weight', type=float, default=1.,
                        help='weighting of KL div. of the ELBO')
    parser.add_argument('--lrate-nnet', type=float, default=1e-3,
                        help='learning rate for the nnet components')
    parser.add_argument('--lrate', type=float, default=1.,
                        help='learning rate')
    parser.add_argument('--nnet-optim-state',
                        help='file where to load/save state of the nnet '
                             'optimizer')
    parser.add_argument('--use-gpu', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('model', help='model to train')
    parser.add_argument('alis', help='alignments')
    parser.add_argument('feats', help='feature file')
    parser.add_argument('feat_stats', help='data statistics')
    parser.add_argument('out', help='output model')
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Load the data.
    alis = np.load(args.alis)
    feats = np.load(args.feats)
    stats = np.load(args.feat_stats)

    # Load the model and move it to the chosen device (CPU/GPU).
    with open(args.model, 'rb') as fh:
        model = pickle.load(fh)
    if args.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)

    # NNET optimizer.
    nnet_optim = torch.optim.Adam(model.modules_parameters(),
                                  lr=args.lrate_nnet, eps=1e-3,
                                  amsgrad=False, weight_decay=1e-2)
    if args.nnet_optim_state and os.path.isfile(args.nnet_optim_state):
        logging.debug('load nnet optimizer state: {}'.format(
            args.nnet_optim_state))
        optim_state = torch.load(args.nnet_optim_state)
        nnet_optim.load_state_dict(optim_state)

    # Prepare the optimizer for the training.
    params = model.mean_field_factorization()
    optimizer = beer.BayesianModelCoordinateAscentOptimizer(
        params, lrate=args.lrate, std_optim=nnet_optim)

    # If no batch_size is specified, use the whole data.
    batch_size = len(feats.files)
    if args.batch_size > 0:
        batch_size = args.batch_size

    tot_counts = int(stats['nframes'])
    for epoch in range(1, args.epochs + 1):
        # Shuffle the order of the utterances.
        keys = list(feats.keys())
        random.shuffle(keys)
        batches = [
            keys[i:i + batch_size]
            for i in range(0, len(keys), batch_size)
        ]
        logging.debug('Data shuffled into {} batches'.format(len(batches)))

        for batch_no, batch_keys in enumerate(batches, start=1):
            # Reset the gradients.
            optimizer.zero_grad()

            # Load the batch data.
            ft, labels = load_batch(feats, alis, batch_keys)
            ft, labels = ft.to(device), labels.to(device)

            # Compute the objective function.
            elbo = beer.evidence_lower_bound(model, ft, state_path=labels,
                                             kl_weight=args.kl_weight,
                                             datasize=tot_counts,
                                             fast_eval=args.fast_eval)

            # Compute the gradient of the model.
            #elbo.natural_backward()
            elbo.backward()

            # Clip the gradient to avoid gradient explosion.
            torch.nn.utils.clip_grad_norm_(model.modules_parameters(), 100.0)

            # Update the parameters.
            optimizer.step()

            elbo_value = float(elbo) / tot_counts
            log_msg = 'epoch={}/{} batch={}/{} elbo={}'
            logging.info(
                log_msg.format(epoch, args.epochs, batch_no, len(batches),
                               round(elbo_value, 3)))

    if args.nnet_optim_state:
        torch.save(nnet_optim.state_dict(), args.nnet_optim_state)

    with open(args.out, 'wb') as fh:
        pickle.dump(model.to(torch.device('cpu')), fh)
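# The script above calls load_batch, which is not shown in this section.
# Below is a minimal sketch of what such a helper could look like, assuming
# it simply concatenates the per-utterance features and alignment labels of
# the batch into two flat tensors; the real implementation may differ.
import numpy as np
import torch

def load_batch(feats, alis, batch_keys):
    '''Stack the features and alignment labels of a batch of utterances.'''
    ft = torch.from_numpy(
        np.concatenate([feats[key] for key in batch_keys], axis=0)).float()
    labels = torch.from_numpy(
        np.concatenate([alis[key] for key in batch_keys], axis=0)).long()
    return ft, labels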
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', type=int, default=-1,
                        help='number of utterances in each batch')
    parser.add_argument('--epochs', type=int, default=1,
                        help='number of epochs')
    parser.add_argument('--fast-eval', action='store_true')
    parser.add_argument('--lrate', type=float, default=1.,
                        help='learning rate')
    parser.add_argument('--use-gpu', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('hmm', help='hmm model to train')
    parser.add_argument('alis', help='alignments')
    parser.add_argument('feats', help='feature file')
    parser.add_argument('feat_stats', help='data statistics')
    parser.add_argument('out', help='output model')
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Load the data.
    alis = np.load(args.alis)
    feats = np.load(args.feats)
    stats = np.load(args.feat_stats)

    # Load the model and move it to the chosen device (CPU/GPU).
    with open(args.hmm, 'rb') as fh:
        model = pickle.load(fh)
    if args.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)

    # Prepare the optimizer for the training.
    params = model.mean_field_groups
    optimizer = beer.BayesianModelCoordinateAscentOptimizer(params,
                                                            lrate=args.lrate)

    # If no batch_size is specified, use the whole data.
    batch_size = len(feats.files)
    if args.batch_size > 0:
        batch_size = args.batch_size

    tot_counts = int(stats['nframes'])
    for epoch in range(1, args.epochs + 1):
        # Shuffle the order of the utterances.
        keys = list(feats.keys())
        random.shuffle(keys)
        batches = [keys[i: i + batch_size]
                   for i in range(0, len(keys), batch_size)]
        logging.debug('Data shuffled into {} batches'.format(len(batches)))

        for batch_no, batch_keys in enumerate(batches, start=1):
            # Reset the gradients.
            optimizer.zero_grad()

            elbo = beer.evidence_lower_bound(datasize=tot_counts)
            for uttid in batch_keys:
                # Load the batch data.
                ft = torch.from_numpy(feats[uttid]).float()
                ali = torch.from_numpy(alis[uttid]).long()
                ft, ali = ft.to(device), ali.to(device)

                # Compute the objective function.
                elbo += beer.evidence_lower_bound(model, ft, state_path=ali,
                                                  datasize=tot_counts,
                                                  fast_eval=args.fast_eval)

            # Compute the gradient of the model.
            elbo.natural_backward()

            # Update the parameters.
            optimizer.step()

            elbo_value = float(elbo) / tot_counts
            log_msg = 'epoch={}/{} batch={}/{} elbo={}'
            logging.info(log_msg.format(
                epoch, args.epochs, batch_no, len(batches),
                round(elbo_value, 3))
            )

            del ft, ali

    with open(args.out, 'wb') as fh:
        pickle.dump(model.to(torch.device('cpu')), fh)
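# The feature statistics archive read by the training scripts above only
# needs to expose an 'nframes' entry. Below is a minimal sketch of how such a
# file could be produced from a feature archive; the file names are
# illustrative and the real statistics file may contain additional fields
# (e.g. per-dimension mean and variance).
import numpy as np

feats = np.load('feats.npz')  # hypothetical feature archive, keyed by utterance
nframes = sum(feats[utt].shape[0] for utt in feats.files)
np.savez('feat_stats.npz', nframes=nframes)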
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--alignments', help='utterance alignments')
    parser.add_argument('--batch-size', type=int,
                        help='number of utterances in each batch')
    parser.add_argument('--epochs', type=int)
    parser.add_argument('--fast-eval', action='store_true')
    parser.add_argument('--infer-type', default='viterbi',
                        choices=['baum_welch', 'viterbi'],
                        help='how to compute the state posteriors')
    parser.add_argument('--lrate', type=float, help='learning rate')
    parser.add_argument('--tmpdir', help='directory to store intermediary '
                                         'models')
    parser.add_argument('--use-gpu', action='store_true')
    parser.add_argument('hmm', help='hmm model to train')
    parser.add_argument('feats', help='feature file')
    parser.add_argument('feat_stats', help='data statistics')
    parser.add_argument('out', help='output model')
    args = parser.parse_args()

    # Load the data for the training.
    feats = np.load(args.feats)
    ali = None
    if args.alignments:
        ali = np.load(args.alignments)
    stats = np.load(args.feat_stats)

    with open(args.hmm, 'rb') as fh:
        model = pickle.load(fh)
    if args.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)

    # Prepare the optimizer for the training.
    params = model.mean_field_groups
    optimizer = beer.BayesianModelCoordinateAscentOptimizer(params,
                                                            lrate=args.lrate)

    tot_counts = int(stats['nframes'])
    for epoch in range(1, args.epochs + 1):
        # Shuffle the order of the utterances.
        keys = list(feats.keys())
        random.shuffle(keys)
        batches = [
            keys[i:i + args.batch_size]
            for i in range(0, len(keys), args.batch_size)
        ]
        logging.debug('Data shuffled into {} batches'.format(len(batches)))

        # One mini-batch update.
        for batch_no, batch_keys in enumerate(batches, start=1):
            # Reset the gradients.
            optimizer.zero_grad()

            # Initialize the ELBO.
            elbo = beer.evidence_lower_bound(datasize=tot_counts)
            for utt in batch_keys:
                ft = torch.from_numpy(feats[utt]).float().to(device)

                # Get the alignment graph if provided.
                graph = None
                if ali is not None:
                    graph = ali[utt][0].to(device)

                elbo += beer.evidence_lower_bound(
                    model, ft, datasize=tot_counts,
                    fast_eval=args.fast_eval,
                    inference_graph=graph,
                    inference_type=args.infer_type)

            # Compute the gradient of the model.
            elbo.natural_backward()

            # Update the parameters.
            optimizer.step()

            elbo_value = float(elbo) / (tot_counts * len(batch_keys))
            log_msg = 'epoch={}/{} batch={}/{} ELBO={}'
            logging.info(
                log_msg.format(epoch, args.epochs, batch_no, len(batches),
                               round(elbo_value, 3)))

        if args.tmpdir:
            path = os.path.join(args.tmpdir, str(epoch) + '.mdl')
            with open(path, 'wb') as fh:
                pickle.dump(model.to(torch.device('cpu')), fh)

    with open(args.out, 'wb') as fh:
        pickle.dump(model.to(torch.device('cpu')), fh)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=1,
                        help='number of epochs to train')
    parser.add_argument('--fast-eval', action='store_true')
    parser.add_argument('--lrate', type=float, default=1.,
                        help='learning rate')
    parser.add_argument('--use-gpu', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('model', help='model to train')
    parser.add_argument('batches', help='list of batch files')
    parser.add_argument('feat_stats', help='data statistics')
    parser.add_argument('out', help='output model')
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Load the data.
    stats = np.load(args.feat_stats)

    # Load the batches.
    batches_list = []
    with open(args.batches, 'r') as f:
        for line in f:
            batches_list.append(line.strip())

    # Load the model and move it to the chosen device (CPU/GPU).
    with open(args.model, 'rb') as fh:
        model = pickle.load(fh)
    if args.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)

    # Prepare the optimizer for the training.
    params = model.mean_field_groups
    optimizer = beer.BayesianModelCoordinateAscentOptimizer(params,
                                                            lrate=args.lrate)

    tot_counts = int(stats['nframes'])
    for epoch in range(1, args.epochs + 1):
        # Shuffle the order of the batches.
        random.shuffle(batches_list)

        for batch_no, path in enumerate(batches_list, start=1):
            # Reset the gradients.
            optimizer.zero_grad()

            # Load the batch data.
            batch = np.load(path)
            ft = torch.from_numpy(batch['features']).float()
            ft = ft.to(device)

            # Compute the objective function.
            elbo = beer.evidence_lower_bound(model, ft, datasize=tot_counts,
                                             fast_eval=args.fast_eval)

            # Compute the gradient of the model.
            elbo.natural_backward()

            # Update the parameters.
            optimizer.step()

            elbo_value = float(elbo) / tot_counts
            log_msg = 'epoch={}/{} batch={}/{} elbo={}'
            logging.info(
                log_msg.format(epoch, args.epochs, batch_no,
                               len(batches_list), round(elbo_value, 3)))

    with open(args.out, 'wb') as fh:
        pickle.dump(model.to(torch.device('cpu')), fh)
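# The last script expects a text file listing one batch archive per line, and
# each archive to hold the stacked frames under a 'features' key. A minimal
# sketch of how such batches could be written out from a feature archive; the
# grouping of utterances, the batch size and the file names below are purely
# illustrative.
import numpy as np

feats = np.load('feats.npz')  # hypothetical feature archive, keyed by utterance
keys = list(feats.keys())
batch_size = 100
with open('batches.txt', 'w') as listing:
    for batch_no, start in enumerate(range(0, len(keys), batch_size), start=1):
        batch_keys = keys[start:start + batch_size]
        features = np.concatenate([feats[key] for key in batch_keys], axis=0)
        path = 'batch{}.npz'.format(batch_no)
        np.savez(path, features=features)
        listing.write(path + '\n')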