def main(args, logger):
    logger.debug(f'reading configuration file: {args.conf}')
    with open(args.conf, 'r') as f:
        conf = yaml.load(f)
    groups_conf = {group_conf['group_name']: group_conf for group_conf in conf}

    # Load the acoustic unit names and the group each of them belongs to.
    logger.debug('loading the acoustic unit names and their group')
    with open(args.units, 'r') as f:
        grouped_unitnames = defaultdict(list)
        for line in f:
            name, group = line.strip().split()
            grouped_unitnames[group].append(name)

    # Get the mean/variance used to initialize the emission densities.
    if not args.dataset:
        logger.debug('no dataset provided, assuming zero mean and '
                     'identity covariance matrix')
        mean, var = torch.zeros(args.dimension).float(), \
                    torch.ones(args.dimension).float()
    else:
        logger.debug(f'using "{args.dataset}" dataset for initialization')
        with open(args.dataset, 'rb') as f:
            dataset = pickle.load(f)
        mean, var = dataset.mean, dataset.var

    # Create one HMM graph per unit and one set of pdfs per group.
    start_pdf_id = 0
    pdfs = []
    units = {}
    for group in grouped_unitnames:
        logger.debug(f'creating HMMs for group "{group}"')
        group_conf = groups_conf[group]
        tot_emitting_states = 0
        for name in grouped_unitnames[group]:
            logger.debug(f'creating HMM for unit "{name}"')
            graph, start_pdf_id = create_unit_graph(group_conf['topology'],
                                                    start_pdf_id)
            units[name] = graph
            tot_emitting_states += count_emitting_state(graph)
        pdfs.append(create_pdfs(mean, var, tot_emitting_states, group_conf))
    emissions = beer.JointModelSet(pdfs)

    logger.debug('saving the HMMs on disk...')
    with open(args.out, 'wb') as f:
        pickle.dump((units, emissions), f)

    logger.info(f'created {len(units)} HMMs for a total of {len(emissions)}'
                f' emitting states')
    logger.info(f'expected features dimension: {len(mean)}')
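
# Illustration (assumption, not part of the recipe): the "units" file parsed
# above lists one "<unit-name> <group-name>" pair per line.  The unit and
# group labels below are made-up examples.
_EXAMPLE_UNITS_FILE = '''\
sil non-speech
a1 speech
a2 speech
'''
# Running these lines through the parsing loop above would give:
#   grouped_unitnames == {'non-speech': ['sil'], 'speech': ['a1', 'a2']}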
def main(args, logger):
    logger.debug(f'reading configuration file: {args.conf}')
    with open(args.conf, 'r') as f:
        conf = yaml.load(f)

    # Get the mean/variance used to initialize the emission densities.
    if not args.dataset:
        logger.debug('no dataset provided, assuming zero mean and '
                     'identity covariance matrix')
        mean, var = torch.zeros(args.dim).float(), \
                    torch.ones(args.dim).float()
    else:
        logger.debug(f'using "{args.dataset}" dataset for initialization')
        with open(args.dataset, 'rb') as f:
            dataset = pickle.load(f)
        mean, var = dataset.mean, dataset.var

    # Create one HMM graph per unit and one set of pdfs per group.  Unit names
    # are generated from the group name: <group_name>1, <group_name>2, ...
    start_pdf_id = 0
    pdfs = []
    units = {}
    for unit_group in conf:
        prefix = unit_group['group_name']
        logger.debug(f'creating HMMs for group "{prefix}"')
        tot_emitting_states = 0
        for i in range(1, unit_group['n_units'] + 1):
            unit_name = prefix + str(i)
            graph, start_pdf_id = create_unit_graph(unit_group['topology'],
                                                    start_pdf_id)
            units[unit_name] = graph
            tot_emitting_states += count_emitting_state(graph)
        pdfs.append(create_pdfs(mean, var, tot_emitting_states, unit_group))
    emissions = beer.JointModelSet(pdfs)

    logger.debug('saving the HMMs on disk...')
    with open(args.out, 'wb') as f:
        pickle.dump((units, emissions), f)

    logger.info(f'created {len(units)} HMMs for a total of {len(emissions)}'
                f' emitting states')
    logger.info(f'expected features dimension: {len(mean)}')
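
# Illustration (assumption) of the YAML configuration consumed by the two
# versions of main() above: a list of group entries.  Only the keys actually
# read in this section ('group_name', 'n_units', 'topology', plus
# 'n_state_per_unit' in the version below) are certain; the example values and
# any extra per-group emission settings passed to create_pdfs /
# create_emissions are made up.
_EXAMPLE_CONF = [
    {
        'group_name': 'speech',     # group name, also the unit-name prefix above
        'n_units': 40,              # number of units in the group
        'n_state_per_unit': 5,      # states per unit (used by the version below)
        'topology': None,           # topology specification; format not shown here
    },
    {
        'group_name': 'non-speech',
        'n_units': 1,
        'n_state_per_unit': 3,
        'topology': None,
    },
]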
def main():
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--stats',
                       help='Feature statistics file for hmm model')
    group.add_argument('--dim', type=int,
                       help='Dimension of feature, used for vae-hmm model')
    parser.add_argument('conf', help='Configuration file')
    parser.add_argument('phones', help='list of phones')
    parser.add_argument('hmm_graphs', help='hmm graph for each unit')
    parser.add_argument('emissions', help='output emissions')
    args = parser.parse_args()

    # Load the HMM configuration.
    with open(args.conf, 'r') as fid:
        conf = yaml.load(fid)

    # Load the phones (first token of each line).
    phones = []
    with open(args.phones, 'r') as fid:
        for line in fid:
            tokens = line.split()
            phones.append(tokens[0])

    # Get the data statistics.
    if args.stats:
        stats = np.load(args.stats)
        mean = torch.from_numpy(stats['mean']).float()
        var = torch.from_numpy(stats['var']).float()
    else:
        dim = args.dim
        mean = torch.zeros(dim).float()
        var = torch.ones(dim).float()

    unit_count = 0
    pdf_id = 0
    units = {}
    emissions = []
    for group in conf:
        # Create the unit graphs.
        for i in range(group['n_units']):
            unit_id = unit_count
            unit_count += 1
            nstates = group['n_state_per_unit']
            arcs = parse_topology(group['topology'])
            unit, pdf_id = create_unit_graph(nstates, arcs, pdf_id)
            units[phones[unit_id]] = unit

        # Create the emissions for the group's emitting states (each unit
        # contributes n_state_per_unit - 2 of them).
        tot_states = group['n_units'] * (group['n_state_per_unit'] - 2)
        modelset = create_emissions(group, mean, var)
        modelset = beer.MixtureSet.create(tot_states, modelset)
        emissions.append(modelset)

    # Merge the pdfs into a single set.
    emissions = beer.JointModelSet(emissions)

    with open(args.hmm_graphs, 'wb') as fid:
        pickle.dump(units, fid)

    with open(args.emissions, 'wb') as fid:
        pickle.dump(emissions, fid)
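
# Sketch (assumption): the file passed to --stats above is expected to be a
# NumPy .npz archive holding 'mean' and 'var' arrays, as implied by the
# stats['mean'] / stats['var'] lookups.  The helper name, path, and feature
# dimension below are made-up examples.
def _write_example_stats(path='feats_stats_example.npz', dim=39):
    np.savez(path, mean=np.zeros(dim), var=np.ones(dim))

# A hypothetical invocation of this version of the script (the script name is
# made up) could then look like:
#   python prepare_hmm_emissions.py --stats feats_stats_example.npz \
#       hmm.yml phones.txt hmm_graphs.pkl emissions.pkl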