Пример #1
0
def main(args, logger):
    """Create one HMM graph per acoustic unit and save them with the emissions.

    The YAML file ``args.conf`` is expected to hold a sequence of group
    configurations, each providing at least the ``group_name`` and
    ``topology`` keys. The text file ``args.units`` lists one
    ``<unit name> <group name>`` pair per line; blank lines are ignored.

    The initial mean/variance are either zeros/ones of size
    ``args.dimension`` (when ``args.dataset`` is empty) or the ``mean`` and
    ``var`` attributes of the object pickled in ``args.dataset``.

    The tuple ``(units, emissions)`` is pickled into ``args.out``, where
    ``units`` maps each unit name to its graph.

    Args:
        args: Parsed command-line namespace providing ``conf``, ``units``,
            ``dataset``, ``dimension`` and ``out``.
        logger: Logger used for debug/info progress messages.

    Raises:
        KeyError: If a unit refers to a group absent from the configuration.
        ValueError: If a non-blank line of the units file does not contain
            exactly two whitespace-separated fields.
    """
    logger.debug(f'reading configuration file: {args.conf}')
    with open(args.conf, 'r') as f:
        # A bare ``yaml.load(f)`` requires an explicit Loader in PyYAML >= 6
        # and can build arbitrary Python objects on older versions;
        # ``safe_load`` is enough for a plain configuration file.
        conf = yaml.safe_load(f)
    groups_conf = {group_conf['group_name']: group_conf for group_conf in conf}

    logger.debug('load the acoustic units name and group')
    grouped_unitnames = defaultdict(list)
    with open(args.units, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank (e.g. trailing) lines.
                continue
            name, group = fields
            grouped_unitnames[group].append(name)

    if not args.dataset:
        logger.debug('no dataset provided assuming zero mean and ' \
                     'identity covariance matrix')
        mean = torch.zeros(args.dimension).float()
        var = torch.ones(args.dimension).float()
    else:
        logger.debug(f'using "{args.dataset}" dataset for ' \
                     'initialization')
        # NOTE(security): pickle.load executes arbitrary code embedded in
        # the file; only use dataset files coming from a trusted source.
        with open(args.dataset, 'rb') as f:
            dataset = pickle.load(f)
        mean, var = dataset.mean, dataset.var

    # ``start_pdf_id`` is threaded through create_unit_graph so that it
    # carries over from one unit to the next, across groups.
    start_pdf_id = 0
    pdfs = []
    units = {}
    for group, unitnames in grouped_unitnames.items():
        logger.debug(f'creating HMM for group "{group}"')
        # Looked up once per group: it is needed both for every unit of the
        # group and for the group's pdfs created after the inner loop.
        group_conf = groups_conf[group]
        tot_emitting_states = 0
        for name in unitnames:
            logger.debug(f'creating HMM for unit "{name}"')
            graph, start_pdf_id = create_unit_graph(group_conf['topology'],
                                                    start_pdf_id)
            units[name] = graph
            tot_emitting_states += count_emitting_state(graph)
        pdfs.append(create_pdfs(mean, var, tot_emitting_states, group_conf))
    emissions = beer.JointModelSet(pdfs)

    logger.debug('saving the HMMs on disk...')
    with open(args.out, 'wb') as f:
        pickle.dump((units, emissions), f)

    logger.info(f'created {len(units)} HMMs for a total of {len(emissions)}' \
                f' emitting states')
    logger.info(f'expected features dimension: {len(mean)}')
Пример #2
0
def main(args, logger):
    """Create ``n_units`` HMM graphs per configured group and save them.

    The YAML file ``args.conf`` is expected to hold a sequence of group
    configurations, each providing at least the ``group_name``,
    ``n_units`` and ``topology`` keys. Units of a group are named
    ``<group_name>1`` ... ``<group_name><n_units>``.

    The initial mean/variance are either zeros/ones of size ``args.dim``
    (when ``args.dataset`` is empty) or the ``mean`` and ``var`` attributes
    of the object pickled in ``args.dataset``.

    The tuple ``(units, emissions)`` is pickled into ``args.out``, where
    ``units`` maps each unit name to its graph.

    Args:
        args: Parsed command-line namespace providing ``conf``,
            ``dataset``, ``dim`` and ``out``.
        logger: Logger used for debug/info progress messages.
    """
    logger.debug(f'reading configuration file: {args.conf}')
    with open(args.conf, 'r') as f:
        # A bare ``yaml.load(f)`` requires an explicit Loader in PyYAML >= 6
        # and can build arbitrary Python objects on older versions;
        # ``safe_load`` is enough for a plain configuration file.
        conf = yaml.safe_load(f)

    if not args.dataset:
        logger.debug('no dataset provided assuming zero mean and ' \
                     'identity covariance matrix')
        mean = torch.zeros(args.dim).float()
        var = torch.ones(args.dim).float()
    else:
        logger.debug(f'using "{args.dataset}" dataset for ' \
                     'initialization')
        # NOTE(security): pickle.load executes arbitrary code embedded in
        # the file; only use dataset files coming from a trusted source.
        with open(args.dataset, 'rb') as f:
            dataset = pickle.load(f)
        mean, var = dataset.mean, dataset.var

    # ``start_pdf_id`` is threaded through create_unit_graph so that it
    # carries over from one unit to the next, across groups.
    start_pdf_id = 0
    pdfs = []
    units = {}
    for unit_group in conf:
        prefix = unit_group['group_name']

        logger.debug(f'creating HMM for group "{prefix}"')

        tot_emitting_states = 0
        # Unit names are numbered starting from 1.
        for unit_number in range(1, unit_group['n_units'] + 1):
            unit_name = f'{prefix}{unit_number}'
            graph, start_pdf_id = create_unit_graph(unit_group['topology'],
                                                    start_pdf_id)
            units[unit_name] = graph
            tot_emitting_states += count_emitting_state(graph)
        pdfs.append(create_pdfs(mean, var, tot_emitting_states, unit_group))
    emissions = beer.JointModelSet(pdfs)

    logger.debug('saving the HMMs on disk...')
    with open(args.out, 'wb') as f:
        pickle.dump((units, emissions), f)

    logger.info(f'created {len(units)} HMMs for a total of {len(emissions)}' \
                f' emitting states')
    logger.info(f'expected features dimension: {len(mean)}')
def main():
    """Build the unit HMM graphs and emission models from the command line.

    Command-line arguments:
        --stats / --dim (exactly one is required): either a statistics file
            readable by ``np.load`` exposing the ``mean`` and ``var``
            entries, or the feature dimension, in which case a zero mean
            and unit variance are used.
        conf: YAML configuration, expected to hold a sequence of groups,
            each providing ``n_units``, ``n_state_per_unit`` and
            ``topology``.
        phones: text file whose first token of each non-blank line is a
            phone name.
        hmm_graphs: output pickle file, mapping phone name -> unit graph.
        emissions: output pickle file with the joint emission model set.

    Phones are assigned to units in file order, across groups.

    Raises:
        ValueError: If the phone list has fewer entries than the total
            number of units requested by the configuration.
    """
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--stats', help='Feature statistics file for hmm model')
    group.add_argument('--dim',
                       type=int,
                       help='Dimension of feature, used for vae-hmm model')
    parser.add_argument('conf', help='Configuration file')
    parser.add_argument('phones', help='list of phones')
    parser.add_argument('hmm_graphs', help='hmm graph for each unit')
    parser.add_argument('emissions', help='output emissions')
    args = parser.parse_args()

    # Load the HMM configuration.
    with open(args.conf, 'r') as fid:
        # A bare ``yaml.load(fid)`` requires an explicit Loader in
        # PyYAML >= 6 and can build arbitrary Python objects on older
        # versions; ``safe_load`` is enough for a plain configuration file.
        conf = yaml.safe_load(fid)

    # Load the phones: only the first token of each line is used, blank
    # lines are skipped.
    phones = []
    with open(args.phones, 'r') as fid:
        for line in fid:
            tokens = line.split()
            if tokens:
                phones.append(tokens[0])

    # Fail early, with an explicit message, rather than with a bare
    # IndexError in the middle of the graph creation.
    n_units_total = sum(group['n_units'] for group in conf)
    if len(phones) < n_units_total:
        raise ValueError(
            f'the configuration requires {n_units_total} units but only '
            f'{len(phones)} phones were found in "{args.phones}"')

    # Get the data statistics.
    if args.stats:
        stats = np.load(args.stats)
        mean = torch.from_numpy(stats['mean']).float()
        var = torch.from_numpy(stats['var']).float()
    else:
        dim = args.dim
        mean = torch.zeros(dim).float()
        var = torch.ones(dim).float()

    unit_count = 0
    pdf_id = 0
    units = {}
    emissions = []
    for group in conf:
        nstates = group['n_state_per_unit']

        # Create the unit graphs; ``pdf_id`` is threaded through
        # create_unit_graph so that it carries over between units.
        for _ in range(group['n_units']):
            arcs = parse_topology(group['topology'])
            unit, pdf_id = create_unit_graph(nstates, arcs, pdf_id)
            units[phones[unit_count]] = unit
            unit_count += 1

        # Two states per unit are left out of the emission count --
        # presumably the non-emitting entry/exit states; confirm against
        # create_unit_graph.
        tot_states = group['n_units'] * (nstates - 2)
        modelset = create_emissions(group, mean, var)
        modelset = beer.MixtureSet.create(tot_states, modelset)
        emissions.append(modelset)

    # Merge the pdfs into a single set.
    emissions = beer.JointModelSet(emissions)

    with open(args.hmm_graphs, 'wb') as fid:
        pickle.dump(units, fid)

    with open(args.emissions, 'wb') as fid:
        pickle.dump(emissions, fid)