Example #1
    def e_step(args_list):
        # Imports are done here so the function can run on remote workers;
        # `model` and `data_stats` come from the enclosing scope.
        import numpy
        from amdtk import read_htk

        exp_llh = 0.
        acc_stats = None
        n_frames = 0

        for arg in args_list:
            fea_file = arg

            # Mean / Variance normalization.
            data = read_htk(fea_file)
            data -= data_stats['mean']
            data /= numpy.sqrt(data_stats['var'])

            # Get the accumulated sufficient statistics for the
            # given set of features.
            s_stats = model.get_sufficient_stats(data)
            posts, llh, new_acc_stats = model.get_posteriors(s_stats,
                                                             accumulate=True)

            exp_llh += numpy.sum(llh)
            n_frames += len(data)
            if acc_stats is None:
                acc_stats = new_acc_stats
            else:
                acc_stats += new_acc_stats

        return (exp_llh, acc_stats, n_frames)
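
A minimal sketch of how the (exp_llh, acc_stats, n_frames) tuples returned by e_step could be reduced across workers; the helper name is illustrative, and it assumes the acc_stats objects support in-place addition as they do inside the loop above.

def reduce_e_step_results(results):
    """Combine (exp_llh, acc_stats, n_frames) tuples from several workers (illustrative helper)."""
    total_llh, total_stats, total_frames = 0., None, 0
    for exp_llh, acc_stats, n_frames in results:
        total_llh += exp_llh
        total_frames += n_frames
        if total_stats is None:
            total_stats = acc_stats
        else:
            total_stats += acc_stats
    return total_llh, total_stats, total_frames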
Example #2
def log_predictive(fea_file):
    """Lower-bound of the predictive distribution.

    Parameters
    ----------
    fea_file : str
        Path to a features file (HTK format).

    Returns
    -------
    stats : dict
        Dictionary with the log-predictive probability.

    """
    # pylint: disable=too-many-locals
    # Because this function is meant to be executed in a
    # separate thread, we prefer to reduce the number of
    # function calls to simplify the dependencies.

    # pylint: disable=global-variable-not-assigned
    # pylint: disable=undefined-variable
    # The values of these global variables will be pushed
    # to the workers dynamically.
    global MODEL, TEMP_DIR

    # pylint: disable=redefined-outer-name
    # pylint: disable=reimported
    # These imports are done on the remote workers.
    import os
    import pickle

    # Extract the key of the utterance.
    basename = os.path.basename(fea_file)
    key, ext = os.path.splitext(basename)
    if '[' in ext:
        idx = ext.index('[')
        key += ext[idx:]

    # Load the features.
    data = read_htk(fea_file)

    expected_llh, unit_stats, state_resps, comp_resps = \
        MODEL.expected_log_likelihood(data)

    # Add the normalizer to the stats to compute the
    # lower bound.
    stats = {}
    stats[-1] = {
        'E_log_X': expected_llh,
        'N': data.shape[0]
    }

    # Store the stats.
    out_path = os.path.join(TEMP_DIR, key)
    with open(out_path, 'wb') as file_obj:
        pickle.dump(stats, file_obj)

    return out_path
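
log_predictive writes one pickle per utterance and returns its path. A minimal sketch, with a hypothetical helper name, of reading those files back and aggregating them into a corpus-level value, assuming each pickle holds the stats dictionary written above.

import pickle
import numpy

def sum_log_predictive(stats_paths):
    """Hypothetical helper: aggregate the per-utterance stats written by log_predictive."""
    total_llh, total_frames = 0., 0
    for path in stats_paths:
        with open(path, 'rb') as file_obj:
            stats = pickle.load(file_obj)
        total_llh += numpy.sum(stats[-1]['E_log_X'])
        total_frames += stats[-1]['N']
    # Return both the total and the per-frame average.
    return total_llh, total_llh / total_frames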
Example #3
def collect_data_stats(filename):
    """Job to collect the statistics."""
    # We re-import this module here because this code will run
    # remotely.
    import amdtk
    data = amdtk.read_htk(filename)
    stats_0 = data.shape[0]
    stats_1 = data.sum(axis=0)
    stats_2 = (data**2).sum(axis=0)
    retval = (stats_0, stats_1, stats_2)
    return retval
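
The per-file (frame count, sum, sum of squares) tuples are enough to recover a global mean and variance; a minimal sketch of that accumulation (names are illustrative, not taken from the original code).

import numpy as np

def accumulate_data_stats(per_file_stats):
    """Turn per-file (count, sum, sum of squares) tuples into a global mean/variance."""
    n_frames = sum(stats[0] for stats in per_file_stats)
    total = np.sum([stats[1] for stats in per_file_stats], axis=0)
    total_sq = np.sum([stats[2] for stats in per_file_stats], axis=0)
    mean = total / n_frames
    var = total_sq / n_frames - mean ** 2   # E[x^2] - E[x]^2
    return {'count': n_frames, 'mean': mean, 'var': var}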
Example #4
def count_frames(fea_file):
    """Count the number of frames in the features file.

    Parameters
    ----------
    fea_file : str
        Path to the features file.

    Returns
    -------
    count : int
        Number of frames.

    """
    return read_htk(fea_file).shape[0]
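
A brief usage sketch; the paths are made up, and in the parallel setting the same call would typically be dispatched with something like dview.map_sync(count_frames, fea_paths), as in the other examples.

fea_paths = ['utt1.fea', 'utt2.fea']   # illustrative paths only
total_frames = sum(count_frames(path) for path in fea_paths)
print('Corpus size:', total_frames, 'frames')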
Example #5
def collect_data_stats(filename):
    """Job to collect the statistics."""
    print("filename:", filename)
    # We re-import this module here because this code will run
    # remotely.
    import amdtk

    data = amdtk.read_htk(filename)
    amdtk.utils.test_import()

    stat_length = data.shape[0]
    stat_sum = data.sum(axis=0)
    stat_squared_sum = (data**2).sum(axis=0)
    return (
        stat_length,
        stat_sum,
        stat_squared_sum
    )
Example #6
    def e_step_nonstatic(self, args_list):

        model = self.model
        data_stats = self.data_stats

        exp_llh = 0.
        acc_stats = None
        n_frames = 0

        for arg in args_list:
            (fea_file, top_file) = arg

            # Mean / Variance normalization.
            data = read_htk(fea_file)
            data -= data_stats['mean']
            data /= np.sqrt(data_stats['var'])

            # Read top PLU sequence from file
            with open(top_file, 'r') as f:
                topstring = f.read()
                tops = topstring.strip().split(',')
                tops = [int(x) for x in tops]

            # Get the accumulated sufficient statistics for the
            # given set of features.
            s_stats = model.get_sufficient_stats(data)
            posts, llh, new_acc_stats = model.get_posteriors(s_stats,
                                                             tops,
                                                             accumulate=True,
                                                             filename=fea_file)

            exp_llh += np.sum(llh)
            n_frames += len(data)
            if acc_stats is None:
                acc_stats = new_acc_stats
            else:
                acc_stats += new_acc_stats

        return (exp_llh, acc_stats, n_frames)
Example #7
def std_exp(fea_file):
    """E-Step of the standard Variational Bayes.

    Parameters
    ----------
    fea_file : str
        Path to a features file (HTK format).

    Returns
    -------
    stats : dict
        Dictionary of statistics.

    """
    # pylint: disable=too-many-locals
    # Because this function is meant to be executed in a
    # separate thread, we prefer to reduce the number of
    # function calls to simplify the dependencies.

    # pylint: disable=global-variable-not-assigned
    # pylint: disable=undefined-variable
    # The values of these global variables will be pushed
    # to the workers dynamically.
    global MODEL, TEMP_DIR, ALIGNMENTS

    # pylint: disable=redefined-outer-name
    # pylint: disable=reimported
    # These imports are done on the remote workers.
    import os
    import pickle

    # Extract the key of the utterance.
    basename = os.path.basename(fea_file)
    key, ext = os.path.splitext(basename)
    if '[' in ext:
        idx = ext.index('[')
        key += ext[idx:]

    # Load the features.
    data = read_htk(fea_file)

    # Compute the responsibilities per component.
    if ALIGNMENTS is not None:
        ali = ALIGNMENTS[fea_file]
    else:
        ali = None
    expected_llh, unit_stats, state_resps, comp_resps = \
            MODEL.expected_log_likelihood(data)

    # Get the sufficient statistics of the model given the
    # responsibilities.
    stats = MODEL.get_stats(data, unit_stats, state_resps,
                            comp_resps)

    # Add the normalizer to the stats to compute the
    # lower bound.
    stats[-1] = {
        'E_log_X': expected_llh,
        'N': data.shape[0]
    }

    # Store the stats.
    out_path = os.path.join(TEMP_DIR, key)
    with open(out_path, 'wb') as file_obj:
        pickle.dump(stats, file_obj)

    return out_path
Example #8
def run_amdtk_nc(args):

    elbo = []
    time = []

    def callback(args):
        elbo.append(args['objective'])
        time.append(args['time'])
        print('elbo=' + str(elbo[-1]), 'time=' + str(time[-1]))

    rc = Client(profile=args.profile)
    rc.debug = DEBUG
    dview = rc[:]
    print('Connected to', len(dview), 'jobs.')

    print("done importing!")

    audio_dir = os.path.abspath(args.audio_dir)
    print('audio dir:', audio_dir)
    eval_dir = os.path.abspath(args.eval_dir)
    print('eval dir:', eval_dir)
    output_dir = os.path.abspath(args.output_dir)
    print('output dir:', output_dir)

    fea_paths = []
    top_paths = []

    for root, dirs, files in os.walk(audio_dir):
        for file in files:
            print('file:', file)
            if file.lower().endswith(".fea"):
                fea_paths.append(os.path.join(root, file))
            if file.lower().endswith(".top"):
                top_paths.append(os.path.join(root, file))

    # fea_paths = [os.path.abspath(fname) for fname in glob.glob(fea_path_mask)]
    # top_path_mask = os.path.join(audio_dir,'*top')
    # top_paths = [os.path.abspath(fname) for fname in glob.glob(top_path_mask)]

    for path in fea_paths:
        assert (os.path.exists(path))

    print('fea_paths:', fea_paths)
    print('top_paths:', top_paths)

    zipped_paths = list(zip(sorted(fea_paths), sorted(top_paths)))

    print('zipped_paths:', zipped_paths)

    assert (len(zipped_paths) > 0)

    for path_pair in zipped_paths:
        assert (re.sub("\.fea", "",
                       path_pair[0]) == re.sub("\.top", "", path_pair[1]))

    print("There are {} files".format(len(fea_paths)))
    print("Getting mean and variance of input data...")
    data_stats = dview.map_sync(collect_data_stats_by_speaker, fea_paths)

    # Accumulate the statistics over all the utterances.
    final_data_stats = accumulate_stats_by_speaker(data_stats)

    tops = []
    # Read top PLU sequences from file
    for top_path in top_paths:
        with open(top_path, 'r') as f:
            topstring = f.read()
            top_list = topstring.strip().split(',')
            tops += [int(x) for x in top_list]

    num_tops = max(tops) + 1

    elbo = []
    time = []

    print("Creating phone loop model...")
    conc = 0.1

    if resume is None:

        model = amdtk.PhoneLoopNoisyChannel.create(
            n_units=str(args.bottom_plu_count),  # number of acoustic units
            n_states=3,  # number of states per unit
            n_comp_per_state=args.n_comp,  # number of Gaussians per emission
            n_top_units=num_tops,  # size of top PLU alphabet
            mean=np.zeros_like(final_data_stats[list(
                final_data_stats.keys())[0]]['mean']),
            var=np.ones_like(
                final_data_stats[list(final_data_stats.keys())[0]]['var'] /
                20),
            max_slip_factor=args.max_slip_factor,
            extra_cond=args.extra_cond,
            limits={
                'plu_top': args.plu_top_limit,
                'mem': args.mem_limit,
                'time': args.time_limit
            })

    else:
        with open(resume, 'rb') as f1:
            model = pickle.load(f1)

        numgex = re.compile(r"[\d]+")
        res_epoch, res_batch = [int(x) for x in numgex.findall(str(resume))]
        model.start_epoch = res_epoch
        model.starting_batch = res_batch
    if train:
        if not os.path.exists(os.path.join(output_dir, "models")):
            os.mkdir(os.path.join(output_dir, "models"))

        data_stats = final_data_stats
        print("Creating VB optimizer...")
        optimizer = amdtk.NoisyChannelOptimizer(
            dview,
            data_stats,
            args={
                'epochs': args.n_epochs,
                'batch_size': args.batch_size,
                'lrate': args.lrate,
                'output_dir': output_dir,
                'audio_dir': audio_dir,
                'eval_audio_dir': audio_dir,
                'audio_samples_per_sec': 100
            },
            model=model,
            pkl_path=os.path.join(output_dir, "models"))

        print("Running VB optimization...")
        begin = systime.time()
        print("running with {} paths".format(len(list(zipped_paths))))
        optimizer.run(zipped_paths, callback)
        end = systime.time()
        print("VB optimization took ", end - begin, " seconds.")

    print("***ELBO***")
    for i, n in enumerate(elbo):
        print('Epoch ' + str(i) + ': ELBO=' + str(n))

    if args.decode:
        for (fea_path, top_path) in zipped_paths:
            data = amdtk.read_htk(fea_path)

            # Normalize the data
            data_mean = np.mean(data)
            data_var = np.var(data)
            data = (data - data_mean) / np.sqrt(data_var)

            # Read top PLU sequence from file
            with open(top_path, 'r') as f:
                topstring = f.read()
                tops = topstring.strip().split(',')
                tops = [int(x) for x in tops]

            #result = model.decode(data, tops, state_path=False)
            #result_path = model.decode(data, tops, state_path=True)
            (result_intervals, edit_path,
             _) = model.decode(data, tops, phone_intervals=True, edit_ops=True)

            print("---")
            print("Phone sequence for file", fea_path, ":")
            print(result_intervals)
            print(edit_path)
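
From the parsing code above, each .top file is expected to hold a single comma-separated list of integer top-PLU ids. A minimal sketch of writing such a file for testing; the file name and ids are made up.

tops = [3, 3, 7, 1, 0]   # illustrative top-PLU ids
with open('example.top', 'w') as f:
    f.write(','.join(str(t) for t in tops))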
Example #9
    graph_items = []
    for key, value in sorted(dictionary.items()):
        how_many_xs = round(value / num_per_x)
        xs = 'x' * how_many_xs
        if xs == '' and value > 0:
            xs = '.'
        num_string = '({}, {:.2f}) '.format(key, value)
        graph_items.append((num_string, xs))
    max_len = max(len(x[0]) for x in graph_items)
    for item in graph_items:
        space_padding = ' ' * (max_len - len(item[0]))
        print(item[0] + space_padding + item[1])


for path in paths:
    data = amdtk.read_htk(path)

    # Normalize the data
    data_mean = np.mean(data)
    data_var = np.var(data)
    data = (data - data_mean) / np.sqrt(data_var)

    print("type of model is ", type(model))

    result = model.decode(data, state_path=False)
    result_path = model.decode(data, state_path=True)
    result_intervals = model.decode(data, phone_intervals=True)
    print("---")
    print("Phone sequence for file", path, ":")
    print(result)
Example #10
    def e_step(args_list):
        # Imports are done here so the function can run on remote workers;
        # `model`, `data_stats` and `curr_timing_dir` come from the enclosing scope.
        import os
        import time
        import numpy
        from amdtk import read_htk

        exp_llh = 0.
        acc_stats = None
        n_frames = 0

        skipped = 0

        for arg in args_list:
            (fea_file, top_file) = arg
            # Mean / Variance normalization.
            data = read_htk(fea_file)
            speaker = os.path.split(os.path.split(fea_file)[0])[1]
            data -= data_stats[speaker]['mean']

            data /= numpy.sqrt(data_stats[speaker]['var'])
            # Read top PLU sequence from file
            with open(top_file, 'r') as f:
                data_str = f.read()
                tops = data_str.strip().split(',')
                tops = [int(x) for x in tops]
            # Get the accumulated sufficient statistics for the
            # given set of features.
            s_stats = model.get_sufficient_stats(data)

            start_time = time.time()

            if curr_timing_dir is not None:
                curr_timing_file = os.path.join(
                    curr_timing_dir,
                    os.path.split(fea_file)[1][:-4])
                curr_timing_file = curr_timing_file + '.txt'
                with open(curr_timing_file, 'w') as f:
                    f.write('PLU tops: {}\n'.format(len(tops)))
                    f.write('PLU bottom types: {}\n'.format(model.n_units))
                    f.write('Frames: {}\n'.format(data.shape[0]))
                    f.write('Max slip factor: {}\n'.format(
                        model.max_slip_factor))
                    f.write('\n')
                    f.write('Start time: {}\n'.format(
                        time.strftime("%Y-%m-%d_%H:%M:%S",
                                      time.localtime(start_time))))
                    f.write('\n')
            else:
                curr_timing_file = None

            posts, llh, new_acc_stats = model.get_posteriors(
                s_stats,
                tops,
                accumulate=True,
                filename=fea_file,
                output=curr_timing_file)
            end_time = time.time()

            if curr_timing_file is not None:
                with open(curr_timing_file, 'a') as f:
                    f.write('\nEnd time: {}\n'.format(
                        time.strftime("%Y-%m-%d_%H:%M:%S",
                                      time.localtime(end_time))))
                    f.write('Elapsed time: {}\n'.format(end_time - start_time))
                    f.write('DONE')

            if llh is not None:
                exp_llh += numpy.sum(llh)
                n_frames += len(data)

            if new_acc_stats is not None:
                if acc_stats is None:
                    acc_stats = new_acc_stats
                else:
                    acc_stats += new_acc_stats
            else:
                skipped += 1

        return (exp_llh, acc_stats, n_frames, skipped)