def load_parameters(script_line: str, script_path: str):
    """Extract an utterance's string identifier and features from an archive.

    The script line is resolved with ``parse_script_line`` and the archive it
    names is read with ``read_htk_user_feat``. The entry must cover the whole
    archive: its frame range has to start at 0 and span exactly as many
    frames as there are rows in the loaded features.

    Args:
        script_line: The string representing the desired utterance.
        script_path: The location of the script file that contains the line.

    Returns:
        feat: An array representing the acoustic data, one row per frame of data.
        utt: A string identifier for the utterance.

    Raises:
        AssertionError: If the frame range does not start at 0, or if its
            length differs from the number of rows in the loaded features.
    """
    utt, arc, frame_start, frame_end = parse_script_line(
        script_line, script_path)
    feat = read_htk_user_feat(arc)

    # Sanity checks on the script entry. NOTE: assert statements are skipped
    # when Python runs with -O, so these only guard non-optimized runs.
    assert frame_start == 0, (
        "expected frame range to start at 0, got {0}".format(frame_start))
    # frame_end is inclusive, hence the "+ 1" when computing the frame count.
    assert frame_end + 1 - frame_start == len(feat), (
        "frame range [{0}, {1}] does not match {2} loaded frames".format(
            frame_start, frame_end, len(feat)))
    return feat, utt

# Persist the features, then read them back to check the round trip.
htk.write_htk_user_feat(feat, feat_file)
print("Wrote {0} frames to {1}".format(feat.shape[1], feat_file))

# If you want to verify that the file was written correctly:
# NOTE(review): the read-back is transposed so feat2 has the same orientation
# as feat (frames counted on axis 1, as in the "Wrote" message above), which
# matches the later verification step in this file. This presumes
# read_htk_user_feat returns one row per frame -- confirm against the htk module.
feat2 = htk.read_htk_user_feat(name=feat_file).transpose()
print("Read {0} frames from {1}".format(feat2.shape[1], feat_file))
# NOTE(review): the value printed is np.linalg.norm of the difference divided
# by the element count, which is not a mean of absolute differences despite
# the wording of the message.
print("Per-element absolute error is {0}".format(
    np.linalg.norm(feat - feat2) / (feat2.shape[0] * feat2.shape[1])))
# Example no. 3
# 0
# Plot the mel filterbank: one curve per filter (row i of fe.mel_filterbank),
# all drawn on the same figure.
for i in range(fe.num_mel):
    plt.plot(fe.mel_filterbank[i, :])
plt.title('mel filterbank')
plt.savefig('fig/mel_filterbank.png', bbox_inches='tight')
plt.close()

# Plot the log mel spectrum (fbank).
# origin='lower' flips the image so that the vertical frequency axis goes
# from low to high.
plt.imshow(feat, origin='lower', aspect=4)
plt.title('log mel filterbank features (fbank)')
plt.savefig('fig/fbank.png', bbox_inches='tight')
plt.close()

# Persist the features; frames are counted along axis 1 of feat.
htk.write_htk_user_feat(feat, feat_file)
print("Wrote {0} frames to {1}".format(feat.shape[1], feat_file))

# If you want to verify that the file was written correctly:
# the read-back array is transposed before it is compared with feat.
feat2 = htk.read_htk_user_feat(name=feat_file).transpose()
print("Read {0} frames from {1}".format(feat2.shape[1], feat_file))
# NOTE(review): the value printed is np.linalg.norm of the difference divided
# by the element count, which is not a mean of absolute differences despite
# the wording of the message.
print("Per-element absolute error is {0}".format(
    np.linalg.norm(feat - feat2) / (feat2.shape[0] * feat2.shape[1])))

# Plot the features that were read back, using the same image settings as the
# fbank plot above so the two figures can be compared side by side.
plt.imshow(feat2, origin='lower', aspect=4)
plt.title('Expected log mel filterbank features')
plt.savefig('fig/fbank_exp.png', bbox_inches='tight')
plt.close()