Exemplo n.º 1
0
    trainer = Trainer(params, args.model_dir, single_cpu=True)

    with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
        dim = int(f.readline().strip())
    #trainer.build("predict", dim=dim)
    trainer.build("predict",
                  dim=dim,
                  loss_type="extract_asoftmax",
                  num_speakers=154)

    if args.rspecifier.rsplit(".", 1)[1] == "scp":
        # The rspecifier cannot be scp
        sys.exit("The rspecifier must be ark or input pipe")

    fp_out = open_or_fd(args.wspecifier, "wb")
    for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
        if feature.shape[0] < args.min_chunk_size:
            tf.logging.info("[INFO] Key %s length too short, %d < %d, skip." %
                            (key, feature.shape[0], args.min_chunk_size))
            continue
        if feature.shape[0] > args.chunk_size:
            feature_array = []
            feature_length = []
            num_chunks = int(
                np.ceil(
                    float(feature.shape[0] - args.chunk_size) /
                    (args.chunk_size / 2))) + 1
            tf.logging.info(
                "[INFO] Key %s length %d > %d, split to %d segments." %
                (key, feature.shape[0], args.chunk_size, num_chunks))
# Map integer phone ids to dense class indices (one index per phone name with
# its last "_suffix" removed), rewrite every integer vector from `phone_scp`
# using those indices, and print how often each index occurs.
#
# NOTE(review): `phones` (path of the "<phone> <id>" table read below) is set
# above this excerpt -- presumably from sys.argv[1]; confirm.
phone_scp = sys.argv[2]
monophone_ark = sys.argv[3]

phone2ind = {}  # phone name without its last "_suffix" -> dense index
num2ind = {}    # integer phone id -> dense index

with open(phones, 'r') as f:
    for line in f:
        phone, num = line.strip().split(" ")
        p = phone.rsplit("_", 1)[0]
        # A name seen for the first time receives the next free index.
        num2ind[int(num)] = phone2ind.setdefault(p, len(phone2ind))

ind2cnt = {}  # dense index -> number of occurrences over all vectors
# The context manager closes (and flushes) the output even when a lookup or
# write fails part-way through.
with open_or_fd(monophone_ark, "wb") as fp_out:
    for key, vec in read_vec_int_scp(phone_scp):
        a = [num2ind[v] for v in vec]
        for mapped in a:
            ind2cnt[mapped] = ind2cnt.get(mapped, 0) + 1
        write_vec_int(fp_out, np.array(a), key=key)

for ind in ind2cnt:
    print("%d %d" % (ind, ind2cnt[ind]))
Exemplo n.º 3
0
import sys
from dataset.kaldi_io import open_or_fd, read_mat_scp, write_mat
import numpy as np

# Sum groups of matrix columns into one column per class.
#
# Command line (see the usage string): phone_class post_in_scp post_out_ark
#   phone_class  - text file; each line lists the space-separated integer
#                  column indices that belong to one class
#   post_in_scp  - scp of the input matrices
#   post_out_ark - destination for the merged matrices
if len(sys.argv) != 4:
    print('Usage: %s phone_class post_in_scp post_out_ark' % sys.argv[0])
    # quit() is the interactive helper installed by `site`; sys.exit is the
    # programmatic form, and a non-zero status lets calling scripts notice
    # the usage error.
    sys.exit(1)

with open(sys.argv[1], "r") as f:
    phone_class = [[int(p) for p in line.strip().split(" ")] for line in f]
num_classes = len(phone_class)

# The context manager closes the output even if reading or writing fails.
with open_or_fd(sys.argv[3], "wb") as fp_out:
    for key, mat in read_mat_scp(sys.argv[2]):
        post_new = np.zeros((mat.shape[0], num_classes))
        for index, phones in enumerate(phone_class):
            # Column `index` of the output is the row-wise sum of the input
            # columns listed for that class.
            post_new[:, index] = np.sum(mat[:, phones], axis=1)
        write_mat(fp_out, post_new, key=key)
Exemplo n.º 4
0
    trainer = BaseMT(params, args.model_dir, dim, num_total_speakers, num_total_phones, single_cpu=True)
    trainer.build("predict")
    tf.logging.info("Extract embeddings (or outputs) from node %s" % args.node)
    assert args.node in trainer.endpoints, "The node %s is not in the endpoints" % args.node

    if args.rspecifier.rsplit(".", 1)[1] == "scp":
        # The rspecifier cannot be scp
        sys.exit("The rspecifier must be ark or input pipe.")

    if args.ali_rspecifier.rsplit(".", 1)[1] != "scp":
        sys.exit("The ali-rspecifier is expected to be scp file.")

    num_err = 0
    num_done = 0
    # Preload the first alignment.
    fp_ali = open_or_fd(args.ali_rspecifier)
    ali_key, ali_value = read_ali(fp_ali)

    fp_out = open_or_fd(args.wspecifier, "wb")
    for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
        # if feature.shape[0] < args.min_chunk_size:
        #     tf.logging.info("[INFO] Key %s length too short, %d < %d, skip." % (key, feature.shape[0], args.min_chunk_size))
        #     continue

        # The alignments are assumed to be less than the features (due to decoding failure).
        if ali_key != key:
            tf.logging.warn("Cannot find the ali for %s." % key)
            num_err += 1
            continue

        if feature.shape[0] > args.chunk_size:
    pdf_to_index = {}
    index_to_pdf = []

    with open(args.phone_set, "r") as f:
        index = 0
        for line in f.readlines():
            phones = line.strip().split(" ")
            tmp = set()
            for p in phones:
                tmp |= phone_to_pdf[phone_id[p]]
                for pdf in phone_to_pdf[phone_id[p]]:
                    if pdf in pdf_to_index:
                        assert (pdf_to_index[pdf] == index)
                    pdf_to_index[pdf] = index
            index_to_pdf.append(list(tmp))
            index += 1
    num_classes = len(index_to_pdf)

    fp_out = open_or_fd(args.post_out, "wb")
    for index, (key, post) in enumerate(read_mat_scp(args.post_in)):
        post_new = np.zeros((post.shape[0], num_classes))
        for i in range(num_classes):
            post_new[:, i] = np.sum(post[:, index_to_pdf[i]], axis=1)
        assert (np.allclose(np.sum(post_new, axis=1),
                            np.ones((post.shape[0], 1)),
                            rtol=1e-02))
        write_mat(fp_out, post_new, key=key)

    fp_out.close()
    print("%d classes" % len(index_to_pdf))
    num_classes = len(index_to_pdf)

    # Sanity check
    for pdf in pdf_to_index:
        assert (pdf in index_to_pdf[pdf_to_index[pdf]])

    index_stat = {}  # index -> [count, post]
    num_acc = 0
    num_frames = 0

    dim = 0
    num_err = 0
    num_done = 0
    fp_ali = open_or_fd(args.ali)
    ali_key, ali_value = read_ali(fp_ali)
    for index, (key, post) in enumerate(read_mat_scp(args.post)):
        if ali_key != key:
            num_err += 1
            continue

        # Main computation
        # Convert alignment
        ali_value_new = [pdf_to_index[a] for a in ali_value]

        # Convert posteriors
        post_new = np.zeros((post.shape[0], num_classes))
        for i in range(num_classes):
            post_new[:, i] = np.sum(post[:, index_to_pdf[i]], axis=1)
        assert (np.allclose(np.sum(post_new, axis=1),
Exemplo n.º 7
0
                             dtype=np.float64)
    # Sanity check on the prior and convert to log
    assert (np.allclose(np.sum(prior_vec), 1.0))
    # Since the prior is floored during training, it is safe to apply log on the prior
    log_prior_vec = np.log(prior_vec)[np.newaxis, :]

    if args.rspecifier.rsplit(".", 1)[1] == "scp":
        # The rspecifier cannot be scp
        sys.exit("The rspecifier must be ark or input pipe.")

    num_done = 0

    is_output_logpost = False
    if len(args.write_per_frame_log_posteriors) > 0:
        is_output_logpost = True
        fp_logpost = open_or_fd(args.write_per_frame_log_posteriors, "wb")

    fp_out = open_or_fd(args.wspecifier, "wb")
    for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
        if feature.shape[0] > args.chunk_size:
            # feature_array = []
            # ali_array = []
            # feature_length = []
            # num_chunks = int(np.ceil(float(feature.shape[0]) / args.chunk_size))
            # tf.logging.info("[INFO] Key %s length %d > %d, split to %d segments." % (
            #                 key, feature.shape[0], args.chunk_size, num_chunks))
            # for i in range(num_chunks):
            #     start = i * args.chunk_size
            #     this_chunk_size = args.chunk_size if feature.shape[0] - start > args.chunk_size else feature.shape[
            #                                                                                              0] - start
            #     feature_length.append(this_chunk_size)
Exemplo n.º 8
0
# Concatenate, key by key, the vectors stored in several scp files.
#
# Command line: vector_in_scp [vector_in_scp ...] vector_out
# Every input must list exactly the same keys, in the same order, as the
# first input; the output holds one concatenated vector per key.
if len(sys.argv) < 3:
    # Without this guard a single argument would be taken as the output path.
    sys.exit("Usage: %s vector_in_scp [vector_in_scp ...] vector_out" %
             sys.argv[0])

vector_in = sys.argv[1:-1]
vector_out = sys.argv[-1]

vector_tot = []    # one {key: vector} dict per input file, in argument order
vector_names = []  # keys in the order given by the first input file

for file_index, vector_file in enumerate(vector_in):
    vector_single = {}
    num_read = 0
    for key, vec in read_vec_flt_scp(vector_file):
        if file_index == 0:
            vector_names.append(key)
        elif num_read >= len(vector_names) or key != vector_names[num_read]:
            # Explicit check instead of assert: it survives `python -O` and
            # also covers a later file that has extra entries.
            raise ValueError(
                "%s: entry %d has key %s, which does not match %s" %
                (vector_file, num_read, key, vector_in[0]))
        vector_single[key] = vec
        num_read += 1
    if num_read != len(vector_names):
        # A shorter file would otherwise only fail later with a KeyError.
        raise ValueError("%s has %d entries, expected %d" %
                         (vector_file, num_read, len(vector_names)))
    vector_tot.append(vector_single)

with open_or_fd(vector_out, 'wb') as f:
    for name in vector_names:
        vector = np.concatenate([single[name] for single in vector_tot],
                                axis=0)
        write_vec_flt(f, vector, key=name)