Exemplo n.º 1
0
                np.tile(embeddings[:, -1, :], [1, pad, 1])
            ],
                                        axis=1)
            embedding_last = trainer.predict(feature_array[-1])
            pad = (feature_array[-1].shape[0] - embedding_last.shape[0]) / 2
            embedding_last = np.concatenate([
                np.tile(embedding_last[0, :], [pad, 1]), embedding_last,
                np.tile(embedding_last[-1, :], [pad, 1])
            ],
                                            axis=0)
            embeddings = np.reshape(embeddings, [
                embeddings.shape[0] * embeddings.shape[1], embeddings.shape[2]
            ])
            embedding = np.concatenate([embeddings, embedding_last], axis=0)
        else:
            tf.logging.info("[INFO] Key %s length %d." %
                            (key, feature.shape[0]))
            embedding = trainer.predict(feature)
            # padding
            pad = (feature.shape[0] - embedding.shape[0]) / 2
            embedding = np.concatenate([
                np.tile(embedding[0, :], [pad, 1]), embedding,
                np.tile(embedding[-1, :], [pad, 1])
            ],
                                       axis=0)

        assert (embedding.shape[0] == feature.shape[0])
        write_mat(fp_out, embedding, key=key)
    fp_out.close()
    trainer.close()
Exemplo n.º 2
0
import sys
from dataset.kaldi_io import open_or_fd, read_mat_scp, write_mat
import numpy as np

# Merge the columns of each posterior matrix into phone classes.
#
# Arguments (positional, all required):
#   phone_class   text file; each non-blank line lists the integer column
#                 indices that are summed into one output class
#   post_in_scp   scp file of input matrices, read with read_mat_scp
#   post_out_ark  output destination, opened with open_or_fd in "wb" mode
if len(sys.argv) != 4:
    # sys.exit with a message prints it to stderr and exits with a non-zero
    # status, so a calling pipeline notices the bad invocation. quit() is a
    # site helper intended for interactive sessions and exited with status 0.
    sys.exit('Usage: %s phone_class post_in_scp post_out_ark' % sys.argv[0])

phone_class_path, post_in_scp, post_out_ark = sys.argv[1:4]

phone_class = []
with open(phone_class_path, "r") as f:
    # Iterate the file lazily instead of materializing it with readlines().
    for line in f:
        # split() without an argument tolerates tabs and repeated spaces.
        phones = line.split()
        if not phones:
            # A blank line (e.g. a trailing newline at the end of the file)
            # does not describe a class; int('') would raise on it.
            continue
        phone_class.append([int(p) for p in phones])
num_classes = len(phone_class)

fp_out = open_or_fd(post_out_ark, "wb")
try:
    for key, mat in read_mat_scp(post_in_scp):
        # One output column per class; rows are kept as in the input matrix.
        post_new = np.zeros((mat.shape[0], num_classes))
        for index, phones in enumerate(phone_class):
            # Sum the input columns that belong to this class.
            post_new[:, index] = np.sum(mat[:, phones], axis=1)
        write_mat(fp_out, post_new, key=key)
finally:
    # Close the output even if reading or writing a matrix fails.
    fp_out.close()
Exemplo n.º 3
0
            #
            # log_prob = np.reshape(log_prob, [log_prob.shape[0] * log_prob.shape[1], log_prob.shape[2]])
            # log_prob = np.concatenate([log_prob, log_prob_last], axis=0)
            # assert(log_prob.shape[0] == feature.shape[0] and log_prob.shape[1] == prior_vec.shape[0])

            raise NotImplementedError("Do not let the utterance to be split.")
        else:
            tf.logging.info("[INFO] Key %s length %d." %
                            (key, feature.shape[0]))
            log_prob = trainer.predict_phone(node, feature, [feature.shape[0]])
            assert (log_prob.shape[0] == feature.shape[0]
                    and log_prob.shape[1] == prior_vec.shape[0])

        # Convert log-posteriors to log-likelihoods by subtracting the log-priors
        log_like = log_prob - log_prior_vec
        write_mat(fp_out, log_like, key=key)

        if is_output_logpost:
            write_mat(fp_logpost,
                      np.array(log_prob, dtype=np.float32),
                      key=key)

        num_done += 1

    if is_output_logpost:
        fp_logpost.close()

    fp_out.close()
    trainer.close()
    tf.logging.info("Compute %d log-likelihood." % (num_done))
Exemplo n.º 4
0
            #     start = i * args.chunk_size
            #     this_chunk_size = args.chunk_size if feature.shape[0] - start > args.chunk_size else feature.shape[
            #                                                                                              0] - start
            #     feature_length.append(this_chunk_size)
            #     feature_array.append(feature[start:start + this_chunk_size])
            #
            # # Except for the last feature, the length of other features should be the same (=chunk_size)
            # log_prob = trainer.predict_phone(node,
            #                                    np.array(feature_array[:-1], dtype=np.float32),
            #                                    feature_length[:-1])
            # log_prob_last = trainer.predict_phone(node, feature_array[-1], [feature_length[-1]])
            #
            # log_prob = np.reshape(log_prob, [log_prob.shape[0] * log_prob.shape[1], log_prob.shape[2]])
            # log_prob = np.concatenate([log_prob, log_prob_last], axis=0)
            # assert(log_prob.shape[0] == feature.shape[0] and log_prob.shape[1] == prior_vec.shape[0])

            raise NotImplementedError("Do not let the utterance to be split.")
        else:
            tf.logging.info("[INFO] Key %s length %d." % (key, feature.shape[0]))
            log_prob = trainer.predict_phone(node, feature, [feature.shape[0]])
            assert(log_prob.shape[0] == feature.shape[0] and log_prob.shape[1] == prior_vec.shape[0])

        # Convert log-posteriors to log-likelihoods by subtracting the log-priors
        log_like = log_prob - log_prior_vec
        write_mat(fp_out, log_like, key=key)
        num_done += 1

    fp_out.close()
    trainer.close()
    tf.logging.info("Compute %d log-likelihood." % (num_done))
Exemplo n.º 5
0
    config_json = os.path.join(args.model_dir, "nnet/config.json")
    if not os.path.isfile(config_json):
        sys.exit("Cannot find params.json in %s" % config_json)
    params = Params(config_json)

    # Attention weights
    params.embedding_node = "attention_weights"

    with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
        dim = int(f.readline().strip())
    trainer = Trainer(params, args.model_dir, dim, single_cpu=True)
    trainer.build("predict")

    if args.rspecifier.rsplit(".", 1)[1] == "scp":
        # The rspecifier cannot be scp
        sys.exit("The rspecifier must be ark or input pipe")

    fp_out = open_or_fd(args.wspecifier, "wb")
    for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
        if feature.shape[0] < args.min_chunk_size:
            tf.logging.info("[INFO] Key %s length too short, %d < %d, skip." %
                            (key, feature.shape[0], args.min_chunk_size))
            continue
        if feature.shape[0] > args.chunk_size:
            # We only extract the first segment
            feature = feature[:args.chunk_size]
        attention_weights = trainer.predict(feature)
        write_mat(fp_out, attention_weights, key=key)
    fp_out.close()
    trainer.close()