trainer = Trainer(params, args.model_dir, single_cpu=True)
with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
    dim = int(f.readline().strip())
# trainer.build("predict", dim=dim)
trainer.build("predict", dim=dim, loss_type="extract_asoftmax", num_speakers=154)

if args.rspecifier.rsplit(".", 1)[1] == "scp":
    # The rspecifier cannot be scp.
    sys.exit("The rspecifier must be ark or input pipe")

fp_out = open_or_fd(args.wspecifier, "wb")
for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
    if feature.shape[0] < args.min_chunk_size:
        tf.logging.info("[INFO] Key %s length too short, %d < %d, skip." % (
            key, feature.shape[0], args.min_chunk_size))
        continue
    if feature.shape[0] > args.chunk_size:
        feature_array = []
        feature_length = []
        num_chunks = int(np.ceil(
            float(feature.shape[0] - args.chunk_size) / (args.chunk_size / 2))) + 1
        tf.logging.info("[INFO] Key %s length %d > %d, split to %d segments." % (
            key, feature.shape[0], args.chunk_size, num_chunks))
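# The formula above lays out 50%-overlapping chunks. A minimal sketch of the
# resulting segment layout, assuming a hop of chunk_size // 2 and a shorter
# final chunk (the loop body that fills feature_array is not shown above, so
# this layout is an assumption, not the original code):
import numpy as np

def split_with_overlap(num_frames, chunk_size):
    """Return (start, length) pairs produced by the num_chunks formula."""
    shift = chunk_size // 2
    num_chunks = int(np.ceil(float(num_frames - chunk_size) / shift)) + 1
    return [(i * shift, min(chunk_size, num_frames - i * shift))
            for i in range(num_chunks)]

# A 550-frame utterance with chunk_size=200 gives 5 segments:
# (0, 200), (100, 200), (200, 200), (300, 200), (400, 150)
print(split_with_overlap(550, 200))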
import sys
from dataset.kaldi_io import open_or_fd, read_vec_int_scp, write_vec_int
import numpy as np

phones = sys.argv[1]  # phones.txt: "<phone> <id>" per line (inferred from usage below)
phone_scp = sys.argv[2]
monophone_ark = sys.argv[3]

# Collapse position-dependent phones (e.g. AA_B, AA_E, AA_I, AA_S) to a single
# monophone index.
ind = 0
phone2ind = {}
num2ind = {}
with open(phones, 'r') as f:
    for line in f.readlines():
        phone, num = line.strip().split(" ")
        num = int(num)
        p = phone.rsplit("_", 1)[0]
        if p not in phone2ind:
            phone2ind[p] = ind
            ind += 1
        num2ind[num] = phone2ind[p]

ind2cnt = {}
fp_out = open_or_fd(monophone_ark, "wb")
for key, vec in read_vec_int_scp(phone_scp):
    a = []
    for v in vec:
        a.append(num2ind[v])
        if num2ind[v] not in ind2cnt:
            ind2cnt[num2ind[v]] = 0
        ind2cnt[num2ind[v]] += 1
    write_vec_int(fp_out, np.array(a), key=key)
fp_out.close()

for ind in ind2cnt:
    print("%d %d" % (ind, ind2cnt[ind]))
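# A quick illustration of the mapping above with a toy phones.txt (hypothetical
# content): all position-dependent variants of AA share one monophone index,
# while SIL (no "_" suffix) keeps its own.
toy_lines = ["SIL 0", "AA_B 1", "AA_E 2", "AA_I 3", "AA_S 4", "AE_B 5"]
toy_phone2ind, toy_num2ind, toy_ind = {}, {}, 0
for line in toy_lines:
    phone, num = line.split(" ")
    p = phone.rsplit("_", 1)[0]
    if p not in toy_phone2ind:
        toy_phone2ind[p] = toy_ind
        toy_ind += 1
    toy_num2ind[int(num)] = toy_phone2ind[p]
print(toy_num2ind)  # {0: 0, 1: 1, 2: 1, 3: 1, 4: 1, 5: 2}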
import sys
from dataset.kaldi_io import open_or_fd, read_mat_scp, write_mat
import numpy as np

if len(sys.argv) != 4:
    print('Usage: %s phone_class post_in_scp post_out_ark' % sys.argv[0])
    quit()

phone_class = []
with open(sys.argv[1], "r") as f:
    for line in f.readlines():
        phones = line.strip().split(" ")
        phone_class.append([int(p) for p in phones])
num_classes = len(phone_class)

fp_out = open_or_fd(sys.argv[3], "wb")
for key, mat in read_mat_scp(sys.argv[2]):
    post_new = np.zeros((mat.shape[0], num_classes))
    for index, phones in enumerate(phone_class):
        post_new[:, index] = np.sum(mat[:, phones], axis=1)
    write_mat(fp_out, post_new, key=key)
fp_out.close()
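# A worked toy example of the column merge above: with phone_class [[0, 1], [2]],
# per-frame posteriors over 3 phones collapse to 2 classes whose rows still sum
# to one (toy_mat and toy_classes are illustrations, not part of the script).
import numpy as np

toy_mat = np.array([[0.5, 0.2, 0.3],
                    [0.1, 0.1, 0.8]])
toy_classes = [[0, 1], [2]]
toy_new = np.stack([toy_mat[:, c].sum(axis=1) for c in toy_classes], axis=1)
print(toy_new)  # [[0.7 0.3]
                #  [0.2 0.8]]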
trainer = BaseMT(params, args.model_dir, dim, num_total_speakers, num_total_phones, single_cpu=True)
trainer.build("predict")
tf.logging.info("Extract embeddings (or outputs) from node %s" % args.node)
assert args.node in trainer.endpoints, "The node %s is not in the endpoints" % args.node

if args.rspecifier.rsplit(".", 1)[1] == "scp":
    # The rspecifier cannot be scp.
    sys.exit("The rspecifier must be ark or input pipe.")
if args.ali_rspecifier.rsplit(".", 1)[1] != "scp":
    sys.exit("The ali-rspecifier is expected to be an scp file.")

num_err = 0
num_done = 0

# Preload the first alignment.
fp_ali = open_or_fd(args.ali_rspecifier)
ali_key, ali_value = read_ali(fp_ali)

fp_out = open_or_fd(args.wspecifier, "wb")
for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
    # if feature.shape[0] < args.min_chunk_size:
    #     tf.logging.info("[INFO] Key %s length too short, %d < %d, skip." % (key, feature.shape[0], args.min_chunk_size))
    #     continue

    # There may be fewer alignments than features (some utterances fail to
    # decode), so features without a matching alignment are skipped.
    if ali_key != key:
        tf.logging.warn("Cannot find the ali for %s." % key)
        num_err += 1
        continue
    if feature.shape[0] > args.chunk_size:
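# The matching above is a sequential join of two key-sorted streams, where the
# alignments are a subset of the feature keys. A minimal sketch of that
# pattern, assuming read_ali-style (key, value) pairs (join_subset and the toy
# data below are illustrations, not part of the script):
def join_subset(features, alignments):
    """Yield (key, feature, ali); features without an alignment are skipped."""
    ali_iter = iter(alignments)
    ali_key, ali_value = next(ali_iter, (None, None))
    for key, feature in features:
        if ali_key != key:
            continue  # no alignment for this utterance
        yield key, feature, ali_value
        ali_key, ali_value = next(ali_iter, (None, None))

feats = [("utt1", "f1"), ("utt2", "f2"), ("utt3", "f3")]
alis = [("utt1", [0, 1]), ("utt3", [2])]
print(list(join_subset(feats, alis)))
# [('utt1', 'f1', [0, 1]), ('utt3', 'f3', [2])]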
pdf_to_index = {}
index_to_pdf = []
with open(args.phone_set, "r") as f:
    index = 0
    for line in f.readlines():
        phones = line.strip().split(" ")
        tmp = set()
        for p in phones:
            tmp |= phone_to_pdf[phone_id[p]]
            for pdf in phone_to_pdf[phone_id[p]]:
                # A pdf must not belong to two different phone sets.
                if pdf in pdf_to_index:
                    assert (pdf_to_index[pdf] == index)
                pdf_to_index[pdf] = index
        index_to_pdf.append(list(tmp))
        index += 1
num_classes = len(index_to_pdf)

fp_out = open_or_fd(args.post_out, "wb")
for index, (key, post) in enumerate(read_mat_scp(args.post_in)):
    post_new = np.zeros((post.shape[0], num_classes))
    for i in range(num_classes):
        post_new[:, i] = np.sum(post[:, index_to_pdf[i]], axis=1)
    # Each row should still sum to one (up to flooring error).
    assert (np.allclose(np.sum(post_new, axis=1), np.ones(post.shape[0]), rtol=1e-02))
    write_mat(fp_out, post_new, key=key)
fp_out.close()
print("%d classes" % len(index_to_pdf)) num_classes = len(index_to_pdf) # Sanity check for pdf in pdf_to_index: assert (pdf in index_to_pdf[pdf_to_index[pdf]]) index_stat = {} # index -> [count, post] num_acc = 0 num_frames = 0 dim = 0 num_err = 0 num_done = 0 fp_ali = open_or_fd(args.ali) ali_key, ali_value = read_ali(fp_ali) for index, (key, post) in enumerate(read_mat_scp(args.post)): if ali_key != key: num_err += 1 continue # Main computation # Convert alignment ali_value_new = [pdf_to_index[a] for a in ali_value] # Convert posteriors post_new = np.zeros((post.shape[0], num_classes)) for i in range(num_classes): post_new[:, i] = np.sum(post[:, index_to_pdf[i]], axis=1) assert (np.allclose(np.sum(post_new, axis=1),
                     dtype=np.float64)
# Sanity check on the prior and convert it to log.
assert (np.allclose(np.sum(prior_vec), 1.0))
# Since the prior is floored during training, it is safe to apply log to it.
log_prior_vec = np.log(prior_vec)[np.newaxis, :]

if args.rspecifier.rsplit(".", 1)[1] == "scp":
    # The rspecifier cannot be scp.
    sys.exit("The rspecifier must be ark or input pipe.")

num_done = 0
is_output_logpost = False
if len(args.write_per_frame_log_posteriors) > 0:
    is_output_logpost = True
    fp_logpost = open_or_fd(args.write_per_frame_log_posteriors, "wb")

fp_out = open_or_fd(args.wspecifier, "wb")
for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
    if feature.shape[0] > args.chunk_size:
        # feature_array = []
        # ali_array = []
        # feature_length = []
        # num_chunks = int(np.ceil(float(feature.shape[0]) / args.chunk_size))
        # tf.logging.info("[INFO] Key %s length %d > %d, split to %d segments." % (
        #     key, feature.shape[0], args.chunk_size, num_chunks))
        # for i in range(num_chunks):
        #     start = i * args.chunk_size
        #     this_chunk_size = args.chunk_size if feature.shape[0] - start > args.chunk_size else feature.shape[0] - start
        #     feature_length.append(this_chunk_size)
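# What log_prior_vec is for: in hybrid decoding, per-frame log-posteriors from
# the network are converted to pseudo log-likelihoods by subtracting the
# log-prior. A minimal numpy sketch of that step (the subtraction itself is
# not shown in the fragment above, so this usage is an assumption):
import numpy as np

log_post = np.log(np.array([[0.7, 0.3],
                            [0.2, 0.8]]))           # (frames, classes)
log_prior = np.log(np.array([0.6, 0.4]))[np.newaxis, :]
log_like = log_post - log_prior                     # broadcast over frames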
import sys
from dataset.kaldi_io import open_or_fd, read_vec_flt_scp, write_vec_flt
import numpy as np

vector_in = sys.argv[1:-1]
vector_out = sys.argv[-1]

vector_tot = []
vector_names = []
vector_single = {}
# The first scp defines the utterance order; the others must match it.
for key, vec in read_vec_flt_scp(vector_in[0]):
    vector_names.append(key)
    vector_single[key] = vec
    dim = vec.shape[0]
vector_tot.append(vector_single)

for vector_file in vector_in[1:]:
    vector_single = {}
    index = 0
    for key, vec in read_vec_flt_scp(vector_file):
        assert (key == vector_names[index])
        index += 1
        vector_single[key] = vec
        dim = vec.shape[0]
    vector_tot.append(vector_single)

with open_or_fd(vector_out, 'wb') as f:
    for name in vector_names:
        vector = []
        for vector_single in vector_tot:
            vector.append(vector_single[name])
        vector = np.concatenate(vector, axis=0)
        write_vec_flt(f, vector, key=name)
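# Hypothetical invocation (the script name is an assumption): concatenate a
# 512-dim and a 256-dim vector per utterance into one 768-dim vector:
#   python concat_vectors.py exp/a/xvector.scp exp/b/xvector.scp ark:exp/ab/xvector.ark
# The per-utterance merge is a plain concatenation along the feature axis:
import numpy as np
print(np.concatenate([np.zeros(512), np.ones(256)], axis=0).shape)  # (768,)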