def get_labformat(timestamp, subsample, symbol_table=None):
    """Render per-token frame segments as ``begin end token`` label lines.

    Each entry of ``timestamp`` is a sequence of token ids; its length is
    taken as the number of encoder frames the segment spans. Segments are
    laid out back to back starting at 0 seconds, and every line is also
    echoed to stdout.

    Args:
        timestamp: Sized sequence of segments, each a non-empty sequence of
            integer token ids (id 0 is treated as blank).
        subsample: Encoder subsampling factor; one encoder frame covers
            ``0.01 * subsample`` seconds (10 ms hop length).
        symbol_table: Optional mapping from token id to its printable
            symbol. When omitted, the module-level ``char_dict`` is used,
            which is what this function always relied on before.

    Returns:
        A list of strings, one per segment, formatted as
        ``"{begin:.2f} {end:.2f} {symbol}\\n"``.
    """
    # Only touch the global table when the caller did not supply one, so the
    # function can be used (and tested) without module-level state.
    table = char_dict if symbol_table is None else symbol_table
    blank_id = 0
    begin = 0
    labformat = []
    last_idx = len(timestamp) - 1
    for idx, t in enumerate(timestamp):
        # 25 ms frame length, 10 ms hop length, scaled by the subsampling
        # rate that the caller passed in (previously this argument was
        # silently overwritten from a global config on every iteration).
        duration = len(t) * 0.01 * subsample
        if idx < last_idx:
            # Non-final segments are labelled by their last id.
            token = t[-1]
        else:
            # The final segment is labelled by its first non-blank id. If it
            # holds only blanks, fall back to the blank id instead of
            # failing on an unbound variable.
            token = next((i for i in t if i != blank_id), blank_id)
        line = "{:.2f} {:.2f} {}".format(begin, begin + duration, table[token])
        print(line)
        labformat.append(line + "\n")
        begin = begin + duration
    return labformat
mask = make_pad_mask(encoder_out_lens) # (B, maxlen) topk_index = topk_index.masked_fill_(mask, eos) # (B, maxlen) alignment = [hyp.tolist() for hyp in topk_index] hyps = [remove_duplicates_and_blank(hyp) for hyp in alignment] for index, i in enumerate(key): content = [] if len(hyps[index]) > 0: for w in hyps[index]: if w == eos: break content.append(char_dict[w]) f_ctc_results.write('{} {}\n'.format(i, " ".join(content))) f_ctc_results.flush() for index, i in enumerate(key): timestamp = get_frames_timestamp(alignment[index]) subsample = get_subsample(configs) word_seq, word_time = get_labformat_frames( timestamp, subsample, char_dict) for index_j in range(len(word_seq)): for keyword in word_unit_list: keyword_len = len(word_unit_dict[keyword]) if index_j + keyword_len > len(word_seq): continue if (word_seq[index_j:index_j + keyword_len] == word_unit_dict[keyword]): f_keyword_results.write("{} {} {} {} {}\n".format( word_id_dict[keyword], i, word_time[index_j][0], word_time[index_j + keyword_len - 1][1], 0.0)) f_keyword_results.flush() f_keyword_results.close()