def read_stanford_dependency_parses(dirpath):
    """Load every ``.relations`` file found directly inside *dirpath*.

    Each name returned by ``os.listdir(dirpath)`` that contains the substring
    ``".relations"`` is read with ``io.open_file_line_by_line`` and stored
    under the part of the name that precedes the first ``".rel"``.

    Args:
        dirpath: Directory to scan; sub-directories are not descended into.

    Returns:
        A dict mapping the truncated file name to whatever
        ``io.open_file_line_by_line`` returns for the file's full path
        (presumably the file's lines -- confirm against that helper).
        The dict is empty when no name matches.
    """
    # NOTE(review): ``io`` must be the module in scope that provides
    # ``open_file_line_by_line``; the standard-library ``io`` does not.
    return {
        name.split(".rel")[0]: io.open_file_line_by_line(
            os.path.join(dirpath, name))
        for name in os.listdir(dirpath)
        if ".relations" in name
    }
def read_stanford_dependency_parses(dirpath):
    """Read the ``.relations`` files of a directory into a dict.

    The directory listing is walked once. Names that do not contain the
    substring ``".relations"`` are ignored; every other name is passed, joined
    onto *dirpath*, to ``io.open_file_line_by_line``.

    Args:
        dirpath: Directory whose immediate entries are inspected.

    Returns:
        A dict keyed by the text of each matching name up to its first
        ``".rel"``; values are the results of ``io.open_file_line_by_line``
        (presumably a sequence of lines -- verify against that helper).
        An empty dict is returned when nothing matches.
    """
    parses = {}
    for entry in os.listdir(dirpath):
        # Guard clause: skip anything that is not a relations file.
        if ".relations" not in entry:
            continue
        key = entry.split(".rel")[0]
        full_path = os.path.join(dirpath, entry)
        # NOTE(review): ``io`` must be the module in scope that provides
        # ``open_file_line_by_line``; the standard-library ``io`` does not.
        parses[key] = io.open_file_line_by_line(full_path)
    return parses
nargs=argparse.REMAINDER, default='-order 4 -interpolate -gt3min 1 -wbdiscount -debug 3'.split()) parser.add_argument( '-f', action='store_true', help="Force overwrite of outputpath file if it exists.") parser.add_argument( '-no_syll_stress', action='store_true', help="Replace syllable stress markers with a boundary marker sb.") args = parser.parse_args() wf = io.open_writefile_safe(os.path.join(args.outpath, "sents.txt"), args.f) labs = io.parse_mlf(io.open_file_line_by_line(args.input_mlf), "align_mlf") labs = get_phoneme_strings(labs, args.no_syll_stress) for lab in labs: wf.write(" ".join(lab) + "\n") wf.close() txtpath = os.path.join(args.outpath, "sents.txt") lmpath = os.path.join(args.outpath, "ngram.lm") #This allows for people to pass their own options to the ngram binary options = " " + " ".join(args.lm_binary_options) subprocess.call(args.ngram_binary_path + " -text " + txtpath + " -lm " + lmpath + options,
help= "Merge an HVite state level alignment MLF which does not contain SP and syllable stress information with a phone level alignment ready mlf which does and output a state-level with SP and syllable stress.", metavar=('state_mlf_path', 'phone_mlf_path', 'out_mlf_path')) parser.add_argument( '-collapse_closure', action="store_true", help= "Collapses stops split into closure and release into one when merging state_align_labs with full_context_labs." ) parser.add_argument('-f', action="store_true", help="Force overwrite of files in output dir.") args = parser.parse_args() if args.merge_hvite_state_with_sp_align_mlf != None: state_mlf = sire_io.open_file_line_by_line( args.merge_hvite_state_with_sp_align_mlf[0]) phone_mlf = sire_io.open_file_line_by_line( args.merge_hvite_state_with_sp_align_mlf[1]) state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf") phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf") merged_utts = merge_hvite_state_with_sp_align_mlf( state_utts, phone_utts) if args.f == True: wf = sire_io.open_writefile_safe( args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True) else: wf = sire_io.open_writefile_safe( args.merge_hvite_state_with_sp_align_mlf[2]) wf.write("#!MLF!#\n") for utt in merged_utts: wf.write("\"*/" + utt.pop(0) + ".rec\"\n")
else: c_merge.append(s_lab[s_lab_count]) s_lab_count += 1 merged.append(c_merge) return merged if __name__ == "__main__": parser = argparse.ArgumentParser(description='Utility file convertion related methods.') parser.add_argument('-merge_hvite_state_with_full_context', nargs=3, help="Merge an HVite state level alignment MLF with full-context labels in a directory and output state-level full-context labels to another.", metavar=('mlf_path', 'lab_dir', 'out_dir')) parser.add_argument('-merge_hvite_state_with_sp_align_mlf', nargs=3, help="Merge an HVite state level alignment MLF which does not contain SP and syllable stress information with a phone level alignment ready mlf which does and output a state-level with SP and syllable stress.", metavar=('state_mlf_path', 'phone_mlf_path', 'out_mlf_path')) parser.add_argument('-collapse_closure', action="store_true", help="Collapses stops split into closure and release into one when merging state_align_labs with full_context_labs.") parser.add_argument('-f', action="store_true", help="Force overwrite of files in output dir.") args = parser.parse_args() if args.merge_hvite_state_with_sp_align_mlf != None: state_mlf = sire_io.open_file_line_by_line(args.merge_hvite_state_with_sp_align_mlf[0]) phone_mlf = sire_io.open_file_line_by_line(args.merge_hvite_state_with_sp_align_mlf[1]) state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf") phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf") merged_utts = merge_hvite_state_with_sp_align_mlf(state_utts, phone_utts) if args.f == True: wf = sire_io.open_writefile_safe(args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True) else: wf = sire_io.open_writefile_safe(args.merge_hvite_state_with_sp_align_mlf[2]) wf.write("#!MLF!#\n") for utt in merged_utts: wf.write("\"*/"+utt.pop(0)+".rec\"\n") for phone in utt: for state in phone: wf.write(" ".join(state)+"\n") wf.write(".\n")