",": "",
            ":": "",
            ";": "",
            "?": "",
            "\\": " ",
            "\t": " "
            }

# Flatten the normalisation map into two parallel strings: every source
# character, and the concatenation of every replacement.
from_chars = "".join(normdict)
to_chars = "".join(normdict.values())

# NOTE(review): the two-string maketrans form needs arguments of equal length,
# but several replacements in normdict are empty strings, so to_chars is
# shorter than from_chars. Presumably that is why the call below is disabled.
#t_table = maketrans(from_chars, to_chars)


## Main

# Command-line arguments:
#   sys.argv[1]  number table, loaded through writenumbers.loadNumTable
#   sys.argv[2]  input transcript, read as UTF-8
#   sys.argv[3]  normalised output, written as UTF-8
numtable = writenumbers.loadNumTable(sys.argv[1])

# The context managers close (and flush) both files even when normalising a
# line raises, which the previous manual close() calls did not guarantee.
with codecs.open(sys.argv[2], "r", "utf8") as transcript, \
        codecs.open(sys.argv[3], "w", "utf8") as outtext:
    for line in transcript:
        # Drop sentence punctuation entirely.
        normtext1 = re.sub(r'[\.,:;\?]', '', line)
        # Tabs and backslashes become plain spaces.
        normtext2 = re.sub(r'[\t\\]', ' ', normtext1)
        # Trim the ends (this also removes the line's newline) and collapse
        # runs of spaces into one.
        normtext3 = re.sub(r'  +', ' ', normtext2.strip())
        normtext4 = writenumbers.normNumber(normtext3, numtable)
        # NOTE(review): no newline is appended here; presumably normNumber's
        # return value already carries the line terminator -- confirm.
        outtext.write(normtext4)
示例#2
0
    "\t": " ",
    #".": ""
}

# Translation table for str.translate, built from normdict. With a single
# dict argument, str.maketrans maps each key character to its replacement
# string.
t_table = str.maketrans(normdict)

## Utility function


def getuttid_text(line):
    """Split a transcript line into its utterance id and its text.

    The id is everything before the first space; the text is everything
    after it, including any trailing newline.

    Args:
        line: One raw line of the input file.

    Returns:
        A two-element list ``[utt_id, text]``. When the line contains no
        space, ``text`` is the empty string and the line ending is removed
        from ``utt_id``, so callers can always unpack two values.
    """
    utt_id, sep, text = line.partition(" ")
    if not sep:
        # Id-only (or empty) line: without a separator the line terminator
        # would otherwise end up inside the id.
        utt_id = utt_id.rstrip("\r\n")
    return [utt_id, text]


## Main

# Command-line arguments:
#   sys.argv[1]  number table, loaded through writenumbers.loadNumTable
#   sys.argv[2]  input file with one "<utt_id> <text>" entry per line (UTF-8)
#   sys.argv[3]  output file receiving one utterance id per line
#   sys.argv[4]  output file receiving the normalised text
numtable = writenumbers.loadNumTable(sys.argv[1])

# All three files are managed by the with-statement, so they are closed and
# flushed on every exit path. Previously fid was never closed at all.
with codecs.open(sys.argv[2], "r", "utf8") as textin, \
        codecs.open(sys.argv[3], "w", "utf8") as fid, \
        codecs.open(sys.argv[4], "w", "utf8") as outtext:
    for line in textin:
        utt_id, text = getuttid_text(line)
        # Apply the per-character replacements defined by t_table.
        normtext1 = text.translate(t_table)
        # Trim the ends (this also removes the newline) and collapse runs of
        # spaces into one.
        normtext2 = re.sub(r'  +', ' ', normtext1.strip())
        normtext3 = writenumbers.normNumber(normtext2, numtable)

        fid.write(utt_id + "\n")
        # NOTE(review): no newline is appended here; presumably normNumber's
        # return value already carries the line terminator -- confirm.
        outtext.write(normtext3)