示例#1
0
def load_data():
    """Load the file names and encoded transcripts listed in ``metadata.csv``.

    Every non-blank line of ``<Hyper.data_dir>/metadata.csv`` must have the
    form ``fname|text``.  The text is passed through ``text_normalize``, the
    end-of-string mark ``'E'`` is appended, and each character is mapped to
    its index via the ``char2idx`` table returned by ``load_vocab``.

    Returns:
        A tuple ``(names, lengths, texts)`` of three parallel lists: the
        file name, the encoded length (end mark included) and the list of
        character indices of every entry.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            ``|``-separated fields, or if the normalized text still
            contains an ASCII digit.
        KeyError: if a character is missing from the vocabulary (assuming
            ``char2idx`` is a plain dict -- confirm against ``load_vocab``).
        Exception: if an encoded text is longer than
            ``Hyper.data_max_text_length``.
    """
    char2idx, _ = load_vocab()

    # Named ``metadata_path`` rather than ``csv`` so the stdlib module name
    # is not shadowed.
    metadata_path = os.path.join(Hyper.data_dir, "metadata.csv")
    names, lengths, texts = [], [], []
    with codecs.open(metadata_path, 'r', "utf-8") as f:
        # Iterate the file directly instead of materialising every line first.
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a stray newline at end of file)
                # instead of failing on the two-field unpack below.
                continue
            fname, text = line.split('|')
            text = text_normalize(text) + 'E'  # append the end of string mark
            if any('0' <= char <= '9' for char in text):
                raise ValueError(
                    "[data]: after text normalize, there should be no digits."
                )
            text = [char2idx[char] for char in text]
            # Validate before recording the entry so all checks come first.
            if len(text) > Hyper.data_max_text_length:
                raise Exception("[load data]: length of text is out of range")

            names.append(fname)
            lengths.append(len(text))
            texts.append(text)

    return names, lengths, texts
示例#2
0
def process_text(text, padding=False):
    """Normalize *text* and encode it as a sequence of character indices.

    The text is passed through ``text_normalize``, the end-of-string mark
    ``'E'`` is appended, and each character is mapped to its index via the
    ``char2idx`` table returned by ``load_vocab``.

    Args:
        text: The raw input string.
        padding: When true, right-pad the encoded sequence with zeros up to
            ``Hyper.data_max_text_length``.

    Returns:
        A list of character indices when ``padding`` is false; otherwise a
        1-D NumPy array of length ``Hyper.data_max_text_length``.
        NOTE: the padded array is floating point, because ``np.zeros``
        defaults to float64 and ``np.concatenate`` promotes to it.  This is
        kept as-is for backward compatibility; callers that need integer
        indices must cast.

    Raises:
        ValueError: if the normalized text still contains an ASCII digit, or
            if ``padding`` is requested and the encoded text is longer than
            ``Hyper.data_max_text_length``.
    """
    char2idx, _ = load_vocab()
    text = text_normalize(text) + 'E'  # append the end of string mark
    if any('0' <= char <= '9' for char in text):
        raise ValueError("[data]: after text normalize, there should be no digits.")
    text = [char2idx[char] for char in text]
    if padding:
        pad_len = Hyper.data_max_text_length - len(text)
        if pad_len < 0:
            # Fail with a clear message instead of numpy's
            # "negative dimensions are not allowed".
            raise ValueError("[data]: length of text is out of range")
        text = np.concatenate((text, np.zeros(pad_len)))
    return text