Example #1
import os

import librosa
import numpy as np
from scipy.io import wavfile
from tqdm import tqdm

from text import _clean_text  # the project's Tacotron-style cleaner pipeline


def prepare_align(config):
    in_dir = config["path"]["corpus_path"]
    out_dir = config["path"]["raw_path"]
    sampling_rate = config["preprocessing"]["audio"]["sampling_rate"]
    max_wav_value = config["preprocessing"]["audio"]["max_wav_value"]
    cleaners = config["preprocessing"]["text"]["text_cleaners"]
    # Corpus layout: <in_dir>/<speaker>/<chapter>/<utterance>.{wav,normalized.txt}
    for speaker in tqdm(os.listdir(in_dir)):
        for chapter in os.listdir(os.path.join(in_dir, speaker)):
            for file_name in os.listdir(os.path.join(in_dir, speaker,
                                                     chapter)):
                if file_name[-4:] != ".wav":
                    continue
                base_name = file_name[:-4]
                text_path = os.path.join(in_dir, speaker, chapter,
                                         "{}.normalized.txt".format(base_name))
                wav_path = os.path.join(in_dir, speaker, chapter,
                                        "{}.wav".format(base_name))
                with open(text_path) as f:
                    text = f.readline().strip("\n")
                text = _clean_text(text, cleaners)

                os.makedirs(os.path.join(out_dir, speaker), exist_ok=True)
                wav, _ = librosa.load(wav_path, sr=sampling_rate)
                # Peak-normalize into the int16 range before writing.
                wav = wav / np.max(np.abs(wav)) * max_wav_value
                wavfile.write(
                    os.path.join(out_dir, speaker, "{}.wav".format(base_name)),
                    sampling_rate,
                    wav.astype(np.int16),
                )
                with open(
                        os.path.join(out_dir, speaker,
                                     "{}.lab".format(base_name)),
                        "w",
                ) as f1:
                    f1.write(text)
Example #2
def prepare_data(config):
    in_dir = config["path"]["corpus_path"]
    out_dir = config["path"]["raw_path"]
    sampling_rate = config["preprocessing"]["audio"]["sampling_rate"]
    max_wav_value = config["preprocessing"]["audio"]["max_wav_value"]
    cleaners = config["preprocessing"]["text"]["text_cleaners"]
    speaker = "LJSpeech"
    with open(os.path.join(in_dir, "metadata.csv"), encoding="utf-8") as f:
        for line in tqdm(f):
            parts = line.strip().split("|")
            base_name = parts[0]
            text = parts[2]  # columns: id|transcription|normalized transcription
            text = _clean_text(text, cleaners)

            wav_path = os.path.join(in_dir, "wavs", "{}.wav".format(base_name))
            if os.path.exists(wav_path):
                os.makedirs(os.path.join(out_dir, speaker), exist_ok=True)
                wav, _ = librosa.load(wav_path, sr=sampling_rate)
                wav = wav / np.max(np.abs(wav)) * max_wav_value  # peak-normalize to the int16 range
                wavfile.write(
                    os.path.join(out_dir, speaker, "{}.wav".format(base_name)),
                    sampling_rate,
                    wav.astype(np.int16),
                )
                with open(
                        os.path.join(out_dir, speaker,
                                     "{}.lab".format(base_name)),
                        "w",
                ) as f1:
                    f1.write(text)
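Both functions above peak-normalize the waveform into the int16 range before writing it out. A minimal standalone sketch of just that step (synthetic input; max_wav_value = 32768.0 is assumed here, as in typical configs):

import numpy as np

max_wav_value = 32768.0  # assumed config value
wav = np.array([0.1, -0.5, 0.25], dtype=np.float32)

# Scale so the loudest sample reaches the int16 limit, then cast.
scaled = wav / np.max(np.abs(wav)) * max_wav_value
print(scaled.astype(np.int16))  # [  6553 -32768  16384]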
Example #3
def normalize_string(s, labels):
    """
    Normalizes string. For example:
    'call me at 8:00 pm!' -> 'call me at eight zero zero pm'
    Args:
        s: string to normalize
        labels: labels used during model training.
    Returns:
            Normalized string
    """
    def good_token(token, labels):
        s = set(labels)
        for t in token:
            if t not in s:
                return False
        return True

    punctuation = string.punctuation
    punctuation = punctuation.replace("+", "")
    punctuation = punctuation.replace("&", "")
    for l in labels:
        punctuation = punctuation.replace(l, "")
    # Turn all punctuation to whitespace
    table = str.maketrans(punctuation, " " * len(punctuation))

    try:
        # This project's _clean_text variant also applies the punctuation table.
        text = _clean_text(s, ["english_cleaners"], table).strip()
        return ''.join([t for t in text if good_token(t, labels=labels)])
    except Exception:
        print("WARNING: Normalizing {} failed".format(s))
        return None
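A hedged usage sketch of normalize_string (the label set below is hypothetical; per the docstring, the result should be along these lines):

labels = list("abcdefghijklmnopqrstuvwxyz' &+")  # hypothetical training labels
print(normalize_string("call me at 8:00 pm!", labels))
# -> 'call me at eight zero zero pm' (exact output depends on the cleaners)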
Example #4
File: data.py  Project: zer0x42/flowtron
    def get_text(self, text):
        text = _clean_text(text, self.text_cleaners)
        words = re.findall(r'\S*\{.*?\}\S*|\S+', text)
        text = ' '.join([get_arpabet(word, self.cmudict)
                         if random.random() < self.p_arpabet else word
                         for word in words])
        text_norm = torch.LongTensor(text_to_sequence(text))
        return text_norm
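The regex in get_text is worth a note: it keeps brace-delimited ARPAbet spans as single tokens while splitting the rest of the text on whitespace. A quick self-contained check:

import re

words = re.findall(r'\S*\{.*?\}\S*|\S+', 'hello {W ER1 L D} again')
print(words)  # ['hello', '{W ER1 L D}', 'again']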
Example #5
def prepare_align(in_dir):
    with open(os.path.join(in_dir, 'metadata.csv'), encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('|')
            basename = parts[0]
            text = parts[2]  # use the normalized transcription column
            text = _clean_text(text, hp.text_cleaners)
            
            with open(os.path.join(in_dir, 'wavs', '{}.txt'.format(basename)), 'w') as f1:
                f1.write(text)
Example #6
def prepare_align(in_dir):
    for dirpath, dirnames, filenames in tqdm(os.walk(in_dir)):
        for file in filenames:
            if file.endswith(".txt"):
                path_in = os.path.join(dirpath, file)
                with open(path_in, 'r', encoding='utf-8') as f:
                    lines = f.readlines()
                    assert len(lines) == 1  # each transcript file holds exactly one line
                    text = lines[0]
                    text = _clean_text(text, hp.text_cleaners)

                path_out = path_in  # the cleaned text overwrites the original file
                with open(path_out, 'w', encoding='utf-8') as f:
                    f.write(text)
Example #7
def prepare_align(in_dir):
    with open(os.path.join(in_dir, 'prompts.gui'), encoding='utf-8') as f:
        for line in f:
            basename = line.strip('\n')
            wav_path = os.path.join(in_dir, 'wavn', '{}.wav'.format(basename))
            if os.path.exists(wav_path):
                # The id line is followed by the prompt text: strip markup
                # characters, collapse repeated spaces, and remove stray
                # spaces before punctuation.
                text = re.sub(' +', ' ', re.sub(r'[#@|]', '',
                                                next(f).strip())).strip(' ')
                text = re.sub(r'\s([?.!":,-;\'\"](?:\s|$))', r'\1', text)
                text = _clean_text(text, hp.text_cleaners)

                with open(
                        os.path.join(in_dir, 'wavn',
                                     '{}.txt'.format(basename)), 'w') as f1:
                    f1.write(text)
Example #8
def prepare_align(in_dir):
    # VCTK-style layout: transcripts under txt/<speaker>/, audio under wav48/<speaker>/.
    for spker in os.listdir(os.path.join(in_dir, 'txt')):
        for txt_file in os.listdir(os.path.join(in_dir, 'txt', spker)):
            with open(os.path.join(in_dir, 'txt', spker, txt_file),
                      encoding='utf-8') as f:
                for line in f:
                    basename = txt_file.replace(".txt", "")
                    text = line
                    text = _clean_text(text, hp.text_cleaners)

                    with open(
                            os.path.join(in_dir, 'wav48', spker,
                                         '{}.txt'.format(basename)),
                            'w') as f1:
                        f1.write(text)
Example #9
def prepare_align(in_dir):
    for root, _dirs, files in os.walk(in_dir):
        for file in files:
            if file.endswith(".txt"):
                basename = file.replace('.txt', '')
                with open(os.path.join(root, file), 'r') as rf:
                    text = rf.read().strip()
                    text = _clean_text(text, hp.text_cleaners)

                with open(os.path.join(in_dir, '{}.txt'.format(basename)),
                          'w') as f1:
                    f1.write(text)
Example #10
    def get_text(self, text):
        text = _clean_text(text, self.text_cleaners)
        words = re.findall(r'\S*\{.*?\}\S*|\S+', text)
        text = ' '.join([
            get_arpabet(word, self.cmudict)
            if random.random() < self.p_arpabet else word for word in words
        ])

        # NOTE: the config, embedding table, and G2P model are reloaded on
        # every call here; in a real pipeline they would be set up once.
        with open('config.json') as f:
            data = f.read()
        embeeding_config = json.loads(data)["embeeding_config"]  # key spelled this way in the project's config
        text_embedding = TextEmbedding(embeeding_config)
        text_norm = text_embedding.text_norm(text)
        from ZaG2P.api import load_model
        g2p_model, viet_dict = load_model()  # loaded but not used below
        text_out = text_embedding.g2s(text_norm)
        sequence = text_embedding.text2seq(text_out)

        text_norm = torch.LongTensor(sequence)
        return text_norm
Example #11
    def encode_text(self, text):
        # Python 2-era code: coerce byte strings to unicode up front.
        if type(text) is not unicode:
            text = text.decode('utf-8')

        lines = text.splitlines()
        sents = []

        for line in lines:
            sents.extend(self._sent_detector.tokenize(line.strip()))

        norm_sents = [
            text_normalize(
                _clean_text(sent,
                            ['english_cleaners']).decode('utf-8')).strip()
            for sent in sents
        ]

        final_sents = []

        for sent in norm_sents:
            chunks = self.chunk_sentence(sent)

            for chunk in chunks:
                s = chunk

                # Drop a trailing comma/semicolon, then append the model's
                # end-of-sentence marker 'E'.
                if s.endswith((',', ';')):
                    s = s[:-1]

                final_sents.append(s + 'E')

        texts = np.zeros((len(final_sents), hp.max_N), np.int32)

        for i, sent in enumerate(final_sents):
            texts[i, :len(sent)] = [self._char2idx[char] for char in sent]

        return texts
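The final loop packs each sentence into a fixed-width, zero-padded index matrix. A minimal sketch with a hypothetical character table (the real _char2idx and hp.max_N come from the project's hyperparameters):

import numpy as np

# Hypothetical vocabulary: P = padding, E = end of sentence, then characters.
char2idx = {c: i for i, c in enumerate("PE abcdefghijklmnopqrstuvwxyz'.,?!")}
sents = ['hello worldE', 'hiE']
max_N = 16

texts = np.zeros((len(sents), max_N), np.int32)
for i, sent in enumerate(sents):
    texts[i, :len(sent)] = [char2idx[c] for c in sent]
print(texts[1])  # [10 11  1  0 ...]: 'h', 'i', 'E', then zero padding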
Example #12
import argparse

import text
from utils import load_filepaths_and_text

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--out_extension", default="cleaned")
    parser.add_argument("--text_index", default=1, type=int)
    parser.add_argument("--filelists",
                        nargs="+",
                        default=[
                            "filelists/ljs_audio_text_val_filelist.txt",
                            "filelists/ljs_audio_text_test_filelist.txt"
                        ])
    parser.add_argument("--text_cleaners",
                        nargs="+",
                        default=["english_cleaners2"])

    args = parser.parse_args()

    for filelist in args.filelists:
        print("START:", filelist)
        filepaths_and_text = load_filepaths_and_text(filelist)
        for i in range(len(filepaths_and_text)):
            original_text = filepaths_and_text[i][args.text_index]
            cleaned_text = text._clean_text(original_text, args.text_cleaners)
            filepaths_and_text[i][args.text_index] = cleaned_text

        new_filelist = filelist + "." + args.out_extension
        with open(new_filelist, "w", encoding="utf-8") as f:
            f.writelines(["|".join(x) + "\n" for x in filepaths_and_text])
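For reference, load_filepaths_and_text in these repos is usually a small helper that splits each filelist line on '|' (a sketch of the common Tacotron 2 / VITS version; check the project's own utils.py):

def load_filepaths_and_text(filename, split='|'):
    with open(filename, encoding='utf-8') as f:
        return [line.strip().split(split) for line in f]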
Example #13
def clean_text(text):
    """
    This uses Tacotron's text cleaners to do some extra cleaning. For example,
    one of the steps it takes is to convert numbers into words.
    """
    return _clean_text(text, ['english_cleaners'])
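A hedged usage sketch (output shown is indicative; the exact wording depends on the cleaner version):

print(clean_text('Dr. Smith paid $16 in 1990.'))
# e.g. 'doctor smith paid sixteen dollars in nineteen ninety.'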