Example #1
0
    def __init__(self,
                 filename_py="train.txt",
                 vocab_file_py='vocab_pinyin.txt',
                 filename_hz="train_hanzi.txt",
                 vocab_file_hz='vocab_hanzi.txt',
                 sort=False,
                 descent=False):
        """Load pinyin (and optionally hanzi) metadata, vocabularies and the
        utterance ordering used for iteration.

        Args:
            filename_py: metadata file with pinyin transcripts.
            vocab_file_py: newline-separated pinyin vocabulary file.
            filename_hz: metadata file with hanzi transcripts (read only
                when ``hp.with_hanzi`` is set).
            vocab_file_hz: newline-separated hanzi vocabulary file.
            sort: if True, order utterances by ascending mel-spectrogram
                length; otherwise keep metadata order.
            descent: stored flag; presumably selects the reversed ordering
                at iteration time — confirm against the __getitem__ caller.
        """
        self.basename, self.py_text = process_meta(
            os.path.join(hp.preprocessed_path, filename_py))
        self.sort = sort

        # Context manager closes the vocab file handle promptly (the
        # original left it dangling until GC).
        with open(os.path.join(hp.preprocessed_path, vocab_file_py)) as f:
            self.py_vocab = f.read().split('\n')

        # The vocabulary must contain padding and silence symbols.
        assert ('pad' in self.py_vocab and 'sp1' in self.py_vocab
                and 'sil' in self.py_vocab)
        # NOTE: a redundant second process_meta() call on the same file was
        # removed here; self.py_text is already set above.

        self.py2idx = {c: i for i, c in enumerate(self.py_vocab)}

        if hp.with_hanzi:
            with open(os.path.join(hp.preprocessed_path, vocab_file_hz)) as f:
                self.hz_vocab = f.read().split('\n')
            assert ('pad' in self.hz_vocab and 'sp1' in self.hz_vocab
                    and 'sil' in self.hz_vocab)
            _, self.hz_text = process_meta(
                os.path.join(hp.preprocessed_path, filename_hz))
            self.hz2idx = {c: i for i, c in enumerate(self.hz_vocab)}

        if sort:
            # BUG FIX: the original referenced an undefined name `filename`
            # here (NameError at runtime); the intended file is filename_py.
            with open(os.path.join(hp.preprocessed_path, filename_py)) as f:
                names = [line.split('|')[0]
                         for line in f.read().split('\n')[:-1]]
            # Number of mel frames per utterance, used as the sort key.
            mel_len = [
                np.load(hp.preprocessed_path +
                        '/mel/baker-mel-{}.npy'.format(n)).shape[0]
                for n in names
            ]
            self.map_idx = np.argsort(mel_len)
        else:
            self.map_idx = [i for i in range(len(self.basename))]

        self.map_idx_rev = self.map_idx[::-1]

        self.descent = descent
Example #2
0
def get_data_to_buffer(file='train.txt'):
    """Read every training example (text sequence, duration alignment and
    ground-truth mel spectrogram) into memory and return them as a list of
    dicts keyed by "name", "text", "duration" and "mel_target".
    """
    out_dir = os.path.join(hparams.data_path, 'outdir')
    basename, text = process_meta(os.path.join(out_dir, file), [])

    buffer = []
    t0 = time.perf_counter()
    # Walk metadata entries in lockstep: one utterance name per raw text line.
    for name, raw_text in tqdm(zip(basename, text), total=len(text)):
        mel_path = os.path.join(
            out_dir, 'mel', "{}-mel-{}.npy".format(hparams.dataset, name))
        ali_path = os.path.join(
            out_dir, "alignment", "{}-ali-{}.npy".format(hparams.dataset, name))

        # Convert the transcript to an integer id sequence.
        sequence = np.array(
            text_to_sequence(raw_text.strip(), hparams.text_cleaners))

        buffer.append({
            "name": name,
            "text": torch.from_numpy(sequence),
            "duration": torch.from_numpy(np.load(ali_path)),
            "mel_target": torch.from_numpy(np.load(mel_path)),
        })

    elapsed = time.perf_counter() - t0
    print("cost {:.2f}s to load all data into buffer.".format(elapsed))

    return buffer
Example #3
0
 def __init__(self,
              filename="train.txt",
              sort=True,
              speaker_lookup_table=None):
     """Load utterance ids and transcripts from the preprocessed metadata
     file and remember the sort flag plus the optional speaker lookup table.
     """
     meta_path = os.path.join(hparams.preprocessed_path, filename)
     self.basename, self.text = process_meta(meta_path)
     self.speaker_lookup_table = speaker_lookup_table
     self.sort = sort
Example #4
0
def check_text_to_sequence(fn):
    """Smoke-test text_to_sequence() on every line of a metadata file.

    The module-level `cur_processing` is updated before each conversion so a
    crash can be traced back to the utterance that caused it.
    """
    global cur_processing
    basename_list, text_list = utils.process_meta(
        os.path.join("..", hp.preprocessed_path, fn))
    for basename, text in zip(basename_list, text_list):
        cur_processing = basename
        text_to_sequence(text)
    print("check text done. fn=%s, cnt=%d" % (fn, len(basename_list)))
Example #5
0
 def __init__(self, filename="train.txt", sort=True):
     """Load utterance ids and transcripts from the preprocessed metadata
     file; `sort` is stored for later use by the batching logic.
     """
     meta_path = os.path.join(hparams.preprocessed_path, filename)
     self.basename, self.text = process_meta(meta_path)
     self.sort = sort
Example #6
0
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 18 23:25:32 2018

@author: DBLITALMK
Upload reviews File to Database
"""
import os

# NOTE: the chdir must run before importing `utils`, which lives in this
# working directory.
os.chdir('C:/Users/DBLITALMK/Documents/Legacy/Case Study/env/Analysis/')

from utils import getDFSample, process_meta, getDFStream, getDFTop, getDFStreamFilter
from sqlalchemy import create_engine

print('connecting to database')
engine = create_engine('postgresql://*****:*****@localhost:5432/amazon_reviews')

# Stream the metadata dump in chunks; process each chunk and append it to
# the `meta` table.
index = 0
for index, chunk in enumerate(getDFStream('metadata.json.gz', splits=200000),
                              start=1):
    print('iteration %d' % index)
    t = process_meta(chunk)
    print('   data retrieved')
    # Report the longest description in this chunk (non-strings count as 0).
    longest = max(t['description'].apply(
        lambda s: len(s) if isinstance(s, str) else 0))
    print('   description %d' % longest)
    t.to_sql('meta', engine, if_exists='append', index=False)
    print('   data uploaded')
Example #7
0
 def __init__(self, filename="train.txt", list_unuse=[], sort=True):
     """Load utterance ids and transcripts from the metadata file.

     Args:
         filename: metadata file inside ``hparams.data_path/outdir``.
         list_unuse: entries to skip, forwarded to process_meta; kept as
             ``[]`` in the signature for interface compatibility but
             replaced with a fresh list per call below.
         sort: stored flag for later use by the batching logic.
     """
     # Guard against the shared-mutable-default pitfall: if process_meta
     # mutates its second argument, the module-level default object would
     # accumulate state across calls. Use a fresh list each time instead.
     if list_unuse == []:
         list_unuse = []
     self.basename, self.text = process_meta(
         os.path.join(hparams.data_path, 'outdir', filename), list_unuse)
     self.sort = sort