示例#1
0
    def test_ljspeech_loader(self):
        """Iterate the LJSpeech data loader for up to ``MAX`` batches and check each one.

        The dataset is built from the settings in ``test_conf.json`` and read
        from ``<data_path>/mels``. For every batch the test asserts that:

        * the second dimension of ``wavs`` never shrinks from one batch to the
          next,
        * ``wavs`` and ``mels`` agree on their first two dimensions,
        * the second dimension of ``wavs`` is larger than ``RF``,
        * both the maximum and the mean of ``wavs`` are positive.
        """
        MAX = 10  # number of batches to inspect before stopping
        # NOTE(review): RF is presumably the model receptive field -- confirm
        # against LJSpeechDataset's signature.
        RF = 11
        # Report the real iteration budget instead of a hard-coded number.
        print(" ---- Run data loader for {} iterations ----".format(MAX))
        C = load_config('test_conf.json')
        dataset = LJSpeechDataset(os.path.join(C.data_path, "mels", "meta_fftnet.csv"),
                                  os.path.join(C.data_path, "mels"),
                                  C.sample_rate,
                                  C.num_mels, C.num_freq,
                                  C.min_level_db, C.frame_shift_ms,
                                  C.frame_length_ms, C.preemphasis, C.ref_level_db,
                                  RF, C.min_wav_len, C.max_wav_len)
        dataloader = DataLoader(dataset, batch_size=2,
                                shuffle=False, collate_fn=dataset.collate_fn,
                                drop_last=True, num_workers=2)

        count = 0
        last_T = 0  # second dimension of the previous batch's wavs
        for data in dataloader:
            wavs = data[0]
            mels = data[1]
            print(" > iter: ", count)
            # With shuffle=False the batches are expected to arrive in
            # non-decreasing length order.
            assert wavs.shape[1] >= last_T
            last_T = wavs.shape[1]
            assert wavs.shape[1] == mels.shape[1]
            assert wavs.shape[0] == mels.shape[0]
            assert wavs.shape[1] > RF
            assert wavs.max() > 0 and wavs.mean() > 0
            count += 1
            if count == MAX:
                break
示例#2
0
    def _str2bool(value):
        """Convert a command-line string to a bool.

        ``type=bool`` treats every non-empty string (including "False") as
        True. Here the usual false spellings and the empty string map to
        False; any other value stays True, as it was before.
        """
        return str(value).strip().lower() not in ('', '0', 'false', 'f', 'no', 'n')

    # Command-line options: config file, debug switch and optional
    # fine-tuning path.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config_path',
        type=str,
        help='path to config file for training',
    )

    parser.add_argument('--debug',
                        type=_str2bool,
                        default=False,
                        help='Stop asking for git hash before the run.')

    parser.add_argument('--finetune_path', type=str)
    args = parser.parse_args()
    # Load the run configuration from the file given on the command line.
    c = load_config(args.config_path)

    # setup output paths and read configs
    # Directory containing this script; c.output_path is joined onto it
    # (os.path.join keeps c.output_path as-is when it is absolute).
    _ = os.path.dirname(os.path.realpath(__file__))
    OUT_PATH = os.path.join(_, c.output_path)
    # NOTE(review): create_experiment_folder presumably creates a per-run
    # folder for c.model_name and returns its path -- confirm, including the
    # meaning of the third argument.
    OUT_PATH = create_experiment_folder(OUT_PATH, c.model_name, True)
    CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints')
    # Keep a copy of the config file used for this run next to its outputs.
    shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json'))

    # setup TensorBoard
    # The writer is pointed at the same folder as the other run outputs.
    tb = SummaryWriter(OUT_PATH)

    # create the FFTNet model
    model = FFTNetModel(hid_channels=256,
                        out_channels=256,
                        n_layers=c.num_quant,
from multiprocessing import Pool


# Command-line interface: folder with the input wav files, folder that
# receives the extracted mel files, and the run configuration file.
parser = argparse.ArgumentParser()
parser.add_argument('--data_path', type=str,
                    help='Folder path to the input wav files.')
parser.add_argument('--out_path', type=str,
                    help='Folder path to save the extracted mel files.')
parser.add_argument('--config', type=str,
                    help='conf.json file for run settings.')
args = parser.parse_args()

DATA_PATH = args.data_path
OUT_PATH = args.out_path
CONFIG = load_config(args.config)
# Module-level audio processor built from the config values; extract_mel
# uses it to compute mel spectrograms.
ap = AudioProcessor(CONFIG.sample_rate, CONFIG.num_mels, CONFIG.num_freq, CONFIG.min_level_db,
                    CONFIG.frame_shift_ms, CONFIG.frame_length_ms, CONFIG.preemphasis,
                    CONFIG.ref_level_db)
        
def extract_mel(file_path):
    """Compute the mel spectrogram of one audio file and save it under OUT_PATH.

    Args:
        file_path: path to the audio file; it is read with ``sf.read``.

    Returns:
        A tuple ``(file_path, mel_file, wav_len, mel_len)`` where ``mel_file``
        is the file's base name with a trailing ``.wav`` replaced by ``.mel``,
        ``wav_len`` is ``str(x.shape[0])`` of the loaded signal and
        ``mel_len`` is ``str(mel.shape[1])`` of the spectrogram.
    """
    # The sample rate returned by sf.read is not needed here.
    x, _ = sf.read(file_path)
    mel = ap.melspectrogram(x.astype('float32'))
    file_name = os.path.basename(file_path)
    # Strip only a trailing ".wav". str.replace would also remove ".wav"
    # from the middle of a name (e.g. "my.wave_1.wav" -> "mye_1").
    if file_name.endswith(".wav"):
        file_name = file_name[:-len(".wav")]
    mel_file = file_name + ".mel"
    # NOTE(review): np.save appends ".npy" when the name does not end with
    # it, so the file on disk is "<mel_file>.npy" while "<mel_file>" is what
    # gets returned -- confirm that consumers of the result expect this.
    np.save(os.path.join(OUT_PATH, mel_file), mel, allow_pickle=False)
    mel_len = mel.shape[1]
    wav_len = x.shape[0]
    return file_path, mel_file, str(wav_len), str(mel_len)

# Glob pattern for the .wav files below DATA_PATH. "**" only matches nested
# directories when the pattern is passed to glob with recursive=True.
glob_path = os.path.join(DATA_PATH, "**/*.wav")
示例#4
0
def _str2bool(value):
    """Convert a command-line string to a bool.

    ``type=bool`` treats every non-empty string (including "False") as True.
    Here the usual false spellings and the empty string map to False; any
    other value stays True, as it was before.
    """
    return str(value).strip().lower() not in ('', '0', 'false', 'f', 'no', 'n')


# Command-line options: checkpoint folder to restore from, config file and
# debug switch.
parser = argparse.ArgumentParser()
# NOTE(review): the default is the int 0 (argparse does not convert
# non-string defaults), kept as-is in case later code tests for it.
parser.add_argument('--restore_path',
                    type=str,
                    help='Folder path to checkpoints',
                    default=0)
parser.add_argument(
    '--config_path',
    type=str,
    help='path to config file for training',
)
parser.add_argument('--debug',
                    type=_str2bool,
                    default=False,
                    help='do not ask for git hash before run.')
args = parser.parse_args()
# The option is registered as --config_path, so argparse stores the value in
# args.config_path; args.config does not exist.
C = load_config(args.config_path)


def train():
    """Placeholder for the training step; does nothing and returns None."""
    return None


def evaluate():
    """Placeholder for the evaluation step; does nothing and returns None."""
    return None


def main():
    """Run the script, first checking the configured max wav length against the model.

    Raises:
        RuntimeError: if ``C.max_wav_len`` is smaller than
            ``model.receptive_field``.
    """
    # Reject the configuration when the longest allowed wav is shorter than
    # the model's receptive field.
    if C.max_wav_len < model.receptive_field:
        # The config object is the upper-case ``C``; the message is built
        # from adjacent literals so no indentation whitespace leaks into it.
        raise RuntimeError(
            " > Max wav length {} cannot be smaller than "
            "the model receptive field {}.".format(
                C.max_wav_len, model.receptive_field))