Example #1
import argparse
from pathlib import Path

import numpy as np

from utils.config_manager import Config   # import path as in Example #2
from data.datasets import DataReader      # module path assumed
from utils.audio import Audio

np.random.seed(42)

parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, required=True)
parser.add_argument('--skip_phonemes', action='store_true')
parser.add_argument('--skip_mels', action='store_true')
parser.add_argument('--phonemizer_parallel_jobs', type=int, default=16)
parser.add_argument('--phonemizer_batch_size', type=int, default=16)

args = parser.parse_args()
for arg in vars(args):
    print('{}: {}'.format(arg, getattr(args, arg)))

cm = Config(args.config, model_kind='autoregressive')
cm.create_remove_dirs()
metadatareader = DataReader.from_config(cm, kind='original', scan_wavs=True)
audio = Audio(config=cm.config)  # used by process_wav below; mirrors Example #2

if not args.skip_mels:
    import sys

    def process_wav(wav_path: Path):
        file_name = wav_path.stem
        y, sr = audio.load_wav(str(wav_path))
        mel = audio.mel_spectrogram(y)
        assert len(mel.shape) == 2
        assert mel.shape[1] == audio.config['mel_channels']
        mel_path = (cm.mel_dir / file_name).with_suffix('.npy')
        np.save(mel_path, mel)
        return (file_name, mel.shape[0])
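
    # The processing loop is cut off above. A minimal sketch of one way to
    # drive process_wav, assuming the scanned wav paths are exposed as
    # `metadatareader.wav_paths` (an assumed attribute, not shown here):
    from concurrent.futures import ProcessPoolExecutor

    wav_paths = [Path(p) for p in metadatareader.wav_paths]
    with ProcessPoolExecutor() as pool:
        # each call writes <mel_dir>/<file_name>.npy and returns (name, n_frames)
        mel_lens = dict(pool.map(process_wav, wav_paths))
    print(f'Computed {len(mel_lens)} mel spectrograms.')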
Example #2
import argparse

import numpy as np

from utils.config_manager import Config
from data.audio import Audio
from utils.logging_utils import SummaryManager  # import path as in Example #3
from data.datasets import DataReader            # module path assumed

np.random.seed(42)

parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default='config/session_paths.yaml')
parser.add_argument('--skip_phonemes', action='store_true')
parser.add_argument('--skip_mels', action='store_true')
parser.add_argument('--skip_speakers', action='store_true')

args = parser.parse_args()
for arg in vars(args):
    print('{}: {}'.format(arg, getattr(args, arg)))

cm = Config(args.config, asr=True)
cm.create_remove_dirs()
metadatareader = DataReader.from_config(cm, kind='original')
summary_manager = SummaryManager(model=None,
                                 log_dir=cm.log_dir / 'data_preprocessing',
                                 config=cm.config,
                                 default_writer='data_preprocessing')
print(f'\nFound {len(metadatareader.filenames)} audio files.')
audio = Audio(config=cm.config)

if not args.skip_mels:

    def process_file(tuples):
        len_dict = {}
        spk_file_dict = {}
        remove_files = []
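
    # process_file is truncated above. A sketch of one plausible shape for
    # such a function (NOT the original implementation): it assumes each item
    # is a (file_name, speaker_id) pair, that `metadatareader.wav_paths` maps
    # names to paths, and a `max_mel_len` config key, all unverified.
    def process_file_sketch(tuples):
        len_dict = {}
        spk_file_dict = {}
        remove_files = []
        for file_name, speaker_id in tuples:
            y, sr = audio.load_wav(str(metadatareader.wav_paths[file_name]))
            mel = audio.mel_spectrogram(y)
            len_dict[file_name] = mel.shape[0]
            spk_file_dict.setdefault(speaker_id, []).append(file_name)
            if mel.shape[0] > cm.config['max_mel_len']:
                remove_files.append(file_name)
        return len_dict, spk_file_dict, remove_files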
Example #3
import numpy as np
import tensorflow as tf

from utils.config_manager import Config
from data.datasets import ASRDataset
from utils.logging_utils import SummaryManager
from utils.scripts_utils import dynamic_memory_allocation, basic_train_parser
from ctc_segmentation import (CtcSegmentationParameters, ctc_segmentation,
                              determine_utterance_segments, prepare_token_list)
import tgt

np.random.seed(42)
tf.random.set_seed(42)
dynamic_memory_allocation()

parser = basic_train_parser()
args = parser.parse_args()

config = Config(config_path=args.config, asr=True)
config_dict = config.config
config.create_remove_dirs(clear_dir=args.clear_dir,
                          clear_logs=args.clear_logs,
                          clear_weights=args.clear_weights)
config.dump_config()
config.print_config()

model = config.get_model()
config.compile_model(model)

data_handler = ASRDataset.from_config(config,
                                      tokenizer=model.text_pipeline.tokenizer,
                                      kind='valid')
dataset = data_handler.get_dataset(
    bucket_batch_sizes=config_dict['bucket_batch_sizes'],
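
# The snippet stops before the imported ctc_segmentation helpers are used.
# A minimal sketch of the library's usual call sequence; `lpz` (per-frame
# CTC log-posteriors from the ASR model) and `token_lists` (tokenized
# ground-truth utterances as int arrays) are assumed names.
params = CtcSegmentationParameters()
params.index_duration = 0.01  # seconds of audio per CTC frame (assumed value)

ground_truth_mat, utt_begin_indices = prepare_token_list(params, token_lists)
timings, char_probs, state_list = ctc_segmentation(params, lpz, ground_truth_mat)
segments = determine_utterance_segments(params, utt_begin_indices, char_probs,
                                        timings, token_lists)
for (start, end, score), tokens in zip(segments, token_lists):
    print(f'{start:.2f}s - {end:.2f}s (confidence {score:.3f})')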
Example #4
    summary_manager.display_loss(model_out, tag='Validation', plot_all=True)
    summary_manager.display_attention_heads(model_out,
                                            tag='ValidationAttentionHeads')
    # summary_manager.display_mel(mel=model_out['mel_linear'][0], tag=f'Validation/linear_mel_out')
    summary_manager.display_mel(
        mel=model_out['final_output'][0],
        tag=f'Validation/predicted_mel_{fname[0].numpy().decode("utf-8")}')
    # residual = abs(model_out['mel_linear'] - model_out['final_output'])
    # summary_manager.display_mel(mel=residual[0], tag=f'Validation/conv-linear_residual')
    summary_manager.display_mel(
        mel=val_mel[0],
        tag=f'Validation/target_mel_{fname[0].numpy().decode("utf-8")}')
    return val_loss['loss']
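
# The function ending above is Example #4's validation step. A hypothetical
# training-loop excerpt showing how such a function is typically driven;
# `validate`, `val_dataset`, `train_dataset`, `train_step` and `val_every`
# are assumed names, not confirmed by the excerpt.
val_every = 1_000
for step, batch in enumerate(train_dataset):
    train_step(batch)                 # assumed training-step function
    if step % val_every == 0:
        loss = validate(val_dataset)  # the function whose tail is shown above
        print(f'step {step}: validation loss {loss:.4f}')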


config_manager = Config(config_path=args.config, model_kind='autoregressive')
config = config_manager.config
config_manager.create_remove_dirs(clear_dir=args.clear_dir,
                                  clear_logs=args.clear_logs,
                                  clear_weights=args.clear_weights)
config_manager.dump_config()
config_manager.print_config()

# get model, prepare data for model, create datasets
model = config_manager.get_model()
config_manager.compile_model(model)
data_prep = AutoregressivePreprocessor.from_config(
    config_manager, tokenizer=model.text_pipeline.tokenizer)
train_data_handler = TextMelDataset.from_config(config_manager,
                                                preprocessor=data_prep,
Example #5
        summary_manager.display_mel(
            mel=tar_value,
            tag=f'Test/{fname[j].numpy().decode("utf-8")}/target')
        summary_manager.display_audio(
            tag=f'Prediction {fname[j].numpy().decode("utf-8")}/target',
            mel=tar_value)
        summary_manager.display_audio(
            tag=f'Prediction {fname[j].numpy().decode("utf-8")}/prediction',
            mel=predval)
    return val_loss['loss']


parser = basic_train_parser()
args = parser.parse_args()

config = Config(config_path=args.config)
config_dict = config.config
config.create_remove_dirs(clear_dir=args.clear_dir,
                          clear_logs=args.clear_logs,
                          clear_weights=args.clear_weights)
config.dump_config()
config.print_config()

model = config.get_model()
config.compile_model(model)

data_prep = TTSPreprocessor.from_config(
    config=config, tokenizer=model.text_pipeline.tokenizer)
train_data_handler = TTSDataset.from_config(config,
                                            preprocessor=data_prep,
                                            kind='train')
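
# Example #5 stops after the train data handler is created. Building the
# batched dataset presumably mirrors Example #3's get_dataset call; a
# sketch, where the 'bucket_boundaries' key and the shuffle parameter
# are assumptions:
train_dataset = train_data_handler.get_dataset(
    bucket_batch_sizes=config_dict['bucket_batch_sizes'],
    bucket_boundaries=config_dict['bucket_boundaries'],
    shuffle=True)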
Example #6
    if args.file is not None:
        with open(args.file, 'r') as file:
            text = file.readlines()
        fname = Path(args.file).stem
    elif args.text is not None:
        text = [args.text]
        fname = 'custom_text'
    else:
        fname = None
        text = None
        print('Specify either an input text (-t "some text") '
              'or a text input file (-f /path/to/file.txt)')
        exit()
    config_loader = Config(config_path=args.config)
    outdir = Path(
        args.outdir) if args.outdir is not None else config_loader.log_dir
    outdir = outdir / 'outputs' / f'{fname}'
    outdir.mkdir(exist_ok=True, parents=True)
    print('===' * 10, outdir)
    audio = Audio(config_loader.config)
    if args.checkpoint is not None:
        all_weights = [args.checkpoint]

    elif args.all_weights:
        all_weights = [(config_loader.weights_dir / x.stem).as_posix()
                       for x in config_loader.weights_dir.iterdir()
                       if x.suffix == '.index']
    else:
        all_weights = [None]  # default
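
    # all_weights now holds checkpoint paths (a specific file, every file in
    # weights_dir, or None for the latest). A sketch of the synthesis loop
    # that presumably follows; load_model is grounded in Example #7, while
    # model.predict, the 'mel' key and the audio helpers are assumptions.
    for weights_path in all_weights:
        model = (config_loader.load_model(weights_path)
                 if weights_path is not None else config_loader.load_model())
        for i, line in enumerate(text):
            out = model.predict(line)                               # assumed API
            wav = audio.reconstruct_waveform(out['mel'].numpy().T)  # assumed
            audio.save_wav(wav, outdir / f'{fname}_{i}.wav')        # assumed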
Example #7
assert not (args.fill_mode_max and args.fill_mode_next), \
    'Choose one gap filling mode.'
weighted = not args.best
binary = args.binary
fill_gaps = args.fill_mode_max or args.fill_mode_next
fix_jumps = args.fix_jumps
fill_mode = f"{'max' * args.fill_mode_max}{'next' * args.fill_mode_next}"
filling_tag = f"{'(max)' * args.fill_mode_max}{'(next)' * args.fill_mode_next}"
tag_description = ''.join([
    f'{"_weighted" * weighted}{"_best" * (not weighted)}',
    f'{"_binary" * binary}', f'{"_filled" * fill_gaps}{filling_tag}',
    f'{"_fix_jumps" * fix_jumps}', f'_layer{args.extract_layer}'
])
writer_tag = f'DurationExtraction{tag_description}'
print(writer_tag)
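
# The tag strings above rely on Python's string-by-bool multiplication:
# True behaves as 1 and False as 0, so 'x' * True == 'x' and
# 'x' * False == ''. A tiny self-contained demo:
weighted_demo, binary_demo = True, False
demo_tag = f'{"_weighted" * weighted_demo}{"_binary" * binary_demo}'
assert demo_tag == '_weighted'
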
config_manager = Config(config_path=args.config, model_kind='autoregressive')
config = config_manager.config
config_manager.print_config()
if args.autoregressive_weights != '':
    model = config_manager.load_model(args.autoregressive_weights)
else:
    model = config_manager.load_model()
if model.r != 1:
    print(
        f"ERROR: model's reduction factor is greater than 1, check config. (r={model.r})"
    )

data_prep = AutoregressivePreprocessor.from_config(
    config=config_manager, tokenizer=model.text_pipeline.tokenizer)
data_handler = TextMelDataset.from_config(config_manager,
                                          preprocessor=data_prep,