Example #1
def setUp(self):
    # Build a dummy VoiceChanger from nested AttrDicts carrying only the
    # config values (sampling rates and acoustic parameters) this test needs.
    self.voice_changer: VoiceChanger = AttrDict(
        acoustic_converter=AttrDict(config=AttrDict(dataset=AttrDict(
            acoustic_param=AcousticParam(sampling_rate=16000), ), ), ),
        super_resolution=AttrDict(config=AttrDict(dataset=AttrDict(
            param=Param(), ), ), ),
        output_sampling_rate=24000,
    )
    self.stream = ConvertStream(voice_changer=self.voice_changer)
    # Limit the input segment features to f0 for this test.
    self.stream.in_segment_method._keys = ['f0']
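This setup works because AttrDict exposes nested keyword arguments as attributes, so the dummy object above can stand in for real converter objects. A minimal sketch of that behaviour (the names below are illustrative, not taken from the example):

# Illustrative only: nested AttrDicts read like objects with attributes.
dummy = AttrDict(config=AttrDict(dataset=AttrDict(param=123)))
assert dummy.config.dataset.param == 123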
Example #2
import json
from pathlib import Path
from typing import Union


def create_from_json(s: Union[str, Path]):
    # Config, DatasetConfig, ModelConfig, LossConfig, TrainConfig, ProjectConfig,
    # AcousticParam and backward_compatible are assumed to be provided by the
    # surrounding config module.
    with open(s) as f:
        d = json.load(f)
    backward_compatible(d)

    return Config(
        dataset=DatasetConfig(
            acoustic_param=AcousticParam(**d['dataset']['acoustic_param']),
            input_glob=Path(d['dataset']['input_glob']),
            target_glob=Path(d['dataset']['target_glob']),
            indexes_glob=Path(d['dataset']['indexes_glob']),
            in_features=d['dataset']['in_features'],
            out_features=d['dataset']['out_features'],
            train_crop_size=d['dataset']['train_crop_size'],
            input_global_noise=d['dataset']['input_global_noise'],
            input_local_noise=d['dataset']['input_local_noise'],
            target_global_noise=d['dataset']['target_global_noise'],
            target_local_noise=d['dataset']['target_local_noise'],
            seed=d['dataset']['seed'],
            num_test=d['dataset']['num_test'],
        ),
        model=ModelConfig(
            in_channels=d['model']['in_channels'],
            out_channels=d['model']['out_channels'],
            generator_base_channels=d['model']['generator_base_channels'],
            generator_extensive_layers=d['model']['generator_extensive_layers'],
            discriminator_base_channels=d['model']['discriminator_base_channels'],
            discriminator_extensive_layers=d['model']['discriminator_extensive_layers'],
            weak_discriminator=d['model']['weak_discriminator'],
            glu_generator=d['model']['glu_generator'],
        ),
        loss=LossConfig(
            mse=d['loss']['mse'],
            adversarial=d['loss']['adversarial'],
        ),
        train=TrainConfig(
            batchsize=d['train']['batchsize'],
            gpu=d['train']['gpu'],
            log_iteration=d['train']['log_iteration'],
            snapshot_iteration=d['train']['snapshot_iteration'],
            stop_iteration=d['train']['stop_iteration'],
            optimizer=d['train']['optimizer'],
            pretrained_model=d['train']['pretrained_model'],
        ),
        project=ProjectConfig(
            name=d['project']['name'],
            tags=d['project']['tags'],
        ))
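A minimal usage sketch for create_from_json follows; the file name config.json and the assumption that the returned Config exposes its sections as attributes are illustrative, not part of the original example:

# Hypothetical usage: 'config.json' must match the schema read above.
config = create_from_json('config.json')
print(config.train.batchsize, config.model.in_channels)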
Example #3
import argparse
import math
import multiprocessing
from pathlib import Path
from pprint import pprint

import matplotlib.pyplot as plt
import numpy
import tqdm
from sprocket.speech import FeatureExtractor

from yukarin import Wave
from yukarin.param import AcousticParam
from yukarin.utility.sprocket_utility import SpeakerYML
from yukarin.utility.sprocket_utility import low_cut_filter

base_acoustic_param = AcousticParam()

parser = argparse.ArgumentParser()
parser.add_argument('--input_wave_glob', '-i')
parser.add_argument('--candidate_threshold', '-th', nargs='+', type=float)
parser.add_argument('--output_image', '-o', type=Path)
parser.add_argument('--speaker_yml', type=Path)
parser.add_argument('--pad_second',
                    type=float,
                    default=base_acoustic_param.pad_second)
arguments = parser.parse_args()

# read parameters from speaker yml
sconf = SpeakerYML(arguments.speaker_yml)

# construct FeatureExtractor class