def inference(model_itr, bar):
    # Convert one fixed test wave with the predictor saved at iteration `model_itr`.
    bar.next()
    model_path = Path('05output/predictor_' + str(model_itr) + '.npz')
    config_path = Path('recipe/config.json')
    config = create_config(config_path)
    acoustic_converter = AcousticConverter(config, model_path, gpu=0)
    wave = acoustic_converter(voice_path='01input02/music0001_80.wav')
    librosa.output.write_wav(
        'inference_output_' + str(model_itr) + '.wav',
        wave.wave, wave.sampling_rate, norm=True)
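# A minimal driver sketch (not part of the original script) showing how `inference`
# above could be called for a series of saved checkpoints. The `progress` package's
# Bar is an assumption; any object with a .next() method would do, and the
# checkpoint range/interval below is hypothetical.
from progress.bar import Bar

iterations = range(10000, 100001, 10000)
bar = Bar('inference', max=len(iterations))
for it in iterations:
    inference(it, bar)
bar.finish()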
def process(p: Path, acoustic_converter: AcousticConverter):
    try:
        # If the input is a saved feature file, look up the matching wave file by stem.
        if p.suffix in ['.npy', '.npz']:
            fn = glob.glob(str(input_wave_directory / p.stem) + '.*')[0]
            p = Path(fn)
        wave = acoustic_converter(p)
        librosa.output.write_wav(str(output / p.stem) + '.wav', wave.wave, wave.sampling_rate, norm=True)
    except:
        import traceback
        print('error!', str(p))
        print(traceback.format_exc())


for model_name in args.model_names:
    base_model = model_directory / model_name
    config = create_config(base_model / 'config.json')

    # Reproduce the dataset shuffle to pick one training sample and one test sample.
    input_paths = list(sorted([Path(p) for p in glob.glob(str(config.dataset.input_glob))]))
    numpy.random.RandomState(config.dataset.seed).shuffle(input_paths)
    path_train = input_paths[0]
    path_test = input_paths[-1]

    # Use the requested iteration if given, otherwise the newest saved predictor.
    if it is not None:
        model_path = base_model / 'predictor_{}.npz'.format(it)
    else:
        model_paths = base_model.glob('predictor_*.npz')
        model_path = list(sorted(model_paths, key=extract_number))[-1]
    print(model_path)
    acoustic_converter = AcousticConverter(config, model_path, gpu=gpu)

    output = Path('./output').absolute() / base_model.name
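    # The excerpt above ends right after `output` is set and never shows the
    # conversions being launched. A plausible continuation (an assumption, not the
    # original script) would create the output directory and run `process` on the
    # two selected samples, still inside the per-model loop:
    output.mkdir(parents=True, exist_ok=True)
    for p in [path_train, path_test]:
        process(p, acoustic_converter)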
class AudioConfig(NamedTuple):
    rate: int
    chunk: int
    vocoder_buffer_size: int
    out_norm: float


model_base_path = Path('~/Github/become-yukarin/trained/').expanduser()
test_data_path = Path('tests/test-deep-learning-yuduki-yukari.wav')
test_output_path = Path('output.wav')

print('model loading...', flush=True)

model_path = model_base_path / Path('harvest-innoise03/predictor_1390000.npz')
config_path = model_base_path / Path('harvest-innoise03/config.json')
config = create_config(config_path)
acoustic_converter = AcousticConverter(config, model_path, gpu=0)
print('model 1 loaded!', flush=True)

model_path = model_base_path / Path('sr-noise3/predictor_180000.npz')
config_path = model_base_path / Path('sr-noise3/config.json')
sr_config = create_sr_config(config_path)
super_resolution = SuperResolution(sr_config, model_path, gpu=0)
print('model 2 loaded!', flush=True)

audio_config = AudioConfig(
    rate=config.dataset.param.voice_param.sample_rate,
    chunk=config.dataset.param.voice_param.sample_rate,
    vocoder_buffer_size=config.dataset.param.voice_param.sample_rate // 16,
    out_norm=4.5,
)
def main():
    print('model loading...', flush=True)

    queue_input_wave = Queue()
    queue_output_wave = Queue()

    model_path = Path('./trained/harvest-innoise03/predictor_1390000.npz')
    config_path = Path('./trained/harvest-innoise03/config.json')
    config = create_config(config_path)
    acoustic_converter = AcousticConverter(config, model_path, gpu=0)
    print('model 1 loaded!', flush=True)

    model_path = Path('./trained/sr-noise3/predictor_180000.npz')
    config_path = Path('./trained/sr-noise3/config.json')
    sr_config = create_sr_config(config_path)
    super_resolution = SuperResolution(sr_config, model_path, gpu=0)
    print('model 2 loaded!', flush=True)

    audio_instance = pyaudio.PyAudio()
    audio_config = AudioConfig(
        rate=config.dataset.param.voice_param.sample_rate,
        audio_chunk=config.dataset.param.voice_param.sample_rate,
        convert_chunk=config.dataset.param.voice_param.sample_rate,
        vocoder_buffer_size=config.dataset.param.voice_param.sample_rate // 16,
        out_norm=2.5,
    )

    # Run the conversion in a separate process so audio I/O stays responsive.
    process_converter = Process(target=convert_worker, kwargs=dict(
        config=config,
        audio_config=audio_config,
        acoustic_converter=acoustic_converter,
        super_resolution=super_resolution,
        queue_input_wave=queue_input_wave,
        queue_output_wave=queue_output_wave,
    ))
    process_converter.start()
    signal.signal(signal.SIGINT, lambda signum, frame: process_converter.terminate())

    audio_stream = audio_instance.open(
        format=pyaudio.paFloat32,
        channels=1,
        rate=audio_config.rate,
        frames_per_buffer=audio_config.audio_chunk,
        input=True,
        output=True,
    )

    # process_converter.join()

    while True:
        # input audio
        in_data = audio_stream.read(audio_config.audio_chunk)
        wave = numpy.fromstring(in_data, dtype=numpy.float32)
        print('input', len(wave), flush=True)
        queue_input_wave.put(wave)

        # output audio, if the converter has produced a chunk
        try:
            wave = queue_output_wave.get_nowait()
        except:
            wave = None

        if wave is not None:
            print('output', len(wave), flush=True)
            wave *= audio_config.out_norm
            b = wave.astype(numpy.float32).tobytes()
            audio_stream.write(b)
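# A minimal sketch of the worker that `main` above expects. The real convert_worker
# in the repository performs acoustic conversion and super resolution between the two
# queues; its exact calls are not shown in this excerpt, so the conversion step below
# is only an identity placeholder.
def convert_worker(config, audio_config, acoustic_converter, super_resolution,
                   queue_input_wave, queue_output_wave):
    while True:
        wave = queue_input_wave.get()   # blocks until the main loop enqueues a chunk
        converted = wave                # placeholder for the actual two-stage conversion
        queue_output_wave.put(converted)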
parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha)
parser.add_argument('--f0_estimating_method', type=str,
                    default=base_acoustic_feature_param.f0_estimating_method)
parser.add_argument('--f0_floor1', type=float, default=71)
parser.add_argument('--f0_ceil1', type=float, default=800)
parser.add_argument('--f0_floor2', type=float, default=71)
parser.add_argument('--f0_ceil2', type=float, default=800)
parser.add_argument('--ignore_feature', nargs='+', default=['spectrogram', 'aperiodicity'])
parser.add_argument('--disable_alignment', action='store_true')
parser.add_argument('--enable_overwrite', action='store_true')
arguments = parser.parse_args()

pprint(dir(arguments))

# Optionally run the first speaker's input through an existing converter first.
pre_convert = arguments.pre_converter1_config is not None
if pre_convert:
    config = create_config(arguments.pre_converter1_config)
    pre_converter1 = AcousticConverter(config, arguments.pre_converter1_model)
else:
    pre_converter1 = None


def generate_feature(path1, path2):
    out1 = Path(arguments.output1_directory, path1.stem + '.npy')
    out2 = Path(arguments.output2_directory, path2.stem + '.npy')
    # Skip pairs whose features already exist, unless overwriting is enabled.
    if out1.exists() and out2.exists() and not arguments.enable_overwrite:
        return

    # load wave and padding
    wave_file_load_process = WaveFileLoadProcess(
        sample_rate=arguments.sample_rate,
        top_db=arguments.top_db,