def __init__(self,
             fc_stack_layers=2,
             fc_stack_ch=256,
             rnn_ch=512,
             rnn_type='gru',
             n_harmonics=100,
             amp_scale_fn=ddsp.core.exp_sigmoid,
             f0_depth=64,
             hz_min=20.0,
             hz_max=1200.0,
             sample_rate=16000,
             name='sinusoidal_to_harmonic_encoder'):
    """Store the encoder settings and create its layers.

    Args:
      fc_stack_layers: Second argument given to `nn.fc_stack` for both the
        pre-RNN and post-RNN stacks.
      fc_stack_ch: First argument given to `nn.fc_stack` for both stacks.
      rnn_ch: First argument given to `nn.rnn`.
      rnn_type: Second argument given to `nn.rnn`.
      n_harmonics: Kept on the instance and used as the size of the
        `hd_out` dense layer.
      amp_scale_fn: Kept on the instance; it is not called here.
      f0_depth: Kept on the instance and used as the size of the `f0_out`
        dense layer.
      hz_min: Kept on the instance unchanged.
      hz_max: Kept on the instance unchanged.
      sample_rate: Kept on the instance unchanged.
      name: Forwarded to the parent constructor.
    """
    super().__init__(name=name)

    # Plain settings, stored as given.
    self.n_harmonics = n_harmonics
    self.amp_scale_fn = amp_scale_fn
    self.f0_depth = f0_depth
    self.hz_min = hz_min
    self.hz_max = hz_max
    self.sample_rate = sample_rate

    def build_fc_stack():
        # Both fully-connected stacks share the same configuration.
        return nn.fc_stack(fc_stack_ch, fc_stack_layers)

    # Layers, created in the same order as before.
    self.pre_rnn = build_fc_stack()
    self.rnn = nn.rnn(rnn_ch, rnn_type)
    self.post_rnn = build_fc_stack()

    # Output heads: a single-unit amplitude head, one unit per harmonic,
    # and `f0_depth` units for f0.
    self.amp_out = nn.dense(1)
    self.hd_out = nn.dense(n_harmonics)
    self.f0_out = nn.dense(f0_depth)
def __init__(self,
             rnn_channels=512,
             rnn_type='gru',
             ch=512,
             layers_per_stack=3,
             input_keys=('ld_scaled', 'f0_scaled', 'z'),
             output_splits=(('amps', 1), ('harmonic_distribution', 40)),
             name=None):
    """Create one fc stack per input key, an RNN, and the output layers.

    Args:
      rnn_channels: First argument given to `nn.rnn`.
      rnn_type: Second argument given to `nn.rnn`.
      ch: First argument given to `nn.fc_stack` for every stack.
      layers_per_stack: Second argument given to `nn.fc_stack` for every
        stack.
      input_keys: Stored as `self.input_keys`; one fc stack is created per
        entry.
      output_splits: Forwarded to the parent constructor.
      name: Forwarded to the parent constructor.
    """
    super().__init__(output_splits=output_splits, name=name)

    def make_stack():
        return nn.fc_stack(ch, layers_per_stack)

    self.input_keys = input_keys

    # Layers.
    self.input_stacks = [make_stack() for _ in self.input_keys]
    self.rnn = nn.rnn(rnn_channels, rnn_type)
    self.out_stack = make_stack()
    # `self.n_out` is not set in this constructor; presumably the parent
    # class derives it from `output_splits` -- confirm.
    self.dense_out = nn.dense(self.n_out)

    def stack_at(position):
        # Return the stack at `position`, or None when there are too few.
        if len(self.input_stacks) > position:
            return self.input_stacks[position]
        return None

    # Backwards compatibility: legacy attribute names alias the first three
    # stacks by position (not by key name).
    self.f_stack = stack_at(0)
    self.l_stack = stack_at(1)
    self.z_stack = stack_at(2)
def __init__(self,
             rnn_channels=512,
             rnn_type="gru",
             n_rnn=1,
             ch=512,
             layers_per_stack=3,
             output_splits=(("amps", 1), ("harmonic_distribution", 40)),
             name="f0_rnn_fc_decoder"):
    """Create the f0 stack, a list of RNN layers, and the output layers.

    Args:
      rnn_channels: First argument given to `nn.rnn` for every RNN layer.
      rnn_type: Second argument given to `nn.rnn` for every RNN layer.
      n_rnn: Stored as `self.n_rnn`. One RNN layer is always created, plus
        `n_rnn - 1` additional ones.
      ch: First argument given to `nn.fc_stack`.
      layers_per_stack: Second argument given to `nn.fc_stack`.
      output_splits: Forwarded to the parent constructor.
      name: Forwarded to the parent constructor.
    """
    super().__init__(output_splits=output_splits, name=name)

    def make_stack():
        return nn.fc_stack(ch, layers_per_stack)

    # Create layers.
    self.f0_stack = make_stack()
    self.n_rnn = n_rnn

    # The first RNN is unconditional, so the list is never empty even when
    # `n_rnn` is less than 1.
    first_rnn = nn.rnn(rnn_channels, rnn_type)
    extra_rnns = [
        nn.rnn(rnn_channels, rnn_type) for _ in range(self.n_rnn - 1)
    ]
    self.rnn = [first_rnn] + extra_rnns

    self.out_stack = make_stack()
    # `self.n_out` is not set in this constructor; presumably the parent
    # class derives it from `output_splits` -- confirm.
    self.dense_out = nn.dense(self.n_out)
def __init__(self,
             rnn_channels=512,
             rnn_type="gru",
             ch=512,
             layers_per_stack=3,
             input_keys=None,
             output_splits=(("amps", 1), ("harmonic_distribution", 40)),
             name="multi_input_rnn_fc_decoder"):
    """Create the input stacks, a list of RNN layers, and the output layers.

    Args:
      rnn_channels: Passed through `make_iterable`; one RNN layer is created
        per resulting entry, each entry being the first argument to
        `nn.rnn`.
      rnn_type: Second argument given to `nn.rnn` for every RNN layer.
      ch: First argument given to `nn.fc_stack` for every stack.
      layers_per_stack: Second argument given to `nn.fc_stack` for every
        stack.
      input_keys: Stored as `self.input_keys`. When None (the default), a
        fresh `["f0_scaled", "osc_scaled"]` list is used.
      output_splits: Forwarded to the parent constructor.
      name: Forwarded to the parent constructor.
    """
    super().__init__(output_splits=output_splits, name=name)

    # A list literal as the default would be a single object shared by every
    # instance built without `input_keys`; build a new list per instance.
    if input_keys is None:
        input_keys = ["f0_scaled", "osc_scaled"]
    self.input_keys = input_keys

    def make_stack():
        return nn.fc_stack(ch, layers_per_stack)

    # Layers.
    # `self.n_in` is not set in this constructor; presumably the class
    # derives it from `input_keys` -- confirm.
    self.stacks = [make_stack() for _ in range(self.n_in)]

    rnn_channels = make_iterable(rnn_channels)
    self.n_rnn = len(rnn_channels)
    # Index 0 is read unconditionally, so an empty `rnn_channels` still
    # fails here with an IndexError rather than building no RNN at all.
    self.rnn = [nn.rnn(rnn_channels[0], rnn_type)]
    for idx in range(1, self.n_rnn):
        self.rnn.append(nn.rnn(rnn_channels[idx], rnn_type))

    self.out_stack = make_stack()
    # `self.n_out` is not set in this constructor; presumably the parent
    # class derives it from `output_splits` -- confirm.
    self.dense_out = nn.dense(self.n_out)
def __init__(self,
             rnn_channels=512,
             rnn_type='gru',
             ch=512,
             layers_per_stack=3,
             output_splits=(('amps', 1), ('harmonic_distribution', 40)),
             name='rnn_fc_decoder'):
    """Create two input stacks, an RNN, and the output layers.

    Args:
      rnn_channels: First argument given to `nn.rnn`.
      rnn_type: Second argument given to `nn.rnn`.
      ch: First argument given to `nn.fc_stack` for every stack.
      layers_per_stack: Second argument given to `nn.fc_stack` for every
        stack.
      output_splits: Forwarded to the parent constructor.
      name: Forwarded to the parent constructor.
    """
    # Zero-argument super(), matching the other constructors in this file.
    super().__init__(output_splits=output_splits, name=name)

    def make_stack():
        return nn.fc_stack(ch, layers_per_stack)

    # Layers.
    self.f_stack = make_stack()
    self.l_stack = make_stack()
    self.rnn = nn.rnn(rnn_channels, rnn_type)
    self.out_stack = make_stack()
    # `self.n_out` is not set in this constructor; presumably the parent
    # class derives it from `output_splits` -- confirm.
    self.dense_out = nn.dense(self.n_out)
def __init__(self,
             rnn_channels=512,
             rnn_type='gru',
             ch=512,
             layers_per_stack=3,
             append_f0_loudness=True,
             output_splits=(('amps', 1), ('harmonic_distribution', 40)),
             name=None):
    """Create three input stacks, an RNN, and the output layers.

    Args:
      rnn_channels: First argument given to `nn.rnn`.
      rnn_type: Second argument given to `nn.rnn`.
      ch: First argument given to `nn.fc_stack` for every stack.
      layers_per_stack: Second argument given to `nn.fc_stack` for every
        stack.
      append_f0_loudness: Stored as `self.append_f0_loudness`; it is not
        otherwise used in this constructor.
      output_splits: Forwarded to the parent constructor.
      name: Forwarded to the parent constructor.
    """
    super().__init__(output_splits=output_splits, name=name)
    self.append_f0_loudness = append_f0_loudness

    def make_stack():
        return nn.fc_stack(ch, layers_per_stack)

    # Layers: three input stacks, then the RNN, then the output stack.
    self.f_stack = make_stack()
    self.l_stack = make_stack()
    self.z_stack = make_stack()
    self.rnn = nn.rnn(rnn_channels, rnn_type)
    self.out_stack = make_stack()
    # `self.n_out` is not set in this constructor; presumably the parent
    # class derives it from `output_splits` -- confirm.
    self.dense_out = nn.dense(self.n_out)
def __init__(self,
             rnn_channels=512,
             rnn_type='gru',
             z_dims=32,
             z_time_steps=250,
             f0_encoder=None,
             name='mfcc_time_distrbuted_rnn_encoder'):
    """Pick the FFT settings for `z_time_steps` and create the layers.

    Args:
      rnn_channels: First argument given to `nn.rnn`.
      rnn_type: Second argument given to `nn.rnn`.
      z_dims: Size given to the `dense_out` layer.
      z_time_steps: Selects the `fft_size` / `overlap` pair; must be one of
        the keys of the spec table (63, 125, 250, 500 or 1000).
      f0_encoder: Forwarded to the parent constructor.
      name: Forwarded to the parent constructor. The default spelling is
        kept unchanged because it is a runtime identifier.

    Raises:
      ValueError: If `z_time_steps` is not one of the supported values.
    """
    # Zero-argument super(), matching the other constructors in this file.
    super().__init__(f0_encoder=f0_encoder, name=name)

    # Single source of truth for the supported `z_time_steps` values and the
    # FFT settings each one maps to.
    z_audio_spec = {
        63: {'fft_size': 2048, 'overlap': 0.5},
        125: {'fft_size': 1024, 'overlap': 0.5},
        250: {'fft_size': 1024, 'overlap': 0.75},
        500: {'fft_size': 512, 'overlap': 0.75},
        1000: {'fft_size': 256, 'overlap': 0.75},
    }
    # Membership is tested against a list of the keys so the comparison
    # stays equality-based (an unhashable value still gets a ValueError).
    if z_time_steps not in list(z_audio_spec):
        raise ValueError(
            '`z_time_steps` currently limited to 63,125,250,500 and 1000')

    self.z_audio_spec = z_audio_spec
    selected = z_audio_spec[z_time_steps]
    self.fft_size = selected['fft_size']
    self.overlap = selected['overlap']

    # Layers.
    self.z_norm = nn.Normalize('instance')
    self.rnn = nn.rnn(rnn_channels, rnn_type)
    self.dense_out = nn.dense(z_dims)