Example #1
class EventSequenceRnnConfig(object):

  def __init__(self, details, encoder_decoder, hparams,
               steps_per_quarter=4, steps_per_second=100):
    hparams_dict = {
        'batch_size': 64,
        'rnn_layer_sizes': [128, 128],
        'dropout_keep_prob': 1.0,
        'attn_length': 0,
        'clip_norm': 3,
        'learning_rate': 0.001,
        'residual_connections': False,
        'use_cudnn': False
    }
    hparams_dict.update(hparams.values())

    self.details = details
    self.encoder_decoder = encoder_decoder
    self.hparams = contrib_training.HParams(**hparams_dict)
    self.steps_per_quarter = steps_per_quarter
    self.steps_per_second = steps_per_second
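The pattern above keeps the built-in defaults in a plain dict and merges caller-supplied overrides on top via HParams.values(). A minimal sketch of that merge, assuming the magenta.contrib shim for the old tf.contrib.training module (older code imported tensorflow.contrib directly):

# Sketch of the override pattern from Example #1 (values illustrative).
from magenta.contrib import training as contrib_training

defaults = {'batch_size': 64, 'learning_rate': 0.001}
overrides = contrib_training.HParams(learning_rate=0.01)

merged = dict(defaults)
merged.update(overrides.values())  # values() returns a plain name->value dict
hparams = contrib_training.HParams(**merged)
print(hparams.learning_rate)  # 0.01 -- the override wins
print(hparams.batch_size)     # 64  -- the default survives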
Example #2
def get_default_hparams():
    """Returns the default hyperparameters.

  Returns:
    A tf.contrib.training.HParams object representing the default
    hyperparameters for the model.
  """
    return contrib_training.HParams(
        batch_size=8,
        learning_rate=0.0006,
        decay_steps=10000,
        decay_rate=0.98,
        clip_norm=3.0,
        transform_audio=True,
        onset_lstm_units=256,
        offset_lstm_units=256,
        velocity_lstm_units=0,
        frame_lstm_units=0,
        combined_lstm_units=256,
        acoustic_rnn_stack_size=1,
        combined_rnn_stack_size=1,
        activation_loss=False,
        stop_activation_gradient=False,
        stop_onset_gradient=True,
        stop_offset_gradient=True,
        weight_frame_and_activation_loss=False,
        share_conv_features=False,
        temporal_sizes=[3, 3, 3],
        freq_sizes=[3, 3, 3],
        num_filters=[48, 48, 96],
        pool_sizes=[1, 2, 2],
        dropout_keep_amts=[1.0, 0.75, 0.75],
        fc_size=768,
        fc_dropout_keep_amt=0.5,
        use_lengths=False,
        use_cudnn=False,  # DEPRECATED
        rnn_dropout_drop_amt=0.0,
        bidirectional=True,
        predict_frame_threshold=0.5,
        predict_onset_threshold=0.5,
        predict_offset_threshold=0,
    )
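Typical usage of such a defaults function: fetch the HParams object, then override individual values, e.g. from a command-line flag, with parse(), which accepts a comma-separated name=value string. A short sketch:

hparams = get_default_hparams()
print(hparams.batch_size)  # 8
hparams.parse('batch_size=16,learning_rate=0.001')  # string overrides
print(hparams.batch_size)  # 16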
Example #3
def get_hparams(config_name):
  """Set hyperparameters.

  Args:
    config_name: Name of config module to use.

  Returns:
    A HParams object (magenta) with defaults.
  """
  hparams = contrib_training.HParams(
      # Optimization
      batch_size=16,
      learning_rate=1e-4,
      adam_beta=0.5,
      max_steps=6000 * 50000,
      samples_per_second=16000,
      num_samples=64000,
      # Preprocessing
      n_fft=1024,
      hop_length=256,
      mask=True,
      log_mag=True,
      use_cqt=False,
      re_im=False,
      dphase=True,
      mag_only=False,
      pad=True,
      mu_law_num=0,
      raw_audio=False,
      # Graph
      num_latent=64,  # dimension of z.
      cost_phase_mask=False,
      phase_loss_coeff=1.0,
      fw_loss_coeff=1.0,  # Frequency weighted cost
      fw_loss_cutoff=1000,
  )
  # Set values from a dictionary in the config
  config = utils.get_module("baseline.models.ae_configs.%s" % config_name)
  if hasattr(config, "config_hparams"):
    config_hparams = config.config_hparams
    hparams.update(config_hparams)
  return hparams
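The config modules consumed by get_hparams() only need to expose a config_hparams dictionary. A hypothetical ae_configs module (module name and values illustrative, not from the magenta source):

# baseline/models/ae_configs/my_config.py (hypothetical)
config_hparams = {
    'batch_size': 32,
    'learning_rate': 2e-4,
    'num_latent': 128,
}
# get_hparams('my_config') would override the defaults above with these values.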
Example #4
]

# Default configurations.
default_configs = {
    'one_drum':
        events_rnn_model.EventSequenceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='one_drum', description='Drums RNN with 2-state encoding.'),
            note_seq.OneHotEventSequenceEncoderDecoder(
                note_seq.MultiDrumOneHotEncoding(
                    [[39] +  # use hand clap as default when decoding
                     list(range(note_seq.MIN_MIDI_PITCH, 39)) +
                     list(range(40, note_seq.MAX_MIDI_PITCH + 1))])),
            contrib_training.HParams(
                batch_size=128,
                rnn_layer_sizes=[128, 128],
                dropout_keep_prob=0.5,
                clip_norm=5,
                learning_rate=0.001),
            steps_per_quarter=2),
    'drum_kit':
        events_rnn_model.EventSequenceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='drum_kit',
                description='Drums RNN with multiple drums and binary counters.'
            ),
            note_seq.LookbackEventSequenceEncoderDecoder(
                note_seq.MultiDrumOneHotEncoding(),
                lookback_distances=[],
                binary_counter_bits=6),
            contrib_training.HParams(
                batch_size=128,
Example #5
    self.num_velocity_bins = num_velocity_bins
    self.control_signals = control_signals
    self.optional_conditioning = optional_conditioning
    self.note_performance = note_performance


default_configs = {
    'performance':
        PerformanceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='performance', description='Performance RNN'),
            note_seq.OneHotEventSequenceEncoderDecoder(
                note_seq.PerformanceOneHotEncoding()),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[512, 512, 512],
                dropout_keep_prob=1.0,
                clip_norm=3,
                learning_rate=0.001)),
    'performance_with_dynamics':
        PerformanceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='performance_with_dynamics',
                description='Performance RNN with dynamics'),
            note_seq.OneHotEventSequenceEncoderDecoder(
                note_seq.PerformanceOneHotEncoding(num_velocity_bins=32)),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[512, 512, 512],
                dropout_keep_prob=1.0,
                clip_norm=3,
                learning_rate=0.001),
Example #6
            branch_factor,
            steps_per_iteration,
            modify_events_callback=modify_events_callback)

    def polyphonic_sequence_log_likelihood(self, sequence):
        """Evaluate the log likelihood of a polyphonic sequence.

    Args:
      sequence: The PolyphonicSequence object for which to evaluate the log
          likelihood.

    Returns:
      The log likelihood of `sequence` under this model.
    """
        return self._evaluate_log_likelihood([sequence])[0]


default_configs = {
    'polyphony':
    events_rnn_model.EventSequenceRnnConfig(
        generator_pb2.GeneratorDetails(id='polyphony',
                                       description='Polyphonic RNN'),
        note_seq.OneHotEventSequenceEncoderDecoder(
            polyphony_encoder_decoder.PolyphonyOneHotEncoding()),
        contrib_training.HParams(batch_size=64,
                                 rnn_layer_sizes=[256, 256, 256],
                                 dropout_keep_prob=0.5,
                                 clip_norm=5,
                                 learning_rate=0.001)),
}
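A config from such a default_configs dictionary can be looked up and tuned before training; its hparams attribute is the HParams object built in Example #1. A sketch, assuming the 'polyphony' config above:

config = default_configs['polyphony']
config.hparams.parse('batch_size=32')  # tweak one value in place
print(config.hparams.rnn_layer_sizes)  # [256, 256, 256]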
Example #7
    return self._generate_events(
        num_steps=num_steps, primer_events=primer_sequence, temperature=None,
        beam_size=beam_size, branch_factor=branch_factor,
        steps_per_iteration=steps_per_iteration)


default_configs = {
    'rnn-nade': events_rnn_model.EventSequenceRnnConfig(
        generator_pb2.GeneratorDetails(
            id='rnn-nade',
            description='RNN-NADE'),
        mm.PianorollEncoderDecoder(),
        contrib_training.HParams(
            batch_size=64,
            rnn_layer_sizes=[128, 128, 128],
            nade_hidden_units=128,
            dropout_keep_prob=0.5,
            clip_norm=5,
            learning_rate=0.001)),
    'rnn-nade_attn': events_rnn_model.EventSequenceRnnConfig(
        generator_pb2.GeneratorDetails(
            id='rnn-nade_attn',
            description='RNN-NADE with attention.'),
        mm.PianorollEncoderDecoder(),
        contrib_training.HParams(
            batch_size=48,
            rnn_layer_sizes=[128, 128],
            attn_length=32,
            nade_hidden_units=128,
            dropout_keep_prob=0.5,
            clip_norm=5,
Example #8
 def setUp(self):
     super(PianorollPipelineTest, self).setUp()
     self.config = events_rnn_model.EventSequenceRnnConfig(
         None, mm.PianorollEncoderDecoder(88), contrib_training.HParams())
Example #9
def copy_hparams(hparams):
    """Return a copy of an HParams instance."""
    return contrib_training.HParams(**hparams.values())
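Because HParams(**hparams.values()) rebuilds the object from a fresh dict, the copy is independent of the original. A short sketch:

original = contrib_training.HParams(batch_size=64, learning_rate=0.001)
clone = copy_hparams(original)
clone.parse('learning_rate=0.01')  # mutate only the copy
print(original.learning_rate)      # 0.001, unchanged
print(clone.learning_rate)         # 0.01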
Example #10
    }),

    # Reverb (for now just single-parameter).
    ('reverb', {
        'reverberance': (0.0, 70.0, 'linear'),
    }),
]

# Default hyperparameter values from the above pipeline. Note the additional
# `transform_audio` hparam that defaults to False, i.e. by default no audio
# transformation will be performed.
DEFAULT_AUDIO_TRANSFORM_HPARAMS = contrib_training.HParams(
    transform_audio=False,
    audio_transform_noise_type='pinknoise',
    audio_transform_min_noise_vol=0.0,
    audio_transform_max_noise_vol=0.04,
    **dict(('audio_transform_%s_%s_%s' % (m, stage_name, param_name), value)
           for stage_name, params_dict in AUDIO_TRANSFORM_PIPELINE
           for param_name, (min_value, max_value, _) in params_dict.items()
           for m, value in [('min', min_value), ('max', max_value)]))
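The generator expression above turns every (stage, parameter) pair in AUDIO_TRANSFORM_PIPELINE into a min/max hparam pair. For the 'reverb' stage shown, it expands as follows (a sketch of the pure-Python expansion):

pipeline = [('reverb', {'reverberance': (0.0, 70.0, 'linear')})]
kwargs = dict(('audio_transform_%s_%s_%s' % (m, stage, param), value)
              for stage, params in pipeline
              for param, (lo, hi, _) in params.items()
              for m, value in [('min', lo), ('max', hi)])
# kwargs == {'audio_transform_min_reverb_reverberance': 0.0,
#            'audio_transform_max_reverb_reverberance': 70.0}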


class AudioTransformParameter(object):
    """An audio transform parameter with min and max value."""
    def __init__(self, name, min_value, max_value, scale):
        """Initialize an AudioTransformParameter.

    Args:
      name: The name of the parameter. Should be the same as the name of the
          parameter passed to sox.
      min_value: The minimum value of the parameter, a float.
Example #11
 def setUp(self):
     self.config = events_rnn_model.EventSequenceRnnConfig(
         None,
         magenta.music.OneHotEventSequenceEncoderDecoder(
             magenta.music.MultiDrumOneHotEncoding()),
         contrib_training.HParams())
Example #12
 def setUp(self):
     self.config = events_rnn_model.EventSequenceRnnConfig(
         None,
         magenta.music.OneHotEventSequenceEncoderDecoder(
             polyphony_encoder_decoder.PolyphonyOneHotEncoding()),
         contrib_training.HParams())
Example #13
 def setUp(self):
   super(PerformancePipelineTest, self).setUp()
   self.config = performance_model.PerformanceRnnConfig(
       None,
       note_seq.OneHotEventSequenceEncoderDecoder(
           note_seq.PerformanceOneHotEncoding()), contrib_training.HParams())
Example #14
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    contrib_training.HParams(
        eval_batch_size=1,
        predict_batch_size=1,
        shuffle_buffer_size=64,
        sample_rate=16000,
        spec_type='mel',
        spec_mel_htk=True,
        spec_log_amplitude=True,
        spec_hop_length=512,
        spec_n_bins=229,
        spec_fmin=30.0,  # A0
        cqt_bins_per_octave=36,
        truncated_length_secs=0.0,
        max_expected_train_example_len=0,
        onset_length=32,
        offset_length=32,
        onset_mode='length_ms',
        onset_delay=0,
        min_frame_occupancy_for_label=0.0,
        jitter_amount_ms=0,
        min_duration_ms=0,
        backward_shift_amount_ms=0,
        velocity_scale=80.0,
        velocity_bias=10.0,
        drum_data_map='',
        drum_prediction_map='',
        velocity_loss_weight=1.0,
        splice_n_examples=0,
        viterbi_decoding=False,
        viterbi_alpha=0.5))
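The fragment above passes DEFAULT_AUDIO_TRANSFORM_HPARAMS together with the model hparams to a merge step. One way to merge two HParams objects, with the second taking precedence, is sketched below; magenta ships a similar helper, but this particular signature is an assumption:

def merge_hparams(first, second):
  """Merge two HParams; values in `second` win on conflict."""
  merged = dict(first.values())
  merged.update(second.values())
  return contrib_training.HParams(**merged)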
Example #15
 def setUp(self):
   super().setUp()
   self.config = events_rnn_model.EventSequenceRnnConfig(
       None,
       note_seq.OneHotEventSequenceEncoderDecoder(
           note_seq.MultiDrumOneHotEncoding()), contrib_training.HParams())
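A note on the empty contrib_training.HParams() these test fixtures pass: since EventSequenceRnnConfig merges hparams.values() over its internal defaults (Example #1), an empty object contributes nothing and the defaults stand. A one-line sketch:

empty = contrib_training.HParams()
print(empty.values())  # {} -- so hparams_dict.update(empty.values()) is a no-op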