def __init__(self, details, encoder_decoder, hparams, steps_per_quarter=4,
             steps_per_second=100):
  hparams_dict = {
      'batch_size': 64,
      'rnn_layer_sizes': [128, 128],
      'dropout_keep_prob': 1.0,
      'attn_length': 0,
      'clip_norm': 3,
      'learning_rate': 0.001,
      'residual_connections': False,
      'use_cudnn': False,
  }
  hparams_dict.update(hparams.values())

  self.details = details
  self.encoder_decoder = encoder_decoder
  self.hparams = contrib_training.HParams(**hparams_dict)
  self.steps_per_quarter = steps_per_quarter
  self.steps_per_second = steps_per_second
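# Usage sketch (not part of the original module): the initializer above
# merges caller-supplied hparams over the defaults. HParams.values()
# returns a plain dict, so matching keys in hparams_dict are overwritten
# before the merged dict is rebuilt into a fresh HParams object. The
# import path follows current Magenta; older code imported this module
# from tensorflow.contrib.
from magenta.contrib import training as contrib_training

# The caller overrides two fields; every other key keeps its default.
overrides = contrib_training.HParams(batch_size=128, clip_norm=5)

defaults = {'batch_size': 64, 'clip_norm': 3, 'learning_rate': 0.001}
defaults.update(overrides.values())

merged = contrib_training.HParams(**defaults)
assert merged.batch_size == 128       # overridden
assert merged.learning_rate == 0.001  # default preserved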
def get_default_hparams():
  """Returns the default hyperparameters.

  Returns:
    A tf.contrib.training.HParams object representing the default
    hyperparameters for the model.
  """
  return contrib_training.HParams(
      batch_size=8,
      learning_rate=0.0006,
      decay_steps=10000,
      decay_rate=0.98,
      clip_norm=3.0,
      transform_audio=True,
      onset_lstm_units=256,
      offset_lstm_units=256,
      velocity_lstm_units=0,
      frame_lstm_units=0,
      combined_lstm_units=256,
      acoustic_rnn_stack_size=1,
      combined_rnn_stack_size=1,
      activation_loss=False,
      stop_activation_gradient=False,
      stop_onset_gradient=True,
      stop_offset_gradient=True,
      weight_frame_and_activation_loss=False,
      share_conv_features=False,
      temporal_sizes=[3, 3, 3],
      freq_sizes=[3, 3, 3],
      num_filters=[48, 48, 96],
      pool_sizes=[1, 2, 2],
      dropout_keep_amts=[1.0, 0.75, 0.75],
      fc_size=768,
      fc_dropout_keep_amt=0.5,
      use_lengths=False,
      use_cudnn=False,  # DEPRECATED
      rnn_dropout_drop_amt=0.0,
      bidirectional=True,
      predict_frame_threshold=0.5,
      predict_onset_threshold=0.5,
      predict_offset_threshold=0,
  )
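# Usage sketch (assumption, not module code): individual defaults are
# typically overridden with HParams.parse, which takes a comma-separated
# 'name=value' string and coerces each value to the type of the existing
# hparam. The override values below are arbitrary.
hparams = get_default_hparams()
hparams.parse('batch_size=4,learning_rate=0.001,onset_lstm_units=128')
assert hparams.batch_size == 4
assert hparams.onset_lstm_units == 128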
def get_hparams(config_name):
  """Set hyperparameters.

  Args:
    config_name: Name of config module to use.

  Returns:
    A contrib_training.HParams object with the defaults, overridden by any
    `config_hparams` defined in the config module.
  """
  hparams = contrib_training.HParams(
      # Optimization
      batch_size=16,
      learning_rate=1e-4,
      adam_beta=0.5,
      max_steps=6000 * 50000,
      samples_per_second=16000,
      num_samples=64000,
      # Preprocessing
      n_fft=1024,
      hop_length=256,
      mask=True,
      log_mag=True,
      use_cqt=False,
      re_im=False,
      dphase=True,
      mag_only=False,
      pad=True,
      mu_law_num=0,
      raw_audio=False,
      # Graph
      num_latent=64,  # dimension of z.
      cost_phase_mask=False,
      phase_loss_coeff=1.0,
      fw_loss_coeff=1.0,  # Frequency weighted cost
      fw_loss_cutoff=1000,
  )
  # Set values from a dictionary in the config. HParams has no `update`
  # method; override_from_dict applies the overrides.
  config = utils.get_module("baseline.models.ae_configs.%s" % config_name)
  if hasattr(config, "config_hparams"):
    config_hparams = config.config_hparams
    hparams.override_from_dict(config_hparams)
  return hparams
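# Usage sketch: config_name selects a module under
# baseline.models.ae_configs whose config_hparams dict overrides the
# defaults above. The module name 'my_config' is hypothetical.
#
# baseline/models/ae_configs/my_config.py:
#   config_hparams = {'batch_size': 32, 'num_latent': 128}
hparams = get_hparams('my_config')
assert hparams.batch_size == 32  # overridden by the config module
assert hparams.n_fft == 1024     # default preserved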
# Default configurations.
default_configs = {
    'one_drum':
        events_rnn_model.EventSequenceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='one_drum',
                description='Drums RNN with 2-state encoding.'),
            note_seq.OneHotEventSequenceEncoderDecoder(
                note_seq.MultiDrumOneHotEncoding([
                    [39] +  # use hand clap as default when decoding
                    list(range(note_seq.MIN_MIDI_PITCH, 39)) +
                    list(range(39, note_seq.MAX_MIDI_PITCH + 1))])),
            contrib_training.HParams(
                batch_size=128,
                rnn_layer_sizes=[128, 128],
                dropout_keep_prob=0.5,
                clip_norm=5,
                learning_rate=0.001),
            steps_per_quarter=2),
    'drum_kit':
        events_rnn_model.EventSequenceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='drum_kit',
                description='Drums RNN with multiple drums and binary counters.'),
            note_seq.LookbackEventSequenceEncoderDecoder(
                note_seq.MultiDrumOneHotEncoding(),
                lookback_distances=[],
                binary_counter_bits=6),
            contrib_training.HParams(
                batch_size=128,
    self.num_velocity_bins = num_velocity_bins
    self.control_signals = control_signals
    self.optional_conditioning = optional_conditioning
    self.note_performance = note_performance


default_configs = {
    'performance':
        PerformanceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='performance',
                description='Performance RNN'),
            note_seq.OneHotEventSequenceEncoderDecoder(
                note_seq.PerformanceOneHotEncoding()),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[512, 512, 512],
                dropout_keep_prob=1.0,
                clip_norm=3,
                learning_rate=0.001)),
    'performance_with_dynamics':
        PerformanceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='performance_with_dynamics',
                description='Performance RNN with dynamics'),
            note_seq.OneHotEventSequenceEncoderDecoder(
                note_seq.PerformanceOneHotEncoding(num_velocity_bins=32)),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[512, 512, 512],
                dropout_keep_prob=1.0,
                clip_norm=3,
                learning_rate=0.001),
        branch_factor,
        steps_per_iteration,
        modify_events_callback=modify_events_callback)

  def polyphonic_sequence_log_likelihood(self, sequence):
    """Evaluate the log likelihood of a polyphonic sequence.

    Args:
      sequence: The PolyphonicSequence object for which to evaluate the log
          likelihood.

    Returns:
      The log likelihood of `sequence` under this model.
    """
    return self._evaluate_log_likelihood([sequence])[0]


default_configs = {
    'polyphony':
        events_rnn_model.EventSequenceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='polyphony',
                description='Polyphonic RNN'),
            note_seq.OneHotEventSequenceEncoderDecoder(
                polyphony_encoder_decoder.PolyphonyOneHotEncoding()),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[256, 256, 256],
                dropout_keep_prob=0.5,
                clip_norm=5,
                learning_rate=0.001)),
}
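# Usage sketch (not part of the module): configs are looked up by key, and
# each entry exposes the fields set in the EventSequenceRnnConfig
# initializer.
config = default_configs['polyphony']
print(config.details.description)      # 'Polyphonic RNN'
print(config.hparams.rnn_layer_sizes)  # [256, 256, 256]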
    return self._generate_events(
        num_steps=num_steps,
        primer_events=primer_sequence,
        temperature=None,
        beam_size=beam_size,
        branch_factor=branch_factor,
        steps_per_iteration=steps_per_iteration)


default_configs = {
    'rnn-nade':
        events_rnn_model.EventSequenceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='rnn-nade',
                description='RNN-NADE'),
            mm.PianorollEncoderDecoder(),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[128, 128, 128],
                nade_hidden_units=128,
                dropout_keep_prob=0.5,
                clip_norm=5,
                learning_rate=0.001)),
    'rnn-nade_attn':
        events_rnn_model.EventSequenceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='rnn-nade_attn',
                description='RNN-NADE with attention.'),
            mm.PianorollEncoderDecoder(),
            contrib_training.HParams(
                batch_size=48,
                rnn_layer_sizes=[128, 128],
                attn_length=32,
                nade_hidden_units=128,
                dropout_keep_prob=0.5,
                clip_norm=5,
  def setUp(self):
    super(PianorollPipelineTest, self).setUp()
    self.config = events_rnn_model.EventSequenceRnnConfig(
        None,
        mm.PianorollEncoderDecoder(88),
        contrib_training.HParams())
def copy_hparams(hparams):
  """Return a copy of an HParams instance."""
  return contrib_training.HParams(**hparams.values())
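# Usage sketch (not part of the module): because values() snapshots the
# hparams into a plain dict, the copy is fully independent of the original.
base = contrib_training.HParams(batch_size=16, learning_rate=1e-4)
trial = copy_hparams(base)
trial.set_hparam('learning_rate', 3e-4)  # mutate the copy only
assert base.learning_rate == 1e-4        # original is untouched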
    }),
    # Reverb (for now just single-parameter).
    ('reverb', {
        'reverberance': (0.0, 70.0, 'linear'),
    }),
]

# Default hyperparameter values from the above pipeline. Note the additional
# `transform_audio` hparam that defaults to False, i.e. by default no audio
# transformation will be performed.
DEFAULT_AUDIO_TRANSFORM_HPARAMS = contrib_training.HParams(
    transform_audio=False,
    audio_transform_noise_type='pinknoise',
    audio_transform_min_noise_vol=0.0,
    audio_transform_max_noise_vol=0.04,
    **dict(('audio_transform_%s_%s_%s' % (m, stage_name, param_name), value)
           for stage_name, params_dict in AUDIO_TRANSFORM_PIPELINE
           for param_name, (min_value, max_value, _) in params_dict.items()
           for m, value in [('min', min_value), ('max', max_value)]))


class AudioTransformParameter(object):
  """An audio transform parameter with min and max value."""

  def __init__(self, name, min_value, max_value, scale):
    """Initialize an AudioTransformParameter.

    Args:
      name: The name of the parameter. Should be the same as the name of the
          parameter passed to sox.
      min_value: The minimum value of the parameter, a float.
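# Usage note (a sketch, not part of the module): the starred dict
# comprehension above expands every pipeline stage into paired min/max
# hparams named 'audio_transform_{min,max}_<stage>_<param>'. For the
# 'reverb' stage it generates:
hparams = DEFAULT_AUDIO_TRANSFORM_HPARAMS
assert hparams.audio_transform_min_reverb_reverberance == 0.0
assert hparams.audio_transform_max_reverb_reverberance == 70.0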
  def setUp(self):
    self.config = events_rnn_model.EventSequenceRnnConfig(
        None,
        magenta.music.OneHotEventSequenceEncoderDecoder(
            magenta.music.MultiDrumOneHotEncoding()),
        contrib_training.HParams())
  def setUp(self):
    self.config = events_rnn_model.EventSequenceRnnConfig(
        None,
        magenta.music.OneHotEventSequenceEncoderDecoder(
            polyphony_encoder_decoder.PolyphonyOneHotEncoding()),
        contrib_training.HParams())
  def setUp(self):
    super(PerformancePipelineTest, self).setUp()
    self.config = performance_model.PerformanceRnnConfig(
        None,
        note_seq.OneHotEventSequenceEncoderDecoder(
            note_seq.PerformanceOneHotEncoding()),
        contrib_training.HParams())
        audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
        contrib_training.HParams(
            eval_batch_size=1,
            predict_batch_size=1,
            shuffle_buffer_size=64,
            sample_rate=16000,
            spec_type='mel',
            spec_mel_htk=True,
            spec_log_amplitude=True,
            spec_hop_length=512,
            spec_n_bins=229,
            spec_fmin=30.0,  # A0
            cqt_bins_per_octave=36,
            truncated_length_secs=0.0,
            max_expected_train_example_len=0,
            onset_length=32,
            offset_length=32,
            onset_mode='length_ms',
            onset_delay=0,
            min_frame_occupancy_for_label=0.0,
            jitter_amount_ms=0,
            min_duration_ms=0,
            backward_shift_amount_ms=0,
            velocity_scale=80.0,
            velocity_bias=10.0,
            drum_data_map='',
            drum_prediction_map='',
            velocity_loss_weight=1.0,
            splice_n_examples=0,
            viterbi_decoding=False,
            viterbi_alpha=0.5))
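# A minimal sketch (an assumption, not the module's own code) of how two
# HParams objects like the pair above can be merged into one, relying only
# on HParams.values() returning a plain dict. Magenta's transcription
# configs use a tf_utils.merge_hparams helper for this purpose.
def merge_hparams(hparams_1, hparams_2):
  """Merge two HParams objects; hparams_2 wins on conflicting names."""
  merged = hparams_1.values()        # snapshot into a plain dict
  merged.update(hparams_2.values())
  return contrib_training.HParams(**merged)

hparams = merge_hparams(
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    contrib_training.HParams(sample_rate=16000, spec_type='mel'))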
  def setUp(self):
    super().setUp()
    self.config = events_rnn_model.EventSequenceRnnConfig(
        None,
        note_seq.OneHotEventSequenceEncoderDecoder(
            note_seq.MultiDrumOneHotEncoding()),
        contrib_training.HParams())