def initialize_session(acoustic_checkpoint, hparams):
  """Builds an inference graph and wraps it in a TranscriptionSession.

  A new graph is created holding a string placeholder of shape [None] for the
  examples, a batch-size-1, non-training input pipeline fed from that
  placeholder, and the model. The graph's variables are then restored from
  `acoustic_checkpoint` into a newly opened session.

  Args:
    acoustic_checkpoint: Checkpoint path handed to `tf.train.Saver.restore`.
    hparams: Hyperparameters forwarded to `data.provide_batch` and
      `model.get_model`, and stored on the returned object.

  Returns:
    A `TranscriptionSession` carrying the open session, the examples
    placeholder, the input iterator, the onset/frame/velocity tensors (looked
    up by name in the graph) and `hparams`. The session is not closed here.
  """
  with tf.Graph().as_default() as graph:
    example_input = tf.placeholder(tf.string, [None])

    input_batch, input_iterator = data.provide_batch(
        batch_size=1,
        examples=example_input,
        hparams=hparams,
        is_training=False,
        truncated_length=0)

    # Called only for its effect of adding the model ops to the graph; the
    # outputs needed later are fetched by tensor name below.
    model.get_model(input_batch, hparams, is_training=False)

    sess = tf.Session()
    tf.train.Saver().restore(sess, acoustic_checkpoint)

    lookup = graph.get_tensor_by_name
    return TranscriptionSession(
        session=sess,
        examples=example_input,
        iterator=input_iterator,
        onset_probs_flat=lookup('onsets/onset_probs_flat:0'),
        frame_probs_flat=lookup('frame_probs_flat:0'),
        velocity_values_flat=lookup('velocity/velocity_values_flat:0'),
        hparams=hparams)
def initialize_session(acoustic_checkpoint, hparams):
  """Creates a ready-to-use transcription session from a checkpoint.

  Note that `hparams.batch_size` is overwritten with 1 on the object passed
  in, so the caller's hparams are modified.

  Args:
    acoustic_checkpoint: Checkpoint path handed to `tf.train.Saver.restore`.
    hparams: Hyperparameters forwarded to `data.provide_batch` and
      `model.get_model`; mutated as described above and stored on the result.

  Returns:
    A `TranscriptionSession` carrying the open session, the examples
    placeholder, the input iterator, the onset/frame/velocity tensors (looked
    up by name in the graph) and `hparams`. The session is not closed here.
  """
  with tf.Graph().as_default():
    examples_placeholder = tf.placeholder(tf.string, [None])

    # The input pipeline below is built with batch_size=1; keep hparams in
    # step with that.
    hparams.batch_size = 1

    provide_kwargs = dict(
        batch_size=1,
        examples=examples_placeholder,
        hparams=hparams,
        is_training=False,
        truncated_length=0)
    single_batch, batch_iterator = data.provide_batch(**provide_kwargs)

    # Called only for its effect of adding the model ops to the graph.
    model.get_model(single_batch, hparams, is_training=False)

    restored_session = tf.Session()
    checkpoint_saver = tf.train.Saver()
    checkpoint_saver.restore(restored_session, acoustic_checkpoint)

    default_graph = tf.get_default_graph()
    onsets = default_graph.get_tensor_by_name('onsets/onset_probs_flat:0')
    frames = default_graph.get_tensor_by_name('frame_probs_flat:0')
    velocities = default_graph.get_tensor_by_name(
        'velocity/velocity_values_flat:0')

    return TranscriptionSession(
        session=restored_session,
        examples=examples_placeholder,
        iterator=batch_iterator,
        onset_probs_flat=onsets,
        frame_probs_flat=frames,
        velocity_values_flat=velocities,
        hparams=hparams)
def test(checkpoint_path, test_dir, examples_path, hparams, num_batches=None):
  """Runs one evaluation pass over a single checkpoint and prints metrics.

  Args:
    checkpoint_path: Checkpoint passed to `slim.evaluation.evaluate_once`.
    test_dir: Directory created if missing and used as the evaluation logdir.
    examples_path: Location of the examples, forwarded to `_get_data`.
    hparams: Hyperparameters for the data pipeline, model and metrics.
    num_batches: Number of batches to evaluate. When falsy (None or 0), the
      data provider's own `num_batches` is used.

  Only metric results for which `np.isscalar` is true are printed.
  """
  tf.gfile.MakeDirs(test_dir)
  _trial_summary(hparams, examples_path, test_dir)

  with tf.Graph().as_default():
    transcription_data = _get_data(examples_path, hparams, is_training=False)

    model_outputs = model.get_model(
        transcription_data, hparams, is_training=False)
    unused_loss, losses, labels, predictions, images = model_outputs

    value_tensors, update_ops = _get_eval_metrics(
        losses, labels, predictions, images, hparams)

    results = slim.evaluation.evaluate_once(
        master='',
        checkpoint_path=checkpoint_path,
        logdir=test_dir,
        num_evals=num_batches or transcription_data.num_batches,
        eval_op=list(update_ops.values()),
        final_op=list(value_tensors.values()))

    # Pair every metric name with the value computed by the final op.
    named_results = dict(zip(list(value_tensors.keys()), results))
    for metric_name, metric_value in named_results.items():
      if not np.isscalar(metric_value):
        continue
      print('%s: %f' % (metric_name, metric_value))
def evaluate(train_dir, eval_dir, examples_path, hparams, num_batches=None):
  """Evaluates checkpoints from `train_dir` over and over.

  Args:
    train_dir: Checkpoint directory handed to
      `tf.contrib.training.evaluate_repeatedly`.
    eval_dir: Directory created if missing; end-of-run summaries go here.
    examples_path: Location of the examples, forwarded to `_get_data`.
    hparams: Hyperparameters for the data pipeline, model and metrics.
    num_batches: Number of batches per evaluation run. When falsy (None or
      0), the data provider's own `num_batches` is used.
  """
  tf.gfile.MakeDirs(eval_dir)
  _trial_summary(hparams, examples_path, eval_dir)

  with tf.Graph().as_default():
    transcription_data = _get_data(examples_path, hparams, is_training=False)

    model_outputs = model.get_model(
        transcription_data, hparams, is_training=False)
    unused_loss, losses, labels, predictions, images = model_outputs

    # Only the update ops are needed; the value tensors are ignored here.
    _, update_ops = _get_eval_metrics(
        losses, labels, predictions, images, hparams)

    evals_per_run = num_batches or transcription_data.num_batches
    eval_hooks = [
        tf.contrib.training.StopAfterNEvalsHook(evals_per_run),
        tf.contrib.training.SummaryAtEndHook(eval_dir),
    ]

    tf.contrib.training.evaluate_repeatedly(
        train_dir,
        eval_ops=list(update_ops.values()),
        hooks=eval_hooks,
        eval_interval_secs=60,
        timeout=None)
def test(checkpoint_path,
         test_dir,
         examples_path,
         hparams,
         num_batches=None,
         master=''):
  """Runs one evaluation pass over a single checkpoint and prints metrics.

  Args:
    checkpoint_path: Checkpoint passed to `slim.evaluation.evaluate_once`.
    test_dir: Directory created if missing and used as the evaluation logdir.
    examples_path: Location of the examples, forwarded to `_get_data`.
    hparams: Hyperparameters for the data pipeline, model and metrics.
    num_batches: Number of batches to evaluate. When falsy (None or 0), the
      data provider's own `num_batches` is used.
    master: Forwarded as `master` to `slim.evaluation.evaluate_once`.

  Only metric results for which `np.isscalar` is true are printed.
  """
  tf.gfile.MakeDirs(test_dir)
  _trial_summary(hparams, examples_path, test_dir)

  with tf.Graph().as_default():
    transcription_data = _get_data(examples_path, hparams, is_training=False)

    model_outputs = model.get_model(
        transcription_data, hparams, is_training=False)
    unused_loss, losses, labels, predictions, images = model_outputs

    value_tensors, update_ops = _get_eval_metrics(
        losses, labels, predictions, images, hparams)

    results = slim.evaluation.evaluate_once(
        checkpoint_path=checkpoint_path,
        logdir=test_dir,
        num_evals=num_batches or transcription_data.num_batches,
        eval_op=list(update_ops.values()),
        final_op=list(value_tensors.values()),
        master=master)

    # Pair every metric name with the value computed by the final op.
    named_results = dict(zip(list(value_tensors.keys()), results))
    for metric_name, metric_value in named_results.items():
      if not np.isscalar(metric_value):
        continue
      print('%s: %f' % (metric_name, metric_value))
def evaluate(train_dir,
             eval_dir,
             examples_path,
             hparams,
             num_batches=None,
             master=''):
  """Evaluates checkpoints from `train_dir` over and over.

  Args:
    train_dir: Checkpoint directory handed to
      `tf.contrib.training.evaluate_repeatedly`.
    eval_dir: Directory created if missing; end-of-run summaries go here.
    examples_path: Location of the examples, forwarded to `_get_data`.
    hparams: Hyperparameters for the data pipeline, model and metrics.
    num_batches: Number of batches per evaluation run. When falsy (None or
      0), the data provider's own `num_batches` is used.
    master: Forwarded as `master` to
      `tf.contrib.training.evaluate_repeatedly`.
  """
  tf.gfile.MakeDirs(eval_dir)
  _trial_summary(hparams, examples_path, eval_dir)

  with tf.Graph().as_default():
    transcription_data = _get_data(examples_path, hparams, is_training=False)

    model_outputs = model.get_model(
        transcription_data, hparams, is_training=False)
    unused_loss, losses, labels, predictions, images = model_outputs

    # Only the update ops are needed; the value tensors are ignored here.
    _, update_ops = _get_eval_metrics(
        losses, labels, predictions, images, hparams)

    evals_per_run = num_batches or transcription_data.num_batches
    eval_hooks = [
        tf.contrib.training.StopAfterNEvalsHook(evals_per_run),
        tf.contrib.training.SummaryAtEndHook(eval_dir),
    ]

    tf.contrib.training.evaluate_repeatedly(
        train_dir,
        eval_ops=list(update_ops.values()),
        hooks=eval_hooks,
        eval_interval_secs=60,
        timeout=None,
        master=master)
def train(train_dir,
          examples_path,
          hparams,
          checkpoints_to_keep=5,
          keep_checkpoint_every_n_hours=1,
          num_steps=None):
  """Train loop.

  Builds the training graph (data pipeline, model, summaries, Adam optimizer
  with a staircase exponentially decaying learning rate) and runs it with
  `tf.contrib.training.train`, checkpointing every 300 seconds.

  Args:
    train_dir: Directory created if missing; used as the training logdir.
    examples_path: Location of the examples, forwarded to `_get_data`.
    hparams: Hyperparameters. This function reads `learning_rate`,
      `decay_steps`, `decay_rate` and `clip_norm`; the object is also
      forwarded to the data pipeline and the model.
    checkpoints_to_keep: `max_to_keep` for the checkpoint saver.
    keep_checkpoint_every_n_hours: Forwarded to the checkpoint saver.
    num_steps: If truthy, training stops at this global step; otherwise no
      stop hook is installed.
  """
  tf.gfile.MakeDirs(train_dir)

  _trial_summary(hparams, examples_path, train_dir)

  with tf.Graph().as_default():
    transcription_data = _get_data(examples_path, hparams, is_training=True)

    loss, losses, unused_labels, unused_predictions, images = model.get_model(
        transcription_data, hparams, is_training=True)

    tf.summary.scalar('loss', loss)
    # `.items()` rather than the Python-2-only `.iteritems()` so the loops
    # work on both Python 2 and Python 3 mappings.
    for label, loss_collection in losses.items():
      loss_label = 'losses/' + label
      tf.summary.scalar(loss_label, tf.reduce_mean(loss_collection))
    for name, image in images.items():
      tf.summary.image(name, image)

    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(
        hparams.learning_rate,
        global_step,
        hparams.decay_steps,
        hparams.decay_rate,
        staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

    train_op = slim.learning.create_train_op(
        loss,
        optimizer,
        clip_gradient_norm=hparams.clip_norm,
        summarize_gradients=True)

    logging_dict = {
        'global_step': tf.train.get_global_step(),
        'loss': loss
    }

    # Log the step and loss every 100 iterations.
    hooks = [tf.train.LoggingTensorHook(logging_dict, every_n_iter=100)]
    if num_steps:
      hooks.append(tf.train.StopAtStepHook(num_steps))

    scaffold = tf.train.Scaffold(
        saver=tf.train.Saver(
            max_to_keep=checkpoints_to_keep,
            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours))

    tf.contrib.training.train(
        train_op=train_op,
        logdir=train_dir,
        scaffold=scaffold,
        hooks=hooks,
        save_checkpoint_secs=300)
def model_inference(acoustic_checkpoint, hparams, examples_path, run_dir):
  """Runs inference for the given examples.

  For every batch (batch size 1) the acoustic model is run, the thresholded
  outputs are converted to a NoteSequence, the prediction is scored against
  the label sequence, and three files are written to `run_dir`:
  `<name>.mid` (prediction), `<name>_label.mid` (label) and
  `<name>_pianoroll.png`, where `<name>` is the example's filename with '/'
  replaced by '_' and ':' replaced by '.'.

  Besides its arguments the function reads these flags: `master`,
  `max_seconds_per_sequence`, `frame_threshold`, `require_onset`,
  `onset_threshold`, `use_offset` and `offset_threshold`.

  Args:
    acoustic_checkpoint: Checkpoint the acoustic variables are restored from
      when the session is initialized.
    hparams: Hyperparameters for the data pipeline and model;
      `backward_shift_amount_ms` is also read to shift label times.
    examples_path: Passed as `examples` to `data.provide_batch`.
    run_dir: Directory receiving the summary events and output files.
  """
  tf.logging.info('acoustic_checkpoint=%s', acoustic_checkpoint)
  tf.logging.info('examples_path=%s', examples_path)
  tf.logging.info('run_dir=%s', run_dir)

  # Loop-invariant helpers, computed once instead of once per sequence.
  frames_per_second = data.hparams_frames_per_second(hparams)

  def shift_notesequence(ns_time):
    return ns_time + hparams.backward_shift_amount_ms / 1000.

  with tf.Graph().as_default():
    num_dims = constants.MIDI_PITCHES

    # Build the acoustic model within an 'acoustic' scope to isolate its
    # variables from the other models.
    # NOTE(review): scope extent reconstructed from the tensor names used
    # below ('acoustic/...'); confirm against the original layout.
    with tf.variable_scope('acoustic'):
      truncated_length = 0
      if FLAGS.max_seconds_per_sequence:
        truncated_length = int(
            math.ceil(FLAGS.max_seconds_per_sequence * frames_per_second))
      acoustic_data_provider, _ = data.provide_batch(
          batch_size=1,
          examples=examples_path,
          hparams=hparams,
          is_training=False,
          truncated_length=truncated_length,
          include_note_sequences=True)

      _, _, data_labels, _, _ = model.get_model(
          acoustic_data_provider, hparams, is_training=False)

    # The checkpoints won't have the new scopes, so map each variable back
    # to its un-prefixed name for restoring.
    acoustic_variables = {
        re.sub(r'^acoustic/', '', var.op.name): var
        for var in slim.get_variables(scope='acoustic/')
    }
    acoustic_restore = tf.train.Saver(acoustic_variables)

    graph = tf.get_default_graph()
    onset_probs_flat = graph.get_tensor_by_name(
        'acoustic/onsets/onset_probs_flat:0')
    frame_probs_flat = graph.get_tensor_by_name(
        'acoustic/frame_probs_flat:0')
    offset_probs_flat = graph.get_tensor_by_name(
        'acoustic/offsets/offset_probs_flat:0')
    velocity_values_flat = graph.get_tensor_by_name(
        'acoustic/velocity/velocity_values_flat:0')

    # Define some metrics.
    (metrics_to_updates, metric_note_precision, metric_note_recall,
     metric_note_f1, metric_note_precision_with_offsets,
     metric_note_recall_with_offsets, metric_note_f1_with_offsets,
     metric_note_precision_with_offsets_velocity,
     metric_note_recall_with_offsets_velocity,
     metric_note_f1_with_offsets_velocity, metric_frame_labels,
     metric_frame_predictions) = infer_util.define_metrics(num_dims)

    summary_op = tf.summary.merge_all()
    global_step = tf.contrib.framework.get_or_create_global_step()
    global_step_increment = global_step.assign_add(1)

    # Use a custom init function to restore the acoustic and language models
    # from their separate checkpoints.
    def init_fn(unused_self, sess):
      acoustic_restore.restore(sess, acoustic_checkpoint)

    scaffold = tf.train.Scaffold(init_fn=init_fn)
    session_creator = tf.train.ChiefSessionCreator(
        scaffold=scaffold, master=FLAGS.master)
    with tf.train.MonitoredSession(session_creator=session_creator) as sess:
      tf.logging.info('running session')
      summary_writer = tf.summary.FileWriter(
          logdir=run_dir, graph=sess.graph)

      # Close the writer even if a sequence fails so that already-written
      # events reach disk and the writer's resources are released.
      try:
        tf.logging.info('Inferring for %d batches',
                        acoustic_data_provider.num_batches)
        infer_times = []
        num_frames = []

        for unused_i in range(acoustic_data_provider.num_batches):
          start_time = time.time()
          (labels, filenames, note_sequences, frame_probs, onset_probs,
           offset_probs, velocity_values) = sess.run([
               data_labels,
               acoustic_data_provider.filenames,
               acoustic_data_provider.note_sequences,
               frame_probs_flat,
               onset_probs_flat,
               offset_probs_flat,
               velocity_values_flat,
           ])

          # We expect these all to be length 1 because batch size is 1.
          assert len(filenames) == len(note_sequences) == 1
          # These should be the same length and have been flattened.
          assert len(labels) == len(frame_probs) == len(onset_probs)

          frame_predictions = frame_probs > FLAGS.frame_threshold
          if FLAGS.require_onset:
            onset_predictions = onset_probs > FLAGS.onset_threshold
          else:
            onset_predictions = None

          if FLAGS.use_offset:
            offset_predictions = offset_probs > FLAGS.offset_threshold
          else:
            offset_predictions = None

          sequence_prediction = sequences_lib.pianoroll_to_note_sequence(
              frame_predictions,
              frames_per_second=frames_per_second,
              min_duration_ms=0,
              min_midi_pitch=constants.MIN_MIDI_PITCH,
              onset_predictions=onset_predictions,
              offset_predictions=offset_predictions,
              velocity_values=velocity_values)

          # Timing covers the session run plus conversion to a NoteSequence.
          end_time = time.time()
          infer_time = end_time - start_time
          infer_times.append(infer_time)
          num_frames.append(frame_probs.shape[0])
          tf.logging.info(
              'Infer time %f, frames %d, frames/sec %f, running average %f',
              infer_time, frame_probs.shape[0],
              frame_probs.shape[0] / infer_time,
              np.sum(num_frames) / np.sum(infer_times))

          tf.logging.info('Scoring sequence %s', filenames[0])

          sequence_label = infer_util.score_sequence(
              sess,
              global_step_increment,
              summary_op,
              summary_writer,
              metrics_to_updates,
              metric_note_precision,
              metric_note_recall,
              metric_note_f1,
              metric_note_precision_with_offsets,
              metric_note_recall_with_offsets,
              metric_note_f1_with_offsets,
              metric_note_precision_with_offsets_velocity,
              metric_note_recall_with_offsets_velocity,
              metric_note_f1_with_offsets_velocity,
              metric_frame_labels,
              metric_frame_predictions,
              frame_labels=labels,
              sequence_prediction=sequence_prediction,
              frames_per_second=frames_per_second,
              sequence_label=sequences_lib.adjust_notesequence_times(
                  music_pb2.NoteSequence.FromString(note_sequences[0]),
                  shift_notesequence)[0],
              sequence_id=filenames[0])

          # Make filenames UNIX-friendly.
          filename = filenames[0].decode('utf-8').replace('/', '_').replace(
              ':', '.')
          output_file = os.path.join(run_dir, filename + '.mid')
          tf.logging.info('Writing inferred midi file to %s', output_file)
          midi_io.sequence_proto_to_midi_file(sequence_prediction, output_file)

          label_output_file = os.path.join(run_dir, filename + '_label.mid')
          tf.logging.info('Writing label midi file to %s', label_output_file)
          midi_io.sequence_proto_to_midi_file(sequence_label,
                                              label_output_file)

          # Also write a pianoroll showing acoustic model output vs labels.
          pianoroll_output_file = os.path.join(run_dir,
                                               filename + '_pianoroll.png')
          tf.logging.info('Writing acoustic logit/label file to %s',
                          pianoroll_output_file)
          with tf.gfile.GFile(pianoroll_output_file, mode='w') as f:
            scipy.misc.imsave(
                f,
                infer_util.posterior_pianoroll_image(
                    frame_probs,
                    sequence_prediction,
                    labels,
                    overlap=True,
                    frames_per_second=frames_per_second))

          # Persist this sequence's summaries before moving on.
          summary_writer.flush()
      finally:
        summary_writer.close()
# Report which acoustic checkpoint is about to be restored.
print('acoustic_checkpoint=' + acoustic_checkpoint)
# Combine the shared default hparams with the model's own defaults.
hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                 model.get_default_hparams())
with tf.Graph().as_default():
  # String placeholder of shape [None] that feeds the input pipeline.
  examples = tf.placeholder(tf.string, [None])
  num_dims = constants.MIDI_PITCHES
  # Batch-size-1, non-training pipeline with truncated_length=0.
  batch, iterator = data.provide_batch(batch_size=1,
                                       examples=examples,
                                       hparams=hparams,
                                       is_training=False,
                                       truncated_length=0)
  # Called for its effect of adding the model ops to the graph; the outputs
  # used below are fetched by tensor name.
  model.get_model(batch, hparams, is_training=False)
  # Open a session and restore the graph's variables from the checkpoint.
  session = tf.Session()
  saver = tf.train.Saver()
  saver.restore(session, acoustic_checkpoint)
  onset_probs_flat = tf.get_default_graph().get_tensor_by_name(
      'onsets/onset_probs_flat:0')
  frame_probs_flat = tf.get_default_graph().get_tensor_by_name(
      'frame_probs_flat:0')
  # NOTE(review): the commented-out code below is cut off in this view; it is
  # kept verbatim.
  # file = open('./data/wav_format/xinjing.wav','w') ##todo
  # to_process = []
  # wav_data = audio_io.samples_to_wav_data(
  # librosa.util.normalize(librosa.core.load(file, sr=hparams.sample_rate)[0]),
def train(train_dir,
          examples_path,
          hparams,
          checkpoints_to_keep=5,
          keep_checkpoint_every_n_hours=1,
          num_steps=None,
          master='',
          task=0,
          num_ps_tasks=0):
  """Builds the training graph and runs the (optionally distributed) loop.

  Args:
    train_dir: Directory created if missing; used as the training logdir.
    examples_path: Location of the examples, forwarded to `_get_data`.
    hparams: Hyperparameters. This function reads `learning_rate`,
      `decay_steps`, `decay_rate` and `clip_norm`, and logs
      `sampling_probability` when that attribute exists.
    checkpoints_to_keep: `max_to_keep` for the checkpoint saver.
    keep_checkpoint_every_n_hours: Forwarded to the checkpoint saver.
    num_steps: If truthy, training stops at this global step.
    master: Forwarded as `master` to `tf.contrib.training.train`.
    task: Task index of this replica; task 0 is treated as chief.
    num_ps_tasks: Parameter-server task count given to
      `tf.train.replica_device_setter`.
  """
  tf.gfile.MakeDirs(train_dir)

  is_chief = task == 0
  # Only the chief calls `_trial_summary` for the run.
  if is_chief:
    _trial_summary(hparams, examples_path, train_dir)

  with tf.Graph().as_default():
    device_fn = tf.train.replica_device_setter(
        num_ps_tasks, merge_devices=True)
    # NOTE(review): the original layout did not make the end of this device
    # scope recoverable; everything through the training call is kept inside
    # it - confirm.
    with tf.device(device_fn):
      transcription_data = _get_data(examples_path, hparams, is_training=True)

      model_outputs = model.get_model(
          transcription_data, hparams, is_training=True)
      loss, losses, unused_labels, unused_predictions, images = model_outputs

      tf.summary.scalar('loss', loss)
      for label, loss_collection in losses.items():
        tf.summary.scalar('losses/' + label, tf.reduce_mean(loss_collection))
      for name, image in images.items():
        tf.summary.image(name, image)

      step = tf.train.get_or_create_global_step()
      decayed_lr = tf.train.exponential_decay(
          hparams.learning_rate,
          step,
          hparams.decay_steps,
          hparams.decay_rate,
          staircase=True)
      tf.summary.scalar('learning_rate', decayed_lr)

      adam = tf.train.AdamOptimizer(learning_rate=decayed_lr)
      train_op = slim.learning.create_train_op(
          loss,
          adam,
          clip_gradient_norm=hparams.clip_norm,
          summarize_gradients=True,
          variables_to_train=None)

      logged_tensors = {
          'global_step': tf.train.get_global_step(),
          'loss': loss,
      }
      if hasattr(hparams, 'sampling_probability'):
        logged_tensors['sampling_probability'] = hparams.sampling_probability

      # Log every 100 iterations; optionally stop at a fixed step.
      train_hooks = [
          tf.train.LoggingTensorHook(logged_tensors, every_n_iter=100)
      ]
      if num_steps:
        train_hooks.append(tf.train.StopAtStepHook(num_steps))

      checkpoint_saver = tf.train.Saver(
          max_to_keep=checkpoints_to_keep,
          keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
      scaffold = tf.train.Scaffold(saver=checkpoint_saver)

      tf.contrib.training.train(
          train_op=train_op,
          logdir=train_dir,
          scaffold=scaffold,
          hooks=train_hooks,
          save_checkpoint_secs=300,
          master=master,
          is_chief=is_chief)