def __init__(self, path='onsets-frames'):
    """Load the Onsets and Frames model (arXiv:1710.11153 [cs.SD]) and its
    pretrained weights from the given path, using TensorFlow and Magenta.

    Args:
        path (str, optional): The path to the model weights. Defaults to
            'onsets-frames'.
    """
    tf.disable_eager_execution()
    tf.disable_v2_behavior()
    tf.logging.set_verbosity(tf.logging.ERROR)

    self.config = configs.CONFIG_MAP['onsets_frames']
    self.hparams = self.config.hparams
    self.hparams.use_cudnn = False
    self.hparams.batch_size = 1
    self.checkpoint_dir = path

    self.examples = tf.placeholder(tf.string, [None])
    self.dataset = data.provide_batch(examples=self.examples,
                                      preprocess_examples=True,
                                      params=self.hparams,
                                      is_training=False,
                                      shuffle_examples=False,
                                      skip_n_initial_records=0)
    self.estimator = train_util.create_estimator(self.config.model_fn,
                                                 self.checkpoint_dir,
                                                 self.hparams)
    self.iterator = tf.data.make_initializable_iterator(self.dataset)
    self.next_record = self.iterator.get_next()
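# Usage sketch for the constructor above. This is an assumption-laden
# illustration: the enclosing class name `OnsetsFramesTranscriber` is not
# shown in the snippet and is hypothetical, as is `serialized_example`;
# the session bootstrapping mirrors the other snippets in this file.
import tensorflow.compat.v1 as tf

transcriber = OnsetsFramesTranscriber(path='onsets-frames')
with tf.Session() as sess:
    sess.run([tf.initializers.global_variables(),
              tf.initializers.local_variables()])
    # `serialized_example` would be a serialized tf.train.Example built by
    # audio_label_data_utils.process_record(...), as in the snippets below.
    sess.run(transcriber.iterator.initializer,
             {transcriber.examples: [serialized_example]})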
def run(argv, config_map, data_fn):
    """Create transcriptions."""
    tf.logging.set_verbosity(FLAGS.log)

    config = config_map[FLAGS.config]
    hparams = config.hparams
    # For this script, default to not using cudnn.
    hparams.use_cudnn = False
    hparams.parse(FLAGS.hparams)
    hparams.batch_size = 1
    hparams.truncated_length_secs = 0

    with tf.Graph().as_default():
        examples = tf.placeholder(tf.string, [None])

        dataset = data_fn(examples=examples,
                          preprocess_examples=True,
                          params=hparams,
                          is_training=False,
                          shuffle_examples=False,
                          skip_n_initial_records=0)

        estimator = train_util.create_estimator(
            config.model_fn, os.path.expanduser(FLAGS.model_dir), hparams)

        iterator = dataset.make_initializable_iterator()
        next_record = iterator.get_next()

        with tf.Session() as sess:
            sess.run([
                tf.initializers.global_variables(),
                tf.initializers.local_variables()
            ])

            for filename in argv[1:]:
                tf.logging.info('Starting transcription for %s...', filename)

                # The reason we bounce between two Dataset objects is so we
                # can use the data processing functionality in data.py
                # without having to construct all the Example protos in
                # memory ahead of time or create a temporary tfrecord file.
                tf.logging.info('Processing file...')
                sess.run(
                    iterator.initializer, {
                        examples: [
                            create_example(filename,
                                           FLAGS.load_audio_with_librosa)
                        ]
                    })

                def transcription_data(params):
                    del params
                    return tf.data.Dataset.from_tensors(sess.run(next_record))

                input_fn = infer_util.labels_to_features_wrapper(
                    transcription_data)

                tf.logging.info('Running inference...')
                checkpoint_path = None
                if FLAGS.checkpoint_path:
                    checkpoint_path = os.path.expanduser(FLAGS.checkpoint_path)

                prediction_list = list(
                    estimator.predict(input_fn,
                                      checkpoint_path=checkpoint_path,
                                      yield_single_examples=False))
                assert len(prediction_list) == 1

                sequence_prediction = music_pb2.NoteSequence.FromString(
                    prediction_list[0]['sequence_predictions'][0])

                midi_filename = (filename + FLAGS.transcribed_file_suffix +
                                 '.midi')
                midi_io.sequence_proto_to_midi_file(sequence_prediction,
                                                    midi_filename)

                tf.logging.info('Transcription written to %s.', midi_filename)
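# A plausible wiring for the `run` function above, following the Magenta
# onsets_frames_transcription convention; treat this as a sketch rather than
# the script's actual entry point, since that part is not in the snippet.
def main(argv):
    run(argv, config_map=configs.CONFIG_MAP, data_fn=data.provide_batch)

if __name__ == '__main__':
    tf.app.run(main)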
def transcribe(audio, sr, cuda=False):
    """
    Transcribe audio with Magenta's Onsets and Frames.

    Magenta insists on taking an audio path (raw wav bytes) instead of
    decoded samples, losing the decoupling between file format and DSP.
    Input: audio samples and their sample rate. Output: a matrix like ASMD's,
    with one (pitch, onset, offset, velocity) row per note.
    """
    # Simple hack around Magenta's interface: monkey-patch the wav decoder so
    # that we can accept audio data that is already loaded, keep our
    # reasonable interface, and decouple I/O from processing.
    original_google_sucks = audio_io.wav_data_to_samples
    audio_io.wav_data_to_samples = google_sucks
    audio = np.array(audio)

    config = configs.CONFIG_MAP['onsets_frames']
    hparams = config.hparams
    hparams.use_cudnn = cuda
    hparams.batch_size = 1

    examples = tf.placeholder(tf.string, [None])
    dataset = data.provide_batch(examples=examples,
                                 preprocess_examples=True,
                                 params=hparams,
                                 is_training=False,
                                 shuffle_examples=False,
                                 skip_n_initial_records=0)
    estimator = train_util.create_estimator(config.model_fn, CHECKPOINT_DIR,
                                            hparams)
    iterator = dataset.make_initializable_iterator()
    next_record = iterator.get_next()

    example_list = list(
        audio_label_data_utils.process_record(wav_data=audio,
                                              sample_rate=sr,
                                              ns=music_pb2.NoteSequence(),
                                              example_id="fakeid",
                                              min_length=0,
                                              max_length=-1,
                                              allow_empty_notesequence=True,
                                              load_audio_with_librosa=False))
    assert len(example_list) == 1
    to_process = [example_list[0].SerializeToString()]

    sess = tf.Session()
    sess.run([
        tf.initializers.global_variables(),
        tf.initializers.local_variables()
    ])
    sess.run(iterator.initializer, {examples: to_process})

    def transcription_data(params):
        del params
        return tf.data.Dataset.from_tensors(sess.run(next_record))

    # Put back the original function (it still writes to disk and reloads,
    # inefficient though that is).
    audio_io.wav_data_to_samples = original_google_sucks

    input_fn = infer_util.labels_to_features_wrapper(transcription_data)
    prediction_list = list(
        estimator.predict(input_fn, yield_single_examples=False))
    assert len(prediction_list) == 1

    notes = music_pb2.NoteSequence.FromString(
        prediction_list[0]['sequence_predictions'][0]).notes

    out = np.empty((len(notes), 4))
    for i, note in enumerate(notes):
        out[i] = [note.pitch, note.start_time, note.end_time, note.velocity]
    return out
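# Usage sketch (assumptions: 'piano.wav' is a placeholder path, and the
# checkpoint expects 16 kHz audio, so we resample on load with librosa).
import librosa

audio, sr = librosa.load('piano.wav', sr=16000)
mat = transcribe(audio, sr)
for pitch, onset, offset, velocity in mat:
    print('pitch %3d  onset %7.3f s  offset %7.3f s  velocity %3d' %
          (int(pitch), onset, offset, int(velocity)))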
# Set the hyperparameters.
config = configs.CONFIG_MAP['onsets_frames']
hparams = config.hparams
hparams.use_cudnn = False
hparams.batch_size = 1

# Set up the placeholder.
examples = tf.placeholder(tf.string, [None])

# Build the batched dataset.
dataset = data.provide_batch(examples=examples,
                             preprocess_examples=True,
                             params=hparams,
                             is_training=False,
                             shuffle_examples=False,
                             skip_n_initial_records=0)

# Create the Estimator.
estimator = train_util.create_estimator(config.model_fn, CHECKPOINT_DIR,
                                        hparams)

# Create the iterator.
iterator = dataset.make_initializable_iterator()
next_record = iterator.get_next()

# Create a session.
sess = tf.Session()

# Initialize the session.
sess.run([tf.initializers.global_variables(),
          tf.initializers.local_variables()])

# Build a Dataset from the model data.
def input_fn(params):
    del params
    return tf.data.Dataset.from_tensors(sess.run(next_record))
example = tf.placeholder(tf.string, [None])

# Logging
tf.logging.info('model_dir=%s', MODEL_DIR)
tf.logging.info('checkpoint_path=%s', 'checkpoint')

# Build the batched dataset.
dataset = data.provide_batch(examples=example,
                             preprocess_examples=True,
                             params=hparams,
                             is_training=False,
                             shuffle_examples=False,
                             skip_n_initial_records=0)

# Estimator
estimator = train_util.create_estimator(config.model_fn, MODEL_DIR, hparams)

# Iterator that cycles through the batches.
iterator = dataset.make_initializable_iterator()
next_record = iterator.get_next()

sess = tf.Session()

# Initialize global and local variables.
sess.run(
    [tf.initializers.global_variables(), tf.initializers.local_variables()])

# Create a Dataset instance from the tensors we want to consume.
# (Body completed from the identical input_fn in the surrounding snippets.)
def input_fn(params):
    del params
    return tf.data.Dataset.from_tensors(sess.run(next_record))
def main(input, output):
    MAESTRO_CHECKPOINT_DIR = '/data/maestro/train'

    config = configs.CONFIG_MAP['onsets_frames']
    hparams = config.hparams
    hparams.use_cudnn = False
    hparams.batch_size = 1
    checkpoint_dir = MAESTRO_CHECKPOINT_DIR

    examples = tf.placeholder(tf.string, [None])

    dataset = data.provide_batch(
        examples=examples,
        preprocess_examples=True,
        params=hparams,
        is_training=False,
        shuffle_examples=False,
        skip_n_initial_records=0)

    estimator = train_util.create_estimator(
        config.model_fn, checkpoint_dir, hparams)

    iterator = dataset.make_initializable_iterator()
    next_record = iterator.get_next()

    sess = tf.Session()
    sess.run([
        tf.initializers.global_variables(),
        tf.initializers.local_variables()
    ])

    # Ignore warnings caused by pyfluidsynth.
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    def process(files):
        # Transcribe the files one at a time: serialize each wav into a
        # tf.train.Example, feed it through the dataset pipeline, run
        # inference, and write the resulting MIDI to the output directory.
        for fn in files:
            print('**\n\n', fn, '\n\n**')
            with open(fn, 'rb', buffering=0) as f:
                wav_data = f.read()
            example_list = list(
                audio_label_data_utils.process_record(
                    wav_data=wav_data,
                    ns=music_pb2.NoteSequence(),
                    example_id=fn,
                    min_length=0,
                    max_length=-1,
                    allow_empty_notesequence=True))
            assert len(example_list) == 1
            to_process = [example_list[0].SerializeToString()]
            print('Processing complete for', fn)

            sess.run(iterator.initializer, {examples: to_process})

            def transcription_data(params):
                del params
                return tf.data.Dataset.from_tensors(sess.run(next_record))

            input_fn = infer_util.labels_to_features_wrapper(
                transcription_data)

            # Run inference.
            prediction_list = list(
                estimator.predict(input_fn, yield_single_examples=False))
            assert len(prediction_list) == 1

            sequence_prediction = music_pb2.NoteSequence.FromString(
                prediction_list[0]['sequence_predictions'][0])

            pathname = fn.split('/').pop()
            print('**\n\n', pathname, '\n\n**')
            midi_filename = '{outputs}/{file}.mid'.format(outputs=output,
                                                          file=pathname)
            midi_io.sequence_proto_to_midi_file(sequence_prediction,
                                                midi_filename)

    files = ['{inputs}/{file}'.format(inputs=input, file=file)
             for file in os.listdir(input)
             if file.split('.').pop() == 'wav']
    print('the files', files)
    process(files)
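# Usage sketch (directory names are placeholders): transcribe every .wav in
# wav_dir and write one .mid file per input into out_dir.
if __name__ == '__main__':
    main('wav_dir', 'out_dir')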
def model_inference(model_fn,
                    model_dir,
                    checkpoint_path,
                    hparams,
                    examples_path,
                    output_dir,
                    summary_writer,
                    master,
                    preprocess_examples,
                    write_summary_every_step=True):
    """Runs inference for the given examples."""
    tf.logging.info('model_dir=%s', model_dir)
    tf.logging.info('checkpoint_path=%s', checkpoint_path)
    tf.logging.info('examples_path=%s', examples_path)
    tf.logging.info('output_dir=%s', output_dir)

    estimator = train_util.create_estimator(
        model_fn, model_dir, hparams, master=master)

    with tf.Graph().as_default():
        num_dims = constants.MIDI_PITCHES

        dataset = data.provide_batch(
            examples=examples_path,
            preprocess_examples=preprocess_examples,
            hparams=hparams,
            is_training=False)

        # Define some metrics.
        (metrics_to_updates, metric_note_precision, metric_note_recall,
         metric_note_f1, metric_note_precision_with_offsets,
         metric_note_recall_with_offsets, metric_note_f1_with_offsets,
         metric_note_precision_with_offsets_velocity,
         metric_note_recall_with_offsets_velocity,
         metric_note_f1_with_offsets_velocity, metric_frame_labels,
         metric_frame_predictions) = infer_util.define_metrics(num_dims)

        summary_op = tf.summary.merge_all()

        if write_summary_every_step:
            global_step = tf.train.get_or_create_global_step()
            global_step_increment = global_step.assign_add(1)
        else:
            global_step = tf.constant(
                estimator.get_variable_value(tf.GraphKeys.GLOBAL_STEP))
            global_step_increment = global_step

        iterator = dataset.make_initializable_iterator()
        next_record = iterator.get_next()

        with tf.Session() as sess:
            sess.run([
                tf.initializers.global_variables(),
                tf.initializers.local_variables()
            ])

            infer_times = []
            num_frames = []

            sess.run(iterator.initializer)
            while True:
                try:
                    record = sess.run(next_record)
                except tf.errors.OutOfRangeError:
                    break

                def input_fn(params):
                    del params
                    return tf.data.Dataset.from_tensors(record)

                start_time = time.time()

                # TODO(fjord): This is a hack that allows us to keep using
                # our existing infer/scoring code with a tf.Estimator model.
                # Ideally, we should move things around so that we can use
                # estimator.evaluate, which will also be more efficient
                # because it won't have to restore the checkpoint for every
                # example.
                prediction_list = list(
                    estimator.predict(input_fn,
                                      checkpoint_path=checkpoint_path,
                                      yield_single_examples=False))
                assert len(prediction_list) == 1

                input_features = record[0]
                input_labels = record[1]

                filename = input_features.sequence_id[0]
                note_sequence = music_pb2.NoteSequence.FromString(
                    input_labels.note_sequence[0])
                labels = input_labels.labels[0]

                frame_probs = prediction_list[0]['frame_probs'][0]
                frame_predictions = prediction_list[0]['frame_predictions'][0]
                onset_predictions = prediction_list[0]['onset_predictions'][0]
                velocity_values = prediction_list[0]['velocity_values'][0]
                offset_predictions = prediction_list[0]['offset_predictions'][0]

                if not FLAGS.require_onset:
                    onset_predictions = None
                if not FLAGS.use_offset:
                    offset_predictions = None

                sequence_prediction = sequences_lib.pianoroll_to_note_sequence(
                    frame_predictions,
                    frames_per_second=data.hparams_frames_per_second(hparams),
                    min_duration_ms=0,
                    min_midi_pitch=constants.MIN_MIDI_PITCH,
                    onset_predictions=onset_predictions,
                    offset_predictions=offset_predictions,
                    velocity_values=velocity_values)

                end_time = time.time()
                infer_time = end_time - start_time
                infer_times.append(infer_time)
                num_frames.append(frame_predictions.shape[0])
                tf.logging.info(
                    'Infer time %f, frames %d, frames/sec %f, '
                    'running average %f', infer_time,
                    frame_predictions.shape[0],
                    frame_predictions.shape[0] / infer_time,
                    np.sum(num_frames) / np.sum(infer_times))

                tf.logging.info('Scoring sequence %s', filename)

                def shift_notesequence(ns_time):
                    return ns_time + hparams.backward_shift_amount_ms / 1000.

                sequence_label = sequences_lib.adjust_notesequence_times(
                    note_sequence, shift_notesequence)[0]
                infer_util.score_sequence(
                    sess,
                    global_step_increment,
                    metrics_to_updates,
                    metric_note_precision,
                    metric_note_recall,
                    metric_note_f1,
                    metric_note_precision_with_offsets,
                    metric_note_recall_with_offsets,
                    metric_note_f1_with_offsets,
                    metric_note_precision_with_offsets_velocity,
                    metric_note_recall_with_offsets_velocity,
                    metric_note_f1_with_offsets_velocity,
                    metric_frame_labels,
                    metric_frame_predictions,
                    frame_labels=labels,
                    sequence_prediction=sequence_prediction,
                    frames_per_second=data.hparams_frames_per_second(hparams),
                    sequence_label=sequence_label,
                    sequence_id=filename)

                if write_summary_every_step:
                    # Make filenames UNIX-friendly.
                    filename_safe = filename.decode('utf-8').replace(
                        '/', '_').replace(':', '.')
                    output_file = os.path.join(output_dir,
                                               filename_safe + '.mid')
                    tf.logging.info('Writing inferred midi file to %s',
                                    output_file)
                    midi_io.sequence_proto_to_midi_file(sequence_prediction,
                                                        output_file)

                    label_output_file = os.path.join(
                        output_dir, filename_safe + '_label.mid')
                    tf.logging.info('Writing label midi file to %s',
                                    label_output_file)
                    midi_io.sequence_proto_to_midi_file(sequence_label,
                                                        label_output_file)

                    # Also write a pianoroll showing acoustic model output
                    # vs labels.
                    pianoroll_output_file = os.path.join(
                        output_dir, filename_safe + '_pianoroll.png')
                    tf.logging.info('Writing acoustic logit/label file to %s',
                                    pianoroll_output_file)
                    with tf.gfile.GFile(pianoroll_output_file, mode='w') as f:
                        scipy.misc.imsave(
                            f,
                            infer_util.posterior_pianoroll_image(
                                frame_probs,
                                sequence_prediction,
                                labels,
                                overlap=True,
                                frames_per_second=data.
                                hparams_frames_per_second(hparams)))

                    summary = sess.run(summary_op)
                    summary_writer.add_summary(summary, sess.run(global_step))
                    summary_writer.flush()

            if not write_summary_every_step:
                # Only write the summary variables for the final step.
                summary = sess.run(summary_op)
                summary_writer.add_summary(summary, sess.run(global_step))
                summary_writer.flush()
def model_inference(model_fn,
                    model_dir,
                    checkpoint_path,
                    data_fn,
                    hparams,
                    examples_path,
                    output_dir,
                    summary_writer,
                    master,
                    preprocess_examples,
                    shuffle_examples):
    """Runs inference for the given examples."""
    tf.logging.info('model_dir=%s', model_dir)
    tf.logging.info('checkpoint_path=%s', checkpoint_path)
    tf.logging.info('examples_path=%s', examples_path)
    tf.logging.info('output_dir=%s', output_dir)

    estimator = train_util.create_estimator(
        model_fn, model_dir, hparams, master=master)

    transcription_data = functools.partial(
        data_fn,
        examples=examples_path,
        preprocess_examples=preprocess_examples,
        is_training=False,
        shuffle_examples=shuffle_examples,
        skip_n_initial_records=0)

    input_fn = infer_util.labels_to_features_wrapper(transcription_data)

    start_time = time.time()
    infer_times = []
    num_frames = []

    file_num = 0

    all_metrics = collections.defaultdict(list)

    for predictions in estimator.predict(input_fn,
                                         checkpoint_path=checkpoint_path,
                                         yield_single_examples=False):

        # Remove batch dimension for convenience.
        for k in predictions.keys():
            if predictions[k].shape[0] != 1:
                raise ValueError(
                    'All predictions must have batch size 1, but shape of '
                    '{} was: {}'.format(k, predictions[k].shape[0]))
            predictions[k] = predictions[k][0]

        end_time = time.time()
        infer_time = end_time - start_time
        infer_times.append(infer_time)
        num_frames.append(predictions['frame_predictions'].shape[0])
        tf.logging.info(
            'Infer time %f, frames %d, frames/sec %f, running average %f',
            infer_time, num_frames[-1], num_frames[-1] / infer_time,
            np.sum(num_frames) / np.sum(infer_times))

        tf.logging.info('Scoring sequence %s', predictions['sequence_ids'])

        sequence_prediction = music_pb2.NoteSequence.FromString(
            predictions['sequence_predictions'])
        sequence_label = music_pb2.NoteSequence.FromString(
            predictions['sequence_labels'])

        # Make filenames UNIX-friendly.
        filename_chars = predictions['sequence_ids'].decode('utf-8')
        filename_chars = [c if c.isalnum() else '_' for c in filename_chars]
        filename_safe = ''.join(filename_chars).rstrip()
        filename_safe = '{:04d}_{}'.format(file_num, filename_safe[:200])
        file_num += 1

        output_file = os.path.join(output_dir, filename_safe + '.mid')
        tf.logging.info('Writing inferred midi file to %s', output_file)
        midi_io.sequence_proto_to_midi_file(sequence_prediction, output_file)

        label_output_file = os.path.join(output_dir,
                                         filename_safe + '_label.mid')
        tf.logging.info('Writing label midi file to %s', label_output_file)
        midi_io.sequence_proto_to_midi_file(sequence_label, label_output_file)

        # Also write a pianoroll showing acoustic model output vs labels.
        pianoroll_output_file = os.path.join(output_dir,
                                             filename_safe + '_pianoroll.png')
        tf.logging.info('Writing acoustic logit/label file to %s',
                        pianoroll_output_file)
        with tf.gfile.GFile(pianoroll_output_file, mode='w') as f:
            scipy.misc.imsave(
                f,
                infer_util.posterior_pianoroll_image(
                    predictions['frame_probs'],
                    predictions['frame_labels']))

        # Update histogram and current scalar for metrics.
        with tf.Graph().as_default(), tf.Session().as_default():
            for k, v in predictions.items():
                if not k.startswith('metrics/'):
                    continue
                all_metrics[k].extend(v)
                histogram_name = 'histogram/' + k
                metric_summary = tf.summary.histogram(
                    histogram_name,
                    tf.constant(all_metrics[k], name=histogram_name),
                    collections=[])
                summary_writer.add_summary(metric_summary.eval(),
                                           global_step=file_num)
                scalar_name = k
                metric_summary = tf.summary.scalar(
                    scalar_name,
                    tf.constant(np.mean(all_metrics[k]), name=scalar_name),
                    collections=[])
                summary_writer.add_summary(metric_summary.eval(),
                                           global_step=file_num)
            summary_writer.flush()

        start_time = time.time()

    # Write final mean values for all metrics.
    with tf.Graph().as_default(), tf.Session().as_default():
        for k, v in all_metrics.items():
            final_scalar_name = 'final/' + k
            metric_summary = tf.summary.scalar(
                final_scalar_name,
                tf.constant(np.mean(all_metrics[k]), name=final_scalar_name),
                collections=[])
            summary_writer.add_summary(metric_summary.eval())
        summary_writer.flush()
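# The filename sanitization above is easy to check in isolation. This
# standalone restatement (the helper name is hypothetical) reproduces the
# same logic: non-alphanumeric characters become underscores, a running
# file number is prepended, and the id is capped at 200 characters.
def make_unix_friendly(sequence_id, file_num):
    chars = [c if c.isalnum() else '_' for c in sequence_id]
    return '{:04d}_{}'.format(file_num, ''.join(chars).rstrip()[:200])

assert (make_unix_friendly('2009/MIDI-Unprocessed_01.wav', 3)
        == '0003_2009_MIDI_Unprocessed_01_wav')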
def model_inference(model_dir,
                    checkpoint_path,
                    hparams,
                    examples_path,
                    output_dir,
                    summary_writer,
                    write_summary_every_step=True):
    """Runs inference for the given examples."""
    tf.logging.info('model_dir=%s', model_dir)
    tf.logging.info('checkpoint_path=%s', checkpoint_path)
    tf.logging.info('examples_path=%s', examples_path)
    tf.logging.info('output_dir=%s', output_dir)

    estimator = train_util.create_estimator(model_dir, hparams)

    with tf.Graph().as_default():
        num_dims = constants.MIDI_PITCHES

        if FLAGS.max_seconds_per_sequence:
            truncated_length = int(
                math.ceil((FLAGS.max_seconds_per_sequence *
                           data.hparams_frames_per_second(hparams))))
        else:
            truncated_length = 0

        dataset = data.provide_batch(batch_size=1,
                                     examples=examples_path,
                                     hparams=hparams,
                                     is_training=False,
                                     truncated_length=truncated_length)

        # Define some metrics.
        (metrics_to_updates, metric_note_precision, metric_note_recall,
         metric_note_f1, metric_note_precision_with_offsets,
         metric_note_recall_with_offsets, metric_note_f1_with_offsets,
         metric_note_precision_with_offsets_velocity,
         metric_note_recall_with_offsets_velocity,
         metric_note_f1_with_offsets_velocity, metric_frame_labels,
         metric_frame_predictions) = infer_util.define_metrics(num_dims)

        summary_op = tf.summary.merge_all()

        if write_summary_every_step:
            global_step = tf.train.get_or_create_global_step()
            global_step_increment = global_step.assign_add(1)
        else:
            global_step = tf.constant(
                estimator.get_variable_value(tf.GraphKeys.GLOBAL_STEP))
            global_step_increment = global_step

        iterator = dataset.make_initializable_iterator()
        next_record = iterator.get_next()

        with tf.Session() as sess:
            sess.run([
                tf.initializers.global_variables(),
                tf.initializers.local_variables()
            ])

            infer_times = []
            num_frames = []

            sess.run(iterator.initializer)
            while True:
                try:
                    record = sess.run(next_record)
                except tf.errors.OutOfRangeError:
                    break

                def input_fn():
                    return tf.data.Dataset.from_tensors(record)

                start_time = time.time()

                # TODO(fjord): This is a hack that allows us to keep using
                # our existing infer/scoring code with a tf.Estimator model.
                # Ideally, we should move things around so that we can use
                # estimator.evaluate, which will also be more efficient
                # because it won't have to restore the checkpoint for every
                # example.
                prediction_list = list(
                    estimator.predict(input_fn,
                                      checkpoint_path=checkpoint_path,
                                      yield_single_examples=False))
                assert len(prediction_list) == 1

                input_features = record[0]
                input_labels = record[1]

                filename = input_features.sequence_id[0]
                note_sequence = music_pb2.NoteSequence.FromString(
                    input_labels.note_sequence[0])
                labels = input_labels.labels[0]

                frame_probs = prediction_list[0]['frame_probs_flat']
                onset_probs = prediction_list[0]['onset_probs_flat']
                velocity_values = prediction_list[0]['velocity_values_flat']
                offset_probs = prediction_list[0]['offset_probs_flat']

                frame_predictions = frame_probs > FLAGS.frame_threshold
                if FLAGS.require_onset:
                    onset_predictions = onset_probs > FLAGS.onset_threshold
                else:
                    onset_predictions = None

                if FLAGS.use_offset:
                    offset_predictions = offset_probs > FLAGS.offset_threshold
                else:
                    offset_predictions = None

                sequence_prediction = sequences_lib.pianoroll_to_note_sequence(
                    frame_predictions,
                    frames_per_second=data.hparams_frames_per_second(hparams),
                    min_duration_ms=0,
                    min_midi_pitch=constants.MIN_MIDI_PITCH,
                    onset_predictions=onset_predictions,
                    offset_predictions=offset_predictions,
                    velocity_values=velocity_values)

                end_time = time.time()
                infer_time = end_time - start_time
                infer_times.append(infer_time)
                num_frames.append(frame_probs.shape[0])
                tf.logging.info(
                    'Infer time %f, frames %d, frames/sec %f, '
                    'running average %f', infer_time, frame_probs.shape[0],
                    frame_probs.shape[0] / infer_time,
                    np.sum(num_frames) / np.sum(infer_times))

                tf.logging.info('Scoring sequence %s', filename)

                def shift_notesequence(ns_time):
                    return ns_time + hparams.backward_shift_amount_ms / 1000.

                sequence_label = sequences_lib.adjust_notesequence_times(
                    note_sequence, shift_notesequence)[0]
                infer_util.score_sequence(
                    sess,
                    global_step_increment,
                    metrics_to_updates,
                    metric_note_precision,
                    metric_note_recall,
                    metric_note_f1,
                    metric_note_precision_with_offsets,
                    metric_note_recall_with_offsets,
                    metric_note_f1_with_offsets,
                    metric_note_precision_with_offsets_velocity,
                    metric_note_recall_with_offsets_velocity,
                    metric_note_f1_with_offsets_velocity,
                    metric_frame_labels,
                    metric_frame_predictions,
                    frame_labels=labels,
                    sequence_prediction=sequence_prediction,
                    frames_per_second=data.hparams_frames_per_second(hparams),
                    sequence_label=sequence_label,
                    sequence_id=filename)

                if write_summary_every_step:
                    # Make filenames UNIX-friendly.
                    filename_safe = filename.decode('utf-8').replace(
                        '/', '_').replace(':', '.')
                    output_file = os.path.join(output_dir,
                                               filename_safe + '.mid')
                    tf.logging.info('Writing inferred midi file to %s',
                                    output_file)
                    midi_io.sequence_proto_to_midi_file(sequence_prediction,
                                                        output_file)

                    label_output_file = os.path.join(
                        output_dir, filename_safe + '_label.mid')
                    tf.logging.info('Writing label midi file to %s',
                                    label_output_file)
                    midi_io.sequence_proto_to_midi_file(sequence_label,
                                                        label_output_file)

                    # Also write a pianoroll showing acoustic model output
                    # vs labels.
                    pianoroll_output_file = os.path.join(
                        output_dir, filename_safe + '_pianoroll.png')
                    tf.logging.info('Writing acoustic logit/label file to %s',
                                    pianoroll_output_file)
                    with tf.gfile.GFile(pianoroll_output_file, mode='w') as f:
                        scipy.misc.imsave(
                            f,
                            infer_util.posterior_pianoroll_image(
                                frame_probs,
                                sequence_prediction,
                                labels,
                                overlap=True,
                                frames_per_second=data.
                                hparams_frames_per_second(hparams)))

                    summary = sess.run(summary_op)
                    summary_writer.add_summary(summary, sess.run(global_step))
                    summary_writer.flush()

            if not write_summary_every_step:
                # Only write the summary variables for the final step.
                summary = sess.run(summary_op)
                summary_writer.add_summary(summary, sess.run(global_step))
                summary_writer.flush()
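# Standalone illustration of the thresholding step above: the model emits
# per-frame probabilities, and boolean pianorolls are obtained by comparing
# against scalar thresholds before pianoroll_to_note_sequence. The shapes
# and threshold values here are placeholders, not the FLAGS defaults.
import numpy as np

frame_probs = np.random.rand(625, 88)   # (frames, MIDI pitches)
onset_probs = np.random.rand(625, 88)

frame_predictions = frame_probs > 0.5   # boolean array, same shape
onset_predictions = onset_probs > 0.5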
if model_type.startswith('MAESTRO'):
    config = configs.CONFIG_MAP['onsets_frames']
    hparams = config.hparams
    hparams.batch_size = 1
    checkpoint_dir = MAESTRO_CHECKPOINT_DIR
elif model_type.startswith('E-GMD'):
    config = configs.CONFIG_MAP['drums']
    hparams = config.hparams
    hparams.batch_size = 1
    checkpoint_dir = EGMD_CHECKPOINT_DIR
else:
    raise ValueError('Unknown Model Type')

examples = tf.placeholder(tf.string, [None])

dataset = data.provide_batch(examples=examples,
                             preprocess_examples=True,
                             params=hparams,
                             is_training=False,
                             shuffle_examples=False,
                             skip_n_initial_records=0)

estimator = train_util.create_estimator(config.model_fn, checkpoint_dir,
                                        hparams)

iterator = tf.data.make_initializable_iterator(dataset)
next_record = iterator.get_next()

def transcription_data(params):
    del params
    return tf.data.Dataset.from_tensors(sess.run(next_record))

"""# Upload Audio

Run the following cell to upload audio files.
"""

# pianoAudio = 'misty.wav'
pianoAudio = 'lalala.wav'
def main(argv):
    tf.logging.set_verbosity(FLAGS.log)

    hparams = tf_utils.merge_hparams(constants.DEFAULT_HPARAMS,
                                     model.get_default_hparams())
    # For this script, default to not using cudnn.
    hparams.use_cudnn = False
    hparams.parse(FLAGS.hparams)
    hparams.batch_size = 1

    with tf.Graph().as_default():
        examples = tf.placeholder(tf.string, [None])

        dataset = data.provide_batch(batch_size=1,
                                     examples=examples,
                                     hparams=hparams,
                                     is_training=False,
                                     truncated_length=0)

        estimator = train_util.create_estimator(
            os.path.expanduser(FLAGS.model_dir), hparams)

        iterator = dataset.make_initializable_iterator()
        next_record = iterator.get_next()

        with tf.Session() as sess:
            sess.run([
                tf.initializers.global_variables(),
                tf.initializers.local_variables()
            ])

            for filename in argv[1:]:
                tf.logging.info('Starting transcription for %s...', filename)

                # The reason we bounce between two Dataset objects is so we
                # can use the data processing functionality in data.py
                # without having to construct all the Example protos in
                # memory ahead of time or create a temporary tfrecord file.
                tf.logging.info('Processing file...')
                sess.run(iterator.initializer,
                         {examples: [create_example(filename)]})

                def input_fn():
                    return tf.data.Dataset.from_tensors(sess.run(next_record))

                tf.logging.info('Running inference...')
                checkpoint_path = None
                if FLAGS.checkpoint_path:
                    checkpoint_path = os.path.expanduser(FLAGS.checkpoint_path)

                prediction_list = list(
                    estimator.predict(input_fn,
                                      checkpoint_path=checkpoint_path,
                                      yield_single_examples=False))
                assert len(prediction_list) == 1

                sequence_prediction = transcribe_audio(prediction_list[0],
                                                       hparams,
                                                       FLAGS.frame_threshold,
                                                       FLAGS.onset_threshold)

                midi_filename = filename + '.midi'
                midi_io.sequence_proto_to_midi_file(sequence_prediction,
                                                    midi_filename)

                tf.logging.info('Transcription written to %s.', midi_filename)
def main(argv):
    tf.logging.set_verbosity(FLAGS.log)

    config = configs.CONFIG_MAP[FLAGS.config]
    hparams = config.hparams
    # For this script, default to not using cudnn.
    hparams.use_cudnn = False
    hparams.parse(FLAGS.hparams)
    hparams.batch_size = 1
    hparams.truncated_length_secs = 0

    with tf.Graph().as_default():
        examples = tf.placeholder(tf.string, [None])

        dataset = data.provide_batch(
            examples=examples,
            preprocess_examples=True,
            hparams=hparams,
            is_training=False)

        estimator = train_util.create_estimator(
            config.model_fn, os.path.expanduser(FLAGS.model_dir), hparams)

        iterator = dataset.make_initializable_iterator()
        next_record = iterator.get_next()

        with tf.Session() as sess:
            sess.run([
                tf.initializers.global_variables(),
                tf.initializers.local_variables()
            ])

            for filename in argv[1:]:
                tf.logging.info('Starting transcription for %s...', filename)

                # The reason we bounce between two Dataset objects is so we
                # can use the data processing functionality in data.py
                # without having to construct all the Example protos in
                # memory ahead of time or create a temporary tfrecord file.
                tf.logging.info('Processing file...')
                sess.run(iterator.initializer,
                         {examples: [create_example(filename)]})

                def input_fn(params):
                    del params
                    return tf.data.Dataset.from_tensors(sess.run(next_record))

                tf.logging.info('Running inference...')
                checkpoint_path = None
                if FLAGS.checkpoint_path:
                    checkpoint_path = os.path.expanduser(FLAGS.checkpoint_path)

                prediction_list = list(
                    estimator.predict(input_fn,
                                      checkpoint_path=checkpoint_path,
                                      yield_single_examples=False))
                assert len(prediction_list) == 1

                sequence_prediction = transcribe_audio(prediction_list[0],
                                                       hparams)

                midi_filename = filename + '.midi'
                midi_io.sequence_proto_to_midi_file(sequence_prediction,
                                                    midi_filename)

                tf.logging.info('Transcription written to %s.', midi_filename)
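# A conventional console entry point for a script like the one above; the
# Magenta transcription scripts follow this pattern, but it is not part of
# the snippet itself.
def console_entry_point():
    tf.app.run(main)

if __name__ == '__main__':
    console_entry_point()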