def testGenerateId(self):
    """Checks the prefix and trailing portion of generated sequence ids.

    Each generated id is compared against a prefix of the form
    '/id/<source type>/<collection name>/'. The text following the prefix
    must be equal for ids generated from the same filename and must differ
    for ids generated from a different filename.
    """
    midi_prefix = '/id/midi/my_collection/'
    abc_prefix = '/id/abc/your_collection/'
    # The third id is only sliced, never prefix-checked; this offset skips a
    # prefix as long as '/id/abc/my_collection/'.
    third_prefix_len = len('/id/abc/my_collection/')

    sequence_id_1 = note_sequence_io.generate_note_sequence_id(
        '/my/file/name', 'my_collection', 'midi')
    self.assertEqual(midi_prefix, sequence_id_1[:len(midi_prefix)])
    suffix_1 = sequence_id_1[len(midi_prefix):]

    sequence_id_2 = note_sequence_io.generate_note_sequence_id(
        '/my/file/name', 'your_collection', 'abc')
    self.assertEqual(abc_prefix, sequence_id_2[:len(abc_prefix)])
    suffix_2 = sequence_id_2[len(abc_prefix):]

    # Same filename: the trailing portions must agree.
    self.assertEqual(suffix_1, suffix_2)

    sequence_id_3 = note_sequence_io.generate_note_sequence_id(
        '/your/file/name', 'my_collection', 'abc')
    suffix_3 = sequence_id_3[third_prefix_len:]

    # Different filename: the trailing portion must differ from both others.
    self.assertNotEqual(suffix_3, suffix_1)
    self.assertNotEqual(suffix_3, suffix_2)
def convert_abc(root_dir, sub_dir, full_file_path):
    """Converts an abc file to a list of sequence protos.

    Args:
      root_dir: A string specifying the root directory for the files being
          converted.
      sub_dir: The directory being converted currently.
      full_file_path: the full path to the file to convert.

    Returns:
      A list containing one NoteSequence proto for every tune that was
      parsed from the file, or None if the file could not be parsed at all.
    """
    try:
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to garbage collection.
        with tf.gfile.FastGFile(full_file_path, 'rb') as abc_file:
            abc_data = abc_file.read()
        tunes, exceptions = abc_parser.parse_abc_tunebook(abc_data)
    except abc_parser.ABCParseException as e:
        tf.logging.warning(
            'Could not parse ABC file %s. It will be skipped. Error was: %s',
            full_file_path, e)
        return None

    # Individual tunes may fail while the rest of the tunebook still parses.
    for exception in exceptions:
        tf.logging.warning(
            'Could not parse tune in ABC file %s. It will be skipped. Error '
            'was: %s', full_file_path, exception)

    collection_name = os.path.basename(root_dir)
    filename = os.path.join(sub_dir, os.path.basename(full_file_path))

    sequences = []
    # items() is available on both Python 2 and Python 3 mappings, unlike
    # the Python 2 only iteritems().
    for idx, tune in tunes.items():
        tune.collection_name = collection_name
        tune.filename = filename
        # Every tune in a file shares the filename, so the tune key is
        # appended to keep the generated ids distinct.
        tune.id = note_sequence_io.generate_note_sequence_id(
            '{}_{}'.format(tune.filename, idx), tune.collection_name, 'abc')
        sequences.append(tune)

    tf.logging.info('Converted ABC file %s.', full_file_path)
    return sequences
def convert_musicxml(root_dir, sub_dir, full_file_path):
    """Builds a NoteSequence proto from a MusicXML file.

    Args:
      root_dir: A string giving the root directory of the conversion; its
          basename becomes the collection name of the sequence.
      sub_dir: The directory currently being converted; joined with the
          basename of `full_file_path` to form the sequence filename.
      full_file_path: The full path of the MusicXML file to convert.

    Returns:
      The NoteSequence proto, or None if the file could not be converted.
    """
    try:
        note_sequence = musicxml_reader.musicxml_file_to_sequence_proto(
            full_file_path)
    except musicxml_reader.MusicXMLConversionError as conversion_error:
        tf.logging.warning(
            'Could not parse MusicXML file %s. It will be skipped. Error was: %s',
            full_file_path, conversion_error)
        return None

    base_name = os.path.basename(full_file_path)
    note_sequence.collection_name = os.path.basename(root_dir)
    note_sequence.filename = os.path.join(sub_dir, base_name)
    # The id is derived from the values as stored on the proto.
    note_sequence.id = note_sequence_io.generate_note_sequence_id(
        note_sequence.filename, note_sequence.collection_name, 'musicxml')

    tf.logging.info('Converted MusicXML file %s.', full_file_path)
    return note_sequence
def convert_midi(root_dir, sub_dir, full_file_path):
    """Converts a midi file to a sequence proto.

    Args:
      root_dir: A string specifying the root directory for the files being
          converted.
      sub_dir: The directory being converted currently.
      full_file_path: the full path to the file to convert.

    Returns:
      Either a NoteSequence proto or None if the file could not be converted.
    """
    try:
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to garbage collection.
        with tf.gfile.FastGFile(full_file_path, 'rb') as midi_file:
            midi_data = midi_file.read()
        sequence = midi_io.midi_to_sequence_proto(midi_data)
    except midi_io.MIDIConversionError as e:
        tf.logging.warning(
            'Could not parse MIDI file %s. It will be skipped. Error was: %s',
            full_file_path, e)
        return None

    sequence.collection_name = os.path.basename(root_dir)
    sequence.filename = os.path.join(sub_dir, os.path.basename(full_file_path))
    sequence.id = note_sequence_io.generate_note_sequence_id(
        sequence.filename, sequence.collection_name, 'midi')
    return sequence
def runTest(self, relative_root, recursive):
    """Tests the output for the given parameters.

    Converts the directory `relative_root` under `self.root_dir` into a
    temporary record file, reads the records back and checks the id,
    collection name, notes and filename of every sequence.

    Args:
      relative_root: Directory to convert, relative to `self.root_dir`.
          Also the key used to look up the expected contents in
          `self.expected_dir_midi_contents` and `self.expected_sub_dirs`.
      recursive: Whether files in sub-directories are expected as well;
          forwarded to `convert_directory`.
    """
    root_dir = os.path.join(self.root_dir, relative_root)

    # Copy the expected set: the recursive branch adds to it, and adding to
    # the original would mutate the fixture shared through `self`.
    expected_filenames = set(self.expected_dir_midi_contents[relative_root])
    if recursive:
        for sub_dir in self.expected_sub_dirs[relative_root]:
            sub_dir_key = os.path.join(relative_root, sub_dir)
            for filename in self.expected_dir_midi_contents[sub_dir_key]:
                expected_filenames.add(os.path.join(sub_dir, filename))

    with tempfile.NamedTemporaryFile(
            prefix='ConvertMidiDirToSequencesTest') as output_file:
        convert_dir_to_note_sequences.convert_directory(
            root_dir, output_file.name, recursive)

        # Read the records back while the temporary file still exists.
        actual_filenames = set()
        for sequence in note_sequence_io.note_sequence_record_iterator(
                output_file.name):
            self.assertEqual(
                note_sequence_io.generate_note_sequence_id(
                    sequence.filename, os.path.basename(relative_root),
                    'midi'),
                sequence.id)
            self.assertEqual(
                os.path.basename(root_dir), sequence.collection_name)
            self.assertNotEqual(0, len(sequence.notes))
            actual_filenames.add(sequence.filename)

    self.assertEqual(expected_filenames, actual_filenames)
def runTest(self, relative_root, recursive):
    """Tests the output for the given parameters.

    The directory `relative_root` (relative to `self.root_dir`) is converted
    into a temporary record file. Every sequence read back from that file
    must carry the expected id and collection name and at least one note,
    and the set of sequence filenames must equal the expected set.

    Args:
      relative_root: Path, relative to `self.root_dir`, of the directory to
          convert. It is also the lookup key into
          `self.expected_dir_midi_contents` and `self.expected_sub_dirs`.
      recursive: Passed to `convert_directory`; when true, files listed for
          the sub-directories of `relative_root` are expected too.
    """
    root_dir = os.path.join(self.root_dir, relative_root)
    collection_name = os.path.basename(root_dir)

    # Build a fresh set rather than aliasing the one stored on `self`, so
    # extending it below cannot leak into the shared expectations.
    expected_filenames = set(self.expected_dir_midi_contents[relative_root])
    if recursive:
        for sub_dir in self.expected_sub_dirs[relative_root]:
            sub_dir_files = self.expected_dir_midi_contents[
                os.path.join(relative_root, sub_dir)]
            expected_filenames.update(
                os.path.join(sub_dir, filename) for filename in sub_dir_files)

    actual_filenames = set()
    with tempfile.NamedTemporaryFile(
            prefix='ConvertMidiDirToSequencesTest') as output_file:
        convert_dir_to_note_sequences.convert_directory(
            root_dir, output_file.name, recursive)

        records = note_sequence_io.note_sequence_record_iterator(
            output_file.name)
        for sequence in records:
            expected_id = note_sequence_io.generate_note_sequence_id(
                sequence.filename, os.path.basename(relative_root), 'midi')
            self.assertEqual(expected_id, sequence.id)
            self.assertEqual(collection_name, sequence.collection_name)
            self.assertNotEqual(0, len(sequence.notes))
            actual_filenames.add(sequence.filename)

    self.assertEqual(expected_filenames, actual_filenames)
def convert_abc(root_dir, sub_dir, full_file_path):
    """Converts an abc file to a list of sequence protos.

    Args:
      root_dir: A string specifying the root directory for the files being
          converted.
      sub_dir: The directory being converted currently.
      full_file_path: the full path to the file to convert.

    Returns:
      A list containing one NoteSequence proto for every tune that was
      parsed from the file, or None if the file could not be parsed at all.
    """
    try:
        # The context manager closes the file handle as soon as the contents
        # have been read, rather than leaving it open until collection.
        with tf.gfile.FastGFile(full_file_path, 'rb') as abc_file:
            abc_data = abc_file.read()
        tunes, exceptions = abc_parser.parse_abc_tunebook(abc_data)
    except abc_parser.ABCParseError as e:
        tf.logging.warning(
            'Could not parse ABC file %s. It will be skipped. Error was: %s',
            full_file_path, e)
        return None

    # Errors for individual tunes are reported but do not abort the file.
    for exception in exceptions:
        tf.logging.warning(
            'Could not parse tune in ABC file %s. It will be skipped. Error '
            'was: %s', full_file_path, exception)

    collection_name = os.path.basename(root_dir)
    filename = os.path.join(sub_dir, os.path.basename(full_file_path))

    sequences = []
    # items() exists on Python 2 and Python 3 mappings; iteritems() is
    # Python 2 only.
    for idx, tune in tunes.items():
        tune.collection_name = collection_name
        tune.filename = filename
        # All tunes of one file share a filename, so the tune key is added
        # to the name used for id generation to keep ids distinct.
        tune.id = note_sequence_io.generate_note_sequence_id(
            '{}_{}'.format(tune.filename, idx), tune.collection_name, 'abc')
        sequences.append(tune)

    tf.logging.info('Converted ABC file %s.', full_file_path)
    return sequences
def convert_musicxml(root_dir, sub_dir, full_file_path):
    """Reads a MusicXML file and returns it as a NoteSequence proto.

    Args:
      root_dir: A string naming the root directory being converted. Its
          basename is stored as the collection name.
      sub_dir: The directory currently being converted. It is joined with
          the file's basename to produce the stored filename.
      full_file_path: The full path to the MusicXML file.

    Returns:
      A NoteSequence proto on success, None when conversion fails.
    """
    try:
        result = musicxml_reader.musicxml_file_to_sequence_proto(
            full_file_path)
    except musicxml_reader.MusicXMLConversionError as err:
        tf.logging.warning(
            'Could not parse MusicXML file %s. It will be skipped. Error was: %s',
            full_file_path, err)
        return None
    else:
        # Tag the sequence with where it came from before deriving its id.
        result.collection_name = os.path.basename(root_dir)
        result.filename = os.path.join(
            sub_dir, os.path.basename(full_file_path))
        result.id = note_sequence_io.generate_note_sequence_id(
            result.filename, result.collection_name, 'musicxml')
        tf.logging.info('Converted MusicXML file %s.', full_file_path)
        return result
def convert_directory(root_dir, sub_dir, sequence_writer, recursive=False):
    """Converts MIDIs to NoteSequences and writes to `sequence_writer`.

    MIDI files found in the directory specified by the combination of
    `root_dir` and `sub_dir` are converted to NoteSequence protos with the
    basename of `root_dir` as the collection_name, and the relative path to
    the MIDI file from `root_dir` as the filename. If `recursive` is true,
    recursively converts any subdirectories of the specified directory.

    Args:
      root_dir: A string specifying a root directory.
      sub_dir: A string specifying a path to a directory under `root_dir` in
          which to convert MIDI contents.
      sequence_writer: A NoteSequenceRecordWriter to write the resulting
          NoteSequence protos to.
      recursive: A boolean specifying whether or not recursively convert
          MIDIs contained in subdirectories of the specified directory.

    Returns:
      The number of NoteSequence protos written as an integer, including
      those written while converting subdirectories.
    """
    dir_to_convert = os.path.join(root_dir, sub_dir)
    tf.logging.info("Converting MIDI files in '%s'.", dir_to_convert)
    files_in_dir = tf.gfile.ListDirectory(dir_to_convert)

    recurse_sub_dirs = []
    sequences_written = 0
    sequences_skipped = 0
    for file_in_dir in files_in_dir:
        full_file_path = os.path.join(dir_to_convert, file_in_dir)

        # Directories are never parsed; they are queued for later when
        # recursing and ignored otherwise.
        if tf.gfile.IsDirectory(full_file_path):
            if recursive:
                recurse_sub_dirs.append(os.path.join(sub_dir, file_in_dir))
            continue

        try:
            # MIDI is a binary format, so open in 'rb' mode, and use a
            # context manager so the handle is closed after reading.
            with tf.gfile.FastGFile(full_file_path, 'rb') as midi_file:
                midi_data = midi_file.read()
            sequence = midi_io.midi_to_sequence_proto(midi_data)
        except midi_io.MIDIConversionError as e:
            tf.logging.warning(
                'Could not parse MIDI file %s. It will be skipped. '
                'Error was: %s', full_file_path, e)
            sequences_skipped += 1
            continue

        sequence.collection_name = os.path.basename(root_dir)
        sequence.filename = os.path.join(sub_dir, file_in_dir)
        sequence.id = note_sequence_io.generate_note_sequence_id(
            sequence.filename, sequence.collection_name, 'midi')
        sequence_writer.write(sequence)
        sequences_written += 1

    tf.logging.info("Converted %d MIDI files in '%s'.", sequences_written,
                    dir_to_convert)
    tf.logging.info('Could not parse %d MIDI files.', sequences_skipped)

    # Subdirectories are converted only after this directory is finished.
    for recurse_sub_dir in recurse_sub_dirs:
        sequences_written += convert_directory(
            root_dir, recurse_sub_dir, sequence_writer, recursive)

    return sequences_written
def convert_midi(root_dir, sub_dir, full_file_path):
    """Converts a midi file to a sequence proto.

    Args:
      root_dir: A string specifying the root directory for the files being
          converted. Its basename is stored as the collection name.
      sub_dir: The directory being converted currently. It is joined with
          the basename of `full_file_path` to form the stored filename.
      full_file_path: The full path to the file to convert.

    Returns:
      Either a NoteSequence proto or None if the file could not be converted.
    """
    try:
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to garbage collection.
        with tf.gfile.FastGFile(full_file_path, 'rb') as midi_file:
            midi_data = midi_file.read()
        sequence = midi_io.midi_to_sequence_proto(midi_data)
    except midi_io.MIDIConversionError as e:
        tf.logging.warning(
            'Could not parse MIDI file %s. It will be skipped. Error was: %s',
            full_file_path, e)
        return None

    sequence.collection_name = os.path.basename(root_dir)
    sequence.filename = os.path.join(sub_dir, os.path.basename(full_file_path))
    sequence.id = note_sequence_io.generate_note_sequence_id(
        sequence.filename, sequence.collection_name, 'midi')
    tf.logging.info('Converted MIDI file %s.', full_file_path)
    return sequence
def musicnet_iterator(musicnet_file):
    """An iterator over the MusicNet archive that yields audio and NoteSequences.

    The MusicNet archive (in .npz format) can be downloaded from:
    https://homes.cs.washington.edu/~thickstn/media/musicnet.npz

    Args:
      musicnet_file: The path to the MusicNet NumPy archive (.npz) containing
          audio and transcriptions for 330 classical recordings.

    Yields:
      Tuples where the first element is a NumPy array of sampled audio (at
      44.1 kHz) and the second element is a NoteSequence proto containing the
      transcription.
    """
    # Local import: io.BytesIO is available on both Python 2 and Python 3,
    # unlike the Python 2 only StringIO module.
    import io

    with tf.gfile.FastGFile(musicnet_file, 'rb') as f:
        # Unfortunately the gfile seek function breaks the reading of NumPy
        # archives, so we read the archive first then load it from an
        # in-memory bytes buffer.
        musicnet_bytes = f.read()
        musicnet = np.load(io.BytesIO(musicnet_bytes))

    source_info_enums = music_pb2.NoteSequence.SourceInfo
    for file_id in musicnet.files:
        # Each archive entry unpacks into the audio and its note intervals.
        audio, note_interval_tree = musicnet[file_id]
        sequence = note_interval_tree_to_sequence_proto(
            note_interval_tree, MUSICNET_SAMPLE_RATE)

        sequence.filename = file_id
        sequence.collection_name = 'MusicNet'
        sequence.id = note_sequence_io.generate_note_sequence_id(
            sequence.filename, sequence.collection_name, 'musicnet')

        sequence.source_info.source_type = source_info_enums.PERFORMANCE_BASED
        sequence.source_info.encoding_type = source_info_enums.MUSICNET
        sequence.source_info.parser = source_info_enums.MAGENTA_MUSICNET

        yield audio, sequence
def convert_midi(root_dir, sub_dir, full_file_path, output_file):
    """Converts a MIDI file to tensor records and saves them with `np.save`.

    The file is parsed into a NoteSequence, expanded by the augmenter of the
    config selected by `FLAGS.config` (when one is set) and turned into
    tensors by that config's data converter. Each resulting example is
    flattened into a dict of plain lists.

    Args:
      root_dir: A string specifying the root directory for the files being
          converted. Its basename is stored as the collection name.
      sub_dir: The directory being converted currently. It is joined with
          the basename of `full_file_path` to form the stored filename.
      full_file_path: The full path to the MIDI file to convert.
      output_file: A string prefix for the output location; the records are
          saved to '<output_file>_npy/<basename of full_file_path>'.

    Returns:
      A list of dicts with the keys 'notes', 'chords', 'shape' and
      'lengths', one per example, or an empty list if the file could not be
      parsed. Nothing is saved when the list is empty.
    """
    data_converter = CONFIG_MAP[FLAGS.config].data_converter
    augmenter = CONFIG_MAP[FLAGS.config].note_sequence_augmenter
    records = []

    try:
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to garbage collection.
        with tf.gfile.GFile(full_file_path, 'rb') as midi_file:
            midi_data = midi_file.read()
        sequence = midi_io.midi_to_sequence_proto(midi_data)
    except midi_io.MIDIConversionError as e:
        tf.logging.warning(
            'Could not parse MIDI file %s. It will be skipped. Error was: %s',
            full_file_path, e)
        return []

    sequence.collection_name = os.path.basename(root_dir)
    sequence.filename = os.path.join(sub_dir, os.path.basename(full_file_path))
    sequence.id = note_sequence_io.generate_note_sequence_id(
        sequence.filename, sequence.collection_name, 'midi')

    # Without an augmenter the original sequence is the only one converted.
    if augmenter is not None:
        sequences_to_convert = augmenter.get_all(sequence)
    else:
        sequences_to_convert = [sequence]

    for note_sequence in sequences_to_convert:
        tensors = data_converter.to_tensors(note_sequence)
        for inputs, controls, length in zip(
                tensors.inputs, tensors.controls, tensors.lengths):
            # Keep the original shape so the flattened notes can be
            # reshaped by a consumer.
            shape = list(inputs.shape)
            notes = inputs.reshape(-1).tolist()
            chords = controls.reshape(-1).tolist()
            if not chords:
                # Empty controls are replaced by a single zero placeholder.
                chords = [0]
            # NOTE(review): only plain Python ints are wrapped in a list;
            # NumPy integer scalars pass through unchanged - confirm this
            # is intended.
            if isinstance(length, int):
                length = [length]
            records.append({
                'notes': notes,
                'chords': chords,
                'shape': shape,
                'lengths': length,
            })

    if records:
        np.save(
            "{}_npy/{}".format(output_file, os.path.basename(full_file_path)),
            records)
    return records
else: if recursive and tf.gfile.IsDirectory(full_file_path): recurse_sub_dirs.append(os.path.join(sub_dir, file_in_dir)) else: tf.logging.warning('Unable to find a converter for file %s', full_file_path) for recurse_sub_dir in recurse_sub_dirs: convert_files(root_dir, recurse_sub_dir, writer, recursive) def convert_midi(root_dir, sub_dir, full_file_path): sequence = midi_io.midi_to_sequence_proto(tf.gfile.FastGFile(full_file_path, 'rb').read()) except midi_io.MIDIConversionError as e: tf.logging.warning('Could not parse MIDI file %s. It will be skipped. Error was: %s',full_file_path, e) return None sequence.collection_name = os.path.basename(root_dir) sequence.filename = os.path.join(sub_dir, os.path.basename(full_file_path)) sequence.id = note_sequence_io.generate_note_sequence_id(sequence.filename, sequence.collection_name, 'midi') tf.logging.info('Converted MIDI file %s.', full_file_path) return sequence def convert_directory(root_dir, output_file, recursive=False): with note_sequence_io.NoteSequenceRecordWriter(output_file) as writer: convert_files(root_dir, '', writer, recursive) def main(unused_argv): tf.logging.set_verbosity(FLAGS.log) if not FLAGS.input_dir: tf.logging.fatal('--input_dir required') return if not FLAGS.output_file: tf.logging.fatal('--output_file required') return