def prepare_pianorolls(metadata: pd.DataFrame):
    """Calculate pianorolls and save them as pre-calculated features.

    Rows whose ``pianoroll_file`` already exists on disk are skipped, so the
    function can be re-run to resume an interrupted preparation.

    Args:
        metadata: metadata to the dataset. Each row must provide the
            ``midi_file`` and ``pianoroll_file`` columns.

    Returns:
        No return, save pre-calculated features instead.
    """
    total = len(metadata)
    # Count rows positionally: the DataFrame index labels are not guaranteed
    # to be a 0-based integer range.
    for count, (_, row) in enumerate(metadata.iterrows(), start=1):
        print(f'Preparing pianoroll {count}/{total}', end='\r')

        # get midi file and pianoroll file
        midi_file = row['midi_file']
        pianoroll_file = row['pianoroll_file']

        # if already calculated, skip
        if os.path.exists(pianoroll_file):
            continue

        # calculate pianoroll and save feature
        midi_data = pm.PrettyMIDI(midi_file)
        pianoroll = midi_data.get_piano_roll(
            fs=1. / Constants.hop_time)[21:21 + 88]  # 88 piano keys
        mkdir(os.path.split(pianoroll_file)[0])
        # Context manager guarantees the file is flushed and closed even if
        # pickling fails.
        with open(pianoroll_file, 'wb') as f:
            pickle.dump(pianoroll, f, protocol=2)
    # terminate the '\r' progress line
    print()
def prepare_spectrograms(metadata: pd.DataFrame, spectrogram_setting: Any):
    """Calculate spectrograms and save the pre-calculated features.

    For every row the spectrogram is stored in the row's
    ``spectrograms_folder`` under ``<spectrogram_setting.to_string()>.pkl``.
    Rows whose spectrogram file already exists are skipped.

    Args:
        metadata: metadata to the dataset. Each row must provide the
            ``audio_file`` and ``spectrograms_folder`` columns.
        spectrogram_setting: the spectrogram setting (type and parameters).
            ``type`` must be one of 'STFT', 'Mel', 'CQT', 'HCQT' or 'VQT'.

    Returns:
        No return, save pre-calculated features instead.

    Raises:
        ValueError: if a spectrogram has to be calculated and
            ``spectrogram_setting.type`` is not a supported type.
    """
    total = len(metadata)
    # The file name depends only on the setting, not on the row.
    spectrogram_name = f'{spectrogram_setting.to_string()}.pkl'

    # Count rows positionally: the DataFrame index labels are not guaranteed
    # to be a 0-based integer range.
    for count, (_, row) in enumerate(metadata.iterrows(), start=1):
        print(f'Preparing spectrogram {count}/{total}', end='\r')

        # get audio file and spectrogram file
        audio_file = row['audio_file']
        spectrogram_file = os.path.join(row['spectrograms_folder'],
                                        spectrogram_name)

        # if already calculated, skip
        if os.path.exists(spectrogram_file):
            continue

        # calculate spectrogram
        if spectrogram_setting.type == 'STFT':
            spectrogram = SpectrogramUtil.STFT_from_file(
                audio_file, win_length=spectrogram_setting.win_length)
        elif spectrogram_setting.type == 'Mel':
            spectrogram = SpectrogramUtil.melspectrogram_from_file(
                audio_file,
                win_length=spectrogram_setting.win_length,
                n_mels=spectrogram_setting.n_mels)
        elif spectrogram_setting.type == 'CQT':
            spectrogram = SpectrogramUtil.CQT_from_file(
                audio_file,
                bins_per_octave=spectrogram_setting.bins_per_octave,
                n_octaves=spectrogram_setting.n_octaves)
        elif spectrogram_setting.type == 'HCQT':
            spectrogram = SpectrogramUtil.HCQT_from_file(
                audio_file,
                bins_per_octave=spectrogram_setting.bins_per_octave,
                n_octaves=spectrogram_setting.n_octaves,
                n_harms=spectrogram_setting.n_harms)
        elif spectrogram_setting.type == 'VQT':
            spectrogram = SpectrogramUtil.VQT_from_file(
                audio_file,
                bins_per_octave=spectrogram_setting.bins_per_octave,
                n_octaves=spectrogram_setting.n_octaves,
                gamma=spectrogram_setting.gamma)
        else:
            # Without this branch `spectrogram` would be unbound below and
            # the failure would surface as an obscure UnboundLocalError.
            raise ValueError(
                f'Unsupported spectrogram type: {spectrogram_setting.type!r}')

        # save feature
        mkdir(row['spectrograms_folder'])
        # Context manager guarantees the file is flushed and closed even if
        # pickling fails.
        with open(spectrogram_file, 'wb') as f:
            pickle.dump(spectrogram, f, protocol=2)
    # terminate the '\r' progress line
    print()
def prepare_scores(metadata: pd.DataFrame):
    """Calculate score representations for each piece.

    Each score is split into bars with ``split_bars``; for every bar a
    (right hand, left hand) tuple is pickled to ``<bar_index>.pkl`` in both
    the row's ``score_reshaped_folder`` (``ScoreReshaped`` objects) and
    ``score_lilypond_folder`` (``ScoreLilyPond`` objects).

    Args:
        metadata: metadata to the dataset. Each row must provide the
            ``score_file``, ``downbeats_file``, ``score_reshaped_folder`` and
            ``score_lilypond_folder`` columns. The downbeats file must
            already exist.

    Returns:
        No return, save pre-calculated features instead.
    """
    total = len(metadata)
    # Count rows positionally: the DataFrame index labels are not guaranteed
    # to be a 0-based integer range.
    for count, (_, row) in enumerate(metadata.iterrows(), start=1):
        print(f'Preparing scores {count}/{total}', end='\r')

        score_file = row['score_file']
        downbeats_file = row['downbeats_file']
        score_reshaped_folder = row['score_reshaped_folder']
        score_lilypond_folder = row['score_lilypond_folder']
        mkdir(score_reshaped_folder)
        mkdir(score_lilypond_folder)

        with open(downbeats_file, 'rb') as f:
            downbeats = pickle.load(f)

        # A piece counts as done when the bar file indexed
        # len(downbeats) - 2 exists in both output folders (presumably the
        # last bar written for the piece -- confirm against split_bars).
        marker_name = f'{len(downbeats)-2}.pkl'
        if (os.path.exists(os.path.join(score_reshaped_folder, marker_name))
                and os.path.exists(
                    os.path.join(score_lilypond_folder, marker_name))):
            continue

        score_m21_list = split_bars(score_file, len(downbeats))
        for bar_index, score_m21 in enumerate(score_m21_list):
            score_reshaped_right = ScoreReshaped.from_m21(score_m21.right)
            score_reshaped_left = ScoreReshaped.from_m21(score_m21.left)
            score_lilypond_right = ScoreLilyPond.from_m21(score_m21.right)
            score_lilypond_left = ScoreLilyPond.from_m21(score_m21.left)

            bar_name = f'{bar_index}.pkl'
            # Context managers guarantee each bar file is flushed and closed
            # before the next one is opened.
            with open(os.path.join(score_reshaped_folder, bar_name),
                      'wb') as f:
                pickle.dump((score_reshaped_right, score_reshaped_left),
                            f,
                            protocol=2)
            with open(os.path.join(score_lilypond_folder, bar_name),
                      'wb') as f:
                pickle.dump((score_lilypond_right, score_lilypond_left),
                            f,
                            protocol=2)
    # terminate the '\r' progress line
    print()
def prepare_downbeats(metadata: pd.DataFrame):
    """Calculate downbeats for each piece and save them.

    Rows whose ``downbeats_file`` already exists on disk are skipped. The
    end time returned alongside the downbeats is discarded.

    Args:
        metadata: metadata to the dataset. Each row must provide the
            ``midi_file`` and ``downbeats_file`` columns.

    Returns:
        No return, save pre-calculated features instead.
    """
    total = len(metadata)
    # Count rows positionally: the DataFrame index labels are not guaranteed
    # to be a 0-based integer range.
    for count, (_, row) in enumerate(metadata.iterrows(), start=1):
        print(f'Preparing downbeats {count}/{total}', end='\r')

        midi_file = row['midi_file']
        downbeats_file = row['downbeats_file']

        # if already calculated, skip
        if os.path.exists(downbeats_file):
            continue

        # calculate downbeats
        midi_data = pm.PrettyMIDI(midi_file)
        downbeats, _ = get_downbeats_and_end_time(midi_data)
        mkdir(os.path.split(downbeats_file)[0])
        # Context manager guarantees the file is flushed and closed even if
        # pickling fails.
        with open(downbeats_file, 'wb') as f:
            pickle.dump(downbeats, f, protocol=2)
    # terminate the '\r' progress line
    print()
def test_audio2score(args):
    """Transcribe the test set bar by bar and evaluate against the targets.

    For each test piece the predicted and target bars are combined into full
    scores, both are written as MIDI files under
    ``output_path/<score_type>/<piano>/``, and word error rate and MV2H
    metrics are computed. The per-piece results are printed as a mean and
    saved to ``output_path/<score_type>/evaluation_results.csv``.

    Args:
        args: parsed arguments providing ``task`` ('audio2score' or
            'joint'), ``score_type`` ('Reshaped' or 'LilyPond'),
            ``dataset_folder``, ``feature_folder``, ``model_checkpoint`` and
            ``MV2H_path``.

    Raises:
        ValueError: if ``args.task`` or ``args.score_type`` is not one of the
            supported values.
    """
    spectrogram_setting = SpectrogramSetting()

    if args.task == 'audio2score':
        DataModule = Audio2ScoreTranscriptionDataModule
    elif args.task == 'joint':
        DataModule = JointTranscriptionDataModule
    else:
        # Previously an unknown task left `DataModule` unbound.
        raise ValueError(f'Unsupported task: {args.task!r}')

    datamodule = DataModule(spectrogram_setting, args.score_type,
                            args.dataset_folder, args.feature_folder)
    transcriber = Audio2ScoreTranscriber(
        model_checkpoint=args.model_checkpoint,
        score_type=args.score_type,
        model_type=args.task,
        gpu=0)

    evaluation_results = pd.DataFrame(columns=[
        'wer-right', 'wer-left', 'wer', 'mv2h-multipitch', 'mv2h-voice',
        'mv2h-meter', 'mv2h-value', 'mv2h'
    ])

    for i, row in datamodule.metadata_test.iterrows():
        print(f'Evaluating test set {i+1}/{len(datamodule.metadata_test)}')
        # NOTE(review): evaluation stops at index label 2, i.e. only the
        # first pieces are evaluated. This looks like a debugging leftover;
        # kept to preserve current behavior -- confirm before removing.
        if i == 2:
            break

        downbeats_file = row['downbeats_file']
        spectrograms_folder = row['spectrograms_folder']
        score_folder = row['score_reshaped_folder'] \
            if args.score_type == 'Reshaped' \
            else row['score_lilypond_folder']
        piano = row['piano']
        name = row['name']

        # get ground truth downbeats
        with open(downbeats_file, 'rb') as f:
            downbeats = pickle.load(f)

        # get predicted score and target score by bar
        score_right_list_pred, score_left_list_pred = [], []
        score_right_list_targ, score_left_list_targ = [], []
        quarter_lengths = []
        key_signatures, time_signatures = [], []

        # Per-bar spectrograms are stored as '<setting>.pkl.<bar_index>.pkl'
        # inside the piece's spectrograms folder.
        spectrogram_prefix = os.path.join(
            spectrograms_folder, f'{spectrogram_setting.to_string()}.pkl')

        for bar_index in range(len(downbeats) - 2):  # ignore final bar
            print(f'\ttranscribing bar {bar_index+1}/{len(downbeats)-2}',
                  end='\r')
            # NOTE(review): only the first two bars of each piece are
            # transcribed. Looks like a debugging leftover; kept to preserve
            # current behavior -- confirm before removing.
            if bar_index == 2:
                break

            # predicted score
            with open(spectrogram_prefix + f'.{bar_index}.pkl', 'rb') as f:
                spectrogram = pickle.load(f)
            score_right_pred, score_left_pred = \
                transcriber.transcribe_one_bar_from_spectrogram(spectrogram)

            # target score
            with open(os.path.join(score_folder, f'{bar_index}.pkl'),
                      'rb') as f:
                score_right_targ, score_left_targ = pickle.load(f)

            # quarter_lengths
            quarter_length = score_right_targ.to_m21().quarterLength

            score_right_list_pred.append(score_right_pred)
            score_left_list_pred.append(score_left_pred)
            score_right_list_targ.append(score_right_targ)
            score_left_list_targ.append(score_left_targ)
            quarter_lengths.append(quarter_length)
            # every bar gets a key signature with zero sharps and a
            # '<quarter_length>/4' time signature
            key_signatures.append(m21.key.KeySignature(sharps=0))
            time_signatures.append(
                m21.meter.TimeSignature(f'{quarter_length}/4'))

        print('\n\tgetting predicted score and evaluate')
        if args.score_type == 'Reshaped':
            combine_bars = combine_bars_from_score_reshaped
        elif args.score_type == 'LilyPond':
            combine_bars = combine_bars_from_score_lilypond
        else:
            # Previously an unknown score type left `score_targ` and
            # `score_pred` unbound.
            raise ValueError(f'Unsupported score type: {args.score_type!r}')
        score_targ = combine_bars(key_signatures, time_signatures,
                                  score_right_list_targ,
                                  score_left_list_targ, quarter_lengths)
        score_pred = combine_bars(key_signatures, time_signatures,
                                  score_right_list_pred,
                                  score_left_list_pred, quarter_lengths)

        # save target and predicted scores
        score_targ_file = os.path.join(output_path, args.score_type, piano,
                                       f'{name}_targ.mid')
        score_pred_file = os.path.join(output_path, args.score_type, piano,
                                       f'{name}_pred.mid')
        mkdir(os.path.split(score_targ_file)[0])
        score_targ.write('midi', score_targ_file)
        score_pred.write('midi', score_pred_file)

        # evaluate
        wer_right, wer_left = evaluate_word_error_rate(
            score_right_list_pred, score_left_list_pred,
            score_right_list_targ, score_left_list_targ, args.score_type)
        mv2h_result = Eval.mv2h_evaluation(score_targ_file, score_pred_file,
                                           args.MV2H_path)

        # update evaluation results
        evaluation_results.loc[i] = [
            wer_right, wer_left,
            np.mean([wer_right, wer_left]), mv2h_result['Multi-pitch'],
            mv2h_result['Voice'], mv2h_result['Meter'], mv2h_result['Value'],
            np.mean([
                mv2h_result['Multi-pitch'], mv2h_result['Voice'],
                mv2h_result['Meter'], mv2h_result['Value']
            ])
        ]

    print(np.mean(evaluation_results))
    evaluation_results_file = os.path.join(output_path, args.score_type,
                                           'evaluation_results.csv')
    evaluation_results.to_csv(evaluation_results_file, index=False)
import pandas as pd
import numpy as np
import music21 as m21
import pytorch_lightning as pl
from audio2score.data.datamodule import PianorollTranscriptionDataModule, Audio2ScoreTranscriptionDataModule, JointTranscriptionDataModule
from audio2score.models.models import PianorollTranscriptionModel
from audio2score.settings import SpectrogramSetting
from audio2score.transcribers import Audio2ScoreTranscriber
from audio2score.scores.helper import combine_bars_from_score_reshaped, combine_bars_from_score_lilypond
from audio2score.scores.lilypondscore import ScoreLilyPond
from audio2score.utilities.utils import mkdir
from audio2score.utilities.evaluation_utils import Eval

output_path = 'outputs/'  # folder to save predicted scores and evaluation results
# NOTE: module-level call, so the output folder is created when this module
# is imported, not only when an evaluation function runs.
mkdir(output_path)


def test_audio2pr(args):
    """Set up the datamodule, model and trainer for pianoroll transcription.

    Args:
        args: parsed arguments providing ``dataset_folder``,
            ``feature_folder`` and ``model_checkpoint``.
    """
    # default spectrogram setting; its `channels` and `freq_bins` are passed
    # to the model below
    spectrogram_setting = SpectrogramSetting()
    datamodule = PianorollTranscriptionDataModule(spectrogram_setting, args.dataset_folder, args.feature_folder)
    # restore the model from the checkpoint given on the command line
    model = PianorollTranscriptionModel.load_from_checkpoint(
        args.model_checkpoint,
        in_channels=spectrogram_setting.channels,
        freq_bins=spectrogram_setting.freq_bins)
    trainer = pl.Trainer(gpus=1, reload_dataloaders_every_epoch=True, auto_select_gpus=True)
    # NOTE(review): the visible body ends here without using `datamodule`,
    # `model` or `trainer`; the remainder of the function (presumably the
    # actual test run) appears to be outside this view -- confirm.
def get_metadata(dataset_folder: str,
                 feature_folder: str,
                 split: str,
                 three_train_pianos: Optional[bool] = True):
    """Get metadata for the dataset.

    The metadata is cached in ``metadata/cache/<split>-pianos=<n>.csv``; when
    that file exists it is loaded instead of being rebuilt. Otherwise one
    record is created per (piece, piano) pair, with the pieces read from
    ``metadata/<split>.txt``.

    Args:
        dataset_folder: folder to the MuseSyn dataset.
        feature_folder: folder to save pre-calculated features.
        split: train/test/valid split.
        three_train_pianos: when true, every split except 'test' leaves out
            the last piano in ``Constants.pianos``; when false, all pianos
            are used for every split.

    Returns:
        metadata: (pd.DataFrame) dataset metadata.
    """
    if three_train_pianos:
        pianos = Constants.pianos if split == 'test' else Constants.pianos[:-1]
    else:
        pianos = Constants.pianos
    print(f'Get {split} metadata, {len(pianos)} pianos')

    metadata_file = f'metadata/cache/{split}-pianos={len(pianos)}.csv'
    if os.path.exists(metadata_file):
        cached = pd.read_csv(metadata_file)
        # Caches written by earlier versions stored the DataFrame index as an
        # unnamed first column; drop it so a cache hit returns the same
        # columns as a fresh build.
        return cached.drop(columns=['Unnamed: 0'], errors='ignore')

    records = []
    for _, row in pd.read_csv(f'metadata/{split}.txt').iterrows():
        name = row['name']

        # information shared by all pianos of a piece
        midi_file = os.path.join(dataset_folder, 'midi', name + '.mid')
        score_file = os.path.join(dataset_folder, 'xml', name + '.xml')
        pianoroll_file = os.path.join(feature_folder, 'pianoroll',
                                      name + '.pkl')
        downbeats_file = os.path.join(feature_folder, 'downbeats',
                                      name + '.pkl')
        score_reshaped_folder = os.path.join(feature_folder, 'score_reshaped',
                                             name)
        score_lilypond_folder = os.path.join(feature_folder, 'score_lilypond',
                                             name)
        # The duration depends only on the MIDI file, so parse it once per
        # piece instead of once per piano.
        duration = pm.PrettyMIDI(midi_file).get_end_time()

        for piano in pianos:
            # piano-specific information
            audio_file = os.path.join(dataset_folder, 'flac', piano,
                                      name + '.flac')
            spectrograms_folder = os.path.join(feature_folder, 'spectrograms',
                                               piano, name)

            # update metadata
            records.append({
                'name': name,
                'piano': piano,
                'midi_file': midi_file,
                'audio_file': audio_file,
                'score_file': score_file,
                'split': split,
                'spectrograms_folder': spectrograms_folder,
                'pianoroll_file': pianoroll_file,
                'downbeats_file': downbeats_file,
                'score_reshaped_folder': score_reshaped_folder,
                'score_lilypond_folder': score_lilypond_folder,
                'duration': duration
            })

    # to DataFrame and save metadata
    metadata = pd.DataFrame(records)
    mkdir(os.path.split(metadata_file)[0])
    # index=False keeps the cached file's columns identical to the returned
    # DataFrame's columns.
    metadata.to_csv(metadata_file, index=False)
    return metadata