def visualize(self, data_type, max_iteration=None):
    '''Visualize the log mel spectrogram of each coarse-level sound class.

    Args:
      data_type: 'train' | 'validate'
      max_iteration: None | int, maximum iteration to run to speed up
          evaluation
    '''
    audio_duration = config.audio_duration
    frames_num = config.frames_num
    coarse_classes_num = config.coarse_classes_num
    coarse_idx_to_lb = config.coarse_idx_to_lb

    generate_func = self.data_generator.generate_validate(
        data_type=data_type, max_iteration=max_iteration)

    # Forward
    output_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_input=True,
        return_target=True)

    rows_num = 3
    cols_num = 3

    fig, axs = plt.subplots(rows_num, cols_num, figsize=(10, 5))

    # For each coarse class, plot the first audio clip that contains it
    for k in range(coarse_classes_num):
        for n, audio_name in enumerate(output_dict['audio_name']):
            if output_dict['coarse_target'][n, k] > 0.5:
                row = k // cols_num
                col = k % cols_num
                title = '{}\n{}'.format(coarse_idx_to_lb[k], audio_name)
                axs[row, col].set_title(title, color='r')
                logmel = inverse_scale(
                    output_dict['feature'][n],
                    self.data_generator.scalar['mean'],
                    self.data_generator.scalar['std'])
                axs[row, col].matshow(logmel.T, origin='lower',
                                      aspect='auto', cmap='jet')
                axs[row, col].set_xticks([0, frames_num])
                axs[row, col].set_xticklabels(
                    ['0', '{:.1f} s'.format(audio_duration)])
                axs[row, col].xaxis.set_ticks_position('bottom')
                axs[row, col].set_ylabel('Mel bins')
                axs[row, col].set_yticks([])
                break

    # Hide unused subplots
    for k in range(coarse_classes_num, rows_num * cols_num):
        row = k // cols_num
        col = k % cols_num
        axs[row, col].set_visible(False)

    fig.tight_layout(pad=0, w_pad=0, h_pad=0)
    plt.show()
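# Several visualize() methods in this section undo feature standardization
# with inverse_scale() before plotting. The helper itself is not shown here;
# the following is a minimal sketch, assuming features were standardized per
# mel bin as (x - mean) / std.
import numpy as np

def inverse_scale(x, mean, std):
    """Undo per-mel-bin standardization so a log mel spectrogram can be
    plotted in its original scale. A sketch, assuming the forward scaling
    was (x - mean) / std.

    Args:
      x: (frames_num, mel_bins), standardized features
      mean: (mel_bins,), mean used for scaling
      std: (mel_bins,), standard deviation used for scaling

    Returns:
      (frames_num, mel_bins), features in the original scale
    """
    return np.asarray(x) * std + mean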
def evaluate(self, data_loader):
    """Forward evaluation data and calculate precision, recall, F-score
    and the confusion matrix.

    Args:
      data_loader: object

    Returns:
      statistics: dict
      output_dict: dict
    """
    # Forward
    output_dict = forward(
        model=self.model,
        generator=data_loader,
        return_target=True)

    clipwise_output = output_dict['clipwise_output']    # (audios_num, classes_num)
    target = output_dict['target']    # (audios_num, classes_num)

    cm = metrics.confusion_matrix(
        np.argmax(target, axis=-1),
        np.argmax(clipwise_output, axis=-1),
        labels=None)

    precision = calculate_precision(target, clipwise_output)
    recall = calculate_recall(target, clipwise_output)
    f_score = calculate_f_score(target, clipwise_output)

    statistics = {
        'precision': precision,
        'recall': recall,
        'f_score': f_score,
        'cm': cm}

    return statistics, output_dict
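# calculate_precision, calculate_recall and calculate_f_score are defined
# elsewhere in the codebase. Since the confusion matrix above compares argmax
# class indices, a plausible single-label sketch is given below; the 'macro'
# averaging choice is an assumption, and the real helpers may be multilabel.
import numpy as np
from sklearn import metrics

def calculate_precision(target, output, average='macro'):
    """Single-label precision over argmax class indices (a sketch)."""
    return metrics.precision_score(
        np.argmax(target, axis=-1), np.argmax(output, axis=-1),
        average=average)

def calculate_recall(target, output, average='macro'):
    """Single-label recall over argmax class indices (a sketch)."""
    return metrics.recall_score(
        np.argmax(target, axis=-1), np.argmax(output, axis=-1),
        average=average)

def calculate_f_score(target, output, average='macro'):
    """Single-label F1 over argmax class indices (a sketch)."""
    return metrics.f1_score(
        np.argmax(target, axis=-1), np.argmax(output, axis=-1),
        average=average)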
def evaluate(self, data_loader):
    """Forward evaluation data and calculate statistics.

    Args:
      data_loader: object

    Returns:
      statistics: dict, {'average_precision': (classes_num,),
          'auc': (classes_num,)}
    """
    # Forward
    output_dict = forward(
        model=self.model,
        generator=data_loader,
        return_target=True)

    clipwise_output = output_dict['clipwise_output']    # (audios_num, classes_num)
    target = output_dict['target']    # (audios_num, classes_num)

    average_precision = metrics.average_precision_score(
        target, clipwise_output, average=None)

    auc = metrics.roc_auc_score(target, clipwise_output, average=None)

    statistics = {'average_precision': average_precision, 'auc': auc}

    return statistics
def visualize(self, data_type, source, max_iteration=None):
    '''Visualize the log mel spectrograms of different sound classes.

    Args:
      data_type: 'train' | 'validate'
      source: 'a' | 'b' | 'c'
      max_iteration: None | int, maximum iteration to run to speed up
          evaluation
    '''
    audio_duration = config.audio_duration
    frames_num = config.frames_num
    in_domain_classes_num = len(config.labels) - 1
    idx_to_lb = config.idx_to_lb

    generate_func = self.data_generator.generate_validate(
        data_type=data_type, source=source, max_iteration=max_iteration)

    # Forward
    output_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_input=True,
        return_target=True)

    # Plot log mel spectrogram of different sound classes
    rows_num = 3
    cols_num = 4

    fig, axs = plt.subplots(rows_num, cols_num, figsize=(10, 5))

    # For each in-domain class, plot the first audio clip with that label
    for k in range(in_domain_classes_num):
        for n, audio_name in enumerate(output_dict['audio_name']):
            if output_dict['target'][n, k] == 1:
                row = k // cols_num
                col = k % cols_num
                axs[row, col].set_title(idx_to_lb[k], color='r')
                logmel = inverse_scale(
                    output_dict['feature'][n],
                    self.data_generator.scalar['mean'],
                    self.data_generator.scalar['std'])
                axs[row, col].matshow(logmel.T, origin='lower',
                                      aspect='auto', cmap='jet')
                axs[row, col].set_xticks([0, frames_num])
                axs[row, col].set_xticklabels(
                    ['0', '{:.1f} s'.format(audio_duration)])
                axs[row, col].xaxis.set_ticks_position('bottom')
                axs[row, col].set_ylabel('Mel bins')
                axs[row, col].set_yticks([])
                break

    # Hide unused subplots
    for k in range(in_domain_classes_num, rows_num * cols_num):
        row = k // cols_num
        col = k % cols_num
        axs[row, col].set_visible(False)

    fig.tight_layout(pad=0, w_pad=0, h_pad=0)
    plt.show()
def evaluate(self, data_type, metadata_dir, submissions_dir,
             max_validate_num=None):
    '''Evaluate the performance.

    Args:
      data_type: 'train' | 'validate'
      metadata_dir: string, directory of reference metadata csvs
      submissions_dir: string, directory to write out submission csvs
      max_validate_num: None | int, maximum iteration to run to speed up
          evaluation
    '''
    # Forward
    generate_func = self.data_generator.generate_validate(
        data_type=data_type, max_validate_num=max_validate_num)

    list_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_target=True)

    # Calculate loss
    (total_loss, event_loss, position_loss) = self.calculate_loss(list_dict)

    logging.info('{:<20} {}: {:.3f}, {}: {:.3f}, {}: {:.3f}'.format(
        data_type + ' statistics: ', 'total_loss', total_loss,
        'event_loss', event_loss, 'position_loss', position_loss))

    # Write out submissions and evaluate with the code provided by the
    # challenge organizers
    write_submission(list_dict, submissions_dir)

    prediction_paths = [os.path.join(submissions_dir,
        '{}.csv'.format(result['name'])) for result in list_dict]

    statistics = calculate_metrics(metadata_dir, prediction_paths)

    for key in statistics.keys():
        logging.info('    {:<20} {:.3f}'.format(key + ' :', statistics[key]))

    return statistics
def evaluate(self, data_loader):
    """Forward evaluation data and calculate accuracy and the confusion
    matrix.

    Args:
      data_loader: object

    Returns:
      statistics: dict
    """
    # Forward
    output_dict = forward(
        model=self.model,
        generator=data_loader,
        return_target=True)

    clipwise_output = output_dict['clipwise_output']    # (audios_num, classes_num)
    target = output_dict['target']    # (audios_num, classes_num)

    cm = metrics.confusion_matrix(
        np.argmax(target, axis=-1),
        np.argmax(clipwise_output, axis=-1),
        labels=None)

    accuracy = calculate_accuracy(target, clipwise_output)

    statistics = {'accuracy': accuracy, 'cm': cm}

    return statistics
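# calculate_accuracy is not defined in this section. A minimal sketch,
# assuming one-hot single-label targets, consistent with the argmax-based
# confusion matrix computed above:
import numpy as np

def calculate_accuracy(target, output):
    """Clipwise accuracy for single-label classification: the fraction of
    clips whose predicted argmax class matches the target class."""
    return np.mean(np.argmax(target, axis=-1) == np.argmax(output, axis=-1))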
def evaluate(self, data_loader, reference_csv_path, submission_path):
    """Evaluate AT and SED performance.

    Args:
      data_loader: object
      reference_csv_path: str, path of strongly labelled ground truth csv
      submission_path: str, path to write out submission file

    Returns:
      statistics: dict
      output_dict: dict
    """
    output_dict = forward(
        model=self.model,
        data_loader=data_loader,
        return_input=False,
        return_target=True)

    statistics = {}

    # Clipwise statistics
    statistics['clipwise_ap'] = metrics.average_precision_score(
        output_dict['target'], output_dict['clipwise_output'], average=None)

    # Framewise statistics
    if 'strong_target' in output_dict.keys():
        statistics['framewise_ap'] = sed_average_precision(
            output_dict['strong_target'],
            output_dict['framewise_output'],
            average=None)

    # Framewise predictions to eventwise predictions
    predict_event_list = frame_prediction_to_event_prediction(
        output_dict, self.sed_params_dict)

    # Write eventwise predictions to submission file
    write_submission(predict_event_list, submission_path)

    # SED with official tool
    statistics['sed_metrics'] = official_evaluate(
        reference_csv_path, submission_path)

    return statistics, output_dict
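# Both this evaluator and the next rely on frame_prediction_to_event_prediction
# to turn framewise probabilities into event lists. The actual helper also
# applies smoothing (cf. the n_smooth / n_salt parameters used by
# activity_detection in the visualize() method later in this section); the
# sketch below keeps only the double-thresholding core and assumes
# config.frames_per_second and config.labels are available.
import numpy as np

def double_threshold(prob, high_thres, low_thres):
    """Hysteresis thresholding: an event starts where prob crosses
    high_thres and extends in both directions while prob stays above
    low_thres. Returns a list of (onset_frame, offset_frame) pairs."""
    pairs = []
    i = 0
    while i < len(prob):
        if prob[i] > high_thres:
            onset = i
            while onset > 0 and prob[onset - 1] > low_thres:
                onset -= 1
            offset = i
            while offset < len(prob) and prob[offset] > low_thres:
                offset += 1
            pairs.append((onset, offset))
            i = offset
        else:
            i += 1
    return pairs

def frame_prediction_to_event_prediction(output_dict, sed_params_dict):
    """Convert framewise probabilities to an event list (a sketch)."""
    event_list = []
    framewise_output = output_dict['framewise_output']    # (N, frames_num, classes_num)
    for n, audio_name in enumerate(output_dict['audio_name']):
        for k in range(framewise_output.shape[-1]):
            pairs = double_threshold(
                framewise_output[n, :, k],
                sed_params_dict['sed_high_threshold'],
                sed_params_dict['sed_low_threshold'])
            for (onset, offset) in pairs:
                event_list.append({
                    'filename': audio_name,
                    'onset': onset / config.frames_per_second,
                    'offset': offset / config.frames_per_second,
                    'event_label': config.labels[k]})
    return event_list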
def evaluate(self, reference_csv_path, submission_path):
    """Evaluate AT and SED performance.

    Args:
      reference_csv_path: str, path of strongly labelled ground truth csv
      submission_path: str, path to write out submission file

    Returns:
      statistics: dict
      predictions: dict
    """
    output_dict = forward(
        model=self.model,
        generator=self.generator,
        return_input=False,
        return_target=True)

    predictions = {
        'clipwise_output': output_dict['clipwise_output'],
        'framewise_output': output_dict['framewise_output']}

    statistics = {}

    # Weak statistics
    clipwise_ap = metrics.average_precision_score(
        output_dict['target'], output_dict['clipwise_output'], average=None)
    statistics['clipwise_ap'] = clipwise_ap
    logging.info('    clipwise mAP: {:.3f}'.format(np.mean(clipwise_ap)))

    if 'strong_target' in output_dict.keys():
        framewise_ap = sed_average_precision(
            output_dict['strong_target'],
            output_dict['framewise_output'],
            average=None)
        statistics['framewise_ap'] = framewise_ap
        logging.info('    framewise mAP: {:.3f}'.format(np.mean(framewise_ap)))

    # Obtain eventwise predictions from framewise predictions using
    # predefined thresholds
    predict_event_list = frame_prediction_to_event_prediction(
        output_dict, self.sed_params_dict)

    # Write predicted events to submission file
    write_submission(predict_event_list, submission_path)

    # SED with official tool
    results = official_evaluate(reference_csv_path, submission_path)
    logging.info('    {}'.format(results['overall']['error_rate']))
    statistics['sed_metrics'] = results

    return statistics, predictions
def evaluate(self):
    """Forward evaluation data and calculate average precision and AUC.

    Returns:
      statistics: dict, {'average_precision': (classes_num,),
          'auc': (classes_num,)}
    """
    # Forward
    output_dict = forward(
        model=self.model,
        generator=self.generator,
        return_target=True)

    clipwise_output = output_dict['clipwise_output']    # (audios_num, classes_num)
    target = output_dict['target']    # (audios_num, classes_num)

    average_precision = metrics.average_precision_score(
        target, clipwise_output, average=None)

    auc = metrics.roc_auc_score(target, clipwise_output, average=None)

    statistics = {'average_precision': average_precision, 'auc': auc}

    return statistics
def evaluate(self, data_type):
    """Forward validation data and log the losses.

    Args:
      data_type: 'train' | 'validate'
    """
    # Forward
    list_dict = forward(
        model=self.model,
        generate_func=self.data_generator.generate_validate(data_type),
        cuda=self.cuda,
        return_target=True,
        max_validate_num=self.max_validate_num)

    # Calculate loss
    (total_loss, event_loss, position_loss) = self.calculate_loss(list_dict)

    logging.info('{:<20} {}: {:.3f}, {}: {:.3f}, {}: {:.3f}'.format(
        data_type + ' statistics: ', 'total_loss', total_loss,
        'event_loss', event_loss, 'position_loss', position_loss))

    return list_dict
def inference_evaluation(args):
    '''Inference on evaluation data and write out submission file.

    Args:
      subtask: 'a' | 'b' | 'c', corresponds to the 3 subtasks of DCASE2019
          Task1
      data_type: 'leaderboard' | 'evaluation'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''
    # Arguments & parameters
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    holdout_fold = 'none'

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)
    trained_sub_dir = get_subdir(subtask, 'development')

    feature_hdf5_path = os.path.join(workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        '{}.h5'.format(trained_sub_dir))

    checkpoint_path = os.path.join(workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        '{}'.format(trained_sub_dir), 'holdout_fold={}'.format(holdout_fold),
        model_type, '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(workspace, 'submissions',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        sub_dir, 'holdout_fold={}'.format(holdout_fold), model_type,
        '{}_iterations'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold),
        model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = EvaluationDataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_evaluation(data_type)

    # Inference
    output_dict = forward(model, generate_func, cuda,
        return_input=False, return_target=False)

    # Write submission
    write_submission(output_dict, subtask, data_type, submission_path)
def evaluate(self, data_type, target_source, max_iteration=None,
             verbose=False):
    '''Evaluate the performance.

    Args:
      data_type: 'train' | 'validate'
      target_source: 'curated' | 'noisy'
      max_iteration: None | int, maximum iteration to run to speed up
          evaluation
      verbose: bool
    '''
    assert data_type in ['train', 'validate']
    assert target_source in ['curated', 'noisy']

    generate_func = self.data_generator.generate_validate(
        data_type=data_type,
        target_source=target_source,
        max_iteration=max_iteration)

    # Results of segments
    output_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_target=True)

    # Results of audio recordings
    result_dict = segment_prediction_to_clip_prediction(
        output_dict, average='arithmetic')

    output = result_dict['output']
    target = result_dict['target']

    # Mean average precision
    average_precision = metrics.average_precision_score(
        target, output, average=None)
    mAP = np.mean(average_precision)

    # Label-weighted label-ranking average precision (lwlrap)
    (per_class_lwlrap, weight_per_class) = calculate_per_class_lwlrap(
        target, output)
    mean_lwlrap = np.sum(per_class_lwlrap * weight_per_class)

    logging.info('    Target source: {}, mAP: {:.3f}, mean_lwlrap: {:.3f}'.format(
        target_source, mAP, mean_lwlrap))

    statistics = {
        'average_precision': average_precision,
        'per_class_lwlrap': per_class_lwlrap,
        'weight_per_class': weight_per_class}

    if verbose:
        for n in range(self.classes_num):
            logging.info('    {:<20}{:.3f}'.format(
                self.labels[n], per_class_lwlrap[n]))
        logging.info('')

    return statistics
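# calculate_per_class_lwlrap computes the label-weighted label-ranking
# average precision used in DCASE 2019 Task 2. Below is a condensed sketch
# consistent with the metric's definition (the reference implementation
# released with the task is vectorized; the loop form here trades speed for
# readability).
import numpy as np

def calculate_per_class_lwlrap(truth, scores):
    """Per-class label-ranking precision and per-class weights, so that
    mean lwlrap = np.sum(per_class_lwlrap * weight_per_class).

    Args:
      truth: (samples_num, classes_num), binary labels
      scores: (samples_num, classes_num), ranking scores

    Returns:
      per_class_lwlrap: (classes_num,)
      weight_per_class: (classes_num,)
    """
    samples_num, classes_num = scores.shape
    precisions = np.zeros((samples_num, classes_num))

    for i in range(samples_num):
        pos = np.flatnonzero(truth[i] > 0)
        if len(pos) == 0:
            continue
        # 1-based rank of every class when sorted by descending score
        order = np.argsort(-scores[i])
        rank = np.empty(classes_num, dtype=np.int64)
        rank[order] = np.arange(1, classes_num + 1)
        for k in pos:
            # Precision at the rank of class k: fraction of classes ranked
            # at or above it that are true positives
            hits = np.sum(rank[pos] <= rank[k])
            precisions[i, k] = hits / rank[k]

    labels_per_class = np.sum(truth > 0, axis=0)
    weight_per_class = labels_per_class / max(1, np.sum(labels_per_class))
    per_class_lwlrap = np.sum(precisions, axis=0) \
        / np.maximum(1, labels_per_class)
    return per_class_lwlrap, weight_per_class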
def visualize(self, data_type, max_validate_num=None):
    '''Visualize the log mel spectrogram, and the reference and prediction
    of sound events.

    Args:
      data_type: 'train' | 'validate'
      max_validate_num: None | int, maximum iteration to run to speed up
          evaluation
    '''
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    labels = config.labels

    # Forward
    generate_func = self.data_generator.generate_validate(
        data_type=data_type, max_validate_num=max_validate_num)

    list_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_input=True,
        return_target=True)

    for result in list_dict:
        print('File: {}'.format(result['name']))

        frames_num = result['target_event'].shape[1]
        length_in_second = frames_num / float(frames_per_second)

        fig, axs = plt.subplots(4, 2, figsize=(15, 10))

        logmel = inverse_scale(
            result['feature'][0][0],
            self.data_generator.scalar['mean'],
            self.data_generator.scalar['std'])

        axs[0, 0].matshow(logmel.T, origin='lower', aspect='auto', cmap='jet')
        axs[1, 0].matshow(result['target_event'][0].T, origin='lower',
                          aspect='auto', cmap='jet')
        axs[2, 0].matshow(result['output_event'][0].T, origin='lower',
                          aspect='auto', cmap='jet')

        axs[0, 0].set_title('Log mel spectrogram', color='r')
        axs[1, 0].set_title('Reference sound events', color='r')
        axs[2, 0].set_title('Predicted sound events', color='b')

        for i in range(4):
            axs[i, 0].set_xticks([0, frames_num])
            axs[i, 0].set_xticklabels(
                ['0', '{:.1f} s'.format(length_in_second)])
            axs[i, 0].xaxis.set_ticks_position('bottom')
            axs[i, 0].set_yticks(np.arange(classes_num))
            axs[i, 0].set_yticklabels(labels)
            axs[i, 0].yaxis.grid(color='w', linestyle='solid', linewidth=0.2)

        axs[0, 0].set_ylabel('Mel bins')
        axs[0, 0].set_yticks([0, mel_bins])
        axs[0, 0].set_yticklabels([0, mel_bins])

        # Only the first column is used in this plot
        axs[3, 0].set_visible(False)
        for i in range(4):
            axs[i, 1].set_visible(False)

        fig.tight_layout()
        plt.show()
def inference_test(args):
    '''Inference and calculate metrics on validation data.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      train_sources: 'curated' | 'noisy' | 'curated_and_noisy'
      segment_seconds: float, duration of audio recordings to be padded or
          split
      hop_seconds: float, hop seconds between segments
      pad_type: 'constant' | 'repeat'
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      iteration: int, load model of this iteration
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool, visualize the logmel spectrogram of segments
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    train_source = args.train_source
    segment_seconds = args.segment_seconds
    hop_seconds = args.hop_seconds
    pad_type = args.pad_type
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    holdout_fold = 'none'    # Use model trained on full data without validation

    mel_bins = config.mel_bins
    classes_num = config.classes_num
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    test_feature_hdf5_path = os.path.join(workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'test.h5')

    scalar_path = os.path.join(workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train_noisy.h5')

    checkpoint_path = os.path.join(workspace, 'checkpoints', filename,
        'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(
            segment_seconds, hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(workspace, 'submissions', filename,
        'logmel_{}frames_{}melbins'.format(frames_per_second, mel_bins),
        'train_source={}'.format(train_source),
        'segment={}s,hop={}s,pad_type={}'.format(
            segment_seconds, hop_seconds, pad_type),
        'holdout_fold={}'.format(holdout_fold), model_type,
        '{}_iterations_submission.csv'.format(iteration))
    create_folder(os.path.dirname(submission_path))

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(
        test_feature_hdf5_path=test_feature_hdf5_path,
        segment_seconds=segment_seconds,
        hop_seconds=hop_seconds,
        pad_type=pad_type,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_test()

    # Results of segments
    output_dict = forward(
        model=model,
        generate_func=generate_func,
        cuda=cuda)

    # Results of audio recordings
    result_dict = segment_prediction_to_clip_prediction(
        output_dict, average='arithmetic')

    # Write submission
    write_submission(result_dict, submission_path)
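# Both segment-based pipelines above aggregate with
# segment_prediction_to_clip_prediction. A hedged sketch follows, assuming
# output_dict holds parallel arrays keyed by 'audio_name', 'output' and
# optionally 'target', and that every segment of a clip shares the clip-level
# target; the real helper may carry through additional keys.
import numpy as np

def segment_prediction_to_clip_prediction(output_dict, average='arithmetic'):
    """Aggregate segmentwise predictions into clipwise predictions by
    grouping segments that share an audio name."""
    audio_names = np.array(output_dict['audio_name'])
    outputs = np.array(output_dict['output'])

    result_dict = {'audio_name': [], 'output': []}
    if 'target' in output_dict:
        result_dict['target'] = []

    for name in np.unique(audio_names):
        indexes = np.flatnonzero(audio_names == name)
        if average == 'arithmetic':
            clip_output = np.mean(outputs[indexes], axis=0)
        elif average == 'geometric':
            clip_output = np.exp(np.mean(
                np.log(np.clip(outputs[indexes], 1e-8, None)), axis=0))
        else:
            raise ValueError('Unknown average: {}'.format(average))
        result_dict['audio_name'].append(name)
        result_dict['output'].append(clip_output)
        if 'target' in output_dict:
            # All segments of a clip share the clip-level target
            result_dict['target'].append(output_dict['target'][indexes[0]])

    for key in result_dict:
        result_dict[key] = np.array(result_dict[key])
    return result_dict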
def evaluate(self, data_type, iteration, max_iteration=None, verbose=False):
    '''Evaluate the performance.

    Args:
      data_type: 'train' | 'validate'
      iteration: int
      max_iteration: None | int, maximum iteration to run to speed up
          evaluation
      verbose: bool
    '''
    generate_func = self.data_generator.generate_validate(
        data_type=data_type, max_iteration=max_iteration)

    # Forward
    output_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_target=True)

    output = output_dict['output']    # (audios_num * audio_num, in_domain_classes_num)
    target = output_dict['target']    # (audios_num * audio_num, in_domain_classes_num)

    prob = output    # Subtasks a and b use log softmax as output

    # Evaluate
    y_true = np.argmax(target, axis=-1)
    y_pred = np.argmax(prob, axis=-1)

    confusion_matrix = metrics.confusion_matrix(
        y_true, y_pred, labels=np.arange(self.in_domain_classes_num))

    classwise_accuracy = np.diag(confusion_matrix) \
        / np.sum(confusion_matrix, axis=-1)

    # Aggregate segmentwise predictions to recording-level predictions: a
    # recording is labelled 1 only if all of its segments are predicted as
    # a non-zero class
    length = len(y_true) // config.audio_num
    z_true = []
    z_pred = []
    for i in range(length):
        x = y_true[i * config.audio_num: (i + 1) * config.audio_num]
        z_true.append(int(np.all(x >= 0.5)))
        x = y_pred[i * config.audio_num: (i + 1) * config.audio_num]
        z_pred.append(int(np.all(x >= 0.5)))

    all_acc = 100. * np.mean(np.array(z_true) == np.array(z_pred))
    segment_acc = 100. * np.mean(y_true == y_pred)

    logging.info('Data type: {}'.format(data_type))
    logging.info('    Segment average accuracy: {:.3f}'.format(segment_acc))
    logging.info('    All average accuracy: {:.3f}'.format(all_acc))

    if verbose:
        classes_num = len(classwise_accuracy)
        for n in range(classes_num):
            logging.info('{:<20}{:.3f}'.format(
                self.labels[n], classwise_accuracy[n]))
        logging.info(confusion_matrix)

    statistics = {
        'accuracy': classwise_accuracy,
        'confusion_matrix': confusion_matrix}

    return statistics
def evaluate(self, data_type, submission_path=None, annotation_path=None,
             yaml_path=None, max_iteration=None):
    '''Write out a submission file for evaluation with the official tool.

    Args:
      data_type: 'train' | 'validate'
      submission_path: None | string, path to write out submission csv
      annotation_path: None | string, path of reference csv
      yaml_path: None | string, path of yaml taxonomy file
      max_iteration: None | int, maximum iteration to run to speed up
          evaluation
    '''
    generate_func = self.data_generator.generate_validate(
        data_type=data_type, max_iteration=max_iteration)

    # Forward
    output_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_target=True)

    output = output_dict['output']

    # Average precision evaluation, currently disabled:
    # target = output_dict['{}_target'.format(self.taxonomy_level)]
    # target = self.get_binary_target(target)
    #
    # average_precision = metrics.average_precision_score(
    #     target, output, average=None)
    #
    # if self.verbose:
    #     logging.info('{} average precision:'.format(data_type))
    #     for k, label in enumerate(self.labels):
    #         logging.info('    {:<40}{:.3f}'.format(label, average_precision[k]))
    #     logging.info('    {:<40}{:.3f}'.format('Average', np.mean(average_precision)))
    # else:
    #     logging.info('{}:'.format(data_type))
    #     logging.info('    mAP: {:.3f}'.format(np.mean(average_precision)))
    #
    # statistics = {}
    # statistics['average_precision'] = average_precision

    # Write submission and evaluate with the official evaluation tool:
    # https://github.com/sonyc-project/urban-sound-tagging-baseline
    if submission_path:
        write_submission_csv(
            audio_names=output_dict['audio_name'],
            outputs=output,
            taxonomy_level=self.taxonomy_level,
            submission_path=submission_path)

        # The official evaluation below is also currently disabled:
        # df_dict = offical_metrics.evaluate(
        #     prediction_path=submission_path,
        #     annotation_path=annotation_path,
        #     yaml_path=yaml_path,
        #     mode=self.taxonomy_level)
        #
        # micro_auprc, eval_df = offical_metrics.micro_averaged_auprc(
        #     df_dict, return_df=True)
        #
        # macro_auprc, class_auprc = offical_metrics.macro_averaged_auprc(
        #     df_dict, return_classwise=True)
        #
        # # Get index of first threshold that is at least 0.5
        # thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).nonzero()[0][0]
        #
        # logging.info('    Official evaluation:')
        # logging.info('    Micro AUPRC: {:.3f}'.format(micro_auprc))
        # logging.info('    Micro F1-score (@0.5): {:.3f}'.format(eval_df['F'][thresh_0pt5_idx]))
        # logging.info('    Macro AUPRC: {:.3f}'.format(macro_auprc))
        #
        # statistics['micro_auprc'] = micro_auprc
        # statistics['micro_f1'] = eval_df['F'][thresh_0pt5_idx]
        # statistics['macro_auprc'] = macro_auprc

    return submission_path
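# write_submission_csv is shared with the evaluation-inference script later
# in this section. A sketch of what it plausibly does, assuming one row per
# clip with a probability column per taxonomy label; the exact header
# expected by the official evaluator and the behaviour of get_labels are
# assumptions.
import csv

def write_submission_csv(audio_names, outputs, taxonomy_level,
                         submission_path):
    """Write clipwise probabilities to a submission csv (a sketch)."""
    labels = get_labels(taxonomy_level)
    with open(submission_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['audio_filename'] + list(labels))
        for audio_name, output in zip(audio_names, outputs):
            writer.writerow([audio_name] + list(output))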
def visualize(self, data_type, max_iteration=None):
    '''Visualize the log mel spectrogram, reference and prediction.

    Args:
      data_type: 'train' | 'validate'
      max_iteration: None | int, maximum iteration to run to speed up
          evaluation
    '''
    mel_bins = config.mel_bins
    audio_duration = config.audio_duration
    labels = config.labels

    generate_func = self.data_generator.generate_validate(
        data_type=data_type, max_iteration=max_iteration)

    # Forward
    output_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_input=True,
        return_target=True)

    (audios_num, frames_num, classes_num) = \
        output_dict['framewise_output'].shape

    for n in range(audios_num):
        print('File: {}'.format(output_dict['audio_name'][n]))

        for k in range(classes_num):
            print('{:<20}{:<8}{:.3f}'.format(
                labels[k],
                output_dict['weak_target'][n, k],
                output_dict['clipwise_output'][n, k]))

        # Eventwise prediction: only detect events for classes whose
        # clipwise probability exceeds the high threshold
        event_prediction = np.zeros((frames_num, classes_num))
        for k in range(classes_num):
            if output_dict['clipwise_output'][n, k] \
                    > self.sed_params_dict['sed_high_threshold']:
                bgn_fin_pairs = activity_detection(
                    x=output_dict['framewise_output'][n, :, k],
                    thres=self.sed_params_dict['sed_high_threshold'],
                    low_thres=self.sed_params_dict['sed_low_threshold'],
                    n_smooth=self.sed_params_dict['n_smooth'],
                    n_salt=self.sed_params_dict['n_salt'])

                for pair in bgn_fin_pairs:
                    event_prediction[pair[0]: pair[1], k] = 1

        # Plot
        fig, axs = plt.subplots(4, 1, figsize=(10, 8))

        logmel = inverse_scale(
            output_dict['feature'][n],
            self.data_generator.scalar['mean'],
            self.data_generator.scalar['std'])

        axs[0].matshow(logmel.T, origin='lower', aspect='auto', cmap='jet')

        if 'strong_target' in output_dict.keys():
            axs[1].matshow(output_dict['strong_target'][n].T, origin='lower',
                           aspect='auto', cmap='jet')

        masked_framewise_output = output_dict['framewise_output'][n] \
            * output_dict['clipwise_output'][n]
        axs[2].matshow(masked_framewise_output.T, origin='lower',
                       aspect='auto', cmap='jet')
        axs[3].matshow(event_prediction.T, origin='lower', aspect='auto',
                       cmap='jet')

        axs[0].set_title('Log mel spectrogram', color='r')
        axs[1].set_title('Reference sound events', color='r')
        axs[2].set_title('Framewise prediction', color='b')
        axs[3].set_title('Eventwise prediction', color='b')

        for i in range(4):
            axs[i].set_xticks([0, frames_num])
            axs[i].set_xticklabels(['0', '{:.1f} s'.format(audio_duration)])
            axs[i].xaxis.set_ticks_position('bottom')
            axs[i].set_yticks(np.arange(classes_num))
            axs[i].set_yticklabels(labels)
            axs[i].yaxis.grid(color='w', linestyle='solid', linewidth=0.2)

        axs[0].set_ylabel('Mel bins')
        axs[0].set_yticks([0, mel_bins])
        axs[0].set_yticklabels([0, mel_bins])

        fig.tight_layout()
        plt.show()
def evaluate(self, data_type, metadata_path, submission_path,
             max_iteration=None):
    '''Write out submission file and evaluate the performance.

    Args:
      data_type: 'train' | 'validate'
      metadata_path: string, path of reference csv
      submission_path: string, path to write out submission
      max_iteration: None | int, maximum iteration to run to speed up
          evaluation
    '''
    generate_func = self.data_generator.generate_validate(
        data_type=data_type, max_iteration=max_iteration)

    # Forward
    output_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_target=True)

    statistics = {}

    # Evaluate audio tagging
    if 'weak_target' in output_dict:
        weak_target = output_dict['weak_target']
        clipwise_output = output_dict['clipwise_output']
        average_precision = metrics.average_precision_score(
            weak_target, clipwise_output, average=None)
        mAP = np.mean(average_precision)

        logging.info('{} statistics:'.format(data_type))
        logging.info('    Audio tagging mAP: {:.3f}'.format(mAP))

        statistics['average_precision'] = average_precision

    if 'strong_target' in output_dict:
        # Write out submission file
        write_submission(output_dict, self.sed_params_dict, submission_path)

        # Evaluate SED with official tools
        reference_dict = read_csv_file_for_sed_eval_tool(metadata_path)
        predict_dict = read_csv_file_for_sed_eval_tool(submission_path)

        # Event & segment based metrics
        event_based_metric = sed_eval.sound_event.EventBasedMetrics(
            event_label_list=config.labels,
            evaluate_onset=True,
            evaluate_offset=True,
            t_collar=0.200,
            percentage_of_length=0.2)

        segment_based_metric = sed_eval.sound_event.SegmentBasedMetrics(
            event_label_list=config.labels,
            time_resolution=0.2)

        for audio_name in output_dict['audio_name']:
            if audio_name in reference_dict.keys():
                ref_list = reference_dict[audio_name]
            else:
                ref_list = []

            if audio_name in predict_dict.keys():
                pred_list = predict_dict[audio_name]
            else:
                pred_list = []

            event_based_metric.evaluate(ref_list, pred_list)
            segment_based_metric.evaluate(ref_list, pred_list)

        event_metrics = \
            event_based_metric.results_class_wise_average_metrics()
        f_measure = event_metrics['f_measure']['f_measure']
        error_rate = event_metrics['error_rate']['error_rate']
        deletion_rate = event_metrics['error_rate']['deletion_rate']
        insertion_rate = event_metrics['error_rate']['insertion_rate']

        statistics['event_metrics'] = {
            'f_measure': f_measure,
            'error_rate': error_rate,
            'deletion_rate': deletion_rate,
            'insertion_rate': insertion_rate}

        logging.info('    Event-based, classwise F score: {:.3f}, ER: '
            '{:.3f}, Del: {:.3f}, Ins: {:.3f}'.format(
            f_measure, error_rate, deletion_rate, insertion_rate))

        segment_metrics = \
            segment_based_metric.results_class_wise_average_metrics()
        f_measure = segment_metrics['f_measure']['f_measure']
        error_rate = segment_metrics['error_rate']['error_rate']
        deletion_rate = segment_metrics['error_rate']['deletion_rate']
        insertion_rate = segment_metrics['error_rate']['insertion_rate']

        statistics['segment_metrics'] = {
            'f_measure': f_measure,
            'error_rate': error_rate,
            'deletion_rate': deletion_rate,
            'insertion_rate': insertion_rate}

        logging.info('    Segment-based, classwise F score: {:.3f}, ER: '
            '{:.3f}, Del: {:.3f}, Ins: {:.3f}'.format(
            f_measure, error_rate, deletion_rate, insertion_rate))

        if self.verbose:
            logging.info(event_based_metric)
            logging.info(segment_based_metric)

    return statistics
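# read_csv_file_for_sed_eval_tool groups csv rows into the per-file event
# lists consumed by sed_eval above. A sketch, assuming tab-separated rows of
# (filename, onset, offset, event_label); the delimiter, column order and
# exact dict keys expected by sed_eval are assumptions.
import csv

def read_csv_file_for_sed_eval_tool(path):
    """Group event annotations by filename (a sketch)."""
    event_dict = {}
    with open(path, 'r') as f:
        for row in csv.reader(f, delimiter='\t'):
            filename = row[0]
            event_dict.setdefault(filename, []).append({
                'file': filename,
                'event_onset': float(row[1]),
                'event_offset': float(row[2]),
                'event_label': row[3]})
    return event_dict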
def transcribe(self, audio, midi_path):
    """Transcribe an audio recording.

    Args:
      audio: (audio_samples,)
      midi_path: str, path to write out the transcribed MIDI.

    Returns:
      transcribed_dict, dict: {'output_dict': ...,
          'est_note_events': ..., 'est_pedal_events': ...}
    """
    audio = audio[None, :]    # (1, audio_samples)

    # Pad audio to be evenly divided by segment_samples
    audio_len = audio.shape[1]
    pad_len = int(np.ceil(audio_len / self.segment_samples)) \
        * self.segment_samples - audio_len
    audio = np.concatenate((audio, np.zeros((1, pad_len))), axis=1)

    # Enframe to segments
    segments = self.enframe(audio, self.segment_samples)
    """(N, segment_samples)"""

    # Forward
    output_dict = forward(self.model, segments, batch_size=1)
    """{'reg_onset_output': (N, segment_frames, classes_num), ...}"""

    # Deframe to original length
    for key in output_dict.keys():
        output_dict[key] = self.deframe(output_dict[key])[0: audio_len]
    """output_dict: {
      'reg_onset_output': (frames_num, classes_num),
      'reg_offset_output': (frames_num, classes_num),
      'frame_output': (frames_num, classes_num),
      'velocity_output': (frames_num, classes_num),
      'reg_pedal_onset_output': (frames_num, 1),
      'reg_pedal_offset_output': (frames_num, 1),
      'pedal_frame_output': (frames_num, 1)}"""

    # Post processor
    if self.post_processor_type == 'regression':
        # Proposed high-resolution regression post-processing algorithm
        post_processor = RegressionPostProcessor(
            self.frames_per_second,
            classes_num=self.classes_num,
            onset_threshold=self.onset_threshold,
            offset_threshold=self.offset_threshold,
            frame_threshold=self.frame_threshold,
            pedal_offset_threshold=self.pedal_offset_threshold)
    elif self.post_processor_type == 'onsets_frames':
        # Google's Onsets and Frames post-processing algorithm, only used
        # for comparison
        post_processor = OnsetsFramesPostProcessor(
            self.frames_per_second, self.classes_num)

    # Post process output_dict to MIDI events
    (est_note_events, est_pedal_events) = \
        post_processor.output_dict_to_midi_events(output_dict)

    # Write MIDI events to file
    if midi_path:
        write_events_to_midi(
            start_time=0,
            note_events=est_note_events,
            pedal_events=est_pedal_events,
            midi_path=midi_path)
        print('Write out to {}'.format(midi_path))

    transcribed_dict = {
        'output_dict': output_dict,
        'est_note_events': est_note_events,
        'est_pedal_events': est_pedal_events}

    return transcribed_dict
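# transcribe() leans on self.enframe / self.deframe for segmentation and
# stitching. A sketch of the pair, assuming half-overlapping segments whose
# unreliable borders are discarded when stitching; the exact hop size and
# border fractions are assumptions.
import numpy as np

def enframe(self, x, segment_samples):
    """Cut padded audio of shape (1, audio_samples) into half-overlapping
    segments stacked as (N, segment_samples)."""
    batch = []
    pointer = 0
    while pointer + segment_samples <= x.shape[1]:
        batch.append(x[:, pointer: pointer + segment_samples])
        pointer += segment_samples // 2
    return np.concatenate(batch, axis=0)

def deframe(self, x):
    """Stitch segment outputs (N, segment_frames, classes_num) back into
    (frames_num, classes_num), keeping the centre of each half-overlapped
    segment so that segment borders are discarded."""
    if x.shape[0] == 1:
        return x[0]
    segment_frames = x.shape[1]
    # First segment: keep the first three quarters; interior segments:
    # keep the middle half; last segment: keep everything after the first
    # quarter. With a hop of segment_frames // 2 these pieces tile exactly.
    y = [x[0, : int(segment_frames * 0.75)]]
    for i in range(1, x.shape[0] - 1):
        y.append(x[i, int(segment_frames * 0.25): int(segment_frames * 0.75)])
    y.append(x[-1, int(segment_frames * 0.25):])
    return np.concatenate(y, axis=0)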
def visualize(self, data_type, target_source, save_fig_path,
              max_iteration=None):
    '''Visualize the log mel spectrograms of different sound classes.

    Args:
      data_type: 'train' | 'validate'
      target_source: 'curated' | 'noisy'
      save_fig_path: string, path to save figure
      max_iteration: None | int, maximum iteration to run to speed up
          evaluation
    '''
    generate_func = self.data_generator.generate_validate(
        data_type=data_type,
        target_source=target_source,
        max_iteration=max_iteration)

    # Results of segments
    output_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_target=True,
        return_input=True)

    target = output_dict['target']
    feature = output_dict['feature']

    (audios_num, segment_frames, mel_bins) = feature.shape
    segment_duration = segment_frames / self.frames_per_second

    # Plot log mel spectrogram of different sound classes
    rows_num = 10
    cols_num = 8

    fig, axs = plt.subplots(rows_num, cols_num, figsize=(15, 15))

    # For each class, plot the first segment with that label
    for k in range(self.classes_num):
        for n, audio_name in enumerate(output_dict['audio_name']):
            if target[n, k] == 1:
                row = k // cols_num
                col = k % cols_num
                axs[row, col].set_title(
                    self.idx_to_lb[k][0:20], color='r', fontsize=9)
                logmel = inverse_scale(
                    feature[n],
                    self.data_generator.scalar['mean'],
                    self.data_generator.scalar['std'])
                axs[row, col].matshow(logmel.T, origin='lower',
                                      aspect='auto', cmap='jet')
                axs[row, col].set_xticks([0, segment_frames])
                axs[row, col].set_xticklabels(
                    ['0', '{:.1f} s'.format(segment_duration)], fontsize=6)
                axs[row, col].xaxis.set_ticks_position('bottom')
                axs[row, col].set_ylabel('Mel bins', fontsize=7)
                axs[row, col].set_yticks([])
                break

    # Hide unused subplots
    for k in range(self.classes_num, rows_num * cols_num):
        row = k // cols_num
        col = k % cols_num
        axs[row, col].set_visible(False)

    plt.tight_layout(pad=0, w_pad=0, h_pad=0)
    plt.savefig(save_fig_path)
    logging.info('Save figure to {}'.format(save_fig_path))
def inference_evaluation(args):
    '''Inference on evaluation data.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      iteration: int
      holdout_fold: 'none', which means using the model trained on all
          development data
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    iteration = args.iteration
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    evaluate_hdf5_path = os.path.join(workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'evaluate.h5')

    scalar_path = os.path.join(workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'train.h5')

    checkpoint_path = os.path.join(workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, 'best2.pth')

    submission_path = os.path.join(workspace, 'submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'best2_submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    model = Model(classes_num)

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(
        hdf5_path=evaluate_hdf5_path,
        scalar=scalar,
        batch_size=batch_size)

    # Forward
    output_dict = forward(
        model=model,
        generate_func=data_generator.generate(),
        cuda=cuda,
        return_target=False)

    # Write submission
    write_submission_csv(
        audio_names=output_dict['audio_name'],
        outputs=output_dict['output'],
        taxonomy_level=taxonomy_level,
        submission_path=submission_path)
def visualize(self, data_type):
    '''Visualize the log mel spectrogram, and the reference and predicted
    sound events, elevation and azimuth.

    Args:
      data_type: 'train' | 'validate'
    '''
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    labels = config.labels

    # Forward
    list_dict = forward(
        model=self.model,
        generate_func=self.data_generator.generate_validate(data_type),
        cuda=self.cuda,
        return_input=True,
        return_target=True)

    for result in list_dict:
        print('File: {}'.format(result['name']))

        frames_num = result['target_event'].shape[1]
        length_in_second = frames_num / float(frames_per_second)

        fig, axs = plt.subplots(4, 2, figsize=(15, 10))

        axs[0, 0].matshow(result['feature'][0][0].T, origin='lower',
                          aspect='auto', cmap='jet')
        axs[1, 0].matshow(result['target_event'][0].T, origin='lower',
                          aspect='auto', cmap='jet')
        axs[2, 0].matshow(result['output_event'][0].T, origin='lower',
                          aspect='auto', cmap='jet')
        axs[0, 1].matshow(result['target_elevation'][0].T, origin='lower',
                          aspect='auto', cmap='jet')
        axs[1, 1].matshow(result['target_azimuth'][0].T, origin='lower',
                          aspect='auto', cmap='jet')

        # Mask the predicted elevation and azimuth with the predicted events
        masked_elevation = result['output_elevation'] * result['output_event']
        axs[2, 1].matshow(masked_elevation[0].T, origin='lower',
                          aspect='auto', cmap='jet')

        masked_azimuth = result['output_azimuth'] * result['output_event']
        axs[3, 1].matshow(masked_azimuth[0].T, origin='lower',
                          aspect='auto', cmap='jet')

        axs[0, 0].set_title('Log mel spectrogram', color='r')
        axs[1, 0].set_title('Reference sound events', color='r')
        axs[2, 0].set_title('Predicted sound events', color='b')
        axs[0, 1].set_title('Reference elevation', color='r')
        axs[1, 1].set_title('Reference azimuth', color='r')
        axs[2, 1].set_title('Predicted elevation', color='b')
        axs[3, 1].set_title('Predicted azimuth', color='b')

        for i in range(4):
            for j in range(2):
                axs[i, j].set_xticks([0, frames_num])
                axs[i, j].set_xticklabels(
                    ['0', '{:.1f} s'.format(length_in_second)])
                axs[i, j].xaxis.set_ticks_position('bottom')
                axs[i, j].set_yticks(np.arange(classes_num))
                axs[i, j].set_yticklabels(labels)
                axs[i, j].yaxis.grid(color='w', linestyle='solid',
                                     linewidth=0.2)

        axs[0, 0].set_ylabel('Mel bins')
        axs[0, 0].set_yticks([0, mel_bins])
        axs[0, 0].set_yticklabels([0, mel_bins])

        axs[3, 0].set_visible(False)

        fig.tight_layout()
        plt.show()
def evaluate(self, data_type, source, max_iteration=None, verbose=False):
    '''Evaluate the performance.

    Args:
      data_type: 'train' | 'validate'
      source: 'a' | 'b' | 'c' | 's1' | 's2' | 's3'
      max_iteration: None | int, maximum iteration to run to speed up
          evaluation
      verbose: bool
    '''
    generate_func = self.data_generator.generate_validate(
        data_type=data_type, source=source, max_iteration=max_iteration)

    # Forward
    output_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_target=True)

    if output_dict['output'].ndim == 2:
        # Single-scale models
        output = output_dict['output']    # (audios_num, in_domain_classes_num)
        target = output_dict['target']    # (audios_num, in_domain_classes_num)
        loss = output_dict['loss']

        prob = np.exp(output)

        # Evaluate
        y_true = np.argmax(target, axis=-1)
        y_pred = np.argmax(prob, axis=-1)

        confusion_matrix = metrics.confusion_matrix(
            y_true, y_pred, labels=np.arange(self.in_domain_classes_num))

        classwise_accuracy = np.diag(confusion_matrix) \
            / np.sum(confusion_matrix, axis=-1)

        logging.info('Single-Classifier:')
        logging.info('Data type: {}'.format(data_type))
        logging.info('    Average accuracy: {:.3f}'.format(
            np.mean(classwise_accuracy)))
        logging.info('    Log loss: {:.3f}'.format(log_loss(y_true, loss)))
    else:
        # Multi-scale models: evaluate each scale classifier, then the
        # global classifier stored in the last slice
        for i in range(output_dict['output'].shape[1] - 1):
            output = output_dict['output'][:, i, :]    # (audios_num, in_domain_classes_num)
            target = output_dict['target']    # (audios_num, in_domain_classes_num)
            loss = output_dict['loss'][:, i, :]

            prob = np.exp(output)

            # Evaluate
            y_true = np.argmax(target, axis=-1)
            y_pred = np.argmax(prob, axis=-1)

            confusion_matrix = metrics.confusion_matrix(
                y_true, y_pred, labels=np.arange(self.in_domain_classes_num))

            classwise_accuracy = np.diag(confusion_matrix) \
                / np.sum(confusion_matrix, axis=-1)

            logging.info('Scale{}-Classifier:'.format(i + 1))
            logging.info('Data type: {}'.format(data_type))
            logging.info('    Average accuracy: {:.3f}'.format(
                np.mean(classwise_accuracy)))
            logging.info('    Log loss: {:.3f}'.format(
                log_loss(y_true, loss)))

        output = output_dict['output'][:, -1, :]    # (audios_num, in_domain_classes_num)
        target = output_dict['target']    # (audios_num, in_domain_classes_num)
        loss = output_dict['loss'][:, -1, :]

        prob = np.exp(output)

        # Evaluate
        y_true = np.argmax(target, axis=-1)
        y_pred = np.argmax(prob, axis=-1)

        confusion_matrix = metrics.confusion_matrix(
            y_true, y_pred, labels=np.arange(self.in_domain_classes_num))

        classwise_accuracy = np.diag(confusion_matrix) \
            / np.sum(confusion_matrix, axis=-1)

        logging.info('Global-Classifier:')
        logging.info('Data type: {}'.format(data_type))
        logging.info('    Average accuracy: {:.3f}'.format(
            np.mean(classwise_accuracy)))
        logging.info('    Log loss: {:.3f}'.format(log_loss(y_true, loss)))

    if verbose:
        classes_num = len(classwise_accuracy)
        for n in range(classes_num):
            logging.info('{:<20}{:.3f}'.format(
                self.labels[n], classwise_accuracy[n]))
        logging.info(confusion_matrix)

    statistics = {
        'accuracy': classwise_accuracy,
        'confusion_matrix': confusion_matrix}

    return statistics
def evaluate(self, data_type, iteration, max_iteration=None, verbose=False):
    '''Evaluate the performance and log misclassified clips.

    Args:
      data_type: 'train' | 'validate'
      iteration: int, current iteration, used to name the list of
          misclassified clips
      max_iteration: None | int, maximum iteration to run to speed up
          evaluation
      verbose: bool
    '''
    generate_func = self.data_generator.generate_validate(
        data_type=data_type, max_iteration=max_iteration)

    # Forward
    output_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_target=True)

    wrong_list_path = 'wrong_list/wrong_classification_' + str(iteration)

    output = output_dict['output']    # (audios_num, in_domain_classes_num)
    target = output_dict['target']    # (audios_num, in_domain_classes_num)
    filename = output_dict['filename']

    prob = np.exp(output)    # Subtasks a and b use log softmax as output

    # Evaluate
    y_true = np.argmax(target, axis=-1)
    y_pred = np.argmax(prob, axis=-1)

    # Write misclassified clips to file
    if data_type == 'validate':
        for i in range(len(y_true)):
            if y_true[i] != y_pred[i]:
                with open(wrong_list_path, 'a') as f:
                    audioname = filename[i]
                    true_idx = str(y_true[i])
                    pred_idx = str(y_pred[i])
                    true_label = self.idx_to_lb[y_true[i]]
                    pred_label = self.idx_to_lb[y_pred[i]]
                    f.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        audioname, true_idx, true_label,
                        pred_idx, pred_label))

    confusion_matrix = metrics.confusion_matrix(
        y_true, y_pred, labels=np.arange(self.in_domain_classes_num))

    classwise_accuracy = np.diag(confusion_matrix) \
        / np.sum(confusion_matrix, axis=-1)

    logging.info('Data type: {}'.format(data_type))
    logging.info('    Average accuracy: {:.3f}'.format(
        np.mean(classwise_accuracy)))

    if verbose:
        classes_num = len(classwise_accuracy)
        for n in range(classes_num):
            logging.info('{:<20}{:.3f}'.format(
                self.labels[n], classwise_accuracy[n]))
        logging.info(confusion_matrix)

    statistics = {
        'accuracy': classwise_accuracy,
        'confusion_matrix': confusion_matrix}

    return statistics
def evaluate(self, data_type, source, max_iteration=None, verbose=False):
    '''Evaluate the performance.

    Args:
      data_type: 'train' | 'validate'
      source: 'a' | 'b' | 'c'
      max_iteration: None | int, maximum iteration to run to speed up
          evaluation
      verbose: bool
    '''
    generate_func = self.data_generator.generate_validate(
        data_type=data_type, source=source, max_iteration=max_iteration)

    # Forward
    output_dict = forward(
        model=self.model,
        generate_func=generate_func,
        cuda=self.cuda,
        return_target=True)

    output = output_dict['output']    # (audios_num, in_domain_classes_num)
    target = output_dict['target']    # (audios_num, in_domain_classes_num)

    if self.subtask in ['a', 'b']:
        prob = np.exp(output)    # Subtasks a and b use log softmax as output
    elif self.subtask == 'c':
        prob = output    # Subtask c uses sigmoid as output

    # Evaluate
    y_true = np.argmax(target, axis=-1)
    y_pred = np.argmax(prob, axis=-1)

    # For subtask c, clips whose maximum probability is below 0.5 are
    # classified as 'unknown'
    if self.subtask == 'c':
        for n, class_id in enumerate(y_pred):
            if prob[n, class_id] < 0.5:
                y_pred[n] = self.lb_to_idx['unknown']

    if self.subtask in ['a', 'b']:
        confusion_matrix = metrics.confusion_matrix(
            y_true, y_pred, labels=np.arange(self.in_domain_classes_num))
    elif self.subtask == 'c':
        confusion_matrix = metrics.confusion_matrix(
            y_true, y_pred, labels=np.arange(self.all_classes_num))

    classwise_accuracy = np.diag(confusion_matrix) \
        / np.sum(confusion_matrix, axis=-1)

    logging.info('Data type: {}'.format(data_type))
    logging.info('    Source: {}'.format(source))

    if self.subtask in ['a', 'b']:
        logging.info('    Average accuracy: {:.3f}'.format(
            np.mean(classwise_accuracy)))
    elif self.subtask == 'c':
        logging.info('    In domain accuracy: {:.3f}, Unknown accuracy: '
            '{:.3f}'.format(np.mean(classwise_accuracy[0:-1]),
            classwise_accuracy[-1]))

    if verbose:
        classes_num = len(classwise_accuracy)
        for n in range(classes_num):
            logging.info('{:<20}{:.3f}'.format(
                self.labels[n], classwise_accuracy[n]))
        logging.info(confusion_matrix)

    statistics = {
        'accuracy': classwise_accuracy,
        'confusion_matrix': confusion_matrix}

    return statistics