def __init__(self, params, ref_files_folder=None, use_polar_format=True):
    self._use_polar_format = use_polar_format
    self._desc_dir = ref_files_folder if ref_files_folder is not None else os.path.join(params['dataset_dir'], 'metadata_dev')
    self._doa_thresh = params['lad_doa_thresh']

    # Load feature class
    self._feat_cls = cls_feature_class.FeatureClass(params)

    # collect reference files
    self._ref_labels = {}
    for split in os.listdir(self._desc_dir):
        for ref_file in os.listdir(os.path.join(self._desc_dir, split)):
            # Load reference description file
            gt_dict = self._feat_cls.load_output_format_file(os.path.join(self._desc_dir, split, ref_file))
            if not self._use_polar_format:
                gt_dict = self._feat_cls.convert_output_format_polar_to_cartesian(gt_dict)
            self._ref_labels[ref_file] = self._feat_cls.segment_labels(gt_dict, self._feat_cls.get_nb_frames())

    self._nb_ref_files = len(self._ref_labels)
    print('SELD metrics class: loaded {} reference files'.format(len(self._ref_labels)))
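# A minimal usage sketch for the metrics class above, assuming this __init__
# belongs to the ComputeSELDResults class instantiated later in the training
# script ("score_obj = ComputeSELDResults(params)"); parameter.get_params() is
# the parameter loader used elsewhere in this codebase.
import parameter

params = parameter.get_params()
score_obj = ComputeSELDResults(params)  # loads and segments every reference file once
# Recording-wise predictions dumped in DCASE output format can then be scored,
# mirroring the call in the training loop below:
# ER, F, LE, LR, seld_scr = score_obj.get_SELD_Results(dcase_output_val_folder)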
def __init__(self, dataset='foa', feat_label_dir='', is_eval=False, split=1, batch_size=16, seq_len=64,
             shuffle=True, per_file=False, channels_separate=False):
    self._per_file = per_file
    self._is_eval = is_eval
    self._splits = np.array(split)
    self._batch_size = batch_size
    self._seq_len = seq_len
    self._shuffle = shuffle
    self._feat_cls = cls_feature_class.FeatureClass(feat_label_dir=feat_label_dir, dataset=dataset, is_eval=is_eval)
    self._label_dir = self._feat_cls.get_label_dir()
    self._feat_dir = self._feat_cls.get_normalized_feat_dir()

    self._filenames_list = list()
    self._nb_frames_file = 0  # Using a fixed number of frames in feat files. Updated in _get_label_filenames_sizes()
    self._feat_len = None
    self._2_nb_ch = 2 * self._feat_cls.get_nb_channels()
    self._label_len = None  # total length of label - DOA + SED
    self._doa_len = None  # DOA label length
    self._class_dict = self._feat_cls.get_classes()
    self._nb_classes = len(self._class_dict.keys())
    self._default_azi, self._default_ele = self._feat_cls.get_default_azi_ele_regr()
    self._get_filenames_list_and_feat_label_sizes()

    self._batch_seq_len = self._batch_size * self._seq_len
    self._circ_buf_feat = None
    self._circ_buf_label = None
    self._channels_separate = channels_separate

    if self._per_file:
        self._nb_total_batches = len(self._filenames_list)
    else:
        self._nb_total_batches = int(np.floor(
            (len(self._filenames_list) * self._nb_frames_file) / float(self._seq_len * self._batch_size)))

    # self._dummy_feat_vec = np.ones(self._feat_len.shape) *
    print('\tDatagen_mode: {}, nb_files: {}, nb_classes: {}\n'
          '\tnb_frames_file: {}, feat_len: {}, nb_ch: {}, label_len: {}\n'.format(
              'eval' if self._is_eval else 'dev', len(self._filenames_list), self._nb_classes,
              self._nb_frames_file, self._feat_len, self._2_nb_ch, self._label_len))

    print('\tDataset: {}, split: {}\n'
          '\tbatch_size: {}, seq_len: {}, shuffle: {}\n'
          '\tlabel_dir: {}\n'
          '\tfeat_dir: {}\n'.format(dataset, split, self._batch_size, self._seq_len, self._shuffle,
                                    self._label_dir, self._feat_dir))
def __init__(self, params, split=1, shuffle=True, per_file=False, is_eval=False):
    self._per_file = per_file
    self._is_eval = is_eval
    self._splits = np.array(split)
    self._batch_size = params['batch_size']
    self._feature_seq_len = params['feature_sequence_length']
    self._label_seq_len = params['label_sequence_length']
    self._is_accdoa = params['is_accdoa']
    self._doa_objective = params['doa_objective']
    self._shuffle = shuffle
    self._feat_cls = cls_feature_class.FeatureClass(params=params, is_eval=self._is_eval)
    self._label_dir = self._feat_cls.get_label_dir()
    self._feat_dir = self._feat_cls.get_normalized_feat_dir()

    self._filenames_list = list()
    self._nb_frames_file = 0  # Using a fixed number of frames in feat files. Updated in _get_label_filenames_sizes()
    self._nb_mel_bins = self._feat_cls.get_nb_mel_bins()
    self._nb_ch = None
    self._label_len = None  # total length of label - DOA + SED
    self._doa_len = None  # DOA label length
    self._class_dict = self._feat_cls.get_classes()
    self._nb_classes = self._feat_cls.get_nb_classes()
    self._get_filenames_list_and_feat_label_sizes()

    self._feature_batch_seq_len = self._batch_size * self._feature_seq_len
    self._label_batch_seq_len = self._batch_size * self._label_seq_len
    self._circ_buf_feat = None
    self._circ_buf_label = None

    if self._per_file:
        self._nb_total_batches = len(self._filenames_list)
    else:
        self._nb_total_batches = int(np.floor(
            (len(self._filenames_list) * self._nb_frames_file) / float(self._feature_batch_seq_len)))

    # self._dummy_feat_vec = np.ones(self._feat_len.shape) *
    print('\tDatagen_mode: {}, nb_files: {}, nb_classes: {}\n'
          '\tnb_frames_file: {}, feat_len: {}, nb_ch: {}, label_len: {}\n'.format(
              'eval' if self._is_eval else 'dev', len(self._filenames_list), self._nb_classes,
              self._nb_frames_file, self._nb_mel_bins, self._nb_ch, self._label_len))

    print('\tDataset: {}, split: {}\n'
          '\tbatch_size: {}, feat_seq_len: {}, label_seq_len: {}, shuffle: {}\n'
          '\tTotal batches in dataset: {}\n'
          '\tlabel_dir: {}\n'
          '\tfeat_dir: {}\n'.format(
              params['dataset'], split, self._batch_size, self._feature_seq_len, self._label_seq_len,
              self._shuffle, self._nb_total_batches, self._label_dir, self._feat_dir))
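# A minimal usage sketch, assuming this __init__ belongs to the DataGenerator
# class in cls_data_generator that the training script further below instantiates:
import parameter
import cls_data_generator

params = parameter.get_params('1')
data_gen_train = cls_data_generator.DataGenerator(params=params, split=[1, 2, 3, 4])
data_in, data_out = data_gen_train.get_data_sizes()  # feature and label tensor shapes
# generate() yields (feature, label) batches indefinitely; pair it with
# get_total_batches_in_data() as steps_per_epoch when fitting a model.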
def calculate_metrics(metadata_dir, prediction_paths):
    '''Calculate metrics using the official tool. This part of the code is modified from:
    https://github.com/sharathadavanne/seld-dcase2019/blob/master/calculate_SELD_metrics.py

    Args:
      metadata_dir: string, directory of reference files.
      prediction_paths: list of string

    Returns:
      metrics: dict
    '''
    # Load feature class
    feat_cls = cls_feature_class.FeatureClass()

    # Load evaluation metric class; named seld_eval to avoid shadowing the built-in eval()
    seld_eval = evaluation_metrics.SELDMetrics(nb_frames_1s=feat_cls.nb_frames_1s(), data_gen=feat_cls)
    seld_eval.reset()  # Reset the evaluation metric parameters

    for prediction_path in prediction_paths:
        reference_path = os.path.join(metadata_dir, '{}.csv'.format(get_filename(prediction_path)))

        prediction_dict = evaluation_metrics.load_output_format_file(prediction_path)
        reference_dict = feat_cls.read_desc_file(reference_path)

        # Generate classification labels for SELD
        reference_tensor = feat_cls.get_clas_labels_for_file(reference_dict)
        prediction_tensor = evaluation_metrics.output_format_dict_to_classification_labels(prediction_dict, feat_cls)

        # Calculate SED and DOA scores
        seld_eval.update_sed_scores(prediction_tensor.max(2), reference_tensor.max(2))
        seld_eval.update_doa_scores(prediction_tensor, reference_tensor)

    # Overall SED and DOA scores
    sed_error_rate, sed_f1_score = seld_eval.compute_sed_scores()
    doa_error, doa_frame_recall = seld_eval.compute_doa_scores()
    seld_score = evaluation_metrics.compute_seld_metric(
        [sed_error_rate, sed_f1_score], [doa_error, doa_frame_recall])

    metrics = {
        'sed_error_rate': sed_error_rate,
        'sed_f1_score': sed_f1_score,
        'doa_error': doa_error,
        'doa_frame_recall': doa_frame_recall,
        'seld_score': seld_score
    }

    return metrics
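# Hedged usage sketch for calculate_metrics(); the folder and file names below
# are purely illustrative placeholders.
if __name__ == '__main__':
    metrics = calculate_metrics(
        metadata_dir='dataset/metadata_dev',
        prediction_paths=['submissions/split1_ir0_ov1_1.csv'])
    print('SELD score: {:.3f}'.format(metrics['seld_score']))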
# Visualize the DCASE 2019 SELD task dataset distribution
import os
import numpy as np
import sys
sys.path.append(os.path.join(sys.path[0], '..'))
import cls_feature_class
import matplotlib.pyplot as plot
plot.switch_backend('Qt4Agg')
# plot.switch_backend('TkAgg')
from IPython import embed

# Path to the metadata folder
dev_dataset = '/home/adavanne/taitoSharedData/DCASE2019/dataset/metadata_dev'

feat_cls = cls_feature_class.FeatureClass()
hop_len_s = feat_cls.get_hop_len_sec()
max_frames = feat_cls.get_nb_frames()
unique_classes_dict = feat_cls.get_classes()
nb_classes = len(unique_classes_dict)
azi_list, ele_list = feat_cls.get_azi_ele_list()
min_azi_ind = min(azi_list) // 10
min_ele_ind = min(ele_list) // 10
nb_ir = 5

nb_files_per_split = [0] * 5
split_info_dic = {}
for dataset_path in [dev_dataset]:
    for file in os.listdir(dataset_path):
        desc_dict = feat_cls.read_desc_file(os.path.join(dataset_path, file))
        split = int(file[5])
        ir = int(file[9])
        ov = int(file[13])
# Extracts the features, labels, and normalizes the training and test split features. Make sure you update the
# location of the downloaded datasets in cls_feature_class.py beforehand.

import cls_feature_class

dataset_name = 'tau'  # Datasets: ansim, resim, cansim, cresim and real

# Extracts features and labels for all overlaps and splits
for ovo in [1, 2]:  # sound event overlap
    for splito in [0]:  # all splits. Use [1, 8, 9] for the 'real' dataset
        for nffto in [512]:
            feat_cls = cls_feature_class.FeatureClass(ov=ovo, split=splito, nfft=nffto, dataset=dataset_name)

            # Extract features and normalize them
            feat_cls.extract_all_feature()
            feat_cls.preprocess_features()

            # Extract labels in regression mode
            feat_cls.extract_all_labels('regr', 0)
def __init__(self, datagen_mode='train', dataset='resim', ov=1, ov_num=1, split=1, db=30, batch_size=32,
             seq_len=64, shuffle=True, nfft=512, classifier_mode='regr', weakness=0, cnn3d=False,
             xyz_def_zero=False, extra_name='', azi_only=False):
    self._datagen_mode = datagen_mode
    self._classifier_mode = classifier_mode
    self._batch_size = batch_size
    self._seq_len = seq_len
    self._shuffle = shuffle
    self._split = split
    self._ov_num = ov_num
    self._feat_cls = cls_feature_class.FeatureClass(dataset=dataset, ov=ov, split=split, db=db, nfft=nfft)
    self._label_dir = self._feat_cls.get_label_dir(classifier_mode, weakness, extra_name)
    self._feat_dir = self._feat_cls.get_normalized_feat_dir(extra_name)
    self._thickness = weakness
    self._xyz_def_zero = xyz_def_zero
    self._azi_only = azi_only

    self._filenames_list = list()
    self._nb_frames_file = None  # Assuming number of frames in feat files are the same
    self._feat_len = None
    self._2_nb_ch = 8
    self._label_len = None  # total length of label - DOA + SED
    self._doa_len = None  # DOA label length
    self._class_dict = self._feat_cls.get_classes()
    self._nb_classes = len(self._class_dict.keys())
    self._default_azi, self._default_ele = self._feat_cls.get_default_azi_ele_regr()
    self._is_cnn3d_model = cnn3d
    self._get_label_filenames_sizes()

    self._batch_seq_len = self._batch_size * self._seq_len
    self._circ_buf_feat = None
    self._circ_buf_label = None

    self._nb_total_batches = int(np.floor(
        (len(self._filenames_list) * self._nb_frames_file) / float(self._seq_len * self._batch_size)))

    print('Datagen_mode: {}, nb_files: {}, nb_classes: {}\n'
          'nb_frames_file: {}, feat_len: {}, nb_ch: {}, label_len: {}\n'.format(
              self._datagen_mode, len(self._filenames_list), self._nb_classes,
              self._nb_frames_file, self._feat_len, self._2_nb_ch, self._label_len))

    print('Dataset: {}, ov: {}, split: {}\n'
          'batch_size: {}, seq_len: {}, shuffle: {}\n'
          'label_dir: {}\n'
          'feat_dir: {}\n'.format(dataset, ov, split, self._batch_size, self._seq_len, self._shuffle,
                                  self._label_dir, self._feat_dir))
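# A minimal usage sketch for the generator above, assuming it is the repo's
# DataGenerator class; the generate()/get_total_batches_in_data() accessor names
# mirror those used by the other generators in this codebase and are assumed here.
# gen_train = DataGenerator(datagen_mode='train', dataset='resim', ov=1, split=1)
# model.fit_generator(generator=gen_train.generate(),
#                     steps_per_epoch=gen_train.get_total_batches_in_data())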
# Extracts the features, labels, and normalizes the development and evaluation split features.

import cls_feature_class
import parameter

process_str = 'dev'  # 'dev' or 'eval' will extract features for the respective set accordingly;
                     # 'dev, eval' will extract features of both sets together
params = parameter.get_params()

if 'dev' in process_str:
    # -------------- Extract features and labels for development set -----------------------------
    dev_feat_cls = cls_feature_class.FeatureClass(params, is_eval=False)

    # Extract features and normalize them
    dev_feat_cls.extract_all_feature()
    dev_feat_cls.preprocess_features()

    # Extract labels in regression mode
    dev_feat_cls.extract_all_labels()

if 'eval' in process_str:
    # -------------- Extract ONLY features for evaluation set -----------------------------
    eval_feat_cls = cls_feature_class.FeatureClass(params, is_eval=True)

    # Extract features and normalize them
    eval_feat_cls.extract_all_feature()
    eval_feat_cls.preprocess_features()
# Extracts the features, labels, and normalizes the development and evaluation split features.
# NOTE: Change the dataset_dir and feat_label_dir path accordingly

import cls_feature_class

process_str = 'dev'  # 'dev' or 'eval' will extract features for the respective set accordingly;
                     # 'dev, eval' will extract features of both sets together
dataset_name = 'foa'  # 'foa' - ambisonic or 'mic' - microphone signals
dataset_dir = '/Volumes/Dinge/DCASE2019_subset/'  # Base folder containing the foa/mic and metadata folders
feat_label_dir = '/Volumes/Dinge/DCASE2019_subset/feat_label_tmp/'  # Directory to dump extracted features and labels

if 'dev' in process_str:
    # -------------- Extract features and labels for development set -----------------------------
    dev_feat_cls = cls_feature_class.FeatureClass(dataset=dataset_name, dataset_dir=dataset_dir,
                                                  feat_label_dir=feat_label_dir)

    # Extract features and normalize them
    dev_feat_cls.extract_all_feature()
    dev_feat_cls.preprocess_features()

    # Extract labels in regression mode
    dev_feat_cls.extract_all_labels()

if 'eval' in process_str:
    # -------------- Extract ONLY features for evaluation set -----------------------------
    eval_feat_cls = cls_feature_class.FeatureClass(dataset=dataset_name, dataset_dir=dataset_dir,
                                                   feat_label_dir=feat_label_dir, is_eval=True)

    # Extract features and normalize them
    eval_feat_cls.extract_all_feature()
    eval_feat_cls.preprocess_features()
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameter.py.
                              (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                              (default) 1
    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None

    if params['mode'] == 'dev':
        test_splits = [6]
        val_splits = [5]
        train_splits = [[1, 2, 3, 4]]
    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[6]]
        train_splits = [[1, 2, 3, 4, 5]]

    for split_cnt, split in enumerate(test_splits):
        print('\n\n---------------------------------------------------------------------------------------------------')
        print('------------------------------------ SPLIT {} -----------------------------------------------'.format(split))
        print('---------------------------------------------------------------------------------------------------')

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id, params['dataset'], params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(params=params, split=val_splits[split_cnt],
                                                        shuffle=False, per_file=True, is_eval=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(data_in, data_out))
        nb_classes = data_gen_train.get_nb_classes()

        print('MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size{}, t_pool_size{}\n\trnn_size: {}, '
              'fnn_size: {}\n\tdoa_objective: {}\n'.format(
                  params['dropout_rate'], params['nb_cnn2d_filt'], params['f_pool_size'], params['t_pool_size'],
                  params['rnn_size'], params['fnn_size'], params['doa_objective']))

        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'], f_pool_size=params['f_pool_size'],
                                      t_pool_size=params['t_pool_size'], rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'], weights=params['loss_weights'],
                                      doa_objective=params['doa_objective'], is_accdoa=params['is_accdoa'])

        # Dump results in DCASE output format for calculating final scores
        dcase_output_val_folder = os.path.join(params['dcase_output_dir'],
                                               '{}_{}_{}_val'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_val_folder)
        print('Dumping recording-wise val results in: {}'.format(dcase_output_val_folder))

        # Initialize evaluation metric class
        score_obj = ComputeSELDResults(params)

        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        tr_loss = np.zeros(nb_epoch)
        seld_metric = np.zeros((nb_epoch, 5))

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else data_gen_train.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2,
            )
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else data_gen_val.get_total_batches_in_data(),
                verbose=2
            )

            if params['is_accdoa']:
                sed_pred, doa_pred = get_accdoa_labels(pred, nb_classes)
                sed_pred = reshape_3Dto2D(sed_pred)
                doa_pred = reshape_3Dto2D(doa_pred)
            else:
                sed_pred = reshape_3Dto2D(pred[0]) > 0.5
                doa_pred = reshape_3Dto2D(pred[1] if params['doa_objective'] == 'mse' else pred[1][:, :, nb_classes:])

            # Calculate the DCASE 2021 metrics - Location-aware detection and Class-aware localization scores
            dump_DCASE2021_results(data_gen_val, feat_cls, dcase_output_val_folder, sed_pred, doa_pred)
            seld_metric[epoch_cnt, :] = score_obj.get_SELD_Results(dcase_output_val_folder)

            patience_cnt += 1
            if seld_metric[epoch_cnt, -1] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt, -1]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                '\n\t\t DCASE2021 SCORES: ER: {:0.2f}, F: {:0.1f}, LE: {:0.1f}, LR: {:0.1f}, '
                'seld_score (early stopping score): {:0.2f}, best_seld_score: {:0.2f}, best_epoch : {}\n'.format(
                    epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
                    seld_metric[epoch_cnt, 0], seld_metric[epoch_cnt, 1] * 100,
                    seld_metric[epoch_cnt, 2], seld_metric[epoch_cnt, 3] * 100,
                    seld_metric[epoch_cnt, -1], best_seld_metric, best_epoch))

            if patience_cnt > params['patience']:
                break

        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(best_seld_metric))

        print('\n\tDCASE2021 scores')
        print('\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'.format(
            seld_metric[best_epoch, 2], seld_metric[best_epoch, 3] * 100))
        print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(
            seld_metric[best_epoch, 0], seld_metric[best_epoch, 1] * 100))

        # ------------------ Calculate metric scores for unseen test split ---------------------------------
        print('\nLoading the best model and predicting results on the testing split')
        print('\tLoading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            params=params, split=split, shuffle=False, per_file=True,
            is_eval=True if params['mode'] == 'eval' else False)

        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name), params['doa_objective'])
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            verbose=2
        )

        if params['is_accdoa']:
            test_sed_pred, test_doa_pred = get_accdoa_labels(pred_test, nb_classes)
            test_sed_pred = reshape_3Dto2D(test_sed_pred)
            test_doa_pred = reshape_3Dto2D(test_doa_pred)
        else:
            test_sed_pred = reshape_3Dto2D(pred_test[0]) > 0.5
            test_doa_pred = reshape_3Dto2D(
                pred_test[1] if params['doa_objective'] == 'mse' else pred_test[1][:, :, nb_classes:])

        # Dump results in DCASE output format for calculating final scores
        dcase_output_test_folder = os.path.join(params['dcase_output_dir'],
                                                '{}_{}_{}_test'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_test_folder)
        print('Dumping recording-wise test results in: {}'.format(dcase_output_test_folder))
        dump_DCASE2021_results(data_gen_test, feat_cls, dcase_output_test_folder, test_sed_pred, test_doa_pred)

        if params['mode'] == 'dev':
            # Calculate DCASE2021 scores
            test_seld_metric = score_obj.get_SELD_Results(dcase_output_test_folder)

            print('Results on test split:')
            print('\tDCASE2021 Scores')
            print('\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'.format(
                test_seld_metric[2], test_seld_metric[3] * 100))
            print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(
                test_seld_metric[0], test_seld_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(test_seld_metric[-1]))
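# The training script above calls get_accdoa_labels() without defining it here.
# A minimal sketch consistent with how it is called, assuming the ACCDOA output
# concatenates per-class [x, y, z] DOA vectors along the last axis and that a
# class counts as active when its vector norm exceeds the same 0.5 threshold
# used for the SED branch above:
import numpy as np

def get_accdoa_labels(accdoa_in, nb_classes):
    # Split the network output into per-class Cartesian DOA components
    x = accdoa_in[:, :, :nb_classes]
    y = accdoa_in[:, :, nb_classes:2 * nb_classes]
    z = accdoa_in[:, :, 2 * nb_classes:]
    # A class is considered active when its DOA vector is long enough
    sed = np.sqrt(x**2 + y**2 + z**2) > 0.5
    return sed, accdoa_in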
# Extracts the features, labels, and normalizes the development and evaluation split features.

import cls_feature_class
import doanet_parameters

params = doanet_parameters.get_params()

# -------------- Extract features and labels for development set -----------------------------
dev_feat_cls = cls_feature_class.FeatureClass(params)

# Extract features and normalize them
dev_feat_cls.extract_all_feature()
dev_feat_cls.preprocess_features()

# Extract labels in regression mode
dev_feat_cls.extract_all_labels()
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameter.py.
                              (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                              (default) 1
    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None

    if params['mode'] == 'dev':
        test_splits = [1]
        val_splits = [2]
        train_splits = [[3, 4, 5, 6]]
    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[1]]
        train_splits = [[2, 3, 4, 5, 6]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print('\n\n---------------------------------------------------------------------------------------------------')
        print('------------------------------------ SPLIT {} -----------------------------------------------'.format(split))
        print('---------------------------------------------------------------------------------------------------')

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id, params['dataset'], params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(params=params, split=val_splits[split_cnt], shuffle=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(data_in, data_out))

        nb_classes = data_gen_train.get_nb_classes()
        gt = collect_test_labels(data_gen_val, data_out, nb_classes, params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        print('MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size{}, t_pool_size{}\n\trnn_size: {}, '
              'fnn_size: {}\n\tdoa_objective: {}\n'.format(
                  params['dropout_rate'], params['nb_cnn2d_filt'], params['f_pool_size'], params['t_pool_size'],
                  params['rnn_size'], params['fnn_size'], params['doa_objective']))

        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'], f_pool_size=params['f_pool_size'],
                                      t_pool_size=params['t_pool_size'], rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'], weights=params['loss_weights'],
                                      doa_objective=params['doa_objective'])

        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        seld_metric = np.zeros(nb_epoch)
        new_seld_metric = np.zeros(nb_epoch)
        tr_loss = np.zeros(nb_epoch)
        doa_metric = np.zeros((nb_epoch, 6))
        sed_metric = np.zeros((nb_epoch, 2))
        new_metric = np.zeros((nb_epoch, 4))

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else data_gen_train.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2,
            )
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else data_gen_val.get_total_batches_in_data(),
                verbose=2
            )

            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(
                pred[1] if params['doa_objective'] == 'mse' else pred[1][:, :, nb_classes:])

            # Calculate the DCASE 2019 metrics - Detection-only and Localization-only scores
            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(
                sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr_xyz(
                doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.early_stopping_metric(
                sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])

            # Calculate the DCASE 2020 metrics - Location-aware detection and Class-aware localization scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(
                nb_classes=data_gen_val.get_nb_classes(), doa_threshold=params['lad_doa_thresh'])
            pred_dict = feat_cls.regression_label_format_to_output_format(sed_pred, doa_pred)
            gt_dict = feat_cls.regression_label_format_to_output_format(sed_gt, doa_gt)

            pred_blocks_dict = feat_cls.segment_labels(pred_dict, sed_pred.shape[0])
            gt_blocks_dict = feat_cls.segment_labels(gt_dict, sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(pred_blocks_dict, gt_blocks_dict)
            new_metric[epoch_cnt, :] = cls_new_metric.compute_seld_scores()
            new_seld_metric[epoch_cnt] = evaluation_metrics.early_stopping_metric(
                new_metric[epoch_cnt, :2], new_metric[epoch_cnt, 2:])

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, sed_metric, doa_metric, seld_metric, new_metric, new_seld_metric)

            patience_cnt += 1
            if new_seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = new_seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                '\n\t\t DCASE2019 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, FR: {:0.1f}, seld_score: {:0.2f}, '
                '\n\t\t DCASE2020 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, DE_F: {:0.1f}, '
                'seld_score (early stopping score): {:0.2f}, best_seld_score: {:0.2f}, best_epoch : {}\n'.format(
                    epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
                    sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1] * 100,
                    doa_metric[epoch_cnt, 0], doa_metric[epoch_cnt, 1] * 100, seld_metric[epoch_cnt],
                    new_metric[epoch_cnt, 0], new_metric[epoch_cnt, 1] * 100,
                    new_metric[epoch_cnt, 2], new_metric[epoch_cnt, 3] * 100, new_seld_metric[epoch_cnt],
                    best_seld_metric, best_epoch))

            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([new_metric[best_epoch, 0], new_metric[best_epoch, 1], new_metric[best_epoch, 2],
                               new_metric[best_epoch, 3], best_seld_metric])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(best_seld_metric))

        print('\n\tDCASE2020 scores')
        print('\tClass-aware localization scores: DOA_error: {:0.1f}, F-score: {:0.1f}'.format(
            new_metric[best_epoch, 2], new_metric[best_epoch, 3] * 100))
        print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(
            new_metric[best_epoch, 0], new_metric[best_epoch, 1] * 100))

        print('\n\tDCASE2019 scores')
        print('\tLocalization-only scores: DOA_error: {:0.1f}, Frame recall: {:0.1f}'.format(
            doa_metric[best_epoch, 0], doa_metric[best_epoch, 1] * 100))
        print('\tDetection-only scores: Error rate: {:0.2f}, F-score: {:0.1f}\n'.format(
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1] * 100))

        # ------------------ Calculate metric scores for unseen test split ---------------------------------
        print('\nLoading the best model and predicting results on the testing split')
        print('\tLoading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            params=params, split=split, shuffle=False, per_file=params['dcase_output'],
            is_eval=True if params['mode'] == 'eval' else False)

        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name), params['doa_objective'])
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            verbose=2
        )

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(
            pred_test[1] if params['doa_objective'] == 'mse' else pred_test[1][:, :, nb_classes:])

        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(params['dcase_dir'],
                                             '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()

            # Number of frames in one batch (batch_size * sequence_length) consists of all the 3000 frames above
            # with zero padding in the remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()

            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(dcase_dump_folder, test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = feat_cls.regression_label_format_to_output_format(
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :])
                data_gen_test.write_output_format_file(output_file, output_dict)

        if params['mode'] == 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out, nb_classes, params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])

            # Calculate DCASE2019 scores
            test_sed_loss = evaluation_metrics.compute_sed_scores(
                test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr_xyz(
                test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.early_stopping_metric(test_sed_loss, test_doa_loss)

            # Calculate DCASE2020 scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(
                nb_classes=data_gen_test.get_nb_classes(), doa_threshold=20)
            test_pred_dict = feat_cls.regression_label_format_to_output_format(test_sed_pred, test_doa_pred)
            test_gt_dict = feat_cls.regression_label_format_to_output_format(test_sed_gt, test_doa_gt)

            test_pred_blocks_dict = feat_cls.segment_labels(test_pred_dict, test_sed_pred.shape[0])
            test_gt_blocks_dict = feat_cls.segment_labels(test_gt_dict, test_sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(test_pred_blocks_dict, test_gt_blocks_dict)
            test_new_metric = cls_new_metric.compute_seld_scores()
            test_new_seld_metric = evaluation_metrics.early_stopping_metric(test_new_metric[:2], test_new_metric[2:])

            avg_scores_test.append([test_new_metric[0], test_new_metric[1], test_new_metric[2],
                                    test_new_metric[3], test_new_seld_metric])
            print('Results on test split:')
            print('\tDCASE2020 Scores')
            print('\tClass-aware localization scores: DOA Error: {:0.1f}, F-score: {:0.1f}'.format(
                test_new_metric[2], test_new_metric[3] * 100))
            print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(
                test_new_metric[0], test_new_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(test_new_seld_metric))

            print('\n\tDCASE2019 Scores')
            print('\tLocalization-only scores: DOA Error: {:0.1f}, Frame recall: {:0.1f}'.format(
                test_doa_loss[0], test_doa_loss[1] * 100))
            print('\tDetection-only scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(
                test_sed_loss[0], test_sed_loss[1] * 100))
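# reshape_3Dto2D(), used by both training scripts above, collapses (batch, time,
# feature) predictions into (batch * time, feature) so frame-wise metrics can be
# computed. A minimal sketch consistent with its usage above:
def reshape_3Dto2D(A):
    # Stack all sequences along the time axis
    return A.reshape(A.shape[0] * A.shape[1], A.shape[2])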
def __init__(self, dataset='foa', nb_ch=4, feat_label_dir='', is_eval=False, split=1, batch_size=32, seq_len=128,
             shuffle=True, per_file=False, feat_type='mel', doa=None, seed=1, onlyphase=False, trial=None):
    # MY ADDITION
    self.doa = doa  # specify whether regression or classification is used for doa
    self.seed = seed  # fix random seed so that each generator will give same results
    self.onlyphase = onlyphase

    self._per_file = per_file
    self._is_eval = is_eval
    self._splits = np.array(split)
    self._batch_size = batch_size
    self._seq_len = seq_len
    self._shuffle = shuffle
    self._feat_cls = cls_feature_class.FeatureClass(feat_label_dir=feat_label_dir, dataset=dataset,
                                                    is_eval=self._is_eval, doa=doa)
    self._label_dir = self._feat_cls.get_label_dir()
    self._feat_dir = self._feat_cls.get_normalized_feat_dir()

    if onlyphase:
        self._feat_dir = self._feat_dir + '_phase'
    if trial:
        self._feat_dir = self._feat_dir + '_trial' + str(trial)

    self._filenames_list = list()
    self._nb_frames_file = 0  # Using a fixed number of frames in feat files. Updated in _get_label_filenames_sizes()
    self._feat_len = None
    # Channel count is passed in directly; alternatively: self._feat_cls.get_nb_channels() for 'mel' or
    # phase-only features, else 2 * self._feat_cls.get_nb_channels()
    self._2_nb_ch = nb_ch
    self._label_len = None  # total length of label - DOA + SED
    self._doa_len = None  # DOA label length
    self._class_dict = self._feat_cls.get_classes()
    self._nb_classes = len(self._class_dict.keys())
    self._default_azi, self._default_ele = self._feat_cls.get_default_azi_ele_regr()
    self._get_filenames_list_and_feat_label_sizes()

    self._nb_azi = 36  # range(-180, 180, 10)
    self._nb_ele = 9   # range(-40, 50, 10)

    self._batch_seq_len = self._batch_size * self._seq_len
    self._circ_buf_feat = None
    self._circ_buf_label = None

    if self._per_file:
        self._nb_total_batches = len(self._filenames_list)
    else:
        self._nb_total_batches = int(np.floor(
            (len(self._filenames_list) * self._nb_frames_file) / float(self._seq_len * self._batch_size)))

    # self._dummy_feat_vec = np.ones(self._feat_len.shape) *
    print('\tDatagen_mode: {}, nb_files: {}, nb_classes: {}\n'
          '\tnb_frames_file: {}, feat_len: {}, nb_ch: {}, label_len: {}\n'.format(
              'eval' if self._is_eval else 'dev', len(self._filenames_list), self._nb_classes,
              self._nb_frames_file, self._feat_len, self._2_nb_ch, self._label_len))

    print('\tDataset: {}, split: {}\n'
          '\tbatch_size: {}, seq_len: {}, shuffle: {}\n'
          '\tlabel_dir: {}\n'
          '\tfeat_dir: {}\n'.format(dataset, split, self._batch_size, self._seq_len, self._shuffle,
                                    self._label_dir, self._feat_dir))
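# The classification-style DOA grid implied by _nb_azi/_nb_ele above spans
# 36 azimuth x 9 elevation = 324 candidate directions. A hypothetical helper
# (not part of the original class) mapping a flat grid index back to angles,
# assuming azimuth in range(-180, 180, 10) and elevation in range(-40, 50, 10):
def doa_index_to_angles(index, nb_ele=9):
    azi = -180 + 10 * (index // nb_ele)  # azimuth bin, degrees
    ele = -40 + 10 * (index % nb_ele)    # elevation bin, degrees
    return azi, ele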
def __init__(self, datagen_mode='train', dataset='ansim', ov=1, split=1, db=30, batch_size=32, seq_len=64,
             shuffle=True, nfft=512, classifier_mode='regr', weakness=0, cnn3d=False, xyz_def_zero=False,
             extra_name='', azi_only=False, debug_load_single_batch=False, data_format='channels_first',
             params=None, load_files_before_after_splitting_point=None):
    if params is None:
        params = {}
    self.params = params

    self._datagen_mode = datagen_mode
    self._classifier_mode = classifier_mode
    self._batch_size = batch_size
    self._seq_len = seq_len
    self._shuffle = shuffle
    self._feat_cls = cls_feature_class.FeatureClass(dataset=dataset, ov=ov, split=split, db=db, nfft=nfft)
    self._label_dir = self._feat_cls.get_label_dir(classifier_mode, weakness, extra_name)
    self._feat_dir = self._feat_cls.get_normalized_feat_dir(extra_name)
    self._thickness = weakness
    self._xyz_def_zero = xyz_def_zero
    self._azi_only = azi_only
    self._debug_load_single_batch = debug_load_single_batch
    self._data_format = data_format

    self._nb_frames_file = 0  # Assuming number of frames in feat files are the same
    self._feat_len = None
    self._2_nb_ch = 2 * self._feat_cls.get_nb_channels()
    self._label_len = None  # total length of label - DOA + SED
    self._doa_len = None  # DOA label length
    self._class_dict = self._feat_cls.get_classes()
    self._nb_classes = len(self._class_dict.keys())
    self._default_azi, self._default_ele = self._feat_cls.get_default_azi_ele_regr()
    self._is_cnn3d_model = cnn3d

    self._filenames_list = []
    self.create_filenames_list(load_files_before_after_splitting_point)
    self.get_feature_label_shapes()

    self._batch_seq_len = self._batch_size * self._seq_len
    self._circ_buf_feat = None
    self._circ_buf_label = None

    if self._debug_load_single_batch:
        # Keep just enough files to fill a single batch
        num_files_for_one_batch = int(np.ceil(float(self._batch_seq_len) / self._nb_frames_file))
        num_files_for_one_batch = max(num_files_for_one_batch, 1)
        self._filenames_list = self._filenames_list[:num_files_for_one_batch]

    self._nb_total_batches = int(np.floor(
        (len(self._filenames_list) * self._nb_frames_file) / float(self._batch_seq_len)))
    logger.info(f"Data generator {datagen_mode}: {self._nb_total_batches} batches per epoch.")
    assert self._nb_total_batches >= 1

    logger.info('Datagen_mode: {}, nb_files: {}, nb_classes: {}\n'
                'nb_frames_file: {}, feat_len: {}, nb_ch: {}, label_len: {}\n'.format(
                    self._datagen_mode, len(self._filenames_list), self._nb_classes,
                    self._nb_frames_file, self._feat_len, self._2_nb_ch, self._label_len))
    logger.info('Dataset: {}, ov: {}, split: {}\n'
                'batch_size: {}, seq_len: {}, shuffle: {}\n'
                'label_dir: {}\n'
                'feat_dir: {}\n'.format(dataset, ov, split, self._batch_size, self._seq_len, self._shuffle,
                                        self._label_dir, self._feat_dir))

    logger.debug("Complete file list:")
    for file_name in self._filenames_list:
        logger.debug(file_name)
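# Hedged usage sketch: the debug_load_single_batch flag above trims the file list
# so a single batch can be overfit as a sanity check. Assuming this __init__
# belongs to the repo's DataGenerator class and that it exposes the same
# generate() iterator as the other generators in this codebase:
# gen = DataGenerator(datagen_mode='train', dataset='ansim', ov=1, split=1,
#                     debug_load_single_batch=True)
# for feat, label in gen.generate():
#     ...  # every epoch sees the same single batch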