def main(argv):
    """
    Main wrapper for training the sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                     (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                      (default) 1
    """
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('Using default inputs for now')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        test_splits = [1, 2, 3, 4]
        val_splits = [2, 3, 4, 1]
        train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]

        # SUGGESTION: Considering the long training time, major tuning of the method can be done on the first split.
        # Once you finalize the method you can evaluate its performance on the complete cross-validation splits
        # test_splits = [1]
        # val_splits = [2]
        # train_splits = [[3, 4]]

    elif params['mode'] == 'eval':
        test_splits = [0]
        val_splits = [1]
        train_splits = [[2, 3, 4]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print('\n\n---------------------------------------------------------------------------------------------------')
        print('------------------------------------      SPLIT {}   -----------------------------------------------'.format(split))
        print('---------------------------------------------------------------------------------------------------')

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id, params['dataset'], params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'], split=train_splits[split_cnt], batch_size=params['batch_size'],
            seq_len=params['sequence_length'], feat_label_dir=params['feat_label_dir'])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            dataset=params['dataset'], split=val_splits[split_cnt], batch_size=params['batch_size'],
            seq_len=params['sequence_length'], feat_label_dir=params['feat_label_dir'], shuffle=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(data_in, data_out))

        gt = collect_test_labels(data_gen_val, data_out, params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        # rescaling the reference elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        nb_classes = data_gen_train.get_nb_classes()
        def_elevation = data_gen_train.get_default_elevation()
        doa_gt[:, nb_classes:] = doa_gt[:, nb_classes:] / (180. / def_elevation)

        print('MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, pool_size: {}\n\trnn_size: {}, fnn_size: {}\n'.format(
            params['dropout_rate'], params['nb_cnn2d_filt'], params['pool_size'],
            params['rnn_size'], params['fnn_size']))

        model = keras_model.get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'], pool_size=params['pool_size'],
                                      rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'])

        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        seld_metric = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        val_loss = np.zeros(params['nb_epochs'])
        doa_metric = np.zeros((params['nb_epochs'], 6))
        sed_metric = np.zeros((params['nb_epochs'], 2))
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else data_gen_train.get_total_batches_in_data(),
                validation_data=data_gen_val.generate(),
                validation_steps=2 if params['quick_test'] else data_gen_val.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2)
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
            val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else data_gen_val.get_total_batches_in_data(),
                verbose=2)

            # Calculate the metrics
            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(pred[1])

            # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
            doa_pred[:, nb_classes:] = doa_pred[:, nb_classes:] / (180. / def_elevation)

            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr(doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.compute_seld_metric(sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, val_loss, sed_metric, doa_metric, seld_metric)

            patience_cnt += 1
            if seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
                'ER_overall: %.2f, F1_overall: %.2f, '
                'doa_error_pred: %.2f, good_pks_ratio: %.2f, '
                'seld_score: %.2f, best_seld_score: %.2f, best_epoch: %d\n' %
                (epoch_cnt, time.time() - start, tr_loss[epoch_cnt], val_loss[epoch_cnt],
                 sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1],
                 doa_metric[epoch_cnt, 0], doa_metric[epoch_cnt, 1],
                 seld_metric[epoch_cnt], best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([sed_metric[best_epoch, 0], sed_metric[best_epoch, 1], doa_metric[best_epoch, 0],
                               doa_metric[best_epoch, 1], best_seld_metric])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score: {}'.format(best_seld_metric))
        print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(doa_metric[best_epoch, 0], doa_metric[best_epoch, 1]))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print('Loading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'], split=split, batch_size=params['batch_size'],
            seq_len=params['sequence_length'], feat_label_dir=params['feat_label_dir'], shuffle=False,
            per_file=params['dcase_output'], is_eval=True if params['mode'] == 'eval' else False)

        print('\nLoading the best model and predicting results on the testing split')
        model = load_model('{}_model.h5'.format(unique_name))
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            verbose=2)

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(pred_test[1])

        # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        test_doa_pred[:, nb_classes:] = test_doa_pred[:, nb_classes:] / (180. / def_elevation)

        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(params['dcase_dir'], '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()

            # Number of frames in one batch (batch_size * sequence_length) consists of all the 3000 frames above with
            # zero padding in the remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()

            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(dcase_dump_folder, test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = evaluation_metrics.regression_label_format_to_output_format(
                    data_gen_test,
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :] * 180 / np.pi)
                evaluation_metrics.write_output_format_file(output_file, output_dict)

        if params['mode'] == 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out, params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])

            # rescaling the reference elevation from [-180 180] to [-def_elevation def_elevation] for scoring purpose
            test_doa_gt[:, nb_classes:] = test_doa_gt[:, nb_classes:] / (180. / def_elevation)

            test_sed_loss = evaluation_metrics.compute_sed_scores(test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr(test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.compute_seld_metric(test_sed_loss, test_doa_loss)

            avg_scores_test.append([test_sed_loss[0], test_sed_loss[1], test_doa_loss[0], test_doa_loss[1], test_metric_loss])
            print('Results on test split:')
            print('\tSELD_score: {}'.format(test_metric_loss))
            print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(test_doa_loss[0], test_doa_loss[1]))
            print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(test_sed_loss[0], test_sed_loss[1]))

    print('\n\nValidation split scores per fold:\n')
    for cnt in range(len(val_splits)):
        print('\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'.format(
            cnt, avg_scores_val[cnt][0], avg_scores_val[cnt][1], avg_scores_val[cnt][2],
            avg_scores_val[cnt][3], avg_scores_val[cnt][4]))

    if params['mode'] == 'dev':
        print('\n\nTesting split scores per fold:\n')
        for cnt in range(len(val_splits)):
            print('\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'.format(
                cnt, avg_scores_test[cnt][0], avg_scores_test[cnt][1], avg_scores_test[cnt][2],
                avg_scores_test[cnt][3], avg_scores_test[cnt][4]))
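# Neither reshape_3Dto2D() nor compute_seld_metric() is defined in this file.
# Minimal sketches of their assumed behavior, inferred from the call sites above:
# reshape_3Dto2D collapses the batch and time axes before frame-wise scoring, and
# compute_seld_metric averages the four SED/DOA terms into the single early
# stopping score (lower is better).
import numpy as np


def reshape_3Dto2D(A):
    # (batch, time, feat) -> (batch * time, feat)
    return A.reshape(A.shape[0] * A.shape[1], A.shape[2])


def compute_seld_metric(sed_error, doa_error):
    # sed_error = (ER_overall, F1_overall); doa_error = (doa_error, frame_recall, ...).
    # Error terms enter directly, score terms as their complement; the DOA error
    # is normalized by 180 degrees.
    return np.mean([sed_error[0], 1 - sed_error[1], doa_error[0] / 180., 1 - doa_error[1]])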
def main(argv):
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        # test_splits = [1, 2, 3, 4]
        # val_splits = [2, 3, 4, 1]
        # train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]

        # TODO for debug only
        test_splits = [1]
        val_splits = [1]
        train_splits = [[1, 1]]

        # SUGGESTION: Considering the long training time, major tuning of the method can be done on the first split.
        # Once you finalize the method you can evaluate its performance on the complete cross-validation splits
        # test_splits = [1]
        # val_splits = [2]
        # train_splits = [[3, 4]]

    elif params['mode'] == 'eval':
        test_splits = [0]
        val_splits = [1]
        train_splits = [[2, 3, 4]]

    # NOTE: this trimmed debug variant assumes `split`, `pred_test`, `nb_classes`,
    # `def_elevation` and `avg_scores_test` from the full training script above;
    # the commented-out model loading/prediction block must be restored before the
    # lines that follow it can run.

    # ------------------  Calculate metric scores for unseen test split ---------------------------------
    print('Loading testing dataset:')
    data_gen_test = cls_data_generator.DataGenerator(
        dataset=params['dataset'], split=split, batch_size=params['batch_size'],
        seq_len=params['sequence_length'], feat_label_dir=params['feat_label_dir'], shuffle=False,
        per_file=params['dcase_output'], is_eval=True if params['mode'] == 'eval' else False)

    # print('\nLoading the best model and predicting results on the testing split')
    # model = load_model('{}_model.h5'.format(unique_name))
    # pred_test = model.predict_generator(
    #     generator=data_gen_test.generate(),
    #     steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
    #     verbose=2
    # )

    test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
    test_doa_pred = evaluation_metrics.reshape_3Dto2D(pred_test[1])

    # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
    test_doa_pred[:, nb_classes:] = test_doa_pred[:, nb_classes:] / (180. / def_elevation)

    if params['dcase_output']:
        # Dump results in DCASE output format for calculating final scores
        dcase_dump_folder = os.path.join(params['dcase_dir'], '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.create_folder(dcase_dump_folder)
        print('Dumping recording-wise results in: {}'.format(dcase_dump_folder))

        test_filelist = data_gen_test.get_filelist()
        # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
        max_frames_with_content = data_gen_test.get_nb_frames()

        # Number of frames in one batch (batch_size * sequence_length) consists of all the 3000 frames above with
        # zero padding in the remaining frames
        frames_per_file = data_gen_test.get_frame_per_file()

        for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
            output_file = os.path.join(dcase_dump_folder, test_filelist[file_cnt].replace('.npy', '.csv'))
            dc = file_cnt * frames_per_file
            output_dict = evaluation_metrics.regression_label_format_to_output_format(
                data_gen_test,
                test_sed_pred[dc:dc + max_frames_with_content, :],
                test_doa_pred[dc:dc + max_frames_with_content, :] * 180 / np.pi)
            evaluation_metrics.write_output_format_file(output_file, output_dict)

    if params['mode'] == 'dev':
        _, _, test_data_out = data_gen_test.get_data_sizes()
        test_gt = collect_test_labels(data_gen_test, test_data_out, params['quick_test'])
        test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
        test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])

        # rescaling the reference elevation from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        test_doa_gt[:, nb_classes:] = test_doa_gt[:, nb_classes:] / (180. / def_elevation)

        test_sed_loss = evaluation_metrics.compute_sed_scores(test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
        test_doa_loss = evaluation_metrics.compute_doa_scores_regr(test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
        test_metric_loss = evaluation_metrics.compute_seld_metric(test_sed_loss, test_doa_loss)

        avg_scores_test.append([test_sed_loss[0], test_sed_loss[1], test_doa_loss[0], test_doa_loss[1], test_metric_loss])
        print('Results on test split:')
        print('\tSELD_score: {}'.format(test_metric_loss))
        print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(test_doa_loss[0], test_doa_loss[1]))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(test_sed_loss[0], test_sed_loss[1]))
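# Worked example of the elevation rescaling done above. A sketch: def_elevation = 50
# is an assumption matching a +/-50 degree elevation range, while azimuth and the
# raw regression output both span [-180, 180]; dividing by 180/def_elevation maps
# the elevation columns back onto [-50, 50] before scoring.
import numpy as np


def _elevation_rescaling_example():
    nb_classes, def_elevation = 1, 50.
    doa = np.array([[30., -90.]])  # one frame, one class: [azimuth | elevation]
    doa[:, nb_classes:] = doa[:, nb_classes:] / (180. / def_elevation)
    return doa  # [[30., -25.]]: elevation rescaled, azimuth untouched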
def main(args):
    '''
    Main wrapper for training the sound event detection (SED) network.

    :param args: parsed command-line arguments; this variant reads
        args.dataset, args.mode, args.name (used as task_id) and args.job_id.
    '''
    # use parameter set defined by user
    dataset, mode, task_id, job_id = args.dataset, args.mode, args.name, args.job_id
    task = 'sed'
    feat_type = 'mel'
    nb_ch = 4
    doa_type = None
    params, model_params = parameter.get_params(dataset=dataset, mode=mode, task_id=task_id,
                                                feat_type=feat_type, doa=doa_type)

    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        test_splits = [1, 2, 3, 4]
        val_splits = [2, 3, 4, 1]
        train_splits = [[3, 4], [4, 1], [1, 2], [2, 3]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print('\nThis is split {}'.format(split_cnt))

        # Unique name for the run
        model_dir_prefix = os.path.join(params['model_dir'], task) if task == 'sed' \
            else os.path.join(params['model_dir'], 'doa_reg')
        cls_feature_class.create_folder(model_dir_prefix)
        # model_id = int(job_id) + split_cnt
        unique_name = '{}{}_{}_{}_sed_dev_split{}'.format(task_id, str(job_id), params['dataset'],
                                                          params['feat_type'], split_cnt + 1)
        unique_name = os.path.join(model_dir_prefix, unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print('\tmodel unique name: {}\n'.format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'], split=train_splits[split_cnt], batch_size=params['batch_size'],
            seq_len=params['seq_length'], feat_label_dir=params['feat_label_dir'],
            feat_type=feat_type, doa=doa_type)

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(
            dataset=params['dataset'], split=val_splits[split_cnt], batch_size=params['batch_size'],
            seq_len=3000, per_file=True, feat_label_dir=params['feat_label_dir'], shuffle=False,
            feat_type=feat_type, doa=doa_type)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(data_in, data_out))

        gt = collect_test_labels_3000(data_gen_val)
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt)  # [3000*100, 11]

        nb_classes = data_gen_train.get_nb_classes()
        def_elevation = data_gen_train.get_default_elevation()

        if task_id == 'crnn':
            model = CUDA(CRNN_SED(data_in, data_out[0]))
        elif task_id == 'mcrnn':
            model = CUDA(MCRNN_SED(data_in, data_out[0]))
        model.apply(kaiming_init)

        total_num = sum(param.numel() for param in model.parameters())
        print('==========================================')
        print('Total parameter number for {}: {}'.format(model_params['method'], total_num))
        print('==========================================')

        # Pytorch optimizer
        optimizer = optim.Adam(params=model.parameters(), lr=0.001)

        feat_torch = CUDA(Variable(torch.FloatTensor(params['batch_size'], nb_ch,
                                                     params['seq_length'], params['feat_dim'])))
        label_sed = CUDA(Variable(torch.FloatTensor(params['batch_size'], params['seq_length'], 11)))

        best_seld_metric = 99999
        best_sed_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        seld_metric = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        sed_val_loss = np.zeros(params['nb_epochs'])
        sed_metric = np.zeros((params['nb_epochs'], 2))
        nb_epoch = params['nb_epochs']

        # start training
        pbar_epoch = tqdm(total=nb_epoch, desc='[Epoch]')
        for epoch_cnt in range(nb_epoch):
            # train stage
            model.train()
            iter_cnt = 0
            for feat, label in data_gen_train.generate():
                feat_torch.resize_(params['batch_size'], nb_ch, params['seq_length'], params['feat_dim'])
                feat_torch.data.copy_(torch.from_numpy(feat))
                label_sed.resize_(params['batch_size'], params['seq_length'], 11)
                label_sed.data.copy_(torch.from_numpy(label[0]))

                sed = model(feat_torch)
                sed_loss = bce_loss(sed, label_sed)
                doa_loss = 0.0
                total_loss = sed_loss + doa_loss

                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                if iter_cnt % params['print_iter'] == 0:
                    pbar_epoch.write('Iteration: {:3d}, sed_loss: {:.4f}, doa_loss: {:.4f}, total_loss: {:.4f}'
                                     .format(iter_cnt, sed_loss, doa_loss, total_loss))
                # pbar_iteration.update(1)
                iter_cnt += 1
                if iter_cnt >= data_gen_train.get_total_batches_in_data():
                    break

            # validation stage
            iter_cnt = 0
            sed_validation_loss = 0
            entire_pred_sed = np.zeros((data_gen_val._batch_size * data_gen_val.get_total_batches_in_data(), 3000, 11))
            model.eval()
            with torch.no_grad():
                for feat, label in data_gen_val.generate():
                    batch_size = feat.shape[0]
                    feat_torch.resize_(batch_size, nb_ch, 3000, params['feat_dim'])
                    feat_torch.data.copy_(torch.from_numpy(feat))
                    label_sed.resize_(batch_size, 3000, 11)
                    label_sed.copy_(torch.from_numpy(label[0]))

                    sed = model(feat_torch)
                    sed_loss = bce_loss(sed, label_sed)
                    sed_validation_loss += sed_loss

                    # concat all predictions
                    entire_pred_sed[iter_cnt * batch_size:(iter_cnt + 1) * batch_size, :] = sed.detach().cpu().numpy()
                    iter_cnt += 1
                    if iter_cnt >= data_gen_val.get_total_batches_in_data():
                        break

            sed_validation_loss = sed_validation_loss / data_gen_val.get_total_batches_in_data()
            tr_loss[epoch_cnt] = total_loss
            sed_val_loss[epoch_cnt] = sed_validation_loss

            # Calculate the metrics
            sed_pred = evaluation_metrics.reshape_3Dto2D(entire_pred_sed) > params['threshold']  # compared with threshold
            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(sed_pred, sed_gt, data_gen_val.nb_frames_1s())

            patience_cnt += 1
            if sed_metric[epoch_cnt, 0] < best_sed_metric:
                best_sed_metric = sed_metric[epoch_cnt, 0]
                best_epoch = epoch_cnt
                save_model(model, model_name)
                patience_cnt = 0

            pbar_epoch.update(1)
            pbar_epoch.write(
                'epoch_cnt: %d, sed_tr_loss: %.4f, sed_val_loss: %.4f, ER_overall: %.2f, F1_overall: %.2f, '
                'best_sed_ER: %.4f, best_epoch: %d\n' %
                (epoch_cnt, tr_loss[epoch_cnt], sed_val_loss[epoch_cnt],
                 sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1], best_sed_metric, best_epoch))
            if patience_cnt >= params['patience']:
                break

        pbar_epoch.close()

        avg_scores_val.append([sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]])
        # , doa_metric[best_epoch, 0], doa_metric[best_epoch, 1], best_seld_metric])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(sed_metric[best_epoch, 0], sed_metric[best_epoch, 1]))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print('Loading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'], split=split, batch_size=params['batch_size'], seq_len=3000,
            feat_label_dir=params['feat_label_dir'], shuffle=False, per_file=True,
            is_eval=True if params['mode'] == 'eval' else False,
            feat_type=feat_type, doa=doa_type)
        test_batch_size = data_gen_test._batch_size

        print('\nLoading the best model and predicting results on the testing split')
        model = load_model(model, '{}_model.h5'.format(unique_name))
        model.eval()

        # test stage
        total_test_batches = data_gen_test.get_total_batches_in_data()
        pbar_test = tqdm(total=total_test_batches, desc='[Testing]')
        iter_cnt = 0
        entire_test_sed = np.zeros((100, 3000, 11))
        with torch.no_grad():
            if params['mode'] == 'dev':
                for feat, label in data_gen_test.generate():
                    batch_size = feat.shape[0]
                    feat_torch.data.resize_(batch_size, nb_ch, 3000, params['feat_dim'])
                    feat_torch.data.copy_(torch.from_numpy(feat))

                    sed = model(feat_torch)

                    # concat all predictions
                    entire_test_sed[iter_cnt * test_batch_size:(iter_cnt + 1) * test_batch_size, :] = \
                        sed.detach().cpu().numpy()
                    pbar_test.update(1)
                    iter_cnt += 1
                    if iter_cnt >= data_gen_test.get_total_batches_in_data():
                        break
                print('the test batch_size is {}'.format(batch_size))
        pbar_test.close()

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(entire_test_sed) > params['threshold']

        if params['mode'] == 'dev':
            _, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels_3000(data_gen_test)
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt)

            test_sed_loss = evaluation_metrics.compute_sed_scores(test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            avg_scores_test.append([test_sed_loss[0], test_sed_loss[1]])
            print('Results on test split:')
            print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(test_sed_loss[0], test_sed_loss[1]))

    print('\n\nValidation split scores per fold:\n')
    for cnt in range(len(val_splits)):
        print('\t Split {} - SED ER: {} F1: {}'.format(val_splits[cnt], avg_scores_val[cnt][0], avg_scores_val[cnt][1]))

    if params['mode'] == 'dev':
        print('\n\nTesting split scores per fold:\n')
        for cnt in range(len(val_splits)):
            print('\t Split {} - SED ER: {} F1: {}'.format(test_splits[cnt], avg_scores_test[cnt][0], avg_scores_test[cnt][1]))
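# CUDA(), kaiming_init(), save_model() and bce_loss are used by the loops above
# but defined elsewhere in this variant. Minimal sketches of what they are
# assumed to do, consistent with the call sites:
import torch
import torch.nn as nn

bce_loss = nn.BCELoss()  # assumed module-level loss for the sigmoid SED output


def CUDA(x):
    # move a tensor or module to the GPU when one is available
    return x.cuda() if torch.cuda.is_available() else x


def kaiming_init(m):
    # He initialization for conv/linear layers, zero bias (applied via model.apply)
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:
            m.bias.data.fill_(0.)


def save_model(model, path):
    # persist only the weights; load_model is assumed to do the inverse
    torch.save(model.state_dict(), path)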
def main(argv):
    """
    Main wrapper for training the sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                     (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                      (default) 1
    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('Using default inputs for now')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None

    if params['mode'] == 'dev':
        test_splits = [6]
        val_splits = [5]
        train_splits = [[1, 2, 3, 4]]
    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[6]]
        train_splits = [[1, 2, 3, 4, 5]]

    for split_cnt, split in enumerate(test_splits):
        print('\n\n---------------------------------------------------------------------------------------------------')
        print('------------------------------------      SPLIT {}   -----------------------------------------------'.format(split))
        print('---------------------------------------------------------------------------------------------------')

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id, params['dataset'], params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(params=params, split=val_splits[split_cnt],
                                                        shuffle=False, per_file=True, is_eval=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(data_in, data_out))

        nb_classes = data_gen_train.get_nb_classes()

        print('MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size: {}, t_pool_size: {}\n\t'
              'rnn_size: {}, fnn_size: {}\n\tdoa_objective: {}\n'.format(
                  params['dropout_rate'], params['nb_cnn2d_filt'], params['f_pool_size'], params['t_pool_size'],
                  params['rnn_size'], params['fnn_size'], params['doa_objective']))

        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'], f_pool_size=params['f_pool_size'],
                                      t_pool_size=params['t_pool_size'], rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'], weights=params['loss_weights'],
                                      doa_objective=params['doa_objective'], is_accdoa=params['is_accdoa'])

        # Dump results in DCASE output format for calculating final scores
        dcase_output_val_folder = os.path.join(params['dcase_output_dir'],
                                               '{}_{}_{}_val'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_val_folder)
        print('Dumping recording-wise val results in: {}'.format(dcase_output_val_folder))

        # Initialize evaluation metric class
        score_obj = ComputeSELDResults(params)

        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        tr_loss = np.zeros(nb_epoch)
        seld_metric = np.zeros((nb_epoch, 5))

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else data_gen_train.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2)
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else data_gen_val.get_total_batches_in_data(),
                verbose=2)

            if params['is_accdoa']:
                sed_pred, doa_pred = get_accdoa_labels(pred, nb_classes)
                sed_pred = reshape_3Dto2D(sed_pred)
                doa_pred = reshape_3Dto2D(doa_pred)
            else:
                sed_pred = reshape_3Dto2D(pred[0]) > 0.5
                doa_pred = reshape_3Dto2D(pred[1] if params['doa_objective'] == 'mse' else pred[1][:, :, nb_classes:])

            # Calculate the DCASE 2021 metrics - Location-aware detection and Class-aware localization scores
            dump_DCASE2021_results(data_gen_val, feat_cls, dcase_output_val_folder, sed_pred, doa_pred)
            seld_metric[epoch_cnt, :] = score_obj.get_SELD_Results(dcase_output_val_folder)

            patience_cnt += 1
            if seld_metric[epoch_cnt, -1] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt, -1]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                '\n\t\t DCASE2021 SCORES: ER: {:0.2f}, F: {:0.1f}, LE: {:0.1f}, LR: {:0.1f}, '
                'seld_score (early stopping score): {:0.2f}, '
                'best_seld_score: {:0.2f}, best_epoch: {}\n'.format(
                    epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
                    seld_metric[epoch_cnt, 0], seld_metric[epoch_cnt, 1] * 100,
                    seld_metric[epoch_cnt, 2], seld_metric[epoch_cnt, 3] * 100,
                    seld_metric[epoch_cnt, -1], best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(best_seld_metric))

        print('\n\tDCASE2021 scores')
        print('\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'.format(
            seld_metric[best_epoch, 2], seld_metric[best_epoch, 3] * 100))
        print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(
            seld_metric[best_epoch, 0], seld_metric[best_epoch, 1] * 100))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print('\nLoading the best model and predicting results on the testing split')
        print('\tLoading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            params=params, split=split, shuffle=False, per_file=True,
            is_eval=True if params['mode'] == 'eval' else False)

        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name), params['doa_objective'])
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            verbose=2)

        if params['is_accdoa']:
            test_sed_pred, test_doa_pred = get_accdoa_labels(pred_test, nb_classes)
            test_sed_pred = reshape_3Dto2D(test_sed_pred)
            test_doa_pred = reshape_3Dto2D(test_doa_pred)
        else:
            test_sed_pred = reshape_3Dto2D(pred_test[0]) > 0.5
            test_doa_pred = reshape_3Dto2D(pred_test[1] if params['doa_objective'] == 'mse' else pred_test[1][:, :, nb_classes:])

        # Dump results in DCASE output format for calculating final scores
        dcase_output_test_folder = os.path.join(params['dcase_output_dir'],
                                                '{}_{}_{}_test'.format(task_id, params['dataset'], params['mode']))
        cls_feature_class.delete_and_create_folder(dcase_output_test_folder)
        print('Dumping recording-wise test results in: {}'.format(dcase_output_test_folder))
        dump_DCASE2021_results(data_gen_test, feat_cls, dcase_output_test_folder, test_sed_pred, test_doa_pred)

        if params['mode'] == 'dev':
            # Calculate DCASE2021 scores
            test_seld_metric = score_obj.get_SELD_Results(dcase_output_test_folder)

            print('Results on test split:')
            print('\tDCASE2021 Scores')
            print('\tClass-aware localization scores: Localization Error: {:0.1f}, Localization Recall: {:0.1f}'.format(
                test_seld_metric[2], test_seld_metric[3] * 100))
            print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(
                test_seld_metric[0], test_seld_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(test_seld_metric[-1]))
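# get_accdoa_labels() converts the ACCDOA output into (SED, DOA) pairs: each class
# is represented by a Cartesian (x, y, z) vector whose norm encodes activity. A
# sketch consistent with the calls above; the 0.5 activity threshold is the usual
# baseline choice and is an assumption here.
import numpy as np


def get_accdoa_labels(accdoa_in, nb_classes):
    x = accdoa_in[:, :, :nb_classes]
    y = accdoa_in[:, :, nb_classes:2 * nb_classes]
    z = accdoa_in[:, :, 2 * nb_classes:]
    sed = np.sqrt(x**2 + y**2 + z**2) > 0.5  # active where the vector norm exceeds 0.5
    return sed, accdoa_in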
def main(argv):
    """
    Main wrapper for training the sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                     (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                      (default) 1
    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('Using default inputs for now')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = parameter.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    feat_cls = cls_feature_class.FeatureClass(params)
    train_splits, val_splits, test_splits = None, None, None

    if params['mode'] == 'dev':
        test_splits = [1]
        val_splits = [2]
        train_splits = [[3, 4, 5, 6]]
    elif params['mode'] == 'eval':
        test_splits = [[7, 8]]
        val_splits = [[1]]
        train_splits = [[2, 3, 4, 5, 6]]

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print('\n\n---------------------------------------------------------------------------------------------------')
        print('------------------------------------      SPLIT {}   -----------------------------------------------'.format(split))
        print('---------------------------------------------------------------------------------------------------')

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id, params['dataset'], params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(params=params, split=val_splits[split_cnt], shuffle=False)

        # Collect the reference labels for validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(data_in, data_out))

        nb_classes = data_gen_train.get_nb_classes()
        gt = collect_test_labels(data_gen_val, data_out, nb_classes, params['quick_test'])
        sed_gt = evaluation_metrics.reshape_3Dto2D(gt[0])
        doa_gt = evaluation_metrics.reshape_3Dto2D(gt[1])

        print('MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size: {}, t_pool_size: {}\n\t'
              'rnn_size: {}, fnn_size: {}\n\tdoa_objective: {}\n'.format(
                  params['dropout_rate'], params['nb_cnn2d_filt'], params['f_pool_size'], params['t_pool_size'],
                  params['rnn_size'], params['fnn_size'], params['doa_objective']))

        print('Using loss weights : {}'.format(params['loss_weights']))
        model = keras_model.get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'],
                                      nb_cnn2d_filt=params['nb_cnn2d_filt'], f_pool_size=params['f_pool_size'],
                                      t_pool_size=params['t_pool_size'], rnn_size=params['rnn_size'],
                                      fnn_size=params['fnn_size'], weights=params['loss_weights'],
                                      doa_objective=params['doa_objective'])

        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        seld_metric = np.zeros(nb_epoch)
        new_seld_metric = np.zeros(nb_epoch)
        tr_loss = np.zeros(nb_epoch)
        doa_metric = np.zeros((nb_epoch, 6))
        sed_metric = np.zeros((nb_epoch, 2))
        new_metric = np.zeros((nb_epoch, 4))

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else data_gen_train.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2)
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else data_gen_val.get_total_batches_in_data(),
                verbose=2)

            sed_pred = evaluation_metrics.reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = evaluation_metrics.reshape_3Dto2D(
                pred[1] if params['doa_objective'] == 'mse' else pred[1][:, :, nb_classes:])

            # Calculate the DCASE 2019 metrics - Detection-only and Localization-only scores
            sed_metric[epoch_cnt, :] = evaluation_metrics.compute_sed_scores(sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[epoch_cnt, :] = evaluation_metrics.compute_doa_scores_regr_xyz(doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = evaluation_metrics.early_stopping_metric(sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])

            # Calculate the DCASE 2020 metrics - Location-aware detection and Class-aware localization scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(nb_classes=data_gen_val.get_nb_classes(),
                                                                 doa_threshold=params['lad_doa_thresh'])
            pred_dict = feat_cls.regression_label_format_to_output_format(sed_pred, doa_pred)
            gt_dict = feat_cls.regression_label_format_to_output_format(sed_gt, doa_gt)

            pred_blocks_dict = feat_cls.segment_labels(pred_dict, sed_pred.shape[0])
            gt_blocks_dict = feat_cls.segment_labels(gt_dict, sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(pred_blocks_dict, gt_blocks_dict)
            new_metric[epoch_cnt, :] = cls_new_metric.compute_seld_scores()
            new_seld_metric[epoch_cnt] = evaluation_metrics.early_stopping_metric(
                new_metric[epoch_cnt, :2], new_metric[epoch_cnt, 2:])

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, sed_metric, doa_metric, seld_metric, new_metric, new_seld_metric)

            patience_cnt += 1
            if new_seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = new_seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: {}, time: {:0.2f}s, tr_loss: {:0.2f}, '
                '\n\t\t DCASE2019 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, FR: {:0.1f}, seld_score: {:0.2f}, '
                '\n\t\t DCASE2020 SCORES: ER: {:0.2f}, F: {:0.1f}, DE: {:0.1f}, DE_F: {:0.1f}, '
                'seld_score (early stopping score): {:0.2f}, '
                'best_seld_score: {:0.2f}, best_epoch: {}\n'.format(
                    epoch_cnt, time.time() - start, tr_loss[epoch_cnt],
                    sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1] * 100,
                    doa_metric[epoch_cnt, 0], doa_metric[epoch_cnt, 1] * 100, seld_metric[epoch_cnt],
                    new_metric[epoch_cnt, 0], new_metric[epoch_cnt, 1] * 100,
                    new_metric[epoch_cnt, 2], new_metric[epoch_cnt, 3] * 100, new_seld_metric[epoch_cnt],
                    best_seld_metric, best_epoch))
            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([new_metric[best_epoch, 0], new_metric[best_epoch, 1], new_metric[best_epoch, 2],
                               new_metric[best_epoch, 3], best_seld_metric])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score (early stopping score) : {}'.format(best_seld_metric))

        print('\n\tDCASE2020 scores')
        print('\tClass-aware localization scores: DOA_error: {:0.1f}, F-score: {:0.1f}'.format(
            new_metric[best_epoch, 2], new_metric[best_epoch, 3] * 100))
        print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(
            new_metric[best_epoch, 0], new_metric[best_epoch, 1] * 100))

        print('\n\tDCASE2019 scores')
        print('\tLocalization-only scores: DOA_error: {:0.1f}, Frame recall: {:0.1f}'.format(
            doa_metric[best_epoch, 0], doa_metric[best_epoch, 1] * 100))
        print('\tDetection-only scores: Error rate: {:0.2f}, F-score: {:0.1f}\n'.format(
            sed_metric[best_epoch, 0], sed_metric[best_epoch, 1] * 100))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print('\nLoading the best model and predicting results on the testing split')
        print('\tLoading testing dataset:')
        data_gen_test = cls_data_generator.DataGenerator(
            params=params, split=split, shuffle=False, per_file=params['dcase_output'],
            is_eval=True if params['mode'] == 'eval' else False)

        model = keras_model.load_seld_model('{}_model.h5'.format(unique_name), params['doa_objective'])
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            verbose=2)

        test_sed_pred = evaluation_metrics.reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = evaluation_metrics.reshape_3Dto2D(
            pred_test[1] if params['doa_objective'] == 'mse' else pred_test[1][:, :, nb_classes:])

        if params['dcase_output']:
            # Dump results in DCASE output format for calculating final scores
            dcase_dump_folder = os.path.join(params['dcase_dir'], '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            cls_feature_class.create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            # Number of frames for a 60 second audio with 20ms hop length = 3000 frames
            max_frames_with_content = data_gen_test.get_nb_frames()

            # Number of frames in one batch (batch_size * sequence_length) consists of all the 3000 frames above with
            # zero padding in the remaining frames
            frames_per_file = data_gen_test.get_frame_per_file()

            for file_cnt in range(test_sed_pred.shape[0] // frames_per_file):
                output_file = os.path.join(dcase_dump_folder, test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                output_dict = feat_cls.regression_label_format_to_output_format(
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :])
                data_gen_test.write_output_format_file(output_file, output_dict)

        if params['mode'] == 'dev':
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out, nb_classes, params['quick_test'])
            test_sed_gt = evaluation_metrics.reshape_3Dto2D(test_gt[0])
            test_doa_gt = evaluation_metrics.reshape_3Dto2D(test_gt[1])

            # Calculate DCASE2019 scores
            test_sed_loss = evaluation_metrics.compute_sed_scores(test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = evaluation_metrics.compute_doa_scores_regr_xyz(test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = evaluation_metrics.early_stopping_metric(test_sed_loss, test_doa_loss)

            # Calculate DCASE2020 scores
            cls_new_metric = SELD_evaluation_metrics.SELDMetrics(nb_classes=data_gen_test.get_nb_classes(), doa_threshold=20)
            test_pred_dict = feat_cls.regression_label_format_to_output_format(test_sed_pred, test_doa_pred)
            test_gt_dict = feat_cls.regression_label_format_to_output_format(test_sed_gt, test_doa_gt)

            test_pred_blocks_dict = feat_cls.segment_labels(test_pred_dict, test_sed_pred.shape[0])
            test_gt_blocks_dict = feat_cls.segment_labels(test_gt_dict, test_sed_gt.shape[0])

            cls_new_metric.update_seld_scores_xyz(test_pred_blocks_dict, test_gt_blocks_dict)
            test_new_metric = cls_new_metric.compute_seld_scores()
            test_new_seld_metric = evaluation_metrics.early_stopping_metric(test_new_metric[:2], test_new_metric[2:])

            avg_scores_test.append([test_new_metric[0], test_new_metric[1], test_new_metric[2], test_new_metric[3],
                                    test_new_seld_metric])
            print('Results on test split:')

            print('\tDCASE2020 Scores')
            print('\tClass-aware localization scores: DOA Error: {:0.1f}, F-score: {:0.1f}'.format(
                test_new_metric[2], test_new_metric[3] * 100))
            print('\tLocation-aware detection scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(
                test_new_metric[0], test_new_metric[1] * 100))
            print('\tSELD (early stopping metric): {:0.2f}'.format(test_new_seld_metric))

            print('\n\tDCASE2019 Scores')
            print('\tLocalization-only scores: DOA Error: {:0.1f}, Frame recall: {:0.1f}'.format(
                test_doa_loss[0], test_doa_loss[1] * 100))
            print('\tDetection-only scores: Error rate: {:0.2f}, F-score: {:0.1f}'.format(
                test_sed_loss[0], test_sed_loss[1] * 100))
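# collect_test_labels() is used above but defined elsewhere. A minimal sketch of
# its assumed behavior, inferred from the call sites: it drains the non-shuffled
# generator once and stacks the SED and DOA reference labels into two arrays whose
# shapes follow data_out. (nb_classes is accepted for signature compatibility and
# is not needed in this sketch.)
import numpy as np


def collect_test_labels(data_gen, data_out, nb_classes, quick_test):
    nb_batch = 2 if quick_test else data_gen.get_total_batches_in_data()
    batch_size = data_out[0][0]
    gt_sed = np.zeros((nb_batch * batch_size, data_out[0][1], data_out[0][2]))
    gt_doa = np.zeros((nb_batch * batch_size, data_out[1][1], data_out[1][2]))
    cnt = 0
    for feat, label in data_gen.generate():
        gt_sed[cnt * batch_size:(cnt + 1) * batch_size, :, :] = label[0]
        gt_doa[cnt * batch_size:(cnt + 1) * batch_size, :, :] = label[1]
        cnt += 1
        if cnt == nb_batch:
            break
    return gt_sed.astype(int), gt_doa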
def main(argv):
    """
    Main wrapper for training the sound event localization and detection network.

    :param argv: expects two optional inputs.
        first input: task_id - (optional) To choose the system configuration in parameters.py.
                     (default) 1 - uses default parameters
        second input: job_id - (optional) all the output files will be uniquely represented with this.
                      (default) 1
    """
    print(argv)
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expects two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('Using default inputs for now')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.autograd.set_detect_anomaly(True)

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = doanet_parameters.get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    # load Hungarian network for data association, and freeze all layers.
    hnet_model = HNetGRU(max_len=2).to(device)
    hnet_model.load_state_dict(torch.load("models/hnet_model.pt", map_location=torch.device('cpu')))
    for model_params in hnet_model.parameters():
        model_params.requires_grad = False
    print('---------------- Hungarian-net -------------------')
    print(hnet_model)

    # Training setup
    train_splits, val_splits, test_splits = None, None, None
    if params['mode'] == 'dev':
        test_splits = [1]
        val_splits = [2]
        train_splits = [[3, 4, 5, 6]]

    for split_cnt, split in enumerate(test_splits):
        print('\n\n---------------------------------------------------------------------------------------------------')
        print('------------------------------------      SPLIT {}   -----------------------------------------------'.format(split))
        print('---------------------------------------------------------------------------------------------------')

        # Unique name for the run
        cls_feature_class.create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(task_id, job_id, params['dataset'], params['mode'], split)
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = cls_data_generator.DataGenerator(params=params, split=train_splits[split_cnt])

        print('Loading validation dataset:')
        data_gen_val = cls_data_generator.DataGenerator(params=params, split=val_splits[split_cnt], shuffle=False)

        # Collect i/o data size and load model configuration
        data_in, data_out = data_gen_train.get_data_sizes()
        model = doanet_model.CRNN(data_in, data_out, params).to(device)
        # model.load_state_dict(torch.load("models/23_5624972_mic_dev_split1_model.h5", map_location='cpu'))

        print('---------------- DOA-net -------------------')
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(data_in, data_out))
        print('MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, f_pool_size: {}, t_pool_size: {}\n\t'
              'rnn_size: {}, fnn_size: {}\n'.format(
                  params['dropout_rate'], params['nb_cnn2d_filt'], params['f_pool_size'], params['t_pool_size'],
                  params['rnn_size'], params['fnn_size']))
        print(model)

        # start training
        best_val_epoch = -1
        best_doa, best_mota, best_ids, best_recall, best_precision, best_fscore = 180, 0, 1000, 0, 0, 0
        patience_cnt = 0
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']
        tr_loss_list = np.zeros(nb_epoch)
        val_loss_list = np.zeros(nb_epoch)
        hung_tr_loss_list = np.zeros(nb_epoch)
        hung_val_loss_list = np.zeros(nb_epoch)

        optimizer = optim.Adam(model.parameters(), lr=params['lr'])
        criterion = torch.nn.MSELoss()
        activity_loss = nn.BCEWithLogitsLoss()

        for epoch_cnt in range(nb_epoch):
            # ---------------------------------------------------------------------
            # TRAINING
            # ---------------------------------------------------------------------
            start_time = time.time()
            train_loss, train_dMOTP_loss, train_dMOTA_loss, train_act_loss = train_epoch(
                data_gen_train, optimizer, model, hnet_model, activity_loss, criterion, params, device)
            train_time = time.time() - start_time

            # ---------------------------------------------------------------------
            # VALIDATION
            # ---------------------------------------------------------------------
            start_time = time.time()
            val_metric = doa_metric()
            val_metric, val_loss, val_dMOTP_loss, val_dMOTA_loss, val_act_loss = test_epoch(
                data_gen_val, model, hnet_model, activity_loss, criterion, val_metric, params, device)
            val_hung_loss, val_mota, val_ids, val_recall_doa, val_precision_doa, val_fscore_doa = val_metric.get_results()
            val_time = time.time() - start_time

            # Save model if loss is good
            if val_hung_loss <= best_doa:
                best_val_epoch, best_doa, best_mota, best_ids, best_recall, best_precision, best_fscore = \
                    epoch_cnt, val_hung_loss, val_mota, val_ids, val_recall_doa, val_precision_doa, val_fscore_doa
                torch.save(model.state_dict(), model_name)

            # Print stats and plot scores
            print(
                'epoch: {}, time: {:0.2f}/{:0.2f}, '
                'train_loss: {:0.2f} {}, val_loss: {:0.2f} {}, '
                'LE/MOTA/IDS/LR/LP/LF: {:0.3f}/{}, '
                'best_val_epoch: {} {}'.format(
                    epoch_cnt, train_time, val_time,
                    train_loss,
                    '({:0.2f},{:0.2f},{:0.2f})'.format(train_dMOTP_loss, train_dMOTA_loss, train_act_loss)
                    if params['use_hnet'] else '',
                    val_loss,
                    '({:0.2f},{:0.2f},{:0.2f})'.format(val_dMOTP_loss, val_dMOTA_loss, val_act_loss)
                    if params['use_hnet'] else '',
                    val_hung_loss,
                    '{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}'.format(
                        val_mota, val_ids, val_recall_doa, val_precision_doa, val_fscore_doa),
                    best_val_epoch,
                    '({:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f})'.format(
                        best_doa, best_mota, best_ids, best_recall, best_precision, best_fscore)))

            tr_loss_list[epoch_cnt], val_loss_list[epoch_cnt], hung_val_loss_list[epoch_cnt] = \
                train_loss, val_loss, val_hung_loss
            plot_functions(unique_name, tr_loss_list, val_loss_list, hung_tr_loss_list, hung_val_loss_list)

            patience_cnt += 1
            if patience_cnt > params['patience']:
                break

        # ---------------------------------------------------------------------
        # Evaluate on unseen test data
        # ---------------------------------------------------------------------
        print('Load best model weights')
        model.load_state_dict(torch.load(model_name, map_location='cpu'))

        print('Loading unseen test dataset:')
        data_gen_test = cls_data_generator.DataGenerator(params=params, split=test_splits[split_cnt], shuffle=False)

        test_metric = doa_metric()
        test_metric, test_loss, test_dMOTP_loss, test_dMOTA_loss, test_act_loss = test_epoch(
            data_gen_test, model, hnet_model, activity_loss, criterion, test_metric, params, device)
        test_hung_loss, test_mota, test_ids, test_recall_doa, test_precision_doa, test_fscore_doa = \
            test_metric.get_results()

        print('test_loss: {:0.2f} {}, LE/MOTA/IDS/LR/LP/LF: {:0.3f}/{}'.format(
            test_loss,
            '({:0.2f},{:0.2f},{:0.2f})'.format(test_dMOTP_loss, test_dMOTA_loss, test_act_loss)
            if params['use_hnet'] else '',
            test_hung_loss,
            '{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}/{:0.2f}'.format(
                test_mota, test_ids, test_recall_doa, test_precision_doa, test_fscore_doa)))
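# train_epoch()/test_epoch() are called above but not defined in this file. The
# skeleton below is only a hedged sketch of the training half, under these
# assumptions: the generator yields (features, doa_labels) numpy pairs, the CRNN
# returns a (doa_predictions, activity_logits) pair, and the dMOTP/dMOTA/activity
# components produced by the Hungarian-network association are stubbed as zeros.
import torch


def train_epoch(data_gen, optimizer, model, hnet_model, activity_loss, criterion, params, device):
    model.train()
    running_loss, nb_batches = 0.0, 0
    for feat, label in data_gen.generate():
        feat = torch.from_numpy(feat).float().to(device)
        doa_gt = torch.from_numpy(label).float().to(device)

        optimizer.zero_grad()
        doa_pred, activity_logits = model(feat)  # assumed CRNN output pair
        loss = criterion(doa_pred, doa_gt)       # hnet_model-based association omitted in this sketch
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        nb_batches += 1
        if nb_batches >= data_gen.get_total_batches_in_data():
            break
    # dMOTP / dMOTA / activity components are placeholders in this sketch
    return running_loss / nb_batches, 0.0, 0.0, 0.0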