def audio_tagging_results(reference, estimated):
    """Compute per-class F-measures of clip-level (audio tagging) predictions against a reference."""
    classes = []
    if "event_label" in reference.columns:
        classes.extend(reference.event_label.dropna().unique())
        classes.extend(estimated.event_label.dropna().unique())
        classes = list(set(classes))
        mhe = ManyHotEncoder(classes)
        reference = format_df(reference, mhe)
        estimated = format_df(estimated, mhe)
    else:
        classes.extend(reference.event_labels.str.split(',', expand=True).unstack().dropna().unique())
        classes.extend(estimated.event_labels.str.split(',', expand=True).unstack().dropna().unique())
        classes = list(set(classes))
        mhe = ManyHotEncoder(classes)

    matching = reference.merge(estimated, how='outer', on="filename", suffixes=["_ref", "_pred"])

    def na_values(val):
        if type(val) is np.ndarray:
            return val
        if pd.isna(val):
            return np.zeros(len(classes))
        return val

    if not estimated.empty:
        matching.event_label_pred = matching.event_label_pred.apply(na_values)
        matching.event_label_ref = matching.event_label_ref.apply(na_values)

        tp, fp, fn, tn = intermediate_at_measures(np.array(matching.event_label_ref.tolist()),
                                                  np.array(matching.event_label_pred.tolist()))
        macro_res = macro_f_measure(tp, fp, fn)
    else:
        macro_res = np.zeros(len(classes))

    results_serie = pd.DataFrame(macro_res, index=mhe.labels)
    return results_serie[0]
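# The two helpers called above are not shown in this snippet. The sketch below is
# only an assumption of what they compute (per-class intermediate counts and the
# class-wise F-measure) over many-hot binary arrays of shape (n_clips, n_classes);
# the project's own implementation may differ.
import numpy as np

def intermediate_at_measures_sketch(encoded_ref, encoded_est):
    """Per-class TP, FP, FN, TN from binary arrays of shape (n_clips, n_classes)."""
    tp = ((encoded_ref == 1) & (encoded_est == 1)).sum(axis=0)
    fp = ((encoded_ref == 0) & (encoded_est == 1)).sum(axis=0)
    fn = ((encoded_ref == 1) & (encoded_est == 0)).sum(axis=0)
    tn = ((encoded_ref == 0) & (encoded_est == 0)).sum(axis=0)
    return tp, fp, fn, tn

def macro_f_measure_sketch(tp, fp, fn):
    """Class-wise F1; classes with no support and no predictions get 0."""
    denom = 2 * tp + fp + fn
    return np.where(denom > 0, 2 * tp / np.maximum(denom, 1), 0.0)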
def get_labels(self, ind, df_meta, wav_name, frames, out_filenames):
    """Build the metadata entries (filenames and weak labels) for the segments cut from one wav file."""
    cnt_max = len(out_filenames)
    if {"onset", "offset", "event_label"}.issubset(df_meta.columns):
        many_hot_encoder = ManyHotEncoder(self.classes, n_frames=cnt_max * frames)
        df_wav_name = df_meta[df_meta.filename == wav_name].copy()
        # Onset/offset are given in seconds in the file; convert them to frame indices
        df_wav_name["onset"] = df_wav_name["onset"] * cfg.sample_rate // cfg.hop_length
        df_wav_name["offset"] = df_wav_name["offset"] * cfg.sample_rate // cfg.hop_length

        y = many_hot_encoder.encode_strong_df(df_wav_name)
        encoded_labels = y.reshape(-1, frames, y.shape[-1])
        # Max-pool the strong labels over time to get one weak label set per segment
        weak_labels_frames = encoded_labels.max(axis=1)
        weak_labels_frames = [','.join(many_hot_encoder.decode_weak(weak_labels))
                              for weak_labels in weak_labels_frames]
        add_item = {"raw_filename": [wav_name for _ in range(len(out_filenames))],
                    "filename": out_filenames,
                    "event_labels": weak_labels_frames}
    elif "event_labels" in df_meta.columns:
        weak_labels_frames = [df_meta.iloc[ind]["event_labels"] for _ in range(len(out_filenames))]
        add_item = {"raw_filename": [wav_name for _ in range(len(out_filenames))],
                    "filename": out_filenames,
                    "event_labels": weak_labels_frames}
    else:
        add_item = {"raw_filename": [wav_name for _ in range(len(out_filenames))],
                    "filename": out_filenames}
    return add_item
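# Worked example of the seconds-to-frames conversion used above. The numeric values
# are assumed for illustration only; the real ones come from the config module.
# An onset of 1.5 s with sample_rate = 16000 and hop_length = 512 lands on
# feature frame 1.5 * 16000 // 512 = 46.
sample_rate, hop_length = 16000, 512       # hypothetical config values
onset_sec = 1.5
onset_frame = onset_sec * sample_rate // hop_length   # -> 46.0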
def test_model(state, reference_tsv_path, reduced_number_of_data=None, store_predictions_fname=None):
    dataset = DatasetDcase2019Task4(os.path.join(cfg.workspace),
                                    base_feature_dir=os.path.join(cfg.workspace, "dataset", "features"),
                                    save_log_feature=False)

    # Rebuild the model and its preprocessing from the saved state
    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]
    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])

    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    LOG.info(reference_tsv_path)
    df = dataset.initialize_and_get_df(reference_tsv_path, reduced_number_of_data)

    # Strong (event-level) predictions and metrics
    strong_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                 transform=transforms_valid)
    predictions = get_predictions(crnn, strong_dataload, many_hot_encoder.decode_strong,
                                  pooling_time_ratio, save_predictions=store_predictions_fname)
    compute_strong_metrics(predictions, df)

    # Weak (clip-level) metrics
    weak_dataload = DataLoadDf(df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                               transform=transforms_valid)
    weak_metric = get_f_measure_by_class(crnn, len(classes),
                                         DataLoader(weak_dataload, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
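# get_f_measure_by_class is not shown in this snippet; the sketch below is only an
# assumption of its contract: iterate over the weak DataLoader, threshold the model's
# clip-level (weak) output at 0.5 and accumulate class-wise F1 scores. The CRNN is
# assumed here to return a (strong, weak) pair of predictions.
import torch

def get_f_measure_by_class_sketch(model, n_classes, dataloader, threshold=0.5):
    tp = torch.zeros(n_classes)
    fp = torch.zeros(n_classes)
    fn = torch.zeros(n_classes)
    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            _, weak_pred = model(batch_x)                    # assumed (strong, weak) output
            pred = (weak_pred > threshold).float().cpu()
            target = batch_y.float().cpu()
            tp += (pred * target).sum(dim=0)
            fp += (pred * (1 - target)).sum(dim=0)
            fn += ((1 - pred) * target).sum(dim=0)
    # Class-wise F1 = 2TP / (2TP + FP + FN), clamped to avoid division by zero
    return (2 * tp / torch.clamp(2 * tp + fp + fn, min=1)).numpy()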
# DATA
# ########
dataset = DesedSynthetic(cfg.relative_data_path,
                         base_feature_dir=cfg.base_feature_dir,
                         save_log_feature=False)
dfs = get_dfs(dataset, weak_path, test_path, eval_path, subpart_data,
              valid_list=val_list,
              frames_in_sec=frames_in_sec,
              segment=segment,
              dropna=f_args.dropna,
              unique_fr=f_args.unique_fr,
              fixed_segment=f_args.fixed_segment)

if resume_training is None:
    classes = dataset.classes
    many_hot_encoder = ManyHotEncoder(classes)
else:
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])
    classes = many_hot_encoder.labels
encode_function_label = many_hot_encoder.encode_weak

# Datasets
trans_fr = [ApplyLog(), ToTensor(), Unsqueeze(0)]

train_weak_df_fr = dfs["train"]
train_weak_dl_fr = DataLoadDf(train_weak_df_fr, encode_function_label, transform=Compose(trans_fr))

if type_positive != "label" or type_negative != "label":
    unlabel_df_fr = dataset.get_df_feat_dir(cfg.unlabel, subpart_data=subpart_data,
                                            frames_in_sec=frames_in_sec)
    unlabel_dl_fr = DataLoadDf(unlabel_df_fr, encode_function_label, transform=Compose(trans_fr))
    datasets_mean = [train_weak_dl_fr, unlabel_dl_fr]
def test_model(state, reduced_number_of_data, store_predictions_fname=None):
    # Rebuild the model and its preprocessing from the saved state
    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]
    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(state["many_hot_encoder"])

    # ##############
    # Validation
    # ##############
    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    # # 2018
    # LOG.info("Eval 2018")
    # eval_2018_df = dataset.initialize_and_get_df(cfg.eval2018, reduced_number_of_data)
    # # Strong
    # eval_2018_strong = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                               transform=transforms_valid)
    # predictions = get_predictions(crnn, eval_2018_strong, many_hot_encoder.decode_strong)
    # compute_strong_metrics(predictions, eval_2018_df, pooling_time_ratio)
    # # Weak
    # eval_2018_weak = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                             transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(eval_2018_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # Validation 2019
    # LOG.info("Validation 2019 (original code)")
    # b_dataset = B_DatasetDcase2019Task4(cfg.workspace,
    #                                     base_feature_dir=os.path.join(cfg.workspace, 'dataset', 'features'),
    #                                     save_log_feature=False)
    # b_validation_df = b_dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    # b_validation_df.to_csv('old.csv')
    # b_validation_strong = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file,
    #                                    many_hot_encoder.encode_strong_df, transform=transforms_valid)
    # predictions2 = get_predictions(crnn, b_validation_strong, many_hot_encoder.decode_strong,
    #                                save_predictions=store_predictions_fname)
    # compute_strong_metrics(predictions2, b_validation_df, pooling_time_ratio)
    # b_validation_weak = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                                  transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(b_validation_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
    # ============================================================================================

    dataset = DatasetDcase2019Task4(feature_dir=cfg.feature_dir,
                                    local_path=cfg.workspace,
                                    exp_tag=cfg.exp_tag,
                                    save_log_feature=False)

    # Validation 2019
    LOG.info("Validation 2019")
    validation_df = dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    validation_strong = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                                   transform=transforms_valid)
    predictions = get_predictions(crnn, validation_strong, many_hot_encoder.decode_strong,
                                  save_predictions=store_predictions_fname)

    # Metrics are computed on .wav filenames, while the dataframes reference .npy feature files
    vdf = validation_df.copy()
    vdf.filename = vdf.filename.str.replace('.npy', '.wav')
    pdf = predictions.copy()
    pdf.filename = pdf.filename.str.replace('.npy', '.wav')
    compute_strong_metrics(pdf, vdf, pooling_time_ratio)

    validation_weak = DataLoadDf(validation_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
                                 transform=transforms_valid)
    weak_metric = get_f_measure_by_class(crnn, len(classes),
                                         DataLoader(validation_weak, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
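# Hypothetical usage sketch of test_model above. The checkpoint and output paths and
# the torch.load call are assumptions for illustration only; the project may save and
# load its training state differently.
import torch

if __name__ == "__main__":
    saved_state = torch.load("stored_data/model/crnn_best.pth", map_location="cpu")   # hypothetical path
    test_model(saved_state, reduced_number_of_data=None,
               store_predictions_fname="stored_data/predictions/validation_2019.tsv")  # hypothetical path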
embed_name = model_path.split("/")[-2]

############
# Experiment parameters
###########
subpart_data = f_args.subpart_data
dataset = DesedSynthetic("../dcase2019",
                         base_feature_dir="../dcase2019/features",
                         save_log_feature=False)

emb_model, state = load_model(model_path, return_state=True)
epoch_model = state["epoch"]
LOG.info("model loaded at epoch: {}".format(epoch_model))
if torch.cuda.is_available():
    emb_model = emb_model.cuda()
emb_model.eval()

many_hot_encoder = ManyHotEncoder.load_state_dict(state['many_hot_encoder'])
encode_function_label = many_hot_encoder.encode_weak
scaler = ScalerSum.load_state_dict(state['scaler'])

frames_in_sec = cfg.frames_in_sec
transf = Compose([ApplyLog(), PadOrTrunc(nb_frames=cfg.frames), ToTensor(), Unsqueeze(0),
                  Normalize(scaler), Unsqueeze(1)])
test_fr = dataset.get_df_feat_dir(cfg.test2018, frames_in_sec=frames_in_sec, subpart_data=subpart_data)
print(len(test_fr))
test_dataset = DataLoadDf(test_fr, many_hot_encoder.encode_weak, transform=transf)

embed_set = "embedding"
embed_dir = "stored_data/embeddings"
embed_dir = os.path.join(embed_dir, embed_name, "embeddings")
                                cfg.workspace, "dataset", "features"),
                                save_log_feature=False)

weak_df = dataset.initialize_and_get_df(cfg.weak, reduced_number_of_data)
synthetic_df = dataset.initialize_and_get_df(cfg.synthetic, reduced_number_of_data, download=False)
validation_df = dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
classes = DatasetDcase2019Task4.get_classes([weak_df, validation_df, synthetic_df])

# Careful: n_frames is max_frames // pooling_time_ratio because max pooling is applied on the time axis in the model
many_hot_encoder = ManyHotEncoder(classes, n_frames=cfg.max_frames // pooling_time_ratio)

transforms = get_transforms(cfg.max_frames)

# Split weak data into train and valid sets
train_weak_df = weak_df.sample(frac=0.8, random_state=26)
valid_weak_df = weak_df.drop(train_weak_df.index).reset_index(drop=True)
train_weak_df = train_weak_df.reset_index(drop=True)
LOG.debug(valid_weak_df.event_labels.value_counts())
train_weak_data = DataLoadDf(train_weak_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
                             transform=transforms)

# Split synthetic data into train and valid sets
                         base_feature_dir=cfg.base_feature_dir,
                         save_log_feature=False)
dfs = get_dfs(dataset, weak_path, test_path, eval_path, subpart_data,
              valid_list=val_list,
              frames_in_sec=args.frames_in_sec,
              segment=args.segment,
              dropna=args.dropna,
              unique_fr=args.unique_fr,
              fixed_segment=args.fixed_segment)

train_weak_df = dfs["train"]
classes = dataset.classes
many_hot_encoder = ManyHotEncoder(classes)

# ##############
# Triplet dataset
# #############
batch_size = cfg.batch_size
num_workers = cfg.num_workers

list_trans_fr = [ApplyLog(), ToTensor(), Unsqueeze(0)]
if args.segment:
    list_trans_fr.append(Unsqueeze(0))

train_set = DataLoadDf(train_weak_df, many_hot_encoder.encode_weak,
                       Compose(list_trans_fr), return_indexes=False)
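# ManyHotEncoder comes from the project; the sketch below is only an assumption of the
# weak-label behaviour relied on in these snippets: encode_weak maps a list of class
# names to a binary vector over the class list, and decode_weak inverts it.
import numpy as np

class ManyHotEncoderSketch:
    def __init__(self, labels):
        self.labels = list(labels)

    def encode_weak(self, events):
        y = np.zeros(len(self.labels))
        for event in events:
            y[self.labels.index(event)] = 1
        return y

    def decode_weak(self, encoded):
        return [self.labels[i] for i, active in enumerate(encoded) if active > 0]

# e.g. ManyHotEncoderSketch(["Dog", "Speech"]).encode_weak(["Speech"]) -> array([0., 1.])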
                                              download=False)
#############################################################################
validation_df = dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
LOG.info("Select Label : {}".format(
    synthetic_df.loc[(synthetic_df['filename'].str.contains('.000.wav', regex=False))]['event_label'].unique()))
# exit()
classes = cfg.classes

#############################################################################
# many_hot_encoder = ManyHotEncoder(classes, n_frames=cfg.max_frames // pooling_time_ratio)
many_hot_encoder = ManyHotEncoder(classes, cfg.sample_rate, cfg.hop_length, cfg.pooling_time_ratio,
                                  n_frames=cfg.max_frames // pooling_time_ratio)
#############################################################################

#############################################################################
# transforms = get_transforms(cfg.max_frames)
# Do not take silent parts into account when normalizing
LOG.info("Do not take silent parts into account when normalizing")
transforms = get_transforms_nopad()
#############################################################################

# Split weak data into train and valid sets
train_weak_df = weak_df.sample(frac=0.8, random_state=26)
valid_weak_df = weak_df.drop(train_weak_df.index).reset_index(drop=True)
train_weak_df = train_weak_df.reset_index(drop=True)
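# Worked illustration of the n_frames argument above, with assumed values (the real
# ones come from the config module): 864 input feature frames max-pooled by a factor
# of 8 on the time axis leave 864 // 8 = 108 output frames, so strong targets are
# encoded over 108 frames to line up with the model output.
assumed_max_frames, assumed_pooling_time_ratio = 864, 8   # hypothetical config values
n_frames_example = assumed_max_frames // assumed_pooling_time_ratio   # -> 108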