Example #1
def test_model(state,
               reference_tsv_path,
               reduced_number_of_data=None,
               store_predictions_fname=None):
    dataset = DatasetDcase2019Task4(os.path.join(cfg.workspace),
                                    base_feature_dir=os.path.join(
                                        cfg.workspace, "dataset", "features"),
                                    save_log_feature=False)

    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(
        state["many_hot_encoder"])

    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    LOG.info(reference_tsv_path)
    df = dataset.initialize_and_get_df(reference_tsv_path,
                                       reduced_number_of_data)
    strong_dataload = DataLoadDf(df,
                                 dataset.get_feature_file,
                                 many_hot_encoder.encode_strong_df,
                                 transform=transforms_valid)

    predictions = get_predictions(crnn,
                                  strong_dataload,
                                  many_hot_encoder.decode_strong,
                                  pooling_time_ratio,
                                  save_predictions=store_predictions_fname)
    compute_strong_metrics(predictions, df)

    weak_dataload = DataLoadDf(df,
                               dataset.get_feature_file,
                               many_hot_encoder.encode_weak,
                               transform=transforms_valid)
    weak_metric = get_f_measure_by_class(
        crnn, len(classes), DataLoader(weak_dataload,
                                       batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(
        pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
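
A minimal calling sketch for the function above. The checkpoint path and the use of torch.load are assumptions for illustration; the example only requires that state is a dict carrying the keys read above ("model", "epoch", "pooling_time_ratio", "scaler", "many_hot_encoder").

import torch

# Hypothetical checkpoint path; point this at wherever the training script
# saved the "state" structure that test_model unpacks.
state = torch.load("stored_data/model/crnn_best.pth", map_location="cpu")
test_model(state,
           reference_tsv_path=cfg.validation,   # reference annotation tsv from the config
           reduced_number_of_data=None,         # evaluate on the full set
           store_predictions_fname="validation_predictions.tsv")
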
Example #2
    def set_df_list(self, train):
        dataset = DatasetDcase2019Task4(cfg.workspace,
                                        base_feature_dir=os.path.join(cfg.workspace, "dataset", "features"),
                                        save_log_feature=False)

        transforms = get_transforms(cfg.max_frames)

        weak_df = dataset.initialize_and_get_df(cfg.weak)
        load_weak = DataLoadDf(weak_df, dataset.get_feature_file, None, transform=transforms)
        if train:
            self.list_dataset = [load_weak]

        else:
            synthetic_df = dataset.initialize_and_get_df(cfg.synthetic, download=False)
            # Convert onset/offset annotations from seconds to feature-frame indices
            synthetic_df.onset = synthetic_df.onset * cfg.sample_rate // cfg.hop_length
            synthetic_df.offset = synthetic_df.offset * cfg.sample_rate // cfg.hop_length

            validation_df = dataset.initialize_and_get_df(cfg.validation)
            validation_df.onset = validation_df.onset * cfg.sample_rate // cfg.hop_length
            validation_df.offset = validation_df.offset * cfg.sample_rate // cfg.hop_length

            eval_desed_df = dataset.initialize_and_get_df(cfg.eval_desed)
            eval_desed_df.onset = eval_desed_df.onset * cfg.sample_rate // cfg.hop_length
            eval_desed_df.offset = eval_desed_df.offset * cfg.sample_rate // cfg.hop_length

            # many_hot_encoder = ManyHotEncoder(classes, n_frames=cfg.max_frames // pooling_time_ratio)

            load_synthetic = DataLoadDf(synthetic_df, dataset.get_feature_file, None, transform=transforms)
            load_validation = DataLoadDf(validation_df, dataset.get_feature_file, None, transform=transforms)
            load_eval_desed = DataLoadDf(eval_desed_df, dataset.get_feature_file, None, transform=transforms)


            self.list_dataset = [load_weak, load_synthetic, load_validation, load_eval_desed]

        # Fit the scaler on every dataset, then rebuild the transforms so the
        # fitted scaler is applied to the features
        scaler = Scaler()
        scaler.calculate_scaler(ConcatDataset(self.list_dataset))

        transforms = get_transforms(cfg.max_frames, scaler)
        for i in range(len(self.list_dataset)):
            self.list_dataset[i].set_transform(transforms)
        print(self.list_dataset)
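
A hedged sketch of how set_df_list might be driven. The enclosing class is not shown in the example, so the class name below is purely hypothetical; only the meaning of the train flag comes from the code above.

# Hypothetical owner class of set_df_list (not part of the example)
builder = TaskDatasetBuilder()
builder.set_df_list(train=False)   # weak + synthetic + validation + DESED eval sets
datasets = builder.list_dataset    # DataLoadDf objects whose transforms now use the fitted scaler
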
Example #3
def test_model(state, reduced_number_of_data, store_predictions_fname=None):
    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(
        state["many_hot_encoder"])

    # ##############
    # Validation
    # ##############
    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    # # 2018
    # LOG.info("Eval 2018")
    # eval_2018_df = dataset.initialize_and_get_df(cfg.eval2018, reduced_number_of_data)
    # # Strong
    # eval_2018_strong = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                               transform=transforms_valid)
    # predictions = get_predictions(crnn, eval_2018_strong, many_hot_encoder.decode_strong)
    # compute_strong_metrics(predictions, eval_2018_df, pooling_time_ratio)
    # # Weak
    # eval_2018_weak = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                             transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(eval_2018_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # Validation 2019
    # LOG.info("Validation 2019 (original code)")
    # b_dataset = B_DatasetDcase2019Task4(cfg.workspace,
    #                                   base_feature_dir=os.path.join(cfg.workspace, 'dataset', 'features'),
    #                                   save_log_feature=False)
    # b_validation_df = b_dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    # b_validation_df.to_csv('old.csv')
    # b_validation_strong = B_DataLoadDf(b_validation_df,
    #                                  b_dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                                  transform=transforms_valid)

    # predictions2 = get_predictions(crnn, b_validation_strong, many_hot_encoder.decode_strong,
    #                               save_predictions=store_predictions_fname)
    # compute_strong_metrics(predictions2, b_validation_df, pooling_time_ratio)

    # b_validation_weak = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                              transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(b_validation_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # ============================================================================================
    # ============================================================================================
    # ============================================================================================

    dataset = DatasetDcase2019Task4(feature_dir=cfg.feature_dir,
                                    local_path=cfg.workspace,
                                    exp_tag=cfg.exp_tag,
                                    save_log_feature=False)
    # Validation 2019
    LOG.info("Validation 2019")
    validation_df = dataset.initialize_and_get_df(cfg.validation,
                                                  reduced_number_of_data)
    validation_strong = DataLoadDf(validation_df,
                                   dataset.get_feature_file,
                                   many_hot_encoder.encode_strong_df,
                                   transform=transforms_valid)

    predictions = get_predictions(crnn,
                                  validation_strong,
                                  many_hot_encoder.decode_strong,
                                  save_predictions=store_predictions_fname)
    # Map the .npy feature filenames back to the original .wav names before
    # computing the event-based metrics
    vdf = validation_df.copy()
    vdf.filename = vdf.filename.str.replace('.npy', '.wav')
    pdf = predictions.copy()
    pdf.filename = pdf.filename.str.replace('.npy', '.wav')
    compute_strong_metrics(pdf, vdf, pooling_time_ratio)

    validation_weak = DataLoadDf(validation_df,
                                 dataset.get_feature_file,
                                 many_hot_encoder.encode_weak,
                                 transform=transforms_valid)
    weak_metric = get_f_measure_by_class(
        crnn, len(classes),
        DataLoader(validation_weak, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(
        pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
Example #4
    train_synth_df_frames.offset = (train_synth_df_frames.offset * cfg.sample_rate
                                    // cfg.hop_length // pooling_time_ratio)
    LOG.debug(valid_synth_df.event_label.value_counts())
    LOG.debug(valid_synth_df)
    train_synth_data = DataLoadDf(train_synth_df_frames,
                                  dataset.get_feature_file,
                                  many_hot_encoder.encode_strong_df,
                                  transform=transforms)

    if not no_weak:
        list_datasets = [train_weak_data, train_synth_data]
        training_data = ConcatDataset(list_datasets)
    else:
        list_datasets = [train_synth_data]
        training_data = train_synth_data

    scaler = Scaler()
    scaler.calculate_scaler(training_data)
    LOG.debug(scaler.mean_)

    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)
    # The validation dataset is only used to get an idea of what the results could be on the evaluation dataset
    validation_dataset = DataLoadDf(validation_df,
                                    dataset.get_feature_file,
                                    many_hot_encoder.encode_strong_df,
                                    transform=transforms_valid)

    transforms = get_transforms(cfg.max_frames, scaler)
    train_synth_data.set_transform(transforms)
    if not no_weak:
        train_weak_data.set_transform(transforms)
        concat_dataset = ConcatDataset([train_weak_data, train_synth_data])
Example #5
    validation_data = DataLoadDf(validation_df,
                                 dataset.get_feature_file,
                                 many_hot_encoder.encode_weak,
                                 transform=transforms)
    test_data = DataLoadDf(test_df,
                           dataset.get_feature_file,
                           many_hot_encoder.encode_weak,
                           transform=transforms)

    list_dataset = [train_data]
    batch_sizes = [cfg.batch_size]
    # batch_sizes = [cfg.batch_size // len(list_dataset)] * len(list_dataset)
    weak_mask = slice(cfg.batch_size)
    strong_mask = None

    scaler = Scaler()
    if path.exists(cfg.scaler_fn):
        LOG.info('Loading scaler from {}'.format(cfg.scaler_fn))
        scaler.load(cfg.scaler_fn)
    else:
        scaler.calculate_scaler(ConcatDataset(list_dataset))
        LOG.info('Saving scaler to {}'.format(cfg.scaler_fn))
        scaler.save(cfg.scaler_fn)

    LOG.debug(scaler.mean_)

    transforms = get_transforms(cfg.max_frames, scaler, augment_type="noise")
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)
    for i in range(len(list_dataset)):
        list_dataset[i].set_transform(transforms)
    validation_data.set_transform(transforms_valid)
        batch_sizes = [
            6 * cfg.batch_size // 15, 2 * cfg.batch_size // 15,
            7 * cfg.batch_size // 15
        ]
        strong_mask = slice(
            6 * cfg.batch_size // 15 + 2 * cfg.batch_size // 15,
            cfg.batch_size)

        # batch_sizes = [cfg.batch_size//3, 2*cfg.batch_size//3]
        # strong_mask = slice(cfg.batch_size//3, cfg.batch_size)

    weak_mask = slice(batch_sizes[0] + batch_sizes[1])
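    # Worked example of the split above, assuming cfg.batch_size == 30
    # (the real value comes from the config and is not shown in this snippet):
    #   batch_sizes = [6*30//15, 2*30//15, 7*30//15] = [12, 4, 14]
    #   strong_mask = slice(12 + 4, 30) = slice(16, 30)   -> last 14 samples of each batch
    #   weak_mask   = slice(12 + 4)     = slice(None, 16) -> first 16 samples of each batch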
    #############################################################################

    scaler = Scaler()
    scaler.calculate_scaler(ConcatDataset(list_dataset))

    LOG.debug(scaler.mean_)
    # print(train_weak_data.filenames)
    # exit()

    #############################################################################
    # transforms = get_transforms(cfg.max_frames, scaler, augment_type="noise")
    LOG.info("Change Normalize(Zero-padding)")
    transforms = get_transforms_AANPT(cfg.max_frames,
                                      scaler,
                                      augment_type="noise")
    #############################################################################

    for i in range(len(list_dataset)):
        list_dataset[i].set_transform(transforms)