Example #1
                        help="Number of Noise for insert in control")
    parser.add_argument('--num_noise_patient', type=int, default=0,
                        help="Number of Noise for insert in patient")
                        
    args = parser.parse_args()

    c = load_config(args.config_path)
    ap = AudioProcessor(**c.audio)
    
    c.data_aumentation['insert_noise'] = not args.no_insert_noise

    # set values for noise insertion during testing
    c.data_aumentation["num_noise_control"] = args.num_noise_control
    c.data_aumentation["num_noise_patient"] = args.num_noise_patient

    print("Insert noise ?", c.data_aumentation['insert_noise'])

    c.dataset['test_csv'] = args.test_csv
    c.dataset['test_data_root_path'] = args.test_root_dir


    c.test_config['batch_size'] = args.batch_size
    c.test_config['num_workers'] = args.num_workers
    max_seq_len = c.dataset['max_seq_len']

    testdataloader = test_dataloader(c, ap, max_seq_len=max_seq_len)

    run_test(args, args.checkpoint_path, testdataloader, c, c.model_name, ap, cuda=True)
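
Example #1 only reads or overrides a handful of config fields. Below is a minimal sketch of the corresponding fragment, assuming the nested layout implied by the attribute access above; every value is an illustrative placeholder:

# hypothetical fragment of the loaded config; values are placeholders
config_fragment = {
    "model_name": "my_model",        # passed to run_test
    "audio": {},                     # kwargs expanded into AudioProcessor(**c.audio)
    "data_aumentation": {            # key spelled as in the code
        "insert_noise": True,
        "num_noise_control": 0,
        "num_noise_patient": 0,
    },
    "dataset": {
        "test_csv": "test.csv",
        "test_data_root_path": "data/",
        "max_seq_len": 512,
    },
    "test_config": {"batch_size": 5, "num_workers": 2},
}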

Example #2

    all_checkpoints = sorted(glob(os.path.join(args.checkpoints_path, '*.pt')))
    #print(all_checkpoints, os.listdir(args.checkpoints_path))
    if args.config_path:
        c = load_config(args.config_path)
    else:  # load config stored in the checkpoint
        checkpoint = torch.load(all_checkpoints[0], map_location='cpu')
        c = load_config_from_str(checkpoint['config_str'])

    ap = AudioProcessor(**c.audio)

    log_path = os.path.join(c.train_config['logs_path'], c.model_name)
    audio_config = c.audio[c.audio['backend']]
    tensorboard = TensorboardWriter(log_path, audio_config)
    # set test dataset dir
    c.dataset['test_dir'] = args.dataset_dir
    # use a small batch size for evaluation
    c.test_config['batch_size'] = 5
    testdataloader = test_dataloader(c, ap)
    best_loss = float('inf')
    best_loss_checkpoint = ''
    sdrs_checkpoint = []
    for checkpoint in tqdm.tqdm(all_checkpoints):
        mean_loss = test(args, log_path, checkpoint, testdataloader, tensorboard, c, c.model_name, ap, cuda=True)
        sdrs_checkpoint.append([mean_loss, checkpoint])
        if mean_loss < best_loss:
            best_loss = mean_loss
            best_loss_checkpoint = checkpoint
    print("Best Loss checkpoint is: ", best_loss_checkpoint, "Best Loss:", best_loss)
    copyfile(best_loss_checkpoint, os.path.join(args.checkpoints_path,'fast_best_checkpoint.pt'))
    np.save(os.path.join(args.checkpoints_path, "Loss_validation_with_VCTK_best_SI-SNR_is_" + str(best_loss) + ".npy"), np.array(sdrs_checkpoint))
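
Since each row of sdrs_checkpoint pairs a float loss with a checkpoint path string, np.array casts both columns to strings. A minimal sketch for reading the saved results back (the filename is illustrative):

import numpy as np

# hypothetical filename matching the np.save pattern above
results = np.load("Loss_validation_with_VCTK_best_SI-SNR_is_0.1234.npy")
for mean_loss, ckpt in results:   # both columns come back as strings
    print(ckpt, float(mean_loss))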
Example #3
def run_test_all_seeds(args, cuda=True, debug=False, return_potential=False):
    runs_list = os.listdir(args.experiment_dir)
    runs_list.sort()
    num_runs = len(runs_list)

    votes = []
    wav_files = []
    targets = []
    # define loss function
    criterion = nn.BCELoss(reduction='sum')
    for run in runs_list:
        blockPrint()
        run_dir = os.path.join(args.experiment_dir, run)
        if os.path.isfile(run_dir):
            continue
        model_name = os.listdir(run_dir)[0]
        checkpoint_path = os.path.join(run_dir, model_name,
                                       'best_checkpoint.pt')
        config_path = os.path.join(run_dir, model_name, 'config.json')

        c = load_config(config_path)
        ap = AudioProcessor(**c.audio)

        c.dataset['test_csv'] = args.test_csv
        c.dataset['test_data_root_path'] = args.test_root_dir

        c.test_config['batch_size'] = args.batch_size
        c.test_config['num_workers'] = args.num_workers
        max_seq_len = c.dataset['max_seq_len']

        c.train_config['seed'] = 0

        testdataloader = test_dataloader(c, ap, max_seq_len=max_seq_len)

        # load model
        model = return_model(c)
        enablePrint()
        if checkpoint_path is not None:
            print("Loading checkpoint: %s" % checkpoint_path)
            try:
                checkpoint = torch.load(checkpoint_path, map_location='cpu')
                model.load_state_dict(checkpoint['model'])
                print("Model Sucessful Load !")
            except Exception as e:
                raise ValueError(
                    "You need to pass a valid checkpoint; check that your "
                    "config.json matches this checkpoint. Original error: " + str(e))
        blockPrint()
        # move the model to the GPU
        if cuda:
            model = model.cuda()

        model.train(False)

        vote, targets, wav_path = test(criterion,
                                       ap,
                                       model,
                                       c,
                                       testdataloader,
                                       cuda=cuda,
                                       confusion_matrix=True,
                                       debug=debug,
                                       simples_vote=args.simples_vote)
        # print(vote)
        wav_files.append(wav_path)
        votes.append(vote)
        if wav_files and wav_files[-1] != wav_files[0]:
            raise ValueError(
                "Different test files or file order across seeds/folds")

    # average the votes across seeds; rounding is needed for composite voting
    preds = np.mean(np.array(votes), axis=0)
    # print(preds)
    if not return_potential:
        preds = preds.round()
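    # e.g. three seeds voting [1, 1, 0] on the same file average to 0.67,
    # which rounds to 1; with return_potential=True the raw 0.67 is kept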

    file_names = wav_files[0]

    if debug and not return_potential:
        enablePrint()
        targets = np.array(targets)
        preds = np.array(preds)
        names = np.array(file_names)
        idxs = np.nonzero(targets == c.dataset['control_class'])
        control_target = targets[idxs]
        control_preds = preds[idxs]
        names_control = names[idxs]

        idxs = np.nonzero(targets == c.dataset['patient_class'])

        patient_target = targets[idxs]
        patient_preds = preds[idxs]
        names_patient = names[idxs]

        print('+' * 40)
        print("Control Files Classified incorrectly:")
        incorrect_ids = np.nonzero(
            control_preds != c.dataset['control_class'])
        inc_names = names_control[incorrect_ids]
        print("Num. Files:", len(inc_names))
        print(inc_names)
        print('+' * 40)
        print('-' * 40)
        print("Patient Files Classified incorrectly:")
        incorrect_ids = np.nonzero(
            patient_preds != c.dataset['patient_class'])
        inc_names = names_patient[incorrect_ids]
        print("Num. Files:", len(inc_names))
        print(inc_names)
        print('-' * 40)

        acc_control = (control_preds == control_target).mean()
        acc_patient = (patient_preds == patient_target).mean()
        acc_balanced = (acc_control + acc_patient) / 2
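        # e.g. acc_control = 0.90 and acc_patient = 0.60 give a balanced
        # accuracy of 0.75, independent of how many files are in each class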

        f1 = f1_score(targets.tolist(), preds.tolist())
        uar = recall_score(targets.tolist(), preds.tolist(), average='macro')
        print("======== Confusion Matrix ==========")
        y_target = pd.Series(targets, name='Target')
        y_pred = pd.Series(preds, name='Predicted')
        df_confusion = pd.crosstab(y_target,
                                   y_pred,
                                   rownames=['Target'],
                                   colnames=['Predicted'],
                                   margins=True)
        print(df_confusion)

        print("Test\n ", "Acurracy Control: ", acc_control,
              "Acurracy Patient: ", acc_patient, "Acurracy Balanced",
              acc_balanced)
        print("F1:", f1, "UAR:", uar)

    if return_potential:
        return preds, file_names
    else:
        df = pd.DataFrame({
            'filename': file_names,
            'prediction': preds.astype(int)
        })
        df['prediction'] = df['prediction'].replace({
            int(c.dataset['control_class']): 'negative',
            int(c.dataset['patient_class']): 'positive'
        })
        if args.output_csv:
            out_csv_path = args.output_csv
        else:
            out_csv_path = os.path.join(
                args.experiment_dir, os.path.basename(c.dataset['test_csv']))

        df.to_csv(out_csv_path, index=False)
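
A minimal sketch of a command-line entry point for run_test_all_seeds. The flag names simply mirror the attributes the function body reads (args.experiment_dir, args.test_csv, args.simples_vote, ...); the project's real parser may differ:

if __name__ == '__main__':
    import argparse
    # hypothetical driver; defaults are illustrative, not the project's
    parser = argparse.ArgumentParser()
    parser.add_argument('--experiment_dir', required=True,
                        help="Directory with one run folder per seed/fold")
    parser.add_argument('--test_csv', required=True)
    parser.add_argument('--test_root_dir', required=True)
    parser.add_argument('--batch_size', type=int, default=5)
    parser.add_argument('--num_workers', type=int, default=2)
    parser.add_argument('--simples_vote', action='store_true',
                        help="Use simple (hard) voting instead of composite")
    parser.add_argument('--output_csv', default=None)
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()
    run_test_all_seeds(args, cuda=True, debug=args.debug)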