Example #1
def naive_bayes_classification(PARAMS, train_data, train_label, test_data, test_label):
    NB_model = GaussianNB()
    start = time.process_time()

    '''
    Checking if model is already available
    '''
    NB_ModelFileName = PARAMS['opDir'] + PARAMS['modelName'].split('/')[-1].split('.')[0] + '.pkl'
    if not os.path.exists(NB_ModelFileName):
        NB_model.fit(train_data, train_label.flatten())
        if PARAMS['save_flag']:
            misc.save_obj(NB_model, PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
    else:
        NB_model = misc.load_obj(PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0])
    trainingTimeTaken = time.process_time() - start
    start = time.process_time()

    PtdLabels_train = NB_model.predict(train_data)
    PtdLabels_test = NB_model.predict(test_data)

    # Predictions_train = NB_model.predict_proba(train_data)
    Predictions_test = NB_model.predict_proba(test_data)

    accuracy_train = np.mean(PtdLabels_train.ravel() == train_label.ravel()) * 100
    accuracy_test = np.mean(PtdLabels_test.ravel() == test_label.ravel()) * 100
    
    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train, train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, test_label)
    
    # Performance_train = np.array([accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])

    testingTimeTaken = time.process_time() - start
    
    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test, 'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1])

    Train_Params = {
        'model':NB_model,
        'trainingTimeTaken': trainingTimeTaken,
        }
    
    Test_Params = {
        'PtdLabels': PtdLabels_test,
        'Predictions': Predictions_test,
        'accuracy': accuracy_test,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
        'fscore': fscore_test,
        'GroundTruth': test_label,
        }

    return Train_Params, Test_Params
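Example #1 depends on the project-specific misc helpers, which are not shown here. Below is a minimal standalone sketch of the same GaussianNB fit/predict flow on synthetic data; every name in it is illustrative and the misc save/load logic is omitted.

import numpy as np
from sklearn.naive_bayes import GaussianNB

# Synthetic stand-ins for the real feature matrices and labels
rng = np.random.default_rng(0)
train_data = rng.normal(size=(200, 10))
train_label = (train_data[:, 0] > 0).astype(int)
test_data = rng.normal(size=(50, 10))
test_label = (test_data[:, 0] > 0).astype(int)

NB_model = GaussianNB().fit(train_data, train_label)
PtdLabels_test = NB_model.predict(test_data)
Predictions_test = NB_model.predict_proba(test_data)
accuracy_test = np.mean(PtdLabels_test.ravel() == test_label.ravel()) * 100
print('test accuracy (%):', accuracy_test)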
Example #2
def test_NB_ensemble(PARAMS, All_Test_Params):
    PtdLabels_Ensemble = []
    GroundTruth_Ensemble = []
    Predictions_Ensemble = np.empty([])
    featCount = 0
    testingTimeTaken = 0
    start = time.process_time()
    for featName in All_Test_Params.keys():
        if not 'PtdLabels' in All_Test_Params[featName].keys():
            continue
        Test_Params = All_Test_Params[featName]
        if featCount==0:
            Predictions_Ensemble = np.array(Test_Params['PtdLabels'], ndmin=2).T
            GroundTruth_Ensemble = Test_Params['GroundTruth']
        else:
            # print('Predictions_Ensemble: ', np.shape(Predictions_Ensemble), np.shape(Test_Params['PtdLabels']))
            Predictions_Ensemble = np.append(Predictions_Ensemble, np.array(Test_Params['PtdLabels'], ndmin=2).T, axis=1)
        featCount += 1
    PtdLabels_Ensemble, mode_count = scipy.stats.mode(Predictions_Ensemble, axis=1)
    ConfMat_Ensemble, fscore_Ensemble = misc.getPerformance(PtdLabels_Ensemble, GroundTruth_Ensemble)
    accuracy_Ensemble = np.sum(np.diag(ConfMat_Ensemble))/np.sum(ConfMat_Ensemble)
    testingTimeTaken = time.process_time() - start
    print('NB Ensemble: ', accuracy_Ensemble, fscore_Ensemble)

    Ensemble_Test_Params = {
        'accuracy_Ensemble': accuracy_Ensemble,
        'testingTimeTaken': testingTimeTaken,
        'ConfMat_Ensemble': ConfMat_Ensemble,
        'fscore_Ensemble': fscore_Ensemble,
        'PtdLabels_Ensemble': PtdLabels_Ensemble,
        'Predictions_Ensemble': Predictions_Ensemble,
        'GroundTruth_Ensemble': GroundTruth_Ensemble,
        }
    
    return Ensemble_Test_Params
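A small self-contained sketch of the majority-voting step used above: each column holds one classifier's predicted labels for the same samples, and scipy.stats.mode picks the per-row winner. keepdims is passed explicitly here because SciPy changed the default output shape in 1.9; drop it on older SciPy versions.

import numpy as np
import scipy.stats

# Each column: predicted labels from one classifier
per_classifier_labels = np.array([[0, 0, 1],
                                  [1, 1, 1],
                                  [0, 1, 1],
                                  [1, 0, 0]])
PtdLabels_Ensemble, mode_count = scipy.stats.mode(per_classifier_labels,
                                                  axis=1, keepdims=False)
print(PtdLabels_Ensemble)  # [0 1 1 0]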
def test_model(PARAMS, data_dict, Train_Params):
    loss = 0
    performance = 0
    testingTimeTaken = 0
    PtdLabels = []
    test_data = data_dict['test_data']

    start = time.process_time()
    if not PARAMS['data_generator']:
        OHE_testLabel = to_categorical(data_dict['test_label'])
        loss, performance = Train_Params['model'].evaluate(x=test_data,
                                                           y=OHE_testLabel)
        Predictions = Train_Params['model'].predict(test_data)
        PtdLabels = np.argmax(Predictions, axis=1)
        GroundTruth = data_dict['test_label']

    else:
        class_wise_numFiles = [
            len(files[0])
            for files in Train_Params['FL_Ret']['file_list_test'].values()
        ]
        totTestFiles = np.sum(class_wise_numFiles)
        SPE = int(totTestFiles / Train_Params['batch_size'])
        loss, performance = Train_Params['model'].evaluate_generator(generator(
            Train_Params['FL_Ret']['file_list_test'], PARAMS,
            Train_Params['batch_size']),
                                                                     steps=SPE,
                                                                     verbose=1)

        PtdLabels = []
        GroundTruth = []
        count = -1
        Predictions = np.empty([])
        file_keys = [
            key for key in Train_Params['FL_Ret']['file_list_test'].keys()
        ]
        for clNum in range(len(file_keys)):
            files = Train_Params['FL_Ret']['file_list_test'][
                file_keys[clNum]][0]
            for fl in files:
                count += 1
                file_name = fl
                batchData, batchLabel = generator_test(file_name, PARAMS,
                                                       clNum)
                pred = Train_Params['model'].predict(x=batchData)
                pred_lab = np.argmax(pred, axis=1)
                PtdLabels.extend(pred_lab)
                GroundTruth.extend(batchLabel)
                if np.size(Predictions) <= 1:
                    Predictions = pred
                else:
                    Predictions = np.append(Predictions, pred, 0)

    testingTimeTaken = time.process_time() - start
    print('Time taken for model testing: ', testingTimeTaken)
    ConfMat, fscore = misc.getPerformance(PtdLabels, GroundTruth)

    return loss, performance, testingTimeTaken, ConfMat, fscore, PtdLabels, Predictions
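In the non-generator branch above, to_categorical one-hot encodes the integer test labels before model.evaluate. A NumPy-only sketch of that encoding, assuming labels run from 0 to n_classes-1:

import numpy as np

test_label = np.array([0, 2, 1, 2])
n_classes = test_label.max() + 1
# Same layout as keras.utils.to_categorical for integer labels
OHE_testLabel = np.eye(n_classes)[test_label]
print(OHE_testLabel)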
Example #4
def test_model(PARAMS, Train_Params):
    start = time.process_time()
    PtdLabels = []
    GroundTruth = []
    Predictions = np.empty([])
    count = -1
    class_labels = {
        PARAMS['classes'][key]: key
        for key in PARAMS['classes'].keys()
    }
    startTime = time.process_time()
    for classname in PARAMS['test_files'].keys():
        clNum = class_labels[classname]
        files = PARAMS['test_files'][classname]
        # print('test_files: ', files)
        for fl in files:
            fName = PARAMS['test_folder'] + '/' + PARAMS[
                'featName'] + '/' + classname + '/' + fl
            if not os.path.exists(fName):
                continue
            count += 1
            batchData, batchLabel = generator_test(PARAMS, PARAMS['featName'],
                                                   fl, clNum)
            endTime = time.process_time()
            print('Data loading time: ', endTime - startTime)

            startTime = time.process_time()
            pred = Train_Params['model'].predict(x=batchData)
            print('Prediction time: ', time.process_time() - startTime)

            pred_lab = np.argmax(pred, axis=1)
            PtdLabels.extend(pred_lab)
            GroundTruth.extend(batchLabel.tolist())
            print('pred_lab: ', np.sum(pred_lab == 0), np.sum(pred_lab == 1))
            print('ground_truth: ', np.sum(batchLabel == 0),
                  np.sum(batchLabel == 1))
            if np.size(Predictions) <= 1:
                Predictions = pred
            else:
                Predictions = np.append(Predictions, pred, 0)
            print(
                PARAMS['classes'][clNum], fl, np.shape(batchData), ' acc=',
                np.round(
                    np.sum(pred_lab == batchLabel) * 100 / len(batchLabel), 2))

    testingTimeTaken = time.process_time() - start
    print('Time taken for model testing: ', testingTimeTaken)
    ConfMat, fscore = misc.getPerformance(PtdLabels, GroundTruth)

    return ConfMat, fscore, PtdLabels, Predictions, GroundTruth, testingTimeTaken
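The final performance call above goes through the project's misc.getPerformance, which is not shown; a rough stand-in using scikit-learn metrics (what getPerformance actually returns is an assumption here) looks like this:

import numpy as np
from sklearn.metrics import confusion_matrix, f1_score

pred = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])  # softmax outputs
GroundTruth = [0, 1, 1]
PtdLabels = np.argmax(pred, axis=1)
ConfMat = confusion_matrix(GroundTruth, PtdLabels)
fscore = f1_score(GroundTruth, PtdLabels, average=None)  # per-class F1
print(ConfMat, fscore)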
Example #5
def performance_dump(PARAMS,
                     PtdLabels,
                     GroundTruths,
                     labels,
                     info='',
                     fName_suffix=''):
    ConfMat, precision, recall, fscore = misc.getPerformance(
        PtdLabels, GroundTruths, labels)
    accuracy = np.round(np.sum(np.diag(ConfMat)) / np.sum(ConfMat), 4)
    print('Total data performance: ', fscore)
    print(ConfMat)

    if len(labels) == 2:
        classnames = ['neg', 'pos']
    else:
        classnames = ['mu', 'sp', 'spmu']

    res_dict = {}
    res_dict['0'] = 'feature_name:' + PARAMS['featName'][PARAMS['Model']]
    res_dict['1'] = 'model:' + PARAMS['Model']
    ln = 2
    if not info == '':
        res_dict[str(ln)] = info
        ln += 1
    res_dict[str(ln)] = 'loss:--'
    ln += 1
    res_dict[str(ln)] = 'accuracy:' + str(accuracy)
    ln += 1
    res_dict[str(ln)] = 'Prec_' + classnames[0] + ':' + str(precision[0])
    ln += 1
    res_dict[str(ln)] = 'Rec_' + classnames[0] + ':' + str(recall[0])
    ln += 1
    res_dict[str(ln)] = 'F1_' + classnames[0] + ':' + str(fscore[0])
    ln += 1
    res_dict[str(ln)] = 'Prec_' + classnames[1] + ':' + str(precision[1])
    ln += 1
    res_dict[str(ln)] = 'Rec_' + classnames[1] + ':' + str(recall[1])
    ln += 1
    res_dict[str(ln)] = 'F1_' + classnames[1] + ':' + str(fscore[1])
    if len(labels) == 3:
        ln += 1
        res_dict[str(ln)] = 'Prec_' + classnames[2] + ':' + str(precision[2])
        ln += 1
        res_dict[str(ln)] = 'Rec_' + classnames[2] + ':' + str(recall[2])
        ln += 1
        res_dict[str(ln)] = 'F1_' + classnames[2] + ':' + str(fscore[2])
    ln += 1
    res_dict[str(ln)] = 'F1_avg:' + str(np.round(np.mean(fscore), 4))
    misc.print_results(PARAMS, fName_suffix, res_dict)
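The accuracy used above is simply the trace of the confusion matrix divided by its sum; a tiny worked example with made-up counts:

import numpy as np

ConfMat = np.array([[50, 5],
                    [8, 37]])
accuracy = np.round(np.sum(np.diag(ConfMat)) / np.sum(ConfMat), 4)
print(accuracy)  # (50 + 37) / 100 = 0.87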
Example #6
def perform_testing(PARAMS, Train_Params):
    metrics, metric_names, testingTimeTaken = test_model_generator(
        PARAMS, Train_Params)
    Test_Params = {
        'metrics': metrics,
        'metric_names': metric_names,
        'testingTimeTaken': testingTimeTaken,
    }

    ConfMat, precision, recall, fscore, PtdLabels, Predictions, GroundTruth, testingTimeTaken = test_model(
        PARAMS, Train_Params, None)
    Test_Params['testingTimeTaken_annot'] = testingTimeTaken
    Test_Params['ConfMat_annot'] = ConfMat
    Test_Params['precision_annot'] = precision
    Test_Params['recall_annot'] = recall
    Test_Params['fscore_annot'] = fscore
    Test_Params['PtdLabels_test_annot'] = PtdLabels
    Test_Params['Predictions_test_annot'] = Predictions
    Test_Params['GroundTruth_test_annot'] = GroundTruth

    if len(PARAMS['classes']) == 3:
        PtdLabels_All = []
        GroundTruths_All = []
        for target_dB in PARAMS['test_SMR_levels']:
            ConfMat, precision, recall, fscore, PtdLabels, Predictions, GroundTruth, testingTimeTaken = test_model(
                PARAMS, Train_Params, target_dB)
            PtdLabels_All.extend(PtdLabels)
            GroundTruths_All.extend(GroundTruth)
            Test_Params['testingTimeTaken_' + str(target_dB) +
                        'dB'] = testingTimeTaken
            Test_Params['ConfMat_' + str(target_dB) + 'dB'] = ConfMat
            Test_Params['precision_' + str(target_dB) + 'dB'] = precision
            Test_Params['recall_' + str(target_dB) + 'dB'] = recall
            Test_Params['fscore_' + str(target_dB) + 'dB'] = fscore
            Test_Params['PtdLabels_test_' + str(target_dB) + 'dB'] = PtdLabels
            Test_Params['Predictions_test_' + str(target_dB) +
                        'dB'] = Predictions
            Test_Params['GroundTruth_test_' + str(target_dB) +
                        'dB'] = GroundTruth

        labels = [key for key in PARAMS['classes'].keys()]
        ConfMat_All, precision_All, recall_All, fscore_All = misc.getPerformance(
            PtdLabels_All, GroundTruths_All, labels)
        Test_Params['ConfMat_All'] = ConfMat_All
        Test_Params['precision_All'] = precision_All
        Test_Params['recall_All'] = recall_All
        Test_Params['fscore_All'] = fscore_All

    return Test_Params
Example #7
def patch_probability_generator(PARAMS, fl, Train_Params):
    startTime = time.process_time()
    labels_sp = []
    labels_mu = []
    pred_opDir = PARAMS['opDir'] + '/__Frame_Predictions_CNN/'
    if not os.path.exists(pred_opDir):
        os.makedirs(pred_opDir)
    result_fName = fl + '_fold' + str(PARAMS['fold']) + '_result'

    n_fft = PARAMS['n_fft'][PARAMS['Model']]
    n_mels = PARAMS['n_mels'][PARAMS['Model']]
    featName = PARAMS['featName'][PARAMS['Model']]

    if not os.path.exists(pred_opDir + result_fName + '.pkl'):
        fName_path = PARAMS['test_path'] + '/features/' + fl + '.npy'
        if not os.path.exists(fName_path):
            return {}
        fv = np.load(fName_path, allow_pickle=True)
        fv = get_featuregram(PARAMS,
                             PARAMS['feature_opDir'],
                             fl,
                             fv,
                             n_fft,
                             n_mels,
                             featName,
                             save_feat=True)
        if not 'HarmPerc' in featName:
            fv = fv.T
            fv = StandardScaler(copy=False).fit_transform(fv)
            fv = fv.T
        else:
            nDim = np.shape(fv)[0]
            fv_H = fv[:int(nDim / 2), :]
            fv_H = fv_H.T
            fv_H = StandardScaler(copy=False).fit_transform(fv_H)
            fv_H = fv_H.T
            fv_P = fv[int(nDim / 2):, :]
            fv_P = fv_P.T
            fv_P = StandardScaler(copy=False).fit_transform(fv_P)
            fv_P = fv_P.T
            fv = np.append(fv_H.astype(np.float32),
                           fv_P.astype(np.float32),
                           axis=0)

        nFrames = np.shape(fv)[1]
        annotations_mu, annotations_sp, music_marker, speech_marker = get_annotations(
            PARAMS['test_path'], fl, nFrames, PARAMS['opDir'])

        pred = np.empty([])
        pred_lab = np.empty([])
        batch_size = 10000
        labels_mu = []
        labels_sp = []
        # for batchStart in range(0, np.shape(fv_patches)[0], batch_size):
        for batchStart in range(0, np.shape(fv)[1], batch_size):
            # batchEnd = np.min([batchStart+batch_size, np.shape(fv_patches)[0]])
            batchEnd = np.min([batchStart + batch_size, np.shape(fv)[1]])
            # fv_patches_temp = fv_patches[batchStart:batchEnd,:]
            fv_temp = fv[:, batchStart:batchEnd]
            music_marker_temp = music_marker[batchStart:batchEnd]
            speech_marker_temp = speech_marker[batchStart:batchEnd]
            print('\tBatch: (',
                  batchStart,
                  batchEnd,
                  ') ',
                  np.shape(fv_temp),
                  ' mu=',
                  np.sum(music_marker_temp),
                  ' sp=',
                  np.sum(speech_marker_temp),
                  end=' ',
                  flush=True)

            fv_patches_temp = get_feature_patches(PARAMS, fv_temp, PARAMS['W'],
                                                  PARAMS['W_shift_test'],
                                                  featName)

            labels_mu_patches = cextract_patches(
                np.array(music_marker_temp, ndmin=2),
                np.shape(np.array(music_marker_temp, ndmin=2)), PARAMS['W'],
                PARAMS['W_shift_test']).astype(int)
            labels_mu_temp = (
                (np.sum(np.squeeze(labels_mu_patches, axis=1), axis=1) /
                 np.shape(labels_mu_patches)[2]) > 0.5).astype(int)

            labels_sp_patches = cextract_patches(
                np.array(speech_marker_temp, ndmin=2),
                np.shape(np.array(speech_marker_temp, ndmin=2)), PARAMS['W'],
                PARAMS['W_shift_test']).astype(int)
            labels_sp_temp = (
                (np.sum(np.squeeze(labels_sp_patches, axis=1), axis=1) /
                 np.shape(labels_sp_patches)[2]) > 0.5).astype(int)

            if 'Lemaire_et_al' in PARAMS['Model']:
                # TCN input shape=(batch_size, timesteps, ndim)
                fv_patches_temp = np.transpose(fv_patches_temp, axes=(0, 2, 1))

            if PARAMS['signal_type'] == 'music':
                pred_temp = Train_Params['model'].predict(x=fv_patches_temp)
                CM, acc, P, R, F1 = getPerformance(
                    np.array((pred_temp > 0.5).astype(int)), labels_mu_temp)
            elif PARAMS['signal_type'] == 'speech':
                pred_temp = Train_Params['model'].predict(x=fv_patches_temp)
                CM, acc, P, R, F1 = getPerformance(
                    np.array((pred_temp > 0.5).astype(int)), labels_sp_temp)

            pred_lab_temp = np.array(pred_temp > 0.5).astype(int)

            if np.size(pred) <= 1:
                pred = pred_temp
                pred_lab = pred_lab_temp
            else:
                pred = np.append(pred, pred_temp)
                pred_lab = np.append(pred_lab, pred_lab_temp)
            labels_mu.extend(labels_mu_temp)
            labels_sp.extend(labels_sp_temp)
            print(np.shape(fv_patches_temp), np.shape(pred_temp),
                  np.shape(pred), ' acc=', acc, F1)

        if PARAMS['signal_type'] == 'music':
            ConfMat, precision, recall, fscore = misc.getPerformance(
                pred_lab, labels_mu, labels=[0, 1])
            acc = np.round(np.sum(np.diag(ConfMat)) / np.sum(ConfMat), 4)
            print('Perf mu: ', acc, precision, recall, fscore)
        elif PARAMS['signal_type'] == 'speech':
            ConfMat, precision, recall, fscore = misc.getPerformance(
                pred_lab, labels_sp, labels=[0, 1])
            acc = np.round(np.sum(np.diag(ConfMat)) / np.sum(ConfMat), 4)
            print('Perf sp: ', acc, precision, recall, fscore)
        print('\n\n\n')

        probability_genTime = time.process_time() - startTime
        result = {
            'pred': pred,
            'pred_lab': pred_lab,
            'labels_sp': labels_sp,
            'labels_mu': labels_mu,
            'probability_genTime': probability_genTime,
            'ConfMat': ConfMat,
            'precision': precision,
            'recall': recall,
            'fscore': fscore,
            'accuracy': acc,
        }
        misc.save_obj(result, pred_opDir, result_fName)
        print('Test predictions saved!!!')
    else:
        result = misc.load_obj(pred_opDir, result_fName)

    return result
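patch_probability_generator standardises the featuregram with the transpose-fit-transpose pattern because StandardScaler normalises columns while the features keep frames along axis 1. A minimal sketch of just that step on random data:

import numpy as np
from sklearn.preprocessing import StandardScaler

fv = np.random.RandomState(0).rand(5, 100)  # (n_dims, n_frames)
# Transpose so frames become rows, scale per dimension, transpose back
fv = StandardScaler(copy=False).fit_transform(fv.T).T
print(np.round(fv.mean(axis=1), 3), np.round(fv.std(axis=1), 3))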
Example #8
                    All_Classifier_Predictions,
                    np.array(Test_Params['PtdLabels_test'], ndmin=2), 0)

            data_dict_Classifier_Part = None
            Train_Params = None
            Test_Params = None
            del data_dict_Classifier_Part
            del Train_Params
            del Test_Params

            if PARAMS['use_GPU']:
                reset_GPU_session()

        PtdLabels_majority_voting = np.argmax(Majority_Voting_Ensemble_Result,
                                              axis=1)
        ConfMat_majority_voting, fscore_majority_voting = misc.getPerformance(
            PtdLabels_majority_voting, data_dict['test_label'])
        print('\n\n\nMajority Voting Ensemble Avg. F1-score: ',
              np.mean(fscore_majority_voting))

        resultFile = PARAMS['opDir'] + '/Ensemble_performance_' + PARAMS[
            'featName'] + '.csv'
        result_fid = open(resultFile, 'a+', encoding='utf-8')
        # result_fid.write('Majority Voting Ensemble Average=' + str(np.round(fscore_majority_voting[-1],4)) + ' F1-score= ' + str([str(np.round(fscore_majority_voting[i], 2)) for i in range(len(fscore_majority_voting)-1)]) + '\n')
        result_fid.write('Majority Voting Ensemble Average\t' +
                         str(fscore_majority_voting[0]) + '\t' +
                         str(fscore_majority_voting[1]) + '\t' +
                         str(fscore_majority_voting[2]) + '\n')
        result_fid.close()

        kwargs = {
            '0': ':',
                All_PtdLabels.extend(PtdLabels)
                All_GroundTruths.extend(GroundTruths)
                Predictions = None
                PtdLabels = None
                GroundTruths = None
                del Predictions
                del PtdLabels
                del GroundTruths

            # plt.subplot(211)
            # plt.plot(All_GroundTruths)
            # plt.subplot(212)
            # plt.plot(All_PtdLabels)
            # plt.show()

            ConfMat, fscore = misc.getPerformance(All_PtdLabels,
                                                  All_GroundTruths)
            print('ConfMat: ', ConfMat)
            print('fscore: ', fscore)

            get_segment_level_statistics(
                All_GroundTruths, All_PtdLabels, PARAMS['fold'], feature_type,
                PARAMS['opDir'] + '/Segment_Level_Performance.csv')

            compute_segmentation_performance(PARAMS,
                                             All_Labels,
                                             All_Predictions, [1000, 500],
                                             feature_type,
                                             win_size=101,
                                             plot_fig=True)

        if PARAMS['use_GPU']:
def test_model(PARAMS, Train_Params, target_dB):
    PtdLabels = np.empty([])
    GroundTruth = np.empty([])
    Predictions = np.empty([])

    startTime = time.process_time()
    if target_dB == None:
        # class_labels = {PARAMS['classes'][key]:key for key in PARAMS['classes'].keys()}
        for classname in ['music', 'speech']:
            files = PARAMS['test_files'][classname]
            fl_count = 0
            for fl in files:
                fl_count += 1
                fName = PARAMS['folder'] + '/' + classname + '/' + fl
                if not os.path.exists(fName):
                    continue
                if classname == 'speech':
                    batchData, batchLabel = test_file_wise_generator(
                        PARAMS, fName, '', None)
                    pred = Train_Params['model'].predict(x=batchData)
                elif classname == 'music':
                    batchData, batchLabel = test_file_wise_generator(
                        PARAMS, '', fName, None)
                    pred = Train_Params['model'].predict(x=batchData)
                pred_lab = np.argmax(pred, axis=1)
                if np.size(Predictions) <= 1:
                    Predictions = pred
                    PtdLabels = np.argmax(pred, axis=1)
                    if classname == 'speech':
                        GroundTruth = np.array([1] * np.shape(pred)[0])
                    elif classname == 'music':
                        GroundTruth = np.array([0] * np.shape(pred)[0])
                else:
                    Predictions = np.append(Predictions, pred, 0)
                    PtdLabels = np.append(PtdLabels, np.argmax(pred, axis=1))
                    if classname == 'speech':
                        GroundTruth = np.append(
                            GroundTruth, np.array([1] * np.shape(pred)[0]))
                    elif classname == 'music':
                        GroundTruth = np.append(
                            GroundTruth, np.array([0] * np.shape(pred)[0]))

                print(fl_count,
                      '/',
                      len(files),
                      target_dB,
                      'dB\t',
                      classname,
                      'pred_lab: ',
                      np.sum(pred_lab == 0),
                      np.sum(pred_lab == 1),
                      np.sum(pred_lab == 2),
                      end='\t',
                      flush=True)
                if classname == 'speech':
                    acc_fl = np.round(
                        np.sum(np.array(pred_lab) == 1) * 100 / len(pred_lab),
                        4)
                    acc_all = np.round(
                        np.sum(np.array(PtdLabels) == np.array(GroundTruth)) *
                        100 / len(PtdLabels), 4)
                    print(fl, np.shape(batchData), len(PtdLabels),
                          len(GroundTruth), ' acc=', acc_fl, acc_all)
                elif classname == 'music':
                    acc_fl = np.round(
                        np.sum(np.array(pred_lab) == 0) * 100 / len(pred_lab),
                        4)
                    acc_all = np.round(
                        np.sum(np.array(PtdLabels) == np.array(GroundTruth)) *
                        100 / len(PtdLabels), 4)
                    print(fl, np.shape(batchData), len(PtdLabels),
                          len(GroundTruth), ' acc=', acc_fl, acc_all)

    if len(PARAMS['classes']) == 3:
        files_spmu = PARAMS['test_files']['speech+music']
        fl_count = 0
        for spmu_info in files_spmu:
            fl_count += 1
            fl_sp = spmu_info['speech']
            fl_mu = spmu_info['music']
            fName_sp = PARAMS['folder'] + '/speech/' + fl_sp
            fName_mu = PARAMS['folder'] + '/music/' + fl_mu
            if target_dB == None:
                # Annotated SMR is not used in the testing function if target_dB
                # is None so that the performance can be tested at specific
                # SMR values
                batchData, batchLabel = test_file_wise_generator(
                    PARAMS, fName_sp, fName_mu, spmu_info['SMR'])
            else:
                batchData, batchLabel = test_file_wise_generator(
                    PARAMS, fName_sp, fName_mu, target_dB)
            pred = Train_Params['model'].predict(x=batchData)
            pred_lab = np.argmax(pred, axis=1)
            if np.size(Predictions) <= 1:
                Predictions = pred
                GroundTruth = np.array([2] * np.shape(pred)[0])
                PtdLabels = np.argmax(pred, axis=1)
            else:
                Predictions = np.append(Predictions, pred, 0)
                GroundTruth = np.append(GroundTruth,
                                        np.array([2] * np.shape(pred)[0]))
                PtdLabels = np.append(PtdLabels, np.argmax(pred, axis=1))
            acc_fl = np.round(
                np.sum(np.array(pred_lab) == 2) * 100 / len(pred_lab), 4)
            acc_all = np.round(
                np.sum(np.array(PtdLabels) == np.array(GroundTruth)) * 100 /
                len(PtdLabels), 4)
            if target_dB == None:
                print(fl_count, '/', len(files_spmu),
                      spmu_info['SMR'], 'dB\tspeech_music pred_lab: ',
                      np.sum(pred_lab == 0), np.sum(pred_lab == 1),
                      np.sum(pred_lab == 2), fl_sp, fl_mu, np.shape(batchData),
                      ' acc=', acc_fl, acc_all)
            else:
                print(fl_count, '/', len(files_spmu),
                      target_dB, 'dB\tspeech_music pred_lab: ',
                      np.sum(pred_lab == 0), np.sum(pred_lab == 1),
                      np.sum(pred_lab == 2), fl_sp, fl_mu, np.shape(batchData),
                      ' acc=', acc_fl, acc_all)

    testingTimeTaken = time.process_time() - startTime
    print('Time taken for model testing: ', testingTimeTaken)
    labels = [key for key in PARAMS['classes'].keys()]
    ConfMat, precision, recall, fscore = misc.getPerformance(
        PtdLabels, GroundTruth, labels)
    print(ConfMat)
    print('Precision: ', precision)
    print('Recall: ', recall)
    print('fscore: ', fscore)

    return ConfMat, precision, recall, fscore, PtdLabels, Predictions, GroundTruth, testingTimeTaken
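The per-file accuracy printed inside the loop above for a pure speech file is just the share of windows predicted as class 1 (speech); a tiny numeric example:

import numpy as np

pred_lab = np.array([1, 1, 0, 1, 2, 1])  # predicted labels for one speech file
acc_fl = np.round(np.sum(pred_lab == 1) * 100 / len(pred_lab), 4)
print(acc_fl)  # 66.6667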
def grid_search_svm(PARAMS, data_dict):
    pwrs_c = list(np.arange(-5, 1, 1))
    pwrs_gamma = list(np.arange(-5, 1, 1))
    C = np.power(2.0, pwrs_c)
    Gamma = np.power(2.0, pwrs_gamma)
    svm_type = 'single'
    njobs = multiprocessing.cpu_count() - 1
    cv_folds = 3
    print('SVM type=', svm_type, ' CV folds=', cv_folds, ' n_jobs=', njobs)

    trainingTimeTaken = 0
    start = time.process_time()

    if svm_type == 'single':
        clf = SVC(decision_function_shape='ovo', verbose=0, probability=True)
        tunable_parameters = [{'kernel': ['rbf'], 'gamma': Gamma, 'C': C}]
        CLF_CV = GridSearchCV(clf,
                              tunable_parameters,
                              cv=cv_folds,
                              iid=True,
                              refit=True,
                              n_jobs=njobs,
                              verbose=False)

    elif svm_type == 'bagging':
        clf = SVC(decision_function_shape='ovo', verbose=0, probability=True)
        '''
        This function extracts balanced bootstraps
        '''
        max_features = 1.0
        n_estimators = 10
        bagged_classifier = BalancedBaggingClassifier(
            base_estimator=clf,
            sampling_strategy=1.0,
            n_estimators=n_estimators)
        max_samples = [0.2]  #[0.001, 0.005, 0.01, 0.05]
        print('max_samples=', max_samples, ' max_features=', max_features,
              ' n_estimators=', n_estimators)
        tunable_parameters = {
            'max_samples': max_samples,
            'base_estimator__gamma': Gamma,
            'base_estimator__C': C
        }
        '''
        Perform Grid search over individual classifiers in the bag
        '''
        CLF_CV = GridSearchCV(bagged_classifier,
                              tunable_parameters,
                              scoring='accuracy',
                              cv=cv_folds,
                              iid=False,
                              refit=True,
                              n_jobs=njobs,
                              verbose=True)

    All_train_data = np.append(data_dict['train_data'], data_dict['val_data'],
                               0)
    All_train_label = np.append(data_dict['train_label'],
                                data_dict['val_label'])
    '''
    Checking if model is already available
    '''
    if not os.path.exists(PARAMS['modelName']):
        CLF_CV.fit(All_train_data, All_train_label)
        model = CLF_CV.best_estimator_
        if PARAMS['save_flag']:
            misc.save_obj(model, PARAMS['opDir'],
                          PARAMS['modelName'].split('/')[-1].split('.')[0])
            misc.save_obj(
                CLF_CV, PARAMS['opDir'],
                PARAMS['modelName'].split('/')[-1].split('.')[0] +
                '_All_Models')
    else:
        model = misc.load_obj(PARAMS['opDir'],
                              PARAMS['modelName'].split('/')[-1].split('.')[0])
        CLF_CV = misc.load_obj(
            PARAMS['opDir'],
            PARAMS['modelName'].split('/')[-1].split('.')[0] + '_All_Models')

    trainingTimeTaken = time.process_time() - start

    testingTimeTaken = 0
    start = time.process_time()

    if svm_type == 'single':
        optC = str(CLF_CV.best_params_['C'])
        optGamma = str(CLF_CV.best_params_['gamma'])
        countSV = model.n_support_
    elif svm_type == 'bagging':
        optC = str(CLF_CV.best_params_['base_estimator__C'])
        optGamma = str(CLF_CV.best_params_['base_estimator__gamma'])
        countSV = [0, 0]

    countTrPts = [
        np.sum(All_train_label == lab) for lab in np.unique(All_train_label)
    ]

    PtdLabels_train = model.predict(All_train_data)
    Predictions_train = model.predict_log_proba(All_train_data)

    PtdLabels_test = model.predict(data_dict['test_data'])
    Predictions_test = model.predict_log_proba(data_dict['test_data'])

    accuracy_train = np.mean(
        PtdLabels_train.ravel() == All_train_label.ravel()) * 100
    accuracy_test = np.mean(
        PtdLabels_test.ravel() == data_dict['test_label'].ravel()) * 100

    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train,
                                                      All_train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test,
                                                    data_dict['test_label'])

    Performance_train = np.array(
        [accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array(
        [accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])

    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test,
          'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1],
          ' SupportVectors=', countSV)
    testingTimeTaken = time.process_time() - start

    Train_Params = {
        'model': model,
        'optC': optC,
        'optGamma': optGamma,
        'countSV': countSV,
        'countTrPts': countTrPts,
        'trainingTimeTaken': trainingTimeTaken,
    }

    Test_Params = {
        'PtdLabels_train': PtdLabels_train,
        'Predictions_train': Predictions_train,
        'PtdLabels_test': PtdLabels_test,
        'Predictions_test': Predictions_test,
        'accuracy_train': accuracy_train,
        'accuracy_test': accuracy_test,
        'Performance_train': Performance_train,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
    }

    return Train_Params, Test_Params
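A trimmed-down sketch of the 'single' SVM branch above on synthetic data. The parameter grid is smaller than in the snippet, and the iid argument is dropped, which assumes scikit-learn 0.24 or newer where it no longer exists.

import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
C = np.power(2.0, np.arange(-2, 3, 2))
Gamma = np.power(2.0, np.arange(-5, 0, 2))
clf = SVC(decision_function_shape='ovo', probability=True)
CLF_CV = GridSearchCV(clf, [{'kernel': ['rbf'], 'gamma': Gamma, 'C': C}],
                      cv=3, refit=True, n_jobs=1)
CLF_CV.fit(X, y)
print(CLF_CV.best_params_, CLF_CV.best_estimator_.n_support_)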
Example #12
def test_cnn_noise(PARAMS, Train_Params):
    start = time.process_time()
    GroundTruth = []
    Predictions = {}
    PtdLabels = {}
    for dB in PARAMS['noise_dB_range']:
        Predictions[dB] = np.empty([])
        PtdLabels[dB] = []
    count = -1
    class_labels = {
        PARAMS['classes'][key]: key
        for key in PARAMS['classes'].keys()
    }

    for classname in PARAMS['test_files'].keys():
        clNum = class_labels[classname]
        files = PARAMS['test_files'][classname]
        # print('test_files: ', files)
        fl_count = 0
        for fl in files:
            fl_count += 1
            print('\t\t\t', PARAMS['fold'], PARAMS['classes'][clNum], fl,
                  fl_count, '/', len(files))
            count += 1
            numFV = 0
            if not os.path.exists(PARAMS['test_folder'] + '/' +
                                  PARAMS['featName'] + '/' + classname + '/' +
                                  fl):
                continue
            for targetdB in PARAMS['noise_dB_range']:
                batchData, batchLabel = generator_test_noise(
                    PARAMS, PARAMS['featName'], fl, clNum, targetdB)
                pred = Train_Params['model'].predict(x=batchData)
                numFV = np.shape(pred)[0]
                if np.size(Predictions[targetdB]) <= 1:
                    Predictions[targetdB] = np.array(pred, ndmin=2)
                    PtdLabels[targetdB].extend(
                        np.argmax(pred, axis=1).tolist())
                else:
                    Predictions[targetdB] = np.append(Predictions[targetdB],
                                                      np.array(pred, ndmin=2),
                                                      axis=0)
                    PtdLabels[targetdB].extend(
                        np.argmax(pred, axis=1).tolist())
                print(
                    '\t\t\t\t dB=', targetdB, ' batchData: ',
                    np.shape(batchData), np.shape(Predictions[targetdB]),
                    ' acc=',
                    np.round(
                        np.sum(
                            np.argmax(pred, axis=1) == np.array([clNum] *
                                                                numFV)) * 100 /
                        numFV, 2))
            GroundTruth.extend([clNum] * numFV)

    testingTimeTaken = time.process_time() - start
    print('\t\t\t', PARAMS['fold'], ' Time taken for model testing: ',
          testingTimeTaken)

    ConfMat = {}
    fscore = {}
    accuracy = {}
    for dB in PARAMS['noise_dB_range']:
        ConfMat_dB, fscore_dB = misc.getPerformance(PtdLabels[dB], GroundTruth)
        ConfMat_dB = np.reshape(
            ConfMat_dB, (len(PARAMS['classes']), len(PARAMS['classes'])))
        accuracy_dB = np.round(
            np.sum(np.diag(ConfMat_dB)) / np.sum(ConfMat_dB), 4)
        ConfMat[dB] = ConfMat_dB
        fscore[dB] = fscore_dB
        accuracy[dB] = accuracy_dB

    Test_Params_Noise = {
        'loss': -1,
        'accuracy': accuracy,
        'testingTimeTaken': testingTimeTaken,
        'ConfMat': ConfMat,
        'fscore': fscore,
        'PtdLabels': PtdLabels,
        'Predictions': Predictions,
        'GroundTruth': GroundTruth,
    }

    return Test_Params_Noise
Example #13
def test_cnn_ensemble(PARAMS, Ensemble_Train_Params):
    start = time.process_time()
    PtdLabels_Ensemble = []
    GroundTruth_Ensemble = []
    Predictions_Ensemble = np.empty([])
    count = -1
    class_labels = {
        PARAMS['classes'][key]: key
        for key in PARAMS['classes'].keys()
    }
    # One fresh storage dict per feature; the key is 'GroundTruth' to match the
    # accesses made further down in this function.
    individual_performances = {
        featName: {
            'Predictions': np.empty([]),
            'GroundTruth': np.empty([]),
            'fscore': [0, 0, 0]
        }
        for featName in ['Khonglah_et_al', 'Sell_et_al', 'MFCC-39',
                         'Melspectrogram', 'HNGDMFCC', 'MGDCC', 'IFCC']
    }
    temp_folder = PARAMS['opDir'] + '/__temp/'
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)

    for classname in PARAMS['test_files'].keys():
        clNum = class_labels[classname]
        files = PARAMS['test_files'][classname]
        # print('test_files: ', files)
        fl_count = 0
        for fl in files:
            fl_count += 1
            print('\t\t\t',
                  PARAMS['fold'],
                  PARAMS['classes'][clNum],
                  fl,
                  fl_count,
                  '/',
                  len(files),
                  end='\t')
            count += 1
            PtdLabels = None
            PtdLabels_temp = np.empty([])
            GroundTruth = np.empty([])
            Predictions = np.empty([])
            empty_predictions = False
            for featName in Ensemble_Train_Params.keys():

                empty_predictions = False
                curr_fName = PARAMS[
                    'test_folder'] + '/' + featName + '/' + classname + '/' + fl
                # print('curr_fName: ', curr_fName)
                if not os.path.exists(curr_fName):
                    # print('curr_file not found')
                    empty_predictions = True
                    break

                Train_Params = Ensemble_Train_Params[featName]
                batchData = None
                batchLabel = None
                temp_file = 'pred_' + classname + '_fold' + str(
                    PARAMS['fold']) + '_' + featName + '_' + fl.split('.')[0]
                # print(temp_folder, temp_file)
                # print(featName, Train_Params['model'].layers[0].output_shape, PARAMS['input_shape'][featName])
                if not os.path.exists(temp_folder + '/' + temp_file + '.pkl'):
                    batchData, batchLabel = generator_test_ensemble(
                        PARAMS, featName, fl, clNum)
                    # print('batchData: ', np.shape(batchData), np.shape(batchLabel))
                    pred = Train_Params['model'].predict(x=batchData)
                    # print('pred: ', np.shape(pred))
                    misc.save_obj(pred, temp_folder, temp_file)
                else:
                    try:
                        pred = misc.load_obj(temp_folder, temp_file)
                    except:
                        batchData, batchLabel = generator_test_ensemble(
                            PARAMS, featName, fl, clNum)
                        # print('batchData: ', np.shape(batchData), np.shape(batchLabel))
                        pred = Train_Params['model'].predict(x=batchData)
                        # print('pred: ', np.shape(pred))
                        misc.save_obj(pred, temp_folder, temp_file)

                # print('indv_labels: ', np.shape(indv_labels), np.shape(individual_performances[featName]['PtdLabels']))
                if np.size(
                        individual_performances[featName]['Predictions']) <= 1:
                    individual_performances[featName][
                        'Predictions'] = np.array(pred, ndmin=2)
                    individual_performances[featName]['GroundTruth'] = np.ones(
                        np.shape(pred)[0]) * clNum
                else:
                    individual_performances[featName][
                        'Predictions'] = np.append(
                            individual_performances[featName]['Predictions'],
                            np.array(pred, ndmin=2),
                            axis=0)
                    individual_performances[featName][
                        'GroundTruth'] = np.append(
                            individual_performances[featName]['GroundTruth'],
                            np.ones(np.shape(pred)[0]) * clNum)

                if np.size(Predictions) <= 1:
                    Predictions = np.array(pred, ndmin=2)
                    PtdLabels_temp = np.array(np.argmax(pred, axis=1),
                                              ndmin=2).T
                else:
                    # print('PtdLabels_temp: ', np.shape(PtdLabels_temp), np.shape(pred))
                    empty_predictions = False
                    if np.shape(pred)[0] != np.shape(Predictions)[0]:
                        if np.shape(pred)[0] > np.shape(Predictions)[0]:
                            pred = pred[:np.shape(Predictions)[0], :]
                        else:
                            empty_predictions = True
                            break
                    Predictions = np.add(Predictions, np.array(pred, ndmin=2))
                    PtdLabels_temp = np.append(PtdLabels_temp,
                                               np.array(np.argmax(pred,
                                                                  axis=1),
                                                        ndmin=2).T,
                                               axis=1)

            if empty_predictions:
                print(' ', end='\n')
                continue

            GroundTruth = np.ones(np.shape(Predictions)[0]) * clNum
            PtdLabels = np.argmax(Predictions, axis=1)
            # PtdLabels, label_counts = scipy.stats.mode(PtdLabels_temp, axis=1)
            # PtdLabels = np.array(PtdLabels.flatten())
            # print('PtdLabels: ', np.shape(PtdLabels), ' GroundTruth: ', np.shape(GroundTruth))

            print(np.shape(Predictions),
                  ' acc=',
                  np.round(
                      np.sum(PtdLabels == GroundTruth) * 100 /
                      np.size(GroundTruth), 2),
                  end='\n')
            if np.size(PtdLabels_Ensemble) <= 1:
                PtdLabels_Ensemble = PtdLabels
                GroundTruth_Ensemble = GroundTruth
                Predictions_Ensemble = Predictions
            else:
                PtdLabels_Ensemble = np.append(PtdLabels_Ensemble, PtdLabels)
                GroundTruth_Ensemble = np.append(GroundTruth_Ensemble,
                                                 GroundTruth)
                Predictions_Ensemble = np.append(Predictions_Ensemble,
                                                 Predictions,
                                                 axis=0)

    testingTimeTaken = time.process_time() - start
    print('\t\t\t', PARAMS['fold'], ' Time taken for model testing: ',
          testingTimeTaken)
    ConfMat_Ensemble, fscore_Ensemble = misc.getPerformance(
        PtdLabels_Ensemble, GroundTruth_Ensemble)
    accuracy_Ensemble = np.round(
        np.sum(PtdLabels_Ensemble == GroundTruth_Ensemble) * 100 /
        len(GroundTruth_Ensemble), 4)

    for featName in Ensemble_Train_Params.keys():
        # print(featName, 'individual_performances: ', np.shape(individual_performances[featName]['PtdLabels']), np.shape(GroundTruth_Ensemble))
        indv_PtdLabels = np.argmax(
            individual_performances[featName]['Predictions'], axis=1)
        ConfMat_indv, fscore_indv = misc.getPerformance(
            indv_PtdLabels, individual_performances[featName]['GroundTruth'])
        individual_performances[featName]['fscore'] = fscore_indv

    Ensemble_Test_Params = {
        'loss': -1,
        'accuracy_Ensemble': accuracy_Ensemble,
        'testingTimeTaken': testingTimeTaken,
        'ConfMat_Ensemble': ConfMat_Ensemble,
        'fscore_Ensemble': fscore_Ensemble,
        'PtdLabels_Ensemble': PtdLabels_Ensemble,
        'Predictions_Ensemble': Predictions_Ensemble,
        'GroundTruth_Ensemble': GroundTruth_Ensemble,
        'individual_performances': individual_performances,
    }

    return Ensemble_Test_Params
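The ensemble decision in test_cnn_ensemble is score fusion: the per-feature softmax outputs are accumulated with np.add and argmax then gives the ensemble label. A minimal numeric sketch:

import numpy as np

pred_featA = np.array([[0.7, 0.3], [0.4, 0.6]])
pred_featB = np.array([[0.6, 0.4], [0.1, 0.9]])
Predictions = np.add(pred_featA, pred_featB)  # accumulated as in the loop above
PtdLabels = np.argmax(Predictions, axis=1)
print(PtdLabels)  # [0 1]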
Example #14
def grid_search_gmm(PARAMS, data_dict):
    K = 10
    gmmModel_mu = GaussianMixture(n_components=K, max_iter=1000)
    gmmModel_sp = GaussianMixture(n_components=K, max_iter=1000)

    All_train_data = np.append(data_dict['train_data'], data_dict['val_data'],
                               0)
    All_train_label = np.append(data_dict['train_label'],
                                data_dict['val_label'])

    mu_idx = np.squeeze(np.where(All_train_label == 0))
    sp_idx = np.squeeze(np.where(All_train_label == 1))
    '''
    Checking if model is already available
    '''
    gmmModelFileName = PARAMS['opDir'] + PARAMS['modelName'].split(
        '/')[-1].split('.')[0] + '_gmmModel_mu_K=' + str(K) + '.pkl'
    if not os.path.exists(gmmModelFileName):
        gmmModel_mu.fit(All_train_data[mu_idx, :])
        gmmModel_sp.fit(All_train_data[sp_idx, :])

        if PARAMS['save_flag']:
            misc.save_obj(
                gmmModel_mu, PARAMS['opDir'],
                PARAMS['modelName'].split('/')[-1].split('.')[0] +
                '_gmmModel_mu_K=' + str(K))
            misc.save_obj(
                gmmModel_sp, PARAMS['opDir'],
                PARAMS['modelName'].split('/')[-1].split('.')[0] +
                '_gmmModel_sp_K=' + str(K))
    else:
        gmmModel_mu = misc.load_obj(
            PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] +
            '_gmmModel_mu_K=' + str(K))
        gmmModel_sp = misc.load_obj(
            PARAMS['opDir'], PARAMS['modelName'].split('/')[-1].split('.')[0] +
            '_gmmModel_sp_K=' + str(K))

    score_train_mu = np.array(gmmModel_mu.score_samples(All_train_data),
                              ndmin=2).T
    score_train_sp = np.array(gmmModel_sp.score_samples(All_train_data),
                              ndmin=2).T
    print('scores shape: ', np.shape(score_train_mu), np.shape(score_train_sp))
    score_train = np.append(score_train_mu, score_train_sp, 1)
    print('score_train: ', np.shape(score_train))
    PtdLabels_train = np.argmax(score_train, axis=1)

    score_test_mu = np.array(gmmModel_mu.score_samples(data_dict['test_data']),
                             ndmin=2).T
    score_test_sp = np.array(gmmModel_sp.score_samples(data_dict['test_data']),
                             ndmin=2).T
    score_test = np.append(score_test_mu, score_test_sp, 1)
    print('score_test: ', np.shape(score_test))
    PtdLabels_test = np.argmax(score_test, axis=1)

    accuracy_train = np.mean(
        PtdLabels_train.ravel() == All_train_label.ravel()) * 100
    accuracy_test = np.mean(
        PtdLabels_test.ravel() == data_dict['test_label'].ravel()) * 100

    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train,
                                                      All_train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test,
                                                    data_dict['test_label'])

    Performance_train = np.array(
        [accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array(
        [accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])

    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test,
          'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1])

    return score_test, PtdLabels_test, K, Performance_train, Performance_test
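A compact sketch of the two-GMM scoring idea in grid_search_gmm, with synthetic "music" and "speech" frames standing in for the real features; class 0 wins wherever the music GMM gives the higher log-likelihood.

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
X_mu = rng.normal(0.0, 1.0, size=(300, 4))   # stand-in music frames
X_sp = rng.normal(3.0, 1.0, size=(300, 4))   # stand-in speech frames
gmmModel_mu = GaussianMixture(n_components=2, max_iter=200).fit(X_mu)
gmmModel_sp = GaussianMixture(n_components=2, max_iter=200).fit(X_sp)

X_test = np.vstack([X_mu[:10], X_sp[:10]])
score_test = np.stack([gmmModel_mu.score_samples(X_test),
                       gmmModel_sp.score_samples(X_test)], axis=1)
PtdLabels_test = np.argmax(score_test, axis=1)  # 0 = music-like, 1 = speech-like
print(PtdLabels_test)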
def test_model(PARAMS, test_data, test_label, Train_Params):
    loss = 0
    performance = 0
    testingTimeTaken = 0
    PtdLabels = []

    start = time.process_time()
    if not PARAMS['data_generator']:
        loss, performance = Train_Params['model'].evaluate(x=test_data,
                                                           y=test_label)
        Predictions = Train_Params['model'].predict(test_data)
        PtdLabels = np.array(Predictions > 0.5).astype(int)
        GroundTruth = test_label
        testingTimeTaken = time.process_time() - start
        print('Time taken for model testing: ', testingTimeTaken)
        ConfMat, fscore = misc.getPerformance(PtdLabels, GroundTruth)
    else:
        loss, performance = Train_Params['model'].evaluate_generator(
            generator(PARAMS, PARAMS['test_folder'], PARAMS['test_files'],
                      PARAMS['batch_size']),
            steps=PARAMS['test_steps'],
            verbose=1,
        )
        print('loss: ', loss, ' performance: ', performance)

        PtdLabels = []
        GroundTruth = []
        Predictions = np.empty([])
        count = -1
        class_labels = {
            PARAMS['classes'][key]: key
            for key in PARAMS['classes'].keys()
        }
        # startTime = time.clock()
        for classname in PARAMS['test_files'].keys():
            clNum = class_labels[classname]
            files = PARAMS['test_files'][classname]
            # print('test_files: ', files)
            for fl in files:
                count += 1
                batchData, batchLabel = generator_test(PARAMS,
                                                       PARAMS['featName'], fl,
                                                       clNum)
                if batchData == []:
                    continue
                # endTime = time.clock()
                # print('Data loading time: ', endTime-startTime)

                # startTime = time.clock()
                pred = Train_Params['model'].predict(x=batchData)
                # print('Prediction time: ', time.clock()-startTime, np.shape(pred))

                if len(PARAMS['classes']) > 2:
                    pred_lab = np.argmax(pred, axis=1)
                else:
                    pred_lab = np.squeeze(
                        np.array(np.array(pred) > 0.5).astype(int))
                # print(clNum, ' batchLabel: ', batchLabel)
                # print(clNum, ' pred_lab: ', pred_lab)
                PtdLabels.extend(pred_lab)
                GroundTruth.extend(batchLabel.tolist())
                # print('pred_lab: ', np.sum(pred_lab==0), np.sum(pred_lab==1))
                # print('ground_truth: ', np.sum(batchLabel==0), np.sum(batchLabel==1))
                if np.size(Predictions) <= 1:
                    Predictions = pred
                else:
                    Predictions = np.append(Predictions, pred, 0)
                print(
                    PARAMS['classes'][clNum], fl, np.shape(batchData), ' acc=',
                    np.round(
                        np.sum(pred_lab == batchLabel) * 100 / len(batchLabel),
                        2))

        testingTimeTaken = time.process_time() - start
        print('Time taken for model testing: ', testingTimeTaken)
        ConfMat, fscore = misc.getPerformance(PtdLabels, GroundTruth)

    return loss, performance, testingTimeTaken, ConfMat, fscore, PtdLabels, Predictions, GroundTruth
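In the two-class branch above, the model outputs a single sigmoid score per sample and the 0.5 threshold yields the label; a one-line numeric example:

import numpy as np

pred = np.array([0.12, 0.81, 0.47, 0.93])  # sigmoid outputs from model.predict
pred_lab = np.squeeze(np.array(np.array(pred) > 0.5).astype(int))
print(pred_lab)  # [0 1 0 1]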
Example #16
def test_cnn_ensemble_noise(PARAMS, Ensemble_Train_Params):
    start = time.process_time()
    GroundTruth_Ensemble = {dB: [] for dB in PARAMS['noise_dB_range']}
    Predictions_Ensemble = {
        dB: np.empty([])
        for dB in PARAMS['noise_dB_range']
    }
    PtdLabels_Ensemble = {dB: [] for dB in PARAMS['noise_dB_range']}
    count = -1
    class_labels = {
        PARAMS['classes'][key]: key
        for key in PARAMS['classes'].keys()
    }
    basic_storage_cell = {
        'Predictions': np.empty([]),
        'GroundTruth': np.empty([]),
        'fscore': [0, 0, 0]
    }
    # Give every (feature, dB) entry its own shallow copy of basic_storage_cell;
    # sharing one dict object across all dB keys would make the later
    # np.size(...) checks and appends operate on the same storage.
    individual_performances = {
        featName: {db: dict(basic_storage_cell)
                   for db in PARAMS['noise_dB_range']}
        for featName in ['Khonglah_et_al', 'Sell_et_al', 'MFCC-39',
                         'Melspectrogram', 'HNGDMFCC', 'MGDCC', 'IFCC']
    }
    temp_folder = PARAMS['opDir'] + '/__temp/'
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)

    for classname in PARAMS['test_files'].keys():
        clNum = class_labels[classname]
        files = PARAMS['test_files'][classname]
        # print('test_files: ', files)
        fl_count = 0
        for fl in files:
            fl_count += 1
            print('\t\t\t',
                  PARAMS['fold'],
                  PARAMS['classes'][clNum],
                  fl,
                  fl_count,
                  '/',
                  len(files),
                  end='\n')
            count += 1
            PtdLabels = None
            GroundTruth = np.empty([])
            Predictions = {dB: np.empty([]) for dB in PARAMS['noise_dB_range']}
            PtdLabels = {dB: [] for dB in PARAMS['noise_dB_range']}
            empty_predictions = False
            for featName in Ensemble_Train_Params.keys():
                empty_predictions = False
                curr_fName = PARAMS[
                    'test_folder'] + '/' + featName + '/' + classname + '/' + fl
                if not os.path.exists(curr_fName):
                    empty_predictions = True
                    break

                Train_Params = Ensemble_Train_Params[featName]
                batchData = None
                batchLabel = None
                if not os.path.exists(PARAMS['test_folder'] + '/' + featName +
                                      '/' + classname + '/' + fl):
                    continue
                for targetdB in PARAMS['noise_dB_range']:
                    temp_file = temp_folder + '/pred_' + classname + '_fold' + str(
                        PARAMS['fold']) + '_' + featName + '_' + str(
                            targetdB) + 'dB_' + fl.split('.')[0] + '.pkl'
                    if not os.path.exists(temp_file):
                        batchData, batchLabel = generator_test_noise(
                            PARAMS, featName, fl, clNum, targetdB)
                        pred = Train_Params['model'].predict(x=batchData)
                        misc.save_obj(
                            pred, temp_folder, 'pred_' + classname + '_fold' +
                            str(PARAMS['fold']) + '_' + featName + '_' +
                            str(targetdB) + 'dB_' + fl.split('.')[0])
                    else:
                        pred = misc.load_obj(
                            temp_folder, 'pred_' + classname + '_fold' +
                            str(PARAMS['fold']) + '_' + featName + '_' +
                            str(targetdB) + 'dB_' + fl.split('.')[0])

                    if np.size(individual_performances[featName][targetdB]
                               ['Predictions']) <= 1:
                        individual_performances[featName][targetdB][
                            'Predictions'] = np.array(pred, ndmin=2)
                        individual_performances[featName][targetdB][
                            'GroundTruth'] = np.ones(np.shape(pred)[0]) * clNum
                    else:
                        individual_performances[featName][targetdB][
                            'Predictions'] = np.append(
                                individual_performances[featName][targetdB]
                                ['Predictions'],
                                np.array(pred, ndmin=2),
                                axis=0)
                        individual_performances[featName][targetdB][
                            'GroundTruth'] = np.append(
                                individual_performances[featName][targetdB]
                                ['GroundTruth'],
                                np.ones(np.shape(pred)[0]) * clNum)

                    # Score-level fusion: accumulate the posterior scores
                    # predicted from each feature for the same test file
                    if np.size(Predictions[targetdB]) <= 1:
                        Predictions[targetdB] = np.array(pred, ndmin=2)
                    else:
                        empty_predictions = False
                        if np.shape(pred)[0] != np.shape(
                                Predictions[targetdB])[0]:
                            if np.shape(pred)[0] > np.shape(
                                    Predictions[targetdB])[0]:
                                # Keep only the first rows so the shapes match
                                pred = pred[
                                    :np.shape(Predictions[targetdB])[0], :]
                            else:
                                empty_predictions = True
                                break
                        Predictions[targetdB] = np.add(Predictions[targetdB],
                                                       np.array(pred, ndmin=2))

            if empty_predictions:
                print(' ', end='\n')
                continue

            for dB in PARAMS['noise_dB_range']:
                GroundTruth = np.array(np.ones(np.shape(Predictions[dB])[0]) *
                                       clNum,
                                       ndmin=2).T
                PtdLabels[dB] = np.array(np.argmax(Predictions[dB], axis=1),
                                         ndmin=2).T
                # print('PtdLabels[dB]: ', np.shape(PtdLabels[dB]), np.shape(GroundTruth), np.sum(PtdLabels[dB]==GroundTruth), np.shape(GroundTruth)[0])
                print('\t\t\t\t',
                      dB,
                      'dB\t',
                      np.shape(Predictions[dB]),
                      ' acc=',
                      np.round(
                          np.sum(PtdLabels[dB] == GroundTruth) * 100 /
                          np.shape(GroundTruth)[0], 2),
                      end='\n')
                if np.size(PtdLabels_Ensemble[dB]) <= 1:
                    PtdLabels_Ensemble[dB] = PtdLabels[dB]
                    Predictions_Ensemble[dB] = Predictions[dB]
                    GroundTruth_Ensemble[dB] = GroundTruth
                else:
                    PtdLabels_Ensemble[dB] = np.append(PtdLabels_Ensemble[dB],
                                                       PtdLabels[dB])
                    Predictions_Ensemble[dB] = np.append(
                        Predictions_Ensemble[dB], Predictions[dB], axis=0)
                    GroundTruth_Ensemble[dB] = np.append(
                        GroundTruth_Ensemble[dB], GroundTruth)

    testingTimeTaken = time.process_time() - start
    print('\t\t\t', PARAMS['fold'], ' Time taken for model testing: ',
          testingTimeTaken)
    ConfMat_Ensemble = {}
    fscore_Ensemble = {}
    accuracy_Ensemble = {}
    for dB in PARAMS['noise_dB_range']:
        # print(dB, np.shape(PtdLabels_Ensemble[dB]), np.shape(GroundTruth_Ensemble[dB]))
        ConfMat_dB, fscore_dB = misc.getPerformance(PtdLabels_Ensemble[dB],
                                                    GroundTruth_Ensemble[dB])
        ConfMat_dB = np.reshape(
            ConfMat_dB, (len(PARAMS['classes']), len(PARAMS['classes'])))
        accuracy_dB = np.round(
            np.sum(np.diag(ConfMat_dB)) / np.sum(ConfMat_dB), 4)
        ConfMat_Ensemble[dB] = ConfMat_dB
        fscore_Ensemble[dB] = fscore_dB
        accuracy_Ensemble[dB] = accuracy_dB

    for featName in Ensemble_Train_Params.keys():
        for dB in PARAMS['noise_dB_range']:
            indv_PtdLabels_dB = np.argmax(
                individual_performances[featName][dB]['Predictions'], axis=1)
            ConfMat_indv_dB, fscore_indv_dB = misc.getPerformance(
                indv_PtdLabels_dB,
                individual_performances[featName][dB]['GroundTruth'])
            individual_performances[featName][dB]['fscore'] = fscore_indv_dB

    Ensemble_Test_Params = {
        'loss': -1,
        'accuracy_Ensemble': accuracy_Ensemble,
        'testingTimeTaken': testingTimeTaken,
        'ConfMat_Ensemble': ConfMat_Ensemble,
        'fscore_Ensemble': fscore_Ensemble,
        'PtdLabels_Ensemble': PtdLabels_Ensemble,
        'Predictions_Ensemble': Predictions_Ensemble,
        'GroundTruth_Ensemble': GroundTruth_Ensemble,
        'individual_performances': individual_performances,
    }

    return Ensemble_Test_Params
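
For clarity, the score-level fusion inside the dB loop boils down to summing each feature's posterior matrix and taking the argmax of the fused scores. Below is a minimal, self-contained sketch with synthetic data; fuse_posteriors is an illustrative helper, not part of the original code:

import numpy as np

def fuse_posteriors(per_feature_preds):
    # Illustrative helper (not from the original code): truncate every score
    # matrix to the shortest one so the row counts agree, mirroring the
    # truncation done in the ensemble loop, then sum and take the argmax
    n_rows = min(p.shape[0] for p in per_feature_preds)
    fused = np.sum([p[:n_rows, :] for p in per_feature_preds], axis=0)
    return np.argmax(fused, axis=1)

rng = np.random.default_rng(0)
preds_feat1 = rng.random((10, 2))   # posteriors from one feature, e.g. HNGDMFCC
preds_feat2 = rng.random((12, 2))   # posteriors from another feature, e.g. MGDCC
print(fuse_posteriors([preds_feat1, preds_feat2]))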
Example #17
def load_model_NB(PARAMS, test_data, test_label, input_shape):
    '''
    Checking if model is already available
    '''
    if not os.path.exists(PARAMS['modelName']):
        print('NB model does not exist')
        return {}, {}
    else:
        NB_model = misc.load_obj('/'.join(PARAMS['modelName'].split('/')[:-1]), PARAMS['modelName'].split('/')[-1].split('.')[0])
        
    start = time.process_time()

    # PtdLabels_test = NB_model.predict(test_data)
    # Predictions_test = NB_model.predict_proba(test_data)
    # GroundTruth = test_label
    
    temp_folder = PARAMS['opDir'] + '/__temp/'
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)
    PtdLabels_test = []
    Predictions_test = []
    GroundTruth = []
    for clNum in PARAMS['classes'].keys():
        files = PARAMS['test_files'][PARAMS['classes'][clNum]]
        for fl in files:
            temp_file = 'pred_' + PARAMS['classes'][clNum] + '_fold' + str(PARAMS['fold']) + '_' + PARAMS['featName'] + '_' + fl.split('.')[0]
            if not os.path.exists(temp_folder + temp_file + '.pkl'):
                FV = np.load(PARAMS['test_folder'] + '/' + PARAMS['featName'] + '/' + PARAMS['classes'][clNum] + '/' + fl, allow_pickle=True)
                FV = misc.get_feature_patches(FV, PARAMS['CNN_patch_size'], PARAMS['CNN_patch_shift_test'], input_shape)
                FV = PARAMS['std_scale'].transform(FV)
                pred_lab = NB_model.predict(FV)
                pred = NB_model.predict_proba(FV)
                misc.save_obj({'pred':pred, 'pred_lab':pred_lab}, temp_folder, temp_file)
            else:
                pred = misc.load_obj(temp_folder, temp_file)['pred']
                pred_lab = misc.load_obj(temp_folder, temp_file)['pred_lab']
            PtdLabels_test.extend(pred_lab)
            Predictions_test.extend(pred)
            GroundTruth.extend([clNum]*np.shape(pred)[0])
            print(fl, ' acc=', np.sum(np.array(pred_lab)==np.array([clNum]*np.shape(pred)[0]))/np.size(pred_lab))
    PtdLabels_test = np.array(PtdLabels_test)
    GroundTruth = np.array(GroundTruth)
            
    accuracy_test = np.mean(PtdLabels_test.ravel() == GroundTruth.ravel()) * 100
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, GroundTruth)
    Performance_test = np.array([accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])
    testingTimeTaken = time.process_time() - start    
    print('Accuracy: test=', np.round(accuracy_test,4), 'F-score: test=', np.round(fscore_test,4))
    
    Train_Params = {
        'model':NB_model,
        }
    
    Test_Params = {
        'PtdLabels': PtdLabels_test,
        'Predictions': Predictions_test,
        'accuracy': accuracy_test,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
        'fscore': fscore_test,
        'GroundTruth': GroundTruth,
        }

    return Train_Params, Test_Params
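
The per-file caching in load_model_NB follows a compute-or-load pattern: predict once, pickle the result, and reload it on later runs. A standalone sketch with plain pickle (compute_or_load and the names in the usage comment are illustrative, not from the original code):

import os
import pickle

def compute_or_load(cache_dir, name, compute_fn):
    # Illustrative helper: reuse a cached result if present,
    # otherwise compute it and cache it for the next run
    path = os.path.join(cache_dir, name + '.pkl')
    if os.path.exists(path):
        with open(path, 'rb') as f:
            return pickle.load(f)
    result = compute_fn()
    os.makedirs(cache_dir, exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(result, f)
    return result

# e.g. pred = compute_or_load('__temp', 'pred_music_fold0_MFCC-39_file1',
#                             lambda: NB_model.predict_proba(FV))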
Example #18
def grid_search_svm(PARAMS, train_data, train_label, test_data, test_label):
    C = np.power(2.0, list(np.arange(-5, 5, 1)))
    Gamma = np.power(2.0, list(np.arange(-5, 5, 1)))
    njobs = multiprocessing.cpu_count() - 1
    cv_folds = 3
    print('CV folds=', cv_folds, ' n_jobs=', njobs)

    trainingTimeTaken = 0
    start = time.process_time()

    clf_param_tuning = SVC(decision_function_shape='ovo',
                           verbose=0,
                           probability=True)
    tunable_parameters = [{'kernel': ['rbf'], 'gamma': Gamma, 'C': C}]
    CLF_CV = GridSearchCV(clf_param_tuning,
                          tunable_parameters,
                          cv=cv_folds,
                          refit=True,
                          n_jobs=njobs,
                          verbose=2)
    '''
    Checking if model is already available
    '''
    if not os.path.exists(PARAMS['modelName']):
        CLF_CV.fit(train_data, train_label.flatten())
        model = CLF_CV.best_estimator_
        if PARAMS['save_flag']:
            misc.save_obj(model, PARAMS['opDir'],
                          PARAMS['modelName'].split('/')[-1].split('.')[0])
            misc.save_obj(
                CLF_CV, PARAMS['opDir'],
                PARAMS['modelName'].split('/')[-1].split('.')[0] +
                '_All_Models')
    else:
        model = misc.load_obj(PARAMS['opDir'],
                              PARAMS['modelName'].split('/')[-1].split('.')[0])
        CLF_CV = misc.load_obj(
            PARAMS['opDir'],
            PARAMS['modelName'].split('/')[-1].split('.')[0] + '_All_Models')

    trainingTimeTaken = time.process_time() - start

    testingTimeTaken = 0
    start = time.process_time()

    optC = str(CLF_CV.best_params_['C'])
    optGamma = str(CLF_CV.best_params_['gamma'])
    countSV = model.n_support_

    countTrPts = [np.sum(train_label == lab) for lab in np.unique(train_label)]

    PtdLabels_train = model.predict(train_data)
    # Predictions_train = model.predict_log_proba(train_data)

    PtdLabels_test = model.predict(test_data)
    Predictions_test = model.predict_log_proba(test_data)

    accuracy_train = np.mean(
        PtdLabels_train.ravel() == train_label.ravel()) * 100
    accuracy_test = np.mean(PtdLabels_test.ravel() == test_label.ravel()) * 100

    ConfMat_train, fscore_train = misc.getPerformance(PtdLabels_train,
                                                      train_label)
    ConfMat_test, fscore_test = misc.getPerformance(PtdLabels_test, test_label)

    # Performance_train = np.array([accuracy_train, fscore_train[0], fscore_train[1], fscore_train[2]])
    Performance_test = np.array(
        [accuracy_test, fscore_test[0], fscore_test[1], fscore_test[2]])

    print('Accuracy: train=', accuracy_train, ' test=', accuracy_test,
          'F-score: train=', fscore_train[-1], ' test=', fscore_test[-1],
          ' SupportVectors=', countSV)
    testingTimeTaken = time.process_time() - start

    Train_Params = {
        'model': model,
        'optC': optC,
        'optGamma': optGamma,
        'countSV': countSV,
        'countTrPts': countTrPts,
        'trainingTimeTaken': trainingTimeTaken,
    }

    Test_Params = {
        'PtdLabels': PtdLabels_test,
        'Predictions': Predictions_test,
        'accuracy': accuracy_test,
        'Performance_test': Performance_test,
        'testingTimeTaken': testingTimeTaken,
        'fscore': fscore_test,
        'GroundTruth': test_label,
    }

    # return model, optC, optGamma, Predictions_test, Performance_test, countSV, countTrPts
    return Train_Params, Test_Params
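
The grid search above can be exercised end-to-end on synthetic data. A compact sketch with the same powers-of-two C/gamma grid; the make_classification dataset is illustrative only:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Synthetic stand-in for the real feature matrices
X, y = make_classification(n_samples=200, n_features=10, random_state=0)
param_grid = {'kernel': ['rbf'],
              'C': np.power(2.0, np.arange(-5, 5)),
              'gamma': np.power(2.0, np.arange(-5, 5))}
search = GridSearchCV(SVC(decision_function_shape='ovo', probability=True),
                      param_grid, cv=3, refit=True, n_jobs=-1)
search.fit(X, y)
print(search.best_params_, search.best_estimator_.n_support_)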