Example No. 1
    def comet_additional_info(exp, model, save_path, X_test, y_test,
                              embedding_type, model_type):
        import sklearn.metrics
        import matplotlib.pyplot as plt
        from sklearn.metrics import precision_recall_fscore_support as prf
        # assumes model.predict returns class labels (threshold/argmax first
        # if the network outputs probabilities)
        NN_test_preds = model.predict(X_test)
        class_rep = sklearn.metrics.classification_report(
            y_test, NN_test_preds)

        # per-class precision/recall/F1/support; average=None already returns
        # one value per class, so the binary and multiclass cases need no
        # branching
        prec, rec, f_1, supp = prf(y_test, NN_test_preds, average=None)

        #get AID number
        import ntpath
        #get base file name
        folder, base = ntpath.split(save_path)
        #split file name at the last '_'; assumes files are saved as AID_xxx_endinfo.pkl
        AID, _, end_info = base.rpartition('_')
        exp.add_tag(AID)
        #save data location, AID info, and version info
        exp.log_dataset_info(name=AID, version=end_info, path=save_path)
        #save model params
        #exp.log_parameters(trained_mod.get_params())
        #save metrics report to comet
        if len(f_1) == 2:
            for i, name in enumerate(['Active', 'Inactive']):
                exp.log_metric('f1 class ' + name, f_1[i])
                exp.log_metric('Recall class ' + name, rec[i])
                exp.log_metric('Precision class ' + name, prec[i])
        else:
            for i, name in enumerate(['Active', 'Inconclusive', 'Inactive']):
                exp.log_metric('f1 class ' + name, f_1[i])
                exp.log_metric('Recall class ' + name, rec[i])
                exp.log_metric('Precision class ' + name, prec[i])
        exp.log_other('Classification Report', class_rep)
        #save model in data_folder with the associated comet experiment number
        #        exp_num = exp.get_key()
        #        model_save = folder+'\\'+model_type+'_'+exp_num+'.pkl'
        #        pickle_on = open(model_save,'wb')
        #        pickle.dump(fast_NN,pickle_on)
        #        pickle_on.close()
        #        #log trained model location
        #        exp.log_other('Trained Model Path',model_save)
        #save some informative tags:
        tags = [AID, end_info, model_type]
        exp.add_tags(tags)
        exp.add_tag('SVM')
        exp.add_tag(embedding_type)
        #save ROC curve
        exp.log_figure(figure_name='ROC-Prec/Recall', figure=plt)
        plt.show()

        #tell comet that the experiment is over
        exp.end()
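
For reference, a minimal standalone sketch (with made-up labels) of what prf(..., average=None) returns, since the examples on this page all rely on it: one array per metric, indexed by class.

from sklearn.metrics import precision_recall_fscore_support as prf

y_true = [0, 0, 1, 1, 1]
y_pred = [0, 1, 1, 1, 0]
prec, rec, f_1, supp = prf(y_true, y_pred, average=None)
# one entry per class: prec == rec == f_1 == [0.5, 0.667], supp == [2, 3]
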
Example No. 2
    def train_LGBM(X_train, X_test, y_train, y_test, split_ID):
        # assumes `exp` (a comet_ml Experiment), `sklearn`, and `prf` are
        # available in the enclosing scope
        import lightgbm as lgb
        #make model class
        lgbm_model = lgb.LGBMClassifier(boosting_type='gbdt',
                                        num_leaves=31,
                                        max_depth=-1,
                                        learning_rate=0.1,
                                        n_estimators=500,
                                        subsample_for_bin=200000,
                                        objective='binary',
                                        is_unbalance=True,
                                        min_split_gain=0.0,
                                        min_child_weight=0.001,
                                        min_child_samples=20,
                                        subsample=1.0,
                                        subsample_freq=0,
                                        colsample_bytree=1.0,
                                        reg_alpha=0.0,
                                        reg_lambda=0.0,
                                        random_state=None,
                                        n_jobs=-1,
                                        silent=True,
                                        importance_type='split')
        #train model
        lgbm = lgbm_model.fit(X_train, y_train)
        lgbm_preds = lgbm.predict(X_test)
        prec, rec, f_1, supp = prf(y_test, lgbm_preds, average=None)
        class_rep = sklearn.metrics.classification_report(y_test, lgbm_preds)
        exp.log_other('Classification Report ' + split_ID, class_rep)
        mcc = sklearn.metrics.matthews_corrcoef(y_test, lgbm_preds)

        #if first iteration, report model parameters to comet
        if split_ID == '0':
            exp.log_parameters(lgbm.get_params())
        return prec, rec, f_1, supp, mcc
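
A hypothetical driver for train_LGBM, assuming the module-level comet_ml Experiment `exp` and the sklearn/prf imports the snippet relies on are in scope; the dataset here is synthetic.

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# imbalanced toy problem, since the model sets is_unbalance=True
X, y = make_classification(n_samples=500, weights=[0.9], random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)
prec, rec, f_1, supp, mcc = train_LGBM(X_tr, X_te, y_tr, y_te, split_ID='0')
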
Example No. 3
    def test(self):
        print("======================TEST MODE======================")
        pred = self.best_model.fit_predict(self.X_test)

        gt = self.y_test.astype(int)

        from sklearn.metrics import (
            precision_recall_fscore_support as prf,
            accuracy_score,
            roc_auc_score,
        )

        # fit_predict returns +1 for inliers and -1 for outliers, so -pred
        # works as an anomaly score and pred < 0 marks predicted anomalies
        auc = roc_auc_score(gt, -pred)
        pred = pred < 0
        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average="binary")

        print(
            "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}".format(
                accuracy, precision, recall, f_score
            )
        )

        os.makedirs(self.result_path, exist_ok=True)

        np.save(
            self.result_path + "result.npy",
            {
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1": f_score,
                "auc": auc,
            },
        )
        return accuracy, precision, recall, f_score
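
self.best_model is not named in the source; the fit_predict / pred < 0 pattern matches sklearn outlier detectors that return +1 for inliers and -1 for outliers. A minimal sketch with IsolationForest as an assumed stand-in:

import numpy as np
from sklearn.ensemble import IsolationForest

rng = np.random.RandomState(0)
X_test = rng.randn(200, 4)
pred = IsolationForest(random_state=0).fit_predict(X_test)  # +1 / -1
is_anomaly = pred < 0  # same conversion as in the example above
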
Example No. 4
    def test_all(self):
        print("======================TEST MODE======================")
        self.X_test = np.concatenate([self.X_train, self.X_test], axis=0)
        pred = self.best_model.predict(self.X_test)

        gt = np.concatenate([self.y_train, self.y_test])
        gt = gt.astype(int)

        from sklearn.metrics import (precision_recall_fscore_support as prf,
                                     accuracy_score, roc_auc_score)

        auc = roc_auc_score(gt,
                            -self.best_model.decision_function(self.X_test))
        pred = pred < 0
        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average="binary")

        print(
            "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, AUC-score: {:0.4f}"
            .format(accuracy, precision, recall, f_score, auc))

        os.makedirs(self.result_path, exist_ok=True)

        np.save(
            self.result_path + "result.npy",
            {
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1": f_score,
                "auc": auc,
            },
        )
        return accuracy, precision, recall, f_score, auc
Example No. 5
def get_DBSCAN_pca_result(filename, eps, min_samples):
    # read the training data
    data, labels = read('data\\' + filename + '_train.data')
    # reduce the data to two dimensions
    reduced_data = PCA(n_components=2).fit_transform(data)
    reduced_data = normalize(reduced_data)
    #print(reduced_data)
    #data_test , test_lable = read('data\\'+ filename +'_test.data');
    #labels_dict = get_labels_num(labels)
    t0 = time()
    dbscan = DBSCAN(eps=eps,
                    min_samples=min_samples,
                    metric='euclidean',
                    algorithm='auto',
                    leaf_size=30,
                    p=None,
                    n_jobs=1).fit(reduced_data)
    # prediction results
    t1 = time()
    #print(labels_dict)
    labels = change_labels_2_num(labels)
    predict_label = dbscan.labels_
    #print(predict_label)
    #for temp in predict_label:
    #    print(temp)
    print("数据降至二维 eps:", eps, "\t min_samples: ", min_samples)
    precision, recall, fbeta_score, support = prf(labels,
                                                  predict_label,
                                                  average='weighted')
    print('precision:', precision)
    print('recall:', recall)
    print('fbeta_score:', fbeta_score)
    print('elapsed time:', t1 - t0)
Example No. 6
    def evaluate(self, x_train, y_train, x_test, y_test):
        def _compute_energy(X):
            energy = []
            n_x = len(X)
            max_batches = n_x // self.config.batch_size
            if n_x % self.config.batch_size != 0: max_batches += 1
            for x_batch in tqdm(iter_data(X, size=self.config.batch_size),
                                total=max_batches):
                #z = self.session.run(self.model.z, feed_dict=self.model.get_feed_dict(x_batch))
                #energy.append( self.session.run(self.model.compute_energy(z, phi, mu, scale)) )
                energy.append(
                    self.session.run(
                        self.model.energy,
                        feed_dict=self.model.get_feed_dict(x_batch)))
            return np.concatenate(energy)

        eng_train = _compute_energy(x_train)
        eng_test = _compute_energy(x_test)
        assert len(eng_train) == len(x_train) and len(eng_test) == len(
            x_test), 'double check'

        combined_energy = np.concatenate((eng_train, eng_test))
        # threshold at the 80th percentile: the top 20% highest-energy
        # samples are flagged as anomalies
        thresh = np.percentile(combined_energy, 100 - 20)

        pred = (eng_test > thresh).astype(int)
        gt = y_test.astype(int)

        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average='binary')

        print(
            "Seed : {:3d}, Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}"
            .format(self.config.seed, accuracy, precision, recall, f_score))
        return accuracy, precision, recall, f_score
Example No. 7
    def train_kSVM(X_train, X_test, y_train, y_test, split_ID):
        kSVM = SVC(kernel='rbf',
                   degree=3,
                   gamma='auto',
                   coef0=0.0,
                   C=1.0,
                   tol=0.001,
                   probability=False,
                   class_weight='balanced',
                   shrinking=False,
                   cache_size=200,
                   verbose=False,
                   max_iter=-1,
                   random_state=None,
                   decision_function_shape='ovo')
        kSVM_model = kSVM.fit(X_train, y_train)

        kSVM_preds = kSVM_model.predict(X_test)
        prec, rec, f_1, supp = prf(y_test, kSVM_preds, average=None)
        class_rep = sklearn.metrics.classification_report(y_test, kSVM_preds)
        exp.log_other('Classification Report ' + split_ID, class_rep)
        mcc = sklearn.metrics.matthews_corrcoef(y_test, kSVM_preds)

        #if first iteration, report model parameters to comet
        if split_ID == '0':
            exp.log_parameters(kSVM_model.get_params())
        return prec, rec, f_1, supp, mcc
Example No. 8
def main():
    rfc = RFC(n_estimators=100, n_jobs=-1)
    fs = SelectFromModel(rfc)
    pca = PCA()
    svm = SVC()
    estimators = list(zip(["feature_selection", "pca", "svm"], [fs, pca, svm]))
    pl = Pipeline(estimators)
    parameters = {
        "feature_selection__threshold": ["mean", "median"],
        "pca__n_components": [0.8, 0.5],
        "svm__gamma": [0.001, 0.01, 0.05],
        "svm__C": [1, 10],
    }
    gclf = GridSearchCV(pl, parameters, n_jobs=-1, verbose=2)
    
    digits = load_digits()
    X = digits.data
    y = digits.target
    first_fold = True
    trues = []
    preds = []
    for train_index, test_index in SKF().split(X, y):
        if first_fold:
            gclf.fit(X[train_index], y[train_index])
            clf = gclf.best_estimator_
            first_fold = False
        clf.fit(X[train_index], y[train_index])
        trues.append(y[test_index])
        preds.append(clf.predict(X[test_index]))
    
    # after all folds: aggregate predictions and report macro-averaged metrics
    true_labels = np.hstack(trues)
    pred_labels = np.hstack(preds)
    print("p:{0:.6f} r:{1:.6f} f1:{2:.6f}".format(
        *prf(true_labels, pred_labels, average="macro")))
Example No. 9
def get_kmeans_result(filename):
    # read the training and test data
    data, labels = read('data\\' + filename + '_train.data')
    data_test, test_label = read('data\\' + filename + '_test.data')
    n_samples, n_features = data.shape
    n_digits = len(np.unique(labels))
    print("未使用数据降维,init='random',n_init=10")
    print("n_digits: %d, \t n_samples %d, \t n_features %d" %
          (n_digits, n_samples, n_features))
    #kmeans = KMeans(init='k-means++', n_clusters=n_digits, n_init=10).fit(data)
    t0 = time()
    kmeans = KMeans(init='random', n_clusters=n_digits, n_init=10).fit(data)
    t1 = time()
    predict_label = kmeans.predict(data_test)
    t2 = time()
    labels_dict = get_labels_num(labels)

    predict_label_d = [labels_dict[k] for k in predict_label]

    #print(predict_label_d)

    #print(test_lable)
    precision, recall, fbeta_score, support = prf(test_label,
                                                  predict_label_d,
                                                  average='weighted')
    print('precision:', precision)
    print('recall:', recall)
    print('fbeta_score:', fbeta_score)
    print('model training time:', t1 - t0)
    print('test prediction time:', t2 - t1)
Example No. 10
def perform_testing():

    print('--- Performing Oracle Evaluation ---')
    testing_data_dict = helpers.csv_to_dict(training=False)
    keys = list(testing_data_dict.keys())
    testing_key = keys[0]  # test on the first composer key
    print('Testing on: ' + ' '.join(testing_key))
    # Get data
    _, y_annotated, _ = helpers.fetch_data(testing_data_dict, testing_key)

    # Get data dictionary
    training_data_dict = helpers.csv_to_dict(training=True)
    training_keys = sorted(list(training_data_dict.keys()))[2]
    print('Using predictions from: ' + " ".join(training_keys))
    # Get data
    _, y_noisy, _ = helpers.fetch_data(training_data_dict, training_keys)

    res = prf(y_annotated, y_noisy, average='binary')
    cls_error = np.sum(
        np.abs(y_annotated - y_noisy)) / np.shape(y_annotated)[0] * 100.

    print('Precision: %.2f' % res[0])
    print('Recall: %.2f' % res[1])
    print('Fscore: %.2f' % res[2])
    print('Error: %.2f' % cls_error)

    return None
Example No. 11
    def train_SVM(X_train, X_test, y_train, y_test, split_ID):
        sgd_linear_SVM = SGDClassifier(loss='hinge',
                                       penalty='l2',
                                       alpha=0.0001,
                                       l1_ratio=0.15,
                                       fit_intercept=True,
                                       max_iter=500000,
                                       tol=0.001,
                                       shuffle=True,
                                       verbose=0,
                                       epsilon=0.1,
                                       n_jobs=-1,
                                       random_state=None,
                                       learning_rate='optimal',
                                       eta0=0.0,
                                       power_t=0.5,
                                       early_stopping=False,
                                       validation_fraction=0.1,
                                       n_iter_no_change=5,
                                       class_weight='balanced',
                                       warm_start=False,
                                       average=False)
        sgd_linear_SVM_model = sgd_linear_SVM.fit(X_train, y_train)

        sgd_lSVM_preds = sgd_linear_SVM_model.predict(X_test)
        prec, rec, f_1, supp = prf(y_test, sgd_lSVM_preds, average=None)
        class_rep = sklearn.metrics.classification_report(
            y_test, sgd_lSVM_preds)
        exp.log_other('Classification Report ' + split_ID, class_rep)
        mcc = sklearn.metrics.matthews_corrcoef(y_test, sgd_lSVM_preds)

        #if first iteration, report model parameters to comet
        if split_ID == '0':
            exp.log_parameters(sgd_linear_SVM_model.get_params())
        return prec, rec, f_1, supp, mcc
Example No. 12
def calc_and_save_metrics(y_true, pred_probs, model_type, embedding_type, AID,
                          metric_dict_list, iter_num, test_train, hist):
    '''Takes in predicted probabilities and true labels, computes metrics,
    and appends them as a dict to the provided list. Returns this list.'''
    #save the hist from our DNNS if needed
    history = hist

    #make categorical preds at the 0.5 threshold; pred_probs holds the DNN's
    #class-1 probabilities (a consequence of the categorical embedding)
    class_preds = [x >= 0.5 for x in pred_probs]
    #calculate all metrics
    prec, rec, f_1, supp = prf(y_true, class_preds, average=None)
    mcc = matthews_corrcoef(y_true, class_preds)
    conf_mat = confusion_matrix(y_true, class_preds)
    prec_array, recall_array, thresh_array = precision_recall_curve(
        y_true, pred_probs)
    auc_PR = auc(recall_array, prec_array)

    results_array = np.concatenate((prec, rec, f_1, supp)).tolist() + [
        mcc, prec_array, recall_array, thresh_array, conf_mat, auc_PR
    ]
    metric_names = [
        'Classifier', 'Embedding', 'AID', 'Iteration Number', 'test_train',
        'prec_Inactive', 'prec_Active', 'rec_Inactive', 'rec_Active',
        'f_1_Inactive', 'f_1_Active', 'supp_Inactive', 'supp_Active', 'mcc',
        'prec_array', 'rec_array', 'thresh_array', 'conf_matrix', 'auc', 'hist'
    ]
    metric_dict_list.append(
        dict(
            zip(metric_names,
                [model_type, embedding_type, AID, iter_num, test_train] +
                results_array + [history])))
    return metric_dict_list
Example No. 13
def sentPred(trainfile, testfile, result, report):
    traindata = np.loadtxt(trainfile)
    testdata = np.loadtxt(testfile)

    x_train = traindata[:, 1:]
    y_train = traindata[:, 0]

    y_pred_stan = traindata[:, -1]
    score_train_stan = ascore(y_train, y_pred_stan)
    rep_train_stan = prf(y_train, y_pred_stan, average=None)

    clf_lda = lda()
    clf_lda.fit(x_train, y_train)
    y_pred_lda = clf_lda.predict(x_train)
    score_train_lda = ascore(y_train, y_pred_lda)
    rep_train_lda = prf(y_train, y_pred_lda, average=None)
    test_pred_lda = clf_lda.predict(testdata)

    clf_log = log()
    clf_log.fit(x_train, y_train)
    y_pred_log = clf_log.predict(x_train)
    score_train_log = ascore(y_train, y_pred_log)
    rep_train_log = prf(y_train, y_pred_log, average=None)
    test_pred_log = clf_log.predict(testdata)

    clf_knn = knn(n_neighbors=1)
    clf_knn.fit(x_train, y_train)
    y_pred_knn = clf_knn.predict(x_train)
    score_train_knn = ascore(y_train, y_pred_knn)
    rep_train_knn = prf(y_train, y_pred_knn, average=None)
    test_pred_knn = clf_knn.predict(testdata)

    separator = np.array((9, ))
    test_pred = np.concatenate(
        (test_pred_lda, separator, test_pred_log, separator, test_pred_knn))
    np.savetxt(result, test_pred, fmt='%i')

    np.savetxt(report,
               rep_train_stan + rep_train_lda + rep_train_log + rep_train_knn,
               fmt='%10.5f')

    # append the summary scores as text ('ab' would require bytes in Python 3)
    f = open(report, 'a')
    f.write('stan: ' + str(score_train_stan) + '\n')
    f.write('lda: ' + str(score_train_lda) + '\n')
    f.write('log: ' + str(score_train_log) + '\n')
    f.write('knn: ' + str(score_train_knn) + '\n')
    f.close()
Example No. 14
    def test(self):
        print("======================TEST MODE======================")
        # self.dagmm.load_stat
        self.ae.load_state_dict(
            torch.load(self.model_save_path + "parameter.pth"))
        self.ae.eval()
        vae_loss = VAE_LOSS()
        vae_score = VAE_Outlier_SCORE()

        if self.data_name == 'optdigits':
            loss_type = 'BCE'
        else:
            loss_type = 'MSE'

        # the test loader is assumed to yield the full test set in one batch
        for _, (x, y, m) in enumerate(self.testing_loader):
            y = y.data.cpu().numpy()
            x = x.to(self.device).float()
            m = m.to(self.device).float()
            _, _, xhat1, xhat2, mu1, mu2, logvar1, logvar2 = self.ae(
                x.float(), x.float(), m, m)
            error1 = vae_score(xhat1, x, mu1, logvar1, loss_type)
            error2 = vae_score(xhat2, x, mu2, logvar2, loss_type)
            n_non_missing = m.sum(dim=1)
            error = (error1 / n_non_missing + error2 / n_non_missing)

        error = error.data.cpu().numpy()
        thresh = np.percentile(error, self.data_normaly_ratio * 100)
        print("Threshold :", thresh)

        pred = (error > thresh).astype(int)
        gt = y.astype(int)

        from sklearn.metrics import (
            precision_recall_fscore_support as prf,
            accuracy_score,
            roc_auc_score,
        )

        auc = roc_auc_score(gt, error)
        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average="binary")

        print(
            "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, AUC : {:0.4f}"
            .format(accuracy, precision, recall, f_score, auc))

        os.makedirs(self.result_path, exist_ok=True)

        np.save(
            self.result_path + "result.npy",
            {
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1": f_score,
                "auc": auc,
            },
        )
        return accuracy, precision, recall, f_score, auc
Example No. 15
def pr(labels, scores, percen):
    thresh = np.percentile(scores, 100 - percen)
    print("Threshold :", thresh)
    pred = (scores >= thresh).astype(int)
    labels = np.array(labels)
    gt = labels.astype(int)
    precision, recall, f_score, support = prf(gt, pred, average='binary')
    print('precision %f , recall %f , f1: %f' % (precision, recall, f_score))
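
A hypothetical call, assuming the module-level numpy and prf imports the snippet relies on; scores and labels here are synthetic.

import numpy as np

rng = np.random.RandomState(0)
scores = rng.rand(1000)
labels = (scores > 0.8).astype(int)  # synthetic ground truth
pr(labels, scores, percen=20)        # flags the top 20% of scores
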
Example No. 16
def get_accuracy_precision_recall_fscore(y_true: list, y_pred: list):
    accuracy = accuracy_score(y_true, y_pred)
    # warn_for=() avoids log warnings for any result being zero
    precision, recall, f_score, _ = prf(y_true, y_pred, average='binary',
                                        warn_for=())
    if precision == 0 and recall == 0:
        f01_score = 0
    else:
        f01_score = fbeta_score(y_true, y_pred, average='binary', beta=0.1)
    return accuracy, precision, recall, f_score, f01_score
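
The beta=0.1 in fbeta_score weights precision far more heavily than recall, since F_beta = (1 + beta^2) * P * R / (beta^2 * P + R); a small standalone check:

from sklearn.metrics import fbeta_score

y_true = [0, 1, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1, 1]  # precision 2/3, recall 1/2
print(fbeta_score(y_true, y_pred, average='binary', beta=0.1))  # ~0.664
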
Example No. 17
def getPRF(fileName):
    y_p_t = readFile(fileName)
    y_pred = y_p_t[0]
    y_true = y_p_t[1]
    # micro-averaged precision/recall/F1, reported as percentages
    scores = prf(y_true, y_pred, average='micro')
    p = round(scores[0] * 100, 1)
    r = round(scores[1] * 100, 1)
    f = round(scores[2] * 100, 1)
    return np.array([p, r, f])
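
Since getPRF uses average='micro', note that micro-averaged precision, recall, and F-score are computed from pooled TP/FP/FN counts, so for single-label classification all three equal the accuracy and the returned array holds three identical numbers:

from sklearn.metrics import precision_recall_fscore_support as prf

p, r, f = prf([0, 1, 2, 2, 1], [0, 2, 2, 1, 1], average='micro')[:3]
print(p, r, f)  # 0.6 0.6 0.6 -- each equals the accuracy (3/5 correct)
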
Example No. 19
    def test(self):
        print("======================TEST MODE======================")
        # keep stochastic layers (e.g., dropout) active at test time so the
        # repeated forward passes below differ
        self.ae.train()
        mse_loss = torch.nn.MSELoss(reduction='none')
        if self.data_name == 'optdigits':
            mse_loss = torch.nn.BCELoss(reduction='none')

        error_list = []
        for _ in range(1000):  # ensemble the score over 1000 stochastic forward passes
            with torch.no_grad():
                # the test loader is assumed to yield the whole test set as a
                # single batch (image data would need this part changed)
                for _, (x, y) in enumerate(self.testing_loader):
                    y = y.data.cpu().numpy()
                    x = x.to(self.device).float()
                    _, _, xhat1, xhat2 = self.ae(x.float(), x.float())
                    error = mse_loss(xhat1, x) + mse_loss(xhat2, x)
                    error = error.mean(dim=1)
                error = error.data.cpu().numpy()
                error_list.append(error)
        error_list = np.array(error_list)
        error = error_list.mean(axis=0)
        from sklearn.metrics import (
            precision_recall_fscore_support as prf,
            accuracy_score,
            roc_auc_score,
        )

        thresh = np.percentile(error, self.dataset.__anomalyratio__() * 100)
        print("Threshold :", thresh)

        pred = (error > thresh).astype(int)
        gt = y.astype(int)
        auc = roc_auc_score(gt, error)
        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average="binary")

        print(
            "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, AUC : {:0.4f}".format(
                accuracy, precision, recall, f_score, auc
            )
        )

        os.makedirs(self.result_path, exist_ok=True)

        np.save(
            self.result_path + "result.npy",
            {
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1": f_score,
                "auc": auc,
            },
        )
        print("result save to {}".format(self.result_path))
        return accuracy, precision, recall, f_score, auc
Example No. 20
    def update(self, y_hat, y_test):
        y_hat, y_test = y_hat.flatten(), y_test.flatten()

        # prf returns (precision, recall, fscore, support); with an average
        # specified the support entry is None, so [:-1] drops it
        new_prf = np.array(prf(y_test, y_hat, average='weighted'))[:-1]
        new_acc = accuracy_score(y_test, y_hat)

        self.prf = update_moving_average(self.prf, new_prf, self.n)
        self.acc = update_moving_average(self.acc, new_acc, self.n)

        self.n = 1 if self.n is None else self.n + 1
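
update_moving_average is not shown in the source; a plausible reconstruction (hypothetical, not the original helper) is a running mean where n counts the previously accumulated observations and None marks the first update:

import numpy as np

def update_moving_average(old, new, n):
    # hypothetical sketch: `old` is the mean of the previous `n`
    # observations; n=None means nothing has been accumulated yet
    if n is None:
        return np.asarray(new, dtype=float)
    return (old * n + new) / (n + 1)
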
Example No. 21
    def test(self):
        log_density_test = []
        y_test = []

        self.ae.eval()
        for batch_idx, (x, y, _) in enumerate(self.testing_loader):
            x = to_var(x)
            x = x.float()
            y = y.float()
            log_density = self.ae.log_prob(x)
            y_test.append(y)

            log_density_test.append(log_density)

        log_density_test = torch.cat(log_density_test)
        y_test = torch.cat(y_test)

        y_test = y_test.data.cpu().numpy()
        log_density_test = log_density_test.data.cpu().numpy()


        thresh = np.percentile(log_density_test,
                               (1 - self.data_normaly_ratio) * 100)
        print("Threshold :", thresh)

        pred = (log_density_test < thresh).astype(int)
        gt = y_test.astype(int)
        auc = roc_auc_score(gt, -log_density_test)

        from sklearn.metrics import precision_recall_fscore_support as prf, accuracy_score

        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average='binary')

        print(
            "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, AUC:{:0.4f}"
            .format(accuracy, precision, recall, f_score, auc))

        os.makedirs(self.result_path, exist_ok=True)

        np.save(
            self.result_path + "result.npy",
            {
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1": f_score,
                "auc": auc,
            },
        )
        print("result save to {}".format(self.result_path))
        return accuracy, precision, recall, f_score, auc
Example No. 22
def train_test_svm(cla):
    p = Preprocess()
    train_label, train_matrix, test_label, test_matrix = p.preprocess_fourtype()
    # train svm
    cla.fit(train_matrix, train_label)

    # predict train matrix to check the model
    print('\n predict train data')
    predict_trainlabel = cla.predict(train_matrix)
    # show diff
    # show_diff(predict_trainlabel, train_label)
    p, r, f, s = prf(train_label, predict_trainlabel)
    print(p, r, f, s)

    # predict test matrix to check the precision
    print('\n predict test data')
    predict_testlabel = cla.predict(test_matrix)
    # show diff
    # show_diff(predict_testlabel, test_label)
    p, r, f, s = prf(test_label, predict_testlabel)
    print(p, r, f, s)
Example No. 23
    def test(self):
        print("======================TEST MODE======================")
        self.ae.load_state_dict(torch.load(self.model_save_path + "parameter.pth"))
        self.ae.eval()
        loss = torch.nn.MSELoss(reduction='none')
        if self.data_name == 'optdigits':
            loss = torch.nn.BCELoss(reduction='none')

        for _, (x, y, m) in enumerate(self.testing_loader):
            y = y.data.cpu().numpy()
            x = x.to(self.device).float()
            m = m.to(self.device).float()

            _, _, xhat1, xhat2 = self.ae(x.float(), x.float(), m, m)
            error = loss(xhat1, x) + loss(xhat2, x)
            error = error.sum(dim=1)
        error = error.data.cpu().numpy()
        thresh = np.percentile(error, self.data_normaly_ratio * 100)
        print("Threshold :", thresh)

        pred = (error > thresh).astype(int)
        gt = y.astype(int)

        from sklearn.metrics import (
            precision_recall_fscore_support as prf,
            accuracy_score,
            roc_auc_score
        )
        gt = gt.squeeze()
        auc = roc_auc_score(gt, error)
        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average="binary")

        print(
            "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, AUC : {:0.4f}".format(
                accuracy, precision, recall, f_score, auc
            )
        )

        os.makedirs(self.result_path, exist_ok=True)

        np.save(
            self.result_path + "result.npy",
            {
                "auc": auc,
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1": f_score,
            },
        )
        return accuracy, precision, recall, f_score, auc
Example No. 24
    def train_RF(X_train, X_test, y_train, y_test, split_ID):
        rf = RandomForestClassifier(n_estimators=100, random_state=2562,
                                    class_weight="balanced_subsample",
                                    n_jobs=-1)
        rand_for = rf.fit(X_train, y_train)
        rf_preds = rand_for.predict(X_test)
        prec, rec, f_1, supp = prf(y_test, rf_preds, average=None)
        class_rep = sklearn.metrics.classification_report(y_test, rf_preds)
        exp.log_other('Classification Report ' + split_ID, class_rep)
        mcc = sklearn.metrics.matthews_corrcoef(y_test, rf_preds)

        #if first iteration, report model parameters to comet
        if split_ID == '0':
            exp.log_parameters(rand_for.get_params())
        return prec, rec, f_1, supp, mcc
Example No. 25
def get_accuracy_precision_recall_fscore(y_true: list, y_pred: list):
    """
	Input : Actual labels and Predicted labels
	Output : Returns performance metrics
	"""
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f_score, _ = prf(y_true,
                                        y_pred,
                                        average='binary',
                                        warn_for=())
    if precision == 0 and recall == 0:
        f01_score = 0
    else:
        f01_score = fbeta_score(y_true, y_pred, average='binary', beta=0.1)
    return accuracy, precision, recall, f_score, f01_score
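
A hypothetical call, with the sklearn imports the function needs:

from sklearn.metrics import accuracy_score, fbeta_score
from sklearn.metrics import precision_recall_fscore_support as prf

acc, p, r, f1, f01 = get_accuracy_precision_recall_fscore(
    [0, 1, 1, 0], [0, 1, 0, 0])
print(acc, p, r, f1, f01)  # 0.75 1.0 0.5 0.667 0.990
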
Example No. 26
    def test(self):
        print("======================TEST MODE======================")
        self.ae.eval()
        loss = SVMLoss()

        for _, (x, y, m) in enumerate(self.testing_loader):
            y = y.data.cpu().numpy()
            x = x.to(self.device).float()
            m = m.to(self.device).float()

            z1, _, _ = self.ae(x.float(), m)
            # anomaly score: squared distance of the embedding from the
            # learned center c1
            error = ((z1 - self.ae.c1)**2)
            error = error.sum(dim=1)
        error = error.data.cpu().numpy()
        thresh = np.percentile(error, self.data_normaly_ratio * 100)
        print("Threshold :", thresh)

        pred = (error > thresh).astype(int)
        gt = y.astype(int)

        from sklearn.metrics import (precision_recall_fscore_support as prf,
                                     accuracy_score, roc_auc_score)
        gt = gt.squeeze()
        auc = roc_auc_score(gt, error)
        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average="binary")

        print(
            "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, auc: {:0.4f}"
            .format(accuracy, precision, recall, f_score, auc))

        os.makedirs(self.result_path, exist_ok=True)

        np.save(
            self.result_path + "result.npy",
            {
                "auc": auc,
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1": f_score,
            },
        )
        print("result save to {}".format(self.result_path))
        return accuracy, precision, recall, f_score, auc
Example No. 27
    def eval():
        test_energy = []
        test_labels = []
        for it, (input_data, labels) in enumerate(dev_loader):
            input_data = input_data.cuda()
            pred = dis(input_data)
            test_energy.append(pred.data.cpu().numpy())
            test_labels.append(labels.numpy())

        test_energy = np.concatenate(test_energy, axis=0)
        test_labels = np.concatenate(test_labels, axis=0)
        # negate so that higher energy means more anomalous, then flag the
        # top 20% as anomalies
        test_energy = -test_energy
        thresh = np.percentile(test_energy, 80)
        pred = (test_energy > thresh).astype(int)
        gt = test_labels.astype(int)
        from sklearn.metrics import precision_recall_fscore_support as prf
        precision, recall, f_score, _ = prf(gt, pred, average='binary')
        return precision, recall, f_score
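
The percentile threshold at 80 simply flags the top 20% of scores as positives; a tiny standalone illustration:

import numpy as np

scores = np.array([0.1, 0.4, 0.35, 0.8, 0.95])
thresh = np.percentile(scores, 80)      # = 0.83 by linear interpolation
print((scores > thresh).astype(int))    # [0 0 0 0 1]: the top ~20%
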
Example No. 28
def get_DBSCAN_result(filename, eps, min_samples):
    # read the training data
    data, labels = read('data\\' + filename + '_train.data')
    #data_test , test_lable = read('data\\'+ filename +'_test.data');
    #labels_dict = get_labels_num(labels)
    t0 = time()
    dbscan = DBSCAN(eps=eps, min_samples=min_samples).fit(data)
    # prediction results
    t1 = time()
    predict_label = dbscan.labels_
    labels = change_labels_2_num(labels)
    #predict_label_d = []
    #predict_label_d = [labels_dict[k] for k in predict_label]
    print("数据未降维 eps:", eps, "\t min_samples: ", min_samples)
    precision, recall, fbeta_score, support = prf(labels,
                                                  predict_label,
                                                  average='weighted')
    print('precision:', precision)
    print('recall:', recall)
    print('fbeta_score:', fbeta_score)
    print('elapsed time:', t1 - t0)
Example No. 29
def get_kmeans_pca_result(filename):
    data, labels = read('data\\' + filename + '_train.data')
    data_test, test_label = read('data\\' + filename + '_test.data')
    n_samples, n_features = data.shape
    n_digits = len(np.unique(labels))
    print("数据降至二维,init='random',n_init=10")
    print("n_digits: %d, \t n_samples %d, \t n_features %d" %
          (n_digits, n_samples, n_features))
    # reduce the data to two dimensions
    reduced_data = PCA(n_components=2).fit_transform(data)
    reduced_data = normalize(reduced_data)
    t0 = time()
    kmeans_pca = KMeans(init='random', n_clusters=n_digits, n_init=10)
    kmeans_pca.fit(reduced_data)
    t1 = time()
    # note: the test data is reduced with a separately fitted PCA, so its
    # projection need not match the training space
    predict_data = PCA(n_components=2).fit_transform(data_test)
    predict_data = normalize(predict_data)
    y_kmeans = kmeans_pca.predict(predict_data)
    t2 = time()
    labels_dict = get_labels_num(labels)
    predict_label_d = [labels_dict[k] for k in y_kmeans]
    precision, recall, fbeta_score, support = prf(test_label,
                                                  predict_label_d,
                                                  average='weighted')
    print('precision:', precision)
    print('recall:', recall)
    print('fbeta_score:', fbeta_score)
    print('model training time:', t1 - t0)
    print('test prediction time:', t2 - t1)
    plt.scatter(predict_data[:, 0],
                predict_data[:, 1],
                c=y_kmeans,
                s=50,
                cmap='viridis')
    centers = kmeans_pca.cluster_centers_
    plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
    plt.show()
Example No. 30
    def test(self):
        print("======================TEST MODE======================")
        # pred = self.best_model.predict(self.X_test)
        score = self.best_model.score_samples(self.X_test)
        thresh = np.percentile(score, self.data_anomaly_ratio * 100)
        print("Threshold :", thresh)

        pred = (score < thresh).astype(int)
        # pred = pred < 0
        gt = self.y_test.astype(int)

        from sklearn.metrics import (precision_recall_fscore_support as prf,
                                     accuracy_score, roc_auc_score)
        auc = roc_auc_score(gt,
                            -self.best_model.decision_function(self.X_test))

        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average="binary")

        print(
            "Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}, AUC: {:0.4f}"
            .format(accuracy, precision, recall, f_score, auc))

        os.makedirs(self.result_path, exist_ok=True)

        np.save(
            self.result_path + "result.npy",
            {
                "auc": auc,
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1": f_score,
            },
        )
        return accuracy, precision, recall, f_score
Example No. 31
def perform_testing():
    print('--- Performing Evaluation ---')
    nn_list = list(build_model(flag='testing'))
    data_dict = helpers.csv_to_dict(training=False)
    keys = list(data_dict.keys())
    testing_key = keys[0]  # test on the first composer key
    print('Testing on: ' + ' '.join(testing_key))
    # Get data
    x, y, fs = helpers.fetch_data(data_dict, testing_key)
    x *= 0.99 / np.max(np.abs(x))

    sigmoid = torch.nn.Sigmoid()  # Label helper!
    d_p_length_samples = exp_settings['d_p_length'] * exp_settings['fs']  # length in samples

    number_of_data_points = len(x) // d_p_length_samples
    for data_point in tqdm(range(number_of_data_points)):
        # Generate data
        x_d_p = x[data_point * d_p_length_samples:(data_point + 1) *
                  d_p_length_samples]
        y_d_p = y[data_point * d_p_length_samples:(data_point + 1) *
                  d_p_length_samples]

        # Reshape data
        x_d_p = x_d_p.reshape(1, d_p_length_samples)
        y_d_p = y_d_p.reshape(1, d_p_length_samples)
        x_cuda = torch.autograd.Variable(torch.from_numpy(x_d_p),
                                         requires_grad=False).float().detach()
        y_cuda = torch.autograd.Variable(torch.from_numpy(y_d_p),
                                         requires_grad=False).float().detach()
        if torch.cuda.is_available():
            x_cuda = x_cuda.cuda()
            y_cuda = y_cuda.cuda()

        # Forward analysis pass: Input data
        x_real, x_imag = nn_list[0].forward(x_cuda)
        # Magnitude computation
        mag = torch.norm(torch.cat((x_real, x_imag), 0), 2, dim=0).unsqueeze(0)
        # Mel analysis
        mel_mag = torch.autograd.Variable(nn_list[1].forward(mag).data,
                                          requires_grad=False)

        # Learned normalization
        mel_mag_pr = nn_list[2].forward(mel_mag)
        # GRUs
        h_enc = nn_list[3].forward(mel_mag_pr)
        h_dec = nn_list[4].forward(h_enc)
        # Classifier
        _, vad_prob = nn_list[5].forward(h_dec, mel_mag_pr)
        vad_prob = sigmoid(vad_prob).gt(0.50).float().data.cpu().numpy()[0, :, 0]

        # Up-sample the labels to the time-domain
        # Target data preparation
        vad_true = nn_list[6].forward(y_cuda).gt(
            0.50).float().data.cpu().numpy()[0, :, 0]

        if data_point == 0:
            out_prob = vad_prob
            out_true_prob = vad_true
        else:
            out_prob = np.hstack((out_prob, vad_prob))
            out_true_prob = np.hstack((out_true_prob, vad_true))

    res = prf(out_true_prob, out_prob, average='binary')
    cls_error = np.sum(
        np.abs(out_true_prob - out_prob)) / np.shape(out_true_prob)[0] * 100.
    voice_regions_percentage = (len(
        np.where(out_true_prob == 1)[0])) / np.shape(out_true_prob)[0] * 100.
    non_voice_regions_percentage = (len(
        np.where(out_true_prob == 0)[0])) / np.shape(out_true_prob)[0] * 100.

    print('Precision: %.2f' % res[0])
    print('Recall: %.2f' % res[1])
    print('Fscore: %.2f' % res[2])
    print('Error: %.2f' % cls_error)
    print('Singing voice frames percentage: %.2f' % voice_regions_percentage)
    print('Non-singing voice frames percentage: %.2f' %
          non_voice_regions_percentage)

    print('-- Saving Results --')
    np.save(
        os.path.join('results', exp_settings['split_name'],
                     'lr_pcen_results.npy'), out_prob)
    np.save(
        os.path.join('results', exp_settings['split_name'],
                     'vad_true_targets.npy'), out_true_prob)

    return None
Example No. 32
def perform_validation(nn_list):
    print('--- Performing Validation ---')
    d_p_length_samples = exp_settings['d_p_length'] * exp_settings['fs']
    # Get data dictionary
    data_dict = helpers.csv_to_dict(training=True)
    keys = sorted(list(data_dict.keys()))
    validation_key = keys[exp_settings['split_validation_indx']]
    print('Validating on: ' + " ".join(validation_key))
    # Get data
    x, y, _ = helpers.fetch_data(data_dict, validation_key)
    x *= 0.99 / np.max(np.abs(x))

    sigmoid = torch.nn.Sigmoid()  # Label helper!

    # Constructing batches
    number_of_data_points = len(x) // d_p_length_samples
    available_batches = number_of_data_points // exp_settings['batch_size']
    data_points = np.arange(0, number_of_data_points)
    for batch in tqdm(range(available_batches)):
        x_d_p, y_d_p = helpers.gimme_batches(batch, data_points, x, y)

        x_cuda = torch.autograd.Variable(torch.from_numpy(x_d_p).cuda(),
                                         requires_grad=False).float().detach()
        y_cuda = torch.autograd.Variable(torch.from_numpy(y_d_p).cuda(),
                                         requires_grad=False).float().detach()

        # Forward analysis pass: Input data
        x_real, x_imag = nn_list[0].forward(x_cuda)
        # Magnitude computation
        mag = torch.sqrt(x_real.pow(2) + x_imag.pow(2))
        # Mel analysis
        # no gradients are needed during validation (the test-time pass above
        # uses requires_grad=False as well)
        mel_mag = torch.autograd.Variable(nn_list[1].forward(mag).data,
                                          requires_grad=False)

        # Learned normalization
        mel_mag_pr = nn_list[2].forward(mel_mag)
        # GRUs
        h_enc = nn_list[3].forward(mel_mag_pr)
        h_dec = nn_list[4].forward(h_enc)
        # Classifier
        _, vad_prob = nn_list[5].forward(h_dec, mel_mag_pr)
        vad_prob = sigmoid(vad_prob)
        vad_prob = vad_prob.gt(0.51).float().data.cpu().numpy()[:, :, 0]\
            .reshape(exp_settings['batch_size']*exp_settings['T'], 1)

        # Target data preparation
        y_true = nn_list[6].forward(y_cuda).detach()[:, :, 0]
        vad_true = y_true.gt(0.51).float().data.cpu().numpy().reshape(
            exp_settings['batch_size'] * exp_settings['T'], 1)

        if batch == 0:
            out_prob = vad_prob
            out_true_prob = vad_true
        else:
            out_prob = np.vstack((out_prob, vad_prob))
            out_true_prob = np.vstack((out_true_prob, vad_true))

    res = prf(out_true_prob, out_prob, average='binary')
    cls_error = np.sum(
        np.abs(out_true_prob - out_prob)) / len(out_true_prob) * 100.

    print('Precision: %.2f' % res[0])
    print('Recall: %.2f' % res[1])
    print('Fscore: %.2f' % res[2])
    print('Error: %.2f' % cls_error)

    return cls_error