Пример #1
0
def classify(feats, cantidad):
    """Run the image + landmark feature pipeline and store confusion matrices.

    Steps, in order: build the per-feature labels, drop low-variance
    features, split the data, reduce dimensionality, save the resulting
    arrays, run three classifiers and write each confusion matrix to the
    text files ``k1``, ``k2`` and ``k3``.

    Args:
        feats: feature matrix; its columns are selected with
            ``feats[:, rem_var_index]``.
        cantidad: forwarded to the split/reduction helpers and used as the
            suffix of the saved ``X_tr_``/``X_te_``/``y_tr_``/``y_te_`` files.
    """
    # Only the first tuple of each (lbp/har) pair is used below.
    lbp_params = ((1, 1, 2, 2, 5), (5, 10, 8, 15, 6))
    har_params = ((1, 1, 1, 2, 5), (1, 10, 20, 11, 8))
    gab1_params = (1, 2, 5, 10)
    gab2_params = (1, 2, 5, 10)
    params_landmarks = (1, 5, 8)

    labels_image = main.generate_labels(lbp_params[0], har_params[0],
                                        gab1_params, gab2_params)

    # The landmark labels are generated starting one past the last image label.
    # NOTE(review): ``(1)`` is the integer 1, not a one-element tuple --
    # confirm which one generate_labels_landmarks expects.
    first_landmark_label = labels_image[-1] + 1
    labels_landmarks = main.generate_labels_landmarks(
        first_landmark_label, 6, params_landmarks, (), (1))

    labels = np.concatenate([labels_image, labels_landmarks], axis=0)
    print(labels)

    print('Removing features with low variance')
    rem_var_index = lib_pat.delete_zero_variance_features2(feats, labels, 0.1)
    np.save('rem_var_index.npy', rem_var_index)
    feats = feats[:, rem_var_index]
    labels = labels[:, rem_var_index]

    print('Separating Features...')
    X_tr, X_te, y_tr, y_te = lib_pat.hold_out(feats, cantidad)

    print('Reducing features by transformation')
    # The reduced matrices replace the X_tr/X_te returned by hold_out;
    # only y_tr/y_te of that split are used afterwards.
    X_tr, X_te = main.reduction_routine(feats, labels, .99, cantidad)
    print('Final reduction (for no colinear features)')
    X_tr, X_te = lib_pat.dim_red_auto_PCA(X_tr, X_te, ratio=.9)

    suffix = str(cantidad)
    for prefix, array in (("X_tr_", X_tr), ("X_te_", X_te),
                          ("y_tr_", y_tr), ("y_te_", y_te)):
        np.save(prefix + suffix, array)

    print('Classification via LDA solver=svd')
    k1, _ = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te, solver='svd')

    # NOTE(review): announced as MLP, but this calls classification_LDA again.
    print('Classification via MLP')
    k2, _ = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te)

    print('Classification via NN')
    k3, _ = classification.training_and_classification_NN(
        X_tr, X_te, y_tr, y_te)

    # One comma-separated integer matrix per classifier.
    for filename, predicted in (('k1', k1), ('k2', k2), ('k3', k3)):
        np.savetxt(filename, CM(y_te, predicted), fmt='%2i', delimiter=',')
Пример #2
0
    def plot_confusion_matrix(self, label_test, fn_test):
        """Predict ``fn_test`` with ``self.clf`` and show the confusion matrix.

        Each cell is annotated with its row-normalized value followed by the
        raw count in parentheses; the accuracy is shown in the title.

        Args:
            label_test: true labels of the test samples.
            fn_test: test features passed to ``self.clf.predict``.
        """
        fn_preds = self.clf.predict(fn_test)
        acc = accuracy_score(label_test, fn_preds)

        cm_ = CM(label_test, fn_preds)
        # ``np.float`` was removed in NumPy 1.24; it was only an alias of the
        # builtin ``float``, so the resulting dtype is unchanged.
        cm = normalize(cm_.astype(float), axis=1, norm='l1')

        fig = pl.figure()
        ax = fig.add_subplot(111)
        cax = ax.matshow(cm)
        fig.colorbar(cax)
        for x in range(len(cm)):
            for y in range(len(cm)):
                # matshow puts columns on the x axis, hence xy=(y, x).
                ax.annotate("%.3f(%d)" % (cm[x][y], cm_[x][y]), xy=(y, x),
                            horizontalalignment='center',
                            verticalalignment='center',
                            fontsize=10)

        # Tick labels: every class present in the truth or the predictions,
        # translated through ``mapping`` (defined outside this method).
        cm_cls = np.unique(np.hstack((label_test, fn_preds)))
        cls = [mapping[c] for c in cm_cls]

        pl.yticks(range(len(cls)), cls)
        pl.ylabel('True label')
        pl.xticks(range(len(cls)), cls)
        pl.xlabel('Predicted label')
        pl.title('Mn Confusion matrix (%.3f)'%acc)

        pl.show()
Пример #3
0
def plot_confusion_matrix(Z_true, Z_pred, normalize=True, ndecimals=2,
                          title="Confusion Matrix", savename=None):
    """
    Function for making and plotting the confusion matrix of a model using
    sklearn.metrics.confusion_matrix.
    Arguments:
        Z_true (array): true observations
        Z_pred (array): predictions
        normalize (bool, optional): whether to normalize confusion matrix
                                    (each cell divided by the total count),
                                    defaults to True
        ndecimals (int, optional): number of decimals printed in each cell
                                   when normalized, defaults to 2
        title (str, optional): title of plot, defaults to "Confusion Matrix"
        savename (str, optional): plot is saved under this name if provided,
                                  defaults to None
    """
    c = CM(Z_true, Z_pred)

    # Use truthiness here, exactly like the colour scale and the cell format
    # below. The previous ``normalize is True`` skipped the division for
    # truthy non-bool flags (e.g. ``np.True_`` or ``1``) while the rest of the
    # function still treated the matrix as normalized.
    if normalize:
        c = c/np.sum(c)

    fig, ax = plt.subplots(figsize= (5, 4.5))
    vmax = 1 if normalize else c.max()
    im = ax.matshow(c, vmin=0, vmax=vmax, cmap="autumn_r")
    plt.colorbar(im)
    # Fractions get a fixed number of decimals, raw counts are integers.
    s = "{:0." + str(ndecimals) + "f}" if normalize else "{:d}"
    for (i, j), z in np.ndenumerate(c):
        ax.text(j, i, s.format(z), ha="center", va="center",
                fontsize=16)
    ax.set_xlabel("Predicted value", fontsize=12)
    ax.xaxis.set_label_position("top")
    ax.set_ylabel("True value", fontsize=12)
    fig.suptitle(title, fontsize=16)
    fig.subplots_adjust(top=0.84)
    if savename is not None:
        plt.savefig(f"Figures/{savename}.png", dpi=300)
    plt.show()
Пример #4
0
    def update(self, pred_label, gt_label):
        """Add one instance's confusion matrix to the running total.

        Args:
            pred_label (np.ndarray): (num_points)
            gt_label (np.ndarray): (num_points,)

        """
        # Same convention as sklearn.metrics.confusion_matrix: ground truth
        # first, predictions second, restricted to self.labels.
        self.confusion_matrix += CM(gt_label, pred_label, labels=self.labels)
Пример #5
0
def landmark_classifier(feats, cantidad, iterations, separate_ratio):
    """Repeat the landmark-only classification and report mean accuracies.

    Every iteration re-splits the data, reduces the features, runs the two
    classifiers and overwrites the ``k1``/``k2`` confusion-matrix files.

    Args:
        feats: feature matrix handed to the filtering and split helpers.
        cantidad: forwarded to the split and reduction helpers.
        iterations: number of split/train/evaluate repetitions.
        separate_ratio: forwarded to ``separate_train_test`` and
            ``reduction_routine``.
    """
    def _mean(scores):
        # Plain arithmetic mean; raises ZeroDivisionError on an empty list.
        return sum(scores) / float(len(scores))

    params_landmarks = (1, 5, 8)
    # NOTE(review): ``(1)`` is the integer 1, not a one-element tuple --
    # confirm which one generate_labels_landmarks expects.
    labels_landmarks = main.generate_labels_landmarks(
        0, 1, params_landmarks, (), (1))
    print(labels_landmarks)

    print('Removing features with low variance')
    feats, labels = lib_pat.delete_zero_variance_features(
        feats, labels_landmarks, 0.05)

    lda_scores, mlp_scores = [], []
    for run in range(1, iterations + 1):
        print('Classification Nº {}/{}'.format(run, iterations))
        print('Separating Features...')
        X_tr, X_te, y_tr, y_te, sep_list = lib_pat.separate_train_test(
            feats, separate_ratio, cantidad)

        print('Reducing features by transformation')
        # The reduced matrices replace the X_tr/X_te of the split above.
        X_tr, X_te = main.reduction_routine(
            feats, labels, separate_ratio, .99, cantidad, sep_list)
        print('Final reduction (for no colinear features)')
        X_tr, X_te = lib_pat.dim_red_auto_PCA(X_tr, X_te, ratio=.9)

        print('Classification via LDA solver=svd')
        k1, k1_score = lib_pat.classification_LDA(
            X_tr, X_te, y_tr, y_te, solver='svd')
        lda_scores.append(k1_score)

        # NOTE(review): announced as MLP, but this calls classification_LDA.
        print('Classification via MLP')
        k2, k2_score = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te)
        mlp_scores.append(k2_score)

        # The files are rewritten every iteration, so only the matrices of
        # the last run remain on disk.
        for filename, predicted in (('k1', k1), ('k2', k2)):
            np.savetxt(filename, CM(y_te, predicted), fmt='%2i',
                       delimiter=',')

    lda_mean = _mean(lda_scores)
    print('LDA mean accuracy:', lda_mean)
    mlp_mean = _mean(mlp_scores)
    print('MLP mean accuracy:', mlp_mean)
Пример #6
0
def plot_confusion_matrix(cm, title='Confusion Matrix', cmap=plt.cm.binary):
    """Draw ``cm`` as an image with class ticks taken from ``labels``.

    ``labels``, ``ytest`` and ``ypred`` are read from the enclosing scope.

    Args:
        cm: matrix handed to ``plt.imshow``.
        title: plot title.
        cmap: colour map used for the image.
    """
    plt.imshow(cm, cmap=cmap, interpolation='nearest')
    plt.title(title)
    plt.colorbar()

    # One tick per class on both axes; x tick labels are rotated.
    tick_positions = np.array(range(len(labels)))
    plt.xticks(tick_positions, labels, rotation=90)
    plt.yticks(tick_positions, labels)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

    # NOTE(review): this recomputed matrix only rebinds the local name after
    # the image has been drawn and is never used in the visible body.
    cm = CM(ytest, ypred)
    np.set_printoptions(precision=2)
Пример #7
0
def classify_trained(cantidad):
    """Re-run the classifiers on arrays previously saved for ``cantidad``.

    Loads ``X_tr_``, ``X_te_``, ``y_tr_`` and ``y_te_`` ``<cantidad>.npy``
    from the working directory and writes the confusion matrices to the text
    files ``k1``, ``k2`` and ``k3``.

    Args:
        cantidad: suffix identifying the saved ``.npy`` files.
    """
    suffix = str(cantidad) + ".npy"
    X_tr, X_te, y_tr, y_te = (
        np.load(prefix + suffix)
        for prefix in ("X_tr_", "X_te_", "y_tr_", "y_te_"))

    print('Classification via LDA solver=svd')
    k1, _ = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te, solver='svd')

    # NOTE(review): announced as MLP, but this calls classification_LDA again.
    print('Classification via MLP')
    k2, _ = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te)

    print('Classification via NN')
    k3, _ = classification.training_and_classification_NN(
        X_tr, X_te, y_tr, y_te)

    for filename, predicted in (('k1', k1), ('k2', k2), ('k3', k3)):
        np.savetxt(filename, CM(y_te, predicted), fmt='%2i', delimiter=',')
Пример #8
0
def plot_confusion_matrix(test_label, pred):
    """Show the confusion matrix of ``pred`` against ``test_label``.

    Each cell is annotated with its row-normalized value followed by the raw
    count in parentheses; the accuracy is shown in the title.

    Args:
        test_label: true class ids.
        pred: predicted class ids.

    Raises:
        KeyError: if a class id in either array is missing from ``mapping``.
    """
    # Class id -> tick label.
    mapping = {
        1: 'co2',
        2: 'humidity',
        3: 'pressure',
        4: 'rmt',
        5: 'status',
        6: 'stpt',
        7: 'flow',
        8: 'HW sup',
        9: 'HW ret',
        10: 'CW sup',
        11: 'CW ret',
        12: 'SAT',
        13: 'RAT',
        17: 'MAT',
        18: 'C enter',
        19: 'C leave',
        21: 'occu',
        30: 'pos',
        31: 'power',
        32: 'ctrl',
        33: 'fan spd',
        34: 'timer'
    }
    cm_ = CM(test_label, pred)
    # ``np.float`` was removed in NumPy 1.24; it was only an alias of the
    # builtin ``float``, so the resulting dtype is unchanged.
    cm = normalize(cm_.astype(float), axis=1, norm='l1')
    fig = pl.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm, cmap=Color.YlOrBr)
    fig.colorbar(cax)
    for x in range(len(cm)):
        for y in range(len(cm)):
            # matshow puts columns on the x axis, hence xy=(y, x).
            ax.annotate("%.3f(%d)" % (cm[x][y], cm_[x][y]),
                        xy=(y, x),
                        horizontalalignment='center',
                        verticalalignment='center',
                        fontsize=9)
    # Tick labels cover every class present in the truth or the predictions.
    cm_cls = np.unique(np.hstack((test_label, pred)))
    cls = [mapping[c] for c in cm_cls]
    pl.yticks(range(len(cls)), cls)
    pl.ylabel('True label')
    pl.xticks(range(len(cls)), cls)
    pl.xlabel('Predicted label')
    pl.title('Confusion Matrix (%.3f)' % (ACC(pred, test_label)))
    pl.show()
Пример #9
0
    def update(self, pred_label, gt_label):
        """Update per instance

        The caller's ``gt_label`` array is left untouched: the ignore label
        is remapped on a private copy.

        Args:
            pred_label (np.ndarray): (num_points)
            gt_label (np.ndarray): (num_points,)

        """
        # convert ignore_label to num_classes
        # Work on a copy so the -100 entries of the caller's array are not
        # overwritten as a side effect of updating the metric.
        gt_label = gt_label.copy()
        gt_label[gt_label == -100] = self.num_classes
        # refer to sklearn.metrics.confusion_matrix
        confusion_matrix = CM(gt_label.flatten(),
                              pred_label.flatten(),
                              labels=self.labels)
        self.confusion_matrix += confusion_matrix
Пример #10
0
 def __init__(self, y_true, y_pred, labels=None, sample_weight=None,
              normalize=None):
     """Store the inputs and compute the confusion matrix into ``self.value``.

     ``self.sample_weight``, ``self.labels`` and ``self.y_true`` are read
     back after ``Metrics.__init__`` runs (presumably set there -- confirm
     against the base class).
     """
     self.normalize = normalize
     self.y_pred = y_pred
     Metrics.__init__(self,
                      y_true=y_true,
                      labels=labels,
                      sample_weight=sample_weight)

     cm_kwargs = {
         'y_true': self.y_true,
         'y_pred': self.y_pred,
         'labels': self.labels,
         'sample_weight': self.sample_weight,
         'normalize': self.normalize,
     }
     self.value = CM(**cm_kwargs)
Пример #11
0
def evaluate_3way(X_test, y_test, model):
    """Print accuracy, confusion matrix and macro-averaged scores of ``model``.

    Class ids are taken as the argmax along axis 1 of both the model output
    and ``y_test``.

    Args:
        X_test: inputs passed to ``model.predict`` and ``model.evaluate``.
        y_test: targets with one column per class.
        model: object exposing ``predict`` and ``evaluate``.
    """
    probabilities = model.predict(X_test)
    predicted = np.argmax(probabilities, axis=1)
    expected = np.argmax(y_test, axis=1)

    # accuracy comes from the model's own evaluate(); the loss is discarded
    _, acc = model.evaluate(X_test, y_test)

    # precision, recall, specificity, and f1_score (all macro-averaged)
    macro = {"average": "macro"}
    p = precision_score(expected, predicted, **macro)
    r = recall_score(expected, predicted, **macro)
    f1 = f1_score(expected, predicted, **macro)
    _, spe, _ = sss(expected, predicted, **macro)

    print("Test accuracy:", acc)
    print("Test confusion matrix: \n", CM(expected, predicted))
    print("Precision: ", p)
    print("Recall: ", r)
    print("Specificity: ", spe)
    print("f1_score: ", f1)
Пример #12
0
def evaluate_performance(X_test, y_test, model, name):
    """Print the raw model output and binary classification scores.

    Class ids are taken as the argmax along axis 1 of both the model output
    and ``y_test``.

    Args:
        X_test: inputs passed to ``model.predict`` and ``model.evaluate``.
        y_test: targets with one column per class.
        model: object exposing ``predict`` and ``evaluate``.
        name: not used by this function.
    """
    probabilities = model.predict(X_test)
    print("test_y_prob", probabilities)
    predicted = np.argmax(probabilities, axis=1)
    expected = np.argmax(y_test, axis=1)

    # accuracy comes from the model's own evaluate(); the loss is discarded
    _, acc = model.evaluate(X_test, y_test)

    scores = (expected, predicted)
    p = precision_score(*scores)
    r = recall_score(*scores)
    f1 = f1_score(*scores)
    _, spe, _ = sss(*scores, average="binary")

    # print results
    print("Test accuracy:", acc)
    print("Test confusion matrix: \n", CM(expected, predicted))
    print("Precision: ", p)
    print("Recall: ", r)
    print("Specificity: ", spe)
    print("f1_score: ", f1)
Пример #13
0
def RunModel(model, data, columns, Predict):
    """Fit ``model`` on a train split of ``data`` and score the test split.

    The split sizes come from the names ``train`` and ``test`` in the
    enclosing scope; the split is seeded with ``random_state=42``.

    Args:
        model: estimator exposing ``fit`` and ``predict``.
        data: table indexed as ``data[columns]`` / ``data[Predict]``.
        columns: selector of the feature columns.
        Predict: selector of the target column.

    Returns:
        tuple: ``(mse, r2, mae, acc, con_met)`` computed on the test split.
    """
    features, target = data[columns], data[Predict]
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        train_size=train,
        test_size=test,
        random_state=42)

    model.fit(X_train, y_train)
    prediction = model.predict(X_test)

    return (
        MSE(y_test, prediction),
        R2(y_test, prediction),
        MAE(y_test, prediction),
        AS(y_test, prediction),
        CM(y_test, prediction),
    )
Пример #14
0
def evaluate_binary(X_test, y_test, model, name):
    """Print binary classification scores and save/show the ROC curve.

    Class ids are the argmax along axis 1 of the model output and of
    ``y_test``; column 1 of the model output is used as the positive score.

    Args:
        X_test: inputs passed to ``model.predict`` and ``model.evaluate``.
        y_test: targets with one column per class.
        model: object exposing ``predict`` and ``evaluate``.
        name: path the ROC figure is saved to.
    """
    probabilities = model.predict(X_test)
    predicted = np.argmax(probabilities, axis=1)
    expected = np.argmax(y_test, axis=1)

    # accuracy comes from the model's own evaluate(); the loss is discarded
    _, acc = model.evaluate(X_test, y_test)

    # AUC from the positive-class column
    pos_prob = probabilities[:, 1]
    auc_score = roc_auc_score(expected, pos_prob)

    # precision, recall, specificity, and f1_score
    p = precision_score(expected, predicted)
    r = recall_score(expected, predicted)
    f1 = f1_score(expected, predicted)
    _, spe, _ = sss(expected, predicted, average="binary")

    # print results
    print("Test accuracy:", acc)
    print("Test AUC is: ", auc_score)
    print("Test confusion matrix: \n", CM(expected, predicted))
    print("Precision: ", p)
    print("Recall: ", r)
    print("Specificity: ", spe)
    print("f1_score: ", f1)

    # plot and save roc curve; the all-zero scores give the reference line
    fpr, tpr, _ = roc_curve(expected, pos_prob)
    ns_probs = [0] * len(probabilities)
    ns_fpr, ns_tpr, _ = roc_curve(expected, ns_probs)

    plt.axis([0, 1, 0, 1])
    model_label = 'Model AUC (area = {:.2f})'.format(auc_score)
    plt.plot(fpr, tpr, marker='.', color='darkorange', label=model_label)
    plt.plot(ns_fpr, ns_tpr, color='royalblue', linestyle='--')
    plt.legend()
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.savefig(name, dpi=300, bbox_inches='tight')
    plt.show()
Пример #15
0
"""
sklearn 使用朴素贝叶斯分类器
"""

#### 1、高斯朴素贝叶斯算法
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix as CM

# Fraction of the digits dataset held out for testing.
test_size=0.3
digits=load_digits()
x,y=digits.data,digits.target

# No random_state is passed, so the split differs from run to run.
train_X,test_X,train_Y,test_Y=train_test_split(x,y,test_size=test_size)
print(train_X[:10])
print(train_Y[:10])
# Fit Gaussian naive Bayes on the training part and score it on the test part.
gnb=GaussianNB().fit(train_X,train_Y)
acc_score=gnb.score(test_X,test_Y)
print(acc_score)
pred_Y=gnb.predict(test_X)
# print(pred_Y)
prob=gnb.predict_proba(test_X)
# print(prob)
# print(prob[1,:].sum())

# Confusion matrix of the test predictions (true labels first).
print(CM(test_Y,pred_Y))
Пример #16
0
# Fit the scaler on every column except the target, then transform the same
# columns.
scaler = SS()
scaler.fit(df.drop('TARGET CLASS',axis=1))
scaled = scaler.transform(df.drop('TARGET CLASS',axis=1))
# NOTE(review): df.columns[:-1] is used for the column names, which assumes
# 'TARGET CLASS' is the last column of df -- confirm.
df_scale = pd.DataFrame(scaled,columns=df.columns[:-1])
print(df_scale.head())

# SPLIT DATA INTO TRAINING AND TESTING
X_train,X_test,y_train,y_test = TTS(df_scale,df['TARGET CLASS'],test_size=0.3,random_state=101)

# KNN
model = KNC(n_neighbors=1)
model.fit(X_train,y_train)
pred = model.predict(X_test)

print(CR(y_test,pred))
print(CM(y_test,pred))

# CHOOSE K VALUE (ELBOW METHOD)
# For each k in 1..39, record the share of test samples that are misclassified.
error_rate = []

for i in range(1,40):
	model = KNC(n_neighbors=i)
	model.fit(X_train,y_train)
	pred_i = model.predict(X_test)
	error_rate.append(np.mean(y_test != pred_i))

sns.lineplot(x=np.arange(1,40),y=np.array(error_rate))
plt.show()

# RERUN WITH NEW K
model = KNC(n_neighbors=37)
Пример #17
0
    def evaluate_model(self):
        """ Evaluate the model.
            Model is restored from models_path/model_name
            If model could not be loaded, exits.
            Data used for evaluation is the testing data.
            Once the whole dataset is forwarded, classification report and
            confusion matrix are computed and written through self.logger.
        """
        # Initialize the variables
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

        # Load variables
        if self.SAVE:
            try:
                modelname = self.MODELS_PATH + self.name
                # NOTE(review): the saver returned here is not the one used
                # for the restore below (self.saver is) -- confirm intent.
                tf.train.import_meta_graph(modelname + ".meta")
                self.saver.restore(sess, modelname)
            except Exception:
                # ``except Exception`` (instead of a bare except) lets
                # KeyboardInterrupt/SystemExit propagate untouched.
                # Parenthesized single-argument print prints the same text on
                # Python 2 and Python 3.
                print("Failed to restore model. Exiting.")
                exit()

        #### TESTING ####
        Y_true = []
        Y_pred = []
        testing_time = time.time()
        testing_acc = 0
        testing_loss = 0
        # Sort the labels once; class index -> label name.
        sorted_labels = sorted(self.labels)
        tophonetic = np.vectorize(lambda t: sorted_labels[t])
        for batch_id in range(self.nb_batch_test):
            # Get batch
            batch_X = self.X_test[batch_id]
            batch_Y = self.Y_test[batch_id]
            lengths = self.lengths_test[batch_id]

            # Get loss and accuracy
            loss, acc, predictions = sess.run(
                fetches=[self.loss, self.acc, self.predictions],
                feed_dict={
                    self.X_: batch_X,
                    self.Y_: batch_Y,
                    self.seq_lengths: lengths
                })

            # Update global variables
            testing_acc += acc
            testing_loss += loss

            # Keep only the first lengths[i] steps of every sequence and
            # turn both targets and outputs into class indices.
            for i in range(self.batchsize):
                true = batch_Y[i, :lengths[i]]
                Y_true.extend(np.argmax(true, axis=1))
                pred = predictions[i, :lengths[i]]
                Y_pred.extend(np.argmax(pred, axis=1))

        # The session is not needed after the last batch; release it.
        sess.close()

        testing_time = time.time() - testing_time
        testing_acc /= self.nb_batch_test
        testing_loss /= self.nb_batch_test
        self.logger.write_log(
            "\n\nAccuracy:\t%.2f%%\nLoss:\t\t%s\nTime:\t\t%.2fs\n" %
            (100 * testing_acc, testing_loss, testing_time))

        Y_true = tophonetic(Y_true)
        Y_pred = tophonetic(Y_pred)

        # Classification Report (CR)
        self.logger.write_log(CR(Y_true, Y_pred))

        # Confusion Matrix (CM)
        mat = CM(Y_true, Y_pred)

        # header line (label names truncated to 5 characters)
        CONFMAT = "\t" + "\t".join([lbl[:5]
                                    for lbl in sorted_labels]) + "\n"

        # one row per label: the counts followed by the row total
        for i, phonetic in enumerate(sorted_labels):
            CONFMAT += phonetic[:5] + "\t" + "\t".join(
                map(str, mat[i].tolist() + [np.sum(mat[i])])) + "\n\n"

        # footer line, sums
        CONFMAT += "\t" + "\t".join(map(str, np.sum(mat, axis=0).tolist()))
        self.logger.write_log(CONFMAT)
Пример #18
0
# Scatter the two feature columns, coloured by class.
plt.scatter(X_[:, 0], X_[:, 1], c=y_, cmap="rainbow", s=30)
plt.show()
clf_lo = LogiR().fit(X_, y_)
prob = clf_lo.predict_proba(X_)
# Put the samples and their probabilities into a DataFrame
prob = pd.DataFrame(prob)
prob.columns = ["0", "1"]

# Predict class 1 when the column-"1" probability exceeds 0.5.
for i in range(prob.shape[0]):
    if prob.loc[i, "1"] > 0.5:
        prob.loc[i, "pred"] = 1
    else:
        prob.loc[i, "pred"] = 0
prob["y_true"] = y_
prob = prob.sort_values(by="1", ascending=False)

cm = CM(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])
# Try computing Precision and Recall by hand?
precision = P(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])
recall = R(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])

# Same evaluation with the threshold lowered to 0.4. ``.loc`` is label based,
# so the rows are still addressed by their original index after the sort.
for i in range(prob.shape[0]):
    if prob.loc[i, "1"] > 0.4:
        prob.loc[i, "pred"] = 1
    else:
        prob.loc[i, "pred"] = 0
cm2 = CM(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])
# Try computing Precision and Recall by hand?
precision2 = P(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])
recall2 = R(prob.loc[:, "y_true"], prob.loc[:, "pred"], labels=[1, 0])
Пример #19
0
# Hold out 30% of the samples for testing; the split is seeded.
features_train, features_test, labels_train, labels_test = TTS(features,
                                                               labels,
                                                               test_size=0.3,
                                                               random_state=0)

#fitting logistic regression to the training set
from sklearn.linear_model import LogisticRegression as lg
classifier = lg(random_state=0)
classifier.fit(features_train, labels_train)

#predicting the test set result
labels_pred = classifier.predict(features_test)

#Making the Confusion Matrix
from sklearn.metrics import confusion_matrix as CM
cm = CM(labels_test, labels_pred)

# Relative frequency of each "affair" value, in percent.
affair = df["affair"].value_counts(normalize=True) * 100

#score of above model
Score = classifier.score(features_test, labels_test)

print("accuracy of model is ", Score * 100, "%")

#Predict the probability of an affair for a random woman not
# present in the dataset. She's a 25-year-old teacher who
#graduated college, has been married for 3 years, has 1 child,
# rates herself as strongly religious, rates her marriage
#as fair, and her husband is a farmer.

pred_affair = classifier.predict(
Пример #20
0
# MODEL
# Four hidden layers of 20 units, three output classes, plain gradient
# descent with learning rate 0.001.
deep_model = estimator.DNNClassifier(
    hidden_units=[20, 20, 20, 20],
    feature_columns=feat_cols,
    n_classes=3,
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.001))

# INPUT FUNCTION
input_func = estimator.inputs.numpy_input_fn(x={
    'x': scaled_x_train,
},
                                             y=y_train,
                                             shuffle=True,
                                             batch_size=50,
                                             num_epochs=100)

# TRAINING
deep_model.train(input_fn=input_func, steps=500)

# EVALUATION
# No shuffling, so the predictions line up with y_test row by row.
input_func_eval = estimator.inputs.numpy_input_fn(x={'x': scaled_x_test},
                                                  shuffle=False)

preds = list(deep_model.predict(input_fn=input_func_eval))

# Keep the first entry of 'class_ids' from each prediction dict.
predictions = [p['class_ids'][0] for p in preds]

print(CR(y_test, predictions))
print(CM(y_test, predictions))
Пример #21
0
            c, hash_bucket_size=n)
        feat = tf.feature_column.embedding_column(cat, dimension=n)

    fcols.append(feat)

# INPUT FUNCTION
input_func_train = tf.estimator.inputs.pandas_input_fn(x=X_train,
                                                       y=y_train,
                                                       batch_size=1000,
                                                       num_epochs=100,
                                                       shuffle=True)

# MODEL
model = tf.estimator.DNNClassifier(hidden_units=[10, 10, 10, 10],
                                   feature_columns=fcols)

# TRAINING
# steps=None: no explicit step limit is passed to train().
model.train(input_fn=input_func_train, steps=None)

# EVALUATION
# A single unshuffled pass, so the predictions line up with y_test.
input_func_eval = tf.estimator.inputs.pandas_input_fn(x=X_test,
                                                      shuffle=False,
                                                      num_epochs=1)
preds = model.predict(input_fn=input_func_eval)
lpreds = list(preds)
# Keep the first entry of 'class_ids' from each prediction dict.
cpreds = [pred['class_ids'][0] for pred in list(lpreds)]

print(CM(y_true=y_test, y_pred=cpreds))

print(CR(y_true=y_test, y_pred=cpreds))
Пример #22
0
                                                result,
                                                test_size=0.3,
                                                random_state=0)

# Random forest with 100 trees, fitted on the training split.
rfc = RandomForestClassifier(n_estimators=100)
rfc = rfc.fit(Xtrain, ytrain)
ypred = rfc.predict(Xtest)
score = rfc.score(Xtest, ytest)
r = recall_score(ytest, ypred, average='micro')
r_a = recall_score(ytest, ypred, average='macro')

print("score :", score)
print("recall_score micro", r)
print("recall_score macro", r_a)
# NOTE(review): this prints the recall_score function object itself, not a
# score -- confirm whether it is intended.
print(recall_score)
cmd = CM(ytest, ypred)
print(cmd)
# Rows 32 to 44 of the confusion matrix, all columns.
print(cmd[32:45, ...])
# feature_importance = rfc.feature_importances_
# print(feature_importance)
# print(sorted(zip(map(lambda x: round(x, 4), rfc.feature_importances_), names)))
# # # # # # rfc_c = cross_val_score(rfc, x, y, cv=10)
# # # # # # plt.plot(range(1, 11), rfc_c, label = "RandomForest")
# # # # # # plt.show()

# Tick labels: 1..32 followed by 50.
labels = list(range(1, 33))
labels.append(50)


def plot_confusion_matrix(cm, title='Confusion Matrix', cmap=plt.cm.binary):
    # Draw ``cm`` as an image without interpolation between cells, using
    # ``cmap``. ``title`` is not used in the visible part of this function.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
# For every lambda: load the stored S matrix, turn each row norm into a
# binary prediction, then print and collect precision / recall / f1.
for i, lam in enumerate(lam_list):
    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
    # files from a trusted source.
    S = np.load(folder + "\\" + "lam" + lam + "\\" + r"l21S.npk",
                allow_pickle=True)
    predictions = list(map(binary_error, np.linalg.norm(S, axis=1)))
    print("lambda:", lam)
    # Each metric is computed once and reused for both the printout and the
    # collected lists (it used to be computed twice per lambda).
    lam_precision = precision(bi_y, predictions, labels=["o", "m"],
                              pos_label="o")
    print("precision", lam_precision)
    lam_recall = recall(bi_y, predictions, labels=["o", "m"], pos_label="o")
    print("recall", lam_recall)
    lam_f1 = f1_score(bi_y, predictions, labels=["o", "m"], pos_label="o")
    print("f1", lam_f1)
    lams.append(lam)
    precisions.append(lam_precision)
    recalls.append(lam_recall)
    f1s.append(lam_f1)
    print(CM(bi_y, predictions))
    print("------------")
print(len(lams), len(recalls), len(f1s), len(precisions))

# One row per lambda; lambdas are converted from strings to floats.
d = {
    "lambda": list(map(float, lams)),
    "precision": precisions,
    "recall": recalls,
    "f1": f1s
}
data = pd.DataFrame(d)
print(data)
result = data.sort_values(by=["lambda"], ascending=True)
print(result)

l = list(range(len(lams)))
Пример #24
0
        common = Counter(k_nearest_Labels).most_common(
            1)  #finding the most occuring neighbor of same class
        labels.append(common[0][0])

    return np.array(labels)


# Classify the whole test set with k = 3.
result = Knn(3, X_Features_Train, Y_Feature_Train, X_Features_Test)

# Share of test samples whose predicted label equals the true one, in percent.
print("Accuracy is: ", end="")
matches = np.sum(result == Y_Feature_Test)
print((matches / len(Y_Feature_Test)) * 100)

print("Predicting result on the following input data: ", end="")
p = np.array([[6.7, 3.3, 5.7, 2.1]])
print(p)
prediction = Knn(3, X_Features_Train, Y_Feature_Train, p)

# Class id -> message; the first id equal to the prediction wins.
for class_id, message in ((0, "Model Prdicted a Setosa"),
                          (1, "Model Prdicted a VersiColor"),
                          (2, "Model Prdicted a Virginica ")):
    if prediction[0] == class_id:
        print(message)
        break
else:
    print("Not able to predict")

confusionMatrix = CM(Y_Feature_Test, result)
print("confusionMatrix is: ")
print(confusionMatrix)
Пример #25
0
        1: 15
    }  #注意,这里写的其实是,类别1:10,隐藏了类别0:1这个比例
).fit(Xtrain, Ytrain)
# Score the fitted classifier on the test split.
result = clf.predict(Xtest)
score = clf.score(Xtest, Ytest)
recall = recall_score(Ytest, result)
auc = roc_auc_score(Ytest, clf.decision_function(Xtest))
print("testing accuracy %f, recall is %f', auc is %f" % (score, recall, auc))
# Time elapsed since ``times``, printed as minutes:seconds:microseconds.
print(datetime.datetime.fromtimestamp(time() - times).strftime("%M:%S:%f"))

# Number of test samples per class.
valuec = pd.Series(Ytest).value_counts()

# Check the specificity of the model
from sklearn.metrics import confusion_matrix as CM

# labels=(1, 0) puts class 1 in the first row and column.
cm = CM(Ytest, result, labels=(1, 0))

irange = np.linspace(0.01, 0.05, 10)
for i in irange:
    times = time()
    clf = SVC(kernel="linear",
              gamma="auto",
              cache_size=5000,
              class_weight={
                  1: 1 + i
              }).fit(Xtrain, Ytrain)
    result = clf.predict(Xtest)
    score = clf.score(Xtest, Ytest)
    recall = recall_score(Ytest, result)
    auc = roc_auc_score(Ytest, clf.decision_function(Xtest))
    print("under ratio 1:%f testing accuracy %f, recall is %f', auc is %f" %
Пример #26
0
    X_tr, X_te, y_tr, y_te = lib_pat.separate_train_test(feats, 0.8, cantidad)

    print('Reducing features by transformation')
    X_tr, X_te = reduction_routine(feats, labels, .99, cantidad)
    print('Final reduction (for no colinear features)')
    X_tr, X_te = lib_pat.dim_red_auto_PCA(X_tr, X_te, ratio=.9)

    # print('Classification via KNN 9')

    # k1 = lib_pat.classification_knn(X_tr, X_te, y_tr, y_te, 9)

    # print('Classification via SVC linear')
    # k2 = lib_pat.classification_SVM(X_tr, X_te, y_tr, y_te, kernel='linear')

    # print('Classification via SVC poli')
    # k3 = lib_pat.classification_SVM(X_tr, X_te, y_tr, y_te, kernel='poly', degree=3)

    print('Classification via LDA solver=svd')
    k4 = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te, solver='svd')

    print('Classification via MLP')
    k5 = lib_pat.classification_LDA(X_tr, X_te, y_tr, y_te)

    # np.savetxt('k1', CM(y_te, k1), fmt='%2i', delimiter=',')
    # np.savetxt('k2', CM(y_te, k2), fmt='%2i', delimiter=',')
    # np.savetxt('k3', CM(y_te, k3), fmt='%2i', delimiter=',')
    np.savetxt('k4', CM(y_te, k4), fmt='%2i', delimiter=',')
    np.savetxt('k5', CM(y_te, k5), fmt='%2i', delimiter=',')

    quit()
Пример #27
0
from sklearn.linear_model import LogisticRegression as LR
import pandas as pd

# https://www.bilibili.com/video/BV1P7411P78r?p=209
# Gaussian naive Bayes on the digits dataset with a seeded 70/30 split.
digits = load_digits()
X, y = digits.data, digits.target
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,
                                                y,
                                                test_size=0.3,
                                                random_state=420)

gnb = GaussianNB().fit(Xtrain, Ytrain)
acc_score = gnb.score(Xtest, Ytest)
Y_pred = gnb.predict(Xtest)
prob = gnb.predict_proba(Xtest)
cm = CM(Ytest, Y_pred)

h = .02

# The four naive Bayes variants, with display names in the same order.
names = ["Multinomial", "Gaussian", "Bernoulli", "Complement"]
classifiers = [MultinomialNB(), GaussianNB(), BernoulliNB(), ComplementNB()]
# From here on X and y are rebound to a synthetic two-feature dataset.
X, y = make_classification(n_features=2,
                           n_redundant=0,
                           n_informative=2,
                           random_state=1,
                           n_clusters_per_class=1)
# Add seeded uniform noise in [0, 2) to every feature value.
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)
datasets = [
    make_moons(noise=0.3, random_state=0),
# plt.show()

# DUMMY VARIABLES
# One-hot encode 'purpose'; drop_first removes the first category's column.
final = pd.get_dummies(data=df, columns=['purpose'], drop_first=True)
print(final.head())

# SPLIT DATA
X_train, X_test, y_train, y_test = TTS(final.drop('not.fully.paid', axis=1),
                                       final['not.fully.paid'],
                                       test_size=0.3,
                                       random_state=101)

# DECISION TREE CLASSIFIER
tree = DTC()
tree.fit(X_train, y_train)

# PREDICT
tpred = tree.predict(X_test)

print(CR(y_test, tpred))
print(CM(y_test, tpred))

# RANDOM FOREST CLASSIFIER
forest = RFC(n_estimators=500)
forest.fit(X_train, y_train)
fpred = forest.predict(X_test)

print(CR(y_test, fpred))
print(CM(y_test, fpred))

# RANDOM FOREST PERFORMED BETTER OVER ALL - BUT THE FALSE NEGATIVES INCREASED COMAPRED TO A SINGLE TREE
# sns.kdeplot(iris[['sepal_width','sepal_length']][iris['species'] == "setosa"])
# plt.show()

# SPLIT DATA
X_train, X_test, y_train, y_test = TTS(iris.drop('species', axis=1),
                                       iris['species'],
                                       test_size=0.3,
                                       random_state=101)

# TRAIN MODEL
# SVC with its default parameters.
model = SVC()
model.fit(X_train, y_train)
pred = model.predict(X_test)

print(CR(y_test, pred), CM(y_test, pred))
print(model)

# GRID SEARCH - "THIS IS NOT NECESSARY, THE MODEL IS PERFECT"
# C runs from 0.1 up to (but excluding) 10 in steps of 0.1.
param_grid = {
    'C': list(np.arange(0.1, 10, 0.1)),
    'gamma': [1, 0.1, 0.001, 0.0001]
}

grid = GSCV(SVC(), param_grid, verbose=3, n_jobs=4)
grid.fit(X_train, y_train)

print(grid.best_params_)
print(grid.best_estimator_)

gpred = grid.predict(X_test)
# Resampling steps followed by the model, evaluated with stratified 2-fold
# cross-validation scored on recall.
steps = [('over', over), ('under', under), ('model', model)]

pipeline = Pipeline(steps=steps)
cv = RepeatedStratifiedKFold(n_splits=2, n_repeats=1, random_state=1)
scores_over = cross_val_score(pipeline, X, y, scoring='recall', cv=cv, n_jobs=-1)
print(f"k={k}\n")
print(f"mean recall: {np.mean(scores_over)}\n")
print(scores_over)

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)
pipeline.fit(X_train,y_train)

yhat_test = pipeline.predict(X_test)
# Column 1 of predict_proba is used as the positive-class score.
yhat_test_proba = pipeline.predict_proba(X_test)[:,1]

# ``labels`` must be passed by keyword: it is keyword-only in current
# scikit-learn, so the positional form raises TypeError.
confusion_matrix = CM(y_test, yhat_test, labels=np.unique(y_train))

precision_ls, recall_ls, threshold_ls =  precision_recall_curve(y_test,yhat_test_proba)

plt.figure(figsize=(10,10))
# precision/recall have one more entry than the thresholds; append 1 so the
# three arrays have the same length for plotting.
threshold_ls = np.append(threshold_ls,1)
plt.plot(threshold_ls, precision_ls)
plt.plot(threshold_ls, recall_ls)
plt.legend(["precision","recall"])

tree1 = DecisionTreeClassifier( max_depth=3, min_samples_leaf = 30, class_weight="balanced")
tree1.fit(X_train, y_train)

fig = plt.figure(figsize=(25,20))

_ = tree.plot_tree(tree1,