Example #1
def plot_just_right(X, y):
    knn_k9 = KNeighborsClassifier(n_neighbors=9)
    knn_k9.fit(X, y)
    plt.figure(figsize=(10, 10))
    plot_decision_regions(X.values, y.values, clf=knn_k9, legend=2)
    plt.title("KNN (k=9)")
    plt.show()
Example #2
def plot_super_flexible(X, y):
    knn_k1 = KNeighborsClassifier(n_neighbors=1)
    knn_k1.fit(X, y)
    plt.figure(figsize=(10, 10))
    plot_decision_regions(X.values, y.values, clf=knn_k1, legend=2)
    plt.title("KNN (k=1)")
    plt.show()
Example #3
def plot_super_conservative(X, y):
    lr = LogisticRegression()
    lr.fit(X, y)
    plt.figure(figsize=(10, 10))
    plot_decision_regions(X.values, y.values, clf=lr, legend=2)
    plt.title("Logistic Regression (LR)")
    plt.show()
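A minimal driver for the three helpers above, assuming two-feature pandas inputs; the iris columns chosen here are illustrative, not part of the original:

import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from mlxtend.plotting import plot_decision_regions

iris = datasets.load_iris(as_frame=True)
X = iris.data.iloc[:, [0, 2]]   # two features so the regions can be drawn in 2D
y = iris.target

plot_super_flexible(X, y)       # k=1: jagged, overfit boundary
plot_just_right(X, y)           # k=9: smoother compromise
plot_super_conservative(X, y)   # logistic regression: linear boundary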
Example #4
def runPCA():
    df_wine = pd.read_csv(
        'http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',
        header=None)  # load the wine dataset
    X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values  # split the features from the labels
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=0)  # 70% of the data for training, 30% for testing
    # The next three lines standardize the features to zero mean and unit variance
    sc = StandardScaler()
    X_train_std = sc.fit_transform(X_train)
    X_test_std = sc.transform(X_test)  # transform only: reuse the training-set statistics rather than refitting

    pca = PCA(n_components=2)  # keep 2 principal components
    lr = LogisticRegression()  # create the logistic regression object
    X_train_pca = pca.fit_transform(X_train_std)  # project the training set onto the principal-component subspace
    X_test_pca = pca.transform(X_test_std)  # project the test set onto the same subspace
    lr.fit(X_train_pca, y_train)  # fit logistic regression in the projected space
    plot_decision_regions(X_train_pca, y_train, clf=lr, legend=2)
    print(lr.score(X_test_pca, y_test))  # mean accuracy on the test set, about 0.98
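    # Sanity-check sketch: how much variance do the two retained components
    # capture? explained_variance_ratio_ holds the per-component fraction.
    print(pca.explained_variance_ratio_)
    print(pca.explained_variance_ratio_.sum())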
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.legend(loc='lower left')
    plt.show()

    return
Example #5
def decision_boundary(df):
    """Plot decision boundary for Logistic Regression."""

    X = df.drop(columns=['status'])
    Y = df.status

    # Scaling
    std_scaler = StandardScaler()
    std_scaled_df = std_scaler.fit_transform(X)
    std_scaled_df = pd.DataFrame(std_scaled_df, columns=X.columns)
    X_train, _, y_train, _ = train_test_split(std_scaled_df, Y, random_state=0)

    # Projection to 2d from 47d
    pca = PCA(n_components=2)
    pca.fit(X_train.fillna(0))
    pca_train = pca.transform(X_train.fillna(0))
    df_train_pca = pd.DataFrame(data=pca_train, columns=['pca-1', 'pca-2'])

    model = LogisticRegression(max_iter=1000)
    model.fit(df_train_pca.fillna(0), y_train)
    plot_decision_regions(df_train_pca.values,
                          y_train.values,
                          clf=model,
                          res=0.02,
                          zoom_factor=5)
    st.pyplot()
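The imports this Streamlit helper relies on are not shown in the snippet; a minimal sketch of what they would look like (st is assumed to be Streamlit):

import pandas as pd
import streamlit as st
from mlxtend.plotting import plot_decision_regions
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler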
Example #6
    def SVC(self):
        from sklearn.svm import SVC
        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler

        self.y_train = self.y_train.flatten()

        # Training a classifier
        svm = SVC(gamma='auto')
        svm.fit(self.X_train, self.y_train)

        # Plotting decision regions
        fig, axarr = plt.subplots(2,
                                  2,
                                  figsize=(10, 8),
                                  sharex=True,
                                  sharey=True)
        values = [-4.0, -1.0, 1.0, 4.0]
        width = 0.75
        for value, ax in zip(values, axarr.flat):
            plot_decision_regions(self.X_train,
                                  self.y_train,
                                  clf=svm,
                                  filler_feature_values={2: value},
                                  filler_feature_ranges={2: width},
                                  legend=2,
                                  ax=ax)
            ax.set_xlabel('Feature 1')
            ax.set_ylabel('Feature 2')
            ax.set_title('Feature 3 = {}'.format(value))

        # Adding axes annotations
        fig.suptitle('SVM on make_blobs')
        plt.tight_layout()
        plt.show()
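The method above assumes self.X_train has three features, since feature index 2 is held fixed by the filler arguments; a sketch of a compatible setup, with make_blobs standing in for whatever data the class actually loads (these arrays would be assigned to self.X_train and self.y_train):

from sklearn.datasets import make_blobs

# Hypothetical data: three features, so filler_feature_values={2: value} has a
# third column to hold constant while features 0 and 1 are plotted.
X_train, y_train = make_blobs(n_samples=300, centers=3, n_features=3,
                              random_state=0)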
Example #7
def SVM_Linear_2(file):
    X, y = make_classification(n_features=2,
                               n_redundant=0,
                               n_informative=2,
                               n_samples=200,
                               random_state=1,
                               n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)

    # Train the samples with a linear-kernel support vector machine
    C = 0.1
    clf = SVC(kernel="linear", C=C)

    # Standardize the data before training
    X = StandardScaler().fit_transform(X)

    clf.fit(X, y)
    y_pred = clf.predict(X)

    prec = precision_score(y_true=y, y_pred=y_pred, pos_label=1)
    rec = recall_score(y_true=y, y_pred=y_pred, pos_label=1)
    f1 = f1_score(y_true=y, y_pred=y_pred, pos_label=1)
    precision_score_ = "Precision score is : {:.2f}".format(prec)
    recall_score_ = "Recall score is : {:.2f}".format(rec)
    f1_score_ = "f1 score is : {:.2f}".format(f1)
    confusion_matrix_ = "Confusion matrix is :{}".format(
        confusion_matrix(y_pred=y_pred, y_true=y))

    plot_decision_regions(X, y, clf=clf, colors='orange,navy')
    plt.title("SVM with linear kernel")
    plt.savefig(file)
    plt.close()

    return precision_score_, recall_score_, f1_score_, confusion_matrix_
Example #8
def main():
    wine_data_frame = pd.read_csv('wine_data_frame.csv')

    attrib_train, attrib_test, label_train, label_test = split_data_train_test(
        wine_data_frame)

    # Create Instance
    knn = KNeighborsClassifier(n_neighbors=20)

    # Create Prediction model
    clf_result = knn.fit(attrib_train, label_train)

    # prediction
    label_pred = clf_result.predict(attrib_test)

    # Calculate Prediction Accuracy
    accuracy_score = metrics.accuracy_score(label_test, label_pred)

    print(accuracy_score)

    # plot trained data
    plot_decision_regions(attrib_train,
                          label_train,
                          clf=clf_result,
                          res=0.01,
                          legend=2)
    # plot test data
    # plot_decision_regions(attrib_test_plot, label_test_plot, clf=clf_result, res=0.01, legend=2)

    plt.show()
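split_data_train_test is not defined in this example; a plausible sketch, assuming the CSV's 'class' column holds the label (the column name and split ratio are guesses):

from sklearn.model_selection import train_test_split

def split_data_train_test(df):
    # Hypothetical helper: label column name and test size are assumptions.
    attribs = df.drop(columns=['class']).values
    labels = df['class'].values
    return train_test_split(attribs, labels, test_size=0.3, random_state=0)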
Example #9
def plot_tree_decision_regions(clf: DecisionTreeClassifier, fontsize=None):

    if fontsize is None:
        fontsize = FONTSIZE

    X, y = datasets.load_iris(return_X_y=True)
    X = X[:, :2]

    labels = ['setosa', 'versicolor', 'virginica']
    fig, ax = plt.subplots(figsize=(10, 8))
    with plt.style.context({'lines.markersize': 10}):
        plot_decision_regions(X,
                              y,
                              clf,
                              colors='C0,C1,C2',
                              markers='ooo',
                              hide_spines=False,
                              ax=ax)
    ax.set_xlabel('Sepal length (cm)', fontsize=fontsize)
    ax.set_ylabel('Sepal width (cm)', fontsize=fontsize)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    leg = plt.legend(title='Iris species', fontsize=18)
    for idx, label in enumerate(labels):
        leg.get_texts()[idx].set_text(label)
    plt.setp(leg.get_title(), fontsize=fontsize)
    plt.show()
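A usage sketch for the helper above; the classifier must be fitted on the same two iris features the function plots, and FONTSIZE is assumed to be a module-level constant:

from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier

FONTSIZE = 20  # assumed module-level default

X, y = datasets.load_iris(return_X_y=True)
tree = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X[:, :2], y)
plot_tree_decision_regions(tree)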
Example #10
def do_pca_and_plot_decision_regions(clf):
    pca = PCA(n_components=2)
    pca.fit(X_train)
    X_t_train = pca.transform(X_train)
    X_t_test = pca.transform(X_test)
    clf.fit(X_t_train, y_train)
    plot_decision_regions(X_t_train, y_train.to_numpy(), clf=clf, legend=2)
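The function reads X_train, X_test, and y_train from enclosing scope; a sketch of a compatible setup (the dataset is illustrative, and y_train must be a pandas Series since the function calls .to_numpy() on it):

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

data = load_breast_cancer(as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, random_state=0)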
Example #11
def SVM_RBF_Kernel_3(file):

    X, y = make_classification(n_features=2,
                               n_redundant=0,
                               n_informative=2,
                               n_samples=200,
                               random_state=1,
                               n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)

    C = 0.1  # clf = SVC(kernel="rbf", gamma=2., C=C) -- if kernel is omitted, it defaults to "rbf"
    clf = SVC(gamma=2., C=C)

    clf.fit(X, y)
    y_pred = clf.predict(X)

    prec = precision_score(y_true=y, y_pred=y_pred, pos_label=1)
    rec = recall_score(y_true=y, y_pred=y_pred, pos_label=1)
    f1 = f1_score(y_true=y, y_pred=y_pred, pos_label=1)
    precision_score_ = "Precision score is : {:.2f}".format(prec)
    recall_score_ = "Recall score is : {:.2f}".format(rec)
    f1_score_ = "f1 score is : {:.2f}".format(f1)
    confusion_matrix_ = "Confusion matrix is :{}".format(
        confusion_matrix(y_pred=y_pred, y_true=y))

    plot_decision_regions(X, y, clf=clf, colors='orange,navy')
    plt.title("SVM with rbf kernel")
    plt.savefig(file)
    plt.close()
Example #12
def visualize_pathways_for_desease():
    X, y = DataReader().read_fva_solutions('fva_without.transports.txt')
    X = PathwayFvaDiffScaler().fit_transform(X, y)
    vect = DictVectorizer(sparse=False)
    X = vect.fit_transform(X, y)
    # X = X[:, None]
    y = np.array([1 if i == 'bc' else 0 for i in y], dtype=int)
    # clf = LinearSVC(C=0.01, random_state=43).fit(X, y)
    if X.shape[1] == 1:  # single-feature case: jitter it so the 1-D regions are visible
        X = X + np.reshape(np.random.normal(1, 100, size=len(X)), X.shape)
        clf = DecisionTreeClassifier(max_depth=2).fit(X, y)
        plot_decision_regions(X, y, clf=clf, res=0.5, legend=2)
        plt.xlabel(vect.feature_names_[0])
    else:
        for fn in set(map(lambda x: x[:-4], vect.feature_names_)):
            try:
                x = X[:, (vect.feature_names_.index('%s_min' % fn),
                          vect.feature_names_.index('%s_max' % fn))]
            except ValueError:  # this feature pair is not in the vectorizer; skip it
                continue

            # clf = DecisionTreeClassifier(max_depth=1).fit(x, y)
            clf = LinearSVC(C=1e-4, random_state=43).fit(x, y)

            # clf = LogisticRegression(C=0.1e-1, random_state=43).fit(x, y)

            x = x + np.reshape(np.random.normal(1, 100, size=len(x) * 2),
                               x.shape)
            plot_decision_regions(X=x, y=y, clf=clf, legend=2, res=10)
            plt.xlabel('%s_min' % fn)
            plt.ylabel('%s_max' % fn)
            plt.show()
Example #13
def draw_plot(clf, test, y_test):
    plot_decision_regions(X=test.values, y=y_test.values, clf=clf, legend=2)
    plt.xlabel(test.columns[0], size=14)
    plt.ylabel(test.columns[1], size=14)
    title = classifier_type + ' Decision Region Boundary'  # classifier_type comes from enclosing scope
    plt.title(title, size=16)
    plt.show()
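classifier_type is read from enclosing scope; a hypothetical caller, using a two-column frame so the axis labels resolve:

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

classifier_type = 'KNN'              # used in the plot title
iris = load_iris(as_frame=True)
test = iris.data.iloc[:, :2]         # two columns: used for the axis labels
y_test = iris.target
clf = KNeighborsClassifier(n_neighbors=5).fit(test, y_test)
draw_plot(clf, test, y_test)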
Example #14
def main():
    from mlxtend.data import iris_data
    from mlxtend.plotting import plot_decision_regions
    import matplotlib.pyplot as plt

    # Loading Data

    X, y = iris_data()
    X = X[:, [0, 3]]  # sepal length and petal width

    # standardize
    X[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    lr = SoftmaxRegression(eta=0.01, epochs=10, minibatches=1, random_seed=0)
    lr.fit(X, y)

    plot_decision_regions(X, y, clf=lr)
    plt.title('Softmax Regression - Gradient Descent')
    plt.show()

    plt.plot(range(len(lr.cost_)), lr.cost_)
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.show()
Example #15
def plot_article_result_from_files(python_files, weights_files):
    for i, (file_path, weight_path) in enumerate(zip(python_files, weights_files)):
        print('plotting', file_path)
        ax = plt.subplot(3, 3, i + 1)
        plt.minorticks_on()
        if i in (0, 3, 6):  # left column keeps its y label
            plt.ylabel('$x_2$')
        if i in (6, 7, 8):  # bottom row keeps its x label
            plt.xlabel('$x_1$')
        if i < 6:
            plt.setp(ax.get_xticklabels(), visible=False)
        if i in (1, 2, 4, 5, 7, 8):  # hide y tick labels off the left column
            plt.setp(ax.get_yticklabels(), visible=False)
        # import model
        path, file = os.path.split(file_path)
        sys.path.append(path)
        module_name = pathlib.Path(file).stem
        module = importlib.import_module(module_name)
        model = module.U3_U()
        # Load data
        plain_x, plain_labels = article_2003_09887_data(i)
        plain_x = normalize_data(plain_x)
        model(plain_x[:2])
        # load weights
        with open(weight_path, 'br') as w_file:
            weights = pickle.load(w_file)
            model.set_weights(weights)
        plot_decision_regions(X=plain_x, y=np.array(plain_labels, int), clf=model, ax=ax, scatter_kwargs={'alpha': 0.5})
        sys.path.remove(path)  # remove the module's directory from the import path again
Example #16
def main():
    df = pd.read_csv("delay_time.csv")
    X = df[['mean', 'stdev']]
    Y = df['label'].map({'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5})
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=0
    )  # split the data: 80% for training, 20% for testing

    # knn
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train, Y_train)  # fit on the training split only
    Y_pred = knn.predict(X_test)
    C = confusion_matrix(Y_test, Y_pred)
    # Row-normalize the confusion matrix so each true-class row sums to 1
    NC = C.astype(float) / C.sum(axis=1, keepdims=True)
    print(NC)
    for r in NC:
        for c in r:
            print("{}".format(c), end=",")
    # plot
    X_combined = np.vstack((X_train, X_test))
    y_combined = np.hstack((Y_train, Y_test))

    plot_decision_regions(X_combined, y_combined, clf=knn)

    plt.xlabel('mean')
    plt.ylabel('stdev')
    plt.xlim(-150, 50)
    plt.ylim(0, 70)
    plt.legend(loc='upper left')
    plt.savefig("knn.png")
    plt.close('all')
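Row normalization of a confusion matrix, shown on a tiny example so the intent of NC is explicit; each true-class row should sum to 1:

import numpy as np

C = np.array([[8, 2],
              [1, 9]])
NC = C.astype(float) / C.sum(axis=1, keepdims=True)
print(NC)  # [[0.8 0.2]
           #  [0.1 0.9]]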
Example #17
def svm_comparison(datas):
    X = datas.drop(['hora_ideal'], axis=1).values
    # Define the data corresponding to the label
    y = datas["hora_ideal"].values
    pca = PCA(n_components=2)
    X_train = pca.fit_transform(X)
    X_train2, X_test, y_train, y_test = train_test_split(X_train,
                                                         y,
                                                         test_size=0.2)
    clf = SVC(kernel='sigmoid', C=0.5)
    clf.fit(X_train2, y_train)
    y_pred = clf.predict(X_test)
    # Plotting decision region
    plt.figure(figsize=(8, 5), dpi=300)
    plot_decision_regions(X_train2, y_train, clf=clf, legend=2)
    # Adding axes annotations
    plt.xlabel('Features')
    plt.ylabel('Target')
    plt.title(
        "SVM: decision region boundary with kernel='sigmoid' and C=0.5")
    plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
    plt.tight_layout()
    plt.savefig('SVM_Limit.png', dpi=300)

    matriz = confusion_matrix(y_test, y_pred)
    print("Matriz de confusión")
    print(matriz)

    plt.show()
Example #18
def plot_knn(data, grid, y_true, y_pred):
    X_plot = data['X_train'].values
    y_plot = data['y_train'].values.astype(int)
    plot_decision_regions(X_plot, y_plot, clf=grid.best_estimator_, legend=2, scatter_kwargs=dict(s=20), markers='+o')

    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plot_confusion_matrix(conf_mat=cm, show_absolute=True, show_normed=True, colorbar=True)
Example #19
def displayData(X, y, grid=False, clf=None):
    pos = y == 1
    neg = y == 0
    plt.plot(X[pos, 0], X[pos, 1], 'X', mew=1, ms=10, mec='k')
    plt.plot(X[neg, 0], X[neg, 1], 'o', mew=1, mfc='y', ms=10, mec='k')
    plt.grid(grid)

    if clf:
        if clf.kernel == 'linear':
            w = clf.coef_[0]
            a = -w[0] / w[1]
            xx = np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 10)
            yy = a * xx - clf.intercept_[0] / w[1]
            plt.plot(xx, yy, 'k-')
        elif clf.kernel == 'rbf':
            x_min, x_max = X[:, 0].min(), X[:, 0].max()
            y_min, y_max = X[:, 1].min(), X[:, 1].max()
            plot_decision_regions(X, y, clf=clf, legend=2)
            plt.xlim(x_min, x_max)
            plt.ylim(y_min, y_max)
            # h = 0.2 # Mesh step size
            # cm = plt.cm.RdBu
            # x_min, x_max = X[:, 0].min(), X[:, 0].max()
            # y_min, y_max = X[:, 1].min(), X[:, 1].max()
            # xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
            #              np.arange(y_min, y_max, h))
            # if hasattr(clf, "decision_function"):
            #     Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
            # else:
            #     Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
            # Z = Z.reshape(xx.shape)
            # plt.contourf(xx, yy, Z, cmap=cm, alpha=.8)
        else:
            raise ValueError("kernel must be 'rbf' or 'linear' (for now)")
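A usage sketch for displayData covering both supported kernels; the blob data is illustrative:

import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.svm import SVC

X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=0)
displayData(X, y, clf=SVC(kernel='linear').fit(X, y))
plt.show()
displayData(X, y, clf=SVC(kernel='rbf', gamma=2.0).fit(X, y))
plt.show()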
Example #20
def KNN():

    allData = pd.read_csv('Data_26and30_electrode.csv')
    allLabel = pd.read_csv('Data_26and30_electrode_label.csv').values.reshape(
        -1, )

    # pca
    pca = PCA(n_components=2)
    newSet = pca.fit_transform(allData)  # fit_transform both fits and projects in one step

    # data split
    X_train, X_test, y_train, y_test = train_test_split(newSet, allLabel)

    standardScaler = StandardScaler()
    standardScaler.fit(X_train)
    X_train = standardScaler.transform(X_train)
    X_test = standardScaler.transform(X_test)
    knn_model = KNeighborsClassifier(n_neighbors=10)

    knn_model.fit(X_train, y_train)

    print(knn_model.score(X_test, y_test))  # mean accuracy on the test set

    # results visualization: plot in the standardized space the model was trained in
    plot_decision_regions(standardScaler.transform(newSet), allLabel, clf=knn_model, legend=2)
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('Knn model')
    plt.show()
Example #21
def svm_graphs(mdls, data, test_n):
    titles = ('LinearSVC (linear kernel)', 'SVC with RBF kernel',
              'SVC with polynomial (degree 3) kernel')

    X = np.asarray([[d[0], d[1]] for d in data[0]])
    Y = np.asarray(data[1])
    graph_models = [clf.fit(X, Y) for clf in mdls]

    for clf, ttl in zip(graph_models, titles):
        plot_decision_regions(X=X, y=Y, clf=clf, legend=2)
        plt.xlabel('Feature 1')  # axis labels must be strings, not coordinate lists
        plt.ylabel('Feature 2')
        plt.title(ttl)
        filename = 'SvmGraphsTest/Data' + str(test_n)
        if ttl == 'LinearSVC (linear kernel)':
            filename += 'LinearSvc'
        elif ttl == 'SVC with RBF kernel':
            filename += 'SvcRbf'
        else:
            filename += 'SvcPoly'
        filename += '.png'
        plt.savefig(filename)
        plt.close()  # start a fresh figure for the next model
Example #22
    def plotter():
        global xlist, ylist
        plt.clf()
        X = contentx.get()
        Y = contenty.get()
        colo = contentc.get()
        xlist.append([float(X), float(Y)])
        ylist.append(int(colo))

        contentx.set("")
        contenty.set("")
        contentc.set("")

        npx_list = np.array(xlist)
        npy_list = np.array(ylist)

        # Earth() is the MARS regressor from the pyearth package
        earth_classifier = Pipeline([('earth', Earth()),
                                     ('logistic', LogisticRegression())])
        earth_classifier.fit(npx_list, npy_list)

        plot_decision_regions(npx_list,
                              npy_list,
                              clf=earth_classifier,
                              legend=2)
        plt.savefig('foo.png')

        root.photo_n = ImageTk.PhotoImage(Image.open('foo.png'))
        vlabel.configure(image=root.photo_n)
        print("Image Updated")
Example #23
def main():
    seed = 0
    np.random.seed(seed)
    X, y = make_classification(n_features=2, n_redundant=0, random_state=seed,
                               n_informative=2, n_clusters_per_class=1)
    X = X + np.random.uniform(-.5, .5, X.shape[0] * 2).reshape(X.shape)

    ds = pd.DataFrame(X, columns=["A", "B"])
    ds["Response"] = pd.Series(y)

    DF_train, DF_unseen = train_test_split(ds.copy(), test_size=0.2, stratify=ds["Response"],
                                           random_state=seed)

    #+++++++++++++++++ 5) modelling
    mlp_param_grid = {'mlpc__hidden_layer_sizes': [(3,), (6,), (3, 3), (5, 5)],
                      'mlpc__learning_rate_init': [0.001, 0.01]}

    mlp_gscv = grid_search_MLP(DF_train, mlp_param_grid, seed)
    print("Best parameter set: ", mlp_gscv.best_params_)
    # pd.DataFrame.from_dict(mlp_gscv.cv_results_).to_excel("D:\\PipeLines\\project_directory\\data\\mlp_gscv.xlsx")

    #+++++++++++++++++ 6) retraining & assessment of generalization ability
    auprc = assess_generalization_auprc(mlp_gscv.best_estimator_, DF_unseen)
    print("AUPRC: {:.2f}".format(auprc))

    plot_decision_regions(X=ds.iloc[:, :-1].values, y=ds.iloc[:, -1].values, clf=mlp_gscv.best_estimator_,
                          X_highlight=DF_unseen.iloc[:, :-1].values,
                          scatter_highlight_kwargs={'s': 120, 'label': 'Test data', 'alpha': 0.7})
    plt.show()
Example #24
def Highlighting_Test_Data_Points():
    from mlxtend.plotting import plot_decision_regions
    from mlxtend.preprocessing import shuffle_arrays_unison
    import matplotlib.pyplot as plt
    from sklearn import datasets
    from sklearn.svm import SVC

    # Loading some example data
    data = pd.read_csv('2clstrain1200.csv', header=None)

    X, y = data.iloc[:, 0:2].values, data.iloc[:, 2].values
    X = X.astype(int)
    y = y.astype(int)
    X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)

    X_train, y_train = X[:700], y[:700]
    X_test, y_test = X[700:], y[700:]

    # Training a classifier
    svm = SVC(C=0.5, kernel='linear')
    svm.fit(X_train, y_train)

    # Plotting decision regions
    plot_decision_regions(X, y, clf=svm, legend=2, X_highlight=X_test)

    # Adding axes annotations
    plt.xlabel('')
    plt.ylabel('')
    plt.title('SVM on Iris')
    plt.show()
Example #25
def svm_classification():
    df_test, df_train, df = tf_idf_vect_feature_vector()
    clf = SVC()
    clf.fit(df_train['tweets_vec'].tolist(), df_train['tag'].tolist())
    # Plot Decision Region using mlxtend's awesome plotting function
    # print(clf.score())
    plot_decision_regions(X=np.array(df_train['tweets_vec'].tolist()),
                          y=np.array(df_train['tag'].tolist()),
                          clf=clf,
                          legend=2)  # mlxtend expects NumPy arrays, not Python lists
    # Update plot object with X/Y axis labels and Figure Title
    #plt.xlabel(X.columns[0], size=14)
    #plt.ylabel(X.columns[1], size=14)
    plt.title('SVM Decision Region Boundary', size=16)
    plt.show()

    predict = clf.predict(df_test['tweets_vec'].tolist())  # predict takes features only
    print(predict)

    print('Recall:', recall_score(df_test['tag'].tolist(), predict))
    print('Precision:', precision_score(df_test['tag'].tolist(), predict))

    f1 = f1_score(df_test['tag'].tolist(), predict, average='micro')
    print("f1_score is :", f1)

    return predict
Example #26
def dt_classifier(max_depth=None):
    dtc = DecisionTreeClassifier(max_depth=max_depth).fit(X_train, y_train)
    plot_decision_regions(X_train,
                          y_train,
                          clf=dtc,
                          legend=2,
                          markers='oooo^v')
    plt.show()
Example #27
    def plot(self):
        fig = plt.figure()
        woe_norm = np.array([self.region_woe[w] for w in sorted(self.region_woe.keys())])
        woe_norm -= woe_norm.min()
        woe_norm /= woe_norm.max()
        # Map each normalized weight-of-evidence value to a shade of red
        colors = ','.join("#{:02x}0000".format(int(np.abs(w) * 255)) for w in woe_norm)
        plot_decision_regions(self.x, self.region.flatten(), clf=self, legend=2, colors=colors)
        return fig
Example #28
def plot_boundaries(X, y, clf):
    features = X.columns
    plot_decision_regions(X=X.values, y=y.values, clf=clf, res=0.02, legend=2)
    # Adding axes annotations
    plt.xlabel(features[0])
    plt.ylabel(features[1])
    plt.title('Predictions of the model')
    plt.show()
Example #29
def plotResult(X_std, y, clf):
    plot_decision_regions(X_std, y, clf=clf)
    plt.title('Adaline - Stochastic Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.show()
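A caller matching the title above: a sketch using mlxtend's Adaline, where minibatches=len(y) selects stochastic gradient descent (the binary iris subset and manual standardization are assumptions):

from mlxtend.classifier import Adaline
from mlxtend.data import iris_data

X, y = iris_data()
X, y = X[:100, [0, 2]], y[:100]   # two classes; sepal length and petal length
X_std = (X - X.mean(axis=0)) / X.std(axis=0)

ada = Adaline(eta=0.01, epochs=15, minibatches=len(y), random_seed=1)
ada.fit(X_std, y)
plotResult(X_std, y, ada)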
Example #30
def plot(data, labels, clf, title):
    if not PLOT_DATASETS:
        return
    plot_decision_regions(X=data, y=labels, clf=clf, legend=2)
    plt.xlabel('Maximum Aim Yaw Change (Degrees)', size=14)
    plt.ylabel('Maximum Aim Pitch Change (Degrees)', size=14)
    plt.title(title, size=16)
    plt.show()
Example #31
def test_pass():
    sr.fit(X[:, :2], y)
    plot_decision_regions(X=X[:, :2], y=y, clf=sr)
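test_pass reads sr, X, and y from module scope; a sketch of the fixture it assumes, in the style of mlxtend's own tests:

from mlxtend.classifier import SoftmaxRegression
from mlxtend.data import iris_data

X, y = iris_data()
sr = SoftmaxRegression(epochs=10, eta=0.05, random_seed=1)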