Example #2
def run_bs_adaboost():
    df = pd.read_csv('Files/csv_result-Descriptors_Training.csv', sep=',')
    df = df.drop(['id'], axis=1).replace(['P', 'N'], [1, 0])
    df = prc.handle_outlier(
        prc.detect_outlier_iterative_IQR(df).dropna(thresh=20))
    df = prc.standarize(df)  # or normalize
    # Weight the positive class 20:1 inside each stump to counter class imbalance
    dt = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=1, class_weight={1: 20, 0: 1}),
        n_estimators=20)
    print(main(df, "AdaBoost", dt, bs_estimate=True, verbose=True))
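
The 20:1 positive-class weight above is hand-tuned. As an alternative (an addition, not part of the original code), scikit-learn can derive "balanced" weights from the label distribution; a minimal sketch, assuming the preprocessed df from run_bs_adaboost and a label column named 'class' as in the last example below:

# Sketch: derive class weights from the data instead of hardcoding 20:1.
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

y = df['class']
weights = compute_class_weight(class_weight='balanced',
                               classes=np.unique(y), y=y)
weight_map = dict(zip(np.unique(y), weights))  # e.g. {0: w0, 1: w1}
stump = DecisionTreeClassifier(max_depth=1, class_weight=weight_map)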


# run_depth_test()
# run_bs_dt()
# run_bs_adaboost()

# Test meta learning example
# abc = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=100)
# main(df=df, name="AdaBoost Decision Stumps", model=abc)
# Print PR Curves from test
# plt.legend(loc=1)
# plt.title("Precision Recall Curve")
# plt.show()
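
main() and its bs_estimate flag are defined elsewhere in this project and are not shown here. Purely as a hypothetical sketch of what a bootstrap estimate of the Pr@Re>50 metric could look like (every name below is illustrative, not the project's actual implementation):

# Hypothetical bootstrap performance estimate; assumes X, y are numpy arrays
# of features and binary labels, and model exposes predict_proba().
import numpy as np
from sklearn.metrics import precision_recall_curve

def bootstrap_pr_at_re50(model, X, y, n_rounds=10, seed=0):
    rng = np.random.RandomState(seed)
    scores = []
    for _ in range(n_rounds):
        # Sample training rows with replacement; evaluate on the out-of-bag rest.
        idx = rng.choice(len(X), size=len(X), replace=True)
        oob = np.setdiff1d(np.arange(len(X)), idx)
        model.fit(X[idx], y[idx])
        prob = model.predict_proba(X[oob])[:, 1]
        prec, rec, _ = precision_recall_curve(y[oob], prob)
        scores.append(prec[rec > 0.5].max())  # best precision at recall > 0.5
    return np.mean(scores), np.std(scores)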
def run_depth_test():
    df = pd.read_csv('Files/csv_result-Descriptors_Training.csv', sep=',') 
    df = df.drop(['id'], axis=1).replace(['P', 'N'], [1, 0])
    df = prc.handle_outlier(prc.detect_outlier_iterative_IQR(df).dropna(thresh=20))
    df = prc.standarize(df) # or normalize
    rslt = test_tree_depth(df)

    print("Run Time: " + str(datetime.now() - startTime))

    # Print PR Curves from test
    plt.legend(loc=1)
    plt.title("Precision Recall Curve")
    plt.show()

    # Plot Pr@Re>50 as a function of tree depth
    plt.plot(list(range(2, len(rslt))), rslt[2:])
    plt.xlabel("Depth of Tree")
    plt.ylabel("Pr@Re>50")
    plt.title("Testing Decision Tree Depth")
    plt.xticks(list(range(2, len(rslt))))
    plt.show()
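
test_tree_depth() is likewise defined elsewhere; judging from the plotting code above (rslt is indexed by depth and holds Pr@Re>50 per depth, with one PR curve drawn per depth), a sweep of that shape might look like the following sketch. The split strategy and names are assumptions, not the project's actual code:

# Hypothetical sketch of test_tree_depth, inferred from the plots above;
# assumes DecisionTreeClassifier and plt are imported as elsewhere in this file.
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve

def test_tree_depth(df, max_depth=20):
    X = df.drop(['class'], axis=1)
    y = df['class']
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3,
                                              stratify=y, random_state=0)
    rslt = [0, 0]  # pad indices 0-1 so rslt[d] lines up with depth d
    for depth in range(2, max_depth + 1):
        clf = DecisionTreeClassifier(max_depth=depth).fit(X_tr, y_tr)
        prob = clf.predict_proba(X_te)[:, 1]
        prec, rec, _ = precision_recall_curve(y_te, prob)
        plt.plot(rec, prec, label="depth=%d" % depth)  # one PR curve per depth
        rslt.append(prec[rec > 0.5].max())
    return rslt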
Example #4
import pandas as pd
from sklearn.manifold import TSNE

from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

import preprocessing as prc
import feature_selection as fs

df = pd.read_csv('Files/csv_result-Descriptors_Training.csv', sep=',')
df = df.drop(['id'], axis=1).replace(['P', 'N'], [1, 0])
df = prc.handle_outlier(prc.detect_outlier_iterative_IQR(df).dropna(thresh=20))
df = prc.standarize(df)  # or normalize

# return all features whose variance is at least the threshold
# note: on standardized data, no features are dropped for thresholds below 1 !!!
#fs_vairance = fs.variance_threshold(df, threshold=1)
#fs_vairance = pd.concat([fs_vairance, df['class']], axis=1)
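
fs.variance_threshold() comes from the project's feature_selection module, whose internals are not shown. The same filtering can be sketched with scikit-learn's built-in VarianceThreshold (the helper below is an assumption, not fs's actual code):

# Sketch of variance-based filtering via sklearn's VarianceThreshold;
# stands in for fs.variance_threshold, keeping the surviving column names.
from sklearn.feature_selection import VarianceThreshold

def variance_filter(frame, threshold=1):
    selector = VarianceThreshold(threshold=threshold)
    selector.fit(frame)
    kept = frame.columns[selector.get_support()]
    return frame[kept]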

X = df.drop(['class'], axis=1)
y = df['class']

# Kernel PCA with an RBF kernel; fit_inverse_transform allows mapping the
# embedding back to the original feature space
kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
X_kpca = kpca.fit_transform(X)
X_back = kpca.inverse_transform(X_kpca)

# Linear PCA for comparison
pca = PCA()
X_pca = pca.fit_transform(X)
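
The snippet ends after the transforms; a small follow-up plot (an addition, not in the original) can compare the two embeddings using the matplotlib import above:

# Follow-up sketch: scatter the first two components of each embedding,
# colored by class, to compare linear PCA with RBF kernel PCA.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='coolwarm', s=8)
ax1.set_title("PCA")
ax2.scatter(X_kpca[:, 0], X_kpca[:, 1], c=y, cmap='coolwarm', s=8)
ax2.set_title("Kernel PCA (RBF)")
plt.show()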