from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix

######################################
# PREPROCESSING AND DATA EXPLORATION #
######################################

# initialization: locations of the training and the held-out test data
path1 = "train_data"
path2 = "test_data"

# load data; load_my_data is defined outside this section and is unpacked
# here as a (features, labels) pair
(X_tr, y_tr) = load_my_data(path1)
(X_tst, y_tst) = load_my_data(path2)

# Report the dimensions of each split as a sanity check.  The single
# pre-formatted string prints the same text as the Python 2 statement
# `print a, b` while also being valid syntax under Python 3.
print("%s %s" % (X_tr.shape, y_tr.shape))
print("%s %s" % (X_tst.shape, y_tst.shape))

###################################
# EVALUATE HYPERPARAMETER SETTING #
###################################

# Search state for the model selection that follows.
n_folds = 5          # presumably the number of cross-validation folds -- confirm against the loop below
best_score = 0       # best score seen so far
best_modelLR = None  # best model found so far; None until one has been evaluated
regularization = ["l2", "l1"]  # candidate regularization settings to try
from preprocess import load_my_data, feat_extraction
from visualize import plot_2D_transformation, pairwise_scatter, plot_2D_clustering
from sklearn.cluster import KMeans

__author__ = 'matteo'

# initialization
path = "iris-data.csv"

# load data; the loader's result is unpacked as a (features, labels) pair
(X, y) = load_my_data(path)

# plot original data as pairwise (two-factor) scatterplots
pairwise_scatter(X, y)

# supervised (LDA) and unsupervised (PCA) feature extraction,
# keeping num_feat components so the result can be drawn in 2D
num_feat = 2
(X_pca, X_lda) = feat_extraction(X, y, num_feat)

# plotting 2D transformed data
plot_2D_transformation(X_pca)
plot_2D_transformation(X_lda)

# cluster pca transformed data and plot the resulting assignment
clstr = KMeans(n_clusters=3)
clstr.fit(X_pca)
labels = clstr.labels_
plot_2D_clustering(X_pca, labels)

# cluster lda transformed data (the same estimator object is refit).
# NOTE: `labels` still refers to the assignment from the PCA fit above;
# read clstr.labels_ again before using the LDA clustering.
clstr.fit(X_lda)
from sklearn import svm from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import f1_score, precision_score, recall_score from sklearn.metrics import confusion_matrix ###################################### # PREPROCESSING AND DATA EXPLORATION # ###################################### # initialization path1 = "train_data" path2 = "test_data" # load data (X_tr, y_tr) = load_my_data(path1) (X_tst, y_tst) = load_my_data(path2) print X_tr.shape, y_tr.shape print X_tst.shape, y_tst.shape ################################### # EVALUATE HYPERPARAMETER SETTING # ################################### n_folds = 5 best_score = 0 best_modelLR = None regularization = ['l2', 'l1'] for t in regularization: