from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix


######################################
# PREPROCESSING AND DATA EXPLORATION #
######################################

# initialization: names handed to load_my_data for the two data sets
path1 = "train_data"
path2 = "test_data"

# load data; load_my_data returns a pair that is unpacked into (X, y)
# NOTE(review): load_my_data is not imported anywhere above this point in the
# visible file -- confirm it is in scope before this line runs.
(X_tr, y_tr) = load_my_data(path1)
(X_tst, y_tst) = load_my_data(path2)

# Print the shapes as a quick sanity check. Formatting into a single string
# first gives the same output under the Python 2 print statement and the
# Python 3 print function (the bare `print a, b` form is a SyntaxError on 3).
print("%s %s" % (X_tr.shape, y_tr.shape))
print("%s %s" % (X_tst.shape, y_tst.shape))


###################################
# EVALUATE HYPERPARAMETER SETTING #
###################################

# Bookkeeping for the best candidate seen so far; both start out "empty".
best_modelLR = None
best_score = 0

# Candidate penalty names, listed in the order they are meant to be tried.
regularization = ["l2", "l1"]

# Number of folds (presumably for cross-validation -- TODO confirm).
n_folds = 5
# Example no. 2
# 0
from preprocess import load_my_data, feat_extraction
from visualize import plot_2D_transformation, pairwise_scatter, plot_2D_clustering
from sklearn.cluster import KMeans

__author__ = 'matteo'

# initialization
path = "iris-data.csv"

# load data; load_my_data returns a pair that is unpacked into (X, y)
(X, y) = load_my_data(path)

# plot original data 2 factor scatterplots
pairwise_scatter(X, y)

# supervised (LDA) and unsupervised (PCA) feature extraction
num_feat = 2
(X_pca, X_lda) = feat_extraction(X, y, num_feat)

# plotting 2D transformed data
plot_2D_transformation(X_pca)
plot_2D_transformation(X_lda)

# cluster pca transformed data
clstr = KMeans(n_clusters=3)
clstr.fit(X_pca)
labels = clstr.labels_
plot_2D_clustering(X_pca, labels)

# cluster and plot lda transformed data
# The same estimator is refit on the LDA projection. The labels must be read
# again after the refit (the previous ones belong to the PCA fit), and the
# result is plotted the same way as for the PCA case; previously the refit
# result was computed but never used.
clstr.fit(X_lda)
labels = clstr.labels_
plot_2D_clustering(X_lda, labels)
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix

######################################
# PREPROCESSING AND DATA EXPLORATION #
######################################

# initialization: names handed to load_my_data for the two data sets
path1 = "train_data"
path2 = "test_data"

# load data; load_my_data returns a pair that is unpacked into (X, y)
(X_tr, y_tr) = load_my_data(path1)
(X_tst, y_tst) = load_my_data(path2)

# Print the shapes as a quick sanity check. Formatting into a single string
# first gives the same output under the Python 2 print statement and the
# Python 3 print function (the bare `print a, b` form is a SyntaxError on 3).
print("%s %s" % (X_tr.shape, y_tr.shape))
print("%s %s" % (X_tst.shape, y_tst.shape))

###################################
# EVALUATE HYPERPARAMETER SETTING #
###################################

# Penalty names iterated by the loop that follows, in this order.
regularization = ['l2', 'l1']

# Bookkeeping for the best candidate seen so far; both start out "empty".
best_modelLR = None
best_score = 0

# Number of folds (presumably for cross-validation -- TODO confirm).
n_folds = 5
for t in regularization: