Example #1
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()


#Standardizing the data to zero mean and unit variance
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#Per-class means, used to initialize the mixture components
means_init = np.array([X[y == i].mean(axis=0) for i in range(3)])

##############################################################################################################################
#For Expectation Maximization
em(X_train, X_test, y_train, y_test, component_list = [3,4,5,6,7,8,9,10,11], num_class = 7, file_no = "phish")

#############################################################################################################################
#For KMeans
kmeans(X_train, X_test, y_train, y_test,  component_list = [3,4,5,6,7,8,9,10,11], num_class = 7, file_no = "phish")
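# The em() and kmeans() helpers called throughout these examples are not
# defined in this excerpt; their signatures and return values can only be
# inferred from the call sites.  Below is a minimal sketch of what such
# helpers could look like, assuming they wrap sklearn's GaussianMixture and
# KMeans and score each clustering with homogeneity, completeness and
# silhouette.  The names em_sketch/kmeans_sketch and every default below are
# assumptions, not the original implementation (toshow/file_no, which control
# plotting and output files in the calls above, are accepted but ignored).
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture


def em_sketch(X_train, X_test, y_train, y_test, init_means=None,
              component_list=(2, 3), num_class=2, toshow=0, file_no=""):
    aic, bic, homo, comp, sil, avg_log = [], [], [], [], [], []
    for k in component_list:
        # Seed the component means only when k matches the number of classes,
        # since init_means holds one mean vector per class.
        means = init_means if (init_means is not None and k == num_class) else None
        gmm = GaussianMixture(n_components=k, means_init=means, random_state=0)
        gmm.fit(X_train)
        labels = gmm.predict(X_test)
        aic.append(gmm.aic(X_test))
        bic.append(gmm.bic(X_test))
        homo.append(metrics.homogeneity_score(y_test, labels))
        comp.append(metrics.completeness_score(y_test, labels))
        sil.append(metrics.silhouette_score(X_test, labels))
        avg_log.append(gmm.score(X_test))
    return list(component_list), aic, bic, homo, comp, sil, avg_log


def kmeans_sketch(X_train, X_test, y_train, y_test, init_means=None,
                  component_list=(2, 3), num_class=2, toshow=0, file_no=""):
    homo, comp, sil, var = [], [], [], []
    for k in component_list:
        if init_means is not None and k == num_class:
            km = KMeans(n_clusters=k, init=init_means, n_init=1, random_state=0)
        else:
            km = KMeans(n_clusters=k, n_init=10, random_state=0)
        km.fit(X_train)
        labels = km.predict(X_test)
        homo.append(metrics.homogeneity_score(y_test, labels))
        comp.append(metrics.completeness_score(y_test, labels))
        sil.append(metrics.silhouette_score(X_test, labels))
        var.append(km.inertia_)  # within-cluster sum of squares
    return list(component_list), homo, comp, sil, var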










#Fitting the ICA model (constructed elsewhere) and transforming the data
ica_new.fit(X1)
X_transformed_f = ica_new.transform(X1)
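# The em/kmeans calls below use X_train_transformed / X_test_transformed,
# which are never computed in this excerpt.  Assuming the fitted ICA is meant
# to be applied to the scaled train/test splits from earlier in the script:
X_train_transformed = ica_new.transform(X_train)
X_test_transformed = ica_new.transform(X_test)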

#Clustering after dimensionality reduction
print("Clustering ICA")

means_init = np.array(
    [X_transformed_f[Y1 == i].mean(axis=0) for i in range(2)])

#clustering experiments
print("Expected Maximization")
component_list, array_aic, array_bic, array_homo_1, array_comp_1, array_sil_1, array_avg_log = em(
    X_train_transformed,
    X_test_transformed,
    y_train,
    y_test,
    init_means=means_init,
    component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
    num_class=2,
    toshow=0)

print("KMeans")
component_list, array_homo_2, array_comp_2, array_sil_2, array_var = kmeans(
    X_train_transformed,
    X_test_transformed,
    y_train,
    y_test,
    init_means=means_init,
    component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
    num_class=2,
    toshow=0)
# ### 1. NBA games datasets

# In[7]:


# Splitting data into training sets and testing sets
X_train, X_test, y_train, y_test = train_test_split(X1,Y1, test_size = 0.2)

#Standardizing the data to zero mean and unit variance
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
means_init = np.array([X1[Y1 == i].mean(axis=0) for i in range(2)])

em(X_train, X_test, y_train, y_test, init_means = means_init, component_list = [3,4,5,6,7,8,9,10,11], num_class = 2)


# ### 2. LOL games datasets

# In[8]:


# Splitting data into training sets and testing sets
X_train, X_test, y_train, y_test = train_test_split(X2,Y2, test_size = 0.2)

#Standardizing the data to zero mean and unit variance
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
means_init = np.array([X1[Y1 == i].mean(axis=0) for i in range(7)])
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

#Standardizing the data to zero mean and unit variance
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

means_init = np.array([X[y == i].mean(axis=0) for i in range(3)])

##############################################################################################################################
#For Expectation Maximization
em(X_train,
   X_test,
   y_train,
   y_test,
   init_means=means_init,
   component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
   num_class=7,
   file_no="wine")

#############################################################################################################################
#For KMeans
kmeans(X_train,
       X_test,
       y_train,
       y_test,
       init_means=means_init,
       component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
       num_class=7,
       file_no="wine")
Example #5
#Standardizing the data to zero mean and unit variance
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# In[30]:

means_init = np.array([X[y == i].mean(axis=0) for i in range(2)])

# In[31]:

##############################################################################################################################
#For Expectation Maximization
em(dataset,
   X_train,
   X_test,
   y_train,
   y_test,
   init_means=means_init,
   component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
   num_class=2)

# In[32]:

#############################################################################################################################
#For KMeans
kmeans(dataset,
       X_train,
       X_test,
       y_train,
       y_test,
       init_means=means_init,
       component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
       num_class=2)
Example #6
################################################################################################################################
#Clustering after dimensionality reduction

print("Clustering PCA")

#Reducing the dimensions with optimal number of components
pca_new = PCA(n_components = gridSearch.best_estimator_.named_steps['pca'].n_components)
pca_new.fit(X)
X_transformed_f = pca_new.transform(X)
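# The em/kmeans calls below use X_train_transformed / X_test_transformed,
# which are never computed in this excerpt.  Assuming the fitted PCA is meant
# to be applied to the scaled train/test splits from earlier in the script:
X_train_transformed = pca_new.transform(X_train)
X_test_transformed = pca_new.transform(X_test)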



#clustering experiments
print("Expectation Maximization")
component_list, array_aic, array_bic, array_homo_1, array_comp_1, array_sil_1, array_avg_log = em(X_train_transformed, X_test_transformed, y_train, y_test,  component_list = [3,4,5,6,7,8,9,10,11], num_class = 7, toshow =0, file_no = "wine_pca")

print("KMeans")
component_list, array_homo_2, array_comp_2, array_sil_2, array_var = kmeans(X_train_transformed, X_test_transformed, y_train, y_test,  component_list = [3,4,5,6,7,8,9,10,11], num_class = 7, toshow =0, file_no = "wine_pca")



#Writing data to file
component_list = np.array(component_list).reshape(-1,1)
array_aic = np.array(array_aic).reshape(-1,1)
array_bic = np.array(array_bic).reshape(-1,1)
array_homo_1 = np.array(array_homo_1).reshape(-1,1)
array_comp_1 = np.array(array_comp_1).reshape(-1,1)
array_sil_1 = np.array(array_sil_1).reshape(-1,1)
array_avg_log = np.array(array_avg_log).reshape(-1,1)
array_homo_2 = np.array(array_homo_2).reshape(-1,1)
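# The excerpt cuts off before the collected metrics are actually written out.
# A sketch of how they might be saved, assuming one CSV with a column per
# metric; the file name "wine_pca_clustering.csv" is a placeholder, not the
# original output path.
output = np.hstack([component_list, array_aic, array_bic, array_homo_1,
                    array_comp_1, array_sil_1, array_avg_log, array_homo_2])
np.savetxt("wine_pca_clustering.csv", output, delimiter=",")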
Example #7
y_train = y_train.to_numpy() - 1
y_test = y_test.to_numpy() - 1

#Standardizing the data to zero mean and unit variance
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

means_init = np.array([X[y == i].mean(axis=0) for i in range(11)])

##############################################################################################################################
#For Expectation Maximization
_ = em(X_train,
       X_test,
       y_train,
       y_test,
       init_means=means_init,
       component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
       num_class=11,
       toshow=1)

#############################################################################################################################
#For KMeans
_ = kmeans(X_train,
           X_test,
           y_train,
           y_test,
           init_means=means_init,
           component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
           num_class=11,
           toshow=1)