def init_students_classifier_fn(self, **kwargs):
    if kwargs == {}:
        m = self._academic_clusterer._m
        error = 1.e-10
        maxiter = 100
        clf = lambda data: cmeans_predict(data.T, self._cntr_sf, m, error, maxiter)
    else:
        # Forward user-supplied options (m, error, maxiter, ...) to cmeans_predict.
        clf = lambda data: cmeans_predict(data.T, self._cntr_sf, **kwargs)
    self._students_clf = clf
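# A minimal, self-contained sketch of the pattern the snippets in this section rely on:
# skfuzzy expects data with shape (n_features, n_samples), hence the repeated .T /
# .transpose() calls. The synthetic data and parameter values below are illustrative only.
import numpy as np
import skfuzzy as fuzz

rng = np.random.default_rng(0)
train = np.vstack([rng.normal(0, 1, (50, 2)), rng.normal(5, 1, (50, 2))])   # (100, 2)
test = np.vstack([rng.normal(0, 1, (10, 2)), rng.normal(5, 1, (10, 2))])    # (20, 2)

# Fit cluster centers on the training data (transposed to features x samples).
cntr, u_train, _, _, _, _, _ = fuzz.cluster.cmeans(train.T, c=2, m=2, error=0.005, maxiter=1000)

# Predict memberships for new data against the trained centers.
u_test, _, _, _, _, _ = fuzz.cmeans_predict(test.T, cntr, m=2, error=0.005, maxiter=1000)
hard_labels = np.argmax(u_test, axis=0)  # hard assignment per test sample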
def run(self, chromosome: ndarray, samples: ndarray):
    u, u0, d, jm, p, fpc = cmeans_predict(samples.transpose(), chromosome, 2,
                                          error=0.005, maxiter=1000)
    return np.argmax(u, axis=0)
def calculate(self, chromosome: ndarray) -> float:
    u, u0, d, jm, p, fpc = cmeans_predict(self.samples.transpose(), chromosome, 2,
                                          error=0.005, maxiter=1000)
    return fpc
def test_fuzzy_c_means(self):
    labels_pairwise = FuzzyCMeans().run(self.center, self.samples)
    u, u0, d, jm, p, fpc = cmeans_predict(self.samples.transpose(), self.center, 2,
                                          error=0.005, maxiter=1000)
    labels_fuzzy_c_means = np.argmax(u, axis=0)
    equal = labels_fuzzy_c_means == labels_pairwise
    self.assertTrue(equal.all())
def predict(self, test_data: pd.DataFrame) -> np.ndarray:
    """Return predicted cluster assignment."""
    u, u0, d, jm, p, fpc = fuzz.cmeans_predict(test_data.transpose(), self.cluster_centers_, 2,
                                               error=0.005, maxiter=self._n_iter)
    # Hard clustering: most likely cluster per sample
    self.labels_ = np.argmax(u, axis=0)
    # Membership strength of the assigned cluster
    self.probs_ = np.max(u, axis=0)
    return self.labels_
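# A small illustration of the argmax/max split used in predict() above, on a toy membership
# matrix u of shape (n_clusters, n_samples) as returned by fuzz.cmeans_predict. The numbers
# here are made up for demonstration.
import numpy as np

u = np.array([[0.9, 0.2, 0.55],
              [0.1, 0.8, 0.45]])       # memberships of 3 samples across 2 clusters
labels = np.argmax(u, axis=0)          # -> array([0, 1, 0]): hard assignment
probs = np.max(u, axis=0)              # -> array([0.9, 0.8, 0.55]): confidence of assignment
print(labels, probs)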
def _test_model(self, model, data):
    """
    Test the trained model (built with the best-evaluated number of clusters) on new data.
    :param model: Trained model
    :param data: Dataframe with test data
    :return: Validation result for the tested model
    """
    super()._test_model(model, data)
    data_array = data.to_numpy()
    result = fuzz.cmeans_predict(test_data=data_array, cntr_trained=model[0], m=2,
                                 error=0.005, maxiter=1000)
    # Evaluation: result[5] is the fuzzy partition coefficient (fpc)
    validation = ICValidation(result[5])
    return validation
def fuzzy_clustering(train, train_labels, test, test_labels, size, plot, plot_dims):
    # Note: the fuzzifier m must be greater than 1; the original values (m=.5 for training,
    # m=3 for prediction) were invalid/inconsistent, so m=2 is used for both calls here.
    center, u, u0, d, jm, p, fpc = skf.cmeans(train.T, c=2, m=2, error=.001, maxiter=100)
    nu, nu0, nd, njm, n_p, nfpc = skf.cmeans_predict(test.T, center, 2, error=0.005, maxiter=1000)
    results_train = u.argmax(axis=0)
    results_test = nu.argmax(axis=0)
    if plot:
        plot_results(test, test_labels, results_test, size, "Fuzzy Clustering")
    return results_train, results_test
def DecideTypeOfTest(TestFilename, clf, clusterAlg, cntr, features, folder, means, modelNmf, nmf,
                     normalize, outputFolder, outputfile, roundFactor, scaler):
    if '.' in TestFilename:
        if str(TestFilename).lower() != 'na' and TestFilename != "":
            try:
                df_neg = pd.read_excel(TestFilename)
                X_train1 = df_neg.abs()
                # X_train1 = df_neg
                if normalize:
                    X_test21 = normaliz(X_train1)
                else:
                    X_test21 = X_train1
                predicted1 = clf.predict(X_test21)
                testPredictedSinglefile = []
                if 'cmeans' in clusterAlg or 'c-means' in clusterAlg or 'fuzzy' in clusterAlg:
                    data2a = df_neg.values.reshape(df_neg.values.shape[1], df_neg.values.shape[0])
                    u, u0, d, jm, p, fpc = fuzz.cmeans_predict(data2a, cntr, 2, error=0.005,
                                                               maxiter=1500, init=None)
                    Cluster_Values2 = getMaximumCmeans(u)
                    AllCluster_Values2 = getMaximumCmeans(u, True)
                    temp = [int(round(y * roundFactor)) for y in Cluster_Values2]
                    # testPredictedSinglefile = np.argmax(u, axis=0)
                    testPredictedSinglefile = temp
                else:
                    testPredictedSinglefile = means.predict(df_neg)
            except:
                print("The alternate test " + TestFilename + " does not exist in the provided "
                      "folder, please make sure the file is in: " + folder + TestFilename)
                sys.exit(-1)
            Fpredicted = open(outputFolder + '_Test_' + outputfile + "_predicted_" +
                              datetime.now().strftime('%Y-%m-%d-%H-%M') + ".tsv", mode='w')
            Fpredicted.write("{:<18}\t {:<15}\t {:<15}\t {:<15} \t{:<17} \t{:<15} \n".format(
                'V1', 'V2', 'V3', 'V4', 'V5', 'Predicted_Label'))
            for values, ja in itertools.zip_longest(df_neg.values, predicted1):
                Fpredicted.write("{:<18}\t {:<15}\t {:<15}\t {:<15} \t{:<17} \t{:<15} \n".format(
                    values[0], values[1], values[2], values[3], values[4], ja))
            Fpredicted.write("Accuracy of predicted values with trained clustering model and "
                             "trained MLPClassifier: " +
                             str(accuracy_score(testPredictedSinglefile, predicted1) * 100) + " %\n")
            Fpredicted.close()
    else:
        try:
            testTweetMapper, testFiveDModel, testTotalTweetCount, testTweetsonly = TweetExtractor(TestFilename)
            df3 = pd.DataFrame(testFiveDModel, columns=features)
            df3 = df3.abs()
            if 'y' in nmf.lower():
                X_test21a = NMf(df3, modelNmf)
            else:
                X_test21a = df3.values
            if normalize:
                X_test21a = scaler.transform(X_test21a)
                # X_test21a = normaliz(X_test21a)
            predicted12 = clf.predict(X_test21a)
            testPredicted = []
            df2Norma = pd.DataFrame(X_test21a, columns=features)
            if 'cmeans' in clusterAlg or 'c-means' in clusterAlg or 'fuzzy' in clusterAlg:
                data2 = df2Norma.values.reshape(df2Norma.values.shape[1], df2Norma.values.shape[0])
                utest, u0, d, jm, p, fpc = fuzz.cmeans_predict(data2, cntr, 2, error=0.005,
                                                               maxiter=1500, init=None)
                testPredicted = np.argmax(utest, axis=0)
            else:
                testPredicted = means.predict(df2Norma)
            Fpredicted12 = open(outputFolder + '_Test_' + outputfile + "_predicted_" +
                                datetime.now().strftime('%Y-%m-%d-%H-%M') + ".tsv",
                                encoding="utf8", mode='w')
            Fpredicted12.write("{:<20}\t {:<14}\t {:<12}\t {:<250} \t{:<12} \t{:<12} \t{:<12} "
                               "\t{:<12} \t{:<12}\t{:<15}\n".format(
                                   'userName', 'follower count', 'retweetcount', 'tweet',
                                   'V1', 'V2', 'V3', 'V4', 'V5', 'predicted label'))
            for y, w, d in itertools.zip_longest(testTweetMapper.values(), testFiveDModel, predicted12):
                Fpredicted12.write("{:<20}\t {:<14}\t {:<12}\t {:<250} \t{:<12} \t{:<12} \t{:<12} "
                                   "\t{:<12} \t{:<12}\t{:<15}\n".format(
                                       y[0], y[2], y[3], y[4], w[0], w[1], w[2], w[3], w[4], d))
            Fpredicted12.write("Accuracy of predicted values with trained clustering model and "
                               "trained MLPClassifier: " +
                               str(accuracy_score(testPredicted, predicted12) * 100) + " %\n")
            print("Accuracy of predicted values with trained clustering model and "
                  "trained MLPClassifier: " +
                  str(accuracy_score(testPredicted, predicted12) * 100) + " %\n")
            Fpredicted12.close()
        except:
            PrintException()
            pass
# Initialise the per-model accuracy counters used in the loop below.
fcm_accuracy = gk_accuracy = kmeans_accuracy = cmeans_accuracy = 0
for i, y in enumerate(Y):
    x = X[i]
    y_predicted1 = fcm.predict(x)
    fcm_accuracy += y_predicted1 == y
    y_predicted4 = gk.predict(x)
    gk_accuracy += y_predicted4 == y
    y_predicted2 = kmeans.predict([x])[0]
    kmeans_accuracy += y_predicted2 == y
    y_predicted3 = cmeans_predict(np.expand_dims(x, 0).transpose(), cntr, m, error, maxiter=2)[0]
    y_predicted3 = np.argmax(y_predicted3, axis=0)[0]
    cmeans_accuracy += y_predicted3 == y
    # print(f"y: {y}, fcm: {y_predicted1}, kmeans: {y_predicted2}, cmeans: {y_predicted3}")
    # if np.argmax(y_predicted, axis=0) == y:
    #     print(f"equal {i}")
    #     accuracy = accuracy + 1
    # else:
    #     print(y_predicted)
print(f"FCM Accuracy: {fcm_accuracy/len(X)}, "
      f"GK Score: {gk_accuracy/len(X)}, "
      f"K-Means Score: {kmeans_accuracy/len(X)}, "
      f"CMeans: {cmeans_accuracy/len(X)}")
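# The loop above calls cmeans_predict once per sample, which is slow; the call accepts a whole
# batch at once. A sketch of the batched equivalent, assuming X, Y, cntr, m and error are the
# same objects used in the loop (X is (n_samples, n_features), cntr are the trained centers):
import numpy as np
from skfuzzy.cluster import cmeans_predict

u_all, _, _, _, _, _ = cmeans_predict(np.asarray(X).T, cntr, m, error, maxiter=1000)
batched_labels = np.argmax(u_all, axis=0)                 # one hard label per sample
batched_accuracy = np.mean(batched_labels == np.asarray(Y))
print(f"CMeans (batched): {batched_accuracy}")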
def main():
    data = np.loadtxt('../data/data_banknote_authentication.txt', delimiter=',')
    print(data.shape)
    print(data[:10])
    np.random.shuffle(data)
    print(data.shape)
    print(data[:10])

    # Split into train and test sets (70/30). Note: np.int is removed in recent NumPy,
    # so plain int is used for the label cast.
    sample_size = data.shape[0]
    split_loc = int(0.7 * sample_size)
    training_set = data[:split_loc]
    training_x = training_set[:, :4]
    training_y = training_set[:, -1].astype(int)
    testing_set = data[split_loc:]
    testing_x = testing_set[:, :4]
    testing_y = testing_set[:, -1].astype(int)

    plot_data = []
    # c = 10
    for c in range(2, 100):
        # for c in range(100, 500, 30):
        print("c =", c)
        m = 2
        cntr, Ax, u0, d, jm, p, fpc = fuzz.cluster.cmeans(training_x.T, c, m, error=0.005, maxiter=1000)
        cluster_labels = np.argmax(Ax, axis=0)

        # Map each cluster to the majority class of its training members.
        cluster_map2_class = {}
        correct_count = 0
        for i in range(c):
            pickup = training_y[cluster_labels == i]
            class1_count = pickup.sum()
            if class1_count >= len(pickup) / 2:
                cluster_map2_class[i] = 1
                correct_count += class1_count
            else:
                cluster_map2_class[i] = 0
                correct_count += len(pickup) - class1_count
        training_error = 1 - correct_count / len(training_y)
        print(training_error)

        Ax_testing, _, _, _, _, _ = fuzz.cmeans_predict(testing_x.T, cntr, m, error=0.005, maxiter=1000)
        # Ax_testing, _, _, _, _, _ = fuzz.cmeans_predict(training_x.T, cntr, m, error=0.005, maxiter=1000)
        cluster_labels_testing = np.argmax(Ax_testing, axis=0)
        correct_testing = 0
        for l, gt in zip(cluster_labels_testing, testing_y):
            # for l, gt in zip(cluster_labels_testing, training_y):
            if cluster_map2_class[l] == gt:
                correct_testing += 1
        testing_error = 1 - correct_testing / len(testing_y)
        # testing_error = 1 - correct_testing / len(training_y)
        print(testing_error)

        classification_error = 1 - (correct_count + correct_testing) / (len(training_y) + len(testing_y))
        plot_data.append([c, training_error, testing_error, classification_error])

    np.savetxt("../data/results_all.txt", np.array(plot_data))
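# A possible follow-up for the results file written above: plot training/testing/overall error
# against the cluster count c. matplotlib is an added dependency here, and the column order
# matches the rows built in main(): [c, training_error, testing_error, classification_error].
import numpy as np
import matplotlib.pyplot as plt

results = np.loadtxt("../data/results_all.txt")
c_values, train_err, test_err, overall_err = results.T

plt.plot(c_values, train_err, label="training error")
plt.plot(c_values, test_err, label="testing error")
plt.plot(c_values, overall_err, label="overall error")
plt.xlabel("number of clusters c")
plt.ylabel("classification error")
plt.legend()
plt.show()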
from sklearn import tree, svm, neighbors
from sklearn.naive_bayes import GaussianNB
import numpy as np
import skfuzzy as fuzz

# training data: [height, weight, shoe size]
X = [[181, 80, 44], [177, 70, 43], [160, 60, 38], [154, 54, 37], [166, 65, 40],
     [190, 90, 47], [175, 64, 39], [177, 70, 40], [159, 55, 37], [171, 75, 42], [181, 85, 43]]
Y = ['male', 'male', 'female', 'female', 'male', 'male', 'female', 'female', 'female', 'male', 'male']

# classifiers
clf = tree.DecisionTreeClassifier()
clf1 = svm.SVC()
clf2 = neighbors.KNeighborsClassifier()
clf3 = GaussianNB()

# train the supervised models
clf = clf.fit(X, Y)
clf1 = clf1.fit(X, Y)
clf2 = clf2.fit(X, Y)
clf3 = clf3.fit(X, Y)

# Fuzzy c-means is unsupervised and has no fit/predict estimator interface: fit cluster
# centers on X (transposed to features x samples), then predict memberships for new samples
# with cmeans_predict. The resulting cluster indices (0/1) are not automatically aligned
# with the 'male'/'female' labels.
cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(np.array(X).T, c=2, m=2, error=0.0005, maxiter=1000)

_X = [[184, 84, 44], [198, 92, 48], [183, 83, 44], [166, 47, 36],
      [170, 60, 38], [172, 64, 39], [182, 80, 42], [180, 80, 43]]
_Y = ['male', 'male', 'male', 'female', 'female', 'female', 'male', 'male']

# prediction
prediction = clf.predict(_X)
prediction1 = clf1.predict(_X)
prediction2 = clf2.predict(_X)
prediction3 = clf3.predict(_X)
u_new, _, _, _, _, _ = fuzz.cmeans_predict(np.array(_X).T, cntr, 2, error=0.0005, maxiter=1000)
prediction4 = np.argmax(u_new, axis=0)  # hard cluster assignment (0 or 1)
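# A hedged follow-up to the snippet above: score each prediction against the held-out labels _Y.
# accuracy_score comes from scikit-learn; the cluster-to-label mapping for the fuzzy c-means
# result is a simple majority-vote heuristic and only one possible choice.
from sklearn.metrics import accuracy_score
import numpy as np

for name, pred in [("tree", prediction), ("svm", prediction1),
                   ("knn", prediction2), ("naive bayes", prediction3)]:
    print(name, accuracy_score(_Y, pred))

# Map each fuzzy cluster to the majority training label among the samples assigned to it,
# then score the clustering-based prediction the same way.
train_assign = np.argmax(u, axis=0)            # hard cluster of each training sample
cluster_to_label = {}
for c in (0, 1):
    members = np.array(Y)[train_assign == c]
    # majority label within the cluster (assumes each cluster has at least one member)
    cluster_to_label[c] = max(set(members), key=list(members).count)
fuzzy_pred = [cluster_to_label[c] for c in prediction4]
print("fuzzy c-means", accuracy_score(_Y, fuzzy_pred))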