def calculate_silhoutte(self, location, labels):
    """Load the glass dataset, then return the silhouette score of *labels*
    over its precomputed Euclidean distance matrix.

    NOTE(review): `location` is ignored — the dataset path is hard-coded;
    consider using `location` instead. TODO confirm with callers.
    """
    util = Utilty()
    # Raw string so the Windows path needs no backslash escaping; the
    # resulting path is identical to the original's.
    self.result = util.read_file(
        r"C:\personal\PhD\Dataset\sonar-data-set\glass-classification\glass.csv",
        ",")
    D = pairwise_distances(self.result, metric='euclidean')
    print('silhoutte labels', labels)
    # BUG FIX: `metric` is keyword-only in recent scikit-learn, so the old
    # positional `"precomputed"` raises TypeError there; pass it by name.
    return silhouette_score(D, labels, metric="precomputed")
def delete_identical_row(self, iterator):
    """Drop degenerate chromosomes (rows whose elements are all identical).

    Parameters: `iterator` — 2-D array-like population, one chromosome per row.
    Returns: a new ndarray with the degenerate rows removed.
    """
    util = Utilty()
    to_delete = []
    # Idiomatic enumerate instead of range(len(...)); truthiness instead
    # of `== True`.
    for idx, row in enumerate(iterator):
        if util.checkEqual(row):
            print('is_identical is true need to delete', idx)
            to_delete.append(idx)
    # np.delete with a list of indices removes all flagged rows at once.
    return np.delete(iterator, to_delete, axis=0)
def calculate_dunn(self, location, labels):
    """Return the Davies-Bouldin score of *labels* over ``self.result``.

    NOTE(review): despite the name, this has always computed the
    Davies-Bouldin index, not the Dunn index; the name is kept so existing
    callers keep working.
    """
    # BUG FIX: davies_bouldin_score expects the raw feature matrix X, not a
    # precomputed distance matrix; the original passed
    # pairwise_distances(self.result), which silently produces a score for
    # the wrong space. Pass the features directly.
    return davies_bouldin_score(self.result, labels)
def calculate_silhoutte(self, location, labels):
    """Return the silhouette score of *labels* over the precomputed
    Euclidean distance matrix of ``self.result``.

    Assumes ``self.result`` was already loaded by the caller (the old
    in-method CSV read is intentionally disabled).
    """
    D = pairwise_distances(self.result, metric='euclidean')
    print('silhoutte labels', labels)
    # BUG FIX: `metric` is keyword-only in recent scikit-learn; the old
    # positional `"precomputed"` raises TypeError there.
    return silhouette_score(D, labels, metric="precomputed")
def calculate_silhoutte(self, location, labels):
    """Load the iris feature matrix into ``self.result`` and return the
    silhouette score of *labels* over its Euclidean distance matrix.

    NOTE(review): `location` is ignored — the iris dataset is hard-coded.
    """
    # load_iris() returns a Bunch; only the feature matrix is needed.
    self.result = np.asarray(load_iris()['data'])
    D = pairwise_distances(self.result, metric='euclidean')
    print('silhoutte labels', labels)
    # BUG FIX: `metric` is keyword-only in recent scikit-learn; pass by name.
    return silhouette_score(D, labels, metric="precomputed")
def calculate_silhoutte(self, location, labels):
    """Load the Libras anomaly CSV into ``self.result`` and return the
    silhouette score of *labels* over its Euclidean distance matrix.

    NOTE(review): `location` is ignored — the dataset path is hard-coded.
    """
    # BUG FIX: the original opened the file itself and never closed it;
    # handing the path to np.loadtxt lets numpy open and close the handle.
    # Raw string avoids backslash-escape surprises in the Windows path.
    self.result = np.loadtxt(
        r"C:\personal\PhD\Dataset\Anomaly\Libras\real_6.csv",
        delimiter=",")
    D = pairwise_distances(self.result, metric='euclidean')
    print('silhoutte labels', labels)
    # BUG FIX: `metric` is keyword-only in recent scikit-learn; pass by name.
    return silhouette_score(D, labels, metric="precomputed")
if len(tree_dic[key]) > small_cluster_threshold: large_clusters[key] = tree_dic[key] for key in large_clusters: array = large_clusters[key] centroid = np.mean(array, axis=0) for itr in range(len(large_clusters[key])): anomalyscore_largeclusters = np.linalg.norm(array[itr] - centroid) anomaly_largeclusters[anomalyscore_largeclusters] = array[itr] return anomaly_largeclusters # In[4]: util = Utilty() '''reading data''' result = util.read_file("processed_dataset_D1.csv", ",") obj = MSMA(2, len(result), '', 12, result) print('dataset shape', result.shape) '''obtaining best population sample''' final_dict = obj.start_clustering() print('final dict') print(final_dict, '\n') final_cluster = final_dict[max(final_dict)] '''running recursive algorithm''' print('running binary tree\n') count = 0 leaf_nodes = [] tree_dic = {}
def start_clustering(self):
    """Run the two-stage genetic clustering search.

    Outer loop (100 iterations): elitism selection -> crossover ->
    mutation -> silhouette-scored re-ranking; the best (max-silhouette)
    entry of each generation is recorded in ``final_dict``.
    Inner loop (40 iterations): same GA step but re-ranked by the
    Davies-Bouldin score instead.

    Returns: dict mapping best silhouette score -> population version.

    NOTE(review): indentation was reconstructed from a flattened source;
    the statement order below follows the original text.
    """
    final_dict = {}
    util = Utilty()
    # Random initial population; drop chromosomes whose elements are all
    # identical (they carry no clustering information).
    population = self.create_population()
    print('new random created population', population)
    population = self.delete_identical_row(population)
    silhoutte_dict = self.get_silhoutte_score(population)
    # Sort population versions by silhouette score.
    sorted_dict = util.sortDictionary(silhoutte_dict)
    for outerstage in range(100):
        counter = 0
        parent_dict = {}
        survivor_dict = {}
        # Top half become parents; rate is rounded up to an even number so
        # parents can be paired for crossover.
        elitismRate = math.floor(len(sorted_dict) / 2)
        if elitismRate % 2 == 1:
            print('elitismRate', elitismRate)
            elitismRate = elitismRate + 1
        for key in sorted_dict:
            if counter < elitismRate:
                parent_dict[key] = sorted_dict[key]
            else:
                survivor_dict[key] = sorted_dict[key]
            counter = counter + 1
        offspring_dict = self.get_crossover(parent_dict)
        mutant_dict = self.get_mutation(offspring_dict)
        # BUG FIX: the original used bitwise `|`, which due to operator
        # precedence parsed as the chained comparison
        # `len(a) == (0 | len(b)) == 0` and only fired when BOTH dicts
        # were empty; logical `or` is the intended "either empty" test.
        if len(offspring_dict) == 0 or len(mutant_dict) == 0:
            print('cannot proceed further as the offspring and mutant list is empty')
        else:
            newpopulation = util.merge_dicts(mutant_dict, survivor_dict)
            newpopluation_array = util.convert_dict_to_list(
                newpopulation, len(newpopulation), self.datapoint_number)
            newpopluation_array = self.delete_identical_row(newpopluation_array)
            silhoutte_dict = self.get_silhoutte_score(newpopluation_array)
            sorted_dict = util.sortDictionary(silhoutte_dict)
            print('max val', max(sorted_dict))
            # Record this generation's best silhouette version.
            final_dict[max(sorted_dict)] = sorted_dict[max(sorted_dict)]
            for innerstage in range(40):
                parent_dict = {}
                survivor_dict = {}
                counter = 0
                elitismRate = math.floor(len(sorted_dict) / 2)
                if elitismRate % 2 == 1:
                    elitismRate = elitismRate + 1
                    print('elitismRate', elitismRate)
                for key in sorted_dict:
                    if counter < elitismRate:
                        parent_dict[key] = sorted_dict[key]
                    else:
                        survivor_dict[key] = sorted_dict[key]
                    counter = counter + 1
                offspring_dict = self.get_crossover(parent_dict)
                mutant_dict = self.get_mutation(offspring_dict)
                # BUG FIX: same `|` -> `or` precedence fix as above.
                if len(offspring_dict) == 0 or len(mutant_dict) == 0:
                    print('cannot proceed further as the offspring and mutant list is empty')
                else:
                    newpopulation = util.merge_dicts(mutant_dict, survivor_dict)
                    newpopluation_array = util.convert_dict_to_list(
                        newpopulation, len(newpopulation), self.datapoint_number)
                    newpopluation_array = self.delete_identical_row(newpopluation_array)
                    # Inner loop ranks by Davies-Bouldin instead of silhouette.
                    dunnscore_dict = self.get_dunn_score(newpopluation_array)
                    sorted_dict = util.sortDictionaryForDavis(dunnscore_dict)
            # Re-rank by silhouette so the next outer iteration selects on
            # the same metric it records.
            silhoutte_dict = self.get_silhoutte_score(newpopluation_array)
            sorted_dict = util.sortDictionary(silhoutte_dict)
    return final_dict
def calculate_silhoutte(self, location, labels, D):
    """Return the silhouette score of *labels* given the precomputed
    distance matrix *D* (caller computes D once and reuses it).

    NOTE(review): `location` is unused; kept for signature compatibility.
    """
    # BUG FIX: `metric` is keyword-only in recent scikit-learn; the old
    # positional `"precomputed"` raises TypeError there.
    return silhouette_score(D, labels, metric="precomputed")
def start_clustering(self):
    """Run the two-stage genetic clustering search.

    Outer loop (10 iterations): elitism selection -> crossover ->
    mutation -> silhouette-scored re-ranking; the best entry of each
    generation is recorded in ``final_dict``. Inner loop (10 iterations):
    same GA step but re-ranked by the Davies-Bouldin score. The Euclidean
    distance matrix is computed once up front and reused by every
    silhouette call.

    Returns: dict mapping best silhouette score -> population version.

    NOTE(review): indentation was reconstructed from a flattened source;
    the final rescore sits outside both loops per the original's comment.
    """
    final_dict = {}
    util = Utilty()
    # Random initial population; drop chromosomes whose elements are all
    # identical (they carry no clustering information).
    population = self.create_population()
    population = self.delete_identical_row(population)
    print('population shape', population.shape)
    # Compute the pairwise distance matrix once; every silhouette call
    # below takes it precomputed.
    D = pairwise_distances(self.result, metric='euclidean')
    silhoutte_dict = self.get_silhoutte_score(population, D)
    sorted_dict = util.sortDictionary(silhoutte_dict)
    for outerstage in range(10):
        counter = 0
        parent_dict = {}
        survivor_dict = {}
        # Top half become parents; rate rounded up to an even number so
        # parents can be paired for crossover.
        elitismRate = math.floor(len(sorted_dict) / 2)
        if elitismRate % 2 == 1:
            elitismRate = elitismRate + 1
        for key in sorted_dict:
            if counter < elitismRate:
                parent_dict[key] = sorted_dict[key]
            else:
                survivor_dict[key] = sorted_dict[key]
            counter = counter + 1
        offspring_dict = self.get_crossover(parent_dict)
        mutant_dict = self.get_mutation(offspring_dict)
        # BUG FIX: the original used bitwise `|`, which due to operator
        # precedence parsed as the chained comparison
        # `len(a) == (0 | len(b)) == 0` and only fired when BOTH dicts
        # were empty; logical `or` is the intended "either empty" test.
        if len(offspring_dict) == 0 or len(mutant_dict) == 0:
            print('cannot proceed further as the offspring and mutant list is empty')
        else:
            newpopulation = util.merge_dicts(mutant_dict, survivor_dict)
            newpopluation_array = util.convert_dict_to_list(
                newpopulation, len(newpopulation), self.datapoint_number)
            newpopluation_array = self.delete_identical_row(newpopluation_array)
            silhoutte_dict = self.get_silhoutte_score(newpopluation_array, D)
            sorted_dict = util.sortDictionary(silhoutte_dict)
            # Record this generation's best silhouette version.
            final_dict[max(sorted_dict)] = sorted_dict[max(sorted_dict)]
            for innerstage in range(10):
                parent_dict = {}
                survivor_dict = {}
                counter = 0
                elitismRate = math.floor(len(sorted_dict) / 2)
                if elitismRate % 2 == 1:
                    elitismRate = elitismRate + 1
                for key in sorted_dict:
                    if counter < elitismRate:
                        parent_dict[key] = sorted_dict[key]
                    else:
                        survivor_dict[key] = sorted_dict[key]
                    counter = counter + 1
                offspring_dict = self.get_crossover(parent_dict)
                mutant_dict = self.get_mutation(offspring_dict)
                # BUG FIX: same `|` -> `or` precedence fix as above.
                if len(offspring_dict) == 0 or len(mutant_dict) == 0:
                    print('cannot proceed further as the offspring and mutant list is empty')
                else:
                    newpopulation = util.merge_dicts(mutant_dict, survivor_dict)
                    newpopluation_array = util.convert_dict_to_list(
                        newpopulation, len(newpopulation), self.datapoint_number)
                    newpopluation_array = self.delete_identical_row(newpopluation_array)
                    # Inner loop ranks by Davies-Bouldin instead of silhouette.
                    dunnscore_dict = self.get_dunn_score(newpopluation_array)
                    sorted_dict = util.sortDictionaryForDavis(dunnscore_dict)
    # Final silhouette re-rank of the last population ("outside both loops"
    # in the original). NOTE(review): raises NameError if no generation ever
    # produced offspring — preserved from the original.
    silhoutte_dict = self.get_silhoutte_score(newpopluation_array, D)
    sorted_dict = util.sortDictionary(silhoutte_dict)
    return final_dict