def calcHiddenOutputs(self, input, center, std, data):
    """Return the Gaussian response of ``input`` relative to ``center``.

    The Euclidean distance ``d`` between ``input`` and ``center`` is obtained
    from a ``KNN`` helper built on ``data``, and the result is
    ``exp(-1 / (2 * std**2) * d**2)``.

    :param input: point to evaluate (passed straight to the distance helper)
    :param center: reference point the distance is measured to
    :param std: spread used in the exponent's denominator
    :param data: forwarded to the ``KNN`` constructor
    :return: value of the exponential above, as produced by ``np.exp``
    """
    # NOTE(review): the meaning of the literal 2 handed to KNN is not visible
    # here; it is kept exactly as in the original.
    distance_helper = KNN(2, data)
    separation = distance_helper.get_euclidean_distance(input, center)

    # Keep the same evaluation order as the original expression so the
    # floating-point result is unchanged.
    scale = -1 / (2 * std**2)
    return np.exp(scale * separation**2)
def test_euclidean(self):
    """Smoke-check the Euclidean distance helper on two sampled rows.

    Reads the abalone CSV, wraps it in a ``Data`` object, draws a 10-row
    sample, hands that sample to ``Data.split_data`` and prints the distance
    between the sample's rows at positions 1 and 2.  Nothing is asserted; the
    printed value has to be inspected by hand.

    :return: None
    """
    raw_frame = pd.read_csv(r'data/abalone.data', header=None)
    data = Data('abalone', raw_frame, 8)

    # Work on a small random subset so the check stays quick.
    sample = data.df.sample(n=10)
    data.split_data(data_frame=sample)

    knn = KNN(5, data)
    distance = knn.get_euclidean_distance(sample.iloc[1], sample.iloc[2])
    print(distance)
def getMaxDistMeans(self, mean_list, data):
    """Return the largest pairwise Euclidean distance among the given means.

    Every entry of ``mean_list`` is compared against every entry (itself
    included) using a ``KNN`` helper built on ``data``.

    :param mean_list: iterable of mean points; each entry is passed directly
        to ``get_euclidean_distance`` (assumes the entries are the points
        themselves rather than wrappers -- confirm against the caller)
    :param data: forwarded to the ``KNN`` constructor
    :return: the maximum distance found, or 0 when ``mean_list`` is empty or
        no distance exceeds 0
    """
    maxDist = 0
    knn = KNN(2, data)
    for clust in mean_list:
        for clus2 in mean_list:  # compare against all other means
            # The original call passed no arguments, so the pair being
            # compared never reached the distance function.
            curDist = knn.get_euclidean_distance(clust, clus2)
            if curDist > maxDist:
                maxDist = curDist
    return maxDist
def getMaxDist(self, medoids_list, data):
    """Return the largest pairwise Euclidean distance among the medoids.

    Each medoid's ``row`` attribute is compared with the ``row`` of every
    medoid in the list (itself included) through a ``KNN`` helper built on
    ``data``.

    :param medoids_list: iterable of objects exposing a ``row`` attribute
    :param data: forwarded to the ``KNN`` constructor
    :return: the maximum distance found, or 0 when the list is empty or no
        distance exceeds 0
    """
    knn = KNN(2, data)

    # Lazily walk every ordered pair, in the same order as a nested loop.
    pair_distances = (
        knn.get_euclidean_distance(first.row, second.row)
        for first in medoids_list
        for second in medoids_list
    )

    farthest = 0
    for separation in pair_distances:
        if separation > farthest:
            farthest = separation
    return farthest
def predict_centroids(self, centroids, data_set):
    """Find the closest centroid for every row of ``data_set``.

    For each row yielded by ``data_set.iterrows()`` the distance to every
    centroid is computed with ``KNN.get_euclidean_distance`` and the nearest
    one is recorded by its 1-based position in ``centroids``.

    :param centroids: iterable of centroid points
    :param data_set: object exposing ``iterrows()`` (presumably a pandas
        DataFrame -- confirm against the caller)
    :return: list with one entry per row, holding the 1-based position of the
        closest centroid (``None`` for a row when ``centroids`` is empty)
    """
    # NOTE(review): the original discarded the per-row result and returned
    # nothing; returning the list is inferred from the original header comment
    # ("return closest cluster to test data").
    predictions = []

    # Iterate the data set itself; the original indexed it with itself
    # (``data_set[data_set]``) before iterating.
    for _, data in data_set.iterrows():
        distance = None  # smallest distance seen so far for this row
        closest_centroid = None  # 1-based position of the nearest centroid
        for cluster_val, centroid in enumerate(centroids, start=1):
            euclid_distance = KNN.get_euclidean_distance(centroid, data)
            if distance is None or euclid_distance < distance:
                distance = euclid_distance
                closest_centroid = cluster_val
        predictions.append(closest_centroid)
    return predictions
def cluster_data(self, clusters, data_set):
    """Reassign rows to their nearest cluster point until assignments repeat.

    Each pass converts every row of ``data_set`` to a list, places it in the
    bucket of the nearest entry of ``clusters.values()`` (distance from
    ``KNN.get_euclidean_distance``), and then replaces ``clusters`` with the
    result of ``self.mean_clusters``.  The loop ends when a pass produces the
    same buckets as the previous pass; there is no iteration cap.

    :param clusters: mapping whose values are the current cluster points
    :param data_set: object exposing ``iterrows()``
    :return: list of the values of the most recent ``mean_clusters`` result
    """
    previous_assignment = []
    while True:
        # One empty bucket per cluster, addressed by position.
        assignment = [[] for _ in range(len(clusters))]

        for _, record in data_set.iterrows():
            point = list(record)  # the distance helper needs a plain list
            nearest_index = None
            nearest_distance = float('inf')
            for index, center in enumerate(clusters.values()):
                candidate = KNN.get_euclidean_distance(center, point)
                if candidate < nearest_distance:
                    nearest_index, nearest_distance = index, candidate
            assignment[nearest_index].append(point)

        # Means are refreshed before the convergence check, so the returned
        # points reflect the final assignment.
        clusters = self.mean_clusters(assignment, data_set)

        if previous_assignment == assignment:
            print(
                '-------------------------- K-Means has converged ------------------'
            )
            return list(clusters.values())
        previous_assignment = assignment