Пример #1
0
def k_means (points, k, centroids):
    """
    Iterate k-means until the centroids stop changing.

    Each pass hands `points` and the current `centroids` to
    `utils.assign_cluster` (which, per the original comments, groups the
    points into one list per cluster), recomputes one centroid per returned
    list with `Point2D.getAverage`, and stops when the old and new centroids
    compare equal as sets.

    Parameters:
        points:    data points, passed through unchanged to
                   `utils.assign_cluster`.
        k:         number of clusters, passed through to
                   `utils.assign_cluster`.
        centroids: initial centroids. They are placed in a `set`, so they
                   must be hashable.

    Returns:
        The centroid collection that was current when convergence was
        detected (the previous pass's result, or the caller's own argument
        if the very first pass already converges).

    Side effects:
        Prints "Took N iteration(s)" to stdout once, on convergence.

    NOTE(review): there is no iteration cap; the loop only ends when the
    set comparison succeeds.
    """
    iteration = 0
    while True:
        # Count the pass up front so the reported number includes the
        # final, converging pass (same value the original reported).
        iteration += 1

        # Assign data points to clusters
        cluster_lists = utils.assign_cluster(points, centroids, k)

        # Compute new centroids as the average of the points in each cluster
        new_centroids = [Point2D.getAverage(cluster) for cluster in cluster_lists]

        # Converged: the centroids did not change (order-insensitive compare)
        if set(centroids) == set(new_centroids):
            # Single-argument print() emits identical text on Python 2 and 3.
            print("Took " + str(iteration) + " iteration(s)")
            return centroids

        # Not converged: continue from the freshly computed centroids
        centroids = copy.deepcopy(new_centroids)
Пример #2
0
def k_means (points, k, centroids):
    """
    Iterate k-means until the centroids stop changing.

    Each pass hands `points` and the current `centroids` to
    `utils.assign_cluster` (which, per the original comments, groups the
    points into one list per cluster), derives one new centroid per returned
    list with `find_new_centroid`, and stops when the old and new centroids
    compare equal as sets.

    Parameters:
        points:    data points, passed through unchanged to
                   `utils.assign_cluster`.
        k:         number of clusters, passed through to
                   `utils.assign_cluster`.
        centroids: initial centroids. They are placed in a `set`, so they
                   must be hashable.

    Returns:
        The centroid collection that was current when convergence was
        detected (the previous pass's result, or the caller's own argument
        if the very first pass already converges).

    Side effects:
        Prints "Took N iteration(s)" to stdout once, on convergence.

    NOTE(review): there is no iteration cap; the loop only ends when the
    set comparison succeeds.
    """
    iteration = 0
    while True:
        # Count the pass up front so the reported number includes the
        # final, converging pass (same value the original reported).
        iteration += 1

        # Assign data points to clusters
        cluster_lists = utils.assign_cluster(points, centroids, k)

        # Find new centroids, one per cluster list
        new_centroid_list = [find_new_centroid(cluster) for cluster in cluster_lists]

        # Converged: the centroids did not change (order-insensitive compare)
        if set(centroids) == set(new_centroid_list):
            # Single-argument print() emits identical text on Python 2 and 3.
            print("Took " + str(iteration) + " iteration(s)")
            return centroids

        # Not converged: go into another iteration with the new centroids
        centroids = copy.deepcopy(new_centroid_list)
Пример #3
0
def slave_function():
    """
    Worker-side loop of the distributed k-means.

    Protocol, as visible in this function (rank 0 is the peer throughout):
      1. receive `k` from rank 0;
      2. receive this worker's data points from rank 0;
      3. repeatedly take part in a broadcast of the current centroids
         rooted at rank 0;
         - an empty centroid list is the "done" signal: return;
         - otherwise assign the local points to clusters, compute a local
           centroid and a point count per cluster, and send the pair
           `(new_centroids, population_slave)` back to rank 0.

    Returns:
        None, once the empty-centroids signal is received.

    NOTE(review): `Point2D.getAverage` is also called for clusters that
    received no local points; how it handles an empty list is not visible
    here.
    """
    # Get communication instance
    comm = MPI.COMM_WORLD

    # Get k, then the data points, in the order rank 0 sends them
    k = comm.recv(source=0)
    data_points = comm.recv(source=0)

    centroids = []
    while True:
        # Receive centroids from master
        centroids = comm.bcast(centroids, root=0)

        # Empty list is the done signal
        if len(centroids) == 0:
            return

        # Assign points to clusters (indexable by cluster number below)
        assigned_points = utils.assign_cluster(data_points, centroids, k)

        # i-th entry: local centroid of cluster i
        new_centroids = [Point2D.getAverage(assigned_points[i]) for i in range(k)]
        # i-th entry: number of local points in cluster i
        population_slave = [len(assigned_points[i]) for i in range(k)]

        # Send the new centroids and the population count of each cluster.
        # This must run once per iteration, i.e. inside the loop.
        comm.send((new_centroids, population_slave), dest=0)
Пример #4
0
def slave_function():
    """
    Worker-side loop of the distributed k-means (dimension-stats variant).

    Protocol, as visible in this function (rank 0 is the peer throughout):
      1. receive `k` from rank 0;
      2. receive this worker's data points from rank 0;
      3. repeatedly take part in a broadcast of the current centroids
         rooted at rank 0;
         - an empty centroid list is the "done" signal: return;
         - otherwise assign the local points to clusters, compute the
           per-dimension stats of each cluster with
           `get_dimension_wise_stats`, and send the list of k results
           (entry i belongs to cluster i) back to rank 0.

    Returns:
        None, once the empty-centroids signal is received.

    Raises:
        IndexError: if the received `data_points` is empty, because the
        dimensionality is read from the first point.
    """
    # Get communication instance
    comm = MPI.COMM_WORLD

    # Get k, then the data points, in the order rank 0 sends them
    k = comm.recv(source=0)
    data_points = comm.recv(source=0)

    # Dimensionality is taken from the first data point
    dimension = len(data_points[0])

    centroids = []
    while True:
        # Receive centroids from master
        centroids = comm.bcast(centroids, root=0)

        # Empty list is the done signal
        if len(centroids) == 0:
            return

        # Assign points to clusters (indexable by cluster number below)
        assigned_points = utils.assign_cluster(data_points, centroids, k)

        # Rebuild the stats list on every iteration. Keeping one list alive
        # across iterations would make each message grow by k entries and
        # begin with stale stats from earlier iterations.
        # Per the original comment, each entry is a list of dictionaries.
        dimension_stats_all_clusters = [
            get_dimension_wise_stats(assigned_points[i], dimension)
            for i in range(k)
        ]

        # Send the dimension stats for all clusters to master node.
        # This must run once per iteration, i.e. inside the loop.
        comm.send(dimension_stats_all_clusters, dest=0)