Пример #1
0
def _split_clusters_par_helper_old(args):
	'''
	Split one cluster with OPTICS, building its distance matrix on the fly.

	args is a single 4-tuple (_cluster, min_size, feature_type, dist_metric).
	It is packed into one argument presumably so this function can be handed
	to a pool-style map -- confirm against the caller.

	When dist_metric is 'rf', distances are derived from a random forest
	fitted to tell the cluster's features apart from the matrix produced by
	compute_random_matrix(features). Every other value of dist_metric falls
	back to pairwise Euclidean distance.

	Returns the result of form_clusters_alt(_cluster.members, indices).
	'''
	_cluster, min_size, feature_type, dist_metric = args

	# Only the first element of what extract_features returns is used.
	features = extract_features(_cluster.members, [_cluster.center], feature_type)[0]

	if dist_metric == 'rf':
		rand_mat = compute_random_matrix(features)

		num_tree_features = FUNCTION_NUM_FEATURES(features.shape[1])
		rf = sklearn.ensemble.RandomForestClassifier(n_estimators=NUM_TREES, max_features=num_tree_features,
													bootstrap=False, n_jobs=1)
		# Rows of `features` are labelled 1, rows of `rand_mat` are labelled 0.
		combined_data = np.concatenate((features, rand_mat))
		labels = np.concatenate((np.ones(features.shape[0]), np.zeros(rand_mat.shape[0])))
		rf.fit(combined_data, labels)

		# Convert the similarity matrix into a distance matrix.
		dist_mat = 1 - compute_sim_mat(features, rf)
	else:
		# pdist returns the condensed form; squareform expands it to a square matrix.
		condensed = scipy.spatial.distance.pdist(features, 'euclidean')
		dist_mat = scipy.spatial.distance.squareform(condensed)

	reachabilities = selector.OPTICS(dist_mat, min_size)
	groups = selector.separateClusters(reachabilities, min_size)

	# separateClusters hands back selector.dataPoint objects; keep only their ids.
	# A nested list comprehension (rather than nested map) guarantees real lists
	# on both Python 2 and Python 3, where map() is a lazy one-shot iterator.
	indices = [[dp._id for dp in group] for group in groups]

	return form_clusters_alt(_cluster.members, indices)
Пример #2
0
def _split_clusters_par_helper(args):
	'''
	Split one cluster with OPTICS using a precomputed distance matrix.

	args is a single 3-tuple (_cluster, dist_mat, min_size). It is packed
	into one argument presumably so this function can be handed to a
	pool-style map -- confirm against the caller.

	Returns the result of form_clusters_alt(_cluster.members, indices).
	'''
	_cluster, dist_mat, min_size = args
	reachabilities = selector.OPTICS(dist_mat, min_size)
	groups = selector.separateClusters(reachabilities, min_size)

	# separateClusters hands back selector.dataPoint objects; keep only their ids.
	# A nested list comprehension (rather than nested map) guarantees real lists
	# on both Python 2 and Python 3, where map() is a lazy one-shot iterator.
	indices = [[dp._id for dp in group] for group in groups]
	return form_clusters_alt(_cluster.members, indices)
Пример #3
0
def split_cluster(_cluster, dist_mat, min_size):
	'''
	Splits a cluster using Logan's OPTICS.

	_cluster: object whose .members are regrouped.
	dist_mat: distance matrix passed straight to selector.OPTICS.
	min_size: passed to both selector.OPTICS and selector.separateClusters.

	Returns the result of form_clusters_alt -- per the original author's
	note, a list of resulting clusters (perhaps just the original).
	'''
	reachabilities = selector.OPTICS(dist_mat, min_size)
	groups = selector.separateClusters(reachabilities, min_size)

	# separateClusters hands back selector.dataPoint objects; keep only their ids.
	# A nested list comprehension (rather than nested map) guarantees real lists
	# on both Python 2 and Python 3, where map() is a lazy one-shot iterator.
	indices = [[dp._id for dp in group] for group in groups]

	return form_clusters_alt(_cluster.members, indices)
Пример #4
0
def split_cluster(_cluster, dist_mat, args):
	'''
	Splits a cluster using Logan's OPTICS.

	_cluster: object whose .members are regrouped.
	dist_mat: distance matrix passed straight to selector.OPTICS.
	args: options object providing no_auto_minpts, minpts and minpts_perc.

	min_size is args.minpts when args.no_auto_minpts is set; otherwise it is
	the larger of args.minpts and args.minpts_perc * len(_cluster.members),
	truncated to an int.

	Returns the result of form_clusters_alt -- per the original author's
	note, a list of resulting clusters (perhaps just the original).
	'''
	if args.no_auto_minpts:
		min_size = args.minpts
	else:
		# Scale the minimum with cluster size, but never below args.minpts.
		min_size = int(max(args.minpts, args.minpts_perc * len(_cluster.members)))
	reachabilities = selector.OPTICS(dist_mat, min_size)
	groups = selector.separateClusters(reachabilities, min_size)

	# separateClusters hands back selector.dataPoint objects; keep only their ids.
	# A nested list comprehension (rather than nested map) guarantees real lists
	# on both Python 2 and Python 3, where map() is a lazy one-shot iterator.
	indices = [[dp._id for dp in group] for group in groups]

	return form_clusters_alt(_cluster.members, indices)
Пример #5
0
def split_cluster(_cluster, dist_mat, args):
    '''
    Splits a cluster using Logan's OPTICS.

    _cluster: object whose .members are regrouped.
    dist_mat: distance matrix passed straight to selector.OPTICS.
    args: options object providing no_auto_minpts, minpts and minpts_perc.

    min_size is args.minpts when args.no_auto_minpts is set; otherwise it is
    the larger of args.minpts and args.minpts_perc * len(_cluster.members),
    truncated to an int.

    Returns the result of form_clusters_alt -- per the original author's
    note, a list of resulting clusters (perhaps just the original).
    '''
    if args.no_auto_minpts:
        min_size = args.minpts
    else:
        # Scale the minimum with cluster size, but never below args.minpts.
        min_size = int(
            max(args.minpts, args.minpts_perc * len(_cluster.members)))
    reachabilities = selector.OPTICS(dist_mat, min_size)
    groups = selector.separateClusters(reachabilities, min_size)

    # separateClusters hands back selector.dataPoint objects; keep only their
    # ids. A nested list comprehension (rather than nested map) guarantees real
    # lists on both Python 2 and Python 3, where map() is a lazy iterator.
    indices = [[dp._id for dp in group] for group in groups]

    return form_clusters_alt(_cluster.members, indices)
Пример #6
0
def split_cluster_old(_cluster, min_size, feature_type='match', dist_metric='euclidean'):
	'''
	Splits a cluster using Logan's OPTICS, computing the distance matrix itself.

	_cluster: object whose .members are regrouped; its center is set via
		set_cluster_center when _cluster.center is falsy.
	min_size: passed to both selector.OPTICS and selector.separateClusters.
	feature_type, dist_metric: forwarded unchanged to cluster_dist_mat.

	Returns the result of form_clusters_alt -- per the original author's
	note, a list of resulting clusters (perhaps just the original).
	'''
	# NOTE(review): this truthiness test would raise for a multi-element numpy
	# array center -- confirm what type _cluster.center holds.
	if not _cluster.center:
		set_cluster_center(_cluster)

	dist_mat = cluster_dist_mat(_cluster, feature_type, dist_metric)
	reachabilities = selector.OPTICS(dist_mat, min_size)
	groups = selector.separateClusters(reachabilities, min_size)

	# separateClusters hands back selector.dataPoint objects; keep only their ids.
	# A nested list comprehension (rather than nested map) guarantees real lists
	# on both Python 2 and Python 3, where map() is a lazy one-shot iterator.
	indices = [[dp._id for dp in group] for group in groups]

	return form_clusters_alt(_cluster.members, indices)