def _split_clusters_par_helper_old(args):
    '''
    Legacy worker: builds a distance matrix for one cluster, then splits the
    cluster with OPTICS.

    args is a single 4-tuple (_cluster, min_size, feature_type, dist_metric).
    It is packed into one argument presumably so the function can be handed
    to a pool-style map -- confirm against the caller.

    When dist_metric == 'rf', distances are 1 - similarity, where the
    similarity comes from a random forest fitted to separate the real
    features (label 1) from the output of compute_random_matrix (label 0).
    Any other dist_metric value falls back to pairwise Euclidean distance.

    Returns whatever form_clusters_alt builds from _cluster.members and the
    id groups reported by selector.separateClusters.
    '''
    _cluster, min_size, feature_type, dist_metric = args
    features = extract_features(
        _cluster.members, [_cluster.center], feature_type)[0]

    if dist_metric == 'rf':
        rand_mat = compute_random_matrix(features)
        num_tree_features = FUNCTION_NUM_FEATURES(features.shape[1])
        rf = sklearn.ensemble.RandomForestClassifier(
            n_estimators=NUM_TREES,
            max_features=num_tree_features,
            bootstrap=False,
            n_jobs=1)
        combined_data = np.concatenate((features, rand_mat))
        # Real rows are labelled 1, random rows 0.
        labels = np.concatenate(
            (np.ones(features.shape[0]), np.zeros(rand_mat.shape[0])))
        rf.fit(combined_data, labels)
        dist_mat = 1 - compute_sim_mat(features, rf)
    else:
        # pdist returns the condensed form; squareform expands it to a
        # full square matrix.
        condensed = scipy.spatial.distance.pdist(features, 'euclidean')
        dist_mat = scipy.spatial.distance.squareform(condensed)

    reachabilities = selector.OPTICS(dist_mat, min_size)
    groups = selector.separateClusters(reachabilities, min_size)
    # separateClusters hands back selector.dataPoint objects; keep only their
    # ids.  A comprehension (rather than nested map) guarantees real lists on
    # both Python 2 and Python 3.
    indices = [[dp._id for dp in group] for group in groups]
    return form_clusters_alt(_cluster.members, indices)
def _split_clusters_par_helper(args):
    '''
    Worker that splits one cluster with OPTICS from a precomputed distance
    matrix.

    args is a single 3-tuple (_cluster, dist_mat, min_size).  It is packed
    into one argument presumably so the function can be handed to a
    pool-style map -- confirm against the caller.

    Returns whatever form_clusters_alt builds from _cluster.members and the
    id groups reported by selector.separateClusters.
    '''
    _cluster, dist_mat, min_size = args
    reachabilities = selector.OPTICS(dist_mat, min_size)
    groups = selector.separateClusters(reachabilities, min_size)
    # separateClusters hands back selector.dataPoint objects; keep only their
    # ids.  A comprehension (rather than nested map) guarantees real lists on
    # both Python 2 and Python 3.
    indices = [[dp._id for dp in group] for group in groups]
    return form_clusters_alt(_cluster.members, indices)
def split_cluster(_cluster, dist_mat, min_size):
    '''
    Splits a cluster using Logan's OPTICS.

    _cluster -- cluster whose .members are regrouped
    dist_mat -- distance matrix passed straight to selector.OPTICS
    min_size -- passed to both selector.OPTICS and selector.separateClusters

    Returns a list of resulting clusters (perhaps just the original), as
    built by form_clusters_alt.
    '''
    reachabilities = selector.OPTICS(dist_mat, min_size)
    groups = selector.separateClusters(reachabilities, min_size)
    # separateClusters hands back selector.dataPoint objects; keep only their
    # ids.  A comprehension (rather than nested map) guarantees real lists on
    # both Python 2 and Python 3.
    indices = [[dp._id for dp in group] for group in groups]
    return form_clusters_alt(_cluster.members, indices)
def split_cluster(_cluster, dist_mat, args):
    '''
    Splits a cluster using Logan's OPTICS.

    _cluster -- cluster whose .members are regrouped
    dist_mat -- distance matrix passed straight to selector.OPTICS
    args     -- options object; reads args.no_auto_minpts, args.minpts and
                args.minpts_perc

    Returns a list of resulting clusters (perhaps just the original), as
    built by form_clusters_alt.
    '''
    if args.no_auto_minpts:
        min_size = args.minpts
    else:
        # Scale with the cluster's size, but never go below args.minpts.
        min_size = int(
            max(args.minpts, args.minpts_perc * len(_cluster.members)))

    reachabilities = selector.OPTICS(dist_mat, min_size)
    groups = selector.separateClusters(reachabilities, min_size)
    # separateClusters hands back selector.dataPoint objects; keep only their
    # ids.  A comprehension (rather than nested map) guarantees real lists on
    # both Python 2 and Python 3.
    indices = [[dp._id for dp in group] for group in groups]
    return form_clusters_alt(_cluster.members, indices)
def split_cluster(_cluster, dist_mat, args):
    '''
    Splits a cluster using Logan's OPTICS.

    _cluster -- cluster whose .members are regrouped
    dist_mat -- distance matrix passed straight to selector.OPTICS
    args     -- options object; reads args.no_auto_minpts, args.minpts and
                args.minpts_perc

    Returns a list of resulting clusters (perhaps just the original), as
    built by form_clusters_alt.
    '''
    if args.no_auto_minpts:
        min_size = args.minpts
    else:
        # Scale with the cluster's size, but never go below args.minpts.
        min_size = int(
            max(args.minpts, args.minpts_perc * len(_cluster.members)))

    reachabilities = selector.OPTICS(dist_mat, min_size)
    groups = selector.separateClusters(reachabilities, min_size)
    # separateClusters hands back selector.dataPoint objects; keep only their
    # ids.  A comprehension (rather than nested map) guarantees real lists on
    # both Python 2 and Python 3.
    indices = [[dp._id for dp in group] for group in groups]
    return form_clusters_alt(_cluster.members, indices)
def split_cluster_old(_cluster, min_size, feature_type='match', dist_metric='euclidean'):
    '''
    Splits a cluster using Logan's OPTICS, computing the distance matrix
    itself via cluster_dist_mat.

    _cluster     -- cluster whose .members are regrouped; its center is set
                    with set_cluster_center first when .center is falsy
    min_size     -- passed to both selector.OPTICS and
                    selector.separateClusters
    feature_type -- forwarded to cluster_dist_mat
    dist_metric  -- forwarded to cluster_dist_mat

    Returns a list of resulting clusters (perhaps just the original), as
    built by form_clusters_alt.
    '''
    # NOTE(review): this is a truthiness test, not an `is None` test, so an
    # empty/zero center also triggers a recompute -- confirm that is intended.
    if not _cluster.center:
        set_cluster_center(_cluster)

    dist_mat = cluster_dist_mat(_cluster, feature_type, dist_metric)
    reachabilities = selector.OPTICS(dist_mat, min_size)
    groups = selector.separateClusters(reachabilities, min_size)
    # separateClusters hands back selector.dataPoint objects; keep only their
    # ids.  A comprehension (rather than nested map) guarantees real lists on
    # both Python 2 and Python 3.
    indices = [[dp._id for dp in group] for group in groups]
    return form_clusters_alt(_cluster.members, indices)