Python OPTICS示例，sklearn.cluster.OPTICS Python示例

示例#1

0

显示文件

    def sort_bacteria_in_cluster(self):
        """
        Sort the bacteria of the biofilm into clusters computed with OPTICS.

        ``self.position_matrix`` is transposed, clustered with
        ``OPTICS(min_samples=2, metric='euclidean')`` and every entry of
        ``self.bacteria`` is appended to the list selected by its label.

        :return: list of lists of bacteria, one inner list per distinct label.
        :raises ValueError: if the number of sorted bacteria differs from
            ``len(self.bacteria)``.
        """
        # OPTICS treats every row as one sample.
        # Assumes position_matrix is 3 x N (one column per bacterium) -- TODO confirm.
        data = self.position_matrix.transpose()

        model = OPTICS(min_samples=2, metric='euclidean')
        model.fit_predict(data)
        labels = model.labels_

        clusters = [[] for _ in range(len(np.unique(labels)))]
        for bacteria, index in zip(self.bacteria, labels):
            # Labels are used directly as list indices, so the noise label -1
            # selects the last list (negative indexing).
            clusters[index].append(bacteria)

        # zip() silently stops at the shorter input, so verify nothing was dropped.
        sorted_count = sum(len(cluster) for cluster in clusters)
        if sorted_count != len(self.bacteria):
            raise ValueError(f"{abs(sorted_count - len(self.bacteria))} bacteria where not sorted in a cluster.")

        return clusters

示例#2

0

显示文件

文件： test_modeling.py 项目： Coldwater30/bachelor-thesis

    def setUp(self):
        """Fit OPTICS on six synthetic blobs and attach the result to a ``modeling.TBH``."""
        n_points_per_cluster = 250
        np.random.seed(0)

        # (centre, spread) of every blob, listed in the order the samples are drawn.
        blob_specs = (
            ([-5, -2], .8),
            ([4, -1], .1),
            ([0, -2], .2),
            ([-2, 3], .3),
            ([3, -2], 1.6),
            ([5, 6], 2),
        )

        blobs = []
        for centre, spread in blob_specs:
            # Column 0 stays zero; columns 1 and 2 receive the random samples.
            blob = np.zeros((n_points_per_cluster, 3))
            blob[:, 1:3] = centre + spread * np.random.randn(n_points_per_cluster, 2)
            blobs.append(blob)

        # Only the two sampled columns are clustered.
        X = np.vstack(tuple(blob[:, 1:3] for blob in blobs))

        clust = OPTICS(min_samples=50, xi=.05, min_cluster_size=.05)
        clust.fit(X)

        self.tbhg = modeling.TBH()
        self.tbhg.optics = clust
        self.tbhg.locH = tuple(blobs)

示例#3

0

显示文件

文件： utils.py 项目： anuprulez/clade_prediction

def find_cluster_indices(output_seqs, batch_size, datatype="train_y"):
    """Cluster the output sequences with OPTICS and group their indices by label.

    The sequences are converted with ``convert_to_array`` and clustered with
    ``OPTICS(min_samples=2, min_cluster_size=2)``. A table pairing every
    sequence with its label is written to
    ``data/generated_files/clustered_output_seqs_data_<datatype>.csv``.

    Parameters
    ----------
    output_seqs : indexable collection of sequences to cluster.
    batch_size : not used by this function.
    datatype : tag used in the progress message and in the CSV file name.

    Returns
    -------
    tuple
        ``(cluster_labels, cluster_indices_dict, scatter_df)`` where
        ``cluster_indices_dict`` maps each label to the list of positions
        carrying it and ``scatter_df`` has the columns ``output_seqs`` and
        ``clusters``.
    """
    print("Clustering {}".format(datatype))
    features = convert_to_array(output_seqs)
    clustering_type = OPTICS(min_samples=2, min_cluster_size=2)
    cluster_labels = clustering_type.fit_predict(features)
    # The count includes the noise label (-1) whenever it occurs.
    print("Number of clusters: {}".format(len(set(cluster_labels))))

    cluster_indices_dict = dict()
    for i, l in enumerate(cluster_labels):
        cluster_indices_dict.setdefault(l, list()).append(i)

    # One row per label, pairing it with the sequence at the same position.
    sequences = [output_seqs[i] for i in range(len(cluster_labels))]
    scatter_df = pd.DataFrame(list(zip(sequences, cluster_labels)),
                              columns=["output_seqs", "clusters"])
    scatter_df.to_csv(
        "data/generated_files/clustered_output_seqs_data_{}.csv".format(
            datatype))
    return cluster_labels, cluster_indices_dict, scatter_df

示例#4

0

显示文件

文件： demo93_clusteringevaluation_optics_visualaid.py 项目： mahnooranjum/Programming_DataScience

def visual(c, X, y):
    """Cluster ``X`` with OPTICS, print evaluations and plot truth vs. prediction.

    Two scatter plots of the first two columns of ``X`` are shown: one
    coloured by the labels in ``y`` ('Dataset') and one coloured by the
    OPTICS labels ('Cluster').

    Parameters
    ----------
    c : not used by this function.
    X : 2-D array of samples; columns 0 and 1 are plotted.
    y : reference label for every row of ``X``.
    """
    from sklearn.cluster import OPTICS
    cluster_object = OPTICS(min_cluster_size=100)
    y_pred = cluster_object.fit_predict(X)

    clusters = np.unique(y_pred)
    print("Cluster Labels")
    print(clusters)
    print("Evaluation")
    evaluation_labels(y, y_pred)
    evaluation(X, y_pred)

    # Ground truth: iterate over the labels that actually occur in y, not the
    # predicted ones, so that no reference class is silently left out.
    y_true = np.asarray(y)
    for label in np.unique(y_true):
        members = y_true == label
        plt.scatter(X[members, 0], X[members, 1], label=str(label))
    plt.title('Dataset')
    plt.xlabel('X1')
    plt.ylabel('X2')
    # Every scatter carries a label, so the legend has entries to show.
    plt.legend()
    plt.show()

    plt.figure()
    for cluster in clusters:
        members = y_pred == cluster
        plt.scatter(X[members, 0], X[members, 1], label=str(cluster))
    plt.title('Cluster')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.legend()
    plt.show()

示例#5

0

显示文件

def perform_optics_clustering(data, program_options: Options) -> ClusteredData:
    """Cluster ``data`` with OPTICS and wrap the outcome in a ``ClusteredData``.

    Every label from 0 up to the largest label becomes a full cluster whose
    centre is the mean of its members. Every point labelled -1 becomes its own
    unclassified-node cluster.
    """
    result = ClusteredData(data, list(), program_options=program_options)

    optics_model = OPTICS(min_samples=program_options.OPTICS_MIN_SAMPLES, n_jobs=-1)
    optics_model.fit(data)
    labels = optics_model.labels_

    for cluster_id in range(labels.max() + 1):
        members = data[labels == cluster_id]
        # OPTICS does not report cluster centres, so use the mean of the members.
        centre = members.mean(axis=0)
        result.add_cluster(
            Cluster(cluster_centre=centre, nodes=members, cluster_type=ClusterType.FULL_CLUSTER,
                    program_options=program_options))

    if labels.min() != -1:
        # No point was left unclassified.
        return result

    for node in data[labels == -1]:
        result.add_unclassified_node(
            Cluster(node, [node],
                    cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                    program_options=program_options))

    return result

示例#6

0

显示文件

文件： cluster_models.py 项目： rupakc/Large-Scale-Preprocessing-Evaluation

def get_clustered_data(data_matrix,
                       clustering_algorithm=model_constants.KMEANS,
                       distance_metric='euclidean',
                       num_clusters=3):
    """Fit the requested clustering estimator and return its labels.

    ``clustering_algorithm`` is lower-cased and compared against the
    ``model_constants`` names; any value that matches none of them falls back
    to KMeans with ``random_state=42``. ``distance_metric`` is passed to
    affinity propagation, DBSCAN, OPTICS and agglomerative clustering;
    ``num_clusters`` is passed to Birch, agglomerative clustering and KMeans.

    Returns ``(labels, fitted_estimator)``.
    """
    requested = clustering_algorithm.lower()

    if requested == model_constants.AFFINITY_PROP:
        estimator = AffinityPropagation(affinity=distance_metric)
    elif requested == model_constants.DBSCAN:
        estimator = DBSCAN(metric=distance_metric)
    elif requested == model_constants.OPTICS:
        estimator = OPTICS(metric=distance_metric)
    elif requested == model_constants.MEANSHIFT:
        estimator = MeanShift()
    elif requested == model_constants.BIRCH:
        estimator = Birch(n_clusters=num_clusters)
    elif requested == model_constants.AGGLOMERATIVE:
        estimator = AgglomerativeClustering(n_clusters=num_clusters,
                                            affinity=distance_metric)
    else:
        estimator = KMeans(n_clusters=num_clusters, random_state=42)

    estimator.fit(data_matrix)
    return estimator.labels_, estimator

示例#7

0

显示文件

文件： actions.py 项目： bykov-alexei/Coursework

def cameras():
    """Render ``cameras.html`` with the cameras and sample pictures from today.

    Today's occurrences that have an ``e1`` value are clustered with OPTICS on
    their ``e1`` .. ``e128`` columns. For every cluster with at least four
    members one member is picked at random and its ``human_picture`` path
    (first path component replaced by a leading ``/``) is passed to the
    template. Presumably each cluster corresponds to one person -- confirm.

    When there are fewer occurrences than OPTICS needs, no pictures are
    passed instead of failing the request.
    """
    conn, cursor = connect()
    try:
        query = "SELECT title, x, y, rstp, F, current_frame FROM cameras"
        cursor.execute(query)
        camera_rows = cursor.fetchall()

        query = "SELECT * FROM occurrences WHERE DATE(`timestamp`)=CURDATE() AND e1 IS NOT NULL"
        cursor.execute(query)
        today_occurrences = cursor.fetchall()
    finally:
        # Release the connection even when a query fails.
        conn.close()

    result_occurrences = []
    # 5 is the OPTICS default for min_samples; fitting fewer samples than
    # that (or none at all) raises, so clustering is skipped in that case.
    min_samples = 5
    if len(today_occurrences) >= min_samples:
        arr = np.array([[to["e%i" % i] for i in range(1, 129)]
                        for to in today_occurrences])

        model = OPTICS(min_samples=min_samples)
        model.fit(arr)
        indices = np.arange(len(today_occurrences))
        for i in range(np.max(model.labels_) + 1):
            person_indices = indices[model.labels_ == i]
            print(person_indices)
            if len(person_indices) < 4:
                continue
            index = np.random.choice(person_indices)
            picture = today_occurrences[index]['human_picture']
            result_occurrences.append('/' + '/'.join(picture.split('/')[1:]))

    print(len(result_occurrences))

    return render_template('cameras.html', cameras=camera_rows, today_occurrences=result_occurrences)

示例#8

0

显示文件

def optics_clustering(principal_components, principal_df):
    """Cluster the principal components with OPTICS, plot and score the result.

    A copy of ``principal_df`` gets a ``Segment`` column holding the OPTICS
    labels, and its columns ``0``, ``1``, ``2`` and ``'y'`` are renamed to
    ``PC1``, ``PC2``, ``PC3`` and ``Race``. The first two components are
    scattered per cluster, then ``add_race_labels`` and ``calc_silhouette``
    are called.

    Returns the annotated data frame.
    """
    final_df = pd.concat([principal_df], axis=1)

    # NOTE(review): scikit-learn documents ``eps`` as used only with
    # cluster_method='dbscan'; with the default method it presumably has no
    # effect here -- confirm.
    clusterer = OPTICS(eps=5, min_samples=2)
    predicted = clusterer.fit_predict(principal_components)
    cluster_ids = unique(predicted)
    final_df['Segment'] = clusterer.labels_

    # One scatter call per cluster so that each gets its own colour.
    for cluster_id in cluster_ids:
        members = where(predicted == cluster_id)
        first_component = principal_components[members, 0]
        second_component = principal_components[members, 1]
        plt.scatter(first_component, second_component, s=75)

    column_names = {0: 'PC1', 1: 'PC2', 2: 'PC3', 'y': 'Race'}
    final_df.rename(columns=column_names, inplace=True)
    print(final_df)

    plt.title("OPTICS Clustering")
    add_race_labels(final_df)
    calc_silhouette(data=principal_components,
                    prediction=predicted,
                    n_clusters=len(cluster_ids))
    return final_df

示例#9

0

显示文件

文件： bindingdb.py 项目： prtos/prot_repr

def _save_cluster_scatter(embedding, clusters, outfile):
    """Scatter a two-column embedding coloured by cluster label and save it to ``outfile``."""
    x, y = embedding.T
    cmap = plt.get_cmap('jet', np.max(clusters) + 2)
    # NOTE(review): the 'under' colour only applies to values below the
    # normalisation minimum; no vmin is set, so the label -1 is presumably
    # never drawn in gray -- confirm the intended colouring.
    cmap.set_under('gray')

    fig, ax = plt.subplots()
    ax.scatter(x, y, c=clusters, s=10, cmap=cmap)
    fig.savefig(outfile)
    plt.close(fig)


def cluster_proteins_by_sim(prot_graph_fname):
    """Cluster the pickled protein graph with OPTICS and save two 2-D cluster plots.

    The file ``prot_graph_fname`` must contain a pickled ``(nodes, adj_mat)``
    pair. ``adj_mat`` is clustered with ``OPTICS(min_cluster_size=5)``, the
    label counts are printed, and the matrix is projected with eGTM and with
    t-SNE. Each projection is saved next to the input file as
    ``protein_egtm_clusters.png`` and ``protein_tsne_clusters.png``.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(prot_graph_fname, 'rb') as fd:
        _nodes, adj_mat = pkl.load(fd)

    model = OPTICS(min_cluster_size=5, n_jobs=-1)
    clusters = model.fit_predict(adj_mat)
    print(Counter(clusters))

    out_dir = os.path.dirname(prot_graph_fname)

    egtm_embedding = eGTM().fit_transform(adj_mat)
    _save_cluster_scatter(egtm_embedding, clusters,
                          os.path.join(out_dir, 'protein_egtm_clusters.png'))

    tsne = TSNE(n_components=2, n_iter_without_progress=10)
    tsne_embedding = tsne.fit_transform(adj_mat)
    _save_cluster_scatter(tsne_embedding, clusters,
                          os.path.join(out_dir, 'protein_tsne_clusters.png'))

示例#10

0

显示文件

文件： cluster.py 项目： fwzhuang/hair_modeling

def optics(params):
    """Cluster a precomputed distance matrix with OPTICS and return the labels.

    ``params["distance_path"]`` names a text file readable by ``np.loadtxt``
    that holds the pairwise distance matrix. The path, the matrix shape, the
    estimator, the labels and the number of clusters (noise excluded) are
    printed along the way.

    Returns the label of every sample as a tuple of ints; -1 marks noise.
    """
    distance_path = params["distance_path"]
    print(distance_path)
    distance = np.loadtxt(distance_path, dtype=np.float32)
    print(distance.shape)

    # The input already holds distances, hence metric='precomputed'.
    # NOTE(review): scikit-learn documents ``eps`` as used only with
    # cluster_method='dbscan'; with the default method it presumably has no
    # effect here -- confirm.
    op = OPTICS(eps=0.03, min_samples=10, metric='precomputed')
    print(op)

    op.fit(distance)
    labels = op.labels_
    print(labels, labels.shape)

    # The noise label -1 is not a cluster, so it is not counted.
    no_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    print(no_clusters, "no_clusters")

    return tuple(labels.tolist())

示例#11

0

显示文件

 def clustering(self, min_cluster_size=5, min_samples=3, eps=1, cpu_threads=-1):
     """Cluster the amino acids of alignment column ``self.col`` and score the split.

     For every alignment row whose entry in column ``self.col`` is not ``'-'``
     and whose amino acid has both ``start`` and ``end`` set, the two values
     are stacked into one feature row. The rows are clustered with the
     estimator named by ``self.method`` ('optics', 'hdbscan' or 'dbscan').

     Sets ``self.mols_and_aa``, ``self.lab`` (labels), and ``self.sil``,
     ``self.diam`` and ``self.score``. The last three are only computed when
     the labels form exactly two clusters with at most one noise point;
     otherwise they are ``None``.

     :param min_cluster_size: passed to HDBSCAN only.
     :param min_samples: passed to every estimator.
     :param eps: passed to DBSCAN only.
     :param cpu_threads: ``n_jobs`` for OPTICS and DBSCAN.
     :raises ValueError: if ``self.method`` names no supported estimator.
     """
     clust_matr = []
     self.mols_and_aa = []
     for i, row in enumerate(alignment.alignment):
         num_of_aa = row[self.col]
         if num_of_aa == '-':
             continue
         molecule = alignment.molecules[i]
         amino_acid = molecule.amino_acids[num_of_aa]
         if amino_acid.start is None or amino_acid.end is None:
             continue
         self.mols_and_aa.append((molecule, num_of_aa))
         clust_matr.append(np.hstack((amino_acid.start, amino_acid.end)))
     clust_matr = np.array(clust_matr)

     if self.method == 'optics':
         clusterer = OPTICS(metric='euclidean', n_jobs=cpu_threads, min_samples=min_samples)
     elif self.method == 'hdbscan':
         clusterer = hdbscan.HDBSCAN(metric='euclidean', min_cluster_size=min_cluster_size, min_samples=min_samples)
     elif self.method == 'dbscan':
         clusterer = DBSCAN(metric='euclidean', n_jobs=cpu_threads, eps=eps, min_samples=min_samples)
     else:
         # Without this branch the fit below fails with an UnboundLocalError.
         raise ValueError(f"Unsupported clustering method: {self.method!r}")

     db = clusterer.fit(clust_matr)
     self.lab = db.labels_

     noise_count = list(self.lab).count(-1)
     label_count = len(set(self.lab))
     # Exactly two real clusters: either no noise, or one noise point (-1).
     if (noise_count == 0 and label_count == 2) or (noise_count == 1 and label_count == 3):
         clustered = self.lab != -1
         self.sil = silhouette_score(clust_matr[clustered], self.lab[clustered], metric='euclidean')
         n_points = len(clust_matr)
         dist_matr = np.array([[distance.euclidean(clust_matr[i], clust_matr[j]) for i in range(n_points)] for j in range(n_points)])
         # Mean distance between the members of every pair of distinct clusters.
         mean_diams_clusters = [dist_matr[self.lab == i].T[self.lab == j].mean() for i in set(self.lab) for j in set(self.lab) if i != j and i != -1 and j != -1]
         self.diam = max(mean_diams_clusters)
         self.score = self.sil * self.diam
     else:
         self.sil = None
         self.diam = None
         self.score = None

示例#12

0

显示文件

def plot_bacteria_as_clusters(data: pd.DataFrame,
                              save_path: Path,
                              save_fig: bool = False,
                              time_point=None):
    """Plot the bacteria positions in 3-D, raw and coloured by OPTICS cluster.

    The x, y, z position of every bacterium at ``time_point`` is read from
    ``data['position']`` and shown as a scatter plot. The positions are then
    clustered with ``OPTICS(min_samples=2, metric='euclidean')`` and plotted
    again with one colour per label.

    :param data: frame with a ``position`` column; each entry is indexed by
        time point and then by coordinate (0, 1, 2).
    :param save_path: the cluster plot is written to ``cluster_plot.png`` in
        the parent directory of this path when ``save_fig`` is true.
    :param save_fig: save the cluster plot instead of showing it.
    :param time_point: index of the time step; ``None`` selects the last one.
    """
    # Local import: the original block did not reference numpy.
    import numpy as np

    if time_point is None:
        # set to last time step
        time_point = -1

    positions = data['position']
    # One row (x, y, z) per bacterium; this array is what gets plotted and
    # clustered, since a DataFrame cannot be sliced as data[:, 0].
    position_matrix = np.array([
        [positions[bac][time_point][axis] for axis in range(3)]
        for bac in positions.index
    ]).reshape(-1, 3)

    fig = plt.figure()
    # add_subplot attaches the 3-D axes to the figure; a bare Axes3D(fig) is
    # no longer added automatically by current matplotlib.
    ax = fig.add_subplot(projection='3d')
    ax.scatter(position_matrix[:, 0], position_matrix[:, 1], position_matrix[:, 2], s=30)
    ax.view_init(azim=200)
    plt.show()

    model = OPTICS(min_samples=2, metric='euclidean')
    model.fit_predict(position_matrix)

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.scatter(position_matrix[:, 0], position_matrix[:, 1], position_matrix[:, 2],
               c=model.labels_, s=30)
    ax.view_init(azim=200)
    if save_fig:
        # Save before any show(): showing first can leave an empty figure.
        path = Path(save_path).parent / 'cluster_plot.png'
        fig.savefig(path)
        plt.close(fig)
    else:
        plt.show()

示例#13

0

显示文件

文件： test_inferring.py 项目： Coldwater30/bachelor-thesis

    def test_collect_Loactions(self):
        """``collect_locations`` called with 1..4 yields the expected cluster groups."""
        # TODO: mock optics or use namedtuple
        class OPTICS:
            def __init__(self):
                pass

        optics = OPTICS()
        optics.cluster_hierarchy_ = [
            [1, 7], [8, 15], [16, 20], [0, 25], [32, 40],
            [30, 49], [0, 50], [85, 98], [80, 99], [0, 99],
        ]

        root = util.build_tree(optics)

        def clusters_of(cnodes):
            # Reduce the returned nodes to their cluster intervals.
            return [cnode.cluster for cnode in cnodes]

        actual = [
            clusters_of(inferring.collect_locations(argument, root))
            for argument in (1, 2, 3, 4)
        ]
        expected = [
            [[0, 50], [80, 99]],
            [[0, 25], [30, 49], [85, 98]],
            [[1, 7], [8, 15], [16, 20], [32, 40]],
            [],
        ]

        for i, (got, wanted) in enumerate(zip(actual, expected)):
            with self.subTest(i=i):
                self.assertCountEqual(got, wanted, "i")

示例#14

0

显示文件

def optics_fit_predict(X, min_samples=50, cluster_method='dbscan', eps=2):
    """Fit OPTICS on ``X`` and return DBSCAN-style labels extracted at ``eps``.

    The estimator is fitted with the given ``min_samples`` and
    ``cluster_method``. The returned labels are always produced by
    ``cluster_optics_dbscan`` from the fitted reachability, core distances and
    ordering, whatever ``cluster_method`` was passed to the estimator.

    Parameters
    ----------
    X               : array, shape (n_samples, n_features), or (n_samples, n_samples)
    min_samples     : The number of samples in a neighborhood for a point to be considered as a core point.
    cluster_method  : 'dbscan' by default. Other available: 'xi'. Converted with ``str`` and passed to OPTICS.
    eps             : The maximum distance between two samples for one to be considered as in the neighborhood of the other.

    Returns
    -------
    labels: Prediction/labels
    """
    fitted = OPTICS(min_samples=min_samples,
                    cluster_method=str(cluster_method)).fit(X)
    return cluster_optics_dbscan(reachability=fitted.reachability_,
                                 core_distances=fitted.core_distances_,
                                 ordering=fitted.ordering_,
                                 eps=eps)

示例#15

0

显示文件

文件： clustering_helper.py 项目： aswinvisva/dnn_vessel_heterogeneity

    def fit_model(self):
        '''
        Fit the model selected by ``self.method``, or load a pickled one.

        When ``self.show_plots`` is set, ``self.elbow_method()`` runs first.
        With ``self.pretrained`` the model is read from
        ``trained_models/<method>_model.pkl``. Otherwise the model for
        "kmeans", "dbscan", "optics" or "hierarichal" is built from
        ``self.data`` and, when ``self.save`` is set, pickled to that path.

        :return: None
        '''

        if self.show_plots:
            self.elbow_method()

        model_path = "trained_models/%s_model.pkl" % self.method

        if self.pretrained:
            # NOTE: unpickling can execute arbitrary code; only load model
            # files from a trusted source.
            with open(model_path, "rb") as model_file:
                self.model = pickle.load(model_file)
            return

        if self.method == "kmeans":
            self.model = KMeans(n_clusters=self.n_clusters)
            self.model.fit(self.data)
        elif self.method == "dbscan":
            self.model = DBSCAN(metric=self.metric, eps=0.15)
            self.model.fit(self.data)
        elif self.method == "optics":
            self.model = OPTICS(metric=self.metric)
            self.model.fit(self.data)
        elif self.method == "hierarichal":
            self.model = linkage(self.data, metric=self.metric)

        if self.save:
            # The context manager closes the file so the pickle is flushed to disk.
            with open(model_path, "wb") as model_file:
                pickle.dump(self.model, model_file)

示例#16

0

显示文件

文件： mlpairs.py 项目： streater512/oct_applications

    def find_pairs(self):
        """
        Find candidate pairs of securities that share an OPTICS cluster.

        OPTICS is fitted on ``self.components_.T``. Securities labelled as
        noise (-1) are dropped, and every 2-combination of the securities
        within one cluster is collected.

        Sets ``self.pairs`` (a Series of tuples) and ``self.cluster_labels_``
        (the raw OPTICS labels, noise included) and prints the pair count.

        :raises ValueError: if ``self.returns_reduced`` is None.
        """

        if self.returns_reduced is None:
            # Adjacent literals instead of a backslash continuation, which
            # embedded the next line's indentation in the message.
            raise ValueError("returns_reduced not found: must run "
                             ".reduce_PCA() before this function")

        # Initialize and fit OPTICS cluster to PCA components
        clustering = OPTICS()
        clustering.fit(self.components_.T)

        # Create cluster data frame and identify trading pairs
        clusters = pd.DataFrame({
            'security': self.securities,
            'cluster': clustering.labels_
        })
        # clusters with label == -1 are 'noise'
        clusters = clusters[clusters['cluster'] != -1]

        # Group securities by cluster and flatten list of combination lists
        groups = clusters.groupby('cluster')
        combos = list(groups['security'].apply(combinations, 2))  # All pairs
        pairs = list(chain.from_iterable(combos))  # Flatten list of lists

        print(f"Found {len(pairs)} potential pairs")

        self.pairs = pd.Series(pairs)
        self.cluster_labels_ = clustering.labels_

示例#17

0

显示文件

    def __init__(self, algorithm: str, n_clusters: int = 5, verbose=False):
        """
        Set up the clustering backend selected by ``algorithm``.

        :param algorithm: "KMeans", "OPTICS", or "AgglomerativeClustering_<linkage>"
            where the part after the underscore is used as the linkage method
        :param n_clusters: Number of clusters. Not passed to OPTICS
        :param verbose: Stored on the instance and passed to KMeans
        :raises Exception: if ``algorithm`` matches none of the names above
        """
        self.data = None
        self.verbose = verbose

        if algorithm == "KMeans":
            backend = KMeans(verbose=verbose, n_clusters=n_clusters)
        elif algorithm.startswith("AgglomerativeClustering"):
            # The name must contain exactly one underscore: "<name>_<linkage>".
            _, linkage_method = algorithm.split("_")
            backend = AgglomerativeClustering(linkage=linkage_method,
                                              n_clusters=n_clusters)
        elif algorithm == "OPTICS":
            backend = OPTICS(min_samples=5)
        else:
            raise Exception(
                "Unsupported clustering type {0}. Use one of {1}".format(
                    algorithm, self.supported_algos))
        self.sklearn_clustering = backend

        self.algorithm = algorithm
        # Text feature extractors start out unset.
        self.count_vectorizer = None
        self.tfidf_transformer = None

示例#18

0

显示文件

def make_autoencoder(data, lr=0.001, enc_dim=100):
    """Build an autoencoder, its encoder and a cluster-probability head.

    The number of output units of the probability head equals the number of
    distinct OPTICS labels found on ``minmax.fit_transform(data)``. Only the
    autoencoder is compiled (Adam with learning rate ``lr``, MAE loss, MSE
    metric).

    Returns ``(autoencoder, encoder, probability_model, fitted_optics)``.
    """
    # Encoder / decoder pair on top of the shared input layer.
    features_in = Input(shape=products_shape, name='FeaturesInput')
    encoded = Dense(enc_dim,
                    activation='relu',
                    kernel_initializer=he_normal(1),
                    name='AE_feature_reduction')(features_in)
    decoded = Dense(products_shape[0], activation='relu',
                    name='AE_3')(encoded)

    # inspired by https://www.frontiersin.org/articles/10.3389/fgene.2018.00585/full
    # OPTICS decides how many classes the softmax head distinguishes.
    # The count includes the noise label whenever OPTICS assigns it.
    optics_model = OPTICS()
    optics_model.fit(minmax.fit_transform(data))
    clusters = len(np.unique(optics_model.labels_))
    print('Optimal number of cluster:', clusters)

    hidden = Dense(enc_dim // 2,
                   activation='relu',
                   kernel_initializer=he_normal(1))(encoded)
    normalised = BatchNormalization()(hidden)
    probabilities = Dense(clusters, activation='softmax',
                          name='Probability_Product')(normalised)

    autoencoder_ = Model(inputs=features_in, outputs=decoded)
    encoder_ = Model(inputs=features_in, outputs=encoded)
    p_prob = Model(inputs=features_in, outputs=probabilities)

    autoencoder_.compile(optimizer=Adam(learning_rate=lr),
                         loss='mae',
                         metrics=['mse'])

    return autoencoder_, encoder_, p_prob, optics_model

示例#19

0

显示文件

文件： optics_exploratory.py 项目： rabi3elbeji/fakenews

def exploratory_analysis(dataset: str, samples=0.1, eps=np.inf) -> None:
    """Standardise a CSV data set, cluster it with OPTICS and print the cluster count.

    :param dataset: path of a comma-separated, UTF-8 encoded numeric file.
    :param samples: ``min_samples`` passed to OPTICS.
    :param eps: ``max_eps`` passed to OPTICS.
    """
    X = np.genfromtxt(dataset, delimiter=',', encoding='utf8')
    scaler = StandardScaler(copy=False)
    X_transformed = scaler.fit_transform(X)
    clust = OPTICS(min_samples=samples, max_eps=eps, n_jobs=2)
    # Cluster the scaler's output explicitly rather than relying on
    # copy=False having rescaled X in place.
    labels = clust.fit_predict(X_transformed)
    # -1 marks noise and is not a cluster.
    n_clusters = len(set(labels) - {-1})
    print("# clusters: {0}".format(n_clusters))

示例#20

0

显示文件

def cluster_embedded_maps_optics(aligned_maps):
    """Flatten every map into one row, cluster the rows with OPTICS and return the labels."""
    flattened_rows = [xmap.flatten() for xmap in aligned_maps]
    embedding = np.vstack(flattened_rows)

    # The estimator is fitted on a float64 copy of the stacked rows.
    fitted = OPTICS().fit(embedding.astype(np.float64))
    return fitted.labels_

示例#21

0

显示文件

 def create_clusters(self, min_samples):
     """Cluster ``self.performance_features`` with OPTICS and build the cluster pairs.

     The labels are stored in ``self._clusters`` as a Series indexed by
     ``self.performance.columns``; afterwards
     ``self._create_cluster_based_pairs()`` is called.

     :param min_samples: ``min_samples`` passed to OPTICS.
     """
     optics = OPTICS(min_samples=min_samples)
     clustering = optics.fit(self.performance_features)
     # The noise share was computed here but never used, so it is dropped.
     classified = pd.Series(clustering.labels_,
                            index=self.performance.columns)
     self._clusters = classified
     self._create_cluster_based_pairs()

示例#22

0

显示文件

文件： plot_optics.py 项目： as133/scikit-learn

import matplotlib.pyplot as plt

# Generate sample data: six 2-D Gaussian blobs of 250 points each, with
# different centres and spreads. The seed is fixed so the draws are reproducible.

np.random.seed(0)
n_points_per_cluster = 250

C1 = [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)
C2 = [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)
C3 = [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)
C4 = [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)
C5 = [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)
C6 = [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)
# Stack the blobs into a single sample matrix.
X = np.vstack((C1, C2, C3, C4, C5, C6))

# NOTE(review): `rejection_ratio` and `extract_dbscan` are not part of the
# current scikit-learn OPTICS API -- presumably this script targets an older
# development version; confirm before running.
clust = OPTICS(min_samples=9, rejection_ratio=0.5)

# Run the fit
clust.fit(X)

# Two extractions, at 0.25 and 0.75; only the second element of each
# returned pair is kept.
_, labels_025 = clust.extract_dbscan(0.25)
_, labels_075 = clust.extract_dbscan(0.75)

# Reachability values and labels rearranged by the fitted ordering, with
# `space` holding one position per sample.
space = np.arange(len(X))
reachability = clust.reachability_[clust.ordering_]
labels = clust.labels_[clust.ordering_]

# 2 x 3 grid: ax1 spans the whole first row, ax2 is the first cell of the
# second row.
plt.figure(figsize=(10, 7))
G = gridspec.GridSpec(2, 3)
ax1 = plt.subplot(G[0, :])
ax2 = plt.subplot(G[1, 0])

示例#23

0

显示文件

文件： plot_optics.py 项目： daniel-perry/scikit-learn

import numpy as np

# Generate sample data: six 2-D Gaussian blobs of 250 points each, with
# different centres and spreads. The seed is fixed so the draws are reproducible.

np.random.seed(0)
n_points_per_cluster = 250

C1 = [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)
C2 = [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)
C3 = [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)
C4 = [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)
C5 = [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)
C6 = [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)
# Stack the blobs into a single sample matrix.
X = np.vstack((C1, C2, C3, C4, C5, C6))

# min_cluster_size is given as a float here (.05), like xi.
clust = OPTICS(min_samples=50, xi=.05, min_cluster_size=.05)

# Run the fit
clust.fit(X)

# Re-use the fitted reachability, core distances and ordering to extract
# DBSCAN-style labels at two eps values (0.5 and 2) without refitting.
labels_050 = cluster_optics_dbscan(reachability=clust.reachability_,
                                   core_distances=clust.core_distances_,
                                   ordering=clust.ordering_, eps=0.5)
labels_200 = cluster_optics_dbscan(reachability=clust.reachability_,
                                   core_distances=clust.core_distances_,
                                   ordering=clust.ordering_, eps=2)

# Reachability values and labels rearranged by the fitted ordering, with
# `space` holding one position per sample.
space = np.arange(len(X))
reachability = clust.reachability_[clust.ordering_]
labels = clust.labels_[clust.ordering_]