def clustering_random_points(amount, ccore):
    """Time one OPTICS run over ``amount`` random 2D points and print it.

    :param amount: number of random points to generate.
    :param ccore: forwarded to ``optics`` as its fifth positional argument.
    """
    points = []
    for _ in range(amount):
        x_coord = random.random()
        y_coord = random.random()
        points.append([x_coord, y_coord])

    clusterer = optics(points, 0.05, 20, None, ccore)
    elapsed, _ = timedcall(clusterer.process)

    label = "Execution time (" + str(amount) + " 2D-points):"
    print(label, elapsed)
    def templateClusteringResults(path, radius, neighbors, amount_clusters,
                                  expected_length_clusters, ccore):
        """Cluster the sample at ``path`` with OPTICS and check cluster sizes.

        :param path: location handed to ``read_sample``.
        :param radius: connectivity radius passed to ``optics``.
        :param neighbors: neighbour threshold passed to ``optics``.
        :param amount_clusters: requested number of clusters, or ``None``.
        :param expected_length_clusters: expected cluster sizes (any order).
        :param ccore: forwarded to ``optics`` unchanged.
        """
        points = read_sample(path)

        clusterer = optics(points, radius, neighbors, amount_clusters, ccore)
        clusterer.process()

        found = clusterer.get_clusters()
        outliers = clusterer.get_noise()

        sizes = [len(members) for members in found]
        clustered_total = sum(sizes)

        # Every point must end up either in a cluster or in the noise list.
        assert clustered_total + len(outliers) == len(points)
        assert len(found) == len(expected_length_clusters)
        assert clustered_total == sum(expected_length_clusters)
        assert sorted(sizes) == sorted(expected_length_clusters)

        if amount_clusters is None:
            return

        # The ordering diagram must report the same number of clusters.
        analyser = ordering_analyser(clusterer.get_ordering())
        assert len(analyser) > 0

        detected, borders = analyser.extract_cluster_amount(
            clusterer.get_radius())
        assert detected == len(expected_length_clusters)
        assert len(borders) == detected - 1
示例#3
0
    def templateClusteringResultsSpecificData(data_type, path, radius, neighbors, amount_clusters, expected_length_clusters, ccore):
        """Run OPTICS on points or on their distance matrix and check sizes.

        :param data_type: ``'distance_matrix'`` clusters the matrix built by
            ``calculate_distance_matrix``; any other value clusters the raw sample.
        :param path: location handed to ``read_sample``.
        :param radius: connectivity radius passed to ``optics``.
        :param neighbors: neighbour threshold passed to ``optics``.
        :param amount_clusters: requested number of clusters, or ``None``.
        :param expected_length_clusters: expected cluster sizes (any order).
        :param ccore: forwarded to ``optics`` unchanged.
        """
        points = read_sample(path)

        input_data = points
        if data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(points)

        clusterer = optics(input_data, radius, neighbors, amount_clusters, ccore, data_type=data_type)
        clusterer.process()

        found = clusterer.get_clusters()
        outliers = clusterer.get_noise()

        sizes = [len(members) for members in found]
        clustered_total = sum(sizes)

        # Clusters plus noise must account for every input point.
        assert clustered_total + len(outliers) == len(points)
        assert len(found) == len(expected_length_clusters)
        assert clustered_total == sum(expected_length_clusters)
        assert sorted(sizes) == sorted(expected_length_clusters)

        if amount_clusters is None:
            return

        analyser = ordering_analyser(clusterer.get_ordering())
        assert len(analyser) > 0

        detected, borders = analyser.extract_cluster_amount(clusterer.get_radius())
        assert detected == len(expected_length_clusters)
        assert len(borders) == detected - 1
def template_clustering(path_sample,
                        eps,
                        minpts,
                        amount_clusters=None,
                        visualize=True,
                        ccore=False):
    """Time OPTICS on a sample file and optionally display the results.

    :param path_sample: location handed to ``read_sample``.
    :param eps: connectivity radius passed to ``optics``.
    :param minpts: neighbour threshold passed to ``optics``.
    :param amount_clusters: requested number of clusters, or ``None``.
    :param visualize: plots are shown only when this is exactly ``True``.
    :param ccore: forwarded to ``optics`` unchanged.
    """
    points = read_sample(path_sample)

    clusterer = optics(points, eps, minpts, amount_clusters, ccore)
    elapsed, _ = timedcall(clusterer.process)

    print("Sample: ", path_sample, "\t\tExecution time: ", elapsed, "\n")

    if visualize is not True:
        return

    found = clusterer.get_clusters()
    outliers = clusterer.get_noise()

    # First window: clusters plus noise (noise drawn with 'x' markers).
    canvas = cluster_visualizer()
    canvas.append_clusters(found, points)
    canvas.append_cluster(outliers, points, marker='x')
    canvas.show()

    # Second window: the ordering diagram.
    analyser = ordering_analyser(clusterer.get_ordering())
    ordering_visualizer.show_ordering_diagram(analyser, amount_clusters)
示例#5
0
文件: main.py 项目: genana/MPI_OPTICS
def mpi_optics(input_filepath, eps, minpts):
    """Run OPTICS on per-rank subsets of the data and merge results on rank 0.

    Rank 0 loads the comma-delimited file, builds a k-d tree, splits the
    points by subtree, and pairs each subset with a rank number. Every rank
    then clusters its own subset, and the partial results are combined
    pairwise over ``int(log2(comm.size))`` rounds through ``merge``.

    NOTE(review): ``comm``, ``rank``, ``size`` and ``name`` are not defined
    in this function; presumably module-level MPI globals -- confirm.

    :param input_filepath: path to a comma-delimited text file of points.
    :param eps: passed to ``optics`` as its second argument.
    :param minpts: passed to ``optics`` as its third argument.
    """
    if comm.rank == 0:
        data = np.loadtxt(input_filepath, delimiter=",").tolist()
        tree = kdtree.create(data)
        root_height = tree.height()
        # Depth at which the tree is cut; log2 of the number of ranks.
        dest_height = int(np.log2(comm.size))
        # Subtree roots sitting exactly ``dest_height`` levels below the root.
        dest_nodes = [
            node for node in tree.inorder()
            if root_height - node.height() == dest_height
        ]
        # One list of points per selected subtree.
        divided_data = [[node.data for node in list(node.inorder())]
                        for node in dest_nodes]
        # Pair each subset with a destination rank number.
        divided_data = zip(list(divided_data), range(0, size))
        for data in divided_data:
            dest = data[1]
            data = data[0]
            if dest != 0:
                comm.send(data, dest=dest, tag=dest)
            else:
                # Rank 0 keeps its own subset instead of sending it.
                distributed_data = data
    else:
        distributed_data = comm.recv(source=0, tag=rank)

    print("rank=%s, size=%s, name=%s" % (rank, len(distributed_data), name))

    optics_instance = optics(distributed_data, eps, minpts)
    optics_instance.process()
    print("OPTICS finished! rank = %s" % rank)

    # From here on ``optics_instance`` holds the list of OPTICS objects,
    # not the clustering instance itself.
    optics_instance = optics_instance.get_optics_objects()

    # Pairwise reduction: in round ``ite`` ranks are grouped in blocks of
    # 2**ite; odd blocks send to the even block on their left and stop.
    ite_num = int(np.log2(comm.size))
    for ite in range(0, ite_num):
        psuedo_id = int(rank / pow(2, ite))
        if psuedo_id % 2 == 1:
            dest = int((psuedo_id - 1) * pow(2, ite))
            comm.send(optics_instance, dest=dest, tag=ite * 10 + dest)
            #print("sent dest = %s rank = %s ite = %s" % (dest, rank, ite))
            break
        else:
            source = int((psuedo_id + 1) * pow(2, ite))
            # Tag matches the sender's ``ite * 10 + dest`` where dest == rank.
            received = comm.recv(source=source, tag=ite * 10 + rank)
            reset_flags(optics_instance)
            reset_flags(received)
            optics_instance = merge(optics_instance, received)
            #print("received = %s rank = %s ite = %s" % (received, rank, ite))

    if rank == 0:
        #dataset = distributed_data
        #clusters = optics_instance.get_clusters()
        #noise = optics_instance.get_noise()
        #cluster_ordering = optics_instance.get_cluster_ordering()
        #print(cluster_ordering)
        optics_objects = optics_instance
        #visualizer = cluster_visualizer();
        #visualizer.append_clusters(clusters, dataset);
        #visualizer.append_cluster(noise, dataset, marker = 'x');
        #visualizer.show();
        # Only the merged object count is reported; nothing is returned.
        print("FINAL LEN = %s" % len(optics_objects))
示例#6
0
    def getClusters(self):
        """Run OPTICS over ``self.list`` and return the clusters it finds."""
        # Tunable parameters: connectivity radius and neighbour threshold.
        clusterer = optics.optics(self.list, 0.0005, 10)
        clusterer.process()

        # Report how many clusters were found.
        cluster_count = len(clusterer.get_clusters())
        print("nb centroides ", cluster_count)
        return clusterer.get_clusters()
示例#7
0
 def testClusteringOrderVisualizer(self):
     """Show the ordering diagram for SAMPLE_SIMPLE4 with 5 clusters requested."""
     points = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE4)

     clusterer = optics(points, 6.0, 3, 5)
     clusterer.process()

     ordering = clusterer.get_ordering()
     ordering_visualizer.show_ordering_diagram(ordering_analyser(ordering), 5)
示例#8
0
 def testClusteringOrderVisualizer(self):
     """Show the ordering diagram for SAMPLE_SIMPLE4 without a cluster amount."""
     points = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE4)

     clusterer = optics(points, 6.0, 3, 5)
     clusterer.process()

     ordering = clusterer.get_ordering()
     ordering_visualizer.show_ordering_diagram(ordering_analyser(ordering))
    def get_modelo(self, algoritmo, eps, neig):
        """Cluster ``self.amostras`` with the named algorithm.

        The clusters are converted by ``self.get_lista_agrupada`` and wrapped
        in a NumPy array. When ``neig`` is non-zero and the result holds more
        distinct values than ``self.numero_clusters``, the method calls itself
        again with ``neig + 1`` and returns that result instead.

        :param algoritmo: one of ``'AGNES'``, ``'BIRCH'``, ``'CLARANS'``,
            ``'CURE'``, ``'DBSCAN'``, ``'FCM'``, ``'KMEANS'``, ``'KMEDOIDS'``,
            ``'OPTICS'`` or ``'ROCK'``.
        :param eps: radius used by DBSCAN, OPTICS and ROCK.
        :param neig: neighbour threshold used by DBSCAN and OPTICS; ``0``
            disables the retry described above.
        :return: the grouped result as a NumPy array.
        :raises ValueError: if ``algoritmo`` is not a supported name.
        """
        print(algoritmo + ' ' + str(eps) + ' - ' + str(neig))

        if algoritmo == 'AGNES':
            instance = agglomerative(self.amostras,
                                     self.numero_clusters,
                                     link=None)
        elif algoritmo == 'BIRCH':
            instance = birch(self.amostras,
                             self.numero_clusters,
                             entry_size_limit=10000)
        elif algoritmo == 'CLARANS':
            instance = clarans(self.amostras,
                               self.numero_clusters,
                               numlocal=100,
                               maxneighbor=1)
        elif algoritmo == 'CURE':
            instance = cure(self.amostras,
                            self.numero_clusters,
                            number_represent_points=5,
                            compression=0.5)
        elif algoritmo == 'DBSCAN':
            instance = dbscan(self.amostras, eps=eps, neighbors=neig)
        elif algoritmo == 'FCM':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = fcm(self.amostras, initial_centers)
        elif algoritmo == 'KMEANS':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = kmeans(self.amostras, initial_centers, tolerance=0.001)
        elif algoritmo == 'KMEDOIDS':
            # TODO: adjust the medoid list to the number of clusters.
            instance = kmedoids(self.amostras,
                                initial_index_medoids=[0, 0, 0, 0, 0, 0, 0],
                                tolerance=0.0001)
        elif algoritmo == 'OPTICS':
            instance = optics(self.amostras, eps=eps, minpts=neig)
        elif algoritmo == 'ROCK':
            instance = rock(self.amostras,
                            eps=eps,
                            number_clusters=self.numero_clusters,
                            threshold=0.5)
        else:
            # Fail here with a clear message; falling through would only
            # surface later as an AttributeError on ``None.process()``.
            raise ValueError(
                "Unsupported clustering algorithm: %r" % (algoritmo,))

        instance.process()
        lista_agrupada = np.array(
            self.get_lista_agrupada(instance.get_clusters()))

        if neig != 0:
            n_grupos = len(np.unique(lista_agrupada))
            if n_grupos > self.numero_clusters:
                # Too many groups: retry with a stricter neighbour threshold.
                lista_agrupada = self.get_modelo(algoritmo, eps, neig + 1)
        return lista_agrupada
示例#10
0
 def templateClusteringResults(self, path, radius, neighbors, expected_length_clusters, ccore):
     """Cluster the sample at ``path`` with OPTICS and check cluster sizes.

     ``expected_length_clusters`` is compared as given against the sorted
     sizes, so it must already be in ascending order. ``ccore`` is accepted
     but not used by this template.
     """
     points = read_sample(path)

     clusterer = optics(points, radius, neighbors)
     clusterer.process()

     found = clusterer.get_clusters()
     outliers = clusterer.get_noise()

     sizes = [len(members) for members in found]
     clustered_total = sum(sizes)

     # Clusters plus noise must account for every input point.
     assert clustered_total + len(outliers) == len(points)
     assert clustered_total == sum(expected_length_clusters)
     assert sorted(sizes) == expected_length_clusters
示例#11
0
    def templateClusteringResults(self, path, radius, neighbors,
                                  expected_length_clusters, ccore):
        """Cluster the sample at ``path`` with OPTICS and check cluster sizes.

        ``expected_length_clusters`` is compared as given against the sorted
        sizes, so it must already be in ascending order. ``ccore`` is accepted
        but not used by this template.
        """
        points = read_sample(path)

        clusterer = optics(points, radius, neighbors)
        clusterer.process()

        found = clusterer.get_clusters()
        outliers = clusterer.get_noise()

        sizes = [len(members) for members in found]
        clustered_total = sum(sizes)

        # Clusters plus noise must account for every input point.
        assert clustered_total + len(outliers) == len(points)
        assert clustered_total == sum(expected_length_clusters)
        assert sorted(sizes) == expected_length_clusters
示例#12
0
    def optics(cls, data, eps, minpts, ccore=False):
        """
        Build a clustering object backed by the OPTICS algorithm.

        The input is first passed through ``cls.input_preprocess``; the
        resulting model is wrapped in ``cls`` without being processed here.

        :param data: Input data that is presented as a list of points (objects), where each point is represented by list or tuple
        :param eps: Connectivity radius between points, points may be connected if distance between them less than the radius
        :param minpts: Minimum number of shared neighbors that is required for establishing links between points
        :param ccore: if True then the CCORE (C++) implementation is requested for solving the problem
        :return: the resulting clustering object
        """
        data = cls.input_preprocess(data)
        # Forward ``ccore``; previously the argument was accepted but ignored.
        model = optics(data, eps, minpts, ccore=ccore)
        return cls(model)
示例#13
0
    def templateClusteringResultsSpecificData(data_type, path, radius,
                                              neighbors, amount_clusters,
                                              expected_length_clusters, ccore):
        """Run OPTICS on points or a distance matrix and validate the output.

        Checks that OPTICS object indexes are unique, that core and
        reachability distances (when set) are non-negative, and that cluster
        sizes match ``expected_length_clusters`` in any order.

        :param data_type: ``'distance_matrix'`` clusters the matrix built by
            ``calculate_distance_matrix``; any other value clusters the sample.
        :param path: location handed to ``read_sample``.
        :param radius: connectivity radius passed to ``optics``.
        :param neighbors: neighbour threshold passed to ``optics``.
        :param amount_clusters: requested number of clusters, or ``None``.
        :param expected_length_clusters: expected cluster sizes.
        :param ccore: forwarded to ``optics`` unchanged.
        """
        points = read_sample(path)

        input_data = points
        if data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(points)

        clusterer = optics(input_data, radius, neighbors, amount_clusters,
                           ccore, data_type=data_type)
        clusterer.process()

        found = clusterer.get_clusters()
        outliers = clusterer.get_noise()
        descriptors = clusterer.get_optics_objects()

        # Each OPTICS object must describe a distinct input point.
        unique_indexes = {item.index_object for item in descriptors}
        assertion.eq(len(descriptors), len(unique_indexes))

        for item in descriptors:
            core = item.core_distance
            if core is not None:
                assertion.ge(core, 0)

            reachability = item.reachability_distance
            if reachability is not None:
                assertion.ge(reachability, 0)

        sizes = [len(members) for members in found]
        clustered_total = sum(sizes)

        assert clustered_total + len(outliers) == len(points)
        assert len(found) == len(expected_length_clusters)
        assert clustered_total == sum(expected_length_clusters)
        assert sorted(sizes) == sorted(expected_length_clusters)

        if amount_clusters is None:
            return

        analyser = ordering_analyser(clusterer.get_ordering())
        assert len(analyser) > 0

        detected, borders = analyser.extract_cluster_amount(
            clusterer.get_radius())
        assert detected == len(expected_length_clusters)
        assert len(borders) == detected - 1
示例#14
0
文件: Algorithms.py 项目: PZWJAY/SCA2
def OPTICS(radius, num, datapoints):
    """Cluster ``datapoints`` with OPTICS and return per-point labels.

    :param radius: connectivity radius passed to ``optics``.
    :param num: neighbour threshold passed to ``optics``.
    :param datapoints: the points to cluster.
    :return: ``(labels, k)`` where ``labels`` is a NumPy array holding the
        1-based cluster number of each point (``-1`` for points in no
        cluster) and ``k`` is the number of clusters.
    """
    clusterer = optics(datapoints, radius, num)
    clusterer.process()
    found = clusterer.get_clusters()

    labels = np.array([-1] * len(datapoints))
    k = 0
    for k, members in enumerate(found, start=1):
        labels[np.array(members)] = k
    return labels, k
def template_clustering(path_sample, eps, minpts):
    """Cluster a sample file with OPTICS, draw it, and plot the ordering.

    :param path_sample: location handed to ``read_sample``.
    :param eps: connectivity radius passed to ``optics``.
    :param minpts: neighbour threshold passed to ``optics``.
    """
    points = read_sample(path_sample)

    clusterer = optics(points, eps, minpts)
    clusterer.process()

    found = clusterer.get_clusters()
    noise = clusterer.get_noise()  # fetched but not drawn

    draw_clusters(points, found, [], ".")

    ordering = clusterer.get_cluster_ordering()
    positions = list(range(len(ordering)))

    # Bar chart of the cluster ordering, one bar per entry.
    plt.bar(positions, ordering)
    plt.show()
示例#16
0
def template_clustering(path_sample, eps, minpts):
    """Cluster a sample file with OPTICS, draw it, and plot the ordering.

    :param path_sample: location handed to ``read_sample``.
    :param eps: connectivity radius passed to ``optics``.
    :param minpts: neighbour threshold passed to ``optics``.
    """
    points = read_sample(path_sample)

    clusterer = optics(points, eps, minpts)
    clusterer.process()

    found = clusterer.get_clusters()
    noise = clusterer.get_noise()  # fetched but not drawn

    draw_clusters(points, found, [], '.')

    ordering = clusterer.get_cluster_ordering()
    positions = list(range(len(ordering)))

    # Bar chart of the cluster ordering, one bar per entry.
    plt.bar(positions, ordering)
    plt.show()
示例#17
0
def analyze(sample, radius, neighbors):
    """Run OPTICS on ``sample`` and return its ordering, clusters and noise.

    :param sample: the points to cluster.
    :param radius: connectivity radius passed to ``optics``.
    :param neighbors: neighbour threshold passed to ``optics``.
    :return: ``(ordering, clusters, noise)`` where ``ordering`` is an
        ``ordering_analyser`` built from the instance's ordering.
    """
    clusterer = optics(sample, radius, neighbors)
    clusterer.process()

    found = clusterer.get_clusters()
    outliers = clusterer.get_noise()

    # Wrap the ordering so callers can analyse it.
    analyser = ordering_analyser(clusterer.get_ordering())

    return analyser, found, outliers
示例#18
0
def analyze(sample, radius, neighbors):
    """Run OPTICS on ``sample`` and return its ordering, clusters and noise.

    :param sample: the points to cluster.
    :param radius: connectivity radius passed to ``optics``.
    :param neighbors: neighbour threshold passed to ``optics``.
    :return: ``(ordering, clusters, noise)`` where ``ordering`` is an
        ``ordering_analyser`` built from the instance's ordering.
    """
    clusterer = optics(sample, radius, neighbors)
    clusterer.process()

    found = clusterer.get_clusters()
    outliers = clusterer.get_noise()

    # Wrap the ordering so callers can analyse it.
    analyser = ordering_analyser(clusterer.get_ordering())

    return analyser, found, outliers
    def optics_temp(self, point_list):
        """Cluster ``point_list`` with OPTICS, print the timing, and plot.

        Shows the clusters (noise drawn with 'x' markers) and then the
        ordering diagram.

        :param point_list: the points to cluster.
        """
        sample = point_list
        start = time.time()
        optics_instance = optics(sample, 0.5, 6, ccore=True)
        optics_instance.process()
        clusters = optics_instance.get_clusters()
        end = time.time()
        print("imte", end - start)

        noise = optics_instance.get_noise()
        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)
        visualizer.append_cluster(noise, sample, marker='x')
        visualizer.show()

        analyser = ordering_analyser(optics_instance.get_ordering())
        # No cluster amount was requested from OPTICS above, and the name
        # ``amount_clusters`` previously passed here is not defined in this
        # method, so the diagram is drawn without it.
        ordering_visualizer.show_ordering_diagram(analyser)
示例#20
0
    def compute_optics(self):
        """Cluster the distance matrix ``self.D`` with OPTICS.

        Stores and returns an integer label array in ``self.labels``: each
        entry is the index of the cluster containing that item, or ``-1``
        when the item is in no cluster.
        """
        def _timestamp():
            # Current local time formatted for the debug log lines.
            now = datetime.datetime.fromtimestamp(time.time())
            return now.strftime('%Y-%m-%d %H:%M:%S')

        logging.debug("Starting computing OPTICS," +
                      "at {0}".format(_timestamp()))

        clusterer = optics(self.D, self.epsilon, self.min_points,
                           data_type='distance_matrix', ccore=False)
        clusterer.process()

        assigned = [-1] * len(self.D)
        for cluster_id, members in enumerate(clusterer.get_clusters()):
            for member in members:
                assigned[member] = cluster_id

        self.labels = np.asarray(assigned).astype(int)

        logging.debug("Finished computing OPTICS," +
                      "at {0}".format(_timestamp()))
        return self.labels
示例#21
0
    def cluster_optics(self, xs, ys):
        """Cluster the points ``(xs[i], ys[i])`` with OPTICS.

        :param xs: x coordinates.
        :param ys: y coordinates, indexed in step with ``xs``.
        :return: one list per cluster, each holding that cluster's points as
            rows of the point array.
        """
        points = np.array([[xs[i], ys[i]] for i in range(len(xs))])

        clusterer = optics(points, 27, 5)
        clusterer.process()
        found = clusterer.get_clusters()

        if self.visualize:
            canvas = cluster_visualizer()
            canvas.append_clusters(found, points)
            canvas.show()

        # Translate point indexes back into the points themselves.
        return [[points[index] for index in members] for members in found]
示例#22
0
def template_clustering(example, sample, w, h):
    """Cluster ``sample`` with OPTICS and return clustered and noise points.

    OPTICS is first run with the largest possible radius, ``max(w, h)``;
    clusters are then extracted from the ordering at the radius computed by
    ``epsilon``.

    :param example: grid forwarded to ``epsilon``.
    :param sample: list of points to cluster.
    :param w: grid width.
    :param h: grid height.
    :return: ``(clusters, noise)`` with point indexes replaced by the
        corresponding entries of ``sample``.
    """
    # max epsilon
    x_size = w
    y_size = h
    maxeps = max(x_size, y_size)

    minpts = 12

    optics_instance = optics(sample, maxeps, minpts)
    optics_instance.process()

    # extract clusters from ordering
    epsi = epsilon(x_size, y_size, example, sample, minpts)
    optics_instance.extract_clusters(epsi)
    clusters = optics_instance.get_clusters()
    noise = optics_instance.get_noise()

    # Map point indexes back to the points themselves.
    clusters = [[sample[pindex] for pindex in cluster] for cluster in clusters]
    noise = [sample[pindex] for pindex in noise]
    return clusters, noise
示例#23
0
    def cluster_optics(self, radius, neighbors):
        """Cluster the data frame's points with OPTICS and store the labels.

        Clusters are labelled from 1 upwards in the result column named
        ``"optics clustering"``; ``-2`` is handed to ``add_result_name``
        (presumably the initial label -- verify against ``add_result_name``).

        :param radius: connectivity radius passed to ``optics``.
        :param neighbors: neighbour threshold passed to ``optics``.
        """
        self.clustering_name = "optics clustering"

        points = self.data_frame.get_point_only_df().values

        clusterer = optics(points, radius, neighbors)
        clusterer.process()
        found = clusterer.get_clusters()

        self.data_frame.add_result_name(self.clustering_name, -2,
                                        ColType.CLUSTER_LABEL)

        for label, members in enumerate(found, start=1):
            for index in members:
                self.data_frame.add_result(self.clustering_name, index, label)

        distinct_labels = set(
            self.data_frame.df[self.clustering_name].tolist())
        self.cluster_count = len(distinct_labels)
        self.clustering_result = self.data_frame.df[
            self.clustering_name].tolist()
示例#24
0
def optics_clustering(distance, radius=2, neighbors=2, n_clusters=2):
    """
    Cluster a precomputed distance matrix with OPTICS (pyclustering).

    Parameters:
        distance (matrix): distance matrix to be used for clustering.
        radius (int): connectivity radius.
        neighbors (int): in [1, number of samples - 1]
        n_clusters (int): amount of clusters that should be found,
            in [2, number of samples].

    Returns:
        labels (array): of clustering, or None (after printing a message)
        when ``n_clusters`` or ``neighbors`` is out of range.
    """
    sample_count = len(distance)

    # Guard clauses: reject out-of-range arguments before clustering.
    if not (2 <= n_clusters <= sample_count):
        print("n_clusters is either too small or too large, n_clusters must be in range 2 ", len(distance))
        return None

    if not (1 <= neighbors <= sample_count - 1):
        print("neighbors has to be in range 1", len(distance)-1)
        return None

    clusterer = optics(distance, radius, neighbors, n_clusters, data_type="distance_matrix")
    clusterer.process()
    found = clusterer.get_clusters()

    # Convert the cluster lists into a labelled array.
    blank = __generate_empty__(len(distance))
    return convert_clusters(found, blank)
示例#25
0
    def optics_func(self):
        """Cluster ``self.data`` with OPTICS and store the result in the DB.

        Builds one row per cluster holding that cluster's data items,
        right-padded with ``0`` to the size of the largest cluster, inserts
        its string form into the ``optics`` table, commits, and closes both
        the cursor and the connection.
        """
        clusterer = optics(self.data, 2, 3)
        clusterer.process()
        clusters = clusterer.get_clusters()

        # Width of the table is the size of the largest cluster (0 if none).
        widest = max([len(members) for members in clusters] + [0])

        output = []
        for members in clusters:
            row = [self.data[index] for index in members]
            row.extend([0] * (widest - len(row)))
            output.append(row)

        self.cur.execute("INSERT INTO optics VALUES (%s)", (str(output), ))
        self.conn.commit()

        self.cur.close()
        self.conn.close()
示例#26
0
    def templateClusteringResultsSpecificData(data_type, path, radius, neighbors, amount_clusters, expected_length_clusters, ccore):
        """Run OPTICS on points or a distance matrix and validate the output.

        Checks that OPTICS object indexes are unique, that core and
        reachability distances (when set) are non-negative, and that cluster
        sizes match ``expected_length_clusters`` in any order.

        :param data_type: ``'distance_matrix'`` clusters the matrix built by
            ``calculate_distance_matrix``; any other value clusters the sample.
        :param path: location handed to ``read_sample``.
        :param radius: connectivity radius passed to ``optics``.
        :param neighbors: neighbour threshold passed to ``optics``.
        :param amount_clusters: requested number of clusters, or ``None``.
        :param expected_length_clusters: expected cluster sizes.
        :param ccore: forwarded to ``optics`` unchanged.
        """
        points = read_sample(path)

        use_matrix = data_type == 'distance_matrix'
        input_data = calculate_distance_matrix(points) if use_matrix else points

        clusterer = optics(input_data, radius, neighbors, amount_clusters, ccore, data_type=data_type)
        clusterer.process()

        found = clusterer.get_clusters()
        outliers = clusterer.get_noise()
        descriptors = clusterer.get_optics_objects()

        # Each OPTICS object must describe a distinct input point.
        unique_indexes = {item.index_object for item in descriptors}
        assertion.eq(len(descriptors), len(unique_indexes))

        for item in descriptors:
            core = item.core_distance
            if core is not None:
                assertion.ge(core, 0)

            reachability = item.reachability_distance
            if reachability is not None:
                assertion.ge(reachability, 0)

        sizes = [len(members) for members in found]
        clustered_total = sum(sizes)

        assert clustered_total + len(outliers) == len(points)
        assert len(found) == len(expected_length_clusters)
        assert clustered_total == sum(expected_length_clusters)
        assert sorted(sizes) == sorted(expected_length_clusters)

        if amount_clusters is None:
            return

        analyser = ordering_analyser(clusterer.get_ordering())
        assert len(analyser) > 0

        detected, borders = analyser.extract_cluster_amount(clusterer.get_radius())
        assert detected == len(expected_length_clusters)
        assert len(borders) == detected - 1
 def testCoreInterfaceIntInputData(self):
     """OPTICS with ccore enabled must split integer 1D points into 2 clusters."""
     integer_points = [[1], [2], [3], [20], [21], [22]]
     clusterer = optics(integer_points, 3, 2, 2, True)
     clusterer.process()

     found = clusterer.get_clusters()
     assert len(found) == 2
示例#28
0
def clustering_DADC2(points):
    """Cluster ``points`` with OPTICS (radius 22, 5 neighbours) and return the clusters."""
    clusterer = optics(points, 22, 5)
    clusterer.process()
    return clusterer.get_clusters()
示例#29
0
def clustering_OPTICS(points):
    """Cluster ``points`` with OPTICS (radius 10, 3 neighbours) and return the clusters."""
    clusterer = optics(points, 10, 3)
    clusterer.process()
    return clusterer.get_clusters()
示例#30
0
文件: OTOSO.py 项目: Skarvir/OTOSO
def optics_plot(log, rels):
    """Cluster traces by standardized inter-event times and plot reachability.

    For every pair of events in a trace, the time difference is recorded
    under the relation name ``"<a1>-><a2>"``. Differences are standardized
    per relation, each trace becomes a vector over ``rels`` (0.0 where the
    relation is absent), and the vectors are clustered with OPTICS. The
    reachability plot is shown with the ``eps`` threshold drawn in blue.

    NOTE(review): ``eps`` is not a parameter; presumably a module-level
    global -- confirm.

    :param log: iterable of traces; each trace exposes
        ``attributes['concept:name']`` and iterates over events that carry
        ``'concept:name'`` and ``'time:timestamp'``.
    :param rels: relation names defining the vector dimensions, in order.
    :return: one ``[size, mean reachability distance, mean vector]`` list
        per cluster.
    """
    d = {}  # relation -> every observed time difference
    Z = {}  # trace id -> {relation -> time difference}

    for trace in log:
        tid = trace.attributes['concept:name']

        z = {}
        for (e1, e2) in itertools.combinations(trace, 2):
            a1 = e1['concept:name']
            t1 = e1['time:timestamp']
            a2 = e2['concept:name']
            t2 = e2['time:timestamp']

            rel = a1 + "->" + a2
            diff = (t2 - t1).total_seconds()

            d.setdefault(rel, []).append(diff)
            # A later pair with the same relation overwrites the earlier one.
            z[rel] = diff

        Z[tid] = z

    avg = {}
    std = {}
    for rel, values in d.items():
        avg[rel] = np.mean(values)
        std[rel] = np.std(values)

    # standardizing
    Zstd = {}
    for tid, trace in Z.items():
        vstd = {}
        for rel, value in trace.items():
            # A relation with zero spread carries no signal; map it to 0.
            if std[rel] == 0:
                vstd[rel] = 0
            else:
                vstd[rel] = (value - avg[rel]) / std[rel]
        Zstd[tid] = vstd

    # One vector per trace, in trace insertion order.
    Zvectors = [[Zstd[tid].get(rel, 0.0) for rel in rels] for tid in Z]

    print("Number of traces: ", len(Zvectors))
    print("Number of relations: ", len(rels))

    optics_instance = optics(Zvectors, eps, 20)
    optics_instance.process()
    clusters = optics_instance.get_clusters()
    objects = optics_instance.get_optics_objects()

    reach = pd.Series(optics_instance.get_ordering())

    fig, ax1 = plt.subplots(figsize=(4, 2), dpi=100)
    plt.plot(reach, color='black')
    thresh = [eps] * len(reach)
    plt.plot(thresh, color='blue')
    # Shade the region where reachability stays below the threshold.
    ax1.fill_between(range(len(reach)),
                     np.maximum(thresh, reach),
                     reach,
                     color='blue',
                     alpha=0.3)
    plt.ylabel('Reachability Distance')
    plt.xlabel('Traces')
    plt.show()

    result = []
    for c in clusters:
        reachabilities = [
            objects[item].reachability_distance for item in c
            if objects[item].reachability_distance is not None
        ]
        result.append([
            len(c),
            np.average(reachabilities),
            np.mean([Zvectors[i] for i in c], axis=0),
        ])

    return result
示例#31
0

    datalist = []
    for row in range(data.shape[0]):
        datalist.append(list(data[row]))

    # get the true labels and group names
    labels_true = np.array(rawdata['row'])
    groups = np.array(rawdata['group'])

    # ----------------------------------------------------------------------
    # optics example from :
    #      https://raw.githubusercontent.com/annoviko/pyclustering/master/
    #          pyclustering/cluster/examples/optics_examples.py
    print("Calculating clustering using 'OPTICS'...")

    optics_instance = optics(datalist, eps=1, minpts=3, ccore=True)
    (ticks, _) = timedcall(optics_instance.process)

    print("\t\tExecution time: ", ticks, "\n")

    # get clusters and noise
    clusters = optics_instance.get_clusters()
    noise = optics_instance.get_noise()



# =============================================================================
# End of code
# =============================================================================
示例#32
0
文件: TOAD.py 项目: Skarvir/TOAD
# Build one vector per trace id: the standardized value of each relation in
# ``rels`` order, with 0.0 where the trace does not contain the relation.
for tid in tids:

    temp = Zstd[tid]
    dummy = []
    for rel in rels:
        if rel in temp:
            dummy.append(temp[rel])
        else:
            dummy.append(0.0)
    Zvectors.append(dummy)
    # Keep the trace id at the same position as its vector.
    remap_tids.append(tid)

print("Number of traces: ", len(Zvectors))
print("Number of relations: ", len(rels))

# Cluster the trace vectors with OPTICS.
optics_instance = optics(Zvectors, eps, minpts)

optics_instance.process()

clusters = optics_instance.get_clusters()
noise = optics_instance.get_noise()

# Ordering values as a Series, plus the largest value among them.
reach = pd.Series(optics_instance.get_ordering())
left = max(reach)

# Savitzky-Golay smoothing; positional arguments are window length 5 and
# polynomial order 3.
reach_smoothed = scipy.signal.savgol_filter(reach, 5, 3)

# ``l`` is always odd: len - 1 for an even length, len - 2 for an odd one.
if (len(reach) % 2 == 0):
    l = len(reach) - 1
else:
    l = len(reach) - 2
示例#33
0
def cluster_algo(r, c, eps, min_pts, dataset_name):
    """Run OPTICS once per row of distances and write a majority cluster per row.

    The CSV is loaded, columns containing invalid values are dropped, the
    last remaining column is discarded, and the rest is scaled. For each
    scaled row, the Euclidean distances to every row are clustered with
    OPTICS; each member of the n-th cluster gets a vote in column ``n - 1``
    of ``results`` (clusters beyond ``c`` all vote in the last column).
    Finally ``"<row number>,<index of the most-voted column>"`` is written
    per row to the output file.

    NOTE(review): ``output_file_with_clusters_`` is not a parameter;
    presumably a module-level global -- confirm.
    NOTE(review): OPTICS receives one row of distances at a time rather than
    a full distance matrix -- confirm this is intended.

    :param r: number of rows in the vote table.
    :param c: number of vote columns; the summary print reads columns 0 and
        1, so at least 2 are needed.
    :param eps: connectivity radius passed to ``optics``.
    :param min_pts: neighbour threshold passed to ``optics``.
    :param dataset_name: path to the comma-delimited dataset.
    """
    results = [[0 for _ in range(c)] for _ in range(r)]

    # dataset to be clustered based on euclidean distance
    data = genfromtxt(dataset_name, delimiter=',')
    data = np.ma.compress_cols(np.ma.masked_invalid(data))

    # normalization (the last column is excluded)
    features = data[:, 0:len(data[0]) - 1]
    k_norm = preprocessing.scale(features)

    for count, row in enumerate(k_norm, start=1):
        dist_list_row = [distance.euclidean(row, other) for other in k_norm]

        # Clustering this row's distances with the OPTICS algorithm
        optics_instance = optics(dist_list_row, eps, min_pts)
        print("Distance for row ")
        print(count)
        print(dist_list_row)
        optics_instance.process()

        clusters = optics_instance.get_clusters()
        print("Clusters")
        print(clusters)

        noise = optics_instance.get_noise()
        print("Noise")
        print(noise)

        # Assigning each time instance to a cluster; cluster numbers above
        # ``c`` share the last column.
        for cluster_number, members in enumerate(clusters, start=1):
            column = min(cluster_number, c) - 1
            for member in members:
                results[member][column] = results[member][column] + 1

        print("Number of clusters")
        print(len(clusters))

    # Clustering Results - Writing to file
    print(len(results))
    # The context manager closes the file even if a write or print fails.
    with open(output_file_with_clusters_, 'w') as output:
        for row_number, row in enumerate(results, start=1):
            print("row: " + str(row_number) + " ****has count of cluster 0: " +
                  str(row[0]) + " ****has count of cluster 1: " + str(row[1]))

            output.write(str(row_number))
            output.write(",")
            output.write(str(row.index(max(row))))
            output.write("\n")
def process_optics(sample):
    """Return the time ``timedcall`` reports for one OPTICS run on ``sample``."""
    clusterer = optics(sample, 1.0, 2)
    elapsed, _ = timedcall(clusterer.process)
    return elapsed
示例#35
0
def process_optics(sample):
    """Return the time ``timedcall`` reports for one OPTICS run on ``sample``."""
    clusterer = optics(sample, 1.0, 2)
    elapsed, _ = timedcall(clusterer.process)
    return elapsed