Example #1
def kclustering(distances_unsorted, k):
    # Get list of all nodes in distance list
    nodes = set()
    for u, v, d in distances_unsorted:
        nodes.add(u)
        nodes.add(v)

    # Create UnionFind to store clusters
    x = unionfind.UnionFind(nodes)

    # Sort [(u, v, d)] list on ascending d so we can pop off shortest edges
    # first
    distances = sorted(distances_unsorted, key=lambda x: x[2])

    # Single-link clustering loop: while > k clusters, merge clusters
    # containing closest nodes
    while len(x.get_heads()) > k:
        u, v, d = distances.pop(0)
        x.union(u, v)

    # In the event of ties (?), distances may contain pairs that are now in
    # the same cluster, but the first element in distances for which
    # this is not true is the closest pair not in the same cluster.
    while True:
        u, v, min_distance = distances.pop(0)
        if x.find(u) != x.find(v):
            break

    return x, min_distance
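The routine above is Kruskal's algorithm stopped early: edges are merged in ascending order of distance until only k clusters remain, and the first remaining edge whose endpoints sit in different clusters is the maximum spacing. A self-contained sketch of the same idea follows; it uses a small hand-rolled disjoint-set instead of the unionfind package, so none of the names below reflect that package's API.

# Minimal sketch: max-spacing k-clustering with a hand-rolled disjoint-set.
# The (u, v, d) edge format matches the example above; everything else is
# illustrative only.
def max_spacing_k_clustering(edges, k):
    parent = {}

    def find(a):
        while parent[a] != a:
            parent[a] = parent[parent[a]]   # path halving
            a = parent[a]
        return a

    nodes = {n for u, v, _ in edges for n in (u, v)}
    for n in nodes:
        parent[n] = n

    clusters = len(nodes)
    for u, v, d in sorted(edges, key=lambda e: e[2]):
        ru, rv = find(u), find(v)
        if ru != rv:
            if clusters == k:
                return d          # first inter-cluster edge = max spacing
            parent[ru] = rv
            clusters -= 1

print(max_spacing_k_clustering([(1, 2, 1), (2, 3, 2), (1, 3, 3), (3, 4, 9)], 2))  # -> 9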
Example #2
def zero_barcode_pointcloud(dist_matrix, alpha):
    #create e-neighborhood graph from matrix
    complete_graph = nx.from_numpy_matrix(dist_matrix)
    G_epsilon = nx.Graph()

    for node in complete_graph.nodes():
        G_epsilon.add_node(node)

    for edge in complete_graph.edges():
        weight = complete_graph[edge[0]][edge[1]]['weight']
        if weight <= alpha:
            G_epsilon.add_edge(edge[0], edge[1], weight=weight)

    #init unionFind
    U = unionfind.UnionFind()
    #init bars
    bars = []

    #for each node in graph, create a set, and create a default bar
    for vi in G_epsilon.nodes():
        U.find(vi)
        bars.append([0, float("inf")])

    #sort edges by weights (increasing)
    edges = sorted(G_epsilon.edges(data=True), key=lambda x: x[2]['weight'])

    for edge in edges:
        if U.find(edge[0]) != U.find(edge[1]):
            U.union(edge[0], edge[1])
            bars[edge[0]][1] = G_epsilon[edge[0]][edge[1]]['weight']

    return bars
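The bars list computed above is the 0-dimensional persistence barcode of the alpha-neighborhood graph: every point is born at 0, a bar dies at the edge weight that first merges its component, and one bar per surviving component stays infinite. A rough self-contained sketch of the same computation straight from a distance matrix, without networkx (illustrative only, not the unionfind package's API):

import numpy as np

def zero_barcode_sketch(dist_matrix, alpha):
    """0-dim barcode of the alpha-neighborhood graph (illustrative sketch)."""
    n = dist_matrix.shape[0]
    parent = list(range(n))

    def find(a):
        while parent[a] != a:
            parent[a] = parent[parent[a]]
            a = parent[a]
        return a

    # Edges (w, i, j) with w <= alpha, processed in increasing weight order.
    edges = sorted(
        (dist_matrix[i, j], i, j)
        for i in range(n) for j in range(i + 1, n)
        if dist_matrix[i, j] <= alpha
    )

    bars = [[0, float("inf")] for _ in range(n)]
    for w, i, j in edges:
        ri, rj = find(i), find(j)
        if ri != rj:
            parent[ri] = rj
            bars[ri][1] = w   # the component absorbed at this merge dies at w
    return bars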
Example #3
def main(args):

    term = 10000
    start_height = 1
    end_height = dq.get_max_height()
    pool_num = multiprocessing.cpu_count() // 2
    cdq = ClusterDB(args.dbpath)

    stime = time.time()
    u = uf.UnionFind(int(dq.get_max_address()) + 1)
    try:
        for sheight, eheight in zip(range(start_height, end_height, term), \
                                    range(start_height+term, end_height+term, term)):
            if eheight >= end_height:
                eheight = end_height + 1

            with multiprocessing.Pool(pool_num) as p:
                result = p.imap(one_time_change, range(sheight, eheight))
                for addr_list in result:
                    for addr_set in addr_list:
                        addr_1 = addr_set[0]
                        addr_2 = addr_set[1]
                        print(addr_1, ',', addr_2)
                        u.union(int(addr_1), int(addr_2))
            etime = time.time()
            #print('height: {}, time:{}'.format(eheight, etime-stime))
        del u.rank
        db_write(stime, cdq, u)

    except KeyboardInterrupt:
        print('Keyboard Interrupt Detected! Commit transactions...')
        cdq.commit_transactions()
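This example (and the similar ones below that iterate u.par after del u.rank) assumes an array-backed union-find indexed by integer address ids. The actual uf.UnionFind implementation is not shown; the minimal sketch below uses the same par/rank attribute names purely to make the pattern concrete, and is an assumption rather than the real class.

# Minimal array-based union-find with par/rank attributes (assumed layout).
class ArrayUnionFind:
    def __init__(self, n):
        self.par = list(range(n))
        self.rank = [0] * n

    def find(self, a):
        while self.par[a] != a:
            self.par[a] = self.par[self.par[a]]   # path halving
            a = self.par[a]
        return a

    def union(self, a, b):
        ra, rb = self.find(a), self.find(b)
        if ra == rb:
            return
        if self.rank[ra] < self.rank[rb]:
            ra, rb = rb, ra                       # union by rank
        self.par[rb] = ra
        if self.rank[ra] == self.rank[rb]:
            self.rank[ra] += 1

With such a layout, deleting rank after all unions frees memory, and find(i) (or find(par[i]), which resolves to the same root) yields each address's cluster representative.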
Example #4
def graph_to_dot3(infile):
    graph, metadata = json_to_graph_data(infile)

    uf = unionfind.UnionFind(graph.keys()) 
    for v, edges in graph.items():
        for edge in edges:
            if metadata[edge.label].data["cf:type"] == "version":
                uf.union(edge.dest, v)

    def node2str(node):
        data = metadata[node].data
        dtype = str(data["cf:type"])
        if dtype == "file_name":
            return data["cf:pathname"]
        return dtype + ", " + uf.find(node)
        #return uf.find(node)
    already = set()
    s = ["digraph prov {"]
    for v, edges in graph.items():
        for edge in edges:
            if metadata[edge.label].data["cf:type"] == "version":
                continue
            pair = (uf.find(v), uf.find(edge.dest))
            if pair in already:
                continue
            already.add(pair)
            s.append('\t"%s" -> "%s";' % (
                node2str(v), 
                node2str(edge.dest)))#,
                #metadata[edge.label].typ + ", " +
                #metadata[edge.label].data['cf:type']))
    s.append("}")
    return "\n".join(s)
Example #5
    def _update_segmentation(self, action_state):
        
        if len(self.points) < 2:
            return


        n_pixels = np.prod(self.embedding.data.shape[1:])
        uf = unionfind.UnionFind(n_pixels)

        label_image = np.zeros((1, ) + self.embedding.data.shape[1:])

        # find minimal threshold
        threshold = None
        for p0 in self.points:
            for p1 in self.points:
                if p0 < p1:
                    min_max_edge = None
                    short_path = nx.shortest_path(self.mst_graph, p0, p1)
                    for u, v in zip(short_path, short_path[1:]):
                        weight = self.mst_graph.get_edge_data(u, v)['weight']
                        if min_max_edge is None or weight > min_max_edge:
                            min_max_edge = weight

                    if threshold is None or threshold > min_max_edge:
                        threshold = min_max_edge

        # `mst` is assumed to be an (n_edges, 3) array of (u, v, weight) rows
        # derived from self.mst_graph; it is not defined in this snippet.
        uf.union_array(mst[mst[:, 2] < threshold][:, :2].astype(np.uint32))
        label_image = uf.get_label_image(self.embedding.data.shape[1:])[None]
        label_image = label_image.astype(np.uint32)
        self.segmentation[:] = label_image
        self.update_view()
Example #6
def get_structurally_symmetric(A, force_unique = False):
    disj_set = uf.UnionFind()
    for i in range(len(A)):
        disj_set.add(i)
    if force_unique:
        return disj_set.components(), disj_set.component_mapping()
    autos = get_automorphisms(A)
    for i in range(len(A)):
        for j in range(len(autos[0])):
            disj_set.union(i, autos[i][j])
    return disj_set.components(), disj_set.component_mapping()
Example #7
def IsConnected(Permutation, Reference, InteractionPairs):
    diagram = set(InteractionPairs)
    for i in range(len(Permutation)):
        diagram.add((Reference[i], Permutation[i]))

    n_node = len(InteractionPairs)*2
    diagram_union = unionfind.UnionFind(n_node)

    for edge in diagram:
        if edge[0] != edge[1] and not diagram_union.is_connected(edge[0], edge[1]):
            diagram_union.union(edge[0], edge[1])
    return diagram_union.get_n_circles() == 1
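The test above relies on the component count: after merging every edge of the diagram, the graph is connected exactly when one component remains. A compact stand-alone equivalent, hand-rolled because is_connected and get_n_circles are this project's own union-find methods:

def is_connected_sketch(n_node, edges):
    """True iff the undirected graph on range(n_node) with `edges` is connected."""
    parent = list(range(n_node))

    def find(a):
        while parent[a] != a:
            parent[a] = parent[parent[a]]
            a = parent[a]
        return a

    components = n_node
    for u, v in edges:
        ru, rv = find(u), find(v)
        if ru != rv:
            parent[ru] = rv
            components -= 1
    return components == 1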
Example #8
def main(args):
    count = 0
    stime = time.time()
    csv_list = read_csv(args.csv_file)
    etime = time.time()
    cdq = ClusterDB('/home/dnlab/DataHDD/database/multi-input25man.db')
    cdq.create_cluster_table()
    print("DEBUG:", csv_list[0], etime - stime)

    print("START UNION FIND")
    stime = time.time()
    #u = uf.UnionFind(int(dq.get_max_address())+1)
    u = uf.UnionFind(90000000)
    etime = time.time()
    print(f"MAKE ADDRESS END, TOTAL TIME:{etime - stime}")

    for first, second in csv_list:
        u.union(first, second)
        if count % 10000000 == 0:
            etime = time.time()
            print(f"COUNT {count} END, TOTAL TIME: {etime - stime}")
        count += 1
    etime = time.time()
    print(f"UNION FIND END TOTAL TIME:{etime - stime}")

    del u.rank
    print("START CLUSTERING")
    stime = time.time()
    count = 0
    addr_list = list()
    for index, cluster in enumerate(u.par):
        addr_list.append((str(index), u.find(cluster)))
        if count % 10000 == 0:
            cdq.begin_transactions()
            cdq.insert_cluster_many(addr_list)
            cdq.commit_transactions()
            etime = time.time()
            print(
                f"COUNT {count} END, TOTAL TIME: {etime - stime}, {addr_list[len(addr_list)-1]}"
            )
            del addr_list
            addr_list = list()
        count += 1

    cdq.begin_transactions()
    cdq.insert_cluster_many(addr_list)
    cdq.commit_transactions()
    del addr_list

    etime = time.time()
    del u.par

    print(f"CLUSTERING END:{etime - stime}")
Example #9
    def __init__(
        self,
        rules: Rule,
        discards: typing.List[Domino] = None,
        union: unionfind.UnionFind = None,
    ):
        self.rules = rules

        if discards is None:
            discards = []
        self.discards = discards

        if union is None:
            union = unionfind.UnionFind()
        self.union = union

        self.grid = Grid(GridSize.MIGHTY_DUEL if Rule.MIGHTY_DUEL in
                         self.rules else GridSize.STANDARD)
Example #10
def main(args):
    count = 0
    stime = time.time()
    csv_list = read_csv(args.csv_file)
    etime = time.time()
    print("DEBUG:", csv_list[0], etime-stime)

    print("START UNION FIND")
    stime = time.time()
    u = uf.UnionFind(int(cdq.get_max_address())+1)
    #u = uf.UnionFind(int(100000000))
    etime = time.time()
    print(f"MAKE ADDRESS END, TOTAL TIME:{etime - stime}")
    
    
    for first, second in csv_list:
        u.union(first, second)
        if count % 10000000 == 0:
            etime = time.time()
            print(f"COUNT {count} END, TOTAL TIME: {etime - stime}")
        count += 1
    etime = time.time()
    print(f"UNION FIND END TOTAL TIME:{etime - stime}") 
    
    del u.rank
    print("START CLUSTERING")
    stime = time.time()
    count = 0
    addr_list = list()
    for index, cluster in enumerate(u.par):
        addr_list.append((u.find(cluster), str(index)))
        if count % 10000 == 0:
            cdq.begin_transactions()
            cdq.update_cluster_many(addr_list)
            cdq.commit_transactions()
            etime = time.time()
            print(f"COUNT {count} END, TOTAL TIME: {etime - stime}, {addr_list[len(addr_list)-1]}")
            del addr_list
            addr_list = list()
        count += 1
    etime = time.time()
    del u.par
    
    print(f"CLUSTERING END:{etime - stime}")    
Example #11
    def run(self):
        # get parameters
        weight = self.dataSet["weight"]
        # init result
        self.result.setAllEdgeValue(False)
        # sort edge in increasing order of weight
        edges = []
        for e in self.graph.getEdges():
            edges.append([e, weight.getEdgeMax() - weight[e]])
        edges.sort(key=lambda ed: ed[1])
        # init Union-Find data structure
        uf = unionfind.UnionFind()

        for ed in edges:
            e = ed[0]
            if uf[self.graph.source(e)] != uf[self.graph.target(e)]:
                uf.union(self.graph.source(e), self.graph.target(e))
                self.result[e] = True
        return True
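Because each edge is keyed by weight.getEdgeMax() - weight[e], sorting in increasing order actually visits the heaviest edges first, so the plugin extracts a maximum spanning tree; the selection loop is otherwise plain Kruskal. A Tulip-independent sketch of that loop, for illustration only:

def kruskal_sketch(num_nodes, edges, maximum=False):
    """Return the (u, v, w) edges kept in a spanning forest.

    `edges` is an iterable of (u, v, w) with integer node ids; set
    maximum=True to mimic the inverted-weight trick used in the plugin above.
    """
    parent = list(range(num_nodes))

    def find(a):
        while parent[a] != a:
            parent[a] = parent[parent[a]]
            a = parent[a]
        return a

    kept = []
    for u, v, w in sorted(edges, key=lambda e: e[2], reverse=maximum):
        ru, rv = find(u), find(v)
        if ru != rv:
            parent[ru] = rv
            kept.append((u, v, w))
    return kept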
Example #12
def main(args):
    count = 0
    stime = time.time()
    csv_list = read_csv(args.csv_file)
    etime = time.time()
    cdq = ClusterDB('/home/dnlab/DataHDD/dbv3cluster.db')
    print("DEBUG:", csv_list[0], etime - stime)

    print("START UNION FIND")
    stime = time.time()
    #u = uf.UnionFind(int(cdq.get_max_address())+1)
    u = uf.UnionFind(int(200000000))
    etime = time.time()
    print(f"MAKE ADDRESS END, TOTAL TIME:{etime - stime}")

    for first, second in csv_list:
        u.union(first, second)
        if count % 10000000 == 0:
            etime = time.time()
            print(f"COUNT {count} END, TOTAL TIME: {etime - stime}")
        count += 1
    etime = time.time()
    print(f"UNION FIND END TOTAL TIME:{etime - stime}")

    del u.rank
    print("START CLUSTERING")
    stime = time.time()
    count = 0
    addr_list = list()
    for index, cluster in enumerate(u.par):
        addr_list.append((str(index), u.find(cluster)))

    del u.par
    df = pd.DataFrame(
        addr_list,
        columns=['addr', 'number'],
    )
    df.to_csv('/home/dnlab/DataHDD/cluster_result/40man.csv', index=False)
    etime = time.time()
    print(f"CLUSTERING END:{etime - stime}")
Example #13
def implicitClustering(nodeList, maxDist=3, bitsLabel=24):

    numNodes = len(nodeList)
    ufstruct = uf.UnionFind(list(range(len(nodeList))))

    swapMasks = calculateMasks(bitsLabel)
    labelDict = createLabelDict(nodeList)

    numClusterings = 0
    for i, node1 in enumerate(nodeList):
        print("Node {} of {}".format(i, numNodes - 1))
        for sameLabelIndex in labelDict[node1]:
            if sameLabelIndex != i:
                ufstruct.union(i, sameLabelIndex)

        for mask in swapMasks:
            modLabel = swapByMask(node1, mask)
            if modLabel in labelDict:
                for index in labelDict[modLabel]:
                    ufstruct.union(i, index)

    k = ufstruct.totalComponents  #len(nodeList) - numClusterings
    return k
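calculateMasks and swapByMask are not shown in this example. Since nodes within a small Hamming distance of each other should land in the same cluster, they presumably enumerate every bitsLabel-bit mask with one or two bits set and apply it by XOR; the helpers below are a hedged guess at that shape, not the project's actual code.

from itertools import combinations

# Hypothetical helpers, assuming labels are integers of bitsLabel bits.
def calculateMasks(bitsLabel):
    masks = [1 << i for i in range(bitsLabel)]                      # distance 1
    masks += [(1 << i) | (1 << j)
              for i, j in combinations(range(bitsLabel), 2)]        # distance 2
    return masks

def swapByMask(label, mask):
    return label ^ mask   # flip the masked bits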
Example #14
def hamming_clustering(nodes, min_distance):
    '''
    Performs clustering on a list of nodes whose position is expressed as a
    binary sequence in ndimensional space.

    Requires a function find_pairs() that finds all pairs of nodes separated by
    a given Hamming distance, i.e. the number of bits that must be flipped to
    make two numbers identical (e.g. the Hamming distance from 1010 to 1001 is
    2).

    Returns the UnionFind object representation of the clusters.

    >>> nodes = read_nodes('kclustering_tests/tcbig1.txt')
    >>> print(hamming_clustering(nodes, 2))
    Found 15 edges with length 1
    {6: [6, 7, 4, 5, 0, 1, 3, 2, 8, 9]}
    >>> nodes = read_nodes('kclustering_tests/tcbig2.txt')
    >>> print(hamming_clustering(nodes, 2))
    Found 2 edges with length 1
    {1: [1, 3, 2], 13: [13]}
    '''

    # Get maximum length of binary representation of nodes, i.e. ndim
    ndim = max(map(len, map("{:b}".format, nodes.keys())))

    # Create UnionFind to store clusters
    x = unionfind.UnionFind(nodes.keys())

    for d in range(1, min_distance):
        pairs = find_pairs(nodes, ndim, d)
        npairs = len(pairs)
        print "Found {} edges with length {}".format(npairs, d)
        for i, pair in enumerate(pairs):
            x.union(*pair)

    return x
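find_pairs is required by the docstring but not shown. A plausible implementation, assuming the node labels are the integer keys of the nodes dict: flip every d-bit mask on each label and look the result up, which is much cheaper than comparing all pairs when d is small.

from itertools import combinations

def find_pairs(nodes, ndim, d):
    """Sketch of the helper the docstring assumes: all pairs of node keys
    whose binary labels differ in exactly d of the ndim bit positions."""
    pairs = []
    for bits in combinations(range(ndim), d):
        mask = sum(1 << b for b in bits)
        for key in nodes:
            other = key ^ mask
            if other > key and other in nodes:
                pairs.append((key, other))
    return pairs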
Example #15
import sys

import numpy as np
import unionfind


def builddist(data):
    nnode = len(data[:, 0])
    dist = np.empty([nnode, nnode], dtype=int)
    for i in np.arange(nnode - 1):
        for j in np.arange(nnode - 1 - i) + 1 + i:
            diff = np.logical_xor(data[i, :], data[j, :])
            dist[i, j] = len(np.where(diff == True)[0])
            #dist[i,j] = sum(ch1 != ch2 for ch1, ch2 in zip(data[i,:],data[j,:]))
    print('finish distance calculation')
    return dist


def cluster(data, nf, dist, mindist):
    nnode = len(data[:, 0])
    for i in np.arange(nnode - 1):
        for j in np.arange(nnode - 1 - i) + 1 + i:
            if dist[i, j] == mindist:
                if nf.find(i) != nf.find(j):
                    nf.union(i, j)
    return nf


if __name__ == "__main__":
    data = np.int_(np.loadtxt(sys.argv[1], skiprows=1))
    nf = unionfind.UnionFind()
    nf.insert_objects(np.arange(len(data[:, 0])))
    dist = builddist(data)
    nf = cluster(data, nf, dist, 0)
    nf = cluster(data, nf, dist, 1)
    nf = cluster(data, nf, dist, 2)
    print(nf)
    print(len(set(nf.parent_pointers.values())))
Example #16
def perform_cluster(corpus, num_processes, language_type, save_group_id):
    # Tokenize and filter stop words
    first_articles = [(obj.messageTitle, '') for obj in corpus]

    if language_type == 0:
        first_cut_result = jieba_multipro.multi_cut_words_for_cluster(
            first_articles, num_processes)
    if language_type == 1:
        first_cut_result = nltk_multipro.multi_cut_words(
            first_articles, num_processes)

    logger.info('first multi_cut_words_for_cluster....')

    # Build the TF-IDF matrix
    first_tfidf_matrix = tfidfWeight.getTfidf(first_cut_result, True)
    logger.info('first construct_matrix....')

    # DBSCAN clustering algorithm
    first_cluster_results = first_cluster(corpus, first_tfidf_matrix,
                                          save_group_id, language_type)
    logger.info('first first_cluster....')

    for label, dict_ in first_cluster_results.items():
        logger.debug('label:%d, cluster_topic: %s, ids: %s' %
                     (label, dict_['cluster_topic'], '-'.join(dict_['ids'])))

    # If the first-pass clustering produced no results
    cluster_results = []
    if len(first_cluster_results) == 0:
        logger.info('first_cluster_results is empty; randomly selecting 10 items as the clustering result....')
        cluster_results = get_result_corpus(corpus)
    else:
        # Second-pass clustering, based on topic
        second_corpus = [(label, topic['cluster_message'])
                         for (label, topic) in first_cluster_results.items()
                         if topic['cluster_message'] != '']
        second_articles = [article[1] for article in second_corpus]
        for label, message in second_corpus:
            logger.debug('label:%d, message: %s' % (label, message))

        # If the second-pass corpus is empty, return the first-pass results directly
        if len(second_articles) == 0:
            cluster_results = get_result(first_cluster_results)
        else:
            # Build the TF-IDF matrix
            second_tfidf_matrix = tfidfWeight.getTfidf(second_articles, True)
            logger.info('second construct_matrix....')

            # DBSCAN clustering algorithm
            second_cluster_results = second_cluster(second_corpus,
                                                    second_tfidf_matrix,
                                                    save_group_id)
            logger.info('second second_cluster....')

            # Merge the clustering results
            first_ids = [[label]
                         for (label, topic) in first_cluster_results.items()]
            logger.info('first_ids: %s', first_ids)

            second_ids = [
                clusters.keys()
                for (label, clusters) in second_cluster_results.items()
                if (len(clusters)) > 1
            ]
            logger.info('second_ids: %s', second_ids)

            first_ids.extend(second_ids)
            u = unionfind.UnionFind(first_ids)
            u.create_tree()
            ids_list = u.get_tree()
            logger.info('ids_list: %s', ids_list)

            cluster_results = return_data(ids_list, first_cluster_results)
            logger.info('cluster_results....')
    return cluster_results
Example #17
"""
    Code taken from Hyperbolic Hierarchical Clustering (HypHC) by Chami et al.
    for more details visit https://github.com/HazyResearch/HypHC
"""

import numpy as np
import unionfind

if __name__ == '__main__':
    uf = unionfind.UnionFind(5)
    uf.merge(np.array([[0, 1], [2, 3], [0, 4], [3, 4]]))
    print(uf.parent)
    print(uf.tree)
Example #18
    def detect(self, image):
        """
        Detect maximally stable extremal regions
        @type image: np.ndarray
        @param image: The image from which MSER are to be extracted from
        @rtype: list
        @return: A list of contours representing detected MSERs
        """
        if image.ndim > 2:
            raise TypeError(
                f'A grayscale image expected but image had {image.ndim} channels')

        image_shape = image.shape
        image = image.flatten()
        sorting_indices = np.argsort(image)
        row_indices, column_indices = np.unravel_index(sorting_indices,
                                                       image_shape)

        threshold_list = np.linspace(0, 255, self.num_thresh, endpoint=True)

        output_images = np.zeros(
            (self.num_thresh, image_shape[0], image_shape[1]), dtype=np.uint8)

        sorted_intensity = image[sorting_indices]

        # connected component history
        UF = unionfind.UnionFind(image.size)
        for i, threshold in enumerate(threshold_list):
            # Find the index at which intensity > threshold
            end_index = bisect.bisect(sorted_intensity, threshold)
            if end_index >= image.size:
                continue
            # Build the thresholded image
            output_images[i, row_indices[end_index:],
                          column_indices[end_index:]] = 255
            ids = sorting_indices[end_index:]
            # Find the neighboring pixels of all white pixels
            neighbors = np.apply_along_axis(find_neighbors, 0, ids,
                                            image_shape).T

            for j, pix_neighbors in enumerate(neighbors):
                for neighbor in pix_neighbors:
                    # TODO: Handle pixels on image edges (3 neighbors)
                    if neighbor < 0 or neighbor > image.size:
                        continue
                    # If a neighbor is a white pixel it is part of connected component else it is border pixel
                    pix = ids[j]
                    if neighbor in ids:
                        UF.union(neighbor, pix)
                    else:
                        UF.add_neighbor(pix, neighbor)

        all_history = UF.get_top_level_history()
        history = filter(self.is_possibly_MSER, all_history)

        # Identify connected components corresponding to the MSERs
        msers = []
        for parent_comp in history:
            ph = [parent_comp.size, parent_comp.size]
            pq = [float('inf'), float('inf')]
            MSER.find_msers(parent_comp, msers, ph, pq, self.max_area,
                            self.min_area, self.max_var)

        contours = []
        for mser in msers:
            neighbors = mser.neighbors.difference(mser.members)
            border_pixels = np.array(list(neighbors))
            contour = np.apply_along_axis(id2coords, 0, border_pixels,
                                          image_shape).T
            # Make contours OpenCV compatible (switch x and y)
            contour = np.flip(contour, axis=1)
            contour = MSER.order_rotationally(contour)
            contours.append(contour)
        return contours
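find_neighbors and id2coords are helpers defined elsewhere in this project. Judging from the np.apply_along_axis calls, find_neighbors takes the array of flat pixel indices plus the image shape and returns the 4-connected neighbours stacked so that .T yields one row of four neighbour ids per pixel. The version below is an assumption written to match that usage, not the original helper.

import numpy as np

# Hypothetical helper: 4-connected neighbours of flat pixel indices.
# Neighbours that would fall outside the image are returned as -1 and are
# skipped by the bounds check in detect().
def find_neighbors(flat_ids, image_shape):
    rows, cols = image_shape
    r, c = np.unravel_index(flat_ids, image_shape)
    up = np.where(r > 0, flat_ids - cols, -1)
    down = np.where(r < rows - 1, flat_ids + cols, -1)
    left = np.where(c > 0, flat_ids - 1, -1)
    right = np.where(c < cols - 1, flat_ids + 1, -1)
    return np.stack([up, down, left, right])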
Example #19
def perform_cluster(corpus, num_processes, min_sample):
    first_articles = [(obj.messageTitle, obj.messageContent) for obj in corpus]
    first_cut_results = nltk_multipro_new.multi_cut_words(
        first_articles, num_processes)
    logger.info('first_cut_results end....')

    # Build the TF-IDF matrix
    from utils import tfidfWeight
    first_tf_matrix = tfidfWeight.getTfidf(first_cut_results, True)
    logger.info('first_tfidf_matrix end....')

    # Clustering algorithm
    first_labels = run_cluster(first_tf_matrix)
    first_cluster_results = first_get_return_data(first_labels, corpus)
    logger.info('first_cluster_results end....')

    for label, dict_ in first_cluster_results.items():
        logger.debug('label:%d, cluster_topic: %s, ids: %s' %
                     (label, dict_['title_list'], '-'.join(dict_['ids'])))

    # If the first-pass clustering produced no results
    cluster_results = []
    if len(first_cluster_results) == 0:
        logger.info('first_cluster_results is empty; randomly selecting 10 items as the clustering result....')
        cluster_results = get_result_corpus(corpus)

    else:
        # Second-pass clustering, based on topic
        second_corpus = [(label, topic['title_list'])
                         for (label, topic) in first_cluster_results.items()
                         if '。'.join(topic['title_list']).strip() != '']
        second_articles = [('。'.join(article[1]), '')
                           for article in second_corpus]

        second_cut_results = nltk_multipro_new.multi_cut_words(
            second_articles, num_processes)
        logger.info('second_cut_results end....')

        ziped_second_corpus_cut_results = zip(second_corpus,
                                              second_cut_results)
        ziped_second_corpus_cut_results = filter(
            lambda t: t[1] != '@' > 0, ziped_second_corpus_cut_results)
        second_corpus = [t[0] for t in ziped_second_corpus_cut_results]
        second_cut_results = [t[1] for t in ziped_second_corpus_cut_results]

        # Build the TF-IDF matrix
        second_tf_matrix = tfidfWeight.getTfidf(second_cut_results, True)
        logger.info('second_tf_matrix end....')

        second_labels = run_cluster(second_tf_matrix)
        second_cluster_results = second_get_return_data(
            second_labels, second_corpus)
        logger.info('second_cluster_results end....')

        # Merge the clustering results
        first_ids = [[label]
                     for (label, topic) in first_cluster_results.items()]
        logger.info('first_ids: %s', first_ids)

        second_ids = [
            clusters.keys()
            for (label, clusters) in second_cluster_results.items()
            if (len(clusters)) > 1
        ]
        logger.info('second_ids: %s', second_ids)

        first_ids.extend(second_ids)
        u = unionfind.UnionFind(first_ids)
        u.create_tree()
        ids_list = u.get_tree()
        logger.info('ids_list: %s', ids_list)

        cluster_results = return_data(ids_list, first_cluster_results,
                                      min_sample)
        logger.info('cluster_results....')

    return cluster_results
Example #20
	def localsearch(self):
		self.candidates= {}
		global localcount
		self.neis=self.conquered_kingdoms[:]
		for i in self.neis:
			self.T = 2000000000
			self.p = exp(-1/self.T)
			self.copykingdoms = self.conquered_kingdoms[:]
			self.copywalks = self.closed_walk[:]
			passs = False
			# it means that the node we are changing is at the last part
			fullcopywalk = False

			potential = -self.M[i][i]
			checker = True
			index = i
			NTR = {}
			size = 0
	# check whether below really removes or not
			self.copykingdoms.remove(i)
			dontconquer = index
			for j in self.getn(index):
				jindex = j[0]
				if jindex not in self.conquered_kingdoms:
					for k in self.getn(jindex):
						kindex = k[0]
						if kindex in self.copykingdoms:
							checker = False
					if checker:
						size += 1
						NTR[jindex] = [self.M[i][jindex] + self.M[jindex][jindex],0,0]
			first={}
			uniongroup={}
			unioned=[]
			uf = unionfind.UnionFind(len(NTR.keys()))
			if len(NTR.keys()) == 0:
				passs = True
			else:
				for p in NTR:
					root = None
					# dont know whether use below
					addiunion = False
					neigh = self.getns(p)
					if p not in unioned:
						if len(neigh) > 0:
							uniongroup[p] = [p]
							unioned += [p]
							root = p
						else:
							first[p] = [self.M[p][dontconquer] + self.M[p][p],0,0]
					else:
						root = uf._root(p)
					for j in neigh:
						if j in NTR.keys():
							uf.union(p,j)
						if j not in unioned:
							uniongroup[root] += [j]
							unioned+=[j]

			self.newclosedwalk=[]

			for q in first:
				back = self.closed_walk[self.conquered_at[i] - 2]
				try :
					front = self.closed_walk[self.conquered_at[i]]
				except:
					front = None
				if q == back:
					self.copykingdoms +=[q]
					size -= 1
					if size != 0:
						potential += self.M[q][q]
					else :
						newbackcost, newpath = self.dijkreturn(back)
						self.copywalks = self.closed_walk[:back] + newpath
						potential += newbackcost - self.originalbackcost + self.M[q][q] - self.M[i][q]
						fullcopywalk = True
					# 	think about it

				# check if front has str type
				elif q == front:
					self.copykingdoms += [q]
					potential += self.M[q][q]
					size -= 1
				# ----------------------------------------maybe I don't need below
				else:
					potential += first[q][0]
					self.copykingdoms+=[q]
					self.newclosedwalk += [q, dontconquer]
			# ----------------------------------------------

			usedroot=[]
			submains=[]
			for q in unioned:
				tosubgraph=[]
				root = uf._root(q)
				if root not in usedroot:
					usedroot += [root]
					for j in uniongroup[root]:
						tosubgraph += [j]
				self.subG = self.G.subgraph(tosubgraph)

				# !!!!!!!self.subG    -- adjacency matrix. check!
				self.subG[dontconquer][dontconquer] = 4000000001
				submains += [submain(self.list_of_kingdom_names, dontconquer, self.subG, self.subG, 0, params=[])]

			for q in submains:
				self.newclosedwalk += q.closed_walk
				self.copykingdoms += q.conquered_kingdoms
				potential += q.totalcost
			dontconquerindex = self.conquered_at[dontconquer] - 1
			if not fullcopywalk:
				self.copywalks = self.closed_walk[:dontconquerindex + 1] + self.newclosedwalk + self.closed_walk[dontconquerindex + 1:]
			if not passs:
				self.candidates[potential] = [self.copykingdoms, self.copywalks]
		if self.candidates:
			finalpotential = min(self.candidates)
			if finalpotential < 0:
				localcount += 1
				self.closed_walk = self.candidates[finalpotential][1]
				self.conquered_kingdoms = self.candidates[finalpotential][0]
				self.totalcost += finalpotential
				self.T -= 1
				if self.T < 0:
					self.T = 0.00001
			elif self.p > random.random():
				self.T -= 1
				if self.T < 0:
					self.T = 0.00001
				coun = 0
				for l in NTR:
					if coun ==0:
						self.neis +=[l]
						coun +=1
						print('ha')
Example #21
    def __init__(self, graph):
        self.graph = graph
        self.sets = u.UnionFind()
        self.my_queue = q.Queue(20)
Example #22
    def setUp(self):
        self.forest = unionfind.UnionFind(10)
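A companion test method that could follow this fixture; the union/find method names below are an assumption rather than the package's documented API:

    def test_union_groups_elements(self):
        # union/find names are assumed for illustration
        self.forest.union(0, 1)
        self.forest.union(1, 2)
        self.assertEqual(self.forest.find(0), self.forest.find(2))
        self.assertNotEqual(self.forest.find(0), self.forest.find(3))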
Example #23
def check_completed(now, prv, size):
    uf = unf.UnionFind(len(prv))
    for i in range(size):
        uf.unite(i, prv[i])
        uf.unite(i, now[i])
    return uf.one_roop()
def resume():
    if is_resume():
        term = 10000
        start_height = cdq.get_max_height()
        end_height = dq.get_max_height()
        pool_num = multiprocessing.cpu_count()//2
        s_index = cdq.get_max_address()
        u = uf.UnionFind(dq.get_max_address() - s_index + 1)
        stime = time.time()
        try:
            for sheight, eheight in zip(range(start_height, end_height, term), \
                                    range(start_height+term, end_height+term, term)):
                if eheight >= end_height:
                    eheight = end_height + 1

                with multiprocessing.Pool(pool_num) as p:
                    result = p.imap(multi_input, range(sheight, eheight))
                    for addr_list in result:
                        for addr_set in addr_list:
                            addr_1 = addr_set[0]
                            addr_2 = addr_set[1]
                            u.union(int(addr_1) - s_index, int(addr_2) - s_index)           
                etime = time.time()
                print('height: {}, time:{}'.format(eheight, etime-stime))
            del u.rank

        except KeyboardInterrupt:
            print('Keyboard Interrupt Detected! Commit transactions...')
            cdq.commit_transactions()
        
    addr_list = []
    count = 0
    for index, cluster in enumerate(u.par):
        addr_list.append((str(index + s_index), u.find(cluster) + s_index))
        count += 1
    df = pd.DataFrame(addr_list, columns =['Address', 'ClusterNum'])
    mi_group = mi_df.groupby('ClusterNum')

    for cluster_number, addr_group in mi_group:
        if cluster_number != -1:
            addr_list = list(addr_group.Address)
            cluster_num_list = list(cdq.get_cluster_number(addr_list))
            if len(cluster_num_list) <= 1:
                if cluster_num == -1:
                    insert_cluster_many(list(zip(addr_list, [cluster_number] * len(addr_list))))
                else:
                    insert_cluster_many(list(zip(addr_list, [cluster_num] * len(addr_list))))
            else:
                cluster_num_list.sort()
                cluster_num = cluster_num_list.pop(0)
                if cluster_num == -1:
                    cluster_num = cluster_num_list.pop(0)
                #TODO: if the same address already exists, update it; otherwise insert it
                update_cluster_many(list(zip([cluster_num] * len(addr_list), addr_list)))

    '''
    Function that keeps Bitcoin address clusters up to date.
    1. Compare the current address with the maximum address (keeping a Meta table is recommended).
    2. If the current address differs from the maximum address, start clustering.
    3. start_height = Metatable.blk + 1
       end_height = dq.get_max_height()
    4. index = cur_addr
    5. uf.UnionFind(max_addr - cur_addr + 1)
    6. Similar to the code below:
       ** u.union(int(addr_1) - index, int(addr_2) - index) ** ==> step 1: union
          addr_list.append((str(index) + index, u.find(cluster) + index)) ==> step 2: clustering
          df = pd.DataFrame(addr_list)
          group on the cluster number and take each address list: ==> db write
            - fetch every cluster number that contains these addresses;
              if there is none, add the addresses as-is
            - if there is exactly one cluster number, add the addresses under it
            - if there are several, add under the smallest one and
              update the rows that belong to the other clusters
    '''
    
    
def main(args):
    
    term = 10000
    start_height = 1
    end_height = dq.get_max_height()
    pool_num = multiprocessing.cpu_count()//2  
    cdq = ClusterDB(args.dbpath)
    
    stime = time.time()
    u = uf.UnionFind(int(dq.get_max_address())+1)
    try:
        for sheight, eheight in zip(range(start_height, end_height, term), \
                                    range(start_height+term, end_height+term, term)):
            if eheight >= end_height:
                eheight = end_height + 1

            with multiprocessing.Pool(pool_num) as p:
                result = p.imap(multi_input, range(sheight, eheight))
                for addr_list in result:
                    for addr_set in addr_list:
                        addr_1 = addr_set[0]
                        addr_2 = addr_set[1]
                        u.union(int(addr_1), int(addr_2))           
            etime = time.time()
            print('height: {}, time:{}'.format(eheight, etime-stime))
        del u.rank
        db_write(stime, cdq, u)

    except KeyboardInterrupt:
        print('Keyboard Interrupt Detected! Commit transactions...')
        cdq.commit_transactions()

            
if __name__=="__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Heuristics Clusterings')
    parser.add_argument('--dbpath', '-d', type=str,
                        required=True,
                        help='insert make dbpath')
    parser.add_argument('--resume', '-r', type=bool,
                        default=False,
                        help='execute resume')

    args = parser.parse_args()
    main(args)
Example #25
def create_unionfind_ds(edgelist):
    nodes = create_node_list(edgelist)
    UFDS = uf.UnionFind(nodes)
    return UFDS
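create_node_list is referenced here but not included in this snippet. A minimal sketch of what it presumably does, assuming edgelist is a sequence of (u, v) pairs:

def create_node_list(edgelist):
    # Collect the distinct endpoints of the edge list (order not significant).
    nodes = set()
    for u, v in edgelist:
        nodes.add(u)
        nodes.add(v)
    return list(nodes)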