Exemplo n.º 1
0
class SuperBase:
    """Map words to one representative of their lemma cluster.

    Every lemma known to the ``Lematizer`` becomes an element of a
    ``UnionFind``; all lemmas listed together for one word are merged into
    the same set.  Indexing the object with a word returns the union-find
    representative of one of that word's lemmas.
    """

    # Class-level defaults; both are replaced per instance in __init__.
    superbase = None
    lematizer = None

    def __init__(self, lemat_dict_file):
        """Build the lemma clusters.

        Args:
            lemat_dict_file: passed unchanged to ``Lematizer``.
        """
        self.lematizer = Lematizer(lemat_dict_file)
        self.superbase = UnionFind()

        for lemma in self.lematizer.all_lemats():
            self.superbase.make_set(lemma)

        for _, lemmas in self.lematizer.items():
            representative = None
            for lemma in lemmas:
                # Compare with None explicitly: a falsy representative
                # (e.g. "" or 0) must still be merged with the next lemma.
                if representative is not None:
                    self.superbase.union(representative, lemma)
                representative = self.superbase.find(lemma)

    def __getitem__(self, word):
        """Return the cluster representative for ``word``.

        The word itself is returned when the lematizer does not know it,
        when it has no lemmas at all, or when the lookup raises ``KeyError``.
        """
        try:
            # Only one (arbitrary) lemma is needed; returning from the first
            # iteration avoids materialising the whole collection.
            for lemma in self.lematizer[word]:
                return self.superbase.find(lemma)
        except KeyError:
            pass
        return word

    def items(self):
        """Return a lazy iterator of ``(word, representative)`` pairs."""
        return ((w, self[w]) for (w, _) in self.lematizer.items())
def clustering(edge_list, count_nodes, clusters):
    """Merge nodes along ``edge_list`` until ``clusters`` groups remain.

    Edges are scanned in the order given.  Each edge that joins two
    different groups merges them, until the number of groups has dropped to
    ``clusters``; the next edge that still joins two different groups is
    then reported.

    Args:
        edge_list: sequence of entries shaped ``(cost, (node_a, node_b))``.
            Presumably sorted by increasing cost -- the function itself does
            not sort; verify against the caller.
        count_nodes: number of nodes; nodes are labelled ``1..count_nodes``.
        clusters: number of groups to stop at.

    Returns:
        ``(cost, u)`` where ``cost`` is element 0 of the first remaining
        edge that crosses two groups and ``u`` is the ``UnionFind``.

    Raises:
        IndexError: if ``edge_list`` is exhausted before such an edge is
            found (the original failed the same way, via out-of-range
            indexing).
    """
    u = UnionFind(list(range(1, count_nodes + 1)))
    for entry in edge_list:
        node_a, node_b = entry[1][0], entry[1][1]
        if u.find(node_a) == u.find(node_b):
            # Both endpoints already share a group: nothing to merge.
            continue
        if count_nodes <= clusters:
            return entry[0], u
        u.union(node_a, node_b)
        count_nodes -= 1
    raise IndexError(
        "edge_list exhausted before reaching the requested number of clusters"
    )
Exemplo n.º 3
0
    def test_union(self):
        """After ``union(foo, bar)`` both nodes resolve to ``bar``."""
        uf = UnionFind()
        first = Node("foo")
        uf.add(first)

        second = Node("bar")
        uf.add(second)

        # Before merging, every node is its own representative.
        for node in (first, second):
            self.assertEqual(node, uf.find(node))

        uf.union(first, second)

        # After merging, the second argument of union() is the representative.
        for node in (first, second):
            self.assertEqual(second, uf.find(node))
Exemplo n.º 4
0
def cluster(graph, k):
    """Merge the nodes of ``graph`` until ``k`` clusters remain.

    Args:
        graph: object exposing ``edges`` and a ``nodes`` mapping.
        k: number of clusters to stop at.

    Returns:
        ``(mindist, followers)``: the value computed by ``get_mindist`` on
        the remaining heap, and the ``followers`` attribute of the
        union-find.
    """
    # NOTE(review): this relies on ``heapify`` RETURNING a heap whose items
    # unpack as (cost, edge).  The stdlib ``heapq.heapify`` works in place
    # and returns None -- confirm which helper is imported here.
    edges = heapify(graph.edges)

    u = UnionFind()
    for node in graph.nodes.values():
        u.add(node)

    while u.clusters > k:
        _cost, edge = heappop(edges)
        # Edges for which cycle() is true are skipped.
        if not cycle(u, edge):
            u.union(u.find(edge.v0), u.find(edge.v1))

    return get_mindist(u, edges), u.followers
Exemplo n.º 5
0
    def clustering(self, groups, minheap):
        """Merge the graph's nodes into ``groups`` clusters.

        Args:
            groups: number of clusters to stop at.
            minheap: heap of edges (one direction only) offering ``pop()``
                and ``peek()``; each item offers ``get_data()`` (the two
                endpoints) and ``get_key()``.  Presumably a min-heap keyed
                by edge length -- confirm against the heap implementation.

        Returns:
            ``(unionfind, key)`` where ``key`` is ``get_key()`` of the first
            remaining edge whose endpoints lie in different clusters.
        """
        unionfind = UnionFind(list(self.graph.keys()))
        while unionfind.size() > groups:
            # Keep merging until the desired number of groups is reached.
            curr_edge = minheap.pop().get_data()
            if unionfind.find(curr_edge[0]) == unionfind.find(curr_edge[1]):
                # Both endpoints already share a group.
                continue
            unionfind.union(curr_edge[0], curr_edge[1], True)

        # Drop edges that are internal to a cluster.  The top of the heap is
        # inspected BEFORE it is popped: the previous version popped the
        # first not-yet-examined edge without testing it, which could throw
        # away the very edge that defines the spacing (and it read an
        # unbound variable when no merge was needed at all).
        next_edge = minheap.peek().get_data()
        while unionfind.find(next_edge[0]) == unionfind.find(next_edge[1]):
            minheap.pop()
            next_edge = minheap.peek().get_data()

        # The edge now on top joins two different groups.
        return unionfind, minheap.peek().get_key()
Exemplo n.º 6
0
def kclustering(graph, k):
    """Compute the maximum spacing of a k-clustering.

    Args:
        graph: iterable of ``(u, v, cost)`` edges.  It is consumed once and
            never mutated.
        k: number of clusters to stop at.

    Returns:
        The cost of the cheapest remaining edge whose endpoints are in
        different clusters.

    Raises:
        IndexError: if the edges run out before such an edge is found.
    """
    # Sort once up front; this also materialises one-shot iterables so the
    # node scan below does not exhaust them.
    edges = sorted(graph, key=lambda edge: edge[2])

    nodes = set()
    for u, v, _ in edges:
        nodes.add(u)
        nodes.add(v)
    group = UnionFind(nodes)

    # Walk the sorted list with a cursor instead of list.pop(0), which is
    # O(n) per call.
    position = 0
    while len(group.subtree.keys()) > k:
        u, v, _ = edges[position]
        position += 1
        group.union(u, v)

    # Skip edges whose endpoints are both in the same cluster.
    while True:
        u, v, min_cost = edges[position]
        position += 1
        if group.find(u) != group.find(v):
            return min_cost
Exemplo n.º 7
0
    def recolor_by_connected_components(self):
        """Renumber group and individual colors by connected component.

        A group ``g`` and an individual-at-time ``(i, t)`` are joined when
        ``i`` is a member of ``g`` and both currently have the same color;
        ``(i, t - 1)`` and ``(i, t)`` are joined when the individual keeps
        its color between the two times.  Each resulting component then gets
        a fresh color, numbered from 1 in order of first appearance, which
        is written back to ``self.group_color`` and ``self.ind_color``.
        """
        from unionfind import UnionFind

        gtm = self.gtm
        components = UnionFind()

        for t in gtm.times:
            # Look every element up once before any union; presumably find()
            # lazily registers unseen elements -- confirm in ``unionfind``.
            for g in gtm.time[t]:
                components.find(g)
            for i in gtm.inds:
                components.find((i, t))

            # Join each group with its members of the same color.
            for g in gtm.time[t]:
                for i in gtm.group[g]:
                    if self.group_color[g - 1] == self.ind_color[i - 1][t - 1]:
                        components.union(g, (i, t))
                        components.find(g)

            # Join an individual with itself at the previous time step when
            # its color did not change.
            if t > 1:
                for i in gtm.inds:
                    history = self.ind_color[i - 1]
                    if history[t - 1] == history[t - 2]:
                        components.union((i, t - 1), (i, t))
                        components.find((i, t - 1))

        # Number the components 1, 2, ... in order of first appearance.
        new_color = {}
        for t in gtm.times:
            for g in gtm.time[t]:
                new_color.setdefault(components.find(g), len(new_color) + 1)
            for i in gtm.inds:
                new_color.setdefault(components.find((i, t)),
                                     len(new_color) + 1)

        for g in gtm.groups:
            self.group_color[g - 1] = new_color[components.find(g)]
        for i in gtm.inds:
            for t in gtm.times:
                self.ind_color[i - 1][t - 1] = new_color[components.find((i, t))]
Exemplo n.º 8
0
    def recolor_by_connected_components(self):
        """Assign one new color per connected component.

        Elements are groups ``g`` and individual/time pairs ``(i, t)``.  A
        group is linked to each member that currently has the group's
        color, and an individual is linked across consecutive times when
        its color is unchanged.  New colors are numbered from 1 in order of
        first appearance and stored in ``self.group_color`` and
        ``self.ind_color``.
        """
        from unionfind import UnionFind
        uf = UnionFind()

        for t in self.gtm.times:
            # Every element is looked up once before any union is made.
            for g in self.gtm.time[t]:
                uf.find(g)
            for i in self.gtm.inds:
                uf.find((i, t))

            for g in self.gtm.time[t]:
                for i in self.gtm.group[g]:
                    if not (self.group_color[g-1] == self.ind_color[i-1][t-1]):
                        continue
                    uf.union(g, (i, t))
                    uf.find(g)

            if not t > 1:
                continue
            for i in self.gtm.inds:
                if not (self.ind_color[i-1][t-1] == self.ind_color[i-1][t-2]):
                    continue
                uf.union((i, t-1), (i, t))
                uf.find((i, t-1))

        new_color = {}

        def number(element):
            # Give the element's component the next free color on first sight.
            leader = uf.find(element)
            if leader not in new_color:
                new_color[leader] = len(new_color) + 1

        for t in self.gtm.times:
            for g in self.gtm.time[t]:
                number(g)
            for i in self.gtm.inds:
                number((i, t))

        for g in self.gtm.groups:
            self.group_color[g-1] = new_color[uf.find(g)]
        for i in self.gtm.inds:
            for t in self.gtm.times:
                self.ind_color[i-1][t-1] = new_color[uf.find((i, t))]
def hammond_distances(file_path):
    """Count the clusters formed by the bit codes listed in a file.

    The first line holds two space-separated integers: the number of code
    lines to read and the number of bits per code.  Each following line is a
    code; spaces and the trailing newline are removed before it is added to
    a ``UnionFind`` and handed to ``update_singles`` and ``update_doubles``.

    Args:
        file_path: path of the input file.

    Returns:
        The number of distinct representatives among all entries of the
        union-find.
    """
    # ``with`` guarantees the file is closed even if parsing or one of the
    # helpers raises.
    with open(file_path) as file_stream:
        line_one = file_stream.readline().split(' ')
        count_edges, count_bits = int(line_one[0]), int(line_one[1])
        uf = UnionFind([])
        for _ in range(count_edges):
            code = file_stream.readline()
            code = code.replace(' ', '').replace('\n', '')
            uf.add(code)
            update_singles(uf, code, count_bits)
            update_doubles(uf, code, count_bits)

    return len({uf.find(k) for k in uf._node_titles.keys()})
Exemplo n.º 10
0
    def kruskal(self):
        """Pick edges in priority order, skipping those ``find`` rejects.

        Edges are taken from a ``PriorityQueue`` until it is empty or
        ``self.size`` edges have been kept.  An edge is kept when the
        two-argument ``uf.find`` is falsy for its endpoints' indices
        (presumably "already connected" -- confirm in ``UnionFind``).

        Returns:
            ``(mst, mst_weight)``: the kept edges and the sum of their
            ``weight`` attributes.
        """
        pending = PriorityQueue()
        chosen = []
        total_weight = 0
        uf = UnionFind(len(self.vertexes))

        for edge in self.edges:
            pending.put(edge)

        while not pending.empty() and len(chosen) < self.size:
            candidate = pending.get()
            src = self.vertexes[candidate.origin].index
            dst = self.vertexes[candidate.dest].index

            if uf.find(src, dst):
                continue

            uf.union(src, dst)
            chosen.append(candidate)
            total_weight += candidate.weight

        return chosen, total_weight
Exemplo n.º 11
0
def kruskal_ts(graph, edges):
    """Kruskal's algorithm using the built-in (Timsort) ``sorted``.

    Args:
        graph: iterable of nodes; only the number of nodes is used.
        edges: iterable of indexable entries ``(u, v, weight, ...)``.

    Returns:
        List of ``(weight, u, v)`` tuples for the edges that were kept, in
        increasing weight order.
    """
    by_weight = sorted(edges, key=lambda t: t[2])
    num_nodes = sum(1 for _ in graph)
    components = UnionFind(num_nodes)
    wanted = num_nodes - 1

    picked = []
    for entry in by_weight:
        u = entry[0]
        v = entry[1]
        weight = entry[2]

        # Two-argument find(): presumably true when u and v are already
        # connected -- confirm in ``UnionFind``.
        if not components.find(u, v):
            picked.append((weight, u, v))
            components.union(u, v)

        # Checked after every edge, whether or not it was kept.
        if len(picked) == wanted:
            break

    return picked
Exemplo n.º 12
0
        for j in xrange(y):
            grid.append([int(i) for i in f.readline().split()])

###    print neighbours(grid, 1, 1)

        for j in xrange(y):
            for i in xrange(x):
                height = grid[j][i]
                n = sorted(neighbours(grid, i, j))
                # Check if I'm a sink
                if not n or not min(k[0] for k in n) < height:
                    pass
                else:
                    # Else flow to the lowest neighbour
                    _, __, coord = n[0]
                    uf.union((i, j), coord)

###    for line in grid:
###    	print line

        letters = list("abcdefghijklmnopqrstuvwxyz")
        print "Case #%d:" % (casenum + 1)
        key = {}
        for j in xrange(y):
            for i in xrange(x):
                node = uf.find((i, j)).id
                if node not in key:
                    key[node] = letters.pop(0)
                print key[node],
            print
Exemplo n.º 13
0
	if line.startswith("# y"): break
	if line=="" or line.startswith("#"): continue
	if line.find(',')>=0: line = line.split(',')
	elif line.find(' ')>=0: line = line.split(' ')
	else: raise Exception("Invalid line: "+line)
	if len(line)>=2:
		u,v = int(line[0]), int(line[1])
	else:
		raise Exception("ERROR line: %s"%line)
	uf.union(u,v)

# make lists of groups (no dummies) in each component
vertices = range(1, group_count+1)
component = {}
for v in vertices:
	l = uf.find(v)
	if l not in component:
		component[l] = list()
	component[l].append(v)
for l in component:
	component[l].sort()
component = sorted(component.values())

# build color-conflict graph
adj_list = {}
for t in xrange(len(gtm.times)):
	groups = gtm.time[t+1]
	for i in xrange(len(groups)):
		g = uf.find(groups[i])
		for j in xrange(i+1, len(groups)):
			h = uf.find(groups[j])
Exemplo n.º 14
0
    def forward(self, x, batch: OptTensor=None):
        """Embed the inputs, score kNN edges, cluster along the accepted
        edges and predict one property vector per cluster.

        Args:
            x: input tensor; its first dimension (``x.size()[0]``) is used
                as the number of elements to cluster.
            batch: optional batch-assignment vector, one entry per row of
                ``x``.  When ``None`` every row is assigned to batch 0.

        Returns:
            A 6-tuple ``(out, edge_scores, edge_index, cluster_map,
            cluster_props, cluster_batch)``:
            ``out`` is ``self.output`` applied to the latent features;
            ``edge_scores`` is the squeezed output of
            ``self.edge_classifier``; ``edge_index`` is the kNN graph built
            on ``out``; ``cluster_map`` holds a consecutive cluster id per
            row of ``x``; ``cluster_props`` is ``self.property_predictor``
            applied to the features pooled per cluster; ``cluster_batch``
            is the second value returned by ``max_pool_x``.
        """
        
        if batch is None:
            # No batch vector given: put every row of x into batch 0.
            batch = torch.zeros(x.size()[0], dtype=torch.int64, device=x.device)
        
        '''Embedding1: Intermediate Latent space features (hiddenDim)'''
        x_emb = self.inputnet(x)   

        '''KNN(k neighbors) over intermediate Latent space features'''     
        # The graph is rebuilt from the current x_emb before every
        # convolution; each convolution output is added residually.
        for ec in self.edgeconvs:
            edge_index = knn_graph(x_emb, self.k, batch, loop=False, flow=ec.flow)
            x_emb = x_emb + ec(x_emb, edge_index)
    
        '''
        [1]
        Embedding2: Final Latent Space embedding coords from x,y,z to ncats_out
        '''
        out = self.output(x_emb)
        #plot = self.plotlayer(out)


        '''KNN(k neighbors) over Embedding2 features''' 
        # NOTE(review): ``ec`` here is the loop variable left over from the
        # loop over self.edgeconvs above, i.e. the LAST edge convolution.
        # If self.edgeconvs is empty this line raises NameError.
        edge_index = knn_graph(out, self.k, batch, loop=False, flow=ec.flow)
        
        ''' 
        use Embedding1 to build an edge classifier
        inputnet_cat is residual to inputnet
        '''
        x_cat = self.inputnet_cat(x) + x_emb

        '''
        [2]
        Compute Edge Categories Convolution over Embedding1
        '''
        # Every convolution sees [x_cat, x_emb, x] concatenated along dim 1
        # and is added residually to x_cat.
        for ec in self.edgecatconvs:            
            x_cat = x_cat + ec(torch.cat([x_cat, x_emb, x], dim=1), edge_index)
        
        # Score each edge from the concatenated features of its two endpoints.
        # NOTE(review): squeeze() without a dim also removes the edge
        # dimension if there is exactly one edge; the argmax(dim=1) below
        # would then fail -- confirm this cannot happen.
        edge_scores = self.edge_classifier(torch.cat([x_cat[edge_index[0]], 
                                                      x_cat[edge_index[1]]], 
                                                      dim=1)).squeeze()
        

        '''
        use the predicted graph to generate disjoint subgraphs
        these are our physics objects
        '''
        objects = UnionFind(x.size()[0])
        # Keep the edges whose highest-scoring category is not index 0.
        good_edges = edge_index[:,torch.argmax(edge_scores, dim=1) > 0]
        good_edges_cpu = good_edges.cpu().numpy() 

        # The union-find runs in Python on a CPU copy, one kept edge at a time.
        for edge in good_edges_cpu.T:
            objects.union(edge[0],edge[1])
        cluster_map = torch.from_numpy(np.array([objects.find(i) for i in range(x.shape[0])], 
                                                dtype=np.int64)).to(x.device)
        cluster_roots, inverse = torch.unique(cluster_map, return_inverse=True)
        # remap roots to [0, ..., nclusters-1]
        # (indexing arange(n) with ``inverse`` yields the same values as
        # ``inverse`` itself)
        cluster_map = torch.arange(cluster_roots.size()[0], 
                                   dtype=torch.int64, 
                                   device=x.device)[inverse]
        

        ''' 
        [3]
        use Embedding1 to learn segmented cluster properties 
        inputnet_cat is residual to inputnet
        '''
        x_prop = self.inputnet_prop(x) + x_emb
        # now we accumulate over all selected disjoint subgraphs
        # to define per-object properties
        for ec in self.propertyconvs:
            x_prop = x_prop + ec(torch.cat([x_prop, x_emb, x], dim=1), good_edges)        
        # Pool x_prop per cluster id; max_pool_x also returns a batch vector
        # for the pooled rows.
        props_pooled, cluster_batch = max_pool_x(cluster_map, x_prop, batch)
        cluster_props = self.property_predictor(props_pooled)    

        return out, edge_scores, edge_index, cluster_map, cluster_props, cluster_batch
Exemplo n.º 15
0
def maze(w, h, size=2):
    """Generate a random maze by joining cells with a union-find.

    A coarse grid of cells (one every ``size`` positions) is laid over a
    ``w`` x ``h`` ``Labyrinth``.  Neighbouring cells are connected in
    shuffled order whenever they are not yet in the same set, and the
    corridor between them is opened in the labyrinth, until a single set
    remains.

    Args:
        w: labyrinth width.
        h: labyrinth height.
        size: spacing between coarse cells; also sets the corridor
            half-width ``size // 2 - 1`` (0 for the default ``size=2``).

    Returns:
        The ``Labyrinth`` with ``start`` at ``(0, 0)`` and ``goal`` at
        ``(lab.w - 2, lab.h - 2)``.
    """
    def conv_size(n):
        return (n - 1) // size + 1

    cols, rows = conv_size(w), conv_size(h)
    margin = size // 2 - 1
    uf = UnionFind(cols * rows)
    lab = Labyrinth(w, h)

    # Start with every position set to 0.
    for x in range(w):
        for y in range(h):
            lab[x, y] = 0

    def carve_column(cx, cy):
        # Open (cx, cy) and up to ``margin`` positions above and below it,
        # stopping at the labyrinth border.
        for off in range(1, margin + 1):
            if cy - off < 0:
                break
            lab[cx, cy - off] = True
        lab[cx, cy] = True
        for off in range(1, margin + 1):
            if cy + off >= h:
                break
            lab[cx, cy + off] = True

    def carve_row(cx, cy):
        # Open (cx, cy) and up to ``margin`` positions left and right of it,
        # stopping at the labyrinth border.
        for off in range(1, margin + 1):
            if cx - off < 0:
                break
            lab[cx - off, cy] = True
        lab[cx, cy] = True
        for off in range(1, margin + 1):
            if cx + off >= w:
                break
            lab[cx + off, cy] = True

    # Candidate connections: interior cells link right and down, the last
    # column links down only, the last row links right only.
    edges = []
    for row in range(rows - 1):
        for col in range(cols - 1):
            cell = flatten(row, col, cols, rows)
            edges.extend([(cell, cell + 1), (cell, cell + cols)])

    for row in range(rows - 1):
        cell = flatten(row, cols - 1, cols, rows)
        edges.append((cell, cell + cols))

    for col in range(cols - 1):
        cell = flatten(rows - 1, col, cols, rows)
        edges.append((cell, cell + 1))

    shuffle(edges)

    while len(uf) > 1:
        u, v = edges.pop()
        y1, x1 = unflatten(u, cols, rows)
        _, x2 = unflatten(v, cols, rows)
        if uf.find(u) == uf.find(v):
            # Already connected: opening this wall would create a loop.
            continue
        uf.union(u, v)

        base_x, base_y = size * x1, size * y1
        if x2 - x1 == 1:
            # v is the right-hand neighbour: open a horizontal corridor.
            for step in range(size + 1):
                carve_column(base_x + step, base_y)
        else:
            # Otherwise v is the neighbour below: open a vertical corridor.
            for step in range(size + 1):
                carve_row(base_x, base_y + step)

    lab[0, 0] = 1
    lab.start = (0, 0)
    goal = (lab.w - 2, lab.h - 2)
    lab[goal] = 1
    lab.goal = goal

    return lab
Exemplo n.º 16
0
 def test_find(self):
     """A freshly added node is its own representative."""
     uf = UnionFind()
     node = Node("foo")
     uf.add(node)
     self.assertEqual(node, uf.find(node))