示例#1
0
def simpleTests():
    """
    Smoke test for UnionFind.

    Builds ten singleton sets, merges a few of them and checks the parent
    and order entries kept in uf.data as well as the group count.  Raises
    AssertionError at the first expectation that does not hold.
    """
    members = range(10)
    uf = UnionFind()
    for member in members:
        uf.makeSet(member)

    # uf.data[x] is read as (parent, order); see the assertions below.
    parentOf = lambda x: uf.data[x][0]
    orderOf = lambda x: uf.data[x][1]

    # Fresh sets: every element is its own representative.
    assert all(uf.find(m) == m for m in members), "Parent not initialized correctly."
    assert uf.countGroups() == 10, "Counted wrong number of groups."

    # Two singletons merged: 0 becomes the root and its order rises to 1.
    uf.union(0,1)
    assert uf.find(1) == 0, "Parent not updated correctly."
    assert orderOf(0) == 1, "Order not updated for equal trees correctly."
    assert uf.countGroups() == 9, "Counted wrong number of groups."

    # A singleton merged into the taller tree: the root's order is unchanged.
    uf.union(1,2)
    assert uf.find(2) == 0, "Parent not updated correctly."
    assert orderOf(0) == 1, "Order not updated for unequal trees correctly."
    assert uf.countGroups() == 8, "Counted wrong number of groups."

    # Build a second tree rooted at 3, then join it with the tree rooted at 0.
    uf.union(3,4)
    uf.union(4,5)
    uf.union(0,3)
    assert orderOf(0) == 2, "Order not updated for unequal trees."
    # 5 keeps pointing at 3 until a find() on it rewrites the parent.
    assert parentOf(5) == 3, "Parent should not be updated until find operation."
    assert uf.find(5) == 0, "Find operation returned wrong parent."
    assert parentOf(5) == 0, "Parent should have been updated."
    assert uf.countGroups() == 5, "Counted wrong number of groups."
class Kruskal:
    """
    Kruskal's algorithm over a weighted edge list.

    Offers a minimum-spanning-tree pass (mstKruskal) and a k-clustering pass
    (clusterKruskal).  Both passes merge sets in the single union-find kept in
    self.ufSet, so use a fresh instance for each pass.
    """

    def __init__(self, data):
        """
        Parse the graph description and sort the edges by weight.

        data -- sequence of text lines.  The first whitespace-separated token
                of data[0] is the node count.  Every following non-blank line
                is "<node> <node> <weight>"; node numbers are shifted down by
                one so they match the sets 0..nodes-1 created here.
        """
        nodes = int(data[0].split()[0])
        self.ufSet = UnionFind()
        for n in range(nodes):
            self.ufSet.makeSet(n)
        # END for

        self.edges = []
        for line in data[1:]:
            # A real list is required: map() returns a lazy iterator on
            # Python 3 and cannot be indexed.
            row = [int(token) for token in line.split()]
            if not row:
                # Tolerate blank lines (e.g. a trailing newline in the input).
                continue
            # END if
            self.edges.append((row[0] - 1, row[1] - 1, row[2]))
        # END for

        self.edges.sort(key=itemgetter(2))

    # END __init__

    def mstKruskal(self):
        """
        Greedily pick the cheapest edges that join two different sets.

        Stores the chosen edges, as (node, node, weight) tuples, in self.mst
        and returns the sum of their weights.
        """
        mst = []
        total = 0
        for edge in self.edges:
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                # Both ends are already connected; the edge would close a cycle.
                continue
            # END if

            self.ufSet.union(edge[0], edge[1])
            mst.append(edge)
            total += edge[2]
        # END for

        self.mst = mst
        return total

    # END mstKruskal

    def clusterKruskal(self, k):
        """
        Merge sets along the cheapest edges until at most k groups remain.

        Returns the weight of the cheapest edge that still joins two different
        groups at that point, or None when no such edge exists.
        """
        # Parenthesised single-argument print produces the same output on
        # Python 2 (print statement) and Python 3 (print function).
        print("Running Clustering, k={0}".format(k))
        for edge in self.edges:
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                continue
            # END if

            # Test the group count BEFORE merging.  Checking only after a
            # merge overshoots the target when the structure already holds
            # k groups on entry.
            if self.ufSet.countGroups() <= k:
                print("Smallest unallocated edge: {0}".format(edge))
                return edge[2]
            # END if

            self.ufSet.union(edge[0], edge[1])
        # END for

        return None

    # END clusterKruskal
示例#3
0
class ClusterHamming:
    """
    Groups bit strings whose Hamming distance is below a threshold.

    Nodes are numbered 0..nodes-1 in input order.  self.hammingData maps each
    distinct bit string to the list of node numbers carrying it, and self.uf
    is the union-find that records which nodes share a cluster.
    """

    def __init__(self, data):
        """
        data -- sequence of text lines.  data[0] is "<node count> <bit count>";
                data[1 + n] holds the bits of node n, optionally separated by
                whitespace.  The sequence is read, never modified.
        """
        # Index instead of pop(0) so the caller's list is left intact.
        nodes, self.bits = [int(token) for token in data[0].split()]
        self.uf = UnionFind()
        for n in range(nodes):
            self.uf.makeSet(n)
        # END for

        self.hammingData = defaultdict(list)
        for n in range(nodes):
            # Drop ALL whitespace, including a trailing newline, so equal bit
            # strings always produce the same key.
            key = ''.join(data[n + 1].split())
            self.hammingData[key].append(n)
        # END for

    # END __init__

    def flip(self, s, flipbits):
        """
        Given an input string (s) and an iterable of indices (flipbits),
        returns a new string with the characters at those indices flipped:
        '1' becomes '0', anything else becomes '1'.

        The number of distinct indices determines the resulting hamming
        distance.
        """
        toggled = set(flipbits)
        return ''.join(
            ('0' if c == '1' else '1') if i in toggled else c
            for i, c in enumerate(s)
        )

    # END flip

    def getHammingPermutations(self, s, n):
        """
        Returns s followed by every variant of s obtained by flipping between
        1 and n of its first self.bits positions, i.e. all strings within
        hamming distance n of s.
        """
        result = [s]
        for d in range(1, n + 1):
            for flipbits in combinations(range(self.bits), d):
                result.append(self.flip(s, flipbits))
            # END for
        # END for
        return result

    # END getHammingPermutations

    def printSummary(self):
        """
        Prints every cluster id followed by the bit strings assigned to it.

        Intended to be called after run(): run() unions all nodes that share a
        bit string, so the first node of each list stands for the whole list.
        """
        resultMap = defaultdict(list)
        for key, nodes in self.hammingData.items():
            if not nodes:
                continue
            # END if
            # find() takes a single node, not the list of nodes.
            resultMap[self.uf.find(nodes[0])].append(key)
        # END for

        for cluster, keys in resultMap.items():
            print("\n\nCluster {0}:".format(cluster))
            for key in keys:
                print("\t{0}".format(key))
            # END for
        # END for

    # END printSummary

    def run(self, minDist):
        """
        Unions all nodes whose bit strings differ in fewer than minDist
        positions and returns the resulting number of groups.
        """
        # Shallow copy: entries are popped as they are processed while
        # self.hammingData stays complete.
        data = copy(self.hammingData)
        while data:
            (nodeKey, refNodes) = data.popitem()
            ref = refNodes[0]
            # Nodes with an identical bit string are at distance 0.
            for duplicate in refNodes[1:]:
                self.uf.union(ref, duplicate)
            # END for

            for testNodeKey in self.getHammingPermutations(nodeKey, minDist - 1):
                # Keys already popped were matched against this one when they
                # were processed; keys never present have no nodes.
                if testNodeKey not in data:
                    continue
                # END if

                for n in self.hammingData[testNodeKey]:
                    if self.uf.find(n) == self.uf.find(ref):
                        continue
                    # END if
                    self.uf.union(ref, n)
                # END for
            # END for
        # END while

        return self.uf.countGroups()

    # END run