Example #1
    def test_pop(self):
        d = Dict()

        for x in range(10):
            d[str(x)] = x

        self.assertEqual(d.pop("300", 100), 100)
Example #2
    def test_keys(self):
        d = Dict()
        d[("abc", 1)] = 1
        d[3.3] = 2
        d[30] = 3
        d["test1234"] = 4

        self.assertEqual(hash27("".join([str(k) for k in d])),
                         7766555225202364718)
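
The expected values in these tests come from hash27, which reproduces Python 2.7's string hashing under Python 3. A minimal sketch of that algorithm, assuming ASCII input and a 64-bit build with hash randomization disabled (the name py27_string_hash is illustrative, not part of the library):

def py27_string_hash(s):
    """Sketch of CPython 2.7's string hash on a 64-bit build."""
    if not s:
        return 0
    x = ord(s[0]) << 7
    for ch in s:
        x = ((1000003 * x) ^ ord(ch)) & 0xFFFFFFFFFFFFFFFF  # wrap at 64 bits
    x ^= len(s)
    if x >= 2 ** 63:  # reinterpret as a signed 64-bit value
        x -= 2 ** 64
    return -2 if x == -1 else x  # CPython reserves -1 as an error code

Under these assumptions, py27_string_hash("100") should equal hash("100") on a stock 64-bit Python 2.7.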
Example #3
    def test_popitem(self):
        d = Dict()

        for x in range(500):
            d[str(x)] = x

        d.popitem()

        self.assertEqual(hash27("".join(d)), -434207861779954688)
Example #4
    def test_fromkeys(self):
        s = []

        for x in range(500):
            s.append(str(x))

        d = Dict.fromkeys(s)

        self.assertEqual(hash27("".join(d)), -7925872281736336380)
Example #5
    def test_copy(self):
        d = Dict()

        for x in range(500):
            d[str(x)] = x

        d = d.copy()

        self.assertEqual(hash27("".join(d)), 1141231293364439680)
Example #6
    def test_delete(self):
        d = Dict()

        for x in range(500):
            d[str(x)] = x

        del d["53"]
        d.pop("155")

        self.assertEqual(hash27("".join(d)), -8652364590473687932)
Example #7
    def test_update(self):
        d = Dict()

        for x in range(500):
            d[str(x)] = x

        d["255"] = "abc"
        d["100"] = "123"

        self.assertEqual(hash27("".join(d)), -7925872281736336380)
Example #8
    def test_large(self):
        d = Dict()

        for x in range(60000):
            d[str(x)] = x

        # Keys are str(x) and values are x, so the joined keys and the
        # joined stringified values are the same string and hash identically
        self.assertEqual(hash27("".join(d)), -35326655653467556)
        self.assertEqual(hash27("".join([str(x) for x in d.values()])),
                         -35326655653467556)
Example #9
    def test_small(self):
        d = Dict()

        for x in range(15):
            d[str(x)] = x

        # Keys are str(x) and values are x, so the joined keys and the
        # joined stringified values are the same string and hash identically
        self.assertEqual(hash27("".join(d)), 6636034109572507556)
        self.assertEqual(hash27("".join([str(x) for x in d.values()])),
                         6636034109572507556)
Example #10
    def test_clear(self):
        d = Dict()

        for x in range(500):
            d[str(x)] = x

        d.clear()

        for x in range(1000, 1500):
            d[str(x)] = x

        self.assertEqual(hash27("".join(d)), -1473514505880218088)
Example #11
    def test_pickle(self):
        d = Dict()

        for x in range(500):
            d[str(x)] = x

        del d["300"]

        # Pickle and reload object
        data = pickle.dumps(d)
        d = pickle.loads(data)

        self.assertEqual(hash27("".join(d)), 6818550152093286356)
Example #12
    def test_merge(self):
        # Build list of (key, value) pairs to preserve insertion ordering
        d = []
        e = []

        for x in range(200):
            d.append((str(x), x))

        for x in range(200):
            e.append((str(x), x))

        m = Dict(d)
        m.update(e)

        self.assertEqual(hash27("".join(m)), -5846033856052761336)
        self.assertEqual(hash27("".join([str(x) for x in m.values()])),
                         -5846033856052761336)
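
Taken together, these methods read like a unittest suite for a Python 2.7-compatible Dict. A minimal harness that would run them might look like the following; the py27hash import paths are an assumption, and the actual package layout may differ:

import pickle
import unittest

from py27hash.dict import Dict    # assumed module path
from py27hash.hash import hash27  # assumed module path

class DictTests(unittest.TestCase):
    # the test_* methods shown above go here; one shown for illustration
    def test_pop(self):
        d = Dict()
        for x in range(10):
            d[str(x)] = x
        # pop() returns the default when the key is absent
        self.assertEqual(d.pop("300", 100), 100)

if __name__ == "__main__":
    unittest.main()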
Example #13
def core_removal(threshold, graph):
    if len(graph) == 1:  # need at least two nodes in the graph...
        return [graph]

    avg_deg, density = graph_stats(graph)
    if density >= threshold:
        return [graph]
    else:
        # find and remove core nodes; create connected subcomponents
        core_nodes = get_core_nodes(graph, avg_deg)
        result = []
        subgraphs = []
        for v, n in graph.items():
            if v in core_nodes: continue
            n = n - core_nodes  # note that we're reassigning n
            for s in subgraphs:
                if not n.isdisjoint(s):
                    s |= n
                    break
            else:
                subgraphs.append(n | Set([v]))
        # connected subcomponent joining
        i = 0
        while i < len(subgraphs) - 1:
            j = i + 1
            while j < len(subgraphs):
                if not subgraphs[i].isdisjoint(subgraphs[j]):
                    subgraphs[i] |= subgraphs[j]
                    subgraphs.pop(j)
                else:
                    j += 1
            i += 1
        # recursive core removal
        for s in subgraphs:
            tresults = core_removal(threshold,
                                    Dict((v, graph[v] & s) for v in s))
            for tc in tresults:
                nodes = Set()
                for v, n in tc.items():
                    nodes.add(v)
                    n |= graph[v] & core_nodes
                for c in core_nodes:
                    tc[c] = graph[c] & (nodes | core_nodes)
            result += tresults
        return result
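
core_removal relies on two helpers that are not shown: graph_stats and get_core_nodes. Under the usual COACH definitions, density is 2|E| / (|V|(|V|-1)) and the core contains every vertex whose degree is at least the average; a minimal sketch under those assumptions, for graphs stored as {vertex: neighbor-set}:

def graph_stats(graph):
    """Return (average degree, density) for an undirected graph (sketch)."""
    degrees = [len(n) for n in graph.values()]
    n = len(graph)
    avg_deg = sum(degrees) / float(n)
    # sum(degrees) counts each edge twice, so density = sum / (n * (n - 1))
    density = sum(degrees) / float(n * (n - 1)) if n > 1 else 0.0
    return avg_deg, density

def get_core_nodes(graph, avg_deg):
    """Vertices whose degree is at least the average degree (sketch)."""
    return Set(v for v, n in graph.items() if len(n) >= avg_deg)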
Example #14
    def cluster(self, verbose=False):

        data = Dict()

        with open(self.filename, 'r') as f:
            for line in f:
                a, b = line.split()[:2]

                # record the edge in both directions
                if a not in data:
                    data[a] = Set()
                data[a].add(b)
                if b not in data:
                    data[b] = Set()
                data[b].add(a)

        # step 1: find preliminary cores
        SC = []  # currently-detected preliminary cores
        count = 0
        for vertex, neighbors in tqdm(data.items()):
            # build neighborhood graph
            vertices = Set([vertex]) | neighbors
            size1_neighbors = Set()
            graph = {}
            for v in vertices:
                n = data[v] & vertices
                if len(n) > 1:  # ignore size-1 vertices
                    graph[v] = n
                else:
                    size1_neighbors.add(v)
            if len(graph) < 2:  # not enough connections in this graph
                continue
            graph[vertex] -= size1_neighbors

            # get core graph
            avg_deg, density = graph_stats(graph)
            core_nodes = get_core_nodes(graph, avg_deg)
            vertices = Set(graph.keys())
            for v in vertices - core_nodes:
                del graph[v]
            for n in graph.values():
                n &= core_nodes
            if len(graph) < 2:  # not enough connections in this graph
                continue
            graph_nodes = Set(graph)

            # inner loop
            for sg in core_removal(self.density_threshold, graph):
                while True:
                    _, density = graph_stats(sg)
                    # if density threshold met, stop; else, remove min degree node
                    if density >= self.density_threshold: break
                    w = min(sg.items(), key=lambda k: len(k[1]))[0]
                    del sg[w]
                    for n in sg.values():
                        n.discard(w)

                sg_nodes = Set(sg)
                while graph_nodes - sg_nodes:
                    w = max(graph_nodes - sg_nodes,
                            key=lambda v: len(graph[v] & sg_nodes))
                    new_sg = sg.copy()
                    for v, n in new_sg.items():
                        if w in graph[v]:
                            n.add(w)
                    new_sg[w] = graph[w] & sg_nodes
                    _, density = graph_stats(new_sg)
                    if density < self.density_threshold: break
                    sg = new_sg
                    sg_nodes.add(w)

                # redundancy filtering
                max_sim = -1
                for i in range(len(SC)):
                    sim = NA_score(Set(SC[i]), sg_nodes)
                    if sim > max_sim:
                        max_sim = sim
                        index = i
                if max_sim < self.affinity_threshold:
                    SC.append(sg)
                else:
                    _, density_i = graph_stats(SC[index])
                    if density * len(sg) > density_i * len(SC[index]):
                        SC[index] = sg

        # step 2: adding peripheral proteins
        clusters = Set()
        for core in SC:
            nodes = frozenset(core)
            neighbors = reduce(lambda x, y: x | y,
                               (data[v] for v in nodes)) - nodes
            neighbors -= Set(v for v in neighbors
                             if float(len(data[v] & nodes)) /
                             len(nodes) <= self.closeness_threshold)
            clusters.add(tuple(nodes | neighbors))

        self.clusters = clusters

        print("Found %d clusters/protein complexes" % (len(clusters)))
        return clusters


# if __name__ == '__main__':
#     filename = "../data/unweighted_example_network.txt"
#     c = COACH(filename)
#     c.cluster()
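
The redundancy-filtering step scores a candidate core against previously detected ones with NA_score. In the COACH paper this is the neighborhood affinity NA(A, B) = |A ∩ B|^2 / (|A| * |B|); a minimal sketch under that assumption:

def NA_score(a, b):
    """Neighborhood affinity of two node sets (sketch):
    NA(A, B) = |A & B|**2 / (|A| * |B|)."""
    if not a or not b:
        return 0.0
    return len(a & b) ** 2 / float(len(a) * len(b))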
Example #15
    def cluster(self, verbose=False):
        # data = defaultdict(Set) # node id => neighboring node ids

        data = Dict()
        # read in graph
        with open(self.filename, 'r') as f:
            counter = 0
            for line in f:
                a, b = line.split()[:2]
                counter += 1
                # record the edge in both directions
                if a not in data:
                    data[a] = Set()
                data[a].add(b)
                if b not in data:
                    data[b] = Set()
                data[b].add(a)

        # weights = defaultdict(int)
        weights = Dict()
        for a, b in combinations(data, 2):
            if b not in data[a]: continue
            shared = len(data[a] & data[b])
            # accumulate the shared-neighbor count on both endpoints
            if a not in weights:
                weights[a] = 0
            weights[a] += shared
            if b not in weights:
                weights[b] = 0
            weights[b] += shared

        unvisited = Set(data)
        num_clusters = 0
        clusters = []

        # print(unvisited)
        # return 0

        # seed ordering drives the results: rank by accumulated weight,
        # breaking ties by degree (remaining ties keep Dict iteration order)
        seed_nodes = sorted(data,
                            key=lambda k: (weights[k], len(data[k])),
                            reverse=True)

        for seed in seed_nodes:  # get highest degree node
            if seed not in unvisited: continue

            # start from the seed and its first neighbor (deterministic,
            # since Set iteration order is fixed)
            cluster = Set((seed, next(iter(data[seed]))))

            while True:
                # rank neighbors by the number of edges between the node and cluster nodes
                frontier = sorted((len(data[p] & cluster), p)
                                  for p in Set.union(*((data[n] - cluster)
                                                       for n in cluster)))

                # pop candidates until one meets IN_vk >= T_IN and SP <= 2,
                # or the frontier is exhausted
                found = False
                while frontier and not found:
                    m_vk, p = frontier.pop()
                    if m_vk < self.t_in * len(cluster): break
                    c_2neighbors = data[p] & cluster
                    c_2neighbors.update(*(data[c] & cluster
                                          for c in c_2neighbors))
                    if cluster == c_2neighbors:
                        found = True
                        break

                if not found: break

                # otherwise, add the node to the cluster
                cluster.add(p)

            unvisited -= cluster

            if verbose:
                print(' '.join(cluster))

            num_clusters += 1

            if verbose:
                print(num_clusters, len(cluster), len(unvisited))

            clusters.append(cluster)

            if not unvisited: break

        self.clusters = clusters


# if __name__ == '__main__':
#     filename = "../data/unweighted_example_network.txt"
#     c = IPCA(filename)
#     c.cluster()
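
The admission test in the inner loop combines IPCA's two conditions: the candidate p needs at least t_in * |cluster| edges into the cluster (IN_vk), and every cluster node must lie within two hops of p through cluster members (SP <= 2). A standalone sketch of the two-hop check, exercised on a toy path graph a-b-c:

def within_two_hops(p, cluster, data):
    """True if every node in `cluster` is reachable from `p` within two
    hops, stepping only through cluster members (mirrors the test above)."""
    one_hop = data[p] & cluster
    two_hop = set(one_hop)
    for c in one_hop:
        two_hop |= data[c] & cluster
    return cluster == two_hop

data = {"a": {"b"}, "b": {"a", "c"}, "c": {"b"}}
print(within_two_hops("a", {"b", "c"}, data))  # True: "c" is two hops away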
Example #16
def coach(filename):
    # read protein-protein pairs
    # data = defaultdict(Set)

    data = Dict()

    with open(filename, 'r') as f:
        for line in f:
            a, b = line.split()[:2]

            # record the edge in both directions
            if a not in data:
                data[a] = Set()
            data[a].add(b)
            if b not in data:
                data[b] = Set()
            data[b].add(a)

    # step 1: find preliminary cores
    SC = []  # currently-detected preliminary cores
    count = 0
    for vertex, neighbors in tqdm(data.items()):
        # build neighborhood graph
        vertices = Set([vertex]) | neighbors
        size1_neighbors = Set()
        graph = {}
        for v in vertices:
            n = data[v] & vertices
            if len(n) > 1:  # ignore size-1 vertices
                graph[v] = n
            else:
                size1_neighbors.add(v)
        if len(graph) < 2:  # not enough connections in this graph
            continue
        graph[vertex] -= size1_neighbors

        # get core graph
        avg_deg, density = graph_stats(graph)
        core_nodes = get_core_nodes(graph, avg_deg)
        vertices = Set(graph.keys())
        for v in vertices - core_nodes:
            del graph[v]
        for n in graph.values():
            n &= core_nodes
        if len(graph) < 2:  # not enough connections in this graph
            continue
        graph_nodes = Set(graph)

        # inner loop
        for sg in core_removal(graph):
            while True:
                _, density = graph_stats(sg)
                # if density threshold met, stop; else, remove min degree node
                if density >= DENSITY_THRESHOLD: break
                w = min(sg.items(), key=lambda k: len(k[1]))[0]
                del sg[w]
                for n in sg.values():
                    n.discard(w)

            sg_nodes = Set(sg)
            while graph_nodes - sg_nodes:
                w = max(graph_nodes - sg_nodes,
                        key=lambda v: len(graph[v] & sg_nodes))
                new_sg = sg.copy()
                for v, n in new_sg.items():
                    if w in graph[v]:
                        n.add(w)
                new_sg[w] = graph[w] & sg_nodes
                _, density = graph_stats(new_sg)
                if density < DENSITY_THRESHOLD: break
                sg = new_sg
                sg_nodes.add(w)

            # redundancy filtering
            max_sim = -1
            for i in range(len(SC)):
                sim = NA_score(Set(SC[i]), sg_nodes)
                if sim > max_sim:
                    max_sim = sim
                    index = i
            if max_sim < AFFINITY_THRESHOLD:
                SC.append(sg)
            else:
                _, density_i = graph_stats(SC[index])
                if density * len(sg) > density_i * len(SC[index]):
                    SC[index] = sg

    # step 2: adding peripheral proteins
    clusters = Set()
    for core in SC:
        nodes = frozenset(core)
        neighbors = reduce(lambda x, y: x | y,
                           (data[v] for v in nodes)) - nodes
        neighbors -= Set(
            v for v in neighbors
            if float(len(data[v] & nodes)) / len(nodes) <= CLOSENESS_THRESHOLD)
        clusters.add(tuple(nodes | neighbors))

    return clusters
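
A minimal way to exercise coach() end to end, assuming a whitespace-separated edge list in the same format as the ../data/unweighted_example_network.txt path referenced earlier (the file name here is illustrative):

# edges.txt: one "protein_a protein_b" pair per line
clusters = coach("edges.txt")
for complex_nodes in clusters:
    print(len(complex_nodes), " ".join(sorted(complex_nodes)))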