def test_pop(self):
    d = Dict()
    for x in range(10):
        d[str(x)] = x
    self.assertEqual(d.pop("300", 100), 100)
def test_keys(self):
    d = Dict()
    d[("abc", 1)] = 1
    d[3.3] = 2
    d[30] = 3
    d["test1234"] = 4
    self.assertEqual(hash27("".join([str(k) for k in d])), 7766555225202364718)
def test_popitem(self):
    d = Dict()
    for x in range(500):
        d[str(x)] = x
    d.popitem()
    self.assertEqual(hash27("".join(d)), -434207861779954688)
def test_fromkeys(self):
    s = []
    for x in range(500):
        s.append(str(x))
    d = Dict.fromkeys(s)
    self.assertEqual(hash27("".join(d)), -7925872281736336380)
def test_copy(self):
    d = Dict()
    for x in range(500):
        d[str(x)] = x
    d = d.copy()
    self.assertEqual(hash27("".join(d)), 1141231293364439680)
def test_delete(self):
    d = Dict()
    for x in range(500):
        d[str(x)] = x
    del d["53"]
    d.pop("155")
    self.assertEqual(hash27("".join(d)), -8652364590473687932)
def test_update(self):
    d = Dict()
    for x in range(500):
        d[str(x)] = x
    d["255"] = "abc"
    d["100"] = "123"
    self.assertEqual(hash27("".join(d)), -7925872281736336380)
def test_large(self):
    d = Dict()
    for x in range(60000):
        d[str(x)] = x
    # Test key and value
    self.assertEqual(hash27("".join(d)), -35326655653467556)
    self.assertEqual(hash27("".join([str(x) for x in d.values()])), -35326655653467556)
def test_small(self):
    d = Dict()
    for x in range(15):
        d[str(x)] = x
    # Test key and value
    self.assertEqual(hash27("".join(d)), 6636034109572507556)
    self.assertEqual(hash27("".join([str(x) for x in d.values()])), 6636034109572507556)
def test_clear(self):
    d = Dict()
    for x in range(500):
        d[str(x)] = x
    d.clear()
    for x in range(1000, 1500):
        d[str(x)] = x
    self.assertEqual(hash27("".join(d)), -1473514505880218088)
def test_pickle(self):
    d = Dict()
    for x in range(500):
        d[str(x)] = x
    del d["300"]
    # Pickle and reload object
    data = pickle.dumps(d)
    d = pickle.loads(data)
    self.assertEqual(hash27("".join(d)), 6818550152093286356)
def test_merge(self):
    # Build list of (key, value) pairs to preserve insertion ordering
    d = []
    e = []
    for x in range(200):
        d.append((str(x), x))
    for x in range(200):
        e.append((str(x), x))
    m = Dict(d)
    m.update(e)
    self.assertEqual(hash27("".join(m)), -5846033856052761336)
    self.assertEqual(hash27("".join([str(x) for x in m.values()])), -5846033856052761336)
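# The test_* methods above call self.assertEqual, so they presumably live in a
# unittest.TestCase subclass alongside the Dict class and the hash27 helper
# (presumably a function reproducing Python 2.7's string hash() so the
# key-iteration fingerprints are deterministic). A minimal sketch of that
# scaffolding; the class name and import paths are illustrative assumptions,
# not taken from the source:
#
# import pickle
# import unittest
#
# from pydict import Dict      # hypothetical module providing the Dict class
# from pydict import hash27    # hypothetical Python-2.7-compatible hash helper
#
# class TestDict(unittest.TestCase):
#     pass  # the test_* methods above belong here
#
# if __name__ == '__main__':
#     unittest.main()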
def core_removal(threshold, graph):
    if len(graph) == 1:  # need at least two nodes in the graph...
        return [graph]

    avg_deg, density = graph_stats(graph)
    if density >= threshold:
        return [graph]
    else:
        # find and remove core nodes; create connected subcomponents
        core_nodes = get_core_nodes(graph, avg_deg)
        result = []
        subgraphs = []
        for v, n in graph.items():
            if v in core_nodes:
                continue
            n = n - core_nodes  # note that we're reassigning n
            for s in subgraphs:
                if not n.isdisjoint(s):
                    s |= n
                    break
            else:
                subgraphs.append(n | Set([v]))
        # connected subcomponent joining
        i = 0
        while i < len(subgraphs) - 1:
            j = i + 1
            while j < len(subgraphs):
                if not subgraphs[i].isdisjoint(subgraphs[j]):
                    subgraphs[i] |= subgraphs[j]
                    subgraphs.pop(j)
                else:
                    j += 1
            i += 1
        # recursive core removal
        for s in subgraphs:
            tresults = core_removal(threshold, Dict((v, graph[v] & s) for v in s))
            for tc in tresults:
                nodes = Set()
                for v, n in tc.items():
                    nodes.add(v)
                    n |= graph[v] & core_nodes
                for c in core_nodes:
                    tc[c] = graph[c] & (nodes | core_nodes)
            result += tresults
        return result
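# core_removal relies on two helpers not shown in this excerpt: graph_stats,
# which returns the average degree and density of a graph given as
# {node: neighbor-set}, and get_core_nodes, which selects the nodes whose degree
# is at least the average. A minimal sketch of both under those assumptions
# (density = avg_deg / (n - 1), i.e. 2E / n(n - 1)); not necessarily the
# source's exact definitions:
#
# def graph_stats(graph):
#     avg_deg = sum(len(n) for n in graph.values()) / float(len(graph))
#     density = avg_deg / (len(graph) - 1)
#     return avg_deg, density
#
# def get_core_nodes(graph, avg_deg):
#     return Set(v for v, n in graph.items() if len(n) >= avg_deg)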
def cluster(self, verbose=False):
    data = Dict()
    with open(self.filename, 'r') as f:
        for line in f:
            a, b = line.split()[:2]
            if a in data:
                data[a].add(b)
            else:
                data[a] = Set()
                data[a].add(b)
            if b in data:
                data[b].add(a)
            else:
                data[b] = Set()
                data[b].add(a)

    # step 1: find preliminary cores
    SC = []  # currently-detected preliminary cores
    count = 0
    for vertex, neighbors in tqdm(data.items()):
        # build neighborhood graph
        vertices = Set([vertex]) | neighbors
        size1_neighbors = Set()
        graph = {}
        for v in vertices:
            n = data[v] & vertices
            if len(n) > 1:  # ignore size-1 vertices
                graph[v] = n
            else:
                size1_neighbors.add(v)
        if len(graph) < 2:  # not enough connections in this graph
            continue
        graph[vertex] -= size1_neighbors

        # get core graph
        avg_deg, density = graph_stats(graph)
        core_nodes = get_core_nodes(graph, avg_deg)
        vertices = Set(graph.keys())
        for v in vertices - core_nodes:
            del graph[v]
        for n in graph.values():
            n &= core_nodes
        if len(graph) < 2:  # not enough connections in this graph
            continue
        graph_nodes = Set(graph)

        # inner loop
        for sg in core_removal(self.density_threshold, graph):
            while True:
                _, density = graph_stats(sg)
                # if density threshold met, stop; else, remove min degree node
                if density >= self.density_threshold:
                    break
                w = min(sg.items(), key=lambda k: len(k[1]))[0]
                del sg[w]
                for n in sg.values():
                    n.discard(w)

            sg_nodes = Set(sg)
            while graph_nodes - sg_nodes:
                w = max(graph_nodes - sg_nodes, key=lambda v: len(graph[v] & sg_nodes))
                new_sg = sg.copy()
                for v, n in new_sg.items():
                    if w in graph[v]:
                        n.add(w)
                new_sg[w] = graph[w] & sg_nodes
                _, density = graph_stats(new_sg)
                if density < self.density_threshold:
                    break
                sg = new_sg
                sg_nodes.add(w)

            # redundancy filtering
            max_sim = -1
            for i in range(len(SC)):
                sim = NA_score(Set(SC[i]), sg_nodes)
                if sim > max_sim:
                    max_sim = sim
                    index = i
            if max_sim < self.affinity_threshold:
                SC.append(sg)
            else:
                _, density_i = graph_stats(SC[index])
                if density * len(sg) > density_i * len(SC[index]):
                    SC[index] = sg

    # step 2: adding peripheral proteins
    clusters = Set()
    for core in SC:
        nodes = frozenset(core)
        neighbors = reduce(lambda x, y: x | y, (data[v] for v in nodes)) - nodes
        neighbors -= Set(v for v in neighbors
                         if float(len(data[v] & nodes)) / len(nodes) <= self.closeness_threshold)
        clusters.add(tuple(nodes | neighbors))
    self.clusters = clusters
    print("Found %d clusters/protein complexes" % (len(clusters)))
    return clusters


# if __name__ == '__main__':
#     filename = "../data/unweighted_example_network.txt"
#     c = COACH(filename)
#     c.cluster()
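# The redundancy-filtering step above scores each candidate core against every
# core already in SC with NA_score, the neighborhood-affinity measure from the
# COACH paper: NA(A, B) = |A ∩ B|^2 / (|A| * |B|). A minimal sketch, assuming
# both arguments are sets of node ids (not necessarily the source's exact code):
#
# def NA_score(a, b):
#     return float(len(a & b)) ** 2 / (len(a) * len(b))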
def cluster(self, verbose=False):
    # data = defaultdict(Set)  # node id => neighboring node ids
    data = Dict()

    # read in graph
    with open(self.filename, 'r') as f:
        counter = 0
        for line in f:
            a, b = line.split()[:2]
            counter += 1
            if a in data:
                data[a].add(b)
            else:
                data[a] = Set()
                data[a].add(b)
            if b in data:
                data[b].add(a)
            else:
                data[b] = Set()
                data[b].add(a)

    # weights = defaultdict(int)
    weights = Dict()
    for a, b in combinations(data, 2):
        if b not in data[a]:
            continue
        shared = len(data[a] & data[b])
        if a in weights:
            weights[a] += shared
        else:
            weights[a] = 0
            weights[a] += shared
        if b in weights:
            weights[b] += shared
        else:
            weights[b] = 0
            weights[b] += shared

    unvisited = Set(data)
    num_clusters = 0
    clusters = []
    # print(unvisited)
    # return 0  # Potential culprit

    seed_nodes = sorted(data, key=lambda k: (weights[k], len(data[k])), reverse=True)

    for seed in seed_nodes:  # get highest degree node
        if seed not in unvisited:
            continue
        cluster = Set((seed, next(iter(data[seed]))))  # seed and random neighbor

        while True:
            # rank neighbors by the number of edges between the node and cluster nodes
            frontier = sorted((len(data[p] & cluster), p)
                              for p in Set.union(*((data[n] - cluster) for n in cluster)))

            # do this until IN_vk < T_IN, SP <= 2 is met, or no frontier nodes left
            found = False
            while frontier and not found:
                m_vk, p = frontier.pop()
                if m_vk < self.t_in * len(cluster):
                    break
                c_2neighbors = data[p] & cluster
                c_2neighbors.update(*(data[c] & cluster for c in c_2neighbors))
                if cluster == c_2neighbors:
                    found = True
                    break

            if not found:
                break

            # otherwise, add the node to the cluster
            cluster.add(p)

        unvisited -= cluster
        if verbose:
            print(' '.join(cluster))

        num_clusters += 1
        if verbose:
            print(num_clusters, len(cluster), len(unvisited))
        clusters.append(cluster)

        if not unvisited:
            break

    self.clusters = clusters


# if __name__ == '__main__':
#     filename = "../data/unweighted_example_network.txt"
#     c = IPCA(filename)
#     c.cluster()
def coach(filename):
    # read protein-protein pairs
    # data = defaultdict(Set)
    data = Dict()
    with open(filename, 'r') as f:
        for line in f:
            a, b = line.split()[:2]
            if a in data:
                data[a].add(b)
            else:
                data[a] = Set()
                data[a].add(b)
            if b in data:
                data[b].add(a)
            else:
                data[b] = Set()
                data[b].add(a)

    # step 1: find preliminary cores
    SC = []  # currently-detected preliminary cores
    count = 0
    for vertex, neighbors in tqdm(data.items()):
        # build neighborhood graph
        vertices = Set([vertex]) | neighbors
        size1_neighbors = Set()
        graph = {}
        for v in vertices:
            n = data[v] & vertices
            if len(n) > 1:  # ignore size-1 vertices
                graph[v] = n
            else:
                size1_neighbors.add(v)
        if len(graph) < 2:  # not enough connections in this graph
            continue
        graph[vertex] -= size1_neighbors

        # get core graph
        avg_deg, density = graph_stats(graph)
        core_nodes = get_core_nodes(graph, avg_deg)
        vertices = Set(graph.keys())
        for v in vertices - core_nodes:
            del graph[v]
        for n in graph.values():
            n &= core_nodes
        if len(graph) < 2:  # not enough connections in this graph
            continue
        graph_nodes = Set(graph)

        # inner loop
        for sg in core_removal(graph):
            while True:
                _, density = graph_stats(sg)
                # if density threshold met, stop; else, remove min degree node
                if density >= DENSITY_THRESHOLD:
                    break
                w = min(sg.items(), key=lambda k: len(k[1]))[0]
                del sg[w]
                for n in sg.values():
                    n.discard(w)

            sg_nodes = Set(sg)
            while graph_nodes - sg_nodes:
                w = max(graph_nodes - sg_nodes, key=lambda v: len(graph[v] & sg_nodes))
                new_sg = sg.copy()
                for v, n in new_sg.items():
                    if w in graph[v]:
                        n.add(w)
                new_sg[w] = graph[w] & sg_nodes
                _, density = graph_stats(new_sg)
                if density < DENSITY_THRESHOLD:
                    break
                sg = new_sg
                sg_nodes.add(w)

            # redundancy filtering
            max_sim = -1
            for i in range(len(SC)):
                sim = NA_score(Set(SC[i]), sg_nodes)
                if sim > max_sim:
                    max_sim = sim
                    index = i
            if max_sim < AFFINITY_THRESHOLD:
                SC.append(sg)
            else:
                _, density_i = graph_stats(SC[index])
                if density * len(sg) > density_i * len(SC[index]):
                    SC[index] = sg

    # step 2: adding peripheral proteins
    clusters = Set()
    for core in SC:
        nodes = frozenset(core)
        neighbors = reduce(lambda x, y: x | y, (data[v] for v in nodes)) - nodes
        neighbors -= Set(v for v in neighbors
                         if float(len(data[v] & nodes)) / len(nodes) <= CLOSENESS_THRESHOLD)
        print(nodes)
        print(neighbors)
        print(nodes | neighbors)
        clusters.add(tuple(nodes | neighbors))
    return clusters
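# coach() reads a two-column edge list and returns the detected complexes as a
# Set of node-id tuples. A minimal usage sketch, assuming the module-level
# thresholds referenced above (DENSITY_THRESHOLD, AFFINITY_THRESHOLD,
# CLOSENESS_THRESHOLD) are defined elsewhere in the module; the file path is
# illustrative only:
#
# if __name__ == '__main__':
#     complexes = coach("../data/unweighted_example_network.txt")
#     print("Found %d clusters/protein complexes" % len(complexes))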