Example #1
def km_random(g,k=5,m=3,start=None):
    """ k nodes of breath first sequence; m add and del number."""
    if start==None:
        start=g.nodes().pop()
    bfList=list(nx.bfs_edges(g,start))
    bfList.reverse()
    bfList.append((start,start))
    tempk=[]
    try:
        while bfList:
            for each in range(k):
                tempk.append(bfList.pop()[1])
            
            tg=nx.subgraph(g,tempk)
            e=del_edge(tg,m)
            g.remove_edges_from(e)

            tg=nx.subgraph(g,tempk)
            e=add_edge(tg,m)
            g.add_edges_from(e)

            tempk=[]

    except IndexError:
        print "pop finishing"
Example #2
def calc_euler_tour(g, start, end):
    '''Calculates an Euler tour over the graph g from vertex start to vertex end.
    Assumes start and end are odd-degree vertices and that there are no other odd-degree
    vertices.'''
    even_g = nx.subgraph(g, g.nodes())
    if end in even_g.neighbors(start):
        # If start and end are neighbors, remove the edge
        even_g.remove_edge(start, end)
        comps = list(nx.connected_components(even_g))
        # If the graph did not split, just find the euler circuit
        if len(comps) == 1:
            trail = list(nx.eulerian_circuit(even_g, start))
            trail.append((start, end))
        elif len(comps) == 2:
            subg1 = nx.subgraph(even_g, comps[0])
            subg2 = nx.subgraph(even_g, comps[1])
            start_subg, end_subg = (subg1, subg2) if start in subg1.nodes() else (subg2, subg1)
            trail = list(nx.eulerian_circuit(start_subg, start)) + [(start, end)] + list(nx.eulerian_circuit(end_subg, end))
        else:
            raise Exception('Unknown edge case with connected components of size {0}:\n{1}'.format(len(comps), comps))
    else:
        # If they are not neighbors, we add an imaginary edge and calculate the euler circuit
        even_g.add_edge(start, end)
        circ = list(nx.eulerian_circuit(even_g, start))
        try:
            trail_start = circ.index((start, end))
        except ValueError:
            trail_start = circ.index((end, start))
        trail = circ[trail_start+1:] + circ[:trail_start]
    return trail
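A minimal standalone sketch of the "imaginary edge" branch above, assuming modern networkx (where nx.subgraph returns a read-only view, hence the explicit copy): add the temporary edge, compute the circuit, and rotate it into an open trail.

import networkx as nx

g = nx.path_graph(4)      # 0 and 3 are the only odd-degree vertices
even_g = nx.Graph(g)      # mutable copy
even_g.add_edge(0, 3)     # the imaginary edge makes every degree even
circ = list(nx.eulerian_circuit(even_g, source=0))
i = circ.index((0, 3)) if (0, 3) in circ else circ.index((3, 0))
trail = circ[i + 1:] + circ[:i]
print(trail)              # an Euler trail between 0 and 3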
Example #3
def match_story_by_sen_edge(Gs, stories, target, tau):
    existing = copy.deepcopy(stories['keywords_set'])
    match_target = copy.deepcopy(target['keywords_set'])
    node_cos = match_story(existing, match_target, 0.3)

    subgs1 = []
    for sto in match_target:
        subgs1.append(nx.subgraph(Gs, sto))
    subgs0 = []
    for sto in existing:
        subgs0.append(nx.subgraph(Gs, sto))

    matched = []
    for i in range(len(subgs1)):
        matchingGraph = subgs1[i]
        dis = []
        for cand in subgs0:
            val = compute_distance(matchingGraph, cand)
            dis.append(val)
        total = np.multiply(dis, node_cos[i])
        match_score = np.max(total)

        if match_score < tau:
            stories['keywords_set'].append(target['keywords_set'][i])
            stories['doc_set'].append(target['doc_set'][i])
            continue

        print match_score
        match_ind = np.argmax(total)
        print match_ind, stories['doc_set'][match_ind], target['doc_set'][i]
# 		match_text = existing[match_ind]
        stories['keywords_set'][match_ind].extend(match_target[i])
        u = stories['doc_set'][match_ind].union(target['doc_set'][i])
        stories['doc_set'][match_ind] = u
Example #4
 def test_subgraph_of_subgraph(self):
     SGv = nx.subgraph(self.G, range(3, 7))
     SDGv = nx.subgraph(self.DG, range(3, 7))
     SMGv = nx.subgraph(self.MG, range(3, 7))
     SMDGv = nx.subgraph(self.MDG, range(3, 7))
     for G in self.graphs + [SGv, SDGv, SMGv, SMDGv]:
         SG = nx.induced_subgraph(G, [4, 5, 6])
         assert_equal(list(SG), [4, 5, 6])
         SSG = SG.subgraph([6, 7])
         assert_equal(list(SSG), [6])
         # subgraph-subgraph chain is short-cut in base class method
         assert_is(SSG._graph, G)
Example #5
 def fitness(new_members, is_print=False):
     if len(new_members) == 1:
         return 0
     else:
         new_nodes = set(flatten(map(lambda mem: nx.neighbors(data_graph, mem), new_members))) | new_members
         global w_in
         global w_all
         w_all = len(nx.subgraph(data_graph, new_nodes).edges())
         w_in = len(nx.subgraph(data_graph, new_members).edges())
         if is_print:
             print 'w_in', w_in, nx.subgraph(data_graph, new_members).edges()
             print 'w_all', w_all, nx.subgraph(data_graph, new_nodes).edges()
         return float(w_in) / w_all
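fitness depends on module globals (data_graph, flatten, w_in, w_all) that are defined elsewhere. The same ratio (internal edges over edges incident to the community's closed neighborhood) can be sketched standalone; the names below are illustrative, not from the source:

import networkx as nx

def community_fitness(g, members):
    members = set(members)
    closed_nbhd = members | {n for m in members for n in g.neighbors(m)}
    w_all = nx.subgraph(g, closed_nbhd).number_of_edges()
    w_in = nx.subgraph(g, members).number_of_edges()
    return float(w_in) / w_all if w_all else 0.0

print(community_fitness(nx.karate_club_graph(), {0, 1, 2, 3}))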
Example #6
 def test_subgraph(self):
     assert_equal(self.G.subgraph([0, 1, 2, 4]).adj,
                  nx.subgraph(self.G, [0, 1, 2, 4]).adj)
     assert_equal(self.DG.subgraph([0, 1, 2, 4]).adj,
                  nx.subgraph(self.DG, [0, 1, 2, 4]).adj)
     assert_equal(self.G.subgraph([0, 1, 2, 4]).adj,
                  nx.induced_subgraph(self.G, [0, 1, 2, 4]).adj)
     assert_equal(self.DG.subgraph([0, 1, 2, 4]).adj,
                  nx.induced_subgraph(self.DG, [0, 1, 2, 4]).adj)
     # subgraph-subgraph chain is allowed in function interface
     H = nx.induced_subgraph(self.G.subgraph([0, 1, 2, 4]), [0, 1, 4])
     assert_is_not(H._graph, self.G)
     assert_equal(H.adj, self.G.subgraph([0, 1, 4]).adj)
Example #7
def filterGraphByRecipeID(G, Grecipes, Gingredients, recipeNodes):
	recipe_to_remove = [ n for n in Grecipes.nodes() if n not in recipeNodes]	
	searchGrecipes = nx.subgraph(Grecipes, recipeNodes)
	searchGrecipes.remove_nodes_from(recipe_to_remove)

	ingrNodes = list(set([b for n in searchGrecipes.nodes() for b in G.neighbors(n)]))
	ingr_to_remove = [ n for n in Gingredients.nodes() if n not in ingrNodes]
	searchGingredients = Gingredients

	searchG = nx.subgraph(G, recipeNodes + ingrNodes)
	searchG.remove_nodes_from(recipe_to_remove)
	searchG.remove_nodes_from(ingr_to_remove)

	return (searchG, searchGrecipes, searchGingredients)
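A caveat that applies to this and several other examples on this page: on networkx 2.x and later, nx.subgraph returns a read-only view, so in-place edits such as remove_nodes_from above need an explicit copy first. A minimal sketch:

import networkx as nx

g = nx.path_graph(5)
h = nx.Graph(nx.subgraph(g, [0, 1, 2]))  # mutable copy of the view
h.remove_nodes_from([2])
print(list(h.nodes()))                   # [0, 1]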
Example #8
 def plot_induced_subgraphs(self):
     plt.figure(1)
     partition = self.find_partition()[1]
     communities = [partition[v] for v in partition]
     newGraph=self.G
     for community in communities:
         # note: this induced-subgraph result is discarded; the drawing below
         # colors the full graph by partition instead
         nx.subgraph(newGraph, [key for key in partition if partition[key]==community])
     node_color=[float(partition[v]) for v in partition]
     labels =  {}
     for node in newGraph.nodes():
         labels[node]= newGraph.node[node].get('name', '')
     nx.draw_spring(newGraph,node_color=node_color, labels=labels)
     plt.show()
     plt.savefig("C:\\Users\\Heschoon\\Dropbox\\ULB\\Current trends of artificial intelligence\\Trends_project\\graphs\\graph_induced.pdf")
Example #9
def core_substitution(graph, orig_cip_graph, new_cip_graph):
    """
    graph is the whole graph..
    subgraph is the interfaceregrion in that we will transplant
    new_cip_graph which is the interface and the new core
    """
    assert( set(orig_cip_graph.nodes()) - set(graph.nodes()) == set([]) ), 'orig_cip_graph not in graph'

    # select only the interfaces of the cips
    new_graph_interface_nodes = [n for n, d in new_cip_graph.nodes(data=True) if 'core' not in d]
    new_cip_interface_graph = nx.subgraph(new_cip_graph, new_graph_interface_nodes)

    original_graph_interface_nodes = [n for n, d in orig_cip_graph.nodes(data=True) if 'core' not in d]
    original_interface_graph = nx.subgraph(orig_cip_graph, original_graph_interface_nodes)
    # get isomorphism between interfaces, if none is found we return an empty graph

    iso = get_good_isomorphism(graph,
                               orig_cip_graph,
                               new_cip_graph,
                               original_interface_graph,
                               new_cip_interface_graph)

    if len(iso) != len(original_interface_graph):
        # print iso
        # draw.display(orig_cip_graph)
        # draw.display(new_cip_graph)
        #draw.graphlearn([orig_cip_graph, new_cip_graph],size=10)
        logger.log(5,"grammar hash collision, discovered in 'core_substution' ")
        return nx.Graph()

    # ok we got an isomorphism so lets do the merging
    graph = nx.union(graph, new_cip_graph, rename=('', '-'))

    # removing old core
    original_graph_core_nodes = [n for n, d in orig_cip_graph.nodes(data=True) if 'core' in d]

    for n in original_graph_core_nodes:
        graph.remove_node(str(n))

    # merge interfaces
    for k, v in iso.iteritems():
        # mark the interface for the backflow probability calculation in
        # graphlearn; this is probably removable because merge() does it too
        graph.node[str(k)]['interface'] = True
        merge(graph, str(k), '-' + str(v))
    # unionizing killed my labels so we need to relabel


    return nx.convert_node_labels_to_integers(graph)
Example #10
def node_coordinates_robust(graph):
    xs, ys, zs, As, namess = [], [], [], [], []
    for cc in nx.connected_components(graph.to_undirected()):
        if len(cc) == 1:
            x, y, z = [0], [0], [0]
            A = np.array([[0]])
            names = list(cc)
        elif len(cc) == 2:
            x, y, z = [0, 1], [0, 1], [0, 1]
            n1, n2 = list(cc)
            A = np.array([[0, 1], [1, 0]]) * graph[n1][n2].get('weight', 1)
            names = list(cc)
        else:
            x, y, z, A, names = node_coordinates(nx.subgraph(graph, cc))
        xs.append(x)
        ys.append(y)
        zs.append(z)
        As.append(A)
        namess.append(names)
    for coord in [xs, ys, zs]:
        loc = 0
        for i, arr in enumerate(coord):
            arr = np.asanyarray(arr)
            scale = np.sqrt(arr.size - 0.99)
            coord[i] = ((arr - np.min(arr)) / np.max(arr) *
                        scale + loc)
            loc += 1.05 * scale
    x = np.concatenate(xs)
    y = np.concatenate(ys)
    z = np.concatenate(zs)
    A = coo_mat_concat(As)
    names = list(itertools.chain(*namess))
    return x, y, z, A, names
Example #11
def congressSubgraph(base_graph, title='Rep'):
	"""Generate a subgraph from a Congressional Twitter graph based on a member's title"""
	header = ['lastname','firstname','middlename','title','party','state','gender','twitter_id']
	congress_reader = csv.DictReader(open('../twitter_congress.csv', 'rU'), fieldnames=header)
	all_congress = list(congress_reader)
	mem_list = [a['twitter_id'] for a in all_congress if a['title'] == title]
	return nx.subgraph(base_graph, mem_list)
Example #12
def test_msf_components():

    grid, dist_matrix = random_settlements(500)

    msf = mod_boruvka(grid)

    msf_subgraph = lambda components: nx.subgraph(msf, components)
    component_graphs = map(msf_subgraph, nx.connected_components(msf))

    def full_graph(g):
        new_graph = nx.Graph()
        new_graph.add_nodes_from(g.nodes(data=True))
        if len(g.nodes()) < 2:
            return new_graph

        new_graph.add_weighted_edges_from([(u, v, dist_matrix[u][v])
            for u, v in itertools.product(g.nodes(), g.nodes())
            if u != v])
        return new_graph

    full_graphs = map(full_graph, component_graphs)
    mst_graphs = map(nx.mst.minimum_spanning_tree, full_graphs)

    diff_component_mst = []
    for i in range(len(component_graphs)):
        c_sets = set([frozenset(e) for e in component_graphs[i].edges()])
        mst_sets = set([frozenset(e) for e in mst_graphs[i].edges()])
        if not c_sets == mst_sets:
            diff_component_mst.append(i)

    assert len(diff_component_mst) == 0, str(len(diff_component_mst)) + \
        " components are not MSTs"
Example #13
def step3_balance_number(G,node):
	right=0
	wrong=0
	total=0
	cate_num=dict() # counts for each of the 2^3 = 8 sign patterns
	ins=find_in_nodes(G,node)
	outs=find_out_nodes(G,node)
	sub_nodes=ins+outs
	subG=nx.subgraph(G,sub_nodes)
	for in_node in ins:
		c_outs=find_in_nodes(subG,in_node)
		for out_node in c_outs:
			if out_node in outs:
				total=total+1
				#node-->out_node-->in_node-->node
				flag1=(G[node][out_node]['weight']==-1)
				flag2=(G[out_node][in_node]['weight']==-1)
				flag3=(G[in_node][node]['weight']==-1)
				key=str(1000+flag1*100+flag2*10+flag3)
				
				if key in cate_num:
					cate_num[key]=cate_num[key]+1
				else:
					cate_num[key]=1

				if((flag1+flag2+flag3)%2==0):
					right=right+1
				else:
					wrong=wrong+1
	return (right,total,cate_num)
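find_in_nodes and find_out_nodes are project helpers not shown here. The parity test itself is standard structural balance: a signed cycle is balanced when it carries an even number of negative edges. A tiny sketch on one signed triangle:

import networkx as nx

G = nx.DiGraph()
G.add_edge('a', 'b', weight=1)
G.add_edge('b', 'c', weight=-1)
G.add_edge('c', 'a', weight=-1)
signs = [G[u][v]['weight'] for u, v in [('a', 'b'), ('b', 'c'), ('c', 'a')]]
print('balanced' if signs.count(-1) % 2 == 0 else 'unbalanced')  # balanced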
Example #14
def init():
    global projectname
    global version_array
    global pos
    global x
    global y
    global size_array
    global numframes
    global sg
    for i in range(6):
        data_directory = projectname + "_history/" + projectname + version_array[i] + "/" + projectname
        [g, lines] = creategraph.readfile(data_directory)
        if i == 0:
            sg = creategraph.refine(g, 45)
            [pos, x, y] = creategraph.coordinate(sg)
            size = creategraph.point_sizes(sg, lines)
            zeros = np.array([0] * len(size))
            print 'len(size) = ', len(size)
            print 'zeros = ', zeros
            size_array.append(zeros)
            size_array.append(size)
        else:
            # create the graph induced by nodes from sg
            subg = nx.subgraph(g, nx.nodes(sg))
            print subg, sg
            if nx.number_of_nodes(subg) != nx.number_of_nodes(sg):
                print 'panic at 34' 
        else:  # passing sg here (not subg) looks like an error, but is not
                size = creategraph.point_sizes(sg, lines)
                size_array.append(size)


    x = np.array(x)
    y = np.array(y)
    size_array = np.array(size_array)
Example #15
def subgraph_from_pathways(pathways, g):
    """
    returns a topological annotated graph containing given pathways
    """
    nodes = []
    if type(pathways) == list:
        for p in pathways:
            nodes += nodes_from_pathway(p, g)
    elif type(pathways) == str:
        nodes += nodes_from_pathway(pathways, g)
        
    h = nx.subgraph(g, nodes)

    remove_edges = []
    if type(pathways) == list:
        for p in pathways:
            for e in h.edges():
                if not p in h.get_edge_data(*e)['pathways']:
                    remove_edges.append(e)
    elif type(pathways) == str:
        for e in h.edges():
            if not pathways in h.get_edge_data(*e)['pathways']:
                remove_edges.append(e)
                
    for r in remove_edges:
        h.remove_edge(*r)

    return topological_annotate(h)
Example #16
def greedy_choice(G, candidate, blue_nodes, black_nodes, visited):
    """Helper function to greedy cut"""

    G.node[candidate][gc.PARTITION] = gc.BLUE
    blue_cut_val = gc.cut_edges(nx.subgraph(G, visited))

    G.node[candidate][gc.PARTITION] = gc.BLACK
    black_cut_val = gc.cut_edges(nx.subgraph(G, visited))

    if blue_cut_val > black_cut_val:
        G.node[candidate][gc.PARTITION] = gc.BLUE
        blue_nodes.add(candidate)
    else:
        black_nodes.add(candidate)

    return blue_nodes, black_nodes
Example #17
    def remove_bridges(self, in_file_, start_id, delim_):
        reader = csv.reader(open(in_file_), delimiter=delim_)
        for line in reader:
            self.G.remove_edge(int(line[0]) - start_id, int(line[1]) - start_id)

        print "no of components after removing bridges: %d" % nx.number_connected_components(self.G)
        comps = nx.connected_components(self.G)
        for comp in comps:
            print len(comp)

        bfs = self.BreadthFirstLevels(1,100)
        nbunch = [1]
        for n in bfs:
            #print(n)
            val_ = n.values()
            for set_ in val_:
                nbunch += list(set_)
        #print nbunch
        print "start creating the induced graph!"
        induced_g = nx.subgraph(self.G, nbunch)
        self.G.clear()
#        start_ = 0
#        for n_ in induced_g:
#            self.maps_[n_] = start_
#            start_ += 1
#        for n_1 in induced_g:
#            for n_2 in induced_g:
#                if n_1 in induced_g.neighbors(n_2):
#                    self.G.add_edge(maps_[n_1],maps_[n_2]) 
        self.n = nx.number_of_nodes(induced_g)
        self.G = induced_g
        print "no of node: %d and no of edges: %d in induce graph!" % (self.G.number_of_nodes(), self.G.number_of_edges())
Example #18
def compute_global_utility(graph):
    """
    Return an index that quantifies how big the size of adopter
    clusters is in the entire population of consumers. We call this
    index 'Global utility' in our article.

    This index computes the cluster-size-weighted average of adopter
    clusters divided by the total number of consumers

    So it goes from 0 to 1 and it's always increasing.
    """
    N = len(graph.nodes())
    adopters = get_adopters(graph)
    
    clusters = nx.subgraph(graph, adopters)
    cluster_sizes = [len(c) for c in nx.connected_components(clusters) if len(c) > 1]
    if cluster_sizes:
        # The weight of each cluster depends on its size
        weights = np.array(cluster_sizes) / N
        # Compute the weighted average
        weighted_average = np.average(cluster_sizes, weights=weights)
        # Since the index needs to go from 0 to 1, we divide by N again
        utility = weighted_average / N
        return utility
    else:
        return 0
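get_adopters is defined elsewhere in the project; a hypothetical usage sketch with the adopter set inlined (here simply the first twelve karate-club nodes) looks like this:

import networkx as nx
import numpy as np

graph = nx.karate_club_graph()
adopters = list(range(12))   # stand-in for get_adopters(graph)
N = len(graph.nodes())
clusters = nx.subgraph(graph, adopters)
cluster_sizes = [len(c) for c in nx.connected_components(clusters) if len(c) > 1]
weights = np.array(cluster_sizes) / N
print(np.average(cluster_sizes, weights=weights) / N)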
Example #19
    def expand(seed_set):
        members = seed_set
        print 'seed:', members, nx.subgraph(data_graph, set(
            flatten(map(lambda mem: nx.neighbors(data_graph, mem), members))) | members).edges()
        is_change = True
        while is_change:
            to_check_neighbors = list(flatten(map(lambda mem: nx.neighbors(data_graph, mem), members)))
            random.shuffle(to_check_neighbors)
            print to_check_neighbors
            is_change = False
            for neighbor in to_check_neighbors:
                if fitness(members | {neighbor}) > fitness(members):
                    is_change = True
                    members.add(neighbor)
                    fitness(members, is_print=True)
                    print 'add neighbor:', neighbor, members, 'w_in:', w_in, 'w_all:', w_all
                    break

            for member in members:
                if fitness(members - {member}) > fitness(members):
                    is_change = True
                    members.remove(member)
                    fitness(members, is_print=True)
                    print 'remove member:', member, members, 'w_in', w_in, 'w_all:', w_all
                    break
        print set(members)
        print '\n----------------------------\n'
Example #20
def main():
    """
    Pre-processing: 
        load data, compute centrality measures, write files with node data
    """
    print(nx.__version__)
    # Load network data, create storage dict, and extract main component
    depends=nx.read_edgelist("data/depends.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    depends.name="depends"
    suggests=nx.read_edgelist("data/suggests.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    suggests.name="suggests"
    imports=nx.read_edgelist("data/imports.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    imports.name="imports"
    nets_dict={"depends":depends,"suggests":suggests,"imports":imports}
    for k in nets_dict.keys():
        main_component=nx.connected_component_subgraphs(nets_dict[k].to_undirected())[0].nodes()
        nets_dict[k]=nx.subgraph(nets_dict[k],main_component)
    
    # Run multiple measures on graphs and normalize weights
    measure_list=[nx.in_degree_centrality,nx.betweenness_centrality,nx.pagerank]
    for g in nets_dict.values():
        multiple_measures(g,measure_list)
        normalize_weights(g)
        
    # Output networks in GraphML format (to store node attributes)
    for i in nets_dict.items():
        # print(i[1].edges(data=True))
        nx.write_graphml(i[1],"data/"+i[0]+"_data.graphml")
        print("")
    print("All files written with data")
    
    """Visualization:
Example #21
def weak_link_distribution(bn,N_clus=2,mcs=0,n_jumps=1):
  jumps=[j[0] for j in networkb.find_th_jumps(bn,N_clus)]
  jumps=sorted(list(set(jumps)),reverse=True)
  if len(jumps)==0:
    return []
  pcs=jumps[0:min(n_jumps,len(jumps)-1)]
  d=[]  
  for pc in pcs:
    G=bn.get_Graph(pc,correlation='positive')
    cluster_list=[x for x in networkx.connected_components(G) if len(x)>mcs]
    if len(cluster_list)<2:
      continue
    thmin=(jumps[jumps.index(pc)+1]+pc)/2
    print thmin
    print pc
    H=bn.get_Graph(thmin,th_up=pc,correlation='positive')
    if H.number_of_edges()<1:
      continue
    H=networkx.subgraph(H,itertools.chain.from_iterable(cluster_list))
    if H.number_of_edges()<1:
      continue
    for e in H.edges_iter():
      d.append(bn.nodedistance(e))
  json.dump(d,open(bn.weak_link_distribution_file,'w'))
  return d
Example #22
def filter_relevant_ids(graph):
    """
        Dado el grafo de mis followed y sus followed,
        extraemos los 100 nodos más relevantes 
    """
    graph = nx.read_gpickle('graph.gpickle')
    my_followed = list(set([x[0] for x in graph.edges()]))
    graph = nx.subgraph(graph, my_followed)

    def get_nfollowed(nid):
        return len(graph.successors(nid))

    def get_nfollowers(nid):
        return len(graph.predecessors(nid))

    import pandas as pd
    df = pd.DataFrame()
    df['nodeid'] = my_followed
    df['nfollowed'] = df['nodeid'].apply(get_nfollowed)
    df['nfollowers'] = df['nodeid'].apply(get_nfollowers)

    relevant = df[(df.nfollowed > 40) & (df.nfollowers > 40)]

    relevantids = list(relevant.nodeid.values)
    with open('layer0.pickle','wb') as f:
        pickle.dump(relevantids, f)

    return relevantids
Example #23
def upper_bound(G, k, H):
    '''
    the upper bound of size of k-plex of H in G
    '''
    # upper_bound_by_deg = min([nx.degree(G, node) for node in H]) + k
    # upper_bound_by_deg = 100

    subg = nx.subgraph(G, H)
    validate_neighbors = {node for node in neighbors(G, H) if len(set(G.neighbors(node)).intersection(H)) >= len(H)+1-k}

    strict_nodes = [node for node in H if len(subg[node]) == len(H)-k]
    if len(strict_nodes) > 0:
        available_nodes = {node for node in G.neighbors(strict_nodes[0]) if node not in H}
        for i in range(1, len(strict_nodes)):
            available_nodes.intersection_update({node for node in G.neighbors(strict_nodes[i]) if node not in H})
        available_nodes.intersection_update(validate_neighbors)

        return len(H)+len(available_nodes)
    else:
        min_d = float('inf')
        for node in H:
            nbrs = neighbors(G, [node])
            nbrs.intersection_update(validate_neighbors)
            num_non_nbrs = k-1-(subg.number_of_nodes() - nx.degree(subg, node))
            min_d = min(min_d, len(nbrs)+num_non_nbrs)
        return min_d
Example #24
def find_football_communities():
    """ Finds the communities produced for the football network, and uses
    compare_methods to graph them
    """
    fgraph = CD.football_graph()
    known = CD.football_known_c()
    temp7 = known[7]
    temp8 = known[8]
    temp9 = known[9]
    known[7] = temp8
    known[8] = temp9
    known[9] = temp7

    center_g = nx.Graph()
    center_g.add_nodes_from(range(12))
    centers = nx.circular_layout(center_g, scale = 10)
            
    pos = {}
    subgraphs = [nx.subgraph(fgraph, c) for c in known]
    count = -1
    for g in subgraphs:
        count += 1
        (off_x, off_y) = centers[count]
        pos_local = nx.circular_layout(g, scale=2.)
        for n, place in pos_local.iteritems():
            pos[n] = place + np.array([off_x, off_y])
    
    compare_methods(fgraph,
                    'football_',
                    param=[1., 1., 5./115., 4, 0, .7, 20],
                    known=known,
                    pos=pos,
                    color_map={76:1, 11:2, 7:3, 102:4, 104:5, 47:6, 98:7,
                               96:8, 23:9, 94:10, 27:0},
                    data_path="FootballGames/football_metis")
Example #25
def bound_branch(G, k, q_nodes, is_use_cores=False, select_method='rand'):
    '''
    wrapper of branch and bound method
    '''
    ts = time.time()
    global optimal
    optimal = set()
    k_neighbors = k_hop_nbrs_n(G, k, q_nodes)
    sub = set(q_nodes)
    sub.update(k_neighbors)
    g = nx.subgraph(G, sub)

    if is_use_cores:
        cores = nx.core_number(g)
    else:
        cores = None

    # print('subgraph ', g.nodes())
    print('minimum degree of subgraph', minimum_degree(g))
    print('k neighbors', len(k_neighbors))
    BB(g, k, q_nodes, set(), cores, select_method)
    print('the solution is', optimal)

    te = time.time()

    texe = round(te-ts, 2) # the execution time

    return texe
Example #26
def mean_geodesic(pg, debug=0):
    """
    mean_geodesic() calculates the mean geodesic (shortest) distance
    between two vertices in a network.
    """
    length_sum = 0
    if networkx.is_directed_acyclic_graph(pg):
        n_pairs_with_paths = 0
    else:
        n_pairs_with_paths = ( pg.order() * ( pg.order() + 1 ) ) / 2
    tg = networkx.subgraph(pg, pg.nodes())
    for u in pg.nodes_iter():
        tg.delete_node(u)
        for v in tg.nodes_iter():
            try:
                length = networkx.shortest_path_length(pg,u,v)
                if length > 0:
                    length_sum = length_sum + length
                    if networkx.is_directed_acyclic_graph(pg):
                        n_pairs_with_paths = n_pairs_with_paths + 1
            except networkx.exception.NetworkXError:
                pass
    try:
        geodesic = float(length_sum) / float(n_pairs_with_paths)
    except ZeroDivisionError:
        geodesic = -999.
    if debug:
        print 'length_sum:\t', length_sum
        print 'n_pairs_with_paths:\t', n_pairs_with_paths
    return geodesic
Example #27
def vis_coauthor_communities(graph, source, i, prefix, options, radius, overlap):
    """ Finds the communities produced by different methods for the astro
    citation network
    """    
    interest = CD.get_ball(graph, options[source][i], radius)
    print "Displaying and computing for a subset of ", len(interest), " nodes."
    sgraph = nx.subgraph(graph, interest)
    
    cleaned = {}
    for key in options.keys():
        """ for generating sub community structure
        """
        if key == source:
            # split the overarching with the substructure
            cleaned[source] = [options[source][i]]
            options['Parallel Subcommunities'] = options[source][:i]
            options['Parallel Subcommunities'].extend(options[source][i+1:])
            key = 'Parallel Subcommunities'
        
        filtered = [filter(lambda n: n in interest, c) for c in options[key]]
        filtered = filter(lambda c: len(c) > 0, filtered)
        cleaned[key] = filtered
        cleaned[key] = CD.clean_of_duplicate_c(cleaned[key], overlap=overlap)

    compare_methods(sgraph, prefix, options=cleaned)
Example #28
def dyad_census(pg, debug=0, debuglog=0):
    """
    dyad_census() calculates the number of null, asymmetric, and
    mutual edges between all pairs of nodes in a directed graph.
    """
    if not networkx.is_directed_acyclic_graph(pg):
        logging.error('pyp_network.dyad_census() requires a directed graph as input!')
        return 0
    else:
        census = {}
        census['null'] = 0
        census['asymmetric'] = 0
        census['mutual'] = 0
        tg = networkx.subgraph(pg, pg.nodes())
        for u in pg.nodes_iter():
            tg.delete_node(u)
            for v in tg.nodes_iter():
                if not pg.has_neighbor(u,v):
                    census['null'] = census['null'] + 1
                elif u in pg.predecessors(v) and v in pg.successors(u):
                    census['mutual'] = census['mutual'] + 1
                    if debug:
                        print 'Nodes %s and %s link to one another!' % ( u, v )
                    if debuglog:
                        logging.error('Nodes %s and %s link to one another!',u, v)
                elif u in pg.predecessors(v) and v not in pg.successors(u):
                    census['asymmetric'] = census['asymmetric'] + 1
                elif u not in pg.predecessors(v) and v in pg.successors(u):
                    census['asymmetric'] = census['asymmetric'] + 1
                else:
                    pass
        del(tg)
        return census
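The deletion loop above relies on the long-gone delete_node/nodes_iter API. On modern networkx the same census can be taken directly over node pairs; a hedged re-sketch:

import networkx as nx
from itertools import combinations

g = nx.DiGraph([(0, 1), (1, 0), (1, 2)])
census = {'null': 0, 'asymmetric': 0, 'mutual': 0}
for u, v in combinations(g.nodes(), 2):
    fwd, bwd = g.has_edge(u, v), g.has_edge(v, u)
    if fwd and bwd:
        census['mutual'] += 1
    elif fwd or bwd:
        census['asymmetric'] += 1
    else:
        census['null'] += 1
print(census)  # {'null': 1, 'asymmetric': 1, 'mutual': 1}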
Example #29
File: plot.py Project: jni/prin
def network_properties(network : nx.DiGraph,
                       in_degree_threshold : float = -1,
                       pagerank_threshold : float = -1,
                       damping : float = 0.85,
                       spectral_offset : float = 0.5)\
        -> (pd.DataFrame, sparse.spmatrix):
    conn = max(nx.connected_components(network.to_undirected()), key=len)
    conn = nx.subgraph(network, conn)
    pr = compute_pagerank(conn, damping=damping)
    names = nx.nodes(conn)
    indeg = [conn.in_degree(n) for n in names]
    odeg = [conn.out_degree(n) for n in names]
    description = [conn.node[n].get('description', n) for n in names]
    x, y, z, Adj, aff_names = node_coordinates(conn, nodelist=names,
                                               offset=spectral_offset)
    data = {'id': names,
            'in_degree': indeg,
            'out_degree': odeg,
            'pagerank': pr,
            'affinity_x': x,
            'affinity_y': y,
            'processing_depth': z,
            'description': description}
    df = pd.DataFrame(data, index=names)
    df = df[df['pagerank'] > pagerank_threshold / len(names)]
    df = df[df['in_degree'] > in_degree_threshold]
    return df, Adj
Example #30
def get_all_hardware_grid_problems(
    device_graph: nx.Graph,
    central_qubit: cirq.GridQubit,
    n_instances: int,
    rs: np.random.RandomState,
):
    """Helper function to get all subgraphs for a given named device.

    This is annotated with lru_cache so you can freely call this function
    multiple times without re-constructing the list of qubits.

    Used by `generate_hardware_problem_problem` to get a subgraph for a given
    value of n_qubits.

    Returns:
        A dictionary indexed by n_qubit, instance_i
    """
    all_hg_problems: Dict[Tuple[int, int], HardwareGridProblem] = {}
    subgraphs = get_growing_subgraphs(device_graph=device_graph,
                                      central_qubit=central_qubit)
    for n_qubits in sorted(subgraphs):
        subgraph = nx.subgraph(device_graph, subgraphs[n_qubits])
        for instance_i in range(n_instances):
            problem = random_plus_minus_1_weights(subgraph, rs=rs)
            qubits = sorted(problem.nodes)
            coordinates = [(q.row, q.col) for q in qubits]
            problem = nx.relabel_nodes(problem,
                                       {q: i
                                        for i, q in enumerate(qubits)})

            all_hg_problems[n_qubits, instance_i] = HardwareGridProblem(
                graph=problem,
                coordinates=coordinates,
            )

    return all_hg_problems
Example #31
    def _build_list(self):
        print "using disconnectivity analysis to find minima to untrap"
        self.minpairs = deque()

        graph = TSGraph(self.database).graph
        cclist = list(nx.connected_components(graph))

        # get the largest cluster
        group1 = cclist[0]
        min1 = sorted(group1, key=lambda m: m.energy)[0]
        if not min1 == self.database.minima()[0]:
            # make sure that the global minimum is in group1
            print "warning, the global minimum is not the in the largest cluster."

        # compute the energy barriers for all minima in the cluster        
        subgraph = nx.subgraph(graph, group1)
        energy_barriers = self._compute_barriers(subgraph, min1)

        # sort the minima by the barrier height divided by the energy difference
        weights = [(m, np.abs(barrier) / np.abs(m.energy - min1.energy))
                   for (m, barrier) in energy_barriers.iteritems()]
        weights.sort(key=lambda v: 1. / v[1])

        self.minpairs = deque()
        for min2, w in weights:
            if len(self.minpairs) > self.list_len:
                break

            if not self.is_good_pair(min1, min2):
                continue

            self.minpairs.append((min1, min2))
            if True:
                # print some stuff
                print "    untrap analysis: minimum", min2.id(), "with energy", min2.energy, "barrier", energy_barriers[
                    min2], "untrap weight", w
Example #32
def __HeadTailCommunityDetection(G, finaledgelist, head_tail_ratio=0.6):

    H = nx.connected_components(G)

    for s in H:
        subgraph = nx.subgraph(G, s)
        result = nx.edge_betweenness(subgraph, normalized=False)
        edges = list(result.keys())
        values = list(result.values())
        mean = np.mean(values)
        edgelist = []
        edgetemp = subgraph.edges()
        if len(edgetemp) <= 2:
            for edge in edgetemp:
                finaledgelist.append(edge)
        else:
            for index in range(len(values)):
                if values[index] <= mean:
                    edgelist.append(edges[index])

            if float(len(edgelist)) / float(
                    len(edges)
            ) <= head_tail_ratio:  # change the head/tail division rule here, here is for tail percentage,
                # so if the rule is 40/60, the value should be assigned 0.6 as in the code.
                for edge in edgelist:
                    finaledgelist.append(edge)
            else:
                Gsub = nx.Graph()
                for edge in edgelist:
                    Gsub.add_edge(edge[0], edge[1])
                try:
                    __HeadTailCommunityDetection(Gsub, finaledgelist,
                                                 head_tail_ratio)
                except RuntimeError:
                    # deep recursion on large graphs can hit the limit
                    pass
    return finaledgelist
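A hedged usage sketch, assuming the function above is in scope along with its numpy import: seed the recursion with an empty edge list and read communities off the kept edges. Note that recent networkx releases drop the nx.edge_betweenness alias, so the call inside the function would need to become nx.edge_betweenness_centrality there.

import networkx as nx

g = nx.karate_club_graph()
kept = __HeadTailCommunityDetection(g, [])
print(list(nx.connected_components(nx.Graph(kept))))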
Example #33
def avg_distance(graph: nx.Graph, communities: object,
                 **kwargs: dict) -> object:
    """Average distance.

    The average distance of a community is defined as the average path length across all possible pairs of nodes composing it.

    :param graph: a networkx/igraph object
    :param communities: NodeClustering object
    :param summary: boolean. If **True**, an aggregated score for the partition is returned, otherwise individual-community ones. Default **True**.
    :return: If **summary==True** a FitnessResult object, otherwise a list of floats.

    Example:

    >>> from cdlib.algorithms import louvain
    >>> from cdlib import evaluation
    >>> g = nx.karate_club_graph()
    >>> communities = louvain(g)
    >>> scd = evaluation.avg_distance(g,communities)
    """

    return __quality_indexes(
        graph, communities,
        lambda graph, coms: nx.average_shortest_path_length(
            nx.subgraph(graph, coms)), **kwargs)
Example #34
def is_consistent(queue, G, reactions, trail):
    """Checks whether the trail also makes sense when between a metabolic
    pathway and the original gene neighborhood G.

    :param queue: multiprocessing queue for storing the result of the procedure
    :param G: undirected graph representing a genome, with genes
            for vertices (i.e. not reaction identifiers as are vertices in D).
    :param reactions: dict of dicts storing reaction information (obtained by
        parsing a KGML file)
    :param trail: CoMetGeNe trail
    :return: puts True on the queue if the trail is consistent with respect
        to the gene neighborhood graph G, False otherwise
    """
    G_nodes = G.nodes()
    gene_trail = list()
    for vertex in trail:
        gene_vertex = list()
        for gene in reactions[vertex]['enzyme']:
            if gene in G_nodes:
                gene_vertex.append(gene)
        gene_trail.append(gene_vertex)

    decompositions = decompose(gene_trail)
    for decomp in decompositions:
        for X in nx.connected_components(nx.subgraph(G, decomp)):
            skip = False
            for vertex in trail:
                if not skip:
                    enzyme_names = reactions[vertex]['enzyme']
                    if len(set(X) & set(enzyme_names)) == 0:
                        skip = True
            if not skip:
                queue.put(True)
                return

    queue.put(False)
Example #35
def parse_graph_from_csv(file, largest_cc=False, skip_first_line=True):
    """
    Parses a file from a SNAP Dataset and returns a networkx graph.

    Parameters
    ----------
    file : file
    largest_cc : bool
        Whether only the largest connected component of the graph should be
        returned instead of the complete graph.
    """
    graph_name = os.path.splitext(os.path.basename(file.name))[0]

    if skip_first_line:
        file.readline()  # ignore first line `node_1,node_2`
    graph = nx.read_adjlist(file, delimiter=",", nodetype=int)

    # Change indices to start from zero if necessary
    if min(graph.nodes()) == 1:
        # print("Reducing indices by one to have them starting at zero.")
        graph = nx.relabel_nodes(graph, lambda x: x - 1, copy=False)

    if largest_cc:
        original_size = len(graph)
        largest_component = max(nx.connected_components(graph), key=len)
        subgraph = nx.subgraph(graph, largest_component)
        graph = nx.Graph(subgraph)

        if len(graph) < original_size:
            print(
                "Only considering largest connected component with %d nodes. Original graph had %d nodes."
                % (len(graph), original_size))
        else:
            print("Graph has only one connected component.")

    return graph_name, graph
Example #36
def has_skipped_vertices_G(trail, reactions, G):
    """Determines whether the given CoMetGene trail skips any genes.

    :param trail: CoMetGene trail
    :param reactions: dict of dicts storing reaction information (obtained by
        parsing a KGML file)
    :param G: undirected graph built on the same vertex set as the metabolic
        pathway (see Model in the methods section of Zaharia et al., 2018)
    :return: True if the CoMetGeNe trail was obtained skipping at least one
        gene, False otherwise
    """
    involved = set()  # genes involved in this trail
    for r_id in trail:
        for gene in reactions[r_id]['enzyme']:
            involved.add(gene)

    G_sub = nx.subgraph(G, trail)
    for v1, v2 in G_sub.edges():
        if 'skipped' in G_sub[v1][v2]:
            for vertex in G_sub[v1][v2]['skipped']:
                if vertex not in involved:
                    return True

    return False
Example #37
def entire_workflow(m, steps, evaluate_function, weight_function):
    grid = nx.grid_graph([m, m])
    for v in grid.nodes():
        grid.node[v]["X"] = v[0]
        grid.node[v]["Y"] = v[1]
        grid.node[v]["pos"] = [v[0], v[1]]
    grid.graph["size"] = 0
    grid.graph["steps"] = steps
    path = random_walk(grid, steps, True)

    boundary_nodes = []

    for x in grid.nodes():
        if grid.degree(x) <= 3:
            boundary_nodes.append(x)

    boundary = nx.subgraph(grid, boundary_nodes)

    restricted_path = restrict_path(boundary_nodes, path)

    weight_function(boundary)

    series = create_time_series(boundary, restricted_path, evaluate_function)
    return [series, restricted_path, boundary]
Example #38
def erdos_renyi_modularity(graph, communities, **kwargs):
    """Erdos-Renyi modularity is a variation of the Newman-Girvan one.
    It assumes that vertices in a network are connected randomly with a constant probability :math:`p`.

    .. math:: Q(S) = \\frac{1}{m}\\sum_{c \\in S} \\left( m_c - \\frac{m\\, n_c(n_c-1)}{n(n-1)} \\right)

    where :math:`m` is the number of graph edges, :math:`m_c` is the number of edges inside community :math:`c`, :math:`n_c` its number of nodes, and :math:`n` the total number of nodes.

    :param graph: a networkx/igraph object
    :param communities: NodeClustering object
    :return: FitnessResult object

    Example:

    >>> from cdlib.algorithms import louvain
    >>> from cdlib import evaluation
    >>> g = nx.karate_club_graph()
    >>> communities = louvain(g)
    >>> mod = evaluation.erdos_renyi_modularity(g,communities)

    :References:

    1. Erdos, P., & Renyi, A. (1959). `On random graphs I. <https://gnunet.org/sites/default/files/Erd%C5%91s%20%26%20R%C3%A9nyi%20-%20On%20Random%20Graphs.pdf/>`_ Publ. Math. Debrecen, 6, 290-297.
    """
    graph = convert_graph_formats(graph, nx.Graph)
    m = graph.number_of_edges()
    n = graph.number_of_nodes()
    q = 0

    for community in communities.communities:
        c = nx.subgraph(graph, community)
        mc = c.number_of_edges()
        nc = c.number_of_nodes()
        q += mc - (m * nc * (nc - 1)) / (n * (n - 1))

    return FitnessResult(score=(1 / m) * q)
Example #39
    def __init__(self,
                 function,
                 n_nodes=None,
                 p_edges=None,
                 max_iter_time=60
                 ):
        if n_nodes is None:
            self.n_nodes = [int(i) for i in [1e2, 3e2, 1e3]]
        else:
            self.n_nodes = n_nodes

        if p_edges is None:
            self.p_edges = [0.0005, 0.001]
        else:
            self.p_edges = p_edges

        self.function = function
        self.simulation_grid = self.expand_grid_local(self.n_nodes, self.p_edges)
        self._simulation_graphs = [nx.subgraph(graph, nbunch=max(nx.connected_components(graph), key=len))
                                   for graph in self.random_graph_set(self.n_nodes, self.p_edges)]
        self.simulation_grid['n_edges'] = [graph.number_of_edges() for graph in self._simulation_graphs]
        self.computing_time = list()
        self.computing_time_df = self.simulation_grid.copy()
        self.max_iter_time = max_iter_time
Example #40
def avg_transitivity(graph: nx.Graph, communities: object,
                     **kwargs: dict) -> object:
    """Average transitivity.

    The average transitivity of a community is defined as the average clustering coefficient of its nodes w.r.t. their connections within the community itself.

    :param graph: a networkx/igraph object
    :param communities: NodeClustering object
    :param summary: boolean. If **True**, an aggregated score for the partition is returned, otherwise individual-community ones. Default **True**.
    :return: If **summary==True** a FitnessResult object, otherwise a list of floats.

    Example:

    >>> from cdlib.algorithms import louvain
    >>> from cdlib import evaluation
    >>> g = nx.karate_club_graph()
    >>> communities = louvain(g)
    >>> scd = evaluation.avg_transitivity(g,communities)
    """

    return __quality_indexes(
        graph, communities,
        lambda graph, coms: nx.average_clustering(nx.subgraph(graph, coms)),
        **kwargs)
Example #41
def hub_dominance(graph: nx.Graph, communities: object,
                  **kwargs: dict) -> object:
    """Hub dominance.

    The hub dominance of a community is defined as the ratio of the degree of its most connected node w.r.t. the theoretically maximal degree within the community.

    :param graph: a networkx/igraph object
    :param communities: NodeClustering object
    :param summary: boolean. If **True**, an aggregated score for the partition is returned, otherwise individual-community ones. Default **True**.
    :return: If **summary==True** a FitnessResult object, otherwise a list of floats.

    Example:

    >>> from cdlib.algorithms import louvain
    >>> from cdlib import evaluation
    >>> g = nx.karate_club_graph()
    >>> communities = louvain(g)
    >>> scd = evaluation.hub_dominance(g,communities)
    """

    return __quality_indexes(
        graph, communities, lambda graph, coms: max(
            [x[1] for x in list(nx.degree(nx.subgraph(graph, coms)))]) /
        (len(coms) - 1), **kwargs)
Example #42
def canonical_order(graph, face):
    '''
    Outputs the nodes of the face in a canonical order;
    in particular, the first one is the lex-min.

    You need to use the graph structure to make this work.
    '''

    lex_sorted_nodes = sorted(face)
    first_node = lex_sorted_nodes[0]
    cycle_sorted_nodes = [first_node]
    local_cycle = nx.subgraph(graph, face)

    #Compute the second node locally based on angle orientation

    v = first_node
    locations = []
    neighbor_list = list(local_cycle.neighbors(v))
    for w in neighbor_list:
        locations.append(graph.nodes[w]["pos"] - graph.nodes[v]["pos"])
    angles = [float(np.arctan2(x[1], x[0])) for x in locations]
    neighbor_list.sort(key=dict(zip(neighbor_list, angles)).get)

    second_node = neighbor_list[0]
    cycle_sorted_nodes.append(second_node)
    ##Now compute a canonical ordering of local_cycle, clockwise, starting
    ##from first_node

    while len(cycle_sorted_nodes) < len(lex_sorted_nodes):

        v = cycle_sorted_nodes[-1]
        neighbor_list = list(local_cycle.neighbors(v))
        neighbor_list.remove(cycle_sorted_nodes[-2])
        cycle_sorted_nodes.append(neighbor_list[0])

    return cycle_sorted_nodes
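A small hypothetical check on a unit square, using the modern node-attribute access (graph.nodes[n]['pos']) that the function itself relies on:

import numpy as np
import networkx as nx

g = nx.cycle_graph(4)
for n, xy in {0: (0, 0), 1: (1, 0), 2: (1, 1), 3: (0, 1)}.items():
    g.nodes[n]['pos'] = np.array(xy)
print(canonical_order(g, [0, 1, 2, 3]))  # [0, 1, 2, 3]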
Example #43
    def _graphize_streets(self, highways, node_suffix=''):

        self.logger.debug('Creating street skeleton graph...')
        street_graph = nx.Graph()

        for h in highways:
            raw_ns = h.get_nodes()

            for n1, n2 in _walk2(raw_ns):

                # trimming streets out of box
                if not ((self.box[0] < n1.lat < self.box[2]) and (self.box[1] < n1.lon < self.box[3])):
                    if not ((self.box[0] < n2.lat < self.box[2]) and (self.box[1] < n2.lon < self.box[3])):
                        continue

                street_graph.add_node(str(n1.id) + node_suffix,
                                      pos=asarray([n1.lon, n1.lat]).astype(float),
                                      type='street')
                street_graph.add_node(str(n2.id) + node_suffix,
                                      pos=asarray([n2.lon, n2.lat]).astype(float),
                                      type='street')
                street_graph.add_edge(str(n2.id) + node_suffix, str(n1.id) + node_suffix, phases=3, type='street')

        # it may be possible that the streets are not connected. In that case, we only take the biggest component.
        # one could also choose to add nonexistent street edges or to find the minimum street path to connect
        # everything with openstreetmaps (dangerous).
        if not nx.is_connected(street_graph):
            self.logger.warning('The original graph is not connected. Only the biggest component will be kept.')
            subgraphs = [nx.subgraph(street_graph, n) for n in nx.connected_components(street_graph)]
            sizes = {len(x.nodes): x for x in subgraphs}
            biggest_sg = sizes[max(sizes.keys())]

            killnodes = [n for n in street_graph.nodes if n not in biggest_sg.nodes]
            street_graph.remove_nodes_from(killnodes)

        return street_graph
Example #44
def show_subgraphs(ldag, context="edge", load_cache=True, graph=None):
    if graph is None:
        graph = ldag.graph

    l_buf = []
    separator = "\n\n"
    iterobj = sorted(nx.connected_components(graph.to_undirected()),
                     key=len,
                     reverse=True)
    for sgid, nodes in enumerate(iterobj):
        if len(nodes) == 1:
            continue
        l_graph_buf = ["Subgraph {0} ({1} nodes)".format(sgid, len(nodes))]
        subg = nx.subgraph(graph, nodes)

        for edge in remove_edge_duplication(subg.edges(), ldag, graph=graph):
            msg = edge_view(edge,
                            ldag,
                            context=context,
                            load_cache=load_cache,
                            graph=graph)
            l_graph_buf.append(msg)
        l_buf.append("\n".join(l_graph_buf))
    return separator.join(l_buf)
Example #45
def caculateSST(G, fracs):
    KSD_DD = [0] * len(fracs)
    KSD_CCD = [0] * len(fracs)
    ND_DD = [0] * len(fracs)
    ND_CCD = [0] * len(fracs)
    # SDD_DD = [0] * len(fracs)
    # SDD_CCD = [0] * len(fracs)
    reNodes = SST(G)
    for j in range(len(fracs)):
        tempNodes = []
        for node in reNodes:
            if len(tempNodes) >= fracs[j] * G.number_of_nodes():
                break
            tempNodes.append(node)
        sampleG = nx.subgraph(G, tempNodes)
        dd2 = DD(sampleG)
        ccd2 = CCD(sampleG)
        KSD_DD[j] = KSD(dd1, dd2)
        KSD_CCD[j] = KSD(ccd1, ccd2)
        ND_DD[j] = ND(dd1, dd2)
        ND_CCD[j] = ND(ccd1, ccd2)
        # SDD_DD[j] = SDD(dd1, dd2)
        # SDD_CCD[j] = SDD(ccd1, ccd2)
    return KSD_DD, KSD_CCD, ND_DD, ND_CCD
Example #46
def init():
	global max_depth
	global T
	global grid_dim_x
	global grid_dim_y
	global base
	global route_length
	global total_distance
	global total_reward

	total_distance = 0
	total_reward = 0
	max_depth = 2
	T = 3
	grid_dim_x = 26
	grid_dim_y = 26
	base = 0.0
	route_length = 9000

	global graph
	global dist
	global sigma1
	global avg_strat1
	global regret1
	global route

	df = pd.read_csv("data/paws_mdp_out.txt", sep=" ")
	dist = pd.read_csv("data/dist.gop", sep=" ", header=None)
	graph = \
		nx.from_pandas_dataframe(df, source='node_from', target='node_to',
		                         edge_attr=['distance', 'animal_density', 'grid_cell_x', 'grid_cell_y'])
	graph = nx.subgraph(graph, nx.node_connected_component(graph, base))
	sigma1 = [[1.0/(grid_dim_x*grid_dim_y)] * grid_dim_y for _ in range(grid_dim_x)]
	avg_strat1 = [[0] * grid_dim_y for _ in range(grid_dim_x)]
	regret1 = [[0] * grid_dim_y for _ in range(grid_dim_x)]
	route = []
Example #47
def get_stat_datasets():
    # path = '/network/rit/lab/ceashpc/share_data/GraphOpt/datasets/DBLP/DBLP_Citation_2014_May/DM'
    path = '/network/rit/lab/ceashpc/share_data/GraphOpt/datasets/DBLP/DBLP_Citation_2014_May/DB'
    # fn = 'dm_top30000_dataset.pkl'
    fn = 'db_top30000_dataset.pkl'
    with open(os.path.join(path, fn), 'rb') as rfile:
        dataset = pickle.load(rfile)

    first_graph = dataset['first_graph']
    second_graph = dataset['second_graph']
    print(nx.density(first_graph))
    print(nx.density(second_graph))

    print(first_graph.number_of_nodes())
    print(first_graph.number_of_edges())
    print(second_graph.number_of_nodes())
    print(second_graph.number_of_edges())
    print(nx.is_connected(first_graph))
    print(nx.is_connected(second_graph))

    lcc = max(nx.connected_component_subgraphs(second_graph), key=len)
    subgraph = nx.subgraph(second_graph, lcc)
    print(subgraph.number_of_nodes())
    print(subgraph.number_of_edges())
Example #48
    def set_splice_graph_old(self, sg, component, target):
        self.graph = sg.get_graph()
        self.tx_paths = sg.annotation
        self.original_tx_paths = sg.annotation  # tx paths all ways without trimming
        known_edges = set([(tx[i], tx[i + 1]) for tx in self.tx_paths
                           for i in range(len(tx) - 1)])
        self.component = component
        self.target = target
        self.sub_graph = nx.subgraph(self.graph, self.component)

        # add any possible tx that uses novel edges to list of known txs
        for tx in nx.all_simple_paths(self.sub_graph,
                                      source=self.component[0],
                                      target=self.component[-1]):
            novel = False
            for i in range(len(tx) - 1):
                if (tx[i], tx[i + 1]) not in known_edges:
                    novel = True
            if novel:
                self.tx_paths.append(tx)

        self.inc_lengths, self.skip_lengths = [], []  # call set_all_path_lengths method
        self.all_path_coordinates = []  # call set_all_path_coordinates method
Example #49
for its in range(1, 100):
    et = [0, 0]
    while et[0] == et[1]:
        ed = random.choice(el)
        et = [
            int(df.loc[df["ID"] == ed[0], "newtree" + str(its - 1)]),
            int(df.loc[df["ID"] == ed[1], "newtree" + str(its - 1)])
        ]

    sgn = []
    for n in graph.nodes():
        if int(df.loc[df["ID"] == n, "newtree" + str(its - 1)]) in et:
            sgn.append(n)

    sgraph = nx.subgraph(graph, sgn)

    edd = {0: et[0], 3: et[1]}

    newtree_partial = recursive_tree_part(sgraph, 2, "POP10", .05, 2)
    newtree = {}
    for n in graph.nodes():
        if n not in sgn:
            newtree[n] = int(df.loc[df["ID"] == n, "newtree" + str(its - 1)])
        else:
            newtree[n] = edd[newtree_partial[n]]

    df["newtree" + str(its)] = df["ID"].map(newtree)
    #df["newtree"+str(its)]=pd.to_numeric(df["newtree"+str(its)])

    df.plot(column="newtree" + str(its), cmap="tab20")
Example #50
    def setup(self, **keywords):
        """
		2012.10.15
			run before anything is run
		"""
        AbstractMatrixFileWalker.setup(self, **keywords)
        #self.writer = BeagleGenotypeFile(path=self.outputFname, mode='w')

        #read in the IBD check result
        self.ibdData = SNP.readAdjacencyListDataIntoMatrix(inputFname=self.pedigreeKinshipFilePath, \
            rowIDHeader=None, colIDHeader=None, \
            rowIDIndex=0, colIDIndex=1, \
            dataHeader=None, dataIndex=2, hasHeader=False)

        #. read in the alignment coverage data
        alignmentCoverageFile = MatrixFile(
            path=self.individualAlignmentCoverageFname)
        alignmentCoverageFile.constructColName2IndexFromHeader()
        alignmentReadGroup2coverageLs = alignmentCoverageFile.constructDictionary(
            keyColumnIndexList=[0], valueColumnIndexList=[1])
        alignmentCoverageFile.close()

        sys.stderr.write(
            "Reading in all samples from %s VCF input files ... \n" %
            (len(self.inputFnameLs)))
        # read all the Beagle files
        individualID2HaplotypeData = {}
        for inputFname in self.inputFnameLs:
            vcfFile = VCFFile(inputFname=inputFname)
            #vcfFile.readInAllHaplotypes()
            for individualID in vcfFile.getSampleIDList():
                individualID2HaplotypeData[individualID] = None
                #haplotypeList = vcfFile.getHaplotypeListOfOneSample(individualID)
                #individualID2HaplotypeData[individualID] = PassingData(haplotypeList=haplotypeList,
                #													locusIDList=vcfFile.locusIDList)
            # get all haplotypes , etc.
            # get all sample IDs
        sys.stderr.write("%s individuals total.\n" %
                         (len(individualID2HaplotypeData)))

        #. read in the pedigree or deduce it from Beagle Trio/Duo genotype file (columns)
        #. construct individualID2pedigreeContext, context: familySize=1/2/3, familyPosition=1/2 (parent/child)
        sys.stderr.write("Constructing individualID2pedigreeContext ...")
        plinkPedigreeFile = PlinkPedigreeFile(path=self.pedigreeFname)
        pGraph = plinkPedigreeFile.pedigreeGraph
        #shrink the graph to only individuals with data
        pGraph = nx.subgraph(pGraph, individualID2HaplotypeData.keys())

        cc_subgraph_list = nx.connected_component_subgraphs(
            pGraph.to_undirected())
        individualID2familyContext = {}
        outDegreeContainer = NumberContainer(minValue=0)
        familySizeContainer = NumberContainer(minValue=0)
        individualCoverageContainer = NumberContainer(minValue=0)
        familyCoverageContainer = NumberContainer(minValue=0)
        for cc_subgraph in cc_subgraph_list:
            familySize = len(cc_subgraph)
            familySizeContainer.addOneValue(familySize)

            familyCoverage = 0
            for n in cc_subgraph:  #assuming each family is a two-generation trio/nuclear family
                individualCoverage = self.getIndividualCoverage(
                    individualID=n,
                    alignmentReadGroup2coverageLs=alignmentReadGroup2coverageLs
                )
                individualCoverage = float(individualCoverage)
                individualCoverageContainer.addOneValue(individualCoverage)
                familyCoverage += individualCoverage
                in_degree = pGraph.in_degree(n)
                out_degree = pGraph.out_degree(n)
                outDegreeContainer.addOneValue(out_degree)
                familyContext = PassingData(familySize=familySize, in_degree=in_degree, out_degree=out_degree, \
                      individualCoverage=individualCoverage,\
                      familyCoverage=None)
                if n not in individualID2familyContext:
                    individualID2familyContext[n] = familyContext
                else:
                    sys.stderr.write(
                        "Node %s already in individualID2familyContext.\n" %
                        (n))
            familyCoverageContainer.addOneValue(familyCoverage)
            #set the family coverage for each member, used in weighing the individual. better covered family => better haplotype
            for n in cc_subgraph:
                individualID2familyContext[n].familyCoverage = familyCoverage
        plinkPedigreeFile.close()
        sys.stderr.write("%s individuals.\n" %
                         (len(individualID2familyContext)))

        # weigh each unique individual based on its sequencing coverage + no of offspring => probability mass for each individual
        sys.stderr.write(
            "Weighing each individual, assigning probability mass ...")
        individualID2probabilityMass = {}
        for individualID, familyContext in individualID2familyContext.items():
            outDegreeQuotient = outDegreeContainer.normalizeValue(
                familyContext.out_degree)  # the container tracks out-degrees, so normalize out_degree, not familySize
            individualCoverageQuotient = individualCoverageContainer.normalizeValue(
                familyContext.individualCoverage)
            #familyCoverageQuotient = familyCoverageContainer.normalizeValue(familyContext.familyCoverage)
            importanceScore = outDegreeQuotient + individualCoverageQuotient
            representativeImportanceScore = importanceScore
            individualID2probabilityMass[
                individualID] = representativeImportanceScore
        sys.stderr.write(" %s IDs with probability mass assigned.\n" %
                         (len(individualID2probabilityMass)))

        self.individualID2probabilityMass = individualID2probabilityMass
        self.individualID2HaplotypeData = individualID2HaplotypeData
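A note on the weighting step above: it hinges on NumberContainer.normalizeValue(), which this snippet does not define. Below is a minimal sketch of the min-max normalization such a container presumably performs; the real class's API and semantics are an assumption here.

class MinMaxContainer(object):
    """Hypothetical stand-in for NumberContainer: running min-max normalization."""

    def __init__(self, minValue=0):
        self.minValue = minValue
        self.maxValue = minValue

    def addOneValue(self, value):
        # widen the observed range
        self.minValue = min(self.minValue, value)
        self.maxValue = max(self.maxValue, value)

    def normalizeValue(self, value):
        # map value into [0, 1] relative to the values seen so far
        span = self.maxValue - self.minValue
        return (value - self.minValue) / span if span else 0.0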
Example #51
0
    cache[fingerprint] = (mcost, mpath)
    return (mcost, mpath)


main(None)
#wrapper(main)
import pygraphviz
from networkx.drawing.nx_agraph import write_dot
#print(G.edges(data=True))
#print(G.nodes)

#labels = nx.get_edge_attributes(G,'weight')
#nx.draw_networkx_edge_labels(G,pos=nx.spring_layout(G),edge_labels=labels)

cnt = 0
cache = dict()
write_dot(G, "maze.dot")

nx.draw_spring(G, with_labels=True)
plt.savefig("maze_nwx.png")

# cut the graph into the four distinct different graphs

graphs = list()
for i in range(0, 4):
    graphs.append(nx.subgraph(G, nx.node_connected_component(G, "@" + str(i))))

cache = dict()
print(findtheway(graphs, ["@0", "@1", "@2", "@3"]))
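The component-splitting idiom above is general. A self-contained sketch on a toy graph (G_demo is illustrative data, not the maze graph from this script):

import networkx as nx

G_demo = nx.Graph([(0, 1), (2, 3)])  # two separate components
components = [nx.subgraph(G_demo, c) for c in nx.connected_components(G_demo)]
print([sorted(c.nodes) for c in components])  # [[0, 1], [2, 3]]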
Example #52
0
def proj_mp(graph, weight, A, sparsity, lmbd, max_iter=10, epsilon=1e-3):
    current_x = proj_init_point(graph.number_of_nodes()) + 1e-6

    edges = np.array(graph.edges)
    edge_weights = np.ones(graph.number_of_edges())
    start_time = time.time()
    for i in range(max_iter):
        print('iter {}'.format(i))
        iter_time = time.time()
        gradient = proj_get_gradient(current_x, weight, A, lmbd)
        normalized_gradient = normalize_gradient(current_x, gradient)

        re_head = head_proj(edges=edges,
                            weights=edge_weights,
                            x=normalized_gradient,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            delta=1. / 169.,
                            err_tol=1e-8,
                            max_iter=100,
                            root=-1,
                            pruning='strong',
                            epsilon=1e-10,
                            verbose=0)
        re_nodes, _, _ = re_head
        gamma_x = set(re_nodes)
        print('gamma_x', len(gamma_x))
        # print(sorted(list(gamma_x)))
        if i == 0:
            supp_x = set()
        else:
            supp_x = set(
                [ind for ind, _ in enumerate(current_x) if not 0. == _])
        omega_x = gamma_x | supp_x

        print('omega_x', len(omega_x))
        # print(sorted(list(omega_x)))

        # print(gradient[sorted(list(gamma_x))])
        # print(gradient[sorted(list(supp_x))])

        bx = proj_argmax(current_x,
                         omega_x,
                         weight,
                         A,
                         lmbd,
                         max_iter=2000,
                         learning_rate=0.01)

        re_tail = tail_proj(edges=edges,
                            weights=edge_weights,
                            x=bx,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            nu=2.5,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            verbose=0)
        re_nodes, _, _ = re_tail

        psi_x = set(re_nodes)
        prev_x = current_x
        current_x = np.zeros_like(current_x)
        current_x[list(psi_x)] = bx[list(psi_x)]

        print('psi_x', len(np.nonzero(current_x)[0]))

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        func_val, xtw, dense_term = proj_get_func_val(current_x, weight, A,
                                                      lmbd)

        print(
            'iter {}, func val: {:.5f}, iter_time: {:.5f}, diff_norm: {:.5f}'.
            format(i, func_val,
                   time.time() - iter_time, diff_norm_x))

        subgraph = set(np.nonzero(current_x)[0])
        print('subgraph density', nx.density(nx.subgraph(graph, subgraph)))

        if diff_norm_x <= epsilon:
            break

    run_time = time.time() - start_time
    func_val, xtw, dense_term = proj_get_func_val(current_x, weight, A, lmbd)

    print('final function value: {:.5f}'.format(func_val))
    print('run time of whole algorithm: {:.5f}'.format(run_time))

    subgraph = set(np.nonzero(current_x)[0])

    return subgraph
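Side note: the enumerate comprehension used above for supp_x is equivalent to reading the nonzero indices off numpy directly:

import numpy as np

x = np.array([0., 0.3, 0., -1.2])
supp = set(np.nonzero(x)[0])  # {1, 3}: indices of the nonzero entries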
Example #53
0
cliques = nx.find_cliques(G)

# Count and print the number of maximal cliques in G
print(len(list(cliques)))

############################## Task 2 (Finding cliques (II))
# Import necessary modules
import networkx as nx
from nxviz import CircosPlot
import matplotlib.pyplot as plt

# Find the author(s) that are part of the largest maximal clique: largest_clique
largest_clique = sorted(nx.find_cliques(G), key=lambda x: len(x))[-1]

# Create the subgraph of the largest_clique: G_lc
G_lc = nx.subgraph(G, largest_clique)

# Create the CircosPlot object: c
c = CircosPlot(G_lc)

# Draw the CircosPlot to the screen
c.draw()
plt.show()
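Sorting every maximal clique only to take the last element works, but max() with a key is the more direct idiom for the same lookup (tie-breaking may differ):

# Equivalent to sorted(nx.find_cliques(G), key=lambda x: len(x))[-1]
largest_clique = max(nx.find_cliques(G), key=len)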

############################## Final Task ###########################
# Find important users : nx.degree_centrality()
# Find largest communities of collaborators : max_cliques()
# recommendation system : open triangles

############################## Task 1 (Finding important collaborators)
# Compute the degree centralities of G: deg_cent
Example #54
0
def inducer(graph, node):
    # nx.neighbors() returns an iterator in networkx 2.x, so materialize it
    nebs = list(nx.neighbors(graph, node))
    sub_nodes = nebs + [node]
    sub_g = nx.subgraph(graph, sub_nodes)
    out_counts = np.sum([len(list(nx.neighbors(graph, x))) for x in sub_nodes])
    return sub_g, out_counts, nebs
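A quick usage sketch of inducer on a toy graph (assuming networkx and numpy are imported as nx and np, as in the surrounding examples):

G_demo = nx.path_graph(4)  # 0-1-2-3
sub_g, out_counts, nebs = inducer(G_demo, 1)
print(sorted(sub_g.nodes))  # [0, 1, 2]: node 1 plus its neighbors
print(out_counts)           # 5: full-graph degrees of nodes 0, 1, 2 (1 + 2 + 2)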
Example #55
0
def optimize(instance,
             sparsity,
             threshold,
             trade_off,
             learning_rate,
             max_iter,
             epsilon=1e-3,
             logger=None):

    first_graph = instance['first_graph']
    second_graph = instance['second_graph']
    true_subgraph = instance['true_subgraph']
    features = instance['weight']
    A = adj_matrix(
        second_graph
    )  # adjacency matrix of the second graph, used for the density projection

    first_graph_edges = np.array(first_graph.edges)
    first_graph_edge_weights = np.ones(
        first_graph.number_of_edges())  # edge weight, default 1

    print('number of nodes in first graph', first_graph.number_of_nodes())
    print('number of nodes in second graph', second_graph.number_of_nodes())

    if first_graph.number_of_nodes() != second_graph.number_of_nodes():
        raise ValueError('wrong dual network input: node counts differ')

    num_nodes = first_graph.number_of_nodes()
    num_edges_first_graph = first_graph.number_of_edges()
    num_edges_second_graph = second_graph.number_of_edges()

    if logger:
        # print some basic information
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges in first graph: {:d}'.format(
            num_edges_first_graph))
        logger.debug('number of edges in second graph: {:d}'.format(
            num_edges_second_graph))
        logger.debug('density of first graph: {:.5f}'.format(
            nx.density(first_graph)))
        logger.debug('density of second graph: {:.5f}'.format(
            nx.density(second_graph)))
        logger.debug('density of true subgraph in second graph: {:.5f}'.format(
            nx.density(nx.subgraph(second_graph, true_subgraph))))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = DualEMS(features, trade_off)
    if logger:
        print(sorted(true_subgraph))

        true_x = np.zeros(num_nodes)
        # print(type(true_subgraph))
        true_x[list(true_subgraph)] = 1.
        true_x = np.array(true_x)
        true_obj_val, x_ems_val, y_ems_val, penalty = func.get_obj_val(
            true_x, true_x)
        print('ground truth values: {}, {}, {}, {}'.format(
            true_obj_val, x_ems_val, y_ems_val, penalty))

    current_x, current_y = func.get_init_x_zeros(
    )  # are there some other better initialization methods?
    current_x += 1e-6  # start from zero, plus 1e-6 avoid divide by zero error
    current_y += 1e-6

    print('iteration start funval', func.get_obj_val(current_x, current_y))

    for iter in range(max_iter):  # external iteration
        if logger:
            logger.debug('iteration: {:d}'.format(iter))

        prev_x, prev_y = np.copy(current_x), np.copy(
            current_y)  # store previous vectors for early termination

        # handle first graph
        grad_x = func.get_gradient(current_x, current_y)
        iter_proj_time = 0.
        if iter == 0:  # from all zero vector
            norm_grad_x = normalize_gradient(np.zeros_like(current_x), grad_x)
        else:
            norm_grad_x = normalize_gradient(current_x, grad_x)

        start_proj_time = time.time()
        # head projection enforces the connectivity constraint, so it runs on the first graph
        re_head = head_proj(edges=first_graph_edges,
                            weights=first_graph_edge_weights,
                            x=norm_grad_x,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            delta=1. / 169.,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            epsilon=1e-10,
                            verbose=0)
        re_nodes, _, _ = re_head
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for x: {:.5f}'.format(time.time() -
                                                          start_proj_time))
        gamma_x = set(re_nodes)
        indicator_x = np.zeros(num_nodes)
        indicator_x[list(gamma_x)] = 1.
        # there is no difference between using grad_x and norm_grad_x, because indicator_x is derived from norm_grad_x
        if iter == 0:
            tmp_x = np.zeros_like(
                current_x
            ) + learning_rate * norm_grad_x * indicator_x  # start from all zeros
        else:
            tmp_x = current_x + learning_rate * norm_grad_x * indicator_x

        omega_x = set([ind for ind, _ in enumerate(tmp_x) if not 0. == _])

        # head projection for y
        grad_y = func.get_gradient(current_y,
                                   current_x)  # note, reverse order x & y
        # note, test not normalize
        if iter == 0:
            norm_grad_y = normalize_gradient(
                np.zeros_like(current_y),
                grad_y)  # # note, is it necessary for density projection?
        else:
            norm_grad_y = normalize_gradient(current_y, grad_y)
            # norm_grad_y = grad_y # note !!!

        # note: the input to the density projection should be positive
        # note: why does baojian's code not take the positive part in the head projection?
        abs_norm_grad_y = np.absolute(
            norm_grad_y
        )  # take the absolute value of the gradient: a larger magnitude means a larger effect on the objective

        np.set_printoptions(linewidth=3000)

        # print(norm_grad_y)

        # lmbd_list = [0.01, 0.05, 0.07, 0.08, 0.09, 0.1, 0.12, 0.15, 0.17, 0.2, 0.2, 0.2, 0.2, 0.21, 0.22, 0.23, 0.18, 0.18, 0.18, 0.17] # normalize
        lmbd_list = [0.23]  # normalize
        # lmbd_list = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006] # normalize
        # sparsity_list = [250, 250, 260, 270, 270, 275, 275, 280, 280, 280, 265, 270, 275, 275, 280, 285, 260, 255, 250, 245] # normalize
        sparsity_list = [275]  # normalize
        # sparsity_list = [50, 50, 50, 50, 55] # normalize
        lmbd_sparsity_list = list(zip(lmbd_list, sparsity_list))  # zip() is single-use in Python 3
        # sparsity_list = [50]
        print('start head projection for y')
        start_proj_time = time.time()
        # gamma_y = density_projection(second_graph, norm_grad_y, A, threshold, min_sparsity, max_sparsity, step_sparsity, normalize=False)
        gamma_y = density_projection(
            second_graph,
            abs_norm_grad_y,
            A,
            threshold,
            lmbd_sparsity_list,
            normalize=True,
            true_subgraph=true_subgraph
        )  # test not normalize, need new lambda sparsity list
        # gamma_y = density_projection(second_graph, abs_norm_grad_y, A, threshold, lmbd_sparsity_list, normalize=False, true_subgraph=true_subgraph) # test not normalize, need new lambda sparsity list
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for y: {:.5f}'.format(time.time() -
                                                          start_proj_time))

    #
    #
    #     indicator_y = np.zeros(num_nodes)
    #     indicator_y[list(gamma_y)] = 1.
    #     if iter == 0:
    #         # tmp_y = np.zeros_like(current_y) + learning_rate * grad_y * indicator_y
    #         tmp_y = np.zeros_like(current_y) + learning_rate * norm_grad_y * indicator_y # todo, pls note that update gradient should be normalized gradient
    #     else:
    #         # tmp_y = current_y + learning_rate * grad_y * indicator_y
    #         tmp_y = current_y + learning_rate * norm_grad_y * indicator_y
    #
    #     omega_y = set([ind for ind, _ in enumerate(tmp_y) if not 0. == _])
    #
    #     print('omega_x', len(omega_x))
    #     print(sorted(list(omega_x)))
    #
    #     print('omega_y', len(omega_y))
    #     print(sorted(list(omega_y)))
    #
    #     print('intersect', len(omega_y & omega_x))
    #     print(sorted(list(omega_y & omega_x)))
    #
    #     # break
    #
    #     print('solve argmax')
    #     start_max_time = time.time()
    #     bx, by = func.argmax_obj_with_proj(current_x, current_y, omega_x, omega_y)
    #     print('solve argmax time {:.5f}'.format(time.time() - start_max_time))
    #
    #     # break
    #
    #     start_proj_time = time.time()
    #     # tail projection on first graph
    #     re_tail = tail_proj(edges=first_graph_edges, weights=first_graph_edge_weights, x=bx, g=1, s=sparsity, budget=sparsity - 1., nu=2.5, max_iter=100, err_tol=1e-8, root=-1, pruning='strong', verbose=0)  # tail projection
    #     re_nodes, _, _ = re_tail
    #     iter_proj_time += time.time() - start_proj_time
    #     print('tail projection time for x: {:.5f}'.format(time.time() - start_proj_time))
    #     psi_x = set(re_nodes)
    #
    #     current_x = np.zeros_like(current_x)
    #     current_x[list(psi_x)] = bx[list(psi_x)]
    #     current_x = normalize(current_x)
    #
    #     lmbd_list = [0.01, 0.05, 0.07, 0.08, 0.09, 0.1]
    #     # lmbd_list = [0.006, 0.08]
    #     sparsity_list = [250, 250, 270, 270, 275, 275]
    #     lmbd_sparsity_list = zip(lmbd_list, sparsity_list)
    #
    #     start_proj_time = time.time()
    #     # psi_y = density_projection(second_graph, by, threshold, min_sparsity, max_sparsity, step_sparsity, normalize=False)
    #     psi_y = density_projection(second_graph, by, A, threshold, lmbd_sparsity_list, normalize=False, true_subgraph=true_subgraph) # not normalize, not absolute value, since by is in [0, 1]
    #     iter_proj_time += (time.time() - start_proj_time)
    #
    #     print('tail projection time for y: {:.5f}'.format(time.time() - start_proj_time))
    #
    #     current_y = np.zeros_like(current_y)
    #     print('1', len(np.nonzero(by)[0]))
    #     print('by nonzero', sorted(list(np.nonzero(by)[0])))
    #     print('1v', len(np.nonzero(bx)[0]))
    #     print('2', len(psi_y))
    #     print('psi_y', sorted(list(psi_y)))
    #     print('2v', len(psi_x))
    #     current_y[list(psi_y)] = by[list(psi_y)]
    #     print('3', len(np.nonzero(current_y)[0]))
    #     print('3v', len(np.nonzero(current_x)[0]))
    #     current_y = normalize(current_y)
    #     print('4', len(np.nonzero(current_y)[0]))
    #     print('4v', len(np.nonzero(current_x)[0]))
    #
    #     print('{} iteration funval'.format(iter), func.get_obj_val(current_x, current_y))
    #
    #     acc_proj_time += iter_proj_time
    #
    #     if logger:
    #         print('iter proj time: {:.5f}'.format(iter_proj_time))
    #
    #     diff_norm = np.sqrt(np.linalg.norm(current_x - prev_x) ** 2 + np.linalg.norm(current_y - prev_y) ** 2)
    #     if logger:
    #         logger.debug('difference norm: {}'.format(diff_norm))
    #
    #     if diff_norm < epsilon:
    #         break
    #
    run_time = time.time() - start_time
    if logger:
        pass

    return current_x, current_y, run_time
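The adj_matrix helper called at the top of optimize() is not defined in this snippet; a plausible stand-in via networkx (an assumption, the project may wrap this differently):

import networkx as nx

def adj_matrix(graph):
    # SciPy sparse adjacency matrix, rows/columns in graph.nodes() order
    return nx.adjacency_matrix(graph)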
Example #56
0
def connected_components(graph, nbunch):
    g = nx.subgraph(graph, nbunch)
    ccomponents = nx.connected_components(g)
    return ccomponents
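Usage sketch: the nbunch argument restricts the component search to a node subset (toy data):

import networkx as nx

G_demo = nx.Graph([(1, 2), (2, 3), (4, 5)])
print([sorted(c) for c in connected_components(G_demo, [1, 2, 4, 5])])
# [[1, 2], [4, 5]] -- node 3 is outside the nbunch, so it drops out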
Example #57
0
    def draw_graph(self, ax=None, node_size=8):
        """ Draws the state graph of the maze in a fancy way. """

        if ax is None:
            fig, ax = plt.subplots(1)

        graph = self.compute_graph()
        d = nx.shortest_path(graph, graph.start)
        p_goal = d[graph.goal]
        d_goal = len(p_goal)
        subgraph = nx.subgraph(graph, d.keys())
        n_levels = max(map(len, d.values()))

        nodespos = dict()
        max_nodes_in_level = 0
        nodes = [graph.start]
        nodespos[graph.start] = [-0.5, 0]

        for i in range(1, n_levels + 2):

            nodes = [[
                n for n in nx.DiGraph.successors(graph, m)
                if len(d[n]) == i + 1
            ] for m in nodes]
            nodes = sum(nodes, [])
            seen = set()
            seen_add = seen.add
            nodes = [n for n in nodes if n not in seen and not seen_add(n)]

            #nodes = list(set(sum(nodes, [])))

            # place the nodes of the solution on the left
            l = [n for n in nodes if n in p_goal]
            if l != []:
                nodes.remove(l[0])
                nodes = [l[0]] + nodes

            if len(nodes) > max_nodes_in_level:
                max_nodes_in_level = len(nodes)

            for j, n in enumerate(nodes):
                nodespos[n] = [j - 1.0 * len(nodes) / 2, -i]

        if ax is None:
            fig, ax = plt.subplots(1)

        main_edges = [(n1, n2) for (n1, n2) in subgraph.edges()
                      if (len(d[n2]) - len(d[n1]) == 1)]

        other_edges = [e for e in subgraph.edges() if e not in main_edges]

        nx.draw_networkx_edges(nx.Graph(subgraph),
                               edgelist=other_edges,
                               ax=ax,
                               pos=nodespos,
                               width=0.5,
                               alpha=.4)

        nx.draw_networkx_edges(nx.Graph(subgraph),
                               edgelist=main_edges,
                               ax=ax,
                               pos=nodespos,
                               width=2)

        nx.draw_networkx_nodes(subgraph,
                               pos=nodespos,
                               ax=ax,
                               node_size=node_size)

        for n, col in ([graph.start, 'g'], [graph.goal, 'b']):
            nx.draw_networkx_nodes(graph,
                                   nodespos, [n],
                                   ax=ax,
                                   node_size=1.5 * node_size,
                                   node_color=col)
        ax.set_ylim(-n_levels, 1)
        ax.set_xlim(-1.0 * max_nodes_in_level / 2 - 1,
                    1.0 * max_nodes_in_level / 2)

        ax.set_axis_off()
Example #58
0
    def compute_score(self):
        """
        
        Computes a score for the maze.

        This is an example of scoring function which implements a
        few of my favorite criteria for a good maze:

        - The solution is unique and long.
        - There are plenty of loops and dead ends.
        - Many openings and many false endings, which make the maze
          difficult to solve backwards.

        This is all very subjective and incomplete. To grow mazes
        according to other criteria, override this function in a
        subclass of Vmaze.
        
        """

        score = 1.0

        graph = self.compute_graph()

        if len(graph.nodes()) == 0:
            return 0

        shortest = nx.shortest_path(graph, graph.start)

        # Check that the goal is reachable at all.

        if graph.goal not in shortest:
            return 0

        # Check that there is one solution exactly

        gen = nx.all_simple_paths(graph, graph.start, graph.goal)
        next(gen)
        try:
            next(gen)
        except StopIteration:
            pass
        else:
            return 1.0

        distances = {n: len(p) for n, p in shortest.items()}
        path_goal = shortest[graph.goal]
        d_goal = distances[graph.goal]
        subgraph = nx.subgraph(graph, shortest.keys())

        # Reward the length of the minimal solution

        score *= d_goal**2

        # Reward the openings, and the states that are nearer
        # than the solution in general

        openings_1 = len([d for n, d in distances.items() if d <= 2])

        openings_2 = len([d for n, d in distances.items() if d <= d_goal])

        score *= (openings_1 * openings_2**2)

        # Reward the "loops"

        n_loops = len([
            e for e in subgraph.edges()
            if abs(distances[e[0]] - distances[e[1]]) > 1
        ])

        score *= (n_loops**0.5)

        # Reward the false endings

        paths_to_end = nx.shortest_path(graph, target=graph.goal)
        n_endings = len(
            [path for path in paths_to_end.values() if len(path) < 3])

        score *= n_endings

        return score
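The back-to-back next() calls above test for a unique simple path without enumerating all paths; the same idea as a standalone helper (a sketch, not part of the original class):

from itertools import islice
import networkx as nx

def has_unique_simple_path(graph, source, target):
    # pull at most two paths from the generator; unique iff exactly one exists
    paths = list(islice(nx.all_simple_paths(graph, source, target), 2))
    return len(paths) == 1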
Example #59
0
def simulated_annealing(graph,
                        num_buses,
                        size_bus,
                        constraints,
                        test_mode=False):
    all_names = list(graph.nodes())  # NodeView is not indexable, so materialize it
    x = all_names[0]
    print(graph[x])  # leftover debug output: adjacency dict of the first node
    # note: edge weights live on individual edges, e.g. graph[x][y]['weight']
    CD = constraint_dictionary(all_names, constraints)

    #assignment = random_assignment(all_names, num_buses, size_bus)
    assignment = greedy(graph, num_buses, size_bus, constraints)
    groups = assignment_to_groups(assignment)

    assert is_valid(graph, num_buses, size_bus, assignment)
    T0 = 1.0
    iterations = 5000

    student_status = calculate_student_status(graph, constraints, assignment)
    starting_score = evaluate_assignment(graph, constraints, assignment)
    current_score = starting_score
    for i in range(iterations):
        T = T0 * (1 - i / iterations)
        swap = propose_swap(assignment, num_buses, size_bus, groups=groups)
        if swap is None:
            continue
        val, student_status_swap = swap_value(graph, constraints, CD,
                                              student_status, assignment, swap)

        if val >= 0 or random.random() < np.exp(val / T):
            group_A, A, group_B, B = swap
            if A is not None:
                assignment[A] = group_B
                groups[group_B].append(A)
                groups[group_A].remove(A)
            assignment[B] = group_A
            groups[group_A].append(B)
            groups[group_B].remove(B)

            student_status = student_status_swap
            if (test_mode):
                new_score = evaluate_assignment(graph, constraints, assignment)
                student_status_check = calculate_student_status(
                    graph, constraints, assignment)
                for s in student_status_check:
                    assert student_status[s] == student_status_check[s]
                if new_score != current_score + val:
                    print(new_score, current_score, val)
                    print(swap)
                    print(student_status)
                    print(assignment_to_groups(assignment))
                    print(constraints)

                    nodes = []
                    for x in assignment:
                        if assignment[x] == group_A or assignment[x] == group_B:
                            nodes.append(x)
                    import matplotlib.pyplot as plt
                    subgraph = nx.subgraph(graph, nodes)
                    nx.draw(subgraph, with_labels=True, font_weight='bold')
                    plt.show()
                    assert False
                current_score = new_score

    assert is_valid(graph, num_buses, size_bus, assignment)
    #print('Score:', evaluate_assignment(graph, constraints, assignment))

    return assignment
Example #60
-1
    def _build_list(self):
        print "using disconnectivity analysis to find minima to untrap"
        self.minpairs = deque()
        
        graph = TSGraph(self.database).graph
        cclist = list(nx.connected_components(graph))
        
        # get the largest cluster (connected_components() no longer sorts by size)
        group1 = max(cclist, key=len)
        min1 = sorted(group1, key=lambda m: m.energy)[0]
        if min1 != self.database.minima()[0]:
            # make sure that the global minimum is in group1
            print("warning: the global minimum is not in the largest cluster.")

        # compute the energy barriers for all minima in the cluster        
        subgraph = nx.subgraph(graph, group1)
        energy_barriers = self._compute_barriers(subgraph, min1)
        
        # sort the minima by the barrier height divided by the energy difference
        weights = [(m, np.abs(barrier) / np.abs(m.energy - min1.energy)) 
                   for (m, barrier) in energy_barriers.items()]
        weights.sort(key=lambda v: 1. / v[1])

        self.minpairs = deque()    
        for min2, w in weights:
            if len(self.minpairs) > self.list_len:
                break
            
            if not self.is_good_pair(min1, min2):
                continue
            
            self.minpairs.append((min1, min2))
            if True:
                # print some stuff
                print "    untrap analysis: minimum", min2._id, "with energy", min2.energy, "barrier", energy_barriers[min2], "untrap weight", w