Пример #1
0
def test_brandes_erlebach_book():
    """Check minimum edge and node cuts on the Brandes & Erlebach book graph.

    The graph is Figure 1 of chapter 7 (Connectivity):
    http://www.informatik.uni-augsburg.de/thi/personen/kammer/Graph_Connectivity.pdf
    """
    book_edges = [
        (1, 2), (1, 3), (1, 4), (1, 5), (2, 3), (2, 6), (3, 4),
        (3, 6), (4, 6), (4, 7), (5, 7), (6, 8), (6, 9), (7, 8),
        (7, 10), (8, 11), (9, 10), (9, 11), (10, 11),
    ]
    G = nx.Graph()
    G.add_edges_from(book_edges)
    for flow_func in flow_funcs:
        options = {"flow_func": flow_func}
        failure = msg.format(flow_func.__name__)

        # Edge cut between nodes 1 and 11.
        local_edge_cut = nx.minimum_edge_cut(G, 1, 11, **options)
        assert_equal(3, len(local_edge_cut), msg=failure)

        # Global edge cut: node 5 has only two edges.
        global_edge_cut = nx.minimum_edge_cut(G, **options)
        assert_equal(2, len(global_edge_cut), msg=failure)
        without_edges = G.copy()
        without_edges.remove_edges_from(global_edge_cut)
        assert_false(nx.is_connected(without_edges), msg=failure)

        # Node cuts between nodes 1 and 11, through both entry points.
        expected_node_cut = {6, 7}
        assert_equal(expected_node_cut,
                     minimum_st_node_cut(G, 1, 11, **options), msg=failure)
        assert_equal(expected_node_cut,
                     nx.minimum_node_cut(G, 1, 11, **options), msg=failure)

        # Global node cut.
        global_node_cut = nx.minimum_node_cut(G, **options)
        assert_equal(2, len(global_node_cut), msg=failure)
        without_nodes = G.copy()
        without_nodes.remove_nodes_from(global_node_cut)
        assert_false(nx.is_connected(without_nodes), msg=failure)
def test_brandes_erlebach_book():
    """Verify edge and node cutsets on the graph from Brandes & Erlebach.

    Figure 1, chapter 7 (Connectivity):
    http://www.informatik.uni-augsburg.de/thi/personen/kammer/Graph_Connectivity.pdf
    """
    G = nx.Graph(
        [(1, 2), (1, 3), (1, 4), (1, 5), (2, 3), (2, 6), (3, 4),
         (3, 6), (4, 6), (4, 7), (5, 7), (6, 8), (6, 9), (7, 8),
         (7, 10), (8, 11), (9, 10), (9, 11), (10, 11)]
    )
    for flow_func in flow_funcs:
        kwargs = {"flow_func": flow_func}
        errmsg = f"Assertion failed in function: {flow_func.__name__}"

        # --- edge cutsets ---
        st_cut_size = len(nx.minimum_edge_cut(G, 1, 11, **kwargs))
        assert st_cut_size == 3, errmsg
        edge_cut = nx.minimum_edge_cut(G, **kwargs)
        # Node 5 has only two edges
        assert len(edge_cut) == 2, errmsg
        edges_removed = G.copy()
        edges_removed.remove_edges_from(edge_cut)
        assert not nx.is_connected(edges_removed), errmsg

        # --- node cuts ---
        assert minimum_st_node_cut(G, 1, 11, **kwargs) == {6, 7}, errmsg
        assert nx.minimum_node_cut(G, 1, 11, **kwargs) == {6, 7}, errmsg
        node_cut = nx.minimum_node_cut(G, **kwargs)
        assert len(node_cut) == 2, errmsg
        nodes_removed = G.copy()
        nodes_removed.remove_nodes_from(node_cut)
        assert not nx.is_connected(nodes_removed), errmsg
Пример #3
0
def test_brandes_erlebach_book():
    """Exercise every flow function on the Brandes & Erlebach example graph.

    Source of the graph: Figure 1, chapter 7 (Connectivity),
    http://www.informatik.uni-augsburg.de/thi/personen/kammer/Graph_Connectivity.pdf
    """
    G = nx.Graph()
    G.add_edges_from([(1, 2), (1, 3), (1, 4), (1, 5), (2, 3), (2, 6), (3, 4)])
    G.add_edges_from([(3, 6), (4, 6), (4, 7), (5, 7), (6, 8), (6, 9), (7, 8)])
    G.add_edges_from([(7, 10), (8, 11), (9, 10), (9, 11), (10, 11)])
    source, target = 1, 11
    for flow_func in flow_funcs:
        kwargs = dict(flow_func=flow_func)
        label = msg.format(flow_func.__name__)

        # Edge cutsets: first the source-target cut, then the global one.
        assert_equal(
            3, len(nx.minimum_edge_cut(G, source, target, **kwargs)), msg=label
        )
        edge_cut = nx.minimum_edge_cut(G, **kwargs)
        # Node 5 has only two edges
        assert_equal(2, len(edge_cut), msg=label)
        H = G.copy()
        H.remove_edges_from(edge_cut)
        assert_false(nx.is_connected(H), msg=label)

        # Node cuts: both source-target entry points, then the global one.
        assert_equal(
            {6, 7}, minimum_st_node_cut(G, source, target, **kwargs), msg=label
        )
        assert_equal(
            {6, 7}, nx.minimum_node_cut(G, source, target, **kwargs), msg=label
        )
        node_cut = nx.minimum_node_cut(G, **kwargs)
        assert_equal(2, len(node_cut), msg=label)
        H = G.copy()
        H.remove_nodes_from(node_cut)
        assert_false(nx.is_connected(H), msg=label)
Пример #4
0
def _split_and_recurse(bcc, Gsub):
    """Cut the graph built from ``bcc`` along a minimum edge cut and recurse.

    The graph is built with ``createGraph(bcc, Gsub)``. If it is connected,
    the edges of a minimum edge cut are removed and ``FindGroups`` is called
    on the graph built from each remaining connected component.
    """
    bccgraph = createGraph(bcc, Gsub)
    if not nx.is_connected(bccgraph):
        return
    for e in nx.minimum_edge_cut(bccgraph):
        bccgraph.remove_edge(e[0], e[1])
    for component in nx.connected_components(bccgraph):
        FindGroups(createGraph(component, bccgraph))


def FindGroups(Gsub):
    """Recursively collect candidate groups from the biconnected components of ``Gsub``.

    For every biconnected component:

    * components larger than ``MAXSIZE`` are split along a minimum edge cut
      and processed recursively;
    * otherwise the component is scored with ``SS``; if its spamicity reaches
      ``MINSPAM`` it is recorded in the module-level ``ccgroups`` and ``grps``
      dictionaries, else (when it has more than two nodes) it is split and
      processed recursively.

    Parameters
    ----------
    Gsub : networkx graph
        Graph whose biconnected components are examined.

    Returns
    -------
    None
        Results are stored in the module-level ``ccgroups`` and ``grps``.

    Raises
    ------
    ZeroDivisionError
        If a recorded group has no users, or none of its user/product pairs
        appears in ``wholerev`` (same as the original implementation).
    """
    MAXSIZE = 50
    MINSPAM = 0.58  # NYC
    # MINSPAM=0.59 #ZIP

    for bcc in list(nx.biconnected_components(Gsub)):
        # Materialise the node ids: on Python 3 ``map`` returns an iterator,
        # which has no ``len()`` and whose ``str()`` is not a usable key.
        bcc = list(map(int, bcc))
        if len(bcc) > MAXSIZE:
            _split_and_recurse(bcc, Gsub)
            continue

        ans = SS(bcc)
        scorepred = ans[0]
        spamicity = ans[1]
        group = ans[2]

        if spamicity >= MINSPAM:
            key = str(bcc)
            if key not in ccgroups:
                ccgroups[key] = 0

            # Fraction of the group's users that are in ``fake``.
            fake_users = 0
            total_users = 0
            for u in group.users:
                if u in fake:
                    fake_users += 1
                total_users += 1
            store = (fake_users * 1.0) / total_users

            # Among the (user, product) pairs present in ``wholerev``,
            # count those whose label is -1.
            flagged_reviews = 0
            total_reviews = 0
            for u in group.users:
                for p in group.prods:
                    if p in wholerev[u]:
                        if int(wholerev[u][p].label) == -1:
                            flagged_reviews += 1
                        total_reviews += 1

            ccgroups[key] = spamicity
            group_id = len(ccgroups)
            if group_id not in grps:
                grps[group_id] = {
                    'id': group_id,
                    'users': list(group.users),
                    'prods': list(group.prods),
                    'scorepred': scorepred,
                    'scoregt': store,
                    'scoregtreviewprec': (flagged_reviews * 1.0) / total_reviews,
                    'fakegt': 0,
                    'fakepred': spamicity,
                }
        elif len(bcc) > 2:
            _split_and_recurse(bcc, Gsub)
Пример #5
0
def print_robustness():
    """Print connectivity figures for the module-level graph ``G``.

    Answers the question: what is the smallest number of nodes (or edges)
    that can be removed from this graph in order to disconnect it?

    The original version computed every value and then discarded it, so
    nothing was ever shown; each result is now printed. The trailing
    comments are the sample outputs recorded by the original author.
    """
    # 1) whole graph
    print('node connectivity:', nx.node_connectivity(G))  # 1 - too small; higher is better
    print('minimum node cut:', nx.minimum_node_cut(G))  # e.g. {'Чкаловская'}
    print('edge connectivity:', nx.edge_connectivity(G))  # 1
    print('minimum edge cut:', nx.minimum_edge_cut(G))  # e.g. {('Марьино', 'Чкаловская')}
    # 2) a concrete pair of stations
    source, target = 'Киевская', 'Чкаловская'
    print('node connectivity:', nx.node_connectivity(G, source, target))  # 2 - better
    print('minimum node cut:', nx.minimum_node_cut(G, source, target))  # e.g. {'Курская', 'Сретенский бульвар'}
    print('edge connectivity:', nx.edge_connectivity(G, source, target))  # 2 - better
    print('minimum edge cut:', nx.minimum_edge_cut(G, source, target))  # e.g. {('Курская', 'Чкаловская'), ('Сретенский бульвар', 'Чкаловская')}
Пример #6
0
    def cut_edges_detection(self, segments, feature):
        """Build a timeline of segments whose boundary has an empty minimum edge cut.

        A graph ``G``, a sequence of node indices ``n_nodes`` and a sequence
        ``T`` are obtained from ``self.build_nodes_edges(segments, feature)``.
        Candidate segments are accumulated in ``hyp``; a candidate is copied
        to the returned timeline when ``nx.minimum_edge_cut(G, T[j + 1], T[j])``
        is empty.

        :param segments: indexable collection of objects exposing ``.start``
            and ``.end`` (indexed with 0 and with the values of ``n_nodes``)
        :param feature: passed through to ``self.build_nodes_edges``
        :return: the ``hypothesis`` Timeline
        """

        #T = self.iterdiff(feature, segments)
        G, n_nodes, T = self.build_nodes_edges(segments, feature)

        hyp = Timeline()
        hypothesis = Timeline()
        # First candidate: from the start of the first segment to the end of
        # the segment indexed by the first entry of n_nodes.
        hyp.add(
            Segment(segments[0].start, segments[n_nodes[0]].end)
        )

        # NOTE(review): this repeats the add above with identical arguments;
        # whether it has any effect depends on Timeline.add — confirm.
        hyp.add(
            Segment(segments[0].start, segments[n_nodes[0]].end)
        )
        for i, j in enumerate(n_nodes):
            # NOTE(review): for i == 0, n_nodes[i - 1] is n_nodes[-1] (the
            # last entry), so the first segment built here starts at the end
            # of the last indexed segment — confirm this is intended.
            hyp.add(
                Segment(
                    segments[n_nodes[i - 1]].end,
                    segments[n_nodes[i]].end
                )
            )
            # "Coupure" is French for "cut": the minimum edge cut between
            # T[j + 1] and T[j] in G.
            Coupure = nx.minimum_edge_cut(G, T[j + 1], T[j])
            if len(Coupure) == 0:
                # NOTE(review): assumes hyp[i] is the i-th segment of hyp in
                # the Timeline's own ordering — verify against Timeline.
                hypothesis.add(hyp[i])

        return hypothesis
Пример #7
0
def test_white_harary_paper():
    """Check cuts on the graph of Figure 1b in White and Harary (2001).

    http://eclectic.ss.uci.edu/~drwhite/sm-w23.PDF
    A graph with high adhesion (edge connectivity) and low cohesion
    (node connectivity).
    """
    G = nx.disjoint_union(nx.complete_graph(4), nx.complete_graph(4))
    G.remove_node(7)
    G.add_edges_from((0, i) for i in range(4, 7))
    G = nx.disjoint_union(G, nx.complete_graph(4))
    G.remove_node(G.order() - 1)
    G.add_edges_from((0, i) for i in range(7, 10))
    for flow_func in flow_funcs:
        options = {"flow_func": flow_func}
        failure = msg.format(flow_func.__name__)

        # Edge cuts: three edges are needed to disconnect the graph.
        edge_cut = nx.minimum_edge_cut(G, **options)
        assert_equal(3, len(edge_cut), msg=failure)
        without_edges = G.copy()
        without_edges.remove_edges_from(edge_cut)
        assert_false(nx.is_connected(without_edges), msg=failure)

        # Node cuts: node 0 alone disconnects the graph.
        node_cut = nx.minimum_node_cut(G, **options)
        assert_equal({0}, node_cut, msg=failure)
        without_nodes = G.copy()
        without_nodes.remove_nodes_from(node_cut)
        assert_false(nx.is_connected(without_nodes), msg=failure)
Пример #8
0
def RemoveCardCut(F, k):
    """Remove ``k`` edges from a copy of ``F``, starting with a minimum s-t cut.

    A minimum-cardinality edge cut between the nodes ``'s'`` and ``'t'`` is
    computed and its edges are removed in order of descending ``'capacity'``
    until ``k`` edges are gone. If the cut has fewer than ``k`` edges, the
    remainder is drawn at random from the edges still in the graph.

    Parameters
    ----------
    F : networkx graph
        Input graph; it is deep-copied and not modified. Every edge that is
        removed must carry a ``'capacity'`` attribute.
    k : int
        Number of edges to remove.

    Returns
    -------
    (G, k_set)
        ``G`` is the modified copy and ``k_set`` is a list of
        ``(u, v, capacity, 0)`` tuples, one per removed edge.

    Raises
    ------
    ValueError
        From ``random.sample`` when fewer edges remain than still need to be
        removed.
    """
    G = copy.deepcopy(F)

    # Finding the s-t cut with the smallest cardinality
    card_set = nx.minimum_edge_cut(G, 's', 't')

    # Sorting the cut by descending capacities
    card_cut_set = sorted(
        ((u, v, G[u][v]['capacity']) for u, v in card_set),
        key=lambda cut_edge: cut_edge[2],
        reverse=True,
    )
    print('Card cut set : ', card_cut_set)
    print('\n')

    # Removing at most k cut edges from G and adding them to the k-set.
    # max(k, 0) keeps a negative k from selecting edges through slicing.
    k_set = []
    for u, v, c in card_cut_set[:max(k, 0)]:
        k_set.append((u, v, c, 0))
        G.remove_edge(u, v)

    # If there are fewer than k edges in the cut set, remove the remaining
    # edges at random.
    cs = len(card_cut_set)
    if cs < k:
        # random.sample needs a sequence; an edge view is not one and is
        # rejected on Python 3.11+, so materialise the edges first.
        for u, v in random.sample(list(G.edges()), k - cs):
            k_set.append((u, v, G[u][v]['capacity'], 0))
            G.remove_edge(u, v)

    return G, k_set
Пример #9
0
 def getMinCut(self):
     """Return the size of a minimum edge cut of ``self.getGraph()``.

     Returns -1 when networkx cannot compute a cut, which is what happens
     for a graph that is not connected. Only networkx exceptions are
     handled, so KeyboardInterrupt, SystemExit and programming errors are
     no longer silently turned into -1.
     """
     graph = self.getGraph()
     try:
         min_cut = nx.minimum_edge_cut(graph)
     except nx.NetworkXException:  # not connected
         return -1
     return len(min_cut)
def find_which_nodes_and_edges_to_remove(G, n1, n2):
    """Report what must be removed from ``G`` to separate ``n1`` from ``n2``.

    Prints and returns the tuple
    ``(node_connectivity, minimum_node_cut, edge_connectivity, minimum_edge_cut)``
    computed between ``n1`` and ``n2``.
    """
    report = (
        nx.node_connectivity(G, n1, n2),
        nx.minimum_node_cut(G, n1, n2),
        nx.edge_connectivity(G, n1, n2),
        nx.minimum_edge_cut(G, n1, n2),
    )
    print(*report)
    return report
Пример #11
0
    def graph_change(self, idx1, idx2):
        """Toggle the edge between two nodes on a fresh copy of ``self.G1``.

        ``self.G2`` is replaced by a deep copy of ``self.G1``; the nodes are
        looked up as ``self.M[idx1]`` and ``self.M[idx2]``.

        Returns
        -------
        1  if the edge was missing and has been added to ``self.G2`` with
           weight ``self.dist(v1, v2)``;
        -1 if the edge exists but is a bridge (the minimum edge cut between
           its endpoints has size 1), in which case the graph is left as is;
        0  if the edge existed, was not a bridge, and has been removed.
        """
        self.G2 = deepcopy(self.G1)
        first, second = self.M[idx1], self.M[idx2]

        # Missing edge: add it.
        if not self.G2.has_edge(first, second):
            self.G2.add_edge(first, second, weight=self.dist(first, second))
            return 1

        # Present but a bridge: leave the graph untouched.
        separating_edges = nx.minimum_edge_cut(self.G2, first, second)
        if len(separating_edges) == 1:
            return -1

        # Present and not a bridge: remove it.
        self.G2.remove_edge(first, second)
        return 0
Пример #12
0
def __cut(graph):
    """Split a directed graph in two along a minimum edge cut.

    The cut is computed on the undirected version of ``graph``; the matching
    directed edges are then removed from ``graph`` itself (in place).

    param:
        graph: a nx.DiGraph obj with more than one node
    return:
        rcs : the directed edges of ``graph`` that were removed
        g1 , g2 : the two weakly connected subgraphs left after the removal
    raises:
        Exception if the undirected cut set is empty
        AssertionError if the input is invalid or the removal does not leave
        exactly two weakly connected components
    """
    assert isinstance(graph, nx.DiGraph), "graph class: %s " % graph.__class__
    assert graph.number_of_nodes(
    ) > 1, "Number of nodes: %d" % graph.number_of_nodes()
    unigraph = nx.Graph(graph)
    cs = nx.minimum_edge_cut(unigraph)
    if not cs:
        # Call syntax works on both Python 2 and 3 (the old
        # ``raise Exception, "..."`` form is a SyntaxError on Python 3).
        raise Exception("Cut Set of this graph is Empty")

    # An edge of the undirected cut may exist in the original directed graph
    # in the opposite direction, or in both directions. Collect every real
    # directed edge it stands for; removing only one of two antiparallel
    # edges would leave the graph weakly connected.
    rcs = []
    for u, v in cs:
        if graph.has_edge(u, v):
            rcs.append((u, v))
        if graph.has_edge(v, u):
            rcs.append((v, u))  # reversed direction
    graph.remove_edges_from(rcs)

    # ``weakly_connected_component_subgraphs`` was removed from networkx;
    # building the subgraphs from the node sets works on old and new versions.
    glist = [
        graph.subgraph(nodes)
        for nodes in nx.weakly_connected_components(graph)
    ]
    assert len(glist) == 2
    return rcs, glist[0], glist[1]
Пример #13
0
 def getready(self, jsonfile=None, randcof=(10, 0.5), hops=0):
     """Do the basic stuff to get ready.

     Creates the network, computes EOTX and transmission credit, builds the
     node objects and precomputes the S-D minimum edge cut and shortest path.

     Without ``jsonfile`` the network is created repeatedly until an attempt
     succeeds; with ``jsonfile`` it is created exactly once.

     :param jsonfile: passed to ``readconf``; ``None`` selects the retry loop
     :param randcof: forwarded to ``self.createnetwork``
     :param hops: when truthy, a created network is rejected and rebuilt
         whenever ``self.checkhops(hops)`` is truthy
     """
     # Retry loop: only entered when no JSON file was given.
     while jsonfile is None:
         try:
             self.createnetwork(config=readconf(jsonfile), randcof=randcof)
             if hops and self.checkhops(hops):
                 continue
             self.calceotx()
             credit = self.calc_tx_credit()
         except ZeroDivisionError:
             logging.info('Found graph with no connection between S and D')
             continue
         except nx.exception.NetworkXException:
             logging.info(str(self.graph.nodes))
             logging.info(str(self.graph.edges))
             continue
         logging.info('Created random graph successfully!')
         break
     else:
         # while/else: this branch runs only when the loop ended without
         # ``break``, i.e. when ``jsonfile`` was given and the loop body
         # never executed. Errors are not caught here.
         self.createnetwork(config=readconf(jsonfile), randcof=randcof)
         self.calceotx()
         credit = self.calc_tx_credit()
         logging.info('Created network from JSON successfully!')
     # One components.Node per graph node, keyed by node name.
     self.nodes = {
         name: components.Node(name=name,
                               coding=self.coding,
                               fieldsize=self.fieldsize,
                               random=self.random)
         for name in self.graph.nodes
     }
     for node in self.nodes.values():
         try:
             node.seteotx(self.graph.nodes[str(node)]['EOTX'])
             if not self.nomore:
                 node.setcredit(credit[str(node)])
         except KeyError:
             # Nodes without an 'EOTX' attribute or a credit entry are skipped.
             pass
     self.eotxdict['None'] = self.geteotx()
     if self.anchor:
         self.calcanchor()
         for node in self.nodes.values():
             try:
                 node.setpriority(self.graph.nodes[str(node)]['Priority'])
                 # Replaces the credit set above with the node's 'codingRate'.
                 node.setcredit(self.graph.nodes[str(node)]['codingRate'])
             except KeyError:
                 pass
         # Overwrites the EOTX entry stored above with the node priorities.
         self.eotxdict['None'] = {
             node: self.graph.nodes[node]['Priority']
             for node in self.graph.nodes
         }
     # Minimum edge cut and weighted shortest path between 'S' and 'D'.
     self.mcut = nx.minimum_edge_cut(self.graph, s='S', t='D')
     self.dijkstra = nx.shortest_path(self.graph,
                                      source='S',
                                      target='D',
                                      weight='weight')
     if self.moreres:
         try:
             self.calcdeotx()
         except ZeroDivisionError:
             pass
Пример #14
0
def test_edge_cutset_random_graphs():
    """A minimum edge cut of a random graph has edge-connectivity size and disconnects it."""
    for _ in range(5):
        G = nx.fast_gnp_random_graph(50,0.2)
        if not nx.is_connected(G):
            # minimum_edge_cut needs a connected graph, and a G(n, p) sample
            # is occasionally disconnected: link every component to the
            # first one so the test is not flaky.
            components = list(nx.connected_components(G))
            anchor = next(iter(components[0]))
            G.add_edges_from(
                (anchor, next(iter(component))) for component in components[1:]
            )
        cutset = nx.minimum_edge_cut(G)
        assert_equal(nx.edge_connectivity(G), len(cutset))
        G.remove_edges_from(cutset)
        assert_false(nx.is_connected(G))
Пример #15
0
def test_edge_cutset_random_graphs():
    """Check that a minimum edge cut matches edge connectivity on random graphs."""
    for _ in range(5):
        G = nx.fast_gnp_random_graph(50, 0.2)
        if not nx.is_connected(G):
            # A random sample may be disconnected, in which case
            # minimum_edge_cut cannot be computed; join all components
            # through one node of the first component.
            components = list(nx.connected_components(G))
            hub = next(iter(components[0]))
            G.add_edges_from((hub, next(iter(c))) for c in components[1:])
        cutset = nx.minimum_edge_cut(G)
        # The cut is minimum, so its size equals the edge connectivity ...
        assert_equal(nx.edge_connectivity(G), len(cutset))
        # ... and removing it must disconnect the graph.
        G.remove_edges_from(cutset)
        assert_false(nx.is_connected(G))
Пример #16
0
def test_white_harary_paper():
    """Verify edge and node cuts on White and Harary (2001), Figure 1b.

    http://eclectic.ss.uci.edu/~drwhite/sm-w23.PDF
    A graph with high adhesion (edge connectivity) and low cohesion
    (node connectivity).
    """
    hub = 0
    G = nx.disjoint_union(nx.complete_graph(4), nx.complete_graph(4))
    G.remove_node(7)
    for neighbor in (4, 5, 6):
        G.add_edge(hub, neighbor)
    G = nx.disjoint_union(G, nx.complete_graph(4))
    G.remove_node(G.order() - 1)
    for neighbor in (7, 8, 9):
        G.add_edge(hub, neighbor)
    for flow_func in flow_funcs:
        kwargs = dict(flow_func=flow_func)
        label = msg.format(flow_func.__name__)
        # edge cuts
        edge_cut = nx.minimum_edge_cut(G, **kwargs)
        assert_equal(3, len(edge_cut), msg=label)
        H = G.copy()
        H.remove_edges_from(edge_cut)
        assert_false(nx.is_connected(H), msg=label)
        # node cuts
        node_cut = nx.minimum_node_cut(G, **kwargs)
        assert_equal({hub}, node_cut, msg=label)
        H = G.copy()
        H.remove_nodes_from(node_cut)
        assert_false(nx.is_connected(H), msg=label)
Пример #17
0
def constructGraphFromPredictions(predicted_labels, labeled_set, unlabeled_set, remove_bridges, unlabeled_set_md):
    """Build a graph from predicted and labeled pairs, keeping labeled-false pairs apart.

    Edges are the ``(source, target)`` rows of ``unlabeled_set`` selected by
    ``predicted_labels`` plus the rows of ``labeled_set`` whose ``label`` is
    true; rows labeled false are removed as edges. When ``remove_bridges`` is
    truthy, the edges returned by ``find_bridges_`` are removed as well.
    Finally, for every labeled-false pair still joined by a path, a minimum
    edge cut between the two nodes is removed.

    ``unlabeled_set_md`` is accepted but not used.

    Returns the resulting ``nx.Graph``.
    """
    pair_columns = ['source', 'target']

    def as_pairs(frame):
        # One tuple per row of the frame.
        return [tuple(row) for row in frame.values]

    predicted_true_pairs = as_pairs(unlabeled_set[pair_columns][predicted_labels])
    labeled_true_pairs = as_pairs(labeled_set[labeled_set.label][pair_columns])
    labeled_false_pairs = as_pairs(labeled_set[labeled_set.label == False][pair_columns])

    G = nx.Graph()
    for positive_pairs in (predicted_true_pairs, labeled_true_pairs):
        G.add_edges_from(positive_pairs)
    G.remove_edges_from(labeled_false_pairs)

    if remove_bridges:
        G.remove_edges_from(find_bridges_(G, labeled_true_pairs))

    # Remove cuts so that there is no path between labeled false pairs.
    for pair in labeled_false_pairs:
        node_a, node_b = pair[0], pair[1]
        if not has_path_(G, node_a, node_b):
            continue
        G.remove_edges_from(
            nx.minimum_edge_cut(G, s=node_a, t=node_b, flow_func=shortest_augmenting_path)
        )

    return G
Пример #18
0
def RunProbabilityTest(G):
  '''
  Description:
    Finds the probability of running Karger's on the
    same graph 10*n**2 times with n = 20 and finding
    the min cut correctly.

  Args:
    G networkx graph of 20 nodes

  Returns:
    probability of running Karger's algo
    on the same graph and finding the min cut,
    rounded to three decimals
  '''
  # Number of trials: 10*n**2 with n = 20
  runs = 10 * 20**2
  min_cuts_found = 0.0

  min_edge_cut = len(nx.minimum_edge_cut(G))
  for _ in range(runs):
    # RunKarger gets its own copy so that G is left untouched
    H = RunKarger(G.copy())

    # See if karger's returns the correct min cut
    if H.number_of_edges() == min_edge_cut:
      min_cuts_found += 1

  # Formatted explicitly so the line reads the same on Python 2 and 3
  # (the old ``print a, b`` statement is a SyntaxError on Python 3).
  print('{0} {1}'.format(min_cuts_found, runs))
  return float('{0:.3f}'.format(min_cuts_found / runs))
Пример #19
0
 def getMinCut(self):
     """Return the number of edges in a minimum edge cut of the graph.

     The graph comes from ``self.getGraph()``. When networkx refuses to
     compute a cut (as it does for a graph that is not connected) the
     method returns -1. Exceptions unrelated to networkx, including
     KeyboardInterrupt and SystemExit, now propagate instead of being
     swallowed by a bare ``except``.
     """
     graph = self.getGraph()
     try:
         min_cut = nx.minimum_edge_cut(graph)
     except nx.NetworkXException:  # not connected
         return -1
     return len(min_cut)
Пример #20
0
def test_white_harary_paper():
    """Test minimum cuts on the White and Harary (2001) example, Figure 1b.

    http://eclectic.ss.uci.edu/~drwhite/sm-w23.PDF
    A graph with high adhesion (edge connectivity) and low cohesion
    (node connectivity).
    """
    G = nx.disjoint_union(nx.complete_graph(4), nx.complete_graph(4))
    G.remove_node(7)
    G.add_edges_from((0, i) for i in range(4, 7))
    G = nx.disjoint_union(G, nx.complete_graph(4))
    G.remove_node(G.order() - 1)
    G.add_edges_from((0, i) for i in range(7, 10))
    for flow_func in flow_funcs:
        kwargs = {"flow_func": flow_func}
        errmsg = f"Assertion failed in function: {flow_func.__name__}"

        # Edge cuts: three edges, and removing them disconnects the graph.
        edge_cut = nx.minimum_edge_cut(G, **kwargs)
        assert len(edge_cut) == 3, errmsg
        edges_removed = G.copy()
        edges_removed.remove_edges_from(edge_cut)
        assert not nx.is_connected(edges_removed), errmsg

        # Node cuts: node 0 alone, and removing it disconnects the graph.
        node_cut = nx.minimum_node_cut(G, **kwargs)
        assert node_cut == {0}, errmsg
        nodes_removed = G.copy()
        nodes_removed.remove_nodes_from(node_cut)
        assert not nx.is_connected(nodes_removed), errmsg
Пример #21
0
def __cut(graph):
    """Cut a directed graph into two parts along a minimum edge cut.

    The minimum edge cut is found on an undirected copy of ``graph``; the
    corresponding directed edges are removed from ``graph`` in place.

    param:
        graph: a nx.DiGraph obj with at least two nodes
    return:
        rcs : list of the directed edges removed from ``graph``
        g1 , g2 : the two weakly connected subgraphs that remain
    raises:
        Exception when the cut set of the undirected graph is empty
        AssertionError when the input is invalid or the graph does not fall
        apart into exactly two weakly connected components
    """
    assert isinstance(graph, nx.DiGraph), "graph class: %s " % graph.__class__
    assert graph.number_of_nodes() > 1,   "Number of nodes: %d" % graph.number_of_nodes()
    unigraph = nx.Graph(graph)
    cs = nx.minimum_edge_cut(unigraph)
    if not cs:
        # ``raise Exception, "..."`` is Python 2 only; the call form is valid
        # on both Python 2 and Python 3.
        raise Exception("Cut Set of this graph is Empty")

    # Edges in the cut set may not exist in the original directed graph in
    # the listed orientation, so such edges have to be reversed. When both
    # orientations exist, both are removed; otherwise the two sides would
    # stay weakly connected. All real edges are stored in rcs.
    rcs = []
    for tail, head in cs:
        if graph.has_edge(tail, head):
            rcs.append((tail, head))
        if graph.has_edge(head, tail):
            rcs.append((head, tail))  # swapped direction
    graph.remove_edges_from(rcs)

    # networkx no longer ships ``weakly_connected_component_subgraphs``;
    # taking the subgraph of each component is the portable spelling.
    glist = [graph.subgraph(c) for c in nx.weakly_connected_components(graph)]
    assert len(glist) == 2
    return rcs, glist[0], glist[1]
Пример #22
0
def greedy_edge_disjoint(self, G, source='s', target='t', weight='None', k=''):
    """
    Greedy Algorithm to find edge disjoint subgraph from s to t. 
    See Hyman et al. 2018 SIAM MMS

    Parameters
    ----------
        self : object 
            DFN Class Object
        G : NetworkX graph
            NetworkX Graph based on the DFN
        source : node 
            Starting node
        target : node
            Ending node
        weight : string
            Edge weight used for finding the shortest path
        k : int
            Number of edge disjoint paths requested. If left at the default
            ('') or larger than the size of the minimum edge cut between
            source and target, that size is used instead.
    
    Returns
    -------
        H : NetworkX Graph
            Subgraph of G made up of the k shortest of all edge-disjoint paths from source to target

    Notes
    -----
        1. Edge weights must be numerical and non-negative.
        2. See Hyman et al. 2018 "Identifying Backbones in Three-Dimensional Discrete Fracture Networks: A Bipartite Graph-Based Approach" SIAM Multiscale Modeling and Simulation for more details 
        3. The minimum cut is computed between ``source`` and ``target``
           (previously it was always computed between the literal nodes
           's' and 't'), and at most k paths are collected (previously one
           extra path was added when k was below the minimum cut).

    """
    print("--> Identifying edge disjoint paths")
    if G.graph['representation'] != "intersection":
        print(
            "--> ERROR!!! Wrong type of DFN graph representation\nRepresentation must be intersection\nReturning Empty Graph\n"
        )
        return nx.Graph()
    Gprime = G.copy()
    Hprime = nx.Graph()
    Hprime.graph['representation'] = G.graph['representation']
    cnt = 0

    # if a number of paths is not provided k will equal the min cut between
    # source and target
    min_cut = len(nx.minimum_edge_cut(G, source, target))
    if k == '' or k > min_cut:
        k = min_cut

    # Repeatedly take the current shortest path, record its edges and remove
    # them from the working copy so the next path is edge-disjoint.
    while cnt < k and nx.has_path(Gprime, source, target):
        path = nx.shortest_path(Gprime, source, target, weight=weight)
        # NOTE(review): this is the subgraph induced by the path's nodes, so
        # an edge between non-consecutive path nodes would be included too —
        # confirm that is intended.
        H = Gprime.subgraph(path)
        Hprime.add_edges_from(H.edges(data=True))
        Gprime.remove_edges_from(list(H.edges()))
        cnt += 1

    print("--> Complete")
    return Hprime
Пример #23
0
def min_cut_volume(recorder, graph, labels, params):
    """Separate nodes 0 and 1 with a minimum edge cut and label the two sides.

    Only the 1-shot / 2-way setting is supported (asserted). The cut edges
    are removed from ``graph`` in place; the connected components containing
    node 0 and node 1 are then handed to ``connect_parts_labels`` together
    with the labels of those two nodes, and its result is returned.
    """
    assert params.n_shot == 1 and params.n_way == 2
    graph.remove_edges_from(nx.minimum_edge_cut(graph, 0, 1))
    part_a, part_b = (
        nx.node_connected_component(graph, seed_node) for seed_node in (0, 1)
    )
    label_0 = labels[0].item()
    label_1 = labels[1].item()
    return connect_parts_labels(
        recorder, graph, part_a, part_b, labels, label_0, label_1
    )
Пример #24
0
 def minimum_edge_cut(self):
     """Print the size and the edges of a minimum edge cut of ``self.DG``.

     When ``self.DG`` is not connected, only a notice is printed.
     """
     if not nx.is_connected(self.DG):
         print('Graph is not connected! No information provided for the minimum edge cut.')
         return

     cutset = nx.minimum_edge_cut(self.DG)
     cut_size = str(len(cutset))
     print('### information about the minimum number of edges that disconnects the graph')
     print('the minimum number of edges that if removed, '
           'would partition the graph into two components: ' + cut_size)
     print('the minimum edge cut contains the following edges: ' + str(cutset))
Пример #25
0
def mincut(edges: Iterable[Sequence[np.uint64]], affs: Sequence[np.uint64],
           source: np.uint64, sink: np.uint64) -> np.ndarray:
    """ Computes the min cut on a local graph

    Only the connected component that contains both ``source`` and ``sink``
    is kept before the cut is computed. Timing and component sizes are
    printed along the way.

    :param edges: n x 2 array of uint64s
    :param affs: float array of length n
    :param source: uint64
    :param sink: uint64
    :return: m x 2 array of uint64s
        edges that should be removed; an empty array when ``source`` and
        ``sink`` are not in the same connected component (nothing needs to
        be cut in that case)
    """

    time_start = time.time()

    # edges, affs, remapping = merge_cross_chunk_edges(edges, affs)

    # Build the graph in a single pass so that ``edges`` may be any iterable.
    weighted_graph = nx.Graph()
    for i_edge, edge in enumerate(edges):
        weighted_graph.add_edge(edge[0], edge[1], capacity=affs[i_edge])

    dt = time.time() - time_start
    print("Graph creation: %.2fms" % (dt * 1000))
    time_start = time.time()

    # Drop every component that does not contain both terminals.
    for cc in list(nx.connected_components(weighted_graph)):
        if not (source in cc and sink in cc):
            weighted_graph.remove_nodes_from(cc)

    if source not in weighted_graph or sink not in weighted_graph:
        # The terminals are already separated (or absent); asking networkx
        # for a cut would raise because the nodes are no longer in the graph.
        return np.array([], dtype=np.uint64)

    # NOTE(review): nx.minimum_edge_cut is documented as returning a cut of
    # minimum cardinality; the 'capacity' values set above presumably do not
    # influence it — confirm whether a capacity-weighted cut was intended.
    cutset = nx.minimum_edge_cut(weighted_graph,
                                 source,
                                 sink,
                                 flow_func=shortest_augmenting_path)

    dt = time.time() - time_start
    print("Mincut: %.2fms" % (dt * 1000))

    if cutset is None:
        return np.array([], dtype=np.uint64)

    time_start = time.time()

    # Sanity check: report how the graph falls apart once the cut is removed.
    weighted_graph.remove_edges_from(cutset)
    ccs = list(nx.connected_components(weighted_graph))
    print("Graph split up in %d parts" % (len(ccs)))

    for cc in ccs:
        print("CC size = %d" % len(cc))

    dt = time.time() - time_start
    print("Test: %.2fms" % (dt * 1000))

    return np.array(list(cutset), dtype=np.uint64)
Пример #26
0
def test_edge_cutset_random_graphs():
    """A minimum edge cut of a connected random graph has edge-connectivity size and disconnects it."""
    for _ in range(5):
        G = nx.fast_gnp_random_graph(50,0.2)
        if not nx.is_connected(G):
            # Connect the graph by linking one node of every component to one
            # node of the first component. Components are sets, which cannot
            # be indexed, so a representative is taken with next(iter(...)).
            components = list(nx.connected_components(G))
            start = next(iter(components[0]))
            G.add_edges_from((start, next(iter(c))) for c in components[1:])
        cutset = nx.minimum_edge_cut(G)
        assert_equal(nx.edge_connectivity(G), len(cutset))
        G.remove_edges_from(cutset)
        assert_false(nx.is_connected(G))
def test_edge_cutset_random_graphs():
    """Check minimum edge cuts against edge connectivity on random graphs."""
    for _ in range(5):
        G = nx.fast_gnp_random_graph(50, 0.2)
        if not nx.is_connected(G):
            # Join all components through a single hub node. A component is
            # a set of nodes, so ``component[0]`` would raise TypeError; pick
            # an arbitrary member instead.
            components = list(nx.connected_components(G))
            hub = next(iter(components[0]))
            G.add_edges_from(
                (hub, next(iter(component))) for component in components[1:]
            )
        cutset = nx.minimum_edge_cut(G)
        assert_equal(nx.edge_connectivity(G), len(cutset))
        G.remove_edges_from(cutset)
        assert_false(nx.is_connected(G))
Пример #28
0
    def calculate_bridges(self, G):
        """Count the edges of ``G`` whose endpoints are separated by a one-edge cut.

        An edge is counted when the minimum edge cut between its two
        endpoints has size 1, i.e. the edge is a bridge.

        Raises TypeError (after printing a message) unless ``G`` is exactly
        an ``nx.Graph``.
        """
        if type(G) != nx.Graph:
            print("Argument passed to the function should be a Graph")
            raise TypeError

        return sum(
            1
            for edge in G.edges()
            if len(nx.minimum_edge_cut(G, edge[0], edge[1])) == 1
        )
Пример #29
0
def reduce_graph(g):
    """Classify ``g`` by the edge capacities found in the graph ``A`` and patch ``g``.

    Returns a triple ``(graph, components, level)``:

    * ``(g, None, 1)`` when no edge of ``A`` has capacity above 1;
    * ``(g, None, 2)`` when the largest capacity in ``A`` is exactly 2, or
      when no component of ``A`` with at least three nodes is left;
    * ``(g, G, 3)`` otherwise, where ``G`` lists the connected components of
      ``A`` (after low-capacity edges were removed) that have at least three
      nodes.

    Raises ``Exception("Biconnected edge does not match")`` when the cut
    edges of a capacity-2 edge cannot be paired up across components.

    NOTE(review): ``A`` is not defined in this function; presumably it is a
    module-level graph filled in by ``construction(...)`` — confirm.
    """
    # show_graph(g,3)
    # NOTE(review): the (graph, name, value) argument order and the
    # ``g1.edge`` attribute used below look like the networkx 1.x API —
    # confirm the installed version.
    nx.set_edge_attributes(g, 'capacity', 1)
    g = nx.convert_node_labels_to_integers(g, 0)
    # g1: simple graph over g's edges; an edge seen again gets its capacity
    # incremented instead of being added a second time.
    g1 = nx.Graph()
    for edge in g.edges():
        if edge in g1.edges():
            g1.edge[edge[0]][edge[1]]['capacity'] += 1
        else:
            g1.add_edge(edge[0], edge[1], capacity=1)
    construction(g1, 0, set(g1.nodes()))
    # show_graph(A, 3)
    capacities=nx.get_edge_attributes(A,'capacity')
    if max(capacities.values())<=1:
        return g,None,1
    if max(capacities.values())==2:
        return g,None,2
    # Edges of A with capacity <= 2 are removed; those with capacity exactly
    # 2 are additionally remembered for the patching step below.
    remove_edges = list()
    biconnected = list()
    for e in A.edges(data=True):
        if e[2]['capacity'] <= 2:
            remove_edges.append(e)
        if e[2]['capacity'] == 2:
            biconnected.append(e)
    A.remove_edges_from(remove_edges)
    for e in biconnected:
        # The indexing below reads cut_edges[0] and cut_edges[1], so the cut
        # between the two endpoints is expected to hold at least two edges.
        cut_edges = list(nx.minimum_edge_cut(g1, e[0], e[1]))
        # Pair up the endpoints of the two cut edges that lie in the same
        # component of A and connect each pair in g.
        if cut_edges[1][0] in nx.node_connected_component(A, cut_edges[0][0]):
            g.add_edge(cut_edges[0][0], cut_edges[1][0])
            if cut_edges[1][1] in nx.node_connected_component(A, cut_edges[0][1]):
                g.add_edge(cut_edges[0][1], cut_edges[1][1])
            else:
                raise Exception("Biconnected edge does not match")
        elif cut_edges[1][0] in nx.node_connected_component(A, cut_edges[0][1]):
            # NOTE(review): this adds (cut_edges[0][0], cut_edges[1][1])
            # before the membership check that follows — confirm the order.
            g.add_edge(cut_edges[0][0], cut_edges[1][1])
            if cut_edges[1][1] in nx.node_connected_component(A, cut_edges[0][0]):
                g.add_edge(cut_edges[0][1], cut_edges[1][0])
            else:
                raise Exception("Biconnected edge does not match")
        else:
            raise Exception("Biconnected edge does not match")
        # print e[0],g1[e[0]]
        # print e[1],g1[e[1]]
    # show_graph(A, 1)
    # show_graph(g,1)
    # Keep only the components of A with at least three nodes; a second list
    # is iterated so that G can be modified safely.
    G = list(nx.connected_components(A))
    connected_components=list(nx.connected_components(A))
    for i in connected_components:
        if len(i)<3:
            G.remove(i)
    if G==[]:
        return g,None,2
    return g,G,3
Пример #30
0
def test_edge_cutset_random_graphs():
    """For each flow function, a minimum edge cut matches edge connectivity and disconnects the graph."""
    for flow_func in flow_funcs:
        for _ in range(3):
            G = nx.fast_gnp_random_graph(50, 0.25)
            if not nx.is_connected(G):
                # Link every further component to one node of the first.
                components = iter(nx.connected_components(G))
                anchor = arbitrary_element(next(components))
                G.add_edges_from(
                    (anchor, arbitrary_element(component))
                    for component in components
                )
            cutset = nx.minimum_edge_cut(G, flow_func=flow_func)
            assert_equal(
                nx.edge_connectivity(G),
                len(cutset),
                msg=msg.format(flow_func.__name__),
            )
            G.remove_edges_from(cutset)
            assert_false(
                nx.is_connected(G), msg=msg.format(flow_func.__name__)
            )
Пример #31
0
def test_edge_cutset_random_graphs():
    """Check minimum edge cuts on three distinct, reproducible random graphs per flow function."""
    for flow_func in flow_funcs:
        errmsg = f"Assertion failed in function: {flow_func.__name__}"
        for i in range(3):
            # A constant seed would generate the same graph on every
            # iteration; offsetting it by the loop index keeps the test
            # deterministic while exercising three different graphs.
            G = nx.fast_gnp_random_graph(50, 0.25, seed=42 + i)
            if not nx.is_connected(G):
                # Link every further component to one node of the first.
                ccs = iter(nx.connected_components(G))
                start = arbitrary_element(next(ccs))
                G.add_edges_from((start, arbitrary_element(c)) for c in ccs)
            cutset = nx.minimum_edge_cut(G, flow_func=flow_func)
            assert nx.edge_connectivity(G) == len(cutset), errmsg
            G.remove_edges_from(cutset)
            assert not nx.is_connected(G), errmsg
Пример #32
0
def test_edge_cutset_random_graphs():
    """Compare minimum edge cuts with edge connectivity on random graphs, per flow function."""
    trials_per_flow_func = 3
    for flow_func in flow_funcs:
        for _trial in range(trials_per_flow_func):
            G = nx.fast_gnp_random_graph(50, 0.25)
            if not nx.is_connected(G):
                # Make the graph connected: join one node of each remaining
                # component to one node of the first component.
                ccs = iter(nx.connected_components(G))
                first_node = arbitrary_element(next(ccs))
                bridging_edges = (
                    (first_node, arbitrary_element(cc)) for cc in ccs
                )
                G.add_edges_from(bridging_edges)
            cutset = nx.minimum_edge_cut(G, flow_func=flow_func)
            expected_size = nx.edge_connectivity(G)
            assert_equal(expected_size, len(cutset),
                         msg=msg.format(flow_func.__name__))
            G.remove_edges_from(cutset)
            still_connected = nx.is_connected(G)
            assert_false(still_connected, msg=msg.format(flow_func.__name__))
def UndirectedGraphFeature(df, dfOrigin, part):
    """Add undirected API-call-graph features to ``df``, one row per CSV file.

    For every index of ``df`` (paired with the unique ``file_id`` values of
    ``dfOrigin``) the file ``./<part>/<index>.csv`` is read, the ``api``
    values of each ``tid`` group are chained into a path, and all paths are
    merged into one undirected graph. Graphs with at most one node are
    skipped. For a disconnected graph every feature column is set to -1;
    otherwise the graph metrics are stored and ``<api>_center_degree`` is set
    to 1 for every node in the graph's center.

    ``df`` is modified in place and also returned. The processed index is
    printed after each file that was not skipped.
    """
    indexs, files = df.index, dfOrigin.file_id.unique()
    for col in apiSet:
        df[col + '_center_degree'] = 0

    # Columns filled with -1 when the graph is disconnected (order matters:
    # it decides the order in which missing columns get created).
    sentinel_columns = (
        'avg_length',
        'minimum_edge_cut',
        'degree_assortativity_coefficient',
        'radius',
        'diameter',
        'periphery',
        'is_eulerian',
        'center',
        'order',
        'size',
        'density',
    )

    for index, file in zip(indexs, files):
        X = pd.read_csv('./' + part + '/' + str(index) + '.csv')
        api = pd.DataFrame(X.groupby(by='tid').apply(lambda x: ' '.join(x.api)))
        api.rename(columns={0: 'api_call'}, inplace=True)

        # Consecutive API calls of a thread become edges.
        G = nx.Graph()
        for row in api.index:
            calls = api.loc[row, 'api_call'].split(' ')
            G.add_edges_from(zip(calls, calls[1:]))
        if len(G) <= 1:
            continue

        if not nx.is_connected(G):
            for column in sentinel_columns:
                df.loc[index, column] = -1
        else:
            df.loc[index, 'avg_length'] = nx.average_shortest_path_length(G)
            df.loc[index, 'minimum_edge_cut'] = len(set(nx.minimum_edge_cut(G)))
            df.loc[index, 'degree_assortativity_coefficient'] = nx.degree_assortativity_coefficient(G)
            df.loc[index, 'radius'] = nx.radius(G)
            df.loc[index, 'diameter'] = nx.diameter(G)
            df.loc[index, 'periphery'] = len(set(nx.periphery(G)))
            df.loc[index, 'is_eulerian'] = int(nx.is_eulerian(G))
            df.loc[index, 'center'] = len(set(nx.center(G)))
            df.loc[index, 'density'] = nx.density(G)
            df.loc[index, 'order'] = G.order()
            df.loc[index, 'size'] = G.size()

            # Flag every API that sits in the center of the graph.
            for central_api in set(nx.center(G)):
                df.loc[index, central_api + '_center_degree'] = 1
        print(index)
    return df
Пример #34
0
def test_octahedral_cutset():
    """Check that the octahedral graph has minimum edge and node cuts of size 4.

    For each kind of cut, the cut is removed from a copy of the graph and the
    remainder must no longer be connected.
    """
    octahedron = nx.octahedral_graph()

    # Edge cut: check its size, then confirm it really separates the graph.
    cut_edges = nx.minimum_edge_cut(octahedron)
    assert_equal(4, len(cut_edges))
    remainder = octahedron.copy()
    remainder.remove_edges_from(cut_edges)
    assert_false(nx.is_connected(remainder))

    # Node cut: the same two checks.
    cut_nodes = nx.minimum_node_cut(octahedron)
    assert_equal(4, len(cut_nodes))
    remainder = octahedron.copy()
    remainder.remove_nodes_from(cut_nodes)
    assert_false(nx.is_connected(remainder))
def test_octahedral_cutset():
    """Octahedral graph: minimum edge cut and minimum node cut both have size 4.

    Each cut is also removed from a fresh copy of the graph, which must then
    be disconnected.
    """
    G = nx.octahedral_graph()
    # (cut finder, name of the Graph method that removes such a cut);
    # edge cuts are checked first, node cuts second.
    checks = (
        (nx.minimum_edge_cut, 'remove_edges_from'),
        (nx.minimum_node_cut, 'remove_nodes_from'),
    )
    for find_cut, remover_name in checks:
        cut = find_cut(G)
        assert_equal(4, len(cut))
        H = G.copy()
        getattr(H, remover_name)(cut)
        assert_false(nx.is_connected(H))
Пример #36
0
def partition(G, capacity):
    """Repeatedly split a graph along minimum edge cuts until pieces fit.

    ``G`` is always split once. Any resulting connected component with more
    than ``capacity`` nodes is split again, until no such component is left.

    Returns a tuple ``(graph_components, need_cut)``: ``graph_components``
    maps each final component subgraph to its node count, and ``need_cut`` is
    the work list, which is empty by the time the function returns.
    """
    graph_components = {}
    need_cut = []

    def split_and_register(graph):
        # Remove one minimum edge cut, then record every resulting component
        # and queue those that are still too large.
        # presumably delete_edges returns the graph without those edges -- TODO confirm
        pruned = delete_edges(graph, nx.minimum_edge_cut(graph))
        for piece in list(nx.connected_component_subgraphs(pruned)):
            size = nx.number_of_nodes(piece)
            graph_components[piece] = size
            if size > capacity:
                need_cut.append(piece)

    split_and_register(G)
    while need_cut:
        oversized = need_cut.pop(0)
        # The oversized piece is replaced by its parts.
        graph_components.pop(oversized)
        split_and_register(oversized)
    return graph_components, need_cut
Пример #37
0
    def cluster(self, dgraph):
        """Cluster nodes by removing one minimum edge cut.

        ``dgraph.dgraph`` is converted with the parent class's
        ``to_undirected``; a minimum edge cut of the result is removed and the
        node list of every remaining connected component is returned.
        """
        undirected = super().to_undirected(dgraph.dgraph)

        # Drop the cut edges, then read off what is left.
        undirected.remove_edges_from(nx.minimum_edge_cut(undirected))
        components = nx.connected_component_subgraphs(undirected)

        return [list(component.nodes()) for component in components]
Пример #38
0
def minimum_cut(graph):
    """Return the smallest non-empty minimum edge cut among all components.

    A minimum edge cut is computed for the subgraph of every connected
    component of ``graph``; empty cuts are ignored. Among the rest the one
    with the fewest edges is returned (the first one found wins ties).

    Raises ``Exception`` when no component yields a non-empty cut.
    """
    candidates = []
    for nodes in nx.connected_components(graph):
        edges = nx.minimum_edge_cut(graph.subgraph(nodes))
        if edges:
            candidates.append(edges)

    if not candidates:
        raise Exception('no way to make new connected component')
    # min() keeps the first of equally small cuts.
    return min(candidates, key=len)
Пример #39
0
def test_petersen_cutset():
    """Petersen graph: minimum edge and node cuts have size 3 for every flow function.

    Removing either cut from a copy of the graph must disconnect it.
    """
    petersen = nx.petersen_graph()
    for flow_func in flow_funcs:
        # One failure label per flow function, reused by all assertions.
        label = msg.format(flow_func.__name__)

        # Edge cut.
        cut_edges = nx.minimum_edge_cut(petersen, flow_func=flow_func)
        assert_equal(3, len(cut_edges), msg=label)
        remainder = petersen.copy()
        remainder.remove_edges_from(cut_edges)
        assert_false(nx.is_connected(remainder), msg=label)

        # Node cut.
        cut_nodes = nx.minimum_node_cut(petersen, flow_func=flow_func)
        assert_equal(3, len(cut_nodes), msg=label)
        remainder = petersen.copy()
        remainder.remove_nodes_from(cut_nodes)
        assert_false(nx.is_connected(remainder), msg=label)
Пример #40
0
def test_icosahedral_cutset():
    """Icosahedral graph: minimum edge and node cuts have size 5 for every flow function.

    Removing either cut from a copy of the graph must disconnect it.
    """
    icosahedron = nx.icosahedral_graph()
    for flow_func in flow_funcs:
        # ``msg`` is a %-style template taking the flow function's name.
        note = msg % (flow_func.__name__, )
        options = {'flow_func': flow_func}

        # Edge cut.
        cut_edges = nx.minimum_edge_cut(icosahedron, **options)
        assert_equal(5, len(cut_edges), msg=note)
        remainder = icosahedron.copy()
        remainder.remove_edges_from(cut_edges)
        assert_false(nx.is_connected(remainder), msg=note)

        # Node cut.
        cut_nodes = nx.minimum_node_cut(icosahedron, **options)
        assert_equal(5, len(cut_nodes), msg=note)
        remainder = icosahedron.copy()
        remainder.remove_nodes_from(cut_nodes)
        assert_false(nx.is_connected(remainder), msg=note)
Пример #41
0
def test_icosahedral_cutset():
    """Icosahedral graph: edge and node cuts of size 5 under every flow function.

    Each cut is removed from a fresh copy of the graph, which must then be
    disconnected.
    """
    G = nx.icosahedral_graph()
    # (cut finder, name of the Graph method that removes such a cut);
    # edge cuts are checked first, node cuts second.
    checks = (
        (nx.minimum_edge_cut, 'remove_edges_from'),
        (nx.minimum_node_cut, 'remove_nodes_from'),
    )
    for flow_func in flow_funcs:
        label = msg.format(flow_func.__name__)
        for find_cut, remover_name in checks:
            cut = find_cut(G, flow_func=flow_func)
            assert_equal(5, len(cut), msg=label)
            H = G.copy()
            getattr(H, remover_name)(cut)
            assert_false(nx.is_connected(H), msg=label)
Пример #42
0
def FiveA():
  '''
  Description:
    Estimates, for random graphs of 5 to 20 nodes, how often Karger's
    algorithm finds a minimum cut. For each size n, 10*n**2 graphs are
    created; the success ratio (3 decimals) is appended to
    'ps3_q5_output.txt', which is finally read back to produce the plot.
  '''
  p = 0.5
  prob_min_cut = {}

  for n in range(5, 21):
    trials = 10 * (n ** 2)
    min_cuts_found = 0.0

    for _ in range(trials):
      G = CreateGraph(n, p)

      # Reference answer: size of a minimum edge cut according to networkx.
      min_edge_cut = len(nx.minimum_edge_cut(G))

      # After contraction, the remaining edge count is Karger's cut size.
      G = RunKarger(G)
      if G.number_of_edges() == min_edge_cut:
        min_cuts_found += 1

    # Fraction of successful runs for this n, rounded through formatting.
    prob_min_cut[n] = float('{0:.3f}'.format(min_cuts_found / trials))

    # Output results to a csv file
    Write('ps3_q5_output.txt', n, prob_min_cut[n])

  # Read in results file and create a plot of results
  ReadCSV('ps3_q5_output.txt')
Пример #43
0
def cut(graph):
    '''Split a directed graph in two along a minimum edge cut.

    The cut is computed on an undirected copy of ``graph``; each cut edge is
    then mapped back to the orientation that exists in ``graph`` and removed
    from ``graph`` IN PLACE.

    Parameters:
        graph: an nx.DiGraph with more than one node (both are asserted).
    Returns:
        (rcs, g1, g2): the list of removed directed edges and the two weakly
        connected component subgraphs left after removing them.
    Raises:
        Exception: if the minimum edge cut is empty.
        AssertionError: on bad input, or if removal does not leave exactly
        two weakly connected components.
    '''
    # Debug printing is hard-wired on.
    debug = True
    assert isinstance(graph, nx.DiGraph), "Input_para.__class__  %s " % graph.__class__
    assert graph.number_of_nodes() > 1,   "Number of nodes: %d" % graph.number_of_nodes()
    if debug: print "\nDigraph Edges Are:\n    %s" % str(graph.edges())
    unigraph = nx.Graph(graph)           # convert the input graph to an undirected graph
    cs = nx.minimum_edge_cut(unigraph)   # minimum edge cut of the undirected graph -> CS
    # Original author's open question: before balance() calls cut(), the graph
    # is always an unbalanced structure, so a cut must always exist?
    if not cs:
        raise Exception,"Cut Set of this graph is Empty"
    # An edge in CS may not exist in the original directed graph with that
    # orientation, so such edges have their direction reversed.
    # All real (directed) edges are collected in RCS.
    # NOTE(review): if both (u, v) and (v, u) exist in graph, only one of them
    # ends up in RCS, so the final two-component assertion could fail -- confirm.
    rcs = []
    original_edges = graph.edges()
    for eachEdge in cs:
        if not eachEdge in original_edges:
            eachEdge = (eachEdge[1], eachEdge[0]) # swap direction
        rcs.append(eachEdge)
    graph.remove_edges_from(rcs)  # remove the cut set from the original graph
    if debug: print "Edge Cut Set RCS :\n    %s" % str(rcs)
    if debug: print "After remove RCS :\n    %s" % str(graph.edges())
    
    # The two weakly connected subgraphs obtained after removing the RCS edges
    glist = []
    for eachCntComp in nx.weakly_connected_component_subgraphs(graph):
        # collect each weakly connected component left after removing the cut
        glist.append(eachCntComp)
        if debug:
            print "Weakly CC %d:" % len(glist)
            print "    nodes:%s" % eachCntComp.nodes() 
            print "    edges:%s" % eachCntComp.edges()
    assert len(glist) == 2
    return rcs, glist[0], glist[1]
	def build_graph(self):
		""" insert notes and edges based on user dictionary"""
		#  for key in self.user_dic.keys():
		#  self.graph.add_node(key)
		print "start building the graph"
		distinct_user = Set([])

		distinct_user.union(set( self.user_dic.keys() ))

		for value in self.user_dic.values():
			distinct_user.union(set(value))

		for eachuser in distinct_user:
			self.graph.add_node(key)

		for key in self.user_dic.keys():
			for value in self.user_dic[key]:
				# g.add_edges( [(1,2)] )
				self.graph.add_edge(key, value)

		for node in self.graph.nodes():
			self.color_dic[node] = "white"

		self.node_color = [self.color_dic[node] for node in self.graph.nodes()]

		allgraph = list(nx.connected_component_subgraphs(self.graph))

		mincut = nx.minimum_edge_cut(self.graph)
		print "mincut is ", mincut
		print "length of all connected component is ", len(allgraph)

		for graph in allgraph:

			# min_weighted_dominating_set(graphUD, weight=None)

			print graph.number_of_nodes()

		print "finish building the graph"
Пример #45
0
def CalMaxFlows(DG_network, Dnodes, maxFlows):
	"""
	Fill maxFlows with flow statistics for every connected pair of Dnodes.

	For each pair (Dnodes[i], Dnodes[j]) with i < j that has a path from the
	first to the second node in DG_network, the entry
	maxFlows[(smaller, larger)] is set to the tuple
	(maximum flow value using the 'weight' edge attribute as capacity,
	shortest path length, number of edges in a minimum edge cut).

	Nothing is returned; maxFlows is updated in place. If a key is already
	present, a message is printed and the process exits with status 1.
	"""
	for i in range(len(Dnodes)):
		for j in range(i+1, len(Dnodes)):
			source, target = Dnodes[i], Dnodes[j]

			# Unreachable pairs are skipped.
			if not nx.has_path(DG_network, source, target):
				continue

			maxflow = nx.maximum_flow_value(DG_network, source, target, capacity = 'weight')
			shortest_path_length = nx.shortest_path_length(DG_network, source = source, target = target)
			min_edges_cut = len(nx.minimum_edge_cut(DG_network, source, target))

			# The key is the pair ordered by node value.
			if source < target:
				a_path = (source, target)
			else:
				a_path = (target, source)

			# "in" replaces dict.has_key(), and the write replaces the print
			# statement, so this works on Python 2 and 3 with the same output.
			if a_path in maxFlows:
				sys.stdout.write("impossibly! %s\n" % (a_path,))
				sys.exit(1)
			maxFlows[a_path] = (maxflow, shortest_path_length, min_edges_cut)
Пример #46
0
    def cut_edges_detection(self, feature, first_segments):

        """Search for cut edges and return the segments they delimit.

        Args:
            feature: passed through to ``self.build_nodes_edges``.
            first_segments: the initial segmentation; items are indexed and
                their ``.start`` / ``.end`` attributes are read.

        Returns:
            A ``Timeline`` holding every candidate segment ``hyp[i]`` for
            which the minimum edge cut between ``T[j + 1]`` and ``T[j]`` in
            ``G`` is empty.
        """

        # presumably G is a graph, n_nodes a sequence of indices into
        # first_segments and T a sequence of graph nodes -- TODO confirm
        G, n_nodes, T = self.build_nodes_edges(feature, first_segments)

        hyp = Timeline()
        hypothesis = Timeline()
        # First candidate: from the very first segment up to the first boundary.
        hyp.add(
            Segment(first_segments[0].start, first_segments[n_nodes[0]].end)
        )
        for i, j in enumerate(n_nodes):
            # NOTE(review): for i == 0, n_nodes[i - 1] is n_nodes[-1] (the last
            # entry) -- confirm that this wrap-around is intended.
            hyp.add(
                Segment(
                    first_segments[n_nodes[i - 1]].end,
                    first_segments[n_nodes[i]].end
                )
            )
            # "Coupure" is French for "cut".
            Coupure = nx.minimum_edge_cut(G, T[j + 1], T[j])
            if len(Coupure) == 0:
                # NOTE(review): hyp[i] relies on how Timeline orders its
                # segments -- verify it is the segment intended for step i.
                hypothesis.add(hyp[i])

        return hypothesis
Пример #47
0
def get_coordinate_path(coords):
    """
    Returns a path of 'coords' (assumed a single <x,y> coordinates of a 
    skeleton) such that:

    1) the start (endpoint) has the highest, second-lowest distance (the lowest
    distance is always +/- 1 pixel; the second lowest will be the greatest
    for an endpoint)
    
    2) all other points are separated from each other by at most 1 pixel

    The path itself comes from single_chinese_postman_path() on a graph that
    links every pixel to its two nearest pixels. The function also prints
    debugging information and shows a plot of that graph.

    Background reading:
    https://stackoverflow.com/questions/18794308/algorithm-to-cover-all-edges-given-starting-node
    https://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.algorithms.matching.max_weight_matching.html#networkx.algorithms.matching.max_weight_matching
    https://groups.google.com/forum/#!topic/networkx-discuss/NxbsY2dzkNk
    https://healthyalgorithms.com/2009/03/23/aco-in-python-minimum-weight-perfect-matchings-aka-matching-algorithms-and-reproductive-health-part-4/

    Args:
        coords: the two-column array of pixel distances
    Returns:
        the sorted coordinate list
    """
    n_coords = len(coords)
    distances = scipy.spatial.distance_matrix(coords,coords,p=2)
    # A pixel must never count as its own nearest neighbour.
    for i in range(n_coords):
        distances[i,i] = np.inf
    # check that the skeletonization is OK
    # NOTE(review): this demands that the largest nearest-neighbour distance
    # is sqrt(2) (within 1e-6); a skeleton whose neighbours are all
    # axis-aligned (distance 1) would be rejected -- confirm that is intended.
    maximum_of_minimum_distances = np.sqrt(2)
    max_of_min = max(np.min(distances,axis=0))
    assert abs(max_of_min - maximum_of_minimum_distances) < 1e-6 , \
        "Skeletonization failed?"
    # POST: distances okay; all pixels at most 1 away in x and y
    # Now we need to decide on (possibly arbitrary) endpoints. These should
    # be the nodes with the largest *second* lowest distances (all have
    # at least one neighbor which is +/- 1 pixel; 'interior' nodes have at
    # least two '1-pixel' neighbors).
    second_lowest_distances = [sorted(row)[1] for row in distances]
    # sorted from low to high; what we want is the highest, second lowest
    sort_idx_second_highest = np.argsort(second_lowest_distances)
    endpoint = sort_idx_second_highest[-1]
    print(endpoint)
    # Rotate the data so the endpoint comes first. Rows AND columns of the
    # distance matrix are reordered: reordering rows only would leave the
    # neighbour indices below in the old numbering while the node ids (and
    # the reordered coords) use the new one.
    sorted_idx = np.concatenate((np.arange(endpoint,n_coords),
                                 np.arange(0,endpoint)))
    distances = distances[sorted_idx][:, sorted_idx]
    coords = coords[sorted_idx]
    # Graph of all pixels: each one is linked to its two closest pixels.
    G = nx.Graph()
    for i in range(n_coords):
        closest_nodes = np.argsort(distances[i])
        G.add_edge(i,closest_nodes[0],weight=1)
        G.add_edge(i,closest_nodes[1],weight=1)
    print("connectivity")
    # Remove all but the last edge of a minimum edge cut. The previous code
    # called the undefined name 'g' here and failed as soon as the cut had
    # two or more edges.
    remove_all_but_one = list(nx.minimum_edge_cut(G))
    for r in remove_all_but_one[:-1]:
        G.remove_edge(*r)
    print(nx.node_connectivity(G))
    nx.draw(G)
    plt.show()
    graph,path = single_chinese_postman_path(G)
    print(path,n_coords)
    for i in range(len(path)):
        print(len(set(path[:i])),i,n_coords)

    return coords[path]
def InterviewAlgorithm_main(argv1):
	"""Compute an "intrinsic merit" report for the text file named by argv1.

	The file is tokenized with nltk and filtered against stopwords and
	punctuation. For levels 1 and 2 the tokens are expanded through
	InterviewAlgorithmWithIntrinisicMerit_SparkMapReducer, and overlap /
	relatedness statistics are written to 'Output-Webspider-HTML.out'.
	A directed graph is then built from the collected definition-graph
	edges, drawn, and several graph measures are printed (core numbers,
	PageRank, node connectivity, minimum edge cut, Stoer-Wagner, average
	clustering).

	Reads the module-level names parents_computation_spark and
	required_none_vertices. Returns nothing.

	NOTE(review): output, file1 and picklef are opened but never closed here.
	"""
	#----------------------------------------------
	#1.Get the input documents
	#----------------------------------------------

	corpus = [argv1]
	#get keywords

	files = [argv1]

	#----------------------------------------------
	#2.Initialize MemCached Client
	#----------------------------------------------
	graphcache=memcache.Client(["127.0.0.1:11211"], debug=1)
	InterviewAlgorithmWithIntrinisicMerit_SparkMapReducer.flushCache(graphcache)

	#---------------------------------------------------------------------------------
	#3.Compute intrinsic merit (either using linear or quadratic overlap)
	#---------------------------------------------------------------------------------

	# term -> list of parent terms; filled while walking the levels below
	definitiongraphedges=defaultdict(list)
	definitiongraphedgelabels=defaultdict(list)
	# no default factory: behaves like a plain dict (missing keys raise KeyError)
	weight_str_map=defaultdict()

	for filestr in files:
		outputfile = 'Output-Webspider-HTML.out'
		output = open(outputfile, 'w')
		file1 = open(filestr)
		raw1 = file1.read()
		doc1 = nltk.word_tokenize(raw1.decode("utf-8"))
		#fdist1 = FreqDist(doc1)
		stopwords = nltk.corpus.stopwords.words('english')
		stopwords = stopwords + [' ','or','and','who','he','she','whom','well','is','was','were','are','there','where','when','may', 'The', 'the', 'In', 		'in','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
		puncts = [' ','.', '"', ',', '{', '}', '+', '-', '*', '/', '%', '&', '(', ')', '[', ']', '=', '@', '#', ':', '|', ';','\'s']
		#freqterms1 = [w for w in fdist1.keys() if w not in stopwords and w not in puncts and (fdist1.freq(w) * compute_idf(corpus, w))]
		freqterms1 = [w for w in doc1 if w not in stopwords and w not in puncts]
	
		current_level = 1
		nodewithmaxparents = ''
		noofparents = 0
		maxparents = 0
		relatedness = 0
		first_convergence_level = 1
		tokensofthislevel = []
		convergingterms = []
		convergingparents = []
		tokensofprevlevel = []
		prevlevelsynsets = []
		commontokens = []
		vertices = 0
		edges = 0
		overlap = 0
		iter = 0
		#recurse down to required depth and update intrinsic merit score
		#relatedness is either sum(overlaps) or sum((overlapping_parents)*(overlaps)^2) also called convergence factor
		while current_level < 3:
			#crucial - gather nodes which converge/overlap (have more than 1 parent)
			if current_level > 1:
				print current_level
				# first lemma name of every synset loaded for the previous level
				prevlevelsynsets_tokens=[]
				for s in prevlevelsynsets:
					s_lemma=s.lemma_names()
					prevlevelsynsets_tokens.append(s_lemma[0])
				for x in freqterms1:
					#prevlevelsynsets_tokens=[]
					#for s in prevlevelsynsets:
					#	s_lemma=s.lemma_names()
					#	prevlevelsynsets_tokens.append(s_lemma[0])
					if parents_computation_spark:
						parents_x = InterviewAlgorithmWithIntrinisicMerit_SparkMapReducer.Spark_MapReduce_Parents(x,prevlevelsynsets_tokens,graphcache)
						if len(parents_x) > 1:
							convergingterms.append(x)
					else:
						#parents_x = parents(x,prevlevelsynsets)
						parents_x = parents_tokens(x,prevlevelsynsets_tokens)
						if len(parents_x) > 1:
							convergingterms.append(x)
					# all parents of x are added, but only when x has more than one parent
					convergingparents = convergingparents + ([w for w in parents_x if len(parents_x) > 1])
					noofparents = len(parents_x)
					if noofparents > maxparents:
						maxparents = noofparents
						nodewithmaxparents = x 
					for y in parents_x:
						if parents_computation_spark:
							definitiongraphedges[x].append(y)
						else:
							y_lemma_names=y.lemma_names()
							definitiongraphedges[x].append(y_lemma_names[0])
				output.write('converging terms(terms with more than 1 parent):\n ')
				output.write('converging parents :\n')
	
			print "InterviewAlgorithmWithIntrinisicMerit_Crawl_Visual_Spark.py:freqterms1=",freqterms1	
			tokensofthislevel=InterviewAlgorithmWithIntrinisicMerit_SparkMapReducer.Spark_MapReduce(current_level, freqterms1, graphcache).tokensatthislevel
			print "InterviewAlgorithmWithIntrinisicMerit_Crawl_Visual_Spark.py:tokensofthislevel:",tokensofthislevel
			# load the synsets persisted for this level, then empty the file
			picklef=open("RecursiveGlossOverlap_MapReduce_Persisted.txt","r")
			prevlevelsynsets=InterviewAlgorithmWithIntrinisicMerit_SparkMapReducer.asfer_pickle_load(picklef)
			print "prevlevelsynsets:",prevlevelsynsets
			picklef=open("RecursiveGlossOverlap_MapReduce_Persisted.txt","w")
			picklef.seek(0)
			picklef.truncate()
			output.write('At level:\n')
			output.write(str(current_level))
			output.write('\n')
			output.write('tokens grasped at this level:\n')
			#pickle.dump(tokensofthislevel, output)
			output.write('\n')
			# overlap = number of repeated tokens at this level
			listcount = len(tokensofthislevel)
			setcount = len(set(tokensofthislevel))
			overlap =  listcount-setcount
			if overlap > 0 and iter == 0 :
				first_convergence_level = current_level
				iter = 1
			#choose between two relatedness/convergence criteria :- 
			#1) simple linear overlap or 2) zipf distributed quadratic overlap
			#relatedness = relatedness + len(convergingparents)*overlap 
			relatedness = relatedness + overlap + len(convergingparents)
			#relatedness = relatedness + ((len(convergingparents)*overlap*overlap) + 1) 
			#find out common tokens of this and previous level so that same token does not get grasped again - 	
			#relatedness must be increased since repetition of keywords in two successive levels is a sign of 
			#interrelatedness(a backedge from child-of-one-of-siblings to one-of-siblings). Remove vertices and edges
			#corresponding to common tokens
			commontokens = set(tokensofthislevel).intersection(set(tokensofprevlevel))
			tokensofthislevel = list(set(tokensofthislevel).difference(commontokens))
			relatedness = relatedness + len(commontokens)
			output.write('removing tokens already grasped:\n')
			#pickle.dump(commontokens,output)
			output.write('\n')
			output.write('Relatedness:\n')
			output.write(str(relatedness))
			output.write('\n')
			#decrease the vertices count to address common tokens removed above - edges should remain same since they 
			#would just point elsewhere
			vertices = vertices + setcount - len(commontokens)
			output.write('Vertices:\n')
			output.write(str(vertices))
			output.write('\n')
			edges = edges + listcount
			output.write('Edges:\n')
			output.write(str(edges))
			output.write('\n')
			# the tokens of this level become the search terms of the next one
			current_level = current_level + 1
			freqterms1 = tokensofthislevel
			tokensofprevlevel = tokensofthislevel
			tokensofthislevel = []
		
		intrinsic_merit = vertices*edges*relatedness / first_convergence_level
		output.write('Intrinsic merit of this document is:\n')
		output.write(str(intrinsic_merit))
		output.write('\n')
		output.write('Node with maximum parents (and hence the most likely class of document) is:\n')
		output.write(nodewithmaxparents)
		output.write('\n')
	
	print definitiongraphedges


	nxg=nx.DiGraph()
	# NOTE(review): pos is computed on the still-empty graph and is not used
	# again in this function -- confirm it can be dropped.
	pos=nx.spectral_layout(nxg)
	for k,v in definitiongraphedges.iteritems():
		for l in v:
			# every definition edge is added in both directions
			nxg.add_edge(k,l)
			nxg.add_edge(l,k)
			ksynset=wn.synsets(k)
			lsynset=wn.synsets(l)
			if ksynset and lsynset:
				print "ksynset=",ksynset[0]
				print "lsynset=",lsynset[0]
				hypoksynsets=set([i for i in ksynset[0].closure(lambda n:n.hyponyms())])
				hyperlsynsets=set([i for i in lsynset[0].closure(lambda n:n.hypernyms())])
				# The first lookup of a label key raises KeyError, which
				# initialises the entry to "" (that first lemma is not appended).
				for m in hypoksynsets:
					try:
						mlemmanames=m.lemma_names()
						weight_str_map[k+" - "+l]=weight_str_map[k+" - "+l]+" contains "+mlemmanames[0]
					except KeyError:
						weight_str_map[k+" - "+l]=""
				for n in hyperlsynsets:
					try:
						nlemmanames=n.lemma_names()
						weight_str_map[l+" - "+k]=weight_str_map[l+" - "+k]+" is part of "+nlemmanames[0]
					except KeyError:
						weight_str_map[l+" - "+k]=""
	if not required_none_vertices:
		filter_none_vertices(nxg)
	
	nx.draw_networkx(nxg)
	# writing the .dot file is best effort; any failure is ignored
	try:
		nx.write_dot(nxg,"InterviewAlgorithmWithIntrinisicMerit_Crawl_Visual_RGOGraph.dot")
	except:
		pass
	plt.show()
	nxg.remove_edges_from(nxg.selfloop_edges())
	#print "Core number =",nx.core_number(nxg)
	sorted_core_nxg=sorted(nx.core_number(nxg).items(),key=operator.itemgetter(1), reverse=True)
	print "Core number (sorted) :",sorted_core_nxg
	print "============================================================================================================="
	print "Unsupervised Classification based on top percentile Core numbers of the definition graph(subgraph of WordNet)"
	print "============================================================================================================="
	no_of_classes=len(nx.core_number(nxg))
	top_percentile=0
	max_core_number=0
	# print entries in descending core-number order until the counter
	# reaches 10% of the number of vertices
	for n in sorted_core_nxg:
		print "This document belongs to class:",n[0],",core number=",n[1]
		if top_percentile < no_of_classes*0.10:
			top_percentile+=1
		else:	
			break
		if n[1] > max_core_number:
			max_core_number=n[1]
	print "max_core_number",max_core_number

	print "==================================================================="
	print "Page Rank of the vertices of RGO Definition Graph"
	print "==================================================================="
	print sorted(nx.pagerank(nxg).items(),key=operator.itemgetter(1),reverse=True)

	# Each of the remaining measures is best effort: the heading is printed
	# first and any exception raised by the computation is silently ignored.
	try:
		print "=========================================================================================================="
		print "Alternative Quantitative Intrinsic Merit  - connectivity of RGO Definition Graph - Mengers Theorem"
		print "=========================================================================================================="
		print nx.node_connectivity(nxg)
	except:
		pass 
	try:
		print "=========================================================================================================="
		print "Alternative Quantitative Intrinsic Merit  - Maxflow-Mincut of RGO Definition Graph - Minimum Edge Cut"
		print "=========================================================================================================="
		print nx.minimum_edge_cut(nxg)
	except:
		pass 
	try:
		print "=========================================================================================================="
		print "Alternative Quantitative Intrinsic Merit  - Maxflow-Mincut of RGO Definition Graph - Stoer-Wagner"
		print "=========================================================================================================="
		print nx.stoer_wagner(nxg)
	except:
		pass 
	try:
		print "=========================================================================================================="
		print "Alternative Quantitative Intrinsic Merit  - Average Clustering Coefficient"
		print "=========================================================================================================="
		print nx.average_clustering(nxg)
	except:
		pass
print "Page Rank of the vertices of RGO Definition Graph"
print "==================================================================="
print sorted(nx.pagerank(nxg).items(),key=operator.itemgetter(1),reverse=True)

try:
	print "=========================================================================================================="
	print "Alternative Quantitative Intrinsic Merit  - connectivity of RGO Definition Graph - Mengers Theorem"
	print "=========================================================================================================="
	print nx.node_connectivity(nxg)
except:
	pass 
try:
	print "=========================================================================================================="
	print "Alternative Quantitative Intrinsic Merit  - Maxflow-Mincut of RGO Definition Graph - Minimum Edge Cut"
	print "=========================================================================================================="
	print nx.minimum_edge_cut(nxg)
except:
	pass 
try:
	print "=========================================================================================================="
	print "Alternative Quantitative Intrinsic Merit  - Maxflow-Mincut of RGO Definition Graph - Stoer-Wagner"
	print "=========================================================================================================="
	print nx.stoer_wagner(nxg)
except:
	pass 
#try:
print "=========================================================================================================="
print "Alternative Quantitative Intrinsic Merit  - Junction Tree Width"
print "=========================================================================================================="
print TreeWidth.tree_width(nxg,3)
#except:
Пример #50
0
def generateGraph(group):
    """Build the MAC co-occurrence graph for ``group`` and store its clusters.

    Steps, as visible in this function:
      1. (Re)create index ``ind_ex3`` on ``learn(timestamp)``.
      2. Start ``cpu_count()`` processes running ``worker(group, i, n)``,
         then load each ``<i>.p`` pickle they leave behind and compose the
         graphs into one.
      3. Drop nodes of degree 0 and report (print) minimum-edge-cut edges
         whose removal would not isolate a single node.
      4. Compute a layout, collect per-component MAC addresses and location
         uuids, and store these resources plus the graph in the database.
      5. (Re)create indexes ``ind_ex1`` and ``ind_ex4``.

    Returns the number of connected components of the graph.

    NOTE(review): the calls ``G.degree()`` used as a mapping,
    ``nx.set_node_attributes(G, "pos", pos)`` and ``G.node[...]`` look like
    the networkx 1.x API -- confirm the pinned version.
    """
    logger = logging.getLogger("networkanalysis:generateGraph")
    connected_components = []
    threads = []
    t = time.time()
    db = mlDB(group)
    # Dropping may fail (presumably when the index does not exist yet); ignored.
    try:
        db.executeSqlCommand("drop index ind_ex3")
    except:
        pass
    db.executeSqlCommand("create index ind_ex3 on learn(timestamp)")
    db.close()
    numThreads = cpu_count()

    # One worker process per CPU; all are started, then all are joined.
    for i in range(numThreads):
        tThread = Process(target=worker, args=(group, i, numThreads))
        threads.append(tThread)
    for tThread in threads:
        tThread.start()
    for tThread in threads:
        tThread.join()

    # Merge the per-worker graphs (files "<i>.p") and delete the files.
    # NOTE(review): pickle.load on these files is only safe if nothing else
    # can write them; the file handle is also never closed explicitly.
    G = nx.empty_graph()
    for i in range(numThreads):
        G0 = pickle.load(open(str(i) + ".p", "rb"))
        G = nx.compose(G, G0)
        os.system("rm " + str(i) + ".p")

    """
    G = nx.compose(G,complete_graph_from_list(["a", "b", "c", "d"]))
    print(G.edges())
    G = nx.compose(G,complete_graph_from_list(["d","e"]))
    print(G.edges())
    """

    """
    db = mlDB(group)
    timestamps = db.getSqlCommand('select timestamp,id from location_learn group by timestamp')
    totalThings = 0
    for timestamp in timestamps:
        totalThings = totalThings + 1.0
        
    timestamps = db.getSqlCommand('select timestamp,id from location_learn group by timestamp')
    edges = []
    print('collecting edges...')
    count = 0
    
    for timestamp in timestamps:
        count = count + 1.0
        percentDone = count/totalThings*100.0
        if percentDone % 10 == 0:
            print('%s percent done'%str(round(percentDone)))
        macs_db = db.getSqlCommand('select mac_address from location_learn where timestamp=%s'%timestamp[0])
        macs = []
        for mac_db in macs_db:
            macs.append(mac_db[0])
        hashedSet = hash(frozenset(macs))
        if hashedSet not in finishedSets:
            G = nx.compose(G,complete_graph_from_list(macs))
            finishedSets.append(hashedSet)

        for i in range(0,len(macs)-1):
            for j in range(i,len(macs)):
                if i is not j:
                    if (macs[i],macs[j]) not in edges:
                        edges.append((macs[i],macs[j]))
        """
    logger.debug("Finished collecting edges in %s seconds" % "{0:.2f}".format(round(time.time() - t, 2)))
    # Remove isolated nodes (degree 0).
    outdeg = G.degree()
    to_remove = [n for n in outdeg if outdeg[n] == 0]
    G.remove_nodes_from(to_remove)
    # G.add_edges_from([('a','b'),('a','c'),('a','d'),('a','e'),('b','c'),('b','d'),('b','e'),('c','d'),('c','e'),('d','e'),('e8:fc:af:81:4f:d4','a'),('22:10:7a:ed:1d:87','b')]) #,('e8:fc:af:81:4f:d4','a')
    # NOTE(review): cliques is not used again in this function.
    cliques = sorted(nx.find_cliques(G), key=len, reverse=True)

    # Need algorithm where you keep cutting until
    # you only cut off one node at a time
    # then stop cutting
    removeEdges = []
    for k in nx.connected_component_subgraphs(G):
        isOne = False
        # Remove a minimum edge cut from this component's subgraph and see
        # whether that splits off a single node.
        cuts = nx.minimum_edge_cut(k)
        for edge in cuts:
            k.remove_edge(edge[0], edge[1])
        connected_components = list(nx.connected_components(k))
        for connected_component in connected_components:
            if len(connected_component) == 1:
                isOne = True
        if isOne:
            pass
        else:
            # The cut separates larger groups: remember its edges.
            for cut in cuts:
                removeEdges.append(cut)

    # The collected edges are only reported; the removal below is disabled.
    if len(removeEdges) > 0:
        print("NEED TO REMOVE EDGES!")
        print(removeEdges)
    # for edge in removeEdges:
    #    G.remove_edge(edge[0],edge[1])
    connected_components = list(nx.connected_components(G))
    # Shell layout for small graphs, spring layout otherwise.
    if len(G.nodes()) < 50:
        pos = nx.shell_layout(G)  # positions for all nodes
    else:
        pos = nx.spring_layout(G, scale=10000)  # positions for all nodes
    # pos=nx.spectral_layout(G) # positions for all nodes

    N = len(connected_components)

    labels = {}
    for node in G.nodes():
        labels[node] = "%s" % node

    nx.set_node_attributes(G, "pos", pos)
    connected_components_locs = {}
    connected_components_macs = {}

    # For every component: collect its members and the location uuids stored
    # for each member in the learn table.
    for i in range(len(connected_components)):
        connected_components[i] = list(set(connected_components[i]))
        connected_components_locs[i] = []
        connected_components_macs[i] = []
        locations = "Cluster " + str(i) + "\n"
        num = 0
        maxX = 0
        minY = 1000000
        for component in connected_components[i]:
            connected_components_macs[i].append(component)
            G.node[component]["name"] = component
            if G.node[component]["pos"][0] > maxX:
                maxX = G.node[component]["pos"][0]
            if G.node[component]["pos"][1] < minY:
                minY = G.node[component]["pos"][1]
            num = num + 1
            db = mlDB(group)
            # NOTE(review): the node name is interpolated straight into the
            # SQL text; a parameterized query would be safer if names can
            # contain quotes -- verify what mlDB supports.
            locs_uuid = db.executeSqlCommand(
                'select location_uuid from learn where mac_address like "%s" group by location_uuid' % component
            )
            db.close()
            for loc in locs_uuid:
                connected_components_locs[i].append(loc[0])

        connected_components_locs[i] = list(set(connected_components_locs[i]))
        connected_components_macs[i] = list(set(connected_components_macs[i]))

    db = mlDB(group)
    db.insertResource("connected_components_macs", connected_components_macs)
    db.insertResource("connected_components_locs", connected_components_locs)
    db.insertResource("connected_components", connected_components)
    db.insertResource("G", G)
    # Components without their own parameters inherit those of component 0.
    calculation_parameters = db.getResource("calculation_parameters")
    for con in range(len(connected_components)):
        if con not in calculation_parameters.keys():
            calculation_parameters[con] = calculation_parameters[0]
    builtins.PARAMETERS[group] = calculation_parameters
    db.insertResource("calculation_parameters", calculation_parameters)

    # Create index if it hasn't already been created
    logger.debug("Creating the learning index")
    try:
        db.executeSqlCommand("drop index ind_ex1")
        logger.debug("Dropped previous index")
    except:
        logger.debug("No index to drop")
    db.executeSqlCommand("create index ind_ex1 on learn(mac_address, location_uuid)")
    logger.debug("Generated new index on learn")

    logger.debug("Creating the training index")
    try:
        db.executeSqlCommand("drop index ind_ex4")
    except:
        pass
    db.executeSqlCommand("create index ind_ex4 on test(timestamp)")

    db.close()
    return len(connected_components)
Пример #51
0
def general_k_edge_subgraphs(G, k):
    """Generate the maximal k-edge-connected subgraphs of G.

    Parameters
    ----------
    G : NetworkX graph
        The graph to decompose.  Every yielded graph is a copy of a
        subgraph of G.

    k : int
        The required edge connectivity.  Must be at least 1.

    Yields
    ------
    k_edge_subgraph : NetworkX graph
        Either a single-node graph or a subgraph of G whose global minimum
        edge cut contains at least k edges.

    Raises
    ------
    ValueError
        If k is less than 1.  Because this function is a generator, the
        error surfaces when the first item is requested, not at call time.

    Notes
    -----
    This follows the basic algorithm of [1]_.  A global minimum edge cut of
    a candidate graph is computed.  When the cut holds at least k edges the
    candidate is k-edge-connected and is emitted as a result.  Otherwise the
    cut edges are removed, which splits the candidate, and each resulting
    piece is examined in turn.  A candidate consisting of one node is
    emitted directly.

    Some optimizations that reduce the number of max-flow calls are
    included; [1]_ describes further ones that are not implemented here.

    References
    ----------
    .. [1] Zhou, Liu, et al. (2012) Finding maximal k-edge-connected subgraphs
        from a large graph.  ACM International Conference on Extending Database
        Technology 2012 480--491.
        https://openproceedings.org/2012/conf/edbt/ZhouLYLCL12.pdf

    Example
    -------
    >>> from networkx.utils import pairwise
    >>> paths = [
    ...     (11, 12, 13, 14, 11, 13, 14, 12),  # a 4-clique
    ...     (21, 22, 23, 24, 21, 23, 24, 22),  # another 4-clique
    ...     # connect the cliques with high degree but low connectivity
    ...     (50, 13),
    ...     (12, 50, 22),
    ...     (13, 102, 23),
    ...     (14, 101, 24),
    ... ]
    >>> G = nx.Graph(it.chain(*[pairwise(path) for path in paths]))
    >>> sorted(map(len, k_edge_subgraphs(G, k=3)))
    [1, 1, 1, 4, 4]
    """
    if k < 1:
        raise ValueError('k cannot be less than 1')

    # Component finder with the node-pruning optimization bound to this k.
    components_of = partial(_high_degree_components, k=k)

    # Shortcut: with fewer than k nodes, every node is reported on its own.
    if G.number_of_nodes() < k:
        for single in G.nodes():
            yield G.subgraph([single]).copy()
        return

    # Work set of candidate subgraphs that still have to be checked.
    pending = set(G.subgraph(nodes).copy() for nodes in components_of(G))

    while pending:
        candidate = pending.pop()

        # A lone node cannot be split any further.
        if candidate.number_of_nodes() == 1:
            yield candidate
            continue

        # The size of a global minimum cut decides whether we are done.
        min_cut = nx.minimum_edge_cut(candidate)
        if len(min_cut) >= k:
            # The candidate is k-edge-connected.
            yield candidate
            continue

        # Not k-edge-connected: drop the cut edges and queue each piece.
        candidate.remove_edges_from(min_cut)
        pending.update(
            candidate.subgraph(nodes).copy()
            for nodes in components_of(candidate)
        )
Пример #52
0
	g = nx.Graph()
	for ma in matches:
		g.add_nodes_from(ma['teams'])
		n1, n2 = ma['teams']
		g.add_edge(
				n1,
				n2,
				attr_dict = {
					'capacity': g.get_edge_data(
						n1, n2, { }
					).get('capacity', 0) + 1,
				}
		)
	for repi in itertools.count(1):
		print repi, nx.number_of_nodes(g), nx.number_of_edges(g)
		min_cut_edges = list(nx.minimum_edge_cut(g))
		g.remove_edges_from(min_cut_edges)
		ccs = list(nx.connected_component_subgraphs(g))
		assert len(ccs) == 2
		n1, n2 = [ sg.number_of_nodes() for sg in ccs ]
		if n1 > n2:
			g = ccs[0]
		else:
			g = ccs[1]

if 0:
	with open('dat.csv', 'wb') as datcsvf:
		datcsvf.write('gh,ga\n')
		for ma in matches:
			datcsvf.write(str(ma['score'][0]))
			datcsvf.write(',')