def undo_cycles(self):
        vertices = self.states_df.columns
        nx_graph = self.new_filled_nx_graph()
        dir_edge_to_freq = {}

        bad_dir_edges = []
        cycles = list(nx.simple_cycles(nx_graph))
        num_cyc = len(cycles)
        while num_cyc > 0:
            for cyc in cycles:
                # nx.simple_cycles yields node lists; pair consecutive
                # nodes (wrapping around) to recover the directed edges
                for dir_edge in zip(cyc, cyc[1:] + cyc[:1]):
                    if dir_edge not in dir_edge_to_freq:
                        dir_edge_to_freq[dir_edge] = 1
                    else:
                        dir_edge_to_freq[dir_edge] += 1
            max_freq_edge = max(dir_edge_to_freq,
                                key=dir_edge_to_freq.get)
            bad_dir_edges.append(max_freq_edge)
            beg_vtx, end_vtx = max_freq_edge
            self.vtx_to_parents[end_vtx].remove(beg_vtx)
            nx_graph.remove_edge(beg_vtx, end_vtx)
            cycles = list(nx.simple_cycles(nx_graph))
            num_cyc = len(cycles)
        for (beg_vtx, end_vtx) in reversed(bad_dir_edges):
            self.vtx_to_parents[beg_vtx].append(end_vtx)
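nx.simple_cycles yields each cycle as a list of nodes (in recent NetworkX releases the start node is not repeated at the end), so callers that need directed edges have to pair consecutive nodes themselves, as the loop above does. A minimal standalone sketch of that conversion:

import networkx as nx

g = nx.DiGraph([("a", "b"), ("b", "c"), ("c", "a"), ("c", "b")])
for cyc in nx.simple_cycles(g):
    # pair each node with its successor, wrapping around to close the cycle
    edges = list(zip(cyc, cyc[1:] + cyc[:1]))
    print(cyc, "->", edges)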
Example #2
    def _check_graph(self, ebunch=None, delete_graph=False):
        """
        Checks for self loops and cycles in the graph.
        If it finds any, reverts the graph to its previous state or,
        when called from __init__, deletes the graph.
        """
        if delete_graph:
            if ebunch is not None:
                for edge in ebunch:
                    if edge[0] == edge[1]:
                        del self
                        raise ValueError("Self Loops are not allowed",
                                         edge)

            simple_cycles = [loop for loop in nx.simple_cycles(self)]
            if simple_cycles:
                del self
                raise ValueError("Cycles are not allowed",
                                 simple_cycles)
            return True
        else:
            for edge in ebunch:
                if edge[0] == edge[1]:
                    raise ValueError("Self loops are not allowed", edge)

            import copy
            test_G = copy.deepcopy(self)
            nx.DiGraph.add_edges_from(test_G, ebunch)
            simple_cycles = [loop for loop in nx.simple_cycles(test_G)]
            if simple_cycles:
                del test_G
                raise ValueError("Cycles are not allowed", simple_cycles)
            return True
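The copy-then-test pattern above can be reproduced outside pgmpy; a minimal sketch assuming a plain nx.DiGraph (the helper name would_create_cycle is illustrative, not pgmpy API):

import copy
import networkx as nx

def would_create_cycle(graph, ebunch):
    # work on a deep copy so the original graph is never mutated
    test_graph = copy.deepcopy(graph)
    test_graph.add_edges_from(ebunch)
    return bool(list(nx.simple_cycles(test_graph)))

g = nx.DiGraph([(1, 2), (2, 3)])
print(would_create_cycle(g, [(3, 4)]))  # False
print(would_create_cycle(g, [(3, 1)]))  # True, closes 1 -> 2 -> 3 -> 1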
 def test_simple_cycles_small(self):
     G = nx.DiGraph()
     G.add_path([1, 2, 3, 1])
     c = sorted(nx.simple_cycles(G))
     assert_equal(c, [[1, 2, 3, 1]])
     G.add_path([10, 20, 30, 10])
     c = sorted(nx.simple_cycles(G))
     assert_equal(c, [[1, 2, 3, 1], [10, 20, 30, 10]])
    def follow_path(self, node):
        """Given a head node, follow_path will parse through all connecting nodes and translate the path into the
           execution portion of the script. """
        #declare variables
        temp_script3 = ""    
        var = 1
        original_node = node
        cycle_element_used = set()

        while var == 1:
            #check for edges that have been traversed and ignore them
            original_walked = [self.graph.edge[j][k]['walked'] for j,k in self.graph.out_edges(original_node)]
            edges = list(self.graph.out_edges(node))
            for j,k in list(edges):
                if self.graph.edge[j][k]['walked'] == True:
                    edges.remove((j,k))
                #if a loop is detected, parse through and translate everything into a for loop for the script
                all_cycles = list(nx.simple_cycles(self.graph))
                if len(all_cycles) > 0:
                    cycles_list = all_cycles[0]
                    if j in cycles_list and not cycle_element_used:
                        temp_script3 = temp_script3 + "for n in range(10):\n"
                        
                        #cycles_list.pop(), this was commented out due to the fact that the order of the cycle was incorrect
                        cycles_list.remove(cycles_list[0])
                        for i in range(len(cycles_list)):
                            temp_script3 = temp_script3 + "    " + cycles_list[i].add_calc(cycles_list[i-1].var, self.graph.edge[cycles_list[i-1]][cycles_list[i]]['msname']) + '\n'                             
                        cycle_element_used.add(j)
                        cycle_element_used.add(k)
  
            #move to the next node and add any lines of code that have not already been included in the script
            walk_list = {self.graph.edge[j][k]['walk_value']:(j,k) for j,k in edges}
            if len(walk_list) > 0:
                x,y =  walk_list[min(walk_list)]
                self.graph.edge[x][y]['walked'] = True
                if y.add_calc(x.var, self.graph.edge[x][y]['msname']) not in temp_script3:
                    temp_script3 = temp_script3 + y.add_calc(x.var, self.graph.edge[x][y]['msname']) + '\n'

                #if there are more than one edges branching off of a node, take the one with the lowest walk_value
                #else:simply take the edge and follow it to the next node
                if len(self.graph.successors(node)) > 1:
                    node = y
                else:
                    node = self.graph.successors(node)[0]
            
            #if all edges down one path have been traversed, check the head node for any other paths and follow the one
            #with the next least walk_value
            else:
                node = original_node
        
            #if everything has been traversed, reset all walked values to False to ensure that the next runthrough 
            #succeeds
            if False not in original_walked:
                for j,k in self.graph.out_edges():
                    self.graph.edge[j][k]['walked'] = False
                break

        #apply changes to the execution portion of the script
        self.script_execution = temp_script3
Example #5
 def test_simple_cycles_small(self):
     G = nx.DiGraph()
     G.add_path([1,2,3,1])
     c=sorted(nx.simple_cycles(G))
     assert_equal(c,[[1,2,3,1]])
     G.add_path([10,20,30,10])
     c=sorted(nx.simple_cycles(G))
     ca=[[1,2,3,1],[10,20,30,10]]
     for (a,b) in zip(c,ca):
         assert_true(self.is_cyclic_permutation(a[:-1],b[:-1]))
 def test_simple_cycles_small(self):
     G = nx.DiGraph()
     G.add_cycle([1,2,3])
     c=sorted(nx.simple_cycles(G))
     assert_equal(len(c),1)
     assert_true(self.is_cyclic_permutation(c[0],[1,2,3]))
     G.add_cycle([10,20,30])
     cc=sorted(nx.simple_cycles(G))
     ca=[[1,2,3],[10,20,30]]
     for c in cc:
         assert_true(any(self.is_cyclic_permutation(c,rc) for rc in ca))
Example #7
def checkInstructionGraphCycles(instructions):
    import networkx as nx

    g = nx.DiGraph()

    for i in instructions:
        g.add_node(i)

        for ix in range(i.flowsToCount()):
            flowsTo = i.flowsTo(ix)
            if flowsTo in instructions:
                g.add_edge(i, flowsTo)

    cycles = nx.simple_cycles(g)

    for c in cycles:
        if not checkCycleHasEntrypoint(c):
            print "************************************"
            print "No entrypoint in the following cycle: "
            for i in c:
                print i
                print "children:"
                for sub in i.children():
                    print "\t", repr(sub)
            print "************************************"
        else:
            print "************************************"
            print "cycle with ", len(c), " is OK"
            for i in c:
                if i.getTypedJumpTarget():
                    print "*** ",
                else:
                    print "    ",
                print repr(i)
            print "************************************"
 def get_voltage_equations(circuit, symbols, V):
     g = get_circuit_graph(circuit)
     loops = [i for i in nx.simple_cycles(g) if len(i) > 3]
     result = []
     for loop in loops:
         T = 0
         for n in range(len(loop) - 1):
             n1 = loop[n]
             n2 = loop[n+1]
             t = [n1, n2]
             t.sort()
             I = [i for i in symbols if symbols[i] == t][0]
             R = circuit[n1][n2]
             if R != V:
                 if n1 > n2:
                     T += I * R
                 else:
                     T -= I * R
             else:
                 if n1 > n2:
                     T += V
                 else:
                     T -= V
                     
         result.append(T)
     return result
def analyse_cycles(sdfg):
    vectors = core.check_consistency( sdfg )
    s = vectors['s']
    q = vectors['q']
    print("HSDF graph size: {}".format( sum(q.values()) ))
    par = {}
    for cycle in nx.simple_cycles( sdfg ):
        edges = [ (cycle[i - 1], cycle[i]) for i in range(len(cycle)) ]
        wtsum = 0
        multiple = 1
        z = {}
        for v, w in edges:
            data = sdfg.get_edge_data( v, w )
            tokens = data.get('tokens', 0)
            prates = data.get('production', core.cyclic(1))

            wtsum += s[ (v, w) ] * tokens
            z[v] = prates.sum() * s[ (v, w) ]
            multiple = core.lcm( multiple, z[v] )

        if wtsum % multiple == 0:
            for v in cycle:
                parv = wtsum // z[ v ]
                par[v] = parv if v not in par else min(par[v], parv)

        print("Cycle {}: tokens = {:.3f}, integral: {}".format( cycle, wtsum / multiple, wtsum % multiple == 0 ))

    for v in par:
        if q[v] % par[v] == 0:
            q[v] = q[v] // par[v]
        elif par[v] % q[v] == 0:
            q[v] = 1
    
    print("New HSDF graph size: {}".format( sum(q.values()) ))
Example #10
def get_all_substance_combinations_with_cycles(alpha, beta):
    try:
        import numpy
        alpha = numpy.array(alpha)
        beta = numpy.array(beta)
    except ImportError:
        print('This method requires that alpha and beta are NumPy arrays. '
              'NumPy does not appear to be installed. Please install NumPy.')
        raise

    # alpha, beta are stoichiometry matrices as used throughout code

    # number of reactions = number of columns of alpha
    no_rxn = alpha.shape[1]
    # number of substance = number of rows of alpha
    no_sub = alpha.shape[0]

    # check that alpha and beta have matching shapes
    if no_rxn != beta.shape[1] or no_sub != beta.shape[0]:
        raise ValueError('alpha and beta must have the same shape.')

    # get substance adjacency matrix
    subs_adj = get_substance_adjacency(alpha, beta)

    # get directed substance graph
    subs_G = nx.from_numpy_matrix(subs_adj, create_using=nx.DiGraph())

    # get cycles in substance graph
    subs_cycles = list(nx.simple_cycles(subs_G))
    # remove substance index repetitions
    for c_i in range(len(subs_cycles)):
        subs_cycles[c_i] = list(set(subs_cycles[c_i]))
Example #11
    def dependency_list(self):
        r'''
        Returns a list of dependencies in the order with which they should be
        called to ensure data is calculated by one model before it's asked for
        by another.

        Notes
        -----
        This raises an exception if the graph has cycles, which means the
        dependencies are unresolvable (i.e. there is no order in which the
        models can be called that will work).  In this case it is possible
        to visually inspect the graph using ``dependency_graph``.

        See Also
        --------
        dependency_graph
        dependency_map

        '''
        dtree = self.dependency_graph()
        cycles = list(nx.simple_cycles(dtree))
        if cycles:
            raise Exception('Cyclic dependency found: ' + ' -> '.join(
                            cycles[0] + [cycles[0][0]]))
        d = nx.algorithms.dag.lexicographical_topological_sort(dtree, sorted)
        return list(d)
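A standalone sketch of the same check-then-sort idea on a plain nx.DiGraph (node names are made up; edges point from a dependency to the model that needs it):

import networkx as nx

deps = nx.DiGraph([("load", "clean"), ("clean", "fit"), ("clean", "plot")])
cycles = list(nx.simple_cycles(deps))
if cycles:
    raise Exception('Cyclic dependency found: ' + ' -> '.join(cycles[0] + [cycles[0][0]]))
# ties in the topological order are broken by the supplied key, as above
order = list(nx.algorithms.dag.lexicographical_topological_sort(deps, sorted))
print(order)  # ['load', 'clean', 'fit', 'plot']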
Example #12
File: fas.py  Project: litaotju/netlistx
def comb_fas( graph):
    '''@param: graph, a nx.DiGraph obj
    '''
    assert isinstance( graph, nx.DiGraph)
    origin_weight = nx.get_edge_attributes( graph, 'weight')
    weight = origin_weight.copy()

    assert len(weight) == graph.number_of_edges(), "Some edge does not have a weight attr."
    fas = []
    while( not nx.is_directed_acyclic_graph(graph) ):
        c = list( nx.simple_cycles(graph) )[0]
        mini_weight = min( [ weight[edge] for edge in get_edges(c)] )

        cycle_edges_weight = {edge:weight[edge] for edge in get_edges(c) }
        for eachEdge in cycle_edges_weight.keys():
            cycle_edges_weight[eachEdge] -= mini_weight
            weight[eachEdge ] -= mini_weight
            if cycle_edges_weight[eachEdge] == 0:
                fas.append( eachEdge )
                graph.remove_edge( eachEdge[0], eachEdge[1] )

    for eachEdge in copy.copy(fas):
        graph.add_edge( eachEdge[0], eachEdge[1], {'weight' : origin_weight[eachEdge]} )
        if nx.is_directed_acyclic_graph( graph):
            fas.remove(eachEdge)
            continue
        else:
            graph.remove_edge( eachEdge[0], eachEdge[1] )

    return fas
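A simplified greedy variant of the same idea, shown only as a usage sketch: instead of the weight-subtraction bookkeeping above, it just drops the lightest edge of each remaining cycle until the graph is acyclic.

import networkx as nx

g = nx.DiGraph()
g.add_edge('a', 'b', weight=3)
g.add_edge('b', 'c', weight=1)
g.add_edge('c', 'a', weight=2)

removed = []
while not nx.is_directed_acyclic_graph(g):
    cycle = next(nx.simple_cycles(g))
    cycle_edges = list(zip(cycle, cycle[1:] + cycle[:1]))
    u, v = min(cycle_edges, key=lambda e: g.edges[e]['weight'])
    removed.append((u, v))
    g.remove_edge(u, v)

print(removed)  # [('b', 'c')] -- the lightest edge on the only cycle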
Example #13
File: paths.py  Project: petrushev/graphx
def cycles(request, graph):
    offset, limit = _getPaging(request)

    icycles = nx.simple_cycles(graph)
    icycles = islice(icycles, offset, offset + limit)

    request.respondJson({'cycles': tuple(icycles)})
Example #14
File: codejam.py  Project: tdongsi/python
    def _solve_bff(self, bff_str):
        # Construct the directed graph
        bffs = [int(e.strip()) for e in bff_str.split(' ')]
        nodes = [i+1 for i in xrange(len(bffs))]
        gr = nx.DiGraph()
        gr.add_nodes_from(nodes)
        gr.add_edges_from([e for e in zip(nodes, bffs)])

        max_length = 0
        tree = self._build_tree(bffs)
        paths = []
        # For each simple cycle in the graph
        for cycle in nx.simple_cycles(gr):
            if len(cycle) == 2:
                # If cycle length is two, we can add more nodes to form a path
                path_length = self._find_path_length(cycle, tree)
                # All the paths can be chained to form a circle
                paths.append(path_length)
            elif len(cycle) > max_length:
                # If cycle length is greater than two, we cannot add more nodes
                max_length = len(cycle)

        total_path_length = sum(paths)
        if total_path_length > max_length:
            max_length = total_path_length

        return max_length
Example #15
    def check(self):
        if not self.graph.is_acyclic():
            err = "Graph cannot be processed because it contains cycles in it:"
            # FIXME(mattymo): GraphSolver cannot be used to call this method
            err += ', '.join(six.moves.map(str,
                                           nx.simple_cycles(
                                               nx.DiGraph(self.graph))))
            err += '\n'
            raise errors.InvalidData(err)

        non_existing_tasks = []
        invalid_tasks = []

        for node_key, node_value in six.iteritems(self.graph.node):
            if not node_value.get('id'):
                successors = self.graph.successors(node_key)
                predecessors = self.graph.predecessors(node_key)

                neighbors = successors + predecessors

                non_existing_tasks.append(node_key)
                invalid_tasks.extend(neighbors)

        if non_existing_tasks:
            raise errors.InvalidData(
                "Tasks '{non_existing_tasks}' can't be in requires"
                "|required_for|groups|tasks for [{invalid_tasks}]"
                " because they don't exist in the graph".format(
                    non_existing_tasks=', '.join(
                        str(x) for x in sorted(non_existing_tasks)),
                    invalid_tasks=', '.join(
                        str(x) for x in sorted(set(invalid_tasks)))))
Example #16
def find_cycles(instance):
		
	bn_graph = convert_to_graph(instance)
	
	cycles_found = list(nx.simple_cycles(bn_graph))

	return cycles_found
Example #17
def MaximalNonBranchingPaths(G):
    '''
    G: a networkx DiGraph or MultiDiGraph; http://rosalind.info/problems/ba3m/
    '''
    paths = []
    for v in G.nodes():
        if G.in_degree(v) != 1 or G.out_degree(v) != 1:
            if G.out_degree(v) > 0:
                for i in G.out_edges(v):
                    non_branching_path = [*i]
                    w = i[1]
                    while G.in_degree(w) == 1 and G.out_degree(w) == 1:
                        u = list(G.out_edges(w))[0][1]
                        non_branching_path.append(u)
                        w = u
                    paths.append(non_branching_path)
    
    for cycle in nx.simple_cycles(G):
        branch = 0
        for v in cycle:
            if G.in_degree(v) != 1 or G.out_degree(v) != 1:
                branch = 1
        if branch == 0:
            cycle.append(cycle[0])
            paths.append(cycle)
    
    return paths
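A quick usage sketch for MaximalNonBranchingPaths above, on a toy graph with arbitrary node labels: node 2 branches, so the cycle through it is emitted as a path ending back at 2 rather than as an isolated cycle.

import networkx as nx

g = nx.DiGraph([(1, 2), (2, 3), (3, 4), (2, 5), (5, 6), (6, 2)])
for path in MaximalNonBranchingPaths(g):
    print(path)
# expected: [1, 2], [2, 3, 4], [2, 5, 6, 2] (order may vary)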
Example #18
 def test_simple_graph_with_reported_bug(self):
     G = nx.DiGraph()
     edges = [
         (0, 2),
         (0, 3),
         (1, 0),
         (1, 3),
         (2, 1),
         (2, 4),
         (3, 2),
         (3, 4),
         (4, 0),
         (4, 1),
         (4, 5),
         (5, 0),
         (5, 1),
         (5, 2),
         (5, 3),
     ]
     G.add_edges_from(edges)
     cc = sorted(nx.simple_cycles(G))
     assert_equal(len(cc), 26)
     rcc = sorted(nx.recursive_simple_cycles(G))
     assert_equal(len(cc), len(rcc))
     for c in cc:
         assert_true(any(self.is_cyclic_permutation(c, rc) for rc in rcc))
     for rc in rcc:
         assert_true(any(self.is_cyclic_permutation(rc, c) for c in cc))
Example #19
def solver(sent_tuples, featureset):
    stats = {}
    globalStats = {}
    #outs = [enum(x) for x in sent_tuples]
    count = 1
    for combination in itertools.product(*sent_tuples):
        # print combination
        # if count % 1000 == 0:
        #     print count
        # count = count+1
        G = nx.DiGraph()
        for c in combination:
            key = c[0][0]
            val = c[0][1]
            for v in val:
                G.add_edge(key,v)
            if c not in stats:
                stats[c] = 0
        try:
            k = next(nx.simple_cycles(G))
        except StopIteration:
            for f in featureset:
                feature = f(combination)
                for k in feature:
                    if k in globalStats:
                        globalStats[k] += feature[k]
                    else:
                        globalStats[k] = feature[k]
            for c in combination:
                stats[c] = stats[c] + 1

    return (stats, globalStats)
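The try/StopIteration pattern above is one way to ask whether a graph has any cycle at all; nx.is_directed_acyclic_graph, or next() with a default, expresses the same check. A small sketch:

import networkx as nx

g = nx.DiGraph([(1, 2), (2, 3)])
print(nx.is_directed_acyclic_graph(g))   # True
print(next(nx.simple_cycles(g), None))   # None -- nothing to yield

g.add_edge(3, 1)
print(nx.is_directed_acyclic_graph(g))   # False
print(next(nx.simple_cycles(g), None))   # e.g. [1, 2, 3]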
Example #20
def simple_loops(g, u):
    """
    iterator over the list of simple loops of graph g at the undersample rate u
    """
    gx = graph2nx(num2CG(g2num(undersample(g,u)), len(g)))
    for l in networkx.simple_cycles(gx):
        yield l
Example #21
	def is_graph_fair(self,graph):
		control_dict = nx.get_node_attributes(graph,'control')
		for cycle in nx.simple_cycles(graph):
			controls = [control_dict[n] for n in cycle]
			if self.is_system_fair(controls) == False:
				return False
		return True
Example #22
def find_attractor_old(G_transition_graph):
    
    '''
        Arguments:
            G_transition_graph [networkx Graph object]
                networkx directed graph showing how network configurations map to one another
        Return:
            attractors [dict of list of lists of ints]
                ['fixed'] = [[532], [948]]
                ['cycle'] = []
        --> The output of this tells me all the cycles in the network, but it does not tell me whether they
            are accessible from the initial state that I'm interested in. Need to figure this out.
    '''
    attractor_list = nx.simple_cycles(G_transition_graph) #in case of deterministic system, any cycle without considering edge direction will be directed cycle.
    attractors = {}
    attractors['fixed'] = []
    attractors['cycle'] = []

    for network_ID in attractor_list:
        # print network_ID
        if len(network_ID) == 1:
            attractors['fixed'].append(network_ID)
        else:
            attractors['cycle'].append(network_ID)

    return attractors #this outputs decID of attractor states (fixed and cyclic)
Example #23
def get_syntax_paths(syntax, from_token, to_token):
    """

    :param syntax: object holding syntax_trees (each with an nx.DiGraph graph) and tokens
    :param Token from_token:
    :param Token to_token:
    :return dict:
    """
    assert isinstance(from_token, Token)
    assert isinstance(to_token, Token)
    paths = {}
    for idx, syntax_tree in syntax.syntax_trees.iteritems():
        neutralized_graph = nx.Graph(syntax_tree.graph)
        if from_token == to_token:
            cycles = [cycle for cycle in nx.simple_cycles(syntax_tree.graph) if from_token.index in cycle]
            if len(cycles) == 0:
                return paths
            else:
                path = min(cycles, key=lambda cycle: len(cycle))
        else:
            if not nx.has_path(neutralized_graph, from_token.index, to_token.index):
                continue
            path = nx.shortest_path(neutralized_graph, from_token.index, to_token.index)
        token_path = [syntax.tokens[token_idx] for token_idx in path]
        syntax_path = SyntaxPath(syntax, idx, token_path)
        paths[idx] = syntax_path

    return paths
Example #24
def _prune_states(K, graph, source, sink):
    """
    Removes cycles and redundant nodes (that are not reachable from source)
    from the subgraph of graph defined by the nodes in K.
    
    """
    
    # Create a subgraph with the nodes now in K
    # Find and remove cycles by deleting the edge between 
    # the second to last node and the last node of the cycle,
    # thus keeping nodes that may be important 
    # to the trust calculation.
    subgraph = graph.subgraph(K).copy()
    cycles = list(nx.simple_cycles(subgraph))
    if cycles:
        for cycle in cycles:
            subgraph.remove_edges_from([(cycle[-2], cycle[-1])])
            
    # Get all paths from source to sink without cycles and redundant nodes
    simple_paths = list(nx.all_simple_paths(G=graph, source=source, target=sink))
    relevant_nodes = set(chain.from_iterable(simple_paths))
            
    # Remove nodes no longer used (not in simple_paths)
    for n in K:
        if n not in relevant_nodes:
            subgraph.remove_node(n)
            
    return subgraph
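A minimal illustration of the cycle-breaking rule used above (delete the edge between the second-to-last and last node of each reported cycle), on a throwaway graph:

import networkx as nx

g = nx.DiGraph([('s', 'a'), ('a', 'b'), ('b', 'a'), ('b', 't')])
for cycle in list(nx.simple_cycles(g)):
    # the edge may already be gone if an earlier cycle shared it
    if g.has_edge(cycle[-2], cycle[-1]):
        g.remove_edge(cycle[-2], cycle[-1])

print(list(g.edges()))  # the a <-> b two-cycle is reduced to a single edge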
Example #25
File: CPD.py  Project: infoburp/pgmpy
    def add_edge(self, u, v, label):
        """
        Add an edge between u and v.

        The nodes u and v will be automatically added if they are
        not already in the graph.

        Parameters
        ----------
        u,v: nodes
            Nodes can be any hashable (and not None) Python object.
        label: string
            Label should be value of the variable observed.
            (underscore separated if multiple variables)
        attr_dict: dictionary, optional (default= no attributes)
            Dictionary of edge attributes. Key/Value pairs will
            update existing data associated with the edge.
        attr: Keyword arguments, optional
            Edge data can be assigned using keyword arguments.

        Examples
        --------
        >>> from pgmpy.factors import TreeCPD, Factor
        >>> tree = TreeCPD([('B', Factor(['A'], [2], [0.8, 0.2]), 0),
        ...                 ('B', 'C', 1)])
        >>> tree.add_edge('C', Factor(['A'], [2], [0.1, 0.9]), label=0)
        """
        if u != v:
            super(TreeCPD, self).add_edge(u, v, label=label)
            if list(nx.simple_cycles(self)):
                super(TreeCPD, self).remove_edge(u, v)
                raise ValueError("Self Loops and Cycles are not allowed")
        else:
            raise ValueError("Self Loops and Cycles are not allowed")
Example #26
def find_attractor(decStateTransMap):

    '''
        Arguments:
            -- 1. decStateTransMap
        Return:
            -- attractor
    '''
    attractor_list = nx.simple_cycles(decStateTransMap) #in case of deterministic system, any cycle without considering edge direction will be directed cycle.
    attractors = {}
    #attractors['fixed'] = []
    #attractors['cycle'] = []

    undirectedMap = nx.DiGraph.to_undirected(decStateTransMap)

    for u in attractor_list:
        attractors[u[0]] = {}
        if len(u) == 1:
            attractors[u[0]]['type'] = 'fixed'
        else:
            attractors[u[0]]['type'] = 'cycle'

    for v in attractors.iterkeys():
        basin = nx.node_connected_component(undirectedMap, v)
        attractors[v]['basin'] = basin
        attractors[v]['basin-size'] = len(basin)

    sorted_attractors = OrderedDict(sorted(attractors.items(), key=lambda kv: kv[1]['basin-size'], reverse=True))
    return sorted_attractors
Example #27
 def test_simple_cycles(self):
     edges = [(0, 0), (0, 1), (0, 2), (1, 2), (2, 0), (2, 1), (2, 2)]
     G = nx.DiGraph(edges)
     cc = sorted(nx.simple_cycles(G))
     ca = [[0], [0, 1, 2], [0, 2], [1, 2], [2]]
     for c in cc:
         assert_true(any(self.is_cyclic_permutation(c, rc) for rc in ca))
Example #28
def find_most_repeated_cycles(di_graph):
    """
    Returns a list whose elements have the format [edge, amount_of_appearances].

    Args:
        di_graph : nx.DiGraph()
            A networkx DiGraph class for representing DAG

    Returns:
        MATRIX[[TUPLE, INT], [TUPLE, INT], [TUPLE, INT], ...]
            If we have at least one edge with one appearance
        MATRIX[]
            If we don't have edges
    """
    list_all_cycles = []
    cycles = list(nx.simple_cycles(di_graph))
    for i in range(0, len(cycles)):
        list_all_cycles.append(find_cycle_edges(cycles[i], di_graph.edges(cycles[i])))
    flatted_edges = sum(list_all_cycles, [])  # This flattens the nested list of edges

    # This list contains a list of edges and their appearances on the list, but only appearances bigger than 0
    checked_edges = []
    while len(flatted_edges) > 0:
        cont = flatted_edges.count(flatted_edges[0])
        if cont > 0:  # at least one appearance (always true here)
            checked_edges.append([flatted_edges[0], cont])
        # This remove a value from a list
        flatted_edges[:] = (value for value in flatted_edges if value != flatted_edges[0])
    return checked_edges
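The same counting can be expressed with collections.Counter; a sketch that tallies how often each directed edge participates in a simple cycle (the example graph is made up):

from collections import Counter

import networkx as nx

g = nx.DiGraph([(1, 2), (2, 1), (2, 3), (3, 1)])
edge_counts = Counter()
for cycle in nx.simple_cycles(g):
    edge_counts.update(zip(cycle, cycle[1:] + cycle[:1]))

print(edge_counts.most_common())  # (1, 2) appears in both cycles, the rest once each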
Example #29
File: param.py  Project: StackStorm/st2
def _validate(G):
    '''
    Validates dependency graph to ensure it has no missing or cyclic dependencies
    '''
    for name in G.nodes():
        if 'value' not in G.node[name] and 'template' not in G.node[name]:
            msg = 'Dependency unsatisfied in variable "%s"' % name
            raise ParamException(msg)

    if not nx.is_directed_acyclic_graph(G):
        graph_cycles = nx.simple_cycles(G)

        variable_names = []
        for cycle in graph_cycles:
            try:
                variable_name = cycle[0]
            except IndexError:
                continue

            variable_names.append(variable_name)

        variable_names = ', '.join(sorted(variable_names))
        msg = ('Cyclic dependency found in the following variables: %s. Likely the variable is '
               'referencing itself' % (variable_names))
        raise ParamException(msg)
Example #30
def sq():
    with open("rosalind_sq.txt") as f:
        lines = f.readlines()
        # remove empty lines
        lines = [line for line in lines if line.strip()]

    # Num test cases
    t = int(lines[0])
    del lines[0]
    for i in xrange(t):
        n, e = map(int, lines[0].split())
        edge_list = map(lambda x: map(int, x.strip().split()), lines[1:e+1])
        del lines[:e+1]

        # Create the graph
        G = nx.DiGraph()
        G.add_nodes_from(range(1,n+1))
        for edge in edge_list:
            G.add_edge(edge[0], edge[1])
            G.add_edge(edge[1], edge[0])


        cycles = [c for c in nx.simple_cycles(G) if len(c)==4]
        if cycles:
            print 1,
        else:
            print -1,

    print ""
Example #31
 def test_unsortable(self):
     #  TODO What does this test do?  das 6/2013
     G = nx.DiGraph()
     nx.add_cycle(G, ['a', 1])
     c = list(nx.simple_cycles(G))
Example #32
def enumMaximumMatchingIter(g, match, all_matches, add_e=None):
    '''Recursively search maximum matchings.

    <g>: undirected bipartite graph. Nodes are separated by their
         'bipartite' attribute.
    <match>: list of edges forming one maximum matching of <g>.
    <all_matches>: list, each is a list of edges forming a maximum
                   matching of <g>. Newly found matchings will be appended
                   into this list.
    <add_e>: tuple, the edge used to form subproblems. If not None,
             will be added to each newly found matchings.

    Return <all_matches>: updated list of all maximum matchings.

    Author: guangzhi XU ([email protected]; [email protected])
    Update time: 2017-05-21 20:09:06.
    '''

    #---------------Form directed graph D---------------
    d = formDirected(g, match)

    #-----------------Find cycles in D-----------------
    cycles = list(nx.simple_cycles(d))

    if len(cycles) == 0:

        #---------If no cycle, find a feasible path---------
        all_uncovered = set(g.node).difference(set([ii[0] for ii in match]))
        all_uncovered = all_uncovered.difference(set([ii[1] for ii in match]))
        all_uncovered = list(all_uncovered)

        #--------------If no path, terminate--------------
        if len(all_uncovered) == 0:
            return all_matches

        #----------Find a length 2 feasible path----------
        idx = 0
        uncovered = all_uncovered[idx]
        while True:

            if uncovered not in nx.isolates(g):
                paths = nx.single_source_shortest_path(d, uncovered, cutoff=2)
                len2paths = [vv for kk, vv in paths.items() if len(vv) == 3]

                if len(len2paths) > 0:
                    reversed = False
                    break

                #----------------Try reversed path----------------
                paths_rev = nx.single_source_shortest_path(d.reverse(),
                                                           uncovered,
                                                           cutoff=2)
                len2paths = [
                    vv for kk, vv in paths_rev.items() if len(vv) == 3
                ]

                if len(len2paths) > 0:
                    reversed = True
                    break

            idx += 1
            if idx > len(all_uncovered) - 1:
                return all_matches

            uncovered = all_uncovered[idx]

        #-------------Create a new matching M'-------------
        len2path = len2paths[0]
        if reversed:
            len2path = len2path[::-1]
        len2path = zip(len2path[:-1], len2path[1:])

        new_match = []
        for ee in d.edges():
            if ee in len2path:
                if g.node[ee[1]]['bipartite'] == 0:
                    new_match.append((ee[1], ee[0]))
            else:
                if g.node[ee[0]]['bipartite'] == 0:
                    new_match.append(ee)

        if add_e is not None:
            for ii in add_e:
                new_match.append(ii)

        all_matches.append(new_match)

        #---------------------Select e---------------------
        e = set(len2path).difference(set(match))
        e = list(e)[0]

        #-----------------Form subproblems-----------------
        g_plus = g.copy()
        g_minus = g.copy()
        g_plus.remove_node(e[0])
        g_plus.remove_node(e[1])

        g_minus.remove_edge(e[0], e[1])

        add_e_new = [
            e,
        ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = enumMaximumMatchingIter(g_minus, match, all_matches,
                                              add_e)
        all_matches = enumMaximumMatchingIter(g_plus, new_match, all_matches,
                                              add_e_new)

    else:
        #----------------Find a cycle in D----------------
        cycle = cycles[0]
        cycle.append(cycle[0])
        cycle = zip(cycle[:-1], cycle[1:])

        #-------------Create a new matching M'-------------
        new_match = []
        for ee in d.edges():
            if ee in cycle:
                if g.node[ee[1]]['bipartite'] == 0:
                    new_match.append((ee[1], ee[0]))
            else:
                if g.node[ee[0]]['bipartite'] == 0:
                    new_match.append(ee)

        if add_e is not None:
            for ii in add_e:
                new_match.append(ii)

        all_matches.append(new_match)

        #-----------------Choose an edge E-----------------
        e = set(match).intersection(set(cycle))
        e = list(e)[0]

        #-----------------Form subproblems-----------------
        g_plus = g.copy()
        g_minus = g.copy()
        g_plus.remove_node(e[0])
        g_plus.remove_node(e[1])
        g_minus.remove_edge(e[0], e[1])

        add_e_new = [
            e,
        ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = enumMaximumMatchingIter(g_minus, new_match, all_matches,
                                              add_e)
        all_matches = enumMaximumMatchingIter(g_plus, match, all_matches,
                                              add_e_new)

    return all_matches
Example #33
    def run(
        self,
        max_cores=None,
        dry=False,
        set_successful=True,
        cmd_wrapper=signature.default_cmd_fxn_wrapper,
        log_out_dir_func=default_task_log_output_dir,
        max_gpus=None,
        do_cleanup_atexit=True,
        lethal_signals=TERMINATION_SIGNALS,
    ):
        """
        Runs this Workflow's DAG

        :param int max_cores: The maximum number of cores to use at once.  A value of None indicates no maximum.
        :param int max_attempts: The maximum number of times to retry a failed job.
             Can be overridden with on a per-Task basis with Workflow.add_task(..., max_attempts=N, ...)
        :param callable log_out_dir_func: A function that returns a Task's logging directory (must be unique).
             It receives one parameter: the Task instance.
             By default a Task's log output is stored in log/stage_name/task_id.
             See _default_task_log_output_dir for more info.
        :param callable cmd_wrapper: A decorator which will be applied to every Task's cmd_fxn.
        :param bool dry: If True, do not actually run any jobs.
        :param bool set_successful: Sets this workflow as successful if all tasks finish without a failure.
            You might set this to False if you intend to add and
            run more tasks in this workflow later.
        :param do_cleanup_atexit: if False, do not attempt to cleanup unhandled exits.
        :param lethal_signals: signals to catch and shutdown

        Returns True if all tasks in the workflow ran successfully, False otherwise.
        If dry is specified, returns None.
        """

        if cmd_wrapper == signature.default_cmd_fxn_wrapper:
            warnings.warn(
                f"Having functions return bash strings as the default behavior is deprecated.  While "
                f"this behavior will be supported, it is recommended that you set cmd_wrapper to "
                f"cosmos.api.py_call which will be the new default."
                f"See examples/ex3.py. ")

        try:
            try:
                assert os.path.exists(os.getcwd(
                )), "current working dir does not exist! %s" % os.getcwd()

                assert hasattr(
                    self, "cosmos_app"
                ), "Workflow was not initialized using the Workflow.start method"
                assert hasattr(
                    log_out_dir_func,
                    "__call__"), "log_out_dir_func must be a function"
                assert self.session, "Workflow must be part of a sqlalchemy session"

                session = self.session
                self.log.info(
                    "Preparing to run %s using DRM `%s`, cwd is `%s`",
                    self,
                    self.cosmos_app.default_drm,
                    os.getcwd(),
                )
                try:
                    user = getpass.getuser()
                except:
                    # fallback to uid if we can't resolve a user name
                    user = os.getuid()

                self.log.info("Running as %s@%s, pid %s", user,
                              os.uname()[1], os.getpid())

                self.max_cores = max_cores
                self.max_gpus = max_gpus
                #
                # Run some validation checks
                #

                # check GPU env variables are set correctly
                if self.max_gpus is not None and self.cosmos_app.default_drm == "local":
                    if "COSMOS_LOCAL_GPU_DEVICES" not in os.environ:
                        raise EnvironmentError(
                            "COSMOS_LOCAL_GPU_DEVICES environment variable must be set to a "
                            "comma delimited list of gpu devices if using a local DRM to manage "
                            "GPUs")

                # check for duplicate output files
                output_fnames_to_task_and_key = dict()
                for task in self.tasks:
                    for key, fname in list(task.output_map.items()):
                        current_value = output_fnames_to_task_and_key.setdefault(
                            fname, (task, key))
                        if current_value != (task, key):
                            task2, key2 = current_value
                            raise ValueError(
                                "Duplicate output files detected!:  "
                                '{task}.params["{key}"] == {task2}.params["{key2}"] == {fname}'
                                .format(**locals()))
                        output_fnames_to_task_and_key[fname] = (task, key)

                from ..job.JobManager import JobManager

                if self.jobmanager is None:
                    self.jobmanager = JobManager(
                        get_submit_args=self.cosmos_app.get_submit_args,
                        cmd_wrapper=cmd_wrapper,
                        log_out_dir_func=log_out_dir_func,
                        logger=self.log,
                        session=self.session,
                        workflow=self,
                    )

                self.status = WorkflowStatus.running
                self.successful = False

                if self.started_on is None:
                    self.started_on = datetime.datetime.now()

                task_graph = self.task_graph()
                stage_graph = self.stage_graph()

                assert len(set(self.stages)) == len(
                    self.stages), "duplicate stage name detected: %s" % (next(
                        duplicates(self.stages)))

                # renumber stages
                stage_graph_no_cycles = nx.DiGraph()
                stage_graph_no_cycles.add_nodes_from(stage_graph.nodes())
                stage_graph_no_cycles.add_edges_from(stage_graph.edges())
                for cycle in nx.simple_cycles(stage_graph):
                    stage_graph_no_cycles.remove_edge(cycle[-1], cycle[0])
                for i, s in enumerate(topological_sort(stage_graph_no_cycles)):
                    s.number = i + 1
                    if s.status != StageStatus.successful:
                        s.status = StageStatus.no_attempt

                # Make sure everything is in the sqlalchemy session
                session.add(self)
                successful = list(
                    [t for t in task_graph.nodes() if t.successful])

                # print stages
                for s in sorted(self.stages, key=lambda s: s.number):
                    self.log.info("%s %s" % (s, s.status))

                # Create Task Queue
                task_queue = _copy_graph(task_graph)
                self.log.info("Skipping %s successful tasks..." %
                              len(successful))
                task_queue.remove_nodes_from(successful)

                if do_cleanup_atexit:
                    handle_exits(self)

                if self.max_cores is not None:
                    self.log.info("Ensuring there are enough cores...")
                    # make sure we've got enough cores
                    for t in task_queue:
                        assert int(t.core_req) <= self.max_cores, (
                            "%s requires more cpus (%s) than `max_cores` (%s)"
                            % (
                                t,
                                t.core_req,
                                self.max_cores,
                            ))

                # Run this thing!
                self.log.info("Committing to SQL db...")
                session.commit()
            except KeyboardInterrupt:
                # haven't started submitting yet, just raise the exception
                self.log.fatal("ctrl+c caught")
                self.terminate(due_to_failure=False)
                raise

            if not dry:
                _run(self, session, task_queue, lethal_signals=lethal_signals)

                # set status
                if self.status == WorkflowStatus.failed_but_running:
                    self.status = WorkflowStatus.failed
                    # set stage status to failed
                    for s in self.stages:
                        if s.status == StageStatus.running_but_failed:
                            s.status = StageStatus.failed
                    session.commit()
                    return False
                elif self.status == WorkflowStatus.running:
                    if set_successful:
                        self.status = WorkflowStatus.successful
                    session.commit()
                    return True
                else:
                    self.log.warning('%s exited with status "%s"', self,
                                     self.status)
                    session.commit()
                    return False
            else:
                self.log.info("Workflow dry run is complete")
                return None
        except Exception as ex:
            self.log.fatal("Exception was raised")
            self.log.fatal(ex, exc_info=True)
            self.terminate(due_to_failure=False)
            raise
Example #34
def _enumMaximumMatchingIter2(adj,
                              matchadj,
                              all_matches,
                              n1,
                              add_e=None,
                              check_cycle=True):
    """Recurively search maximum matchings.
    Similar to _enumMaximumMatching but implemented using adjacency matrix
    of graph for a slight speed boost.

    Parameters
    ----------
#    g : 
#        Undirected bipartite graph. Nodes are separated by their
#        'bipartite' attribute.
#    match : 
#        List of edges forming one maximum matching of `g`.
#    all_matches : 
#	    List, each is a list of edges forming a maximum matching of `g`.
#	    Newly found matchings will be appended into this list.
    add_e : tuple, optional
        Edge used to form subproblems. If not `None`, will be added to each
        newly found matchings.

    Returns
    -------
    list
        Updated list of all maximum matchings.

    Author
    ------
    guangzhi XU ([email protected]; [email protected])
    Update time: 2017-05-21 20:09:06.

    """
    import networkx as nx
    import numpy as np
    from scipy import sparse

    #-------------------Find cycles-------------------
    if check_cycle:
        d = matchadj.multiply(adj)
        d[n1:, :] = adj[n1:, :] - matchadj[n1:, :].multiply(adj[n1:, :])

        dg = nx.from_numpy_matrix(d.toarray(), create_using=nx.DiGraph())
        cycles = list(nx.simple_cycles(dg))
        if len(cycles) == 0:
            check_cycle = False
        else:
            check_cycle = True

    if check_cycle:
        cycle = cycles[0]
        cycle.append(cycle[0])
        cycle = zip(cycle[:-1], cycle[1:])

        #--------------Create a new matching--------------
        new_match = matchadj.copy()
        for ee in cycle:
            if matchadj[ee[0], ee[1]] == 1:
                new_match[ee[0], ee[1]] = 0
                new_match[ee[1], ee[0]] = 0
                e = ee
            else:
                new_match[ee[0], ee[1]] = 1
                new_match[ee[1], ee[0]] = 1

        if add_e is not None:
            for ii in add_e:
                new_match[ii[0], ii[1]] = 1

        all_matches.append(new_match)

        #-----------------Form subproblems-----------------
        g_plus = adj.copy()
        g_minus = adj.copy()
        g_plus[e[0], :] = 0
        g_plus[:, e[1]] = 0
        g_plus[:, e[0]] = 0
        g_plus[e[1], :] = 0
        g_minus[e[0], e[1]] = 0
        g_minus[e[1], e[0]] = 0

        add_e_new = [
            e,
        ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = _enumMaximumMatchingIter2(g_minus, new_match,
                                                all_matches, n1, add_e,
                                                check_cycle)
        all_matches = _enumMaximumMatchingIter2(g_plus, matchadj, all_matches,
                                                n1, add_e_new, check_cycle)

    else:
        #---------------Find uncovered nodes---------------
        uncovered = np.where(np.sum(matchadj, axis=1) == 0)[0]

        if len(uncovered) == 0:
            return all_matches

        #---------------Find feasible paths---------------
        paths = []
        for ii in uncovered:
            aa = adj[ii, :].dot(matchadj)
            if aa.sum() == 0:
                continue
            paths.append((ii, int(sparse.find(aa == 1)[1][0])))
            if len(paths) > 0:
                break

        if len(paths) == 0:
            return all_matches

        #----------------------Find e----------------------
        feas1, feas2 = paths[0]
        e = (feas1, int(sparse.find(matchadj[:, feas2] == 1)[0]))

        #----------------Create a new match----------------
        new_match = matchadj.copy()
        new_match[feas2, :] = 0
        new_match[:, feas2] = 0
        new_match[feas1, e[1]] = 1
        new_match[e[1], feas1] = 1

        if add_e is not None:
            for ii in add_e:
                new_match[ii[0], ii[1]] = 1

        all_matches.append(new_match)

        #-----------------Form subproblems-----------------
        g_plus = adj.copy()
        g_minus = adj.copy()
        g_plus[e[0], :] = 0
        g_plus[:, e[1]] = 0
        g_plus[:, e[0]] = 0
        g_plus[e[1], :] = 0
        g_minus[e[0], e[1]] = 0
        g_minus[e[1], e[0]] = 0

        add_e_new = [
            e,
        ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = _enumMaximumMatchingIter2(g_minus, matchadj, all_matches,
                                                n1, add_e, check_cycle)
        all_matches = _enumMaximumMatchingIter2(g_plus, new_match, all_matches,
                                                n1, add_e_new, check_cycle)

    #if len(all_matches) % 1000 == 0:
    #    print('len', len(all_matches))

    #print('another')
    return all_matches
Example #35
	def simple_cycles(self):
		return list(nx.simple_cycles(self.graph))
Example #36
        file_list[i] = os.path.basename(f)
        i += 1

    # rename .for file is case of scalasca
    if scorep:
        tmp1 = glob.glob(os.path.join(srcOutDir, '*.for'))
        tmp2 = glob.glob(os.path.join(srcOutDir, '*.FOR'))
        for f in tmp1 + tmp2:
            fn = os.path.splitext(f)[0] + '.f'
            os.rename(f, fn)

    # create the graph
    G = createDiGraph(mod_dict, use_dict)

    # check for cyclic dependencies
    cyc = list(nx.simple_cycles(G))

    # draw the graph
    # saveGraph(G,cyc,3000,"graph.pdf")

    # raise for cyclic dependency
    if cyc != []:
        raise Exception("Error, cyclic dependency found!")

    file_list_dep = []
    while 1:
        x = [x for x in G.nodes_iter() if G.in_degree(x) == 0]
        #print '-->'," ".join(x)
        if x == []: break
        file_list_dep.extend(x)
        G.remove_nodes_from(x)
Example #37
DG1.remove_nodes_from(remove)

# Array to store the number of directed cycles found for each order
# accumulated over randomizations
nksum_directed = np.zeros(kmax + 1, dtype=int)

for j_rand in range(n_randomizations):
    # Randomize the network
    edges = list(DG1.edges())
    for edge in edges:
        revert_edge = random.randint(0, 1)
        if revert_edge:
            DG1.remove_edge(edge[0], edge[1])
            DG1.add_edge(edge[1], edge[0])

    # Find the cycles
    directed_cycles = list(nx.simple_cycles(DG1))

    # Store their distribution
    for cycle in directed_cycles:
        k = len(cycle)
        if k <= kmax:
            nksum_directed[k] += 1

# Calculate the mean number of cycles per order
nk_directed = nksum_directed.astype(float) / n_randomizations

# Save the results to file
data = np.vstack((np.arange(kmax + 1, dtype=int), nk_directed))
np.savetxt("cycledistribution_rand_sevaseviene.dat", data, fmt="%6d")
Example #38
# Find cycles.
exchange_graph = nx.DiGraph()
fees = {}
for asset_key, asset in asset_pairs.iteritems():
    asset_name = str(asset_key.split('.')[0])
    base = str(asset['base'])
    quote = str(asset['quote'])
    assert asset_name == base + quote, '%s != %s + %s' % (asset_name, base,
                                                          quote)
    fee = 1 - asset['fees'][0][1] / 100.
    fees[base, quote] = fee
    fees[quote, base] = fee
    exchange_graph.add_edge(base, quote)
    exchange_graph.add_edge(quote, base)

cycles = map(tuple, nx.simple_cycles(exchange_graph))
edges = {
    cycle: tuple(zip(cycle[:-1], cycle[1:]) + [(cycle[-1], cycle[0])])
    for cycle in cycles
}

mongodb_address = None
client = pymongo.MongoClient(mongodb_address)
db = client['kraken']
collection = db['tickers']
cursor = collection.find()

rows = []
for doc in tqdm.tqdm(cursor, total=cursor.count()):
    timestamp = utc_to_tz(doc['timestamp'], tz.gettz('PST'))
    prices = get_prices(asset_pairs, doc['ticker'])
Example #39
def init_schema(filename, out_filename=None):
    """ Initialize an `ObjTables` schema from a tabular declarative specification in
    :obj:`filename`. :obj:`filename` can be a XLSX, CSV, or TSV file.

    Schemas (classes and attributes) should be defined using the following tabular format.
    Classes and their attributes can be defined in any order.

    .. table:: Format for specifying classes.
        :name: class_tabular_schema

        ==========================================  =========================  =================================================  ========
        Python                                      Tabular column             Tabular column values                              Optional
        ==========================================  =========================  =================================================  ========
        Class name                                  !Name                      Valid Python name
        Class                                       !Type                      ``Class``
        Superclass                                  !Parent                    Empty or the name of another class
        :obj:`obj_tables.Meta.table_format`         !Format                    ``row``, ``column``, ``multiple_cells``, ``cell``
        :obj:`obj_tables.Meta.verbose_name`         !Verbose name              String                                             Y
        :obj:`obj_tables.Meta.verbose_name_plural`  !Verbose name plural       String                                             Y
        :obj:`obj_tables.Meta.description`          !Description                                                                  Y
        ==========================================  =========================  =================================================  ========

    .. table:: Format for specifying attributes of classes.
        :name: attribute_tabular_schema

        ===========================================================  ====================  ==========================================  ========
        Python                                                       Tabular column        Tabular column values                       Optional
        ===========================================================  ====================  ==========================================  ========
        Name of instance of subclass of :obj:`obj_tables.Attribute`  !Name                 a-z, A-Z, 0-9, _, :, >, ., -, [, ], or ' '
        :obj:`obj_tables.Attribute`                                  !Type                 ``Attribute``
        Parent class                                                 !Parent               Name of the parent class
        Subclass of :obj:`obj_tables.Attribute`                      !Format               ``Boolean`, ``Float`, ``String``, etc.
        :obj:`obj_tables.Attribute.verbose_name`                     !Verbose name         String                                      Y
        :obj:`obj_tables.Attribute.verbose_name_plural`              !Verbose name plural  String                                      Y
        :obj:`obj_tables.Attribute.description`                      !Description          String                                      Y
        ===========================================================  ====================  ==========================================  ========

    Args:
        filename (:obj:`str`): path to the schema specification
        out_filename (:obj:`str`, optional): path to save schema

    Returns:
        :obj:`tuple`:

            * :obj:`types.ModuleType`: module with classes
            * :obj:`str`: schema name

    Raises:
        :obj:`ValueError`: if schema specification is not in a supported format,
            an XLSX schema file does not contain a worksheet with the name ``!!_Schema`` which specifies the schema,
            the class inheritance structure is cyclic,
            or the schema specification is invalid (e.g., a class is defined multiple times)
    """
    from obj_tables.io import WorkbookReader

    base, ext = os.path.splitext(filename)
    if ext in ['.xlsx']:
        sheet_name = '!!' + SCHEMA_SHEET_NAME
    elif ext in ['.csv', '.tsv']:
        if '*' in filename:
            sheet_name = '!!' + SCHEMA_SHEET_NAME
        else:
            sheet_name = ''
    else:
        raise ValueError('{} format is not supported.'.format(ext))

    wb = wc_utils.workbook.io.read(filename)
    if sheet_name not in wb:
        raise ValueError(
            'Schema file must contain a sheet with name "{}".'.format(
                sheet_name))
    ws = wb[sheet_name]

    name_col_name = '!Name'
    type_col_name = '!Type'
    parent_col_name = '!Parent'
    format_col_name = '!Format'
    verbose_name_col_name = '!Verbose name'
    verbose_name_plural_col_name = '!Verbose name plural'
    desc_col_name = '!Description'

    col_names = [
        name_col_name,
        type_col_name,
        parent_col_name,
        format_col_name,
        verbose_name_col_name,
        verbose_name_plural_col_name,
        desc_col_name,
    ]

    class_type = 'Class'
    attr_type = 'Attribute'

    rows = ws
    doc_metadata, model_metadata, _ = WorkbookReader.read_worksheet_metadata(
        sheet_name, rows)

    doc_schema_name = doc_metadata.get('schema', None)
    schema_schema_name = model_metadata.get('name', None)
    assert not doc_schema_name or not schema_schema_name or doc_schema_name == schema_schema_name, \
        "Schema names must be None or equal"
    schema_name = doc_schema_name or schema_schema_name
    module_name = schema_name or rand_schema_name()

    if model_metadata.get('type', None) != SCHEMA_TABLE_TYPE:
        raise ValueError(
            "The type of the schema must be '{}'.".format(SCHEMA_TABLE_TYPE))

    # parse model specifications
    header_row = rows[0]
    rows = rows[1:]

    if name_col_name not in header_row:
        raise ValueError('Schema must have column "{}"'.format(name_col_name))
    if type_col_name not in header_row:
        raise ValueError('Schema must have column "{}"'.format(type_col_name))
    if parent_col_name not in header_row:
        raise ValueError(
            'Schema must have column "{}"'.format(parent_col_name))
    if format_col_name not in header_row:
        raise ValueError(
            'Schema must have column "{}"'.format(format_col_name))
    extra_headers = set(header_row) - set(col_names)
    if extra_headers:
        raise ValueError('Schema has unrecognized columns:\n  {}'.format(
            '\n  '.join(natsorted(extra_headers, alg=ns.IGNORECASE))))

    cls_specs = {}
    explicit_model_names = []
    implicit_model_names = []
    for i_row, row_list in enumerate(rows):
        # ignore empty rows
        if all(cell in [None, ''] for cell in row_list):
            continue

        # ignore comment rows
        if len(row_list) == 1 and isinstance(
                row_list[0], str) and row_list[0].startswith(
                    '%/') and row_list[0].endswith('/%'):
            continue

        # convert cells to strings
        for i_cell, cell in enumerate(row_list):
            if cell is not None and not isinstance(cell, str):
                row_list[i_cell] = str(cell)

        row = {}
        for header, cell in zip(header_row, row_list):
            row[header] = cell

        if row[type_col_name] == class_type:
            cls_name = row[name_col_name]
            if not cls_name:
                raise ValueError(
                    'Class at row {} of the schema must have a name'.format(
                        i_row + 1))
            if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', cls_name):
                raise ValueError(
                    ("Invalid class name '{}' at row {} of the schema. "
                     "Class names must start with a letter or underscore, "
                     "and consist of letters, numbers, and underscores."
                     ).format(cls_name, i_row + 1))

            if cls_name in cls_specs:
                cls = cls_specs[cls_name]
                if cls['explictly_defined']:
                    raise ValueError(
                        'Class "{}" can only be defined once in the schema.'.
                        format(cls_name))
                cls['explictly_defined'] = True
            else:
                cls = cls_specs[cls_name] = {
                    'super_class': None,
                    'name': cls_name,
                    'attrs': {},
                    'attr_order': [],
                    'explictly_defined': True,
                }

            if row[parent_col_name]:
                cls['super_class'] = row[parent_col_name]

            if (row[format_col_name] or 'row') not in TableFormat.__members__:
                raise ValueError(
                    "Invalid class format '{}' at row {} of the schema".format(
                        row[format_col_name], i_row + 1))
            cls['tab_format'] = TableFormat[row[format_col_name] or 'row']

            def_verbose_name = cls_name
            cls['verbose_name'] = row.get(verbose_name_col_name,
                                          def_verbose_name) or def_verbose_name

            if row.get(verbose_name_col_name, None):
                def_plural_verbose_name = inflect.engine().plural(
                    row[verbose_name_col_name])
            else:
                def_plural_verbose_name = cls_name
            cls['verbose_name_plural'] = row.get(
                verbose_name_plural_col_name,
                def_plural_verbose_name) or def_plural_verbose_name

            cls['desc'] = row.get(desc_col_name, None) or None

            explicit_model_names.append(cls_name)

        elif row[type_col_name] == attr_type:
            cls_name = row[parent_col_name]
            if not cls_name:
                raise ValueError(
                    'Parent class of attribute at row {} must be defined'.
                    format(i_row + 1))
            if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', cls_name):
                raise ValueError((
                    "Parent class of attribute at row {} of the schema has an invalid name '{}'. "
                    "Class names must start with a letter or underscore, "
                    "and consist of letters, numbers, and underscores."
                ).format(i_row + 1, cls_name))

            if cls_name in cls_specs:
                cls = cls_specs[cls_name]
            else:
                cls = cls_specs[cls_name] = {
                    'explictly_defined': False,
                    'super_class': None,
                    'name': cls_name,
                    'attrs': {},
                    'attr_order': [],
                    'tab_format': TableFormat.row,
                    'verbose_name': cls_name,
                    'verbose_name_plural': cls_name,
                    'desc': None,
                }
                implicit_model_names.append(cls_name)

            attr_name = row[name_col_name]
            if not attr_name:
                raise ValueError(
                    'Attribute at row {} of the schema must have a name'.
                    format(i_row + 1))
            if not re.match(r'^[a-zA-Z_:>\.\- \[\]][a-zA-Z0-9_:>\.\- \[\]]*$',
                            attr_name):
                raise ValueError(
                    ("Invalid attribute name '{}' at row {} of the schema. "
                     "Attribute names must consist of alphanumeric "
                     "characters, underscores, colons, forward carets, "
                     "dots, dashes, square brackets, and spaces and "
                     "begin with a non-numeric character.").format(
                         attr_name, i_row + 1))
            attr_name = re.sub(r'[^a-zA-Z0-9_]', '_', attr_name)
            attr_name = stringcase.snakecase(attr_name)
            attr_name = re.sub(r'_+', '_', attr_name)

            if attr_name == 'Meta':
                raise ValueError(
                    '"{}" cannot have attribute with name "Meta" at row {} of the schema.'
                    .format(cls_name, i_row + 1)
                )  # pragma: no cover # unreachable because snake case is all lowercase
            if attr_name in cls['attrs']:
                raise ValueError(
                    'Attribute "{}" of "{}" can only be defined once.'.format(
                        row[name_col_name], cls_name))

            cls['attrs'][attr_name] = {
                'name': attr_name,
                'type': row[format_col_name],
                'desc': row.get(desc_col_name, None),
                'verbose_name': row.get(verbose_name_col_name,
                                        row[name_col_name])
            }
            cls['attr_order'].append(attr_name)

        else:
            if row[type_col_name]:
                raise ValueError(
                    'Type "{}" is not supported at row {} of the schema.'.
                    format(row[type_col_name], i_row + 1))
            else:
                raise ValueError(
                    'Type must be defined at row {} of the schema.'.format(
                        i_row + 1))

    # check that the inheritance graph is valid (i.e. acyclic)
    inheritance_graph = networkx.DiGraph()
    sub_classes = {'obj_tables.Model': []}
    for cls_name, cls_spec in cls_specs.items():
        if cls_spec['super_class']:
            if cls_spec['super_class'] not in cls_specs:
                raise ValueError(
                    'Superclass "{}" for class "{}" must be defined'.format(
                        cls_spec['super_class'], cls_name))

            inheritance_graph.add_edge(cls_spec['super_class'], cls_name)
            if cls_spec['super_class'] not in sub_classes:
                sub_classes[cls_spec['super_class']] = []
            sub_classes[cls_spec['super_class']].append(cls_name)
        else:
            inheritance_graph.add_edge('obj_tables.Model', cls_name)
            sub_classes['obj_tables.Model'].append(cls_name)
    if list(networkx.simple_cycles(inheritance_graph)):
        raise ValueError('The schema inheritance graph must be acyclic.')
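
    # A minimal sketch (kept as a comment, not wired into the logic above) of what
    # this check detects: a directed graph has no simple cycles exactly when it is
    # a DAG, so cyclic inheritance such as A -> B -> A is reported.
    #
    #     g = networkx.DiGraph()
    #     g.add_edge('A', 'B')
    #     g.add_edge('B', 'A')                         # cyclic inheritance
    #     list(networkx.simple_cycles(g))              # e.g. [['A', 'B']]
    #     networkx.is_directed_acyclic_graph(g)        # False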

    # create classes
    module = type(module_name, (types.ModuleType, ), {})

    all_attrs = get_attrs()
    classes_to_construct = list(sub_classes['obj_tables.Model'])
    while classes_to_construct:
        cls_name = classes_to_construct.pop()
        cls_spec = cls_specs[cls_name]
        # if not cls_spec['explictly_defined']:
        #     raise ValueError('Class "{}" is not defined in the schema'.format(cls_name))

        classes_to_construct.extend(sub_classes.get(cls_name, []))

        meta_attrs = {
            'table_format': cls_spec['tab_format'],
            'attribute_order': tuple(cls_spec['attr_order']),
            'description': cls_spec['desc'],
        }
        if cls_spec['verbose_name']:
            meta_attrs['verbose_name'] = cls_spec['verbose_name']
        if cls_spec['verbose_name_plural']:
            meta_attrs['verbose_name_plural'] = cls_spec['verbose_name_plural']

        attrs = {
            '__module__': module_name,
            '__doc__': cls_spec['desc'],
            'Meta': type('Meta', (Model.Meta, ), meta_attrs),
        }
        for attr_spec in cls_spec['attrs'].values():
            attr_type_spec, _, args = attr_spec['type'].partition('(')
            if attr_type_spec not in all_attrs:
                raise ValueError(
                    'Attribute "{}" is not defined in the schema'.format(
                        attr_type_spec))
            attr_type = all_attrs[attr_type_spec]
            attr_spec['python_type'] = attr_type_spec + 'Attribute'
            if args:
                attr_spec['python_args'] = args[0:-1]
                if attr_spec['verbose_name']:
                    attr_spec['python_args'] += ", verbose_name='{}'".format(
                        attr_spec['verbose_name'].replace("'", "\\'"))
            else:
                attr_spec['python_args'] = ''
                if attr_spec['verbose_name']:
                    attr_spec['python_args'] = "verbose_name='{}'".format(
                        attr_spec['verbose_name'].replace("'", "\\'"))

            if args:
                attr = eval('func(' + args, {}, {'func': attr_type})
            else:
                attr = attr_type()
            attr.verbose_name = attr_spec['verbose_name']
            attr.description = attr_spec['desc']
            attrs[attr_spec['name']] = attr

        if cls_spec['super_class'] is None or cls_spec[
                'super_class'] == 'obj_tables.Model':
            super_class = Model
        else:
            super_class = getattr(module, cls_spec['super_class'])

        cls = type(cls_spec['name'], (super_class, ), attrs)
        setattr(module, cls_spec['name'], cls)

    # optionally, generate a Python file
    if out_filename:
        with open(out_filename, 'w') as file:
            # print documentation
            file.write(
                '# Schema automatically generated at {:%Y-%m-%d %H:%M:%S}\n\n'.
                format(datetime.now()))

            # print import statements
            imported_modules = set(['obj_tables'])
            for cls_spec in cls_specs.values():
                for attr_spec in cls_spec['attrs'].values():
                    imported_modules.add(
                        'obj_tables.' +
                        attr_spec['python_type'].rpartition('.')[0])
            if 'obj_tables.' in imported_modules:
                imported_modules.remove('obj_tables.')
            for imported_module in imported_modules:
                file.write('import {}\n'.format(imported_module))

            # print definition of * import behavior
            file.write('\n')
            file.write('\n')
            file.write('__all__ = [\n')
            file.write(''.join("    '{}',\n".format(cls_name)
                               for cls_name in sorted(cls_specs.keys())))
            file.write(']\n')

            # print class definitions
            classes_to_define = list(sub_classes['obj_tables.Model'])
            while classes_to_define:
                cls_name = classes_to_define.pop(0)
                cls_spec = cls_specs[cls_name]
                classes_to_define.extend(sub_classes.get(cls_name, []))

                if cls_spec['super_class']:
                    super_class = cls_spec['super_class']
                else:
                    super_class = 'obj_tables.Model'

                file.write('\n')
                file.write('\n')
                file.write('class {}({}):\n'.format(cls_spec['name'],
                                                    super_class))
                if cls_spec['desc']:
                    file.write('    """ {} """\n\n'.format(cls_spec['desc']))
                for attr_name in cls_spec['attr_order']:
                    attr_spec = cls_spec['attrs'][attr_name]
                    file.write('    {} = obj_tables.{}({})\n'.format(
                        attr_spec['name'], attr_spec['python_type'],
                        attr_spec['python_args']))

                file.write('\n')
                file.write('    class Meta(obj_tables.Model.Meta):\n')
                file.write(
                    "        table_format = obj_tables.TableFormat.{}\n".
                    format(cls_spec['tab_format'].name))
                file.write("        attribute_order = (\n{}        )\n".format(
                    "".join("            '{}',\n".format(attr)
                            for attr in cls_spec['attr_order'])))
                if cls_spec['verbose_name']:
                    file.write("        verbose_name = '{}'\n".format(
                        cls_spec['verbose_name'].replace("'", "\\'")))
                if cls_spec['verbose_name_plural']:
                    file.write("        verbose_name_plural = '{}'\n".format(
                        cls_spec['verbose_name_plural'].replace("'", "\\'")))
                if cls_spec['desc']:
                    file.write("        description = '{}'\n".format(
                        cls_spec['desc'].replace("'", "\\'")))

    # get models in order of their definition
    model_names = det_dedupe(explicit_model_names + implicit_model_names)
    models = [getattr(module, model_name) for model_name in model_names]

    # return the created module and its name
    return (module, schema_name, models)
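
# A minimal usage sketch, assuming the function above is exposed under a
# hypothetical name such as `init_schema`; it returns the dynamic module, the
# schema name, and the model classes in definition order:
#
#     module, schema_name, models = init_schema('my_schema.xlsx',
#                                                out_filename='my_schema.py')
#     for cls in models:
#         print(cls.__name__, cls.Meta.attribute_order)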
Example #40
def main():
    # Command line arguments
    parser = argparse.ArgumentParser(
        description='Extract model subsets from the National Hydrologic Model')
    parser.add_argument('-O',
                        '--output_dir',
                        help='Output directory for subset')
    parser.add_argument('-p',
                        '--param_filename',
                        help='Name of output parameter file')
    parser.add_argument('-s',
                        '--streamflow_filename',
                        help='Name of streamflow data file')
    parser.add_argument('-P',
                        '--paramdb_dir',
                        help='Location of parameter database')
    parser.add_argument('-M',
                        '--merged_paramdb_dir',
                        help='Location of merged parameter database')
    parser.add_argument('-C', '--cbh_dir', help='Location of CBH files')
    parser.add_argument('-g',
                        '--geodatabase_filename',
                        help='Full path to NHM geodatabase')
    parser.add_argument('-j', '--job', help='Job directory to work in')
    parser.add_argument('-v',
                        '--verbose',
                        help='Output additional information',
                        action='store_true')
    parser.add_argument('--check_DAG',
                        help='Verify the streamflow network',
                        action='store_true')
    parser.add_argument('--output_cbh',
                        help='Output CBH files for subset',
                        action='store_true')
    parser.add_argument('--output_shapefiles',
                        help='Output shapefiles for subset',
                        action='store_true')
    parser.add_argument('--output_streamflow',
                        help='Output streamflows for subset',
                        action='store_true')
    parser.add_argument('--cbh_netcdf',
                        help='Enable netCDF output for CBH files',
                        action='store_true')
    parser.add_argument('--param_netcdf',
                        help='Enable netCDF output for parameter file',
                        action='store_true')
    parser.add_argument(
        '--add_gages',
        metavar="KEY=VALUE",
        nargs='+',
        help=
        'Add arbitrary streamgages to POIs of the form gage_id=segment. Segment must exist in the model subset. Additional streamgages are marked as poi_type=0.'
    )
    parser.add_argument(
        '--no_filter_params',
        help='Output all parameters regardless of modules selected',
        action='store_true')
    args = parser.parse_args()

    stdir = os.getcwd()

    # TODO: Add to command line arguments
    single_poi = False

    if args.job:
        if os.path.exists(args.job):
            # Change into job directory before running extraction
            os.chdir(args.job)
            # print('Working in directory: {}'.format(args.job))
        else:
            print('ERROR: Invalid jobs directory: {}'.format(args.job))
            exit(-1)

    # Setup the logging
    bandit_log = logging.getLogger('bandit')
    bandit_log.setLevel(logging.DEBUG)

    log_fmt = logging.Formatter('%(levelname)s: %(name)s: %(message)s')

    # Handler for file logs
    flog = logging.FileHandler('bandit.log')
    flog.setLevel(logging.DEBUG)
    flog.setFormatter(log_fmt)

    # Handler for console logs
    clog = logging.StreamHandler()
    clog.setLevel(logging.ERROR)
    clog.setFormatter(log_fmt)

    bandit_log.addHandler(flog)
    bandit_log.addHandler(clog)

    bandit_log.info('========== START {} =========='.format(
        datetime.datetime.now().isoformat()))

    addl_gages = None
    if args.add_gages:
        addl_gages = parse_gages(args.add_gages)
        bandit_log.info('Additional streamgages specified on command line')

    config = bc.Cfg('bandit.cfg')

    # Override configuration variables with any command line parameters
    for kk, vv in iteritems(args.__dict__):
        if kk not in [
                'job', 'verbose', 'cbh_netcdf', 'add_gages', 'param_netcdf',
                'no_filter_params'
        ]:
            if vv:
                bandit_log.info(
                    'Overriding configuration for {} with {}'.format(kk, vv))
                config.update_value(kk, vv)

    # Where to output the subset
    outdir = config.output_dir

    # The control file to use
    control_filename = config.control_filename

    # What to name the output parameter file
    param_filename = config.param_filename

    # Location of the NHM parameter database
    paramdb_dir = config.paramdb_dir

    # Location of the merged parameter database
    merged_paramdb_dir = config.merged_paramdb_dir

    streamgage_file = config.streamgage_file

    # List of outlets
    # dsmost_seg = config.outlets

    # List of upstream cutoffs
    # uscutoff_seg = config.cutoffs

    # List of additional HRUs (have no route to segment within subset)
    # hru_noroute = config.hru_noroute

    # List of output variables to subset
    try:
        include_model_output = config.include_model_output
        output_vars_dir = config.output_vars_dir
        output_vars = config.output_vars
    except KeyError:
        include_model_output = False

    # Control what is checked and output for subset
    check_dag = config.check_DAG

    try:
        output_cbh = config.output_cbh

        # Location of the NHM CBH files
        cbh_dir = config.cbh_dir
    except KeyError:
        output_cbh = False

    try:
        output_streamflow = config.output_streamflow

        # What to name the streamflow output file
        obs_filename = config.streamflow_filename
    except KeyError:
        output_streamflow = False

    try:
        output_shapefiles = config.output_shapefiles

        # Full path and filename to the geodatabase to use for outputting shapefile subsets
        geo_file = config.geodatabase_filename
    except KeyError:
        output_shapefiles = False

    # Load the control file
    ctl = ControlFile(control_filename)

    if ctl.has_dynamic_parameters:
        if config.dyn_params_dir:
            if os.path.exists(config.dyn_params_dir):
                dyn_params_dir = config.dyn_params_dir
            else:
                bandit_log.error('dyn_params_dir: {}, does not exist.'.format(
                    config.dyn_params_dir))
                exit(2)
        else:
            bandit_log.error(
                'Control file has dynamic parameters but dyn_params_dir is not specified in the config file'
            )
            exit(2)

    # Load master list of valid parameters
    vpdb = ValidParams()

    # Build list of parameters required for the selected control file modules
    required_params = vpdb.get_params_for_modules(modules=ctl.modules.values())

    # TODO: make sure dynamic parameter filenames are correct
    # Write an updated control file
    # ctl.write('somefile')

    # Date range for pulling NWIS streamgage observations
    if isinstance(config.start_date, datetime.date):
        st_date = config.start_date
    else:
        st_date = datetime.datetime(
            *[int(x) for x in re.split('-| |:', config.start_date)])

    if isinstance(config.end_date, datetime.date):
        en_date = config.end_date
    else:
        en_date = datetime.datetime(
            *[int(x) for x in re.split('-| |:', config.end_date)])

    # ===============================================================
    params_file = '{}/{}'.format(merged_paramdb_dir, PARAMETERS_XML)

    # Output revision of NhmParamDb and the revision used by merged paramdb
    nhmparamdb_revision = git_version(paramdb_dir)
    bandit_log.info('Parameters based on NhmParamDb revision: {}'.format(
        nhmparamdb_revision))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Read hru_nhm_to_local and hru_nhm_to_region
    # Create segment_nhm_to_local and segment_nhm_to_region

    # TODO: since hru_nhm_to_region and nhru_nhm_to_local are only needed for
    #       CBH files we should 'soft-fail' if the files are missing and just
    #       output a warning and turn off CBH output if it was selected.
    # hru_nhm_to_region = get_parameter('{}/hru_nhm_to_region.msgpack'.format(cbh_dir))
    # hru_nhm_to_local = get_parameter('{}/hru_nhm_to_local.msgpack'.format(cbh_dir))
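    # One possible shape for that soft-fail (sketch only, not enabled here; it
    # reuses the `get_parameter` call and msgpack paths from the commented-out
    # lines above):
    #
    #     try:
    #         hru_nhm_to_region = get_parameter('{}/hru_nhm_to_region.msgpack'.format(cbh_dir))
    #         hru_nhm_to_local = get_parameter('{}/hru_nhm_to_local.msgpack'.format(cbh_dir))
    #     except (IOError, OSError):
    #         bandit_log.warning('CBH mapping files are missing; disabling CBH output')
    #         output_cbh = False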

    # Load the NHMparamdb
    print('Loading NHM ParamDb')
    pdb = ParamDb(merged_paramdb_dir)
    nhm_params = pdb.parameters
    nhm_global_dimensions = pdb.dimensions

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get tosegment_nhm
    # NOTE: tosegment is now tosegment_nhm and the regional tosegment is gone.
    tosegment = nhm_params.get('tosegment').data
    nhm_seg = nhm_params.get('nhm_seg').data

    if args.verbose:
        print('Generating stream network from tosegment_nhm')

    # Build the stream network
    dag_ds = nx.DiGraph()
    for ii, vv in enumerate(tosegment):
        #     dag_ds.add_edge(ii+1, vv)
        if vv == 0:
            dag_ds.add_edge(ii + 1, 'Out_{}'.format(ii + 1))
        else:
            dag_ds.add_edge(ii + 1, vv)

    # nx.draw_networkx(dag_ds)
    bandit_log.debug('Number of NHM downstream nodes: {}'.format(
        dag_ds.number_of_nodes()))
    bandit_log.debug('Number of NHM downstream edges: {}'.format(
        dag_ds.number_of_edges()))

    if check_dag:
        if not nx.is_directed_acyclic_graph(dag_ds):
            bandit_log.error('Cycles and/or loops found in stream network')

            for xx in nx.simple_cycles(dag_ds):
                bandit_log.error('Cycle found for segment {}'.format(xx))
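
    # Illustrative toy case: a mis-assigned tosegment such as [2, 3, 1]
    # (segment 1 -> 2 -> 3 -> 1) would fail the DAG test above, and
    # nx.simple_cycles would report the offending segments:
    #
    #     toy = nx.DiGraph([(1, 2), (2, 3), (3, 1)])
    #     list(nx.simple_cycles(toy))                  # e.g. [[1, 2, 3]]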

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Build dictionary which maps poi_gage_id to poi_gage_segment
    # poi_gage_segment_tmp = get_parameter('{}/poi_gage_segment.msgpack'.format(merged_paramdb_dir))['data']
    # poi_gage_id_tmp = get_parameter('{}/poi_gage_id.msgpack'.format(merged_paramdb_dir))['data']
    poi_gage_segment_tmp = nhm_params.get('poi_gage_segment').data
    poi_gage_id_tmp = nhm_params.get('poi_gage_id').data

    # Create dictionary to lookup nhm_segment for a given poi_gage_id
    poi_id_to_seg = dict(zip(poi_gage_id_tmp, poi_gage_segment_tmp))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Read streamgage ids from file - one streamgage id per row
    with open(streamgage_file, 'r') as fhdl:
        streamgages = fhdl.read().splitlines()

    # =====================================
    # dag_ds should not change below here
    # For each streamgage:
    #   1) lookup nhm_segment (if any) and use as outlet
    #   2) create output directory
    #   3) subset the stream network, HRUs, params, etc

    uscutoff_seg = []

    for sg in streamgages:
        print('Working on streamgage {}'.format(sg))

        while True:
            # Create the upstream graph
            dag_us = dag_ds.reverse()
            bandit_log.debug('Number of NHM upstream nodes: {}'.format(
                dag_us.number_of_nodes()))
            bandit_log.debug('Number of NHM upstream edges: {}'.format(
                dag_us.number_of_edges()))

            # Trim the u/s graph to remove segments above the u/s cutoff segments
            try:
                for xx in uscutoff_seg:
                    try:
                        dag_us.remove_nodes_from(
                            nx.dfs_predecessors(dag_us, xx))

                        # Also remove the cutoff segment itself
                        dag_us.remove_node(xx)
                    except KeyError:
                        print(
                            'WARNING: nhm_segment {} does not exist in stream network'
                            .format(xx))
            except TypeError:
                bandit_log.error(
                    '\nSelected cutoffs must be a list (possibly empty), not NoneType. ({})'
                    .format(outdir))
                exit(200)

            bandit_log.debug(
                'Number of NHM upstream nodes (trimmed): {}'.format(
                    dag_us.number_of_nodes()))
            bandit_log.debug(
                'Number of NHM upstream edges (trimmed): {}'.format(
                    dag_us.number_of_edges()))

            # Lookup the outlet for the current streamgage
            try:
                dsmost_seg = [poi_id_to_seg[sg]]

                if dsmost_seg[0] == 0:
                    # POI stream segment was never properly assigned in paramdb
                    bandit_log.error(
                        'Streamgage {} has segment = 0. Skipping.'.format(sg))
                    break
                elif len(dsmost_seg) > 1:
                    # Should never have more than one segment per streamgage
                    bandit_log.info(
                        'Streamgage {} has more than one stream segment.'.
                        format(sg))
                    break
            except KeyError:
                bandit_log.error(
                    'Streamgage {} does not exist in poi_gage_id'.format(sg))
                break

            sg_dir = '{}/{}'.format(outdir, sg)

            try:
                os.makedirs(sg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    raise
                else:
                    pass

            # =======================================
            # Given a d/s segment (dsmost_seg) create a subset of u/s segments
            if args.verbose:
                print('\tExtracting model subset')

            # Get all unique segments u/s of the starting segment
            uniq_seg_us = set()
            if dsmost_seg:
                for xx in dsmost_seg:
                    try:
                        pred = nx.dfs_predecessors(dag_us, xx)
                        uniq_seg_us = uniq_seg_us.union(
                            set(pred.keys()).union(set(pred.values())))
                    except KeyError:
                        bandit_log.error(
                            'KeyError: Segment {} does not exist in stream network'
                            .format(xx))
                # print('\nKeyError: Segment {} does not exist in stream network'.format(xx))

                # Get a subgraph in the dag_ds graph and return the edges
                dag_ds_subset = dag_ds.subgraph(uniq_seg_us).copy()

                # 2018-02-13 PAN: It is possible to have outlets specified which are not truly
                #                 outlets in the most conservative sense (e.g. a point where
                #                 the stream network exits the study area). This occurs when
                #                 doing headwater extractions where all segments for a headwater
                #                 are specified in the configuration file. Instead of creating
                #                 output edges for all specified 'outlets' the set difference
                #                 between the specified outlets and nodes in the graph subset
                #                 which have no edges is performed first to reduce the number of
                #                 outlets to the 'true' outlets of the system.
                node_outlets = [ee[0] for ee in dag_ds_subset.edges()]
                true_outlets = set(dsmost_seg).difference(set(node_outlets))
                bandit_log.debug('node_outlets: {}'.format(','.join(
                    map(str, node_outlets))))
                bandit_log.debug('true_outlets: {}'.format(','.join(
                    map(str, true_outlets))))

                # Add the downstream segments that exit the subgraph
                for xx in true_outlets:
                    dag_ds_subset.add_edge(xx, 'Out_{}'.format(xx))
            else:
                # No outlets specified so pull the CONUS
                dag_ds_subset = dag_ds

            # Create list of toseg ids for the model subset
            try:
                # networkx 1.x
                toseg_idx = list(
                    set(xx[0] for xx in dag_ds_subset.edges_iter()))
            except AttributeError:
                # networkx 2.x
                toseg_idx = list(set(xx[0] for xx in dag_ds_subset.edges))

            toseg_idx0 = [xx - 1
                          for xx in toseg_idx]  # 0-based version of toseg_idx

            bandit_log.info('Number of segments in subset: {}'.format(
                len(toseg_idx)))

            # NOTE: With monolithic nhmParamDb files hru_segment becomes hru_segment_nhm and the regional hru_segments are gone.
            # 2019-09-16 PAN: This initially assumed hru_segment in the monolithic paramdb was ALWAYS
            #                 ordered 1..nhru. This is not always the case so the nhm_id parameter
            #                 needs to be loaded and used to map the nhm HRU ids to their
            #                 respective indices.
            hru_segment = nhm_params.get('hru_segment').data
            nhm_id = nhm_params.get('nhm_id').data

            nhm_id_to_idx = {}
            for ii, vv in enumerate(nhm_id):
                # keys are 1-based, values are 0-based
                nhm_id_to_idx[vv] = ii

            bandit_log.info('Number of NHM hru_segment entries: {}'.format(
                len(hru_segment)))

            # Create a dictionary mapping segments to HRUs
            seg_to_hru = {}
            for ii, vv in enumerate(hru_segment):
                # keys are 1-based, values in arrays are 1-based
                seg_to_hru.setdefault(vv, []).append(ii + 1)

            # Get HRU ids ordered by the segments in the model subset - entries are 1-based
            hru_order_subset = []
            for xx in toseg_idx:
                if xx in seg_to_hru:
                    for yy in seg_to_hru[xx]:
                        hru_order_subset.append(yy)
                else:
                    bandit_log.warning(
                        'Stream segment {} has no HRUs connected to it.'.
                        format(xx))
                    # raise ValueError('Stream segment has no HRUs connected to it.')

            # Append the additional non-routed HRUs to the list
            # if len(hru_noroute) > 0:
            #     for xx in hru_noroute:
            #         if hru_segment[xx-1] == 0:
            #             bandit_log.info('User-supplied HRU {} is not connected to any stream segment'.format(xx))
            #             hru_order_subset.append(xx)
            #         else:
            #             bandit_log.error('User-supplied HRU {} routes to stream segment {} - Skipping.'.format(xx,
            #                                                                                            hru_segment[xx-1]))

            hru_order_subset0 = [xx - 1 for xx in hru_order_subset]

            bandit_log.info('Number of HRUs in subset: {}'.format(
                len(hru_order_subset)))

            # Use hru_order_subset to pull selected indices for parameters with nhru dimensions
            # hru_order_subset contains the in-order indices for the subset of hru_segments
            # toseg_idx contains the in-order indices for the subset of tosegments

            # Renumber the tosegment list
            new_tosegment = []

            # Map old DAG_subds indices to new
            for xx in toseg_idx:
                if list(dag_ds_subset.neighbors(xx))[0] in toseg_idx:
                    new_tosegment.append(
                        toseg_idx.index(list(dag_ds_subset.neighbors(xx))[0]) +
                        1)
                else:
                    # Outlets should be assigned zero
                    new_tosegment.append(0)
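
            # Renumbering sketch with hypothetical values: if toseg_idx were
            # [1045, 1046, 1047] and segment 1045 flowed into 1046, its new
            # tosegment entry would be toseg_idx.index(1046) + 1 == 2; a segment
            # whose only neighbor is an 'Out_*' node gets 0 instead.
            #
            #     toseg_idx = [1045, 1046, 1047]       # hypothetical subset
            #     toseg_idx.index(1046) + 1            # -> 2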

            # Renumber the hru_segments for the subset
            new_hru_segment = []

            for xx in toseg_idx:
                # if DAG_subds.neighbors(xx)[0] in toseg_idx:
                if xx in seg_to_hru:
                    for _ in seg_to_hru[xx]:
                        # The new indices should be 1-based from PRMS
                        new_hru_segment.append(toseg_idx.index(xx) + 1)

            # Append zeroes to new_hru_segment for each additional non-routed HRU
            # if len(hru_noroute) > 0:
            #     for xx in hru_noroute:
            #         if hru_segment[xx-1] == 0:
            #             new_hru_segment.append(0)

            bandit_log.info('Size of hru_segment for subset: {}'.format(
                len(new_hru_segment)))

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Subset hru_deplcrv
            hru_deplcrv = nhm_params.get('hru_deplcrv').data

            bandit_log.info('Size of NHM hru_deplcrv: {}'.format(
                len(hru_deplcrv)))

            # Get subset of hru_deplcrv using hru_order
            # A single snarea_curve can be referenced by multiple HRUs
            hru_deplcrv_subset = np.array(hru_deplcrv)[
                tuple(hru_order_subset0), ]
            uniq_deplcrv = list(set(hru_deplcrv_subset))
            uniq_deplcrv0 = [xx - 1 for xx in uniq_deplcrv]

            # Create new hru_deplcrv and renumber
            new_hru_deplcrv = [
                uniq_deplcrv.index(cc) + 1 for cc in hru_deplcrv_subset
            ]
            bandit_log.info('Size of hru_deplcrv for subset: {}'.format(
                len(new_hru_deplcrv)))

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Subset poi_gage_segment
            new_poi_gage_segment = []
            new_poi_gage_id = []
            new_poi_type = []

            if nhm_params.exists('poi_gage_segment'):
                poi_gage_segment = nhm_params.get('poi_gage_segment').tolist()
                bandit_log.info('Size of NHM poi_gage_segment: {}'.format(
                    len(poi_gage_segment)))

                poi_gage_id = nhm_params.get('poi_gage_id').data
                poi_type = nhm_params.get('poi_type').data

                # We want to get the indices of the poi_gage_segments that match the
                # segments that are part of the subset. We can then use these
                # indices to subset poi_gage_id and poi_type.
                # The poi_gage_segment will need to be renumbered for the subset of segments.

                # To subset poi_gage_segment we have to lookup each segment in the subset

                # Reset the cutoff list
                uscutoff_seg = []

                # for ss in uniq_seg_us:
                try:
                    # networkx 1.x
                    for ss in nx.nodes_iter(dag_ds_subset):
                        if ss in poi_gage_segment:
                            new_poi_gage_segment.append(
                                toseg_idx.index(ss) + 1)
                            new_poi_gage_id.append(
                                poi_gage_id[poi_gage_segment.index(ss)])
                            new_poi_type.append(
                                poi_type[poi_gage_segment.index(ss)])
                except AttributeError:
                    # networkx 2.x
                    for ss in dag_ds_subset.nodes:
                        if ss in poi_gage_segment:
                            new_poi_gage_segment.append(
                                toseg_idx.index(ss) + 1)
                            new_poi_gage_id.append(
                                poi_gage_id[poi_gage_segment.index(ss)])
                            new_poi_type.append(
                                poi_type[poi_gage_segment.index(ss)])

                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Add any valid user-specified streamgage, nhm_seg pairs
                if addl_gages:
                    for ss, vv in iteritems(addl_gages):
                        if ss in new_poi_gage_id:
                            idx = new_poi_gage_id.index(ss)
                            bandit_log.warning(
                                'Existing NHM POI, {}, overridden on command line (was {}, now {})'
                                .format(ss, new_poi_gage_segment[idx],
                                        toseg_idx.index(vv) + 1))
                            new_poi_gage_segment[idx] = toseg_idx.index(vv) + 1
                            new_poi_type[idx] = 0
                        elif toseg_idx.index(vv) + 1 in new_poi_gage_segment:
                            sidx = new_poi_gage_segment.index(
                                toseg_idx.index(vv) + 1)
                            bandit_log.warning(
                                'User-specified streamgage ({}) has same nhm_seg ({}) as existing POI ({}), replacing streamgage ID'
                                .format(ss,
                                        toseg_idx.index(vv) + 1,
                                        new_poi_gage_id[sidx]))
                            new_poi_gage_id[sidx] = ss
                            new_poi_type[sidx] = 0
                        elif vv not in seg_to_hru.keys():
                            bandit_log.warning(
                                'User-specified streamgage ({}) has nhm_seg={} which is not part of the model subset - Skipping.'
                                .format(ss, vv))
                        else:
                            new_poi_gage_id.append(ss)
                            new_poi_gage_segment.append(
                                toseg_idx.index(vv) + 1)
                            new_poi_type.append(0)
                            bandit_log.info(
                                'Added user-specified POI streamgage ({}) at nhm_seg={}'
                                .format(ss, vv))

            # ==================================================================
            # ==================================================================
            # Process the parameters and create a parameter file for the subset
            params = list(nhm_params.keys())

            # Remove the POI-related parameters if we have no POIs
            if len(new_poi_gage_segment) == 0:
                bandit_log.warning(
                    'No POI gages found for subset; removing POI-related parameters.'
                )

                for rp in ['poi_gage_id', 'poi_gage_segment', 'poi_type']:
                    # params.pop(rp, None)
                    try:
                        params.remove(rp)
                    except ValueError:
                        print('ERROR: unable to remove {}'.format(rp))
                        pass

            params.sort()

            dims = {}
            for kk in nhm_global_dimensions.values():
                dims[kk.name] = kk.size

            # Resize dimensions to the model subset
            crap_dims = dims.copy()  # need a copy since we modify dims
            for dd, dv in iteritems(crap_dims):
                # dimensions 'nmonths' and 'one' are never changed
                if dd in HRU_DIMS:
                    dims[dd] = len(hru_order_subset0)
                elif dd == 'nsegment':
                    dims[dd] = len(toseg_idx0)
                elif dd == 'ndeplval':
                    dims[dd] = len(uniq_deplcrv0) * 11
                    # if 'ndepl' not in dims:
                    dims['ndepl'] = len(uniq_deplcrv0)
                elif dd == 'npoigages':
                    dims[dd] = len(new_poi_gage_segment)

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Build a ParameterSet for output
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            new_ps = ParameterSet()

            for dd, dv in iteritems(dims):
                new_ps.dimensions.add(dd, dv)

                if dd == 'npoigages':
                    # 20170217 PAN: nobs is missing from the paramdb but is necessary
                    new_ps.dimensions.add('nobs', dv)

            new_params = list(required_params)

            # WARNING: 2019-04-23 PAN
            #          Very hacky way to remove parameters that shouldn't always get
            #          included. Need to figure out a better way.
            check_list = [
                'basin_solsta', 'gvr_hru_id', 'hru_solsta', 'humidity_percent',
                'irr_type', 'obsout_segment', 'rad_conv', 'rain_code',
                'hru_lon'
            ]

            for xx in check_list:
                if xx in new_params:
                    if xx in ['basin_solsta', 'hru_solsta', 'rad_conv']:
                        if not new_ps.dimensions.exists('nsol'):
                            new_params.remove(xx)
                        elif new_ps.dimensions.get('nsol') == 0:
                            new_params.remove(xx)
                    elif xx == 'humidity_percent':
                        if not new_ps.dimensions.exists('nhumid'):
                            new_params.remove(xx)
                        elif new_ps.dimensions.get('nhumid') == 0:
                            new_params.remove(xx)
                    elif xx == 'irr_type':
                        if not new_ps.dimensions.exists('nwateruse'):
                            new_params.remove(xx)
                        elif new_ps.dimensions.get('nwateruse') == 0:
                            new_params.remove(xx)
                    elif xx == 'gvr_hru_id':
                        if ctl.get('mapOutON_OFF').values == 0:
                            new_params.remove(xx)
                    elif xx in [
                            'hru_lat',
                            'hru_lon',
                    ]:
                        if not nhm_params.exists(xx):
                            new_params.remove(xx)

            new_params.sort()
            for pp in params:
                if pp in new_params or args.no_filter_params:
                    cparam = nhm_params.get(pp).tostructure()

                    new_ps.parameters.add(cparam['name'])

                    ndims = len(cparam['dimensions'])
                    if args.verbose:
                        sys.stdout.write(
                            '\r                                       ')
                        sys.stdout.write('\rProcessing {} '.format(
                            cparam['name']))
                        sys.stdout.flush()

                    # Get order of dimensions and total size for parameter
                    dim_order = [None] * ndims

                    for dd, dv in iteritems(cparam['dimensions']):
                        dim_order[dv['position']] = dd

                    for dd in dim_order:
                        # self.parameters.get(varname).dimensions.add(dd, self.dimensions.get(dd).size)
                        new_ps.parameters.get(cparam['name']).dimensions.add(
                            dd,
                            new_ps.dimensions.get(dd).size)

                        new_ps.parameters.get(
                            cparam['name']).datatype = cparam['datatype']

                    first_dimension = dim_order[0]

                    if ndims == 2:
                        second_dimension = dim_order[1]

                    # Write out the data for the parameter
                    if ndims == 1:
                        # 1D Parameters
                        if first_dimension == 'one':
                            outdata = np.array(cparam['data'])
                        elif first_dimension == 'nsegment':
                            if pp in ['tosegment']:
                                outdata = np.array(new_tosegment)
                            else:
                                outdata = np.array(
                                    cparam['data'])[tuple(toseg_idx0), ]
                        elif first_dimension == 'ndeplval':
                            # This is really a 2D in disguise, however, it is stored in C-order unlike
                            # other 2D arrays
                            outdata = np.array(cparam['data']).reshape(
                                (-1, 11))[tuple(uniq_deplcrv0), :]
                        elif first_dimension == 'npoigages':
                            if pp == 'poi_gage_segment':
                                outdata = np.array(new_poi_gage_segment)
                            elif pp == 'poi_gage_id':
                                outdata = np.array(new_poi_gage_id)
                            elif pp == 'poi_type':
                                outdata = np.array(new_poi_type)
                            else:
                                bandit_log.error(
                                    'Unknown parameter, {}, with dimensions {}'.
                                    format(pp, first_dimension))
                        elif first_dimension in HRU_DIMS:
                            if pp == 'hru_deplcrv':
                                outdata = np.array(new_hru_deplcrv)
                            elif pp == 'hru_segment':
                                outdata = np.array(new_hru_segment)
                            else:
                                outdata = np.array(
                                    cparam['data'])[tuple(hru_order_subset0), ]
                        else:
                            bandit_log.error(
                                'No rules to handle dimension {}'.format(
                                    first_dimension))
                    elif ndims == 2:
                        # 2D Parameters
                        outdata = np.array(cparam['data']).reshape(
                            (-1, dims[second_dimension]), order='F')

                        if first_dimension == 'nsegment':
                            outdata = outdata[tuple(toseg_idx0), :]
                        elif first_dimension in HRU_DIMS:
                            outdata = outdata[tuple(hru_order_subset0), :]
                        else:
                            bandit_log.error(
                                'No rules to handle 2D parameter, {}, which contains dimension {}'
                                .format(pp, first_dimension))

                    # Convert outdata to a list for writing
                    if first_dimension == 'ndeplval':
                        outlist = outdata.ravel().tolist()
                    else:
                        outlist = outdata.ravel(order='F').tolist()

                    new_ps.parameters.get(cparam['name']).data = outlist
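
                    # 2D layout sketch (toy numbers): a parameter with dims
                    # (nhru=4, nmonths=2) arrives as 8 values in column-major
                    # order, so reshape(..., order='F') recovers rows that can
                    # then be picked with hru_order_subset0 (or toseg_idx0 for
                    # nsegment-based parameters):
                    #
                    #     np.arange(8).reshape((-1, 2), order='F')
                    #     # -> [[0, 4],
                    #     #     [1, 5],
                    #     #     [2, 6],
                    #     #     [3, 7]]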

            # Write the new parameter file
            header = [
                'Written by Bandit version {}'.format(__version__),
                'NhmParamDb revision: {}'.format(nhmparamdb_revision)
            ]
            if args.param_netcdf:
                base_filename = os.path.splitext(param_filename)[0]
                param_filename = '{}.nc'.format(base_filename)
                new_ps.write_netcdf('{}/{}'.format(sg_dir, param_filename))
            else:
                new_ps.write_parameter_file('{}/{}'.format(
                    sg_dir, param_filename),
                                            header=header)

            ctl.get('param_file').values = param_filename

            if args.verbose:
                sys.stdout.write('\n')
                # sys.stdout.write('\r                                       ')
                # sys.stdout.write('\r\tParameter file written: {}\n'.format('{}/{}'.format(outdir, param_filename)))
                sys.stdout.flush()

            # 2019-09-16 PAN: Nasty hack to handle parameter databases that may not have
            #                 a one-to-one match between index value and nhm_id.
            cparam = nhm_params.get('nhm_id').tostructure()
            hru_order_subset_nhm_id = np.array(
                cparam['data'])[tuple(hru_order_subset0), ].ravel(
                    order='F').tolist()

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Write CBH files
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            if output_cbh:
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Subset the cbh files for the selected HRUs
                if len(hru_order_subset) > 0:
                    if args.verbose:
                        print('Processing CBH files')

                    if os.path.splitext(cbh_dir)[1] == '.nc':
                        cbh_hdl = CbhNetcdf(src_path=cbh_dir,
                                            st_date=st_date,
                                            en_date=en_date,
                                            nhm_hrus=hru_order_subset_nhm_id)
                        # nhm_hrus=hru_order_subset)
                    else:
                        # Subset the hru_nhm_to_local mapping
                        # TODO: This section will not work with the monolithic paramdb - remove
                        hru_order_ss = OrderedDict()
                        for kk in hru_order_subset:
                            hru_order_ss[kk] = hru_nhm_to_local[kk]

                        cbh_hdl = CbhAscii(src_path=cbh_dir,
                                           st_date=st_date,
                                           en_date=en_date,
                                           nhm_hrus=hru_order_subset,
                                           indices=hru_order_ss,
                                           mapping=hru_nhm_to_region)

                    if args.cbh_netcdf:
                        # Pull the filename prefix off of the first file found in the
                        # source netcdf CBH directory.
                        file_it = glob.iglob(cbh_dir)
                        cbh_prefix = os.path.basename(
                            next(file_it)).split('_')[0]

                        cbh_outfile = '{}/{}.nc'.format(outdir, cbh_prefix)
                        cbh_hdl.write_netcdf(cbh_outfile)
                        ctl.get('tmax_day').values = os.path.basename(
                            cbh_outfile)
                        ctl.get('tmin_day').values = os.path.basename(
                            cbh_outfile)
                        ctl.get('precip_day').values = os.path.basename(
                            cbh_outfile)
                    else:
                        cbh_hdl.write_ascii(pathname=sg_dir)
                    # bandit_log.info('{} written to: {}'.format(vv, '{}/{}.cbh'.format(outdir, vv)))
                else:
                    bandit_log.error('No HRUs associated with the segments')

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Write output variables
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # 2019-08-07 PAN: first prototype for extractions of output variables
            if include_model_output:
                if len(hru_order_subset) > 0:
                    try:
                        os.makedirs(f'{sg_dir}/model_output')
                        print(
                            'Creating directory model_output, for model output variables'
                        )
                    except OSError:
                        print(
                            'Using existing model_output directory for output variables'
                        )

                    for vv in output_vars:
                        if args.verbose:
                            sys.stdout.write(
                                '\r                                                  '
                            )
                            sys.stdout.write(
                                f'\rProcessing output variable: {vv} ')
                            sys.stdout.flush()

                        filename = f'{output_vars_dir}/{vv}.nc'

                        if vv[0:3] == 'seg':
                            mod_out = ModelOutput(filename=filename,
                                                  varname=vv,
                                                  startdate=st_date,
                                                  enddate=en_date,
                                                  nhm_segs=toseg_idx)
                        else:
                            mod_out = ModelOutput(
                                filename=filename,
                                varname=vv,
                                startdate=st_date,
                                enddate=en_date,
                                nhm_hrus=hru_order_subset_nhm_id)

                        mod_out.write_csv(f'{sg_dir}/model_output')
                    sys.stdout.write('\n')
                    sys.stdout.flush()

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Write dynamic parameters
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            if ctl.has_dynamic_parameters:
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Add dynamic parameters
                for cparam in ctl.dynamic_parameters:
                    param_name = 'dyn_{}'.format(cparam)
                    input_file = '{}/{}.nc'.format(dyn_params_dir, param_name)
                    output_file = '{}/{}.param'.format(sg_dir, param_name)

                    if not os.path.exists(input_file):
                        bandit_log.warning(
                            'WARNING: CONUS dynamic parameter file: {}, does not exist... skipping'
                            .format(input_file))
                    else:
                        if args.verbose:
                            print(
                                'Writing dynamic parameter {}'.format(cparam))

                        mydyn = dyn_params.DynamicParameters(
                            input_file, cparam, st_date, en_date,
                            hru_order_subset_nhm_id)
                        # mydyn = dyn_params.DynamicParameters(input_file, cparam, st_date, en_date, hru_order_subset)

                        mydyn.read_netcdf()
                        out_order = [kk for kk in hru_order_subset_nhm_id]
                        # out_order = [kk for kk in hru_order_subset]
                        for cc in ['day', 'month', 'year']:
                            out_order.insert(0, cc)

                        header = ' '.join(map(str, out_order))

                        # Output ASCII files
                        out_ascii = open(output_file, 'w')
                        out_ascii.write('{}\n'.format(cparam))
                        out_ascii.write('{}\n'.format(header))
                        out_ascii.write('####\n')
                        mydyn.data.to_csv(out_ascii,
                                          columns=out_order,
                                          na_rep='-999',
                                          sep=' ',
                                          index=False,
                                          header=False,
                                          encoding=None,
                                          chunksize=50)
                        out_ascii.close()

            # Write an updated control file to the output directory
            ctl.write('{}.bandit'.format('{}/{}'.format(
                sg_dir, control_filename)))

            if output_streamflow:
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Download the streamgage information from NWIS
                if args.verbose:
                    print(
                        'Downloading NWIS streamgage observations for {} stations'
                        .format(len(new_poi_gage_id)))

                streamflow = prms_nwis.NWIS(gage_ids=new_poi_gage_id,
                                            st_date=st_date,
                                            en_date=en_date,
                                            verbose=args.verbose)
                streamflow.get_daily_streamgage_observations()
                streamflow.write_prms_data(
                    filename='{}/{}'.format(sg_dir, obs_filename))

            # *******************************************
            # Create a shapefile of the selected HRUs
            if output_shapefiles:
                if args.verbose:
                    print('-' * 40)
                    print('Writing shapefiles for model subset')

                if not os.path.isdir(geo_file):
                    bandit_log.error(
                        'File geodatabase, {}, does not exist. Shapefiles will not be created'
                        .format(geo_file))
                else:
                    geo_shp = prms_geo.Geo(geo_file)

                    # Create GIS sub-directory if it doesn't already exist
                    gis_dir = '{}/GIS'.format(sg_dir)
                    try:
                        os.makedirs(gis_dir)
                    except OSError as exception:
                        if exception.errno != errno.EEXIST:
                            raise
                        else:
                            pass

                    # Output a shapefile of the selected HRUs
                    # print('\tHRUs')
                    # geo_shp.select_layer('nhruNationalIdentifier')
                    geo_shp.select_layer('nhru')
                    geo_shp.write_shapefile(
                        '{}/GIS/HRU_subset.shp'.format(sg_dir),
                        'hru_id_nat',
                        hru_order_subset_nhm_id,
                        included_fields=[
                            'nhm_id', 'model_idx', 'region', 'hru_id_nat'
                        ])

                    # geo_shp.write_shapefile3('{}/GIS/HRU_subset.gdb'.format(outdir), 'hru_id_nat', hru_order_subset)

                    # geo_shp.filter_by_attribute('hru_id_nat', hru_order_subset)
                    # geo_shp.write_shapefile2('{}/HRU_subset.shp'.format(outdir))
                    # geo_shp.write_kml('{}/HRU_subset.kml'.format(outdir))

                    # Output a shapefile of the selected stream segments
                    # print('\tSegments')
                    geo_shp.select_layer('nsegmentNationalIdentifier')
                    geo_shp.write_shapefile(
                        '{}/GIS/Segments_subset.shp'.format(sg_dir),
                        'seg_id_nat',
                        toseg_idx,
                        included_fields=['seg_id_nat', 'model_idx', 'region'])

                    # geo_shp.filter_by_attribute('seg_id_nat', uniq_seg_us)
                    # geo_shp.write_shapefile2('{}/Segments_subset.shp'.format(outdir))

                    del geo_shp

            break  # break out of while True loop

    bandit_log.info('========== END {} =========='.format(
        datetime.datetime.now().isoformat()))

    os.chdir(stdir)
示例#41
0
 def test_simple_cycles_empty(self):
     G = nx.DiGraph()
     assert list(nx.simple_cycles(G)) == []
示例#42
0
 def test_worst_case_graph(self):
     # see figure 1 in Johnson's paper
     for k in range(3, 10):
         G = self.worst_case_graph(k)
         l = len(list(nx.simple_cycles(G)))
         assert l == 3 * k
示例#43
0
    def find_subtours():
        G = nx.DiGraph(X)
        S = list(nx.simple_cycles(G))

        #print("S = {}".format(S))
        return S
示例#44
0
    if book.title != 'Берегись Лиловой Пасты!':
        continue

    # print(book)
    # print(book.title)
    # print(book.sections)
    # print(book.end_pages)
    # print(book.total_pages)
    # print()
    # print(book.G)

    # print(repr(book.title_html))
    # print(book.title_html)

    cycles = list(nx.simple_cycles(book.G))
    print(cycles)

    # print(nx.recursive_simple_cycles(book.G))
    cycles_nodes = {item for sublist in cycles for item in sublist}
    # print(cycles_nodes)

    # Try to determine which pages in the cycle came before the others, so that
    # the looping can be removed correctly
    for edge in cycles:
        new_edge = None
        node_1, node_2 = edge

        for paths in nx.all_simple_paths(book.G, book.first_page, node_1):
            if node_2 not in paths:
                new_edge = node_2, node_1
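The snippet breaks off before the detected back edge is actually removed. A minimal sketch of the same idea on a toy page graph (node numbers made up for illustration): for each two-node cycle, reachability from the first page fixes the reading order, and the opposite edge is dropped.

import networkx as nx

# Toy page graph with one accidental back edge 3 -> 2 forming a 2-cycle.
G = nx.DiGraph([(1, 2), (2, 3), (3, 2), (3, 4)])
first_page = 1

for cycle in list(nx.simple_cycles(G)):
    if len(cycle) != 2:          # only the two-node loops are handled here
        continue
    node_1, node_2 = cycle
    for a, b in ((node_1, node_2), (node_2, node_1)):
        # If b never appears on a simple path from the first page to a,
        # the reading order is a -> b, so the back edge b -> a is the loop.
        if all(b not in path for path in nx.all_simple_paths(G, first_page, a)):
            G.remove_edge(b, a)
            break

print(list(nx.simple_cycles(G)))   # []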
示例#45
0
 def state_cycles(self):
     """Cycles found in state to state graph."""
     S = nx.DiGraph(sbn2sbs(self.tpm))
     return nx.simple_cycles(S)
示例#46
0
"""
this is just for playing around with the networkx API in order to make sure I understood it correctly
"""

import networkx as nx

G = nx.DiGraph()

G.add_edge(0, 1)
G.add_edge(1, 2)
G.add_edge(2, 3)
G.add_edge(3, 0)
G.add_edge(1, 4)
G.add_edge(4, 3)
G.add_edge(1, 5)
G.add_edge(5, 6)
G.add_edge(6, 3)

for path in nx.all_simple_paths(G, 1, 0, 3):
    print(path)

for c in nx.simple_cycles(G):
    print(c)
示例#47
0
    def read_graphs(self, file_name):
        '''
        Read predicate-argument dependencies from file_name and build one graph per sentence in self._graphs.
        '''
        if not file_name:
            self.logger.error('no file given: ' + str(file_name))
            return

        f = open(file_name, 'r', encoding='utf-8')

        pargs = []
        comments = ''
        sent_id = ''
        sent_index = -1
        num_tokens = -1

        for line in f:
            # end of sentence -> build graph if there are pargs
            if line.strip() == r'<\s>' and len(pargs) > 0:
                # build graph -> two pass -> add nodes and edges
                graph = nx.DiGraph(COMMENTS=comments,
                                   SENT_ID=sent_id,
                                   SENT_INDEX=sent_index,
                                   NUM_TOKENS=num_tokens)

                for i in range(num_tokens):
                    graph.add_node(str(i + 1))

                # add nodes while checking multi-word tokens
                for parg in pargs:
                    self.logger.debug('parg: ' + str(parg))
                    # update head node, predicate
                    d = graph.node[parg['ID_pred']]
                    if 'CAT' not in d:
                        d['WORD'] = parg['PRED']
                        d['CAT'] = parg['CAT_pred']
                        d['ARGS'] = {}
                    # can have multiple arg_positions to different ids
                    d['ARGS'][parg['ID_arg']] = parg['POS_arg']
                    graph.node[parg['ID_pred']] = d
                    # update dependent, argument
                    d = graph.node[parg['ID_arg']]
                    if 'HEADS' not in d:
                        d['HEADS'] = {}
                        d['HEAD_CATS'] = {}
                    d['WORD'] = parg['ARG']
                    d['HEADS'][parg['ID_pred']] = parg['POS_arg']
                    d['HEAD_CATS'][parg['ID_pred']] = parg['CAT_pred']
                    graph.node[parg['ID_arg']] = d
                    # add relation to the graph
                    d = {
                        'POS_arg': parg['POS_arg'],
                        'CAT_pred': parg['CAT_pred']
                    }
                    if 'X' in parg:
                        d['X'] = parg['X']
                    graph.add_edge(parg['ID_pred'],
                                   parg['ID_arg'],
                                   attr_dict=d)
                    self.logger.debug('added edge from ' + parg['ID_pred'] +
                                      ' to ' + parg['ID_arg'])

                # if no category, assign 'N'
                for n in graph.nodes():
                    # arguments
                    if 'CAT' not in graph.node[n] and 'WORD' in graph.node[n]:
                        graph.node[n]['CAT'] = 'N'
                    # probably punctuation
                    elif 'CAT' not in graph.node[n]:
                        graph.node[n]['WORD'] = '_'
                        graph.node[n]['CAT'] = '_'

                # check for cycles
                if len(list(nx.simple_cycles(graph))) > 0:
                    self.logger.debug('cycle detected in ' + file_name +
                                      ' sent: ' + str(sent_index))
                    self.logger.debug(list(nx.simple_cycles(graph)))

                self._graphs.append(graph)

                self.logger.debug('GRAPH:' + str(graph.graph))
                self.logger.debug('NODES:' + str(graph.nodes()))
                self.logger.debug('EDGES:' + str(graph.edges()))

                # debug
                if len(graph.nodes()) > num_tokens:
                    self.logger.error(
                        'EXTRA nodes ' + str(len(graph.nodes())) + ' ' +
                        str(num_tokens))
                for n in sorted(graph.nodes(), key=int):
                    self.logger.debug(n + ' : ' + str(graph.node[n]))
                for n1, n2 in sorted(graph.edges(),
                                     key=lambda x: (int(x[1]), int(x[0]))):
                    self.logger.debug(
                        str((n1, n2)) + ' : ' + str(graph.edge[n1][n2]))

            # sentence may be empty; reset state at the closing tag
            if line.strip() == r'<\s>':
                pargs = []
                comments = ''
                sent_id = ''
                sent_index = -1
                num_tokens = -1

            # only before the sentence
            elif line.strip()[0] == '<':
                # <s id="wsj_0013.7"> 27
                comments += line.strip()
                i = comments.find('"')
                j = comments.find('"', i + 1)
                #print(i,j,comments[i+1:j])
                # wsj_0013.7
                sent_id = comments[i + 1:j]
                # 7
                sent_index = int(sent_id.split('.')[1])
                # 28 <- 27+1
                num_tokens = int(comments[comments.find('>') + 1:].strip()) + 1
            # pargs, also adds empty nodes and multiword tokens
            else:
                w = self.to_attr(line)
                pargs.append(w)

        f.close()
示例#48
0
 def check_for_no_cycles(self, graph, title, premise):
     graph.add_edge(title, premise)
     if len(list(nx.simple_cycles(graph))) > 0:
         return False
     else:
         return True
示例#49
0
 def loop_exists(g):
     return next(nx.simple_cycles(g), [])
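Because nx.simple_cycles is a generator, next(..., []) returns the first cycle it yields without enumerating the rest, and the result doubles as a truthy presence check. A self-contained usage sketch (the helper is repeated as a plain function here):

import networkx as nx

def loop_exists(g):
    return next(nx.simple_cycles(g), [])

print(loop_exists(nx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 4)])))  # e.g. [1, 2, 3]
print(loop_exists(nx.DiGraph([(1, 2), (2, 3)])))                  # [] -> no cycle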
示例#50
0
def sofa_analyze(logdir, cfg):
    filein = []
    df_gpu = []
    df_cpu = []
    df_vmstat = []

    filein_gpu = logdir + "gputrace.csv"
    filein_cpu = logdir + "cputrace.csv"
    filein_vmstat = logdir + "vmstat_trace.csv"
    
    if os.path.isfile('%s/nvlink_topo.txt' % logdir):
        
        with open(logdir + 'nvlink_topo.txt') as f:
            lines = f.readlines()
            title = lines[0]
            num_gpus = 1 
            for word in title.split():
                if re.match(r'GPU', word) is not None:
                   num_gpus = num_gpus + 1 
            print_info('# of GPUs: ' + str(num_gpus) )
            edges = []
            for i in range(num_gpus):
                connections = lines[1+i].split()
                for j in range(len(connections)):
                    if connections[j] == 'NV1' or connections[j] == 'NV2':
                        edges.append((i,j-1))
                        #print('%d connects to %d' % (i, j-1))
            
            ring_found = False
            if num_gpus > 1:
                G = nx.DiGraph(edges)           
                # Try to find ring with its length of num_gpus
                for cycle in nx.simple_cycles(G):
                    if len(cycle) == num_gpus:
                        print(("One of the recommended ring having length of %d" % len(cycle) ))
                        ring_found = True
                        os.system("mkdir -p sofalog/sofa_hints/")
                        xring_order = ','.join(map(str, cycle))
                        with open("sofalog/sofa_hints/xring_order.txt", "w") as f:
                            f.write('export CUDA_VISIBLE_DEVICES=' + xring_order)
                        break
                
                # Try to find ring with its length of num_gpus/2 
                if not ring_found:
                    for cycle in nx.simple_cycles(G):
                        if len(cycle) == num_gpus/2:
                            print(("One of the recommended ring having length of %d" % len(cycle) ))
                            ring_found = True
                            os.system("mkdir -p sofalog/sofa_hints/")
                            xring_order = ','.join(map(str, cycle))
                            with open("sofalog/sofa_hints/xring_order.txt", "w") as f:
                                f.write('export CUDA_VISIBLE_DEVICES=' + xring_order)
                            break   
    try:
        df_cpu = pd.read_csv(filein_cpu)
        cpu_profile(logdir, cfg, df_cpu)
        net_profile(logdir, cfg, df_cpu)
    except IOError:
        print_warning("cputrace.csv is not found")
        #quit()

    try:
        df_gpu = pd.read_csv(filein_gpu)
        #df_gpu.loc[:, 'timestamp'] -= df_gpu.loc[0, 'timestamp']
        gpu_profile(logdir, cfg, df_gpu)
        if cfg.enable_deepprof:
            sofa_deepprof(logdir, cfg, df_cpu, df_gpu)  
    except IOError:
        print_warning(
            "gputrace.csv is not found. If there is no need to profile GPU, just ignore it.")
示例#51
0
def enumMaximumMatchingIter2(adj,
                             matchadj,
                             all_matches,
                             n1,
                             add_e=None,
                             check_cycle=True):
    '''Similar to enumMaximumMatching() but implemented using adjacency matrix
    of graph. Slight speed boost.
    '''

    #-------------------Find cycles-------------------
    if check_cycle:
        d = matchadj.multiply(adj)
        d[n1:, :] = adj[n1:, :] - matchadj[n1:, :].multiply(adj[n1:, :])

        dg = nx.from_numpy_matrix(d.toarray(), create_using=nx.DiGraph())
        cycles = list(nx.simple_cycles(dg))
        if len(cycles) == 0:
            check_cycle = False
        else:
            check_cycle = True

    #if len(cycles)>0:
    if check_cycle:
        cycle = cycles[0]
        cycle.append(cycle[0])
        cycle = zip(cycle[:-1], cycle[1:])

        #--------------Create a new matching--------------
        new_match = matchadj.copy()
        for ee in cycle:
            if matchadj[ee[0], ee[1]] == 1:
                new_match[ee[0], ee[1]] = 0
                new_match[ee[1], ee[0]] = 0
                e = ee
            else:
                new_match[ee[0], ee[1]] = 1
                new_match[ee[1], ee[0]] = 1

        if add_e is not None:
            for ii in add_e:
                new_match[ii[0], ii[1]] = 1

        all_matches.append(new_match)

        #-----------------Form subproblems-----------------
        g_plus = adj.copy()
        g_minus = adj.copy()
        g_plus[e[0], :] = 0
        g_plus[:, e[1]] = 0
        g_plus[:, e[0]] = 0
        g_plus[e[1], :] = 0
        g_minus[e[0], e[1]] = 0
        g_minus[e[1], e[0]] = 0

        add_e_new = [
            e,
        ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = enumMaximumMatchingIter2(g_minus, new_match, all_matches,
                                               n1, add_e, check_cycle)
        all_matches = enumMaximumMatchingIter2(g_plus, matchadj, all_matches,
                                               n1, add_e_new, check_cycle)

    else:
        #---------------Find uncovered nodes---------------
        uncovered = numpy.where(numpy.sum(matchadj, axis=1) == 0)[0]

        if len(uncovered) == 0:
            return all_matches

        #---------------Find feasible paths---------------
        paths = []
        for ii in uncovered:
            aa = adj[ii, :].dot(matchadj)
            if aa.sum() == 0:
                continue
            paths.append((ii, int(sparse.find(aa == 1)[1][0])))
            if len(paths) > 0:
                break

        if len(paths) == 0:
            return all_matches

        #----------------------Find e----------------------
        feas1, feas2 = paths[0]
        e = (feas1, int(sparse.find(matchadj[:, feas2] == 1)[0]))

        #----------------Create a new match----------------
        new_match = matchadj.copy()
        new_match[feas2, :] = 0
        new_match[:, feas2] = 0
        new_match[feas1, e[1]] = 1
        new_match[e[1], feas1] = 1

        if add_e is not None:
            for ii in add_e:
                new_match[ii[0], ii[1]] = 1

        all_matches.append(new_match)

        #-----------------Form subproblems-----------------
        g_plus = adj.copy()
        g_minus = adj.copy()
        g_plus[e[0], :] = 0
        g_plus[:, e[1]] = 0
        g_plus[:, e[0]] = 0
        g_plus[e[1], :] = 0
        g_minus[e[0], e[1]] = 0
        g_minus[e[1], e[0]] = 0

        add_e_new = [
            e,
        ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = enumMaximumMatchingIter2(g_minus, matchadj, all_matches,
                                               n1, add_e, check_cycle)
        all_matches = enumMaximumMatchingIter2(g_plus, new_match, all_matches,
                                               n1, add_e_new, check_cycle)

    if len(all_matches) % 1000 == 0:
        print('len', len(all_matches))

    return all_matches
示例#52
0
def reduce_graph(G, partial=False, debug=False, unglue=True, recursive=True):
    """Reduce the graph: iteratively prune the graph and glue the resulting cycles.
    
    * After gluing, each cycle is represented by the source node of its limiting step.
    * Edges entering the glued cycle in the pruned graph are conserved.
    * Edges exiting the cycle before pruning are restored with a corrected weight.
    
    By default, aim for full reduction and raise an error if a node has multiple outgoing edges or if
    a cycle has multiple limiting steps. Enable the *partial* parameter to relax these constraints.
    
    In case of partial reduction, only terminal elementary cycles with a single limiting step are glued.
    
    Information on the glued nodes and original source of glued edges are conserved as metadata to enable the unglue step.
    """

    # All nodes from a glued cycle are associated to their representative, original target and weight.
    glued_nodes = {}

    # All representative nodes are associated to the list of nodes in the original cycle
    glued_cycles = {}

    # Edges entering a glued cycle in the pruned graph or exiting it in the original graph
    glued_edges = set()

    # Collect this information for all cycles in the pruned graph
    all_nodes = set(G.nodes)
    pruned_G = prune(G, partial=partial, debug=debug)
    for cycle in nx.simple_cycles(pruned_G):
        cycle_edges = list(pruned_G.out_edges(cycle, data='weight'))
        # Only glue cycles where all nodes have a single target (in case of partial pruning)
        if len(cycle_edges) != len(cycle):
            print("Skip non-pruned cycle: ", cycle)
            continue

        lim_s, _, lim_w = max(cycle_edges, key=lambda x: x[2])
        if partial:
            if len([e for e in cycle_edges if e[2] == lim_w]) > 1:
                print("Cycle with multiple limiting steps can not be glued")
                continue
        else:
            check_unique(cycle_edges, lim_w, debug)

        cur_glued_nodes = {n: (lim_s, t, w) for n, t, w in cycle_edges}
        glued_nodes.update(cur_glued_nodes)
        glued_cycles[lim_s] = cycle

        # Collect glued edges: entering the cycle in the pruned graph or exiting it in the original graph
        other_nodes = all_nodes.difference(cur_glued_nodes)
        glued_edges.update(
            nx.algorithms.boundary.edge_boundary(pruned_G, other_nodes))
        glued_edges.update(
            nx.algorithms.boundary.edge_boundary(G, cur_glued_nodes))

    if len(glued_cycles) == 0:
        if debug: print("This graph has no cycle to glue!")
        if unglue: return unglue_graph(pruned_G, debug=debug)
        return pruned_G

    if debug:
        print("This graph has %s cycles to glue:" % len(glued_cycles))
        for g, c in glued_cycles.items():
            print("  *  %s: %s" % (g, c))
        print("Glued %s edges:" % len(glued_edges))
        for e in glued_edges:
            print("  *", e)

    # The glued graph contains all edges between non-glued nodes of the pruned graph
    hidden_nodes = set(glued_nodes).difference(glued_cycles)
    glued_G = pruned_G.subgraph(all_nodes.difference(hidden_nodes)).copy()

    # Redirect, normalize, annotate and restore glued edges
    restored_edges = {}
    for s, t in glued_edges:
        # Assume that we will copy the edge
        edge_info = G.edges[(s, t)].copy()

        # When the target is glued, redirect and keep track of the original one!
        if t in glued_nodes:
            if debug: print("REDIRECTING EDGE TARGET!!!")
            if "glued_target" not in edge_info: edge_info['glued_target'] = t
            t = glued_nodes.get(t)[0]

        # When the source is glued, redirect and update the weight
        if s in glued_nodes:
            s, _, w = glued_nodes[s]
            wlim = glued_nodes[s][2]
            edge_info['weight'] += wlim - w

        # Add the new edge, unless a better one already exists
        bgw = restored_edges.get((s, t))
        if bgw is not None and bgw['weight'] < edge_info['weight']: continue
        restored_edges[(s, t)] = edge_info

    # Add all glued edges
    glued_G.add_edges_from([(s, t, info)
                            for (s, t), info in restored_edges.items()])

    # Add metadata required to restore edges in the glued cycles
    for cur_repr, cur_nodes in glued_cycles.items():
        cur_glued_cycle = []
        for src in cur_nodes:

            # Add existing glued cycles
            prev_glued = G.nodes[src].get('glued_cycles')
            if prev_glued:
                if debug:
                    print('Merging %s-cycle previously glued in %s' %
                          (len(prev_glued), src))
                cur_glued_cycle += prev_glued

            mrepr, tgt, w = glued_nodes[src]
            rtgt = G.edges[(src, tgt)].get('glued_target')
            if rtgt is not None and rtgt != tgt:
                if debug:
                    print('Merging a redirected edge (%s, %s / %s, %s) !' %
                          (src, tgt, rtgt, w))
                tgt = rtgt
            if mrepr != cur_repr:
                raise ValueError('Mismatching representative node!')
            if src == cur_repr: continue
            cur_glued_cycle.append((src, tgt, w))
        glued_G.add_node(cur_repr)
        glued_G.nodes[cur_repr]['glued_cycles'] = cur_glued_cycle

    if recursive:
        return reduce_graph(glued_G,
                            partial=partial,
                            debug=debug,
                            recursive=True,
                            unglue=unglue)

    if unglue:
        return unglue_graph(glued_G, debug=debug)

    return glued_G
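reduce_graph depends on prune, check_unique and unglue_graph defined elsewhere, so it is not runnable on its own. A minimal sketch of just the gluing step, on a toy graph that is already pruned (every node keeps a single outgoing edge): each cycle returned by nx.simple_cycles is represented by the source node of its heaviest, i.e. limiting, edge.

import networkx as nx

# Toy pruned graph: every node has exactly one outgoing edge, so the
# simple cycles are exactly the terminal cycles that would be glued.
G = nx.DiGraph()
G.add_weighted_edges_from([('a', 'b', 1.0), ('b', 'c', 2.0), ('c', 'a', 5.0),
                           ('d', 'a', 0.5)])

for cycle in nx.simple_cycles(G):
    cycle_edges = list(G.out_edges(cycle, data='weight'))
    lim_s, lim_t, lim_w = max(cycle_edges, key=lambda e: e[2])
    # The whole cycle collapses onto the source of its limiting step.
    print('cycle %s -> representative %r (limiting edge %s->%s, weight %s)'
          % (cycle, lim_s, lim_s, lim_t, lim_w))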
示例#53
0
    def run(self,
            max_cores=None,
            max_attempts=1,
            dry=False,
            set_successful=True,
            cmd_wrapper=signature.default_cmd_fxn_wrapper,
            log_out_dir_func=default_task_log_output_dir):
        """
        Runs this Workflow's DAG

        :param int max_cores: The maximum number of cores to use at once.  A value of None indicates no maximum.
        :param int max_attempts: The maximum number of times to retry a failed job.
        :param callable log_out_dir_func: A function that returns a Task's logging directory (must be unique).
             It receives one parameter: the Task instance.
             By default a Task's log output is stored in log/stage_name/task_id.
             See _default_task_log_output_dir for more info.
        :param bool dry: If True, do not actually run any jobs.
        :param bool set_successful: Sets this workflow as successful if all tasks finish without a failure.  You might set this to False if you intend to add and
            run more tasks in this workflow later.

        """
        assert os.path.exists(os.getcwd(
        )), 'current working dir does not exist! %s' % os.getcwd()

        assert hasattr(
            self, 'cosmos_app'
        ), 'Workflow was not initialized using the Workflow.start method'
        assert hasattr(log_out_dir_func,
                       '__call__'), 'log_out_dir_func must be a function'
        assert self.session, 'Workflow must be part of a sqlalchemy session'

        session = self.session
        self.log.info('Preparing to run %s using DRM `%s`, cwd is `%s`' %
                      (self, self.cosmos_app.default_drm, os.getcwd()))

        self.max_cores = max_cores
        self.max_attempts = max_attempts

        from ..job.JobManager import JobManager

        if self.jobmanager is None:
            self.jobmanager = JobManager(
                get_submit_args=self.cosmos_app.get_submit_args,
                cmd_wrapper=cmd_wrapper,
                log_out_dir_func=log_out_dir_func)

        self.status = WorkflowStatus.running
        self.successful = False

        if self.started_on is None:
            self.started_on = datetime.datetime.now()

        task_graph = self.task_graph()
        stage_graph = self.stage_graph()

        assert len(set(self.stages)) == len(
            self.stages), 'duplicate stage name detected: %s' % (next(
                duplicates(self.stages)))

        # renumber stages
        stage_graph_no_cycles = nx.DiGraph()
        stage_graph_no_cycles.add_nodes_from(stage_graph.nodes())
        stage_graph_no_cycles.add_edges_from(stage_graph.edges())
        for cycle in nx.simple_cycles(stage_graph):
            stage_graph_no_cycles.remove_edge(cycle[-1], cycle[0])
        for i, s in enumerate(topological_sort(stage_graph_no_cycles)):
            s.number = i + 1

        # Make sure everything is in the sqlalchemy session
        session.add(self)
        successful = [t for t in task_graph.nodes() if t.successful]

        # print stages
        for s in sorted(self.stages, key=lambda s: s.number):
            self.log.info('%s %s' % (s, s.status))

        # Create Task Queue
        task_queue = _copy_graph(task_graph)
        self.log.info('Skipping %s successful tasks...' % len(successful))
        task_queue.remove_nodes_from(successful)

        handle_exits(self)

        if self.max_cores is not None:
            self.log.info('Ensuring there are enough cores...')
            # make sure we've got enough cores
            for t in task_queue:
                assert int(
                    t.core_req
                ) <= self.max_cores, '%s requires more cpus (%s) than `max_cores` (%s)' % (
                    t, t.core_req, self.max_cores)

        # Run this thing!
        self.log.info('Committing to SQL db...')
        session.commit()
        if not dry:
            _run(self, session, task_queue)

            # set status
            if self.status == WorkflowStatus.failed_but_running:
                self.status = WorkflowStatus.failed
                # set stage status to failed
                for s in self.stages:
                    if s.status == StageStatus.running_but_failed:
                        s.status = StageStatus.failed
                session.commit()
                return False
            elif self.status == WorkflowStatus.running:
                if set_successful:
                    self.status = WorkflowStatus.successful
                session.commit()
                return True
            else:
                self.log.warning('Workflow exited with status %s', self.status)
                session.commit()
                return False
        else:
            self.log.info('Workflow dry run is complete')
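The stage renumbering above copies the stage graph, removes one edge per simple cycle, and topologically sorts the acyclic copy. A small self-contained sketch of that trick with made-up stage names:

import networkx as nx

# Hypothetical stage graph with one feedback edge (filter -> align).
stage_graph = nx.DiGraph([('align', 'call'), ('call', 'filter'),
                          ('filter', 'align'), ('filter', 'report')])

acyclic = nx.DiGraph()
acyclic.add_nodes_from(stage_graph.nodes())
acyclic.add_edges_from(stage_graph.edges())
for cycle in nx.simple_cycles(stage_graph):
    acyclic.remove_edge(cycle[-1], cycle[0])   # drop the closing edge, as in run()

for number, stage in enumerate(nx.topological_sort(acyclic), start=1):
    print(number, stage)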
示例#54
0
 def find_cycles(self, pipeline):
     return nx.simple_cycles(Pipeline.graph(pipeline))
示例#55
0
    def cutoff_cycles(self, links):
        graph = self.graph
        nodes = self.__actual_nodes
        sub_graph = nx.DiGraph(links)
        gen_cycles = nx.simple_cycles(sub_graph)
        cycles = [cycle for cycle in gen_cycles]
        _related_cycle_nodes = []

        if len(cycles):
            for cycle in cycles:
                for n in cycle:
                    if n not in nodes:
                        _related_cycle_nodes.append(n)
            cycle_list = []
            for cycle in cycles:
                temp_list = []
                if len(cycle) == 2:
                    source_id = cycle[0]
                    target_id = cycle[1]
                    if graph.has_edge(source_id, target_id):
                        temp = {
                            'source_id': str(source_id),
                            'target_id': str(target_id)
                        }
                        temp_list.append(
                            {**temp, **graph[source_id][target_id]})
                    source_id = cycle[1]
                    target_id = cycle[0]
                    if graph.has_edge(source_id, target_id):
                        temp = {
                            'source_id': str(source_id),
                            'target_id': str(target_id)
                        }
                        temp_list.append(
                            {**temp, **graph[source_id][target_id]})

                    cycle_list.append(temp_list)
                else:
                    for i in range(len(cycle)):
                        if i == (len(cycle) - 1):
                            source_id = cycle[i]
                            target_id = cycle[0]
                            if graph.has_edge(source_id, target_id):
                                temp = {
                                    'source_id': str(source_id),
                                    'target_id': str(target_id)
                                }
                                temp_list.append(
                                    {**temp, **graph[source_id][target_id]})
                        else:
                            source_id = cycle[i]
                            target_id = cycle[i + 1]
                            if graph.has_edge(source_id, target_id):
                                temp = {
                                    'source_id': str(source_id),
                                    'target_id': str(target_id)
                                }
                                temp_list.append(
                                    {**temp, **graph[source_id][target_id]})

                    cycle_list.append(temp_list)

            cycle_result = [{
                'paths': cycle,
                'type': ('circle' if len(cycle) > 2 else
                         'each_other' if len(cycle) == 2 else 'self'),
                'circle_id': create_union_id()
            } for cycle in cycle_list]

            return {
                'cycle_result': cycle_result,
                'actual_nodes': nodes + _related_cycle_nodes
            }
        else:
            return {'cycle_result': [], 'actual_nodes': nodes}
示例#56
0
 def test_simple_cycles_graph(self):
     with pytest.raises(nx.NetworkXNotImplemented):
         G = nx.Graph()
         c = sorted(nx.simple_cycles(G))
示例#57
0
def simple_cycles_func(graph):
    """Return all simple cycles of graph as a list."""
    return list(nx.simple_cycles(graph))
#
# Input: G - A genome graph
#
# Output: None, None if this is not a red/blue alternating cycle; otherwise return the cycle and the colors


# + slideshow={"slide_type": "subslide"}
def red_blue_cycle_check(sub_graph, cycle):
    checked_cycle = None
    colors = []
    return checked_cycle, colors


G_P4_P5 = combine(genome_to_graph([P4]), genome_to_graph([P5]))
# Below is an example of how you can find all the cycles
test_cycles = list(nx.simple_cycles(G_P4_P5.to_directed()))
edge_cycles = []  # just a cycle listed as edges
for cycle in test_cycles:
    edge_cycle = []
    a = cycle[0]
    for b in cycle[1:]:
        edge_cycle.append([a, b])
        a = b
    edge_cycle.append([b, cycle[0]])
    edge_cycles.append(edge_cycle)
# Running the code on all cycles
for edge_cycle in edge_cycles:
    #print(edge_cycle)
    checked_cycle, colors = red_blue_cycle_check(G_P4_P5, edge_cycle)
    #print(colors)
示例#59
0
 def test_complete_directed_graph(self):
     # see table 2 in Johnson's paper
     ncircuits = [1, 5, 20, 84, 409, 2365, 16064]
     for n, c in zip(range(2, 9), ncircuits):
         G = nx.DiGraph(nx.complete_graph(n))
         assert len(list(nx.simple_cycles(G))) == c
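The expected counts follow from choosing L of the n nodes and arranging them into a directed cycle, i.e. the sum over L = 2..n of C(n, L) * (L - 1)!. A quick check of the table values:

from math import comb, factorial

def n_circuits(n):
    # number of elementary circuits in a complete digraph on n nodes
    return sum(comb(n, L) * factorial(L - 1) for L in range(2, n + 1))

print([n_circuits(n) for n in range(2, 9)])
# [1, 5, 20, 84, 409, 2365, 16064]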
示例#60
0
def conflict_serializable(schedule):
    """
    A schedule is conflict serializable if its conflict graph is acyclic.
    Aborted transactions are ignored.
    """
    return len(list(nx.simple_cycles(conflict_graph(schedule)))) == 0
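conflict_graph is defined elsewhere in that module. As a rough self-contained sketch, assume a schedule given as (transaction, operation, item) tuples and build the conflict graph directly (no abort handling here):

import networkx as nx

def toy_conflict_graph(schedule):
    """Conflict graph for a schedule of (txn, op, item) tuples.

    Two operations conflict when they touch the same item, belong to
    different transactions, and at least one of them is a write.
    """
    g = nx.DiGraph()
    g.add_nodes_from({txn for txn, _, _ in schedule})
    for i, (t1, op1, x1) in enumerate(schedule):
        for t2, op2, x2 in schedule[i + 1:]:
            if t1 != t2 and x1 == x2 and 'w' in (op1, op2):
                g.add_edge(t1, t2)
    return g

# T1 reads x before T2 writes it, while T2 writes y before T1 reads it:
# the conflict graph contains the cycle T1 -> T2 -> T1, so the schedule
# is not conflict serializable.
s = [('T1', 'r', 'x'), ('T2', 'w', 'y'), ('T1', 'r', 'y'), ('T2', 'w', 'x')]
print(len(list(nx.simple_cycles(toy_conflict_graph(s)))) == 0)   # False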