def total_expand(inputs):
    """
    Build a new ``TableInput`` whose root-most inputs are every source node
    of ``inputs.exi_graph``.

    Args:
        inputs: object exposing ``exi_graph`` and ``table`` attributes.

    Returns:
        TableInput: constructed from one ``RootMostInput`` per source node of
            the expanded input graph, with ``reorder=True``.

    Raises:
        IndexError: if the expanded input graph has no sink node.
    """
    exi_graph = inputs.exi_graph
    root_nodes = list(ut.nx_source_nodes(exi_graph))
    # Only the first sink is used; every RootMostInput shares it.
    sink = list(ut.nx_sink_nodes(exi_graph))[0]
    rmi_list = [RootMostInput(root, sink, exi_graph) for root in root_nodes]
    return TableInput(rmi_list, exi_graph, inputs.table, reorder=True)
def nx_all_nodes_between(graph, source, target, data=False):
    """
    Find all nodes that lie on a simple path between source and target.

    Args:
        graph: networkx graph handed to ``nx.all_simple_paths``.
        source: start node. If None, the graph must have exactly one source
            node, which is used.
        target: end node. If None, the graph must have exactly one sink node,
            which is used.
        data (bool): accepted for interface compatibility; not used here.

    Returns:
        list: the nodes of every simple path from ``source`` to ``target``,
            flattened and de-duplicated through ``ut.union_ordered``.

    Raises:
        AssertionError: if ``source`` / ``target`` is None and the graph does
            not have exactly one source / sink.
    """
    import utool as ut
    import networkx as nx

    def _the_only(candidates, errmsg):
        # Materialize the candidates and insist that there is exactly one.
        found = list(candidates)
        assert len(found) == 1, errmsg
        return found[0]

    if source is None:
        # assume there is a single source
        source = _the_only(ut.nx_source_nodes(graph),
                           'specify source if there is not only one')
    if target is None:
        # assume there is a single sink
        target = _the_only(ut.nx_sink_nodes(graph),
                           'specify sink if there is not only one')

    simple_paths = list(nx.all_simple_paths(graph, source, target))
    return list(ut.union_ordered(ut.flatten(simple_paths)))
def nx_dag_node_rank(graph, nodes=None):
    """
    Compute the rank ("level") of nodes in a DAG, comparable to the Graphviz
    dot rank.

    The rank of a node is ``len(dag_longest_path(graph, source, node))`` where
    ``source`` is the first source node reported for the graph.

    Args:
        graph: directed acyclic networkx graph.
        nodes (list): nodes to look up. If None, ranks for all nodes are
            returned as a mapping.

    Returns:
        dict or list: a node -> rank mapping when ``nodes`` is None, otherwise
            the ranks in the same order as ``nodes``.

    Ignore:
        simple_graph = ut.simplify_graph(exi_graph)
        adj_dict = ut.nx_to_adj_dict(simple_graph)
        import plottool as pt
        pt.qt4ensure()
        pt.show_nx(graph)

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_graph import *  # NOQA
        >>> import utool as ut
        >>> adj_dict = {0: [5], 1: [5], 2: [1], 3: [4], 4: [0], 5: [], 6: [4], 7: [9], 8: [6], 9: [1]}
        >>> import networkx as nx
        >>> nodes = [2, 1, 5]
        >>> f_graph = ut.nx_from_adj_dict(adj_dict, nx.DiGraph)
        >>> graph = f_graph.reverse()
        >>> #ranks = ut.nx_dag_node_rank(graph, nodes)
        >>> ranks = ut.nx_dag_node_rank(graph, nodes)
        >>> result = ('ranks = %r' % (ranks,))
        >>> print(result)
        ranks = [3, 2, 1]
    """
    import utool as ut
    # Only the first source is considered as the root of the ranking.
    root = list(ut.nx_source_nodes(graph))[0]
    longest_paths = {
        target: dag_longest_path(graph, root, target)
        for target in graph.nodes()
    }
    node_to_rank = ut.map_dict_vals(len, longest_paths)
    if nodes is None:
        return node_to_rank
    return ut.dict_take(node_to_rank, nodes)
def draw_twoday_count(ibs, visit_info_list_):
    """
    Lay out and show a graph of annotation matches for a two-day count.

    For each of the two visit infos a graph is built from ``info['edges']``,
    laid out per connected component, and stacked; the two per-day graphs are
    then stacked vertically, linked with "between day" edges, and displayed
    with ``pt.show_nx``.

    Args:
        ibs: controller object; the code calls its ``get_annotmatch_*``,
            ``unflat_map`` and ``get_annot_nids`` methods.
        visit_info_list_ (list): info dicts, read for the keys ``'aids'``,
            ``'unique_nids'``, ``'grouped_aids'`` and ``'edges'``. The column
            results are unpacked into exactly two values, so presumably one
            entry per day -- TODO confirm against caller. The input is
            deep-copied before anything is modified.

    Returns:
        None

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-flattened source, so the extent of the ``if False:`` blocks is
    an assumption that must be confirmed against version control. ``debug`` is
    only ever assigned as the first statement under the second ``if False:``;
    any layout that leaves the later ``if debug:`` checks reachable would
    raise, so everything after that point is kept inside the disabled block
    here. Under this reading the function only computes ``resight_nids``.
    """
    import copy
    # Work on a copy so the caller's info dicts are not mutated.
    visit_info_list = copy.deepcopy(visit_info_list_)

    # NOTE(review): aids_day1 / aids_day2 are not used below.
    aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'aids')
    nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids')
    # Names present on both days.
    resight_nids = ut.isect(nids_day1, nids_day2)

    if False:
        # HACK REMOVE DATA TO MAKE THIS FASTER
        num = 20
        for info in visit_info_list:
            non_resight_nids = list(set(info['unique_nids']) - set(resight_nids))
            sample_nids2 = non_resight_nids[0:num] + resight_nids[:num]
            info['grouped_aids'] = ut.dict_subset(info['grouped_aids'], sample_nids2)
            info['unique_nids'] = sample_nids2

    # Build a graph of matches
    # NOTE(review): disabled block; see the docstring about its assumed extent.
    if False:
        debug = False

        for info in visit_info_list:
            edges = []
            grouped_aids = info['grouped_aids']

            aids_list = list(grouped_aids.values())
            ams_list = ibs.get_annotmatch_rowids_in_cliques(aids_list)
            aids1_list = ibs.unflat_map(ibs.get_annotmatch_aid1, ams_list)
            aids2_list = ibs.unflat_map(ibs.get_annotmatch_aid2, ams_list)
            for ams, aids, aids1, aids2 in zip(ams_list, aids_list, aids1_list, aids2_list):
                edge_nodes = set(aids1 + aids2)
                ##if len(edge_nodes) != len(set(aids)):
                #    #print('--')
                #    #print('aids = %r' % (aids,))
                #    #print('edge_nodes = %r' % (edge_nodes,))
                # Matched annotations that are not part of this group.
                bad_aids = edge_nodes - set(aids)
                if len(bad_aids) > 0:
                    print('bad_aids = %r' % (bad_aids,))
                unlinked_aids = set(aids) - edge_nodes
                # Chain the unlinked annots together (plus one linked annot, if
                # any) so that every annot of the group ends up on some edge.
                mst_links = list(ut.itertwo(list(unlinked_aids) + list(edge_nodes)[:1]))
                # Also treat None as bad so it is filtered out below.
                bad_aids.add(None)
                user_links = [(u, v) for (u, v) in zip(aids1, aids2) if u not in bad_aids and v not in bad_aids]
                new_edges = mst_links + user_links
                new_edges = [(int(u), int(v)) for u, v in new_edges if u not in bad_aids and v not in bad_aids]
                edges += new_edges
            info['edges'] = edges

        # Add edges between days
        grouped_aids1, grouped_aids2 = ut.take_column(visit_info_list, 'grouped_aids')
        nids_day1, nids_day2 = ut.take_column(visit_info_list, 'unique_nids')
        resight_nids = ut.isect(nids_day1, nids_day2)

        resight_aids1 = ut.take(grouped_aids1, resight_nids)
        resight_aids2 = ut.take(grouped_aids2, resight_nids)
        #resight_aids3 = [list(aids1) + list(aids2) for aids1, aids2 in zip(resight_aids1, resight_aids2)]

        ams_list = ibs.get_annotmatch_rowids_between_groups(resight_aids1, resight_aids2)
        aids1_list = ibs.unflat_map(ibs.get_annotmatch_aid1, ams_list)
        aids2_list = ibs.unflat_map(ibs.get_annotmatch_aid2, ams_list)

        between_edges = []
        for ams, aids1, aids2, rawaids1, rawaids2 in zip(ams_list, aids1_list, aids2_list, resight_aids1, resight_aids2):
            link_aids = aids1 + aids2
            rawaids3 = rawaids1 + rawaids2
            badaids = ut.setdiff(link_aids, rawaids3)
            assert not badaids
            user_links = [(int(u), int(v)) for (u, v) in zip(aids1, aids2) if u is not None and v is not None]
            # HACK THIS OFF
            # The computed user links are discarded, so the branch below always
            # adds a single edge between the first annots of each day.
            user_links = []
            if len(user_links) == 0:
                # Hack in an edge
                between_edges += [(rawaids1[0], rawaids2[0])]
            else:
                between_edges += user_links

        # Both endpoints of every between-day edge must share the same nid.
        assert np.all(0 == np.diff(np.array(ibs.unflat_map(ibs.get_annot_nids, between_edges)), axis=1))

        import plottool_ibeis as pt
        import networkx as nx
        #pt.qt4ensure()

        #len(list(nx.connected_components(graph1)))
        #print(ut.graph_info(graph1))

        # Layout graph
        layoutkw = dict(
            prog='neato',
            draw_implicit=False,
            splines='line',
            #splines='curved',
            #splines='spline',
            #sep=10 / 72,
            #prog='dot', rankdir='TB',
        )

        def translate_graph_to_origin(graph):
            # Shift the graph so its bounding box starts at (0, 0).
            x, y, w, h = ut.get_graph_bounding_box(graph)
            ut.translate_graph(graph, (-x, -y))

        def stack_graphs(graph_list, vert=False, pad=None):
            # Place copies of the graphs one after another along one axis,
            # centered along the other axis, and compose them into one graph.
            graph_list_ = [g.copy() for g in graph_list]
            for g in graph_list_:
                translate_graph_to_origin(g)
            bbox_list = [ut.get_graph_bounding_box(g) for g in graph_list_]
            # Bounding boxes are unpacked as (x, y, w, h) above, so index 2 is
            # the width and index 3 the height. dim1 is the stacking axis.
            if vert:
                dim1 = 3
                dim2 = 2
            else:
                dim1 = 2
                dim2 = 3
            dim1_list = np.array([bbox[dim1] for bbox in bbox_list])
            dim2_list = np.array([bbox[dim2] for bbox in bbox_list])
            if pad is None:
                # Default padding is half of the mean extent along the
                # stacking axis.
                pad = np.mean(dim1_list) / 2
            offset1_list = ut.cumsum([0] + [d + pad for d in dim1_list[:-1]])
            max_dim2 = max(dim2_list)
            # Center each graph relative to the largest one.
            offset2_list = [(max_dim2 - d2) / 2 for d2 in dim2_list]
            if vert:
                t_xy_list = [(d2, d1) for d1, d2 in zip(offset1_list, offset2_list)]
            else:
                t_xy_list = [(d1, d2) for d1, d2 in zip(offset1_list, offset2_list)]

            for g, t_xy in zip(graph_list_, t_xy_list):
                ut.translate_graph(g, t_xy)
                nx.set_node_attributes(g, name='pin', values='true')

            new_graph = nx.compose_all(graph_list_)
            #pt.show_nx(new_graph, layout='custom', node_labels=False, as_directed=False)  # NOQA
            return new_graph

        # Construct graph
        for count, info in enumerate(visit_info_list):
            graph = nx.Graph()
            edges = [(int(u), int(v)) for u, v in info['edges'] if u is not None and v is not None]
            graph.add_edges_from(edges, attr_dict={'zorder': 10})
            nx.set_node_attributes(graph, name='zorder', values=20)

            # Layout in neato
            _ = pt.nx_agraph_layout(graph, inplace=True, **layoutkw)  # NOQA

            # Extract components and then flatten in nid ordering
            ccs = list(nx.connected_components(graph))
            root_aids = []
            cc_graphs = []
            for cc_nodes in ccs:
                cc = graph.subgraph(cc_nodes)
                # Pick one representative annot per component.
                try:
                    root_aids.append(list(ut.nx_source_nodes(cc.to_directed()))[0])
                except nx.NetworkXUnfeasible:
                    root_aids.append(list(cc.nodes())[0])
                cc_graphs.append(cc)

            root_nids = ibs.get_annot_nids(root_aids)
            nid2_graph = dict(zip(root_nids, cc_graphs))

            # Split components into names seen on both days vs only this day.
            resight_nids_ = set(resight_nids).intersection(set(root_nids))
            noresight_nids_ = set(root_nids) - resight_nids_

            n_graph_list = ut.take(nid2_graph, sorted(noresight_nids_))
            r_graph_list = ut.take(nid2_graph, sorted(resight_nids_))

            if len(n_graph_list) > 0:
                n_graph = nx.compose_all(n_graph_list)
                _ = pt.nx_agraph_layout(n_graph, inplace=True, **layoutkw)  # NOQA
                n_graphs = [n_graph]
            else:
                n_graphs = []

            # Resighted components are stacked in chunks of 100.
            r_graphs = [stack_graphs(chunk) for chunk in ut.ichunks(r_graph_list, 100)]
            # The stacking order is mirrored between the first info and the
            # others.
            if count == 0:
                new_graph = stack_graphs(n_graphs + r_graphs, vert=True)
            else:
                new_graph = stack_graphs(r_graphs[::-1] + n_graphs, vert=True)

            #pt.show_nx(new_graph, layout='custom', node_labels=False, as_directed=False)  # NOQA
            info['graph'] = new_graph

        graph1_, graph2_ = ut.take_column(visit_info_list, 'graph')
        if False:
            _ = pt.show_nx(graph1_, layout='custom', node_labels=False, as_directed=False)  # NOQA
            _ = pt.show_nx(graph2_, layout='custom', node_labels=False, as_directed=False)  # NOQA

        graph_list = [graph1_, graph2_]
        twoday_graph = stack_graphs(graph_list, vert=True, pad=None)
        nx.set_node_attributes(twoday_graph, name='pin', values='true')

        if debug:
            ut.nx_delete_None_edge_attr(twoday_graph)
            ut.nx_delete_None_node_attr(twoday_graph)
            print('twoday_graph(pre) info' + ut.repr3(ut.graph_info(twoday_graph), nl=2))

        # Hack, no idea why there are nodes that don't exist here
        between_edges_ = [edge for edge in between_edges
                          if twoday_graph.has_node(edge[0]) and twoday_graph.has_node(edge[1])]

        twoday_graph.add_edges_from(between_edges_, attr_dict={'alpha': .2, 'zorder': 0})
        ut.nx_ensure_agraph_color(twoday_graph)

        layoutkw['splines'] = 'line'
        layoutkw['prog'] = 'neato'
        agraph = pt.nx_agraph_layout(twoday_graph, inplace=True, return_agraph=True, **layoutkw)[-1]  # NOQA
        if False:
            fpath = ut.truepath('~/ggr_graph.png')
            agraph.draw(fpath)
            ut.startfile(fpath)

        if debug:
            print('twoday_graph(post) info' + ut.repr3(ut.graph_info(twoday_graph)))

        _ = pt.show_nx(twoday_graph, layout='custom', node_labels=False, as_directed=False)  # NOQA
def make_expanded_input_graph(graph, target):
    """
    Starting from the `target` property we trace all possible paths in the
    `graph` back to all sources.

    Args:
        graph (nx.MultiDiGraph): the dependency graph with a single source.
            Its edges are iterated with ``keys=True``.
        target (str): a single target node in graph

    Notes:
        Each edge in the graph must have a `local_input_id` that defines the
        type of edge it is: (eg one-to-many, one-to-one, nwise/multi).

        # Step 1: Extracting the Relevant Subgraph
        We start by searching for all sources of the graph (we assume there is
        only one). Then we extract the subgraph defined by all edges between
        the sources and the target.  We augment this graph with a dummy super
        source `s` and super sink `t`. This allows us to associate an edge with
        the real source and sink.

        # Step 2: Trace all paths from `s` to `t`.
        Create a set of all paths from the source to the sink and accumulate
        the `local_input_id` of each edge along the path. This will uniquely
        identify each path. We use a hack to condense the accumulated ids in
        order to display them nicely.

        # Step 3: Create the new `exi_graph`
        Using the traced paths with ids we construct a new graph representing
        expanded inputs. The nodes in the original graph will be copied for
        each unique path that passes through the node. We identify these nodes
        using the accumulated ids built along the edges in our path set.  For
        each path starting from the target we add each node augmented with the
        accumulated ids on its output(?) edge. We also add the edges along
        these paths which results in the final `exi_graph`.

        # Step 4: Identify valid inputs candidates
        The purpose of this graph is to identify which inputs are needed
        to compute dependent properties. One valid set of inputs is all
        sources of the graph. However, sometimes it is preferable to specify
        a model that may have been trained from many inputs. Therefore any
        node with a one-to-many input edge may also be specified as an input.

        # Step 5: Identify root-most inputs
        The user will only specify one possible set of the inputs. We refer to
        this set as the "root-most" inputs. This is a set of candidate nodes
        such that all paths from the sink to the super source are blocked.  We
        default to the set of inputs which results in the fewest dependency
        computations. However this is arbitrary.

        The last step that is not represented here is to compute the order that
        the branches must be specified in when given to the depcache for a
        computation.

    Returns:
        nx.DiGraph: exi_graph: the expanded input graph

    Notes:
        All * nodes are defined to be distinct.
        TODO: To make a * node non-distinct it must be suffixed with an
        identifier.

        NOTE(review): the indentation of this function was reconstructed from
        a whitespace-flattened source; confirm block nesting against version
        control.

    CommandLine:
        python -m dtool.input_helpers make_expanded_input_graph --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from dtool.input_helpers import *  # NOQA
        >>> from dtool.example_depcache2 import *  # NOQA
        >>> depc = testdata_depc3()
        >>> table = depc['smk_match']
        >>> table = depc['vsone']
        >>> graph = table.depc.explicit_graph.copy()
        >>> target = table.tablename
        >>> exi_graph = make_expanded_input_graph(graph, target)
        >>> x = list(exi_graph.nodes())[0]
        >>> print('x = %r' % (x,))
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> pt.show_nx(graph, fnum=1, pnum=(1, 2, 1))
        >>> pt.show_nx(exi_graph, fnum=1, pnum=(1, 2, 2))
        >>> ut.show_if_requested()
    """
    # FIXME: this does not work correctly when
    # The nesting of non-1-to-1 dependencies is greater than 2 (I think)
    # algorithm for finding inputs does not work.

    # FIXME: two vocabs have the same edge id, they should be the same in the
    # Expanded Input Graph as well. Their accum_id needs to be changed.

    def condense_accum_ids(rinput_path_id):
        # Hack to condense and consolidate graph sources
        # Drops every '1' that is not the first item and collapses runs of
        # equal consecutive items; a leading '1' is removed when other items
        # remain. Returns a tuple.
        prev = None
        compressed = []
        for item in rinput_path_id:
            if item == '1' and prev is not None:
                pass  # done append ones
            elif item != prev:
                compressed.append(item)
            prev = item
        #if len(compressed) > 1 and compressed[0] in ['1', '*']:
        if len(compressed) > 1 and compressed[0] == '1':
            compressed = compressed[1:]
        compressed = tuple(compressed)
        return compressed

    BIG_HACK = True
    #BIG_HACK = False

    def condense_accum_ids_stars(rinput_path_id):
        # Hack to condense and consolidate graph sources
        rcompressed = []
        has_star = False
        # Remove all but the final star (this is a really bad hack)
        # Scanning from the end, items are kept up to and including the first
        # one that contains '*'; everything before it is dropped.
        for item in reversed(rinput_path_id):
            is_star = '*' in item
            if not (is_star and has_star):
                if not has_star:
                    rcompressed.append(item)
            has_star = has_star or is_star
        compressed = tuple(rcompressed[::-1])
        return compressed

    def accumulate_input_ids(edge_list):
        """
        Compute an `accum_id` for every edge in `edge_list` and store it in
        that edge's data dict (mutated in place). Returns the list of ids.

        python -m dtool.example_depcache2 testdata_depc4 --show
        """
        # Edges are (u, v, key, data) tuples; column 3 is the data dict.
        edge_data = ut.take_column(edge_list, 3)
        # We are accumulating local input ids
        toaccum_list_ = ut.dict_take_column(edge_data, 'local_input_id')
        if BIG_HACK and True:
            v_list = ut.take_column(edge_list, 1)
            # show the local_input_ids at the entire level
            # NOTE: `graph` is read from the enclosing scope at call time, so
            # this sees the subgraph that the outer function rebinds below.
            # Only the edges from the first predecessor of each node are used.
            pred_ids = ([
                [
                    x['local_input_id']
                    for x in list(graph.pred[node].values())[0].values()
                ]
                if len(graph.pred[node]) else []
                for node in v_list
            ])
            toaccum_list = [
                x + ':' + ';'.join(y)
                for x, y in zip(toaccum_list_, pred_ids)
            ]
        else:
            toaccum_list = toaccum_list_

        # Default dumb accumulation
        # Each id is wrapped in a 1-tuple by zip() and the tuples are
        # cumulatively concatenated, giving one growing tuple per edge.
        accum_ids_ = ut.cumsum(zip(toaccum_list), tuple())
        accum_ids = ut.lmap(condense_accum_ids, accum_ids_)
        if BIG_HACK:
            accum_ids = ut.lmap(condense_accum_ids_stars, accum_ids)
        accum_ids = [('t', ) + x for x in accum_ids]
        ut.dict_set_column(edge_data, 'accum_id', accum_ids)
        return accum_ids

    sources = list(ut.nx_source_nodes(graph))
    # NOTE(review): looks like a leftover debug print -- confirm before removing.
    print(sources)
    # assert len(sources) == 1, 'expected a unique source'
    # Only the first source is used, even if there are several.
    source = sources[0]

    # Work on a copy restricted to the nodes between the source and target.
    graph = graph.subgraph(ut.nx_all_nodes_between(graph, source, target)).copy()
    # Remove superfluous data
    ut.nx_delete_edge_attr(
        graph,
        [
            'edge_type',
            'isnwise',
            'nwise_idx',
            # 'parent_colx',
            'ismulti'
        ])

    # Make all '*' edges have distinct local_input_id's.
    # TODO: allow non-distinct suffixes
    # Suffixes are consecutive characters starting at 'a'.
    count = ord('a')
    for edge in graph.edges(keys=True, data=True):
        dat = edge[3]
        if dat['local_input_id'] == '*':
            dat['local_input_id'] = '*' + chr(count)
            dat['taillabel'] = '*' + chr(count)
            count += 1

    # Augment with dummy super source/sink nodes
    source_input = 'source_input'
    target_output = 'target_output'
    graph.add_edge(source_input, source, local_input_id='s', taillabel='1')
    graph.add_edge(target, target_output, local_input_id='t', taillabel='1')

    # Find all paths from the table to the source.
    paths_to_source = ut.all_multi_paths(graph, source_input, target_output,
                                         data=True)

    # Build expanded input graph
    # The inputs to this table can be derived from this graph.
    # The output is a new expanded input graph.
    exi_graph = nx.DiGraph()
    for path in paths_to_source:
        # Accumulate unique identifiers along the reversed path
        edge_list = ut.reverse_path_edges(path)
        # Called for its side effect: writes 'accum_id' into the edge data.
        accumulate_input_ids(edge_list)

        # A node's output(?) on this path determines its expanded branch id
        # The last reversed edge is skipped, so the dummy super source never
        # becomes a node of the expanded graph.
        exi_nodes = [
            ExiNode(v, BranchId(d['accum_id'], k, d.get('parent_colx', -1)))
            for u, v, k, d in edge_list[:-1]
        ]
        exi_node_to_label = {
            node: node[0] + '[' + ','.join([str(x) for x in node[1]]) + ']'
            for node in exi_nodes
        }
        exi_graph.add_nodes_from(exi_nodes)
        nx.set_node_attributes(exi_graph, name='label',
                               values=exi_node_to_label)

        # Undo any accumulation ordering and remove dummy nodes
        old_edges = ut.reverse_path_edges(edge_list[1:-1])
        new_edges = ut.reverse_path_edges(list(ut.itertwo(exi_nodes)))
        for new_edge, old_edge in zip(new_edges, old_edges):
            u2, v2 = new_edge[:2]
            d = old_edge[3]
            taillabel = d['taillabel']
            parent_colx = d.get('parent_colx', -1)
            # An edge shared by several paths is added once; its attributes
            # come from the first path that produced it.
            if not exi_graph.has_edge(u2, v2):
                exi_graph.add_edge(u2, v2, taillabel=taillabel,
                                   parent_colx=parent_colx)

    sink_nodes = list(ut.nx_sink_nodes(exi_graph))
    source_nodes = list(ut.nx_source_nodes(exi_graph))
    assert len(sink_nodes) == 1, 'expected a unique sink'
    sink_node = sink_nodes[0]

    # First identify if a node is root_specifiable
    # A node qualifies if it has no incoming edges, or if it is not the sink
    # and has an incoming edge whose taillabel starts with '*'.
    node_dict = ut.nx_node_dict(exi_graph)
    for node in exi_graph.nodes():
        root_specifiable = False
        # for edge in exi_graph.in_edges(node, keys=True):
        for edge in exi_graph.in_edges(node):
            # key = edge[-1]
            # assert key == 0, 'multi di graph is necessary'
            edata = exi_graph.get_edge_data(*edge)
            if edata.get('taillabel').startswith('*'):
                if node != sink_node:
                    root_specifiable = True
        if exi_graph.in_degree(node) == 0:
            root_specifiable = True
        node_dict[node]['root_specifiable'] = root_specifiable

    # Need to specify any combo of red nodes such that
    # 1) for each path from a (leaf) to the (root) there is exactly one red
    # node along that path.  This guarantees that all inputs are given.
    path_list = ut.flatten([
        nx.all_simple_paths(exi_graph, source_node, sink_node)
        for source_node in source_nodes
    ])
    rootmost_nodes = set([])
    for path in path_list:
        flags = [node_dict[node]['root_specifiable'] for node in path]
        valid_nodes = ut.compress(path, flags)
        # Keep the specifiable node that is closest to the sink on this path.
        rootmost_nodes.add(valid_nodes[-1])
    # Rootmost nodes are the ones specifiable by default when computing the
    # normal property.
    for node in rootmost_nodes:
        node_dict[node]['rootmost'] = True

    # We actually need to hack away any root-most nodes that have another
    # rootmost node as the parent.  Otherwise, this would cause constraints in
    # what the user could specify as valid input combinations.
    # ie: specify a vocab and an index, but the index depends on the vocab.
    # this forces the user to specify the vocab that was the parent of the index
    # the user should either just specify the index and have the vocab inferred
    # or for now, we just don't allow this to happen.
    # NOTE(review): the result of this call is discarded, so the hack
    # described above is not actually implemented here.
    nx.get_node_attributes(exi_graph, 'rootmost')

    recolor_exi_graph(exi_graph, rootmost_nodes)
    return exi_graph
def _order_rmi_list(inputs, reorder=False):
    """
    Validate ``inputs.rmi_list`` against the expanded input graph and then
    either sort it or prune it, in place.

    Every simple path from a source of ``inputs.exi_graph`` to its single sink
    must pass through the node of at least one root-most input. When
    ``reorder`` is true and there is more than one root-most input, the list is
    replaced by ``sort_rmi_list(inputs.rmi_list)``. When ``reorder`` is false,
    only the inputs found in the set of sink-most covering nodes are kept.

    The intended order is the one in which the user declared the dependencies
    in the depcache declaration (in the user defined decorators):
    for 1-to-1 properties this is just the root_ids.
    For vsone, it should be root1, root2
    For vsmany it should be root1, root2*

    Order is primarily determined by the parent input order as given in the
    table definition. If one parent expands in to multiple parents then the
    secondary ordering inherits from the parents. If the two paths merge,
    then there is no problem. There is only one parent.

    Args:
        inputs: object with ``rmi_list`` and ``exi_graph`` attributes;
            ``rmi_list`` is reassigned by this function.
        reorder (bool): sort the inputs instead of pruning them.

    Raises:
        AssertionError: if the expanded input graph does not have exactly one
            sink node.
        ValueError: if some source-to-sink path contains no root-most input.

    CommandLine:
        python -m dtool.input_helpers _order_rmi_list --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from dtool.example_depcache2 import *  # NOQA
        >>> depc = testdata_depc3()
        >>> exi_inputs1 = depc['vsone'].rootmost_inputs.total_expand()
        >>> assert exi_inputs1.rmi_list[0] != exi_inputs1.rmi_list[1]
        >>> print('exi_inputs1 = %r' % (exi_inputs1,))
        >>> exi_inputs2 = depc['neighbs'].rootmost_inputs.total_expand()
        >>> assert '*' not in str(exi_inputs2.rmi_list[0])
        >>> assert '*' in str(exi_inputs2.rmi_list[1])
        >>> print('exi_inputs2 = %r' % (exi_inputs2,))
        >>> exi_inputs3 = depc['meta_labeler'].rootmost_inputs.total_expand()
        >>> print('exi_inputs3 = %r' % (exi_inputs3,))
        >>> exi_inputs4 = depc['smk_match'].rootmost_inputs.total_expand()
        >>> print('exi_inputs4 = %r' % (exi_inputs4,))
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> from plottool.interactions import ExpandableInteraction
        >>> inter = ExpandableInteraction(nCols=2)
        >>> depc['vsone'].show_dep_subgraph(inter)
        >>> exi_inputs1.show_exi_graph(inter)
        >>> depc['neighbs'].show_dep_subgraph(inter)
        >>> exi_inputs2.show_exi_graph(inter)
        >>> depc['meta_labeler'].show_dep_subgraph(inter)
        >>> exi_inputs3.show_exi_graph(inter)
        >>> depc['smk_match'].show_dep_subgraph(inter)
        >>> exi_inputs4.show_exi_graph(inter)
        >>> inter.start()
        >>> #depc['viewpoint_classification'].show_input_graph()
        >>> ut.show_if_requested()
    """
    # hack for labels
    candidate_exi_nodes = [rmi.node for rmi in ut.unique(inputs.rmi_list)]

    # Ensure that nodes form a complete rootmost set
    # Remove over-complete nodes
    sinks = list(ut.nx_sink_nodes(inputs.exi_graph))
    sources = list(ut.nx_source_nodes(inputs.exi_graph))
    assert len(sinks) == 1, 'can only have one sink node'
    the_sink = sinks[0]
    all_paths = ut.flatten([
        nx.all_simple_paths(inputs.exi_graph, src, the_sink)
        for src in sources
    ])

    candidates = set(candidate_exi_nodes)
    rootmost_nodes = set([])
    for path in all_paths:
        covering = [node for node in path if node in candidates]
        if not covering:
            raise ValueError('Missing RMI on path=%r' % (path, ))
        # The covering node closest to the sink is the one that counts.
        rootmost_nodes.add(covering[-1])

    # NOTE(review): assumes the pruning branch pairs with ``reorder`` rather
    # than with the length check; the source indentation was lost.
    if not reorder:
        keep_flags = [rmi in rootmost_nodes for rmi in inputs.rmi_list]
        inputs.rmi_list = ut.compress(inputs.rmi_list, keep_flags)
    elif len(inputs.rmi_list) > 1:
        # This re-orders the parent input specs based on the declared order
        # input defined by the user. This ordering is represented by the
        # parent_colx property from the table.parents()
        inputs.rmi_list = sort_rmi_list(inputs.rmi_list)