def test_predecessor(self):
    """Compare the DFS parent maps of the ``G`` and ``D`` fixture graphs."""
    # Traversal of self.G rooted explicitly at node 0.
    from_zero = nx.dfs_predecessors(self.G, source=0)
    assert_equal(from_zero, {1: 0, 2: 1, 3: 4, 4: 2})

    # No source is passed for self.D.
    without_source = nx.dfs_predecessors(self.D)
    assert_equal(without_source, {1: 0, 3: 2})
def test_predecessor(self):
    """Check ``nx.dfs_predecessors`` against the expected parent maps."""
    expected_for_g = {1: 0, 2: 1, 3: 4, 4: 2}
    expected_for_d = {1: 0, 3: 2}

    # self.G is searched from node 0; self.D is searched without a source.
    assert nx.dfs_predecessors(self.G, source=0) == expected_for_g
    assert nx.dfs_predecessors(self.D) == expected_for_d
def dls_test_predecessor(self):
    """Check depth-limited (depth_limit=3) DFS parent maps on both fixtures.

    NOTE(review): the name does not start with ``test``; confirm the test
    runner in use actually collects it.
    """
    limited_g = nx.dfs_predecessors(self.G, source=0, depth_limit=3)
    assert_equal(limited_g, {1: 0, 2: 1, 3: 2, 7: 2})

    limited_d = nx.dfs_predecessors(self.D, source=2, depth_limit=3)
    assert_equal(limited_d, {8: 7, 9: 8, 3: 2, 7: 2})
def test_dls_predecessor(self):
    """Check depth-limited (depth_limit=3) DFS parent maps on both fixtures."""
    expected_for_g = {1: 0, 2: 1, 3: 2, 7: 2}
    expected_for_d = {8: 7, 9: 8, 3: 2, 7: 2}

    # self.G is searched from node 0, self.D from node 2.
    assert nx.dfs_predecessors(self.G, source=0, depth_limit=3) == expected_for_g
    assert nx.dfs_predecessors(self.D, source=2, depth_limit=3) == expected_for_d
def _find_missing_input(calc_inp_nodes, graph):
    '''Search for *tentatively* missing data.

    Returns, in iteration order, the keys of ``nx.dfs_predecessors(graph)``
    that are not among *calc_inp_nodes* and whose out-degree in *graph* is 0.
    '''
    known_inputs = set(calc_inp_nodes)  # constant-time membership tests
    return [
        candidate
        for candidate in nx.dfs_predecessors(graph)
        if candidate not in known_inputs and graph.out_degree(candidate) == 0
    ]
def _find_missing_input(calc_inp_nodes, graph):
    '''Search for *tentatively* missing data.

    A node is reported when it is a key of ``nx.dfs_predecessors(graph)``,
    is absent from *calc_inp_nodes* and has no outgoing edge in *graph*.
    '''
    provided = set(calc_inp_nodes)  # set lookup instead of a list scan
    tentatively_missing = []
    for node in nx.dfs_predecessors(graph):
        if node in provided:
            continue
        if graph.out_degree(node) != 0:
            continue
        tentatively_missing.append(node)
    return tentatively_missing
def part1():
    """Print and count the nodes a DFS reaches from 'shiny gold' in reversed ``G``.

    ``G`` is the module-level graph; the source node itself is not a key of
    the parent map, so it is neither printed nor counted.
    """
    # Flip every edge so the search walks the original edges backwards.
    flipped = G.reverse()
    dfs_parents = nx.dfs_predecessors(flipped, 'shiny gold')

    for reached in dfs_parents:
        print(reached)
    return len(dfs_parents)
def get_all_predecessors(self, cfgnode):
    """
    Run a depth-first search on the control flow graph from a node and return its parent map.

    :param CFGNode cfgnode: The CFGNode object the depth-first search starts from
    :return: The result of ``networkx.dfs_predecessors``: every node reached by the search,
             mapped to the node it was discovered from
    :rtype: dict
    """
    # NOTE(review): this returns a dict, not a list. Iterating it yields only
    # the reached nodes (the keys); `cfgnode` itself is never a key.
    # NOTE(review): dfs_predecessors explores from `cfgnode` along the graph's
    # edge direction -- confirm that matches the intended meaning of
    # "predecessors" for this graph.
    return networkx.dfs_predecessors(self._graph, cfgnode)
def gather_relevant_nodes(G, s):
    """Collect the nodes found by depth-first searches from *s* and its DFS descendants.

    First every node listed by ``nx.dfs_successors(G, source=s)`` is taken,
    then, for each of those nodes, every key of ``nx.dfs_predecessors(G, node)``.

    The previous implementation appended to the result list while iterating
    over it.  That re-processed every appended node, produced an unbounded
    number of duplicates, and never terminated when *s* could be reached
    again from one of its descendants (always the case on an undirected
    graph).  The result now contains each node once, in first-seen order;
    for inputs on which the old code terminated, the set of nodes is the same.

    Parameters:
        G: A NetworkX graph.
        s: The node the initial search starts from.

    Returns:
        A list of distinct nodes.
    """
    ret = []
    seen = set()

    def _remember(node):
        # Keep first-seen order while dropping repeats.
        if node not in seen:
            seen.add(node)
            ret.append(node)

    for children in nx.dfs_successors(G, source=s).values():
        for child in children:
            _remember(child)

    # Iterate over a snapshot: `ret` may grow inside the loop.
    for node in list(ret):
        for reached in nx.dfs_predecessors(G, node):
            _remember(reached)
    return ret
def link2pred(linkdata,lookup_data):
    """Turn traced segments into a list of SWC-style rows ordered by a DFS.

    Each element of ``linkdata`` is indexed as a 2-D array whose last column
    supplies the ids joined into edges and whose second-to-last column is
    averaged into ``rad``.  ``lookup_data`` is indexed with the unique ids and
    must yield at least four values per id.
    NOTE(review): presumably (x, y, z, radius) -- confirm against the caller.

    Returns a list of rows ``[index, 1, v0, v1, v2, v3, parent_index]`` with
    1-based indices in DFS preorder; the first row has parent ``-1``.
    """
    #########################################################
    # convert sub to graph to get upscaled reconstruction
    #########################################################
    numsegments = len(linkdata)
    linkdata_con = np.concatenate(linkdata,axis=0)  # not referenced again in this function
    edges = []
    # radius_estimate_around_trace
    for ix in range(numsegments):
        # consecutive rows of a segment form one edge each
        edge1 = linkdata[ix][:-1,-1]
        edge2 = linkdata[ix][1:,-1]
        rad = (linkdata[ix][1:,-2]+linkdata[ix][:-1,-2])/2
        edges.append(np.concatenate((edge1[:,None],edge2[:,None],rad[:,None]),axis=1))
    edges = np.concatenate(edges,axis=0)
    # Original MATLAB this was ported from:
    # [keepthese, ia, ic] = unique(edges(:, [1 2]));
    # [subs(:, 1), subs(:, 2), subs(:, 3)] = ind2sub(outsiz([1 2 3]), keepthese);
    # edges_ = reshape(ic, [], 2);
    # weights_ = edges(ia, 3:end);
    # in order to go back to original index: unique_edges[edges_reduced[0,0]]
    unique_edges,unique_indicies,unique_inverse = np.unique(edges[:,:2], return_index=True,return_inverse=True)
    # relabel both endpoint columns with compact 0..k-1 indices
    edges_reduced = np.reshape(unique_inverse,(edges.shape[0],2))
    # connectivity graph
    dat = np.ones((edges_reduced.shape[0],1)).flatten()
    e1 = edges_reduced[:,0]
    e2 = edges_reduced[:,1]
    sM = csr_matrix((dat,(e1,e2)), shape=(np.max(edges_reduced)+1,np.max(edges_reduced)+1))
    # build a depth-first spanning tree from the seed (first endpoint of the first edge)
    seed_index = edges_reduced[0,0]
    # NOTE(review): from_scipy_sparse_matrix is not available in every networkx
    # release -- confirm the pinned version provides it.
    nxsM = nx.from_scipy_sparse_matrix(sM)
    preds = nx.dfs_predecessors(nxsM,seed_index)
    orderlist = nx.dfs_preorder_nodes(nxsM, seed_index)
    orderlist = np.array(list(orderlist))
    seed_vals = lookup_data[unique_edges[seed_index]]  # not referenced again in this function
    swc_data=[]
    swc_list={}  # compact node index -> 1-based row number
    # iterate over orderlist (set first column based on this)
    for ix, idx_trace in enumerate(orderlist):
        swc_list[idx_trace] = ix + 1
        if ix==0:
            target = -1  # the seed row has no parent
        else:
            # preorder guarantees the DFS parent already has a row number
            target = swc_list[preds[idx_trace]]
        loc_xyzr = lookup_data[unique_edges[idx_trace]]
        swc_data.append([ix+1,1,loc_xyzr[0],loc_xyzr[1],loc_xyzr[2],loc_xyzr[3],target])
    return swc_data
def ruleScore(graph):
    """Score every node by how much the DFS trees of its direct predecessors overlap.

    For each node, every ordered pair of distinct direct predecessors
    ``(pred1, pred2)`` contributes

    * the number of nodes shared by the two DFS trees returned by
      ``nx.dfs_predecessors(graph, pred)`` (keys and values together), plus
    * 1 if ``pred1`` is a key of ``pred2``'s tree, plus
    * 1 if ``pred2`` is a key of ``pred1``'s tree.

    Nodes without predecessors score 0.

    The scores are the same as before, but the code no longer relies on
    ``graph.predecessors`` returning a list or on ``dict.keys() +
    dict.values()`` (both Python 2 / networkx 1.x only), and each DFS is
    computed once per predecessor instead of up to six times per pair.

    Parameters:
        graph: A directed NetworkX graph.

    Returns:
        dict mapping each node of ``graph`` to its integer score.
    """
    scoreFunction = {}
    for n1 in list(graph.nodes()):
        # list() makes this work whether predecessors() gives a list or an iterator
        preds_n1 = list(graph.predecessors(n1))
        scoreFunction[n1] = 0
        if not preds_n1:
            continue

        # One DFS per predecessor, reused for every pair below.
        trees = {pred: nx.dfs_predecessors(graph, pred) for pred in preds_n1}
        members = {
            pred: set(tree) | set(tree.values())
            for pred, tree in trees.items()
        }

        for pred1 in preds_n1:
            for pred2 in preds_n1:
                if pred1 != pred2:
                    scoreFunction[n1] += len(members[pred1] & members[pred2])
                    if pred1 in trees[pred2]:
                        scoreFunction[n1] += 1
                    if pred2 in trees[pred1]:
                        scoreFunction[n1] += 1
    return scoreFunction


# find the end of a node in the bistring
def ruleScore6(graph):
    """Calculate the total ancestor overlap as described in the main text.

    For each node, the direct predecessors are flattened with the
    module-level ``flatten`` helper.  Every ordered pair of distinct
    predecessors ``(pred1, pred2)`` then contributes

    * the number of nodes shared by the two DFS trees returned by
      ``nx.dfs_predecessors(graph, pred)`` (keys and values together), plus
    * 1 if ``pred1`` is a key of ``pred2``'s tree, plus
    * 1 if ``pred2`` is a key of ``pred1``'s tree.

    Nodes without predecessors score 0.

    The scores are the same as before, but ``dict.keys() + dict.values()``
    (a ``TypeError`` on Python 3) is replaced by a set union, and each DFS is
    computed once per predecessor instead of up to six times per pair.

    Parameters:
        graph: A directed NetworkX graph.

    Returns:
        dict mapping each node of ``graph`` to its integer score.
    """
    scoreFunction = {}
    for n1 in list(graph.nodes()):
        preds_n1_dict = graph.predecessors(n1)
        preds_n1 = list(flatten(preds_n1_dict))
        scoreFunction[n1] = 0
        if not preds_n1:
            continue

        # One DFS per predecessor, reused for every pair below.
        trees = {pred: nx.dfs_predecessors(graph, pred) for pred in preds_n1}
        members = {
            pred: set(tree) | set(tree.values())
            for pred, tree in trees.items()
        }

        for pred1 in preds_n1:
            for pred2 in preds_n1:
                if pred1 != pred2:
                    scoreFunction[n1] += len(members[pred1] & members[pred2])
                    if pred1 in trees[pred2]:
                        scoreFunction[n1] += 1
                    if pred2 in trees[pred1]:
                        scoreFunction[n1] += 1
    return scoreFunction
def get_all_predecessors(self, cfgnode):
    """
    Collect every node that takes part in the depth-first search tree rooted at a node.

    :param CFGNode cfgnode: The CFGNode object the search starts from
    :return: All nodes appearing as child or parent in ``networkx.dfs_predecessors(self.graph, cfgnode)``,
             each once, in no particular order
    :rtype: list
    """
    dfs_parents = networkx.dfs_predecessors(self.graph, cfgnode)
    collected = set()
    for child_and_parent in dfs_parents.items():
        # adds the child first, then its DFS parent
        collected.update(child_and_parent)
    return list(collected)
def traversal(self):
    """Dump the DFS predecessor and successor maps of ``self.graph`` to JSON.

    The file is written to ``self.DIR + '/traversal.json'`` using the
    module's ``SetEncoder``; its path is printed afterwards.
    """
    # Other traversals (dfs_preorder_nodes, dfs_postorder_nodes,
    # dfs_labeled_edges, edge_dfs, dfs_edges, dfs_tree) are not exported.
    report = {
        'dfs_predecessors': nx.dfs_predecessors(self.graph),
        'dfs_successors': nx.dfs_successors(self.graph),
    }

    out_path = self.DIR + '/traversal.json'
    with open(out_path, "w") as handle:
        json.dump(report, handle, cls=SetEncoder, indent=2)
    print(out_path)
def _resolve_grouping_node(group_node, group_tree, group_graph, target_graph):
    """
    Resolves the predecessors of a grouping node and adds them to the target
    graph. This function can be used to enrich connectivity and discourse
    graphs with information from the grouping graph by fetching the nodes that
    participate in a visual group.

    Parameters:
        group_node: A string with the identifier of the grouping node.
        group_tree: A depth-first search tree for the grouping graph.
        group_graph: An AI2D-RST grouping graph.
        target_graph: A NetworkX graph which contains the grouping node to
                      resolve.

    Returns:
        The target graph, updated in place with the diagram element nodes and
        'grouping' edges found under the grouping node. (Earlier versions
        documented this return value but returned None.)
    """
    # Depth-first search from the grouping node: maps each node reached to
    # the node it was discovered from
    preds = nx.dfs_predecessors(group_tree, group_node)

    # Unique node identifiers taking part in the search tree. These are the
    # nodes on which a subgraph will be induced.
    members = set(preds) | set(preds.values())

    # Induce a subgraph based on the nodes; copy so edge attributes can be set
    pred_group = group_graph.subgraph(members).copy()

    # Mark every edge of the subgraph as a grouping edge
    edge_attrs = {(s, t): {'kind': 'grouping'} for s, t in pred_group.edges()}
    nx.set_edge_attributes(pred_group, edge_attrs)

    # Add the nodes and edges from the subgraph to the target graph
    target_graph.add_nodes_from(pred_group.nodes(data=True))
    target_graph.add_edges_from(pred_group.edges(data=True))

    return target_graph
def get_next_groups(self, processed_nodes):
    """Get nodes that have predecessors in processed_nodes list.

    All predecessors should be taken into account, not only direct
    parents

    :param processed_nodes: set of nodes names
    :returns: list of nodes names
    """
    # The reversed graph does not change inside the loop, so build it once
    # instead of once per node.
    reversed_graph = self.reverse()

    result = []
    for node in self.nodes():
        if node in processed_nodes:
            continue

        # Every node reached from `node` in the reversed graph
        predecessors = nx.dfs_predecessors(reversed_graph, node)
        if set(predecessors) <= processed_nodes:
            result.append(node)

    return result
def cheapestSuccessorConnection(G, H):
    """Repeatedly rewire edges of H at nodes with more than two neighbours.

    Loops until ``udah_belom(H)`` is true.  For each node ``x`` with more
    than two neighbours, every edge ``(x, y)`` is tentatively removed and
    every node ``anak`` reached by a DFS from ``x`` that passes
    ``validColor(..., anak, y)`` becomes a candidate ``(anak, y, weight)``
    with the weight read from ``G``.  The lightest candidate replaces the edge
    ``(x, tom)``.  When the iteration counter reaches 100001 the work is
    handed over to ``cheapestLeafConnection``.

    NOTE(review): Python 2 / networkx 1.x code (print statements, ``len`` of
    ``H.neighbors``).  Returns the resulting graph.
    """
    nodes = H.nodes()
    candidates = []
    tmp_weight = 0
    counter = 0
    while not udah_belom(H):
        for x in nodes:
            candidates = []
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    dummyGraph = H.copy()
                    #remove edge
                    removeNeighborColor(dummyGraph, x, y)
                    tmp_weight = G[x][y]['weight']  #save weight
                    dummyGraph.remove_edge(x, y)
                    # keys: nodes still reachable from x once (x, y) is gone
                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if validColor(dummyGraph, anak, y):
                            tmp = (anak, y, G[anak][y]['weight'])
                            candidates.append(tmp)
                    # put the edge back before trying the next neighbour
                    dummyGraph.add_edge(x, y, weight=tmp_weight)
                    addNeighborColor(dummyGraph, x, y)
                if len(candidates) > 0:
                    # cheapest replacement first
                    candidates = sorted(candidates, key=lambda z: z[2])
                    fro, tom, wei = candidates[0]
                    removeNeighborColor(dummyGraph, x, tom)
                    dummyGraph.remove_edge(x, tom)
                    addNeighborColor(dummyGraph, fro, tom)
                    dummyGraph.add_edge(fro, tom, weight=wei)
                    H = dummyGraph.copy()
            # NOTE(review): fires at 100001, while the message says 100000
            if counter == 100001:
                #drawGraph(H)
                print "reached 100000 iterations in Cheapest Successor Connection"
                #os.system('say "reached 100000 iterations in cheapest Successor Connection"')
                print 'Moving on to Cheapest Leaf Connection'
                #os.system('say "Moving on to cheapest Leaf Connection"')
                H = cheapestLeafConnection(G, H)
                return H
    return H
def cheapestSuccessorConnection(G, H):
    """Repeatedly rewire edges of H at nodes with more than two neighbours.

    Loops until ``udah_belom(H)`` is true.  For each node ``x`` with more
    than two neighbours, every edge ``(x, y)`` is tentatively removed and
    every node ``anak`` reached by a DFS from ``x`` that passes
    ``validColor(..., anak, y)`` becomes a candidate ``(anak, y, weight)``
    with the weight read from ``G``.  The lightest candidate replaces the edge
    ``(x, tom)``.  When the iteration counter reaches 100001 the work is
    handed over to ``cheapestLeafConnection``.

    NOTE(review): Python 2 / networkx 1.x code (print statements, ``len`` of
    ``H.neighbors``).  Returns the resulting graph.
    """
    nodes = H.nodes()
    candidates = []
    tmp_weight = 0
    counter = 0
    while not udah_belom(H):
        for x in nodes:
            candidates=[]
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    dummyGraph = H.copy()
                    #remove edge
                    removeNeighborColor(dummyGraph, x, y)
                    tmp_weight = G[x][y]['weight'] #save weight
                    dummyGraph.remove_edge(x,y)
                    # keys: nodes still reachable from x once (x, y) is gone
                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if validColor(dummyGraph, anak, y):
                            tmp = (anak, y, G[anak][y]['weight'])
                            candidates.append(tmp)
                    # put the edge back before trying the next neighbour
                    dummyGraph.add_edge(x, y, weight = tmp_weight)
                    addNeighborColor(dummyGraph, x, y)
                if len(candidates)>0:
                    # cheapest replacement first
                    candidates = sorted(candidates, key = lambda z: z[2])
                    fro, tom, wei = candidates[0]
                    removeNeighborColor(dummyGraph, x, tom)
                    dummyGraph.remove_edge(x,tom)
                    addNeighborColor(dummyGraph, fro, tom)
                    dummyGraph.add_edge(fro,tom, weight=wei)
                    H = dummyGraph.copy()
            # NOTE(review): fires at 100001, while the message says 100000
            if counter==100001:
                #drawGraph(H)
                print "reached 100000 iterations in Cheapest Successor Connection"
                #os.system('say "reached 100000 iterations in cheapest Successor Connection"')
                print 'Moving on to Cheapest Leaf Connection'
                #os.system('say "Moving on to cheapest Leaf Connection"')
                H = cheapestLeafConnection(G, H)
                return H
    return H
def directLeafConnection(G, H):
    """Rewire edges of H at nodes with more than two neighbours, first fit.

    Loops until ``udah_belom(H)`` is true or more than 300 seconds have
    passed since the module-level ``waktu1``.  For each node ``x`` with more
    than two neighbours, each edge ``(x, y)`` is removed from a copy of H;
    the first node reached by a DFS from ``x`` that has no entry in the DFS
    successor map and passes ``validColor`` is connected to ``y``.  After
    500001 iterations the graph is rebuilt from ``kruskal_mst(G)`` and the
    function calls itself again.

    NOTE(review): Python 2 / networkx 1.x code.  Returns the resulting graph.
    """
    nodes = H.nodes()
    counter = 0
    while not udah_belom(H):
        # give up after five minutes of wall-clock time
        if time.time() - waktu1 > 300:
            return H
        for x in nodes:
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    dummyGraph = H.copy()
                    dummyGraph.node[x]['neighbor_color'].remove(
                        dummyGraph.node[y]['color'])  #remove neighbor color
                    dummyGraph.node[y]['neighbor_color'].remove(
                        dummyGraph.node[x]['color'])  #remove neighbor color
                    dummyGraph.remove_edge(x, y)
                    # nodes with an entry in `tree` have DFS children
                    tree = nx.dfs_successors(dummyGraph, x)
                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if anak not in tree:
                            if validColor(dummyGraph, anak, y):
                                dummyGraph.add_edge(
                                    anak, y, weight=G[anak][y]['weight'])
                                addNeighborColor(dummyGraph, y, anak)
                                H = dummyGraph.copy()
                                #drawHraph(H)
                                break
            if counter % 100000 == 0:
                print 'iteration: ', counter, ' in Direct Leaf Connection'
            if counter == 500001:
                #os.system('say "Redo from scratch"')
                # restart from a fresh spanning tree of G
                mst = kruskal_mst(G)
                H = G.copy()
                H.remove_edges_from(H.edges())
                H.add_edges_from(mst)
                H = directLeafConnection(G, H)
                return H
    return H
def directLeafConnection(G, H):
    """Rewire edges of H at nodes with more than two neighbours, first fit.

    Loops until ``udah_belom(H)`` is true or more than 300 seconds have
    passed since the module-level ``waktu1``.  For each node ``x`` with more
    than two neighbours, each edge ``(x, y)`` is removed from a copy of H;
    the first node reached by a DFS from ``x`` that has no entry in the DFS
    successor map and passes ``validColor`` is connected to ``y``.  After
    500001 iterations the graph is rebuilt from ``kruskal_mst(G)`` and the
    function calls itself again.

    NOTE(review): Python 2 / networkx 1.x code.  Returns the resulting graph.
    """
    nodes = H.nodes()
    counter = 0
    while not udah_belom(H):
        # give up after five minutes of wall-clock time
        if time.time()-waktu1 > 300:
            return H
        for x in nodes:
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    dummyGraph = H.copy()
                    dummyGraph.node[x]['neighbor_color'].remove(dummyGraph.node[y]['color']) #remove neighbor color
                    dummyGraph.node[y]['neighbor_color'].remove(dummyGraph.node[x]['color']) #remove neighbor color
                    dummyGraph.remove_edge(x,y)
                    # nodes with an entry in `tree` have DFS children
                    tree = nx.dfs_successors(dummyGraph, x)
                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if anak not in tree:
                            if validColor(dummyGraph, anak, y):
                                dummyGraph.add_edge(anak,y, weight=G[anak][y]['weight'])
                                addNeighborColor(dummyGraph, y, anak)
                                H = dummyGraph.copy()
                                #drawHraph(H)
                                break
            if counter%100000==0:
                print 'iteration: ', counter, ' in Direct Leaf Connection'
            if counter == 500001:
                #os.system('say "Redo from scratch"')
                # restart from a fresh spanning tree of G
                mst = kruskal_mst(G)
                H = G.copy()
                H.remove_edges_from(H.edges())
                H.add_edges_from(mst)
                H = directLeafConnection(G, H)
                return H
    return H
def hamilton(scene):
    """Order the stored views of a scene into one path and write it to disk.

    Steps, as implemented below:

    1. Load every ``*.json`` view from ``./latentspace/autoview/<origin>``.
    2. Build an undirected room graph with one edge per door object that
       lists at least two room ids.
    3. Walk the rooms with ``hamiltonNextRoom`` starting at the first room
       that has a view, then prune sub-walks that visit no unvisited room
       with views.
    4. Chain the views room by room with ``hamiltonNext``, inserting a
       synthetic door view between consecutive rooms.
    5. Dump the resulting list to ``./latentspace/autoview/<origin>/path``.

    Mutates ``scene`` (``isVisited`` flags on rooms) and the loaded views.
    Returns the ordered list of views.
    """
    involvedRoomIds = []
    views = []
    # load existing views.
    for fn in os.listdir(f'./latentspace/autoview/{scene["origin"]}'):
        if '.json' not in fn:
            continue
        with open(f'./latentspace/autoview/{scene["origin"]}/{fn}') as f:
            views.append(json.load(f))
    for view in views:
        view['isVisited'] = False
        if view['roomId'] not in involvedRoomIds:
            involvedRoomIds.append(view['roomId'])
    print(involvedRoomIds)
    res = []
    # deciding connections of a floorplan.
    G = nx.Graph()
    for room in scene['rooms']:
        room['isVisited'] = False
        floorMeta = p2d(
            '.', '/dataset/room/{}/{}f.obj'.format(room['origin'],
                                                   room['modelId']))
        # NOTE(review): the bare except skips the room on any error, not only
        # a missing wall file -- confirm this is intended.
        try:
            H = sk.getWallHeight(
                f"./dataset/room/{room['origin']}/{room['modelId']}w.obj")
        except:
            continue
        for door in room['objList']:
            if 'coarseSemantic' not in door:
                continue
            if door['coarseSemantic'] not in ['Door', 'door']:
                continue
            if len(door['roomIds']) < 2:
                continue
            # if door['roomIds'][0] not in involvedRoomIds and door['roomIds'][1] not in involvedRoomIds:
            #     continue
            # centre of the door's bounding box in the x/z plane
            x = (door['bbox']['min'][0] + door['bbox']['max'][0]) / 2
            z = (door['bbox']['min'][2] + door['bbox']['max'][2]) / 2
            DIS = np.Inf
            # pick the wall segment closest to the door centre
            for wallIndex in range(floorMeta.shape[0]):
                wallIndexNext = (wallIndex + 1) % floorMeta.shape[0]
                dis = sk.pointToLineDistance(np.array([x, z]),
                                             floorMeta[wallIndex, 0:2],
                                             floorMeta[wallIndexNext, 0:2])
                if dis < DIS:
                    DIS = dis
                    direction = np.array(
                        [floorMeta[wallIndex, 2], 0, floorMeta[wallIndex, 3]])
            translate = np.array([x, H / 2, z])
            G.add_edge(door['roomIds'][0],
                       door['roomIds'][1],
                       translate=translate,
                       direction=direction,
                       directionToRoom=room['roomId'])
    pre = nx.dfs_predecessors(G)
    suc = nx.dfs_successors(G)
    print(pre, suc)
    # decide the s and t which are the start point and end point respectively.
    # ndproom = list(nx.dfs_successors(G).keys())[0]
    # ndproom = views[0]['roomId']
    ndproom = involvedRoomIds[0]
    roomOrder = []
    # hamiltonNextRoom signals the end of the walk with -1
    while ndproom != -1:
        roomOrder.append(ndproom)
        scene['rooms'][ndproom]['isVisited'] = True
        ndproom = hamiltonNextRoom(ndproom, pre, suc, scene)
    for room in scene['rooms']:
        room['isVisited'] = False
    print(roomOrder)

    def subPath(s):
        # Scan forward from position s until the same room reappears (or the
        # list ends). state is True when the scanned stretch contains a room
        # that has views and is not yet marked visited.
        if s == len(roomOrder) - 1:
            return (True, s)
        state = False
        start = roomOrder[s]
        s += 1
        while s < len(roomOrder) and roomOrder[s] != start:
            if roomOrder[s] in involvedRoomIds and not scene['rooms'][
                    roomOrder[s]]['isVisited']:
                state = True
            s += 1
        return (state, s)

    i = 0
    while i < len(roomOrder):
        state, s = subPath(i)
        if not state:
            # drop the fruitless detour and re-examine the same position
            roomOrder = roomOrder[0:i + 1] + roomOrder[s + 1:]
            i -= 1
        else:
            scene['rooms'][roomOrder[i]]['isVisited'] = True
        i += 1
    print(roomOrder)
    ndproom = roomOrder[0]
    # NOTE(review): ndpNext stays unbound if no view belongs to the first room
    for view in views:
        if view['roomId'] == ndproom:
            ndpNext = view
    # perform the algorithm of Angluin and Valiant.
    for i in range(1, len(roomOrder) + 1):
        while ndpNext is not None:
            ndp = ndpNext
            res.append(ndp)
            ndp['isVisited'] = True
            ndpNext = hamiltonNext(ndp, views, scene)
        if i == len(roomOrder):
            break
        lastndproom = roomOrder[i - 1]
        ndproom = roomOrder[i]
        edge = G[lastndproom][ndproom]
        # if edge['direction'].dot(edge['translate'] - ndp['probe']) < 0:
        # flip the stored direction when it was recorded for the other room
        if edge['directionToRoom'] != ndproom:
            edge['direction'] = -edge['direction']
        # synthetic view placed at the door between the two rooms
        ndpNext = {
            'roomId': ndproom,
            'probe': edge['translate'],
            'origin': edge['translate'].tolist(),
            'target': (edge['translate'] + edge['direction']).tolist(),
            'direction': edge['direction'].tolist()
        }
    with open(f'./latentspace/autoview/{scene["origin"]}/path', 'w') as f:
        json.dump(res, f, default=sk.jsonDumpsDefault)
    return res
def cheapestLeafConnection(G, H):
    """Rewire edges of H at nodes with more than two neighbours via DFS leaves.

    Loops until ``udah_belom(H)`` is true.  For each node ``x`` with more
    than two neighbours, every edge ``(x, y)`` is tentatively removed;
    nodes reached by a DFS from ``x`` that have no entry in the DFS successor
    map and pass ``validColor`` become candidates ``(anak, y, weight)`` with
    the weight read from ``G``.  The lightest candidate replaces ``(x, tom)``.
    Without candidates, every 10000th iteration one single-neighbour node is
    re-attached elsewhere, and at iteration 250001 the work is handed over to
    ``directLeafConnection``.

    NOTE(review): Python 2 / networkx 1.x code.  Returns the resulting graph.
    """
    nodes = H.nodes()
    candidates = []
    tmp_weight = 0
    counter = 0
    while not udah_belom(H):
        #counter = 0
        for x in nodes:
            candidates = []
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    #make a copy of current mst
                    dummyGraph = H.copy()
                    #remove edge
                    removeNeighborColor(dummyGraph, x, y)
                    tmp_weight = G[x][y]['weight']  #save weight
                    dummyGraph.remove_edge(x, y)
                    tree = nx.dfs_successors(dummyGraph, x)
                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if anak not in tree:  #if leaf
                            if validColor(dummyGraph, anak, y):
                                tmp = (anak, y, G[anak][y]['weight'])
                                candidates.append(tmp)
                    # put the edge back before trying the next neighbour
                    dummyGraph.add_edge(x, y, weight=tmp_weight)
                    addNeighborColor(dummyGraph, x, y)
                if len(candidates) > 0:
                    # cheapest replacement first
                    candidates = sorted(candidates, key=lambda z: z[2])
                    fro, tom, wei = candidates[0]
                    dummyGraph.add_edge(fro, tom, weight=wei)
                    removeNeighborColor(dummyGraph, x, tom)
                    dummyGraph.remove_edge(x, tom)
                    addNeighborColor(dummyGraph, fro, tom)
                    H = dummyGraph.copy()
                    #drawHraph(H)
                elif (counter % 10000) == 0:
                    if counter % 100000 == 0:
                        print 'no candidates,', counter, ' iterations in Cheapest Leaf Connection'
                    #drawGraph(dummyGraph)
                    # NOTE(review): this inner loop reuses the names x and
                    # x_neigh of the enclosing loop.
                    for x in nodes:
                        x_neigh = dummyGraph.neighbors(x)
                        if len(x_neigh) == 1:
                            dummyGraph.remove_edge(x, x_neigh[0])
                            removeNeighborColor(dummyGraph, x, x_neigh[0])
                            for y in dummyGraph.neighbors(x_neigh[0]):
                                if validColor(
                                        dummyGraph, x,
                                        y) and len(dummyGraph.neighbors(y)) < 3:
                                    dummyGraph.add_edge(x, y)
                                    addNeighborColor(dummyGraph, x, y)
                                    H = dummyGraph.copy()
                                    break
                            break
                elif counter == 250001:
                    print 'Using Cheapest Leaf Connection failed miserably =('
                    #os.system('say "Using candidates failed miserably..."')
                    print 'Try using Direct Leaf Connection'
                    #os.system('say "Try using direct leaf connection"')
                    H = directLeafConnection(G, dummyGraph)
                    return H
    return H
def minibatch_(w, loss__,alphas,learning_rate,test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,mean,variance, mode):
    """Process one image's proposal boxes for training, loss or statistics.

    Loads features and labels for image index ``minibatch`` through
    ``get_data``, reads box coordinates and ground-truth coordinates from
    hard-coded text files, keeps only samples with a positive label, and then
    depends on ``mode``:

    * ``'mean_variance'``: accumulates and returns
      ``(sum_x, n_samples, sum_sq_x)``.
    * ``'train'``: updates ``w`` with ``like_scikit`` for every tree node but
      the first and returns ``(w, len(pruned_y))``.
    * anything else: appends ``loss_simple(w, data)`` to ``loss__`` and
      returns ``loss__``.

    Relies on module-level names ``class_``, ``c`` and ``subsampling``.
    NOTE(review): Python 2 code (print statement).
    """
    X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'training', c)
    if X_p != []:
        boxes = []
        ground_truth = inv[0][2]
        img_nr = inv[0][0]
        # NOTE(review): if the file is missing only 'warning' is printed and
        # `f` is never assigned, so the loop below fails; the opened files are
        # also never closed in this function.
        if os.path.isfile('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'):
            f = open('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt', 'r')
        else:
            print 'warning'
        # one comma-separated coordinate line per entry of inv
        for line, y in zip(f, inv):
            tmp = line.split(',')
            coord = []
            for s in tmp:
                coord.append(float(s))
            boxes.append([coord, y[2]])
        assert(len(boxes)<500)
        boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,500)
        # NOTE(review): `gr` is unassigned when the ground-truth file is missing
        if os.path.isfile('/home/stahl/GroundTruth/sheep_coords_for_features/'+ (format(img_nr, "06d")) +'.txt'):
            gr = open('/home/stahl/GroundTruth/sheep_coords_for_features/'+ (format(img_nr, "06d")) +'.txt', 'r')
        ground_truths = []
        for line in gr:
            tmp = line.split(',')
            ground_truth = []
            for s in tmp:
                ground_truth.append(int(s))
            ground_truths.append(ground_truth)

        #prune boxes: keep samples whose label is positive
        pruned_x = []
        pruned_y = []
        pruned_boxes = []
        for i, y_ in enumerate(y_p):
            if y_ > 0:
                pruned_x.append(X_p[i])
                pruned_y.append(y_p[i])
                pruned_boxes.append(boxes[i])

        # NOTE(review): compares the list itself with `subsamples`; this looks
        # like len(pruned_boxes) was intended -- confirm.
        if subsampling and pruned_boxes > subsamples:
            pruned_x = pruned_x[0:subsamples]
            pruned_y = pruned_y[0:subsamples]
            pruned_boxes = pruned_boxes[0:subsamples]

        if mode == 'mean_variance':
            # running sums for a later mean/variance computation
            sum_x += np.array(pruned_x).sum(axis=0)
            n_samples += len(pruned_x)
            sum_sq_x += (np.array(pruned_x)**2).sum(axis=0)
            return sum_x,n_samples,sum_sq_x

        # create_tree
        G, levels = create_tree(pruned_boxes)

        norm_x = []
        #normalize
        for p_x in pruned_x:
            norm_x.append((p_x-mean)/variance)
        data = (G, levels, pruned_y, norm_x, pruned_boxes, ground_truths, alphas)
        sucs = nx.dfs_successors(G)
        predecs = nx.dfs_predecessors(G)

        #preprocess: node - children
        # nodes missing from `sucs` (no DFS children) get an empty list
        children = {}
        last = -1
        for node,children_ in zip(sucs.keys(),sucs.values()):
            if node != last+1:
                for i in range(last+1,node):
                    children[i] = []
                children[node] = children_
            elif node == last +1:
                children[node] = children_
            last = node
        if mode == 'train':
            # the first node of G is skipped
            nodes = list(G.nodes())[1:]
            for node in nodes:
                w = like_scikit(w,norm_x[node],pruned_y[node],learning_rate,alphas)
                #w = update_weights(w,data,predecs,children,node, learning_rate)
        else:
            loss__.append(loss_simple(w,data))#(w, data, predecs, children))
    # NOTE(review): with mode == 'train' and empty X_p, `pruned_y` is unbound here
    if mode == 'train':
        return w, len(pruned_y)
    else:
        return loss__
def resample_states(T, root, node_to_pmap, nstates,
        root_distn=None, P_default=None):
    """
    Sample a state for every node of a tree, root first.

    Parameters
    ----------
    T : undirected acyclic networkx graph
        A tree whose edges may carry a transition matrix under the key 'P'.
    root : integer
        The root node.
    node_to_pmap : dict
        A map from a node to an array that gives the subtree likelihood
        for each state.
    nstates : integer
        Number of states.  Not read by this function.
    root_distn : array, optional
        Weights over the root states.  When omitted, the root pmap alone
        is used as the sampling weights at the root.
    P_default : 2d ndarray, optional
        Transition matrix used for edges without a 'P' annotation.

    Returns
    -------
    node_to_sampled_state : dict
        A map from each node visited in DFS preorder to its sampled state.

    Raises
    ------
    _util.NumericalZeroProb
        If the likelihood computed at the root is not positive.

    """
    # Fail early when the data cannot be explained at all.
    likelihood = _mc0_dense.get_likelihood(
            node_to_pmap[root], root_distn=root_distn)
    if likelihood <= 0:
        raise _util.NumericalZeroProb(
                'numerically intractably small likelihood: %s' % likelihood)

    parent_of = nx.dfs_predecessors(T, root)

    # Preorder guarantees that a parent is sampled before its children.
    node_to_sampled_state = {}
    for node in nx.dfs_preorder_nodes(T, root):
        subtree_likelihoods = node_to_pmap[node]

        if node == root:
            prior = root_distn
        else:
            parent = parent_of[node]
            state_of_parent = node_to_sampled_state[parent]
            # Row of the edge's transition matrix selected by the parent state.
            transition = T[parent][node].get('P', P_default)
            prior = transition[state_of_parent]

        # Unnormalized posterior weights over the states of this node.
        weights = subtree_likelihoods
        if prior is not None:
            weights = prior * subtree_likelihoods
        node_to_sampled_state[node] = _util.array_random_choice(weights)

    return node_to_sampled_state
htgs = T['hashtags'][i].lower() if len(set(htgs.split(',')) & seed_htgs_CAF) != 0: CAF_ids.append(i) # construct tweet-reply network to get related tweets print('Listing relevant tweets ids...') edges = list(N['tweet-reply'].keys()) G = nx.Graph() G.add_edges_from(edges) # using depth-first search algorithm to traverse through the tree network using the seed nodes all_CAF_ids = [] for i in CAF_ids: all_CAF_ids.append(i) try: pred = nx.dfs_predecessors(G, source=i) all_CAF_ids.extend(list(pred.values())) except: pass try: succ = nx.dfs_successors(G, source=i) for j in succ.keys(): all_CAF_ids.append(j) all_CAF_ids.extend(succ[j]) except: pass all_CAF_ids = np.unique(all_CAF_ids) del G # subset Tweets print('Subsetting tweets...')
def get_node_to_distn(T, root, node_to_pmap, nstates,
        root_distn=None, P_default=None):
    """
    Get marginal state distributions at nodes in a tree.

    The nodes are visited in DFS preorder from the root.  The root
    distribution is the normalized product of its pmap and `root_distn`;
    every other node mixes, over the parent's states, the normalized product
    of the matching transition-matrix row and the node's pmap.

    Parameters
    ----------
    T : undirected acyclic networkx graph
        A tree whose edges may carry a transition matrix under the key 'P'.
    root : integer
        Root node.
    node_to_pmap : dict
        Map from a node to a 1d array giving subtree likelihoods per state.
    nstates : integer
        Number of states.
    root_distn : 1d ndarray, optional
        A finite distribution over root states.
    P_default : 2d ndarray, optional
        Transition matrix used for edges without a 'P' annotation.

    Returns
    -------
    node_to_distn : dict
        Map from each visited node to its distribution over states.

    Raises
    ------
    ValueError
        If `root_distn` or a pmap does not have `nstates` entries.
    Exception
        If an edge's transition matrix does not have `nstates` rows.

    """
    if P_default is not None:
        _density.check_square_dense(P_default)
    if root_distn is not None and root_distn.shape[0] != nstates:
        raise ValueError('inconsistent root distribution')

    parent_of = nx.dfs_predecessors(T, root)

    node_to_distn = {}
    for node in nx.dfs_preorder_nodes(T, root):
        likelihoods = node_to_pmap[node]
        if likelihoods.shape[0] != nstates:
            raise ValueError('inconsistent pmap')

        # The root has no parent: combine its pmap with the root prior.
        if node == root:
            node_to_distn[node] = get_normalized_ndarray_distn(
                    likelihoods, root_distn)
            continue

        parent = parent_of[node]
        parent_marginal = node_to_distn[parent]

        P = T[parent][node].get('P', P_default)
        _density.check_square_dense(P)
        if P.shape[0] != nstates:
            raise Exception('internal inconsistency')

        # Mix the per-parent-state conditionals by the parent's marginal.
        marginal = np.zeros(nstates, dtype=float)
        for sa in range(nstates):
            weight = parent_marginal[sa]
            if not weight:
                # parent states without mass contribute nothing
                continue
            conditional = get_normalized_ndarray_distn(P[sa] * likelihoods)
            for sb in range(nstates):
                marginal[sb] += weight * conditional[sb]
        node_to_distn[node] = marginal

    return node_to_distn
def dls_test_predecessor(self):
    """Check depth-limited (depth_limit=3) DFS parent maps on both fixtures.

    NOTE(review): the name does not start with ``test``; confirm the test
    runner in use actually collects it.
    """
    expected_for_g = {1: 0, 2: 1, 3: 2, 7: 2}
    expected_for_d = {8: 7, 9: 8, 3: 2, 7: 2}

    # self.G is searched from node 0, self.D from node 2.
    assert_equal(
        nx.dfs_predecessors(self.G, source=0, depth_limit=3), expected_for_g)
    assert_equal(
        nx.dfs_predecessors(self.D, source=2, depth_limit=3), expected_for_d)
def ScenarioTreeModelFromNetworkX(
        tree,
        node_name_attribute=None,
        edge_probability_attribute='probability',
        stage_names=None,
        scenario_name_attribute=None):
    """
    Create a scenario tree model from a networkx tree.  The
    height of the tree must be at least 1 (meaning at least
    2 stages).

    Optional Arguments:
      - node_name_attribute:
           By default, node names are the same as the node
           hash in the networkx tree. This keyword can be
           set to the name of some property of nodes in the
           graph that will be used for their name in the
           PySP scenario tree.
      - edge_probability_attribute:
           Can be set to the name of some property of edges
           in the graph that defines the conditional
           probability of that branch (default: 'probability').
           If this keyword is set to None, then all branches
           leaving a node are assigned equal conditional
           probabilities.
      - stage_names:
           Can define a list of stage names to use (assumed
           in time order). The length of this list must
           match the number of stages in the tree.
      - scenario_name_attribute:
           By default, scenario names are the same as the
           leaf-node hash in the networkx tree. This keyword
           can be set to the name of some property of
           leaf-nodes in the graph that will be used for
           their corresponding scenario in the PySP scenario
           tree.

    Returns:
      The model instance obtained from
      CreateAbstractScenarioTreeModel().create_instance(), populated
      with stages, nodes, scenarios, children and conditional
      probabilities.

    Raises:
      - ValueError: networkx is unavailable, the tree has fewer than
        2 stages, stage_names has the wrong length or repeats a name,
        or the probabilities leaving a node do not sum to 1.
      - TypeError: the graph is not a directed, rooted tree with all
        edges pointing away from the root.
      - KeyError: a node or edge lacks a requested attribute.

    Examples:

      - A 2-stage scenario tree with 10 scenarios:
           G = networkx.DiGraph()
           G.add_node("Root")
           N = 10
           for i in range(N):
               node_name = "Leaf"+str(i)
               G.add_node(node_name)
               G.add_edge("Root",node_name,probability=1.0/N)
           model = ScenarioTreeModelFromNetworkX(G)

      - A 4-stage scenario tree with 125 scenarios:
           branching_factor = 5
           height = 3
           G = networkx.balanced_tree(
                   branching_factor,
                   height,
                   networkx.DiGraph())
           model = ScenarioTreeModelFromNetworkX(
                       G,
                       edge_probability_attribute=None)
    """

    if not has_networkx:
        raise ValueError("networkx module is not available")

    if not networkx.is_tree(tree):
        raise TypeError(
            "object is not a tree (see networkx.is_tree)")

    if not networkx.is_directed(tree):
        raise TypeError(
            "object is not directed (see networkx.is_directed)")

    if not networkx.is_branching(tree):
        raise TypeError(
            "object is not a branching (see networkx.is_branching")

    if not networkx.is_arborescence(tree):
        raise TypeError("Object must be a directed, rooted tree "
                        "in which all edges point away from the "
                        "root (see networkx.is_arborescence)")

    # The root is the single node without incoming edges.
    # NOTE(review): in_degree().items(), tree.node and tree.edge below are
    # networkx 1.x style accessors -- confirm the supported networkx version.
    root = [u for u,d in tree.in_degree().items() if d == 0]
    assert len(root) == 1
    root = root[0]
    # One stage per tree level: longest root-to-leaf path plus one.
    num_stages = networkx.eccentricity(tree, v=root) + 1
    if num_stages < 2:
        raise ValueError(
            "The number of stages must be at least 2")
    m = CreateAbstractScenarioTreeModel()
    if stage_names is not None:
        unique_stage_names = set()
        for cnt, stage_name in enumerate(stage_names,1):
            m.Stages.add(stage_name)
            unique_stage_names.add(stage_name)
        # NOTE(review): `cnt` is unbound when stage_names is empty
        if cnt != num_stages:
            raise ValueError(
                "incorrect number of stages names (%s), should be %s"
                % (cnt, num_stages))
        if len(unique_stage_names) != cnt:
            raise ValueError("all stage names were not unique")
    else:
        for i in range(num_stages):
            m.Stages.add('Stage'+str(i+1))
    node_to_name = {}
    node_to_scenario = {}
    def _setup(u, succ):
        # First pass (abstract model): register the name of u and of every
        # node below it, and one scenario per leaf.
        if node_name_attribute is not None:
            if node_name_attribute not in tree.node[u]:
                raise KeyError(
                    "node '%s' missing name attribute: '%s'"
                    % (u, node_name_attribute))
            node_name = tree.node[u][node_name_attribute]
        else:
            node_name = u
        node_to_name[u] = node_name
        m.Nodes.add(node_name)
        if u in succ:
            for v in succ[u]:
                _setup(v, succ)
        else:
            # a leaf node
            if scenario_name_attribute is not None:
                if scenario_name_attribute not in tree.node[u]:
                    raise KeyError(
                        "node '%s' missing attribute: '%s'"
                        % (u, scenario_name_attribute))
                scenario_name = tree.node[u][scenario_name_attribute]
            else:
                scenario_name = u
            node_to_scenario[u] = scenario_name
            m.Scenarios.add(scenario_name)
    _setup(root,
           networkx.dfs_successors(tree, root))
    # From here on `m` is the concrete instance, not the abstract model.
    m = m.create_instance()
    def _add_node(u, stage, succ, pred):
        # Second pass (instance): assign stage, conditional probability and
        # children of u, recursing into the subtree; returns u's model name.
        if node_name_attribute is not None:
            if node_name_attribute not in tree.node[u]:
                raise KeyError(
                    "node '%s' missing name attribute: '%s'"
                    % (u, node_name_attribute))
            node_name = tree.node[u][node_name_attribute]
        else:
            node_name = u
        m.NodeStage[node_name] = m.Stages[stage]
        if u == root:
            m.ConditionalProbability[node_name] = 1.0
        else:
            assert u in pred
            # edge from the DFS parent of u to u
            edge = tree.edge[pred[u]][u]
            probability = None
            if edge_probability_attribute is not None:
                if edge_probability_attribute not in edge:
                    raise KeyError(
                        "edge '(%s, %s)' missing probability attribute: '%s'"
                        % (pred[u], u, edge_probability_attribute))
                probability = edge[edge_probability_attribute]
            else:
                # equal share among the siblings of u
                probability = 1.0/len(succ[pred[u]])
            m.ConditionalProbability[node_name] = probability
        if u in succ:
            child_names = []
            for v in succ[u]:
                child_names.append(
                    _add_node(v, stage+1, succ, pred))
            total_probability = 0.0
            for child_name in child_names:
                m.Children[node_name].add(child_name)
                total_probability += \
                    value(m.ConditionalProbability[child_name])
            # branches leaving a node must form a probability distribution
            if abs(total_probability - 1.0) > 1e-5:
                raise ValueError(
                    "edge probabilities leaving node '%s' "
                    "do not sum to 1 (total=%r)"
                    % (u, total_probability))
        else:
            # a leaf node
            scenario_name = node_to_scenario[u]
            m.ScenarioLeafNode[scenario_name] = node_name
            m.Children[node_name].clear()

        return node_name

    _add_node(root,
              1,
              networkx.dfs_successors(tree, root),
              networkx.dfs_predecessors(tree, root))

    return m
def ScenarioTreeModelFromNetworkX(
        tree,
        node_name_attribute=None,
        edge_probability_attribute='probability',
        stage_names=None,
        scenario_name_attribute=None):
    """
    Create a scenario tree model from a networkx tree.  The
    height of the tree must be at least 1 (meaning at least
    2 stages).

    Optional Arguments:
      - node_name_attribute:
           By default, node names are the same as the node
           hash in the networkx tree. This keyword can be
           set to the name of some property of nodes in the
           graph that will be used for their name in the
           PySP scenario tree.
      - edge_probability_attribute:
           Can be set to the name of some property of edges
           in the graph that defines the conditional
           probability of that branch (default:
           'probability'). If this keyword is set to None,
           then all branches leaving a node are assigned
           equal conditional probabilities.
      - stage_names:
           Can define a list of stage names to use (assumed
           in time order). The length of this list must
           match the number of stages in the tree.
      - scenario_name_attribute:
           By default, scenario names are the same as the
           leaf-node hash in the networkx tree. This keyword
           can be set to the name of some property of
           leaf-nodes in the graph that will be used for
           their corresponding scenario in the PySP scenario
           tree.

    Returns:
      The instance created from the abstract scenario tree
      model (the result of ``create_instance()``), with its
      node, stage, scenario, child and conditional
      probability components filled in from the tree.

    Raises:
      - ValueError: networkx is not available, the tree has
        fewer than 2 stages, stage_names has the wrong length
        or contains duplicates, or the probabilities of the
        edges leaving a node do not sum to 1.
      - TypeError: the object is not a directed, rooted tree
        with all edges pointing away from the root.
      - KeyError: a node or edge is missing a requested
        attribute.

    Examples:

      - A 2-stage scenario tree with 10 scenarios:
           G = networkx.DiGraph()
           G.add_node("Root")
           N = 10
           for i in range(N):
               node_name = "Leaf"+str(i)
               G.add_node(node_name)
               G.add_edge("Root",node_name,probability=1.0/N)
           model = ScenarioTreeModelFromNetworkX(G)

      - A 4-stage scenario tree with 125 scenarios:
           branching_factor = 5
           height = 3
           G = networkx.balanced_tree(
                   branching_factor,
                   height,
                   networkx.DiGraph())
           model = ScenarioTreeModelFromNetworkX(
                       G,
                       edge_probability_attribute=None)
    """

    if not has_networkx:
        raise ValueError(
            "networkx module is not available")

    if not networkx.is_tree(tree):
        raise TypeError(
            "object is not a tree (see networkx.is_tree)")

    if not networkx.is_directed(tree):
        raise TypeError(
            "object is not directed (see networkx.is_directed)")

    if not networkx.is_branching(tree):
        raise TypeError(
            "object is not a branching (see networkx.is_branching")

    if not networkx.is_arborescence(tree):
        raise TypeError("Object must be a directed, rooted tree "
                        "in which all edges point away from the "
                        "root (see networkx.is_arborescence)")

    in_degree_items = tree.in_degree()
    # Prior to networkx ~2.0, in_degree() returned a dictionary.
    # Now it is a view on items, so only call .items() for the old case
    if hasattr(in_degree_items, 'items'):
        in_degree_items = in_degree_items.items()
    root = [u for u, d in in_degree_items if d == 0]
    assert len(root) == 1
    root = root[0]

    # The per-node attribute mapping was exposed as DiGraph.node in
    # older networkx releases; that attribute no longer exists in
    # newer releases, where the mapping is reached through
    # DiGraph.nodes.  Prefer the old name when it is present (in
    # networkx 1.x, .nodes is a method, not a mapping).
    node_attrs = getattr(tree, 'node', None)
    if node_attrs is None:
        node_attrs = tree.nodes

    num_stages = networkx.eccentricity(tree, v=root) + 1
    if num_stages < 2:
        raise ValueError(
            "The number of stages must be at least 2")

    m = CreateAbstractScenarioTreeModel()
    if stage_names is not None:
        unique_stage_names = set()
        # cnt must be bound even when stage_names is empty, so that
        # the length check below raises ValueError, not NameError.
        cnt = 0
        for cnt, stage_name in enumerate(stage_names, 1):
            m.Stages.add(stage_name)
            unique_stage_names.add(stage_name)
        if cnt != num_stages:
            raise ValueError(
                "incorrect number of stages names (%s), should be %s"
                % (cnt, num_stages))
        if len(unique_stage_names) != cnt:
            raise ValueError("all stage names were not unique")
    else:
        for i in range(num_stages):
            m.Stages.add('Stage'+str(i+1))

    node_to_name = {}
    node_to_scenario = {}

    def _setup(u, succ):
        # First pass (on the abstract model): register the name of
        # every tree node and the scenario name of every leaf.
        if node_name_attribute is not None:
            if node_name_attribute not in node_attrs[u]:
                raise KeyError(
                    "node '%s' missing name attribute: '%s'"
                    % (u, node_name_attribute))
            node_name = node_attrs[u][node_name_attribute]
        else:
            node_name = u
        node_to_name[u] = node_name
        m.Nodes.add(node_name)
        if u in succ:
            for v in succ[u]:
                _setup(v, succ)
        else:
            # a leaf node
            if scenario_name_attribute is not None:
                if scenario_name_attribute not in node_attrs[u]:
                    raise KeyError(
                        "node '%s' missing attribute: '%s'"
                        % (u, scenario_name_attribute))
                scenario_name = node_attrs[u][scenario_name_attribute]
            else:
                scenario_name = u
            node_to_scenario[u] = scenario_name
            m.Scenarios.add(scenario_name)

    # The successor map is needed by both passes; compute it once.
    succ = networkx.dfs_successors(tree, root)
    pred = networkx.dfs_predecessors(tree, root)

    _setup(root, succ)
    m = m.create_instance()

    def _add_node(u, stage, succ, pred):
        # Second pass (on the concrete instance): assign stages,
        # conditional probabilities and children.  The name was
        # already validated and recorded by _setup.
        node_name = node_to_name[u]
        m.NodeStage[node_name] = m.Stages[stage]
        if u == root:
            m.ConditionalProbability[node_name] = 1.0
        else:
            assert u in pred
            # prior to networkx ~2.0, we used a .edge attribute on DiGraph,
            # which no longer exists.
            if hasattr(tree, 'edge'):
                edge = tree.edge[pred[u]][u]
            else:
                edge = tree.edges[pred[u], u]
            if edge_probability_attribute is not None:
                if edge_probability_attribute not in edge:
                    raise KeyError(
                        "edge '(%s, %s)' missing probability attribute: '%s'"
                        % (pred[u], u, edge_probability_attribute))
                probability = edge[edge_probability_attribute]
            else:
                # no attribute requested: siblings are equally likely
                probability = 1.0/len(succ[pred[u]])
            m.ConditionalProbability[node_name] = probability
        if u in succ:
            child_names = [_add_node(v, stage+1, succ, pred)
                           for v in succ[u]]
            total_probability = 0.0
            for child_name in child_names:
                m.Children[node_name].add(child_name)
                total_probability += \
                    value(m.ConditionalProbability[child_name])
            if abs(total_probability - 1.0) > 1e-5:
                raise ValueError(
                    "edge probabilities leaving node '%s' "
                    "do not sum to 1 (total=%r)"
                    % (u, total_probability))
        else:
            # a leaf node
            scenario_name = node_to_scenario[u]
            m.ScenarioLeafNode[scenario_name] = node_name
            m.Children[node_name].clear()

        return node_name

    _add_node(root, 1, succ, pred)

    return m
def minibatch_(clf,scaler,w, loss__,mse,hinge1,hinge2,full_image,alphas,learning_rate,test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,mean,variance, mode):
    """Process a single image (one "minibatch") according to ``mode``.

    The features/labels for image index ``minibatch`` are fetched with
    ``get_data`` (from the 'test' split for the modes 'loss_test',
    'loss_scikit_test' and 'levels_test', otherwise from 'training'),
    the proposal boxes and ground-truth boxes are read from disk, the
    samples are optionally pruned / subsampled / normalized (module-level
    flags ``prune``, ``subsampling``, ``normalize``), a box tree is built
    with ``create_tree`` and then the mode-specific work is done.

    Return value by mode:
      - 'mean_variance': ``(sum_x, n_samples, sum_sq_x, scaler)`` after
        accumulating the running sums and calling ``scaler.partial_fit``.
      - 'train': ``(w, len(pruned_y))`` after one update per tree node.
      - 'scikit_train': ``clf`` after ``clf.partial_fit``.
      - 'loss_train' / 'loss_test':
        ``(loss__, mse, hinge1, hinge2, full_image)``, each list having
        one entry appended.
      - 'loss_scikit_train' / 'loss_scikit_test': ``loss__`` with the
        squared error of ``clf.predict`` appended.
      - 'levels_train' / 'levels_test':
        ``(cpls, truelvls, used_boxes_, pruned_boxes, preds)``; a debug
        figure is also saved under '/home/stahl/'.
      - 'finite_differences', any other mode, or when ``get_data``
        returns no features: ``None`` (implicit).

    Raises:
      IOError: the proposal-window coordinate file for the image does
        not exist (previously this surfaced as an unbound-local error).
    """
    if mode == 'loss_test' or mode == 'loss_scikit_test' or mode == 'levels_test':
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'test', c)
    else:
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'training', c)
    if X_p != []:
        boxes = []
        img_nr = inv[0][0]
        if less_features:
            X_p = [fts[0:features_used] for fts in X_p]

        # Proposal boxes: one comma-separated coordinate row per sample.
        coords_path = '/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'
        if not os.path.isfile(coords_path):
            print('warning')
            # Nothing below can work without the boxes; fail explicitly.
            raise IOError('missing proposal window file: ' + coords_path)
        with open(coords_path, 'r') as f:
            for line, y in zip(f, inv):
                coord = [float(s) for s in line.split(',')]
                boxes.append([coord, y[2]])
        assert(len(boxes)<500)
        boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,500)

        # Ground-truth boxes: one comma-separated integer row per box.
        # A missing file is treated as "no ground-truth boxes".
        ground_truths = []
        gt_path = '/home/stahl/GroundTruth/sheep_coords_for_features/'+ (format(img_nr, "06d")) +'.txt'
        if os.path.isfile(gt_path):
            with open(gt_path, 'r') as gr:
                for line in gr:
                    ground_truths.append([int(s) for s in line.split(',')])

        #prune boxes
        if prune:
            # keep only the samples with a positive label
            kept = [i for i, y_ in enumerate(y_p) if y_ > 0]
            pruned_x = [X_p[i] for i in kept]
            pruned_y = [y_p[i] for i in kept]
            pruned_boxes = [boxes[i] for i in kept]
        else:
            pruned_x = X_p
            pruned_y = y_p
            pruned_boxes = boxes

        # Compare the *number* of boxes with the limit (the list itself
        # was compared with the int before).
        if subsampling and len(pruned_boxes) > subsamples:
            pruned_x = pruned_x[0:subsamples]
            pruned_y = pruned_y[0:subsamples]
            pruned_boxes = pruned_boxes[0:subsamples]

        if mode == 'mean_variance':
            sum_x += np.array(pruned_x).sum(axis=0)
            n_samples += len(pruned_x)
            sum_sq_x += (np.array(pruned_x)**2).sum(axis=0)
            scaler.partial_fit(pruned_x)  # Don't cheat - fit only on training data
            return sum_x,n_samples,sum_sq_x, scaler

        # create_tree
        G, levels = create_tree(pruned_boxes)

        #normalize
        if normalize:
            norm_x = scaler.transform(pruned_x)
        else:
            norm_x = pruned_x
        data = (G, levels, pruned_y, norm_x, pruned_boxes, ground_truths, alphas)
        sucs = nx.dfs_successors(G)
        predecs = nx.dfs_predecessors(G)

        #preprocess: node - children
        # Nodes skipped between consecutive keys of `sucs` get an empty
        # child list.
        children = {}
        last = -1
        for node, children_ in sucs.items():
            for i in range(last + 1, node):
                children[i] = []
            children[node] = children_
            last = node

        if mode == 'train':
            for node in list(G.nodes()):
                print(node)
                if node == 0:
                    w = learn_root(w,norm_x[0],pruned_y[0],learning_rate,alphas)
                else:
                    w = like_scikit(scaler,w,norm_x,pruned_y,node,predecs,children,pruned_boxes,learning_rate,alphas,img_nr)
            return w, len(pruned_y)
        elif mode == 'scikit_train':
            clf.partial_fit(norm_x,pruned_y)
            return clf
        elif mode == 'loss_train' or mode == 'loss_test':
            loss__.append(loss(scaler, w, data, predecs, children,img_nr,-1))
            mse.append(((data[2] - np.dot(w,np.array(data[3]).T)) ** 2).sum())
            # same loss with only alphas[2] / only alphas[3] switched on
            a2 = alphas[2]
            data = (G, levels, pruned_y, norm_x, pruned_boxes, ground_truths, [0,0,a2,0])
            hinge1.append(loss(scaler, w, data, predecs, children,img_nr,-1))
            a3 = alphas[3]
            data = (G, levels, pruned_y, norm_x, pruned_boxes, ground_truths, [0,0,0,a3])
            hinge2.append(loss(scaler, w, data, predecs, children,img_nr,-1))
            full_image.append([pruned_y[0],np.dot(w,np.array(norm_x[0]).T)])
            return loss__, mse,hinge1,hinge2,full_image
        elif mode == 'loss_scikit_test' or mode == 'loss_scikit_train':
            loss__.append(((clf.predict(norm_x) - pruned_y)**2).sum())
            return loss__
        elif mode == 'finite_differences':
            feature = random.sample(range(4096),1)[0]
            #1. Pick an example z.
            example = random.sample(range(len(norm_x[1:])),1)[0]
            #2. Compute the loss Q(z, w) for the current w.
            Q = loss(scaler, w,data, predecs, children,img_nr,example)
            #3. Compute the gradient g = ∇w Q(z, w).
            # NOTE(review): this passes the unpruned `boxes`, while the
            # loss uses `pruned_boxes` via `data` - confirm it is intended.
            g = gradient(w,norm_x,pruned_y,example,predecs,children,boxes,alphas,img_nr,feature)
            #4. Apply a slight perturbation w0 = w +δ. For instance, change a single weight
            #by a small increment, or use δ = −γg with γ small enough.
            # NOTE(review): w0 aliases w, so the caller's w is perturbed
            # in place as well - confirm this side effect is wanted.
            w0 = w
            w0[feature] = w0[feature] + delta
            #5. Compute the new loss Q(z, w0) and verify that Q(z, w0) ≈ Q(z, w) + δg
            # Q(z, w + delta*e_i) ≈ ( Q(z, w) + delta * g_i )
            Q_ = loss(scaler, w0,data, predecs, children,img_nr,example)
        elif mode == 'levels_train' or mode == 'levels_test':
            im = mpimg.imread('/home/stahl/Images/'+ (format(img_nr, "06d")) +'.jpg')
            plt.imshow(im)
            preds = [np.dot(w, x_) for x_ in norm_x]
            cpls = []
            truelvls = []
            used_boxes_ = []
            # to get prediction min and max for colorbar
            min_pred = 10
            max_pred = -5
            for level in levels:
                cpl,used_boxes = count_per_level(scaler,w, preds, img_nr, pruned_boxes,levels[level], '')
                if used_boxes is not None:
                    used_b_preds = [x[1] for x in used_boxes]
                    min_pred = min(min_pred, min(used_b_preds))
                    max_pred = max(max_pred, max(used_b_preds))
            min_pred = min(min_pred, min(preds))
            max_pred = max(max_pred, max(preds))
            # single-argument form: identical output under the Python 2
            # print statement, and valid syntax on Python 3
            print('minmax of intersections:  %s %s' % (min_pred, max_pred))
            cNorm = colors.Normalize(vmin=min_pred, vmax=max_pred)
            scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=pl.cm.jet)
            scalarMap.set_array(range(int(round(min_pred - 0.5)), int(round(max_pred + 0.5))))
            # one translucent rectangle per proposal box, colored by prediction
            for pr_box, pr in zip(pruned_boxes,preds):
                pru_box = pr_box[0]
                colorVal = scalarMap.to_rgba(pr)
                ax = plt.gca()
                ax.add_patch(Rectangle((int(pru_box[0]), int(pru_box[1])), int(pru_box[2] - pru_box[0]), int(pru_box[3] - pru_box[1]), alpha=0.1, facecolor = colorVal, edgecolor = 'black'))
            for level in levels:
                cpl,used_boxes = count_per_level(scaler,w, preds, img_nr, pruned_boxes,levels[level], '')
                cpls.append(cpl)
                #plot image and predictions as color - only for debugging/testing
                if used_boxes is not None:
                    for u_box in used_boxes:
                        # Draw the box of *this* entry; the stale `pr_box`
                        # left over from the loop above was used before.
                        # Entries are read as [coords, prediction], matching
                        # the x[1] prediction lookup above.
                        pru_box = u_box[0]
                        colorVal = scalarMap.to_rgba(u_box[1])
                        ax = plt.gca()
                        ax.add_patch(Rectangle((int(pru_box[0]), int(pru_box[1])), int(pru_box[2] - pru_box[0]), int(pru_box[3] - pru_box[1]), alpha=0.1, facecolor = colorVal, edgecolor = 'black'))
            matplotlib.pylab.colorbar(scalarMap, shrink=0.9)
            plt.draw()
            plt.savefig('/home/stahl/'+str(img_nr))
            plt.clf()
            return cpls, truelvls, used_boxes_,pruned_boxes,preds
import re
import sys

import networkx as nx

# Raw strings: "\w" / "\d" in a plain string literal are invalid escape
# sequences.  "\d+" accepts counts with more than one digit.
node_regex = re.compile(r"(\w+ \w+) bags contain ")
bags_regex = re.compile(r"(\d+) (\w+ \w+) bags?[,.] ?")

# Edges point from a contained bag to the bag that contains it, so a
# depth-first search from a bag reaches every bag that can hold it.
graph = nx.DiGraph()
for line in sys.stdin:
    if not line.strip():
        # tolerate blank lines (e.g. a trailing newline in the input)
        continue
    bag_name = node_regex.match(line).group(1)
    content = bags_regex.findall(line) if "no other bags" not in line else []
    graph.add_node(bag_name)  # idempotent
    for count_str, next_bag in content:
        graph.add_node(next_bag)  # still idempotent
        count = int(count_str)
        graph.add_edge(next_bag, bag_name, weight=count)

my_bag = "shiny gold"
# dfs_predecessors maps every node reached from my_bag (except my_bag
# itself) to its DFS parent, so its size is the number of reachable bags.
print(len(nx.dfs_predecessors(graph, my_bag)))
def main():
    """Grid-search training driver (Python 2 syntax).

    Steps performed by the code below:
      1. Accumulate per-feature sums over ``images`` minibatches (mode
         'mean_variance') and derive ``mean`` / ``variance`` from them.
      2. For every (alpha1, learning_rate0) pair, train ``w`` for
         ``epochs`` epochs over shuffled images, recording the average
         loss, the learning-rate schedule and a sample of the weights.
      3. Pick the configuration with the smallest final loss, plot the
         loss / learning-rate / weight curves, pickle ``w_best`` and
         terminate the process through ``sys.exit``.

    Everything after the ``sys.exit`` call is never executed.

    NOTE(review): the ``minibatch_`` calls here pass 14 positional
    arguments and use mode 'test', while the ``minibatch_`` visible in
    this file takes 20 parameters and has no 'test' mode - confirm
    which version of ``minibatch_`` this driver is meant to run against.
    """
    test_imgs, train_imgs = get_seperation()
    # learn
    # if os.path.isfile('/home/stahl/Models/'+class_+c+'normalized_constrained.pickle'):
    #     with open('/home/stahl/Models/'+class_+c+'normalized_constrained.pickle', 'rb') as handle:
    #         w = pickle.load(handle)
    # else:
    loss_ = {}                   # (alpha1, learning_rate0) -> list of average losses, one per epoch
    weights = {}                 # (alpha1, learning_rate0) -> final weight vector
    gamma = 0.5
    epochs = 50
    images = 10
    subsamples = 10
    weights_visualization = {}   # (alpha1, learning_rate0) -> per-epoch values of the sampled weights
    learning_rates = [math.pow(10,-3),math.pow(10,-4),math.pow(10,-5),math.pow(10,-6)]
    learning_rates_ = {}         # learning_rate0 -> learning rate in effect at the start of each epoch
    weights_sample = random.sample(range(4096), 10)   # indices of the weights tracked for plotting
    all_alphas = [0,math.pow(10,-1),math.pow(10,-2),math.pow(10,-3)]
    sum_x = np.zeros(4096)
    n_samples = 0.0
    sum_sq_x = np.zeros(4096)
    #TODO: normalize
    for minibatch in range(0,images):
        sum_x,n_samples,sum_sq_x = minibatch_([], [],[],[],test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,None,None,'mean_variance')
    mean = sum_x/n_samples
    # sample variance computed from the running sum and sum of squares
    variance = (sum_sq_x - (sum_x * sum_x) / n_samples) / (n_samples - 1)
    print mean,variance
    raw_input()   # blocks until the user presses enter
    for alpha1 in all_alphas:
        for learning_rate0 in learning_rates:
            learning_rate = learning_rate0
            alphas = [alpha1,0,0]
            w = np.zeros(4096)
            change = -100000
            for epoch in range(epochs):
                print epoch, learning_rate
                if learning_rate0 in learning_rates_:
                    learning_rates_[learning_rate0].append(learning_rate)
                else:
                    learning_rates_[learning_rate0] = [learning_rate]
                #shuffle images, not boxes!
                shuffled = range(0,images)
                random.shuffle(shuffled)
                for minibatch in shuffled:
                    w,t = minibatch_(w, [],alphas,learning_rate,test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,mean,variance,'train')
                    #update learning_rate
                    learning_rate = learning_rate0 * (1+learning_rate0*gamma*t)**-1
                #compute average loss on training set
                loss__ = []
                for minibatch in range(0,images):
                    loss__ = minibatch_(w, loss__,alphas,learning_rate,test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,mean,variance,'test')
                # save avg loss for plotting
                temp_label = [alphas[0], learning_rate0]
                llloss = sum(loss__)/len(loss__)
                if tuple(temp_label) in loss_:
                    # difference to the previous epoch's average loss
                    change = llloss - loss_[alphas[0], learning_rate0][-1]
                print 'avg', llloss, ' change: ', change
                if tuple(temp_label) in loss_:
                    loss_[alphas[0], learning_rate0].append(sum(loss__)/len(loss__))
                else:
                    loss_[alphas[0], learning_rate0] = [sum(loss__)/len(loss__)]
                # save sample weights for plotting
                ww_ = []
                for w_ in weights_sample:
                    ww_.append(w[w_])
                temp_label = [alphas[0],learning_rate0]
                if tuple(temp_label) in weights_visualization:
                    weights_visualization[alphas[0],learning_rate0].append(ww_)
                else:
                    weights_visualization[alphas[0],learning_rate0] = [ww_]
                #TODO: update learning rate
            #save final weights
            weights[alphas[0], learning_rate0] = w
    # best configuration = smallest loss after the last epoch
    final_model_losses = [x[-1] for x in loss_.values()]
    best_model_index = final_model_losses.index(min(final_model_losses))
    # indexing .keys() relies on Python 2, where it returns a list
    a1, learning_rate_0 = loss_.keys()[best_model_index]
    print a1, learning_rate_0
    w_best = weights[a1, learning_rate_0]
    #plot
    for i,l in zip(loss_.keys(),loss_.values()):
        to_plot = [math.log(a) for a in loss_[i]]
        labl = "alpha1=%s"%i[0]
        plt.plot(range(len(loss_[i])),to_plot,'-', label=labl)
    plt.title('Learning rate=%s'%learning_rate_0)
    plt.xlabel('Iterations')
    plt.ylabel('Log(Loss)')
    plt.legend()
    plt.savefig('/home/stahl/debugwithalphas.png')
    plt.figure()
    for l in learning_rates_:
        plt.plot(range(len(learning_rates_[l])),learning_rates_[l],label='start=%s'%l)
    plt.xlabel('Iterations')
    plt.ylabel('Learning rate')
    plt.legend()
    plt.savefig('/home/stahl/learning_ratewithalphas.png')
    plt.figure()
    for in_ in range(len(weights_sample)):
        # trajectory of one sampled weight across epochs for the best configuration
        refactor = [weights_visualization[a1,learning_rate_0][x][in_] for x in range(len(weights_visualization[a1,learning_rate_0]))]
        plt.plot(range(len(refactor)),refactor,'-')
    plt.xlabel('Iterations')
    plt.ylabel('Weights')
    plt.savefig('/home/stahl/best_weights_pruned_withalphas.png')
    print "model learned"
    with open('/home/stahl/Models/'+class_+c+'normalized_constrained_best.pickle', 'wb') as handle:
        pickle.dump(w_best, handle)
    # sys.exit raises SystemExit with this message; nothing below this
    # line runs.
    sys.exit("Error message")
    #TODO: compute average loss test set using best configuration on hold out set
    loss__ = []
    for minibatch in range(0,100,1):
        print alphas, learning_rate, minibatch
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'test', c)
        if X_p != []:
            #TODO: prune?
            boxes = []
            ground_truth = inv[0][2]
            img_nr = inv[0][0]
            if os.path.isfile('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'):
                f = open('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt', 'r')
            else:
                print 'warning'
            # NOTE(review): `f` is unbound here when the file is missing,
            # and the handle is never closed.
            for line, y in zip(f, inv):
                tmp = line.split(',')
                coord = []
                for s in tmp:
                    coord.append(float(s))
                boxes.append([coord, y[2]])
            assert(len(boxes)<500)
            boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,500)
            if os.path.isfile('/home/stahl/GroundTruth/sheep_coords_for_features/'+ (format(img_nr, "06d")) +'.txt'):
                gr = open('/home/stahl/GroundTruth/sheep_coords_for_features/'+ (format(img_nr, "06d")) +'.txt', 'r')
            ground_truths = []
            for line in gr:
                tmp = line.split(',')
                ground_truth = []
                for s in tmp:
                    ground_truth.append(int(s))
                ground_truths.append(ground_truth)
            # create_tree
            G, levels = create_tree(boxes)
            # normalize
            new_matrix = preprocessing.normalize(X_p, norm='l2', axis=0)
            root = boxes[0]
            boxes_ = boxes[1:]
            for box, y,i_ in zip(boxes_, y_p[1:],range(1,len(boxes))):
                if i_ in G.nodes():
                    # NOTE(review): this yields the position of i_ among the
                    # values of the predecessor dict, not the parent node of
                    # i_ - confirm the intent.
                    parent = nx.dfs_predecessors(G,i_).values().index(i_)
                    for i, b_i in enumerate(levels.values()):
                        if any(x == parent for x in b_i):
                            level = i
                            break
                else:
                    # sample i_ is not a node of the tree: drop its row/label
                    print len(new_matrix), i_
                    if i_ >= len(new_matrix):
                        new_matrix = new_matrix[0:-1]
                        y_p = y_p[0:-1]
                    else:
                        new_matrix = np.vstack((new_matrix[0:i_],new_matrix[i_+1:]))
                        y_p_temp = y_p[0:i_]
                        # NOTE(review): append() adds the remaining labels as
                        # one nested list; extend() may have been intended.
                        y_p_temp.append(y_p[i_+1:])
                        y_p = y_p_temp
            # y_p = y_p[0:8]
            # new_matrix = new_matrix[0:8]
            data = (G, levels, y_p, new_matrix, boxes, ground_truths, alphas)
            sucs = nx.dfs_successors(G)
            predecs = nx.dfs_predecessors(G)
            #preprocess: node - children
            children = {}
            last = -1
            #print sucs, predecs
            for node,children_ in zip(sucs.keys(),sucs.values()):
                #print node, children_, predecs.values()[node-1]
                #print node,children_, last+1
                #raw_input()
                if node != last+1:
                    # nodes skipped since the previous key get an empty child list
                    for i in range(last+1,node):
                        children[i] = []
                    children[node] = children_
                elif node == last +1:
                    children[node] = children_
                last = node
            loss__.append(loss(w, data, predecs, children))
    temp_label = [alphas[0],alphas[1],alphas[2],learning_rate]
    if tuple(temp_label) in loss_:
        loss_[alphas[0],alphas[1],alphas[2],learning_rate].append(sum(loss__)/len(loss__))
    else:
        loss_[alphas[0],alphas[1],alphas[2],learning_rate] = [sum(loss__)/len(loss__)]
def part_one():
    """Print the bags reachable from 'shiny gold' in the reversed graph.

    The module-level graph ``G`` is reversed and searched depth-first
    from 'shiny gold'.  The reached nodes (the source itself excluded)
    are printed as a sorted list, followed by how many there are.
    """
    reversed_graph = G.reverse()
    # dfs_predecessors returns {node: dfs_parent}; only the keys matter.
    reached = list(nx.dfs_predecessors(reversed_graph, 'shiny gold'))
    reached.sort()
    print(reached)
    print(len(reached))
def main(): # Command line arguments parser = argparse.ArgumentParser( description='Extract model subsets from the National Hydrologic Model') parser.add_argument('-O', '--output_dir', help='Output directory for subset') parser.add_argument('-p', '--param_filename', help='Name of output parameter file') parser.add_argument('-s', '--streamflow_filename', help='Name of streamflow data file') parser.add_argument('-P', '--paramdb_dir', help='Location of parameter database') parser.add_argument('-M', '--merged_paramdb_dir', help='Location of merged parameter database') parser.add_argument('-C', '--cbh_dir', help='Location of CBH files') parser.add_argument('-g', '--geodatabase_filename', help='Full path to NHM geodatabase') parser.add_argument('-j', '--job', help='Job directory to work in') parser.add_argument('-v', '--verbose', help='Output additional information', action='store_true') parser.add_argument('--check_DAG', help='Verify the streamflow network', action='store_true') parser.add_argument('--output_cbh', help='Output CBH files for subset', action='store_true') parser.add_argument('--output_shapefiles', help='Output shapefiles for subset', action='store_true') parser.add_argument('--output_streamflow', help='Output streamflows for subset', action='store_true') parser.add_argument('--cbh_netcdf', help='Enable netCDF output for CBH files', action='store_true') parser.add_argument('--param_netcdf', help='Enable netCDF output for parameter file', action='store_true') parser.add_argument( '--add_gages', metavar="KEY=VALUE", nargs='+', help= 'Add arbitrary streamgages to POIs of form gage_id=segment. Segment must exist in the model subset. Additional streamgages are marked as poi_type=0.' 
) parser.add_argument( '--no_filter_params', help='Output all parameters regardless of modules selected', action='store_true') args = parser.parse_args() stdir = os.getcwd() # TODO: Add to command line arguments single_poi = False if args.job: if os.path.exists(args.job): # Change into job directory before running extraction os.chdir(args.job) # print('Working in directory: {}'.format(args.job)) else: print('ERROR: Invalid jobs directory: {}'.format(args.job)) exit(-1) # Setup the logging bandit_log = logging.getLogger('bandit') bandit_log.setLevel(logging.DEBUG) log_fmt = logging.Formatter('%(levelname)s: %(name)s: %(message)s') # Handler for file logs flog = logging.FileHandler('bandit.log') flog.setLevel(logging.DEBUG) flog.setFormatter(log_fmt) # Handler for console logs clog = logging.StreamHandler() clog.setLevel(logging.ERROR) clog.setFormatter(log_fmt) bandit_log.addHandler(flog) bandit_log.addHandler(clog) bandit_log.info('========== START {} =========='.format( datetime.datetime.now().isoformat())) addl_gages = None if args.add_gages: addl_gages = parse_gages(args.add_gages) bandit_log.info('Additionals streamgages specified on command line') config = bc.Cfg('bandit.cfg') # Override configuration variables with any command line parameters for kk, vv in iteritems(args.__dict__): if kk not in [ 'job', 'verbose', 'cbh_netcdf', 'add_gages', 'param_netcdf', 'no_filter_params' ]: if vv: bandit_log.info( 'Overriding configuration for {} with {}'.format(kk, vv)) config.update_value(kk, vv) # Where to output the subset outdir = config.output_dir # The control file to use control_filename = config.control_filename # What to name the output parameter file param_filename = config.param_filename # Location of the NHM parameter database paramdb_dir = config.paramdb_dir # Location of the merged parameter database merged_paramdb_dir = config.merged_paramdb_dir streamgage_file = config.streamgage_file # List of outlets # dsmost_seg = config.outlets # List of upstream 
cutoffs # uscutoff_seg = config.cutoffs # List of additional HRUs (have no route to segment within subset) # hru_noroute = config.hru_noroute # List of output variables to sbuset try: include_model_output = config.include_model_output output_vars_dir = config.output_vars_dir output_vars = config.output_vars except KeyError: include_model_output = False # Control what is checked and output for subset check_dag = config.check_DAG try: output_cbh = config.output_cbh # Location of the NHM CBH files cbh_dir = config.cbh_dir except KeyError: output_cbh = False try: output_streamflow = config.output_streamflow # What to name the streamflow output file obs_filename = config.streamflow_filename except KeyError: output_streamflow = False try: output_shapefiles = config.output_shapefiles # Full path and filename to the geodatabase to use for outputting shapefile subsets geo_file = config.geodatabase_filename except KeyError: output_shapefiles = False # Load the control file ctl = ControlFile(control_filename) if ctl.has_dynamic_parameters: if config.dyn_params_dir: if os.path.exists(config.dyn_params_dir): dyn_params_dir = config.dyn_params_dir else: bandit_log.error('dyn_params_dir: {}, does not exist.'.format( config.dyn_params_dir)) exit(2) else: bandit_log.error( 'Control file has dynamic parameters but dyn_params_dir is not specified in the config file' ) exit(2) # Load master list of valid parameters vpdb = ValidParams() # Build list of parameters required for the selected control file modules required_params = vpdb.get_params_for_modules(modules=ctl.modules.values()) # TODO: make sure dynamic parameter filenames are correct # Write an updated control file # ctl.write('somefile') # Date range for pulling NWIS streamgage observations if isinstance(config.start_date, datetime.date): st_date = config.start_date else: st_date = datetime.datetime( *[int(x) for x in re.split('-| |:', config.start_date)]) if isinstance(config.end_date, datetime.date): en_date = config.end_date 
else: en_date = datetime.datetime( *[int(x) for x in re.split('-| |:', config.end_date)]) # =============================================================== params_file = '{}/{}'.format(merged_paramdb_dir, PARAMETERS_XML) # Output revision of NhmParamDb and the revision used by merged paramdb nhmparamdb_revision = git_version(paramdb_dir) bandit_log.info('Parameters based on NhmParamDb revision: {}'.format( nhmparamdb_revision)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Read hru_nhm_to_local and hru_nhm_to_region # Create segment_nhm_to_local and segment_nhm_to_region # TODO: since hru_nhm_to_region and nhru_nhm_to_local are only needed for # CBH files we should 'soft-fail' if the files are missing and just # output a warning and turn off CBH output if it was selected. # hru_nhm_to_region = get_parameter('{}/hru_nhm_to_region.msgpack'.format(cbh_dir)) # hru_nhm_to_local = get_parameter('{}/hru_nhm_to_local.msgpack'.format(cbh_dir)) # Load the NHMparamdb print('Loading NHM ParamDb') pdb = ParamDb(merged_paramdb_dir) nhm_params = pdb.parameters nhm_global_dimensions = pdb.dimensions # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Get tosegment_nhm # NOTE: tosegment is now tosegment_nhm and the regional tosegment is gone. 
tosegment = nhm_params.get('tosegment').data nhm_seg = nhm_params.get('nhm_seg').data if args.verbose: print('Generating stream network from tosegment_nhm') # Build the stream network dag_ds = nx.DiGraph() for ii, vv in enumerate(tosegment): # dag_ds.add_edge(ii+1, vv) if vv == 0: dag_ds.add_edge(ii + 1, 'Out_{}'.format(ii + 1)) else: dag_ds.add_edge(ii + 1, vv) # nx.draw_networkx(dag_ds) bandit_log.debug('Number of NHM downstream nodes: {}'.format( dag_ds.number_of_nodes())) bandit_log.debug('Number of NHM downstream edges: {}'.format( dag_ds.number_of_edges())) if check_dag: if not nx.is_directed_acyclic_graph(dag_ds): bandit_log.error('Cycles and/or loops found in stream network') for xx in nx.simple_cycles(dag_ds): bandit_log.error('Cycle found for segment {}'.format(xx)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Build dictionary which maps poi_gage_id to poi_gage_segment # poi_gage_segment_tmp = get_parameter('{}/poi_gage_segment.msgpack'.format(merged_paramdb_dir))['data'] # poi_gage_id_tmp = get_parameter('{}/poi_gage_id.msgpack'.format(merged_paramdb_dir))['data'] poi_gage_segment_tmp = nhm_params.get('poi_gage_segment').data poi_gage_id_tmp = nhm_params.get('poi_gage_id').data # Create dictionary to lookup nhm_segment for a given poi_gage_id poi_id_to_seg = dict(zip(poi_gage_id_tmp, poi_gage_segment_tmp)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Read streamgage ids from file - one streamgage id per row with open(streamgage_file, 'r') as fhdl: streamgages = fhdl.read().splitlines() # ===================================== # dag_ds should not change below here # For each streamgage: # 1) lookup nhm_segment (if any) and use as outlet # 2) create output directory # 3) subset the stream network, HRUs, params, etc uscutoff_seg = [] for sg in streamgages: print('Working on streamgage {}'.format(sg)) while True: # Create the upstream graph dag_us = dag_ds.reverse() bandit_log.debug('Number of NHM upstream nodes: 
{}'.format( dag_us.number_of_nodes())) bandit_log.debug('Number of NHM upstream edges: {}'.format( dag_us.number_of_edges())) # Trim the u/s graph to remove segments above the u/s cutoff segments try: for xx in uscutoff_seg: try: dag_us.remove_nodes_from( nx.dfs_predecessors(dag_us, xx)) # Also remove the cutoff segment itself dag_us.remove_node(xx) except KeyError: print( 'WARNING: nhm_segment {} does not exist in stream network' .format(xx)) except TypeError: bandit_log.error( '\nSelected cutoffs should at least be an empty list instead of NoneType. ({})' .format(outdir)) exit(200) bandit_log.debug( 'Number of NHM upstream nodes (trimmed): {}'.format( dag_us.number_of_nodes())) bandit_log.debug( 'Number of NHM upstream edges (trimmed): {}'.format( dag_us.number_of_edges())) # Lookup the outlet for the current streamgage try: dsmost_seg = [poi_id_to_seg[sg]] if dsmost_seg[0] == 0: # POI stream segment was never properly assigned in paramdb bandit_log.error( 'Streamgage {} has segment = 0. Skipping.'.format(sg)) break elif len(dsmost_seg) > 1: # Should never have more than one segment per streamgage bandit_log.info( 'Streamgage {} has more than one stream segment.'. 
format(sg)) break except KeyError: bandit_log.error( 'Streamgage {} does not exist in poi_gage_id'.format(sg)) break sg_dir = '{}/{}'.format(outdir, sg) try: os.makedirs(sg_dir) except OSError as exception: if exception.errno != errno.EEXIST: raise else: pass # ======================================= # Given a d/s segment (dsmost_seg) create a subset of u/s segments if args.verbose: print('\tExtracting model subset') # Get all unique segments u/s of the starting segment uniq_seg_us = set() if dsmost_seg: for xx in dsmost_seg: try: pred = nx.dfs_predecessors(dag_us, xx) uniq_seg_us = uniq_seg_us.union( set(pred.keys()).union(set(pred.values()))) except KeyError: bandit_log.error( 'KeyError: Segment {} does not exist in stream network' .format(xx)) # print('\nKeyError: Segment {} does not exist in stream network'.format(xx)) # Get a subgraph in the dag_ds graph and return the edges dag_ds_subset = dag_ds.subgraph(uniq_seg_us).copy() # 2018-02-13 PAN: It is possible to have outlets specified which are not truly # outlets in the most conservative sense (e.g. a point where # the stream network exits the study area). This occurs when # doing headwater extractions where all segments for a headwater # are specified in the configuration file. Instead of creating # output edges for all specified 'outlets' the set difference # between the specified outlets and nodes in the graph subset # which have no edges is performed first to reduce the number of # outlets to the 'true' outlets of the system. 
node_outlets = [ee[0] for ee in dag_ds_subset.edges()] true_outlets = set(dsmost_seg).difference(set(node_outlets)) bandit_log.debug('node_outlets: {}'.format(','.join( map(str, node_outlets)))) bandit_log.debug('true_outlets: {}'.format(','.join( map(str, true_outlets)))) # Add the downstream segments that exit the subgraph for xx in true_outlets: dag_ds_subset.add_edge(xx, 'Out_{}'.format(xx)) else: # No outlets specified so pull the CONUS dag_ds_subset = dag_ds # Create list of toseg ids for the model subset try: # networkx 1.x toseg_idx = list( set(xx[0] for xx in dag_ds_subset.edges_iter())) except AttributeError: # networkx 2.x toseg_idx = list(set(xx[0] for xx in dag_ds_subset.edges)) toseg_idx0 = [xx - 1 for xx in toseg_idx] # 0-based version of toseg_idx bandit_log.info('Number of segments in subset: {}'.format( len(toseg_idx))) # NOTE: With monolithic nhmParamDb files hru_segment becomes hru_segment_nhm and the regional hru_segments are gone. # 2019-09-16 PAN: This initially assumed hru_segment in the monolithic paramdb was ALWAYS # ordered 1..nhru. This is not always the case so the nhm_id parameter # needs to be loaded and used to map the nhm HRU ids to their # respective indices. hru_segment = nhm_params.get('hru_segment').data nhm_id = nhm_params.get('nhm_id').data nhm_id_to_idx = {} for ii, vv in enumerate(nhm_id): # keys are 1-based, values are 0-based nhm_id_to_idx[vv] = ii bandit_log.info('Number of NHM hru_segment entries: {}'.format( len(hru_segment))) # Create a dictionary mapping segments to HRUs seg_to_hru = {} for ii, vv in enumerate(hru_segment): # keys are 1-based, values in arrays are 1-based seg_to_hru.setdefault(vv, []).append(ii + 1) # Get HRU ids ordered by the segments in the model subset - entries are 1-based hru_order_subset = [] for xx in toseg_idx: if xx in seg_to_hru: for yy in seg_to_hru[xx]: hru_order_subset.append(yy) else: bandit_log.warning( 'Stream segment {} has no HRUs connected to it.'. 
format(xx)) # raise ValueError('Stream segment has no HRUs connected to it.') # Append the additional non-routed HRUs to the list # if len(hru_noroute) > 0: # for xx in hru_noroute: # if hru_segment[xx-1] == 0: # bandit_log.info('User-supplied HRU {} is not connected to any stream segment'.format(xx)) # hru_order_subset.append(xx) # else: # bandit_log.error('User-supplied HRU {} routes to stream segment {} - Skipping.'.format(xx, # hru_segment[xx-1])) hru_order_subset0 = [xx - 1 for xx in hru_order_subset] bandit_log.info('Number of HRUs in subset: {}'.format( len(hru_order_subset))) # Use hru_order_subset to pull selected indices for parameters with nhru dimensions # hru_order_subset contains the in-order indices for the subset of hru_segments # toseg_idx contains the in-order indices for the subset of tosegments # Renumber the tosegment list new_tosegment = [] # Map old DAG_subds indices to new for xx in toseg_idx: if list(dag_ds_subset.neighbors(xx))[0] in toseg_idx: new_tosegment.append( toseg_idx.index(list(dag_ds_subset.neighbors(xx))[0]) + 1) else: # Outlets should be assigned zero new_tosegment.append(0) # Renumber the hru_segments for the subset new_hru_segment = [] for xx in toseg_idx: # if DAG_subds.neighbors(xx)[0] in toseg_idx: if xx in seg_to_hru: for _ in seg_to_hru[xx]: # The new indices should be 1-based from PRMS new_hru_segment.append(toseg_idx.index(xx) + 1) # Append zeroes to new_hru_segment for each additional non-routed HRU # if len(hru_noroute) > 0: # for xx in hru_noroute: # if hru_segment[xx-1] == 0: # new_hru_segment.append(0) bandit_log.info('Size of hru_segment for subset: {}'.format( len(new_hru_segment))) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Subset hru_deplcrv hru_deplcrv = nhm_params.get('hru_deplcrv').data bandit_log.info('Size of NHM hru_deplcrv: {}'.format( len(hru_deplcrv))) # Get subset of hru_deplcrv using hru_order # A single snarea_curve can be referenced by multiple HRUs hru_deplcrv_subset = np.array(hru_deplcrv)[ 
tuple(hru_order_subset0), ] uniq_deplcrv = list(set(hru_deplcrv_subset)) uniq_deplcrv0 = [xx - 1 for xx in uniq_deplcrv] # Create new hru_deplcrv and renumber new_hru_deplcrv = [ uniq_deplcrv.index(cc) + 1 for cc in hru_deplcrv_subset ] bandit_log.info('Size of hru_deplcrv for subset: {}'.format( len(new_hru_deplcrv))) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Subset poi_gage_segment new_poi_gage_segment = [] new_poi_gage_id = [] new_poi_type = [] if nhm_params.exists('poi_gage_segment'): poi_gage_segment = nhm_params.get('poi_gage_segment').tolist() bandit_log.info('Size of NHM poi_gage_segment: {}'.format( len(poi_gage_segment))) poi_gage_id = nhm_params.get('poi_gage_id').data poi_type = nhm_params.get('poi_type').data # We want to get the indices of the poi_gage_segments that match the # segments that are part of the subset. We can then use these # indices to subset poi_gage_id and poi_type. # The poi_gage_segment will need to be renumbered for the subset of segments. # To subset poi_gage_segment we have to lookup each segment in the subset # Reset the cutoff list uscutoff_seg = [] # for ss in uniq_seg_us: try: # networkx 1.x for ss in nx.nodes_iter(dag_ds_subset): if ss in poi_gage_segment: new_poi_gage_segment.append( toseg_idx.index(ss) + 1) new_poi_gage_id.append( poi_gage_id[poi_gage_segment.index(ss)]) new_poi_type.append( poi_type[poi_gage_segment.index(ss)]) except AttributeError: # networkx 2.x for ss in dag_ds_subset.nodes: if ss in poi_gage_segment: new_poi_gage_segment.append( toseg_idx.index(ss) + 1) new_poi_gage_id.append( poi_gage_id[poi_gage_segment.index(ss)]) new_poi_type.append( poi_type[poi_gage_segment.index(ss)]) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Add any valid user-specified streamgage, nhm_seg pairs if addl_gages: for ss, vv in iteritems(addl_gages): if ss in new_poi_gage_id: idx = new_poi_gage_id.index(ss) bandit_log.warning( 'Existing NHM POI, {}, overridden on commandline (was {}, now {})' .format(ss, 
new_poi_gage_segment[idx], toseg_idx.index(vv) + 1)) new_poi_gage_segment[idx] = toseg_idx.index(vv) + 1 new_poi_type[idx] = 0 elif toseg_idx.index(vv) + 1 in new_poi_gage_segment: sidx = new_poi_gage_segment.index( toseg_idx.index(vv) + 1) bandit_log.warning( 'User-specified streamgage ({}) has same nhm_seg ({}) as existing POI ({}), replacing streamgage ID' .format(ss, toseg_idx.index(vv) + 1, new_poi_gage_id[sidx])) new_poi_gage_id[sidx] = ss new_poi_type[sidx] = 0 elif vv not in seg_to_hru.keys(): bandit_log.warning( 'User-specified streamgage ({}) has nhm_seg={} which is not part of the model subset - Skipping.' .format(ss, vv)) else: new_poi_gage_id.append(ss) new_poi_gage_segment.append( toseg_idx.index(vv) + 1) new_poi_type.append(0) bandit_log.info( 'Added user-specified POI streamgage ({}) at nhm_seg={}' .format(ss, vv)) # ================================================================== # ================================================================== # Process the parameters and create a parameter file for the subset params = list(nhm_params.keys()) # Remove the POI-related parameters if we have no POIs if len(new_poi_gage_segment) == 0: bandit_log.warning( 'No POI gages found for subset; removing POI-related parameters.' 
) for rp in ['poi_gage_id', 'poi_gage_segment', 'poi_type']: # params.pop(rp, None) try: params.remove(rp) except ValueError: print('ERROR: unable to remove {}'.format(rp)) pass params.sort() dims = {} for kk in nhm_global_dimensions.values(): dims[kk.name] = kk.size # Resize dimensions to the model subset crap_dims = dims.copy() # need a copy since we modify dims for dd, dv in iteritems(crap_dims): # dimensions 'nmonths' and 'one' are never changed if dd in HRU_DIMS: dims[dd] = len(hru_order_subset0) elif dd == 'nsegment': dims[dd] = len(toseg_idx0) elif dd == 'ndeplval': dims[dd] = len(uniq_deplcrv0) * 11 # if 'ndepl' not in dims: dims['ndepl'] = len(uniq_deplcrv0) elif dd == 'npoigages': dims[dd] = len(new_poi_gage_segment) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Build a ParameterSet for output # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ new_ps = ParameterSet() for dd, dv in iteritems(dims): new_ps.dimensions.add(dd, dv) if dd == 'npoigages': # 20170217 PAN: nobs is missing from the paramdb but is necessary new_ps.dimensions.add('nobs', dv) new_params = list(required_params) # WARNING: 2019-04-23 PAN # Very hacky way to remove parameters that shouldn't always get # included. Need to figure out a better way. 
check_list = [ 'basin_solsta', 'gvr_hru_id', 'hru_solsta', 'humidity_percent', 'irr_type', 'obsout_segment', 'rad_conv', 'rain_code', 'hru_lon' ] for xx in check_list: if xx in new_params: if xx in ['basin_solsta', 'hru_solsta', 'rad_conv']: if not new_ps.dimensions.exists('nsol'): new_params.remove(xx) elif new_ps.dimensions.get('nsol') == 0: new_params.remove(xx) elif xx == 'humidity_percent': if not new_ps.dimensions.exists('nhumid'): new_params.remove(xx) elif new_ps.dimensions.get('nhumid') == 0: new_params.remove(xx) elif xx == 'irr_type': if not new_ps.dimensions.exists('nwateruse'): new_params.remove(xx) elif new_ps.dimensions.get('nwateruse') == 0: new_params.remove(xx) elif xx == 'gvr_hru_id': if ctl.get('mapOutON_OFF').values == 0: new_params.remove(xx) elif xx in [ 'hru_lat', 'hru_lon', ]: if not nhm_params.exists(xx): new_params.remove(xx) new_params.sort() for pp in params: if pp in new_params or args.no_filter_params: cparam = nhm_params.get(pp).tostructure() new_ps.parameters.add(cparam['name']) ndims = len(cparam['dimensions']) if args.verbose: sys.stdout.write( '\r ') sys.stdout.write('\rProcessing {} '.format( cparam['name'])) sys.stdout.flush() # Get order of dimensions and total size for parameter dim_order = [None] * ndims for dd, dv in iteritems(cparam['dimensions']): dim_order[dv['position']] = dd for dd in dim_order: # self.parameters.get(varname).dimensions.add(dd, self.dimensions.get(dd).size) new_ps.parameters.get(cparam['name']).dimensions.add( dd, new_ps.dimensions.get(dd).size) new_ps.parameters.get( cparam['name']).datatype = cparam['datatype'] first_dimension = dim_order[0] if ndims == 2: second_dimension = dim_order[1] # Write out the data for the parameter if ndims == 1: # 1D Parameters if first_dimension == 'one': outdata = np.array(cparam['data']) elif first_dimension == 'nsegment': if pp in ['tosegment']: outdata = np.array(new_tosegment) else: outdata = np.array( cparam['data'])[tuple(toseg_idx0), ] elif first_dimension == 
'ndeplval': # This is really a 2D in disguise, however, it is stored in C-order unlike # other 2D arrays outdata = np.array(cparam['data']).reshape( (-1, 11))[tuple(uniq_deplcrv0), :] elif first_dimension == 'npoigages': if pp == 'poi_gage_segment': outdata = np.array(new_poi_gage_segment) elif pp == 'poi_gage_id': outdata = np.array(new_poi_gage_id) elif pp == 'poi_type': outdata = np.array(new_poi_type) else: bandit_log.error( 'Unkown parameter, {}, with dimensions {}'. format(pp, first_dimension)) elif first_dimension in HRU_DIMS: if pp == 'hru_deplcrv': outdata = np.array(new_hru_deplcrv) elif pp == 'hru_segment': outdata = np.array(new_hru_segment) else: outdata = np.array( cparam['data'])[tuple(hru_order_subset0), ] else: bandit_log.error( 'No rules to handle dimension {}'.format( first_dimension)) elif ndims == 2: # 2D Parameters outdata = np.array(cparam['data']).reshape( (-1, dims[second_dimension]), order='F') if first_dimension == 'nsegment': outdata = outdata[tuple(toseg_idx0), :] elif first_dimension in HRU_DIMS: outdata = outdata[tuple(hru_order_subset0), :] else: bandit_log.error( 'No rules to handle 2D parameter, {}, which contains dimension {}' .format(pp, first_dimension)) # Convert outdata to a list for writing if first_dimension == 'ndeplval': outlist = outdata.ravel().tolist() else: outlist = outdata.ravel(order='F').tolist() new_ps.parameters.get(cparam['name']).data = outlist # Write the new parameter file header = [ 'Written by Bandit version {}'.format(__version__), 'NhmParamDb revision: {}'.format(nhmparamdb_revision) ] if args.param_netcdf: base_filename = os.path.splitext(param_filename)[0] param_filename = '{}.nc'.format(base_filename) new_ps.write_netcdf('{}/{}'.format(sg_dir, param_filename)) else: new_ps.write_parameter_file('{}/{}'.format( sg_dir, param_filename), header=header) ctl.get('param_file').values = param_filename if args.verbose: sys.stdout.write('\n') # sys.stdout.write('\r ') # sys.stdout.write('\r\tParameter file 
written: {}\n'.format('{}/{}'.format(outdir, param_filename))) sys.stdout.flush() # 2019-09-16 PAN: Nasty hack to handle parameter databases that may not have # a one-to-one match between index value and nhm_id. cparam = nhm_params.get('nhm_id').tostructure() hru_order_subset_nhm_id = np.array( cparam['data'])[tuple(hru_order_subset0), ].ravel( order='F').tolist() # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Write CBH files # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if output_cbh: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Subset the cbh files for the selected HRUs if len(hru_order_subset) > 0: if args.verbose: print('Processing CBH files') if os.path.splitext(cbh_dir)[1] == '.nc': cbh_hdl = CbhNetcdf(src_path=cbh_dir, st_date=st_date, en_date=en_date, nhm_hrus=hru_order_subset_nhm_id) # nhm_hrus=hru_order_subset) else: # Subset the hru_nhm_to_local mapping # TODO: This section will not work with the monolithic paramdb - remove hru_order_ss = OrderedDict() for kk in hru_order_subset: hru_order_ss[kk] = hru_nhm_to_local[kk] cbh_hdl = CbhAscii(src_path=cbh_dir, st_date=st_date, en_date=en_date, nhm_hrus=hru_order_subset, indices=hru_order_ss, mapping=hru_nhm_to_region) if args.cbh_netcdf: # Pull the filename prefix off of the first file found in the # source netcdf CBH directory. 
file_it = glob.iglob(cbh_dir) cbh_prefix = os.path.basename( next(file_it)).split('_')[0] cbh_outfile = '{}/{}.nc'.format(outdir, cbh_prefix) cbh_hdl.write_netcdf(cbh_outfile) ctl.get('tmax_day').values = os.path.basename( cbh_outfile) ctl.get('tmin_day').values = os.path.basename( cbh_outfile) ctl.get('precip_day').values = os.path.basename( cbh_outfile) else: cbh_hdl.write_ascii(pathname=sg_dir) # bandit_log.info('{} written to: {}'.format(vv, '{}/{}.cbh'.format(outdir, vv))) else: bandit_log.error('No HRUs associated with the segments') # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Write output variables # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # 2019-08-07 PAN: first prototype for extractions of output variables if include_model_output: if len(hru_order_subset) > 0: try: os.makedirs(f'{sg_dir}/model_output') print( 'Creating directory model_output, for model output variables' ) except OSError: print( 'Using existing model_output directory for output variables' ) for vv in output_vars: if args.verbose: sys.stdout.write( '\r ' ) sys.stdout.write( f'\rProcessing output variable: {vv} ') sys.stdout.flush() filename = f'{output_vars_dir}/{vv}.nc' if vv[0:3] == 'seg': mod_out = ModelOutput(filename=filename, varname=vv, startdate=st_date, enddate=en_date, nhm_segs=toseg_idx) else: mod_out = ModelOutput( filename=filename, varname=vv, startdate=st_date, enddate=en_date, nhm_hrus=hru_order_subset_nhm_id) mod_out.write_csv(f'{sg_dir}/model_output') sys.stdout.write('\n') sys.stdout.flush() # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Write dynamic parameters # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ctl.has_dynamic_parameters: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Add dynamic parameters for cparam in ctl.dynamic_parameters: param_name = 'dyn_{}'.format(cparam) input_file = '{}/{}.nc'.format(dyn_params_dir, param_name) output_file = '{}/{}.param'.format(sg_dir, param_name) if not os.path.exists(input_file): bandit_log.warning( 
'WARNING: CONUS dynamic parameter file: {}, does not exist... skipping' .format(input_file)) else: if args.verbose: print( 'Writing dynamic parameter {}'.format(cparam)) mydyn = dyn_params.DynamicParameters( input_file, cparam, st_date, en_date, hru_order_subset_nhm_id) # mydyn = dyn_params.DynamicParameters(input_file, cparam, st_date, en_date, hru_order_subset) mydyn.read_netcdf() out_order = [kk for kk in hru_order_subset_nhm_id] # out_order = [kk for kk in hru_order_subset] for cc in ['day', 'month', 'year']: out_order.insert(0, cc) header = ' '.join(map(str, out_order)) # Output ASCII files out_ascii = open(output_file, 'w') out_ascii.write('{}\n'.format(cparam)) out_ascii.write('{}\n'.format(header)) out_ascii.write('####\n') mydyn.data.to_csv(out_ascii, columns=out_order, na_rep='-999', sep=' ', index=False, header=False, encoding=None, chunksize=50) out_ascii.close() # Write an updated control file to the output directory ctl.write('{}.bandit'.format('{}/{}'.format( sg_dir, control_filename))) if output_streamflow: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Download the streamgage information from NWIS if args.verbose: print( 'Downloading NWIS streamgage observations for {} stations' .format(len(new_poi_gage_id))) streamflow = prms_nwis.NWIS(gage_ids=new_poi_gage_id, st_date=st_date, en_date=en_date, verbose=args.verbose) streamflow.get_daily_streamgage_observations() streamflow.write_prms_data( filename='{}/{}'.format(sg_dir, obs_filename)) # ******************************************* # Create a shapefile of the selected HRUs if output_shapefiles: if args.verbose: print('-' * 40) print('Writing shapefiles for model subset') if not os.path.isdir(geo_file): bandit_log.error( 'File geodatabase, {}, does not exist. 
Shapefiles will not be created' .format(geo_file)) else: geo_shp = prms_geo.Geo(geo_file) # Create GIS sub-directory if it doesn't already exist gis_dir = '{}/GIS'.format(sg_dir) try: os.makedirs(gis_dir) except OSError as exception: if exception.errno != errno.EEXIST: raise else: pass # Output a shapefile of the selected HRUs # print('\tHRUs') # geo_shp.select_layer('nhruNationalIdentifier') geo_shp.select_layer('nhru') geo_shp.write_shapefile( '{}/GIS/HRU_subset.shp'.format(sg_dir), 'hru_id_nat', hru_order_subset_nhm_id, included_fields=[ 'nhm_id', 'model_idx', 'region', 'hru_id_nat' ]) # geo_shp.write_shapefile3('{}/GIS/HRU_subset.gdb'.format(outdir), 'hru_id_nat', hru_order_subset) # geo_shp.filter_by_attribute('hru_id_nat', hru_order_subset) # geo_shp.write_shapefile2('{}/HRU_subset.shp'.format(outdir)) # geo_shp.write_kml('{}/HRU_subset.kml'.format(outdir)) # Output a shapefile of the selected stream segments # print('\tSegments') geo_shp.select_layer('nsegmentNationalIdentifier') geo_shp.write_shapefile( '{}/GIS/Segments_subset.shp'.format(sg_dir), 'seg_id_nat', toseg_idx, included_fields=['seg_id_nat', 'model_idx', 'region']) # geo_shp.filter_by_attribute('seg_id_nat', uniq_seg_us) # geo_shp.write_shapefile2('{}/Segments_subset.shp'.format(outdir)) del geo_shp break # break out of while True loop bandit_log.info('========== END {} =========='.format( datetime.datetime.now().isoformat())) os.chdir(stdir)
def get_node_to_distn(T, root, node_to_pmap, root_distn=None, P_default=None):
    """
    Compute the marginal state distribution at every node of a tree.

    Nodes are visited in depth-first preorder from the root, so the
    distribution of a parent is always available before its children are
    processed.  The root distribution is obtained by normalizing its
    subtree-likelihood map against the optional prior; every other node
    mixes, over the parent's states, the normalized product of the edge
    transition weights and the node's own subtree likelihoods.

    Parameters
    ----------
    T : undirected acyclic networkx graph
        A tree whose edges may carry a transition matrix under the key 'P'.
    root : integer
        Root node.
    node_to_pmap : dict
        Map from node to a map from state to subtree likelihood.
        This map incorporates state restrictions.
    root_distn : dict, optional
        A finite distribution over root states.
    P_default : weighted directed networkx graph, optional
        Transition matrix used for edges that have no 'P' annotation.

    Returns
    -------
    node_to_distn : dict
        Sparse map from node to sparse map from state to probability.

    Raises
    ------
    ValueError
        If an edge has no 'P' annotation and no default matrix was given.

    """
    # Parent of each non-root node in the depth-first tree.
    parent_of = nx.dfs_predecessors(T, root)

    node_to_distn = {}
    for node in nx.dfs_preorder_nodes(T, root):

        # Subtree likelihoods for the states allowed at this node.
        pmap = node_to_pmap[node]

        # The root has no incoming edge; it only uses the (optional) prior.
        if node == root:
            node_to_distn[node] = get_normalized_dict_distn(pmap, root_distn)
            continue

        parent = parent_of[node]
        parent_distn = node_to_distn[parent]

        # Edge-specific transition matrix, falling back to the default.
        P = T[parent][node].get('P', P_default)
        if P is None:
            raise ValueError('no transition matrix is available')

        # Mix the conditional child distributions over the parent states.
        marginal = defaultdict(float)
        for sa, pa in parent_distn.items():

            # Child states that are both reachable from sa and allowed here.
            allowed = set(P[sa]) & set(pmap)
            weights = {sb: P[sa][sb]['weight'] * pmap[sb] for sb in allowed}

            for sb, pb in get_normalized_dict_distn(weights).items():
                marginal[sb] += pa * pb

        node_to_distn[node] = marginal

    return node_to_distn
def cheapestLeafConnection(G, H):
    """
    Repeatedly rewire the tree H until ``udah_belom(H)`` reports it is done.

    For every node with more than two neighbours, each incident edge (x, y)
    is removed in turn from a scratch copy of H; the leaves of the
    depth-first tree rooted at x are then collected as candidate new
    endpoints for y (subject to ``validColor``), weighted by the edge
    weights stored in G.  The cheapest candidate replaces the edge (x, y).

    Parameters
    ----------
    G : graph providing the ``'weight'`` of every candidate edge.
    H : graph that is rewired (presumably a spanning tree of G -- confirm
        against the caller).

    Returns
    -------
    The rewired graph, or the result of ``directLeafConnection`` once the
    iteration counter reaches 250001.
    """
    nodes = H.nodes()
    candidates = []
    tmp_weight = 0
    counter = 0
    while not udah_belom(H):
        #counter = 0
        for x in nodes:
            candidates=[]
            counter += 1
            x_neigh = H.neighbors(x)
            if len(x_neigh) > 2:
                for y in x_neigh:
                    #make a copy of current mst
                    dummyGraph = H.copy()
                    #remove edge
                    removeNeighborColor(dummyGraph, x, y)
                    tmp_weight = G[x][y]['weight'] #save weight
                    dummyGraph.remove_edge(x,y)
                    # Keys of dfs_successors are the nodes that have children
                    # in the DFS tree; keys of dfs_predecessors are all nodes
                    # reached from x.  Reached nodes without children are the
                    # leaves of the DFS tree.
                    tree = nx.dfs_successors(dummyGraph, x)
                    anak_anak = nx.dfs_predecessors(dummyGraph, x)
                    for anak in anak_anak:
                        if anak not in tree: #if leaf
                            if validColor(dummyGraph, anak, y):
                                tmp = (anak, y, G[anak][y]['weight'])
                                candidates.append(tmp)
                    # Restore the removed edge so the next y starts from an
                    # equivalent graph.
                    dummyGraph.add_edge(x, y, weight = tmp_weight)
                    addNeighborColor(dummyGraph, x, y)
            if len(candidates)>0:
                # Swap the edge (x, tom) for the cheapest (leaf, tom) edge.
                candidates = sorted(candidates, key = lambda z: z[2])
                fro, tom, wei = candidates[0]
                dummyGraph.add_edge(fro,tom, weight=wei)
                removeNeighborColor(dummyGraph, x, tom)
                dummyGraph.remove_edge(x,tom)
                addNeighborColor(dummyGraph, fro, tom)
                H = dummyGraph.copy()
                #drawHraph(H)
            elif (counter%10000) ==0:
                # Periodic perturbation when no candidate was found: detach
                # one leaf and try to re-attach it to a neighbour of its
                # former parent that has fewer than three neighbours.
                if counter%100000==0:
                    print 'no candidates,', counter, ' iterations in Cheapest Leaf Connection'
                #drawGraph(dummyGraph)
                # NOTE(review): this inner loop rebinds the outer loop
                # variable x -- confirm that is intended.
                for x in nodes:
                    x_neigh = dummyGraph.neighbors(x)
                    if len(x_neigh)==1:
                        dummyGraph.remove_edge(x, x_neigh[0])
                        removeNeighborColor(dummyGraph, x, x_neigh[0])
                        for y in dummyGraph.neighbors(x_neigh[0]):
                            if validColor(dummyGraph, x, y) and len(dummyGraph.neighbors(y))<3:
                                dummyGraph.add_edge(x,y)
                                addNeighborColor(dummyGraph, x, y)
                                H = dummyGraph.copy()
                                break
                        break
            elif counter == 250001:
                # Give up and fall back to the direct strategy.
                print 'Using Cheapest Leaf Connection failed miserably =('
                #os.system('say "Using candidates failed miserably..."')
                print 'Try using Direct Leaf Connection'
                #os.system('say "Try using direct leaf connection"')
                H = directLeafConnection(G, dummyGraph)
                return H
    return H
def minibatch_(functions, clf,scaler,w, loss__,mse,hinge1,hinge2,full_image,img_nr,alphas,learning_rate,subsamples, mode): X_p, y_p, inv = get_data_from_img_nr(class_,img_nr, subsamples) if X_p != []: boxes = [] ground_truth = inv[0][2] img_nr = inv[0][0] print img_nr if less_features: X_p = [fts[0:features_used] for fts in X_p] if os.path.isfile('/var/node436/local/tstahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'): f = open('/var/node436/local/tstahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt', 'r') else: print 'warning' for line, y in zip(f, inv): tmp = line.split(',') coord = [] for s in tmp: coord.append(float(s)) boxes.append([coord, y[2]]) #assert(len(boxes)<500) boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,5000) if os.path.isfile('/var/node436/local/tstahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d"))): gr = open('/var/node436/local/tstahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d")), 'r') else: gr = [] ground_truths = [] for line in gr: tmp = line.split(',') ground_truth = [] for s in tmp: ground_truth.append(int(s)) ground_truths.append(ground_truth) #prune boxes pruned_x = [] pruned_y = [] pruned_boxes = [] if prune: for i, y_ in enumerate(y_p): if y_ > 0: pruned_x.append(X_p[i]) pruned_y.append(y_p[i]) pruned_boxes.append(boxes[i]) else: pruned_x = X_p pruned_y = y_p pruned_boxes = boxes if subsampling and pruned_boxes > subsamples: pruned_x = pruned_x[0:subsamples] pruned_y = pruned_y[0:subsamples] pruned_boxes = pruned_boxes[0:subsamples] # create_tree G, levels = create_tree(pruned_boxes) #prune tree to only have levels which fully cover the image, tested if prune_fully_covered: nr_levels_covered = 100 total_size = surface_area(pruned_boxes, levels[0]) for level in levels: sa = surface_area(pruned_boxes, levels[level]) sa_co = sa/total_size if sa_co != 1.0: G.remove_nodes_from(levels[level]) else: nr_levels_covered = level levels = {k: levels[k] for k in range(0,nr_levels_covered + 1)} # prune levels, 
speedup + performance levels_tmp = {k:v for k,v in levels.iteritems() if k<prune_tree_levels} levels_gone = {k:v for k,v in levels.iteritems() if k>=prune_tree_levels} levels = levels_tmp #prune tree as well, for patches training for trash_level in levels_gone.values(): G.remove_nodes_from(trash_level) coords = [] features = [] f_c = [] f = [] #either subsampling or prune_fully_covered #assert(subsampling != prune_fully_covered) if subsampling: if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)): f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r+') else: if mode == 'extract_train' or mode == 'extract_test': print 'coords for %s with %s samples have to be extracted'%(img_nr,subsamples) f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'w') for level in levels: levl_boxes = extract_coords(levels[level], pruned_boxes) if levl_boxes != []: for lvl_box in levl_boxes: if lvl_box not in coords: coords.append(lvl_box) f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3])) f_c.write('\n') f_c.close() print 'features for %s with %s samples have to be extracted'%(img_nr,subsamples) os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc') os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib') #print "EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples) os.system("EuVisual /var/node436/local/tstahl/Images/%s.jpg 
/var/node436/local/tstahl/Features_prop_windows/Features_upper/%s_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt"%(class_,(format(img_nr, "06d")),format(img_nr, "06d"),subsamples,class_,format(img_nr, "06d"),subsamples)) if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)): f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r') else: f_c = [] coords = [] if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)): f = open('/var/node436/local/tstahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r') elif prune_fully_covered: if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))): f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r+') else: if mode == 'extract_train' or mode == 'extract_test': print 'coords for %s with fully_cover_tree samples have to be extracted'%(img_nr) f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'w') for level in levels: levl_boxes = extract_coords(levels[level], pruned_boxes) if levl_boxes != []: for lvl_box in levl_boxes: if lvl_box not in coords: coords.append(lvl_box) f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3])) f_c.write('\n') f_c.close() print 'features for %s with fully_cover_tree samples have to be extracted'%(img_nr) os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc') os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib') #print 
"EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples) print "EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),format(img_nr, "06d")) os.system("EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),format(img_nr, "06d"))) if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))): f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r') else: f_c = [] coords = [] if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))): f = open('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r') else: if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep%s.txt'%(format(img_nr, "06d"))): f = open('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep%s.txt'%(format(img_nr, "06d")), 'r') if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep%s.txt'%(format(img_nr, "06d"))): 
f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep%s.txt'%(format(img_nr, "06d")), 'r+') if f_c != []: for i,line in enumerate(f_c): str_ = line.rstrip('\n').split(',') cc = [] for s in str_: cc.append(float(s)) coords.append(cc) if f != []: for i,line in enumerate(f): str_ = line.rstrip('\n').split(',') ff = [] for s in str_: ff.append(float(s)) features.append(ff) #assert len(coords) == len(features) # append x,y of intersections if learn_intersections: for inters,coord in zip(features,coords): # if inters not in pruned_x: pruned_x.append(inters) ol = 0.0 ol = get_intersection_count(coord, ground_truths) pruned_y.append(ol) if mode == 'mean_variance': print 'normalizing' sum_x += np.array(pruned_x).sum(axis=0) n_samples += len(pruned_x) sum_sq_x += (np.array(pruned_x)**2).sum(axis=0) scaler.partial_fit(pruned_x) # Don't cheat - fit only on training data return sum_x,n_samples,sum_sq_x, scaler if less_features: features = [fts[0:features_used] for fts in features] #normalize norm_x = [] if normalize and (mode != 'extract_train' and mode != 'extract_test'): # for p_x in pruned_x: # norm_x.append((p_x-mean)/variance) norm_x = scaler.transform(pruned_x) if features != []: features = scaler.transform(features) else: norm_x = pruned_x data = (G, levels, pruned_y, norm_x, pruned_boxes, ground_truths, alphas) sucs = nx.dfs_successors(G) predecs = nx.dfs_predecessors(G) #preprocess: node - children children = {} last = -1 for node,children_ in zip(sucs.keys(),sucs.values()): if node != last+1: for i in range(last+1,node): children[i] = [] children[node] = children_ elif node == last +1: children[node] = children_ last = node if mode == 'training': if alphas[0] == 0: #if we don't learn the proposals, we learn just the levels: better, because every level has same importance and faster print 'training levels', img_nr for level in levels: print 'level' , level if img_nr in functions: if level in functions[img_nr]: function = 
functions[img_nr][level] else: function = [] else: functions[img_nr] = {} function = [] w, function = tree_level_regression(class_,function,levels,level,features,coords,scaler,w,norm_x,pruned_y,None,predecs,children,pruned_boxes,learning_rate,alphas,img_nr,jans_idea) if level not in functions[img_nr]: functions[img_nr][level] = function return w, len(pruned_y), len(levels) else: #if we learn proposals, levels with more proposals have more significance...., slow - need to change print 'training patches', img_nr print predecs nodes = list(G.nodes()) for node in nodes: print node if node == 0: w = learn_root(w,norm_x[0],pruned_y[0],learning_rate,alphas) else: for num,n in enumerate(levels.values()): if node in n: level = num break if img_nr in functions: if level in functions[img_nr]: function = functions[img_nr][level] else: function = [] else: functions[img_nr] = {} function = [] #w, function = tree_level_regression(class_,function,levels,level,features,coords,scaler,w,norm_x,pruned_y,node,predecs,children,pruned_boxes,learning_rate,alphas,img_nr) w, function = constrained_regression(class_,function,features,coords,scaler,w,norm_x,pruned_y,node,predecs,children,pruned_boxes,learning_rate,alphas,img_nr,squared_hinge_loss) #TODO: train regressor/classifier that predicts/chooses level. Features: level, number of proposals, number of intersections, avg size of proposal, predictions(for regressor), etc. 
if level not in functions[img_nr]: functions[img_nr][level] = function return w, len(pruned_y), len(G.nodes()) elif mode == 'scikit_train': clf.partial_fit(norm_x,pruned_y) return clf elif mode == 'loss_train': if alphas[0] == 0: #levels loss__.append(tree_level_loss(class_,features,coords,scaler, w, data, predecs, children,img_nr,-1,functions)) return loss__ else: loss__.append(loss(class_,squared_hinge_loss,features,coords,scaler,w, data, predecs, children,img_nr, -1)) elif mode == 'loss_test' or mode == 'loss_eval': print mode, loss__ if alphas[0] == 0: #levels loss__.append(tree_level_loss(class_,features,coords,scaler, w, data, predecs, children,img_nr,-1,functions)) cpl = max(0, np.dot(w,np.array(norm_x[0]).T)) full_image.append([pruned_y[0],cpl]) return loss__,full_image else: loss__.append(loss(class_,squared_hinge_loss,features,coords,scaler,w, data, predecs, children,img_nr, -1)) cpl = max(0, np.dot(w,np.array(norm_x[0]).T)) full_image.append([pruned_y[0],cpl]) return loss__,full_image elif mode == 'loss_scikit_test' or mode == 'loss_scikit_train': loss__.append(((clf.predict(norm_x) - pruned_y)**2).sum()) return loss__ elif mode == 'levels_train' or mode == 'levels_test': preds = [] for i,x_ in enumerate(norm_x): preds.append(np.dot(w, x_)) cpls = [] truelvls = [] used_boxes_ = [] total_size = surface_area(pruned_boxes, levels[0]) fully_covered_score = 0.0 fully_covered_score_lvls = 0.0 covered_levels = [] print mode, len(levels) for level in levels: function = functions[img_nr][level] cpl,used_boxes,_ = count_per_level([],class_,features,coords,scaler,w, preds, img_nr, pruned_boxes,levels[level], '',function) # clipp negative predictions cpl = max(0,cpl) if used_boxes != []: used_boxes_.append(used_boxes[0][1]) tru = y_p[0] cpls.append(cpl) sa = surface_area(pruned_boxes, levels[level]) sa_co = sa/total_size if sa_co == 1.0: fully_covered_score += cpl fully_covered_score_lvls += 1 covered_levels.append(cpl) truelvls.append(tru) return cpls,truelvls
def trim(self, by_inflow=True, by_outflow=True, min_count=0):
    """
    Trims a graph to delete nodes that are not connected to the main
    component, which is the component containing the most-sampled node
    (MSN) by counts.

    by_inflow: whether to delete nodes that are not connected to the MSN
        by inflow, i.e. nodes that have no path *to* the MSN in the graph
    by_outflow: whether to delete nodes that are not connected to the MSN
        by outflow, i.e. nodes that cannot be reached *from* the MSN
    min_count: nodes whose total count (row sum of self.countmat) is
        below min_count will be deleted

    Trimmed graph is saved as self.trim_graph. The trimmed transition
    matrix is saved as self.trim_transmat, and the count matrix is saved
    as self.trim_countmat.

    The mapping from the nodes in the trimmed set to the full set is given
    by self.trim_indices.
    """
    totcounts = self.countmat.sum(axis=1)
    msn = totcounts.argmax()

    mask = np.ones(self.nnodes, dtype=bool)
    oldmask = np.zeros(self.nnodes, dtype=bool)
    if min_count > 0:
        mask[[i for i in range(self.nnodes)
              if totcounts[i] < min_count]] = False

    # Removing a node can disconnect others, so iterate until the mask
    # stops changing.
    while (mask != oldmask).any():
        oldmask = mask.copy()
        self.trim_indices = [i for i in range(self.nnodes) if mask[i]]
        self.trim_graph = self.graph.subgraph(self.trim_indices)

        if by_outflow:
            # Values of the DFS successor map are the child lists of the
            # DFS tree rooted at the MSN: everything reachable from it.
            downstream = set(itertools.chain.from_iterable(
                nx.dfs_successors(self.trim_graph, msn).values()))
            downstream.add(msn)
            mask[[i for i in range(self.nnodes)
                  if i not in downstream]] = False

        if by_inflow:
            # The keys of nx.dfs_predecessors(G, msn) are the nodes reached
            # *from* msn (the same set as the outflow test), not the nodes
            # that flow into it.  nx.ancestors gives the nodes that have a
            # path to msn, which is what "inflow" requires.
            upstream = set(nx.ancestors(self.trim_graph, msn))
            upstream.add(msn)
            mask[[i for i in range(self.nnodes)
                  if i not in upstream]] = False

    # Fold counts stored at (masked row, kept column) onto the diagonal
    # entry of the kept column so they become self-transitions.
    # NOTE(review): the original comment described these as "transitions
    # to masked states"; the row/column orientation is kept exactly as the
    # original condition had it -- confirm against the countmat convention.
    to_add = {}
    rows = self.countmat.row
    cols = self.countmat.col
    data = self.countmat.data
    for row, col, count in zip(rows, cols, data):
        # Plain truthiness: elements of a numpy bool array are never
        # identical to the Python singletons, so `is False` / `is True`
        # would never match.
        if not mask[row] and mask[col]:
            if col in to_add:
                to_add[col] += count
            else:
                to_add[col] = count

    tmp_arr = self.countmat.toarray()[mask, ...][..., mask]
    for ind, full_ind in enumerate(self.trim_indices):
        if full_ind in to_add:
            tmp_arr[ind][ind] += to_add[full_ind]

    self.trim_countmat = scipy.sparse.coo_matrix(tmp_arr)
    if self.symmetrize:
        self.trim_countmat = symmetrize_matrix(self.trim_countmat)
    self.trim_nnodes = self.trim_countmat.shape[0]
    self.trim_transmat = count_to_trans(self.trim_countmat)
def ScenarioTreeModelFromNetworkX(
        tree,
        node_name_attribute=None,
        edge_probability_attribute='weight',
        stage_names=None,
        scenario_name_attribute=None):
    """
    Create a scenario tree model from a networkx tree.  The
    height of the tree must be at least 1 (meaning at least
    2 stages).

    Required node attributes:
        - cost (str): A string identifying a component on
              the model whose value indicates the cost at
              the time stage of the node for any scenario
              traveling through it.

    Optional node attributes:
        - variables (list): A list of variable identifiers
              that will be tracked by the node. If the node
              is not a leaf node, these indicate variables
              with non-anticipativity constraints.
        - derived_variables (list): A list of variable or
              expression identifiers that will be tracked by
              the node (but will never have
              non-anticipativity constraints enforced).
        - bundle: A bundle identifier for the scenario
              defined by a leaf-stage node. This attribute
              is ignored on non-terminal tree nodes. If this
              attribute appears on at least one leaf-stage
              node (and is not set to :const:`None`), then
              it must be set on all leaf-stage nodes (to
              something other than :const:`None`);
              otherwise, an exception will be raised.

    Optional edge attributes:
        - weight (float): Indicates the conditional
              probability of moving from the parent node to
              the child node in the directed edge. If not
              present, it will be assumed that all edges
              leaving the parent node have equal probability
              (normalized to sum to one).

    Args:
        stage_names: Can define a list of stage names to use
           (assumed in time order). The length of this list
           must match the number of stages in the tree. The
           default value of :const:`None` indicates that
           stage names should be automatically generated
           with the form ['Stage1','Stage2',...].
        node_name_attribute: By default, node names are the
           same as the node hash in the networkx tree. This
           keyword can be set to the name of some property
           of nodes in the graph that will be used for their
           name in the PySP scenario tree.
        scenario_name_attribute: By default, scenario names
           are the same as the leaf-node hash in the
           networkx tree. This keyword can be set to the
           name of some property of leaf-nodes in the graph
           that will be used for their corresponding
           scenario name in the PySP scenario tree.
        edge_probability_attribute: Can be set to the name
           of some property of edges in the graph that
           defines the conditional probability of that
           branch (default: 'weight'). If this keyword is
           set to :const:`None`, then all branches leaving a
           node are assigned equal conditional
           probabilities.

    Raises:
        TypeError: if the graph is not a directed branching tree.
        ValueError: if the tree has fewer than 2 stages, the stage
            names are the wrong length or not unique, the edge
            probabilities leaving a node do not sum to 1, or the
            bundle specification is incomplete.
        KeyError: if a requested node-name, scenario-name or edge
            probability attribute is missing.

    Examples:

        A 2-stage scenario tree with 10 scenarios grouped
        into 2 bundles:

        >>> G = networkx.DiGraph()
        >>> G.add_node("root", variables=["x"])
        >>> N = 10
        >>> for i in range(N):
        >>>     node_name = "s"+str(i)
        >>>     bundle_name = "b"+str(i%2)
        >>>     G.add_node(node_name, bundle=bundle_name)
        >>>     G.add_edge("root", node_name, weight=1.0/N)
        >>> model = ScenarioTreeModelFromNetworkX(G)

        A 4-stage scenario tree with 125 scenarios:

        >>> branching_factor = 5
        >>> height = 3
        >>> G = networkx.balanced_tree(
                   branching_factor,
                   height,
                   networkx.DiGraph())
        >>> model = ScenarioTreeModelFromNetworkX(G)
    """

    if not has_networkx:                          #pragma:nocover
        raise ValueError(
            "networkx>=2.0 module is not available")

    if not networkx.is_tree(tree):
        raise TypeError(
            "Graph object is not a tree "
            "(see networkx.is_tree)")

    if not networkx.is_directed(tree):
        raise TypeError(
            "Graph object is not directed "
            "(see networkx.is_directed)")

    if not networkx.is_branching(tree):
        raise TypeError(
            "Graph object is not a branching "
            "(see networkx.is_branching)")

    in_degree_items = tree.in_degree()
    # Prior to networkx ~2.0, in_degree() returned a dictionary.
    # Now it is a view on items, so only call .items() for the old case
    if hasattr(in_degree_items, 'items'):
        in_degree_items = in_degree_items.items()
    root = [u for u, d in in_degree_items if d == 0]
    assert len(root) == 1
    root = root[0]
    num_stages = networkx.eccentricity(tree, v=root) + 1
    if num_stages < 2:
        raise ValueError(
            "The number of stages must be at least 2")
    m = CreateAbstractScenarioTreeModel()
    if stage_names is not None:
        unique_stage_names = set()
        # Pre-bind the counter so an empty stage_names reaches the
        # length check below instead of failing on an unbound name.
        cnt = 0
        for cnt, stage_name in enumerate(stage_names, 1):
            m.Stages.add(stage_name)
            unique_stage_names.add(stage_name)
        if cnt != num_stages:
            raise ValueError(
                "incorrect number of stages names (%s), should be %s"
                % (cnt, num_stages))
        if len(unique_stage_names) != cnt:
            raise ValueError("all stage names were not unique")
    else:
        for i in range(num_stages):
            m.Stages.add('Stage'+str(i+1))
    node_to_name = {}
    node_to_scenario = {}
    scenario_bundle = {}

    def _setup(u, succ):
        # First pass: register node and scenario names on the abstract
        # model, walking the DFS tree from u.
        if node_name_attribute is not None:
            if node_name_attribute not in tree.nodes[u]:
                raise KeyError(
                    "node '%s' missing node name "
                    "attribute: '%s'"
                    % (u, node_name_attribute))
            node_name = tree.nodes[u][node_name_attribute]
        else:
            node_name = u
        node_to_name[u] = node_name
        m.Nodes.add(node_name)
        if u in succ:
            for v in succ[u]:
                _setup(v, succ)
        else:
            # a leaf node
            if scenario_name_attribute is not None:
                if scenario_name_attribute not in tree.nodes[u]:
                    raise KeyError(
                        "node '%s' missing scenario name "
                        "attribute: '%s'"
                        % (u, scenario_name_attribute))
                scenario_name = tree.nodes[u][scenario_name_attribute]
            else:
                scenario_name = u
            node_to_scenario[u] = scenario_name
            m.Scenarios.add(scenario_name)
            scenario_bundle[scenario_name] = \
                tree.nodes[u].get('bundle', None)

    # The successor map depends only on the tree, so it is computed once
    # and shared by both passes.
    succ = networkx.dfs_successors(tree, root)
    _setup(root, succ)
    m = m.create_instance()

    def _add_node(u, stage, succ, pred):
        # Second pass: fill stage, probability, variables, cost and
        # children for node u on the concrete model; returns its name.
        node_name = node_to_name[u]
        m.NodeStage[node_name] = m.Stages[stage]
        if u == root:
            m.ConditionalProbability[node_name] = 1.0
        else:
            assert u in pred
            # prior to networkx ~2.0, we used a .edge attribute on DiGraph,
            # which no longer exists.
            if hasattr(tree, 'edge'):
                edge = tree.edge[pred[u]][u]
            else:
                edge = tree.edges[pred[u], u]
            probability = None
            if edge_probability_attribute is not None:
                if edge_probability_attribute not in edge:
                    raise KeyError(
                        "edge '(%s, %s)' missing probability attribute: '%s'"
                        % (pred[u], u, edge_probability_attribute))
                probability = edge[edge_probability_attribute]
            else:
                probability = 1.0/len(succ[pred[u]])
            m.ConditionalProbability[node_name] = probability
        # get node variables
        if "variables" in tree.nodes[u]:
            node_variables = tree.nodes[u]["variables"]
            assert isinstance(node_variables, (tuple, list))
            for varstring in node_variables:
                m.NodeVariables[node_name].add(varstring)
        if "derived_variables" in tree.nodes[u]:
            node_derived_variables = tree.nodes[u]["derived_variables"]
            assert isinstance(node_derived_variables, (tuple, list))
            for varstring in node_derived_variables:
                m.NodeDerivedVariables[node_name].add(varstring)
        if "cost" in tree.nodes[u]:
            assert isinstance(tree.nodes[u]["cost"], six.string_types)
            m.NodeCost[node_name].value = tree.nodes[u]["cost"]
        if u in succ:
            child_names = []
            for v in succ[u]:
                child_names.append(
                    _add_node(v, stage+1, succ, pred))
            total_probability = 0.0
            for child_name in child_names:
                m.Children[node_name].add(child_name)
                total_probability += \
                    pyomo.core.value(m.ConditionalProbability[child_name])
            if abs(total_probability - 1.0) > 1e-5:
                raise ValueError(
                    "edge probabilities leaving node '%s' "
                    "do not sum to 1 (total=%r)"
                    % (u, total_probability))
        else:
            # a leaf node
            scenario_name = node_to_scenario[u]
            m.ScenarioLeafNode[scenario_name] = node_name
            m.Children[node_name].clear()

        return node_name

    _add_node(root,
              1,
              succ,
              networkx.dfs_predecessors(tree, root))

    if any(_b is not None for _b in scenario_bundle.values()):
        if any(_b is None for _b in scenario_bundle.values()):
            raise ValueError("Incomplete bundle specification. "
                             "All scenarios require a bundle "
                             "identifier.")
        m.Bundling.value = True
        bundle_scenarios = {}
        for bundle_name in sorted(set(scenario_bundle.values())):
            m.Bundles.add(bundle_name)
            bundle_scenarios[bundle_name] = []
        for scenario_name in m.Scenarios:
            bundle_scenarios[scenario_bundle[scenario_name]].\
                append(scenario_name)
        for bundle_name in m.Bundles:
            for scenario_name in sorted(bundle_scenarios[bundle_name]):
                m.BundleScenarios[bundle_name].add(scenario_name)

    return m
T = nx.DiGraph() for e in edges: T.add_edge(e[0], e[1]) draw("T", T) # reverse the edges so we have bottom up. edges.reverse() for n in nx.dfs_postorder_nodes(BC): print n sys.exit() """ # compute orderings. root = cnodes[0] preds = nx.dfs_predecessors(BC, root) nlist = [x for x in nx.dfs_postorder_nodes(BC, root)] # test solving. solved = set() for p in nlist: # get parent. parent = preds[p] # flip if is cut node. t = BC.node[p]["type"] if t == "C": continue # simplify.
def ScenarioTreeModelFromNetworkX(
        tree,
        node_name_attribute=None,
        edge_probability_attribute='weight',
        stage_names=None,
        scenario_name_attribute=None):
    """
    Create a scenario tree model from a networkx tree.  The
    height of the tree must be at least 1 (meaning at least
    2 stages).

    Required node attributes:
        - cost (str): A string identifying a component on
              the model whose value indicates the cost at
              the time stage of the node for any scenario
              traveling through it.

    Optional node attributes:
        - variables (list): A list of variable identifiers
              that will be tracked by the node. If the node
              is not a leaf node, these indicate variables
              with non-anticipativity constraints.
        - derived_variables (list): A list of variable or
              expression identifiers that will be tracked by
              the node (but will never have
              non-anticipativity constraints enforced).
        - bundle: A bundle identifier for the scenario
              defined by a leaf-stage node. This attribute
              is ignored on non-terminal tree nodes. If this
              attribute appears on at least one leaf-stage
              node (and is not set to :const:`None`), then
              it must be set on all leaf-stage nodes (to
              something other than :const:`None`);
              otherwise, an exception will be raised.

    Optional edge attributes:
        - weight (float): Indicates the conditional
              probability of moving from the parent node to
              the child node in the directed edge. If not
              present, it will be assumed that all edges
              leaving the parent node have equal probability
              (normalized to sum to one).

    Args:
        stage_names: Can define a list of stage names to use
           (assumed in time order). The length of this list
           must match the number of stages in the tree. The
           default value of :const:`None` indicates that
           stage names should be automatically generated
           with the form ['Stage1','Stage2',...].
        node_name_attribute: By default, node names are the
           same as the node hash in the networkx tree. This
           keyword can be set to the name of some property
           of nodes in the graph that will be used for their
           name in the PySP scenario tree.
        scenario_name_attribute: By default, scenario names
           are the same as the leaf-node hash in the
           networkx tree. This keyword can be set to the
           name of some property of leaf-nodes in the graph
           that will be used for their corresponding
           scenario name in the PySP scenario tree.
        edge_probability_attribute: Can be set to the name
           of some property of edges in the graph that
           defines the conditional probability of that
           branch (default: 'weight'). If this keyword is
           set to :const:`None`, then all branches leaving a
           node are assigned equal conditional
           probabilities.

    Raises:
        TypeError: if the graph is not a directed branching tree.
        ValueError: if the tree has fewer than 2 stages, the stage
            names are the wrong length or not unique, the edge
            probabilities leaving a node do not sum to 1, or the
            bundle specification is incomplete.
        KeyError: if a requested node-name, scenario-name or edge
            probability attribute is missing.

    Examples:

        A 2-stage scenario tree with 10 scenarios grouped
        into 2 bundles:

        >>> G = networkx.DiGraph()
        >>> G.add_node("root", variables=["x"])
        >>> N = 10
        >>> for i in range(N):
        >>>     node_name = "s"+str(i)
        >>>     bundle_name = "b"+str(i%2)
        >>>     G.add_node(node_name, bundle=bundle_name)
        >>>     G.add_edge("root", node_name, weight=1.0/N)
        >>> model = ScenarioTreeModelFromNetworkX(G)

        A 4-stage scenario tree with 125 scenarios:

        >>> branching_factor = 5
        >>> height = 3
        >>> G = networkx.balanced_tree(
                   branching_factor,
                   height,
                   networkx.DiGraph())
        >>> model = ScenarioTreeModelFromNetworkX(G)
    """

    if not has_networkx:                          #pragma:nocover
        raise ValueError(
            "networkx module is not available")

    if not networkx.is_tree(tree):
        raise TypeError(
            "Graph object is not a tree "
            "(see networkx.is_tree)")

    if not networkx.is_directed(tree):
        raise TypeError(
            "Graph object is not directed "
            "(see networkx.is_directed)")

    if not networkx.is_branching(tree):
        raise TypeError(
            "Graph object is not a branching "
            "(see networkx.is_branching)")

    # Node attribute dictionaries are reached through `tree.node` on
    # networkx 1.x and `tree.nodes` on 2.x (`tree.node` was removed in
    # networkx 2.4).  The presence of the legacy `.edge` attribute is used
    # as the 1.x indicator, matching the edge lookup in _add_node below.
    if hasattr(tree, 'edge'):
        node_data = tree.node
    else:
        node_data = tree.nodes

    in_degree_items = tree.in_degree()
    # Prior to networkx ~2.0, in_degree() returned a dictionary.
    # Now it is a view on items, so only call .items() for the old case
    if hasattr(in_degree_items, 'items'):
        in_degree_items = in_degree_items.items()
    root = [u for u, d in in_degree_items if d == 0]
    assert len(root) == 1
    root = root[0]
    num_stages = networkx.eccentricity(tree, v=root) + 1
    if num_stages < 2:
        raise ValueError(
            "The number of stages must be at least 2")
    m = CreateAbstractScenarioTreeModel()
    if stage_names is not None:
        unique_stage_names = set()
        # Pre-bind the counter so an empty stage_names reaches the
        # length check below instead of failing on an unbound name.
        cnt = 0
        for cnt, stage_name in enumerate(stage_names, 1):
            m.Stages.add(stage_name)
            unique_stage_names.add(stage_name)
        if cnt != num_stages:
            raise ValueError(
                "incorrect number of stages names (%s), should be %s"
                % (cnt, num_stages))
        if len(unique_stage_names) != cnt:
            raise ValueError("all stage names were not unique")
    else:
        for i in range(num_stages):
            m.Stages.add('Stage'+str(i+1))
    node_to_name = {}
    node_to_scenario = {}
    scenario_bundle = {}

    def _setup(u, succ):
        # First pass: register node and scenario names on the abstract
        # model, walking the DFS tree from u.
        if node_name_attribute is not None:
            if node_name_attribute not in node_data[u]:
                raise KeyError(
                    "node '%s' missing node name "
                    "attribute: '%s'"
                    % (u, node_name_attribute))
            node_name = node_data[u][node_name_attribute]
        else:
            node_name = u
        node_to_name[u] = node_name
        m.Nodes.add(node_name)
        if u in succ:
            for v in succ[u]:
                _setup(v, succ)
        else:
            # a leaf node
            if scenario_name_attribute is not None:
                if scenario_name_attribute not in node_data[u]:
                    raise KeyError(
                        "node '%s' missing scenario name "
                        "attribute: '%s'"
                        % (u, scenario_name_attribute))
                scenario_name = node_data[u][scenario_name_attribute]
            else:
                scenario_name = u
            node_to_scenario[u] = scenario_name
            m.Scenarios.add(scenario_name)
            scenario_bundle[scenario_name] = \
                node_data[u].get('bundle', None)

    # The successor map depends only on the tree, so it is computed once
    # and shared by both passes.
    succ = networkx.dfs_successors(tree, root)
    _setup(root, succ)
    m = m.create_instance()

    def _add_node(u, stage, succ, pred):
        # Second pass: fill stage, probability, variables, cost and
        # children for node u on the concrete model; returns its name.
        node_name = node_to_name[u]
        m.NodeStage[node_name] = m.Stages[stage]
        if u == root:
            m.ConditionalProbability[node_name] = 1.0
        else:
            assert u in pred
            # prior to networkx ~2.0, we used a .edge attribute on DiGraph,
            # which no longer exists.
            if hasattr(tree, 'edge'):
                edge = tree.edge[pred[u]][u]
            else:
                edge = tree.edges[pred[u], u]
            probability = None
            if edge_probability_attribute is not None:
                if edge_probability_attribute not in edge:
                    raise KeyError(
                        "edge '(%s, %s)' missing probability attribute: '%s'"
                        % (pred[u], u, edge_probability_attribute))
                probability = edge[edge_probability_attribute]
            else:
                probability = 1.0/len(succ[pred[u]])
            m.ConditionalProbability[node_name] = probability
        # get node variables
        if "variables" in node_data[u]:
            node_variables = node_data[u]["variables"]
            assert isinstance(node_variables, (tuple, list))
            for varstring in node_variables:
                m.NodeVariables[node_name].add(varstring)
        if "derived_variables" in node_data[u]:
            node_derived_variables = node_data[u]["derived_variables"]
            assert isinstance(node_derived_variables, (tuple, list))
            for varstring in node_derived_variables:
                m.NodeDerivedVariables[node_name].add(varstring)
        if "cost" in node_data[u]:
            assert isinstance(node_data[u]["cost"], six.string_types)
            m.NodeCost[node_name].value = node_data[u]["cost"]
        if u in succ:
            child_names = []
            for v in succ[u]:
                child_names.append(
                    _add_node(v, stage+1, succ, pred))
            total_probability = 0.0
            for child_name in child_names:
                m.Children[node_name].add(child_name)
                total_probability += \
                    pyomo.core.value(m.ConditionalProbability[child_name])
            if abs(total_probability - 1.0) > 1e-5:
                raise ValueError(
                    "edge probabilities leaving node '%s' "
                    "do not sum to 1 (total=%r)"
                    % (u, total_probability))
        else:
            # a leaf node
            scenario_name = node_to_scenario[u]
            m.ScenarioLeafNode[scenario_name] = node_name
            m.Children[node_name].clear()

        return node_name

    _add_node(root,
              1,
              succ,
              networkx.dfs_predecessors(tree, root))

    if any(_b is not None for _b in scenario_bundle.values()):
        if any(_b is None for _b in scenario_bundle.values()):
            raise ValueError("Incomplete bundle specification. "
                             "All scenarios require a bundle "
                             "identifier.")
        m.Bundling.value = True
        bundle_scenarios = {}
        for bundle_name in sorted(set(scenario_bundle.values())):
            m.Bundles.add(bundle_name)
            bundle_scenarios[bundle_name] = []
        for scenario_name in m.Scenarios:
            bundle_scenarios[scenario_bundle[scenario_name]].\
                append(scenario_name)
        for bundle_name in m.Bundles:
            for scenario_name in sorted(bundle_scenarios[bundle_name]):
                m.BundleScenarios[bundle_name].add(scenario_name)

    return m
def test_predecessor(self):
    """Compare DFS predecessor maps of self.G and self.D with expected dicts."""
    # Search rooted explicitly at node 0 of G.
    expected_from_zero = {1: 0, 2: 1, 3: 4, 4: 2}
    observed_from_zero = nx.dfs_predecessors(self.G, source=0)
    assert_equal(observed_from_zero, expected_from_zero)

    # No source given for D.
    expected_no_source = {1: 0, 3: 2}
    observed_no_source = nx.dfs_predecessors(self.D)
    assert_equal(observed_no_source, expected_no_source)
def solveSteinerTreeDTH(self):
    """Build a visiting order with drop-off annotations and return its cost.

    Steps, as implemented below:
      1. List the leaf homes of ``self.steiner_tree`` in DFS preorder from
         ``self.start_loc``.
      2. For each consecutive pair of leaf homes, walk the shortest path
         between them in ``self.netx_graph``; if an interior node of that
         path lies in the Steiner tree, record the first leaf's DFS-tree
         parent as its drop-off location.
      3. Build a new Steiner tree over the drop-off locations (or the home
         itself when it has none) plus the start location, and take its
         DFS preorder.
      4. Annotate entries of that order with the homes dropped off there,
         as ``"<location> <home>"`` strings.

    Returns whatever ``self.get_cost_params(final_order)`` returns.
    """
    print("start node: ", self.start_loc)
    print("home nodes: ", self.homes)

    leaf_homes = self.getLeafNodes()
    preorder_nodes = dfs_preorder_nodes(self.steiner_tree,
                                        source=self.start_loc)
    traversal_ordering = [n for n in preorder_nodes if n in leaf_homes]
    print("Traversal ordering of the leaf: ", traversal_ordering)

    # The DFS-tree parent map depends only on the Steiner tree and the
    # start location, so compute it once rather than twice per matching
    # path node inside the nested loops below.
    tree_parents = networkx.dfs_predecessors(self.steiner_tree,
                                             source=self.start_loc)

    # Hash map of the dropoffs: leaf home -> node where it is dropped off.
    dropoffs = dict()
    for i in range(len(traversal_ordering) - 1):
        current_leaf_home = traversal_ordering[i]
        next_leaf_home = traversal_ordering[i + 1]
        # Shortest path between current and next leaf home on the graph.
        shortest_path = netx.shortest_path(self.netx_graph,
                                           source=current_leaf_home,
                                           target=next_leaf_home)
        print("Shortest path between ", current_leaf_home, " and ",
              next_leaf_home, shortest_path)
        for node in shortest_path:
            # Check if any interior node of the shortest path is a part of
            # the Steiner tree.
            if (node != current_leaf_home and node != next_leaf_home
                    and node in self.steiner_tree):
                # Shares a common ancestor: drop off the current leaf home
                # at its parent in the DFS tree.
                print("Dfs ", tree_parents)
                dropoffs[current_leaf_home] = tree_parents[current_leaf_home]
    print("Dropoffs ", dropoffs)

    new_candidate_dropoff = set()
    for home in self.homes:
        if home in dropoffs:
            new_candidate_dropoff.add(dropoffs[home])
        else:
            new_candidate_dropoff.add(home)
    print("homes ", self.homes)
    new_candidate_dropoff = list(new_candidate_dropoff)
    # Add source to the candidate dropoff list to create the steiner tree.
    new_candidate_dropoff.append(self.start_loc)

    steiner_tree_candidate_dropoffs = steiner_tree(self.netx_graph,
                                                   new_candidate_dropoff,
                                                   weight='weight')
    preorder_nodes = dfs_preorder_nodes(steiner_tree_candidate_dropoffs,
                                        source=self.start_loc)
    preorder_nodes = list(preorder_nodes)
    final_order = [
        n for n in preorder_nodes if n in steiner_tree_candidate_dropoffs
    ]

    # NOTE(review): final_order is edited while it is being iterated, and
    # the else-branch overwrites the entries *following* `elem` when more
    # than one home is dropped there.  That behaviour is kept exactly as
    # in the original; confirm it is intended.
    for elem in final_order:
        if elem in dropoffs.values():
            keys = [k for k, v in dropoffs.items() if v == elem]
            if elem in self.homes or elem == self.start_loc:
                for i in keys:
                    final_order.insert(
                        final_order.index(elem) + 1, elem + " " + i)
            else:
                index = final_order.index(elem)
                for i in keys:
                    final_order[index] = elem + " " + i
                    index = index + 1
    print("final order ", final_order)
    return self.get_cost_params(final_order)
def unaccelerated_get_node_to_pset(T, root,
        node_to_allowed_states=None, P_default=None):
    """
    This is unused, but could possibly be used for unit testing.

    For every node of T visited in DFS postorder from root, compute a set
    of states as the intersection of whichever of these constraints are
    available: the nodes of the transition graph on the parent edge, the
    explicitly allowed states of the node, and the states that have a
    transition into the already-computed set of every child.

    Parameters
    ----------
    T : networkx graph
        Tree whose edges may carry a 'P' attribute; P_default is used
        for edges without one.
    root : node of T
        Root of the traversal.
    node_to_allowed_states : dict, optional
        Map from node to a collection of allowed states.
        If None, no direct per-node constraint is applied.
    P_default : optional
        Fallback for edges without 'P'; also used as a last resort to
        define the set of a node that has no other constraint.

    Returns
    -------
    node_to_pset : dict
        Map from node to its set of states.

    Raises
    ------
    ValueError
        If no set of states could be determined for some node.

    """
    # Bookkeeping.
    successors = nx.dfs_successors(T, root)
    predecessors = nx.dfs_predecessors(T, root)

    # Compute the map from node to set.
    node_to_pset = {}
    for nb in nx.dfs_postorder_nodes(T, root):

        # If a parent node is available, get a set of states
        # involved in the transition matrix associated with the parent edge.
        # A more complicated implementation would use only the sink
        # states of that transition matrix.
        na_set = None
        if nb in predecessors:
            na = predecessors[nb]
            P = T[na][nb].get('P', P_default)
            na_set = set(P)

        # Use the set of allowed states for the current node,
        # if it is known.
        # The map itself is optional (it defaults to None), so guard the
        # membership test.
        nb_set = None
        if node_to_allowed_states is not None:
            if nb in node_to_allowed_states:
                nb_set = set(node_to_allowed_states[nb])

        # If a child node is available, get the set of states
        # that have transition to child states
        # for which the child subtree likelihoods are positive.
        nc_set = None
        if nb in successors:
            for nc in successors[nb]:
                allowed_set = set()
                P = T[nb][nc].get('P', P_default)
                for sb, sc in P.edges():
                    if sc in node_to_pset[nc]:
                        allowed_set.add(sb)
                if nc_set is None:
                    nc_set = allowed_set
                else:
                    nc_set.intersection_update(allowed_set)

        # Take the intersection of informative constraints due to
        # possible parent transitions,
        # possible direct constraints on the node state,
        # and possible child node state constraints.
        pset = None
        for constraint_set in (na_set, nb_set, nc_set):
            if constraint_set is not None:
                if pset is None:
                    pset = constraint_set
                else:
                    pset.intersection_update(constraint_set)

        # If the pset is still None,
        # then as a last attempt to get a node set,
        # try using the states in P_default if it is available.
        if pset is None:
            if P_default is not None:
                pset = set(P_default)

        # This value should not be None unless there has been some problem.
        if pset is None:
            raise ValueError('internal error')

        # Define the pset for the node.
        node_to_pset[nb] = pset

    # Return the node_to_pset map.
    return node_to_pset
def add_trajectories(T, root, trajectories, edge_to_event_times=None):
    """
    Construct a tree with merged trajectories.

    Parameters
    ----------
    T : undirected weighted networkx graph
        A weighted base tree.
    root : integer
        Root node common to all trajectories.
    trajectories : sequence of undirected weighted networkx graphs
        Edges should be annotated with 'weight' and with 'state'.
        The state should change only at nodes of degree two.
    edge_to_event_times : dict, optional
        If available, this provides a map from an edge of T
        to a collection of times.
        The format of each edge key is as an ordered pair of nodes of T,
        in bfs order radiating from the root.
        The times are with respect to the node that is closer to the root.
        The purpose of this arg is to optionally break the trajectory
        history into pieces for resampling the trajectory of
        a variable that depends on the trajectories of the variables
        provided to this function.

    Returns
    -------
    T_merged : undirected weighted networkx graph
        A new tree with more nodes.
        Edges are annotated with 'states' which gives a state
        for each trajectory.
    dummy_nodes : set of integers
        A set of dummy nodes added to the tree.
        These correspond to edge_to_event_times provided as input
        to this function.

    Raises
    ------
    ValueError
        If a trajectory does not follow the shape of the base tree or
        does not have the same total edge weight as the base tree.
    Exception
        If, after replaying the events along an edge, a trajectory is not
        in the state expected at the far endpoint ('internal error').

    """
    # Bookkeeping.
    predecessors = nx.dfs_predecessors(T, root)
    T_bfs_edges = list(tuple(x) for x in nx.bfs_edges(T, root))

    # These depend only on the base tree, so build them once instead of
    # once per trajectory.  Neither set is mutated below.
    base_nodes = set(T)
    base_tree_edges = set(T_bfs_edges)

    # Check that the trajectories have correct shape.
    for traj in trajectories:
        traj_specific_nodes = set(traj) - base_nodes
        traj_skeleton = remove_selected_degree_two_nodes(
                traj, root, traj_specific_nodes)
        traj_tree_edges = set(nx.bfs_edges(traj_skeleton, root))
        extra_base_tree_edges = base_tree_edges - traj_tree_edges
        extra_traj_tree_edges = traj_tree_edges - base_tree_edges
        if extra_base_tree_edges or extra_traj_tree_edges:
            msg = ('expected the trajectory '
                    'to follow the basic shape of the base tree; ')
            if extra_base_tree_edges:
                msg += 'extra base tree edges: %s ' % extra_base_tree_edges
            if extra_traj_tree_edges:
                msg += 'extra traj tree edges: %s ' % extra_traj_tree_edges
            raise ValueError(msg)

    # Check that the trajectories have the correct total edge weight.
    total_base_edge_weight = T.size(weight='weight')
    for traj in trajectories:
        traj_weight = traj.size(weight='weight')
        if not np.allclose(traj_weight, total_base_edge_weight):
            raise ValueError('each trajectory should have '
                    'the same total weight as the base tree\n'
                    'base tree weight: %s\n'
                    'traj tree weight: %s' % (
                        total_base_edge_weight, traj_weight))

    # For each trajectory get the map from base node to state.
    traj_node_to_state = []
    for traj in trajectories:
        # A fresh set per call, in case the helper modifies its argument.
        query_nodes = set(T)
        node_to_state = get_node_to_state(traj, query_nodes)
        traj_node_to_state.append(node_to_state)

    # For each directed edge of the base tree,
    # maintain a priority queue of interleaved transitions along trajectories.
    base_edge_to_q = {}
    for base_edge in T_bfs_edges:
        base_edge_to_q[base_edge] = []

    # Put dummy events into the edge-specific priority queues.
    # NOTE(review): queue items are (time, traj_index, state) tuples with
    # None entries for dummy events.  If a dummy event and a trajectory
    # event on the same edge have exactly equal times, the tuple comparison
    # falls through to None vs. int, which presumably fails on Python 3 --
    # confirm whether such ties can occur.
    if edge_to_event_times is not None:
        for edge, times in edge_to_event_times.items():
            q = base_edge_to_q[edge]
            for tm in times:
                q_item = (tm, None, None)
                heapq.heappush(q, q_item)

    # For each trajectory, put events in the priority queue of each edge.
    for traj_index, traj in enumerate(trajectories):

        # Get the timing and nature of events along the edges.
        event_map = get_event_map(T, root, traj, predecessors=predecessors)

        # Add the event information to the priority queue
        # of the appropriate edge.
        for base_edge, events in event_map.items():
            for tm, traj_edge_object in events:
                traj_state = traj_edge_object['state']
                q_item = (tm, traj_index, traj_state)
                heapq.heappush(base_edge_to_q[base_edge], q_item)

    # Initialize the return values.
    T_merged = nx.Graph()
    dummy_nodes = set()

    # For each edge of the original tree,
    # add segments to the merged tree, such that no trajectory
    # transition occurs within any segment.
    # Annotate every segment with the state of every trajectory.
    next_new_node = max(T) + 1
    for base_edge in T_bfs_edges:

        # Unpack the edge endpoints.
        base_na, base_nb = base_edge

        # Get the edge weight from the base tree.
        base_edge_weight = T[base_na][base_nb]['weight']

        # Initialize the most recent segment node.
        prev_node = base_na

        # Define the trajectory states at the beginning of the edge.
        current_states = []
        for node_to_state in traj_node_to_state:
            current_states.append(node_to_state[base_na])

        # Iterate through the priority queue, adding an edge
        # when a transition is encountered on any trajectory.
        q = base_edge_to_q[base_edge]
        tm = 0
        while q:
            tm_event, traj_index, traj_state = heapq.heappop(q)
            T_merged.add_edge(
                    prev_node, next_new_node,
                    weight=tm_event-tm,
                    states=list(current_states))

            # If the traj_index is None then it means that
            # the event is a dummy event.
            # Dummy events do not change the state.
            if traj_index is None:
                dummy_nodes.add(next_new_node)
            else:
                current_states[traj_index] = traj_state

            # Update for the next iteration.
            prev_node = next_new_node
            next_new_node += 1
            tm = tm_event

        # Check that we have reached the states
        # that we had expected to reach.
        for traj_index, node_to_state in enumerate(traj_node_to_state):
            if node_to_state[base_nb] != current_states[traj_index]:
                raise Exception('internal error')

        # Add the final segment.
        T_merged.add_edge(
                prev_node, base_nb,
                weight=base_edge_weight-tm,
                states=list(current_states))

    # Return the merged tree and the set of dummy nodes.
    return T_merged, dummy_nodes
def minibatch_(all_train_imgs,all_test_imgs,clf,scaler,w, loss__,mse,hinge1,hinge2,full_image,alphas,learning_rate,test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,mean,variance, mode,mous):
    """
    Process the single minibatch index `minibatch` according to `mode`.

    The data for index `minibatch` is loaded with `get_data`, a tree is
    built over the proposal boxes with `create_tree`, the tree is pruned,
    and then a mode-specific action is performed.

    This is Python 2 code (print statements, dict.iteritems).

    Besides its parameters the function reads these module-level names:
    class_, c, less_features, features_used, prune_fully_covered,
    prune_tree_levels, learn_intersections, normalize, squared_hinge_loss.

    The parameters all_train_imgs, all_test_imgs, mse, hinge1, hinge2,
    sum_x, n_samples, sum_sq_x, mean, variance and mous are not read by
    any active (non commented-out) statement in this function.

    Return value by mode (None is returned implicitly when `X_p` is empty
    or when the mode matches none of the branches, e.g. the extract modes):

    - 'mean_variance': `scaler`, after `scaler.partial_fit(X_p)`.
    - 'train': the tuple (w, len(y_p), len(G.nodes())).
    - 'scikit_train': `clf`, after `clf.partial_fit(norm_x, y_p)`.
    - 'loss_train' / 'loss_test': the tuple (loss__, full_image), each
      with one entry appended.
    - 'loss_scikit_train' / 'loss_scikit_test': `loss__` with the summed
      squared prediction error appended.
    - 'levels_train' / 'levels_test': the tuple (cpls, truelvls,
      used_boxes_, boxes, preds, mean score over fully covered levels,
      covered_levels).

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed copy of this function; confirm it against the
    original layout before relying on it.
    """
    # The three *_test modes listed here read the 'test' split; every other
    # mode reads the 'training' split.
    if mode == 'loss_test' or mode == 'loss_scikit_test' or mode == 'levels_test':
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'test', c, subsamples)
    else:
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'training', c, subsamples)
    if X_p != []:
        boxes = []
        ground_truth = inv[0][2]
        img_nr = inv[0][0]
        # Optionally keep only the first `features_used` feature dimensions.
        if less_features:
            X_p = [fts[0:features_used] for fts in X_p]
        # Read one proposal box per line of the coordinate file.
        # NOTE(review): when the file is missing only 'warning' is printed and
        # `f` stays unbound, so the loop below would raise NameError.
        if os.path.isfile('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'):
            f = open('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt', 'r')
        else:
            print 'warning'
        for line, y in zip(f, inv):
            tmp = line.split(',')
            coord = []
            for s in tmp:
                coord.append(float(s))
            # Each box is stored as [coordinates, y[2]].
            boxes.append([coord,y[2]])
        assert(len(boxes)<1500)
        boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,1500)

        # Read the ground-truth boxes (integer values, one box per line) if a
        # ground-truth file exists for this class and image.
        gr = []
        if os.path.isfile('/home/stahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d"))):
            gr = open('/home/stahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d")), 'r')
        ground_truths = []
        if gr != []: # no class image -> no ground truth (original author: probably only needed for learn_intersections)
            for line in gr:
                tmp = line.split(',')
                ground_truth = []
                for s in tmp:
                    ground_truth.append(int(s))
                ground_truths.append(ground_truth)

        if mode == 'mean_variance':
            scaler.partial_fit(X_p) # Don't cheat - fit only on training data
            return scaler

        # create_tree
        G, levels = create_tree(boxes)

        # Prune the tree so that only levels which fully cover the image remain.
        # (marked "tested" by the original author)
        if prune_fully_covered:
            nr_levels_covered = 100
            total_size = surface_area(boxes, levels[0])
            for level in levels:
                sa = surface_area(boxes, levels[level])
                sa_co = sa/total_size
                # A level counts as fully covering when its area ratio to
                # level 0 is exactly 1.0; nodes of other levels are removed.
                if sa_co != 1.0:
                    G.remove_nodes_from(levels[level])
                else:
                    nr_levels_covered = level
            levels = {k: levels[k] for k in range(0,nr_levels_covered + 1)}

        #either subsampling or prune_fully_covered
        #assert(subsampling != prune_fully_covered)

        # prune levels, speedup + performance
        levels = {k:v for k,v in levels.iteritems() if k<prune_tree_levels}

        # Remove from G every node that is not in one of the kept levels.
        remaining_nodes = []
        for lev in levels.values():
            remaining_nodes.extend(lev)
        # NOTE(review): nodes are removed while iterating G.nodes(); this is
        # only safe if G.nodes() returns a fresh list - confirm for the
        # networkx version in use.
        for g_node in G.nodes():
            if g_node not in remaining_nodes:
                G.remove_node(g_node)

        coords = []
        features = []
        f_c = []
        f = []
        # Obtain the coordinate file (f_c) and feature file (f) of the
        # upper-level boxes. In the extract modes missing files are created:
        # coordinates are written here and features are produced by the
        # external EuVisual command.
        if learn_intersections and not prune_fully_covered:
            if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r+')
            else:
                if mode == 'extract_train' or mode == 'extract_test':
                    print 'coords for %s with %s samples have to be extracted'%(img_nr,subsamples)
                    f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'w')
                    # Write every distinct box once, as four comma-separated values.
                    for level in levels:
                        levl_boxes = extract_coords(levels[level], boxes)
                        if levl_boxes != []:
                            for lvl_box in levl_boxes:
                                if lvl_box not in coords:
                                    coords.append(lvl_box)
                                    f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3]))
                                    f_c.write('\n')
                    f_c.close()
                    print 'features for %s with %s samples have to be extracted'%(img_nr,subsamples)
                    # NOTE(review): each os.system call runs in its own shell, so
                    # these exports presumably do not affect the EuVisual call
                    # below - confirm.
                    os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc')
                    os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib')
                    #print "EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples)
                    # NOTE(review): the first %s (the image path) receives class_,
                    # so the image number lands in the following slots; the
                    # argument order looks shifted - confirm.
                    os.system("EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/%s_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt"%(class_,(format(img_nr, "06d")),format(img_nr, "06d"),subsamples,class_,format(img_nr, "06d"),subsamples))
                    # Re-open the coordinate file for reading; coords is reset
                    # because it is filled again from f_c further below.
                    if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                        f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r')
                    else:
                        f_c = []
                    coords = []
            if os.path.isfile('/home/stahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                f = open('/home/stahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r')
        elif prune_fully_covered:
            # NOTE(review): the existence checks in this branch use a hard-coded
            # 'sheep_' prefix while the write/re-open paths use class_ - confirm
            # whether that is intended.
            if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                f_c = open('/home/stahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r+')
            else:
                if mode == 'extract_train' or mode == 'extract_test':
                    print 'coords for %s with fully_cover_tree samples have to be extracted'%(img_nr)
                    f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt'%(class_,format(img_nr, "06d")), 'w')
                    for level in levels:
                        levl_boxes = extract_coords(levels[level], boxes)
                        if levl_boxes != []:
                            for lvl_box in levl_boxes:
                                if lvl_box not in coords:
                                    coords.append(lvl_box)
                                    f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3]))
                                    f_c.write('\n')
                    f_c.close()
                    print 'features for %s with fully_cover_tree samples have to be extracted'%(img_nr)
                    os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc')
                    os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib')
                    #print "EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples)
                    # NOTE(review): same apparent argument shift as in the branch
                    # above (class_ fills the image path slot) - confirm.
                    os.system("EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/%s_%s_fully_cover_tree.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt"%(class_,(format(img_nr, "06d")),format(img_nr, "06d"),class_,format(img_nr, "06d")))
                    if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt'%(class_,format(img_nr, "06d"))):
                        f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt'%(class_,format(img_nr, "06d")), 'r')
                    else:
                        f_c = []
                    coords = []
            if os.path.isfile('/home/stahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                f = open('/home/stahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r')
        # else: #we don't need to load intersections
        #     if os.path.isfile('/home/stahl/Features_prop_windows/Features_upper/%s%s.txt'%(class_,format(img_nr, "06d"))):
        #         f = open('/home/stahl/Features_prop_windows/Features_upper/%s%s.txt'%(class_,format(img_nr, "06d")), 'r')
        #     if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s%s.txt'%(class_,format(img_nr, "06d"))):
        #         f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s%s.txt'%(class_,format(img_nr, "06d")), 'r+')
        #     else:
        #         print '/home/stahl/Features_prop_windows/upper_levels/%s%s.txt does not exist'%(class_,format(img_nr, "06d"))

        # Parse the coordinate and feature files: one comma-separated row of
        # floats per line. Both loops are no-ops when f_c / f are still [].
        for i,line in enumerate(f_c):
            str_ = line.rstrip('\n').split(',')
            cc = []
            for s in str_:
                cc.append(float(s))
            coords.append(cc)
        for i,line in enumerate(f):
            str_ = line.rstrip('\n').split(',')
            ff = []
            for s in str_:
                ff.append(float(s))
            features.append(ff)
        if less_features:
            features = [fts[0:features_used] for fts in features]
        if normalize and features != []:
            features = scaler.transform(features)
        print len(y_p), len(X_p)
        print len(features), len(coords)
        # Every upper-level box must have exactly one feature row.
        assert len(coords) == len(features)

        # append x,y of intersections
        #if learn_intersections:
        #    for inters,coord in zip(features,coords):
        #        if inters not in pruned_x:
        #            X_p.append(inters)
        #            ol = 0.0
        #            ol = get_intersection_count(coord, ground_truths)
        #            y_p.append(ol)
        print len(y_p), len(X_p)

        # Normalize the proposal features with the scaler when requested.
        norm_x = []
        if normalize:
            # for p_x in pruned_x:
            #     norm_x.append((p_x-mean)/variance)
            norm_x = scaler.transform(X_p)
        else:
            norm_x = X_p
        data = (G, levels, y_p, norm_x, boxes, ground_truths, alphas)
        sucs = nx.dfs_successors(G)
        predecs = nx.dfs_predecessors(G)

        # Preprocess: map node -> list of children. Node ids skipped between
        # two consecutive keys of `sucs` get an empty child list.
        # NOTE(review): this presumes integer node ids visited in increasing
        # order - confirm.
        children = {}
        last = -1
        for node,children_ in zip(sucs.keys(),sucs.values()):
            if node != last+1:
                for i in range(last+1,node):
                    children[i] = []
                children[node] = children_
            elif node == last +1:
                children[node] = children_
            last = node

        if mode == 'train':
            if alphas[2] == 0 and alphas[3] == 0:
                # Just learn proposals and intersections: only the proposals and
                # intersections used in the pruned tree are fed to learn_root.
                used_ind = get_used_proposals(G, boxes, coords, levels)
                used_x = []
                used_y = []
                for ind in used_ind['prop']:
                    used_x.append(norm_x[ind])
                    used_y.append(y_p[ind])
                for ind in used_ind['inters']:
                    used_x.append(features[ind])
                    used_y.append(get_intersection_count(coords[ind], ground_truths))
                print len(used_x),len(used_y)
                for x_i,y_i in zip(used_x,used_y):
                    w = learn_root(w,x_i,y_i,learning_rate,alphas)
            else:
                # Node 0 is updated with learn_root (only if alphas[0] != 0);
                # every other node goes through constrained_regression.
                nodes = list(G.nodes())
                for node in nodes:
                    if node == 0:
                        if alphas[0] != 0:
                            w = learn_root(w,norm_x[0],y_p[0],learning_rate,alphas)
                        else:
                            print 'learn nothing'
                    else:
                        w = constrained_regression(class_,features,coords,scaler,w,norm_x,y_p,node,predecs,children,boxes,learning_rate,alphas,img_nr, squared_hinge_loss)
            return w, len(y_p), len(G.nodes())
        elif mode == 'scikit_train':
            print norm_x,y_p
            clf.partial_fit(norm_x,y_p)
            return clf
        elif mode == 'loss_train' or mode == 'loss_test':
            loss__.append(loss(class_,squared_hinge_loss,features,coords,scaler, w, data, predecs, children,img_nr,-1))
            # mse.append(((data[2] - np.dot(w,np.array(data[3]).T)) ** 2).sum())
            # a2 = alphas[2]
            # data = (G, levels, y_p, norm_x, boxes, ground_truths, [0,0,a2,0])
            # hinge1.append(loss(class_,squared_hinge_loss,features,coords,scaler, w, data, predecs, children,img_nr,-1))
            # a3 = alphas[3]
            # data = (G, levels, y_p, norm_x, boxes, ground_truths, [0,0,0,a3])
            # hinge2.append(loss(class_,squared_hinge_loss,features,coords,scaler, w, data, predecs, children,img_nr,-1))
            # Record [target, prediction clipped at 0] for the first row (index 0).
            full_image.append([y_p[0],max(0,np.dot(w,np.array(norm_x[0]).T))])
            return loss__, full_image
        elif mode == 'loss_scikit_test' or mode == 'loss_scikit_train':
            # Summed squared error of the scikit model on this minibatch.
            loss__.append(((clf.predict(norm_x) - y_p)**2).sum())
            return loss__
        elif mode == 'levels_train' or mode == 'levels_test':
            #im = mpimg.imread('/home/stahl/Images/'+ (format(img_nr, "06d")) +'.jpg')
            # Linear prediction w . x for every proposal.
            preds = []
            for i,x_ in enumerate(norm_x):
                preds.append(np.dot(w, x_))
            cpls = []
            truelvls = []
            used_boxes_ = []
            total_size = surface_area(boxes, levels[0])
            fully_covered_score = 0.0
            fully_covered_score_lvls = 0.0
            covered_levels = []
            for level in levels:
                # tru and truelvls exist in order to check that the
                # count_per_level method is correct.
                cpl,used_boxes,_ = count_per_level(class_,features,coords,scaler,w, preds, img_nr, boxes,levels[level], '',[])
                cpl = max(0,cpl)
                if used_boxes != []:
                    used_boxes_.append(used_boxes[0][1])
                tru,_,_ = count_per_level(class_,features,coords,scaler,w, preds, img_nr, boxes,levels[level], 'gt',[])
                cpls.append(cpl)
                sa = surface_area(boxes, levels[level])
                sa_co = sa/total_size
                # Accumulate the count of levels whose area ratio is exactly 1.0.
                if sa_co == 1.0:
                    fully_covered_score += cpl
                    fully_covered_score_lvls += 1
                    covered_levels.append(cpl)
                truelvls.append(tru)
            # NOTE(review): divides by fully_covered_score_lvls, which stays 0.0
            # if no level has ratio exactly 1.0 - confirm that cannot happen.
            return cpls, truelvls, used_boxes_,boxes,preds,fully_covered_score/fully_covered_score_lvls,covered_levels
def preorder_st_traversal(H, T):
    """
    Build a PDP tour by walking tree **T** in **s-t** oriented pre-order.

    **s** (source) and **t** (target) are read from ``T.graph``. The walk is
    a pre-order traversal in which, wherever there is a choice, the branch
    leading to **t** is postponed so that it is visited last.

    Parameters
    ----------
    H: graph
        The :ref:`request graph<Request (PDP) Graph>`. Its edge data is
        copied onto the edges of the returned tour.
    T: graph
        A spanning tree of **H**. ``T.graph`` must provide the keys
        ``'s'``, ``'t'`` and ``'requests'``.

    Returns
    -------
    P: graph
        :ref:`PDP tour<Tour Graph>` solution, with the total edge weight
        stored in ``P.graph['dist']``.
    """
    source = T.graph['s']
    target = T.graph['t']
    requests = T.graph['requests']

    tour = nx.Graph()
    tour.graph.update(T.graph)

    # Child and parent maps of T, oriented away from the source.
    tree_nodes = set(T.nodes())
    child_map = nx.dfs_successors(T, source)
    # Nodes missing from the successor map get an empty child list so
    # that every node can be looked up.
    for childless in tree_nodes.difference(set(child_map.keys())):
        child_map[childless] = []
    parent_map = nx.dfs_predecessors(T, source)

    # Collect the nodes on the path from t up to (but excluding) s; these
    # mark the branch that has to be visited last.
    last_branch = {target}
    ancestor = parent_map[target]
    while ancestor != source:
        last_branch.add(ancestor)
        ancestor = parent_map[ancestor]

    def _extend_boundary(boundary, parent):
        """Queue the children of ``parent``.

        Children on the last branch go to the front of the list so that
        they are popped (visited) last; all others go to the back.
        """
        for child in child_map[parent]:
            if child in last_branch:
                boundary = [child] + boundary
            else:
                boundary = boundary + [child]
        return boundary

    def _is_feasible(node):
        """Tell whether ``node`` may be appended to the tour now.

        True for an origin, for a destination whose request is already
        started, or for t once every other node is on the tour.
        """
        return (
            requests[node][1] == 'o'
            or (requests[node][1] == 'd'
                and requests[node][0] in started_requests)
            or (node == target
                and tour.number_of_nodes() == T.number_of_nodes() - 1))

    # Nodes still waiting to be considered.
    boundary = []
    # Request numbers of every node already placed on the tour.
    started_requests = {requests[source][0]}

    # The tour starts at the source.
    tour.add_nodes_from([(source, T.nodes[source])])
    tail = source

    # Sweep the tree from the source again and again until every node
    # has been placed on the tour.
    while tour.number_of_nodes() < T.number_of_nodes():
        boundary = _extend_boundary(boundary, source)

        while boundary:
            current = boundary.pop()
            if current not in tour and _is_feasible(current):
                started_requests.add(requests[current][0])
                tour.add_nodes_from([(current, T.nodes[current])])
                tour.add_edges_from([(tail, current, H.edges[tail, current])])
                tail = current
            # Children are queued whether or not `current` was added.
            boundary = _extend_boundary(boundary, current)

    # Close the cycle with the edge from t back to s.
    tour.add_edges_from([(target, source, H.edges[target, source])])
    tour.graph['dist'] = tour.size(weight='weight')
    return tour
def get_node_to_pset(T, root, node_to_state=None, P_default=None):
    """
    For each node, get the set of states that give positive subtree likelihood.

    This function is analogous to get_node_to_pmap.

    Parameters
    ----------
    T : undirected unweighted acyclic networkx graph
        A tree whose edges are optionally annotated
        with edge-specific state transition probability matrix P.
    root : integer
        The root node.
    node_to_state : dict, optional
        A sparse map from a node to its known state if any.
        Nodes in this map are assumed to have completely known state.
        Nodes not in this map are assumed to have completely missing state.
        If this map is not provided,
        all states information will be assumed to be completely missing.
        Entries of this dict that correspond to nodes not in the tree
        will be silently ignored.
    P_default : networkx directed weighted graph, optional
        Sparse transition matrix to be used for edges
        which are not annotated with an edge-specific transition matrix.
        It is required whenever it has to be consulted, i.e. for a
        single-node tree whose root state is unknown and for every edge
        that carries no 'P' annotation.

    Returns
    -------
    node_to_pset : dict
        A map from a node to the set of states with positive subtree
        likelihood.

    Raises
    ------
    ValueError
        If the tree has a single node that is not `root`, or if no
        constraint at all could be derived for some node.

    """
    # Treat an omitted map as "no node has a known state", as documented.
    # Without this the membership tests below raise TypeError on None.
    if node_to_state is None:
        node_to_state = {}

    # Special case: a tree consisting of a single node.
    if len(set(T)) == 1:
        if root not in T:
            raise ValueError('unrecognized root')
        if root in node_to_state:
            root_pset = {node_to_state[root]}
        else:
            # Unknown root state: every state of the default matrix is allowed.
            root_pset = set(P_default)
        return {root: root_pset}

    # Bookkeeping.
    successors = nx.dfs_successors(T, root)
    predecessors = nx.dfs_predecessors(T, root)

    # Compute the map from node to set.
    # Postorder guarantees that children are processed before their parent.
    node_to_pset = {}
    for nb in nx.dfs_postorder_nodes(T, root):

        # If a parent node is available, get a set of states
        # involved in the transition matrix associated with the parent edge.
        # A more complicated implementation would use only the sink
        # states of that transition matrix.
        na_set = None
        if nb in predecessors:
            na = predecessors[nb]
            P = T[na][nb].get('P', P_default)
            na_set = set(P)

        # If the state of the current node is known,
        # define the set containing only that state.
        nb_set = None
        if nb in node_to_state:
            nb_set = {node_to_state[nb]}

        # If a child node is available, get the set of states
        # that have transition to child states
        # for which the child subtree likelihoods are positive.
        nc_set = None
        if nb in successors:
            for nc in successors[nb]:
                P = T[nb][nc].get('P', P_default)
                child_pset = node_to_pset[nc]
                allowed_set = {sb for sb, sc in P.edges() if sc in child_pset}
                if nc_set is None:
                    nc_set = allowed_set
                else:
                    nc_set.intersection_update(allowed_set)

        # Take the intersection of informative constraints due to
        # possible parent transitions,
        # possible direct constraints on the node state,
        # and possible child node state constraints.
        pset = None
        for constraint_set in (na_set, nb_set, nc_set):
            if constraint_set is None:
                continue
            if pset is None:
                pset = constraint_set
            else:
                pset.intersection_update(constraint_set)

        # This value should not be None unless there has been some problem.
        if pset is None:
            raise ValueError('internal error')

        # Define the pset for the node.
        node_to_pset[nb] = pset

    # Return the node_to_pset map.
    return node_to_pset
# --- 🖥 Data analysis packages 🖥 --- #
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import collections
from collections import deque
import time

# --- 📃 Web scraping packages 📃 --- #
import requests
from bs4 import BeautifulSoup
import re

# --- 📅 Search algorithms and network analysis package 📅 --- #
import networkx as nx

# Undirected example graph used by the problems below. Several edges are
# listed in both directions; an undirected graph stores each of them once.
G = nx.Graph()
G.add_edges_from([
    ("A", "B"), ("A", "S"),
    ("B", "A"),
    ("C", "D"), ("C", "E"), ("C", "F"), ("C", "S"),
    ("D", "C"),
    ("E", "C"), ("E", "H"),
    ("F", "C"), ("F", "G"),
    ("H", "E"), ("H", "G"),
    ("S", "A"), ("S", "C"), ("S", "G"),
])

# Draw the graph with node labels (the blocking show() call stays disabled).
nx.draw_networkx(G, with_labels=True)
plt.title('A more complex graph')
#plt.show();

# Depth-first predecessor of every node reached from 'C'.
print(nx.dfs_predecessors(G, source='C'))

# Average shortest path length over the whole graph, rounded to an integer.
print("The average shortest path is:",
      round(nx.average_shortest_path_length(G)))

# Every shortest path between 'A' and 'H', followed by their common length.
print("Shortest path scenarios:",
      list(nx.all_shortest_paths(G, source='A', target='H')))
print("The shortest path length is",
      nx.shortest_path_length(G, source='A', target='H'))
def find_outer_bags(graph, root):
    """Count the entries of the DFS predecessor map of the reversed graph.

    The graph is reversed, a depth-first search is started at ``root`` and
    the size of the resulting node -> predecessor map is returned.
    """
    return len(nx.dfs_predecessors(graph.reverse(), root))
def get_event_map(T, root, traj, predecessors=None):
    """
    Collect, per base tree edge, the transition events of a trajectory.

    Parameters
    ----------
    T : undirected weighted networkx graph
        The base tree.
    root : integer
        The root node, shared by the base tree and the trajectory tree.
    traj : undirected weighted networkx graph
        Like the base tree, but with some extra degree-2 nodes.
        Its edges must be annotated with weights; they do not need to be
        annotated with states.
    predecessors : dict, optional
        Map from a node of the base tree T to its predecessor in T,
        with the direction determined by the root.
        Computed with a depth-first search from the root when omitted.

    Returns
    -------
    event_map : dict
        Map from an edge of the base tree to a list of
        (time, augmented tree edge object) pairs.
        The base tree edge is an ordered pair pointing away from the root.
        The edge object is the trajectory edge that follows the
        transition event.

    """
    if predecessors is None:
        predecessors = nx.dfs_predecessors(T, root)
    traj_children = nx.dfs_successors(traj, root)

    # Trajectory edges, ordered outward from the root.
    outward_edges = [tuple(edge) for edge in nx.bfs_edges(traj, root)]

    # Assign every trajectory edge to the base tree edge it lies on.
    # Going through the edges from the far end towards the root means the
    # single child edge of an extra node is always resolved before the
    # edge that leads into that node.
    owner = {}
    for edge in outward_edges[::-1]:
        head = edge[1]
        if head in T:
            owner[edge] = (predecessors[head], head)
            continue
        onward = traj_children[head]
        if len(onward) != 1:
            raise Exception('internal error')
        owner[edge] = owner[(head, onward[0])]

    # Walk outward again, tracking the time elapsed along each base edge.
    # A trajectory edge whose tail is not a base tree node starts at a
    # transition event, which is recorded with its start time.
    events = defaultdict(list)
    elapsed = {}
    for edge in outward_edges:
        tail, head = edge
        if tail in T and head in T:
            # No event on this edge.
            continue
        base_edge = owner[edge]
        start = elapsed.get(base_edge, 0)
        segment = traj[tail][head]
        if tail not in T:
            events[base_edge].append((start, segment))
        elapsed[base_edge] = start + segment['weight']

    return events