Example No. 1
def load_train_test_graphs(dataset, recache_input):
    raw_mat_path = 'data/{}.npz'.format(dataset)
    train_graph_path = 'data/{}/train_graph.pkl'.format(dataset)
    test_graph_path = 'data/{}/test_graph.pkl'.format(dataset)

    if recache_input:
        print('loading sparse matrix from {}'.format(raw_mat_path))
        m = load_sparse_csr(raw_mat_path)

        print('splitting train and test...')
        train_m, test_m = split_train_test(
            m,
            weights=[0.9, 0.1])

        print('converting to nx.DiGraph')
        train_g = nx.from_scipy_sparse_matrix(train_m, create_using=nx.DiGraph(), edge_attribute='sign')
        test_g = nx.from_scipy_sparse_matrix(test_m, create_using=nx.DiGraph(), edge_attribute='sign')
                
        print('saving train and test graphs...')
        nx.write_gpickle(train_g, train_graph_path)
        nx.write_gpickle(test_g, test_graph_path)
    else:
        print('loading train and test graphs...')
        train_g = nx.read_gpickle(train_graph_path)
        test_g = nx.read_gpickle(test_graph_path)
    return train_g, test_g
Example No. 2
def main():
    rev_forward_g = nx.read_gpickle("rev_forward.gpickle")
    rev_backward_g = nx.read_gpickle("rev_backward.gpickle")
    with open("ids.pickle", "rb") as f:
        ids = pickle.load(f)

    cutoff = 12
    dists = []
    for start, forward_rounds, backward_rounds, end in ids:
        s = count(start)
        if not 4 <= s <= 6:
            continue

        # backward extension
        backward_extension_rounds = 3
        rounds = forward_rounds + backward_rounds + backward_extension_rounds

        for p, w in propagate(
            rev_backward_g,
            end,
            backward_extension_rounds - 1,
            cutoff
        ):
            p = add_last_round(p)
            dists.append((start, p, w, rounds))

            print("{} ... X ... {} with probability {}, {} rounds".format(
                start,
                " <- ".join(str(v) for v in p),
                math.exp(-w),
                rounds
            ))

    with open("dists.pickle", "wb") as f:
        pickle.dump(dists, f)
Example No. 3
def ntwks_to_matrices(in_files, edge_key):
    first = nx.read_gpickle(in_files[0])
    files = len(in_files)
    nodes = len(first.nodes())
    matrix = np.zeros((nodes, nodes, files))
    for idx, name in enumerate(in_files):
        graph = nx.read_gpickle(name)
        for u, v, d in graph.edges(data=True):
            graph[u][v]['weight'] = d[edge_key]  # set the requested edge attribute as the edge weight
        matrix[:, :, idx] = nx.to_numpy_matrix(graph)  # Retrieve the matrix
    return matrix
def _build_and_store_new_graph(data_file, name=""):
    """
    Reads the nodes and edges files stored in the 1.1 version and builds a new Graph compatible with 2.0
    :param data_file: path to temporary directory
    :param name: name of the network
    :return: new Graph compatible with version 2.0
    """
    data_file += name
    edges = networkx.read_gpickle(data_file + "_edges" + ".gpickle")
    nodes = networkx.read_gpickle(data_file + "_nodes" + ".gpickle")
    net = networkx.Graph()
    net.add_nodes_from(nodes)
    net.add_edges_from(edges)
    return net
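A minimal usage sketch for the helper above; the temporary directory and network name are hypothetical, not from the original project:

# Expects my_network_nodes.gpickle and my_network_edges.gpickle in that directory.
net = _build_and_store_new_graph("/tmp/migration/", name="my_network")
print(networkx.info(net))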
Example No. 5
def diff_history(directory, length = 1):
    glob_dir = os.path.join(directory, "*.pickle.tar.gz")
    pickle_files = glob.glob(glob_dir)
    pickle_files = sorted(pickle_files)
    pairs = [(a, b) for (a, b) in zip(pickle_files, pickle_files[1:])]
    pairs = pairs[-1*length:]
    diffs = []
    for fileA, fileB in pairs:
        graphA = nx.read_gpickle(fileA)
        graphB = nx.read_gpickle(fileB)
        diff = compare(graphA, graphB)
        # remove render folder which is timestamps
        diffs.append(diff)

    return diffs
Example No. 6
    def __init__(self, steps=False):
        while not(rospy.has_param('/planner/robots')):
            continue

        number = rospy.get_param('~robot_nr', 0)
        centralized = rospy.get_param('/planner/centralized', False)
        super(RobotNode, self).__init__(number, centralized)
        self.mapname = rospy.get_param('/world/name', 'hospital')
        self.file_path = rospy.get_param('/world/path', '/home/argen/catkin_ws/src/narko/mrpp')

        if self.centralized:
            rospy.loginfo("Waiting for planner...")
            rospy.wait_for_service('/planner/roadmap_plan')
        else:
            rospy.wait_for_service("/goal_manager/goal")
            self.robots = rospy.get_param('/planner/robots', 2)
            self.topological_map = nx.read_gpickle(self.file_path+'/maps/graphs/test/topological/'+self.mapname+'_topological.gpickle')
            self.planner = ADPP(self.topological_map, self.robots, self.number)
            rospy.Subscriber("/planner/coordination/broadcast", Inform, self.handle_message, queue_size=100)
            self.pub = rospy.Publisher("/planner/coordination/broadcast", Inform, queue_size=1)

        self.width = rospy.get_param('/world/width', 3117)
        self.height = rospy.get_param('/world/height', 1189)
        self.resolution = rospy.get_param('/world/resolution', 0.04626)

        rospy.loginfo("Initialized %s node.",rospy.get_name())
Example No. 7
def simpleDisplay(ipaddress = "localhost",port = "9999"):
    '''
    Restore and display the graph saved after each processing step.
    :return:
    '''
    # client,repo,stargazers,user = getRespond()
    # g = addTOGraph(repo,stargazers,user)
    # addEdge(stargazers,client,g)
    # getPopular(g)
    # savaGraph1(g)
    # top10(g)
    g = nx.read_gpickle("data/github.1")
    print nx.info(g)
    print

    mtsw_users = [n for n in g if g.node[n]['type'] == 'user']
    h = g.subgraph(mtsw_users)

    print nx.info(h)
    print
    d = json_graph.node_link_data(h)
    json.dump(d, open('data/githubRec.json', 'w'))
    cmdstr = "python3 -m http.server %s" % port
    webbrowser.open_new_tab("http://%s:%s/%s.html"%(ipaddress,port, "display_githubRec"))
    os.system(cmdstr)
Example No. 8
def fastInitializeBipartite():
   G = nx.read_gpickle("graph.p")
   groundTruth = pickle.load(open("groundTruth.p", "rb"))
   sites = pickle.load(open("sites.p", "rb"))
   users = pickle.load(open("users.p", "rb"))
   print "Loaded graph data from pickle"
   return (G, groundTruth, sites, users)
	def __init__(self, handle):
		super(SourcePairCombiner, self).__init__()
		self.handle = handle
		self.dirhandle = 'reassortant_edges'
		self.G = nx.read_gpickle('{0} Full Complement Graph.pkl'.format(self.handle))
		self.current_sourcepair = None # current sourcepair graph
		self.current_noi = None # current node of interest
Example No. 10
def main():
    if not (len(sys.argv) == 3 and sys.argv[1] in ["forward", "backward"]):
        print("usage: ./find_ext.py [forward/backward] [differentials file]", file=sys.stderr)
        sys.exit(1)

    direction = sys.argv[1]
    if direction == "forward":
        g = nx.read_gpickle("rev_forward.gpickle")
    else:
        g = nx.read_gpickle("rev_backward.gpickle")

    with open(sys.argv[2]) as f:
        for start, _ in map(literal_eval, f):
            rounds = 1
            for p, w in propagate(g, start, rounds):
                print((start, p, w))
Example No. 11
 def __init__(self,filename) :
     #self.graph = nx.barabasi_albert_graph(100,5)
     self.graph = nx.Graph()
     self.graph = nx.read_gpickle(filename)
     self.nodes = self.graph.nodes()
     self.graph = self.mapper()
     self.edges = self.graph.edges()
Example No. 12
    def _read_cell_graph(self, filename, format):
        """Load the cell-to-cell connectivity graph from a
        file. 

        Returns None if any error happens.
        """
        cell_graph = None
        if filename:
            try:
                start = datetime.now()
                if format == "gml":
                    cell_graph = nx.read_gml(filename)
                elif format == "pickle":
                    cell_graph = nx.read_gpickle(filename)
                elif format == "edgelist":
                    cell_graph = nx.read_edgelist(filename)
                elif format == "yaml":
                    cell_graph = nx.read_yaml(filename)
                elif format == "graphml":
                    cell_graph = nx.read_graphml(filename)
                else:
                    print "Unrecognized format:", format
                end = datetime.now()
                delta = end - start
                config.BENCHMARK_LOGGER.info(
                    "Read cell_graph from file %s of format %s in %g s"
                    % (filename, format, delta.seconds + 1e-6 * delta.microseconds)
                )
            except Exception, e:
                print e
Example No. 13
def main():
  seed(0) #set seed
  #get graph info
  G = nx.read_gpickle("input/graphMTC_CentroidsLength6.gpickle") #noCentroidsLength15.gpickle") #does not have centroidal links. There is also the choice of a proper multidigraph: nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle")
  G = nx.freeze(G) #prevents edges or nodes to be added or deleted
  #get od info. This is in format of a dict keyed by od, like demand[sd1][sd2] = 200000.
  demand = bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv')
  #get earthquake info
  q = QuakeMaps('input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl') #input/20130107_mtc_total_lnsas1.pkl', 'input/20130107_mtc_magnitudes1.pkl','input/20130107_mtc_faults1.pkl', 'input/20130107_mtc_weights1.pkl', 'input/20130107_mtc_scenarios1.pkl') #'input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl') #('input/20130107_mtc_total_lnsas1.pkl', 'input/20130107_mtc_magnitudes1.pkl',  #totalfilename=None, magfilename=None, faultfilename=None, weightsfilename=None, scenariofilename=None):
  print 'weights: ', q.weights
  q.num_sites = len(q.lnsas[0])
  #determine which scenarios you want to run
  good_indices = pick_scenarios(q.lnsas, q.weights)
  
  travel_index_times = []
  index = 0
  #loop over scenarios
  print 'size of lnsas: ', len(q.lnsas)
  for scenario in q.lnsas: #each 'scenario' has 1557 values of lnsa, i.e. one per site
    if index in good_indices:
      print 'index: ', index
      (bridges, flow, path, path2) = run_simple_iteration(G, scenario, demand, False)
      travel_index_times.append((index, bridges, flow, path, path2))
#      print 'new travel times: ', travel_index_times
      if index%1000 ==0:
        util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths4.txt',travel_index_times)
    index += 1 #IMPORTANT
  util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths4.txt',travel_index_times)
  print 'the number of scenarios I considered doing: ', index
  print 'the number of scenarios I actually did: ', len(travel_index_times)
Example No. 14
def extract_all((networks, net_type, setup, args)):
    """
    Open a pickled network and extract data from it.

    Parameters
    ----------
    networks: iterable
        Iterable of filenames
    net_type: str
        Specific string labelling this data
    setup: str
        String describing the parameters
    args: tuple
        Attributes to be collected (e.g., "robustness")
    """
    getter = attrgetter(*args)
    z_getter = attrgetter("zscores")
    res = list()
    for filename in networks:
        try:
            net = nx.read_gpickle(filename)
        except (IOError, EOFError):
            print "failed to load network file '%s'" % filename
            os.rename(filename, filename + ".failed")
            continue
#        update(net, filename)
        results = list(z_getter(net))
        results.extend(list(getter(net)))
        results.append(numpy.mean(net.shortest_paths))
        # stripping .pkl file extension
        results.append(os.path.basename(filename)[:-4])
        results.append(net_type)
        results.append(setup)
        res.append(results)
    return res
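A hypothetical call matching the Python 2 tuple-parameter signature above; the file pattern and labels are made up:

import glob

rows = extract_all((sorted(glob.glob("results/er/*.pkl")),  # pickled networks
                    "erdos-renyi",                          # net_type label
                    "n=1000, k=10",                         # setup description
                    ("robustness",)))                       # attributes to collect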
Example No. 15
    def _read_celltype_graph(self, celltypes_file, format="gml"):
        """
        Read celltype-celltype connectivity graph from file.

        celltypes_file -- the path of the file containing
        the graph.
        
        format -- format of the file. allowed values: gml, graphml, edgelist, pickle, yaml.

        """
        start = datetime.now()
        celltype_graph = None
        try:
            if format == "gml":
                celltype_graph = nx.read_gml(celltypes_file)
            elif format == "edgelist":
                celltype_graph = nx.read_edgelist(celltypes_file)
            elif format == "graphml":
                celltype_graph = nx.read_graphml(celltypes_file)
            elif format == "pickle":
                celltype_graph = nx.read_gpickle(celltypes_file)
            elif format == "yaml":
                celltype_graph = nx.read_yaml(celltypes_file)
            else:
                print "Unrecognized format %s" % (format)
        except Exception, e:
            print e
Example No. 16
    def __init__(self, fname, interactive=True):
        self.fname = fname
        self.graph = nx.read_gpickle(fname)
        
        #apply_workaround(self.graph, thr=1e-3)
        #remove_intersecting_edges(self.graph)

        print "Number of connected components:", \
                nx.number_connected_components(self.graph)

        self.selected_path_verts = []
        
        if interactive:
            self.fig = plt.figure()
            self.path_patch = None
            
            G_p = nx.connected_component_subgraphs(self.graph)[0]
            #G_p = nx.connected_component_subgraphs(prune_graph(self.graph))[0]

            plot.draw_leaf(G_p, fixed_width=True)

            plt.ion()
            plt.show()

            self.edit_loop()    
Example No. 17
def load_data():
    start = time.time()
    try:
        print("Loading data from /data pickles and hfd5 adj matrices")
        f = h5py.File('data/cosponsorship_data.hdf5', 'r')
        for chamber in ['house', 'senate']:
            for congress in SUPPORTED_CONGRESSES:
                adj_matrix_lookup[(chamber, congress)] = np.asarray(f[chamber + str(congress)])

                igraph_graph = igraph.load("data/" + chamber + str(congress) + "_igraph.pickle", format="pickle")
                igraph_graph_lookup[(chamber, congress, False)] = igraph_graph

                nx_graph = nx.read_gpickle("data/" + chamber + str(congress) + "_nx.pickle")
                nx_graph_lookup[(chamber, congress, False)] = nx_graph
    except IOError as e:
        print("Loading data from cosponsorship files")
        f = h5py.File("data/cosponsorship_data.hdf5", "w")
        for chamber in ['house', 'senate']:
            for congress in SUPPORTED_CONGRESSES:
                print("Starting %s %s" % (str(congress), chamber))
                adj_matrix = load_adjacency_matrices(congress, chamber)
                data = f.create_dataset(chamber + str(congress), adj_matrix.shape, dtype='f')
                data[0: len(data)] = adj_matrix

                # igraph
                get_cosponsorship_graph(congress, chamber, False).save("data/" + chamber + str(congress) + "_igraph.pickle", "pickle")
                # networkx
                nx.write_gpickle(get_cosponsorship_graph_nx(congress, chamber, False), "data/" + chamber + str(congress) + "_nx.pickle")

                print("Done with %s %s" % (str(congress), chamber))
    print("Data loaded in %d seconds" % (time.time() - start))
Example No. 18
    def check(self, test_name, tree_gen_func, **more_args):
        kws = self.some_kws_of_run.copy()
        
        kws.update(directed_params)
        
        if more_args:
            kws.update(more_args)

        paths = run(
            tree_gen_func,
            calculate_graph=False,
            print_summary=False,
            # result_pkl_path_prefix=result_pickle_prefix,
            **kws)
        trees = pkl.load(open(paths['result']))

        trees = filter(lambda t: t.number_of_edges() > 0,
                       trees)  # remove empty trees

        assert_true(len(trees) > 0)

        for t in trees:
            assert_true(len(t.edges()) > 0)

        return trees, nx.read_gpickle(paths['meta_graph'])
Example No. 19
def graph_preprocessing_with_counts(G_input=None, save_file=None):

    if not G_input:
        graph_file = os.path.join(work_dir, "adj_graph.p")
        G = nx.read_gpickle(graph_file)
    else:
        G = G_input.copy()

    print "Raw graph size:", G.size()
    print "Raw graph nodes", G.number_of_nodes()

    profile2prob = {l.split()[0]: float(l.split()[1]) for l in open(os.path.join(work_dir, 'profile_weight.txt'))}

    for edge in G.edges(data=True):
        nodes = edge[:2]
        _weight = edge[2]['weight']
        _count = edge[2]['count']
        
        if _count < 3:
            G.remove_edge(*nodes)

    print "Pre-processed graph size", G.size()
    print "Pre-processed graph nodes", G.number_of_nodes()

    G.remove_nodes_from(nx.isolates(G))

    print "Pre-processed graph size", G.size()
    print "Pre-processed graph nodes", G.number_of_nodes()
    
    if save_file:
        print "Saving to", save_file
        nx.write_gpickle(G,save_file)

    return G
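A hedged usage sketch, reusing the module-level work_dir the function above already assumes; the output file name is a placeholder:

# Read work_dir/adj_graph.p, drop edges with count < 3, remove the isolated
# nodes that result, and write the filtered graph back to disk.
G_filtered = graph_preprocessing_with_counts(
    save_file=os.path.join(work_dir, "adj_graph_filtered.p"))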
Example No. 20
def main():
    
    options,args = parseCommandLine()
    
    pickleFileName = args[0];
    outputFileName = None
    if (len(args) == 2):
        outputFileName = args[1]
                

    print ("Loading %s...\n") % (pickleFileName)
    g = nx.read_gpickle(pickleFileName);
    
    # Convert graph to matrix form
    for u,v,d in g.edges_iter(data=True):
        g.edge[u][v]['weight'] = g.edge[u][v]['number_of_fibers']
    cmat = nx.to_numpy_matrix(g)
    
    mean = np.mean(cmat)
    std = np.std(cmat) 
    print('Total number of connections: %d') % (np.sum(cmat))
    print('Connection Matrix Mean: %f Std: %f' ) % (mean, std)

    
    # Compute binarized stats
    binarized_cmat= np.zeros(cmat.shape)
    binarized_cmat[cmat>0] = 1
    print('Binarized connection matrix Mean: %f Std: %f' ) % (np.mean(binarized_cmat),
                                                                np.std(binarized_cmat))

    
    if outputFileName != None :
        f = open(outputFileName, 'at')
        f.write( ('%f,%f\n') % (mean, std) )
        f.close() 
Example No. 21
 def __init__(self, synonymFile, graphFile, color):
     self.color = color
     f = open(synonymFile)
     self.synonymDict = cPickle.load(f)
     f.close()        
     self.G = nx.read_gpickle(graphFile)
     self.synonyms = self.synonymDict.keys()
Example No. 22
def summarize_precoth(dwi_network_file, fdg_stats_file, subject_id):
    import os.path as op
    import scipy.io as sio
    import networkx as nx

    fdg = sio.loadmat(fdg_stats_file)
    dwi_ntwk = nx.read_gpickle(dwi_network_file)

    # Thal L-1 R-2
    # Cortex 3 and 4
    # Prec L-5 R-6
    titles = ["subjid"]
    fdg_avg = ["LTh_CMR_avg","RTh_CMR_avg","LCo_CMR_avg","RCo_CMR_avg","LPre_CMR_avg","RPre_CMR_avg"]
    f_avg = [fdg["func_mean"][0][0],fdg["func_mean"][1][0],fdg["func_mean"][2][0],
               fdg["func_mean"][3][0],fdg["func_mean"][4][0],fdg["func_mean"][5][0]]

    fdg_max = ["LTh_CMR_max","RTh_CMR_max","LCo_CMR_max","RCo_CMR_max","LPre_CMR_max","RPre_CMR_max"]
    f_max = [fdg["func_max"][0][0],fdg["func_max"][1][0],fdg["func_max"][2][0],
               fdg["func_max"][3][0],fdg["func_max"][4][0],fdg["func_max"][5][0]]

    fdg_min = ["LTh_CMR_min","RTh_CMR_min","LCo_CMR_min","RCo_CMR_min","LPre_CMR_min","RPre_CMR_min"]
    f_min = [fdg["func_min"][0][0],fdg["func_min"][1][0],fdg["func_min"][2][0],
               fdg["func_min"][3][0],fdg["func_min"][4][0],fdg["func_min"][5][0]]

    fdg_std = ["LTh_CMR_std","RTh_CMR_std","LCo_CMR_std","RCo_CMR_std","LPre_CMR_std","RPre_CMR_std"]
    f_std = [fdg["func_stdev"][0][0],fdg["func_stdev"][1][0],fdg["func_stdev"][2][0],
               fdg["func_stdev"][3][0],fdg["func_stdev"][4][0],fdg["func_stdev"][5][0]]

    fdg_titles = fdg_avg + fdg_max + fdg_min + fdg_std

    dwi = nx.to_numpy_matrix(dwi_ntwk, weight="weight")

    l_thal = ["LTh_RTh","LTh_LCo","LTh_RCo","LTh_LPre","LTh_RPre"]
    l_th   = [dwi[0,1], dwi[0,2], dwi[0,3], dwi[0,4], dwi[0,5]]
    r_thal = ["RTh_LCo","RTh_RCo","RTh_LPre","RTh_RPre"]
    r_th   = [dwi[1,2], dwi[1,3], dwi[1,4], dwi[1,5]]
    l_co   = ["LCo_RCo","LCo_LPre","LCo_RPre"]
    l_cor  = [dwi[2,3], dwi[2,4], dwi[2,5]]
    r_co   = ["RCo_LPre","RCo_RPre"]
    r_cor  = [dwi[3,4], dwi[3,5]]
    l_pre  = ["LPre_RPre"]
    l_prec = [dwi[4,5]]
    conn_titles = l_thal + r_thal + l_co + r_co + l_pre

    all_titles = titles + fdg_titles + conn_titles
    volume_titles = ["VoxLTh","VoxRTh","VoxLCo", "VoxRCo", "VoxLPre", "VoxRPre"]
    all_titles = all_titles + volume_titles
    volumes = fdg["number_of_voxels"]

    all_data = f_avg + f_max + f_min + f_std + l_th + r_th + l_cor + r_cor + l_prec + volumes[:,0].tolist()

    out_file = op.abspath(subject_id + "_precoth.csv")
    f = open(out_file, "w")
    title_str = ",".join(all_titles) + "\n"
    f.write(title_str)
    all_data = map(float, all_data)
    data_str = subject_id + "," + ",".join(format(x, "10.5f") for x in all_data) + "\n"
    f.write(data_str)
    f.close()
    return out_file
Example No. 23
def reduceGraph(read_g, write_g, minEdgeWeight, minNodeDegree, Lp, Sp):
    """
    Simplify the undirected graph and then update the 3 undirected weight properties.
    :param read_g: is the graph pickle to read
    :param write_g: is the updated graph pickle to write
    :param minEdgeWeight: the original weight of each edge should be >= minEdgeWeight
    :param minNodeDegree: the degree of each node should be >= minNodeDegree. the degree here is G.degree(node), NOT G.degree(node, weight='weight')
    :return: None
    """
    G=nx.read_gpickle(read_g)
    print 'number of original nodes: ', nx.number_of_nodes(G)
    print 'number of original edges: ', nx.number_of_edges(G)

    for (u,v,w) in G.edges(data='weight'):
        if w < minEdgeWeight:
            G.remove_edge(u,v)

    for n in G.nodes():
        if G.degree(n)<minNodeDegree:
            G.remove_node(n)

    print 'number of new nodes: ', nx.number_of_nodes(G)
    print 'number of new edges: ', nx.number_of_edges(G)

    for (a, b, w) in G.edges_iter(data='weight'):
        unweight_allocation(G, a, b, w,Lp,Sp)

    print 'update weight ok'
    nx.write_gpickle(G, write_g)

    return
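A hypothetical invocation of the reducer above; the file names, thresholds, and Lp/Sp values are placeholders:

# Keep edges with weight >= 2 and nodes with degree >= 3, then let
# unweight_allocation() refresh the three undirected weight properties.
reduceGraph("coword_full.gpickle", "coword_reduced.gpickle",
            minEdgeWeight=2, minNodeDegree=3, Lp=0.5, Sp=0.5)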
Example No. 24
def pullnodeIDs(in_network, name_key='dn_name'):
    """ This function will return the values contained, for each node in
    a network, given an input key. By default it will return the node names
    """
    import networkx as nx
    import numpy as np
    from nipype.interfaces.base import isdefined
    if not isdefined(in_network):
        raise ValueError
        return None
    try:
        ntwk = nx.read_graphml(in_network)
    except:
        ntwk = nx.read_gpickle(in_network)
    nodedata = ntwk.node
    ids = []
    integer_nodelist = []
    for node in nodedata.keys():
        integer_nodelist.append(int(node))
    for node in np.sort(integer_nodelist):
        try:
            nodeid = nodedata[node][name_key]
        except KeyError:
            nodeid = nodedata[str(node)][name_key]
        ids.append(nodeid)
    return ids
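For example, to pull every node's name out of a connectome file (the path is hypothetical; the function falls back to read_gpickle when the file is not GraphML):

names = pullnodeIDs("subject01_connectome.graphml", name_key="dn_name")
print(names[:5])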
Example No. 25
def experiment_4():
    G = nx.Graph()
    G.add_edge(0, 11, weight=91)
    G.add_edge(1, 11, weight=72)
    G.add_edge(1, 13, weight=96)
    G.add_edge(2, 13, weight=49)
    G.add_edge(2, 6, weight=63)
    G.add_edge(2, 3, weight=31)
    G.add_edge(3, 9, weight=98)
    G.add_edge(3, 7, weight=1)
    G.add_edge(3, 12, weight=59)
    G.add_edge(4, 7, weight=6)
    G.add_edge(4, 9, weight=6)
    G.add_edge(4, 8, weight=95)
    G.add_edge(5, 11, weight=44)
    G.add_edge(6, 11, weight=53)
    G.add_edge(8, 10, weight=2)
    G.add_edge(8, 12, weight=48)
    G.add_edge(9, 12, weight=32)
    G.add_edge(10, 14, weight=16)
    G.add_edge(11, 13, weight=86)

    G = nx.read_gpickle('G.gpickle')
    
    path_nx = nx.dijkstra_path(G, 0, 14)
    path = dijkstra(G, 0, 14, True)
    if path_cost(G, path) > path_cost(G, path_nx):
        print 'Error'
    else:
        print 'Correct'
        
    return locals()
Example No. 26
def main():
  seed(0) #set seed
  #get graph info
  G = nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle") #noCentroidsLength15.gpickle") #does not have centroidal links
  print '|V| = ', len(G.nodes())
  print '|E| = ', len(G.edges())
  G = nx.freeze(G) #prevents edges or nodes to be added or deleted
  #get od info. This is in format of a dict keyed by od, like demand[sd1][sd2] = 200000.
  demand = bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv') #bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv')
  #get earthquake info
  q = QuakeMaps('input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl') #(input/20130107_mtc_total_lnsas1.pkl', 'input/20130107_mtc_magnitudes1.pkl', 'input/20130107_mtc_faults1.pkl', 'input/20130107_mtc_weights1.pkl', 'input/20130107_mtc_scenarios1.pkl') #totalfilename=None, magfilename=None, faultfilename=None, weightsfilename=None, scenariofilename=None): 'input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl') #(


  q.num_sites = len(q.lnsas[0])
  #determine which scenarios you want to run
  good_indices = pick_scenarios(q.lnsas, q.weights)
  
  travel_index_times = []
  index = 0
  #loop over scenarios
  for scenario in q.lnsas: #each 'scenario' has 1557 values of lnsa, i.e. one per site
    if index in good_indices:
      print 'index: ', index
      (travel_time, vmt) = run_iteration(G, scenario, demand)
      travel_index_times.append((index, travel_time, vmt))
#      print 'new travel times: ', travel_index_times
      if index%100 ==0:
        util.write_2dlist(time.strftime("%Y%m%d")+'_travel_time.txt',travel_index_times)
    index += 1 #IMPORTANT
  util.write_2dlist(time.strftime("%Y%m%d")+'_travel_time.txt',travel_index_times)
def topology(data, ell):
    """
    Computation of topological characteristics.
    
    Parameters
    ------------
    data : array of paths to the graphs
    ell : list of length scales
    """    

    for i in data:
        G = nx.read_gpickle(i)
        B = nx.number_of_edges(G)
        V = nx.number_of_nodes(G)
        Euler = V - B
        C = (B-V)/float(V)
        eu.append(Euler)
        c_t.append(C)
        vert.append(V)
        bran.append(B)

    plt.plot(ell, c_t, '.', label='v23')
    #
    #np.save('/backup/yuliya/v23/graphs_largedom/Euler.npy', eu)
    #np.save('/backup/yuliya/v23/graphs_largedom/C_t.npy', c_t)
    #np.save('/backup/yuliya/v23/graphs_largedom/V.npy', vert)
    #np.save('/backup/yuliya/v23/graphs_largedom/B.npy', bran)
    #np.save('/backup/yuliya/vsi01/graphs_largdom/time.npv23/graphs_largedom/y', t)
    plt.yscale('log')
Example No. 28
def remove_unconnected_graphs_and_threshold(in_file):
    import nipype.interfaces.cmtk as cmtk
    import nipype.pipeline.engine as pe
    import os
    import os.path as op
    import networkx as nx
    from nipype.utils.filemanip import split_filename
    connected = []
    if in_file == None or in_file == [None]:
        return None
    elif len(in_file) == 0:
        return None
    graph = nx.read_gpickle(in_file)
    if not graph.number_of_edges() == 0:
        connected.append(in_file)
        _, name, ext = split_filename(in_file)
        filtered_network_file = op.abspath(name + '_filt' + ext)
    if connected == []:
        return None

    #threshold_graphs = pe.Node(interface=cmtk.ThresholdGraph(), name="threshold_graphs")
    threshold_graphs = cmtk.ThresholdGraph()
    from nipype.interfaces.cmtk.functional import tinv
    weight_threshold = 1  # tinv(0.95, 198-30-1)
    threshold_graphs.inputs.network_file = in_file
    threshold_graphs.inputs.weight_threshold = weight_threshold
    threshold_graphs.inputs.above_threshold = True
    threshold_graphs.inputs.edge_key = "weight"
    threshold_graphs.inputs.out_filtered_network_file = op.abspath(
        filtered_network_file)
    threshold_graphs.run()
    return op.abspath(filtered_network_file)
def readNetworks(fileNames):
	networks = []

	for filename in fileNames:
		networks.append(nx.read_gpickle(filename))

	return networks
def short_branches():
    """
    Visualization of short branches of the skeleton.
    
    """
    data1_sk = glob.glob('/backup/yuliya/vsi05/skeletons_largdom/*.h5')
    data1_sk.sort()

    for i,j, k in zip(d[1][37:47], data1_sk[46:56], ell[1][37:47]):
        g = nx.read_gpickle(i)
        dat = tb.openFile(j)
        skel = np.copy(dat.root.skel)
        bra = np.copy(dat.root.branches)
        mask = np.zeros_like(skel)    
        dat.close()
    
        length = nx.get_edge_attributes(g, 'length')
        number = nx.get_edge_attributes(g, 'number')
        num_dict = {}
        for m in number:
            for v in number[m]:
                num_dict.setdefault(v, []).append(m)
        find_br = ndimage.find_objects(bra)
        for l in list(length.keys()):
            if length[l]<0.5*k: #Criteria
                for b in number[l]:
                    mask[find_br[b-1]] = bra[find_br[b-1]]==b
        mlab.figure(bgcolor=(1,1,1), size=(1200,1200))
        mlab.contour3d(skel, colormap='hot')
        mlab.contour3d(mask)
        mlab.savefig('/backup/yuliya/vsi05/skeletons/short_bran/'+ i[42:-10] + '.png')
        mlab.close()
Example No. 31
# ## Project 5 - Company Emails
# 
# For this project we will be working with a company's email network where each node corresponds to a person at the company,
# and each edge indicates that at least one email has been sent between two people.
# 
# The network also contains the node attributes `Department` and `ManagementSalary`.
# 
# `Department` indicates the department in the company which the person belongs to, and `ManagementSalary` indicates whether that person is receiving a management position salary.

import networkx as nx
import pandas as pd
import numpy as np
import pickle

G = nx.read_gpickle('email_prediction.txt')

print(nx.info(G))


#  Salary Prediction
# 
# Using network `G`, identify the people in the network with missing values for the node attribute `ManagementSalary` and predict whether or not these individuals are receiving a management position salary.
# Predictions will need to be given as the probability that the corresponding employee is receiving a management position salary.
# 

from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler

def is_management(node):
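    # Hypothetical completion, not the original author's code: assumes `node`
    # is a (node_id, attr_dict) pair from G.nodes(data=True) and that the
    # ManagementSalary attribute is 1.0, 0.0, or NaN when missing.
    managementSalary = node[1]['ManagementSalary']
    if pd.isnull(managementSalary):
        return None  # unknown label -> to be predicted
    return 1 if managementSalary == 1.0 else 0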
Example No. 32
 def load(self):
     with open(self._out_path, 'rb') as F:
         nx_graph = nx.read_gpickle(F)
         self.root_nodes = nxgraph_to_nodes(nx_graph)
Example No. 33
def connect_bc_funct(cut_nonelected=False, network_objects=None, data_path=None, out_path=None, border_file4326=None):
    # import crossing_onlypoints with crossing points of ch1234567 containing coordinates
    # import ch123 and ch1234567
    # for each border crossing, if it is not in a 123 way, find closest point to 123 on both sides of border
    # route between the border crossing and closest points in both sides of the border
    # keep the ways used in the routing
    # create a final graph with ch123 and the connected border crossings

    # out_path = r'C:/Users/Ion/IVT/OSM_python/networks/'
    # data_path = r'C:/Users/Ion/IVT/OSM_data'
    # border_file = str(data_path) + '/borderOSM_polygon_2056.shp'
    # network_objects = None
    # g_ch123_connected = nx.read_gpickle(str(ch1234567_path) + '/network_files/ch_connected_graph_bytime.gpickle')


    # if out_path is None:
    #     g_ch1234567 = network_objects[0]
    #     gdf = network_objects[1]
    #     splitted_ways_dict = network_objects[2]
    #     nodes_europe = network_objects[3]
    #     ch_border = network_objects[4]  # border has to be in 4326, loaded one is in 2056
    #     crossing_onlypoints = network_objects[5]
    # else:
    ch1234567_path = str(out_path) + '/ch1234567'
    ch123_path = str(out_path) + '/ch123'
    eu123_path = str(out_path) + '/eu123'
    eu123ch_path = str(out_path) + '/eu123ch4567'
    if os.path.isfile(str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle") is False or out_path is None:
        print(datetime.datetime.now(), 'Starting process of connecting border points and swiss nuts with unclassified ways.')
        print('------------------------------------------------------------------------')

        print(datetime.datetime.now(), 'Loading files.')
        # files from ch1234567 and eu123
        #     g_ch1234567 = nx.read_gpickle(str(ch1234567_path) + '/network_files/eu_network_graph_bytime.gpickle')
        # i think this should also be without islands, to avoid finding a closest node
        g_ch123 = nx.read_gpickle(str(ch123_path) + '/network_files/eu_network_largest_graph_bytime.gpickle')
        g_ch1234567 = nx.read_gpickle(str(ch1234567_path) + '/network_files/eu_network_largest_graph_bytime.gpickle')
        # here is preferable to avoid islands as they may be far from switzerland
        g_eu123 = nx.read_gpickle(str(eu123_path) + '/network_files/eu_network_largest_graph_bytime.gpickle')
        crossing_onlypoints = gpd.read_file(str(ch1234567_path) + '/bc_official/crossing_onlypoints.shp')

        file = open(str(ch1234567_path) + "/network_files/europe_nodes_dict4326.pkl", 'rb')
        nodes_europe = pickle.load(file)
        file.close()
        border_file4326
        # border_file = str(data_path) + '/Switzerland_OSM_polygon_4326.shp'
        ch_border = gpd.read_file(border_file4326)  # border has to be in 4326, loaded one is in 2056

        new_nodes = {}
        new_ways = {}
        # ch_border.crs = "epsg:2056"
        # ch_border = ch_border.to_crs("epsg:4326")
        nuts_path = str(data_path) + '/nuts_borders'

        print(datetime.datetime.now(), 'Files loaded.')

        print_islands(g_ch123, 'g_ch123')
        print_islands(g_ch1234567, 'g_ch1234567')
        print_islands(g_eu123, 'g_eu123')
        print('------------------------------------------------------------------------')

        # Creates graph of ch123 from graph ch1234567 and split into IN and OUT graphs
        # g_ch123 = copy.deepcopy(g_ch1234567)
        print(datetime.datetime.now(), 'Nodes/ways in g_ch1234567: ' + str(len(g_ch1234567.nodes)) + '/' + str(len(g_ch1234567.edges)))

        # for (u, v, c) in g_ch1234567.edges.data('way_type'):
        #     for way_type in ['secondary', 'tertiary', 'residential', 'unclassified']:
        #         if way_type in c:
        #             g_ch123.remove_edge(u, v)
        # g_ch123.remove_nodes_from(list(nx.isolates(g_ch123)))


        print(datetime.datetime.now(), 'Nodes/ways in g_ch123: ' + str(len(g_ch123.nodes)) + '/ ' + str(len(g_ch123.edges)))
        print('------------------------------------------------------------------------')

    if os.path.isfile(str(ch1234567_path) + '/network_files/g_ch1234567_out.gpickle') is False or out_path is None:
        # This splits both network graphs between in and out of the swiss border
        g_ch123_in, g_ch123_out = split_graphs(g_ch123, ch_border, nodes_europe)
        g_ch1234567_in, g_ch1234567_out = split_graphs(g_ch1234567, ch_border, nodes_europe)

        nx.write_gpickle(g_ch123_in, str(ch1234567_path) + '/network_files/g_ch123_in.gpickle')
        nx.write_gpickle(g_ch123_out, str(ch1234567_path) + '/network_files/g_ch123_out.gpickle')
        nx.write_gpickle(g_ch1234567_in, str(ch1234567_path) + '/network_files/g_ch1234567_in.gpickle')
        nx.write_gpickle(g_ch1234567_out, str(ch1234567_path) + '/network_files/g_ch1234567_out.gpickle')
    elif os.path.isfile(str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle") is False or out_path is None:
        g_ch123_in = nx.read_gpickle(str(ch1234567_path) + '/network_files/g_ch123_in.gpickle')
        g_ch123_out = nx.read_gpickle(str(ch1234567_path) + '/network_files/g_ch123_out.gpickle')
        g_ch1234567_in = nx.read_gpickle(str(ch1234567_path) + '/network_files/g_ch1234567_in.gpickle')
        g_ch1234567_out = nx.read_gpickle(str(ch1234567_path) + '/network_files/g_ch1234567_out.gpickle')

        print_islands(g_ch123_in, 'g_ch123_in')
        print_islands(g_ch123_out, 'g_ch123_out')
        print_islands(g_ch1234567_in, 'g_ch1234567_in')
        print_islands(g_ch1234567_out, 'g_ch1234567_out')

    if os.path.isfile(str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle") is False or out_path is None:
        # This creates the tree of the ch123 graphs to find the closest nodes
        g_in_tree, g_in_lonlat = closest_node(g_ch123_in, nodes_europe)
        g_out_tree, g_out_lonlat = closest_node(g_ch123_out, nodes_europe)
        # g_infull_lonlat, g_infull_tree = closest_node(g_ch1234567_in, nodes_europe)
        g_full_tree, g_full_lonlat = closest_node(g_ch1234567, nodes_europe)
        print('------------------------------------------------------------------------')

    # -----------------------------------------------------------------------------
    # CONNECT BORDER CROSSINGS WITH CH123
    # -----------------------------------------------------------------------------
    if os.path.isfile(str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle") is False or out_path is None:
        print(datetime.datetime.now(),
              'Number of edges in graphs (in/out graphs) BEFORE connecting border crossings: ' + str(len(g_ch123_in.edges)) + '/' + str(len(g_ch123_out.edges)))
        found_count = [0, 0]
        for index, row in crossing_onlypoints.iterrows():
            way_id = row['new_id']
            start_node_id = row['start_node']
            end_node_id = row['end_node_i']
            # Check if bc is from a principal highway
            # for (u, v, c) in g_ch123.edges.data('new_id'):
            #     if way_id == c:
            #         continue

            in_ch = ch_border.contains(Point(nodes_europe[start_node_id]))
            if in_ch[0] == True:
                in_node = start_node_id
                out_node = end_node_id
            else:
                in_node = end_node_id
                out_node = start_node_id

            # route in and out point in respective graphs to closest point in ch123
            # it may be that it is not connected, then 'continue'
            g_ch123_in, new_nodes, new_ways, found_count = route_bc(in_node, None, g_ch123_in, g_ch1234567_in, nodes_europe, new_nodes, new_ways, g_in_tree, g_in_lonlat, found_count)
            g_ch123_out, new_nodes, new_ways, found_count = route_bc(out_node, None, g_ch123_out, g_ch1234567_out, nodes_europe, new_nodes, new_ways, g_out_tree, g_out_lonlat, found_count)

        print(datetime.datetime.now(), 'Found path: ' + str(found_count[0]))
        print(datetime.datetime.now(), 'Not found path: ' + str(found_count[1]))
        print(datetime.datetime.now(),
              'Number of edges in graphs (in/out graphs) AFTER connecting border crossings: ' + str(
                  len(g_ch123_in.edges)) + '/' + str(len(g_ch123_out.edges)))

        print_islands(g_ch123_in, 'g_ch123_in')
        print_islands(g_ch123_out, 'g_ch123_out')
        print('------------------------------------------------------------------------')

        # -----------------------------------------------------------------------------
        # CONNECT NUTS CENTROIDS WITH CH123
        # -----------------------------------------------------------------------------
        unique_nuts_gdf = nuts_merging(nuts_path)
        found_count = [0, 0]
        for index, row in unique_nuts_gdf.iterrows():
            nutid = row['NUTS_ID']
            if 'CH' in nutid:
                nut_poly = row['geometry']
                centroid = nut_poly.centroid
                centroid_coords = (centroid.x, centroid.y)

                g_ch123_in, new_nodes, new_ways, found_count = route_bc(None, None, g_ch123_in, g_ch1234567_in, nodes_europe, new_nodes, new_ways, g_in_tree, g_in_lonlat, found_count, centroid_coords, g_full_tree, g_full_lonlat)


        print(datetime.datetime.now(), 'Found path: ' + str(found_count[0]))
        print(datetime.datetime.now(), 'Not found path: ' + str(found_count[1]))
        print(datetime.datetime.now(),
              'Number of edges in graphs (in/out graphs) AFTER connecting nuts centroids: ' + str(
                  len(g_ch123_in.edges)) + '/' + str(len(g_ch123_out.edges)))
        print_islands(g_ch123_in, 'g_ch123_in')
        print('------------------------------------------------------------------------')

        # -----------------------------------------------------------------------------
        # ADD EDGES WHICH CROSS BORDER (DELETED WHEN SPLITTING GRAPH TO IN/OUT)
        # -----------------------------------------------------------------------------
        # at the end, after merging both out and in graphs the edges which cross the border will have to be added again,
        # as this process does not count with them
        g_ch123_connected = nx.compose(g_ch123_in, g_ch123_out)
        print(datetime.datetime.now(),
              'Number of edges in connected graph after merging IN and OUT graphs WITHOUT border crossings: ' + str(
                  len(g_ch123_connected.edges)))
        print_islands(g_ch123_connected, 'g_ch123_connected')

        for index, row in crossing_onlypoints.iterrows():
            try:
                start_node_id = row['start_node']
                end_node_id = row['end_node_i']
                new_id = row['new_id']
                # length = row['length']
                time = g_ch1234567[start_node_id][end_node_id]['time']
                way_type = row['way_type']

                # and save the ways, implement them into ch_123
                # g_ch123_connected.add_edge(start_node_id, end_node_id, time=time, length=length, new_id=new_id, way_type=way_type)
                g_ch123_connected.add_edge(start_node_id, end_node_id, time=time, new_id=new_id, way_type=way_type)
            except:
                continue


        print(datetime.datetime.now(),
              'Number of edges in connected graph after merging IN and OUT graphs WITH border crossings: ' + str(
                  len(g_ch123_connected.edges)))
        print_islands(g_ch123_connected, 'g_ch123_connected')

        # -----------------------------------------------------------------------------
        # CONNECT REMAINING ISLANDS OF FINAL CONNECTED GRAPH
        # -----------------------------------------------------------------------------
        # Last, as there are some islands in the last connected graph due to splitting of graph, connected islands:
        g_ch123_connected_largest = copy.deepcopy(g_ch123_connected)
        print_islands(g_ch123_connected_largest, 'g_ch123_connected_largest')

        components = list(nx.connected_components(g_ch123_connected_largest))  # list because it returns a generator
        components.sort(key=len, reverse=True)
        largest = components.pop(0)
        isolated = set(g for cc in components for g in cc)
        g_ch123_connected_largest.remove_nodes_from(isolated)

        print_islands(g_ch123_connected_largest, 'g_ch123_connected_largest')

        g_tree_large, g_lonlat_large = closest_node(g_ch123_connected_largest, nodes_europe)
        found_count = [0, 0]
        for i in range(len(components)):
            net = components[i]
            node = random.choice(list(net))
            g_ch123_connected, new_nodes, new_ways, found_count = route_bc(node, None, g_ch123_connected, g_ch1234567, nodes_europe, new_nodes, new_ways, g_tree_large, g_lonlat_large, found_count)

        print(datetime.datetime.now(),
              'Number of edges in connected SWISS graph after connecting islands in connected graph: ' + str(
                  len(g_ch123_connected.edges)))

        print_islands(g_ch123_connected, 'g_ch123_connected')

        # In case the none elected border crossings want to be deleted, activate this
        if cut_nonelected:
            g_ch123_connected = cut_nonelected_bc(g_ch123_connected, network_objects=network_objects, out_path=out_path)

        # Join final connected graph with eu123 graph, to complete the full network
        g_eu123_connected = nx.compose(g_ch123_connected, g_eu123)
        print(datetime.datetime.now(),
              'Number of edges in connected EUROPE graph: ' + str(
                  len(g_eu123_connected.edges)))

        print_islands(g_eu123_connected, 'g_eu123_connected')

        # -----------------------------------------------------------------------------
        # ADD TO EUROPE NETWORK FILES THE ADDED NODES AND WAYS TO THE CONNECTED GRAPH
        # -----------------------------------------------------------------------------
        # file = open(str(eu123_path) + "/network_files/europe_nodes_dict4326.pkl", 'rb')
        # all_europe_nodes = pickle.load(file)
        # file.close()
        # europe_nodes_merged = {**all_europe_nodes, **new_nodes}
        # # with open(str(out_path) + '/europe_nodes_dict4326.pkl', 'wb') as f:
        # #     pickle.dump(europe_nodes_merged, f, pickle.HIGHEST_PROTOCOL)
        # print(len(all_europe_nodes), len(all_europe_nodes))
        #
        # file = open(str(eu123_path) + "/network_files/europe_ways_splitted_dict.pkl", 'rb')
        # all_europe_sw = pickle.load(file)
        # file.close()
        # europe_sw_merged = {**all_europe_sw, **new_ways}
        #
        #
        # eu_gdf = pd.read_csv(str(eu123_path) + "/network_files/gdf_MTP_europe.csv", low_memory=False)
        #
        # new_ways_df = pd.DataFrame.from_dict(new_ways, orient='index', columns=['start_node_id', 'end_node_id', 'nodes_list'])
        # new_ways_df = new_ways_df.reset_index()
        # new_ways_df = new_ways_df.rename(columns={"index": "new_id"})
        #
        # eu_gdf = pd.concat([new_ways_df, eu_gdf])

        # export graph and shp file of final network
        if os.path.isfile(str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle") is False and out_path:
            nx.write_gpickle(g_eu123_connected, str(eu123_path) + "/network_files/eu_connected_graph_bytime.gpickle")
            nx.write_gpickle(g_ch123_connected, str(ch1234567_path) + "/network_files/ch_connected_graph_bytime.gpickle")
            if os.path.isfile(str(out_path) + "/network_files/eu_connected_graph_bytime.shp") is False:
                create_shp_largest(g_ch123_connected, None, None, None,
                                   str(ch1234567_path) + "/network_files", 'ch_connected_graph_bytime', list_nodes=None)
                # create_shp_largest(g_eu123_connected, europe_nodes_merged, europe_sw_merged, eu_gdf,
                #                    str(eu123_path) + "/network_files", 'eu_connected_graph_bytime', list_nodes=None)
                create_shp_largest(g_eu123_connected, None, None, None,
                                   str(eu123ch_path) + "/network_files", 'eu_connected_graph_bytime', list_nodes=None)
    else:
        print(datetime.datetime.now(), 'Connected files already exist.')
        print('------------------------------------------------------------------------')
        print('------------------------------------------------------------------------')
Example No. 34
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import random

G = nx.read_gpickle('spotify_data.pickle')

genres_ = [
    'jazz', 'step', 'classical', 'elec', 'metal', 'rock', 'edm', 'tech',
    'indie', 'house', 'rap', 'hip hop', 'pop'
]
temp_set = set()
remove_nodes = set()
for node in G.nodes(data=True):
    try:
        if len(node[1]['genres']) > 0 and G.degree(node[0]) > 1:
            if get_genre(node[1]['genres']) is not None:
                temp_set.add(get_genre(node[1]['genres']))
        else:
            remove_nodes.add(node[0])

        if len(temp_set) == 0:
            remove_nodes.add(node[0])

        node[1]['genres'] = list(temp_set)
        temp_set = set()
    except:
        remove_nodes.add(node[0])


def get_genre(l):
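    # Hypothetical completion, not the original code: map an artist's genre
    # tags onto the first matching coarse label from genres_ above, or None.
    for coarse in genres_:
        for tag in l:
            if coarse in tag:
                return coarse
    return None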
Example No. 35
 def load(self, model_data_dir=None):
     model_data_dir = self.model_data_dir if model_data_dir is None else model_data_dir
     if not os.path.exists(self.graph_save_path):
         logger.warning("File '{}' not found".format(model_data_dir))
     else:
         self.graph = nx.read_gpickle(self.graph_save_path)
        new = []
        for date in init:
            if date >= start and date <= end:
                new.append(date)
        if len(new) > 0:
            ndates[tran] = new
            nG.add_node(tran[0])
            nG.add_node(tran[1])
            nG.add_edge(tran[0], tran[1], weight=len(new))
            npos[tran[0]] = pos[tran[0]]
            npos[tran[1]] = pos[tran[1]]
    return npos, nG, ndates


#decryption
aG = nx.read_gpickle("agraph.gpickle")
pickle_in = open("adates.pickle", "rb")
a_dates = pickle.load(pickle_in)
pickle_in = open("apos.pickle", "rb")
apos = pickle.load(pickle_in)

mG = nx.read_gpickle("mgraph.gpickle")
pickle_in = open("mdates.pickle", "rb")
m_dates = pickle.load(pickle_in)
pickle_in = open("mpos.pickle", "rb")
mpos = pickle.load(pickle_in)

dates = [
    datetime.datetime(2020, 1, 20, 0, 0),
    datetime.datetime(2020, 3, 1, 0, 0)
]
 def get_time_point(self, time):
     """ Loads the specific network from the time point and returns it
         returns - network file
     """
     return nx.read_gpickle(self._networks[time])
Example No. 38
def cmat(track_file,
         roi_file,
         resolution_network_file,
         matrix_name,
         matrix_mat_name,
         endpoint_name,
         intersections=False):
    """ Create the connection matrix for each resolution using fibers and ROIs. """

    stats = {}
    iflogger.info('Running cmat function')
    # Identify the endpoints of each fiber
    en_fname = op.abspath(endpoint_name + '_endpoints.npy')
    en_fnamemm = op.abspath(endpoint_name + '_endpointsmm.npy')

    iflogger.info('Reading Trackvis file %s', track_file)
    fib, hdr = nb.trackvis.read(track_file, False)
    stats['orig_n_fib'] = len(fib)

    roi = nb.load(roi_file, mmap=NUMPY_MMAP)
    roiData = roi.get_data()
    roiVoxelSize = roi.header.get_zooms()
    (endpoints, endpointsmm) = create_endpoints_array(fib, roiVoxelSize)

    # Output endpoint arrays
    iflogger.info('Saving endpoint array: %s', en_fname)
    np.save(en_fname, endpoints)
    iflogger.info('Saving endpoint array in mm: %s', en_fnamemm)
    np.save(en_fnamemm, endpointsmm)

    n = len(fib)
    iflogger.info('Number of fibers: %i', n)

    # Create empty fiber label array
    fiberlabels = np.zeros((n, 2))
    final_fiberlabels = []
    final_fibers_idx = []

    # Add node information from specified parcellation scheme
    path, name, ext = split_filename(resolution_network_file)
    if ext == '.pck':
        gp = nx.read_gpickle(resolution_network_file)
    elif ext == '.graphml':
        gp = nx.read_graphml(resolution_network_file)
    else:
        raise TypeError("Unable to read file:", resolution_network_file)
    nROIs = len(gp.nodes())

    # add node information from parcellation
    if 'dn_position' in gp.nodes[list(gp.nodes())[0]]:
        G = gp.copy()
    else:
        G = nx.Graph()
        for u, d in gp.nodes(data=True):
            G.add_node(int(u), **d)
            # compute a position for the node based on the mean position of the
            # ROI in voxel coordinates (segmentation volume )
            xyz = tuple(
                np.mean(np.where(
                    np.flipud(roiData) == int(d["dn_correspondence_id"])),
                        axis=1))
            G.nodes[int(u)]['dn_position'] = tuple([xyz[0], xyz[2], -xyz[1]])

    if intersections:
        iflogger.info("Filtering tractography from intersections")
        intersection_matrix, final_fiber_ids = create_allpoints_cmat(
            fib, roiData, roiVoxelSize, nROIs)
        finalfibers_fname = op.abspath(endpoint_name +
                                       '_intersections_streamline_final.trk')
        stats['intersections_n_fib'] = save_fibers(hdr, fib, finalfibers_fname,
                                                   final_fiber_ids)
        intersection_matrix = np.matrix(intersection_matrix)
        I = G.copy()
        H = nx.from_numpy_matrix(np.matrix(intersection_matrix))
        H = nx.relabel_nodes(
            H, lambda x: x + 1)  # relabel nodes so they start at 1
        I.add_weighted_edges_from(
            ((u, v, d['weight']) for u, v, d in H.edges(data=True)))

    dis = 0
    for i in range(endpoints.shape[0]):

        # ROI start => ROI end
        try:
            startROI = int(roiData[endpoints[i, 0, 0], endpoints[i, 0, 1],
                                   endpoints[i, 0, 2]])
            endROI = int(roiData[endpoints[i, 1, 0], endpoints[i, 1, 1],
                                 endpoints[i, 1, 2]])
        except IndexError:
            iflogger.error(
                'AN INDEXERROR EXCEPTION OCCURED FOR FIBER %s. '
                'PLEASE CHECK ENDPOINT GENERATION', i)
            break

        # Filter
        if startROI == 0 or endROI == 0:
            dis += 1
            fiberlabels[i, 0] = -1
            continue

        if startROI > nROIs or endROI > nROIs:
            iflogger.error(
                "Start or endpoint of fiber terminate in a voxel which is labeled higher"
            )
            iflogger.error(
                "than is expected by the parcellation node information.")
            iflogger.error("Start ROI: %i, End ROI: %i", startROI, endROI)
            iflogger.error("This needs bugfixing!")
            continue

        # Update fiber label
        # switch the rois in order to enforce startROI < endROI
        if endROI < startROI:
            tmp = startROI
            startROI = endROI
            endROI = tmp

        fiberlabels[i, 0] = startROI
        fiberlabels[i, 1] = endROI

        final_fiberlabels.append([startROI, endROI])
        final_fibers_idx.append(i)

        # Add edge to graph
        if G.has_edge(startROI,
                      endROI) and 'fiblist' in G.edge[startROI][endROI]:
            G.edge[startROI][endROI]['fiblist'].append(i)
        else:
            G.add_edge(startROI, endROI, fiblist=[i])

    # create a final fiber length array
    finalfiberlength = []
    if intersections:
        final_fibers_indices = final_fiber_ids
    else:
        final_fibers_indices = final_fibers_idx

    for idx in final_fibers_indices:
        # compute length of fiber
        finalfiberlength.append(length(fib[idx][0]))

    # convert to array
    final_fiberlength_array = np.array(finalfiberlength)

    # make final fiber labels as array
    final_fiberlabels_array = np.array(final_fiberlabels, dtype=int)

    iflogger.info(
        'Found %i (%f percent out of %i fibers) fibers that start or '
        'terminate in a voxel which is not labeled. (orphans)', dis,
        dis * 100.0 / n, n)
    iflogger.info('Valid fibers: %i (%f%%)', n - dis, 100 - dis * 100.0 / n)

    numfib = nx.Graph()
    numfib.add_nodes_from(G)
    fibmean = numfib.copy()
    fibmedian = numfib.copy()
    fibdev = numfib.copy()
    for u, v, d in G.edges(data=True):
        G.remove_edge(u, v)
        di = {}
        if 'fiblist' in d:
            di['number_of_fibers'] = len(d['fiblist'])
            idx = np.where((final_fiberlabels_array[:, 0] == int(u))
                           & (final_fiberlabels_array[:, 1] == int(v)))[0]
            di['fiber_length_mean'] = float(
                np.mean(final_fiberlength_array[idx]))
            di['fiber_length_median'] = float(
                np.median(final_fiberlength_array[idx]))
            di['fiber_length_std'] = float(np.std(
                final_fiberlength_array[idx]))
        else:
            di['number_of_fibers'] = 0
            di['fiber_length_mean'] = 0
            di['fiber_length_median'] = 0
            di['fiber_length_std'] = 0
        if not u == v:  # Fix for self loop problem
            G.add_edge(u, v, **di)
            if 'fiblist' in d:
                numfib.add_edge(u, v, weight=di['number_of_fibers'])
                fibmean.add_edge(u, v, weight=di['fiber_length_mean'])
                fibmedian.add_edge(u, v, weight=di['fiber_length_median'])
                fibdev.add_edge(u, v, weight=di['fiber_length_std'])

    iflogger.info('Writing network as %s', matrix_name)
    nx.write_gpickle(G, op.abspath(matrix_name))

    numfib_mlab = nx.to_numpy_matrix(numfib, dtype=int)
    numfib_dict = {'number_of_fibers': numfib_mlab}
    fibmean_mlab = nx.to_numpy_matrix(fibmean, dtype=np.float64)
    fibmean_dict = {'mean_fiber_length': fibmean_mlab}
    fibmedian_mlab = nx.to_numpy_matrix(fibmedian, dtype=np.float64)
    fibmedian_dict = {'median_fiber_length': fibmedian_mlab}
    fibdev_mlab = nx.to_numpy_matrix(fibdev, dtype=np.float64)
    fibdev_dict = {'fiber_length_std': fibdev_mlab}

    if intersections:
        path, name, ext = split_filename(matrix_name)
        intersection_matrix_name = op.abspath(name + '_intersections') + ext
        iflogger.info('Writing intersection network as %s',
                      intersection_matrix_name)
        nx.write_gpickle(I, intersection_matrix_name)

    path, name, ext = split_filename(matrix_mat_name)
    if not ext == '.mat':
        ext = '.mat'
        matrix_mat_name = matrix_mat_name + ext

    iflogger.info('Writing matlab matrix as %s', matrix_mat_name)
    sio.savemat(matrix_mat_name, numfib_dict)

    if intersections:
        intersect_dict = {'intersections': intersection_matrix}
        intersection_matrix_mat_name = op.abspath(name +
                                                  '_intersections') + ext
        iflogger.info('Writing intersection matrix as %s',
                      intersection_matrix_mat_name)
        sio.savemat(intersection_matrix_mat_name, intersect_dict)

    mean_fiber_length_matrix_name = op.abspath(name +
                                               '_mean_fiber_length') + ext
    iflogger.info('Writing matlab mean fiber length matrix as %s',
                  mean_fiber_length_matrix_name)
    sio.savemat(mean_fiber_length_matrix_name, fibmean_dict)

    median_fiber_length_matrix_name = op.abspath(name +
                                                 '_median_fiber_length') + ext
    iflogger.info('Writing matlab median fiber length matrix as %s',
                  median_fiber_length_matrix_name)
    sio.savemat(median_fiber_length_matrix_name, fibmedian_dict)

    fiber_length_std_matrix_name = op.abspath(name + '_fiber_length_std') + ext
    iflogger.info('Writing matlab fiber length deviation matrix as %s',
                  fiber_length_std_matrix_name)
    sio.savemat(fiber_length_std_matrix_name, fibdev_dict)

    fiberlengths_fname = op.abspath(endpoint_name + '_final_fiberslength.npy')
    iflogger.info('Storing final fiber length array as %s', fiberlengths_fname)
    np.save(fiberlengths_fname, final_fiberlength_array)

    fiberlabels_fname = op.abspath(endpoint_name + '_filtered_fiberslabel.npy')
    iflogger.info('Storing all fiber labels (with orphans) as %s',
                  fiberlabels_fname)
    np.save(
        fiberlabels_fname,
        np.array(fiberlabels, dtype=np.int32),
    )

    fiberlabels_noorphans_fname = op.abspath(endpoint_name +
                                             '_final_fiberslabels.npy')
    iflogger.info('Storing final fiber labels (no orphans) as %s',
                  fiberlabels_noorphans_fname)
    np.save(fiberlabels_noorphans_fname, final_fiberlabels_array)

    iflogger.info("Filtering tractography - keeping only no orphan fibers")
    finalfibers_fname = op.abspath(endpoint_name + '_streamline_final.trk')
    stats['endpoint_n_fib'] = save_fibers(hdr, fib, finalfibers_fname,
                                          final_fibers_idx)
    stats['endpoints_percent'] = float(stats['endpoint_n_fib']) / float(
        stats['orig_n_fib']) * 100
    stats['intersections_percent'] = float(
        stats['intersections_n_fib']) / float(stats['orig_n_fib']) * 100

    out_stats_file = op.abspath(endpoint_name + '_statistics.mat')
    iflogger.info('Saving matrix creation statistics as %s', out_stats_file)
    sio.savemat(out_stats_file, stats)
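
A brief sketch (not part of the original code; the file name is hypothetical) of how the connectome gpickle written above can be re-loaded and turned back into a dense fiber-count matrix from the 'number_of_fibers' edge attribute:

import networkx as nx
import numpy as np

# Re-load the pickled connectome graph and rebuild an ROI-by-ROI count matrix.
G_loaded = nx.read_gpickle('connectome.pck')  # hypothetical path to matrix_name
n_rois = G_loaded.number_of_nodes()
fib_count = np.zeros((n_rois, n_rois))
for u, v, d in G_loaded.edges(data=True):
    # ROI labels start at 1, so shift to 0-based matrix indices.
    fib_count[int(u) - 1, int(v) - 1] = d.get('number_of_fibers', 0)
    fib_count[int(v) - 1, int(u) - 1] = d.get('number_of_fibers', 0)
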
if __name__ == "__main__":
    start = time.time()

    dataset = "gnu09"
    model = "MultiValency"
    print(dataset, model)

    if model == "MultiValency":
        ep_model = "range"
    elif model == "Random":
        ep_model = "random"
    elif model == "Categories":
        ep_model = "degree"

    G = nx.read_gpickle("../../graphs/U%s.gpickle" % dataset)
    print('Read graph G')
    print(time.time() - start)

    Ep = dict()
    with open("Ep_%s_%s1.txt" % (dataset, ep_model)) as f:
        for line in f:
            data = line.split()
            Ep[(int(data[0]), int(data[1]))] = float(data[2])

    R = 500
    I = 1000
    ALGO_NAME = "CCWP"
    FOLDER = "Data4InfMax/"
Example no. 40
def read_graph(name):
    dirname = os.path.dirname(__file__)
    path = os.path.join(dirname, name + '.gpickle.bz2')
    return nx.read_gpickle(path)
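
A usage sketch (the graph name is hypothetical and assumes a matching .gpickle.bz2 file next to the module):

G = read_graph('karate')
print(G.number_of_nodes(), G.number_of_edges())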
Example no. 41
import networkx as nx
G = nx.read_gpickle("HPRD-Biogrid.pkl")
nx.double_edge_swap(G,G.number_of_nodes(),G.number_of_nodes()*10)

print(nx.number_of_edges(G))

nx.write_gpickle(G,"edge_swapped_graph.pkl")
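
A small check, not part of the original snippet: double_edge_swap rewires edges but preserves every node's degree, which can be verified on a copy of the graph.

H = G.copy()
degrees_before = dict(H.degree())
nx.double_edge_swap(H, nswap=100, max_tries=1000)
assert dict(H.degree()) == degrees_before  # degree sequence is unchanged
print(nx.number_of_edges(H))               # edge count is unchanged as well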



Example no. 42
import operator
import networkx as nx
import pickle

item = 'german'

g = nx.read_gpickle('../y_data/trees/' + item)
print(len(g.nodes()))

#in degree
ind = {}
for node in g.nodes():
    ind[node] = g.in_degree(node)
top_10_in = dict(
    sorted(ind.items(), key=operator.itemgetter(1), reverse=True)[:50])
#print top_10_in

# file = open('../y_data/basics/' + item + '/in', 'w')
# print>>file, ind
# file.close()
print(item, 'in ok')

#out degree
outd = {}
for node in g.nodes():
    outd[node] = g.out_degree(node)
top_10_out = dict(
    sorted(outd.items(), key=operator.itemgetter(1), reverse=True)[:50])
#print top_10_out

# file = open('../y_data/basics/' + item + '/out', 'w')
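
An equivalent Python 3 idiom for the same top-50 selection (a sketch; assumes the directed graph g loaded above):

from collections import Counter

top_50_in = dict(Counter(dict(g.in_degree())).most_common(50))
top_50_out = dict(Counter(dict(g.out_degree())).most_common(50))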
Example no. 43
    graph_path = CONFIG.input
    result_path = CONFIG.output
    enable_word = CONFIG.word

    # paths = ['uiu', 'iui', 'uiciu']
    if enable_word:
        # user_paths = ['uiwiu', 'uiu']
        user_paths = ['uiwiu']
    else:
        user_paths = ['uiu']
    item_paths = ['iui']
    # item_paths = ['iui', 'iwi']
    # word_paths = ['wiuiw']

    print("Reading graph file...")
    graph = nx.read_gpickle(graph_path)
    result_path = result_path

    nodes = graph.nodes()
    user_nodes = [node for node in nodes if node.startswith('u_')]
    item_nodes = [node for node in nodes if node.startswith('i_')]
    word_nodes = [node for node in nodes if node.startswith('w_')]
    category_nodes = [node for node in nodes if node.startswith('c_')]
    print("{} users, {} items, {} words, {} category".format(
        len(user_nodes), len(item_nodes), len(word_nodes),
        len(category_nodes)))

    user_sentences = []
    item_sentences = []
    word_sentences = []
    print("Walking user sentences...")
Example no. 44
        for edge in joint_g.edges(data=True):
            (p1, p2, data) = edge

            # if p1 in all_profiles and p2 in all_profiles:
            edge_outf.write("%s\t%s\t%f\n" % (p1, p2, data['weight']))
            nodes_to_write.update([p1, p2])

    with open(os.path.join(graph_save_dir, "nodes.txt"), "w") as node_outf:

        for p in nodes_to_write:
            if p in cas4_profiles:
                _type = 1
            elif p in uvrd_profiles:
                _type = 2
            else:
                _type = 0

            node_outf.write("%s\t%s\t%d\n" % (p, profile2gene[p], _type))

    return joint_g


if __name__ == "__main__":

    preprocessed_file = os.path.join(work_dir, "adj_graph.p")
    # G = graph_preprocessing(preprocessed_file)
    G = nx.read_gpickle(preprocessed_file)

    # gt.degree_distributions(G)
    # gt.clustering_coefficients(G)
Example no. 45
	def __init__(self, handle):
		super(GraphCleaner, self).__init__()
		self.handle = handle
		self.G = nx.read_gpickle('{0} Graph with PWIs.pkl'.format(self.handle))
Example no. 46
                    os.path.join(output_path, 'indices.info'))
        shutil.copy(os.path.join(path_to_files, 'rootNode.info'),
                    os.path.join(output_path, 'rootNode.info'))
        shutil.copy(os.path.join(path_to_files, 'startNodeId.info'),
                    os.path.join(output_path, 'startNodeId.info'))
        print(path_to_files)
        if os.path.isfile(os.path.join(path_to_files, 'equivalence.info')):
            shutil.copy(os.path.join(path_to_files, 'equivalence.info'),
                        os.path.join(output_path, 'equivalence.info'))

        counter += 1


if __name__ == '__main__':
    data = '/home/irma/work/DATA/INFERENCE_DATA/WEBKB/folds/fold1-train.gpickle'
    pattern_path = '/home/irma/work/DATA/INFERENCE_DATA/WEBKB/experiments_inference/page_class/General_patterns/pattern3/'
    output = '/home/irma/work/DATA/INFERENCE_DATA/WEBKB/experiments_inference/page_class/PATTERNS/'
    general_path_file = '/home/irma/work/DATA/INFERENCE_DATA/WEBKB/experiments_inference/page_class/patterns.info'
    data_graph = nx.read_gpickle(data)
    pattern = nx.read_gml(os.path.join(pattern_path, 'pattern.gml'))
    groundings = an.get_all_possible_values(data_graph, 'word')
    ground_patterns = ground_pattern(pattern, groundings, [4, 6])
    write_patterns(ground_patterns, pattern_path, 52, output)
    #write file with all the patterns
    dirs = os.listdir(output)
    dirs.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))
    with open(general_path_file, 'w') as fajl:
        for d in dirs:
            if "pattern" in d:
                fajl.write(os.path.join(output, d) + "\n")
Example no. 47
import pickle

import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from nxviz import CircosPlot, ArcPlot  # missing imports added; CircosPlot/ArcPlot come from nxviz
from sklearn import datasets
# plt.style.use('ggplot')

# filename = 'c:\\Users\\okigboo\\Desktop\\PythonDataScience\\NetworkAnalysis\\ego-twitter.p'

filename = 'C:\\Users\\Jose\\Desktop\\PythonDataScience\\NetworkAnalysis\\ego-twitter.p'

infile = open(filename, 'rb')
myfile = pickle.load(infile)
infile.close()

# G = nx.path_graph(50)
# nx.write_gpickle(G, filename)
G = nx.read_gpickle(filename)

# G = nx.read_gpickle(filename)
print(type(G))

# Create the CircosPlot object: c
c = CircosPlot(G)

# Draw c to the screen
c.draw()
plt.show()

# Create the un-customized ArcPlot object: a
a = ArcPlot(G)

# Draw a to the screen
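a.draw()    # presumed continuation of the truncated snippet, mirroring the CircosPlot calls above
plt.show()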
Example no. 48
    G_ini.remove_nodes_from(
        [node for node in G.nodes if G_ini.nodes[node]['immunization']])
    length_of_vulnerable_region = list(
        map(len, list(nx.connected_components(G_ini))))
    if len(length_of_vulnerable_region) > 0:
        size_T = max(length_of_vulnerable_region)
    else:
        size_T = 0
    G_ini = G.to_undirected()
    G_ini, max_T, R_t = paintTarget(G_ini, size_T)
    return utility_s(G_ini, v, R_t, max_T) - len(list(
        G.out_edges(v))) * alpha - G.nodes[v]['immunization'] * beta


if __name__ == '__main__':
    G = nx.read_gpickle("../test/Global test/forest.pickle")
    G.add_node(16)
    G.add_edge(17, 18)
    G.add_node(19)

    G.add_edge(17, 16)
    G.add_edge(16, 19)

    G.nodes[16]['immunization'] = True
    G.nodes[18]['immunization'] = False

    G.nodes[17]['immunization'] = False

    G.nodes[19]['immunization'] = False

    alpha = G.nodes[0]['alpha']
Example no. 49
File: nx.py Project: ofenlab/nipype
    def _run_interface(self, runtime):
        global gpickled, nodentwks, edgentwks, kntwks, matlab
        gpickled = list()
        nodentwks = list()
        edgentwks = list()
        kntwks = list()
        matlab = list()
        ntwk = nx.read_gpickle(self.inputs.in_file)

        # Each block computes, writes, and saves a measure
        # The names are then added to the output .pck file list
        # In the case of the degeneracy networks, they are given specified output names

        calculate_cliques = self.inputs.compute_clique_related_measures
        weighted = self.inputs.treat_as_weighted_graph

        global_measures = compute_singlevalued_measures(
            ntwk, weighted, calculate_cliques)
        if isdefined(self.inputs.out_global_metrics_matlab):
            global_out_file = op.abspath(self.inputs.out_global_metrics_matlab)
        else:
            global_out_file = op.abspath(
                self._gen_outfilename('globalmetrics', 'mat'))
        sio.savemat(global_out_file, global_measures, oned_as='column')
        matlab.append(global_out_file)

        node_measures = compute_node_measures(ntwk, calculate_cliques)
        for key in node_measures.keys():
            newntwk = add_node_data(node_measures[key], ntwk)
            out_file = op.abspath(self._gen_outfilename(key, 'pck'))
            nx.write_gpickle(newntwk, out_file)
            nodentwks.append(out_file)
        if isdefined(self.inputs.out_node_metrics_matlab):
            node_out_file = op.abspath(self.inputs.out_node_metrics_matlab)
        else:
            node_out_file = op.abspath(
                self._gen_outfilename('nodemetrics', 'mat'))
        sio.savemat(node_out_file, node_measures, oned_as='column')
        matlab.append(node_out_file)
        gpickled.extend(nodentwks)

        edge_measures = compute_edge_measures(ntwk)
        for key in edge_measures.keys():
            newntwk = add_edge_data(edge_measures[key], ntwk)
            out_file = op.abspath(self._gen_outfilename(key, 'pck'))
            nx.write_gpickle(newntwk, out_file)
            edgentwks.append(out_file)
        if isdefined(self.inputs.out_edge_metrics_matlab):
            edge_out_file = op.abspath(self.inputs.out_edge_metrics_matlab)
        else:
            edge_out_file = op.abspath(
                self._gen_outfilename('edgemetrics', 'mat'))
        sio.savemat(edge_out_file, edge_measures, oned_as='column')
        matlab.append(edge_out_file)
        gpickled.extend(edgentwks)

        ntwk_measures = compute_network_measures(ntwk)
        for key in ntwk_measures.keys():
            if key == 'k_core':
                out_file = op.abspath(
                    self._gen_outfilename(self.inputs.out_k_core, 'pck'))
            if key == 'k_shell':
                out_file = op.abspath(
                    self._gen_outfilename(self.inputs.out_k_shell, 'pck'))
            if key == 'k_crust':
                out_file = op.abspath(
                    self._gen_outfilename(self.inputs.out_k_crust, 'pck'))
            nx.write_gpickle(ntwk_measures[key], out_file)
            kntwks.append(out_file)
        gpickled.extend(kntwks)

        out_pickled_extra_measures = op.abspath(
            self._gen_outfilename(self.inputs.out_pickled_extra_measures,
                                  'pck'))
        dict_measures = compute_dict_measures(ntwk)
        iflogger.info(
            'Saving extra measure file to {path} in Pickle format'.format(
                path=op.abspath(out_pickled_extra_measures)))
        with open(out_pickled_extra_measures, 'wb') as f:  # binary mode for pickle
            pickle.dump(dict_measures, f)

        iflogger.info('Saving MATLAB measures as {m}'.format(m=matlab))

        # Loops through the measures which return a dictionary,
        # converts the keys and values to a Numpy array,
        # stacks them together, and saves them in a MATLAB .mat file via Scipy
        global dicts
        dicts = list()
        for idx, key in enumerate(dict_measures.keys()):
            for idxd, keyd in enumerate(dict_measures[key].keys()):
                if idxd == 0:
                    nparraykeys = np.array(keyd)
                    nparrayvalues = np.array(dict_measures[key][keyd])
                else:
                    nparraykeys = np.append(nparraykeys, np.array(keyd))
                    values = np.array(dict_measures[key][keyd])
                    nparrayvalues = np.append(nparrayvalues, values)
            nparray = np.vstack((nparraykeys, nparrayvalues))
            out_file = op.abspath(self._gen_outfilename(key, 'mat'))
            npdict = {}
            npdict[key] = nparray
            sio.savemat(out_file, npdict, oned_as='column')
            dicts.append(out_file)
        return runtime
Example no. 50
import re
from os.path import expanduser

import networkx as nx


def load_network_from_gpickle(filename, verbose=True):

    filename = re.sub('~', expanduser('~'), filename)
    G = nx.read_gpickle(filename)

    return G
Example no. 51
# Listlabel.append(ut.get_estgraphlabel(gest, "egr", weightflag=0))

# remove specific columns from label
# Labelarray = Listlabel[0]
# Labelarraynew = np.delete(Labelarray, [0])
# Labelarraynew = np.delete(Labelarraynew, tempt)

fileext = "\\plc_5000_egr_estsupbayesian"
nx.write_gpickle(gest, config.datapath + 'Bayesian'+ fileext + ".gpickle")

# with open(config.datapath + 'Bayesian'+ fileext + "_label.pickle", 'wb') as b:
#     pickle.dump(Listlabel, b)

## ================ load estimated graph and gobs label ==============
fileext = "\\plc_5000_egr_estsupbayesian"
gest = nx.read_gpickle(config.datapath + 'Bayesian'+ fileext+".gpickle")

fileext = "\\plc_5000_egr_bayesian"

with open(config.datapath + 'Bayesian' + fileext+ "_label.pickle", 'rb') as b:
    Listlabel = pickle.load(b)

# remove enties for which no node is present
Labelarray = Listlabel[0]
Labelarray = np.delete(Labelarray, [0])

## ================ load noisy graph and gobs label ==============

fileext = "\\plc_5000_gobsnoiseadd_bayesian_6195"

gobsnoise = nx.read_gpickle(config.datapath + 'Bayesian'+ fileext+".gpickle")
Example no. 52
def main(args):
    if args.graph_path:
        G = nx.read_gpickle(args.graph_path)

        dict_path = args.graph_path[0:args.graph_path.rfind('/') + 1]

        if (not args.no_dictionary):
            node_to_id_dict = pickle.load(open(dict_path+'node_to_id_dict.pickle', 'rb'))

        id_to_node_dict = None
        if args.print_node_names_in_top_k:
            id_to_node_dict = pickle.load(open(dict_path+'id_to_node_dict.pickle', 'rb'))
    else:
        # We need to create the graph from a csv
        G = utils.graph.get_graph_from_csv(file=args.file_path, source=args.source, target=args.target, edge_attr=args.edge_attr)

        # Convert all node names to integer IDs (starting with ID=0)
        ids = range(G.number_of_nodes())
        nodes = list(G.nodes())
        id_to_node_dict = {ids[i]: nodes[i] for i in range(len(ids))}
        node_to_id_dict = {nodes[i]: ids[i] for i in range(G.number_of_nodes())}
        G = nx.relabel_nodes(G, node_to_id_dict)

        # Save the graph and the dictionaries
        nx.write_gpickle(G, path=args.graph_output_dir + 'graph.gpickle')
        with open(args.graph_output_dir + 'id_to_node_dict.pickle', 'wb') as handle:
            pickle.dump(id_to_node_dict, handle)
        with open(args.graph_output_dir + 'node_to_id_dict.pickle', 'wb') as handle:
            pickle.dump(node_to_id_dict, handle)

    print('Input graph has', G.number_of_nodes(), 'nodes and', G.number_of_edges(), 'edges')
    utils.auxiliary_functions.set_json_attr_val('graph_info', {'num_nodes': G.number_of_nodes(), 'num_edges': G.number_of_edges()}, file_path=args.output_dir+'args.json')

    if args.user_specified_query_nodes:
        # Use the user specified query nodes
        Q = []
        for q_name in args.user_specified_query_nodes:
            Q.append(node_to_id_dict[q_name])
    else:
        # Select 'k' query nodes randomly. The nodes selected must have an out-degree of at least 1.
        Q = utils.auxiliary_functions.get_query_nodes(G, k=args.num_q_nodes)

    # Save the chosen nodes Q into the json file
    utils.auxiliary_functions.set_json_attr_val('query_nodes', Q, file_path=args.output_dir+'args.json')

    # Get the PPR scores for every node in G given a set of query nodes Q using particle filtering
    start = timer()
    print('Calculating PPR using particle filtering...')
    ppr_np_array, num_iterations = utils.ppr.get_ppr(G, Q, return_type='array')
    elapsed_time = timer()-start
    print('Finished calculating PPR using particle filtering. Took', num_iterations, 'iterations for convergence. Elapsed time is:', elapsed_time, 'seconds.\n')
    with open(args.output_dir + 'particle_filtering_ppr_scores.npy', 'wb') as f:
        np.save(f, ppr_np_array)
    utils.auxiliary_functions.set_json_attr_val('ppr_using_pf', {'runtime': elapsed_time, 'num_iterations': num_iterations }, file_path=args.output_dir+'info.json')

    # Check if we want to also run PPR from each query node seperately
    if args.run_ppr_from_each_query_node:
        single_source_output_dir = args.output_dir + 'single_source_ppr_scores/'
        print('Calculating PPR from each source in the query set...')

        if args.distributed_single_source_ppr:
            # Single source ppr multi-core implementation
            start_timer = timer()
            aggregate_ppr_single_source_node_np_array, stats_dict = utils.ppr.get_ppr_from_single_source_nodes_parallel(G, Q)
            print('Total Elapsed time distribute implementation:', timer()-start_timer)
        else:
            # Single source ppr single-core implementation
            aggregate_ppr_single_source_node_np_array = np.zeros(G.number_of_nodes())
            stats_dict = {}
            start_timer = timer()
            for query_node in tqdm(Q):
                start = timer()
                ppr_single_source_node_np_array, num_iterations = utils.ppr.get_ppr(G, [query_node], return_type='array')
                elapsed_time = timer()-start
                stats_dict[query_node] = {'runtime': elapsed_time, 'num_iterations': num_iterations}
                aggregate_ppr_single_source_node_np_array += ppr_single_source_node_np_array
            print('Total Elapsed time with single cpu:', timer()-start_timer)

        # Calculate a combined ppr vector for all sources in the query 
        ppr_single_sources = aggregate_ppr_single_source_node_np_array / len(Q) 

        utils.auxiliary_functions.set_json_attr_val('ppr_single_source_using_pf', stats_dict, file_path=args.output_dir+'info.json')
        print('Finished calculating PPR from each source in the query set.\n')

        with open(args.output_dir + 'ppr_single_source_scores.npy', 'wb') as f:
            np.save(f, ppr_single_sources)

    # Evaluation of the results 
    # Top-10 nodes using particle filtering
    top_k_ppr = utils.auxiliary_functions.get_top_k_vals_numpy(ppr_np_array, k=10)
    print('TOP-10 nodes using particle filtering')
    utils.auxiliary_functions.print_top_k_nodes(top_k_ppr, id_to_node_dict, args.print_node_names_in_top_k)

    if args.run_ppr_from_each_query_node:
        # Get top-k values from numpy array
        top_k_ppr_single_sources = utils.auxiliary_functions.get_top_k_vals_numpy(ppr_single_sources, 10)
        print('\nTOP-10 nodes using multiple sources particle filtering')
        utils.auxiliary_functions.print_top_k_nodes(top_k_ppr_single_sources, id_to_node_dict, args.print_node_names_in_top_k)

        # Calculate the normalized discounted cumulative gain (NDCG) between the ppr vs the ppr_single_source rankings
        k_vals = [1, 5, 10, 50, 100, 200, 500, 1000]
        ndcg_dict = {}
        print('\n\nNormalized discounted cumulative gain (NDCG) scores at various k values')
        for k in k_vals:
            ndcg_dict[str(k)] = ndcg_score(np.array([ppr_np_array]), np.array([ppr_single_sources]), k=k)
            print('NDCG score at k=' + str(k) + ':', ndcg_dict[str(k)])
        # Calculate NDCG scores for all rankings (k=total_number_of_nodes)
        ndcg_dict['full'] = ndcg_score(np.array([ppr_np_array]), np.array([ppr_single_sources]))

        utils.auxiliary_functions.set_json_attr_val('ndcg_scores', ndcg_dict, file_path=args.output_dir+'info.json')

    if args.distributed_pf:
        #Evaluation of Distributed Particle Filtering
        ppr_dist, num_iterations_dist = utils.ppr.get_ppr_distributed(G, Q, return_type='array')
        top_k_ppr_dist = utils.auxiliary_functions.get_top_k_vals_numpy(ppr_dist, 10)
        utils.auxiliary_functions.print_top_k_nodes(top_k_ppr_dist, id_to_node_dict, args.print_node_names_in_top_k)

        k_vals = [1, 5, 10, 50, 100, 200, 500, 1000]
        ndcg_dist_dict = {}
        print('The number of iterations for distributed PPR', num_iterations_dist)
        print('\n\nNormalized discounted cumulative gain (NDCG) scores at various k values for Dist PPR')
        for k in k_vals:
            ndcg_dist_dict[k] = ndcg_score(np.array([ppr_np_array]), np.array([ppr_dist]), k=k)
            print('NDCG score at k=' + str(k) + ':', ndcg_dist_dict[k])
    

    if args.run_networkx_ppr:
        # Top-10 nodes using networkx implementation of PPR
        personalization_dict = {}
        for q in Q:
            personalization_dict[q] = 1
        start = timer()
        print('\n\nCalculating PPR using NetworkX implementation of PPR')
        ppr_dict_nx = nx.pagerank(G, alpha=0.85, personalization=personalization_dict)
        print('Finished calculating PPR using NetworkX implementation of PPR. Elapsed time is:', timer()-start, 'seconds.')
        with open(args.output_dir + 'networkx_ppr_scores.pickle', 'wb') as handle:
            pickle.dump(ppr_dict_nx, handle)

        # Convert 'ppr_dict_nx' into an array for easy NDCG scores comparison
        ppr_array_nx = []
        for id in ppr_dict_nx:
            ppr_array_nx.append(ppr_dict_nx[id])

        print('\n\nTOP-10 nodes using Networkx implementation of PPR')
        top_k_ppr_nx = utils.auxiliary_functions.get_top_k_vals_numpy(np.array(ppr_array_nx), 10)
        utils.auxiliary_functions.print_top_k_nodes(top_k_ppr_nx, id_to_node_dict, args.print_node_names_in_top_k)
        
        # Calculate the NDCG scores using networkx vs ppr_np_array. The networkx scores are used as the ground truth
        k_vals = [1, 5, 10, 50, 100, 200, 500, 1000]
        ndcg_dict = {}
        print('\n\nNormalized discounted cumulative gain (NDCG) scores at various k values for networkx PPR vs PPR using PF')
        for k in k_vals:
            ndcg_dict[str(k)] = ndcg_score(np.array([ppr_array_nx]), np.array([ppr_np_array]), k=k)
            print('NDCG score at k=' + str(k) + ':', ndcg_dict[str(k)])
        ndcg_dict['full'] = ndcg_score(np.array([ppr_array_nx]), np.array([ppr_np_array]))
        utils.auxiliary_functions.set_json_attr_val('ndcg_scores_nx', ndcg_dict, file_path=args.output_dir+'info.json')
Example no. 53
    def create_structure(self, aggregate_by, data_directory, file_name,
                         **feature_function_pairs):
        """
		Creates structure of graph from provided relational datasets 
		using python networkx.
		:aggregate_by: str, geographic entity by which to aggregate relational data. This
		will become the nodes of the graph and will be a column in the geo-tagged relational dataset.
		:data_directory: str, path to the relevant data directory/storage bucket 
		for the dataset indicated by file_name
		:file_name: str, path to the name of the geo-tagged dataset we want to use to populate the graph
		node attributes
		:**feature_function_pairs: kwargs, feature to be aggregated from relational dataset
		as the key and the aggregation function to be applied as its value
		Returns: Writes resulting graph structure as pkl to disk in graph bucket. 
		"""

        assert (isinstance(aggregate_by, str)), "\
			argument aggregate_by must be of type str"

        assert (isinstance(data_directory, str)), "\
			argument data_directory must be of type str"

        assert (isinstance(file_name, str)), "\
			argument file_name must be of type str"

        # load in geographic entity shape files
        if self.gcp:
            blob = self.geo_bucket.blob(
                'chicago_{}_reformatted.json'.format(aggregate_by))
            geo_entities = json.loads(blob.download_as_string(client=None))
        else:
            with open(
                    '{}/chicago_{}_reformatted.json'.format(
                        self.geo_directory, aggregate_by), 'r') as f:
                geo_entities = json.load(f)

        # load the requested specified dataframe
        df = pd.read_csv('{}/{}'.format(data_directory, file_name))
        # aggregate features from dataframe to populate graph node attributes
        df_aggregated = self.aggregate_features(df, geo_entities, aggregate_by,
                                                **feature_function_pairs)

        # determine whether a serialized version of this graph_model already exists
        exists = ""
        if self.gcp:
            graph_list = list(self.graph_bucket.list_blobs())
            result = [
                1 if self.graph_model_name + '.pkl' in str(name) else 0
                for name in graph_list
            ]
            if sum(result) >= 1:
                exists = True
                ix = result.index(1)
            else:
                exists = False

        else:
            graph_list = os.listdir(self.graph_directory)
            result = [
                1 if self.graph_model_name + '.pkl' in name else 0
                for name in graph_list
            ]
            if sum(result) >= 1:
                exists = True
                ix = result.index(1)
            else:
                exists = False

        # if there isn't already a serialized version of this graph_model
        # we create nodes named after 'aggregate_by' with attributes corresponding
        # created by aggregating the dataframe features
        if not exists:

            # first create nodes, return resulting graph structure
            G = self.create_pynx_nodes(df_aggregated,
                                       node_category=aggregate_by,
                                       attribute_columns=list(
                                           df_aggregated.columns))

            # then create edge relationships between nodes
            # currently the only option is "NEXT_TO"
            G = self.add_edges_to_pynx(G, "NEXT_TO", utilities.intersection, ["polygon_name_1", "polygon_name_2"], \
                  aggregate_by, bidirectional=True, polygon_dict_1=geo_entities, \
                  polygon_dict_2=geo_entities)

        # if the serialized version of the graph_model already exists,
        # we load it into memory and using networkx to add nodes, attributes, edge relations
        # that dont already exist
        if exists:

            print("adding to existing graph")
            if self.gcp:

                blob = self.graph_bucket.blob('{}.pkl'.format(
                    self.graph_model_name))
                os.system("mkdir {}/create_graph_model/temp".format(
                    self.home_directory))
                blob.download_to_filename(
                    "{}/create_graph_model/temp/temp.pkl".format(
                        self.home_directory),
                    client=None)
                G = nx.read_gpickle(
                    "{}/create_graph_model/temp/temp.pkl".format(
                        self.home_directory))
                os.remove("{}/create_graph_model/temp/temp.pkl".format(
                    self.home_directory))
                os.system("rmdir {}/create_graph_model/temp".format(
                    self.home_directory))

            else:

                G = nx.read_gpickle("{}/{}.pkl".format(self.graph_directory,
                                                       self.graph_model_name))

            # we note the 'existing_graph' kwarg is specified here
            G = self.create_pynx_nodes(df_aggregated,
                                       node_category=aggregate_by,
                                       attribute_columns=list(
                                           df_aggregated.columns),
                                       existing_graph=G)

            G = self.add_edges_to_pynx(G, "NEXT_TO", utilities.intersection, ["polygon_name_1", "polygon_name_2"], \
                  aggregate_by, bidirectional=True, polygon_dict_1=geo, \
                  polygon_dict_2=geo)
        """
		RETURN TO THIS: SEPARATE SCRIPT FOR MULTIPLE GEO FILES
		# create unidirectional edges between census tract and neighborhood
		G = pynx_to_neo4j.add_edges_to_pynx(G, "CONTAINS", utilities.intersection, ["polygon_name_1", "polygon_name_2"], \
									"neighborhood", "tract", bidirectional=False, polygon_dict_1=neighborhoods, \
									polygon_dict_2=tracts)
		G = pynx_to_neo4j.add_edges_to_pynx(G, "IS_WITHIN", utilities.intersection, ["polygon_name_1", "polygon_name_2"], \
									"tract", "neighborhood", bidirectional=False, polygon_dict_1=tracts, \
									polygon_dict_2=neighborhoods)
		"""

        # save as pkl file
        if self.gcp:
            # write to disk first
            os.system("sudo mkdir {}/create_graph_model/temp".format(
                self.home_directory))
            nx.write_gpickle(
                G, "{}/create_graph_model/temp/{}.pkl".format(
                    self.home_directory, self.graph_model_name))
            # then upload to bucket
            blob = self.graph_bucket.blob('{}.pkl'.format(
                self.graph_model_name))
            blob.upload_from_filename(
                "{}/create_graph_model/temp/{}.pkl".format(
                    self.home_directory, self.graph_model_name))
            os.remove("{}/create_graph_model/temp/{}.pkl".format(
                self.home_directory, self.graph_model_name))
            os.system("rmdir {}/create_graph_model/temp".format(
                self.home_directory))

        else:
            nx.write_gpickle(
                G, "{}/{}.pkl".format(self.graph_directory,
                                      self.graph_model_name))
Example no. 54
File: nx.py Project: ofenlab/nipype
def average_networks(in_files, ntwk_res_file, group_id):
    """
    Sums the edges of input networks and divides by the number of networks
    Writes the average network as .pck and .gexf and returns the name of the written networks
    """
    import networkx as nx
    import os.path as op
    iflogger.info(
        "Creating average network for group: {grp}".format(grp=group_id))
    matlab_network_list = []
    if len(in_files) == 1:
        avg_ntwk = read_unknown_ntwk(in_files[0])
    else:
        count_to_keep_edge = np.round(float(len(in_files)) / 2)
        iflogger.info(
            "Number of networks: {L}, an edge must occur in at least {c} to remain in the average network"
            .format(L=len(in_files), c=count_to_keep_edge))
        ntwk_res_file = read_unknown_ntwk(ntwk_res_file)
        iflogger.info("{n} Nodes found in network resolution file".format(
            n=ntwk_res_file.number_of_nodes()))
        ntwk = remove_all_edges(ntwk_res_file)
        counting_ntwk = ntwk.copy()
        # Sums all the relevant variables
        for index, subject in enumerate(in_files):
            tmp = nx.read_gpickle(subject)
            iflogger.info('File {s} has {n} edges'.format(
                s=subject, n=tmp.number_of_edges()))
            edges = tmp.edges_iter()
            for edge in edges:
                data = {}
                data = tmp.edge[edge[0]][edge[1]]
                data['count'] = 1
                if ntwk.has_edge(edge[0], edge[1]):
                    current = {}
                    current = ntwk.edge[edge[0]][edge[1]]
                    #current['count'] = current['count'] + 1
                    data = add_dicts_by_key(current, data)
                ntwk.add_edge(edge[0], edge[1], data)
            nodes = tmp.nodes_iter()
            for node in nodes:
                data = {}
                data = ntwk.node[node]
                if 'value' in tmp.node[node]:
                    data['value'] = data['value'] + tmp.node[node]['value']
                ntwk.add_node(node, data)

        # Divides each value by the number of files
        nodes = ntwk.nodes_iter()
        edges = ntwk.edges_iter()
        iflogger.info(
            'Total network has {n} edges'.format(n=ntwk.number_of_edges()))
        avg_ntwk = nx.Graph()
        newdata = {}
        for node in nodes:
            data = ntwk.node[node]
            newdata = data
            if 'value' in data:
                newdata['value'] = data['value'] / len(in_files)
                ntwk.node[node]['value'] = newdata
            avg_ntwk.add_node(node, newdata)

        edge_dict = {}
        edge_dict['count'] = np.zeros(
            (avg_ntwk.number_of_nodes(), avg_ntwk.number_of_nodes()))
        for edge in edges:
            data = ntwk.edge[edge[0]][edge[1]]
            if ntwk.edge[edge[0]][edge[1]]['count'] >= count_to_keep_edge:
                iflogger.info(
                    'Count: {c} is greater than or equal to the minimum, {n}, for edge {e1}-{e2}'
                    .format(c=ntwk.edge[edge[0]][edge[1]]['count'],
                            n=count_to_keep_edge,
                            e1=edge[0],
                            e2=edge[1]))
                for key in data.keys():
                    if not key == 'count':
                        data[key] = data[key] / len(in_files)
                ntwk.edge[edge[0]][edge[1]] = data
                avg_ntwk.add_edge(edge[0], edge[1], data)
            edge_dict['count'][edge[0] -
                               1][edge[1] -
                                  1] = ntwk.edge[edge[0]][edge[1]]['count']

        iflogger.info(
            'After thresholding, the average network has {n} edges'.format(
                n=avg_ntwk.number_of_edges()))

        avg_edges = avg_ntwk.edges_iter()
        for edge in avg_edges:
            data = avg_ntwk.edge[edge[0]][edge[1]]
            for key in data.keys():
                if not key == 'count':
                    edge_dict[key] = np.zeros((avg_ntwk.number_of_nodes(),
                                               avg_ntwk.number_of_nodes()))
                    edge_dict[key][edge[0] - 1][edge[1] - 1] = data[key]

        for key in edge_dict.keys():
            tmp = {}
            network_name = group_id + '_' + key + '_average.mat'
            matlab_network_list.append(op.abspath(network_name))
            tmp[key] = edge_dict[key]
            sio.savemat(op.abspath(network_name), tmp)
            iflogger.info(
                'Saving average network for key: {k} as {out}'.format(
                    k=key, out=op.abspath(network_name)))

    # Writes the networks and returns the name
    network_name = group_id + '_average.pck'
    nx.write_gpickle(avg_ntwk, op.abspath(network_name))
    iflogger.info(
        'Saving average network as {out}'.format(out=op.abspath(network_name)))
    avg_ntwk = fix_keys_for_gexf(avg_ntwk)
    network_name = group_id + '_average.gexf'
    nx.write_gexf(avg_ntwk, op.abspath(network_name))
    iflogger.info(
        'Saving average network as {out}'.format(out=op.abspath(network_name)))
    return network_name, matlab_network_list
Example no. 55
    updateType    = 'async'


    targetDirectory = f'{os.getcwd()}/{args.dir}'
    os.makedirs(targetDirectory, exist_ok=True)

    settings = dict(
        nSamples         = nSamples, \
        burninSteps      = burninSteps, \
        updateMethod     = updateType
        )
    IO.saveSettings(targetDirectory, settings)

    for i, g in enumerate(ensemble):

        graph = nx.read_gpickle(g)
        filename = os.path.split(g)[-1].strip('.gpickle')

        modelSettings = dict(\
                             graph       = graph,\
                             updateType  = updateType,\
                             magSide     = magSide
                             )
        model = fastIsing.Ising(**modelSettings)

        Tc = Tc_idx = -1
        while Tc < 0:
            mags, sus, binder, abs_mags = simulation.magnetizationParallel(model, \
                                temps        = temps,        \
                                n            = nSamples,     \
                                burninSteps  = burninSteps)
Example no. 56
    def __read_cache(self):
        return nx.read_gpickle(self.CACHE_PATH)
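
A matching cache writer (a sketch; the method name is an assumption, the CACHE_PATH attribute is taken from the reader above):

    def __write_cache(self, graph):
        nx.write_gpickle(graph, self.CACHE_PATH)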
Example no. 57
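# Presumed opening of the truncated breadth-first word-ladder search below,
# reconstructed as a sketch from the calls that follow (not the original code).
import random
from collections import deque

import networkx as nx


def transformWord(graph, start, goal):
    paths = deque([[start]])
    extended = set()
    while paths: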
        currentPath = paths.popleft()
        currentWord = currentPath[-1]
        if currentWord == goal:
            return currentPath
        elif currentWord in extended:
            continue
        extended.add(currentWord)
        transforms = graph[currentWord]
        for word in transforms:
            if word not in currentPath:
                paths.append(currentPath[:] + [word])
    #no transformation
    return []


G = nx.read_gpickle('test3.gpickle')


def generateStartEnd():
    lower_limit = 3
    upper_limit = 8
    flag = True
    while flag:
        node1 = random.choice(list(G.nodes()))
        node2 = random.choice(list(G.nodes()))
        try:  # Using networkx function bidirectional_dijkstra
            isConnected = nx.bidirectional_dijkstra(G, node1, node2)
        except nx.NetworkXNoPath:
            node1 = random.choice(list(G.nodes()))
            node2 = random.choice(list(G.nodes()))
        words = transformWord(G, node1, node2)
Example no. 58
                    # Otherwise, save direction
                    else:
                        undir_G.dir_dict[(n1, n2, relation)] = '-->'
                        undir_G.dir_dict[(n2, n1, relation)] = '<--'

TAG = ''

# Save ConceptNet graph
nx.write_gpickle(undir_G,
                 "data/ConceptNet/conceptnet_full_di_rel" + TAG + ".gpickle")

# Save direction dict
with open('data/ConceptNet/dir_dict.pickle', 'wb') as handle:
    pickle.dump(undir_G.dir_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Load graph, get all nodes
cn_graph_full = nx.read_gpickle('data/ConceptNet/conceptnet_full_di_rel' +
                                TAG + '.gpickle')
cn_nodes = list(cn_graph_full.nodes)

print('# nodes:', len(cn_nodes))

# Discard nodes that have only one neighbor, create new subgraph and save
cn_nodes = [c for c in cn_nodes if len(list(cn_graph_full.neighbors(c))) > 1]
reduced_graph = nx.subgraph(cn_graph_full, cn_nodes).copy()
nx.write_gpickle(
    reduced_graph,
    "data/ConceptNet/conceptnet_full_di_rel_red" + TAG + ".gpickle")

print('# nodes (after removing nodes without neighbors):', len(cn_nodes))
Example no. 59
def LoadGraph(filename):
    return nx.read_gpickle(filename)
Example no. 60
def loadRealGraphSeries(file_prefix, startId, endId):
    graphs = []
    for file_id in range(startId, endId + 1):
        graph_file = file_prefix + str(file_id) + '_graph.gpickle'
        graphs.append(nx.read_gpickle(graph_file))
    return graphs
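
A usage sketch (prefix and ID range are hypothetical):

graphs = loadRealGraphSeries('data/snapshot_', 1, 5)  # loads data/snapshot_1_graph.gpickle ... data/snapshot_5_graph.gpickle
print(len(graphs))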