Example #1
 def iso_json(string1,string2):
     dataG1 = json.loads(string1)
     graph1 = json_graph.node_link_graph(dataG1)
     dataG2 = json.loads(string2)
     graph2 = json_graph.node_link_graph(dataG2)
    # return nx.is_isomorphic(graph1, graph2)
     return nx.faster_could_be_isomorphic(graph1, graph2)
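For reference, the node-link JSON these examples consume can be produced with json_graph.node_link_data; a minimal round-trip sketch (self-contained, using a made-up two-node graph) might look like this:

import json
import networkx as nx
from networkx.readwrite import json_graph

# build a tiny graph and serialize it to a node-link JSON string
G = nx.Graph()
G.add_edge("a", "b")
payload = json.dumps(json_graph.node_link_data(G))

# iso_json above expects two such strings
H = json_graph.node_link_graph(json.loads(payload))
print(nx.faster_could_be_isomorphic(G, H))  # True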
Example #2
def restore_anm_nidb_from_json(data):

    d = ank_json_custom_loads(data)
    anm = autonetkit.anm.AbstractNetworkModel()
    nidb = autonetkit.nidb.NIDB()

    for overlay_id, overlay_data in d.items():
        if overlay_id == "nidb":
            continue # don't restore nidb graph to anm
        anm._overlays[overlay_id] = json_graph.node_link_graph(overlay_data)

    nidb._graph = json_graph.node_link_graph(d['nidb'])
    rebind_interfaces(anm)

    return anm, nidb
Example #3
def graphs_json2networkx(input_dict):
    from json import loads
    from networkx.readwrite import json_graph

    gtext = loads(input_dict['graph'])
    g =  json_graph.node_link_graph(gtext)
    return {'nxgraph': g}
Example #4
def main(json_file, output_prefix, source, target):
    
    with open(json_file) as data_file:    
        data = json.load(data_file)

    G = json_graph.node_link_graph(data, directed=False)

    print "Finished Reading in Graph: {0}".format(datetime.datetime.now())

    id_seq = networkx.get_node_attributes(G, "sequence")

    seq_id = { seq : node_id for node_id, seq in id_seq.items()}

    print "Created inverse lookup table: {0}".format(datetime.datetime.now())

    if ',' in target:
        targets = target.split(',')
    else:
        targets = [target]

    for target in targets:
        paths = networkx.all_shortest_paths(G, seq_id[source], seq_id[target])

        with open("{0}_paths_{1}_{2}.txt".format(output_prefix, source, target), 'w') as o:
            for path in paths:
                o.write(",".join( [id_seq[node_id] for node_id in path ] ))
	        o.write("\n")

    print "Output paths: {0}".format(datetime.datetime.now())
Example #5
def restore_anm_nidb_from_json(data):
    # This can be used to extract from the json used to send to webserver

    d = ank_json_custom_loads(data)
    anm = autonetkit.anm.AbstractNetworkModel()
    nidb = autonetkit.nidb.DeviceModel()

    for overlay_id, overlay_data in d.items():
        if overlay_id == "nidb":
            continue # don't restore nidb graph to anm
        anm._overlays[overlay_id] = json_graph.node_link_graph(overlay_data)

    nidb._graph = json_graph.node_link_graph(d['nidb'])
    rebind_interfaces(anm)

    return anm, nidb
Example #6
 def graph(self, node_links_data):
     self.g = json_graph.node_link_graph(node_links_data)
     remove = list()
     for _id in self.g.node:
         if self.g.node[_id]["node_type"] not in self.valid_type:
             remove.append(_id)
     self.g.remove_nodes_from(remove)
Example #7
 def as_tree(graph, root=OPENSTACK_CLUSTER, reverse=False):
     linked_graph = json_graph.node_link_graph(graph)
     if 0 == nx.number_of_nodes(linked_graph):
         return {}
     if reverse:
         linked_graph = linked_graph.reverse()
     return json_graph.tree_data(linked_graph, root=root)
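As a rough illustration of what as_tree returns (assuming the input node-link data describes a directed tree; the node names below are invented):

import networkx as nx
from networkx.readwrite import json_graph

T = nx.DiGraph()
T.add_edges_from([("cluster", "host-1"), ("cluster", "host-2")])

# serialize to node-link data, parse it back, then nest children under the root
data = json_graph.node_link_data(T)
tree = json_graph.tree_data(json_graph.node_link_graph(data, directed=True), root="cluster")
print(tree)  # {'id': 'cluster', 'children': [{'id': 'host-1'}, {'id': 'host-2'}]}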
Example #8
def load_json(stream):
    """
    Args:
        stream: Open stream containing json

    Assumes the json is in networkx node-link format
    """
    
    js = json.load(stream)
    g = json_graph.node_link_graph(js)

    assert all([nd.has_key('coords') for nd in g.node.values()]),\
           "json node-link graph must have nodes with coords for GeoGraph"

    # get coords
    coords = [v['coords'] for v in g.node.values()]

    # set default projection
    input_proj = ""
    if gm.is_in_lon_lat(coords):
        input_proj = gm.PROJ4_LATLONG
    else:
        input_proj = gm.PROJ4_FLAT_EARTH

    coords_dict = {k: v['coords'] for k, v in g.node.items()}
    # now get rid of 'coords' key,val for each node
    for node in g.node.values():
        node.pop('coords', None)

    geo_nodes = GeoGraph(srs=input_proj, coords=coords_dict, data=g)
    return geo_nodes
Example #9
def get_selected_reaction(jsonGraph, nodeDic, reacIDs, org):
    """
    Filter the selected reactions and show results from the PyNetMet calculation.
    It returns a subgraph of the graph built from the jsonGraph.
    @param jsonGraph: graph in JSON format
    @param nodeDic: dict mapping names to ids
    @param reacIDs: names of the reactions contained in the nodeDic
    @param org: organism
    @return: subgraph
    """
    # Translate reac names to IDs
    # Get substrates and products of all reacs
    metabolites = []
    for reac in reacIDs:
        metabolites += org.get_reaction(reac).metabolites

    met_ids = list(map(lambda x: nodeDic[x], metabolites))
    g = json_graph.node_link_graph(jsonGraph)

    g.remove_edges_from(list(filter(lambda x: g.get_edge_data(*x)["object"].name not in reacIDs, g.edges(met_ids))))

    # Get products/substrates directly connected to filter
    #reacIDs += flatten(g.in_edges(reacIDs)) + flatten(g.out_edges(reacIDs))

    h = g.subgraph(met_ids)
    return h
Example #10
def find_min_spanning_tree(A):
	"""
		Input:
			A : Adjacency matrix in scipy.sparse format.
		Output:
			T : Minimum spanning tree.
			run_time : Total runtime to find minimum spanning tree 

	"""
	# Record start time.
	start = time.time()

	# Check if graph is pre-processed, if yes then don't process it again.
	if os.path.exists('../Data/dcg_graph.json'):
		with open('../Data/dcg_graph.json') as data:
			d = json.load(data)
		G = json_graph.node_link_graph(d)

	# If graph is not preprocessed then convert it to a Graph and save it to a JSON file.
	else:
		G = from_scipy_sparse_matrix(A)
		data = json_graph.node_link_data(G)
		with open('../Data/dcg_graph.json', 'w') as outfile:
			json.dump(data, outfile)

	# Find MST.
	T = minimum_spanning_tree(G)

	#Record total Runtime
	run_time = time.time()-start
	return T, run_time
Example #11
def main(json_file, output_prefix, metric):
    
    with open(json_file) as data_file:    
        data = json.load(data_file)

    G = json_graph.node_link_graph(data)

    metrics = {}

    #metrics["degree"] = degree(G)
    metrics["closeness"] = closeness_centrality(G).values()
    #TODO: add any other metrics here using a similar format to above line.
    sequences = {}    	

    cleaved_seq = { key : val for key, val in sequences.items() if val["type"] == "CLEAVED" }

    if metric != "metrics":
        labels_to_plot = [metric]
    else:
        labels_to_plot = metrics.keys()
    n_to_plot = len(labels_to_plot)
    fig, axarr = pconv.create_ax(n_to_plot, 1, shx=False, shy=False)

    nbins = 20    

    for ind, key in enumerate(labels_to_plot):
        normed = True
        hist.draw_actual_plot(axarr[0,ind], metrics[key], "", key.capitalize(), normed=normed, nbins=nbins)
        axarr[0,ind].ticklabel_format(axis='x', style='sci', scilimits=(-2,2))

        #pconv.add_legend(axarr[0,ind], location="middle right")
    pconv.save_fig(fig, output_prefix, "metrics", n_to_plot*5, 5, tight=True, size=12) 
Example #12
def read_from_json_gexf(fname=None,label_field_name='APIs',conv_undir = False):
    '''
    Load the graph files (.gexf or .json only supported)
    :param fname: graph file name
    :param label_field_name: field denoting the node label
    :param conv_undir: convert to undirected graph or not
    :return: graph in networkx format
    '''
    if not fname:
        logging.error('no valid path or file name')
        return None
    else:
        try:
            try:
                with open(fname, 'rb') as File:
                    org_dep_g = json_graph.node_link_graph(json.load(File))
            except:
                org_dep_g = nx.read_gexf (path=fname)

            g = nx.DiGraph()
            for n, d in org_dep_g.nodes_iter(data=True):
                g.add_node(n, attr_dict={'label': '-'.join(d[label_field_name].split('\n'))})
            g.add_edges_from(org_dep_g.edges_iter())
        except:
            logging.error("unable to load graph from file: {}".format(fname))
            # return 0
    logging.debug('loaded {} as a graph with {} nodes and {} edges'.format(fname, g.number_of_nodes(),g.number_of_edges()))
    if conv_undir:
        g = nx.Graph (g)
        logging.debug('converted {} as undirected graph'.format (g))
    return g
Example #13
def transferRedditDataFormat(dataset_dir, output_file):
    G = json_graph.node_link_graph(json.load(open(dataset_dir + "/reddit-G.json")))
    labels = json.load(open(dataset_dir + "/reddit-class_map.json"))

    train_ids = [n for n in G.nodes() if not G.node[n]['val'] and not G.node[n]['test']]
    test_ids = [n for n in G.nodes() if G.node[n]['test']]
    val_ids = [n for n in G.nodes() if G.node[n]['val']]
    train_labels = [labels[i] for i in train_ids]
    test_labels = [labels[i] for i in test_ids]
    val_labels = [labels[i] for i in val_ids]
    feats = np.load(dataset_dir + "/reddit-feats.npy")
    ## Logistic gets thrown off by big counts, so log transform num comments and score
    feats[:, 0] = np.log(feats[:, 0] + 1.0)
    feats[:, 1] = np.log(feats[:, 1] - min(np.min(feats[:, 1]), -1))
    feat_id_map = json.load(open(dataset_dir + "/reddit-id_map.json"))
    feat_id_map = {id: val for id, val in feat_id_map.iteritems()}

    # train_feats = feats[[feat_id_map[id] for id in train_ids]]
    # test_feats = feats[[feat_id_map[id] for id in test_ids]]

    # numNode = len(feat_id_map)
    # adj = sp.lil_matrix(np.zeros((numNode,numNode)))
    # for edge in G.edges():
    #     adj[feat_id_map[edge[0]], feat_id_map[edge[1]]] = 1

    train_index = [feat_id_map[id] for id in train_ids]
    val_index = [feat_id_map[id] for id in val_ids]
    test_index = [feat_id_map[id] for id in test_ids]
    np.savez(output_file, feats = feats, y_train=train_labels, y_val=val_labels, y_test = test_labels, train_index = train_index,
             val_index=val_index, test_index = test_index)
Example #14
def read_json_graph(istream):
    """
    Reads a json graph output by the algorithm and returns it
    """
    data = json.loads(istream.read())
    G = json_graph.node_link_graph(data)
    return G
Example #15
def simple_to_nx(j_data):
    port_to_index_mapping = defaultdict(dict)
    for node in j_data['nodes']:
        if not "ports" in node:
            continue
        node_id = node['id']
        # first check for loopback zero
        ports = node['ports']
        _ports = {}  # output format
        try:
            lo_zero = [p for p in ports if p['id'] == "Loopback0"].pop()
        except IndexError:
            # can't pop -> no loopback zero, append
            lo_zero = {'category': 'loopback',
                       'description': "Loopback Zero"}
        else:
            ports.remove(lo_zero)
        finally:
            _ports[0] = lo_zero
        '''Sharad: below change is for 2nd loopback. currently commenting it out.
           change start in below loop to 2 while adding another loopback
        lo_one = {'category': 'loopback',
                      'description': "Loopback One",
                   'id':'loopback1'}
        _ports[1] = lo_one
        '''
        for index, port in enumerate(ports, start=1):
            _ports[index] = port
            port_to_index_mapping[node_id][port['id']] = index

        del node['ports']
        node['_ports'] = _ports

    nodes_by_id = {n['id']: i for i, n
                   in enumerate(j_data['nodes'])}

    unmapped_links = []

    if "links" in j_data:
        mapped_links = j_data['links']
        for link in mapped_links:
            src = link['src']
            dst = link['dst']
            src_pos = nodes_by_id[src]
            dst_pos = nodes_by_id[dst]
            src_port_id = port_to_index_mapping[src][link['src_port']]
            dst_port_id = port_to_index_mapping[dst][link['dst_port']]

            interfaces = {src: src_port_id,
                          dst: dst_port_id}

            unmapped_links.append({'source': src_pos,
                                   'target': dst_pos,
                                   '_ports': interfaces,
                                   'link_type': link['link_type']
                                   })

    j_data['links'] = unmapped_links
    return json_graph.node_link_graph(j_data)
Example #16
    def load(self, content):
        loaded = json.loads(content)
        self.G = json_graph.node_link_graph(loaded['structure'])

        for folder in loaded['data']:
            temp = Folder()
            temp.load(loaded['data'][folder])
            self.data[folder] = temp
Example #17
def get_all_tags_graph():
    try:
        j = AllTagsGraph.objects.all()[:1][0].graph
        d = simplejson.loads(j)
        g = json_graph.node_link_graph(d, directed=True)
    except IndexError:
        g = networkx.read_edgelist('taggraph/fixtures/wired_text_hubpagerank.edgelist',
            create_using=networkx.DiGraph())
        tg = BaseTagGraph()
        tg.graph = g
        tg._pagerank()
        j = tg.to_json()
        AllTagsGraph.objects.create(graph=simplejson.dumps(j))
        j = AllTagsGraph.objects.all()[:1][0].graph
        d = simplejson.loads(j)
        g = json_graph.node_link_graph(d, directed=True)
    return g
Example #18
def create_manhattan_scenario(load='high', reduced = False):
    #load graph, get positions
    if reduced:
        with open('assets/manhattan_road_netx_constrained.json','r') as data_file: 
            road_graph = json_graph.node_link_graph(json.load(data_file))
    else:
        with open('assets/manhattan_road_netx.json','r') as data_file: 
            road_graph = json_graph.node_link_graph(json.load(data_file))

    pos = OrderedDict({node: (road_graph.node[node]["latlon"][1],
                   road_graph.node[node]["latlon"][0]) for node in road_graph.nodes()})
    
    with open('assets/manhattan_demands_50.json','r') as f:
        raw_demands = json.loads(f.read())
    
    #implement 1-NN, get relationship of source, sink -> source_node, sink_node
    station_to_node = {}

    for station, loc in raw_demands['stations'].iteritems():
        node = get_neighbor(pos, (loc[1], loc[0]), 1)[0]
        station_to_node[station] = road_graph.nodes().index(node)


    #create demands list of tuples using the 1-NN relationship and the demands
    demands = {}

    for scenario in raw_demands['scenarios']:
        demands[scenario] = {}
        for demand in raw_demands['scenarios'][scenario]:
            src = station_to_node[demand[0]]
            snk = station_to_node[demand[1]]
            if src in demands[scenario]:
                if snk in demands[scenario][src]:
                    demands[scenario][src][snk] += demand[2]
                else:
                    demands[scenario][src][snk] = demand[2]
            else:
                demands[scenario][src] = {}
                demands[scenario][src][snk] = demand[2]
        dems = []
        for src, sinks in demands[scenario].iteritems():
            for snk, d in sinks.iteritems():
                dems.append((src,snk,d))
        demands[scenario] = dems
     
    return road_graph, pos, demands[load]
Example #19
	def load_from_json(self, fileName='net.json'):
		start = timer()		
		with open(fileName) as json_file:
			json_data = json.load(json_file)
			self.G = json_graph.node_link_graph(json_data)
			self.initialLoadComplete = True
		end = timer() - start
		log.info('time taken to load: {}'.format(end))
Example #20
    def calculate_average_degree(self):
        n = self.dbs.query(NetworkModel.graph_data) \
            .filter(NetworkModel.site == self.site) \
            .filter(NetworkModel.graph_type == self.graph_type) \
            .one()

        g = json_graph.node_link_graph(json.loads(n[0]), directed=False, multigraph=False)
        return nx.degree_centrality(g)
Example #21
def load_graph(path):
    try:
        json_data = open(path, 'r')
        image_graph = json_graph.node_link_graph(eval(json_data.read()))
        json_data.close()
    except FileNotFoundError:
        image_graph = networkx.Graph()
    return image_graph
Example #22
def get_commit_tree_json(repo):
    import json
    from networkx.readwrite import json_graph

    with open('./data/' + repo + ':commits', 'r') as f:
        data = f.read()
        json_data = json.loads(data)
        return json_graph.node_link_graph(json_data)
Example #23
 def load(self,pathname):
   global igversion
   with open(pathname,"r") as f:
     try:
        self.G = json_graph.node_link_graph(json.load(f,encoding='utf-8'),multigraph=False,directed=True)
     except  ValueError:
        self.G = json_graph.node_link_graph(json.load(f),multigraph=False,directed=True)
     if 'igversion' in self.G.graph:
       if self.G.graph['igversion'] != igversion:
         raise ValueError('Mismatched version. Graph needs to be upgraded to ' + igversion)
     self.G.graph['igversion'] = igversion
     if 'idcount' in self.G.graph:
       self.idc = self.G.graph['idcount']
     elif self.G.has_node('idcount'):
       self.idc = self.G.node['idcount']['count']
       self.G.graph['idcount']=self.idc
       self.G.remove_node('idcount')
   self.dir = os.path.abspath(os.path.split(pathname)[0])
Example #24
def graph_from_file(path):
	"""
	Given a file path, makes a NetworkX graph from the JSON data and returns
	"""
	with open(path) as f:
		data = json.load(f)

	g = json_graph.node_link_graph(data)
	return g
Example #25
 def load(fname):
     with gzip.GzipFile(fname, 'r') as infile:
         data = json.loads(infile.read())
     def cell_from_dict(d):
         return {"id": Cell(d["address"], None, value=d["value"], formula=d["formula"], is_named_range=d["is_named_range"])}
     nodes = map(cell_from_dict, data["nodes"])
     data["nodes"] = nodes
     G = json_graph.node_link_graph(data)
     return Spreadsheet(G, G.nodes())
Example #26
def validateExpDL(expdl={}, distributionPath=None):
    """ """
    # Do not modify the input object
    expdl = copy.deepcopy(expdl)
    
    if not expdl:
        expdl = dict()
        
    if not isinstance(expdl, dict):
        raise TypeError("ExpDL should be a dictionary")
    
    expdl.setdefault('topoGraph', json_graph.node_link_data(testbed.getTopoGraph()))
    nodeList = json_graph.node_link_graph(expdl['topoGraph']).nodes()
    nodeList.sort()
    expdl['nodeList'] = nodeList
    expdl.setdefault('magiNodeList', nodeList)
    if distributionPath:
        expdl['distributionPath'] = distributionPath
    else:
        expdl.setdefault('distributionPath', DEFAULT_DIST_DIR)
    
    nodePaths = expdl.setdefault('nodePaths', dict())
    
    nodeDir = nodePaths.setdefault('root', NODE_DIR)
    nodePaths.setdefault('config', os.path.join(nodeDir, 'config'))
    nodePaths.setdefault('logs', os.path.join(nodeDir, 'logs'))
    nodePaths.setdefault('db', os.path.join(nodeDir, 'db'))
    nodePaths.setdefault('temp', DEFAULT_TEMP_DIR)
    
    testbedPaths = expdl.setdefault('testbedPaths', dict())
    testbedPaths['experimentDir'] = testbed.getExperimentDir()
    
    expdl.setdefault('aal', os.path.join(testbed.getExperimentDir(), "procedure.aal"))
    
    testbedClass = expdl.setdefault('testbedClass', testbed.getTestbedClassFQCN())
    
    # In case the experimenter specifies a testbed type that is different from
    # the default type for the host machine, set the environment accordingly.
    if testbedClass != testbed.getTestbedClassFQCN():
        try:
            testbed.setTestbedClass(testbedClass)
        except Exception:
            log.exception('Could not create instance of set testbed class')
            log.info('Setting default testbed class.')
            expdl['testbedClass'] = testbed.getTestbedClassFQCN()
    
    # Setting experiment name and project name
    # testbed.toControlPlaneNodeName() uses them
    from magi.testbed.emulab import EmulabTestbed
    if isinstance(testbed.getTestbedClassInstance(), EmulabTestbed):
        experimentName = expdl.setdefault('experimentName', testbed.getExperiment())
        projectName = expdl.setdefault('projectName', testbed.getProject())
        testbed.setEID(experiment=experimentName, 
                       project=projectName)
        
    return expdl
Example #27
	def __init__(self, *args, **kwargs):
		super(MRInfluence, self).__init__(*args, **kwargs)
		
		with open(self.options.graph_file, "r") as graph_data:
		    graph_data = json.load(graph_data)
		    self.graph = json_graph.node_link_graph(graph_data)
		 
		self.k = int(self.options.num_init)
		self.t = int(self.options.periods)
		self.initial_nodes = list(np.random.choice(self.graph.nodes(),self.k))
Example #28
 def transform(self, data):
     """Transform."""
     try:
         for serial_data in util.read(data):
             py_obj = json.loads(serial_data)
             graph = json_graph.node_link_graph(py_obj)
             yield graph
     except Exception as e:
         logger.debug('Failed iteration. Reason: %s' % e)
         logger.debug('Exception', exc_info=True)
Example #29
 def __init__(self, data_path):
     with open(data_path, 'r') as graph_file:
         graph_data = json.load(graph_file)
     self.graph = json_graph.node_link_graph(graph_data, multigraph=False)
     self.name_node_map = {node[1]['name']: node[0] for node in self.graph.nodes(data=True)}
     self.all_paths = networkx.all_pairs_dijkstra_path(self.graph)
     self.all_costs = networkx.all_pairs_dijkstra_path_length(self.graph)
     # add blank houses set to each node
     for node in self.graph.nodes():
         self.graph.node[node]['houses'] = []
Example #30
 def perform_map_matching(self, road_network, trace, rank):
     if settings.DEBUG:
         log("Building road network graph...")
     graph = json_graph.node_link_graph(road_network["graph"])
     shortest_path_index = road_network["shortest_path_index"]
     beta = self.hmm_prob_model(road_network, graph, shortest_path_index, trace, rank)
     if settings.DEBUG:
         log("Implementing viterbi algorithm...")
     chosen_index = self.hmm_viterbi_forward()
     sequence = self.hmm_viterbi_backward(road_network, graph, shortest_path_index, trace, chosen_index)
     return {'path': sequence[0], 'route': sequence[1], 'dist': sequence[2], 'path_index': sequence[3], 'emission_prob': self.emission_prob, 'transition_prob': self.transition_prob, 'candidate_rid': self.candidate_rid, 'confidence': sequence[4]}
Example #31
def load_data(prefix, normalize=True, load_walks=False):
    G_data = json.load(open(prefix + "-G.json"))
    G = json_graph.node_link_graph(G_data)
    # nx.draw(G, pos=nx.spring_layout(G))
    # plt.show()
    if isinstance(G.nodes()[0], int):
        conversion = lambda n: int(n)
    else:
        conversion = lambda n: n

    if os.path.exists(prefix + "-feats.npy"):
        feats = np.load(prefix + "-feats.npy")
    else:
        print("No features present.. Only identity features will be used.")
        feats = None
    id_map = json.load(open(prefix + "-id_map.json"))
    id_map = {conversion(k): int(v) for k, v in id_map.items()}
    walks = []
    class_map = json.load(open(prefix + "-class_map.json"))
    if isinstance(list(class_map.values())[0], list):
        lab_conversion = lambda n: n
    else:
        lab_conversion = lambda n: int(n)

    class_map = {
        conversion(k): lab_conversion(v)
        for k, v in class_map.items()
    }

    ## Remove all nodes that do not have val/test annotations
    ## (necessary because of networkx weirdness with the Reddit data)
    broken_count = 0
    for node in G.nodes():
        if not 'val' in G.node[node] or not 'test' in G.node[node]:
            G.remove_node(node)
            broken_count += 1
    print(
        "Removed {:d} nodes that lacked proper annotations due to networkx versioning issues"
        .format(broken_count))

    ## Make sure the graph has edge train_removed annotations
    ## (some datasets might already have this..)
    print("Loaded data.. now preprocessing..")
    for edge in G.edges():
        if (G.node[edge[0]]['val'] or G.node[edge[1]]['val']
                or G.node[edge[0]]['test'] or G.node[edge[1]]['test']):
            G[edge[0]][edge[1]]['train_removed'] = True
        else:
            G[edge[0]][edge[1]]['train_removed'] = False

    if normalize and not feats is None:
        from sklearn.preprocessing import StandardScaler
        train_ids = np.array([
            id_map[n] for n in G.nodes()
            if not G.node[n]['val'] and not G.node[n]['test']
        ])
        train_feats = feats[train_ids]
        scaler = StandardScaler()
        scaler.fit(train_feats)
        feats = scaler.transform(feats)

    if load_walks:
        with open(prefix + "-walks.txt") as fp:
            for line in fp:
                walks.append(map(conversion, line.split()))

    return G, feats, id_map, walks, class_map
Example #32
def load_data(prefix):
  G_data = json.load(open(prefix + "-G.json"))
  G = json_graph.node_link_graph(G_data)
  id_map = json.load(open(prefix + "-id_map.json"))
  id_map = {int(k): int(v) for k, v in id_map.items()}
  return G, id_map
Example #33
def load_graphsage_data(dataset_path, dataset_str, normalize=True):
  """Load GraphSAGE data."""
  start_time = time.time()

  graph_json = json.load(
      gfile.Open('{}/{}/{}-G.json'.format(dataset_path, dataset_str,
                                          dataset_str)))
  graph_nx = json_graph.node_link_graph(graph_json)

  id_map = json.load(
      gfile.Open('{}/{}/{}-id_map.json'.format(dataset_path, dataset_str,
                                               dataset_str)))
  is_digit = list(id_map.keys())[0].isdigit()
  id_map = {(int(k) if is_digit else k): int(v) for k, v in id_map.items()}
  class_map = json.load(
      gfile.Open('{}/{}/{}-class_map.json'.format(dataset_path, dataset_str,
                                                  dataset_str)))

  is_instance = isinstance(list(class_map.values())[0], list)
  class_map = {(int(k) if is_digit else k): (v if is_instance else int(v))
               for k, v in class_map.items()}

  broken_count = 0
  to_remove = []
  for node in graph_nx.nodes():
    if node not in id_map:
      to_remove.append(node)
      broken_count += 1
  for node in to_remove:
    graph_nx.remove_node(node)
  tf.logging.info(
      'Removed %d nodes that lacked proper annotations due to networkx versioning issues',
      broken_count)

  feats = np.load(
      gfile.Open(
          '{}/{}/{}-feats.npy'.format(dataset_path, dataset_str, dataset_str),
          'rb')).astype(np.float32)

  tf.logging.info('Loaded data (%f seconds).. now preprocessing..',
                  time.time() - start_time)
  start_time = time.time()

  edges = []
  for edge in graph_nx.edges():
    if edge[0] in id_map and edge[1] in id_map:
      edges.append((id_map[edge[0]], id_map[edge[1]]))
  num_data = len(id_map)

  val_data = np.array(
      [id_map[n] for n in graph_nx.nodes() if graph_nx.node[n]['val']],
      dtype=np.int32)
  test_data = np.array(
      [id_map[n] for n in graph_nx.nodes() if graph_nx.node[n]['test']],
      dtype=np.int32)
  is_train = np.ones((num_data), dtype=np.bool)
  is_train[val_data] = False
  is_train[test_data] = False
  train_data = np.array([n for n in range(num_data) if is_train[n]],
                        dtype=np.int32)

  train_edges = [
      (e[0], e[1]) for e in edges if is_train[e[0]] and is_train[e[1]]
  ]
  edges = np.array(edges, dtype=np.int32)
  train_edges = np.array(train_edges, dtype=np.int32)

  # Process labels
  if isinstance(list(class_map.values())[0], list):
    num_classes = len(list(class_map.values())[0])
    labels = np.zeros((num_data, num_classes), dtype=np.float32)
    for k in class_map.keys():
      labels[id_map[k], :] = np.array(class_map[k])
  else:
    num_classes = len(set(class_map.values()))
    labels = np.zeros((num_data, num_classes), dtype=np.float32)
    for k in class_map.keys():
      labels[id_map[k], class_map[k]] = 1

  if normalize:
    train_ids = np.array([
        id_map[n]
        for n in graph_nx.nodes()
        if not graph_nx.node[n]['val'] and not graph_nx.node[n]['test']
    ])
    train_feats = feats[train_ids]
    scaler = sklearn.preprocessing.StandardScaler()
    scaler.fit(train_feats)
    feats = scaler.transform(feats)

  def _construct_adj(edges):
    adj = sp.csr_matrix((np.ones(
        (edges.shape[0]), dtype=np.float32), (edges[:, 0], edges[:, 1])),
                        shape=(num_data, num_data))
    adj += adj.transpose()
    return adj

  train_adj = _construct_adj(train_edges)
  full_adj = _construct_adj(edges)

  train_feats = feats[train_data]
  test_feats = feats

  tf.logging.info('Data loaded, %f seconds.', time.time() - start_time)
  return num_data, train_adj, full_adj, feats, train_feats, test_feats, labels, train_data, val_data, test_data
Example #34
def graph_from_json(f):
    with open(f, 'r') as infile:
        networkx_graph = json_graph.node_link_graph(json.load(infile))
    return networkx_graph
Example #35
def __load_graph(json_data):
    G = json_graph.node_link_graph(json_data, False, False, {'name':'pub_key', 'source':'node1_pub', 'target':'node2_pub', 'key':'channel_id', 'link':'edges'})
    return G
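The fourth argument above remaps the keys node_link_graph reads, for a dump whose nodes are identified by pub_key and whose edge list lives under "edges" (an LND-style channel graph). A minimal sketch with invented data, assuming the networkx 2.x signature where this mapping is the attrs parameter:

from networkx.readwrite import json_graph

payload = {
    "nodes": [{"pub_key": "A"}, {"pub_key": "B"}],
    "edges": [{"node1_pub": "A", "node2_pub": "B", "channel_id": "c1", "capacity": 5}],
}
attrs = {"name": "pub_key", "source": "node1_pub", "target": "node2_pub",
         "key": "channel_id", "link": "edges"}

# directed=False, multigraph=False, custom key names as in __load_graph above
G = json_graph.node_link_graph(payload, False, False, attrs)
print(list(G.edges(data=True)))  # [('A', 'B', {'channel_id': 'c1', 'capacity': 5})]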
Example #36
 def _load_G(self):
     G_data = json.load(open(os.path.join(self.data_dir, "G.json")))
     self.G = json_graph.node_link_graph(G_data)
     if type(self.G.nodes()[0]) is int:
         mapping = {k: str(k) for k in self.G.nodes()}
         self.G = nx.relabel_nodes(self.G, mapping)
Example #37
 def read_json_file(self, graph_dir):
     with open(graph_dir, 'r') as f:
         j_graph = json.load(f)
         return json_graph.node_link_graph(j_graph)
Example #38
 def joins_for_table(self, table_name: str):
     # get the graph for networkx
     G = json_graph.node_link_graph(self.design["graph"])
     return nx.shortest_path(G,
                             source=self.base_table_name,
                             target=table_name)
Example #39
 def load_data(self, directory):
     '''Loads data if already generated'''
     with open(directory + '/page_ranks.json') as rank_file:
         self.ranks = json.load(rank_file)
     with open(directory + '/citation_graph.json') as graph_file:
         self.graph = json_graph.node_link_graph(json.load(graph_file))
Example #40
 def structure_raw(self):
     if self.__cached_structure_raw is None:
         g = node_link_graph(self.data)
         g.__class__ = MoleculeContainer
         self.__cached_structure_raw = g
     return self.__cached_structure_raw
Example #41
 def from_json(cls, data):
     graph = node_link_graph(data[PYANNOTE_JSON_CONTENT])
     mapping = {node: T(node) for node in graph}
     graph = nx.relabel_nodes(graph, mapping)
     return cls(graph=graph, **graph.graph)
Example #42
flag = 0
N = 1000
P = 0.008008
n = 1000
m = 4
if flag == 0:
    print('generate a newG')
    #ER = nx.random_graphs.erdos_renyi_graph(N, P)
    ER = nx.random_graphs.barabasi_albert_graph(n, m)
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(json_graph.node_link_data(ER), f)
else:
    print('load a goodG')
    f = open(filename, 'r', encoding='utf-8')
    nld = json.load(f)
    ER = json_graph.node_link_graph(nld)
    f.close()

# draw the graph
print(f'ER-Graph: {n}, {m}, {len(ER.edges())}')
'''
pos = nx.spring_layout(ER)
nx.draw(ER, pos, with_labels=False, node_size=40)
plt.show()'''

##################################
# Edge-removal strategy 1: edge betweenness
G = nx.Graph(ER)
sublist = []
len_of_mcc = []
edges = []
Example #43
def load_graphs(dataset_str):
    node_labels = [None]
    edge_labels = [None]
    idx_train = [None]
    idx_val = [None]
    idx_test = [None]

    if dataset_str == 'grid':
        graphs = []
        features = []
        for _ in range(1):
            graph = nx.grid_2d_graph(20, 20)
            graph = nx.convert_node_labels_to_integers(graph)

            feature = np.identity(graph.number_of_nodes())
            graphs.append(graph)
            features.append(feature)

    elif dataset_str == 'communities':
        graphs = []
        features = []
        node_labels = []
        edge_labels = []
        for i in range(1):
            community_size = 20
            community_num = 20
            p = 0.01

            graph = nx.connected_caveman_graph(community_num, community_size)

            count = 0

            for (u, v) in graph.edges():
                if random.random() < p:  # rewire the edge
                    x = random.choice(list(graph.nodes))
                    if graph.has_edge(u, x):
                        continue
                    graph.remove_edge(u, v)
                    graph.add_edge(u, x)
                    count += 1
            print('rewire:', count)

            n = graph.number_of_nodes()
            label = np.zeros((n, n), dtype=int)
            for u in list(graph.nodes):
                for v in list(graph.nodes):
                    if u // community_size == v // community_size and u > v:
                        label[u, v] = 1
            rand_order = np.random.permutation(graph.number_of_nodes())
            feature = np.identity(graph.number_of_nodes())[:, rand_order]
            graphs.append(graph)
            features.append(feature)
            edge_labels.append(label)

    elif dataset_str == 'protein':

        graphs_all, features_all, labels_all = Graph_load_batch(
            name='PROTEINS_full')
        features_all = (features_all - np.mean(
            features_all, axis=-1, keepdims=True)) / np.std(
                features_all, axis=-1, keepdims=True)
        graphs = []
        features = []
        edge_labels = []
        for graph in graphs_all:
            n = graph.number_of_nodes()
            label = np.zeros((n, n), dtype=int)
            for i, u in enumerate(graph.nodes()):
                for j, v in enumerate(graph.nodes()):
                    if labels_all[u - 1] == labels_all[v - 1] and u > v:
                        label[i, j] = 1
            if label.sum() > n * n / 4:
                continue

            graphs.append(graph)
            edge_labels.append(label)

            idx = [node - 1 for node in graph.nodes()]
            feature = features_all[idx, :]
            features.append(feature)

        print('final num', len(graphs))

    elif dataset_str == 'email':

        with open('data/email.txt', 'rb') as f:
            graph = nx.read_edgelist(f)

        label_all = np.loadtxt('data/email_labels.txt')
        graph_label_all = label_all.copy()
        graph_label_all[:, 1] = graph_label_all[:, 1] // 6

        for edge in list(graph.edges()):
            if graph_label_all[int(edge[0])][1] != graph_label_all[int(
                    edge[1])][1]:
                graph.remove_edge(edge[0], edge[1])

        comps = [
            comp for comp in nx.connected_components(graph) if len(comp) > 10
        ]
        graphs = [graph.subgraph(comp) for comp in comps]

        edge_labels = []
        features = []

        for g in graphs:
            n = g.number_of_nodes()
            feature = np.ones((n, 1))
            features.append(feature)

            label = np.zeros((n, n), dtype=int)
            for i, u in enumerate(g.nodes()):
                for j, v in enumerate(g.nodes()):
                    if label_all[int(u)][1] == label_all[int(v)][1] and i > j:
                        label[i, j] = 1
            label = label
            edge_labels.append(label)

    elif dataset_str == 'ppi':
        dataset_dir = 'data/ppi'
        print("Loading data...")
        G = json_graph.node_link_graph(
            json.load(open(dataset_dir + "/ppi-G.json")))
        edge_labels_internal = json.load(
            open(dataset_dir + "/ppi-class_map.json"))
        edge_labels_internal = {
            int(i): l
            for i, l in edge_labels_internal.items()
        }

        train_ids = [n for n in G.nodes()]
        train_labels = np.array([edge_labels_internal[i] for i in train_ids])
        if train_labels.ndim == 1:
            train_labels = np.expand_dims(train_labels, 1)

        print("Using only features..")
        feats = np.load(dataset_dir + "/ppi-feats.npy")
        # Logistic gets thrown off by big counts, so log transform num comments and score
        feats[:, 0] = np.log(feats[:, 0] + 1.0)
        feats[:, 1] = np.log(feats[:, 1] - min(np.min(feats[:, 1]), -1))
        feat_id_map = json.load(open(dataset_dir + "/ppi-id_map.json"))
        feat_id_map = {int(id): val for id, val in feat_id_map.items()}
        train_feats = feats[[feat_id_map[id] for id in train_ids]]

        node_dict = {}
        for id, node in enumerate(G.nodes()):
            node_dict[node] = id

        comps = [comp for comp in nx.connected_components(G) if len(comp) > 10]
        graphs = [G.subgraph(comp) for comp in comps]

        id_all = []
        for comp in comps:
            id_temp = []
            for node in comp:
                id = node_dict[node]
                id_temp.append(id)
            id_all.append(np.array(id_temp))

        features = [train_feats[id_temp, :] + 0.1 for id_temp in id_all]

    # elif dataset_str == 'brightkite':
    #     dataset_dir = 'data/Brightkite'
    #     print("Loading data...")
    #     G = nx.read_edgelist(
    #         open(dataset_dir + "/Brightkite_edges.txt", "rb"))

    #     node_dict = {}
    #     for id, node in enumerate(G.nodes()):
    #         node_dict[node] = id
    #     feature = []
    #     feature_id = []

    #     fp = open(dataset_dir + "/Brightkite_totalCheckins.txt", "r")
    #     lines = fp.readlines()

    #     for line in lines:
    #         line = line.strip()
    #         line = line.split()
    #         if (len(line) < 5):
    #             continue
    #         feature_id.append(int(line[0]))
    #         feature.append([time.mktime(time.strptime(
    #             line[1], '%Y-%m-%dT%H:%M:%SZ')), float(line[2]), float(line[3])])
    #         # print(line)

    #     feature = np.array(feature)
    #     feature[:, 0] = np.log(feature[:, 0] + 1.0)
    #     feature[:, 1] = np.log(
    #         feature[:, 1] - min(np.min(feature[:, 1]), -1)+1)
    #     feature[:, 2] = np.log(
    #         feature[:, 2] - min(np.min(feature[:, 2]), -1)+1)

    #     feature_map = {}
    #     for i in range(len(feature_id)):
    #         if (feature_id[i] not in feature_map):
    #             feature_map[feature_id[i]] = []
    #         feature_map[feature_id[i]].append(feature[i])
    #     # print(feature_map)
    #     feature_actual_map = {}
    #     for k in feature_map:
    #         feature_actual_map[k] = np.mean(feature_map[k], axis=0)
    #     # print(feature_actual_map)

    #     comps = [comp for comp in nx.connected_components(G) if len(comp) > 10]
    #     graphs = [G.subgraph(comp) for comp in comps]
    #     id_all = []
    #     features = []
    #     count = 0
    #     for comp in comps:
    #         id_temp = []
    #         feat_temp = []
    #         for node in comp:
    #             id = node_dict[node]
    #             id_temp.append(id)
    #             if (id not in feature_actual_map):
    #                 feat_temp.append([0.0, 0.0, 0.0])
    #                 count = count+1
    #             else:
    #                 feat_temp.append(feature_actual_map[id])
    #             id_all.append(np.array(id_temp))
    #             features.append(np.array(feat_temp))

    #     print("Not found features of %d nodes" % {count})

    else:
        raise NotImplementedError

    return graphs, features, edge_labels, node_labels, idx_train, idx_val, idx_test
Example #44
 def load_from_JSON(self, filename="temp.json"):
     with open(filename, 'r') as file:
         self.network = json_graph.node_link_graph(json.load(file))
Example #45
def json_to_nxgraph(jsgraph):
    """Converts a json-like dictionary to a networkx graph."""
    return json_graph.node_link_graph(jsgraph)
Example #46
 def read_from_file(self, filename=FILENAME):
     """Reads graph from JSON file in data link format"""
     with open(filename, "r") as f:
         dat = json.load(f)
     self.graph = json_graph.node_link_graph(dat)
Example #47
def load(fileName):
	with open(fileName+'.json') as json_file: 
		topology = json.load(json_file)
	F = json_graph.node_link_graph(topology)	
	return F
Example #48
 def __init__(self, ):
     graph_data = self._load_graph_data()
     self.graph = (json_graph.node_link_graph(graph_data)
                   if graph_data else nx.Graph())
Example #49
    pairs = []
    for count, node in enumerate(nodes):
        if G.degree(node) == 0:
            continue
        for i in range(num_walks):
            curr_node = node
            for j in range(WALK_LEN):
                next_node = random.choice(G.neighbors(curr_node))
                # self co-occurrences are useless
                if curr_node != node:
                    pairs.append((node, curr_node))
                curr_node = next_node
        if count % 1000 == 0:
            print("Done walks for", count, "nodes")
    return pairs


if __name__ == "__main__":
    """ Run random walks """
    graph_file = sys.argv[1]
    out_file = sys.argv[2]
    G_data = json.load(open(graph_file))
    G = json_graph.node_link_graph(G_data)
    nodes = [
        n for n in G.nodes() if not G.node[n]["val"] and not G.node[n]["test"]
    ]
    G = G.subgraph(nodes)
    pairs = run_random_walks(G, nodes)
    with open(out_file, "w") as fp:
        fp.write("\n".join([str(p[0]) + "\t" + str(p[1]) for p in pairs]))
Example #50
def read_json_file(filename):
	with open(filename) as f:
		js_graph = json.load(f)
	return json_graph.node_link_graph(js_graph)
Example #51
def find_GPSA(dir_raw, GP_dict, soln, dnR, fuel_comp, n_break=0):

    GP_name = GP_dict['name']
    traced = GP_dict['traced']
    dir_save = os.path.join(dir_raw, 'GPSA', traced)
    if not os.path.exists(dir_save):
        os.makedirs(dir_save)
    path_save = os.path.join(dir_save, shorten_GP_name(GP_name) + '.json')

    # =====================================
    # if previously computed, return False

    GPSA_all = dict()
    if os.path.exists(path_save):
        GPSA_all = json.load(open(path_save, 'r'))
        if GP_name in GPSA_all.keys():
            return False

    # =====================================
    # if not loaded, compute these results

    print 'computing GPSA for ' + GP_name

    traced = GP_dict['traced']
    GP_member = GP_dict['member']
    dir_graph = os.path.join(dir_raw, 'graph')
    path_raw = os.path.join(dir_raw, 'raw.npz')
    raw = load_raw(path_raw)
    rr_mat = raw['net_reaction_rate']

    GPSA = dict()
    GPSA['R_GP'] = [
    ]  # net radical production rate associated with a Global Pathway (GP)
    GPSA['Q_GP'] = []  # net heat release rate associated with a GP
    GPSA['D_GP'] = []  # dominancy of a GP

    GPSA['R_ij'] = dict(
    )  # net radical production rate associated with a conversion step (from the i-th species to the j-th species)
    GPSA['Q_ij'] = dict()
    GPSA['a_iji'] = dict()
    #GPSA['perc_ij'] = dict()

    for i in range(len(GP_member) - 1):
        edge = find_edge_name(GP_member, i)

        GPSA['R_ij'][edge] = dict()
        GPSA['R_ij'][edge]['member'] = []
        GPSA['R_ij'][edge]['net'] = []

        GPSA['Q_ij'][edge] = dict()
        GPSA['Q_ij'][edge]['member'] = []
        GPSA['Q_ij'][edge]['net'] = []

        GPSA['a_iji'][edge] = dict()
        GPSA['a_iji'][edge]['member'] = []
        GPSA['a_iji'][edge]['net'] = []

        #GPSA['perc_ij'][edge] = dict()
        #GPSA['perc_ij'][edge]['member'] = []
        #GPSA['perc_ij'][edge]['net'] = []

    source = GP_dict['member'][0]
    traced = GP_dict['traced']
    if source not in fuel_comp.keys():
        perc_from_source = 0.0
    else:
        total_atom = 0
        for k in fuel_comp.keys():
            sp = soln.species(k)
            atom = 0.0
            if traced in sp.composition.keys():
                atom += fuel_comp[k] * sp.composition[traced]
            if source == k:
                atom_source = atom
            total_atom += atom

        perc_from_source = 1.0 * atom_source / total_atom

        #print 'total '+traced+' atoms for '+str(fuel_comp)+' is '+str(total_atom)
        #print 'source '+str(source)+' has '+str(atom_source)+' atms'
        #print 'so perc_from_source = '+str(perc_from_source)

    #GPSA['perc_ij']['from_source'] = perc_from_source

    # for each point -----------

    i_pnt = 0
    while True:

        path_graph = os.path.join(dir_graph,
                                  traced + '_' + str(i_pnt) + '.json')
        if not os.path.exists(path_graph):
            if i_pnt > n_break:
                print 'break as cannot find: ' + str(path_graph)
                break

            else:
                # fill this with None ---------------
                if i_pnt % 10 == 0:
                    print '   fill None GPSA for ' + str(path_graph)

                for i in range(len(GP_member) - 1):
                    s = GP_member[i]
                    t = GP_member[i + 1]
                    edge = find_edge_name(GP_member, i)
                    for k in ['member', 'net']:
                        GPSA['R_ij'][edge][k].append(None)
                        GPSA['Q_ij'][edge][k].append(None)
                        GPSA['a_iji'][edge][k].append(None)
                        #GPSA['perc_ij'][edge][k].append(None)

                GPSA['R_GP'].append(None)
                GPSA['D_GP'].append(None)

                i_pnt += 1
                continue

        soln = raw2soln(soln, raw, i_pnt)
        if i_pnt % 10 == 0:
            print '   finding GPSA for ' + str(path_graph)

        # fill this with real value ---------------
        #norm_Rpro = 0.0
        #norm_Rcon = 0.0
        for id_rxn in range(soln.n_reactions):
            dR = dnR[id_rxn] * rr_mat[i_pnt, id_rxn]
            #norm_Rpro += max(0, dR)
            #norm_Rcon += max(0, -dR)

        flux_graph = json_graph.node_link_graph(
            json.load(open(path_graph, 'r')))
        out_deg = flux_graph.out_degree(weight='flux')
        norm_out_deg = sum(
            [out_deg[m] for m in fuel_comp.keys() if m in out_deg.keys()])

        flux = []
        rxn_involved = []
        sum_OMEGA_R = 0
        sum_OMEGA_Q = 0
        perc_ij_list = []

        # for each edge (conversion step) -----------------

        for i in range(len(GP_member) - 1):
            s = GP_member[i]
            t = GP_member[i + 1]
            edge = find_edge_name(GP_member, i)
            GPSA['R_ij'][edge]['member'].append(dict())
            GPSA['Q_ij'][edge]['member'].append(dict())
            GPSA['a_iji'][edge]['member'].append(dict())
            #GPSA['perc_ij'][edge]['member'].append(dict())

            # ------------------------------

            try:
                st = flux_graph[s][t]
            except KeyError:
                st = None

            perc_ij = None
            if st is not None:
                flux.append(st['flux'])
                perc_ij = 1.0 * st['flux'] / out_deg[s]
                perc_ij_list.append(perc_ij)

                for id_rxn_s in st['member'].keys():
                    id_rxn = int(id_rxn_s)
                    rxn = soln.reaction_equation(id_rxn)
                    rr = rr_mat[i_pnt, id_rxn]
                    if rr < 0:
                        sign_rxn = -id_rxn
                    else:
                        sign_rxn = id_rxn

                    GPSA['a_iji'][edge]['member'][i_pnt][sign_rxn] = st[
                        'member'][id_rxn_s]
                    #GPSA['perc_ij'][edge]['member'][i_pnt][sign_rxn] = 1.0 * st['member'][id_rxn_s]/out_deg[s]

                    OMEGA_R = float(rr * dnR[id_rxn])
                    GPSA['R_ij'][edge]['member'][i_pnt][sign_rxn] = OMEGA_R

                    OMEGA_Q = float(rr * soln.delta_enthalpy[id_rxn])
                    GPSA['Q_ij'][edge]['member'][i_pnt][sign_rxn] = OMEGA_Q

                    if id_rxn not in rxn_involved:
                        sum_OMEGA_R += OMEGA_R
                        sum_OMEGA_Q += OMEGA_Q
                        rxn_involved.append(id_rxn)

            # ------------------------------

            try:
                ts = flux_graph[t][s]
            except KeyError:
                ts = None

            if ts is not None:
                for id_rxn_s in ts['member'].keys():
                    id_rxn = int(id_rxn_s)
                    rxn = soln.reaction_equation(id_rxn)
                    rr = rr_mat[i_pnt, id_rxn]
                    if rr < 0:
                        sign_rxn = -id_rxn
                    else:
                        sign_rxn = id_rxn
                    GPSA['a_iji'][edge]['member'][i_pnt][
                        sign_rxn] = -ts['member'][id_rxn_s]

            # ------------------------------

            GPSA['R_ij'][edge]['net'].append(
                sum(GPSA['R_ij'][edge]['member'][i_pnt].values()))
            GPSA['Q_ij'][edge]['net'].append(
                sum(GPSA['Q_ij'][edge]['member'][i_pnt].values()))
            GPSA['a_iji'][edge]['net'].append(
                sum(GPSA['a_iji'][edge]['member'][i_pnt].values()))
            #GPSA['perc_ij'][edge]['net'].append(perc_ij)

        domi_perc = gmean(perc_ij_list) * perc_from_source
        if bool(flux):
            min_flux = min(flux)
            if norm_out_deg > 0:
                domi_flux = 1.0 * min_flux / norm_out_deg
            else:
                domi_flux = float('nan')
        else:
            min_flux = 0.0
            domi_flux = 0.0

        R_GP = domi_perc * sum_OMEGA_R
        Q_GP = domi_perc * sum_OMEGA_Q
        GPSA['R_GP'].append(R_GP)
        GPSA['Q_GP'].append(Q_GP)
        GPSA['D_GP'].append(domi_perc)

        i_pnt += 1

    GPSA_all[GP_name] = GPSA
    json.dump(GPSA_all, open(path_save, 'w'))
    return True
Example #52
def process_span(span,span_done,log_messages):    
    
    def log(m):
        if CONFIG["process_verbose"] or CONFIG["report_verbose"] :
            log_messages.put("%s: %s"%(span,m))

    # data to be reported after processing
    span_info={"span":span}
    log("starting")

    g=networkx.Graph()
    if CONFIG["export_ref_format"] =="gexf":
        if CONFIG["process_verbose"] : log("read gexf")
        g=networkx.read_gexf(os.path.join(CONFIG["parsed_data"],span,"%s.gexf"%span),node_type=unicode)
    elif CONFIG["export_ref_format"] == "edgelist":
        if CONFIG["process_verbose"] : log("read csv export")
        g=networkx.read_weighted_edgelist(os.path.join(CONFIG["parsed_data"],span,"%s.csv"%span),delimiter="\t")
    elif CONFIG["export_ref_format"] == "pajek":
        if CONFIG["process_verbose"] : log("read pajek export")
        g=networkx.read_pajek(os.path.join(CONFIG["parsed_data"],span,"%s.csv"%span))
    elif CONFIG["export_ref_format"] == "json":
        if CONFIG["process_verbose"] : log("read pajek export")
        data=json.load(open(os.path.join(CONFIG["parsed_data"],span,"%s.json"%span),"r"),encoding="UTF-8")
        g=json_graph.node_link_graph(data)
    else:
        log("no export compatible export format specified")
        exit(1)

    network_references=g.nodes()
    nb_network_references=len(network_references)

    log("loaded %s ref from graph"%nb_network_references)
    span_info["references_occ_filtered"]=nb_network_references

    with codecs.open(os.path.join(CONFIG["parsed_data"],span,"references.dat"),"r",encoding="UTF-8") as file:
        # dat file have one trailing blank line at end of file
        data_lines=file.read().split("\n")[:-1]
    references_by_articles = [(l.split("\t")[0],",".join(l.split("\t")[1:])) for l in data_lines]
    #references_by_articles_filtered=[(a,r) for a,r in references_by_articles if r in references] 

    references_by_articles.sort(key=lambda e:e[1])
    article_groupby_reference=[(reference,list(ref_arts)) for reference,ref_arts in itertools.groupby(references_by_articles,key=lambda e:e[1])]
    span_info["nb_reference_before_filtering"]=len(article_groupby_reference)
    references_article_grouped=[t for t in article_groupby_reference if len(t[1])>=CONFIG["spans"][span]["references"]["occ"]]
    del article_groupby_reference
    del references_by_articles
    #make sure we have same references than network
    ref_filtered=[r for r,_ in references_article_grouped]
    if(len(ref_filtered))!=nb_network_references:
        s1=set(ref_filtered)
        s2=set(network_references)
        to_remove = s1 - s2
        if len(to_remove)>0:
            log("filtering ref which are not in original network : removing %s ref"%len(to_remove))
            references_article_grouped=[ (r,ref_arts) for r,ref_arts in references_article_grouped if r not in to_remove]
        del s1
        del s2
    del ref_filtered
    del network_references
    # print references_article_grouped
    log("imported, filtered and grouped references by articles")

    span_info["subjects_occ_filtered"]=add_annotations(span,"subjects",references_article_grouped,g,log)
    span_info["authors_occ_filtered"]=add_annotations(span,"authors",references_article_grouped,g,log)
    span_info["institutions_occ_filtered"]=add_annotations(span,"institutions",references_article_grouped,g,log)
    span_info["article_keywords_occ_filtered"]=add_annotations(span,"article_keywords",references_article_grouped,g,log)
    span_info["title_keywords_occ_filtered"]=add_annotations(span,"title_keywords",references_article_grouped,g,log)
    span_info["isi_keywords_occ_filtered"]=add_annotations(span,"isi_keywords",references_article_grouped,g,log)
    span_info["countries_occ_filtered"]=add_annotations(span,"countries",references_article_grouped,g,log)
    
    del references_article_grouped
    
    log("have now %s nodes"%len(g.nodes()))
    if not os.path.exists(CONFIG["output_directory"]):
        os.mkdir(CONFIG["output_directory"])

    if CONFIG["export_ref_annotated_format"] =="gexf":
        log("write gexf export")
        networkx.write_gexf(g,os.path.join(CONFIG["output_directory"],"%s_annotated.gexf"%span))
    elif CONFIG["export_ref_annotated_format"] == "edgelist":
        log("write csv export")
        networkx.write_weighted_edgelist(g,os.path.join(CONFIG["output_directory"],"%s_annotated.csv"%span),delimiter="\t")
    elif CONFIG["export_ref_annotated_format"] == "pajek":
        log("write pajek export")
        networkx.write_pajek(g,os.path.join(CONFIG["output_directory"],"%s_annotated.net"%span))
    elif CONFIG["export_ref_annotated_format"] == "graphml":
        log("write pajek export")
        networkx.write_graphml(g,os.path.join(CONFIG["output_directory"],"%s_annotated.graphml"%span))
    else:
        log("no compatible export format specified")

    with codecs.open(os.path.join(CONFIG["parsed_data"],span,"articles.dat"),"r",encoding="UTF-8") as articles_file:
        nb_articles=len(articles_file.read().split("\n")[:-1])
    
    span_info["nb_articles"]=nb_articles    
    span_done.put(span_info)
    del g
Example #53
 def structure(self):
     molcont = node_link_graph(self.data)
     molcont.__class__ = MoleculeContainer
     return molcont
Example #54
    def process_graph(content, model, search_value, n_clicks, search_type, filepath,  G1, pos1, G2, pos2):
        """ Update/rebuild the graph when the user picks a new file or searches something.
           Stores the graph and its nodes positions in an intermediary div (replace div with dcc.store).
           This little maneuver greatly improves run-time.

        Arguments:
            content -- [The content of the uploaded file]
            search_value -- [The value searched by the user: nodes/paths/similarity]
            n_clicks -- [Number of times the path-button was clicked]
            model -- [Whether the user wants to perform the first 4 searches on the first or second model]
            filepath -- [Contains the file extension. Used to differentiate .txt from .p files]
            G1 -- [The first graph in json format]
            pos1 -- [The corresponding position of nodes in json format]
            G2 -- [The second graph in json format]
            pos2 -- [The corresponding position of nodes in json format]
        """
        ctx = dash.callback_context
        component_name = ctx.triggered[0]['prop_id'].split('.')[0]

        if component_name == 'upload-data':
            # Get content & decode
            content = content.split(',')[1]
            decoded_content = base64.b64decode(content).decode('utf-8')
            file_extension = filepath.split(".")[1]

            # Build new graph
            nodes, edges = process_file(decoded_content, file_extension)
            G = build_graph(nodes, edges)
            pos = nx.nx_pydot.graphviz_layout(G)
            graph, _ = visualize_graph(G, pos)

            if model == 'model1':
                G1, pos1, graph1 = G, pos, graph
                G2, pos2, graph2 = try_get_other_graph(G2, pos2, fig2)
            else:
                G2, pos2, graph2 = G, pos, graph
                G1, pos1, graph1 = try_get_other_graph(G1, pos1, fig1)
            return graph1, graph2, json.dumps(node_link_data(G1)), json.dumps(pos1), json.dumps(node_link_data(G2)), json.dumps(pos2), {'display': 'none'}, ''

        elif component_name != 'model_selector':
            if model == 'model1':
                try:
                    # Will be used for other searches
                    G = node_link_graph(json.loads(G1))
                    pos = json.loads(pos1)
                except (TypeError, UnboundLocalError):
                    raise dash.exceptions.PreventUpdate
                else:
                    # Needed when both graphs are utilized for similarity
                    G1, pos1, graph1 = G, pos, fig1
                G2, pos2, graph2 = try_get_other_graph(G2, pos2, fig2)
            else:
                try:
                    G = node_link_graph(json.loads(G2))
                    pos = json.loads(pos2)
                except (TypeError, UnboundLocalError):
                    raise dash.exceptions.PreventUpdate
                else:
                    G2, pos2, graph2 = G, pos, fig2
                G1, pos1, graph1 = try_get_other_graph(G1, pos1, fig1)
            error = ''

            if component_name == 'input':
                if search_type == 'word1,n,thld' and pos1 is not None and pos2 is not None and NLP_MODEL is not None:
                    # search using both graphs. Using 4-th search as proxy
                    graph1, error1 = visualize_graph(G1, pos1, search_value, 'word,n,thld')
                    graph2, error2 = visualize_graph(G2, pos2, search_value, 'word,n,thld')
                    error = html.P([error1, html.Br(), error2])
                else:
                    # Other searches
                    graph, error = visualize_graph(G, pos, search_value, search_type)

            elif component_name == 'next-path-btn':
                if n_clicks > 0:
                    # Display other paths
                    highlighted = get_clicked_path(n_clicks)
                    graph, error = visualize_graph(G, pos, '', '', highlighted)
            else:
                raise dash.exceptions.PreventUpdate

            if len(GLOBAL_PATHS) > 1:
                button_display = {'text-align': 'center', 'display': 'inline-block'}
            else:
                button_display = {'display':'none'}

            if search_type != 'word1,n,thld':
                # Update values as a search other than the two graph-similarity has been performed
                if model == 'model1':
                    G1, pos1, graph1 = G, pos, graph
                else:
                    G2, pos2, graph2 = G, pos, graph
            return graph1, graph2, json.dumps(node_link_data(G1)), json.dumps(pos1), json.dumps(node_link_data(G2)), json.dumps(pos2), button_display, error

        else:
            raise dash.exceptions.PreventUpdate
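The docstring above relies on caching each graph and its layout as JSON strings in an intermediary div (or dcc.Store). The serialization itself is the standard networkx node-link round trip; a minimal sketch, independent of the Dash callback (all names below are illustrative):

import json
import networkx as nx
from networkx.readwrite.json_graph import node_link_data, node_link_graph

G = nx.path_graph(4)
pos = {n: (float(n), 0.0) for n in G.nodes()}  # stand-in for a graphviz layout

# serialize: what gets written into the intermediary div / dcc.Store
stored_graph = json.dumps(node_link_data(G))
stored_pos = json.dumps(pos)

# deserialize: what the callback does on the next trigger
G_restored = node_link_graph(json.loads(stored_graph))
pos_restored = {int(k): tuple(v) for k, v in json.loads(stored_pos).items()}

assert nx.is_isomorphic(G_restored, G)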
Example #55
def main(args):
    args.input += "/" + args.prefix
    G_data = json.load(open(args.input + "-G.json"))
    G = json_graph.node_link_graph(G_data)
    print(nx.info(G))

    H = G.copy()
    G1, G2 = create_subnet(H, args.alpha_s, args.alpha_c)
    data1 = json_graph.node_link_data(G1)
    data2 = json_graph.node_link_data(G2)
    s1 = json.dumps(data1, indent=4, sort_keys=True)
    s2 = json.dumps(data2, indent=4, sort_keys=True)
    print("About G1")
    print(nx.info(G1))
    print("About G2")
    print(nx.info(G2))

    args.output1 += "/sourceclone,alpha_c={0},alpha_s={1}".format(
        args.alpha_c, args.alpha_s)
    args.output2 += "/targetclone,alpha_c={0},alpha_s={1}".format(
        args.alpha_c, args.alpha_s)

    if not os.path.isdir(args.output2 + '/edgelist'):
        os.makedirs(args.output2 + '/edgelist')
        os.makedirs(args.output2 + '/graphsage')
        os.makedirs(args.output2 + '/dictionaries')

    if not os.path.isdir(args.output1 + '/edgelist'):
        os.makedirs(args.output1 + '/edgelist')
        os.makedirs(args.output1 + '/graphsage')
        os.makedirs(args.output1 + '/dictionaries')

    edgelist_dir1 = args.output1 + "/edgelist/" + args.prefix + ".edgelist"
    edgelist_dir2 = args.output2 + "/edgelist/" + args.prefix + ".edgelist"

    if not os.path.isdir(args.output1): os.makedirs(args.output1)
    if not os.path.isdir(args.output2): os.makedirs(args.output2)

    nx.write_edgelist(G1, path=edgelist_dir1, delimiter=" ", data=['weight'])
    nx.write_edgelist(G2, path=edgelist_dir2, delimiter=" ", data=['weight'])

    args.output1 += "/graphsage/" + args.prefix
    args.output2 += "/graphsage/" + args.prefix

    with open(args.output1 + "-G.json", 'w') as f:
        f.write(s1)

    with open(args.output2 + "-G.json", 'w') as f:
        f.write(s2)

    copyfile(args.input + "-id_map.json", args.output1 + "-id_map.json")
    copyfile(args.input + "-id_map.json", args.output2 + "-id_map.json")
    if os.path.exists(args.input + "-class_map.json"):
        copyfile(args.input + "-class_map.json",
                 args.output1 + "-class_map.json")
        copyfile(args.input + "-class_map.json",
                 args.output2 + "-class_map.json")

    if os.path.exists(args.input + "-feats.npy"):
        copyfile(args.input + "-feats.npy", args.output1 + "-feats.npy")
        copyfile(args.input + "-feats.npy", args.output2 + "-feats.npy")

    if os.path.exists(args.input + "-walks.txt"):
        copyfile(args.input + "-walks.txt", args.output1 + "-walks.txt")
        copyfile(args.input + "-walks.txt", args.output2 + "-walks.txt")
Example #56
 def read_json_file(self, filename):
     with open(filename, encoding='utf-8') as f:
         js_graph = json.load(f)
     return json_graph.node_link_graph(js_graph)
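The complementary write step is symmetric; a minimal sketch (write_json_file and the karate-club usage are illustrative, not part of the snippet above):

import json
import networkx as nx
from networkx.readwrite import json_graph

def write_json_file(G, filename):
    # dump the graph in the same node-link format that read_json_file expects
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(json_graph.node_link_data(G), f)

# round trip, e.g.:
#   write_json_file(nx.karate_club_graph(), "karate.json")
#   G = json_graph.node_link_graph(json.load(open("karate.json", encoding="utf-8")))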
Example #57
def json_network_to_networkx(graph_json):
    graph = json_graph.node_link_graph(graph_json)

    return graph
Example #58
                           width=1.0,
                           alpha=1,
                           node_size=node_size)
    nx.draw_networkx_edges(
        G,
        pos,
        edgelist=near_edges,
        width=1,
        alpha=0.3,
        edge_color="r",
        node_size=node_size,
    )
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
    #  plt.xlim(0 - axis * 0.1, axis * 1.1)
    #  plt.ylim(0 - axis * 0.1, axis * 1.1)
    #  plt.xticks(np.arange(0 - axis * 0.1, axis * 1.1, step=1))
    #  plt.yticks(np.arange(0 - axis * 0.1, axis * 1.1, step=1))
    if grid:
        plt.grid()
    plt.show()


if __name__ == "__main__":
    for entry in tqdm(
            os.scandir("data/filtered/final_data/zebra-cat-computer/1")):
        if entry.name.endswith("json"):
            with open(entry, "r") as f:
                graph = json_graph.node_link_graph(json.loads(f.read()))
                render_graph(graph)
                break
Example #59
    # log.fit(train_embeds, train_labels)
    # print("F1 score:", f1_score(test_labels, log.predict(test_embeds), average="micro"))
    # print("Random baseline f1 score:", f1_score(test_labels, dummy.predict(test_embeds), average="micro"))

if __name__ == '__main__':
    # parser = ArgumentParser("Run evaluation on citation data.")
    # parser.add_argument("dataset_dir", help="Path to directory containing the dataset.")
    # parser.add_argument("embed_dir", help="Path to directory containing the learned node embeddings.")
    # parser.add_argument("setting", help="Either val or test.")
    # args = parser.parse_args()
    dataset_dir = "../"  # args.dataset_dir
    data_dir = "feat" # "unsup-../graphsage_mean_small_0.000010"  or "feat"
    setting = "test"  # args.setting

    print("Loading data...")
    G = json_graph.node_link_graph(json.load(open(dataset_dir + "/acm-G.json")))
     
    train_ids = [n for n in G.nodes() if not G.node[n]['val'] and not G.node[n]['test']]
    test_ids = [n for n in G.nodes() if G.node[n][setting]]
    test_labels = get_class_labels(G, test_ids)
    train_labels = get_class_labels(G, train_ids)

    if data_dir == "feat":
        print("Using only features..")
        feats = np.load(dataset_dir + "/acm-feats.npy")
        feat_id_map = json.load(open(dataset_dir + "/acm-id_map.json"))
        feat_id_map = {int(id): val for id, val in feat_id_map.items()}
        train_feats = feats[[feat_id_map[id] for id in train_ids]] 
        test_feats = feats[[feat_id_map[id] for id in test_ids]] 
        print("Running regression..")
        run_regression(train_feats, train_labels, test_feats, test_labels)
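run_regression is defined elsewhere in the original evaluation script; the commented-out lines at the top suggest a scikit-learn classifier plus a dummy baseline scored with micro-averaged F1. A minimal sketch under that assumption (not the original implementation):

from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

def run_regression(train_embeds, train_labels, test_embeds, test_labels):
    # simple linear classifier on the node features/embeddings
    log = LogisticRegression(max_iter=1000)
    log.fit(train_embeds, train_labels)
    # majority-class baseline for comparison
    dummy = DummyClassifier(strategy="most_frequent")
    dummy.fit(train_embeds, train_labels)
    print("F1 score:", f1_score(test_labels, log.predict(test_embeds), average="micro"))
    print("Baseline F1 score:", f1_score(test_labels, dummy.predict(test_embeds), average="micro"))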
Example #60
def mp_pool_format(G_data, graph_dir, mc_iter):
    return create_num_MC_sim_copies(json_graph.node_link_graph(G_data),
                                    graph_dir, mc_iter)