Example #1
def prepare_data_for_pr(topic_id, date, window_size, topicname, real_topic_id):
    tmp_file = tempfile.NamedTemporaryFile(delete=False)
    ds_tmp_file = tempfile.NamedTemporaryFile(delete=False)

    topic = topicname
    if not topic:
        return None

    #g, gg, new_attribute_dict, ds_dg, ds_udg, ds_new_attribute_dict= make_network(topic, date, window_size, attribute_add=False)
    key = str(real_topic_id) + '_' + str(date) + '_' + str(window_size)
    g = nx.read_gexf(str(GRAPH_PATH)+str(key)+'_g_graph.gexf')
    ds_dg = nx.read_gexf(str(GRAPH_PATH)+str(key)+'_ds_dg_graph.gexf')

    if not g or not ds_dg:
        return None

    N = len(g)
    print 'topic source network size %s' % N
    ds_N = len(ds_dg)
    print 'topic direct superior network size %s' % ds_N

    if not N or not ds_N:
        return None
    '''
    Store the network structure in temporary files; the step of writing to the
    temporary file is factored out into the write_tmp_file method.
    '''
    print 'start PageRank tmp_file, ds_tmp_file'
    tmp_file = write_tmp_file(tmp_file, g, N)
    ds_tmp_file = write_tmp_file(ds_tmp_file, ds_dg, ds_N)
    print 'end PageRank tmp_file, ds_tmp_file'
    return tmp_file, N, ds_tmp_file, ds_N
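The write_tmp_file helper referenced above is not shown; the following is only a plausible sketch, assuming the external PageRank step consumes a plain edge list preceded by the node count (the real layout depends on that tool).

def write_tmp_file(tmp_file, g, n):
    # Hypothetical format: node count on the first line, then one "source target" edge per line.
    with open(tmp_file.name, 'w') as fh:
        fh.write('%d\n' % n)
        for u, v in g.edges():
            fh.write('%s %s\n' % (u, v))
    return tmp_file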
Example #2
def occurenceCounter(charList, graphFile, bookNetworksPath):
    g = nx.read_gexf(graphFile)

    if not charList:
        # Get characters from overall.gexf graph
        overallGraphFile = bookNetworksPath + "overall.gexf"
        overall_g = nx.read_gexf(overallGraphFile)
        overallChars = nx.nodes(overall_g)

        # Sort characters alphabetically by name
        sortedChars = sorted(overallChars)

        return sortedChars

    else:
        charList = [item for item in charList]

        for index, item in enumerate(charList):
            currentChar = None
            for node in nx.nodes(g):
                if node == item:
                    occurrence = 1
                    charList[index] = (item, occurrence)
                    currentChar = node
            # If current character is not present in the current chapter assign 0 influence.
            if not currentChar:
                occurrence = 0
                charList[index] = (item, occurrence)

        return charList
Example #3
def analyze_Reff_chains():
    fl_m20 = nx.read_gexf("Flute_vs_EpiFast/Flute_match20.gexf")
    reffs_fl_m20 = tv.build_Reff_txm_chain(fl_m20)


    ef_m20 = nx.read_gexf("Flute_vs_EpiFast/Epifast_match20.gexf")
    reffs_ef_m20 = tv.build_Reff_txm_chain(ef_m20)

    max_gens = 31
Example #4
    def add_LPU(self, data_file, gexf_file=None, LPU=None, win=None,
                is_input=False):
        '''
        Add data associated with a specific LPU to a visualization.
        To add a plot containing neurons from a particular LPU,
        the LPU needs to be added to the visualization using this
        function. Note that outputs from multiple neurons can
        be visualized using the same visualizer object.

        Parameters
        ----------
        data_file: str
             Location of the h5 file generated by neurokernel
             containing the output of the LPU
        gexf_file: str
            Location of the gexf file describing the LPU.
            If not specified, it will be assumed that the h5 file
            contains input.
        LPU: str
            Name of the LPU. Will be used as identifier to add plots.
            For input signals, the name of the LPU will be prepended
            with 'input_'. For example::

                V.add_LPU('vision_in.h5', LPU='vision')

            will create the LPU identifier 'input_vision'.
            Therefore, adding a plot depicting this input can be done by::

                V.add_plot({'type': 'image', 'imlim': [-0.5, 0.5]}, LPU='input_vision')
        win: slice/list
            Can be used to limit the visualization to a specific time window.
        '''

        if gexf_file and not is_input:
            self._graph[LPU] = nx.read_gexf(gexf_file)

            # Map neuron ids to index into output data array:
            self._id_to_data_idx[LPU] = {m:i for i, m in \
                enumerate(sorted([int(n) for n, k in \
                                  self._graph[LPU].nodes_iter(True) if k['spiking']]))}
        else:
            if LPU:
                LPU = 'input_' + str(LPU)
            else:
                LPU = 'input_' + str(len(self._data))
            if gexf_file:
                self._graph[LPU] = nx.read_gexf(gexf_file)
        if not LPU:
            LPU = len(self._data)
        self._data[LPU] = np.transpose(sio.read_array(data_file))
        if win is not None:
            self._data[LPU] = self._data[LPU][:,win]
        if self._maxt:
            self._maxt = min(self._maxt, self._data[LPU].shape[1])
        else:
            self._maxt = self._data[LPU].shape[1]
Example #5
def betweenValue(charList, graphFile, bookNetworksPath):
    # Compute betweenness for all characters in the current chapter graph.
    g = nx.read_gexf(graphFile)
    betCentrality = nx.betweenness_centrality(g, k=None, normalized=True, weight="Weight", endpoints=False, seed=None)
    betweenValues = betCentrality.values()

    # NORMALISE betweenness values
    d = decimal.Decimal
    maxBetween = max(betweenValues)
    minBetween = min(betweenValues)
    maxMinusMin = d(maxBetween) - d(minBetween)

    if not charList:
        # Get top 10 overall characters from overall.gexf graph
        overallGraphFile = bookNetworksPath + "overall.gexf"
        overall_g = nx.read_gexf(overallGraphFile)
        overallBetweenCent = nx.betweenness_centrality(
            overall_g, k=None, normalized=True, weight="Weight", endpoints=False, seed=None
        )

        # Quick fix for getting all characters.

        # sortedCentrality = dict(sorted(overallBetweenCent.iteritems(), key=itemgetter(1), reverse=True)[:10])
        sortedCentrality = dict(sorted(overallBetweenCent.iteritems(), key=itemgetter(1), reverse=True))
        sortedCentrality = sorted(sortedCentrality.iteritems(), key=itemgetter(1), reverse=True)

        charList = [seq[0] for seq in sortedCentrality]
        return charList

    else:
        charList = [item for item in charList]

        for index, item in enumerate(charList):
            currentChar = None
            for key, value in betCentrality.iteritems():
                if key == item:
                    nummerator = d(value) - d(minBetween)
                    if nummerator == 0:
                        charList[index] = (key, str(0))
                    else:
                        norm_value = (d(value) - d(minBetween)) / d(maxMinusMin)
                        charList[index] = (key, str(norm_value))
                    currentChar = key
            # If current character is not present in the current chapter assign 0 influence.
            if not currentChar:
                charList[index] = (item, 0)

        return charList
Example #6
def eigValue(charList, graphFile, bookNetworksPath):
    # Compute eigenvectors for all characters in the current chapter graph.
    g = nx.read_gexf(graphFile)
    eigCentrality = nx.eigenvector_centrality(g, max_iter=100, tol=1.0e-6, nstart=None, weight="Weight")
    eigValues = eigCentrality.values()

    # NORMALISE eigenvector values
    d = decimal.Decimal
    maxEig = max(eigValues)
    minEig = min(eigValues)
    maxMinusMin = d(maxEig) - d(minEig)

    if not charList:
        # Get top 10 overall characters from overall.gexf graph
        overallGraphFile = bookNetworksPath + "overall.gexf"
        overall_g = nx.read_gexf(overallGraphFile)
        overallEigCent = nx.eigenvector_centrality(overall_g, max_iter=100, tol=1.0e-6, nstart=None, weight="Weight")

        # sortedCentrality = dict(sorted(overallEigCent.iteritems(), key=itemgetter(1), reverse=True)[:10])
        sortedCentrality = dict(sorted(overallEigCent.iteritems(), key=itemgetter(1), reverse=True))
        sortedCentrality = sorted(sortedCentrality.iteritems(), key=itemgetter(1), reverse=True)

        charList = [seq[0] for seq in sortedCentrality]
        return charList

    else:
        charList = [item for item in charList]

        for index, item in enumerate(charList):
            currentChar = None
            for key, value in eigCentrality.iteritems():
                if key == item:
                    # Unnormalised version...
                    charList[index] = (key, str(value))
                    currentChar = key
                # if key == item:
                #     nummerator = d(value)-d(minEig)
                #     if nummerator==0:
                #         charList[index] = (key, str(0))
                #     else:
                #         norm_value = (d(value)-d(minEig))/d(maxMinusMin)
                #         charList[index] = (key, str(norm_value))
                #     currentChar = key
            # If current character is not present in the current chapter assign 0 influence.
            if not currentChar:
                charList[index] = (item, 0)

        return charList
Example #7
def read_from_json_gexf(fname=None, label_field_name='APIs', conv_undir=False):
    '''
    Load the graph files (.gexf or .json only supported)
    :param fname: graph file name
    :param label_field_name: field denoting the node label
    :param conv_undir: convert to undirected graph or not
    :return: graph in networkx format
    '''
    if not fname:
        logging.error('no valid path or file name')
        return None
    else:
        try:
            try:
                with open(fname, 'rb') as File:
                    org_dep_g = json_graph.node_link_graph(json.load(File))
            except:
                org_dep_g = nx.read_gexf (path=fname)

            g = nx.DiGraph()
            for n, d in org_dep_g.nodes_iter(data=True):
                g.add_node(n, attr_dict={'label': '-'.join(d[label_field_name].split('\n'))})
            g.add_edges_from(org_dep_g.edges_iter())
        except:
            logging.error("unable to load graph from file: {}".format(fname))
            # return 0
    logging.debug('loaded {} as a graph with {} nodes and {} edges'.format(fname, g.number_of_nodes(), g.number_of_edges()))
    if conv_undir:
        g = nx.Graph (g)
        logging.debug('converted {} as undirected graph'.format (g))
    return g
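A minimal usage sketch for the loader above, assuming a hypothetical file sample_calls.gexf whose nodes all carry an 'APIs' attribute:

# Hypothetical file name; any .gexf or node-link .json file works as long as
# every node has the label field.
g = read_from_json_gexf('sample_calls.gexf', label_field_name='APIs', conv_undir=True)
if g is not None:
    print('%d nodes, %d edges' % (g.number_of_nodes(), g.number_of_edges()))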
Example #8
def read_params_file(paramsDir, fName):
    """Read an xml parameters file into a list of tuples

    Each tuple consists of the following:
    entityType      simulator, economy or bank
    entityName
    dataType        parameter or attribute
    dataName
    dataValue       a string
    @param paramsDir: directory containing the parameters file
    @param fName: name of the xml parameters file
    """
    tree = ET.parse(paramsDir + fName)
    pList = read_params_from_xml(tree.getroot(), fName)

    graphFile = None
    for eType, eName, dataType, dataName, dataValue in pList:
        if dataName == "graphFile":
            graphFile = dataValue
            break
    if graphFile is None:
        msg = "No graph file specified in %s" % fName
        logger.error(msg)
        raise ParameterError(msg)
    basicGraph = nx.read_gexf(paramsDir + graphFile)  # This gives us a graph of possible lending

    return pList, basicGraph
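A usage sketch with hypothetical arguments, assuming the parameters xml and the gexf graph it references both sit in the params/ directory:

pList, basicGraph = read_params_file('params/', 'simulation.xml')
for entityType, entityName, dataType, dataName, dataValue in pList:
    print('%s %s %s %s = %s' % (entityType, entityName, dataType, dataName, dataValue))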
Example #9
    def importGexf(self, url ):

        # TODO once files are stored in a standard upload directory this will need to be changed
        import platform
        if platform.system() == 'Windows':
            PATH = 'c:\\inetpub\\wwwroot\\pydev\\systemshock\\modellingengine\\fincat\\parameters\\'
        else:
            PATH = '/var/lib/geonode/src/GeoNodePy/geonode/modellingengine/fincat/parameters/'

        G = nx.read_gexf(PATH + url)

        # ensure the nodes are labelled with integers starting from 0
        # TODO might need to start from current number of nodes in G
        G = nx.convert_node_labels_to_integers(G, first_label=0)

        for node in G.nodes(data=True):
            nodeid = node[0] #node array index 0 is the node id, index 1 is the attribute list
            attributes = node[1]
            attributes['guid'] = nodeid
            if 'wkt' in attributes:
                attributes['geometry'] = self.WKTtoGeoJSON(attributes['wkt'])

        for edge in G.edges(data=True):
            edgeid = unicode(edge[0]) + '-' + unicode(edge[1])
            attributes = edge[2]
            attributes['guid'] = edgeid

        self.layergraphs.append(G)  # add the new layer graph to the overall network
        return True
Example #10
def classify(request, pk):
	#gets object based on id given
	graph_file = get_object_or_404(Document, pk=pk)
	#reads file into networkx graph based on extension
	if graph_file.extension() == ".gml":
		G = nx.read_gml(graph_file.uploadfile)
	else:
		G = nx.read_gexf(graph_file.uploadfile)
	#closes file so we can delete it
	graph_file.uploadfile.close()
	#loads the algorithm and tests the algorithm against the graph
	g_json = json_graph.node_link_data(G)
	#save graph into json file
	with open(os.path.join(settings.MEDIA_ROOT, 'graph.json'), 'w') as graph:
			json.dump(g_json, graph)
	with open(os.path.join(settings.MEDIA_ROOT, 'rf_classifier.pkl'), 'rb') as malgo:
		algo_loaded = pickle.load(malgo, encoding="latin1")
		dataset = np.array([G.number_of_nodes(), G.number_of_edges(), nx.density(G), nx.degree_assortativity_coefficient(G), nx.average_clustering(G), nx.graph_clique_number(G)])
		print (dataset)
		#creates X to test against
		X = dataset
		prediction = algo_loaded.predict(X)
		
		
		
		graph_type = check_prediction(prediction)
		graph = GraphPasser(G.number_of_nodes(), G.number_of_edges(), nx.density(G), nx.degree_assortativity_coefficient(G), nx.average_clustering(G), nx.graph_clique_number(G))
	#gives certain variables to the view

	return render(
		request,
		'classification/classify.html',
		{'graph': graph, 'prediction': graph_type}
		)
Example #11
 def test_read_simple_directed_graphml(self):
     G = self.simple_directed_graph
     H = nx.read_gexf(self.simple_directed_fh)
     assert_equal(sorted(G.nodes()), sorted(H.nodes()))
     assert_equal(sorted(G.edges()), sorted(H.edges()))
     assert_equal(sorted(G.edges(data=True)), sorted(H.edges(data=True)))
     self.simple_directed_fh.seek(0)
Example #12
    def GET(self):
		web.header('Access-Control-Allow-Origin', '*')
		output = dict()
		getInput = web.input(start='2012-3-03 16:00:00', end='2012-3-03 21:00:00')
		start_time=pd.to_datetime(getInput.start).tz_localize('US/Eastern') - pd.DateOffset(hours=10)
		end_time=pd.to_datetime(getInput.end).tz_localize('US/Eastern') - pd.DateOffset(hours=10)
		
		output_nodes = set()
		all_schedules = pd.read_json('all_schedules.json')
		allnodes = pd.read_json('allnodes.json')
		nodes = set(allnodes.nodes)
		all_schedules['end'] = all_schedules['end'].map(lambda x: datetime.datetime.fromtimestamp(x/1000000000))
		all_schedules['start'] = all_schedules['start'].map(lambda x: datetime.datetime.fromtimestamp(x/1000000000))

		night_sched = all_schedules[(all_schedules.start >= start_time) & (all_schedules.end <= end_time)]
		on_nodes = set()
		for idx,show in night_sched.iterrows():
			on_nodes.add(show[2])
		
		off_nodes = nodes.difference(on_nodes)
		
		imported_graph = nx.read_gexf('./finished_network3.gexf')
		for i in off_nodes:
			try:
				imported_graph.remove_node(i)
			except:
				continue
		
		pr=nx.pagerank(imported_graph,alpha=0.9,weight='newweight',tol=.01, max_iter=200)
		
		output['nodes'] = [(i,v*1000000) for i,v in pr.items()]
		output['input_params'] = getInput
		return json.dumps(output)
Example #13
def dump_melodic_phrases_in_network(network_file, output_dir, myDatabase, base_name):
    """
    This function dumps all the mp3 files for the patterns in the 'network' (gexf file)
    """
    
    cmd1 = "select file.filename, pattern.start_time, pattern.end_time from pattern join file on (pattern.file_id = file.id) where pattern.id = %d"
    
    #reading the network
    full_net = nx.read_gexf(network_file)
    
    labels = nx.get_node_attributes(full_net, 'label')
    
    patterns = full_net.nodes()
    
    try:
        con = psy.connect(database=myDatabase, user='******') 
        cur = con.cursor()
        for ii, pattern in enumerate(patterns):
            pattern = labels[pattern]
            cur.execute(cmd1%int(pattern))
            filename, start, end = cur.fetchone()
            clipAudio(output_dir, os.path.join(base_name, filename), start, end, int(pattern))
        
    except psy.DatabaseError, e:
        print 'Error %s' % e
        if con:
            con.rollback()
            con.close()
        sys.exit(1)
Example #14
def create_genealogy(graph_id = 'deusto.aitoralmeida'):
    print 'Loading graph'
    merged = nx.read_gexf('merged_genealogy.gexf', node_type = None)
    print 'Loading edge index'
    dict_edges = load_merged_edge_index()
    
    print 'Building genealogy'
    to_process = [graph_id]
    tree = set()
    #get all the ancestors in the tree
    while len(to_process) > 0:
        current = to_process[0]
        to_process.remove(current)
        tree.add(current) 
        
        try:
            to_process += dict_edges[current]
        except:
            pass 
        
    print 'Creating graph'
    G = nx.DiGraph()
    for person in tree:
        print person
        G.add_node(person, name = merged.node[person]['name'])
        for target in merged.edge[person].keys(): 
            #add edges with the ancestors only       
            if target in tree:        
                G.add_edge(person, target)
    print 'Writing file'
    nx.write_gexf(G, 'created_genealogy.gexf')    
Example #15
def write_estrada(path_to_file):
    graphe = nx.read_gexf(path_to_file + ".gexf")
    if type(graphe) == nx.MultiDiGraph:
        print 'has_multiple_edges'
        graphe = nx.DiGraph(graphe)
    if type(graphe) == nx.MultiGraph:
        print 'has_multiple_edges'
        graphe = nx.Graph(graphe)

    adj_mat = nx.to_numpy_matrix(graphe, weight=None)
    adj_mat = adj_mat.tolist()
    w, v = eig(adj_mat)
    argmax = np.argmax(w)
    sh = np.sinh(w)
    square = np.square(v)
    gamma = square[:, argmax]
    gammaideal = np.dot(square, sh) / sh[argmax]
    delta = 0.5 * np.log10(gamma / gammaideal)
    deltaplus = delta[(delta > 0)]
    deltaminus = delta[(delta < 0)]
    eplus = math.sqrt(mean(deltaplus ** 2))
    eminus = math.sqrt(mean(deltaminus ** 2))
    if math.isnan(eplus) or math.isnan(eminus) or eplus == float('inf') or eminus == float('inf'):
        print gamma
        print gammaideal
        print deltaplus
        print deltaminus

    classe = 1
    if eminus > 0.01: classe += 1
    if eplus > 0.01: classe += 2

    add_results_estrada(eplus, eminus, classe,path_to_file)
Example #16
def abrirred():
    global G
    global fpname
    global file_path
    file_path = tkFileDialog.askopenfilename(title="Open file", filetypes=[("GEXF files", ".gexf")])
    if file_path != "":
        G = nx.read_gexf(file_path, relabel=False)
        fp = file_path.split("/")
        fpname = fp[-1]
        print "Loaded network:", fpname

        editmenu.entryconfig(1, state=NORMAL)  ## Statistics
        editmenu.entryconfig(9, state=NORMAL)  ## Sigma-JS
        editmenu.entryconfig(10, state=NORMAL)  ## D3 HTML Graph
    else:
        print "You have to choose a file"
        return False

    tmpdir = "tmp/"
    shutil.copyfile(file_path, tmpdir + "tmpfile.gexf")
    fp = file_path.split("/")
    fpname = fp[-1]
    fname = file("tmp/enamgexf.nme", "w")
    print >> fname, fpname[0:-4]
    fname.close()

    return False
Example #17
def run(out_name):
    V = vis.visualizer()

    # Assumes that generic_lpu_0_input.h5 and generic_lpu_1_input.h5
    # contain the same data:
    V.add_LPU('./data/generic_lpu_0_input.h5', LPU='Sensory')
    V.add_plot({'type': 'waveform', 'ids': [[0]]}, 'input_Sensory')

    for i in [0, 1]:
        G = nx.read_gexf('./data/generic_lpu_%s.gexf.gz' % i)
        neu_pub = sorted([int(n) for n, d in G.nodes_iter(True) \
                          if d['public'] == True])

        V.add_LPU('generic_lpu_%s_%s_output_spike.h5' % (i, out_name),
                  './data/generic_lpu_%s.gexf.gz' % i,
                  'Generic LPU %s' % i)
        V.add_plot({'type': 'raster',
                    'ids': {0: neu_pub},
                    #'yticks': range(1, 1+len(neu_out)),
                    #'yticklabels': range(len(neu_out))
                    },
                    'Generic LPU %s' % i, 'Output')

    V._update_interval = 50
    V.rows = 3
    V.cols = 1
    V.fontsize = 18
    V.out_filename = '%s.avi' % out_name
    V.codec = 'libtheora'
    V.dt = 0.0001
    V.xlim = [0, 1.0]
    V.run()
Example #18
def cyclePlot(gexFile):
    DG = nx.DiGraph(nx.read_gexf(gexFile))
    
    #generate networkx friendly position format
    #dictionary keyed by node label with values being a float32 ndarray
    pos = dict()
    for i in range(1, len(DG.node)+1):
        xPos = DG.node[str(i)]['viz']['position']['x']
        yPos = DG.node[str(i)]['viz']['position']['y']
        pos[str(i)] = np.array([xPos,yPos])
    
    #nx.draw_networkx_edges(DG,pos,nodelist=DG.node.keys(),alpha=0.05,
    #                       arrows=True)
    nx.draw_networkx_nodes(DG,pos,nodelist=DG.node.keys(),
                       node_size=30,
                       node_color='grey',
                       alpha=0.4)
    nx.draw_networkx_edges(DG,pos,alpha=0.4,
                               arrows=True,
                               edge_color='k')
    plt.show()
    
    scc=nx.strongly_connected_component_subgraphs(DG)
    CG=scc[0];
    
    #show example
    nx.draw_networkx_nodes(CG,pos,nodelist=CG.node.keys(),
                       node_size=30,
                       node_color='c')
    nx.draw_networkx_edges(CG,pos,alpha=0.5,
                               arrows=True,
                               edge_color='r')
Example #19
def __prepare__(data_dir):
    """
    Takes each file in .gexf format and converts it into the igraph-readable graphml.
    """

    """
    Takes a list of files in .gexf format, and converts each to GraphML.
    """

    for f in glob.glob(os.path.join(data_dir, "*.gexf")):
        print(f)
        newFileName = f[:f.rfind('.')] + ".graphml"

        if os.path.exists(newFileName):
            continue

        G = nx.read_gexf(f)
        for node in G.node:
            for attrib in G.node[node]:
                if type(G.node[node][attrib]) == dict:
                    # graphML doesn't play nice with dictionaries as attributes.
                    # this line just deletes positional information.
                    G.node[node][attrib] = 0
        newFileName = f[:f.rfind('.')] + ".graphml"
        nx.write_graphml(G, newFileName)
Example #20
def restore_path(meta_id, start_index, count, k):  # k: estimated parameter weight
    test_path = TestPath(meta_id, start_index, count)
    visual_map4000 = nx.read_gexf('/home/elvis/map/analize/analizeTime/countXEntTime/visualMapTop4000.gexf')
    paths = []
    # while len(paths) == 0:
    #     paths = find_paths(test_path.path_time[0], test_path.path_time[-1], visual_map4000, k)
    #     k *= 1.1
    paths = find_paths(test_path.path_time[0], test_path.path_time[-1], visual_map4000, k)
    if len(paths) == 0:
        print_cache.append(['{}'.format(meta_id), 0,])
        # print('{},not find'.format(meta_id))
        return
    delta_time = (test_path.path_time[-1][1] - test_path.path_time[0][1]).total_seconds()
    map_paths = visual_to_map(paths, delta_time)
    best_path = best_alternative(map_paths, delta_time)
    file_path = '/home/elvis/map/analize/path_restore/restore_path/{k}/{meta_id}/{meta_id}-{s}-{c}'.format(
        meta_id=meta_id,
        s=start_index,
        c=count, k=k)
    to_path_txt(map_paths, file_path, mode='all')
    best_file_path = '/home/elvis/map/analize/path_restore/restore_path/{k}/{meta_id}/best'.format(meta_id=meta_id, k=k)
    to_path_txt(best_path, best_file_path, mode='best')
    print_cache.append(['{}'.format(meta_id),])
    # print('{}'.format(meta_id), end=' ')
    similarity(best_path, test_path.path_time)
Example #21
def convertNetToGefx(input_file):
    G = None
    if input_file.endswith(Constants.GEXF_FORMAT):
        G = nx.read_gexf(input_file, None, True)
    elif input_file.endswith(Constants.NET_FORMAT):
        G=nx.Graph()
        f = file(input_file, 'r')
        # iterate over the lines in the file
        for line in f:
            # split the line into a list of column values
            columns = line.split('\t')
            # clean any whitespace off the items
            columns = [col.strip() for col in columns]
            if columns:
                G.add_edge(columns[0], columns[1])
        #write to a gexf file, so that GHOST can read it as well
        gexf_path = input_file[:-len(Constants.NET_FORMAT)]+Constants.GEXF_FORMAT
        #add attributes to nodes in gexf file
        for n,d in G.nodes_iter(data=True):
            G.node[n]["id"] = n
            G.node[n]["gname"] = n
        nx.write_gexf(G, gexf_path)
    else:
        print("Unsupported Format")
        exit(0)
    print("For "+input_file+" Number of Nodes =", G.number_of_nodes(), "No of edges = ", G.number_of_edges())

    return G
Example #22
def main():
    # the description link graph
    g = nx.read_gexf('data/subreddits_edged_by_description_links.gexf')

    # an empty graph for showing communities
    g1 = nx.Graph()

    communities = get_coalesced_communities(g)
    for c in communities:
        g1.add_node(c.name)
        g1.node[c.name]['size'] = len(c.members)

    count = 0
    ratio_weight = 0.0

    for c1, c2 in product(communities, communities):
        if c1.id == c2.id or g1.has_edge(c1.name, c2.name) or len(c1.members) > len(c2.members):
            continue
        
        overlap = len(c1.members & c2.members)

        if overlap > 0:
            g1.add_edge(c1.name, c2.name, weight=overlap / len(c1.members))
            ratio_weight += overlap / len(c1.members)
            count += 1

    average_weight_ratio = ratio_weight / count
    print "average weight ratio: %s" % str(average_weight_ratio)

    g1.remove_edges_from(filter(lambda x: x[2]['weight'] < average_weight_ratio, g1.edges(data=True)))

    print "%d subreddits included" % len(reduce(lambda x,y: x.union(y.members), communities, set()))
    nx.write_gexf(g1, 'test_coalesce.gexf')
Example #23
	def __update_structure(self):
		imported_graph = nx.read_gexf(self.file_path)

		if not isinstance(imported_graph, nx.Graph):
			raise Exception("Imported graph is not undirected")

		self.structure = nx.convert_node_labels_to_integers(imported_graph)
Example #24
    def lpu_parser(filename):
        """
        GEXF LPU specification parser.

        Extract LPU specification data from a GEXF file and store it in
        Python data structures. All nodes in the GEXF file are assumed to
        correspond to neuron model instances while all edges are assumed to
        correspond to synapse model instances.

        Parameters
        ----------
        filename : str
            GEXF filename.

        Returns
        -------
        n_dict : dict of dict of list
            Each key of `n_dict` is the name of a neuron model; the values
            are dicts that map each attribute name to a list that contains the
            attribute values for each neuron class.
        s_dict : dict of dict of list
            Each key of `s_dict` is the name of a synapse model; the values are
            dicts that map each attribute name to a list that contains the
            attribute values for each neuron.
        """

        graph = nx.read_gexf(filename)
        return LPU.graph_to_dicts(graph)
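A usage sketch for the parser above, assuming the LPU class from Neurokernel and a hypothetical specification file generic_lpu.gexf.gz:

# Hypothetical file name; n_dict and s_dict group attribute lists by model name.
n_dict, s_dict = LPU.lpu_parser('generic_lpu.gexf.gz')
for model_name, attrs in n_dict.items():
    print('%s: %s' % (model_name, sorted(attrs.keys())))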
Example #25
def plot_gexf_string(string):
    import networkx as nx
    from io import StringIO

    strio = StringIO(u"%s" % string)
    graph = nx.read_gexf(strio, relabel=False)
    plot_networkx_graph(graph)
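A minimal sketch of calling the helper above with an inline GEXF document (plot_networkx_graph is assumed to be defined elsewhere in the same module):

gexf_doc = """<?xml version="1.0" encoding="UTF-8"?>
<gexf xmlns="http://www.gexf.net/1.2draft" version="1.2">
  <graph defaultedgetype="undirected">
    <nodes>
      <node id="0" label="a"/>
      <node id="1" label="b"/>
    </nodes>
    <edges>
      <edge id="0" source="0" target="1"/>
    </edges>
  </graph>
</gexf>"""
plot_gexf_string(gexf_doc)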
Example #26
def get_sna(path):
    sna_data = {}
    print 'Building relations graph'
    G = nx.read_gexf(path)
    print 'Nodes:', len(G.nodes())
    print 'Edges:', len(G.edges())
        
    print 'Calculating centralities:'
    print '    -degrees'
    degrees = G.degree()    
    for c in degrees:
        sna_data[c] = { 'degree':degrees[c],
                            'betweenness':0,
                            'closeness':0,
                            'eigenvector':0}
        
    print '    -betweenness'
    betweenness = nx.betweenness_centrality(G)
    for c in betweenness:
        sna_data[c]['betweenness'] = betweenness[c]
        
    print '    -closeness'
    closeness = nx.closeness_centrality(G)
    for c in closeness:
        sna_data[c]['closeness'] = closeness[c]
        
    print '    -eigenvector'
    eigenvector = nx.eigenvector_centrality_numpy(G)
    for c in eigenvector:
        sna_data[c]['eigenvector'] = eigenvector[c]
        
    return sna_data
Example #27
def run(out_name):
    V = vis.visualizer()

    V.add_LPU('./data/generic_input_0.h5', LPU='Sensory')
    V.add_plot({'type':'waveform', 'ids': [[0]]}, 'input_Sensory')

    for i in [0, 1]:
        G = nx.read_gexf('./data/generic_lpu_%s.gexf.gz' % i)
        neu_out = [k for k, n in G.node.items() if n['name'][:3] == 'out']

        V.add_LPU('generic_output_%s_%s_spike.h5' % (i, out_name),
                  './data/generic_lpu_%s.gexf.gz' % i,
                  'Generic LPU %s' % i)
        V.add_plot({'type': 'raster',
                    'ids': {0: range(len(neu_out))},
                    #'yticks': range(1, 1+len(neu_out)),
                    #'yticklabels': range(len(neu_out))
                    },
                    'Generic LPU %s' % i, 'Output')

    V._update_interval = 50
    V.rows = 3
    V.cols = 1
    V.fontsize = 18
    V.out_filename = 'generic_output_%s.avi' % out_name
    V.codec = 'libtheora'
    V.dt = 0.0001
    V.xlim = [0, 1.0]
    V.run()
Example #28
def read_any(fname):
    """Attempt to read file in any format.

    - First, file name can be specified as a schema prefix, as in
      'gml:FILE_NAME'.

    - Then, it can be specified via filename extension.

    - Then, it can be specified via filename extension after .bz2 or
      .gz.  Note: We do not decompress it, but rely on networkx to do
      decompression.

    - Then, look at the first 512 bytes to see if the file is either
      in pajek or gml format.

    - If read is unsuccessful, raise UnknownTypeError.

    If read is successful, return a networkx graph object.

    A list of all readers in networkx:
    import networkx
    [ x for x in dir(networkx) if x.startswith('read_') ]
    """#%'    \n'.join(x for x in dir(networkx) if x.startswith('read_'))
    # Try a schema prefix based match:
    m =  re.match(r'^(\w+?):(.*)$', fname)
    if m:
        schema, fname = m.group(1), m.group(2)
        # Look for networkx.read_schema.  If it exists, use that to
        # read and return.
        reader = _get_reader(schema)
        if reader is not None:
            return reader(fname)
    # Try known file suffix based reading:
    base, ext = os.path.splitext(fname)
    #if ext == '.gml':
    #    return networkx.read_gml(fname)
    # look for any reader in networkx with this extension.
    reader = _get_reader(ext[1:])
    if reader is not None:
        return reader(fname)
    # If file is compressed, look at true extension, see if it has a
    # reader.
    if ext in ('.gz', '.bz2'):
        newext = os.path.splitext(base)[1]
        reader = _get_reader(newext[1:])
        if reader is not None:
            return reader(fname)
    # Look inside for the schema:
    data = open(fname).read(512)
    if _test_pajek(data):    return networkx.read_pajek(fname)
    if _test_gml(data):      return networkx.read_gml(fname)
    if _test_edgelist(data): return networkx.read_edgelist(fname, data=[('weight', float)])
    if _test_gexf(data):     return networkx.read_gexf(fname)
    if _test_graphml(data):  return networkx.read_graphml(fname)

    # So it is probably an edgelist.

    # Raise exception
    raise UnknownTypeError("Can't open %s"%fname)
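A short usage sketch with hypothetical file names, showing the three lookup paths described in the docstring:

g1 = read_any('network.gexf')        # reader chosen by the file extension
g2 = read_any('gml:snapshot.dat')    # reader forced by the schema prefix
g3 = read_any('edges.txt')           # no known extension: falls back to sniffing the first 512 bytes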
Example #29
 def test_bool(self):
     G = nx.Graph()
     G.add_node(1, testattr=True)
     fh = io.BytesIO()
     nx.write_gexf(G, fh)
     fh.seek(0)
     H = nx.read_gexf(fh, node_type=int)
     assert_equal(H.nodes[1]['testattr'], True)
Example #30
 def test_read_simple_undirected_graphml(self):
     G = self.simple_undirected_graph
     H = nx.read_gexf(self.simple_undirected_fh)
     assert_equal(sorted(G.nodes()), sorted(H.nodes()))
     assert_equal(
         sorted(sorted(e) for e in G.edges()),
         sorted(sorted(e) for e in H.edges()))
     self.simple_undirected_fh.seek(0)
Example #31
def gen_avg_cited_net():
    g = nx.read_gexf("data\\timeline_new_g.gexf")
    for node in g:
        gen_avg_cited_year(g, node)

    nx.write_gexf(g, "data\\timeline_new_g.gexf")
Example #32
#!/usr/bin/python
import networkx as nx
import matplotlib.pyplot as plt

G = nx.read_gexf("graph.gexf")
print("Graph has loaded")
nodes = nx.eigenvector_centrality(G)
f = open('eigenvector_centrality.txt', 'w')
for i in nodes:
    f.write(str(i) + ' ' + str(nodes[i]) +  '\n')
f.close()
Example #33
    print('  ', boardString[3], ' | ', boardString[4], ' | ', boardString[5],
          ' ')
    print(' -----+-----+----- ')
    print('  ', boardString[6], ' | ', boardString[7], ' | ', boardString[8],
          ' ')
    print('It is your turn. Enter your move by indexing into the board.')
    print(
        "Obey zero-indexing. For example, entering '1 1' will choose the center tile."
    )

    return 0


print("Hello, I am the Tic-Tac-Toe AI")
print("Type 'X' or 'O' to choose what you will play.")
G = nx.read_gexf("TicTacToeStates.gexf")
player = input()
boardString = '---------'
turnOrder = 3
while player != 'X' and player != 'O':
    print("Input invalid. Please type 'X' or 'O' to choose what you play:")
    player = input()

if player == 'X':
    turnOrder = 0
    players_turn(boardString)
    moveX = int(input('horizontal index: '))
    moveY = int(input('vertical index: '))
    while ((moveX > 2) or (moveX < 0) or (moveY > 2) or (moveY < 0)
           or (boardString[moveX + moveY * 3] != '-')):
        print(
Example #34
import networkx as nx
import matplotlib.pyplot as plt

if __name__ == '__main__':

    G = nx.DiGraph()
    G = nx.read_gexf("./torGraph.dat")

    numbers = []
    rangeOfNumbers = []
    x = []

    for n in G.nodes_iter():  #calculate the in-degree for each node
        numbers.append(G.in_degree(n))
        if (G.in_degree(n) not in rangeOfNumbers):
            rangeOfNumbers.append(G.in_degree(n))

    rangeOfNumbers.sort()

    for n in rangeOfNumbers:  #create a list with the number of nodes for each in-degree
        x.append(numbers.count(n))

    plt.scatter(rangeOfNumbers, x, label=' in-degree',
                color='k')  #print a scatter with the in-degree distribution

    plt.xlim(0, 30)
    plt.ylim(0, 10000)

    plt.yticks([0, 1000, 10000, 50000])
    plt.xticks([0, 10, 20, 30])
Example #35
                year.append({
                    'year': window,
                    'post_location': '',
                    'comment_location': str(ent)
                })


def WriteDictToCSV(csv_file, csv_columns, dict_data):
    try:
        with open(csv_file, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writeheader()
            for data in dict_data:
                writer.writerow(data)
    except IOError as (errno, strerror):
        print("I/O error({0}): {1}".format(errno, strerror))
    return


if __name__ == '__main__':
    for filename in os.listdir("LIWC_DATA/utility_graphs/"):
        if not filename.startswith("%s_di" % sys.argv[1]):
            continue
        print filename
        g = nx.read_gexf("LIWC_DATA/utility_graphs/" + filename)
        file = str(ntpath.basename(filename)).split('.')[0]
        windowyear = file[len(file) - 21:len(file) - 11]
        getLocation(g, windowyear)
    with open('LIWC_DATA/LOCATION_FILES/onlyLocation.txt', 'w') as outfile:
        json.dump(year, outfile)
Example #36
        ranking_file = "%s/all-%s.csv" % (ranking_folder, tp)
        network_file = "%s/all-%s.gexf" % (network_folder, tp)
        positions_file = "%s/comwith_all-%s.csv" % (positions_folder, tp)
        output_file = {'graph': "%s/all-%s-network.js" % (output_folder, tp),
                       'ring': "%s/all-%s-ring.js" % (output_folder, tp)}

        with open(ranking_file, 'r') as inf:
            reader = csv.DictReader(inf)
            ranks = {row.pop("node"): row for row in reader}
        with open(positions_file, 'r') as inf:
            reader = csv.DictReader(inf)
            pos = {row["node"]: eval(row['positions']) for row in reader}

        # MERGE AND REDUCE DATA
        # H is for ring view
        H = nx.read_gexf(network_file)
        id_mapping = {}  # needed to replace numeric id's in json object
        for idx, (node, data) in enumerate(H.nodes(data=True)):
            id_mapping[idx] = node
            data["occurrence"] = data.get("occurrence", 0)
        # G is for graph view
        G = giant(H)
        # remove edges representing one-time collaboration having weight < 1
        for sourc, tar, data in G.edges(data=True):
            if data["weight"] < 1 and len(data["journal"].split(';')) == 1:
                G.remove_edge(sourc, tar)
        G = nx.convert_node_labels_to_integers(G, label_attribute="name")
        # add groups, scaled positions and text
        for node, data in G.nodes(data=True):
            name = data['name']
            # Groups (compressed)
Example #37
if __name__ == "__main__":

    args = parse_args()

    np.random.seed(args.seed)

    # --
    # IO

    print("main.py: preprocessing start", file=sys.stderr)

    graph_files = sorted(glob(os.path.join(args.indir, '*.gexf')))

    label_lookup = {}
    graphs = [nx.read_gexf(graph_file) for graph_file in graph_files]
    graphs = [
        initial_relabel(g, args.label_field, label_lookup) for g in graphs
    ]

    for height in range(1, args.wl_height + 1):
        label_lookup = {}
        graphs = [wl_relabel(graph, height, label_lookup) for graph in graphs]

    for graph_file, graph in zip(graph_files, graphs):
        dump_sg2vec_str(graph_file, args.wl_height, graph)

    wlk_files = sorted(
        glob(os.path.join(args.indir, '*.g2v' + str(args.wl_height))))

    print("main.py: preprocessing done", file=sys.stderr)
Example #38
#
# Check Command line arguments
#
if len(sys.argv) < 2:
    print "Error: Must supply a GraphML input file"
    sys.exit(-1)

filename = sys.argv[1]
output_filename = ""

if re.search(r'graphml', filename):
    output_filename = filename.strip("graphml")
    G = nx.read_graphml(filename)
elif re.search(r'gexf', filename):
    output_filename = filename.strip("gexf")
    G = nx.read_gexf(filename)
else:
    print "Error: Unknown file extension ", filename
    sys.exit(-1)

output_filename = output_filename + "png"

#
# Positions for all nodes
#
pos = nx.spring_layout(G)
pos = nx.random_layout(G)
pos = nx.graphviz_layout(G, prog='dot')
pos = nx.graphviz_layout(G, prog='twopi')

#print G.graph
Example #39
import numpy as np
import matplotlib as mpl
mpl.use('agg')

import neurokernel.LPU.utils.visualizer as vis
import networkx as nx

nx.readwrite.gexf.GEXF.convert_bool = {
    'false': False,
    'False': False,
    'true': True,
    'True': True
}

# Select IDs of spiking projection neurons:
G = nx.read_gexf('./data/generic_lpu.gexf.gz')
neu_proj = sorted([int(k) for k, n in G.node.items() if \
                   n['name'][:4] == 'proj' and \
                   n['spiking']])

V = vis.visualizer()
V.add_LPU('./data/generic_input.h5', LPU='Sensory')
V.add_plot({'type': 'waveform', 'ids': [[0]]}, 'input_Sensory')

V.add_LPU('generic_output_spike.h5', './data/generic_lpu.gexf.gz',
          'Generic LPU')
V.add_plot(
    {
        'type': 'raster',
        'ids': {
            0: neu_proj
Example #40
def main():
    if len(sys.argv) < 2:
        sys.exit('usage: %s < input gexf' % sys.argv[0])

    # Input Graph file

    infile = sys.argv[1]

    G = nx.read_gexf(infile)

    # extract the largest weakly connected component and convert to undirected for fa2l

    G = max(nx.weakly_connected_component_subgraphs(G),
            key=len).to_undirected()

    # set parameters

    colormap = {
        'null': 'lightgray',
        'partisan_2012_conservative': 'r',
        'partisan_2012_liberal': 'b',
        'partisan_2012_libertarian': 'y'
    }
    color_field = "partisan_code"
    size_field = 'inlink_count'
    filter_field = "inlink_count"
    label_field = "label"
    num_labels = 20  # number of labels to visualize
    k = 100  # number of nodes to visualize

    # If the graph has more than 1000 nodes, restrict G to the subgraph of the 1000 highest-ranked nodes before computing the layout

    if len(G.nodes()) > 1000:
        G = filter_graph(G, filter_by=filter_field, top=1000).to_undirected()

    # extract the positions

    pos = force_atlas2_layout(G,
                              iterations=50,
                              pos_list=None,
                              node_masses=None,
                              outbound_attraction_distribution=True,
                              lin_log_mode=True,
                              prevent_overlapping=True,
                              edge_weight_influence=1.0,
                              jitter_tolerance=1.0,
                              barnes_hut_optimize=True,
                              barnes_hut_theta=0.5,
                              scaling_ratio=38,
                              strong_gravity_mode=False,
                              multithread=False,
                              gravity=1.0)

    print("Extracted the positions")
    print(pos)

    # Extract the top k nodes for visualization
    top_k_subgraph = filter_graph(G, filter_by=filter_field,
                                  top=k).to_undirected()

    # Set visual attributes

    node_colors = set_node_color(top_k_subgraph,
                                 color_by=color_field,
                                 colormap=colormap)
    node_sizes = set_node_size(top_k_subgraph,
                               size_field="inlink_count",
                               min_size=0.1,
                               max_size=800)
    node_labels = set_node_label(top_k_subgraph, label=label_field)
    subgraph_pos = get_subgraph_pos(top_k_subgraph, pos)
    edge_colors = edgecolor_by_source(top_k_subgraph, node_colors)

    print("Drawing the visualization")

    # Get specific labels

    subset_label_nodes = sorted(zip(top_k_subgraph.nodes(), node_sizes),
                                key=lambda x: x[1],
                                reverse=True)[0:num_labels]
    subset_labels = {n[0]: node_labels[n[0]] for n in subset_label_nodes}

    # plot the visualization

    fig = plt.figure(figsize=(10, 10), dpi=100)
    ax = fig.add_subplot(111)
    #ax.set(xlim=[0.0, 1.0], ylim=[0.0, 1.0], title='Network Viz')

    # Draw the nodes, edges, labels separately

    nodes = nx.draw_networkx_nodes(top_k_subgraph,
                                   pos=subgraph_pos,
                                   node_size=node_sizes,
                                   node_color=node_colors,
                                   alpha=.7)
    edges = nx.draw_networkx_edges(top_k_subgraph,
                                   pos=subgraph_pos,
                                   edge_color=edge_colors,
                                   alpha=0.01)
    labels = nx.draw_networkx_labels(top_k_subgraph,
                                     pos=subgraph_pos,
                                     labels=subset_labels,
                                     font_size=8)

    # Adjust label overlapping

    x_pos = [v[0] for k, v in subgraph_pos.items()]
    y_pos = [v[1] for k, v in subgraph_pos.items()]
    adjust_text(texts=list(labels.values()),
                x=x_pos,
                y=y_pos,
                arrowprops=dict(arrowstyle='->', color='lightgray'))

    # Declutter visualization

    #ax.axis("off");

    # save the plot

    plt.savefig("1.png")

    # Show the plot
    plt.show()
Example #41
import networkx as nx
from modularity_maximization import partition
from modularity_maximization.utils import get_modularity

networkfile = "Feb2018toNov2018_new.gexf"
G = nx.read_gexf(networkfile)

comm_dict = partition(G)
print get_modularity(G, comm_dict)
print comm_dict
for node in G.nodes():
    print node
nx.set_node_attributes(G, name='community', values=comm_dict)
# for comm in set(comm_dict.values()):
#     print("Community %d"%comm)
#     print(', '.join([node for node in comm_dict if comm_dict[node] == comm]))
#
#     for node in comm_dict:
#         if comm_dict[node] == comm:

nx.write_gexf(G, "Feb2018toNov2018_communities_new.gexf")
Example #42
    parser.add_argument('lpu_file_name',
                        nargs='?',
                        default='generic_lpu.gexf.gz',
                        help='LPU file name')
    parser.add_argument('in_file_name',
                        nargs='?',
                        default='generic_input.h5',
                        help='Input file name')
    parser.add_argument('-s', type=int, help='Seed random number generator')
    parser.add_argument('-l',
                        '--lpu',
                        type=str,
                        default='gen',
                        help='LPU name')

    args = parser.parse_args()

    if args.s is not None:
        np.random.seed(args.s)
    dt = 1e-4
    dur = 1.0
    start = 0.3
    stop = 0.6
    I_max = 0.6
    neu_num = [np.random.randint(31, 40) for i in range(3)]

    create_lpu(args.lpu_file_name, args.lpu, *neu_num)
    g = nx.read_gexf(args.lpu_file_name)
    create_input(args.in_file_name, neu_num[0], dt, dur, start, stop, I_max)
    create_lpu(args.lpu_file_name, args.lpu, *neu_num)
Example #43
def main():
    num_nodes, dag = 50, 4

    result_list = []
    time_list = []
    iter_list = []
    gap_list = []
    for i in range(dag, dag + 7):
        workflow_path = './SyntheticSettings/default_dags/%d_nodes/dag_%d.gexf' % (
            num_nodes, i)
        system_path = './SyntheticSettings/default_dags/platform.gexf'

        dag = load_dag(workflow_path)
        platform = nx.read_gexf(system_path)
        # edge_list = list(platform.edges())
        # edge_list = random.Random(500).sample(edge_list, 2)
        # for (u, v) in edge_list:
        #     platform[u][v]['bandwidth'] = 100000
        # platform.remove_edges_from(edge_list[::2])
        #nx.draw(platform, with_labels=True)
        #plt.show()

        for u, v in platform.edges():
            pass
            # platform[u][v]['bandwidth'] /= 4
        for u, v in dag.edges():
            pass
            # dag[u][v]['data'] /= 2
            # dag[u][v]['data'] *= 4

        for nd in dag.nodes():
            pass
            # dag.node[nd]['Core6'] = dag.node[nd]['Core1'] // 5

        nooff_rate, localoff_rate = 0, 0
        noff_nodes = random.Random(401).sample(
            dag.nodes(), round(dag.number_of_nodes() * nooff_rate))
        localoff_nodes = random.Random(500).sample(
            [i for i in dag.nodes() if not i in noff_nodes],
            round(dag.number_of_nodes() * localoff_rate))
        for t in noff_nodes:
            for r in platform.nodes():
                if r != 'Core1':
                    dag.node[t][r] = 10000
                    # dag[u][v]['data'] *= 4

        for t in localoff_nodes:
            dag.node[t]['Core6'] = 10000

        #print('no offloading nodes:', noff_nodes)
        #print('local offloading nodes:', localoff_nodes)
        tl = 3600

        ts = time.time()
        # result,_ = lb, _ = greedy_ant.greedy_ant(dag, platform, 40)
        result, lb = hybrid.ilp(dag, platform, tl=tl)
        # result, lb = hybrid.hybrid(dag, platform, tl=tl, with_ilp=True)
        # result, lb = hybrid.hybrid(dag, platform, tl=tl, with_ilp=False)
        # result = lbbd.lbbd(dag,  platform)
        # result = lb = greedy.greedy(dag, platform)
        # result, _ = lb, _ = heft.heft(dag, platform)

        # result = hybrid.ilp(dag, platform, tl=tl)
        duration = time.time() - ts
        result_list.append(int(result))
        time_list.append(int(duration))
        gap = (result - lb) / result
        gap_list.append(round(gap, 5))
        # iter_list.append(iter)
        print("makespan = ", result_list)
        print("time_cost = ", time_list)
        print("gap = ", gap_list)
    print(result_list)
Example #44
 def __init__(self):
     self.g = nx.read_gexf("data\\timeline_new_g.gexf")
Example #45
    :param time_string: a time string
    :param target_time_zone: the target time zone
    :param convert_utc_time: whether transfer the datetime object to utc first
    :return:
    """
    datetime_object = datetime.strptime(time_string, '%a %b %d %H:%M:%S %z %Y')
    if convert_utc_time:
        final_time_object = datetime_object.replace(tzinfo=pytz.utc).astimezone(target_time_zone)
    else:
        final_time_object = datetime_object.astimezone(target_time_zone)
    return final_time_object


def combine_candidate_ids(dataframe: pd.DataFrame) -> set:
    """
    Get the Weibo id set, considering original post and repost
    :param dataframe: a Weibo dataframe
    :return: a Weibo id set
    """
    author_int_set = set(dataframe['weibo_id'])
    retweeter_list = list(dataframe['retweets_id'])
    retweeter_int_set = set([np.int64(str(retweet_id[1:-1])) for retweet_id in retweeter_list if retweet_id != "['no retweets']"])
    # combine the retweet id and author id together
    combine_set = {*author_int_set, *retweeter_int_set}
    return combine_set


if __name__ == '__main__':
    graph = nx.read_gexf(os.path.join(data_paths.data_path, 'graph_structure', 'user_location_graph.gexf'))
    print(get_network_statistics(graph))
Example #46
import networkx as nx


def agrees(nnodes1, nnodes2):
    if nnodes1 == nnodes2:
        return True
    else:
        return False


print("----- COUNT # OF NODES -----")
print("\nreading z = 0")
z0_1 = nx.read_gexf('halos/10000/z0/halo-radius-6.0.gexf')
z0_2 = nx.read_gexf('halos/10000/z0/halo-radius-6.1.gexf')
if agrees(z0_1.number_of_nodes(), z0_2.number_of_nodes()):
    print("z = 0 has", z0_1.number_of_nodes(), "nodes")
else:
    print("z = 0 has disagreeing # of nodes.")

print("\nreading z = 0.5")
zp5_1 = nx.read_gexf('halos/10000/zp5/halo-radius-6.0.gexf')
zp5_2 = nx.read_gexf('halos/10000/zp5/halo-radius-6.1.gexf')
if agrees(zp5_1.number_of_nodes(), zp5_2.number_of_nodes()):
    print("z = 0.5 has", zp5_1.number_of_nodes(), "nodes")
else:
    print("z = 0.5 has disagreeing # of nodes.")

print("\nreading z = 1")
z1_1 = nx.read_gexf('halos/10000/z1/halo-radius-6.0.gexf')
z1_2 = nx.read_gexf('halos/10000/z1/halo-radius-6.1.gexf')
if agrees(z1_1.number_of_nodes(), z1_2.number_of_nodes()):
Example #47
def importGexfWithLabels(gexffilepath, depth = 0):
    '''
    Reads a gexf network file from Hyphe and updates or creates all nodes and relationships in the neo4j database.
    Prints '.' for every 100 nodes/links imported, and the running count every 1000.
    "depth" is used to prefix the new properties on nodes and relationships. Value can be 0, 1, 2 or 3.
    '''

    # imports or update all nodes / relationships in gexf file from hyphe

    G= nx.read_gexf(gexffilepath, node_type=None, relabel=False, version='1.1draft')
    data = nx.json_graph.node_link_data(G)
    totnbnodes=len(data['nodes'])
    print(totnbnodes," nodes found in gexf")
    i=1

    for node in data['nodes']:
        i=i+1
        nodematch = graph.nodes.match(site_name =node['label']).first()
        if nodematch == None:
            try:
                nodematch = Node('Website', site_name = node['label'])
                nodematch.__primarylabel__ = 'Website'
                nodematch.__primarykey__ = 'site_name'
                graph.merge(nodematch)
            except:
                print("could not import ", node['label'])

        for key in node.keys():
            nodematch["D" + str(depth) + "_" + key] = node[key]
            graph.push(nodematch)
        if i%100 == 0:
            print(".", end=" ")
        if i%1000 ==0:
            print(i,"/",totnbnodes)

    print(i," nodes imported")
    print(len(graph.nodes.match("Website")), "nodes in db after import")

    totnblinks=len(data['links'])
    print(totnblinks," links found in gexf")

    j=0
    for link in data['links']:

        if depth ==0:
            source_n = graph.nodes.match("Website", D0_id = link['source']).first()
            target_n = graph.nodes.match("Website", D0_id = link['target']).first()
        if depth == 1:
            source_n = graph.nodes.match("Website", D1_id = link['source']).first()
            target_n = graph.nodes.match("Website", D1_id = link['target']).first()
        if depth == 2:
            source_n = graph.nodes.match("Website", D2_id = link['source']).first()
            target_n = graph.nodes.match("Website", D2_id = link['target']).first()
        if depth == 3:
            source_n = graph.nodes.match("Website", D3_id = link['source']).first()
            target_n = graph.nodes.match("Website", D3_id = link['target']).first()
        relmatch = graph.relationships.match((source_n,target_n),r_type="LINKS_TO").first()

        try:
            if relmatch == None:
                rel = Relationship(source_n, "LINKS_TO", target_n)
                rel["count_D" + str(depth)]=link['count']
                graph.merge(rel)
            else:
                relmatch["count_D" + str(depth)]=link['count']
                graph.push(relmatch)
            if j%100 == 0:
                print(".", end=" ")
            if j%1000 ==0:
                print(j, "/", totnblinks)
            j=j+1
        except:
            pass
    print(j," links imported")
    print(len(graph.relationships.match()), "links in db after import")
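A usage sketch, assuming a py2neo graph connection is already configured in the module and hypothetical Hyphe exports at two crawl depths:

# Hypothetical file names; depth controls the D0_/D1_ property prefixes.
importGexfWithLabels('corpus_depth0.gexf', depth=0)
importGexfWithLabels('corpus_depth1.gexf', depth=1)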
Example #48
output_file = open(data_dir + "/graphs.bss", 'w')

if has_dict == False:
    hashing = {}
    typeCnt = 0
else:
    dictionary = open('dict.pkl', 'rb')
    hashing = pickle.load(dictionary)
    typeCnt = len(hashing.keys())

count = 0
err_cnt = 0
for g_file in glob(data_dir + '/*.gexf'):
    gid = int(os.path.basename(g_file).split('.')[0])
    try:
        g = nx.read_gexf(g_file)
    except xml.etree.ElementTree.ParseError:
        continue

    g.graph['gid'] = gid

    label2node = {}

    output_file.write('%d\n' % gid)
    count = count + 1
    output_file.write("{} {}\n".format(len(g.nodes()), len(g.edges())))
    for i, n in enumerate(g.nodes(data=True)):
        #  print(n)
        if constant:
            output_file.write('1\n')
        else:
Example #49
    def process(self):
        import networkx as nx

        ids, Ns = [], []
        for r_path, p_path in zip(self.raw_paths, self.processed_paths):
            names = glob.glob(osp.join(r_path, '*.gexf'))
            # Get the graph IDs given by the file name:
            ids.append(sorted([int(i.split(os.sep)[-1][:-5]) for i in names]))

            data_list = []
            # Convert graphs in .gexf format to a NetworkX Graph:
            for i, idx in enumerate(ids[-1]):
                i = i if len(ids) == 1 else i + len(ids[0])
                G = nx.read_gexf(osp.join(r_path, f'{idx}.gexf'))
                mapping = {name: j for j, name in enumerate(G.nodes())}
                G = nx.relabel_nodes(G, mapping)
                Ns.append(G.number_of_nodes())

                edge_index = torch.tensor(list(G.edges)).t().contiguous()
                if edge_index.numel() == 0:
                    edge_index = torch.empty((2, 0), dtype=torch.long)
                edge_index = to_undirected(edge_index, num_nodes=Ns[-1])

                data = Data(edge_index=edge_index, i=i)
                data.num_nodes = Ns[-1]

                # Create a one-hot encoded feature matrix denoting the atom
                # type for the  AIDS700nef dataset:
                if self.name == 'AIDS700nef':
                    x = torch.zeros(data.num_nodes, dtype=torch.long)
                    for node, info in G.nodes(data=True):
                        x[int(node)] = self.types.index(info['type'])
                    data.x = F.one_hot(x, num_classes=len(self.types)).to(
                        torch.float)

                if self.pre_filter is not None and not self.pre_filter(data):
                    continue

                if self.pre_transform is not None:
                    data = self.pre_transform(data)

                data_list.append(data)

            torch.save(self.collate(data_list), p_path)

        assoc = {idx: i for i, idx in enumerate(ids[0])}
        assoc.update({idx: i + len(ids[0]) for i, idx in enumerate(ids[1])})

        path = osp.join(self.raw_dir, self.name, 'ged.pickle')
        mat = torch.full((len(assoc), len(assoc)), float('inf'))
        with open(path, 'rb') as f:
            obj = pickle.load(f)
            xs, ys, gs = [], [], []
            for (x, y), g in obj.items():
                xs += [assoc[x]]
                ys += [assoc[y]]
                gs += [g]
            x, y = torch.tensor(xs), torch.tensor(ys)
            g = torch.tensor(gs, dtype=torch.float)
            mat[x, y], mat[y, x] = g, g

        path = osp.join(self.processed_dir, f'{self.name}_ged.pt')
        torch.save(mat, path)

        # Calculate the normalized GEDs:
        N = torch.tensor(Ns, dtype=torch.float)
        norm_mat = mat / (0.5 * (N.view(-1, 1) + N.view(1, -1)))

        path = osp.join(self.processed_dir, f'{self.name}_norm_ged.pt')
        torch.save(norm_mat, path)
Пример #50
0
    # nx.write_gexf(H, 'data/freq_greater_than_5_facebook_network_level_[4,5].gexf')
    # nx.write_gexf(G, 'data/facebook_network_level_[4,5].gexf')

    # finance_index = nx.get_node_attributes(G, 'betweenness').items()
    # food_index = nx.get_node_attributes(G, 'betweenness').items()
    #
    # print (simple_resonance(finance_index, food_index))
    # print (standardized_sr(finance_index, food_index))
    #
    # finance_iscore = nx.get_edge_attributes(G, 'pair_i')
    # food_iscore = nx.get_edge_attributes(G, 'pair_i')
    #
    # print(pair_resonance(finance_iscore, food_iscore))
    #
    # print(standardized_pr(finance_iscore, food_iscore))

    exp_config = configparser.ConfigParser()
    exp_config.read("experiments/visualizacao_post_exclusivo.ini")
    experiment_name = exp_config['EXPERIMENT']['name']
    networks = eval(exp_config['EXPERIMENT']['networks'])
    folder = exp_config['EXPERIMENT']['folder']
    networks_keys = list(networks.keys())
    for i in range(len(networks_keys)):
        net_1 = nx.read_gexf(folder + experiment_name + "/" + experiment_name +
                             "_" + str(networks_keys[i]) + ".gexf")
        for j in range(i + 1, len(networks_keys)):
            net_2 = nx.read_gexf(folder + experiment_name + "/" +
                                 experiment_name + "_" +
                                 str(networks_keys[j]) + ".gexf")
            print(simple_resonance(net_1, net_2))
Пример #51
0
    def lpu_parser(filename):
        """
        GEXF LPU specification parser.

        Extract LPU specification data from a GEXF file and store it
        in a list of dictionaries. All nodes in the GEXF file are assumed to
        correspond to neuron model instances while all edges are assumed to
        correspond to synapse model instances.

        Parameters
        ----------
        filename : str
            GEXF filename.

        Returns
        -------
        n_dict : dict of dict of neuron
            Each key of `n_dict` is the name of a neuron model; the values
            are dicts that map each attribute name to a list that contains the
            attribute values for each neuron.
        s_dict : dict of dict of synapse
            Each key of `s_dict` is the name of a synapse model; the values are
            dicts that map each attribute name to a list that contains the
            attribute values for each synapse.

        Example
        -------
        >>> n_dict = {'LeakyIAF': {'Vr': [0.5, 0.6], 'Vt': [0.3, 0.2]},
                      'MorrisLecar': {'V1': [0.15, 0.16], 'Vt': [0.13, 0.27]}}
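        >>> # Illustrative sketch only: a matching s_dict for a hypothetical
        >>> # 'AlphaSynapse' model might look like
        >>> s_dict = {'AlphaSynapse': {'reverse': [-0.065, -0.06],
                                       'pre': ['0', '2'], 'post': ['1', '3']}}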

        Notes
        -----
        All neurons must have the following attributes; any additional 
        attributes for a specific neuron model must be provided 
        for all neurons of that model type:

        1. spiking - True if the neuron emits spikes, False if it emits graded
           potentials.
        2. model - model identifier string, e.g., 'LeakyIAF', 'MorrisLecar'
        3. public - True if the neuron emits output exposed to other LPUs.
        4. extern - True if the neuron can receive external input from a file.

        All synapses must have the following attributes:

        1. class - int indicating connection class of synapse; it may assume the
           following values:

            0. spike to spike synapse
            1. spike to graded potential synapse
            2. graded potential to spike synapse
            3. graded potential to graded potential synapse
        2. model - model identifier string, e.g., 'AlphaSynapse'
        3. conductance - True if the synapse emits conductance values, False if
           it emits current values.
        4. reverse - If the `conductance` attribute is True, this attribute
           should be set to the reverse potential.

        TODO
        ----
        Input data should be validated.
        """

        # Parse the GEXF file using NetworkX.
        # Note: this snippet relies on Python 2 and the NetworkX 1.x API
        # (graph.node, dict.iterkeys()/itervalues(), list.sort(cmp=...)).
        graph = nx.read_gexf(filename)

        # parse neuron data
        n_dict = {}
        neurons = graph.node.items()
        # Sort by id (converted to an integer first) so that the ordering of
        # consecutive neurons of the same type in the constructed LPU matches
        # the ordering used by Neurokernel.
        neurons.sort(cmp=neuron_cmp)
        for id, neu in neurons:
            model = neu['model']
            # if an input_port, make sure selector is specified
            if model == PORT_IN_GPOT or model == PORT_IN_SPK:
                assert ('selector' in neu.keys())
                if model == PORT_IN_GPOT:
                    neu['spiking'] = False
                    neu['public'] = False
                else:
                    neu['spiking'] = True
                    neu['public'] = False
            # if an output_port, make sure selector is specified
            if 'public' in neu.keys():
                if neu['public']:
                    assert ('selector' in neu.keys())
            else:
                neu['public'] = False
            if 'selector' not in neu.keys():
                neu['selector'] = ''
            # if the neuron model has not been seen before, add it to n_dict
            if model not in n_dict:
                n_dict[model] = {k: [] for k in neu.keys() + ['id']}

            # neurons of the same model should have the same attributes
            assert (set(n_dict[model].keys()) == set(neu.keys() + ['id']))
            # add neuron data into the subdictionary of n_dict
            for key in neu.iterkeys():
                n_dict[model][key].append(neu[key])
            n_dict[model]['id'].append(int(id))
        # remove duplicate model information
        for val in n_dict.itervalues():
            val.pop('model')
        if not n_dict: n_dict = None

        # parse synapse data
        synapses = graph.edges(data=True)
        s_dict = {}
        synapses.sort(cmp=synapse_cmp)
        for syn in synapses:
            # syn[0/1]: pre-/post-neu id; syn[2]: dict of synaptic data
            model = syn[2]['model']
            syn[2]['id'] = int(syn[2]['id'])
            # if the synapse model has not been seen before, add it to s_dict
            if model not in s_dict:
                s_dict[model] = {
                    k: []
                    for k in syn[2].keys() + ['pre', 'post']
                }

            # synapses of the same model should have the same attributes
            assert (set(s_dict[model].keys()) == set(syn[2].keys() +
                                                     ['pre', 'post']))
            # add synaptic data into the subdictionary of s_dict
            for key in syn[2].iterkeys():
                s_dict[model][key].append(syn[2][key])
            s_dict[model]['pre'].append(syn[0])
            s_dict[model]['post'].append(syn[1])
        for val in s_dict.itervalues():
            val.pop('model')
        if not s_dict:
            s_dict = {}
        return n_dict, s_dict
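
    # Illustrative usage sketch (assumes the enclosing class exposes
    # lpu_parser, e.g. as a static method; 'my_lpu.gexf' is a placeholder):
    #
    #     n_dict, s_dict = lpu_parser('my_lpu.gexf')
    #     for model, attrs in n_dict.items():
    #         print('%s: %d neurons' % (model, len(attrs['id'])))
    #     for model, attrs in s_dict.items():
    #         print('%s: %d synapses' % (model, len(attrs['pre'])))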
Пример #52
0
    def update_output(clicks, clicks2, input_value):
        ctx = dash.callback_context
        if (clicks is not None) or (clicks2 is not None):
            my_file = open("test.txt", "w+")
            my_file.write(input_value)
            my_file.close()
            button_id = ctx.triggered[0]['prop_id'].split('.')[0]

            print(button_id)

            if button_id == 'submit-button':
                print('selected first submit button')
                print('getting graph')
                ################
                #COMPUTATIONS
                ################
                graph, descriptors, dict_pmid_count_mesh = run_get_graph_2()
                print('obtained graph')

                # import pdb; pdb.set_trace()
                graph = nx.read_gexf("test_graph.gexf")
                top_k_papers, top_k_papers_pmids, top_k_people, top_k_people_ids, authors_to_affiliation, papers_to_author, citation_dict, number_papers_dict, affiliation_paper_count, pmid_to_title, graph, pagerank_ordered = graph_to_recommend(
                    graph, dict_pmid_count_mesh, host, port, dbname, user,
                    password)
                # import pdb; pdb.set_trace()
                global title_to_pmid
                title_to_pmid = dict([(value, key)
                                      for key, value in pmid_to_title.items()])
                # sentences = articles_to_knowledge(top_k_papers_pmids, host, port, dbname, user, password)
                sentences = articles_to_summary(top_k_papers_pmids, host, port,
                                                dbname, user, password,
                                                summarizer)

                #################
                #CITATION NETWORK
                ##################
                # get an (x, y) position for each node
                pos = nx.layout.spring_layout(graph)

                #Create Edges
                edge_trace = go.Scatter(x=[],
                                        y=[],
                                        line=dict(width=0.5, color='#888'),
                                        hoverinfo='none',
                                        mode='lines')

                for edge in graph.edges():
                    x0, y0 = pos[graph.nodes[edge[0]]['label']]
                    x1, y1 = pos[graph.nodes[edge[1]]['label']]
                    edge_trace['x'] += tuple([x0, x1, None])
                    edge_trace['y'] += tuple([y0, y1, None])

                node_trace = go.Scatter(
                    x=[],
                    y=[],
                    text=[],
                    mode='markers',
                    hoverinfo='text',
                    marker=dict(showscale=True,
                                colorscale='YlGnBu',
                                reversescale=True,
                                color=[],
                                size=20,
                                colorbar=dict(thickness=15,
                                              title='Node Connections',
                                              xanchor='left',
                                              titleside='right'),
                                line=dict(width=2)))
                for node in graph.nodes():
                    x, y = pos[graph.nodes[node]['label']]
                    node_trace['x'] += tuple([x])
                    node_trace['y'] += tuple([y])

                #add color to node points
                for node, adjacencies in enumerate(graph.adjacency()):
                    node_trace['marker']['color'] += tuple(
                        [len(adjacencies[1])])
                    if str(adjacencies[0]) in pmid_to_title:
                        node_info = pmid_to_title[str(
                            adjacencies[0])] + '<br># of connections: ' + str(
                                len(adjacencies[1]))
                    else:
                        node_info = str(
                            adjacencies[0]) + '<br># of connections: ' + str(
                                len(adjacencies[1]))
                    node_trace['text'] += tuple([node_info])

                fig = go.Figure(data=[edge_trace, node_trace],
                                layout=go.Layout(
                                    title='',
                                    titlefont_size=16,
                                    showlegend=False,
                                    hovermode='closest',
                                    margin=dict(b=20, l=5, r=5, t=40),
                                    annotations=[
                                        dict(showarrow=False,
                                             xref="paper",
                                             yref="paper",
                                             x=0.005,
                                             y=-0.002)
                                    ],
                                    xaxis=dict(showgrid=False,
                                               zeroline=False,
                                               showticklabels=False),
                                    yaxis=dict(showgrid=False,
                                               zeroline=False,
                                               showticklabels=False)))
                fig.update_layout(hoverlabel=dict(
                    bgcolor="white", font_size=16, font_family="Roboto"))

                style = {'display': 'block'}

                #################
                #TABLES WITH DATA
                ##################

                layout = [
                    html.Div(
                        children=[
                            html.H2(children='Recommendations',
                                    id='title_rec'),
                            dash_table.DataTable(
                                id='table1',
                                columns=[{
                                    "name": i,
                                    "id": i
                                } for i in df1['columns']],
                                data=[{
                                    'Recommended Papers 📄':
                                    x[0:-6],
                                    'PMID':
                                    title_to_pmid[x],
                                    'Last Author':
                                    papers_to_author[x],
                                    'Article Summary':
                                    sentences[title_to_pmid[x]]
                                } for x in top_k_papers],
                                style_header={
                                    'backgroundColor': '#f2f2f2',
                                    'whiteSpace': 'normal',
                                    'height': 'auto'
                                },
                                style_cell={'textAlign': 'left'},
                                style_data={
                                    'whiteSpace': 'pre-wrap',
                                    'height': 'auto'
                                },
                                style_table={
                                    "margin-top": "25px",
                                    'whiteSpace': 'normal',
                                    'height': 'auto'
                                },
                                style_cell_conditional=[{
                                    'if': {
                                        'column_id': 'Recommended Papers 📄'
                                    },
                                    'width': '50%'
                                }, {
                                    'if': {
                                        'column_id': 'PMID'
                                    },
                                    'width': '5%'
                                }, {
                                    'if': {
                                        'column_id': 'Last Author'
                                    },
                                    'width': '10%'
                                }, {
                                    'if': {
                                        'column_id': 'Article Summary'
                                    },
                                    'width': '35%'
                                }]),
                            dash_table.DataTable(
                                id='table2',
                                columns=[{
                                    "name": i,
                                    "id": i
                                } for i in df2['columns']],
                                data=[{
                                    'Recommended People 👩‍🔬👨‍🔬':
                                    x,
                                    'Latest Affiliation 🏫':
                                    authors_to_affiliation[x],
                                    'Citations':
                                    citation_dict[top_k_people_ids[idx]],
                                    'Number of Papers':
                                    number_papers_dict[top_k_people_ids[idx]]
                                } for idx, x in enumerate(top_k_people)],
                                style_header={
                                    'backgroundColor': '#f2f2f2',
                                    'textColor': 'pink',
                                    'whiteSpace': 'normal',
                                    'height': 'auto'
                                },
                                style_cell={'textAlign': 'left'},
                                style_data={
                                    'whiteSpace': 'normal',
                                    'height': 'auto'
                                },
                                style_table={
                                    "margin-top": "40px",
                                    'whiteSpace': 'normal',
                                    'height': 'auto',
                                    'align': 'center'
                                },
                                style_cell_conditional=[{
                                    'if': {
                                        'column_id':
                                        'Recommended People 👩‍🔬👨‍🔬'
                                    },
                                    'width': '25%'
                                }, {
                                    'if': {
                                        'column_id': 'Latest Affiliation 🏫'
                                    },
                                    'width': '55%'
                                }, {
                                    'if': {
                                        'column_id': 'Citations'
                                    },
                                    'width': '8%'
                                }, {
                                    'if': {
                                        'column_id': 'Number of Papers'
                                    },
                                    'width': '12%'
                                }])
                            #,
                            #dash_table.DataTable(
                            #                        id='table3',
                            #                        columns=[{"name": i, "id": i} for i in df3['columns']],
                            #                        data=[{'Affiliation 🏫': key, 'Number of Papers': affiliation_paper_count[key]} for key in affiliation_paper_count.keys()],

                            #                      style_header={'backgroundColor': '#f2f2f2', 'whiteSpace': 'normal','height': 'auto'},
                            #                       style_cell={'textAlign': 'left'},

                            #                      style_data={'whiteSpace': 'normal','height': 'auto'},
                            #style_table={"margin-top": "40px", 'whiteSpace': 'normal', 'height': 'auto'},
                            #style_cell_conditional=[

                            #{'if': {'column_id': 'Affiliation 🏫'},

                            #'width': '85%'},
                            #{'if': {'column_id': 'Number of Papers'},
                            #'width': '15%'}
                            #]
                            #)
                        ],
                        id='div_table_analytics'),
                    html.H2(children='Citation graph of related papers',
                            id='title_graph_div')
                ]

                return layout, style, fig, go.Figure(data=[],
                                                     layout=go.Layout(
                                                         title='',
                                                         titlefont_size=16,
                                                     )), {
                                                         'display': 'none'
                                                     }

            elif button_id == 'submit-button-2':
                print('selected second submit button')
                print('getting graph')
                ################
                #COMPUTATIONS
                ################
                graph, descriptors, dict_pmid_count_mesh = run_get_graph_2()
                print('obtained graph')

                # import pdb; pdb.set_trace()
                graph = nx.read_gexf("test_graph.gexf")
                top_k_papers, top_k_papers_pmids, top_k_people, top_k_people_ids, authors_to_affiliation, papers_to_author, citation_dict, number_papers_dict, affiliation_paper_count, pmid_to_title, graph, pagerank_ordered = graph_to_recommend(
                    graph, dict_pmid_count_mesh, host, port, dbname, user,
                    password)
                title_to_pmid = dict([(value, key)
                                      for key, value in pmid_to_title.items()])
                # sentences = articles_to_knowledge(top_k_papers_pmids, host, port, dbname, user, password)
                sentences = articles_to_summary(top_k_papers_pmids, host, port,
                                                dbname, user, password,
                                                summarizer)

                #################
                #CITATION NETWORK
                ##################
                # get an (x, y) position for each node
                pos = nx.layout.spring_layout(graph)

                #Create Edges
                edge_trace = go.Scatter(x=[],
                                        y=[],
                                        line=dict(width=0.5, color='#888'),
                                        hoverinfo='none',
                                        mode='lines')

                for edge in graph.edges():
                    x0, y0 = pos[graph.nodes[edge[0]]['label']]
                    x1, y1 = pos[graph.nodes[edge[1]]['label']]
                    edge_trace['x'] += tuple([x0, x1, None])
                    edge_trace['y'] += tuple([y0, y1, None])

                node_trace = go.Scatter(
                    x=[],
                    y=[],
                    text=[],
                    mode='markers',
                    hoverinfo='text',
                    marker=dict(showscale=True,
                                colorscale='YlGnBu',
                                reversescale=True,
                                color=[],
                                size=20,
                                colorbar=dict(thickness=15,
                                              title='Node Connections',
                                              xanchor='left',
                                              titleside='right'),
                                line=dict(width=2)))
                for node in graph.nodes():
                    x, y = pos[graph.nodes[node]['label']]
                    node_trace['x'] += tuple([x])
                    node_trace['y'] += tuple([y])

                #add color to node points
                for node, adjacencies in enumerate(graph.adjacency()):
                    node_trace['marker']['color'] += tuple(
                        [len(adjacencies[1])])
                    if str(adjacencies[0]) in pmid_to_title:
                        node_info = pmid_to_title[str(
                            adjacencies[0])] + '<br># of connections: ' + str(
                                len(adjacencies[1]))
                    else:
                        node_info = str(
                            adjacencies[0]) + '<br># of connections: ' + str(
                                len(adjacencies[1]))
                    node_trace['text'] += tuple([node_info])

                fig = go.Figure(data=[edge_trace, node_trace],
                                layout=go.Layout(
                                    title='',
                                    titlefont_size=16,
                                    showlegend=False,
                                    hovermode='closest',
                                    margin=dict(b=20, l=5, r=5, t=40),
                                    annotations=[
                                        dict(showarrow=False,
                                             xref="paper",
                                             yref="paper",
                                             x=0.005,
                                             y=-0.002)
                                    ],
                                    xaxis=dict(showgrid=False,
                                               zeroline=False,
                                               showticklabels=False),
                                    yaxis=dict(showgrid=False,
                                               zeroline=False,
                                               showticklabels=False)))
                fig.update_layout(hoverlabel=dict(
                    bgcolor="white", font_size=16, font_family="Roboto"))

                #################
                #EMBEDDING VISUALIZATION
                ##################
                t0 = time.time()
                emb = get_embeddings_to_visualize(descriptors,
                                                  pagerank_ordered)
                t1 = time.time()
                print('got embeddings: {} secs'.format(t1 - t0))

                fig_emb = px.scatter(emb,
                                     x='x',
                                     y='y',
                                     color='Node type',
                                     opacity=0.8,
                                     hover_data={
                                         'x': False,
                                         'y': False,
                                         'Name': True
                                     })

                fig_emb.update_layout(hoverlabel=dict(bgcolor="white",
                                                      font_size=16,
                                                      font_family="Roboto"),
                                      xaxis=dict(showgrid=False,
                                                 zeroline=False,
                                                 showticklabels=False),
                                      yaxis=dict(showgrid=False,
                                                 zeroline=False,
                                                 showticklabels=False))

                fig_emb.update_traces(marker=dict(size=30),
                                      selector=dict(mode='markers'))

                style = {'display': 'block'}
                style_emb = {'display': 'block'}

                #################
                #TABLES WITH DATA
                ##################

                layout = [
                    html.Div(
                        children=[
                            html.H2(children='Recommendations',
                                    id='title_rec'),
                            dash_table.DataTable(
                                id='table1',
                                columns=[{
                                    "name": i,
                                    "id": i
                                } for i in df1['columns']],
                                data=[{
                                    'Recommended Papers 📄':
                                    x[0:-6],
                                    'PMID':
                                    title_to_pmid[x],
                                    'Last Author':
                                    papers_to_author[x],
                                    'Article Summary':
                                    sentences[title_to_pmid[x]]
                                } for x in top_k_papers],
                                style_header={
                                    'backgroundColor': '#f2f2f2',
                                    'whiteSpace': 'normal',
                                    'height': 'auto'
                                },
                                style_cell={'textAlign': 'left'},
                                style_data={
                                    'whiteSpace': 'pre-wrap',
                                    'height': 'auto'
                                },
                                style_table={
                                    "margin-top": "25px",
                                    'whiteSpace': 'normal',
                                    'height': 'auto'
                                },
                                style_cell_conditional=[{
                                    'if': {
                                        'column_id': 'Recommended Papers 📄'
                                    },
                                    'width': '50%'
                                }, {
                                    'if': {
                                        'column_id': 'PMID'
                                    },
                                    'width': '5%'
                                }, {
                                    'if': {
                                        'column_id': 'Last Author'
                                    },
                                    'width': '10%'
                                }, {
                                    'if': {
                                        'column_id': 'Article Summary'
                                    },
                                    'width': '35%'
                                }]),
                            dash_table.DataTable(
                                id='table2',
                                columns=[{
                                    "name": i,
                                    "id": i
                                } for i in df2['columns']],
                                data=[{
                                    'Recommended People 👩‍🔬👨‍🔬':
                                    x,
                                    'Latest Affiliation 🏫':
                                    authors_to_affiliation[x],
                                    'Citations':
                                    citation_dict[top_k_people_ids[idx]],
                                    'Number of Papers':
                                    number_papers_dict[top_k_people_ids[idx]]
                                } for idx, x in enumerate(top_k_people)],
                                style_header={
                                    'backgroundColor': '#f2f2f2',
                                    'textColor': 'pink',
                                    'whiteSpace': 'normal',
                                    'height': 'auto'
                                },
                                style_cell={'textAlign': 'left'},
                                style_data={
                                    'whiteSpace': 'normal',
                                    'height': 'auto'
                                },
                                style_table={
                                    "margin-top": "40px",
                                    'whiteSpace': 'normal',
                                    'height': 'auto',
                                    'align': 'center'
                                },
                                style_cell_conditional=[{
                                    'if': {
                                        'column_id':
                                        'Recommended People 👩‍🔬👨‍🔬'
                                    },
                                    'width': '25%'
                                }, {
                                    'if': {
                                        'column_id': 'Latest Affiliation 🏫'
                                    },
                                    'width': '55%'
                                }, {
                                    'if': {
                                        'column_id': 'Citations'
                                    },
                                    'width': '8%'
                                }, {
                                    'if': {
                                        'column_id': 'Number of Papers'
                                    },
                                    'width': '12%'
                                }])
                            #,
                            #dash_table.DataTable(
                            #                        id='table3',
                            #                        columns=[{"name": i, "id": i} for i in df3['columns']],
                            #                        data=[{'Affiliation 🏫': key, 'Number of Papers': affiliation_paper_count[key]} for key in affiliation_paper_count.keys()],

                            #                      style_header={'backgroundColor': '#f2f2f2', 'whiteSpace': 'normal','height': 'auto'},
                            #                       style_cell={'textAlign': 'left'},

                            #                      style_data={'whiteSpace': 'normal','height': 'auto'},
                            #style_table={"margin-top": "40px", 'whiteSpace': 'normal', 'height': 'auto'},
                            #style_cell_conditional=[

                            #{'if': {'column_id': 'Affiliation 🏫'},

                            #'width': '85%'},
                            #{'if': {'column_id': 'Number of Papers'},
                            #'width': '15%'}
                            #]
                            #)
                        ],
                        id='div_table_analytics'),
                    html.H2(children='Citation graph of related papers',
                            id='title_graph_div')
                ]

                return layout, style, fig, fig_emb, style_emb

            else:
                return [], {
                    'display': 'none'
                }, go.Figure(data=[],
                             layout=go.Layout(
                                 title='',
                                 titlefont_size=16,
                             )), go.Figure(data=[],
                                           layout=go.Layout(
                                               title='',
                                               titlefont_size=16,
                                           )), {
                                               'display': 'none'
                                           }
        else:
            return [], {
                'display': 'none'
            }, go.Figure(data=[],
                         layout=go.Layout(
                             title='',
                             titlefont_size=16,
                         )), go.Figure(data=[],
                                       layout=go.Layout(
                                           title='',
                                           titlefont_size=16,
                                       )), {
                                           'display': 'none'
                                       }
Пример #53
0
                # If the prediction has no attribute values (e.g. []) and
                # neither does the ground truth, count it as a correct
                # prediction. This may happen.
                if not p_value and not groundtruth[p_key]:
                    true_positive_prediction += 1
                # Count the number of correct predictions for node p_key;
                # here len(p_value) = 1, but more values could have been predicted.

                if p_value in groundtruth[p_key]:
                    true_positive_prediction += 1
                    #len([c for c in p_value if c in groundtruth[p_key]])
            # No else branch: this should not happen, since the train and test
            # datasets are consistent.
    return true_positive_prediction * 100 / total_predictions


# load the graph
G = nx.read_gexf("mediumLinkedin.gexf")
print("Nb of users in our graph: %d" % len(G))

# Load the profiles: three files, one per attribute type.
# Some nodes in G have no attributes.
# Some nodes may have a single 'location' attribute.
# Some nodes may have one or more 'colleges' or 'employers', so
# dictionaries are used to store the attributes.
college = {}
location = {}
employer = {}
# The dictionaries are loaded from disk with pickle (see the pickle module
# in the Python documentation).
with open('mediumCollege_60percent_of_empty_profile.pickle', 'rb') as handle:
    college = pickle.load(handle)
with open('mediumLocation_60percent_of_empty_profile.pickle', 'rb') as handle:
    location = pickle.load(handle)
Пример #54
0
__author__ = 'Amir'
import logging

logging.basicConfig(level=logging.DEBUG, format='%(process)d\t%(asctime)s:%(levelname)s: %(message)s', datefmt='%H:%M:%S')

from pre_process import crawl_to_graph

DS_DIR = '/home/amir/pyproj/amazon-review-spam/io/same_cat_v2'

graph, membs, prods = crawl_to_graph(ds_dir=DS_DIR)
graph_orig = graph.copy()

import networkx as nx
from os import path
mgraph = nx.read_gexf(path.join(DS_DIR, '%s.gexf' % 'em_unlabeled_mgraph'))


author_product_mapping = {}
for a in mgraph:
    author_product_mapping[a] = [p for p in graph[a]]


from hardEM_gurobi import HardEM

nparts = 4
ll, partition = HardEM.run_EM(author_graph=mgraph, author_product_map=author_product_mapping, nparts=nparts*5, parallel=True, nprocs=4)

for a in mgraph:
    mgraph.node[a]['cLabel'] = int(partition[a])
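
# Illustrative follow-up only: the partition labels stored on the nodes could
# be written back out, e.g. (the output filename is a placeholder):
#     nx.write_gexf(mgraph, path.join(DS_DIR, 'em_labeled_mgraph.gexf'))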

Пример #55
0
def orig_graph_map_WL(graph_list_1, graph_list_2, h):
    """ Computes orig_graph_map for classes 1 and 2, for a given height h. """

    labels = {}
    label_lookup = {}
    label_counter = 0
    graph_idx = range(len(graph_list_1) + len(graph_list_2))
    orig_graph_map = {
        it: {gidx: defaultdict(lambda: 0)
             for gidx in graph_idx}
        for it in range(-1, h)
    }
    idx_2 = 0
    #initial labeling
    for gidx in graph_idx:

        if gidx < len(graph_list_1):

            G = nx.read_gexf('/your_dir/' + graph_list_1[gidx] + '.gexf',
                             node_type=None,
                             relabel=True,
                             version='1.1draft')
            G = nx.convert_node_labels_to_integers(G)
            degrees = G.degree()  #this is a dictionary
            nx.set_node_attributes(G, 'label', degrees)

            labels[gidx] = np.zeros(G.order(), dtype=np.int32)
            #for node in graph_list[gidx].node:
            #for node in range(len(graph_list[gidx])):
            for i in range(len(G.node)):
                label = G.node[i]["label"]
                if not label in label_lookup:
                    label_lookup[label] = label_counter
                    labels[gidx][i] = label_counter
                    label_counter += 1
                else:
                    labels[gidx][i] = label_lookup[label]
                orig_graph_map[-1][gidx][label] = orig_graph_map[-1][gidx].get(
                    label, 0) + 1

        else:

            G = nx.read_gexf('/your_dir/' + graph_list_2[idx_2] + '.gexf',
                             node_type=None,
                             relabel=True,
                             version='1.1draft')
            G = nx.convert_node_labels_to_integers(G)
            degrees = G.degree()  # this is a dictionary
            nx.set_node_attributes(G, 'label', degrees)

            labels[gidx] = np.zeros(G.order(), dtype=np.int32)
            #for node in graph_list[gidx].node:
            #for node in range(len(graph_list[gidx])):
            for i in range(len(G.node)):
                label = G.node[i]["label"]
                if not label in label_lookup:
                    label_lookup[label] = label_counter
                    labels[gidx][i] = label_counter
                    label_counter += 1
                else:
                    labels[gidx][i] = label_lookup[label]
                orig_graph_map[-1][gidx][label] = orig_graph_map[-1][gidx].get(
                    label, 0) + 1

            idx_2 = idx_2 + 1

    compressed_labels = deepcopy(labels)

    idx_2 = 0

    # WL iterations started
    for it in range(h):
        unique_labels_per_h = set()
        label_lookup = {}
        label_counter = 0
        idx_2 = 0

        for gidx in graph_idx:

            if gidx < len(graph_list_1):

                G = nx.read_gexf('/your_dir/' + graph_list_1[gidx] + '.gexf',
                                 node_type=None,
                                 relabel=True,
                                 version='1.1draft')
                G = nx.convert_node_labels_to_integers(G)
                degrees = G.degree()  # this is a dictionary
                nx.set_node_attributes(G, 'label', degrees)

                #for node in range(len(graph_list[gidx])):
                for i in range(len(G.node)):
                    node_label = tuple([labels[gidx][i]])
                    neighbors = G.neighbors(i)
                    #neighbors = graph_list[gidx][node]["neighbors"]
                    if len(neighbors) > 0:
                        neighbors_label = tuple(
                            [labels[gidx][i] for i in neighbors])
                        node_label = str(node_label) + "-" + str(
                            sorted(neighbors_label))
                    if not node_label in label_lookup:
                        label_lookup[node_label] = label_counter
                        compressed_labels[gidx][i] = label_counter
                        label_counter += 1
                    else:
                        compressed_labels[gidx][i] = label_lookup[node_label]
                    orig_graph_map[it][gidx][
                        node_label] = orig_graph_map[it][gidx].get(
                            node_label, 0) + 1

            else:

                G = nx.read_gexf('/your_dir/' + graph_list_2[idx_2] + '.gexf',
                                 node_type=None,
                                 relabel=True,
                                 version='1.1draft')
                G = nx.convert_node_labels_to_integers(G)
                degrees = G.degree()  # this is a dictionary
                nx.set_node_attributes(G, 'label', degrees)

                #for node in range(len(graph_list[gidx])):

                for i in range(len(G.node)):
                    node_label = tuple([labels[gidx][i]])
                    neighbors = G.neighbors(i)
                    #neighbors = graph_list[gidx][node]["neighbors"]
                    if len(neighbors) > 0:
                        neighbors_label = tuple(
                            [labels[gidx][i] for i in neighbors])
                        node_label = str(node_label) + "-" + str(
                            sorted(neighbors_label))
                    if not node_label in label_lookup:
                        label_lookup[node_label] = label_counter
                        compressed_labels[gidx][i] = label_counter
                        label_counter += 1
                    else:
                        compressed_labels[gidx][i] = label_lookup[node_label]
                    orig_graph_map[it][gidx][
                        node_label] = orig_graph_map[it][gidx].get(
                            node_label, 0) + 1

                idx_2 = idx_2 + 1

        print("Number of compressed labels at iteration %s: %s" %
              (it, len(label_lookup)))
        labels = deepcopy(compressed_labels)

    return orig_graph_map
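
# Illustrative usage sketch: the returned per-iteration label-count maps can
# be reduced to a pairwise WL kernel value by summing, over all iterations,
# the dot product of the two graphs' label counts. This mirrors the K
# computation in the next example.
def wl_kernel_value(orig_graph_map, i, j, h):
    value = 0
    for it in range(-1, h):
        common = set(orig_graph_map[it][i].keys()) & set(orig_graph_map[it][j].keys())
        value += sum(orig_graph_map[it][i].get(k, 0) * orig_graph_map[it][j].get(k, 0)
                     for k in common)
    return value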
Пример #56
0
    def read_network_data(self, path):

        self.G = nx.read_gexf(path)
Пример #57
0
def compute_mle_wl_kernel(graph_list, h):
    """ Computes original WL kernel for a given height h. """

    start_time_mle = time.time()
    labels = {}
    label_lookup = {}
    label_counter = 0
    graph_idx = range(len(graph_list))
    num_graphs = len(graph_idx)
    orig_graph_map = {
        it: {gidx: defaultdict(lambda: 0)
             for gidx in graph_idx}
        for it in range(-1, h)
    }
    # initial labeling
    for gidx in graph_idx:

        G = nx.read_gexf(
            '/Users/Syzygy/workspace/Stage_Shanghai/gexf_simplified_france/' +
            graph_list[gidx] + '.gexf',
            node_type=None,
            relabel=True,
            version='1.1draft')
        G = nx.convert_node_labels_to_integers(G)
        degrees = G.degree()  # this is a dictionary
        nx.set_node_attributes(G, 'label', degrees)

        labels[gidx] = np.zeros(G.order(), dtype=np.int32)
        #for node in graph_list[gidx].node:
        #for node in range(len(graph_list[gidx])):
        for i in range(len(G.node)):
            label = G.node[i]["label"]
            if not label in label_lookup:
                label_lookup[label] = label_counter
                labels[gidx][i] = label_counter
                label_counter += 1
            else:
                labels[gidx][i] = label_lookup[label]
            orig_graph_map[-1][gidx][label] = orig_graph_map[-1][gidx].get(
                label, 0) + 1
    compressed_labels = copy.deepcopy(labels)

    # WL iterations started
    for it in range(h):
        unique_labels_per_h = set()
        label_lookup = {}
        label_counter = 0
        for gidx in graph_idx:

            G = nx.read_gexf(
                '/Users/Syzygy/workspace/Stage_Shanghai/gexf_simplified_france/'
                + graph_list[gidx] + '.gexf',
                node_type=None,
                relabel=True,
                version='1.1draft')
            G = nx.convert_node_labels_to_integers(G)
            degrees = G.degree()  # this is a dictionary
            nx.set_node_attributes(G, 'label', degrees)

            #for node in range(len(graph_list[gidx])):
            #print(gidx,'on en est la')
            for i in range(len(G.node)):
                node_label = tuple([labels[gidx][i]])
                neighbors = G.neighbors(i)
                #neighbors = graph_list[gidx][node]["neighbors"]
                if len(neighbors) > 0:
                    neighbors_label = tuple(
                        [labels[gidx][i] for i in neighbors])
                    node_label = str(node_label) + "-" + str(
                        sorted(neighbors_label))
                if not node_label in label_lookup:
                    label_lookup[node_label] = label_counter
                    compressed_labels[gidx][i] = label_counter
                    label_counter += 1
                else:
                    compressed_labels[gidx][i] = label_lookup[node_label]
                orig_graph_map[it][
                    gidx][node_label] = orig_graph_map[it][gidx].get(
                        node_label, 0) + 1
        print("Number of compressed labels at iteration %s: %s" %
              (it, len(label_lookup)))
        labels = copy.deepcopy(compressed_labels)

    K = np.zeros((num_graphs, num_graphs))

    for it in range(-1, h):
        for i in range(num_graphs):
            for j in range(num_graphs):
                common_keys = set(orig_graph_map[it][i].keys()) & set(
                    orig_graph_map[it][j].keys())
                K[i][j] += sum([
                    orig_graph_map[it][i].get(k, 0) *
                    orig_graph_map[it][j].get(k, 0) for k in common_keys
                ])

    end_time_mle_kernel = time.time()
    print(
        "Total time for MLE computation for WL kernel (with kernel computation) %g"
        % (end_time_mle_kernel - start_time_mle))

    return K
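
# Illustrative usage sketch: the matrix K returned above is a precomputed
# kernel matrix, so it can be fed to an SVM with a precomputed kernel.
# 'graph_ids' (the list of .gexf basenames) and 'y' (one class label per
# graph) are placeholder names here.
#
#     from sklearn.svm import SVC
#     K = compute_mle_wl_kernel(graph_ids, h=2)
#     clf = SVC(kernel='precomputed').fit(K, y)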
Пример #58
0
    def process(self):
        import networkx as nx

        ids, Ns = [], []
        # Iterating over paths for raw and processed data (train + test):
        for r_path, p_path in zip(self.raw_paths, self.processed_paths):
            # Find the paths of all raw graphs:
            names = glob.glob(osp.join(r_path, '*.gexf'))
            # Get sorted graph IDs given filename: 123.gexf -> 123
            ids.append(sorted([int(i.split(os.sep)[-1][:-5]) for i in names]))

            data_list = []
            # Convert graphs in .gexf format to a NetworkX Graph:
            for i, idx in enumerate(ids[-1]):
                i = i if len(ids) == 1 else i + len(ids[0])
                # Reading the raw `*.gexf` graph:
                G = nx.read_gexf(osp.join(r_path, f'{idx}.gexf'))
                # Mapping of nodes in `G` to a contiguous number:
                mapping = {name: j for j, name in enumerate(G.nodes())}
                G = nx.relabel_nodes(G, mapping)
                Ns.append(G.number_of_nodes())

                edge_index = torch.tensor(list(G.edges)).t().contiguous()
                if edge_index.numel() == 0:
                    edge_index = torch.empty((2, 0), dtype=torch.long)
                edge_index = to_undirected(edge_index, num_nodes=Ns[-1])

                data = Data(edge_index=edge_index, i=i)
                data.num_nodes = Ns[-1]

                # Create a one-hot encoded feature matrix denoting the atom
                # type (for the `AIDS700nef` dataset):
                if self.name == 'AIDS700nef':
                    x = torch.zeros(data.num_nodes, dtype=torch.long)
                    for node, info in G.nodes(data=True):
                        x[int(node)] = self.types.index(info['type'])
                    data.x = F.one_hot(x, num_classes=len(self.types)).to(
                        torch.float)

                if self.pre_filter is not None and not self.pre_filter(data):
                    continue

                if self.pre_transform is not None:
                    data = self.pre_transform(data)

                data_list.append(data)

            torch.save(self.collate(data_list), p_path)

        assoc = {idx: i for i, idx in enumerate(ids[0])}
        assoc.update({idx: i + len(ids[0]) for i, idx in enumerate(ids[1])})

        # Extracting ground-truth GEDs from the GED pickle file
        path = osp.join(self.raw_dir, self.name, 'ged.pickle')
        # Initialize GEDs as float('inf'):
        mat = torch.full((len(assoc), len(assoc)), float('inf'))
        with open(path, 'rb') as f:
            obj = pickle.load(f)
            xs, ys, gs = [], [], []
            for (x, y), g in obj.items():
                xs += [assoc[x]]
                ys += [assoc[y]]
                gs += [g]
            # The pickle file does not contain GEDs for test graph pairs, i.e.
            # GEDs for (test_graph, test_graph) pairs are still float('inf'):
            x, y = torch.tensor(xs), torch.tensor(ys)
            ged = torch.tensor(gs, dtype=torch.float)
            mat[x, y], mat[y, x] = ged, ged

        path = osp.join(self.processed_dir, f'{self.name}_ged.pt')
        torch.save(mat, path)

        # Calculate the normalized GEDs:
        N = torch.tensor(Ns, dtype=torch.float)
        norm_mat = mat / (0.5 * (N.view(-1, 1) + N.view(1, -1)))

        path = osp.join(self.processed_dir, f'{self.name}_norm_ged.pt')
        torch.save(norm_mat, path)
Пример #59
0
import networkx as nx
import matplotlib.pyplot as plt
import random

G = nx.read_gexf('../../dataset/graph-small/graph.gexf')

pagerank = nx.pagerank(G)

for node_id in G.nodes:
    G.nodes[node_id]['pagerank'] = round(pagerank[node_id], 5)

pos = nx.spring_layout(G)

nx.draw(G, pos)
node_labels = nx.get_node_attributes(G, 'pagerank')
nx.draw_networkx_labels(G, pos, labels=node_labels)

plt.show()
Пример #60
0
# %% A Initialisation
import networkx as nx
from py2neo import Graph, Node, Relationship
import pandas as pd
from neo4j import GraphDatabase, basic_auth

graph = Graph("bolt://localhost:7687", auth=("neo4j", "Password"))
driver = GraphDatabase.driver('bolt://localhost', auth=basic_auth("neo4j", "Password"))
db = driver.session()

# results = db.run("MATCH (w:Website) WHERE 'IN' in w.D1_status RETURN w.site_name, w.D1_homepage")
# df = pd.DataFrame([r["w.D1_homepage"] for r in results])

# %% Load gexf
gexfD0DISCO="C:\\Users\\Jo\\Documents\\Tech\\Atom_prj\\MyMedia-FillDB\\data\\202007WebsitesRS_D0\\202007WebsiteRSD0.gexf"
G = nx.read_gexf(gexfD0DISCO, node_type=None, relabel=False, version='1.1draft')
data = nx.json_graph.node_link_data(G)
totnbnodes = len(data['nodes'])
print(totnbnodes, "nodes found in gexf")


# %% A Run the first pages: import the procedures
import sys
dataPath = 'C:\\Users\\Jo\\Documents\\Tech\\Atom_prj\\MyMedia-FillDB\\data'
if dataPath not in sys.path:
    sys.path.insert(0, dataPath)

from FromNotebook import getFirstPageNode, getFirstPageRSNode, myUrlParse
# %% This downloads all firstpages if not there and put results in firstPageRS_df

medialistD0path = "C:\\Users\\Jo\\Documents\\Tech\\Atom_prj\\MyMedia-FillDB\\data\\202007WebsitesRS_D0\\20200709_medialistForD0.txt"